├── .gitignore ├── rustfmt.toml ├── README.md ├── Cargo.toml ├── src ├── element.rs ├── tree │ └── tag.rs ├── token.rs ├── tree_config.rs ├── lib.rs ├── node.rs └── tree.rs ├── LICENSE-MIT ├── .github └── workflows │ └── ci.yaml └── LICENSE-APACHE /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | use_small_heuristics = "Max" 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **eventree – a Rust library for creating lossless syntax trees.** 2 | 3 | ###### Please see the [docs](https://docs.rs/eventree). 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | categories = ["parsing", "data-structures"] 3 | description = "A library for creating lossless syntax trees" 4 | edition = "2021" 5 | license = "MIT OR Apache-2.0" 6 | name = "eventree" 7 | readme = "README.md" 8 | repository = "https://github.com/lunacookies/eventree" 9 | version = "0.7.0" 10 | 11 | [dependencies] 12 | static_assertions = "1.1.0" 13 | text-size = "1.1.1" 14 | 15 | [dev-dependencies] 16 | expect-test = "1.4.1" 17 | -------------------------------------------------------------------------------- /src/element.rs: -------------------------------------------------------------------------------- 1 | use crate::{SyntaxNode, SyntaxToken}; 2 | 3 | /// An element of a syntax tree. 4 | /// Either a node or a token. 
5 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 6 | pub enum SyntaxElement { 7 | #[allow(missing_docs)] 8 | Node(SyntaxNode), 9 | #[allow(missing_docs)] 10 | Token(SyntaxToken), 11 | } 12 | 13 | impl SyntaxElement { 14 | /// Asserts this element is a node. Panics if it was actually a token. 15 | pub fn unwrap_node(self) -> SyntaxNode { 16 | match self { 17 | Self::Node(node) => node, 18 | Self::Token(_) => panic!("expected node"), 19 | } 20 | } 21 | 22 | /// Asserts this element is a token. Panics if it was actually a node. 23 | pub fn unwrap_token(self) -> SyntaxToken { 24 | match self { 25 | Self::Node(_) => panic!("expected token"), 26 | Self::Token(token) => token, 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main, staging, trying] 7 | 8 | env: 9 | RUSTFLAGS: "--deny warnings" 10 | MIRIFLAGS: "-Zmiri-strict-provenance" 11 | 12 | jobs: 13 | rust: 14 | name: Rust 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Install Rust 21 | uses: dtolnay/rust-toolchain@stable 22 | with: 23 | components: clippy, rustfmt 24 | 25 | - name: Build 26 | run: cargo build --all-targets --all-features 27 | 28 | - name: Clippy 29 | run: cargo clippy --all-targets --all-features 30 | 31 | - name: Test 32 | run: cargo test --all-targets --all-features 33 | 34 | - name: Format 35 | run: cargo fmt -- --check 36 | 37 | miri: 38 | name: Miri 39 | runs-on: ubuntu-latest 40 | 41 | steps: 42 | - uses: actions/checkout@v4 43 | 44 | - name: Install Rust 45 | uses: dtolnay/rust-toolchain@nightly 46 | with: 47 | components: miri 48 | 49 | - name: Miri 50 | run: cargo miri test --all-features 51 | -------------------------------------------------------------------------------- /src/tree/tag.rs: -------------------------------------------------------------------------------- 1 | use super::EventKind; 2 | use crate::TreeConfig; 3 | 4 | #[derive(Clone, Copy)] 5 | #[repr(transparent)] 6 | pub(super) struct Tag(u16); 7 | 8 | impl Tag { 9 | const MAX_KIND: u16 = (u16::MAX >> 1) - 1; // all 1s apart from first and last 10 | 11 | pub(super) fn start_node(kind: C::NodeKind) -> Self { 12 | let raw = C::node_kind_to_raw(kind); 13 | 
debug_assert!(raw <= Self::MAX_KIND); 14 | Self(raw | 1 << 15) // set high bit to 1 15 | } 16 | 17 | pub(super) fn add_token(kind: C::TokenKind) -> Self { 18 | let raw = C::token_kind_to_raw(kind); 19 | debug_assert!(raw <= Self::MAX_KIND); 20 | Self(raw) 21 | } 22 | 23 | pub(super) fn event_kind(self) -> EventKind { 24 | if self.high_bit_is_1() { 25 | EventKind::StartNode 26 | } else { 27 | EventKind::AddToken 28 | } 29 | } 30 | 31 | pub(super) fn get_start_node_kind(self) -> C::NodeKind { 32 | debug_assert_eq!(self.event_kind(), EventKind::StartNode); 33 | let raw = self.0 & u16::MAX >> 1; // zero out high bit 34 | debug_assert!(raw <= Self::MAX_KIND); 35 | unsafe { C::node_kind_from_raw(raw) } 36 | } 37 | 38 | pub(super) fn get_add_token_kind(self) -> C::TokenKind { 39 | debug_assert_eq!(self.event_kind(), EventKind::AddToken); 40 | debug_assert!(self.0 <= Self::MAX_KIND); 41 | unsafe { C::token_kind_from_raw(self.0) } 42 | } 43 | 44 | fn high_bit_is_1(self) -> bool { 45 | self.0 & 1 << 15 != 0 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/token.rs: -------------------------------------------------------------------------------- 1 | use crate::tree::EventIdx; 2 | use crate::{SyntaxTree, TextRange, TreeConfig}; 3 | use std::marker::PhantomData; 4 | 5 | /// A handle to a specific token in a specific [`SyntaxTree`]. 6 | /// 7 | /// All accessor methods will panic if used with a tree 8 | /// other than the one this token is from. 9 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 10 | pub struct SyntaxToken { 11 | idx: EventIdx, 12 | tree_id: u32, 13 | phantom: PhantomData, 14 | } 15 | 16 | static_assertions::assert_eq_size!(SyntaxToken<()>, Option>, u64); 17 | 18 | impl SyntaxToken { 19 | #[inline(always)] 20 | pub(crate) unsafe fn new(idx: EventIdx, tree_id: u32) -> Self { 21 | Self { idx, tree_id, phantom: PhantomData } 22 | } 23 | 24 | /// Returns the kind of this token. 
25 | pub fn kind(self, tree: &SyntaxTree) -> C::TokenKind { 26 | self.verify_tree(tree); 27 | unsafe { tree.get_add_token(self.idx).kind } 28 | } 29 | 30 | /// Returns the text associated with this token. 31 | pub fn text(self, tree: &SyntaxTree) -> &str { 32 | self.verify_tree(tree); 33 | unsafe { 34 | let add_token = tree.get_add_token(self.idx); 35 | tree.get_text(add_token.start, add_token.end) 36 | } 37 | } 38 | 39 | /// Returns the range this token spans in the original input. 40 | pub fn range(self, tree: &SyntaxTree) -> TextRange { 41 | self.verify_tree(tree); 42 | let add_token = unsafe { tree.get_add_token(self.idx) }; 43 | TextRange::new(add_token.start.into(), add_token.end.into()) 44 | } 45 | 46 | fn verify_tree(self, tree: &SyntaxTree) { 47 | assert_eq!( 48 | self.tree_id, 49 | tree.id(), 50 | "tried to access token data from tree other than the one this token is from" 51 | ); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/tree_config.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::hash::Hash; 3 | 4 | /// A trait for converting between eventree’s 5 | /// [internal kind representation][`crate::SyntaxTree#tag`] 6 | /// and your own custom enums for the kinds of nodes and tokens. 7 | /// 8 | /// Since a `TreeConfig` is never actually constructed 9 | /// and exists just to connect a `NodeKind` and a `TokenKind`, 10 | /// an *uninhabited type* such as `enum Foo {}` 11 | /// can be used. 
For instance: 12 | /// 13 | /// ``` 14 | /// #[derive(Debug, PartialEq)] 15 | /// #[repr(u8)] 16 | /// enum MyNodeKind { Root, Foo } 17 | /// 18 | /// #[derive(Debug, PartialEq)] 19 | /// #[repr(u8)] 20 | /// enum MyTokenKind { Bar, Baz } 21 | /// 22 | /// #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 23 | /// enum TreeConfig {} 24 | /// 25 | /// // SAFETY: 26 | /// // - we have less than 0b0111_1111_1111_1110 (32,766) enum variants 27 | /// // - values returned by to_raw can be passed into from_raw safely 28 | /// unsafe impl eventree::TreeConfig for TreeConfig { 29 | /// type NodeKind = MyNodeKind; 30 | /// type TokenKind = MyTokenKind; 31 | /// 32 | /// fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { 33 | /// node_kind as u16 34 | /// } 35 | /// 36 | /// fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { 37 | /// token_kind as u16 38 | /// } 39 | /// 40 | /// unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { 41 | /// std::mem::transmute(raw as u8) 42 | /// } 43 | /// 44 | /// unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { 45 | /// std::mem::transmute(raw as u8) 46 | /// } 47 | /// } 48 | /// ``` 49 | /// 50 | /// # Safety 51 | /// 52 | /// This trait is `unsafe` to implement 53 | /// because you must satisfy the following requirements: 54 | /// 55 | /// - all values returned by [`TreeConfig::node_kind_to_raw`] and [`TreeConfig::token_kind_to_raw`] 56 | /// must be less than or equal to `0b0111_1111_1111_1110` 57 | /// - values must be roundtrippable through the `to_raw` methods 58 | /// and back through the `from_raw` methods 59 | /// 60 | /// Not fulfilling these requirements can result in undefined behaviour. 61 | pub unsafe trait TreeConfig: 62 | Debug + Clone + Copy + PartialEq + Eq + PartialOrd + Ord + Hash 63 | { 64 | /// The kind of nodes in the syntax tree. 65 | type NodeKind: Debug; 66 | 67 | /// The kind of tokens in the syntax tree. 
68 | type TokenKind: Debug; 69 | 70 | /// Converts your custom type to a `u16`. 71 | /// 72 | /// # Suggested implementation 73 | /// Generally you will implement this by casting your enum using `as` syntax. 74 | /// Putting any more complex logic than that here will result in worse tree performance. 75 | /// 76 | /// # Contract 77 | /// Part of this trait’s contract is that all values returned by this method 78 | /// are less than or equal to `0b0111_1111_1111_1110`. 79 | fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16; 80 | 81 | /// Converts your custom type to a `u16`. 82 | /// 83 | /// # Suggested implementation 84 | /// Generally you will implement this by casting your enum using `as` syntax. 85 | /// Putting any more complex logic than that here will result in worse tree performance. 86 | /// 87 | /// # Contract 88 | /// Part of this trait’s contract is that all values returned by this method 89 | /// are less than or equal to `0b0111_1111_1111_1110`. 90 | fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16; 91 | 92 | /// Turns a raw `u16` back into your custom type. 93 | /// 94 | /// # Safety 95 | /// This method must only be called with values returned by [`TreeConfig::node_kind_to_raw`]; 96 | /// if it isn’t, your implementation is allowed to invoke undefined behaviour 97 | /// (which is why this method is `unsafe`). 98 | /// 99 | /// # Suggested implementation 100 | /// One way to implement this method is to use [`std::mem::transmute`] 101 | /// (given that your [`TreeConfig::node_kind_to_raw`] method just returns your enum’s value). 102 | /// Make sure to specify the representation of your enum (e.g. with `#[repr(u8)]`) 103 | /// since [transmuting non-primitive types without a specified representation 104 | /// is undefined behaviour][ref]. 105 | /// 106 | /// Any expensive operations performed here will result in 107 | /// a degradation in tree performance. 
108 | /// 109 | /// [ref]: https://doc.rust-lang.org/reference/type-layout.html#the-default-representation 110 | unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind; 111 | 112 | /// Turns a raw `u16` back into your custom type. 113 | /// 114 | /// # Safety 115 | /// This method must only be called with values returned by [`TreeConfig::token_kind_to_raw`]; 116 | /// if it isn’t, your implementation is allowed to invoke undefined behaviour 117 | /// (which is why this method is `unsafe`). 118 | /// 119 | /// # Suggested implementation 120 | /// One way to implement this method is to use [`std::mem::transmute`] 121 | /// (given that your [`TreeConfig::token_kind_to_raw`] method just returns your enum’s value). 122 | /// Make sure to specify the representation of your enum (e.g. with `#[repr(u8)]`) 123 | /// since [transmuting non-primitive types without a specified representation 124 | /// is undefined behaviour][ref]. 125 | /// 126 | /// Any expensive operations performed here will result in 127 | /// a degradation in tree performance. 128 | /// 129 | /// [ref]: https://doc.rust-lang.org/reference/type-layout.html#the-default-representation 130 | unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind; 131 | } 132 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! ###### A Rust library for creating lossless syntax trees. 2 | //! 3 | //! Let’s construct a syntax tree that can represent the following expression: 4 | //! 5 | //! ```text 6 | //! foo+10*20 7 | //! ``` 8 | //! 9 | //! This is the tree we want to build: 10 | //! 11 | //! ```text 12 | //! Root 13 | //! 
BinaryExpr 14 | //! Ident "foo" 15 | //! Plus "+" 16 | //! BinaryExpr 17 | //! Number "10" 18 | //! Star "*" 19 | //! Number "20" 20 | //! ``` 21 | //! 22 | //! What kinds of nodes and tokens do we have here? 23 | //! 24 | //! ``` 25 | //! enum NodeKind { 26 | //! Root, 27 | //! BinaryExpr, 28 | //! } 29 | //! 30 | //! enum TokenKind { 31 | //! Number, 32 | //! Ident, 33 | //! Plus, 34 | //! Star, 35 | //! } 36 | //! ``` 37 | //! 38 | //! Before we can use these enums, 39 | //! we have to teach eventree how to convert between them and `u16`s, 40 | //! which can be stored generically in the syntax tree 41 | //! no matter what enums the users of this library define. 42 | //! I know that it’s a lot of boilerplate and that all those `unsafe`s look really scary, 43 | //! but I promise it isn’t too bad! 44 | //! 45 | //! ``` 46 | //! #[derive(Debug, PartialEq)] 47 | //! #[repr(u8)] 48 | //! enum NodeKind { 49 | //! Root, 50 | //! BinaryExpr, 51 | //! } 52 | //! 53 | //! #[derive(Debug, PartialEq)] 54 | //! #[repr(u8)] 55 | //! enum TokenKind { 56 | //! Number, 57 | //! Ident, 58 | //! Plus, 59 | //! Star, 60 | //! } 61 | //! 62 | //! #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 63 | //! enum TreeConfig {} 64 | //! 65 | //! unsafe impl eventree::TreeConfig for TreeConfig { 66 | //! type NodeKind = NodeKind; 67 | //! type TokenKind = TokenKind; 68 | //! 69 | //! fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { 70 | //! node_kind as u16 71 | //! } 72 | //! 73 | //! fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { 74 | //! token_kind as u16 75 | //! } 76 | //! 77 | //! unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { 78 | //! std::mem::transmute(raw as u8) 79 | //! } 80 | //! 81 | //! unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { 82 | //! std::mem::transmute(raw as u8) 83 | //! } 84 | //! } 85 | //! ``` 86 | //! 87 | //! Continue by creating a [`SyntaxBuilder`], 88 | //! 
which lets you construct syntax trees: 89 | //! 90 | //! ``` 91 | //! # #[derive(Debug, PartialEq)] 92 | //! # #[repr(u8)] 93 | //! # enum NodeKind { Root, BinaryExpr } 94 | //! # #[derive(Debug, PartialEq)] 95 | //! # #[repr(u8)] 96 | //! # enum TokenKind { Number, Ident, Plus, Star } 97 | //! # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 98 | //! # enum TreeConfig {} 99 | //! # unsafe impl eventree::TreeConfig for TreeConfig { 100 | //! # type NodeKind = NodeKind; 101 | //! # type TokenKind = TokenKind; 102 | //! # fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { node_kind as u16 } 103 | //! # fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { token_kind as u16 } 104 | //! # unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { std::mem::transmute(raw as u8) } 105 | //! # unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { std::mem::transmute(raw as u8) } 106 | //! # } 107 | //! let mut builder = eventree::SyntaxBuilder::<TreeConfig>::new("foo+10*20"); 108 | //! ``` 109 | //! 110 | //! eventree, as the name implies (thanks [Quirl](https://github.com/domenicquirl/)!), 111 | //! is based around *events.* 112 | //! To explain what that means, let me bring back that syntax tree from earlier: 113 | //! 114 | //! ```text 115 | //! Root 116 | //! BinaryExpr 117 | //! Ident "foo" 118 | //! Plus "+" 119 | //! BinaryExpr 120 | //! Number "10" 121 | //! Star "*" 122 | //! Number "20" 123 | //! ``` 124 | //! 125 | //! And now as events: 126 | //! 127 | //! ```text 128 | //! START_NODE Root 129 | //! START_NODE BinaryExpr 130 | //! ADD_TOKEN Ident "foo" 131 | //! ADD_TOKEN Plus "+" 132 | //! START_NODE BinaryExpr 133 | //! ADD_TOKEN Number "10" 134 | //! ADD_TOKEN Star "*" 135 | //! ADD_TOKEN Number "20" 136 | //! FINISH_NODE 137 | //! FINISH_NODE 138 | //! FINISH_NODE 139 | //! ``` 140 | //! 141 | //! What’s great about this is that we’ve transformed a tree structure into a flat sequence. 142 | //! 
Maybe it’s a bit more obvious if I show it like this: 143 | //! 144 | //! ```text 145 | //! [ 146 | //! START_NODE Root, 147 | //! START_NODE BinaryExpr, 148 | //! ADD_TOKEN Ident "foo", 149 | //! ADD_TOKEN Plus "+", 150 | //! START_NODE BinaryExpr, 151 | //! ADD_TOKEN Number "10", 152 | //! ADD_TOKEN Star "*", 153 | //! ADD_TOKEN Number "20", 154 | //! FINISH_NODE, 155 | //! FINISH_NODE, 156 | //! FINISH_NODE, 157 | //! ] 158 | //! ``` 159 | //! 160 | //! What eventree does is it stores a sequence of events like the one above 161 | //! in an [efficient format][`SyntaxTree#format`], 162 | //! while providing convenient APIs for traversing the tree. 163 | //! 164 | //! Before we get too ahead of ourselves, let’s construct the tree: 165 | //! 166 | //! ``` 167 | //! # #[derive(Debug, PartialEq)] 168 | //! # #[repr(u8)] 169 | //! # enum NodeKind { Root, BinaryExpr } 170 | //! # #[derive(Debug, PartialEq)] 171 | //! # #[repr(u8)] 172 | //! # enum TokenKind { Number, Ident, Plus, Star } 173 | //! # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 174 | //! # enum TreeConfig {} 175 | //! # unsafe impl eventree::TreeConfig for TreeConfig { 176 | //! # type NodeKind = NodeKind; 177 | //! # type TokenKind = TokenKind; 178 | //! # fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { node_kind as u16 } 179 | //! # fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { token_kind as u16 } 180 | //! # unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { std::mem::transmute(raw as u8) } 181 | //! # unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { std::mem::transmute(raw as u8) } 182 | //! # } 183 | //! use eventree::{SyntaxBuilder, TextRange}; 184 | //! 185 | //! let mut builder = SyntaxBuilder::<TreeConfig>::new("foo+10*20"); 186 | //! builder.start_node(NodeKind::Root); 187 | //! builder.start_node(NodeKind::BinaryExpr); 188 | //! builder.add_token(TokenKind::Ident, TextRange::new(0.into(), 3.into())); 189 | //! 
builder.add_token(TokenKind::Plus, TextRange::new(3.into(), 4.into())); 190 | //! builder.start_node(NodeKind::BinaryExpr); 191 | //! builder.add_token(TokenKind::Number, TextRange::new(4.into(), 6.into())); 192 | //! builder.add_token(TokenKind::Star, TextRange::new(6.into(), 7.into())); 193 | //! builder.add_token(TokenKind::Number, TextRange::new(7.into(), 9.into())); 194 | //! builder.finish_node(); 195 | //! builder.finish_node(); 196 | //! builder.finish_node(); 197 | //! ``` 198 | //! 199 | //! Note how rather than specifying the text of each token directly 200 | //! we’re instead just passing the range of each one in the original input. 201 | //! 202 | //! The last thing we’ll go over is some examples of the APIs eventree provides. 203 | //! 204 | //! ``` 205 | //! # #[derive(Debug, PartialEq)] 206 | //! # #[repr(u8)] 207 | //! # enum NodeKind { Root, BinaryExpr } 208 | //! # #[derive(Debug, PartialEq)] 209 | //! # #[repr(u8)] 210 | //! # enum TokenKind { Number, Ident, Plus, Star } 211 | //! # #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 212 | //! # enum TreeConfig {} 213 | //! # unsafe impl eventree::TreeConfig for TreeConfig { 214 | //! # type NodeKind = NodeKind; 215 | //! # type TokenKind = TokenKind; 216 | //! # fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { node_kind as u16 } 217 | //! # fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { token_kind as u16 } 218 | //! # unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { std::mem::transmute(raw as u8) } 219 | //! # unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { std::mem::transmute(raw as u8) } 220 | //! # } 221 | //! use eventree::{SyntaxBuilder, SyntaxNode, SyntaxToken, SyntaxTree, TextRange}; 222 | //! 223 | //! let mut builder = SyntaxBuilder::<TreeConfig>::new("foo+10*20"); 224 | //! builder.start_node(NodeKind::Root); 225 | //! // ... 226 | //! # builder.start_node(NodeKind::BinaryExpr); 227 | //! 
# builder.add_token(TokenKind::Ident, TextRange::new(0.into(), 3.into())); 228 | //! # builder.add_token(TokenKind::Plus, TextRange::new(3.into(), 4.into())); 229 | //! # builder.start_node(NodeKind::BinaryExpr); 230 | //! # builder.add_token(TokenKind::Number, TextRange::new(4.into(), 6.into())); 231 | //! # builder.add_token(TokenKind::Star, TextRange::new(6.into(), 7.into())); 232 | //! # builder.add_token(TokenKind::Number, TextRange::new(7.into(), 9.into())); 233 | //! # builder.finish_node(); 234 | //! # builder.finish_node(); 235 | //! builder.finish_node(); 236 | //! 237 | //! let tree = builder.finish(); 238 | //! 239 | //! // let’s get the root of the tree 240 | //! let root = tree.root(); 241 | //! 242 | //! // we can get the kind, text and range of nodes 243 | //! assert_eq!(root.kind(&tree), NodeKind::Root); 244 | //! assert_eq!(root.text(&tree), "foo+10*20"); 245 | //! assert_eq!(root.range(&tree), TextRange::new(0.into(), 9.into())); 246 | //! 247 | //! // we can get the child nodes in the root; there’s just one, the BinaryExpr 248 | //! let mut child_nodes = root.child_nodes(&tree); 249 | //! let binary_expr = child_nodes.next().unwrap(); 250 | //! assert_eq!(binary_expr.kind(&tree), NodeKind::BinaryExpr); 251 | //! assert!(child_nodes.next().is_none()); 252 | //! 253 | //! // let’s look at the descendant tokens of the BinaryExpr 254 | //! let mut descendant_tokens = binary_expr.descendant_tokens(&tree); 255 | //! 256 | //! // we can also get the kind, text and range of tokens 257 | //! let ident = descendant_tokens.next().unwrap(); 258 | //! assert_eq!(ident.kind(&tree), TokenKind::Ident); 259 | //! assert_eq!(ident.text(&tree), "foo"); 260 | //! assert_eq!(ident.range(&tree), TextRange::new(0.into(), 3.into())); 261 | //! 262 | //! // let’s finish off by going through all descendant tokens 263 | //! // until we reach the end 264 | //! assert_eq!(descendant_tokens.next().unwrap().text(&tree), "+"); 265 | //! 
assert_eq!(descendant_tokens.next().unwrap().text(&tree), "10"); 266 | //! assert_eq!(descendant_tokens.next().unwrap().text(&tree), "*"); 267 | //! assert_eq!(descendant_tokens.next().unwrap().text(&tree), "20"); 268 | //! assert!(descendant_tokens.next().is_none()); 269 | //! ``` 270 | //! 271 | //! I hope this was helpful! 272 | 273 | #![warn(missing_docs, unreachable_pub, rust_2018_idioms)] 274 | 275 | mod element; 276 | mod node; 277 | mod token; 278 | mod tree; 279 | mod tree_config; 280 | 281 | pub use self::element::SyntaxElement; 282 | pub use self::node::SyntaxNode; 283 | pub use self::token::SyntaxToken; 284 | pub use self::tree::{Event, RawEvent, SyntaxBuilder, SyntaxTree, SyntaxTreeBuf}; 285 | pub use self::tree_config::TreeConfig; 286 | 287 | pub use text_size::{TextLen, TextRange, TextSize}; 288 | -------------------------------------------------------------------------------- /src/node.rs: -------------------------------------------------------------------------------- 1 | use crate::tree::{EventIdx, EventKind, ADD_TOKEN_SIZE, START_NODE_SIZE}; 2 | use crate::{SyntaxElement, SyntaxToken, SyntaxTree, TextRange, TreeConfig}; 3 | use std::hash::Hash; 4 | use std::marker::PhantomData; 5 | 6 | /// A handle to a specific node in a specific [`SyntaxTree`]. 7 | /// 8 | /// A syntax tree’s root node can be obtained by calling [`SyntaxTree::root`]. 9 | /// 10 | /// All accessor methods will panic if used with a tree 11 | /// other than the one this node is from. 12 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 13 | pub struct SyntaxNode { 14 | idx: EventIdx, 15 | tree_id: u32, 16 | phantom: PhantomData, 17 | } 18 | 19 | static_assertions::assert_eq_size!(SyntaxNode<()>, Option>, u64); 20 | 21 | impl SyntaxNode { 22 | #[inline(always)] 23 | pub(crate) unsafe fn new(idx: EventIdx, tree_id: u32) -> Self { 24 | Self { idx, tree_id, phantom: PhantomData } 25 | } 26 | 27 | /// Returns the kind of this node. 
28 | pub fn kind(self, tree: &SyntaxTree) -> C::NodeKind { 29 | self.verify_tree(tree); 30 | unsafe { tree.get_start_node(self.idx).kind } 31 | } 32 | 33 | /// Returns an iterator over the direct child nodes and tokens of this node. 34 | pub fn children(self, tree: &SyntaxTree) -> impl Iterator> + '_ { 35 | self.verify_tree(tree); 36 | Children { 37 | idx: self.idx + START_NODE_SIZE, 38 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 39 | tree, 40 | tree_id: self.tree_id, 41 | } 42 | } 43 | 44 | /// Returns an iterator over the direct child nodes of this node. 45 | pub fn child_nodes(self, tree: &SyntaxTree) -> impl Iterator> + '_ { 46 | self.verify_tree(tree); 47 | ChildNodes { 48 | idx: self.idx + START_NODE_SIZE, 49 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 50 | tree, 51 | tree_id: self.tree_id, 52 | } 53 | } 54 | 55 | /// Returns an iterator over the direct child tokens of this node. 56 | pub fn child_tokens(self, tree: &SyntaxTree) -> impl Iterator> + '_ { 57 | self.verify_tree(tree); 58 | ChildTokens { 59 | idx: self.idx + START_NODE_SIZE, 60 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 61 | tree, 62 | tree_id: self.tree_id, 63 | } 64 | } 65 | 66 | /// Returns an iterator over the descendant nodes and tokens of this node 67 | /// in depth-first order. 68 | pub fn descendants(self, tree: &SyntaxTree) -> impl Iterator> + '_ { 69 | self.verify_tree(tree); 70 | Descendants { 71 | idx: self.idx + START_NODE_SIZE, 72 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 73 | tree, 74 | tree_id: self.tree_id, 75 | } 76 | } 77 | 78 | /// Returns an iterator over the descendant nodes of this node 79 | /// in depth-first order. 
80 | pub fn descendant_nodes( 81 | self, 82 | tree: &SyntaxTree, 83 | ) -> impl Iterator> + '_ { 84 | self.verify_tree(tree); 85 | DescendantNodes { 86 | idx: self.idx + START_NODE_SIZE, 87 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 88 | tree, 89 | tree_id: self.tree_id, 90 | } 91 | } 92 | 93 | /// Returns an iterator over the descendant tokens of this node 94 | /// in depth-first order. 95 | pub fn descendant_tokens( 96 | self, 97 | tree: &SyntaxTree, 98 | ) -> impl Iterator> + '_ { 99 | self.verify_tree(tree); 100 | DescendantTokens { 101 | idx: self.idx + START_NODE_SIZE, 102 | finish_idx: unsafe { tree.get_start_node(self.idx).finish_node_idx }, 103 | tree, 104 | tree_id: self.tree_id, 105 | } 106 | } 107 | 108 | /// Returns the range this node spans in the original input. 109 | pub fn range(self, tree: &SyntaxTree) -> TextRange { 110 | self.verify_tree(tree); 111 | let start_node = unsafe { tree.get_start_node(self.idx) }; 112 | TextRange::new(start_node.start.into(), start_node.end.into()) 113 | } 114 | 115 | /// Returns the text of all the tokens this node contains. 
116 | pub fn text(self, tree: &SyntaxTree) -> &str { 117 | self.verify_tree(tree); 118 | unsafe { 119 | let start_node = tree.get_start_node(self.idx); 120 | tree.get_text(start_node.start, start_node.end) 121 | } 122 | } 123 | 124 | fn verify_tree(self, tree: &SyntaxTree) { 125 | assert_eq!( 126 | self.tree_id, 127 | tree.id(), 128 | "tried to access node data from tree other than the one this node is from" 129 | ); 130 | } 131 | } 132 | 133 | struct Children<'a, C> { 134 | idx: EventIdx, 135 | finish_idx: EventIdx, 136 | tree: &'a SyntaxTree, 137 | tree_id: u32, 138 | } 139 | 140 | impl Iterator for Children<'_, C> { 141 | type Item = SyntaxElement; 142 | 143 | fn next(&mut self) -> Option { 144 | if self.finish_idx <= self.idx { 145 | return None; 146 | } 147 | 148 | unsafe { 149 | match self.tree.event_kind(self.idx) { 150 | EventKind::StartNode => { 151 | let finish_node_idx = self.tree.get_start_node(self.idx).finish_node_idx; 152 | let element = SyntaxElement::Node(SyntaxNode::new(self.idx, self.tree_id)); 153 | self.idx = finish_node_idx; 154 | Some(element) 155 | } 156 | EventKind::AddToken => { 157 | let element = SyntaxElement::Token(SyntaxToken::new(self.idx, self.tree_id)); 158 | self.idx += ADD_TOKEN_SIZE; 159 | Some(element) 160 | } 161 | } 162 | } 163 | } 164 | } 165 | 166 | struct ChildNodes<'a, C> { 167 | idx: EventIdx, 168 | finish_idx: EventIdx, 169 | tree: &'a SyntaxTree, 170 | tree_id: u32, 171 | } 172 | 173 | impl Iterator for ChildNodes<'_, C> { 174 | type Item = SyntaxNode; 175 | 176 | fn next(&mut self) -> Option { 177 | while self.idx < self.finish_idx { 178 | unsafe { 179 | match self.tree.event_kind(self.idx) { 180 | EventKind::StartNode => { 181 | let finish_node_idx = self.tree.get_start_node(self.idx).finish_node_idx; 182 | let node = SyntaxNode::new(self.idx, self.tree_id); 183 | self.idx = finish_node_idx; 184 | return Some(node); 185 | } 186 | EventKind::AddToken => { 187 | self.idx += ADD_TOKEN_SIZE; 188 | continue; 189 | } 190 | 
} 191 | } 192 | } 193 | 194 | None 195 | } 196 | } 197 | 198 | struct ChildTokens<'a, C> { 199 | finish_idx: EventIdx, 200 | idx: EventIdx, 201 | tree: &'a SyntaxTree, 202 | tree_id: u32, 203 | } 204 | 205 | impl Iterator for ChildTokens<'_, C> { 206 | type Item = SyntaxToken; 207 | 208 | fn next(&mut self) -> Option { 209 | while self.idx < self.finish_idx { 210 | unsafe { 211 | match self.tree.event_kind(self.idx) { 212 | EventKind::StartNode => { 213 | let finish_node_idx = self.tree.get_start_node(self.idx).finish_node_idx; 214 | self.idx = finish_node_idx; 215 | continue; 216 | } 217 | EventKind::AddToken => { 218 | let token = SyntaxToken::new(self.idx, self.tree_id); 219 | self.idx += ADD_TOKEN_SIZE; 220 | return Some(token); 221 | } 222 | } 223 | } 224 | } 225 | 226 | None 227 | } 228 | } 229 | 230 | struct Descendants<'a, C> { 231 | finish_idx: EventIdx, 232 | idx: EventIdx, 233 | tree: &'a SyntaxTree, 234 | tree_id: u32, 235 | } 236 | 237 | impl Iterator for Descendants<'_, C> { 238 | type Item = SyntaxElement; 239 | 240 | fn next(&mut self) -> Option { 241 | debug_assert!(self.idx <= self.finish_idx); 242 | if self.idx == self.finish_idx { 243 | return None; 244 | } 245 | 246 | unsafe { 247 | match self.tree.event_kind(self.idx) { 248 | EventKind::StartNode => { 249 | let element = SyntaxElement::Node(SyntaxNode::new(self.idx, self.tree_id)); 250 | self.idx += START_NODE_SIZE; 251 | Some(element) 252 | } 253 | EventKind::AddToken => { 254 | let element = SyntaxElement::Token(SyntaxToken::new(self.idx, self.tree_id)); 255 | self.idx += ADD_TOKEN_SIZE; 256 | Some(element) 257 | } 258 | } 259 | } 260 | } 261 | } 262 | 263 | struct DescendantNodes<'a, C> { 264 | finish_idx: EventIdx, 265 | idx: EventIdx, 266 | tree: &'a SyntaxTree, 267 | tree_id: u32, 268 | } 269 | 270 | impl Iterator for DescendantNodes<'_, C> { 271 | type Item = SyntaxNode; 272 | 273 | fn next(&mut self) -> Option { 274 | while self.idx < self.finish_idx { 275 | unsafe { 276 | match 
self.tree.event_kind(self.idx) { 277 | EventKind::StartNode => { 278 | let node = SyntaxNode::new(self.idx, self.tree_id); 279 | self.idx += START_NODE_SIZE; 280 | return Some(node); 281 | } 282 | EventKind::AddToken => { 283 | self.idx += ADD_TOKEN_SIZE; 284 | continue; 285 | } 286 | } 287 | } 288 | } 289 | 290 | None 291 | } 292 | } 293 | 294 | struct DescendantTokens<'a, C> { 295 | finish_idx: EventIdx, 296 | idx: EventIdx, 297 | tree: &'a SyntaxTree, 298 | tree_id: u32, 299 | } 300 | 301 | impl Iterator for DescendantTokens<'_, C> { 302 | type Item = SyntaxToken; 303 | 304 | fn next(&mut self) -> Option { 305 | while self.idx < self.finish_idx { 306 | unsafe { 307 | match self.tree.event_kind(self.idx) { 308 | EventKind::StartNode => { 309 | self.idx += START_NODE_SIZE; 310 | continue; 311 | } 312 | EventKind::AddToken => { 313 | let token = SyntaxToken::new(self.idx, self.tree_id); 314 | self.idx += ADD_TOKEN_SIZE; 315 | return Some(token); 316 | } 317 | } 318 | } 319 | } 320 | 321 | None 322 | } 323 | } 324 | 325 | #[cfg(test)] 326 | mod tests { 327 | use super::*; 328 | use crate::{SyntaxBuilder, SyntaxTreeBuf}; 329 | use std::sync::OnceLock; 330 | 331 | #[derive(Debug, PartialEq)] 332 | #[repr(u8)] 333 | enum NodeKind { 334 | Root, 335 | BinaryExpr, 336 | Call, 337 | } 338 | 339 | #[derive(Debug, PartialEq)] 340 | #[repr(u8)] 341 | enum TokenKind { 342 | Asterisk, 343 | Ident, 344 | IntLiteral, 345 | Plus, 346 | } 347 | 348 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 349 | enum TreeConfig {} 350 | 351 | unsafe impl crate::TreeConfig for TreeConfig { 352 | type NodeKind = NodeKind; 353 | type TokenKind = TokenKind; 354 | 355 | fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 { 356 | node_kind as u16 357 | } 358 | 359 | fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 { 360 | token_kind as u16 361 | } 362 | 363 | unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind { 364 | std::mem::transmute(raw as u8) 365 | } 366 | 
367 | unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind { 368 | std::mem::transmute(raw as u8) 369 | } 370 | } 371 | 372 | fn example_tree() -> &'static SyntaxTree { 373 | static BUF: OnceLock> = OnceLock::new(); 374 | 375 | BUF.get_or_init(|| { 376 | let mut builder = SyntaxBuilder::new("2*5+10foo"); 377 | 378 | builder.start_node(NodeKind::Root); 379 | { 380 | builder.start_node(NodeKind::BinaryExpr); 381 | { 382 | builder.start_node(NodeKind::BinaryExpr); 383 | builder.add_token(TokenKind::IntLiteral, TextRange::new(0.into(), 1.into())); 384 | builder.add_token(TokenKind::Asterisk, TextRange::new(1.into(), 2.into())); 385 | builder.add_token(TokenKind::IntLiteral, TextRange::new(2.into(), 3.into())); 386 | builder.finish_node(); 387 | } 388 | builder.add_token(TokenKind::Plus, TextRange::new(3.into(), 4.into())); 389 | builder.add_token(TokenKind::IntLiteral, TextRange::new(4.into(), 6.into())); 390 | builder.finish_node(); 391 | } 392 | { 393 | builder.start_node(NodeKind::Call); 394 | builder.add_token(TokenKind::Ident, TextRange::new(6.into(), 9.into())); 395 | builder.finish_node(); 396 | } 397 | builder.finish_node(); 398 | 399 | builder.finish() 400 | }) 401 | } 402 | 403 | #[test] 404 | fn children() { 405 | let tree = example_tree(); 406 | let root = tree.root(); 407 | 408 | let mut children = root.children(tree); 409 | let binary_expr = children.next().unwrap().unwrap_node(); 410 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 411 | let call = children.next().unwrap().unwrap_node(); 412 | assert_eq!(call.kind(tree), NodeKind::Call); 413 | assert!(children.next().is_none()); 414 | 415 | let mut children = binary_expr.children(tree); 416 | assert_eq!(children.next().unwrap().unwrap_node().kind(tree), NodeKind::BinaryExpr); 417 | assert_eq!(children.next().unwrap().unwrap_token().kind(tree), TokenKind::Plus); 418 | assert_eq!(children.next().unwrap().unwrap_token().kind(tree), TokenKind::IntLiteral); 419 | 
assert!(children.next().is_none()); 420 | 421 | let mut children = call.children(tree); 422 | assert_eq!(children.next().unwrap().unwrap_token().kind(tree), TokenKind::Ident); 423 | assert!(children.next().is_none()); 424 | } 425 | 426 | #[test] 427 | fn child_nodes() { 428 | let tree = example_tree(); 429 | let root = tree.root(); 430 | 431 | let mut child_nodes = root.child_nodes(tree); 432 | let binary_expr = child_nodes.next().unwrap(); 433 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 434 | let call = child_nodes.next().unwrap(); 435 | assert_eq!(call.kind(tree), NodeKind::Call); 436 | assert!(child_nodes.next().is_none()); 437 | 438 | let mut child_nodes = binary_expr.child_nodes(tree); 439 | assert_eq!(child_nodes.next().unwrap().kind(tree), NodeKind::BinaryExpr); 440 | assert!(child_nodes.next().is_none()); 441 | 442 | let mut child_nodes = call.child_nodes(tree); 443 | assert!(child_nodes.next().is_none()); 444 | } 445 | 446 | #[test] 447 | fn child_tokens() { 448 | let tree = example_tree(); 449 | let root = tree.root(); 450 | 451 | let mut child_tokens = root.child_tokens(tree); 452 | assert!(child_tokens.next().is_none()); 453 | 454 | let mut child_nodes = root.child_nodes(tree); 455 | let binary_expr = child_nodes.next().unwrap(); 456 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 457 | let call = child_nodes.next().unwrap(); 458 | assert_eq!(call.kind(tree), NodeKind::Call); 459 | assert!(child_nodes.next().is_none()); 460 | 461 | let mut child_tokens = binary_expr.child_tokens(tree); 462 | assert_eq!(child_tokens.next().unwrap().kind(tree), TokenKind::Plus); 463 | assert_eq!(child_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 464 | assert!(child_tokens.next().is_none()); 465 | 466 | let mut child_tokens = call.child_tokens(tree); 467 | assert_eq!(child_tokens.next().unwrap().kind(tree), TokenKind::Ident); 468 | assert!(child_tokens.next().is_none()); 469 | } 470 | 471 | #[test] 472 | fn descendants() { 473 | 
let tree = example_tree(); 474 | let root = tree.root(); 475 | 476 | let mut descendants = root.descendants(tree); 477 | let binary_expr = descendants.next().unwrap().unwrap_node(); 478 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 479 | 480 | let binary_expr_2 = descendants.next().unwrap().unwrap_node(); 481 | assert_eq!(binary_expr_2.kind(tree), NodeKind::BinaryExpr); 482 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::IntLiteral); 483 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::Asterisk); 484 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::IntLiteral); 485 | 486 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::Plus); 487 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::IntLiteral); 488 | 489 | let call = descendants.next().unwrap().unwrap_node(); 490 | assert_eq!(call.kind(tree), NodeKind::Call); 491 | assert_eq!(descendants.next().unwrap().unwrap_token().kind(tree), TokenKind::Ident); 492 | assert!(descendants.next().is_none()); 493 | 494 | let mut descendants = binary_expr.child_nodes(tree); 495 | assert_eq!(descendants.next().unwrap().kind(tree), NodeKind::BinaryExpr); 496 | assert!(descendants.next().is_none()); 497 | 498 | let mut descendant_nodes = call.child_nodes(tree); 499 | assert!(descendant_nodes.next().is_none()); 500 | } 501 | 502 | #[test] 503 | fn descendant_nodes() { 504 | let tree = example_tree(); 505 | let root = tree.root(); 506 | 507 | let mut descendant_nodes = root.descendant_nodes(tree); 508 | let binary_expr = descendant_nodes.next().unwrap(); 509 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 510 | let binary_expr_2 = descendant_nodes.next().unwrap(); 511 | assert_eq!(binary_expr_2.kind(tree), NodeKind::BinaryExpr); 512 | let call = descendant_nodes.next().unwrap(); 513 | assert_eq!(call.kind(tree), NodeKind::Call); 514 | 
assert!(descendant_nodes.next().is_none()); 515 | 516 | let mut descendant_nodes = binary_expr.child_nodes(tree); 517 | assert_eq!(descendant_nodes.next().unwrap().kind(tree), NodeKind::BinaryExpr); 518 | assert!(descendant_nodes.next().is_none()); 519 | 520 | let mut descendant_nodes = call.child_nodes(tree); 521 | assert!(descendant_nodes.next().is_none()); 522 | } 523 | 524 | #[test] 525 | fn descendant_tokens() { 526 | let tree = example_tree(); 527 | let root = tree.root(); 528 | 529 | let mut descendant_tokens = root.descendant_tokens(tree); 530 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 531 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Asterisk); 532 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 533 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Plus); 534 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 535 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Ident); 536 | assert!(descendant_tokens.next().is_none()); 537 | 538 | let mut child_nodes = root.child_nodes(tree); 539 | 540 | let binary_expr = child_nodes.next().unwrap(); 541 | assert_eq!(binary_expr.kind(tree), NodeKind::BinaryExpr); 542 | let mut descendant_tokens = binary_expr.descendant_tokens(tree); 543 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 544 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Asterisk); 545 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 546 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Plus); 547 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::IntLiteral); 548 | assert!(descendant_tokens.next().is_none()); 549 | 550 | let call = child_nodes.next().unwrap(); 551 | assert_eq!(call.kind(tree), NodeKind::Call); 552 | let mut descendant_tokens = 
call.descendant_tokens(tree); 553 | assert_eq!(descendant_tokens.next().unwrap().kind(tree), TokenKind::Ident); 554 | assert!(descendant_tokens.next().is_none()); 555 | 556 | assert!(child_nodes.next().is_none()); 557 | } 558 | } 559 | -------------------------------------------------------------------------------- /src/tree.rs: -------------------------------------------------------------------------------- 1 | mod tag; 2 | 3 | use self::tag::Tag; 4 | use crate::{SyntaxNode, SyntaxToken, TextRange, TreeConfig}; 5 | use std::fmt; 6 | use std::marker::PhantomData; 7 | use std::num::NonZeroU32; 8 | use std::ops::{Add, AddAssign, Deref}; 9 | use std::sync::atomic::{AtomicU32, Ordering}; 10 | 11 | /// `SyntaxTreeBuf` owns the syntax tree allocation. 12 | /// To construct a tree, see [`SyntaxBuilder`]. 13 | /// To access its contents, see [`SyntaxTree`]’s methods. 14 | pub struct SyntaxTreeBuf { 15 | data: Box>, 16 | } 17 | 18 | /// `SyntaxTree` stores the syntax tree. 19 | /// To construct a tree, see [`SyntaxBuilder`]. 20 | /// To access its contents, see [`SyntaxTree::root`]. 21 | /// 22 | /// `SyntaxTree`, like all other `Syntax*` types, is generic over a [`TreeConfig`], 23 | /// which specifies how the kinds of nodes and tokens 24 | /// can be converted between the library consumer’s custom enum and a raw concrete type. 25 | /// 26 | /// # Format 27 | /// 28 | /// The in-memory format of the syntax tree as described below 29 | /// is subject to change and an implementation detail. 30 | /// 31 | /// The tree has four sections: 32 | /// 33 | /// - `u32` ID 34 | /// - `u32` length of text 35 | /// - `[u8]` UTF-8 encoded text 36 | /// - `[u8]` events 37 | /// 38 | /// These are stored contiguously in one memory allocation. 39 | /// Nodes and tokens are a `u32` byte index into this allocation. 40 | /// All numerical types are stored in the target platform’s native endianness. 
41 | /// 42 | /// ## ID 43 | /// 44 | /// To ensure nodes and tokens are only used with the tree they were created from, 45 | /// every tree is assigned a `u32` ID from an atomic global counter. 46 | /// Nodes and tokens both store the ID of their tree, 47 | /// which is checked when node or token data is accessed. 48 | /// 49 | /// ## Text 50 | /// 51 | /// The text of the entire source file must be provided upfront, 52 | /// allowing it to be stored efficiently all in one place. 53 | /// This makes getting the text of nodes and tokens incredibly cheap: 54 | /// we can just index into the text section of the tree 55 | /// using the range of the node or token. 56 | /// 57 | /// ## Events 58 | /// 59 | /// Following the name of this library, 60 | /// the tree is stored as a flat sequence of events. 61 | /// The encoding is as follows: 62 | /// 63 | /// - *start node* (14 bytes): 64 | /// - `u16` tag 65 | /// - `u32` index of first event following the end of this node 66 | /// - `u32` range start 67 | /// - `u32` range end 68 | /// - *add token* (10 bytes): 69 | /// - `u16` tag 70 | /// - `u32` range start 71 | /// - `u32` range end 72 | /// 73 | /// A separate *finish node* event kind is unnecessary 74 | /// because *start node* events store where such an event would be located. 75 | /// 76 | /// ### Tag 77 | /// 78 | /// Simplistically, the tag is the following type, 79 | /// but packed into a single `u16`. 80 | /// 81 | /// ``` 82 | /// # type Kind = u16; 83 | /// enum Tag { StartNode(Kind), AddToken(Kind) } 84 | /// ``` 85 | /// 86 | /// *start node* or *add token* are distinguished by the highest bit: 87 | /// `1` means *start node*, and `0` means *add token*. 88 | /// The remaining fifteen bits store the kind. 89 | #[repr(transparent)] 90 | pub struct SyntaxTree { 91 | phantom: PhantomData, 92 | data: [u8], 93 | } 94 | 95 | /// This type is used to construct a [`SyntaxTree`]. 
96 | /// 97 | /// Due to the custom in-memory format used for [`SyntaxTree`], 98 | /// the text of your entire input must be provided up-front in [`SyntaxBuilder::new`]. 99 | pub struct SyntaxBuilder<C> { 100 | data: Vec<u8>, // tree ID, text length, text bytes, then the encoded events 101 | is_root_set: bool, 102 | current_len: u32, // end offset of the most recently added token 103 | start_node_idxs: Vec<usize>, // byte offsets into `data` of start-node events not yet finished 104 | nesting: u32, 105 | phantom: PhantomData<C>, 106 | } 107 | 108 | pub(crate) const START_NODE_SIZE: EventSize = EventSize(std::mem::size_of::<RawStartNode>() as u32); 109 | pub(crate) const ADD_TOKEN_SIZE: EventSize = EventSize(std::mem::size_of::<RawAddToken>() as u32); 110 | 111 | const FINISH_NODE_IDX_PLACEHOLDER: u32 = 0; // written by start_node, overwritten by finish_node 112 | 113 | fn gen_tree_id() -> u32 { 114 | static CURRENT: AtomicU32 = AtomicU32::new(0); 115 | CURRENT.fetch_add(1, Ordering::Relaxed) 116 | } 117 | 118 | impl<C: TreeConfig> SyntaxBuilder<C> { 119 | /// Constructs a new empty `SyntaxBuilder` with the provided source text. 120 | pub fn new(text: &str) -> Self { 121 | Self::with_capacity(text, 0, 0) 122 | } 123 | 124 | /// Constructs a new empty `SyntaxBuilder` with the provided source text 125 | /// and room for the specified event counts. 126 | /// 127 | /// Make sure to benchmark before switching to this method 128 | /// because precomputing event counts can be slow, 129 | /// even slower than just using [`SyntaxBuilder::new`].
130 | pub fn with_capacity(text: &str, start_nodes: usize, add_tokens: usize) -> Self { 131 | assert!(text.len() < u32::MAX as usize); 132 | 133 | let id = gen_tree_id(); 134 | 135 | let mut data = Vec::with_capacity( 136 | 4 + 4 137 | + text.len() 138 | + start_nodes * START_NODE_SIZE.to_usize() 139 | + add_tokens * ADD_TOKEN_SIZE.to_usize(), 140 | ); 141 | 142 | data.extend_from_slice(&id.to_ne_bytes()); 143 | data.extend_from_slice(&(text.len() as u32).to_ne_bytes()); 144 | data.extend_from_slice(text.as_bytes()); 145 | 146 | Self { 147 | data, 148 | is_root_set: false, 149 | current_len: 0, 150 | start_node_idxs: Vec::new(), 151 | nesting: 0, 152 | phantom: PhantomData, 153 | } 154 | } 155 | 156 | /// Starts a new node with the specified kind. 157 | /// 158 | /// # Panics 159 | /// 160 | /// - if you have finished creating a root node and try to create another 161 | #[inline(always)] 162 | pub fn start_node(&mut self, kind: C::NodeKind) { 163 | if self.is_root_set { 164 | assert_ne!(self.nesting, 0, "root node already created"); 165 | } else { 166 | self.is_root_set = true; 167 | } 168 | 169 | self.nesting += 1; 170 | 171 | self.start_node_idxs.push(self.data.len()); 172 | 173 | self.data.reserve(START_NODE_SIZE.to_usize()); 174 | unsafe { 175 | self.end_ptr().cast::().write_unaligned(RawStartNode { 176 | tag: Tag::start_node::(kind), 177 | finish_node_idx: FINISH_NODE_IDX_PLACEHOLDER, 178 | start: self.current_len, 179 | end: self.current_len, 180 | }); 181 | 182 | self.data.set_len(self.data.len() + START_NODE_SIZE.to_usize()); 183 | } 184 | } 185 | 186 | /// Adds a token with the provided kind and range to the current node. 
187 | /// 188 | /// # Panics 189 | /// 190 | /// - if you try to add a token before starting a node 191 | /// - if the provided range is out of bounds 192 | /// - if the provided range does not lie on a UTF-8 character boundary 193 | #[inline(always)] 194 | pub fn add_token(&mut self, kind: C::TokenKind, range: TextRange) { 195 | assert!(self.nesting > 0, "cannot add token before starting node"); 196 | 197 | assert!( 198 | u32::from(range.end()) <= self.text_len(), 199 | "token is out of range: range is {range:?}, but text is 0..{}", 200 | self.text_len() 201 | ); 202 | 203 | let all_text = self.all_text(); 204 | assert!( 205 | all_text.is_char_boundary(u32::from(range.start()) as usize) 206 | && all_text.is_char_boundary(u32::from(range.end()) as usize), 207 | "tried to create token that does not lie on UTF-8 character boundary" 208 | ); 209 | 210 | let start = u32::from(range.start()); 211 | let end = u32::from(range.end()); 212 | self.current_len = end; // start_node and finish_node read this for node ranges 213 | 214 | self.data.reserve(ADD_TOKEN_SIZE.to_usize()); 215 | 216 | unsafe { 217 | self.end_ptr().cast::<RawAddToken>().write_unaligned(RawAddToken { 218 | tag: Tag::add_token::<C>(kind), 219 | start, 220 | end, 221 | }); 222 | 223 | self.data.set_len(self.data.len() + ADD_TOKEN_SIZE.to_usize()); 224 | } 225 | } 226 | 227 | /// Completes the current node and makes the parent node current.
///
    /// # Panics
    ///
    /// - if all outstanding nodes have already been finished
    #[inline(always)]
    pub fn finish_node(&mut self) {
        assert!(self.nesting > 0, "no nodes are yet to be finished");
        self.nesting -= 1;

        // `nesting > 0` was asserted above; the `unwrap` additionally relies on
        // `start_node_idxs` holding one entry per open node
        // (presumably pushed by `start_node`, which is outside this excerpt — confirm).
        let start_node_idx = self.start_node_idxs.pop().unwrap();
        // Byte offset one past everything written to `data` so far.
        let finish_node_idx = self.data.len() as u32;

        // SAFETY: assumes `start_node_idx` is the byte offset of a complete
        // `RawStartNode` inside `data`; debug builds check the tag and the placeholder
        // below. `RawStartNode` is `repr(C, packed)`, so the reference has no
        // alignment requirement.
        unsafe {
            let ptr = &mut *self.data.as_mut_ptr().add(start_node_idx).cast::<RawStartNode>();
            debug_assert_eq!(ptr.tag.event_kind(), EventKind::StartNode);

            // debug_assert_eq tries to take a reference to the field,
            // which isn’t allowed since it’s packed,
            // so we use a manual debug_assert instead
            debug_assert!(ptr.finish_node_idx == FINISH_NODE_IDX_PLACEHOLDER);

            // Patch the node header now that its extent is known.
            ptr.finish_node_idx = finish_node_idx;
            ptr.end = self.current_len;
        }
    }

    /// Completes the tree and freezes it into the read-only [`SyntaxTreeBuf`] type.
    ///
    /// # Panics
    ///
    /// - if no nodes have been created
    /// - if there are nodes which have not been finished
    pub fn finish(self) -> SyntaxTreeBuf<C> {
        let Self { data, is_root_set, current_len: _, start_node_idxs: _, nesting, phantom: _ } =
            self;

        assert!(is_root_set, "no nodes created");

        assert_eq!(nesting, 0, "did not finish all nodes ({nesting} unfinished nodes)");

        // into_boxed_slice calls shrink_to_fit for us
        SyntaxTreeBuf {
            // SAFETY: assumes `SyntaxTree<C>` is a transparent wrapper around `[u8]`,
            // so `Box<[u8]>` and `Box<SyntaxTree<C>>` share a layout.
            // NOTE(review): the struct definition is outside this excerpt — confirm.
            data: unsafe {
                std::mem::transmute::<Box<[u8]>, Box<SyntaxTree<C>>>(data.into_boxed_slice())
            },
        }
    }

    /// Returns the source text stored in the buffer: the `text_len()` bytes that start
    /// 8 bytes into `data`.
    fn all_text(&self) -> &str {
        let len = self.text_len() as usize;
        // SAFETY: assumes `data` holds at least `8 + len` bytes and that those bytes are
        // the UTF-8 text the builder was created with (written outside this excerpt).
        unsafe {
            let s = self.data.get_unchecked(8..len + 8);

            // has to stay unchecked even in debug mode
            // since this method is called every time a token is added
            //
            // if we perform an operation in this method that depends on the input size,
            // then tree construction becomes O(n^2)
            // (since input size and the number of tokens are roughly proportional)
            std::str::from_utf8_unchecked(s)
        }
    }

    /// Reads the text length: the second `u32` of `data` (bytes 4..8).
    fn text_len(&self) -> u32 {
        // SAFETY: assumes `data` is at least 8 bytes long; the read is unaligned,
        // so no alignment is required of the buffer.
        unsafe { self.data.as_ptr().cast::<u32>().add(1).read_unaligned() }
    }

    /// Returns a pointer one past the last byte currently in `data`.
    fn end_ptr(&mut self) -> *mut u8 {
        // SAFETY: offsetting by `len` yields the one-past-the-end pointer of the
        // vector’s initialized region, which lies within (or at the end of) its allocation.
        unsafe { self.data.as_mut_ptr().add(self.data.len()) }
    }
}

// NOTE(review): the generic parameter lists from here on were reconstructed from usage
// (`C::NodeKind`, the field types, the test module); confirm the exact bounds against
// the upstream file.
impl<C: TreeConfig> SyntaxTree<C> {
    /// Returns the root node of this tree.
    pub fn root(&self) -> SyntaxNode<C> {
        // SAFETY: assumes the event at `root_idx()` is the root’s start-node event
        // and that `id()` is this tree’s own id — confirm against `SyntaxNode::new`’s contract.
        unsafe { SyntaxNode::new(self.root_idx(), self.id()) }
    }

    /// Returns an iterator over the events stored in this tree.
    ///
    /// The difference between this method and [`SyntaxTree::raw_events`] is that
    /// this method returns [`SyntaxNode`]s and [`SyntaxToken`]s,
    /// while [`SyntaxTree::raw_events`] returns the data actually stored in the tree.
    pub fn events(&self) -> impl Iterator<Item = Event<C>> + '_ {
        Events { idx: self.root_idx(), tree: self, finish_node_idxs: Vec::new() }
    }

    /// Returns an iterator over the raw events stored in this tree.
    ///
    /// As compared to [`SyntaxTree::events`],
    /// this method emits the data actually stored in the tree,
    /// as opposed to handles to that data ([`SyntaxNode`]s and [`SyntaxToken`]s).
    ///
    /// This method does not compute any more information
    /// than what is stored in the tree.
    /// The only difference between the [`RawEvent`]s returned by this method
    /// and what is stored inside the tree
    /// is that the events returned by this method are fixed-length and typed,
    /// while the tree’s internal storage is variable-length and untyped.
    pub fn raw_events(&self) -> impl Iterator<Item = RawEvent<C>> + '_ {
        RawEvents { idx: self.root_idx(), tree: self, finish_node_idxs: Vec::new() }
    }

    /// Returns the index of the first event: the text length (bytes 4..8 of `data`)
    /// plus the 8 bytes that precede the text.
    pub(crate) fn root_idx(&self) -> EventIdx {
        // SAFETY: assumes `data` is at least 8 bytes long. `text_len + 8` is non-zero
        // unless the addition overflows, which satisfies `EventIdx::new`.
        unsafe {
            let text_len = self.data.as_ptr().cast::<u32>().add(1).read_unaligned();
            EventIdx::new(text_len + 8)
        }
    }

    /// Returns this tree’s id: the first `u32` of `data` (bytes 0..4).
    pub(crate) fn id(&self) -> u32 {
        // SAFETY: assumes `data` is at least 4 bytes long; the read is unaligned.
        unsafe { self.data.as_ptr().cast::<u32>().read_unaligned() }
    }

    /// Returns the text between `start` and `end`, both relative to the start of the text
    /// (which begins 8 bytes into `data`).
    ///
    /// # Safety
    ///
    /// `start <= end`, `end + 8` must not exceed the length of `data`,
    /// and the range must delimit valid UTF-8.
    /// Validity is only checked (with a panic) when debug assertions are enabled.
    pub(crate) unsafe fn get_text(&self, start: u32, end: u32) -> &str {
        let start = start as usize + 8;
        let end = end as usize + 8;

        let slice = self.data.get_unchecked(start..end);

        if cfg!(debug_assertions) {
            std::str::from_utf8(slice).unwrap()
        } else {
            std::str::from_utf8_unchecked(slice)
        }
    }

    /// Decodes the start-node event stored at `idx`.
    ///
    /// # Safety
    ///
    /// `idx` must be the offset of a start-node event in this tree:
    /// `START_NODE_SIZE` bytes must be readable from it (only debug-asserted),
    /// and the stored tag and finish index must be valid for
    /// `Tag::get_start_node_kind` and [`EventIdx::new`] respectively.
    pub(crate) unsafe fn get_start_node(&self, idx: EventIdx) -> StartNode<C> {
        let idx = idx.0.get() as usize;
        debug_assert!(idx + START_NODE_SIZE.to_usize() <= self.data.len());

        let ptr = self.data.as_ptr().add(idx).cast::<RawStartNode>();
        let raw = ptr.read_unaligned();

        StartNode {
            kind: raw.tag.get_start_node_kind::<C>(),
            finish_node_idx: EventIdx::new(raw.finish_node_idx),
            start: raw.start,
            end: raw.end,
        }
    }

    /// Decodes the add-token event stored at `idx`.
    ///
    /// # Safety
    ///
    /// `idx` must be the offset of an add-token event in this tree:
    /// `ADD_TOKEN_SIZE` bytes must be readable from it (only debug-asserted),
    /// and the stored tag must be valid for `Tag::get_add_token_kind`.
    pub(crate) unsafe fn get_add_token(&self, idx: EventIdx) -> AddToken<C> {
        let idx = idx.0.get() as usize;
        debug_assert!(idx + ADD_TOKEN_SIZE.to_usize() <= self.data.len());

        let ptr = self.data.as_ptr().add(idx).cast::<RawAddToken>();
        let raw = ptr.read_unaligned();

        AddToken { kind: raw.tag.get_add_token_kind::<C>(), start: raw.start, end: raw.end }
    }

    /// Returns whether the event at `idx` is a start-node or an add-token event.
    ///
    /// # Safety
    ///
    /// `idx` must be the offset of an event in this tree.
    pub(crate) unsafe fn event_kind(&self, idx: EventIdx) -> EventKind {
        self.tag_at_idx(idx).event_kind()
    }

    /// Reads the [`Tag`] stored at `idx`.
    // NOTE(review): this is a safe fn, yet the bounds check is a `debug_assert!` only;
    // its single caller in this excerpt (`event_kind`) is `unsafe`, so the requirement
    // on `idx` is effectively inherited from there.
    fn tag_at_idx(&self, idx: EventIdx) -> Tag {
        let idx = idx.0.get() as usize;
        debug_assert!(idx < self.data.len());
        // SAFETY: assumes `idx` is the offset of an event inside `data` (see note above).
        unsafe { self.data.as_ptr().add(idx).cast::<Tag>().read_unaligned() }
    }
}

impl<C> SyntaxTreeBuf<C> {
    /// Returns a reference to the contained syntax tree data.
    pub fn as_tree(&self) -> &SyntaxTree<C> {
        &self.data
    }
}

impl<C> Deref for SyntaxTreeBuf<C> {
    type Target = SyntaxTree<C>;

    fn deref(&self) -> &Self::Target {
        self.as_tree()
    }
}

/// A decoded start-node event, as returned by `SyntaxTree::get_start_node`.
#[repr(C, packed)]
pub(crate) struct StartNode<C: TreeConfig> {
    pub(crate) kind: C::NodeKind,
    pub(crate) finish_node_idx: EventIdx,
    pub(crate) start: u32,
    pub(crate) end: u32,
}

/// The in-buffer form of a start-node event: the tag followed by three `u32`s, unpadded.
#[repr(C, packed)]
struct RawStartNode {
    tag: Tag,
    finish_node_idx: u32,
    start: u32,
    end: u32,
}

/// A decoded add-token event, as returned by `SyntaxTree::get_add_token`.
#[repr(C, packed)]
pub(crate) struct AddToken<C: TreeConfig> {
    pub(crate) kind: C::TokenKind,
    pub(crate) start: u32,
    pub(crate) end: u32,
}

/// The in-buffer form of an add-token event: the tag followed by two `u32`s, unpadded.
#[repr(C, packed)]
struct RawAddToken {
    tag: Tag,
    start: u32,
    end: u32,
}

/// The byte offset of an event within a tree’s `data`; never zero.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct EventIdx(NonZeroU32);

impl EventIdx {
    /// Wraps `idx`.
    ///
    /// # Safety
    ///
    /// `idx` must be non-zero.
    /// This is checked (with a panic) only when debug assertions are enabled.
    pub(crate) unsafe fn new(idx: u32) -> Self {
        if cfg!(debug_assertions) {
            Self(NonZeroU32::new(idx).unwrap())
        } else {
            Self(NonZeroU32::new_unchecked(idx))
        }
    }
}

/// The size in bytes of one encoded event.
#[derive(Clone, Copy)]
pub(crate) struct EventSize(u32);

impl EventSize {
    fn to_usize(self) -> usize {
        self.0 as usize
    }
}

impl Add<EventSize> for EventIdx {
    type Output = Self;

    fn add(self, rhs: EventSize) -> Self::Output {
        // SAFETY: `self` is non-zero, so the sum is non-zero unless the `u32` addition
        // wraps around to exactly zero.
        unsafe { Self::new(self.0.get() + rhs.0) }
    }
}

impl AddAssign<EventSize> for EventIdx {
    fn add_assign(&mut self, rhs: EventSize) {
        *self = *self + rhs;
    }
}

/// The two kinds of event that are physically stored in the buffer.
#[derive(Debug, PartialEq)]
pub(crate) enum EventKind {
    StartNode,
    AddToken,
}

/// Iterator behind [`SyntaxTree::events`].
struct Events<'a, C> {
    /// Offset of the next event to read.
    idx: EventIdx,
    tree: &'a SyntaxTree<C>,
    /// Finish offsets of the nodes that are currently open, innermost last.
    finish_node_idxs: Vec<EventIdx>,
}

impl<C: TreeConfig> Iterator for Events<'_, C> {
    type Item = Event<C>;

    fn next(&mut self) -> Option<Self::Item> {
        // Finish events are not stored; one is emitted whenever the cursor reaches
        // the recorded finish offset of the innermost open node.
        if self.finish_node_idxs.last().copied() == Some(self.idx) {
            self.finish_node_idxs.pop();
            return Some(Event::FinishNode);
        }

        if self.idx.0.get() >= self.tree.data.len() as u32 {
            return None;
        }

        // SAFETY (all blocks below): assumes `idx` sits on an event boundary. It starts at
        // `root_idx()` and only ever advances by the size of the event just read.
        match unsafe { self.tree.event_kind(self.idx) } {
            EventKind::StartNode => {
                let node = unsafe { SyntaxNode::new(self.idx, self.tree.id()) };
                let finish_node_idx = unsafe { self.tree.get_start_node(self.idx).finish_node_idx };
                self.finish_node_idxs.push(finish_node_idx);
                self.idx += START_NODE_SIZE;
                Some(Event::StartNode(node))
            }
            EventKind::AddToken => {
                let token = unsafe { SyntaxToken::new(self.idx, self.tree.id()) };
                self.idx += ADD_TOKEN_SIZE;
                Some(Event::AddToken(token))
            }
        }
    }
}

/// Iterator behind [`SyntaxTree::raw_events`].
struct RawEvents<'a, C> {
    /// Offset of the next event to read.
    idx: EventIdx,
    tree: &'a SyntaxTree<C>,
    /// Finish offsets of the nodes that are currently open, innermost last.
    finish_node_idxs: Vec<EventIdx>,
}

impl<C: TreeConfig> Iterator for RawEvents<'_, C> {
    type Item = RawEvent<C>;

    fn next(&mut self) -> Option<Self::Item> {
        // Same traversal as `Events::next`, but yielding the decoded payloads
        // instead of node/token handles.
        if self.finish_node_idxs.last().copied() == Some(self.idx) {
            self.finish_node_idxs.pop();
            return Some(RawEvent::FinishNode);
        }

        if self.idx.0.get() >= self.tree.data.len() as u32 {
            return None;
        }

        // SAFETY (all blocks below): assumes `idx` sits on an event boundary. It starts at
        // `root_idx()` and only ever advances by the size of the event just read.
        match unsafe { self.tree.event_kind(self.idx) } {
            EventKind::StartNode => {
                let start_node = unsafe { self.tree.get_start_node(self.idx) };
                let range = TextRange::new(start_node.start.into(), start_node.end.into());
                self.finish_node_idxs.push(start_node.finish_node_idx);
                self.idx += START_NODE_SIZE;
                Some(RawEvent::StartNode { kind: start_node.kind, range })
            }
            EventKind::AddToken => {
                let add_token = unsafe { self.tree.get_add_token(self.idx) };
                let range = TextRange::new(add_token.start.into(), add_token.end.into());
                self.idx += ADD_TOKEN_SIZE;
                Some(RawEvent::AddToken { kind: add_token.kind, range })
            }
        }
    }
}

/// The events in a syntax tree, as emitted by [`SyntaxTree::events`].
/// See that method’s documentation for more.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum Event<C> {
    #[allow(missing_docs)]
    StartNode(SyntaxNode<C>),
    #[allow(missing_docs)]
    AddToken(SyntaxToken<C>),
    #[allow(missing_docs)]
    FinishNode,
}

/// The events in a syntax tree, as emitted by [`SyntaxTree::raw_events`].
/// See that method’s documentation for more.
///
/// All data here is exactly as it is stored in the tree, with nothing extra computed.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum RawEvent<C: TreeConfig> {
    #[allow(missing_docs)]
    StartNode { kind: C::NodeKind, range: TextRange },
    #[allow(missing_docs)]
    AddToken { kind: C::TokenKind, range: TextRange },
    #[allow(missing_docs)]
    FinishNode,
}

impl<C: TreeConfig> fmt::Debug for SyntaxTreeBuf<C> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.as_tree().fmt(f)
    }
}

impl<C: TreeConfig> fmt::Debug for SyntaxTree<C> {
    /// `{:?}` prints the raw bytes as a struct;
    /// `{:#?}` prints one line per node and token, indented by nesting depth.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if !f.alternate() {
            return f.debug_struct("SyntaxTree").field("data", &&self.data).finish();
        }

        let mut indentation_level = 0_usize;

        // NOTE(review): the indent string below is reproduced as it appears in this dump,
        // where runs of whitespace may have been collapsed; the original unit may be wider.
        for event in self.events() {
            match event {
                Event::StartNode(node) => {
                    for _ in 0..indentation_level {
                        write!(f, " ")?;
                    }
                    indentation_level += 1;
                    let kind = node.kind(self);
                    let range = node.range(self);
                    writeln!(f, "{kind:?}@{range:?}")?;
                }
                Event::AddToken(token) => {
                    for _ in 0..indentation_level {
                        write!(f, " ")?;
                    }
                    let kind = token.kind(self);
                    let range = token.range(self);
                    let text = token.text(self);
                    writeln!(f, "{kind:?}@{range:?} {text:?}")?;
                }
                Event::FinishNode => indentation_level -= 1,
            }
        }

        Ok(())
    }
}

impl<C: TreeConfig> fmt::Debug for RawEvent<C> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::StartNode { kind, range } => write!(f, "START_NODE {kind:?} {range:?}"),
            Self::AddToken { kind, range } => write!(f, "ADD_TOKEN {kind:?} {range:?}"),
            Self::FinishNode => write!(f, "FINISH_NODE"),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use expect_test::expect;
    use std::sync::OnceLock;

    #[derive(Debug, PartialEq)]
    #[repr(u8)]
    enum NodeKind {
        Root,
        Block,
        Function,
    }

    #[derive(Debug, PartialEq)]
    #[repr(u8)]
    enum TokenKind {
        Arrow,
        Comment,
        FncKw,
        Ident,
        LBrace,
        LetKw,
        RBrace,
        Semicolon,
    }

    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    enum TreeConfig {}

    unsafe impl crate::TreeConfig for TreeConfig {
        type NodeKind = NodeKind;
        type TokenKind = TokenKind;

        fn node_kind_to_raw(node_kind: Self::NodeKind) -> u16 {
            node_kind as u16
        }

        fn token_kind_to_raw(token_kind: Self::TokenKind) -> u16 {
            token_kind as u16
        }

        unsafe fn node_kind_from_raw(raw: u16) -> Self::NodeKind {
            std::mem::transmute(raw as u8)
        }

        unsafe fn token_kind_from_raw(raw: u16) -> Self::TokenKind {
            std::mem::transmute(raw as u8)
        }
    }

    /// One piece of expected buffer content, written in native byte order by `check`.
    enum D {
        U16(u16),
        U32(u32),
        Text(&'static str),
    }

    /// Builds a tree from `input` using `f`
    /// and compares its bytes (minus the first four) against `data`.
    fn check<const N: usize>(
        input: &str,
        f: impl Fn(&mut SyntaxBuilder<TreeConfig>),
        data: [D; N],
    ) {
        let mut builder = SyntaxBuilder::new(input);
        f(&mut builder);
        let tree = builder.finish();

        let data: Vec<_> = data
            .into_iter()
            .flat_map(|num| match num {
                D::U16(n) => n.to_ne_bytes().to_vec(),
                D::U32(n) => n.to_ne_bytes().to_vec(),
                D::Text(s) => s.as_bytes().to_vec(),
            })
            .collect();

        // don’t include tag in tests
        // (bytes 0..4 are the value `SyntaxTree::id` reads)
        assert_eq!(tree.as_tree().data[4..], data);
    }

    /// A shared, lazily built tree used by several tests.
    fn big_tree() -> &'static SyntaxTree<TreeConfig> {
        static BUF: OnceLock<SyntaxTreeBuf<TreeConfig>> = OnceLock::new();

        BUF.get_or_init(|| {
            let mut builder = SyntaxBuilder::new("# foo\nfncbar->{};");

            builder.start_node(NodeKind::Root);
            builder.add_token(TokenKind::Comment, TextRange::new(0.into(), 6.into()));
            builder.start_node(NodeKind::Function);
            builder.add_token(TokenKind::FncKw, TextRange::new(6.into(), 9.into()));
            builder.add_token(TokenKind::Ident, TextRange::new(9.into(), 12.into()));
            builder.add_token(TokenKind::Arrow, TextRange::new(12.into(), 14.into()));
            builder.start_node(NodeKind::Block);
            builder.add_token(TokenKind::LBrace, TextRange::new(14.into(), 15.into()));
            builder.add_token(TokenKind::RBrace, TextRange::new(15.into(), 16.into()));
            builder.finish_node();
            builder.add_token(TokenKind::Semicolon, TextRange::new(16.into(), 17.into()));
            builder.finish_node();
            builder.finish_node();

            builder.finish()
        })
    }

    #[test]
    fn just_root() {
        check(
            "",
            |b| {
                b.start_node(NodeKind::Root);
                b.finish_node();
            },
            [D::U32(0), D::U16(NodeKind::Root as u16 | 1 << 15), D::U32(22), D::U32(0), D::U32(0)],
        );
    }

    #[test]
    fn add_token() {
        check(
            "let",
            |b| {
                b.start_node(NodeKind::Root);
                b.add_token(TokenKind::LetKw, TextRange::new(0.into(), 3.into()));
                b.finish_node();
            },
            [
                D::U32(3),
                D::Text("let"),
                D::U16(NodeKind::Root as u16 | 1 << 15),
                D::U32(35),
                D::U32(0),
                D::U32(3),
                D::U16(TokenKind::LetKw as u16),
                D::U32(0),
                D::U32(3),
            ],
        );
    }

    #[test]
    fn debug_empty() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.finish_node();

        let tree = builder.finish();
        expect![[r##"
            Root@0..0
        "##]]
        .assert_eq(&format!("{tree:#?}"));
    }

    // NOTE(review): the nesting indentation of this expectation was lost in the dump;
    // it is written to match the one-space indent string in `Debug for SyntaxTree` above.
    // Confirm both against the upstream file.
    #[test]
    fn debug_complex() {
        expect![[r##"
            Root@0..17
             Comment@0..6 "# foo\n"
             Function@6..17
              FncKw@6..9 "fnc"
              Ident@9..12 "bar"
              Arrow@12..14 "->"
              Block@14..16
               LBrace@14..15 "{"
               RBrace@15..16 "}"
              Semicolon@16..17 ";"
        "##]]
        .assert_eq(&format!("{:#?}", big_tree()));
    }

    #[test]
    fn events() {
        let tree = big_tree();
        let mut events = tree.events();

        let root = match events.next() {
            Some(Event::StartNode(root)) => root,
            _ => unreachable!(),
        };
        assert_eq!(root.kind(tree), NodeKind::Root);

        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::StartNode(_))));
        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::StartNode(_))));
        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::AddToken(_))));
        assert!(matches!(events.next(), Some(Event::FinishNode)));

        let semicolon = match events.next() {
            Some(Event::AddToken(semicolon)) => semicolon,
            _ => unreachable!(),
        };
        assert_eq!(semicolon.kind(tree), TokenKind::Semicolon);

        assert!(matches!(events.next(), Some(Event::FinishNode)));
        assert!(matches!(events.next(), Some(Event::FinishNode)));
        assert!(events.next().is_none());
    }

    #[test]
    fn raw_events() {
        expect![[r#"
            [
                START_NODE Root 0..17,
                ADD_TOKEN Comment 0..6,
                START_NODE Function 6..17,
                ADD_TOKEN FncKw 6..9,
                ADD_TOKEN Ident 9..12,
                ADD_TOKEN Arrow 12..14,
                START_NODE Block 14..16,
                ADD_TOKEN LBrace 14..15,
                ADD_TOKEN RBrace 15..16,
                FINISH_NODE,
                ADD_TOKEN Semicolon 16..17,
                FINISH_NODE,
                FINISH_NODE,
            ]
        "#]]
        .assert_debug_eq(&big_tree().raw_events().collect::<Vec<_>>());
    }

    #[test]
    #[should_panic(expected = "no nodes are yet to be finished")]
    fn no_start_node() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.finish_node();
    }

    #[test]
    #[should_panic(expected = "did not finish all nodes (1 unfinished nodes)")]
    fn no_finish_node() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "did not finish all nodes (2 unfinished nodes)")]
    fn too_many_start_node_calls() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.start_node(NodeKind::Function);
        builder.start_node(NodeKind::Block);
        builder.start_node(NodeKind::Block);
        builder.finish_node();
        builder.finish_node();
        builder.finish();
    }

    #[test]
    #[should_panic(expected = "no nodes are yet to be finished")]
    fn too_many_finish_node_calls() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.start_node(NodeKind::Function);
        builder.start_node(NodeKind::Block);
        builder.finish_node();
        builder.finish_node();
        builder.finish_node();
        builder.finish_node();
    }

    #[test]
    #[should_panic(expected = "root node already created")]
    fn second_root() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.finish_node();
        builder.start_node(NodeKind::Block);
    }

    #[test]
    #[should_panic(expected = "no nodes created")]
    fn empty_without_text() {
        SyntaxBuilder::<TreeConfig>::new("").finish();
    }

    #[test]
    #[should_panic(expected = "no nodes created")]
    fn empty_with_text() {
        SyntaxBuilder::<TreeConfig>::new("foo").finish();
    }

    #[test]
    #[should_panic(expected = "cannot add token before starting node")]
    fn add_token_before_starting_node() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("let");
        builder.add_token(TokenKind::LetKw, TextRange::new(0.into(), 3.into()));
    }

    #[test]
    #[should_panic(expected = "token is out of range: range is 0..1, but text is 0..0")]
    fn add_token_with_out_of_bounds_range() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.add_token(TokenKind::LetKw, TextRange::new(0.into(), 1.into()));
    }

    #[test]
    #[should_panic(
        expected = "tried to access node data from tree other than the one this node is from"
    )]
    fn access_node_data_from_other_tree() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.finish_node();
        let tree = builder.finish();

        let mut builder = SyntaxBuilder::<TreeConfig>::new("");
        builder.start_node(NodeKind::Root);
        builder.finish_node();
        let tree2 = builder.finish();

        tree.root().text(&tree2);
    }

    #[test]
    #[should_panic(
        expected = "tried to access token data from tree other than the one this token is from"
    )]
    fn access_token_data_from_other_tree() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("->");
        builder.start_node(NodeKind::Root);
        builder.add_token(TokenKind::Arrow, TextRange::new(0.into(), 2.into()));
        builder.finish_node();
        let tree = builder.finish();

        let mut builder = SyntaxBuilder::new("");
        builder.start_node(NodeKind::Root);
        builder.finish_node();
        let tree2 = builder.finish();

        let arrow_token = tree.root().child_tokens(&tree).next().unwrap();
        arrow_token.text(&tree2);
    }

    #[test]
    #[should_panic(
        expected = "tried to create token that does not lie on UTF-8 character boundary"
    )]
    fn create_token_not_on_utf8_char_boundary() {
        let mut builder = SyntaxBuilder::<TreeConfig>::new("å");
        builder.start_node(NodeKind::Root);
        builder.add_token(TokenKind::Ident, TextRange::new(1.into(), 2.into()));
    }
}