├── .config └── nextest.toml ├── .github └── workflows │ ├── main.yml │ └── test-report.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── cstree-derive ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src │ ├── errors.rs │ ├── lib.rs │ ├── parsing.rs │ ├── parsing │ └── attributes.rs │ └── symbols.rs ├── cstree ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches │ └── main.rs ├── examples │ ├── math.rs │ ├── readme.rs │ ├── s_expressions.rs │ └── salsa.rs ├── src │ ├── getting_started.rs │ ├── green.rs │ ├── green │ │ ├── builder.rs │ │ ├── element.rs │ │ ├── iter.rs │ │ ├── node.rs │ │ └── token.rs │ ├── interning.rs │ ├── interning │ │ ├── default_interner.rs │ │ ├── lasso_compat.rs │ │ ├── lasso_compat │ │ │ ├── token_interner.rs │ │ │ └── traits.rs │ │ ├── salsa_compat.rs │ │ └── traits.rs │ ├── lib.rs │ ├── serde_impls.rs │ ├── syntax │ │ ├── element.rs │ │ ├── iter.rs │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── resolved.rs │ │ ├── text.rs │ │ └── token.rs │ └── utility_types.rs └── tests │ └── it │ ├── basic.rs │ ├── main.rs │ ├── regressions.rs │ ├── rollback.rs │ ├── sendsync.rs │ └── serde.rs ├── rustfmt.toml └── test_suite ├── Cargo.toml └── tests ├── derive.rs ├── ui.rs └── ui ├── repr ├── missing_repr.rs ├── missing_repr.stderr ├── wrong_repr_c.rs ├── wrong_repr_c.stderr ├── wrong_repr_u16.rs └── wrong_repr_u16.stderr └── static_text ├── empty_expr.rs ├── empty_expr.stderr ├── missing_text.rs ├── missing_text.stderr ├── non_expr.rs ├── non_expr.stderr ├── non_string_expr.rs ├── non_string_expr.stderr ├── text_assigned.rs └── text_assigned.stderr /.config/nextest.toml: -------------------------------------------------------------------------------- 1 | 2 | [profile.default] 3 | # Print out output for failing tests as soon as they fail, and also at the end 4 | # of the run (for easy scrollability). 
5 | failure-output = "immediate-final" 6 | 7 | [profile.ci-default-features] 8 | # Do not cancel the test run on the first failure. 9 | fail-fast = false 10 | 11 | [profile.ci-default-features.junit] 12 | path = "junit-default-features.xml" 13 | report-name = "default-features" 14 | 15 | [profile.ci-all-features] 16 | # Do not cancel the test run on the first failure. 17 | fail-fast = false 18 | 19 | [profile.ci-all-features.junit] 20 | path = "junit-all-features.xml" 21 | report-name = "all-features" 22 | 23 | [profile.ci-all-features-release] 24 | # Do not cancel the test run on the first failure. 25 | fail-fast = false 26 | 27 | [profile.ci-all-features-release.junit] 28 | path = "junit-all-features-release.xml" 29 | report-name = "all-features-release" 30 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | # Daily 8 | schedule: 9 | - cron: "0 4 1/20 * *" 10 | 11 | # Allows to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | permissions: 15 | contents: read 16 | 17 | # If new code is pushed to a PR branch, then cancel in progress workflows for that PR. 
Ensures that 18 | # we don't waste CI time, and returns results quicker https://github.com/jonhoo/rust-ci-conf/pull/5 19 | concurrency: 20 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 21 | cancel-in-progress: true 22 | 23 | env: 24 | RUST_LOG: info 25 | RUST_BACKTRACE: 1 26 | CARGO_TERM_COLOR: always 27 | CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse 28 | 29 | jobs: 30 | test: 31 | name: Test ${{ matrix.rust }} on ${{ matrix.os }} 32 | runs-on: ${{ matrix.os }} 33 | 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | os: [ubuntu-latest, windows-latest, macos-latest] 38 | rust: [stable, nightly] 39 | 40 | steps: 41 | # setup 42 | - uses: actions/checkout@v4 43 | - uses: hecrj/setup-rust-action@v1 44 | with: 45 | rust-version: ${{ matrix.rust }} 46 | - uses: taiki-e/install-action@nextest 47 | 48 | # tests 49 | - name: Library Tests | Default Features 50 | run: cargo nextest run --profile ci-default-features --tests --examples --verbose 51 | - name: Library Tests | All Features 52 | run: cargo nextest run --profile ci-all-features --tests --examples --verbose --all-features 53 | - name: Library Tests | All Features (Release) 54 | run: cargo nextest run --profile ci-all-features-release --tests --examples --verbose --all-features --release 55 | - name: Doc Tests 56 | run: cargo test --doc --verbose --all-features 57 | 58 | # upload test results 59 | - uses: actions/upload-artifact@v4 60 | if: success() || failure() # run this step even if previous step failed 61 | with: 62 | name: test-results-${{ matrix.rust }}-${{ matrix.os }} 63 | path: target/nextest/**/junit-*.xml 64 | 65 | check: 66 | name: Check 67 | runs-on: ubuntu-latest 68 | steps: 69 | - uses: actions/checkout@v4 70 | - uses: hecrj/setup-rust-action@v1 71 | with: 72 | rust-version: nightly 73 | - name: Cargo Check 74 | run: cargo check --all-targets --all-features 75 | 76 | clippy: 77 | name: Clippy 78 | runs-on: ubuntu-latest 79 | 80 | env: 81 | RUSTFLAGS: -Dwarnings 82 | 83 | 
steps: 84 | - uses: actions/checkout@v4 85 | - uses: hecrj/setup-rust-action@v1 86 | with: 87 | components: clippy 88 | - run: cargo clippy --all-targets --all-features --verbose -- -D warnings 89 | 90 | rustfmt: 91 | name: Rustfmt 92 | runs-on: ubuntu-latest 93 | 94 | steps: 95 | - uses: actions/checkout@v4 96 | - uses: hecrj/setup-rust-action@v1 97 | with: 98 | rust-version: nightly 99 | components: rustfmt 100 | - run: cargo fmt -p cstree -- --check 101 | 102 | rustdoc: 103 | name: Check doc links 104 | runs-on: ubuntu-latest 105 | env: 106 | RUSTDOCFLAGS: -Dwarnings --cfg doc_cfg 107 | 108 | steps: 109 | - uses: actions/checkout@v4 110 | - uses: hecrj/setup-rust-action@v1 111 | with: 112 | rust-version: nightly 113 | - run: cargo doc --all-features --document-private-items --no-deps 114 | 115 | miri-test: 116 | name: Miri ${{ matrix.os }} 117 | runs-on: ${{ matrix.os }} 118 | 119 | strategy: 120 | fail-fast: false 121 | matrix: 122 | os: [ubuntu-latest, windows-latest, macos-latest] 123 | 124 | steps: 125 | - uses: actions/checkout@v4 126 | - uses: hecrj/setup-rust-action@v1 127 | with: 128 | rust-version: nightly 129 | components: miri 130 | env: 131 | MIRIFLAGS: -Zmiri-disable-isolation -Zmiri-strict-provenance 132 | - run: cargo miri test --verbose --all-features 133 | 134 | sanitizers: 135 | name: ${{ matrix.sanitizer }} sanitizer 136 | runs-on: ubuntu-latest 137 | strategy: 138 | fail-fast: false 139 | matrix: 140 | sanitizer: [address, memory, thread, leak] 141 | steps: 142 | - uses: actions/checkout@v4 143 | - uses: hecrj/setup-rust-action@v1 144 | with: 145 | rust-version: nightly 146 | components: rust-src 147 | - name: Test with sanitizer 148 | env: 149 | RUSTFLAGS: -Zsanitizer=${{ matrix.sanitizer }} 150 | RUSTDOCFLAGS: -Zsanitizer=${{ matrix.sanitizer }} 151 | # only needed by asan 152 | ASAN_OPTIONS: detect_stack_use_after_return=1 153 | # Asan's leak detection occasionally complains 154 | # about some small leaks if backtraces are captured, 155 | 
# so ensure they're not 156 | RUST_BACKTRACE: 0 157 | run: | 158 | cargo test -Zbuild-std --verbose --target=x86_64-unknown-linux-gnu --all-features 159 | -------------------------------------------------------------------------------- /.github/workflows/test-report.yml: -------------------------------------------------------------------------------- 1 | name: 'Test Report' 2 | on: 3 | workflow_run: 4 | workflows: ['CI'] # runs after CI workflow 5 | types: 6 | - completed 7 | jobs: 8 | report: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dorny/test-reporter@v1 12 | with: 13 | artifact: /test-results-([a-z]*)-(.*)/ # artifact name: test-results-<rust>-<os> 14 | name: Results | $1 $2 # Name of the check run which will be created 15 | path: './**/junit-*.xml' # Path to test results (inside artifact .zip) 16 | reporter: java-junit # Format of test results -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | 3 | target 4 | *checksum* 5 | Cargo.lock 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Unreleased 4 | 5 | * `&I` and `&mut I` will now implement `Resolver` if `I` implements `Resolver`. 6 | * `&mut I` will now implement `Interner` if `I` implements `Interner`. 7 | * Added an implementation for `Arc` to implement `Resolver` and `Interner` so an `Arc` may be used alternatively to a reference to share access to the interner. 8 | 9 | ## `v0.12.2` 10 | 11 | * `Checkpoint`s for the `GreenNodeBuilder` can now be used across node boundaries, meaning you can use them to wrap (finished) nodes in addition to just tokens. 
12 | * A new method `Checkpoint::revert_to` has been added which resets a `GreenNodeBuilder` to the state it was in when the checkpoint was taken, allowing a parser to backtrack to the checkpoint. 13 | 14 | ## `v0.12.1` 15 | 16 | * Implement `Hash` and `Eq` for `ResolvedNode` and `ResolvedToken` 17 | 18 | ## `v0.12.0` 19 | 20 | * Documentation has been improved in most areas, together with a switch to a more principled module structure that allows explicitly documenting submodules. 21 | * The `Language` trait has been deprecated in favour of a new `Syntax` trait. `Syntax` provides the same methods that `Language` did before, but is implemented directly on the syntax kind enum instead of an additional type representing the language. 22 | * The supertrait requirements on `PartialOrd`, `Ord`, and `Hash` have been dropped. 23 | * This allows us to optionally provide a derive macro for `Syntax`. To enable the macro, add the `derive` feature flag in your `Cargo.toml` and `#[derive(Syntax)]` away! 24 | * The `interning` module has been rewritten. It now provides functions for obtaining a default interner (`new_interner` and `new_threaded_interner`) and provides a small, dependency-free interner implementation. 25 | * Compatibility with other interners can be enabled via feature flags. 26 | * **Note** that compatibility with `lasso` is not enabled by default. Use the `lasso_compat` feature to match the previous default. 27 | * If you are using `lasso` interners directly that you are also passing to `cstree`, note that while e.g. the `GreenNodeBuilder` can work with `lasso::Rodeo`s, you will not be able to convert between `lasso`'s `Spur` and `cstree`'s `TokenKey`. The `TokenKey` can, however, be used as the key type for `lasso` interners at no additional cost by working with a `Rodeo` instead of the `lasso`-default `Rodeo`. 
28 | * Introduced `Syntax::static_text` to optimize tokens that always appear with the same text (estimated 10-15% faster tree building when used, depending on the ratio of static to dynamic tokens). 29 | * Since `cstree`s are lossless, `GreenNodeBuilder::token` must still be passed the source text even for static tokens. 30 | * Internal performance improvements for up to 10% faster tree building by avoiding unnecessary duplication of elements. 31 | * Use `NonNull` for the internal representation of `SyntaxNode`, meaning it now benefits from niche optimizations (`Option` is now the same size as `SyntaxNode` itself: the size of a pointer). 32 | * `SyntaxKind` has been renamed to `RawSyntaxKind` to no longer conflict with user-defined `SyntaxKind` enumerations. 33 | * `RawSyntaxKind` has been changed to use a 32-bit index internally, which means existing `Language` implementations and syntax kind `enum`s need to be adjusted to `#[repr(u32)]` and the corresponding conversions. 34 | * The crate's export module structure has been reorganized to give different groups of definitions their own submodules. A `cstree::prelude` module is available, containing the most commonly needed types that were previously accessible via `use cstree::*`. 
Otherwise, the module structure is now as follows: 35 | * `cstree` 36 | * `Syntax` 37 | * `RawSyntaxKind` 38 | * `build` 39 | * `GreenNodeBuilder` 40 | * `NodeCache` 41 | * `Checkpoint` 42 | * `green` 43 | * `GreenNode` 44 | * `GreenToken` 45 | * `GreenNodeChildren` 46 | * `syntax` 47 | * `{Syntax,Resolved}Node` 48 | * `{Syntax,Resolved}Token` 49 | * `{Syntax,Resolved}Element` 50 | * `{Syntax,Resolved}ElementRef` 51 | * `SyntaxNodeChildren` 52 | * `SyntaxElementChildren` 53 | * `SyntaxText` 54 | * `interning` 55 | * `TokenKey` and the `InternKey` trait 56 | * `Interner` and `Resolver` traits 57 | * `new_interner` and `TokenInterner` 58 | * `new_threaded_interner` and `MultiThreadedTokenInterner` (with the `multi_threaded_interning` feature enabled) 59 | * compatibility implementations for interning crates depending on selected feature flags 60 | * `text` 61 | * `TextSize` 62 | * `TextRange` 63 | * `SyntaxText` (re-export) 64 | * `traversal` 65 | * `Direction` 66 | * `WalkEvent` 67 | * `util` 68 | * `NodeOrToken` 69 | * `TokenAtOffset` 70 | * `sync` 71 | * `Arc` 72 | * `prelude` 73 | * re-exports of the most-used items 74 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "cstree", 4 | "cstree-derive", 5 | "test_suite", 6 | ] 7 | resolver = "2" 8 | 9 | [workspace.package] 10 | edition = "2021" 11 | version = "0.12.2" # when updating, also update `#![doc(html_root_url)]` and any inter-crate dependencies (such as `cstree`'s dependency on `cstree-derive`) 12 | authors = [ 13 | "Domenic Quirl ", 14 | "Aleksey Kladov ", 15 | ] 16 | license = "MIT OR Apache-2.0" 17 | repository = "https://github.com/domenicquirl/cstree" 18 | readme = "README.md" 19 | rust-version = "1.84" 20 | 21 | [profile.release] 22 | debug = true 23 | -------------------------------------------------------------------------------- 
/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /cstree-derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cstree_derive" 3 | description = "Macro implementation of `#[derive(Syntax)]`" 4 | keywords = ["cstree", "derive"] 5 | edition.workspace = true 6 | version.workspace = true 7 | authors.workspace = true 8 | license.workspace = true 9 | repository.workspace = true 10 | readme.workspace = true 11 | rust-version.workspace = true 12 | 13 | [lib] 14 | name = "cstree_derive" 15 | proc-macro = true 16 | 17 | [dependencies] 18 | proc-macro2 = "1.0.56" 19 | quote = "1.0.26" 20 | syn = { version = "2.0.14" } 21 | 22 | [dev-dependencies] 23 | cstree = { path = "../cstree" } 24 | -------------------------------------------------------------------------------- /cstree-derive/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /cstree-derive/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /cstree-derive/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /cstree-derive/src/errors.rs: -------------------------------------------------------------------------------- 1 | use std::{cell::RefCell, fmt, thread}; 2 | 3 | use quote::ToTokens; 4 | 5 | /// Context to 
collect multiple errors and output them all after parsing in order to not abort 6 | /// immediately on the first error. 7 | /// 8 | /// Ensures that the errors are handled using [`check`](ErrorContext::check) by otherwise panicking 9 | /// on `Drop`. 10 | #[derive(Debug, Default)] 11 | pub(crate) struct ErrorContext { 12 | errors: RefCell>>, 13 | } 14 | 15 | impl ErrorContext { 16 | /// Create a new context. 17 | /// 18 | /// This context contains no errors, but will still trigger a panic if it is not `check`ed. 19 | pub fn new() -> Self { 20 | ErrorContext { 21 | errors: RefCell::new(Some(Vec::new())), 22 | } 23 | } 24 | 25 | /// Add an error to the context that points to `source`. 26 | pub fn error_at(&self, source: S, msg: T) { 27 | self.errors 28 | .borrow_mut() 29 | .as_mut() 30 | .unwrap() 31 | // Transform `ToTokens` here so we don't monomorphize `new_spanned` so much. 32 | .push(syn::Error::new_spanned(source.into_token_stream(), msg)); 33 | } 34 | 35 | /// Add a `syn` parse error directly. 36 | pub fn syn_error(&self, err: syn::Error) { 37 | self.errors.borrow_mut().as_mut().unwrap().push(err); 38 | } 39 | 40 | /// Consume the context, producing a formatted error string if there are errors. 41 | pub fn check(self) -> Result<(), Vec> { 42 | let errors = self.errors.borrow_mut().take().unwrap(); 43 | match errors.len() { 44 | 0 => Ok(()), 45 | _ => Err(errors), 46 | } 47 | } 48 | } 49 | 50 | impl Drop for ErrorContext { 51 | fn drop(&mut self) { 52 | if !thread::panicking() && self.errors.borrow().is_some() { 53 | panic!("forgot to check for errors"); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /cstree-derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate provides `cstree`'s derive macro for `Syntax`. 2 | //! 3 | //! ``` 4 | //! # use cstree_derive::Syntax; 5 | //! # 6 | //! # #[derive(Debug, Copy, Clone, PartialEq, Eq)] 7 | //! 
#[derive(Syntax)] 8 | //! # #[repr(u32)] 9 | //! # enum SyntaxKind { Root } 10 | //! ``` 11 | //! 12 | //! Please refer to [the `cstree` main crate] for how to set this up. 13 | //! 14 | //! [the `cstree` main crate]: https://docs.rs/cstree/ 15 | 16 | use errors::ErrorContext; 17 | use parsing::SyntaxKindEnum; 18 | use proc_macro2::TokenStream; 19 | use quote::{quote, quote_spanned}; 20 | use syn::{parse_macro_input, spanned::Spanned, DeriveInput}; 21 | 22 | mod errors; 23 | mod parsing; 24 | mod symbols; 25 | 26 | use symbols::*; 27 | 28 | #[proc_macro_derive(Syntax, attributes(static_text))] 29 | pub fn language(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 30 | let ast = parse_macro_input!(input as DeriveInput); 31 | expand_syntax(ast).unwrap_or_else(to_compile_errors).into() 32 | } 33 | 34 | fn expand_syntax(ast: DeriveInput) -> Result> { 35 | let error_handler = ErrorContext::new(); 36 | let Ok(syntax_kind_enum) = SyntaxKindEnum::parse_from_ast(&error_handler, &ast) else { 37 | return Err(error_handler.check().unwrap_err()); 38 | }; 39 | 40 | // Check that the `enum` is `#[repr(u32)]` 41 | match &syntax_kind_enum.repr { 42 | Some(repr) if repr == U32 => (), 43 | Some(_) | None => error_handler.error_at( 44 | syntax_kind_enum.source, 45 | "syntax kind definitions must be `#[repr(u32)]` to derive `Syntax`", 46 | ), 47 | } 48 | 49 | error_handler.check()?; 50 | 51 | let name = &syntax_kind_enum.name; 52 | let variant_count = syntax_kind_enum.variants.len() as u32; 53 | let static_texts = syntax_kind_enum.variants.iter().map(|variant| { 54 | let variant_name = &variant.name; 55 | let static_text = match variant.static_text.as_deref() { 56 | Some(text) => quote!(::core::option::Option::Some(#text)), 57 | None => quote!(::core::option::Option::None), 58 | }; 59 | quote_spanned!(variant.source.span()=> 60 | #name :: #variant_name => #static_text, 61 | ) 62 | }); 63 | let trait_impl = quote_spanned! 
{ syntax_kind_enum.source.span()=> 64 | #[automatically_derived] 65 | impl ::cstree::Syntax for #name { 66 | fn from_raw(raw: ::cstree::RawSyntaxKind) -> Self { 67 | assert!(raw.0 < #variant_count, "Invalid raw syntax kind: {}", raw.0); 68 | // Safety: discriminant is valid by the assert above 69 | unsafe { ::std::mem::transmute::(raw.0) } 70 | } 71 | 72 | fn into_raw(self) -> ::cstree::RawSyntaxKind { 73 | ::cstree::RawSyntaxKind(self as u32) 74 | } 75 | 76 | fn static_text(self) -> ::core::option::Option<&'static str> { 77 | match self { 78 | #( #static_texts )* 79 | } 80 | } 81 | } 82 | }; 83 | Ok(trait_impl) 84 | } 85 | 86 | fn to_compile_errors(errors: Vec) -> proc_macro2::TokenStream { 87 | let compile_errors = errors.iter().map(syn::Error::to_compile_error); 88 | quote!(#(#compile_errors)*) 89 | } 90 | -------------------------------------------------------------------------------- /cstree-derive/src/parsing.rs: -------------------------------------------------------------------------------- 1 | mod attributes; 2 | 3 | use syn::{punctuated::Punctuated, Token}; 4 | 5 | use crate::{errors::ErrorContext, symbols::*}; 6 | 7 | use self::attributes::Attr; 8 | 9 | /// Convenience for recording errors inside `ErrorContext` instead of the `Err` variant of the `Result`. 
10 | pub(crate) type Result = std::result::Result; 11 | 12 | pub(crate) struct SyntaxKindEnum<'i> { 13 | pub(crate) name: syn::Ident, 14 | pub(crate) repr: Option, 15 | pub(crate) variants: Vec>, 16 | pub(crate) source: &'i syn::DeriveInput, 17 | } 18 | 19 | impl<'i> SyntaxKindEnum<'i> { 20 | pub(crate) fn parse_from_ast(error_handler: &ErrorContext, item: &'i syn::DeriveInput) -> Result { 21 | let syn::Data::Enum(data) = &item.data else { 22 | error_handler.error_at(item, "`Syntax` can only be derived on enums"); 23 | return Err(()); 24 | }; 25 | 26 | let name = item.ident.clone(); 27 | 28 | let mut repr = Attr::none(error_handler, REPR); 29 | for repr_attr in item.attrs.iter().filter(|&attr| attr.path().is_ident(&REPR)) { 30 | if let syn::Meta::List(nested) = &repr_attr.meta { 31 | if let Ok(nested) = nested.parse_args_with(Punctuated::::parse_terminated) { 32 | for meta in nested { 33 | if let syn::Meta::Path(path) = meta { 34 | if let Some(ident) = path.get_ident() { 35 | repr.set(repr_attr, ident.clone()); 36 | } 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | let variants = data 44 | .variants 45 | .iter() 46 | .map(|variant| SyntaxKindVariant::parse_from_ast(error_handler, variant)) 47 | .collect(); 48 | 49 | Ok(Self { 50 | name, 51 | repr: repr.get(), 52 | variants, 53 | source: item, 54 | }) 55 | } 56 | } 57 | 58 | pub(crate) struct SyntaxKindVariant<'i> { 59 | pub(crate) name: syn::Ident, 60 | pub(crate) static_text: Option, 61 | pub(crate) source: &'i syn::Variant, 62 | } 63 | 64 | impl<'i> SyntaxKindVariant<'i> { 65 | pub(crate) fn parse_from_ast(error_handler: &ErrorContext, variant: &'i syn::Variant) -> Self { 66 | let name = variant.ident.clone(); 67 | 68 | // Check that `variant` is a unit variant 69 | match &variant.fields { 70 | syn::Fields::Unit => (), 71 | syn::Fields::Named(_) | syn::Fields::Unnamed(_) => { 72 | error_handler.error_at(variant, "syntax kinds with fields are not supported"); 73 | } 74 | } 75 | 76 | // Check that discriminants are 
unaltered 77 | if variant.discriminant.is_some() { 78 | error_handler.error_at( 79 | variant, 80 | "syntax kinds are not allowed to have custom discriminant values", 81 | ); 82 | } 83 | 84 | let mut static_text = Attr::none(error_handler, STATIC_TEXT); 85 | for text in variant 86 | .attrs 87 | .iter() 88 | .flat_map(|attr| get_static_text(error_handler, attr)) 89 | { 90 | static_text.set(&text, text.value()); 91 | } 92 | Self { 93 | name, 94 | static_text: static_text.get(), 95 | source: variant, 96 | } 97 | } 98 | } 99 | 100 | fn get_static_text(error_handler: &ErrorContext, attr: &syn::Attribute) -> Option { 101 | use syn::Meta::*; 102 | 103 | if attr.path() != STATIC_TEXT { 104 | return None; 105 | } 106 | 107 | match &attr.meta { 108 | List(list) => match list.parse_args() { 109 | Ok(lit) => Some(lit), 110 | Err(e) => { 111 | error_handler.error_at( 112 | list, 113 | "argument to `static_text` must be a string literal: `#[static_text(\"...\")]`", 114 | ); 115 | error_handler.syn_error(e); 116 | None 117 | } 118 | }, 119 | Path(_) => { 120 | error_handler.error_at(attr, "missing text for `static_text`: try `#[static_text(\"...\")]`"); 121 | None 122 | } 123 | NameValue(_) => { 124 | error_handler.error_at( 125 | attr, 126 | "`static_text` takes the text as a function argument: `#[static_text(\"...\")]`", 127 | ); 128 | None 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /cstree-derive/src/parsing/attributes.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | use super::*; 4 | use proc_macro2::TokenStream; 5 | use quote::ToTokens; 6 | 7 | #[derive(Debug)] 8 | pub(crate) struct Attr<'i, T> { 9 | error_handler: &'i ErrorContext, 10 | name: Symbol, 11 | tokens: TokenStream, 12 | value: Option, 13 | } 14 | 15 | impl<'i, T> Attr<'i, T> { 16 | pub(super) fn none(error_handler: &'i ErrorContext, name: Symbol) -> Self { 17 | Attr { 18 | 
error_handler, 19 | name, 20 | tokens: TokenStream::new(), 21 | value: None, 22 | } 23 | } 24 | 25 | pub(super) fn set(&mut self, source: S, value: T) { 26 | let tokens = source.into_token_stream(); 27 | 28 | if self.value.is_some() { 29 | self.error_handler 30 | .error_at(tokens, format!("duplicate attribute: `{}`", self.name)); 31 | } else { 32 | self.tokens = tokens; 33 | self.value = Some(value); 34 | } 35 | } 36 | 37 | pub(super) fn set_opt(&mut self, source: S, value: Option) { 38 | if let Some(value) = value { 39 | self.set(source, value); 40 | } 41 | } 42 | 43 | pub(super) fn set_if_none(&mut self, value: T) { 44 | if self.value.is_none() { 45 | self.value = Some(value); 46 | } 47 | } 48 | 49 | pub(super) fn get(self) -> Option { 50 | self.value 51 | } 52 | 53 | pub(super) fn get_with_tokens(self) -> Option<(TokenStream, T)> { 54 | match self.value { 55 | Some(v) => Some((self.tokens, v)), 56 | None => None, 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /cstree-derive/src/symbols.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self}; 2 | use syn::{Ident, Path}; 3 | 4 | #[derive(Copy, Clone)] 5 | pub struct Symbol(&'static str); 6 | 7 | pub const STATIC_TEXT: Symbol = Symbol("static_text"); 8 | pub const REPR: Symbol = Symbol("repr"); 9 | pub const U32: Symbol = Symbol("u32"); 10 | 11 | impl Symbol { 12 | pub const fn new(text: &'static str) -> Self { 13 | Self(text) 14 | } 15 | } 16 | 17 | impl PartialEq for Ident { 18 | fn eq(&self, word: &Symbol) -> bool { 19 | self == word.0 20 | } 21 | } 22 | 23 | impl PartialEq for &Ident { 24 | fn eq(&self, word: &Symbol) -> bool { 25 | *self == word.0 26 | } 27 | } 28 | 29 | impl PartialEq for Path { 30 | fn eq(&self, word: &Symbol) -> bool { 31 | self.is_ident(word.0) 32 | } 33 | } 34 | 35 | impl PartialEq for &Path { 36 | fn eq(&self, word: &Symbol) -> bool { 37 | self.is_ident(word.0) 38 | } 39 | } 
40 | 41 | impl fmt::Display for Symbol { 42 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 43 | formatter.write_str(self.0) 44 | } 45 | } 46 | impl fmt::Debug for Symbol { 47 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 48 | formatter.debug_tuple("Symbol").field(&self.0).finish() 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /cstree/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cstree" 3 | description = "Library for generic lossless syntax trees" 4 | categories = ["parsing", "data-structures"] 5 | keywords = ["cstree", "parser", "parsing", "cst"] 6 | edition.workspace = true 7 | version.workspace = true 8 | authors.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | readme.workspace = true 12 | rust-version.workspace = true 13 | 14 | [dependencies] 15 | text-size = "1.1.0" 16 | rustc-hash = "2.1.1" 17 | parking_lot = "0.12.1" 18 | 19 | # Arc 20 | triomphe = { version = "0.1.8", default-features = false, features = ["stable_deref_trait", "std"] } 21 | 22 | # Default Interner 23 | indexmap = "2.4.0" 24 | 25 | [dependencies.cstree_derive] 26 | path = "../cstree-derive" 27 | version = "0.12.2" # must match the `cstree` version in the virtual workspace manifest 28 | optional = true 29 | 30 | [dependencies.lasso] 31 | version = "0.7" 32 | features = ["inline-more"] 33 | optional = true 34 | 35 | # [dependencies.salsa] 36 | # git = "https://github.com/salsa-rs/salsa/" 37 | # version = "0.1" 38 | # optional = true 39 | # package = "salsa-2022" 40 | 41 | [dependencies.serde] 42 | version = "1.0" 43 | optional = true 44 | default-features = false 45 | features = ["derive", "std"] 46 | 47 | [dev-dependencies] 48 | m_lexer = "0.0.4" 49 | serde_json = "1.0" 50 | serde_test = "1.0" 51 | crossbeam-utils = "0.8" 52 | criterion = { version = "0.5.1", features = ["html_reports"] } 53 | 54 
| [[bench]] 55 | name = "main" 56 | harness = false 57 | 58 | [features] 59 | default = [] 60 | # Derive macro for `Syntax` 61 | derive = ["dep:cstree_derive"] 62 | # Implementations of `serde::{De,}Serialize` for CSTrees. 63 | serialize = ["serde", "lasso?/serialize", "triomphe/serde"] 64 | # Interoperability with the `lasso` interning crate. 65 | # When enabled, `cstree`'s default interners will use `lasso` internally, too. 66 | lasso_compat = ["lasso"] 67 | # Additionally provide threadsafe interner types. 68 | # Where applicable (and if the corresponding features are selected), provide compatibility 69 | # implementations for multi-thread interners from other crates. 70 | multi_threaded_interning = ["lasso_compat", "lasso/multi-threaded"] 71 | # Interoperability with the `salsa` framework for incremental computation. 72 | # Use this feature for "Salsa 2022". 73 | # WARNING: This feature is considered unstable! 74 | # salsa_2022_compat = ["salsa"] 75 | 76 | [[example]] 77 | name = "math" 78 | required-features = ["derive"] 79 | 80 | [[example]] 81 | name = "s_expressions" 82 | required-features = ["derive"] 83 | 84 | [[example]] 85 | name = "salsa" 86 | required-features = ["salsa_2022_compat"] 87 | 88 | [package.metadata.docs.rs] 89 | all-features = true 90 | rustdoc-args = ["--cfg", "doc_cfg"] 91 | -------------------------------------------------------------------------------- /cstree/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | ../LICENSE-APACHE -------------------------------------------------------------------------------- /cstree/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | ../LICENSE-MIT -------------------------------------------------------------------------------- /cstree/README.md: -------------------------------------------------------------------------------- 1 | ../README.md 
-------------------------------------------------------------------------------- /cstree/benches/main.rs: -------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; 2 | use cstree::{ 3 | build::*, 4 | green::GreenNode, 5 | interning::{new_interner, Interner}, 6 | RawSyntaxKind, Syntax, 7 | }; 8 | 9 | #[derive(Debug)] 10 | pub enum Element<'s> { 11 | Node(Vec>), 12 | Token(&'s str), 13 | Plus, 14 | } 15 | 16 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 17 | pub enum TestKind { 18 | Element { n: u32 }, 19 | Plus, 20 | } 21 | 22 | impl Syntax for TestKind { 23 | fn from_raw(raw: RawSyntaxKind) -> Self { 24 | if raw.0 == u32::MAX - 1 { 25 | TestKind::Plus 26 | } else { 27 | TestKind::Element { n: raw.0 } 28 | } 29 | } 30 | 31 | fn into_raw(self) -> RawSyntaxKind { 32 | match self { 33 | TestKind::Element { n } => RawSyntaxKind(n), 34 | TestKind::Plus => RawSyntaxKind(u32::MAX - 1), 35 | } 36 | } 37 | 38 | fn static_text(self) -> Option<&'static str> { 39 | match self { 40 | TestKind::Plus => Some("+"), 41 | TestKind::Element { .. 
} => None, 42 | } 43 | } 44 | } 45 | 46 | pub fn build_tree_with_cache(root: &Element<'_>, cache: &mut NodeCache<'_, I>, use_static_text: bool) -> GreenNode 47 | where 48 | I: Interner, 49 | { 50 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_cache(cache); 51 | build_recursive(root, &mut builder, 0, use_static_text); 52 | let (node, cache) = builder.finish(); 53 | assert!(cache.is_none()); 54 | node 55 | } 56 | 57 | pub fn build_recursive( 58 | root: &Element<'_>, 59 | builder: &mut GreenNodeBuilder<'_, '_, TestKind, I>, 60 | mut from: u32, 61 | use_static_text: bool, 62 | ) -> u32 63 | where 64 | I: Interner, 65 | { 66 | match root { 67 | Element::Node(children) => { 68 | builder.start_node(TestKind::Element { n: from }); 69 | for child in children { 70 | from = build_recursive(child, builder, from + 1, use_static_text); 71 | } 72 | builder.finish_node(); 73 | } 74 | Element::Token(text) => { 75 | builder.token(TestKind::Element { n: from }, text); 76 | } 77 | Element::Plus if use_static_text => { 78 | builder.static_token(TestKind::Plus); 79 | } 80 | Element::Plus => { 81 | builder.token(TestKind::Plus, "+"); 82 | } 83 | } 84 | from 85 | } 86 | 87 | fn two_level_tree() -> Element<'static> { 88 | use Element::*; 89 | Node(vec![ 90 | Node(vec![Token("0.0"), Plus, Token("0.1")]), 91 | Node(vec![Token("1.0")]), 92 | Node(vec![Token("2.0"), Plus, Token("2.1"), Plus, Token("2.2")]), 93 | ]) 94 | } 95 | 96 | pub fn create(c: &mut Criterion) { 97 | #[cfg(not(feature = "lasso_compat"))] 98 | const GROUP_NAME: &str = "two-level tree (default interner)"; 99 | #[cfg(feature = "lasso_compat")] 100 | const GROUP_NAME: &str = "two-level tree (lasso)"; 101 | 102 | let mut group = c.benchmark_group(GROUP_NAME); 103 | group.throughput(Throughput::Elements(1)); 104 | 105 | let mut interner = new_interner(); 106 | let mut cache = NodeCache::with_interner(&mut interner); 107 | let tree = two_level_tree(); 108 | 109 | group.bench_function("with static text", |b| { 110 | 
b.iter(|| { 111 | let tree = build_tree_with_cache(&tree, &mut cache, true); 112 | black_box(tree); 113 | }) 114 | }); 115 | 116 | group.bench_function("without static text", |b| { 117 | b.iter(|| { 118 | let tree = build_tree_with_cache(&tree, &mut cache, false); 119 | black_box(tree); 120 | }) 121 | }); 122 | 123 | group.finish(); 124 | } 125 | 126 | criterion_group!(benches, create); 127 | criterion_main!(benches); 128 | -------------------------------------------------------------------------------- /cstree/examples/math.rs: -------------------------------------------------------------------------------- 1 | //! Example that takes the input 2 | //! 1 + 2 * 3 + 4 3 | //! and builds the tree 4 | //! - Marker(Root) 5 | //! - Marker(Operation) 6 | //! - Marker(Operation) 7 | //! - "1" Token(Number) 8 | //! - "+" Token(Add) 9 | //! - Marker(Operation) 10 | //! - "2" Token(Number) 11 | //! - "*" Token(Mul) 12 | //! - "3" Token(Number) 13 | //! - "+" Token(Add) 14 | //! - "4" Token(Number) 15 | 16 | use cstree::{build::GreenNodeBuilder, interning::Resolver, util::NodeOrToken, Syntax}; 17 | use std::iter::Peekable; 18 | 19 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 20 | #[repr(u32)] 21 | enum SyntaxKind { 22 | Whitespace, 23 | 24 | #[static_text("+")] 25 | Add, 26 | #[static_text("-")] 27 | Sub, 28 | #[static_text("*")] 29 | Mul, 30 | #[static_text("/")] 31 | Div, 32 | 33 | Number, 34 | Error, 35 | Operation, 36 | Root, 37 | } 38 | type MySyntax = SyntaxKind; 39 | use SyntaxKind::*; 40 | 41 | impl From for cstree::RawSyntaxKind { 42 | fn from(kind: SyntaxKind) -> Self { 43 | Self(kind as u32) 44 | } 45 | } 46 | 47 | type SyntaxNode = cstree::syntax::SyntaxNode; 48 | #[allow(unused)] 49 | type SyntaxToken = cstree::syntax::SyntaxToken; 50 | #[allow(unused)] 51 | type SyntaxElement = cstree::util::NodeOrToken; 52 | type SyntaxElementRef<'a> = cstree::util::NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>; 53 | 54 | struct Parser<'input, I: Iterator> { 55 | 
builder: GreenNodeBuilder<'static, 'static, MySyntax>, 56 | iter: Peekable, 57 | } 58 | impl<'input, I: Iterator> Parser<'input, I> { 59 | fn peek(&mut self) -> Option { 60 | while self.iter.peek().map(|&(t, _)| t == Whitespace).unwrap_or(false) { 61 | self.bump(); 62 | } 63 | self.iter.peek().map(|&(t, _)| t) 64 | } 65 | 66 | fn bump(&mut self) { 67 | if let Some((token, string)) = self.iter.next() { 68 | self.builder.token(token, string); 69 | } 70 | } 71 | 72 | fn parse_val(&mut self) { 73 | match self.peek() { 74 | Some(Number) => self.bump(), 75 | _ => { 76 | self.builder.start_node(Error); 77 | self.bump(); 78 | self.builder.finish_node(); 79 | } 80 | } 81 | } 82 | 83 | fn handle_operation(&mut self, tokens: &[SyntaxKind], next: fn(&mut Self)) { 84 | let checkpoint = self.builder.checkpoint(); 85 | next(self); 86 | while self.peek().map(|t| tokens.contains(&t)).unwrap_or(false) { 87 | self.builder.start_node_at(checkpoint, Operation); 88 | self.bump(); 89 | next(self); 90 | self.builder.finish_node(); 91 | } 92 | } 93 | 94 | fn parse_mul(&mut self) { 95 | self.handle_operation(&[Mul, Div], Self::parse_val) 96 | } 97 | 98 | fn parse_add(&mut self) { 99 | self.handle_operation(&[Add, Sub], Self::parse_mul) 100 | } 101 | 102 | fn parse(mut self) -> (SyntaxNode, impl Resolver) { 103 | self.builder.start_node(Root); 104 | self.parse_add(); 105 | self.builder.finish_node(); 106 | 107 | let (tree, cache) = self.builder.finish(); 108 | (SyntaxNode::new_root(tree), cache.unwrap().into_interner().unwrap()) 109 | } 110 | } 111 | 112 | fn print(indent: usize, element: SyntaxElementRef<'_>, resolver: &impl Resolver) { 113 | let kind = element.kind(); 114 | print!("{:indent$}", "", indent = indent); 115 | match element { 116 | NodeOrToken::Node(node) => { 117 | println!("- {:?}", kind); 118 | for child in node.children_with_tokens() { 119 | print(indent + 2, child, resolver); 120 | } 121 | } 122 | 123 | NodeOrToken::Token(token) => println!("- {:?} {:?}", 
token.resolve_text(resolver), kind), 124 | } 125 | } 126 | 127 | fn main() { 128 | let (ast, resolver) = Parser { 129 | builder: GreenNodeBuilder::new(), 130 | iter: vec![ 131 | // 1 + 2 * 3 + 4 132 | (Number, "1"), 133 | (Whitespace, " "), 134 | (Add, "+"), 135 | (Whitespace, " "), 136 | (Number, "2"), 137 | (Whitespace, " "), 138 | (Mul, "*"), 139 | (Whitespace, " "), 140 | (Number, "3"), 141 | (Whitespace, " "), 142 | (Add, "+"), 143 | (Whitespace, " "), 144 | (Number, "4"), 145 | ] 146 | .into_iter() 147 | .peekable(), 148 | } 149 | .parse(); 150 | print(0, (&ast).into(), &resolver); 151 | } 152 | -------------------------------------------------------------------------------- /cstree/examples/readme.rs: -------------------------------------------------------------------------------- 1 | use std::{io::Write, iter::Peekable}; 2 | 3 | use cstree::{ 4 | interning::Interner, 5 | prelude::*, 6 | syntax::{ResolvedElementRef, ResolvedNode}, 7 | }; 8 | 9 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 10 | #[repr(u32)] 11 | pub enum SyntaxKind { 12 | /* Tokens */ 13 | Int, // 42 14 | Plus, // + 15 | Minus, // - 16 | LParen, // ( 17 | RParen, // ) 18 | /* Nodes */ 19 | Expr, 20 | Root, 21 | } 22 | type Calculator = SyntaxKind; 23 | 24 | impl Syntax for Calculator { 25 | fn from_raw(raw: RawSyntaxKind) -> Self { 26 | // This just needs to be the inverse of `into_raw`, but could also 27 | // be an `impl TryFrom for SyntaxKind` or any other conversion. 
28 | match raw.0 { 29 | 0 => SyntaxKind::Int, 30 | 1 => SyntaxKind::Plus, 31 | 2 => SyntaxKind::Minus, 32 | 3 => SyntaxKind::LParen, 33 | 4 => SyntaxKind::RParen, 34 | 5 => SyntaxKind::Expr, 35 | 6 => SyntaxKind::Root, 36 | n => panic!("Unknown raw syntax kind: {n}"), 37 | } 38 | } 39 | 40 | fn into_raw(self) -> RawSyntaxKind { 41 | RawSyntaxKind(self as u32) 42 | } 43 | 44 | fn static_text(self) -> Option<&'static str> { 45 | match self { 46 | SyntaxKind::Plus => Some("+"), 47 | SyntaxKind::Minus => Some("-"), 48 | SyntaxKind::LParen => Some("("), 49 | SyntaxKind::RParen => Some(")"), 50 | _ => None, 51 | } 52 | } 53 | } 54 | 55 | #[derive(Debug, PartialEq, Eq, Clone, Copy)] 56 | pub enum Token<'input> { 57 | Int(&'input str), 58 | Plus, 59 | Minus, 60 | LParen, 61 | RParen, 62 | EoF, 63 | } 64 | 65 | pub struct Lexer<'input> { 66 | input: &'input str, 67 | at_eof: bool, 68 | } 69 | 70 | impl<'input> Lexer<'input> { 71 | pub fn new(input: &'input str) -> Self { 72 | Self { input, at_eof: false } 73 | } 74 | 75 | fn next_token(&mut self) -> Result, String> { 76 | loop { 77 | let Some(next_char) = self.input.chars().next() else { 78 | self.at_eof = true; 79 | return Ok(Token::EoF); 80 | }; 81 | 82 | let token = match next_char { 83 | '+' => Token::Plus, 84 | '-' => Token::Minus, 85 | '(' => Token::LParen, 86 | ')' => Token::RParen, 87 | c if c.is_ascii_digit() => { 88 | let (last_digit_idx, _char) = self 89 | .input 90 | .char_indices() 91 | .take_while(|(_idx, c)| c.is_ascii_digit()) 92 | .last() 93 | .expect("matched at least one"); 94 | // Advance lexer 95 | let number = Token::Int(&self.input[..=last_digit_idx]); 96 | self.input = &self.input[(last_digit_idx + 1)..]; 97 | return Ok(number); 98 | } 99 | c if c.is_whitespace() => { 100 | // Skip whitespace 101 | let (last_ws_idx, _char) = self 102 | .input 103 | .char_indices() 104 | .take_while(|(_idx, c)| c.is_whitespace()) 105 | .last() 106 | .expect("matched at least one"); 107 | // Advance lexer 108 | 
self.input = &self.input[(last_ws_idx + 1)..]; 109 | continue; 110 | } 111 | c => return Err(format!("Unknown start of token: '{c}'")), 112 | }; 113 | 114 | // Advance lexer 115 | self.input = &self.input[1..]; 116 | return Ok(token); 117 | } 118 | } 119 | } 120 | 121 | impl<'input> Iterator for Lexer<'input> { 122 | type Item = Token<'input>; 123 | 124 | fn next(&mut self) -> Option { 125 | if self.at_eof { 126 | None 127 | } else { 128 | Some(self.next_token().expect("Failed to lex input")) 129 | } 130 | } 131 | } 132 | 133 | pub struct Parser<'input> { 134 | lexer: Peekable>, 135 | builder: GreenNodeBuilder<'static, 'static, Calculator>, 136 | } 137 | 138 | impl<'input> Parser<'input> { 139 | pub fn new(input: &'input str) -> Self { 140 | Self { 141 | lexer: Lexer::new(input).peekable(), 142 | builder: GreenNodeBuilder::new(), 143 | } 144 | } 145 | 146 | pub fn bump(&mut self) -> Option> { 147 | self.lexer.next() 148 | } 149 | 150 | pub fn parse(&mut self) -> Result<(), String> { 151 | self.builder.start_node(SyntaxKind::Root); 152 | self.parse_expr()?; 153 | self.builder.finish_node(); 154 | Ok(()) 155 | } 156 | 157 | fn parse_lhs(&mut self) -> Result<(), String> { 158 | // An expression may start either with a number, or with an opening parenthesis that is the start of a 159 | // parenthesized expression 160 | let next_token = *self.lexer.peek().unwrap(); 161 | match next_token { 162 | Token::Int(n) => { 163 | self.bump(); 164 | self.builder.token(SyntaxKind::Int, n); 165 | } 166 | Token::LParen => { 167 | // Wrap the grouped expression inside a node containing it and its parentheses 168 | self.builder.start_node(SyntaxKind::Expr); 169 | self.bump(); 170 | self.builder.static_token(SyntaxKind::LParen); 171 | self.parse_expr()?; // Inner expression 172 | if self.bump() != Some(Token::RParen) { 173 | return Err("Missing ')'".to_string()); 174 | } 175 | self.builder.static_token(SyntaxKind::RParen); 176 | self.builder.finish_node(); 177 | } 178 | Token::EoF => 
return Err("Unexpected end of file: expected expression".to_string()), 179 | t => return Err(format!("Unexpected start of expression: '{t:?}'")), 180 | } 181 | Ok(()) 182 | } 183 | 184 | fn parse_expr(&mut self) -> Result<(), String> { 185 | // Remember our current position 186 | let before_expr = self.builder.checkpoint(); 187 | 188 | // Parse the start of the expression 189 | self.parse_lhs()?; 190 | 191 | // Check if the expression continues with `+ ` or `- ` 192 | let Some(next_token) = self.lexer.peek() else { 193 | return Ok(()); 194 | }; 195 | let op = match *next_token { 196 | Token::Plus => SyntaxKind::Plus, 197 | Token::Minus => SyntaxKind::Minus, 198 | Token::RParen | Token::EoF => return Ok(()), 199 | t => return Err(format!("Expected operator, found '{t:?}'")), 200 | }; 201 | 202 | // If so, retroactively wrap the (already parsed) LHS and the following RHS inside an `Expr` node 203 | self.builder.start_node_at(before_expr, SyntaxKind::Expr); 204 | self.bump(); 205 | self.builder.static_token(op); 206 | self.parse_expr()?; // RHS 207 | self.builder.finish_node(); 208 | Ok(()) 209 | } 210 | 211 | pub fn finish(mut self) -> (GreenNode, impl Interner) { 212 | assert!(self.lexer.next().map(|t| t == Token::EoF).unwrap_or(true)); 213 | let (tree, cache) = self.builder.finish(); 214 | (tree, cache.unwrap().into_interner().unwrap()) 215 | } 216 | } 217 | 218 | fn main() { 219 | use std::io; 220 | 221 | let mut buf = String::new(); 222 | loop { 223 | print!("Enter expression: "); 224 | io::stdout().flush().unwrap(); 225 | buf.clear(); 226 | if let Err(e) = io::stdin().read_line(&mut buf) { 227 | eprintln!("Error reading input: {e}"); 228 | continue; 229 | } 230 | let mut parser = Parser::new(&buf); 231 | if let Err(e) = parser.parse() { 232 | eprintln!("Parse error: {e}"); 233 | continue; 234 | } 235 | 236 | let (tree, interner) = parser.finish(); 237 | let root = SyntaxNode::::new_root_with_resolver(tree, interner); 238 | 239 | if let Some(expr) = 
root.first_child_or_token() { 240 | let result = eval_elem(expr, &mut root.children_with_tokens()); 241 | println!("Result: {result}"); 242 | } 243 | } 244 | } 245 | 246 | fn eval(expr: &ResolvedNode) -> i64 { 247 | let mut children = expr.children_with_tokens(); 248 | let lhs = eval_elem(children.next().expect("empty expr"), &mut children); 249 | let Some(op) = children.next().map(|elem| elem.kind()) else { 250 | // Literal expression 251 | return lhs; 252 | }; 253 | let rhs = eval_elem(children.next().expect("missing RHS"), &mut children); 254 | 255 | match op { 256 | SyntaxKind::Plus => lhs + rhs, 257 | SyntaxKind::Minus => lhs - rhs, 258 | _ => unreachable!("invalid op"), 259 | } 260 | } 261 | 262 | fn eval_elem<'e>( 263 | expr: ResolvedElementRef<'_, Calculator>, 264 | children: &mut impl Iterator>, 265 | ) -> i64 { 266 | use cstree::util::NodeOrToken; 267 | 268 | match expr { 269 | NodeOrToken::Node(n) => { 270 | assert_eq!(n.kind(), SyntaxKind::Expr); 271 | eval(n) 272 | } 273 | NodeOrToken::Token(t) => match t.kind() { 274 | SyntaxKind::Int => { 275 | let number_str = t.text(); 276 | number_str.parse().expect("parsed int could not be evaluated") 277 | } 278 | SyntaxKind::LParen => { 279 | let inner = children.next().expect("missing content inside parens"); 280 | // It's important that we consume the `)` here, as otherwise `eval` might mistake it for an operator 281 | assert_eq!( 282 | children 283 | .next() 284 | .and_then(|elem| elem.into_token()) 285 | .map(|token| token.kind()), 286 | Some(SyntaxKind::RParen) 287 | ); 288 | eval_elem(inner, children) 289 | } 290 | _ => unreachable!("invalid start of expression"), 291 | }, 292 | } 293 | } 294 | 295 | #[cfg(test)] 296 | mod tests { 297 | use super::*; 298 | 299 | #[test] 300 | fn lex() { 301 | let input = "11 + 2-(5 + 4)"; 302 | let lexer = Lexer::new(input); 303 | let tokens: Vec<_> = lexer.into_iter().collect(); 304 | assert_eq!( 305 | tokens, 306 | vec![ 307 | Token::Int("11"), 308 | Token::Plus, 309 | 
Token::Int("2"), 310 | Token::Minus, 311 | Token::LParen, 312 | Token::Int("5"), 313 | Token::Plus, 314 | Token::Int("4"), 315 | Token::RParen, 316 | Token::EoF 317 | ] 318 | ); 319 | } 320 | 321 | #[test] 322 | fn parse() { 323 | let input = "11 + 2-(5 + 4)"; 324 | let mut parser = Parser::new(input); 325 | parser.parse().unwrap(); 326 | let (tree, interner) = parser.finish(); 327 | let root = SyntaxNode::::new_root_with_resolver(tree, interner); 328 | dbg!(root); 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /cstree/examples/salsa.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "salsa_2022_compat")] 2 | 3 | use cstree::{build::GreenNodeBuilder, impl_cstree_interning_for_salsa}; 4 | 5 | #[salsa::jar(db = Db)] 6 | pub struct Jar(crate::SourceId); 7 | 8 | pub trait Db: salsa::DbWithJar {} 9 | impl Db for DB where DB: ?Sized + salsa::DbWithJar {} 10 | 11 | #[salsa::interned] 12 | pub struct SourceId { 13 | #[return_ref] 14 | pub text: String, 15 | } 16 | 17 | #[derive(Default)] 18 | #[salsa::db(crate::Jar)] 19 | struct Database { 20 | storage: salsa::Storage, 21 | } 22 | 23 | impl salsa::Database for Database {} 24 | 25 | impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId); 26 | 27 | use cstree::{syntax::SyntaxNode, testing::*}; 28 | 29 | fn main() { 30 | let db = Database::default(); 31 | let interned = SourceId::new(&db, "foo".to_string()); 32 | let original = interned.text(&db); 33 | assert_eq!(original, "foo"); 34 | 35 | let interner = db.as_interner(); 36 | let mut shared_interner = &interner; 37 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_interner(&mut shared_interner); 38 | let (tree, _no_interner_because_it_was_borrowed) = { 39 | builder.start_node(TestSyntaxKind::Plus); 40 | builder.token(TestSyntaxKind::Float, "2.05"); 41 | builder.token(TestSyntaxKind::Whitespace, " "); 42 | 
builder.token(TestSyntaxKind::Plus, "+"); 43 | builder.token(TestSyntaxKind::Whitespace, " "); 44 | builder.token(TestSyntaxKind::Float, "7.32"); 45 | builder.finish_node(); 46 | builder.finish() 47 | }; 48 | let tree: SyntaxNode = SyntaxNode::new_root(tree); 49 | assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32"); 50 | } 51 | -------------------------------------------------------------------------------- /cstree/src/green.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the inner, "green" tree. 2 | //! The [`GreenNodeBuilder`](crate::build::GreenNodeBuilder) from the [`build` module](crate::build) is the main entry 3 | //! point to constructing [`GreenNode`]s and [`GreenToken`]s. 4 | 5 | pub(super) mod builder; 6 | mod element; 7 | mod iter; 8 | mod node; 9 | mod token; 10 | 11 | pub(crate) use self::element::GreenElementRef; 12 | use self::element::{GreenElement, PackedGreenElement}; 13 | 14 | pub use self::{iter::GreenNodeChildren, node::GreenNode, token::GreenToken}; 15 | 16 | #[cfg(test)] 17 | mod tests { 18 | use super::*; 19 | use node::GreenNodeHead; 20 | use token::GreenTokenData; 21 | 22 | #[test] 23 | #[cfg_attr(miri, ignore)] 24 | fn assert_send_sync() { 25 | fn f() {} 26 | f::(); 27 | f::(); 28 | f::(); 29 | f::(); 30 | } 31 | 32 | #[test] 33 | #[cfg_attr(miri, ignore)] 34 | #[rustfmt::skip] 35 | fn assert_green_sizes() { 36 | use std::mem::size_of; 37 | 38 | assert_eq!(size_of::(), size_of::<*const u8>()); 39 | assert_eq!(size_of::(), size_of::<*const u8>()); 40 | assert_eq!(size_of::(), size_of::() * 3); 41 | assert_eq!(size_of::(), size_of::() * 3); 42 | assert_eq!(size_of::(), size_of::<*const u8>() * 2); 43 | assert_eq!(size_of::(), size_of::<*const u8>()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cstree/src/green/element.rs: -------------------------------------------------------------------------------- 1 | use 
std::{fmt, hash, mem}; 2 | 3 | // NOTE from `thin_dst`: 4 | // This MUST be size=1 such that pointer math actually advances the pointer. 5 | type ErasedPtr = *const u8; 6 | 7 | use crate::{ 8 | green::{GreenNode, GreenToken}, 9 | text::TextSize, 10 | util::NodeOrToken, 11 | RawSyntaxKind, 12 | }; 13 | 14 | pub(super) type GreenElement = NodeOrToken; 15 | pub(crate) type GreenElementRef<'a> = NodeOrToken<&'a GreenNode, &'a GreenToken>; 16 | 17 | #[repr(transparent)] 18 | pub(crate) struct PackedGreenElement { 19 | ptr: ErasedPtr, 20 | } 21 | 22 | impl From for GreenElement { 23 | #[inline] 24 | fn from(node: GreenNode) -> GreenElement { 25 | NodeOrToken::Node(node) 26 | } 27 | } 28 | 29 | impl<'a> From<&'a GreenNode> for GreenElementRef<'a> { 30 | #[inline] 31 | fn from(node: &'a GreenNode) -> GreenElementRef<'a> { 32 | NodeOrToken::Node(node) 33 | } 34 | } 35 | 36 | impl From for PackedGreenElement { 37 | #[inline] 38 | fn from(node: GreenNode) -> PackedGreenElement { 39 | unsafe { mem::transmute(node) } 40 | } 41 | } 42 | 43 | impl From for GreenElement { 44 | #[inline] 45 | fn from(token: GreenToken) -> GreenElement { 46 | NodeOrToken::Token(token) 47 | } 48 | } 49 | 50 | impl<'a> From<&'a GreenToken> for GreenElementRef<'a> { 51 | #[inline] 52 | fn from(token: &'a GreenToken) -> GreenElementRef<'a> { 53 | NodeOrToken::Token(token) 54 | } 55 | } 56 | 57 | impl From for PackedGreenElement { 58 | #[inline] 59 | fn from(token: GreenToken) -> PackedGreenElement { 60 | unsafe { mem::transmute(token) } 61 | } 62 | } 63 | 64 | impl GreenElement { 65 | /// Returns kind of this element. 66 | #[inline] 67 | pub fn kind(&self) -> RawSyntaxKind { 68 | self.as_ref().kind() 69 | } 70 | 71 | /// Returns the length of the text covered by this element. 72 | #[inline] 73 | pub fn text_len(&self) -> TextSize { 74 | self.as_ref().text_len() 75 | } 76 | } 77 | 78 | impl GreenElementRef<'_> { 79 | /// Returns kind of this element. 
80 | #[inline] 81 | pub fn kind(&self) -> RawSyntaxKind { 82 | match self { 83 | NodeOrToken::Node(it) => it.kind(), 84 | NodeOrToken::Token(it) => it.kind(), 85 | } 86 | } 87 | 88 | /// Returns the length of the text covered by this element. 89 | #[inline] 90 | pub fn text_len(self) -> TextSize { 91 | match self { 92 | NodeOrToken::Node(it) => it.text_len(), 93 | NodeOrToken::Token(it) => it.text_len(), 94 | } 95 | } 96 | } 97 | 98 | impl From for PackedGreenElement { 99 | fn from(element: GreenElement) -> Self { 100 | match element { 101 | NodeOrToken::Node(node) => node.into(), 102 | NodeOrToken::Token(token) => token.into(), 103 | } 104 | } 105 | } 106 | 107 | impl From for GreenElement { 108 | fn from(element: PackedGreenElement) -> Self { 109 | if element.is_node() { 110 | NodeOrToken::Node(element.into_node().unwrap()) 111 | } else { 112 | NodeOrToken::Token(element.into_token().unwrap()) 113 | } 114 | } 115 | } 116 | 117 | impl PackedGreenElement { 118 | pub(crate) fn is_node(&self) -> bool { 119 | self.ptr.addr() & super::token::IS_TOKEN_TAG == 0 120 | } 121 | 122 | pub(crate) fn as_node(&self) -> Option<&GreenNode> { 123 | if self.is_node() { 124 | unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenNode)) } 125 | } else { 126 | None 127 | } 128 | } 129 | 130 | pub(crate) fn into_node(self) -> Option { 131 | if self.is_node() { 132 | unsafe { Some(mem::transmute::(self)) } 133 | } else { 134 | None 135 | } 136 | } 137 | 138 | pub(crate) fn as_token(&self) -> Option<&GreenToken> { 139 | if !self.is_node() { 140 | unsafe { Some(&*(&self.ptr as *const ErasedPtr as *const GreenToken)) } 141 | } else { 142 | None 143 | } 144 | } 145 | 146 | pub(crate) fn into_token(self) -> Option { 147 | if !self.is_node() { 148 | unsafe { Some(mem::transmute::(self)) } 149 | } else { 150 | None 151 | } 152 | } 153 | 154 | pub(crate) fn as_ref(&self) -> GreenElementRef<'_> { 155 | if self.is_node() { 156 | NodeOrToken::Node(self.as_node().unwrap()) 157 | } else { 
158 | NodeOrToken::Token(self.as_token().unwrap()) 159 | } 160 | } 161 | } 162 | 163 | impl fmt::Debug for PackedGreenElement { 164 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 165 | if self.is_node() { 166 | self.as_node().unwrap().fmt(f) 167 | } else { 168 | self.as_token().unwrap().fmt(f) 169 | } 170 | } 171 | } 172 | 173 | impl Eq for PackedGreenElement {} 174 | impl PartialEq for PackedGreenElement { 175 | fn eq(&self, other: &Self) -> bool { 176 | self.as_node() == other.as_node() && self.as_token() == other.as_token() 177 | } 178 | } 179 | 180 | impl hash::Hash for PackedGreenElement { 181 | fn hash(&self, state: &mut H) 182 | where 183 | H: hash::Hasher, 184 | { 185 | if self.is_node() { 186 | self.as_node().unwrap().hash(state) 187 | } else { 188 | self.as_token().unwrap().hash(state) 189 | } 190 | } 191 | } 192 | 193 | impl Drop for PackedGreenElement { 194 | fn drop(&mut self) { 195 | if self.is_node() { 196 | PackedGreenElement { ptr: self.ptr }.into_node(); 197 | } else { 198 | PackedGreenElement { ptr: self.ptr }.into_token(); 199 | } 200 | } 201 | } 202 | 203 | unsafe impl Send for PackedGreenElement 204 | where 205 | GreenToken: Send, 206 | GreenNode: Send, 207 | { 208 | } 209 | unsafe impl Sync for PackedGreenElement 210 | where 211 | GreenToken: Sync, 212 | GreenNode: Sync, 213 | { 214 | } 215 | -------------------------------------------------------------------------------- /cstree/src/green/iter.rs: -------------------------------------------------------------------------------- 1 | //! Green tree iterators. 2 | 3 | use std::{iter::FusedIterator, slice}; 4 | 5 | use super::{element::PackedGreenElement, GreenElementRef}; 6 | 7 | /// An iterator over a [`GreenNode`](crate::green::GreenNode)'s children. 
8 | #[derive(Debug, Clone)] 9 | pub struct GreenNodeChildren<'a> { 10 | pub(super) inner: slice::Iter<'a, PackedGreenElement>, 11 | } 12 | 13 | // NB: forward everything stable that iter::Slice specializes as of Rust 1.39.0 14 | impl ExactSizeIterator for GreenNodeChildren<'_> { 15 | #[inline(always)] 16 | fn len(&self) -> usize { 17 | self.inner.len() 18 | } 19 | } 20 | 21 | impl<'a> Iterator for GreenNodeChildren<'a> { 22 | type Item = GreenElementRef<'a>; 23 | 24 | #[inline] 25 | fn next(&mut self) -> Option> { 26 | self.inner.next().map(PackedGreenElement::as_ref) 27 | } 28 | 29 | #[inline] 30 | fn size_hint(&self) -> (usize, Option) { 31 | self.inner.size_hint() 32 | } 33 | 34 | #[inline] 35 | fn count(self) -> usize 36 | where 37 | Self: Sized, 38 | { 39 | self.inner.count() 40 | } 41 | 42 | #[inline] 43 | fn nth(&mut self, n: usize) -> Option { 44 | self.inner.nth(n).map(PackedGreenElement::as_ref) 45 | } 46 | 47 | #[inline] 48 | fn last(mut self) -> Option 49 | where 50 | Self: Sized, 51 | { 52 | self.next_back() 53 | } 54 | 55 | #[inline] 56 | fn fold(self, init: Acc, mut f: Fold) -> Acc 57 | where 58 | Fold: FnMut(Acc, Self::Item) -> Acc, 59 | { 60 | let mut accum = init; 61 | for x in self { 62 | accum = f(accum, x); 63 | } 64 | accum 65 | } 66 | } 67 | 68 | impl DoubleEndedIterator for GreenNodeChildren<'_> { 69 | #[inline] 70 | fn next_back(&mut self) -> Option { 71 | self.inner.next_back().map(PackedGreenElement::as_ref) 72 | } 73 | 74 | #[inline] 75 | fn nth_back(&mut self, n: usize) -> Option { 76 | self.inner.nth_back(n).map(PackedGreenElement::as_ref) 77 | } 78 | 79 | #[inline] 80 | fn rfold(mut self, init: Acc, mut f: Fold) -> Acc 81 | where 82 | Fold: FnMut(Acc, Self::Item) -> Acc, 83 | { 84 | let mut accum = init; 85 | while let Some(x) = self.next_back() { 86 | accum = f(accum, x); 87 | } 88 | accum 89 | } 90 | } 91 | 92 | impl FusedIterator for GreenNodeChildren<'_> {} 93 | 
-------------------------------------------------------------------------------- /cstree/src/green/node.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | hash::{Hash, Hasher}, 3 | slice, 4 | }; 5 | 6 | use rustc_hash::FxHasher; 7 | 8 | use crate::{ 9 | green::{iter::GreenNodeChildren, GreenElement, PackedGreenElement}, 10 | text::TextSize, 11 | RawSyntaxKind, 12 | }; 13 | use triomphe::{Arc, HeaderWithLength, ThinArc}; 14 | 15 | #[repr(align(2))] //to use 1 bit for pointer tagging. NB: this is an at-least annotation 16 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 17 | pub(super) struct GreenNodeHead { 18 | pub(super) kind: RawSyntaxKind, 19 | pub(super) text_len: TextSize, 20 | pub(super) child_hash: u32, 21 | } 22 | 23 | /// Internal node in the immutable "green" tree. 24 | /// It contains other nodes and tokens as its children. 25 | #[derive(Clone)] 26 | pub struct GreenNode { 27 | pub(super) data: ThinArc, 28 | } 29 | 30 | impl std::fmt::Debug for GreenNode { 31 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 32 | self.data.with_arc(|data| data.fmt(f)) 33 | } 34 | } 35 | 36 | impl GreenNode { 37 | /// Creates a new Node. 38 | #[inline] 39 | pub fn new(kind: RawSyntaxKind, children: I) -> GreenNode 40 | where 41 | I: IntoIterator, 42 | I::IntoIter: ExactSizeIterator, 43 | { 44 | let mut hasher = FxHasher::default(); 45 | let mut text_len: TextSize = 0.into(); 46 | let children = children 47 | .into_iter() 48 | .inspect(|it| { 49 | text_len += it.text_len(); 50 | it.hash(&mut hasher); 51 | }) 52 | .map(PackedGreenElement::from); 53 | let header = HeaderWithLength::new( 54 | GreenNodeHead { 55 | kind, 56 | text_len: 0.into(), 57 | child_hash: 0, 58 | }, 59 | children.len(), 60 | ); 61 | let mut data = Arc::from_header_and_iter(header, children); 62 | 63 | // XXX: fixup `text_len` and `child_hash` after construction, because 64 | // we can't iterate `children` twice. 
65 | let header = &mut Arc::get_mut(&mut data).unwrap().header.header; 66 | header.text_len = text_len; 67 | header.child_hash = hasher.finish() as u32; 68 | GreenNode { 69 | data: Arc::into_thin(data), 70 | } 71 | } 72 | 73 | /// Creates a new Node. 74 | #[inline] 75 | pub(super) fn new_with_len_and_hash( 76 | kind: RawSyntaxKind, 77 | children: I, 78 | text_len: TextSize, 79 | child_hash: u32, 80 | ) -> GreenNode 81 | where 82 | I: IntoIterator, 83 | I::IntoIter: ExactSizeIterator, 84 | { 85 | let children = children.into_iter().map(PackedGreenElement::from); 86 | let header = HeaderWithLength::new( 87 | GreenNodeHead { 88 | kind, 89 | text_len: 0.into(), 90 | child_hash: 0, 91 | }, 92 | children.len(), 93 | ); 94 | let mut data = Arc::from_header_and_iter(header, children); 95 | 96 | // XXX: fixup `text_len` and `child_hash` after construction, because 97 | // we can't iterate `children` twice. 98 | let header = &mut Arc::get_mut(&mut data).unwrap().header.header; 99 | header.text_len = text_len; 100 | header.child_hash = child_hash; 101 | GreenNode { 102 | data: Arc::into_thin(data), 103 | } 104 | } 105 | 106 | #[inline] 107 | pub(super) fn from_head_and_children(header: GreenNodeHead, children: I) -> GreenNode 108 | where 109 | I: IntoIterator, 110 | I::IntoIter: ExactSizeIterator, 111 | { 112 | let children = children.into_iter().map(PackedGreenElement::from); 113 | let header = HeaderWithLength::new(header, children.len()); 114 | GreenNode { 115 | data: Arc::into_thin(Arc::from_header_and_iter(header, children)), 116 | } 117 | } 118 | 119 | /// [`RawSyntaxKind`] of this node. 120 | #[inline] 121 | pub fn kind(&self) -> RawSyntaxKind { 122 | self.data.header.header.kind 123 | } 124 | 125 | /// Returns the length of text covered by this node. 
126 | #[inline] 127 | pub fn text_len(&self) -> TextSize { 128 | self.data.header.header.text_len 129 | } 130 | 131 | #[inline] 132 | pub(crate) fn iter(&self) -> slice::Iter<'_, PackedGreenElement> { 133 | self.data.slice.iter() 134 | } 135 | 136 | /// Iterator over all children of this node. 137 | #[inline] 138 | pub fn children(&self) -> GreenNodeChildren<'_> { 139 | GreenNodeChildren { 140 | inner: self.data.slice.iter(), 141 | } 142 | } 143 | } 144 | 145 | impl Hash for GreenNode { 146 | #[inline] 147 | fn hash(&self, state: &mut H) { 148 | self.data.header.header.hash(state); 149 | } 150 | } 151 | 152 | impl PartialEq for GreenNode { 153 | fn eq(&self, other: &Self) -> bool { 154 | self.data == other.data 155 | } 156 | } 157 | 158 | impl Eq for GreenNode {} 159 | -------------------------------------------------------------------------------- /cstree/src/green/token.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, hash, mem::ManuallyDrop, ptr::NonNull}; 2 | 3 | use crate::{ 4 | interning::{Resolver, TokenKey}, 5 | text::TextSize, 6 | RawSyntaxKind, 7 | }; 8 | use triomphe::Arc; 9 | 10 | #[repr(align(2))] // to use 1 bit for pointer tagging. NB: this is an at-least annotation 11 | #[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] 12 | pub(super) struct GreenTokenData { 13 | pub(super) kind: RawSyntaxKind, 14 | pub(super) text: Option, 15 | pub(super) text_len: TextSize, 16 | } 17 | 18 | /// Leaf node in the immutable "green" tree. 
19 | pub struct GreenToken { 20 | ptr: NonNull, 21 | } 22 | 23 | unsafe impl Send for GreenToken {} // where GreenTokenData: Send + Sync 24 | unsafe impl Sync for GreenToken {} // where GreenTokenData: Send + Sync 25 | 26 | pub(super) const IS_TOKEN_TAG: usize = 0x1; 27 | impl GreenToken { 28 | fn add_tag(ptr: NonNull) -> NonNull { 29 | unsafe { 30 | let ptr = ptr.as_ptr().map_addr(|addr| addr | IS_TOKEN_TAG); 31 | NonNull::new_unchecked(ptr) 32 | } 33 | } 34 | 35 | fn remove_tag(ptr: NonNull) -> NonNull { 36 | unsafe { 37 | let ptr = ptr.as_ptr().map_addr(|addr| addr & !IS_TOKEN_TAG); 38 | NonNull::new_unchecked(ptr) 39 | } 40 | } 41 | 42 | fn data(&self) -> &GreenTokenData { 43 | unsafe { &*Self::remove_tag(self.ptr).as_ptr() } 44 | } 45 | 46 | /// Creates a new Token. 47 | #[inline] 48 | pub(super) fn new(data: GreenTokenData) -> GreenToken { 49 | let ptr = Arc::into_raw(Arc::new(data)); 50 | let ptr = NonNull::new(ptr as *mut _).unwrap(); 51 | GreenToken { 52 | ptr: Self::add_tag(ptr), 53 | } 54 | } 55 | 56 | /// [`RawSyntaxKind`] of this Token. 57 | #[inline] 58 | pub fn kind(&self) -> RawSyntaxKind { 59 | self.data().kind 60 | } 61 | 62 | /// The original source text of this Token. 63 | #[inline] 64 | pub fn text<'i, I>(&self, resolver: &'i I) -> Option<&'i str> 65 | where 66 | I: Resolver + ?Sized, 67 | { 68 | self.data().text.map(|key| resolver.resolve(key)) 69 | } 70 | 71 | /// Returns the length of text covered by this token. 72 | #[inline] 73 | pub fn text_len(&self) -> TextSize { 74 | self.data().text_len 75 | } 76 | 77 | /// Returns the interned key of text covered by this token. 78 | /// This key may be used for comparisons with other keys of strings interned by the same interner. 79 | /// 80 | /// See also [`text`](GreenToken::text). 
81 | #[inline] 82 | pub fn text_key(&self) -> Option { 83 | self.data().text 84 | } 85 | } 86 | 87 | impl fmt::Debug for GreenToken { 88 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 89 | let data = self.data(); 90 | f.debug_struct("GreenToken") 91 | .field("kind", &data.kind) 92 | .field("text", &data.text) 93 | .finish() 94 | } 95 | } 96 | 97 | impl Clone for GreenToken { 98 | fn clone(&self) -> Self { 99 | let ptr = Self::remove_tag(self.ptr); 100 | let ptr = unsafe { 101 | let arc = ManuallyDrop::new(Arc::from_raw(ptr.as_ptr())); 102 | Arc::into_raw(Arc::clone(&arc)) 103 | }; 104 | let ptr = unsafe { NonNull::new_unchecked(ptr as *mut _) }; 105 | GreenToken { 106 | ptr: Self::add_tag(ptr), 107 | } 108 | } 109 | } 110 | 111 | impl Eq for GreenToken {} 112 | impl PartialEq for GreenToken { 113 | fn eq(&self, other: &Self) -> bool { 114 | self.data() == other.data() 115 | } 116 | } 117 | 118 | impl hash::Hash for GreenToken { 119 | fn hash(&self, state: &mut H) 120 | where 121 | H: hash::Hasher, 122 | { 123 | self.data().hash(state) 124 | } 125 | } 126 | 127 | impl Drop for GreenToken { 128 | fn drop(&mut self) { 129 | unsafe { 130 | Arc::from_raw(Self::remove_tag(self.ptr).as_ptr()); 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /cstree/src/interning.rs: -------------------------------------------------------------------------------- 1 | //! Types and Traits for efficient String storage and deduplication. 2 | //! 3 | //! Because `cstree` is aimed at _concrete_ syntax trees that faithfully represent all of the original program input, 4 | //! `cstree` aks for the text of each token when building a syntax tree. You'll notice this when looking at 5 | //! [`GreenNodeBuilder::token`], which takes the kind of token and a refernce to the text of the token in the source. 6 | //! 7 | //! 
Of course, there are tokens whose text will always be the same, such as punctuation (like a semicolon), keywords 8 | //! (like `fn`), or operators (like `<=`). Use [`Syntax::static_text`] when implementing `Syntax` to make `cstree` 9 | //! aware of such tokens. 10 | //! 11 | //! There is, however, another category of tokens whose text will appear repeatedly, but for which we cannot know the 12 | //! text upfront. Any variable, type, or method that is user-defined will likely be named more than once, but there is 13 | //! no way to know beforehand what names a user will choose. 14 | //! 15 | //! In order to avoid storing the source text for these tokens many times over, `cstree` _interns_ the text of its 16 | //! tokens (if that text is not static). What this means is that each unique string is only stored once. When a new 17 | //! token is added - say, a variable -, we check if we already know its contents (the variable name). If the text is 18 | //! new, we save it and give it a unique Id. If we have seen the text before, we look up its unique Id and don't need to 19 | //! keep the new data around. As an additional benefit, interning also makes it much cheaper to copy source text around 20 | //! and also to compare it with other source text, since what is actually being copied or compared is just an integer. 21 | //! 22 | //! ## I just want to build a syntax tree 23 | //! 24 | //! If you don't want to worry about this for now, you (mostly) can! All required functionality is implemented in 25 | //! `cstree` and you can just use [`GreenNodeBuilder::new`] to obtain a tree builder with everything set up (see the 26 | //! [crate documentation] for more on how to get started). This will create an interner, which the builder returns 27 | //! together with the syntax tree on [`finish`] as part of its node cache (call [`NodeCache::into_interner`] on the 28 | //! result to get the interner out). 29 | //! 30 | //! 
Here begins the part where you do have to think about interning: `cstree` needs the interner you get when you want 31 | //! to look at the source text for some part of the syntax tree, so you'll have to keep it around somehow until the 32 | //! point where you need it. 33 | //! 34 | //! How best to do this depends on what you need the text for. If the code that accesses the text is close-by, it might 35 | //! be enough to pass the return value to the functions that need it (within `cstree` or in your code). Other options 36 | //! could be to store the interner together with the syntax tree. If you use [`SyntaxNode::new_root_with_resolver`], you 37 | //! get a syntax tree that can handle text without any need to manage and pass an interner (the reason the method is 38 | //! called `_with_resolver` and not `_with_interner` is that it doesn't actually need a full [`Interner`] -- once the 39 | //! tree is created, no more text will be added, so it just needs to be able to look up text. This part is called a 40 | //! [`Resolver`]). Or you could put the interner somewhere "global", where you can easily access it from anywhere. 41 | //! 42 | //! ## Using other interners 43 | //! 44 | //! By default, `cstree` uses its own, simple interner implementation. You can obtain an interner by calling 45 | //! [`new_interner`], or bring your own by implementing the [`Resolver`] and [`Interner`] traits defined in this module. 46 | //! Most methods in `cstree` require that you support interning [`TokenKey`]s. `TokenKey` implements [`InternKey`], so 47 | //! your implementation can use that to convert to whatever types it uses for its internal representation. Note that 48 | //! there is no way to change the size of the internal representation. 49 | //! 50 | //! ### `lasso` 51 | //! Using features, you can enable support for some third-party interners. The primary one is [`lasso`], a crate focused 52 | //! on efficient interning of text strings.
This is enabled via the `lasso_compat` feature and adds the necessary trait 53 | //! implementation to make `lasso`'s interners work with `cstree` (as well as a re-export of the matching version of 54 | //! `lasso` here). If enabled, `cstree`'s built-in interning functionality is replaced with `lasso`'s more efficient one 55 | //! transparently, so you'll now be returned a `lasso` interner from [`new_interner`]. 56 | // 57 | // ### `salsa` 58 | // If you are using the "2022" version of the `salsa` incremental query framework, it is possible to use its interning 59 | // capabilities with `cstree` as well. Support for this is experimental, and you have to opt in via the 60 | // `salsa_2022_compat` feature. For instructions on how to do this, and whether you actually want to, please refer to 61 | // [the `salsa_compat` module documentation]. 62 | #![cfg_attr( 63 | feature = "multi_threaded_interning", 64 | doc = r###" 65 | ## Multi-threaded interners 66 | 67 | If you want to use your interner on more than one thread, the interner needs to support interning new text through 68 | shared access. With the `multi_threaded_interning` feature, you can get such an interner by calling 69 | [`new_threaded_interner`]. The feature also enables support for `ThreadedRodeo`, the multi-threaded interner from 70 | `lasso`. 
71 | 72 | **You can pass a reference or an Arc to that interner to anything that expects an [`Interner`]!** 73 | While the interning methods on [`Interner`] require a `&mut self` to also work for single-threaded interners, both 74 | [`Resolver`] and [`Interner`] will be implemented for `&interner` and `Arc::new(interner)` if `interner` is multi-threaded: 75 | 76 | ``` 77 | # use cstree::testing::*; 78 | # use cstree::interning::*; 79 | # use std::sync::Arc; 80 | let interner = Arc::new(new_threaded_interner()); 81 | let mut builder: GreenNodeBuilder> = 82 | GreenNodeBuilder::from_interner(Arc::clone(&interner)); 83 | 84 | // or: 85 | // let interner = new_threaded_interner(); 86 | // let mut builder: GreenNodeBuilder = 87 | // GreenNodeBuilder::from_interner(&interner); 88 | 89 | # builder.start_node(Root); 90 | # builder.token(Int, "42"); 91 | # builder.finish_node(); 92 | parse(&mut builder, "42"); 93 | let (tree, cache) = builder.finish(); 94 | let int = tree.children().next().unwrap(); 95 | assert_eq!(int.as_token().unwrap().text(&interner), Some("42")); 96 | ``` 97 | 98 | Here, we use `from_interner`, but pass it only a shared reference to "own". Take care to denote the type signature 99 | of the `GreenNodeBuilder` appropriately. 100 | "### 101 | )] 102 | //! 103 | //! [crate documentation]: crate 104 | //! [`Syntax::static_text`]: crate::Syntax::static_text 105 | //! [`GreenNodeBuilder::token`]: crate::build::GreenNodeBuilder::token 106 | //! [`GreenNodeBuilder::new`]: crate::build::GreenNodeBuilder::new 107 | //! [`finish`]: crate::build::GreenNodeBuilder::finish 108 | //! [`NodeCache::into_interner`]: crate::build::NodeCache::into_interner 109 | //! [`SyntaxNode::new_root_with_resolver`]: crate::syntax::SyntaxNode::new_root_with_resolver 110 | //! 
[`lasso`]: lasso 111 | // [the `salsa_compat` module documentation]: salsa_compat 112 | 113 | mod traits; 114 | pub use self::traits::*; 115 | 116 | mod default_interner; 117 | 118 | #[cfg(not(feature = "lasso_compat"))] 119 | #[doc(inline)] 120 | pub use default_interner::TokenInterner; 121 | 122 | #[cfg(feature = "lasso_compat")] 123 | mod lasso_compat; 124 | 125 | #[cfg(feature = "lasso_compat")] 126 | #[doc(inline)] 127 | pub use lasso_compat::TokenInterner; 128 | 129 | #[cfg(feature = "multi_threaded_interning")] 130 | #[doc(inline)] 131 | pub use lasso_compat::MultiThreadedTokenInterner; 132 | 133 | #[cfg(feature = "lasso_compat")] 134 | #[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))] 135 | pub use lasso; 136 | 137 | #[cfg(feature = "salsa_2022_compat")] 138 | #[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))] 139 | pub mod salsa_compat; 140 | 141 | use core::fmt; 142 | use std::num::NonZeroU32; 143 | 144 | /// The intern key type for the source text of [`GreenToken`s](crate::green::GreenToken). 145 | /// Each unique key uniquely identifies a deduplicated, interned source string. 146 | #[derive(Clone, Copy, PartialEq, Eq, Hash)] 147 | #[repr(transparent)] 148 | pub struct TokenKey { 149 | inner: NonZeroU32, 150 | } 151 | 152 | // Safety: we match `+ 1` and `- 1`, so it is always possible to round-trip. 153 | unsafe impl InternKey for TokenKey { 154 | #[inline] 155 | fn into_u32(self) -> u32 { 156 | self.inner.get() - 1 157 | } 158 | 159 | fn try_from_u32(key: u32) -> Option { 160 | (key < u32::MAX).then(|| Self { 161 | // Safety: non-zero by increment. 162 | // Overflow is impossible under the check above. 163 | inner: unsafe { NonZeroU32::new_unchecked(key + 1) }, 164 | }) 165 | } 166 | } 167 | 168 | impl fmt::Debug for TokenKey { 169 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 170 | f.write_fmt(format_args!("TokenKey({})", self.inner)) 171 | } 172 | } 173 | 174 | /// Constructs a new, single-threaded [`Interner`]. 
175 | /// 176 | /// If you need the interner to be multi-threaded, see [`new_threaded_interner`]. 177 | #[inline] 178 | pub fn new_interner() -> TokenInterner { 179 | TokenInterner::new() 180 | } 181 | 182 | /// Constructs a new [`Interner`] that can be used across multiple threads. 183 | /// 184 | /// Note that you can use `&MultiThreadedTokenInterner` and `Arc` to access interning methods 185 | /// through a shared reference, as well as construct new syntax trees. See [the module documentation](self) for more 186 | /// information and examples. 187 | #[cfg(feature = "multi_threaded_interning")] 188 | #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))] 189 | #[inline] 190 | pub fn new_threaded_interner() -> MultiThreadedTokenInterner { 191 | MultiThreadedTokenInterner::new() 192 | } 193 | -------------------------------------------------------------------------------- /cstree/src/interning/default_interner.rs: -------------------------------------------------------------------------------- 1 | #![cfg(not(feature = "lasso_compat"))] 2 | 3 | use core::fmt; 4 | use std::sync::Arc as StdArc; 5 | 6 | use indexmap::IndexSet; 7 | use rustc_hash::FxBuildHasher; 8 | 9 | use super::{InternKey, Interner, Resolver, TokenKey}; 10 | 11 | /// The default [`Interner`] used to deduplicate green token strings. 
12 | #[derive(Debug)] 13 | pub struct TokenInterner { 14 | id_set: IndexSet, 15 | } 16 | 17 | impl TokenInterner { 18 | pub(in crate::interning) fn new() -> Self { 19 | Self { 20 | id_set: IndexSet::default(), 21 | } 22 | } 23 | } 24 | 25 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 26 | pub enum InternerError { 27 | KeySpaceExhausted, 28 | } 29 | 30 | impl fmt::Display for InternerError { 31 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 32 | match self { 33 | InternerError::KeySpaceExhausted => write!(f, "key space exhausted"), 34 | } 35 | } 36 | } 37 | 38 | impl std::error::Error for InternerError {} 39 | 40 | impl Resolver for TokenInterner { 41 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 42 | let index = key.into_u32() as usize; 43 | self.id_set.get_index(index).map(String::as_str) 44 | } 45 | } 46 | 47 | impl Resolver for StdArc { 48 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 49 | let index = key.into_u32() as usize; 50 | self.id_set.get_index(index).map(String::as_str) 51 | } 52 | } 53 | 54 | // `TokenKey` can represent `1` to `u32::MAX` (due to the `NonNull` niche), so `u32::MAX` elements. 55 | // Set indices start at 0, so everything shifts down by 1. 
56 | const N_INDICES: usize = u32::MAX as usize; 57 | 58 | impl Interner for TokenInterner { 59 | type Error = InternerError; 60 | 61 | fn try_get_or_intern(&mut self, text: &str) -> Result { 62 | if let Some(index) = self.id_set.get_index_of(text) { 63 | let raw_key = u32::try_from(index).unwrap_or_else(|_| { 64 | panic!("found interned text with invalid index `{index}` (index too high for keyspace)") 65 | }); 66 | return Ok(TokenKey::try_from_u32(raw_key).unwrap_or_else(|| { 67 | panic!("found interned text with invalid index `{index}` (index too high for keyspace)") 68 | })); 69 | } else if self.id_set.len() >= N_INDICES { 70 | return Err(InternerError::KeySpaceExhausted); 71 | } 72 | 73 | let (index, added) = self.id_set.insert_full(text.to_string()); 74 | debug_assert!(added, "tried to intern duplicate text"); 75 | let raw_key = u32::try_from(index).unwrap_or_else(|_| panic!("interned `{index}` despite keyspace exhaustion")); 76 | TokenKey::try_from_u32(raw_key).ok_or(InternerError::KeySpaceExhausted) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /cstree/src/interning/lasso_compat.rs: -------------------------------------------------------------------------------- 1 | //! Bridge between `cstree`'s and `lasso`'s types and traits. 2 | 3 | #![cfg(feature = "lasso_compat")] 4 | 5 | mod token_interner; 6 | #[doc(inline)] 7 | pub use token_interner::*; 8 | 9 | mod traits; 10 | -------------------------------------------------------------------------------- /cstree/src/interning/lasso_compat/token_interner.rs: -------------------------------------------------------------------------------- 1 | //! Default interner implementations based on `lasso`. 
2 | 3 | #![cfg(feature = "lasso_compat")] 4 | 5 | use std::{fmt, hash::BuildHasher, num::NonZeroUsize}; 6 | 7 | use lasso::{Capacity, Rodeo, ThreadedRodeo}; 8 | use rustc_hash::FxBuildHasher; 9 | 10 | use crate::interning::{Interner, Resolver, TokenKey}; 11 | 12 | /// Default number of strings that the interner will initially allocate space for. 13 | /// Value recommended by the author of `lasso`. 14 | const DEFAULT_STRING_CAPACITY: usize = 512; 15 | 16 | /// Default memory in bytes that the interner will initially allocate space for. 17 | /// Value recommended by the author of `lasso`. 18 | const DEFAULT_BYTE_CAPACITY: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4096) }; 19 | 20 | macro_rules! impl_traits { 21 | (for $interner:ty $(, if #[cfg(feature = $feature:literal)])?) => { 22 | $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])? 23 | impl Resolver for $interner { 24 | #[inline] 25 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 26 | self.rodeo.try_resolve(&key) 27 | } 28 | 29 | #[inline] 30 | fn resolve(&self, key: TokenKey) -> &str { 31 | self.rodeo.resolve(&key) 32 | } 33 | } 34 | 35 | $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])? 36 | impl Interner for $interner { 37 | type Error = lasso::LassoError; 38 | 39 | #[inline] 40 | fn try_get_or_intern(&mut self, text: &str) -> Result { 41 | self.rodeo.try_get_or_intern(text) 42 | } 43 | 44 | #[inline] 45 | fn get_or_intern(&mut self, text: &str) -> TokenKey { 46 | self.rodeo.get_or_intern(text) 47 | } 48 | } 49 | }; 50 | } 51 | 52 | /// The default [`Interner`] used to deduplicate green token strings. 53 | pub struct TokenInterner { 54 | rodeo: Rodeo, 55 | } 56 | 57 | impl TokenInterner { 58 | pub(in crate::interning) fn new() -> Self { 59 | Self { 60 | rodeo: Rodeo::with_capacity_and_hasher( 61 | Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY), 62 | FxBuildHasher, 63 | ), 64 | } 65 | } 66 | 67 | /// Returns the [`Rodeo`] backing this interner. 
68 | #[cfg_attr(doc_cfg, doc(cfg(feature = "lasso_compat")))] 69 | #[inline] 70 | pub fn into_inner(self) -> Rodeo { 71 | self.rodeo 72 | } 73 | } 74 | 75 | impl fmt::Debug for TokenInterner { 76 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 77 | f.write_str("TokenInterner") 78 | } 79 | } 80 | 81 | impl_traits!(for TokenInterner); 82 | 83 | #[cfg(feature = "multi_threaded_interning")] 84 | pub use multi_threaded::MultiThreadedTokenInterner; 85 | 86 | #[cfg(feature = "multi_threaded_interning")] 87 | mod multi_threaded { 88 | use super::*; 89 | 90 | use std::sync::Arc as StdArc; 91 | 92 | /// A threadsafe [`Interner`] for deduplicating [`GreenToken`](crate::green::GreenToken) strings. 93 | /// 94 | /// Note that [`Interner`] and [`Resolver`] are also implemented for `&MultiThreadTokenInterner` and 95 | /// `Arc` so you can pass a mutable reference to either of these in shared contexts. 96 | #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))] 97 | pub struct MultiThreadedTokenInterner { 98 | rodeo: ThreadedRodeo, 99 | } 100 | 101 | impl MultiThreadedTokenInterner { 102 | pub(in crate::interning) fn new() -> Self { 103 | Self { 104 | rodeo: ThreadedRodeo::with_capacity_and_hasher( 105 | Capacity::new(DEFAULT_STRING_CAPACITY, DEFAULT_BYTE_CAPACITY), 106 | FxBuildHasher, 107 | ), 108 | } 109 | } 110 | } 111 | 112 | impl fmt::Debug for MultiThreadedTokenInterner { 113 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 114 | f.write_str("MultiThreadedTokenInterner") 115 | } 116 | } 117 | 118 | impl_traits!(for MultiThreadedTokenInterner, if #[cfg(feature = "multi_threaded_interning")]); 119 | impl_traits!(for StdArc, if #[cfg(feature = "multi_threaded_interning")]); 120 | } 121 | -------------------------------------------------------------------------------- /cstree/src/interning/lasso_compat/traits.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "lasso_compat")] 2 | 3 | 
use core::fmt; 4 | use std::hash::{BuildHasher, Hash}; 5 | 6 | use crate::interning::{ 7 | traits::{InternKey, Interner, Resolver}, 8 | TokenKey, 9 | }; 10 | 11 | // Safety: `InternKey` has the same invariant as `lasso::Key` 12 | unsafe impl lasso::Key for TokenKey { 13 | fn into_usize(self) -> usize { 14 | self.into_u32() as usize 15 | } 16 | 17 | fn try_from_usize(int: usize) -> Option { 18 | let raw_key = u32::try_from(int).ok()?; 19 | Self::try_from_u32(raw_key) 20 | } 21 | } 22 | 23 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 24 | pub enum LassoCompatError { 25 | LassoError(lasso::LassoError), 26 | KeyConversionError { lasso_key: usize }, 27 | } 28 | 29 | impl From for LassoCompatError { 30 | #[inline] 31 | fn from(error: lasso::LassoError) -> Self { 32 | Self::LassoError(error) 33 | } 34 | } 35 | 36 | impl fmt::Display for LassoCompatError { 37 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 38 | match self { 39 | LassoCompatError::LassoError(lasso_error) => write!(f, "{lasso_error}"), 40 | LassoCompatError::KeyConversionError { lasso_key } => write!( 41 | f, 42 | "invalid key: failed to convert `lasso::Key` `{lasso_key}` to `InternKey`" 43 | ), 44 | } 45 | } 46 | } 47 | 48 | impl std::error::Error for LassoCompatError {} 49 | 50 | macro_rules! compat_resolver { 51 | ($resolver:ident $(where $($t:ident : $bound:ident),+)? $(if #[cfg(feature = $feature:literal)])?) => { 52 | $(#[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))])? 53 | impl Resolver for lasso::$resolver 54 | where 55 | K: lasso::Key, 56 | $($($t: $bound),+)? 
57 | { 58 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 59 | let raw_key = TokenKey::into_u32(key); 60 | let lasso_key = K::try_from_usize(raw_key as usize)?; 61 | >::try_resolve(self, &lasso_key) 62 | } 63 | 64 | fn resolve(&self, key: TokenKey) -> &str { 65 | let raw_key = TokenKey::into_u32(key); 66 | let lasso_key = K::try_from_usize(raw_key as usize).expect(&format!( 67 | "invalid key: failed to convert `{key:?}` to `lasso::Key`" 68 | )); 69 | >::resolve(self, &lasso_key) 70 | } 71 | } 72 | }; 73 | } 74 | 75 | macro_rules! compat_interner { 76 | ($interner:ident $(where $($t:ident : $bound:ident),+)? if #[cfg(feature = $feature:literal)]) => { 77 | #[cfg_attr(doc_cfg, doc(cfg(feature = $feature)))] 78 | impl Interner for lasso::$interner 79 | where 80 | K: lasso::Key, 81 | S: BuildHasher, 82 | $($($t: $bound),+)? 83 | { 84 | type Error = LassoCompatError; 85 | 86 | fn try_get_or_intern(&mut self, text: &str) -> Result { 87 | let lasso_key = >::try_get_or_intern(self, text)?; 88 | let raw_key = K::into_usize(lasso_key); 89 | u32::try_from(raw_key) 90 | .ok() 91 | .and_then(TokenKey::try_from_u32) 92 | .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key }) 93 | } 94 | 95 | fn get_or_intern(&mut self, text: &str) -> TokenKey { 96 | let lasso_key = >::get_or_intern(self, text); 97 | let raw_key = K::into_usize(lasso_key); 98 | u32::try_from(raw_key) 99 | .ok() 100 | .and_then(TokenKey::try_from_u32) 101 | .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key }) 102 | .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})")) 103 | } 104 | } 105 | }; 106 | } 107 | 108 | compat_resolver!(RodeoReader if #[cfg(feature = "lasso_compat")]); 109 | compat_resolver!(RodeoResolver if #[cfg(feature = "lasso_compat")]); 110 | 111 | compat_resolver!(Rodeo if #[cfg(feature = "lasso_compat")]); 112 | compat_interner!(Rodeo if #[cfg(feature = "lasso_compat")]); 113 | 114 | 
#[cfg(feature = "multi_threaded_interning")] 115 | mod multi_threaded { 116 | use super::*; 117 | 118 | compat_resolver!(ThreadedRodeo where K: Hash, S: BuildHasher, S: Clone if #[cfg(feature = "multi_threaded_interning")]); 119 | 120 | compat_interner!(ThreadedRodeo where K: Hash, S: Clone if #[cfg(feature = "multi_threaded_interning")]); 121 | 122 | #[cfg_attr(doc_cfg, doc(cfg(feature = "multi_threaded_interning")))] 123 | impl Interner for &lasso::ThreadedRodeo 124 | where 125 | K: lasso::Key + Hash, 126 | S: BuildHasher + Clone, 127 | { 128 | type Error = as Interner>::Error; 129 | 130 | fn try_get_or_intern(&mut self, text: &str) -> Result { 131 | let lasso_key = >::try_get_or_intern(self, text)?; 132 | let raw_key = K::into_usize(lasso_key); 133 | u32::try_from(raw_key) 134 | .ok() 135 | .and_then(TokenKey::try_from_u32) 136 | .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key }) 137 | } 138 | 139 | fn get_or_intern(&mut self, text: &str) -> TokenKey { 140 | let lasso_key = >::get_or_intern(self, text); 141 | let raw_key = K::into_usize(lasso_key); 142 | u32::try_from(raw_key) 143 | .ok() 144 | .and_then(TokenKey::try_from_u32) 145 | .ok_or(LassoCompatError::KeyConversionError { lasso_key: raw_key }) 146 | .unwrap_or_else(|_| panic!("invalid key: failed to convert `lasso::Key` `{raw_key}` to `InternKey` (failed to intern {text:?})")) 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /cstree/src/interning/salsa_compat.rs: -------------------------------------------------------------------------------- 1 | //! # Using a `salsa` database as the interner for `cstree` 2 | //! 3 | //!
//! <div class="warning">
//! Warning: Compatibility is only provided for "Salsa 2022".
//! This version is currently under active development and cstree's
//! compatibility features are unstable until there is an official
//! release.
//! Older versions of `salsa` are not supported.
//! </div>
//!
//! If you are using the `salsa` query system, you already have access to an implementation of interning through
//! [`#[salsa::interned]`](macro@salsa::interned). This is all that is needed to use `cstree` and this module provides
//! the utilities needed to use `salsa`'s interners for working with syntax trees.
//!
//! Note that the primary benefit of this is that it avoids additional dependencies because it uses an interner that you
//! already depend on, but it can also be beneficial to use an interner that is more specialized towards string
//! interning. In particular, using `salsa`'s interning requires allocating all strings that are interned even if they
//! are deduplicated because they already exist in the interner.
//!
//! ## How to do it
//!
//! ```
//! # use cstree::testing::*;
//! # use cstree::interning::salsa_compat::salsa;
//! # use cstree::impl_cstree_interning_for_salsa;
//! // Define the `salsa` jar, database and intern Id
//! #[salsa::jar(db = Db)]
//! pub struct Jar(SourceId);
//!
//! pub trait Db: salsa::DbWithJar<Jar> {}
//! impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
//!
//! // If you are not a doctest and can put `Jar` at the root of your crate,
//! // this can just be `#[salsa::interned]`.
//! #[salsa::interned(jar = Jar)]
//! pub struct SourceId {
//!     #[return_ref]
//!     pub text: String,
//! }
//!
//! #[derive(Default)]
//! #[salsa::db(Jar)]
//! struct Database {
//!     storage: salsa::Storage<Self>,
//! }
//! impl salsa::Database for Database {}
//!
//! // Let `cstree` define a conversion trait and implement it for your database.
//! // `Database` is your db type, `SourceId` is your interning id, and `text` is
//! // its text field (all as defined above).
//! impl_cstree_interning_for_salsa!(impl Interning for Database => text as SourceId);
//!
//! // Build a tree with the `salsa` interner
//! let db = Database::default();
//! let interner = db.as_interner(); // <-- conversion happens here
//! let mut shared_interner = &interner;
//! let mut builder: GreenNodeBuilder<MySyntax, _> = GreenNodeBuilder::with_interner(&mut shared_interner);
//! let (tree, _no_interner_because_it_was_borrowed) = {
//!     builder.start_node(TestSyntaxKind::Plus);
//!     builder.token(TestSyntaxKind::Float, "2.05");
//!     builder.token(TestSyntaxKind::Whitespace, " ");
//!     builder.token(TestSyntaxKind::Plus, "+");
//!     builder.token(TestSyntaxKind::Whitespace, " ");
//!     builder.token(TestSyntaxKind::Float, "7.32");
//!     builder.finish_node();
//!     builder.finish()
//! };
//! let tree: SyntaxNode<MySyntax> = SyntaxNode::new_root(tree);
//! assert_eq!(tree.resolve_text(shared_interner), "2.05 + 7.32");
//! ```
//!
//! The full code is also available in the `salsa` example.
//!
//! ## Working with `InternWithDb` directly
//! If you don't want the trait, or macros, or if you just need more control about what happens during interning and
//! resolution, you can skip using [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa) and use
//! [`InternWithDb`] directly.
//!
//! Because `salsa` generates inherent methods (and not, for example, a trait implementation), we need information about
//! the used interning id either way. All that `as_interner` does is construct an instance of `InternWithDb` that uses
//! the generated methods to invoke `salsa`s interner. The implementation expands to
//! ```text
//! InternWithDb::new(
//!     db,
//!     |db, text| SourceId::new(db, text),
//!     |db, id| id.text(db),
//! )
//! ```
//! but you may provide any function that doesn't capture.

#![cfg(feature = "salsa_2022_compat")]

#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub use salsa;

use core::fmt;

use super::{InternKey, Interner, Resolver, TokenKey};

#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
impl salsa::AsId for TokenKey {
    fn as_id(self) -> salsa::Id {
        salsa::Id::from_u32(self.into_u32())
    }

    /// Create an instance of the intern-key from an ID.
    ///
    /// # Panics
    /// Panics if the given `id` from `salsa` cannot be represented by a [`TokenKey`].
    fn from_id(id: salsa::Id) -> Self {
        TokenKey::try_from_u32(id.as_u32())
            .unwrap_or_else(|| panic!("`salsa::Id` is invalid for `TokenKey`'s keyspace: {id:?}"))
    }
}

/// Generates an extension trait `SalsaAsInterner` that lets you call `db.as_interner()` on your [`salsa::Database`] to
/// obtain a `cstree` compatible [`Interner`].
///
/// The `as_interner` method returns an instance of [`InternWithDb`] that uses the functions generated by `salsa` for
/// your Id type to perform interning and resolution.
///
/// If you have defined your interned text as
/// ```ignore
/// #[salsa::interned]
/// pub struct SourceId {
///     #[return_ref]
///     pub text: String,
/// }
/// ```
/// the syntax is
/// ```ignore
/// impl_cstree_interning_for_salsa!(impl Interning for YourDatabase => text as SourceId);
/// ```
/// where `text` is the name of the interned field.
/// Note that the use of `#[return_ref]` is required.
#[macro_export]
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
macro_rules! impl_cstree_interning_for_salsa {
    (impl Interning for $db:ty => $name:ident as $id:ty) => {
        trait SalsaAsInterner {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id>;
        }

        // FIX(review): the dump hard-coded `Database` here, which only compiles when the
        // caller's database type is literally named `Database`; use the macro argument.
        impl SalsaAsInterner for $db {
            fn as_interner(&self) -> ::cstree::interning::salsa_compat::InternWithDb<'_, $db, $id> {
                ::cstree::interning::salsa_compat::InternWithDb::new(
                    self,
                    |db, text| <$id>::new(db, text),
                    |db, id| id.$name(db),
                )
            }
        }
    };
}

/// This type allows you to wrap access to a [`salsa::Database`] together with an interning and a lookup function, which
/// makes it implement [`Interner`] and [`Resolver`]. The [module documentation](self) shows how to use this with your
/// own database, or you can use [`impl_cstree_interning_for_salsa`](crate::impl_cstree_interning_for_salsa).
///
/// The interning traits are also implemented by `&InternWithDb`, as the `salsa` database supports interning through
/// shared references (see also [the `interning` module documentation](super)).
#[cfg_attr(doc_cfg, doc(cfg(feature = "salsa_2022_compat")))]
pub struct InternWithDb<'db, Db: salsa::Database, Id: salsa::interned::InternedId> {
    db:     &'db Db,
    intern: fn(&Db, text: String) -> Id,
    lookup: fn(&Db, Id) -> &str,
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> fmt::Debug for InternWithDb<'db, Db, Id> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("InternWithDb")
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> InternWithDb<'db, Db, Id> {
    /// Create an [`Interner`] that works with `cstree` but uses the given `db` from `salsa`.
    /// To do this, you need to provide a function for interning new strings that creates the [`InternedId`] that you
    /// defined with [`#[salsa::interned]`](macro@salsa::interned), and a second one that resolves an Id using your
    /// database. See the [module documentation](self) for an example.
    ///
    /// [`InternedId`]: salsa::interned::InternedId
    pub fn new(db: &'db Db, intern: fn(&Db, text: String) -> Id, lookup: fn(&Db, Id) -> &str) -> Self {
        Self { db, intern, lookup }
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for InternWithDb<'db, Db, Id> {
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        let key = Id::from_id(key.as_id());
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for InternWithDb<'db, Db, Id> {
    // Interning through `salsa` cannot fail.
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Resolver<TokenKey> for &InternWithDb<'db, Db, Id> {
    fn try_resolve(&self, key: TokenKey) -> Option<&'db str> {
        use salsa::AsId;

        let key = Id::from_id(key.as_id());
        let text = (self.lookup)(self.db, key);
        Some(text)
    }
}

impl<'db, Db: salsa::Database, Id: salsa::interned::InternedId> Interner<TokenKey> for &InternWithDb<'db, Db, Id> {
    type Error = std::convert::Infallible;

    fn try_get_or_intern(&mut self, text: &str) -> Result<TokenKey, Self::Error> {
        use salsa::AsId;

        let id = (self.intern)(self.db, text.to_string());
        Ok(TokenKey::from_id(id.as_id()))
    }
}
-------------------------------------------------------------------------------- /cstree/src/interning/traits.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | use super::TokenKey; 4 | 5 | /// Common interface for all intern keys via conversion to and from `u32`. 6 | /// 7 | /// # Safety 8 | /// Implementations must guarantee that keys can round-trip in both directions: going from `Self` to `u32` to `Self` and 9 | /// going from `u32` to `Self` to `u32` must each yield the original value. 10 | pub unsafe trait InternKey: Copy + Eq + fmt::Debug { 11 | /// Convert `self` into its raw representation. 12 | fn into_u32(self) -> u32; 13 | 14 | /// Try to reconstruct an intern key from its raw representation. 15 | /// Returns `None` if `key` is not a valid key. 16 | fn try_from_u32(key: u32) -> Option; 17 | } 18 | 19 | /// The read-only part of an interner. 20 | /// Allows to perform lookups of intern keys to resolve them to their interned text. 21 | pub trait Resolver { 22 | /// Tries to resolve the given `key` and return its interned text. 23 | /// 24 | /// If `self` does not contain any text for `key`, `None` is returned. 25 | fn try_resolve(&self, key: Key) -> Option<&str>; 26 | 27 | /// Resolves `key` to its interned text. 28 | /// 29 | /// # Panics 30 | /// Panics if there is no text for `key`. 31 | /// 32 | /// Compatibility implementations for interners from other crates may also panic if `key` cannot be converted to the 33 | /// key type of the external interner. Please ensure you configure any external interners appropriately (for 34 | /// example by choosing an appropriately sized key type). 
35 | fn resolve(&self, key: Key) -> &str { 36 | self.try_resolve(key) 37 | .unwrap_or_else(|| panic!("failed to resolve `{key:?}`")) 38 | } 39 | } 40 | 41 | impl Resolver for &R { 42 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 43 | (**self).try_resolve(key) 44 | } 45 | 46 | fn resolve(&self, key: TokenKey) -> &str { 47 | (**self).resolve(key) 48 | } 49 | } 50 | 51 | impl Resolver for &mut R { 52 | fn try_resolve(&self, key: TokenKey) -> Option<&str> { 53 | (**self).try_resolve(key) 54 | } 55 | 56 | fn resolve(&self, key: TokenKey) -> &str { 57 | (**self).resolve(key) 58 | } 59 | } 60 | 61 | /// A full interner, which can intern new strings returning intern keys and also resolve intern keys to the interned 62 | /// value. 63 | /// 64 | /// **Note:** Because single-threaded interners may require mutable access, the methods on this trait take `&mut self`. 65 | /// In order to use a multi- (or single)-threaded interner that allows access through a shared reference, it is 66 | /// implemented for `&MultiThreadedTokenInterner` and `Arc`, allowing it 67 | /// to be used with a `&mut &MultiThreadedTokenInterner` and `&mut Arc`. 68 | pub trait Interner: Resolver { 69 | /// Represents possible ways in which interning may fail. 70 | /// For example, this might be running out of fresh intern keys, or failure to allocate sufficient space for a new 71 | /// value. 72 | type Error; 73 | 74 | /// Interns `text` and returns a new intern key for it. 75 | /// If `text` was already previously interned, it will not be used and the existing intern key for its value will be 76 | /// returned. 77 | fn try_get_or_intern(&mut self, text: &str) -> Result; 78 | 79 | /// Interns `text` and returns a new intern key for it. 80 | /// 81 | /// # Panics 82 | /// Panics if the internment process raises an [`Error`](Interner::Error). 
83 | fn get_or_intern(&mut self, text: &str) -> Key { 84 | self.try_get_or_intern(text) 85 | .unwrap_or_else(|_| panic!("failed to intern `{text:?}`")) 86 | } 87 | } 88 | 89 | impl Interner for &mut I { 90 | type Error = I::Error; 91 | 92 | fn try_get_or_intern(&mut self, text: &str) -> Result { 93 | (**self).try_get_or_intern(text) 94 | } 95 | 96 | fn get_or_intern(&mut self, text: &str) -> TokenKey { 97 | (**self).get_or_intern(text) 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /cstree/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! `cstree` is a generic library for creating and working with concrete syntax trees (CSTs). 2 | //! 3 | //! "Traditional" abstract syntax trees (ASTs) usually contain different types of nodes which 4 | //! represent different syntactical elements of the source text of a document and reduce its 5 | //! information to the minimal amount necessary to correctly interpret it. In contrast, CSTs are 6 | //! lossless representations of the entire input where all tree nodes are represented homogeneously 7 | //! (i.e., the nodes are _untyped_), but are tagged with a [`RawSyntaxKind`] to determine the kind 8 | //! of grammatical element they represent. 9 | //! 10 | //! One big advantage of this representation is that it cannot only recreate the original source 11 | //! exactly, but also lends itself very well to the representation of _incomplete or erroneous_ 12 | //! trees and is thus highly suited for usage in contexts such as IDEs or any other application 13 | //! where a user is _editing_ the source text. 14 | //! 15 | //! The concept of and the data structures for `cstree`'s syntax trees are inspired in part by 16 | //! Swift's [libsyntax](https://github.com/apple/swift/tree/5e2c815edfd758f9b1309ce07bfc01c4bc20ec23/lib/Syntax). 17 | //! 
Trees consist of two layers: the inner tree (called _green_ tree) contains the actual source 18 | //! text as position independent green nodes. Tokens and nodes that appear identically at multiple 19 | //! places in the source are deduplicated in this representation in order to store the tree 20 | //! efficiently. This means that a green tree may not actually structurally be a tree. To remedy 21 | //! this, the real syntax tree is constructed on top of the green tree as a secondary tree (called 22 | //! the _red_ tree), which models the exact source structure. 23 | //! As a possible third layer, a strongly typed AST [can be built] on top of the red tree. 24 | //! 25 | //! [can be built]: #ast-layer 26 | //! 27 | //! The `cstree` implementation is a fork of the excellent [`rowan`](https://github.com/rust-analyzer/rowan/), 28 | //! developed by the authors of [rust-analyzer](https://github.com/rust-analyzer/rust-analyzer/) who 29 | //! wrote up a conceptual overview of their implementation in 30 | //! [their repository](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#trees). 31 | //! Notable differences of `cstree` compared to `rowan`: 32 | //! 33 | //! - Syntax trees (red trees) are created lazily, but are persistent. Once a red node has been 34 | //! created by `cstree`, it will remain allocated. In contrast, `rowan` re-creates the red layer on 35 | //! the fly on each traversal of the tree. Apart from the trade-off discussed 36 | //! [here](https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md#memoized-rednodes), 37 | //! this helps to achieve good tree traversal speed while helping to provide the following: 38 | //! - Syntax (red) nodes are `Send` and `Sync`, allowing to share realized trees across threads. This is achieved by 39 | //! atomically reference counting syntax trees as a whole, which also gets rid of the need to reference count 40 | //! individual nodes. 41 | //! 
- [`SyntaxNode`](syntax::SyntaxNode)s can hold custom data. 42 | //! - `cstree` trees are trees over interned strings. This means `cstree` will deduplicate the text of tokens with the 43 | //! same source string, such as identifiers with the same name. In this position, `rowan` stores each token's text 44 | //! together with its metadata as a custom DST (dynamically-sized type). 45 | //! - `cstree` includes some performance optimizations for tree creation: it only allocates space for new nodes on the 46 | //! heap if they are not in cache and avoids recursively hashing subtrees by pre-hashing them. 47 | //! - `cstree` includes some performance optimizations for tree traversal: persisting red nodes allows tree traversal 48 | //! methods to return references instead of cloning nodes, which involves updating the tree's reference count. You can 49 | //! still `clone` the reference to obtain an owned node, but you only pay that cost when you need to. 50 | //! - The downside of offering thread safe syntax trees is that `cstree` cannot offer any mutability API for its CSTs. 51 | //! Trees can still be updated into new trees through [replacing] nodes, but cannot be mutated in place. 52 | //! 53 | //! [replacing]: syntax::SyntaxNode::replace_with 54 | //! 55 | //! ## Getting Started 56 | //! If you're looking at `cstree`, you're probably looking at or already writing a parser and are considering using 57 | //! concrete syntax trees as its output. We'll talk more about parsing below -- first, let's have a look at what needs 58 | //! to happen to go from input text to a `cstree` syntax tree: 59 | //! 60 | //! 1. Define an enumeration of the types of tokens (like keywords) and nodes (like "an expression") that you want to 61 | //! have in your syntax and implement [`Syntax`] 62 | //! 63 | //! 2. Create a [`GreenNodeBuilder`](build::GreenNodeBuilder) and call 64 | //! [`start_node`](build::GreenNodeBuilder::start_node), [`token`](build::GreenNodeBuilder::token) and 65 | //! 
[`finish_node`](build::GreenNodeBuilder::finish_node) from your parser 66 | //! 67 | //! 3. Call [`SyntaxNode::new_root`](syntax::SyntaxNode::new_root) or 68 | //! [`SyntaxNode::new_root_with_resolver`](syntax::SyntaxNode::new_root_with_resolver) with the resulting 69 | //! [`GreenNode`](green::GreenNode) to obtain a syntax tree that you can traverse 70 | //! 71 | //! There's a full [getting started guide] that walks through each of the above steps in detail in the documentation for 72 | //! the `getting_started` module. The walkthrough goes through the necessary steps bit by bit and skips the lexer, but 73 | //! the full code plus a runnable interpreter for the small walkthrough language are available in the `readme` example. 74 | //! [Additional examples] can be found in the `examples/` folder in the repository. 75 | //! A good starting point is the `s_expressions` example, which implements a parser for a small S-Expression language 76 | //! with guiding comments. 77 | //! 78 | //! [getting started guide]: getting_started/index.html 79 | //! [Additional examples]: https://github.com/domenicquirl/cstree/tree/master/cstree/examples 80 | //! 81 | //! ## License 82 | //! 83 | //! `cstree` is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0). 84 | //! 85 | //! See `LICENSE-APACHE` and `LICENSE-MIT` for details. 
86 | 87 | #![forbid(missing_debug_implementations, unconditional_recursion)] 88 | #![deny(unsafe_code, future_incompatible)] 89 | #![allow( 90 | unstable_name_collisions, // strict provenance - must come after `future_incompatible` to take precedence 91 | unexpected_cfgs, // nightly docs.rs features and `salsa-2022` feature until that is figured out 92 | clippy::duplicated_attributes, // interning modules 93 | )] 94 | #![warn(missing_docs)] 95 | // Docs.rs 96 | #![doc(html_root_url = "https://docs.rs/cstree/0.12.2")] 97 | #![cfg_attr(doc_cfg, feature(doc_cfg))] 98 | 99 | pub mod getting_started; 100 | 101 | #[allow(unsafe_code)] 102 | pub mod green; 103 | #[allow(unsafe_code)] 104 | pub mod syntax; 105 | 106 | #[allow(unsafe_code)] 107 | pub mod interning; 108 | 109 | #[cfg(feature = "serialize")] 110 | mod serde_impls; 111 | #[allow(missing_docs)] 112 | mod utility_types; 113 | 114 | use std::fmt; 115 | 116 | /// `RawSyntaxKind` is a type tag for each token or node. 117 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 118 | pub struct RawSyntaxKind(pub u32); 119 | 120 | /// Typesafe representations of text ranges and sizes. 121 | pub mod text { 122 | pub use crate::syntax::SyntaxText; 123 | pub use text_size::{TextLen, TextRange, TextSize}; 124 | } 125 | 126 | /// A tree builder for the construction of syntax trees. 127 | /// 128 | /// Please refer to the documentation on [`GreenNodeBuilder`](build::GreenNodeBuilder) itself and the ["getting started" 129 | /// section](../index.html#getting-started) from the top-level documentation for an introduction to how to build a 130 | /// syntax tree. 131 | pub mod build { 132 | pub use crate::green::builder::{Checkpoint, GreenNodeBuilder, NodeCache}; 133 | } 134 | 135 | /// A convenient collection of the most used parts of `cstree`. 
136 | pub mod prelude { 137 | pub use crate::{ 138 | build::GreenNodeBuilder, 139 | green::{GreenNode, GreenToken}, 140 | syntax::{SyntaxElement, SyntaxNode, SyntaxToken}, 141 | RawSyntaxKind, Syntax, 142 | }; 143 | } 144 | 145 | /// Types for syntax tree traversal / moving through trees. 146 | pub mod traversal { 147 | pub use crate::utility_types::{Direction, WalkEvent}; 148 | } 149 | 150 | /// Utility types. It shouldn't be needed to reference these directly, but they are returned in several places in 151 | /// `cstree` and may come in handy. 152 | pub mod util { 153 | pub use crate::utility_types::{NodeOrToken, TokenAtOffset}; 154 | } 155 | 156 | /// Synchronization primitives. 157 | pub mod sync { 158 | /// An atomically reference counted shared pointer. 159 | /// 160 | /// This is like [`Arc`](std::sync::Arc) in the standard library, but more efficient for how `cstree` stores 161 | /// syntax trees internally. This Arc does not support weak reference counting. 162 | pub use triomphe::Arc; 163 | } 164 | 165 | /// A type that represents what items in your language can be. 166 | /// Typically, this is an `enum` with variants such as `Identifier`, `Literal`, ... 167 | /// 168 | /// The `Syntax` trait is the bridge between the internal `cstree` representation and your 169 | /// language's types. 
170 | /// This is essential for providing a [`SyntaxNode`] API that can be used with your types, as in the 171 | /// `s_expressions` example: 172 | /// 173 | /// ``` 174 | /// #[derive(Debug, Clone, Copy, PartialEq, Eq, cstree::Syntax)] 175 | /// # #[allow(non_camel_case_types)] 176 | /// #[repr(u32)] 177 | /// enum SyntaxKind { 178 | /// #[static_text("+")] 179 | /// Plus, // `+` 180 | /// #[static_text("-")] 181 | /// Minus, // `-` 182 | /// Integer, // like `15` 183 | /// Expression, // combined expression, like `5 + 4 - 3` 184 | /// Whitespace, // whitespace is explicit 185 | /// } 186 | /// ``` 187 | /// 188 | /// `cstree` provides a procedural macro called `cstree_derive` to automatically generate `Syntax` implementations for 189 | /// syntax kind enums if its `derive` feature is enabled. 190 | /// 191 | /// [`SyntaxNode`]: crate::syntax::SyntaxNode 192 | pub trait Syntax: Sized + Copy + fmt::Debug + Eq { 193 | /// Construct a semantic item kind from the compact representation. 194 | fn from_raw(raw: RawSyntaxKind) -> Self; 195 | 196 | /// Convert a semantic item kind into a more compact representation. 197 | fn into_raw(self) -> RawSyntaxKind; 198 | 199 | /// Fixed text for a particular syntax kind. 200 | /// Implement for kinds that will only ever represent the same text, such as punctuation (like a 201 | /// semicolon), keywords (like `fn`), or operators (like `<=`). 202 | /// 203 | /// Indicating tokens that have a `static_text` this way allows `cstree` to store them more efficiently, which makes 204 | /// it faster to add them to a syntax tree and to look up their text. Since there can often be many occurrences 205 | /// of these tokens inside a file, doing so will improve the performance of using `cstree`. 
206 | fn static_text(self) -> Option<&'static str>; 207 | } 208 | 209 | #[cfg(feature = "derive")] 210 | #[allow(unused_imports)] 211 | #[macro_use] 212 | extern crate cstree_derive; 213 | 214 | #[cfg(feature = "derive")] 215 | /// Derive macro available if `cstree` is built with `features = ["derive"]`. 216 | pub use cstree_derive::Syntax; 217 | 218 | #[doc(hidden)] 219 | #[allow(unsafe_code, unused)] 220 | pub mod testing { 221 | pub use crate::prelude::*; 222 | pub fn parse(_b: &mut GreenNodeBuilder, _s: &str) {} 223 | 224 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 225 | #[repr(u32)] 226 | #[allow(non_camel_case_types)] 227 | pub enum TestSyntaxKind { 228 | Plus, 229 | Identifier, 230 | Int, 231 | Float, 232 | Operation, 233 | Root, 234 | Whitespace, 235 | __LAST, 236 | } 237 | pub type MySyntax = TestSyntaxKind; 238 | pub use TestSyntaxKind::*; 239 | 240 | impl Syntax for TestSyntaxKind { 241 | fn from_raw(raw: RawSyntaxKind) -> Self { 242 | assert!(raw.0 <= TestSyntaxKind::__LAST as u32); 243 | unsafe { std::mem::transmute::(raw.0) } 244 | } 245 | 246 | fn into_raw(self) -> RawSyntaxKind { 247 | RawSyntaxKind(self as u32) 248 | } 249 | 250 | fn static_text(self) -> Option<&'static str> { 251 | match self { 252 | TestSyntaxKind::Plus => Some("+"), 253 | _ => None, 254 | } 255 | } 256 | } 257 | } 258 | -------------------------------------------------------------------------------- /cstree/src/serde_impls.rs: -------------------------------------------------------------------------------- 1 | //! Serialization and Deserialization for syntax trees. 
2 | 3 | use crate::{ 4 | build::GreenNodeBuilder, 5 | interning::{Resolver, TokenKey}, 6 | syntax::{ResolvedNode, SyntaxNode}, 7 | traversal::WalkEvent, 8 | util::NodeOrToken, 9 | RawSyntaxKind, Syntax, 10 | }; 11 | use serde::{ 12 | de::{Error, SeqAccess, Visitor}, 13 | ser::SerializeTuple, 14 | Deserialize, Serialize, 15 | }; 16 | use std::{collections::VecDeque, fmt, marker::PhantomData}; 17 | 18 | /// Expands to the first expression, if there's 19 | /// no expression following, otherwise return the second expression. 20 | /// 21 | /// Required for having two different values if the argument is `$(...)?`. 22 | macro_rules! data_list { 23 | ($_:expr, $list:expr) => { 24 | $list 25 | }; 26 | 27 | ($list:expr,) => { 28 | $list 29 | }; 30 | } 31 | 32 | /// Generate the code that should be put inside the [`Serialize`] implementation 33 | /// of a [`SyntaxNode`]-like type. 34 | /// 35 | /// It serializes a [`SyntaxNode`] into a tuple with 2 elements. 36 | /// The first element is the serialized event stream that was generated 37 | /// by [`SyntaxNode::preorder_with_tokens()`]. 38 | /// The second element is a list of `D`s, where `D` is the data of the nodes. 39 | /// The data may only be serialized if it's `Some(data)`. Each `EnterNode` event 40 | /// contains a boolean which indicates if this node has a data. If it has one, 41 | /// the deserializer should pop the first element from the data list and continue. 42 | /// 43 | /// Takes the `Syntax` (`$l`), `SyntaxNode` (`$node`), `Resolver` (`$resolver`), 44 | /// `Serializer` (`$serializer`), and an optional `data_list` which must be a `mut Vec`. 45 | macro_rules! gen_serialize { 46 | ($l:ident, $node:expr, $resolver:expr, $ser:ident, $($data_list:ident)?) 
=> {{ 47 | #[allow(unused_variables)] 48 | let events = $node.preorder_with_tokens().filter_map(|event| match event { 49 | WalkEvent::Enter(NodeOrToken::Node(node)) => { 50 | let has_data = false; 51 | $(let has_data = node 52 | .get_data() 53 | .map(|data| { 54 | $data_list.push(data); 55 | true 56 | }) 57 | .unwrap_or(false);)? 58 | 59 | Some(Event::EnterNode($l::into_raw(node.kind()), has_data)) 60 | } 61 | WalkEvent::Enter(NodeOrToken::Token(tok)) => Some(Event::Token($l::into_raw(tok.kind()), tok.resolve_text($resolver))), 62 | 63 | WalkEvent::Leave(NodeOrToken::Node(_)) => Some(Event::LeaveNode), 64 | WalkEvent::Leave(NodeOrToken::Token(_)) => None, 65 | }); 66 | 67 | let mut tuple = $ser.serialize_tuple(2)?; 68 | 69 | // TODO(Stupremee): We can easily avoid this allocation but it would 70 | // require more weird and annoying-to-write code, so I'll skip it for now. 71 | tuple.serialize_element(&events.collect::>())?; 72 | tuple.serialize_element(&data_list!(Vec::<()>::new(), $($data_list)?))?; 73 | 74 | tuple.end() 75 | }}; 76 | } 77 | 78 | #[derive(Deserialize, Serialize)] 79 | #[serde(tag = "t", content = "c")] 80 | enum Event<'text> { 81 | /// The second parameter indicates if this node needs data. 82 | /// If the boolean is true, the next element inside the data list 83 | /// must be attached to this node. 84 | EnterNode(RawSyntaxKind, bool), 85 | Token(RawSyntaxKind, &'text str), 86 | LeaveNode, 87 | } 88 | 89 | /// Make a `SyntaxNode` serializable but without serializing the data. 90 | pub(crate) struct SerializeWithResolver<'node, 'resolver, S: Syntax, D: 'static, R: ?Sized> { 91 | pub(crate) node: &'node SyntaxNode, 92 | pub(crate) resolver: &'resolver R, 93 | } 94 | 95 | /// Make a `SyntaxNode` serializable which will include the data for serialization. 
96 | pub(crate) struct SerializeWithData<'node, 'resolver, S: Syntax, D: 'static, R: ?Sized> { 97 | pub(crate) node: &'node SyntaxNode, 98 | pub(crate) resolver: &'resolver R, 99 | } 100 | 101 | impl Serialize for SerializeWithData<'_, '_, S, D, R> 102 | where 103 | S: Syntax, 104 | R: Resolver + ?Sized, 105 | D: Serialize, 106 | { 107 | fn serialize(&self, serializer: Ser) -> Result 108 | where 109 | Ser: serde::Serializer, 110 | { 111 | let mut data_list = Vec::new(); 112 | gen_serialize!(S, self.node, self.resolver, serializer, data_list) 113 | } 114 | } 115 | 116 | impl Serialize for SerializeWithResolver<'_, '_, S, D, R> 117 | where 118 | S: Syntax, 119 | R: Resolver + ?Sized, 120 | { 121 | fn serialize(&self, serializer: Ser) -> Result 122 | where 123 | Ser: serde::Serializer, 124 | { 125 | gen_serialize!(S, self.node, self.resolver, serializer,) 126 | } 127 | } 128 | 129 | impl Serialize for ResolvedNode 130 | where 131 | S: Syntax, 132 | D: Serialize, 133 | { 134 | fn serialize(&self, serializer: Ser) -> Result 135 | where 136 | Ser: serde::Serializer, 137 | { 138 | let node = SerializeWithResolver { 139 | node: self, 140 | resolver: self.resolver().as_ref(), 141 | }; 142 | node.serialize(serializer) 143 | } 144 | } 145 | 146 | impl<'de, S, D> Deserialize<'de> for ResolvedNode 147 | where 148 | S: Syntax, 149 | D: Deserialize<'de>, 150 | { 151 | // Deserialization is done by walking down the deserialized event stream, 152 | // which is the first element inside the tuple. The events 153 | // are then passed to a `GreenNodeBuilder` which will do all 154 | // the hard work for use. 155 | // 156 | // While walking the event stream, we also store a list of booleans, 157 | // which indicate which node needs to set data. After creating the tree, 158 | // we walk down the nodes, check if the bool at `data_list[idx]` is true, 159 | // and if so, pop the first element of the data list and attach the data 160 | // to the current node. 
161 | fn deserialize(deserializer: De) -> Result 162 | where 163 | De: serde::Deserializer<'de>, 164 | { 165 | struct EventVisitor { 166 | _marker: PhantomData ResolvedNode>, 167 | } 168 | 169 | impl<'de, S, D> Visitor<'de> for EventVisitor 170 | where 171 | S: Syntax, 172 | D: Deserialize<'de>, 173 | { 174 | type Value = (ResolvedNode, VecDeque); 175 | 176 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 177 | formatter.write_str("a list of tree events") 178 | } 179 | 180 | fn visit_seq(self, mut seq: A) -> Result 181 | where 182 | A: SeqAccess<'de>, 183 | { 184 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); 185 | let mut data_indices = VecDeque::new(); 186 | 187 | while let Some(next) = seq.next_element::>()? { 188 | match next { 189 | Event::EnterNode(kind, has_data) => { 190 | builder.start_node(S::from_raw(kind)); 191 | data_indices.push_back(has_data); 192 | } 193 | Event::Token(kind, text) => builder.token(S::from_raw(kind), text), 194 | Event::LeaveNode => builder.finish_node(), 195 | } 196 | } 197 | 198 | let (tree, cache) = builder.finish(); 199 | let tree = ResolvedNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap()); 200 | Ok((tree, data_indices)) 201 | } 202 | } 203 | 204 | struct ProcessedEvents(ResolvedNode, VecDeque); 205 | impl<'de, S, D> Deserialize<'de> for ProcessedEvents 206 | where 207 | S: Syntax, 208 | D: Deserialize<'de>, 209 | { 210 | fn deserialize(deserializer: DE) -> Result 211 | where 212 | DE: serde::Deserializer<'de>, 213 | { 214 | let (tree, ids) = deserializer.deserialize_seq(EventVisitor { _marker: PhantomData })?; 215 | Ok(Self(tree, ids)) 216 | } 217 | } 218 | 219 | let (ProcessedEvents(tree, data_indices), mut data) = 220 | <(ProcessedEvents, VecDeque)>::deserialize(deserializer)?; 221 | 222 | tree.descendants().zip(data_indices).try_for_each(|(node, has_data)| { 223 | if has_data { 224 | let data = data 225 | .pop_front() 226 | .ok_or_else(|| 
De::Error::custom("invalid serialized tree"))?; 227 | node.set_data(data); 228 | } 229 | >::Ok(()) 230 | })?; 231 | 232 | if !data.is_empty() { 233 | Err(De::Error::custom( 234 | "serialized SyntaxNode contained too many data elements", 235 | )) 236 | } else { 237 | Ok(tree) 238 | } 239 | } 240 | } 241 | 242 | impl Serialize for RawSyntaxKind { 243 | fn serialize(&self, serializer: Ser) -> Result 244 | where 245 | Ser: serde::Serializer, 246 | { 247 | serializer.serialize_u32(self.0) 248 | } 249 | } 250 | 251 | impl<'de> Deserialize<'de> for RawSyntaxKind { 252 | fn deserialize(deserializer: De) -> Result 253 | where 254 | De: serde::Deserializer<'de>, 255 | { 256 | Ok(Self(u32::deserialize(deserializer)?)) 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /cstree/src/syntax/element.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, sync::atomic::AtomicU32}; 2 | 3 | use text_size::{TextRange, TextSize}; 4 | 5 | use super::*; 6 | use crate::{ 7 | green::GreenElementRef, 8 | interning::{Resolver, TokenKey}, 9 | util::{NodeOrToken, TokenAtOffset}, 10 | RawSyntaxKind, Syntax, 11 | }; 12 | 13 | /// An element of the tree, can be either a node or a token. 14 | pub type SyntaxElement = NodeOrToken, SyntaxToken>; 15 | 16 | impl From> for SyntaxElement { 17 | fn from(node: SyntaxNode) -> SyntaxElement { 18 | NodeOrToken::Node(node) 19 | } 20 | } 21 | 22 | impl From> for SyntaxElement { 23 | fn from(token: SyntaxToken) -> SyntaxElement { 24 | NodeOrToken::Token(token) 25 | } 26 | } 27 | 28 | impl SyntaxElement { 29 | /// Returns this element's [`Display`](fmt::Display) representation as a string. 30 | /// 31 | /// To avoid allocating for every element, see [`write_display`](type.SyntaxElement.html#method.write_display). 
32 | pub fn display(&self, resolver: &R) -> String 33 | where 34 | R: Resolver + ?Sized, 35 | { 36 | match self { 37 | NodeOrToken::Node(it) => it.display(resolver), 38 | NodeOrToken::Token(it) => it.display(resolver), 39 | } 40 | } 41 | 42 | /// Writes this element's [`Display`](fmt::Display) representation into the given `target`. 43 | pub fn write_display(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result 44 | where 45 | R: Resolver + ?Sized, 46 | { 47 | match self { 48 | NodeOrToken::Node(it) => it.write_display(resolver, target), 49 | NodeOrToken::Token(it) => it.write_display(resolver, target), 50 | } 51 | } 52 | 53 | /// Returns this element's [`Debug`](fmt::Debug) representation as a string. 54 | /// If `recursive` is `true`, prints the entire subtree rooted in this element. 55 | /// Otherwise, only this element's kind and range are written. 56 | /// 57 | /// To avoid allocating for every element, see [`write_debug`](type.SyntaxElement.html#method.write_debug). 58 | pub fn debug(&self, resolver: &R, recursive: bool) -> String 59 | where 60 | R: Resolver + ?Sized, 61 | { 62 | match self { 63 | NodeOrToken::Node(it) => it.debug(resolver, recursive), 64 | NodeOrToken::Token(it) => it.debug(resolver), 65 | } 66 | } 67 | 68 | /// Writes this element's [`Debug`](fmt::Debug) representation into the given `target`. 69 | /// If `recursive` is `true`, prints the entire subtree rooted in this element. 70 | /// Otherwise, only this element's kind and range are written. 71 | pub fn write_debug(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result 72 | where 73 | R: Resolver + ?Sized, 74 | { 75 | match self { 76 | NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive), 77 | NodeOrToken::Token(it) => it.write_debug(resolver, target), 78 | } 79 | } 80 | } 81 | 82 | /// A reference to an element of the tree, can be either a reference to a node or one to a token. 
83 | pub type SyntaxElementRef<'a, S, D = ()> = NodeOrToken<&'a SyntaxNode, &'a SyntaxToken>; 84 | 85 | impl<'a, S: Syntax, D> From<&'a SyntaxNode> for SyntaxElementRef<'a, S, D> { 86 | fn from(node: &'a SyntaxNode) -> Self { 87 | NodeOrToken::Node(node) 88 | } 89 | } 90 | 91 | impl<'a, S: Syntax, D> From<&'a SyntaxToken> for SyntaxElementRef<'a, S, D> { 92 | fn from(token: &'a SyntaxToken) -> Self { 93 | NodeOrToken::Token(token) 94 | } 95 | } 96 | 97 | impl<'a, S: Syntax, D> From<&'a SyntaxElement> for SyntaxElementRef<'a, S, D> { 98 | fn from(element: &'a SyntaxElement) -> Self { 99 | match element { 100 | NodeOrToken::Node(it) => Self::Node(it), 101 | NodeOrToken::Token(it) => Self::Token(it), 102 | } 103 | } 104 | } 105 | 106 | impl SyntaxElementRef<'_, S, D> { 107 | /// Returns this element's [`Display`](fmt::Display) representation as a string. 108 | /// 109 | /// To avoid allocating for every element, see [`write_display`](type.SyntaxElementRef.html#method.write_display). 110 | pub fn display(&self, resolver: &R) -> String 111 | where 112 | R: Resolver + ?Sized, 113 | { 114 | match self { 115 | NodeOrToken::Node(it) => it.display(resolver), 116 | NodeOrToken::Token(it) => it.display(resolver), 117 | } 118 | } 119 | 120 | /// Writes this element's [`Display`](fmt::Display) representation into the given `target`. 121 | pub fn write_display(&self, resolver: &R, target: &mut impl fmt::Write) -> fmt::Result 122 | where 123 | R: Resolver + ?Sized, 124 | { 125 | match self { 126 | NodeOrToken::Node(it) => it.write_display(resolver, target), 127 | NodeOrToken::Token(it) => it.write_display(resolver, target), 128 | } 129 | } 130 | 131 | /// Returns this element's [`Debug`](fmt::Debug) representation as a string. 132 | /// If `recursive` is `true`, prints the entire subtree rooted in this element. 133 | /// Otherwise, only this element's kind and range are written. 
134 | /// 135 | /// To avoid allocating for every element, see [`write_debug`](type.SyntaxElementRef.html#method.write_debug). 136 | pub fn debug(&self, resolver: &R, recursive: bool) -> String 137 | where 138 | R: Resolver + ?Sized, 139 | { 140 | match self { 141 | NodeOrToken::Node(it) => it.debug(resolver, recursive), 142 | NodeOrToken::Token(it) => it.debug(resolver), 143 | } 144 | } 145 | 146 | /// Writes this element's [`Debug`](fmt::Debug) representation into the given `target`. 147 | /// If `recursive` is `true`, prints the entire subtree rooted in this element. 148 | /// Otherwise, only this element's kind and range are written. 149 | pub fn write_debug(&self, resolver: &R, target: &mut impl fmt::Write, recursive: bool) -> fmt::Result 150 | where 151 | R: Resolver + ?Sized, 152 | { 153 | match self { 154 | NodeOrToken::Node(it) => it.write_debug(resolver, target, recursive), 155 | NodeOrToken::Token(it) => it.write_debug(resolver, target), 156 | } 157 | } 158 | } 159 | 160 | impl SyntaxElement { 161 | pub(super) fn new( 162 | element: GreenElementRef<'_>, 163 | parent: &SyntaxNode, 164 | index: u32, 165 | offset: TextSize, 166 | ref_count: *mut AtomicU32, 167 | ) -> SyntaxElement { 168 | match element { 169 | NodeOrToken::Node(node) => SyntaxNode::new_child(node, parent, index, offset, ref_count).into(), 170 | NodeOrToken::Token(_) => SyntaxToken::new(parent, index, offset).into(), 171 | } 172 | } 173 | 174 | /// The range this element covers in the source text, in bytes. 175 | #[inline] 176 | pub fn text_range(&self) -> TextRange { 177 | match self { 178 | NodeOrToken::Node(it) => it.text_range(), 179 | NodeOrToken::Token(it) => it.text_range(), 180 | } 181 | } 182 | 183 | /// The internal representation of the kind of this element. 
184 | #[inline] 185 | pub fn syntax_kind(&self) -> RawSyntaxKind { 186 | match self { 187 | NodeOrToken::Node(it) => it.syntax_kind(), 188 | NodeOrToken::Token(it) => it.syntax_kind(), 189 | } 190 | } 191 | 192 | /// The kind of this element in terms of your language. 193 | #[inline] 194 | pub fn kind(&self) -> S { 195 | match self { 196 | NodeOrToken::Node(it) => it.kind(), 197 | NodeOrToken::Token(it) => it.kind(), 198 | } 199 | } 200 | 201 | /// The parent node of this element, except if this element is the root. 202 | #[inline] 203 | pub fn parent(&self) -> Option<&SyntaxNode> { 204 | match self { 205 | NodeOrToken::Node(it) => it.parent(), 206 | NodeOrToken::Token(it) => Some(it.parent()), 207 | } 208 | } 209 | 210 | /// Returns an iterator along the chain of parents of this node. 211 | #[inline] 212 | pub fn ancestors(&self) -> impl Iterator> { 213 | match self { 214 | NodeOrToken::Node(it) => it.ancestors(), 215 | NodeOrToken::Token(it) => it.parent().ancestors(), 216 | } 217 | } 218 | 219 | /// Return the leftmost token in the subtree of this element. 220 | #[inline] 221 | pub fn first_token(&self) -> Option<&SyntaxToken> { 222 | match self { 223 | NodeOrToken::Node(it) => it.first_token(), 224 | NodeOrToken::Token(it) => Some(it), 225 | } 226 | } 227 | 228 | /// Return the rightmost token in the subtree of this element. 229 | #[inline] 230 | pub fn last_token(&self) -> Option<&SyntaxToken> { 231 | match self { 232 | NodeOrToken::Node(it) => it.last_token(), 233 | NodeOrToken::Token(it) => Some(it), 234 | } 235 | } 236 | 237 | /// The tree element to the right of this one, i.e. the next child of this element's parent after this element. 238 | #[inline] 239 | pub fn next_sibling_or_token(&self) -> Option> { 240 | match self { 241 | NodeOrToken::Node(it) => it.next_sibling_or_token(), 242 | NodeOrToken::Token(it) => it.next_sibling_or_token(), 243 | } 244 | } 245 | 246 | /// The tree element to the left of this one, i.e. 
the previous child of this element's parent after this element. 247 | #[inline] 248 | pub fn prev_sibling_or_token(&self) -> Option> { 249 | match self { 250 | NodeOrToken::Node(it) => it.prev_sibling_or_token(), 251 | NodeOrToken::Token(it) => it.prev_sibling_or_token(), 252 | } 253 | } 254 | } 255 | 256 | impl<'a, S: Syntax, D> SyntaxElementRef<'a, S, D> { 257 | /// The range this element covers in the source text, in bytes. 258 | #[inline] 259 | pub fn text_range(&self) -> TextRange { 260 | match self { 261 | NodeOrToken::Node(it) => it.text_range(), 262 | NodeOrToken::Token(it) => it.text_range(), 263 | } 264 | } 265 | 266 | /// The internal representation of the kind of this element. 267 | #[inline] 268 | pub fn syntax_kind(&self) -> RawSyntaxKind { 269 | match self { 270 | NodeOrToken::Node(it) => it.syntax_kind(), 271 | NodeOrToken::Token(it) => it.syntax_kind(), 272 | } 273 | } 274 | 275 | /// The kind of this element in terms of your language. 276 | #[inline] 277 | pub fn kind(&self) -> S { 278 | match self { 279 | NodeOrToken::Node(it) => it.kind(), 280 | NodeOrToken::Token(it) => it.kind(), 281 | } 282 | } 283 | 284 | /// The parent node of this element, except if this element is the root. 285 | #[inline] 286 | pub fn parent(&self) -> Option<&'a SyntaxNode> { 287 | match self { 288 | NodeOrToken::Node(it) => it.parent(), 289 | NodeOrToken::Token(it) => Some(it.parent()), 290 | } 291 | } 292 | 293 | /// Returns an iterator along the chain of parents of this node. 294 | #[inline] 295 | pub fn ancestors(&self) -> impl Iterator> { 296 | match self { 297 | NodeOrToken::Node(it) => it.ancestors(), 298 | NodeOrToken::Token(it) => it.parent().ancestors(), 299 | } 300 | } 301 | 302 | /// Return the leftmost token in the subtree of this element. 
303 | #[inline] 304 | pub fn first_token(&self) -> Option<&'a SyntaxToken> { 305 | match self { 306 | NodeOrToken::Node(it) => it.first_token(), 307 | NodeOrToken::Token(it) => Some(it), 308 | } 309 | } 310 | 311 | /// Return the rightmost token in the subtree of this element. 312 | #[inline] 313 | pub fn last_token(&self) -> Option<&'a SyntaxToken> { 314 | match self { 315 | NodeOrToken::Node(it) => it.last_token(), 316 | NodeOrToken::Token(it) => Some(it), 317 | } 318 | } 319 | 320 | /// The tree element to the right of this one, i.e. the next child of this element's parent after this element. 321 | #[inline] 322 | pub fn next_sibling_or_token(&self) -> Option> { 323 | match self { 324 | NodeOrToken::Node(it) => it.next_sibling_or_token(), 325 | NodeOrToken::Token(it) => it.next_sibling_or_token(), 326 | } 327 | } 328 | 329 | /// The tree element to the left of this one, i.e. the previous child of this element's parent after this element. 330 | #[inline] 331 | pub fn prev_sibling_or_token(&self) -> Option> { 332 | match self { 333 | NodeOrToken::Node(it) => it.prev_sibling_or_token(), 334 | NodeOrToken::Token(it) => it.prev_sibling_or_token(), 335 | } 336 | } 337 | 338 | #[inline] 339 | pub(super) fn token_at_offset(&self, offset: TextSize) -> TokenAtOffset> { 340 | assert!(self.text_range().start() <= offset && offset <= self.text_range().end()); 341 | match self { 342 | NodeOrToken::Token(token) => TokenAtOffset::Single((*token).clone()), 343 | NodeOrToken::Node(node) => node.token_at_offset(offset), 344 | } 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /cstree/src/syntax/iter.rs: -------------------------------------------------------------------------------- 1 | //! Red tree iterators. 
2 | 3 | use std::iter::FusedIterator; 4 | 5 | use text_size::TextSize; 6 | 7 | use crate::{ 8 | green::{GreenElementRef, GreenNodeChildren}, 9 | syntax::{SyntaxElementRef, SyntaxNode}, 10 | Syntax, 11 | }; 12 | 13 | #[derive(Clone, Debug)] 14 | struct Iter<'n> { 15 | green: GreenNodeChildren<'n>, 16 | offset: TextSize, 17 | index: usize, 18 | } 19 | 20 | impl<'n> Iter<'n> { 21 | fn new(parent: &'n SyntaxNode) -> Self { 22 | let offset = parent.text_range().start(); 23 | let green: GreenNodeChildren<'_> = parent.green().children(); 24 | Iter { 25 | green, 26 | offset, 27 | index: 0, 28 | } 29 | } 30 | } 31 | 32 | impl<'n> Iterator for Iter<'n> { 33 | type Item = (GreenElementRef<'n>, usize, TextSize); 34 | 35 | #[inline(always)] 36 | fn next(&mut self) -> Option { 37 | self.green.next().map(|element| { 38 | let offset = self.offset; 39 | let index = self.index; 40 | self.offset += element.text_len(); 41 | self.index += 1; 42 | (element, index, offset) 43 | }) 44 | } 45 | 46 | #[inline(always)] 47 | fn size_hint(&self) -> (usize, Option) { 48 | self.green.size_hint() 49 | } 50 | 51 | #[inline(always)] 52 | fn count(self) -> usize 53 | where 54 | Self: Sized, 55 | { 56 | self.green.count() 57 | } 58 | } 59 | 60 | impl ExactSizeIterator for Iter<'_> { 61 | #[inline(always)] 62 | fn len(&self) -> usize { 63 | self.green.len() 64 | } 65 | } 66 | impl FusedIterator for Iter<'_> {} 67 | 68 | /// An iterator over the child nodes of a [`SyntaxNode`]. 
69 | #[derive(Clone, Debug)] 70 | pub struct SyntaxNodeChildren<'n, S: Syntax, D: 'static = ()> { 71 | inner: Iter<'n>, 72 | parent: &'n SyntaxNode, 73 | } 74 | 75 | impl<'n, S: Syntax, D> SyntaxNodeChildren<'n, S, D> { 76 | #[inline] 77 | pub(super) fn new(parent: &'n SyntaxNode) -> Self { 78 | Self { 79 | inner: Iter::new(parent), 80 | parent, 81 | } 82 | } 83 | } 84 | 85 | impl<'n, S: Syntax, D> Iterator for SyntaxNodeChildren<'n, S, D> { 86 | type Item = &'n SyntaxNode; 87 | 88 | #[inline(always)] 89 | fn next(&mut self) -> Option { 90 | for (element, index, offset) in &mut self.inner { 91 | if let Some(&node) = element.as_node() { 92 | return Some(self.parent.get_or_add_node(node, index, offset).as_node().unwrap()); 93 | } 94 | } 95 | None 96 | } 97 | 98 | #[inline(always)] 99 | fn size_hint(&self) -> (usize, Option) { 100 | self.inner.size_hint() 101 | } 102 | 103 | #[inline(always)] 104 | fn count(self) -> usize 105 | where 106 | Self: Sized, 107 | { 108 | self.inner.count() 109 | } 110 | } 111 | 112 | impl ExactSizeIterator for SyntaxNodeChildren<'_, S, D> { 113 | #[inline(always)] 114 | fn len(&self) -> usize { 115 | self.inner.len() 116 | } 117 | } 118 | impl FusedIterator for SyntaxNodeChildren<'_, S, D> {} 119 | 120 | /// An iterator over the children of a [`SyntaxNode`]. 
121 | #[derive(Clone, Debug)] 122 | pub struct SyntaxElementChildren<'n, S: Syntax, D: 'static = ()> { 123 | inner: Iter<'n>, 124 | parent: &'n SyntaxNode, 125 | } 126 | 127 | impl<'n, S: Syntax, D> SyntaxElementChildren<'n, S, D> { 128 | #[inline] 129 | pub(super) fn new(parent: &'n SyntaxNode) -> Self { 130 | Self { 131 | inner: Iter::new(parent), 132 | parent, 133 | } 134 | } 135 | } 136 | 137 | impl<'n, S: Syntax, D> Iterator for SyntaxElementChildren<'n, S, D> { 138 | type Item = SyntaxElementRef<'n, S, D>; 139 | 140 | #[inline(always)] 141 | fn next(&mut self) -> Option { 142 | let parent = self.parent; 143 | self.inner 144 | .next() 145 | .map(|(green, index, offset)| parent.get_or_add_element(green, index, offset)) 146 | } 147 | 148 | #[inline(always)] 149 | fn size_hint(&self) -> (usize, Option) { 150 | self.inner.size_hint() 151 | } 152 | 153 | #[inline(always)] 154 | fn count(self) -> usize 155 | where 156 | Self: Sized, 157 | { 158 | self.inner.count() 159 | } 160 | } 161 | 162 | impl ExactSizeIterator for SyntaxElementChildren<'_, S, D> { 163 | #[inline(always)] 164 | fn len(&self) -> usize { 165 | self.inner.len() 166 | } 167 | } 168 | impl FusedIterator for SyntaxElementChildren<'_, S, D> {} 169 | -------------------------------------------------------------------------------- /cstree/src/syntax/mod.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the outer, "red" tree. 2 | //! 3 | //! Inner [`SyntaxNode`]s represent only structural information, but can hold additional, user-defined data. 4 | //! Leaf [`SyntaxToken`]s represent individual pieces of source text. 5 | //! Use [`SyntaxNode::new_root`] and [`SyntaxNode::new_root_with_resolver`] to construct a syntax 6 | //! tree on top of a green tree. 
7 | 8 | mod element; 9 | pub use element::{SyntaxElement, SyntaxElementRef}; 10 | mod node; 11 | pub use node::SyntaxNode; 12 | mod token; 13 | pub use token::SyntaxToken; 14 | mod resolved; 15 | pub use resolved::{ResolvedElement, ResolvedElementRef, ResolvedNode, ResolvedToken}; 16 | mod iter; 17 | pub use iter::{SyntaxElementChildren, SyntaxNodeChildren}; 18 | 19 | mod text; 20 | pub use text::SyntaxText; 21 | 22 | // A note on `#[inline]` usage in this module: 23 | // In `rowan`, there are two layers of `SyntaxXY`s: the `cursor` layer and the `api` layer. 24 | // The `cursor` layer handles all of the actual methods on the tree, while the `api` layer is 25 | // generic over the `Syntax` of the tree and otherwise forwards its implementation to the `cursor` 26 | // layer. 27 | // Here, we have unified the `cursor` and the `api` layer into the `syntax` layer. 28 | // This means that all of our types here are generic over a `Syntax`, including the 29 | // implementations which, in `rowan`, are part of the `cursor` layer. 30 | // Very apparently, this makes the compiler less willing to inline. Almost every "regular use" 31 | // method in this file has some kind of `#[inline]` annotation to counteract that. This is _NOT_ 32 | // just for fun, not inlining decreases tree traversal speed by approx. 50% at the time of writing 33 | // this. 
34 | // 35 | // - DQ 01/2021 36 | 37 | #[cfg(test)] 38 | mod tests { 39 | use super::*; 40 | use crate::testing::*; 41 | 42 | #[test] 43 | #[cfg_attr(miri, ignore)] 44 | fn assert_send_sync() { 45 | fn f() {} 46 | f::>(); 47 | f::>(); 48 | f::>(); 49 | f::>(); 50 | 51 | f::>(); 52 | f::>(); 53 | f::>(); 54 | f::>(); 55 | } 56 | 57 | #[test] 58 | #[cfg_attr(miri, ignore)] 59 | #[rustfmt::skip] 60 | fn assert_syntax_sizes() { 61 | use std::mem::size_of; 62 | 63 | assert_eq!(size_of::>(), size_of::<*const u8>()); 64 | // verify niche opt of `NonNull` 65 | assert_eq!(size_of::>>(), size_of::<*const u8>()); 66 | // parent + child index + text len 67 | assert_eq!(size_of::>(), size_of::>() + size_of::() * 2); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /cstree/src/utility_types.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | /// Convenience type to represent tree elements which may either be a node or a token. 4 | /// 5 | /// Used for both red and green tree, references to elements, ... 
6 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 7 | pub enum NodeOrToken { 8 | Node(N), 9 | Token(T), 10 | } 11 | 12 | impl NodeOrToken { 13 | pub fn into_node(self) -> Option { 14 | match self { 15 | NodeOrToken::Node(node) => Some(node), 16 | NodeOrToken::Token(_) => None, 17 | } 18 | } 19 | 20 | pub fn into_token(self) -> Option { 21 | match self { 22 | NodeOrToken::Node(_) => None, 23 | NodeOrToken::Token(token) => Some(token), 24 | } 25 | } 26 | 27 | pub fn as_node(&self) -> Option<&N> { 28 | match self { 29 | NodeOrToken::Node(node) => Some(node), 30 | NodeOrToken::Token(_) => None, 31 | } 32 | } 33 | 34 | pub fn as_token(&self) -> Option<&T> { 35 | match self { 36 | NodeOrToken::Node(_) => None, 37 | NodeOrToken::Token(token) => Some(token), 38 | } 39 | } 40 | 41 | pub(crate) fn as_ref(&self) -> NodeOrToken<&N, &T> { 42 | match self { 43 | NodeOrToken::Node(node) => NodeOrToken::Node(node), 44 | NodeOrToken::Token(token) => NodeOrToken::Token(token), 45 | } 46 | } 47 | } 48 | 49 | impl NodeOrToken<&N, &T> { 50 | pub(crate) fn cloned(&self) -> NodeOrToken { 51 | match *self { 52 | NodeOrToken::Node(node) => NodeOrToken::Node(node.clone()), 53 | NodeOrToken::Token(token) => NodeOrToken::Token(token.clone()), 54 | } 55 | } 56 | } 57 | 58 | impl fmt::Display for NodeOrToken { 59 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 60 | match self { 61 | NodeOrToken::Node(node) => node.fmt(f), 62 | NodeOrToken::Token(token) => token.fmt(f), 63 | } 64 | } 65 | } 66 | 67 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 68 | pub enum Direction { 69 | Next, 70 | Prev, 71 | } 72 | 73 | /// `WalkEvent` describes tree walking process. 74 | #[derive(Debug, Copy, Clone)] 75 | pub enum WalkEvent { 76 | /// Fired before traversing the node. 77 | Enter(T), 78 | /// Fired after the node is traversed. 
79 | Leave(T), 80 | } 81 | 82 | impl WalkEvent { 83 | pub fn map U, U>(self, f: F) -> WalkEvent { 84 | match self { 85 | WalkEvent::Enter(it) => WalkEvent::Enter(f(it)), 86 | WalkEvent::Leave(it) => WalkEvent::Leave(f(it)), 87 | } 88 | } 89 | } 90 | 91 | #[derive(Debug)] 92 | pub(crate) enum MaybeOwned<'a, T> { 93 | Owned(T), 94 | Borrowed(&'a mut T), 95 | } 96 | 97 | impl MaybeOwned<'_, T> { 98 | pub(crate) fn into_owned(self) -> Option { 99 | match self { 100 | MaybeOwned::Owned(owned) => Some(owned), 101 | MaybeOwned::Borrowed(_) => None, 102 | } 103 | } 104 | } 105 | 106 | impl std::ops::Deref for MaybeOwned<'_, T> { 107 | type Target = T; 108 | 109 | fn deref(&self) -> &T { 110 | match self { 111 | MaybeOwned::Owned(it) => it, 112 | MaybeOwned::Borrowed(it) => it, 113 | } 114 | } 115 | } 116 | 117 | impl std::ops::DerefMut for MaybeOwned<'_, T> { 118 | fn deref_mut(&mut self) -> &mut T { 119 | match self { 120 | MaybeOwned::Owned(it) => it, 121 | MaybeOwned::Borrowed(it) => it, 122 | } 123 | } 124 | } 125 | 126 | impl Default for MaybeOwned<'_, T> { 127 | fn default() -> Self { 128 | MaybeOwned::Owned(T::default()) 129 | } 130 | } 131 | 132 | /// There might be zero, one or two leaves at a given offset. 133 | #[derive(Clone, Debug)] 134 | pub enum TokenAtOffset { 135 | /// No leaves at offset -- possible for the empty file. 136 | None, 137 | /// Only a single leaf at offset. 138 | Single(T), 139 | /// Offset is exactly between two leaves. 140 | Between(T, T), 141 | } 142 | 143 | impl TokenAtOffset { 144 | pub fn map U, U>(self, f: F) -> TokenAtOffset { 145 | match self { 146 | TokenAtOffset::None => TokenAtOffset::None, 147 | TokenAtOffset::Single(it) => TokenAtOffset::Single(f(it)), 148 | TokenAtOffset::Between(l, r) => TokenAtOffset::Between(f(l), f(r)), 149 | } 150 | } 151 | 152 | /// Convert to option, preferring the right leaf in case of a tie. 
153 | pub fn right_biased(self) -> Option { 154 | match self { 155 | TokenAtOffset::None => None, 156 | TokenAtOffset::Single(node) => Some(node), 157 | TokenAtOffset::Between(_, right) => Some(right), 158 | } 159 | } 160 | 161 | /// Convert to option, preferring the left leaf in case of a tie. 162 | pub fn left_biased(self) -> Option { 163 | match self { 164 | TokenAtOffset::None => None, 165 | TokenAtOffset::Single(node) => Some(node), 166 | TokenAtOffset::Between(left, _) => Some(left), 167 | } 168 | } 169 | } 170 | 171 | impl Iterator for TokenAtOffset { 172 | type Item = T; 173 | 174 | fn next(&mut self) -> Option { 175 | match std::mem::replace(self, TokenAtOffset::None) { 176 | TokenAtOffset::None => None, 177 | TokenAtOffset::Single(node) => { 178 | *self = TokenAtOffset::None; 179 | Some(node) 180 | } 181 | TokenAtOffset::Between(left, right) => { 182 | *self = TokenAtOffset::Single(right); 183 | Some(left) 184 | } 185 | } 186 | } 187 | 188 | fn size_hint(&self) -> (usize, Option) { 189 | match self { 190 | TokenAtOffset::None => (0, Some(0)), 191 | TokenAtOffset::Single(_) => (1, Some(1)), 192 | TokenAtOffset::Between(_, _) => (2, Some(2)), 193 | } 194 | } 195 | } 196 | 197 | impl ExactSizeIterator for TokenAtOffset {} 198 | -------------------------------------------------------------------------------- /cstree/tests/it/basic.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use cstree::{ 3 | build::{GreenNodeBuilder, NodeCache}, 4 | interning::{new_interner, Resolver}, 5 | text::TextRange, 6 | RawSyntaxKind, 7 | }; 8 | 9 | fn build_tree(root: &Element<'_>) -> (SyntaxNode, impl Resolver) { 10 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); 11 | build_recursive(root, &mut builder, 0); 12 | let (node, cache) = builder.finish(); 13 | (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) 14 | } 15 | 16 | fn two_level_tree() -> Element<'static> { 17 | use Element::*; 18 | 
Node(vec![ 19 | Node(vec![Token("0.0"), Token("0.1")]), 20 | Node(vec![Token("1.0")]), 21 | Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]), 22 | ]) 23 | } 24 | 25 | fn tree_with_eq_tokens() -> Element<'static> { 26 | use Element::*; 27 | Node(vec![ 28 | Node(vec![Token("a"), Token("b")]), 29 | Node(vec![Token("c")]), 30 | Node(vec![Token("a"), Token("b"), Token("c")]), 31 | ]) 32 | } 33 | 34 | #[test] 35 | fn create() { 36 | let tree = two_level_tree(); 37 | let (tree, resolver) = build_tree::<()>(&tree); 38 | assert_eq!(tree.syntax_kind(), RawSyntaxKind(0)); 39 | assert_eq!(tree.kind(), SyntaxKind(0)); 40 | { 41 | let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap(); 42 | let leaf1_0 = leaf1_0.into_token().unwrap(); 43 | assert_eq!(leaf1_0.syntax_kind(), RawSyntaxKind(5)); 44 | assert_eq!(leaf1_0.kind(), SyntaxKind(5)); 45 | assert_eq!(leaf1_0.resolve_text(&resolver), "1.0"); 46 | assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into())); 47 | } 48 | { 49 | let node2 = tree.children().nth(2).unwrap(); 50 | assert_eq!(node2.syntax_kind(), RawSyntaxKind(6)); 51 | assert_eq!(node2.kind(), SyntaxKind(6)); 52 | assert_eq!(node2.children_with_tokens().count(), 3); 53 | assert_eq!(node2.resolve_text(&resolver), "2.02.12.2"); 54 | } 55 | } 56 | 57 | #[test] 58 | fn token_text_eq() { 59 | let tree = tree_with_eq_tokens(); 60 | let (tree, _) = build_tree::<()>(&tree); 61 | assert_eq!(tree.kind(), SyntaxKind(0)); 62 | 63 | let leaf0_0 = tree.children().next().unwrap().children_with_tokens().next().unwrap(); 64 | let leaf0_0 = leaf0_0.into_token().unwrap(); 65 | let leaf0_1 = tree.children().next().unwrap().children_with_tokens().nth(1).unwrap(); 66 | let leaf0_1 = leaf0_1.into_token().unwrap(); 67 | 68 | let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap(); 69 | let leaf1_0 = leaf1_0.into_token().unwrap(); 70 | 71 | let leaf2_0 = 
tree.children().nth(2).unwrap().children_with_tokens().next().unwrap(); 72 | let leaf2_0 = leaf2_0.into_token().unwrap(); 73 | let leaf2_1 = tree.children().nth(2).unwrap().children_with_tokens().nth(1).unwrap(); 74 | let leaf2_1 = leaf2_1.into_token().unwrap(); 75 | let leaf2_2 = tree.children().nth(2).unwrap().children_with_tokens().nth(2).unwrap(); 76 | let leaf2_2 = leaf2_2.into_token().unwrap(); 77 | 78 | assert!(leaf0_0.text_eq(leaf2_0)); 79 | assert!(leaf0_1.text_eq(leaf2_1)); 80 | assert!(leaf1_0.text_eq(leaf2_2)); 81 | assert!(!leaf0_0.text_eq(leaf0_1)); 82 | assert!(!leaf2_1.text_eq(leaf2_2)); 83 | assert!(!leaf1_0.text_eq(leaf2_0)); 84 | } 85 | 86 | #[test] 87 | fn data() { 88 | let tree = two_level_tree(); 89 | let (tree, _resolver) = build_tree::(&tree); 90 | { 91 | let node2 = tree.children().nth(2).unwrap(); 92 | assert_eq!(*node2.try_set_data("data".into()).unwrap(), "data"); 93 | let data = node2.get_data().unwrap(); 94 | assert_eq!(data.as_str(), "data"); 95 | node2.set_data("payload".into()); 96 | let data = node2.get_data().unwrap(); 97 | assert_eq!(data.as_str(), "payload"); 98 | } 99 | { 100 | let node2 = tree.children().nth(2).unwrap(); 101 | assert!(node2.try_set_data("already present".into()).is_err()); 102 | let data = node2.get_data().unwrap(); 103 | assert_eq!(data.as_str(), "payload"); 104 | node2.set_data("new data".into()); 105 | } 106 | { 107 | let node2 = tree.children().nth(2).unwrap(); 108 | let data = node2.get_data().unwrap(); 109 | assert_eq!(data.as_str(), "new data"); 110 | node2.clear_data(); 111 | // re-use `data` after node data was cleared 112 | assert_eq!(data.as_str(), "new data"); 113 | } 114 | { 115 | let node2 = tree.children().nth(2).unwrap(); 116 | assert_eq!(node2.get_data(), None); 117 | } 118 | } 119 | 120 | #[test] 121 | fn with_interner() { 122 | let mut interner = new_interner(); 123 | let mut cache = NodeCache::with_interner(&mut interner); 124 | let tree = two_level_tree(); 125 | let tree = 
build_tree_with_cache(&tree, &mut cache); 126 | let tree: SyntaxNode = SyntaxNode::new_root(tree); 127 | let resolver = interner; 128 | { 129 | let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap(); 130 | let leaf1_0 = leaf1_0.into_token().unwrap(); 131 | assert_eq!(leaf1_0.resolve_text(&resolver), "1.0"); 132 | assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into())); 133 | } 134 | { 135 | let node2 = tree.children().nth(2).unwrap(); 136 | assert_eq!(node2.resolve_text(&resolver), "2.02.12.2"); 137 | } 138 | } 139 | 140 | #[test] 141 | fn inline_resolver() { 142 | let mut interner = new_interner(); 143 | let mut cache = NodeCache::with_interner(&mut interner); 144 | let tree = two_level_tree(); 145 | let tree = build_tree_with_cache(&tree, &mut cache); 146 | let tree: ResolvedNode = SyntaxNode::new_root_with_resolver(tree, interner); 147 | { 148 | let leaf1_0 = tree.children().nth(1).unwrap().children_with_tokens().next().unwrap(); 149 | let leaf1_0 = leaf1_0.into_token().unwrap(); 150 | assert_eq!(leaf1_0.text(), "1.0"); 151 | assert_eq!(leaf1_0.text_range(), TextRange::at(6.into(), 3.into())); 152 | assert_eq!(format!("{}", leaf1_0), leaf1_0.text()); 153 | assert_eq!(format!("{:?}", leaf1_0), "SyntaxKind(5)@6..9 \"1.0\""); 154 | } 155 | { 156 | let node2 = tree.children().nth(2).unwrap(); 157 | assert_eq!(node2.text(), "2.02.12.2"); 158 | let resolver = node2.resolver(); 159 | assert_eq!(node2.resolve_text(resolver.as_ref()), node2.text()); 160 | assert_eq!(format!("{}", node2).as_str(), node2.text()); 161 | assert_eq!(format!("{:?}", node2), "SyntaxKind(6)@9..18"); 162 | assert_eq!( 163 | format!("{:#?}", node2), 164 | r#"SyntaxKind(6)@9..18 165 | SyntaxKind(7)@9..12 "2.0" 166 | SyntaxKind(8)@12..15 "2.1" 167 | SyntaxKind(9)@15..18 "2.2" 168 | "# 169 | ); 170 | } 171 | } 172 | 173 | #[test] 174 | fn assert_debug_display() { 175 | use std::fmt; 176 | fn f() {} 177 | 178 | f::(); 179 | f::(); 180 | f::(); 181 | f::>(); 
182 | f::>(); 183 | 184 | fn dbg() {} 185 | dbg::>(); 186 | } 187 | -------------------------------------------------------------------------------- /cstree/tests/it/main.rs: -------------------------------------------------------------------------------- 1 | mod basic; 2 | mod regressions; 3 | mod rollback; 4 | mod sendsync; 5 | #[cfg(feature = "serialize")] 6 | mod serde; 7 | 8 | use cstree::{ 9 | build::{GreenNodeBuilder, NodeCache}, 10 | green::GreenNode, 11 | interning::{Interner, Resolver}, 12 | util::NodeOrToken, 13 | RawSyntaxKind, Syntax, 14 | }; 15 | 16 | pub type SyntaxNode = cstree::syntax::SyntaxNode; 17 | pub type SyntaxToken = cstree::syntax::SyntaxToken; 18 | pub type SyntaxElement = cstree::syntax::SyntaxElement; 19 | pub type SyntaxElementRef<'a, D = ()> = cstree::syntax::SyntaxElementRef<'a, SyntaxKind, D>; 20 | 21 | pub type ResolvedNode = cstree::syntax::ResolvedNode; 22 | pub type ResolvedToken = cstree::syntax::ResolvedToken; 23 | pub type ResolvedElement = cstree::syntax::ResolvedElement; 24 | pub type ResolvedElementRef<'a, D = ()> = cstree::syntax::ResolvedElementRef<'a, SyntaxKind, D>; 25 | 26 | #[derive(Debug)] 27 | pub enum Element<'s> { 28 | Node(Vec>), 29 | Token(&'s str), 30 | } 31 | 32 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 33 | #[repr(transparent)] 34 | pub struct SyntaxKind(u32); 35 | 36 | impl Syntax for SyntaxKind { 37 | fn from_raw(raw: RawSyntaxKind) -> Self { 38 | Self(raw.0) 39 | } 40 | 41 | fn into_raw(self) -> RawSyntaxKind { 42 | RawSyntaxKind(self.0) 43 | } 44 | 45 | fn static_text(self) -> Option<&'static str> { 46 | None 47 | } 48 | } 49 | 50 | pub fn build_tree_with_cache(root: &Element<'_>, cache: &mut NodeCache<'_, I>) -> GreenNode 51 | where 52 | I: Interner, 53 | { 54 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::with_cache(cache); 55 | build_recursive(root, &mut builder, 0); 56 | let (node, cache) = builder.finish(); 57 | assert!(cache.is_none()); 58 | node 59 | } 60 | 61 | pub fn 
build_recursive( 62 | root: &Element<'_>, 63 | builder: &mut GreenNodeBuilder<'_, '_, SyntaxKind, I>, 64 | mut from: u32, 65 | ) -> u32 66 | where 67 | I: Interner, 68 | { 69 | match root { 70 | Element::Node(children) => { 71 | builder.start_node(SyntaxKind(from)); 72 | for child in children { 73 | from = build_recursive(child, builder, from + 1); 74 | } 75 | builder.finish_node(); 76 | } 77 | Element::Token(text) => { 78 | builder.token(SyntaxKind(from), text); 79 | } 80 | } 81 | from 82 | } 83 | 84 | #[track_caller] 85 | pub fn assert_tree_eq( 86 | (left, left_res): (&SyntaxNode, &impl Resolver), 87 | (right, right_res): (&SyntaxNode, &impl Resolver), 88 | ) { 89 | if left.green() == right.green() { 90 | return; 91 | } 92 | 93 | if left.kind() != right.kind() || left.children_with_tokens().len() != right.children_with_tokens().len() { 94 | panic!("{} !=\n{}", left.debug(left_res, true), right.debug(right_res, true)) 95 | } 96 | 97 | for elem in left.children_with_tokens().zip(right.children_with_tokens()) { 98 | match elem { 99 | (NodeOrToken::Node(ln), NodeOrToken::Node(rn)) => assert_tree_eq((ln, left_res), (rn, right_res)), 100 | (NodeOrToken::Node(n), NodeOrToken::Token(t)) => { 101 | panic!("{} != {}", n.debug(left_res, true), t.debug(right_res)) 102 | } 103 | (NodeOrToken::Token(t), NodeOrToken::Node(n)) => { 104 | panic!("{} != {}", t.debug(left_res), n.debug(right_res, true)) 105 | } 106 | (NodeOrToken::Token(lt), NodeOrToken::Token(rt)) => { 107 | if lt.syntax_kind() != rt.syntax_kind() || lt.resolve_text(left_res) != rt.resolve_text(right_res) { 108 | panic!("{} != {}", lt.debug(left_res), rt.debug(right_res)) 109 | } 110 | } 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /cstree/tests/it/regressions.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[test] 4 | fn empty_tree_arc() { 5 | // this test is not here for the test 
itself, but to run it through MIRI, who complained about out-of-bound 6 | // `ThinArc` pointers for a root `GreenNode` with no children 7 | 8 | use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode}; 9 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 10 | #[repr(u32)] 11 | enum SyntaxKind { 12 | Root, 13 | } 14 | 15 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); 16 | builder.start_node(SyntaxKind::Root); 17 | builder.finish_node(); 18 | let (green, _) = builder.finish(); 19 | let root: SyntaxNode = SyntaxNode::new_root(green); 20 | assert_eq!(root.kind(), SyntaxKind::Root); 21 | } 22 | -------------------------------------------------------------------------------- /cstree/tests/it/rollback.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use cstree::interning::Resolver; 3 | 4 | type GreenNodeBuilder<'cache, 'interner> = cstree::build::GreenNodeBuilder<'cache, 'interner, SyntaxKind>; 5 | 6 | fn with_builder(f: impl FnOnce(&mut GreenNodeBuilder)) -> (SyntaxNode, impl Resolver) { 7 | let mut builder = GreenNodeBuilder::new(); 8 | f(&mut builder); 9 | let (node, cache) = builder.finish(); 10 | (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap()) 11 | } 12 | 13 | #[test] 14 | #[should_panic = "`left == right` failed"] 15 | fn comparison_works() { 16 | let (first, res1) = with_builder(|_| {}); 17 | let (second, res2) = with_builder(|builder| { 18 | builder.start_node(SyntaxKind(0)); 19 | builder.token(SyntaxKind(1), "hi"); 20 | builder.finish_node(); 21 | }); 22 | assert_tree_eq((&first, &res1), (&second, &res2)); 23 | } 24 | 25 | #[test] 26 | fn no_rollback_token() { 27 | let (first, res1) = with_builder(|builder| { 28 | builder.start_node(SyntaxKind(0)); 29 | builder.token(SyntaxKind(1), "hi"); 30 | builder.finish_node(); 31 | }); 32 | let (second, res2) = with_builder(|builder| { 33 | let checkpoint = builder.checkpoint(); 34 | builder.token(SyntaxKind(1), "hi"); 35 | 
builder.start_node_at(checkpoint, SyntaxKind(0)); 36 | builder.finish_node(); 37 | }); 38 | assert_tree_eq((&first, &res1), (&second, &res2)); 39 | } 40 | 41 | #[test] 42 | fn no_rollback_node() { 43 | let (first, res1) = with_builder(|builder| { 44 | builder.start_node(SyntaxKind(2)); 45 | builder.start_node(SyntaxKind(0)); 46 | builder.token(SyntaxKind(1), "hi"); 47 | builder.finish_node(); 48 | builder.finish_node(); 49 | }); 50 | let (second, res2) = with_builder(|builder| { 51 | let checkpoint = builder.checkpoint(); 52 | builder.start_node(SyntaxKind(0)); 53 | builder.token(SyntaxKind(1), "hi"); 54 | builder.finish_node(); 55 | builder.start_node_at(checkpoint, SyntaxKind(2)); 56 | builder.finish_node(); 57 | }); 58 | assert_tree_eq((&first, &res1), (&second, &res2)); 59 | } 60 | 61 | #[test] 62 | #[should_panic = "unfinished nodes"] 63 | fn no_rollback_unfinished_node() { 64 | let (second, res2) = with_builder(|builder| { 65 | let checkpoint = builder.checkpoint(); 66 | builder.start_node(SyntaxKind(0)); 67 | builder.token(SyntaxKind(1), "hi"); 68 | builder.start_node_at(checkpoint, SyntaxKind(2)); 69 | builder.finish_node(); 70 | builder.finish_node(); 71 | }); 72 | println!("{}", second.debug(&res2, true)); 73 | } 74 | 75 | #[test] 76 | fn simple() { 77 | let (first, res1) = with_builder(|builder| { 78 | builder.start_node(SyntaxKind(0)); 79 | builder.finish_node(); 80 | }); 81 | let (second, res2) = with_builder(|builder| { 82 | builder.start_node(SyntaxKind(0)); 83 | 84 | // Add a token, then remove it. 
85 | let initial = builder.checkpoint(); 86 | builder.token(SyntaxKind(1), "hi"); 87 | builder.revert_to(initial); 88 | 89 | builder.finish_node(); 90 | }); 91 | assert_tree_eq((&first, &res1), (&second, &res2)); 92 | } 93 | 94 | #[test] 95 | fn nested() { 96 | let (first, res1) = with_builder(|builder| { 97 | builder.start_node(SyntaxKind(0)); 98 | builder.finish_node(); 99 | }); 100 | 101 | let (second, res2) = with_builder(|builder| { 102 | builder.start_node(SyntaxKind(0)); 103 | // Add two tokens, then remove both. 104 | let initial = builder.checkpoint(); 105 | builder.token(SyntaxKind(1), "hi"); 106 | builder.token(SyntaxKind(2), "hello"); 107 | builder.revert_to(initial); 108 | 109 | builder.finish_node(); 110 | }); 111 | 112 | let (third, res3) = with_builder(|builder| { 113 | builder.start_node(SyntaxKind(0)); 114 | 115 | // Add two tokens, then remove one after the other. 116 | let initial = builder.checkpoint(); 117 | builder.token(SyntaxKind(1), "hi"); 118 | let second = builder.checkpoint(); 119 | builder.token(SyntaxKind(2), "hello"); 120 | builder.revert_to(second); 121 | builder.revert_to(initial); 122 | 123 | builder.finish_node(); 124 | }); 125 | 126 | assert_tree_eq((&first, &res1), (&second, &res2)); 127 | assert_tree_eq((&first, &res1), (&third, &res3)); 128 | } 129 | 130 | #[test] 131 | fn unfinished_node() { 132 | let (first, res1) = with_builder(|builder| { 133 | builder.start_node(SyntaxKind(2)); 134 | builder.finish_node(); 135 | }); 136 | let (second, res2) = with_builder(|builder| { 137 | builder.start_node(SyntaxKind(2)); 138 | let checkpoint = builder.checkpoint(); 139 | builder.start_node(SyntaxKind(0)); 140 | builder.token(SyntaxKind(1), "hi"); 141 | builder.revert_to(checkpoint); 142 | builder.finish_node(); 143 | }); 144 | assert_tree_eq((&first, &res1), (&second, &res2)); 145 | } 146 | 147 | #[test] 148 | #[should_panic = "checkpoint no longer valid after reverting to an earlier checkpoint"] 149 | fn misuse() { 150 | let (first, 
res1) = with_builder(|builder| { 151 | builder.start_node(SyntaxKind(0)); 152 | builder.finish_node(); 153 | }); 154 | let (second, res2) = with_builder(|builder| { 155 | builder.start_node(SyntaxKind(0)); 156 | 157 | // Add two tokens, but remove them in the wrong order. 158 | let initial = builder.checkpoint(); 159 | builder.token(SyntaxKind(1), "hi"); 160 | let new = builder.checkpoint(); 161 | builder.token(SyntaxKind(2), "hello"); 162 | builder.revert_to(initial); 163 | builder.revert_to(new); 164 | 165 | builder.finish_node(); 166 | }); 167 | 168 | assert_tree_eq((&first, &res1), (&second, &res2)); 169 | } 170 | 171 | #[test] 172 | #[should_panic = "did you already `revert_to`?"] 173 | fn misuse2() { 174 | with_builder(|builder| { 175 | builder.start_node(SyntaxKind(0)); 176 | 177 | // Take two snapshots across a node boundary, but revert them in the wrong order. 178 | let initial = builder.checkpoint(); 179 | builder.start_node(SyntaxKind(3)); 180 | builder.token(SyntaxKind(1), "hi"); 181 | let new = builder.checkpoint(); 182 | builder.token(SyntaxKind(2), "hello"); 183 | builder.revert_to(initial); 184 | builder.revert_to(new); 185 | 186 | builder.finish_node(); 187 | }); 188 | } 189 | 190 | #[test] 191 | fn misuse3() { 192 | let (first, res1) = with_builder(|builder| { 193 | builder.start_node(SyntaxKind(0)); 194 | builder.token(SyntaxKind(3), "no"); 195 | builder.finish_node(); 196 | }); 197 | 198 | let (second, res2) = with_builder(|builder| { 199 | builder.start_node(SyntaxKind(0)); 200 | 201 | // Add two tokens, revert to the initial state, add three tokens, and try to revert to an earlier checkpoint. 202 | let initial = builder.checkpoint(); 203 | builder.token(SyntaxKind(1), "hi"); 204 | let new = builder.checkpoint(); 205 | builder.token(SyntaxKind(2), "hello"); 206 | builder.revert_to(initial); 207 | 208 | // This is wrong, but there's not a whole lot the library can do about it. 
209 | builder.token(SyntaxKind(3), "no"); 210 | builder.token(SyntaxKind(4), "bad"); 211 | builder.token(SyntaxKind(4), "wrong"); 212 | builder.revert_to(new); 213 | 214 | builder.finish_node(); 215 | }); 216 | 217 | assert_tree_eq((&first, &res1), (&second, &res2)); 218 | } 219 | 220 | #[test] 221 | #[should_panic = "was `finish_node` called early or did you already `revert_to`"] 222 | fn misuse_combined() { 223 | with_builder(|builder| { 224 | builder.start_node(SyntaxKind(0)); 225 | 226 | // Take two snapshots across a node boundary, revert to the earlier one but then try to start a node at the 227 | // later one. 228 | let initial = builder.checkpoint(); 229 | builder.start_node(SyntaxKind(3)); 230 | builder.token(SyntaxKind(1), "hi"); 231 | let new = builder.checkpoint(); 232 | builder.token(SyntaxKind(2), "hello"); 233 | builder.revert_to(initial); 234 | builder.start_node_at(new, SyntaxKind(4)); 235 | 236 | builder.finish_node(); 237 | }); 238 | } 239 | 240 | #[test] 241 | #[should_panic = "reverting to an earlier checkpoint"] 242 | fn misuse_combined2() { 243 | with_builder(|builder| { 244 | builder.start_node(SyntaxKind(0)); 245 | 246 | // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at 247 | // the later one. 
248 | let initial = builder.checkpoint(); 249 | builder.token(SyntaxKind(1), "hi"); 250 | let new = builder.checkpoint(); 251 | builder.token(SyntaxKind(2), "hello"); 252 | builder.revert_to(initial); 253 | builder.start_node_at(new, SyntaxKind(3)); 254 | 255 | builder.finish_node(); 256 | }); 257 | } 258 | 259 | #[test] 260 | fn revert_then_start() { 261 | let (first, res1) = with_builder(|builder| { 262 | builder.start_node(SyntaxKind(0)); 263 | builder.start_node(SyntaxKind(3)); 264 | builder.token(SyntaxKind(2), "hello"); 265 | builder.finish_node(); 266 | builder.finish_node(); 267 | }); 268 | let (second, res2) = with_builder(|builder| { 269 | builder.start_node(SyntaxKind(0)); 270 | 271 | // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at 272 | // the later one. 273 | let initial = builder.checkpoint(); 274 | builder.token(SyntaxKind(1), "hi"); 275 | builder.revert_to(initial); 276 | builder.start_node_at(initial, SyntaxKind(3)); 277 | builder.token(SyntaxKind(2), "hello"); 278 | builder.finish_node(); 279 | 280 | builder.finish_node(); 281 | }); 282 | assert_tree_eq((&first, &res1), (&second, &res2)); 283 | } 284 | 285 | #[test] 286 | fn start_then_revert() { 287 | let (first, res1) = with_builder(|builder| { 288 | builder.start_node(SyntaxKind(0)); 289 | builder.token(SyntaxKind(2), "hello"); 290 | builder.finish_node(); 291 | }); 292 | let (second, res2) = with_builder(|builder| { 293 | builder.start_node(SyntaxKind(0)); 294 | 295 | // Take two snapshots with only tokens between them, revert to the earlier one but then try to start a node at 296 | // the later one. 
297 | let initial = builder.checkpoint(); 298 | builder.token(SyntaxKind(1), "hi"); 299 | builder.start_node_at(initial, SyntaxKind(3)); 300 | builder.revert_to(initial); 301 | builder.token(SyntaxKind(2), "hello"); 302 | 303 | builder.finish_node(); 304 | }); 305 | assert_tree_eq((&first, &res1), (&second, &res2)); 306 | } 307 | -------------------------------------------------------------------------------- /cstree/tests/it/sendsync.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::redundant_clone)] 2 | 3 | use crossbeam_utils::thread::scope; 4 | use std::{thread, time::Duration}; 5 | 6 | use super::{build_recursive, Element, ResolvedNode, SyntaxKind, SyntaxNode}; 7 | use cstree::build::GreenNodeBuilder; 8 | 9 | // Exercise the multi-threaded interner when the corresponding feature is enabled. 10 | 11 | #[cfg(feature = "multi_threaded_interning")] 12 | use cstree::interning::{new_threaded_interner, MultiThreadedTokenInterner}; 13 | 14 | #[cfg(not(feature = "multi_threaded_interning"))] 15 | fn get_builder() -> GreenNodeBuilder<'static, 'static, SyntaxKind> { 16 | GreenNodeBuilder::new() 17 | } 18 | 19 | #[cfg(feature = "multi_threaded_interning")] 20 | fn get_builder() -> GreenNodeBuilder<'static, 'static, SyntaxKind, MultiThreadedTokenInterner> { 21 | let interner = new_threaded_interner(); 22 | GreenNodeBuilder::from_interner(interner) 23 | } 24 | 25 | fn build_tree(root: &Element<'_>) -> ResolvedNode { 26 | let mut builder = get_builder(); 27 | build_recursive(root, &mut builder, 0); 28 | let (node, cache) = builder.finish(); 29 | SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap()) 30 | } 31 | 32 | fn two_level_tree() -> Element<'static> { 33 | use Element::*; 34 | Node(vec![ 35 | Node(vec![Token("0.0"), Token("0.1")]), 36 | Node(vec![Token("1.0")]), 37 | Node(vec![Token("2.0"), Token("2.1"), Token("2.2")]), 38 | ]) 39 | } 40 | 41 | #[test] 42 | #[cfg_attr(miri, ignore)] 43 
| fn send() { 44 | let tree = two_level_tree(); 45 | let tree = build_tree::<()>(&tree); 46 | let thread_tree = tree.clone(); 47 | let thread = thread::spawn(move || { 48 | let leaf1_0 = thread_tree 49 | .children() 50 | .nth(1) 51 | .unwrap() 52 | .children_with_tokens() 53 | .next() 54 | .unwrap(); 55 | let leaf1_0 = leaf1_0.into_token().unwrap(); 56 | leaf1_0.text().to_string() 57 | }); 58 | assert_eq!(thread.join().unwrap(), "1.0"); 59 | } 60 | 61 | #[test] 62 | #[cfg_attr(miri, ignore)] 63 | fn send_data() { 64 | let tree = two_level_tree(); 65 | let tree = build_tree::(&tree); 66 | let thread_tree = tree.clone(); 67 | { 68 | let node2 = tree.children().nth(2).unwrap(); 69 | assert_eq!(*node2.try_set_data("data".into()).unwrap(), "data"); 70 | let data = node2.get_data().unwrap(); 71 | assert_eq!(data.as_str(), "data"); 72 | node2.set_data("payload".into()); 73 | let data = node2.get_data().unwrap(); 74 | assert_eq!(data.as_str(), "payload"); 75 | } 76 | let t = thread::spawn(move || { 77 | let node2 = thread_tree.children().nth(2).unwrap(); 78 | assert!(node2.try_set_data("already present".into()).is_err()); 79 | let data = node2.get_data().unwrap(); 80 | assert_eq!(data.as_str(), "payload"); 81 | node2.set_data("new data".into()); 82 | }); 83 | // wait for t to finish 84 | t.join().unwrap(); 85 | { 86 | let node2 = tree.children().nth(2).unwrap(); 87 | let data = node2.get_data().unwrap(); 88 | assert_eq!(data.as_str(), "new data"); 89 | node2.clear_data(); 90 | // re-use `data` after node data was cleared 91 | assert_eq!(data.as_str(), "new data"); 92 | } 93 | let thread_tree = tree.clone(); 94 | thread::spawn(move || { 95 | let node2 = thread_tree.children().nth(2).unwrap(); 96 | assert_eq!(node2.get_data(), None); 97 | }) 98 | .join() 99 | .unwrap(); 100 | } 101 | 102 | #[test] 103 | #[cfg_attr(miri, ignore)] 104 | fn sync() { 105 | let tree = two_level_tree(); 106 | let tree = build_tree::<()>(&tree); 107 | let thread_tree = &tree; 108 | let result = 
scope(move |s| { 109 | s.spawn(move |_| { 110 | let leaf1_0 = thread_tree 111 | .children() 112 | .nth(1) 113 | .unwrap() 114 | .children_with_tokens() 115 | .next() 116 | .unwrap(); 117 | let leaf1_0 = leaf1_0.into_token().unwrap(); 118 | leaf1_0.resolve_text(thread_tree.resolver().as_ref()).to_string() 119 | }) 120 | .join() 121 | .unwrap() 122 | }); 123 | assert_eq!(result.unwrap(), "1.0"); 124 | } 125 | 126 | #[test] 127 | #[cfg_attr(miri, ignore)] 128 | fn drop_send() { 129 | let tree = two_level_tree(); 130 | let tree = build_tree::<()>(&tree); 131 | let thread_tree = tree.clone(); 132 | let thread = thread::spawn(move || { 133 | drop(thread_tree); 134 | }); 135 | thread.join().unwrap(); 136 | thread::sleep(Duration::from_millis(500)); 137 | drop(tree); 138 | 139 | let tree = two_level_tree(); 140 | let tree = build_tree::<()>(&tree); 141 | let thread_tree = tree.clone(); 142 | drop(tree); 143 | let thread = thread::spawn(move || { 144 | thread::sleep(Duration::from_millis(500)); 145 | drop(thread_tree); 146 | }); 147 | thread.join().unwrap(); 148 | } 149 | 150 | #[test] 151 | #[cfg_attr(miri, ignore)] 152 | #[allow(dropping_references)] 153 | fn drop_sync() { 154 | let tree = two_level_tree(); 155 | let tree = build_tree::<()>(&tree); 156 | let thread_tree = &tree; 157 | scope(move |s| { 158 | s.spawn(move |_| { 159 | drop(thread_tree); 160 | }); 161 | }) 162 | .unwrap(); 163 | thread::sleep(Duration::from_millis(500)); 164 | drop(tree); 165 | } 166 | -------------------------------------------------------------------------------- /cstree/tests/it/serde.rs: -------------------------------------------------------------------------------- 1 | use crate::{build_recursive, build_tree_with_cache, ResolvedNode}; 2 | 3 | use super::{Element, SyntaxKind, SyntaxNode}; 4 | use cstree::{ 5 | build::{GreenNodeBuilder, NodeCache}, 6 | interning::new_interner, 7 | util::NodeOrToken, 8 | }; 9 | use serde_test::Token; 10 | use std::fmt; 11 | 12 | /// Macro for generating a 
list of `serde_test::Token`s using a simpler DSL. 13 | macro_rules! event_tokens { 14 | ($($name:ident($($token:tt)*)),*) => { 15 | [ 16 | $( 17 | event_tokens!(@token, $name($($token)*)) 18 | ),* 19 | ].concat() 20 | }; 21 | 22 | (@token, token($kind:expr, $str:expr)) => { 23 | [ 24 | Token::Struct { name: "Event", len: 2 }, 25 | Token::BorrowedStr("t"), 26 | Token::UnitVariant{ name: "Event", variant: "Token" }, 27 | Token::BorrowedStr("c"), 28 | Token::Tuple { len: 2 }, 29 | Token::U32($kind), 30 | Token::BorrowedStr($str), 31 | Token::TupleEnd, 32 | Token::StructEnd, 33 | ].as_ref() 34 | }; 35 | 36 | (@token, node($kind:expr, $data:expr)) => { 37 | [ 38 | Token::Struct { name: "Event", len: 2 }, 39 | Token::BorrowedStr("t"), 40 | Token::UnitVariant{ name: "Event", variant: "EnterNode" }, 41 | Token::BorrowedStr("c"), 42 | Token::Tuple { len: 2 }, 43 | Token::U32($kind), 44 | Token::Bool($data), 45 | Token::TupleEnd, 46 | Token::StructEnd, 47 | ].as_ref() 48 | }; 49 | 50 | (@token, leave_node()) => { 51 | [ 52 | Token::Struct { name: "Event", len: 1 }, 53 | Token::BorrowedStr("t"), 54 | Token::UnitVariant{ name: "Event", variant: "LeaveNode" }, 55 | Token::StructEnd, 56 | ].as_ref() 57 | }; 58 | 59 | (@token, data($data:expr)) => { 60 | [Token::Str($data)].as_ref() 61 | }; 62 | 63 | (@token, seq($len:expr)) => { 64 | [Token::Seq { len: Option::Some($len) }].as_ref() 65 | }; 66 | 67 | (@token, seq_end()) => { 68 | [Token::SeqEnd].as_ref() 69 | }; 70 | 71 | (@token, tuple($len:expr)) => { 72 | [Token::Tuple { len: $len }].as_ref() 73 | }; 74 | 75 | (@token, tuple_end()) => { 76 | [Token::TupleEnd].as_ref() 77 | }; 78 | 79 | (@token,) => {}; 80 | } 81 | 82 | fn three_level_tree_with_data_tokens() -> Vec { 83 | event_tokens!( 84 | tuple(2), 85 | seq(14), 86 | node(0, true), 87 | node(1, true), 88 | node(2, true), 89 | token(3, "foo"), 90 | token(4, "bar"), 91 | leave_node(), 92 | token(5, "baz"), 93 | leave_node(), 94 | node(6, true), 95 | token(7, "pub"), 96 | 
token(8, "fn"), 97 | token(9, "tree"), 98 | leave_node(), 99 | leave_node(), 100 | seq_end(), 101 | seq(4), 102 | data("1"), 103 | data("2"), 104 | data("3"), 105 | data("4"), 106 | seq_end(), 107 | tuple_end() 108 | ) 109 | } 110 | 111 | fn three_level_tree_tokens() -> Vec { 112 | event_tokens!( 113 | tuple(2), 114 | seq(14), 115 | node(0, false), 116 | node(1, false), 117 | node(2, false), 118 | token(3, "foo"), 119 | token(4, "bar"), 120 | leave_node(), 121 | token(5, "baz"), 122 | leave_node(), 123 | node(6, false), 124 | token(7, "pub"), 125 | token(8, "fn"), 126 | token(9, "tree"), 127 | leave_node(), 128 | leave_node(), 129 | seq_end(), 130 | seq(0), 131 | seq_end(), 132 | tuple_end() 133 | ) 134 | } 135 | 136 | struct NonSerializable; 137 | 138 | /// Serializable SyntaxNode that doesn't have an identity `PartialEq` implementation, 139 | /// but checks if both trees have equal nodes and tokens. 140 | struct TestNode { 141 | node: ResolvedNode, 142 | with_data: bool, 143 | } 144 | 145 | impl TestNode { 146 | fn new(node: ResolvedNode) -> Self { 147 | Self { node, with_data: false } 148 | } 149 | 150 | fn with_data(node: ResolvedNode) -> Self { 151 | Self { node, with_data: true } 152 | } 153 | } 154 | 155 | impl fmt::Debug for TestNode { 156 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 157 | fmt::Debug::fmt(&self.node, f) 158 | } 159 | } 160 | 161 | impl serde::Serialize for TestNode { 162 | fn serialize(&self, serializer: S) -> Result 163 | where 164 | S: serde::Serializer, 165 | { 166 | if self.with_data { 167 | self.node.as_serialize_with_data().serialize(serializer) 168 | } else { 169 | self.node.serialize(serializer) 170 | } 171 | } 172 | } 173 | 174 | impl<'de> serde::Deserialize<'de> for TestNode { 175 | fn deserialize(deserializer: D) -> Result 176 | where 177 | D: serde::Deserializer<'de>, 178 | { 179 | Ok(Self { 180 | node: ResolvedNode::deserialize(deserializer)?, 181 | with_data: true, 182 | }) 183 | } 184 | } 185 | 186 | impl 
PartialEq for TestNode { 187 | fn eq(&self, other: &TestNode) -> bool { 188 | self.node.kind() == other.node.kind() 189 | && self.node.get_data() == other.node.get_data() 190 | && self.node.text_range() == other.node.text_range() 191 | && self 192 | .node 193 | .children_with_tokens() 194 | .zip(other.node.children_with_tokens()) 195 | .all(|(this, other)| match (this, other) { 196 | (NodeOrToken::Node(this), NodeOrToken::Node(other)) => { 197 | TestNode::new(this.clone()) == TestNode::new(other.clone()) 198 | } 199 | (NodeOrToken::Token(this), NodeOrToken::Token(other)) => { 200 | this.kind() == other.kind() && this.text_range() == other.text_range() 201 | } 202 | _ => unreachable!(), 203 | }) 204 | } 205 | } 206 | 207 | #[rustfmt::skip] 208 | fn three_level_tree() -> Element<'static> { 209 | use Element::*; 210 | 211 | Node(vec![ 212 | Node(vec![ 213 | Node(vec![ 214 | Token("foo"), 215 | Token("bar") 216 | ]), 217 | Token("baz") 218 | ]), 219 | Node(vec![ 220 | Token("pub"), 221 | Token("fn"), 222 | Token("tree") 223 | ]), 224 | ]) 225 | } 226 | 227 | fn build_tree(root: Element<'_>) -> ResolvedNode { 228 | let mut builder: GreenNodeBuilder = GreenNodeBuilder::new(); 229 | build_recursive(&root, &mut builder, 0); 230 | let (node, cache) = builder.finish(); 231 | SyntaxNode::new_root_with_resolver(node, cache.unwrap().into_interner().unwrap()) 232 | } 233 | 234 | fn attach_data(node: &SyntaxNode) { 235 | node.descendants().enumerate().for_each(|(idx, node)| { 236 | node.set_data(format!("{}", idx + 1)); 237 | }); 238 | } 239 | 240 | #[test] 241 | fn serialize_tree_with_data_with_resolver() { 242 | let mut interner = new_interner(); 243 | let mut cache = NodeCache::with_interner(&mut interner); 244 | 245 | let root = three_level_tree(); 246 | let root = build_tree_with_cache(&root, &mut cache); 247 | let tree = SyntaxNode::::new_root(root.clone()); 248 | attach_data(&tree); 249 | 250 | let serialized = 
serde_json::to_string(&tree.as_serialize_with_data_with_resolver(&interner)).unwrap(); 251 | let deserialized: TestNode = serde_json::from_str(&serialized).unwrap(); 252 | 253 | let expected = SyntaxNode::new_root_with_resolver(root, interner); 254 | attach_data(&expected); 255 | assert_eq!(TestNode::new(expected), deserialized); 256 | } 257 | 258 | #[test] 259 | fn serialize_tree_with_resolver() { 260 | let mut interner = new_interner(); 261 | let mut cache = NodeCache::with_interner(&mut interner); 262 | 263 | let root = three_level_tree(); 264 | let root = build_tree_with_cache(&root, &mut cache); 265 | let tree = SyntaxNode::::new_root(root.clone()); 266 | 267 | let serialized = serde_json::to_string(&tree.as_serialize_with_resolver(&interner)).unwrap(); 268 | let deserialized: TestNode = serde_json::from_str(&serialized).unwrap(); 269 | 270 | let expected = SyntaxNode::new_root_with_resolver(root, interner); 271 | assert_eq!(TestNode::new(expected), deserialized); 272 | } 273 | 274 | #[test] 275 | fn serialize_tree_with_data() { 276 | let tree = build_tree(three_level_tree()); 277 | let tree = TestNode::with_data(tree); 278 | attach_data(&tree.node); 279 | 280 | serde_test::assert_tokens(&tree, three_level_tree_with_data_tokens().as_slice()); 281 | } 282 | 283 | #[test] 284 | fn serialize_tree_without_data() { 285 | let tree = build_tree(three_level_tree()); 286 | let tree = TestNode::new(tree); 287 | 288 | serde_test::assert_tokens(&tree, three_level_tree_tokens().as_slice()); 289 | } 290 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | unstable_features = true 2 | 3 | edition = "2021" 4 | 5 | max_width = 120 6 | comment_width = 120 7 | wrap_comments = true 8 | 9 | format_code_in_doc_comments = true 10 | format_macro_matchers = true 11 | 12 | imports_granularity = "Crate" 13 | 14 | reorder_impl_items = true 15 | 16 | 
use_field_init_shorthand = true 17 | 18 | # should be 1, but as of writing is too unstable and introduces blank lines at the start of random blocks 19 | blank_lines_lower_bound = 0 20 | 21 | struct_field_align_threshold = 8 22 | -------------------------------------------------------------------------------- /test_suite/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cstree_test_suite" 3 | publish = false 4 | version = "0.0.0" 5 | edition.workspace = true 6 | authors.workspace = true 7 | license.workspace = true 8 | repository.workspace = true 9 | readme.workspace = true 10 | rust-version.workspace = true 11 | 12 | [dependencies] 13 | cstree = { path = "../cstree", features = ["derive"] } 14 | 15 | [dev-dependencies] 16 | trybuild = { version = "1.0.80", features = ["diff"] } 17 | -------------------------------------------------------------------------------- /test_suite/tests/derive.rs: -------------------------------------------------------------------------------- 1 | use cstree::{RawSyntaxKind, Syntax}; 2 | 3 | #[test] 4 | fn basic() { 5 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 6 | #[repr(u32)] 7 | pub enum SyntaxKind { 8 | A, 9 | #[static_text("b")] 10 | B, 11 | } 12 | pub type MySyntax = SyntaxKind; 13 | 14 | assert_eq!(MySyntax::into_raw(SyntaxKind::A), RawSyntaxKind(0)); 15 | assert_eq!(MySyntax::into_raw(SyntaxKind::B), RawSyntaxKind(1)); 16 | 17 | assert_eq!(MySyntax::from_raw(RawSyntaxKind(0)), SyntaxKind::A); 18 | assert_eq!(MySyntax::from_raw(RawSyntaxKind(1)), SyntaxKind::B); 19 | 20 | assert!(MySyntax::static_text(SyntaxKind::A).is_none()); 21 | assert_eq!(MySyntax::static_text(SyntaxKind::B), Some("b")); 22 | } 23 | -------------------------------------------------------------------------------- /test_suite/tests/ui.rs: -------------------------------------------------------------------------------- 1 | #[test] 2 | #[cfg_attr(miri, ignore)] 3 | fn ui() { 4 | let t 
= trybuild::TestCases::new(); 5 | t.compile_fail("tests/ui/**/*.rs"); 6 | } 7 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/missing_repr.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | pub enum SyntaxKind { 5 | A, 6 | #[static_text("b")] 7 | B, 8 | } 9 | 10 | fn main() {} 11 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/missing_repr.stderr: -------------------------------------------------------------------------------- 1 | error: syntax kind definitions must be `#[repr(u32)]` to derive `Syntax` 2 | --> tests/ui/repr/missing_repr.rs:4:1 3 | | 4 | 4 | / pub enum SyntaxKind { 5 | 5 | | A, 6 | 6 | | #[static_text("b")] 7 | 7 | | B, 8 | 8 | | } 9 | | |_^ 10 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/wrong_repr_c.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(C)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text("b")] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/wrong_repr_c.stderr: -------------------------------------------------------------------------------- 1 | error: syntax kind definitions must be `#[repr(u32)]` to derive `Syntax` 2 | --> tests/ui/repr/wrong_repr_c.rs:4:1 3 | | 4 | 4 | / #[repr(C)] 5 | 5 | | pub enum SyntaxKind { 6 | 6 | | A, 7 | 7 | | #[static_text("b")] 8 | 8 | | B, 9 | 9 | | } 10 | | |_^ 11 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/wrong_repr_u16.rs: -------------------------------------------------------------------------------- 1 
| use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u16)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text("b")] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/repr/wrong_repr_u16.stderr: -------------------------------------------------------------------------------- 1 | error: syntax kind definitions must be `#[repr(u32)]` to derive `Syntax` 2 | --> tests/ui/repr/wrong_repr_u16.rs:4:1 3 | | 4 | 4 | / #[repr(u16)] 5 | 5 | | pub enum SyntaxKind { 6 | 6 | | A, 7 | 7 | | #[static_text("b")] 8 | 8 | | B, 9 | 9 | | } 10 | | |_^ 11 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/empty_expr.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u32)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text()] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/empty_expr.stderr: -------------------------------------------------------------------------------- 1 | error: argument to `static_text` must be a string literal: `#[static_text("...")]` 2 | --> tests/ui/static_text/empty_expr.rs:7:7 3 | | 4 | 7 | #[static_text()] 5 | | ^^^^^^^^^^^^^ 6 | 7 | error: unexpected end of input, expected string literal 8 | --> tests/ui/static_text/empty_expr.rs:7:19 9 | | 10 | 7 | #[static_text()] 11 | | ^ 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/missing_text.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u32)] 5 | pub enum SyntaxKind { 6 | A, 7 | 
#[static_text] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/missing_text.stderr: -------------------------------------------------------------------------------- 1 | error: missing text for `static_text`: try `#[static_text("...")]` 2 | --> tests/ui/static_text/missing_text.rs:7:5 3 | | 4 | 7 | #[static_text] 5 | | ^^^^^^^^^^^^^^ 6 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/non_expr.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u32)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text(SyntaxKind)] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/non_expr.stderr: -------------------------------------------------------------------------------- 1 | error: argument to `static_text` must be a string literal: `#[static_text("...")]` 2 | --> tests/ui/static_text/non_expr.rs:7:7 3 | | 4 | 7 | #[static_text(SyntaxKind)] 5 | | ^^^^^^^^^^^^^^^^^^^^^^^ 6 | 7 | error: expected string literal 8 | --> tests/ui/static_text/non_expr.rs:7:19 9 | | 10 | 7 | #[static_text(SyntaxKind)] 11 | | ^^^^^^^^^^ 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/non_string_expr.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u32)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text(foo + 3)] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/non_string_expr.stderr: 
-------------------------------------------------------------------------------- 1 | error: argument to `static_text` must be a string literal: `#[static_text("...")]` 2 | --> tests/ui/static_text/non_string_expr.rs:7:7 3 | | 4 | 7 | #[static_text(foo + 3)] 5 | | ^^^^^^^^^^^^^^^^^^^^ 6 | 7 | error: expected string literal 8 | --> tests/ui/static_text/non_string_expr.rs:7:19 9 | | 10 | 7 | #[static_text(foo + 3)] 11 | | ^^^ 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/text_assigned.rs: -------------------------------------------------------------------------------- 1 | use cstree::Syntax; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)] 4 | #[repr(u32)] 5 | pub enum SyntaxKind { 6 | A, 7 | #[static_text = "b"] 8 | B, 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /test_suite/tests/ui/static_text/text_assigned.stderr: -------------------------------------------------------------------------------- 1 | error: `static_text` takes the text as a function argument: `#[static_text("...")]` 2 | --> tests/ui/static_text/text_assigned.rs:7:5 3 | | 4 | 7 | #[static_text = "b"] 5 | | ^^^^^^^^^^^^^^^^^^^^ 6 | --------------------------------------------------------------------------------