├── .github
    └── workflows
    │   └── build-test.yml
├── .gitignore
├── .rustfmt.toml
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── benches
    ├── c_lexer.rs
    ├── long.c
    ├── sort.c
    └── spaces.c
├── examples
    ├── calc
    │   ├── README.md
    │   └── main.rs
    ├── clike
    │   ├── README.md
    │   ├── fib.c
    │   ├── main.rs
    │   └── sort.c
    ├── json
    │   ├── README.md
    │   ├── example.json
    │   └── main.rs
    └── sexp
    │   ├── README.md
    │   ├── example.sexp
    │   └── main.rs
├── laps_macros
    ├── Cargo.toml
    ├── README.md
    └── src
    │   ├── lib.rs
    │   ├── parse.rs
    │   ├── spanned.rs
    │   ├── token_ast.rs
    │   ├── token_kind.rs
    │   ├── tokenize.rs
    │   └── utils.rs
├── laps_regex
    ├── Cargo.toml
    ├── README.md
    └── src
    │   ├── dfa.rs
    │   ├── fa.rs
    │   ├── lib.rs
    │   ├── mir.rs
    │   ├── nfa.rs
    │   ├── re.rs
    │   └── table.rs
└── src
    ├── ast.rs
    ├── input.rs
    ├── lexer.rs
    ├── lib.rs
    ├── parse.rs
    ├── reader.rs
    ├── span.rs
    └── token.rs


/.github/workflows/build-test.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build_test:
11 |     name: Build and Test
12 |     runs-on: ubuntu-latest
13 |     if: "!contains(github.event.head_commit.message, 'skip-ci')"
14 |     timeout-minutes: 30
15 |     steps:
16 |     - name: Checkout laps
17 |       uses: actions/checkout@v2
18 | 
19 |     - name: Build
20 |       run: cargo check --workspace &&
21 |            cargo check --workspace --all-features
22 | 
23 |     - name: Clippy
24 |       run: cargo clippy --workspace -- -D warnings &&
25 |            cargo clippy --workspace --all-targets --all-features -- -D warnings
26 | 
27 |     - name: Test
28 |       run: cargo test --workspace &&
29 |            cargo test --workspace --all-features
30 | 
31 |   build_examples:
32 |     name: Build Examples
33 |     runs-on: ubuntu-latest
34 |     if: "!contains(github.event.head_commit.message, 'skip-ci')"
35 |     timeout-minutes: 30
36 |     strategy:
37 |       matrix:
38 |         example-name: ['sexp', 'calc', 'json', 'clike']
39 | 
40 |     steps:
41 |     - name: Checkout laps
42 |       uses: actions/checkout@v2
43 | 
44 |     - name: Build
45 |       run: cargo check --example ${{matrix.example-name}} --features macros
46 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # macOS
 2 | *.DS_Store
 3 | 
 4 | # Cargo
 5 | /target
 6 | /Cargo.lock
 7 | 
 8 | # VS Code
 9 | .vscode
10 | 
11 | # Debugging
12 | debug
13 | 


--------------------------------------------------------------------------------
/.rustfmt.toml:
--------------------------------------------------------------------------------
1 | tab_spaces = 2
2 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | All notable changes to the `laps` will be documented in this file.
  4 | 
  5 | ## Unreleased
  6 | 
  7 | ### Added
  8 | 
  9 | * AST type `NonEmptyOptSepList` and `OptSepList`.
 10 | 
 11 | ## 0.1.7 - 2023-12-30
 12 | 
 13 | ### Added
 14 | 
 15 | * AST type `TokenPrefix`.
 16 | 
 17 | ### Fixed
 18 | 
 19 | * Issue about method `maybe` of ASTs generate by macro `token_ast`.
 20 | 
 21 | ## 0.1.6 - 2023-12-24
 22 | 
 23 | ### Added
 24 | 
 25 | * Implement `Parse` and `Spanned` trait for tuples.
 26 | * AST type `OptPrefix`, `OptTokenPrefix`, `OptSepSeq` and `NonEmptyOptSepSeq`.
 27 | * Trait `TrySpan`.
 28 | * Attribute `try_span` for derive macro `Spanned`.
 29 | 
 30 | ### Changed
 31 | 
 32 | * Improve performance of minimizing DFA.
 33 | * Mark AST `Quoted` as deprecated.
 34 | * Derived `PartialOrd` and `Ord` trait for AST types (except `Quoted`).
 35 | 
 36 | ### Fixed
 37 | 
 38 | * Issue about parsing if-guard in `token_ast`.
 39 | 
 40 | ## 0.1.5 - 2023-12-17
 41 | 
 42 | ### Added
 43 | 
 44 | * Method `file_type` for `Span`.
 45 | 
 46 | ### Changed
 47 | 
 48 | * Improve performance of compiling regular expressions again.
 49 | 
 50 | ## 0.1.4 - 2023-12-13
 51 | 
 52 | ### Added
 53 | 
 54 | * Method `inner` and `inner_mut` for `TokenBuffer`.
 55 | 
 56 | ### Fixed
 57 | 
 58 | * Return type of method `Lexer::input_mut`.
 59 | 
 60 | ## 0.1.3 - 2023-12-10
 61 | 
 62 | ### Added
 63 | 
 64 | * `laps::lexer::signed_int_literal` for parsing integer literal with an optional sign.
 65 | * If-guard support in `kind` pattern of `token_ast` macro.
 66 | * Method `new` for `Reader` and `ByteReader`.
 67 | * Method `set_line_col` for trait `InputStream`, for supporting C preprocessor.
 68 | * Method `input` and `input_mut` for `Lexer`.
 69 | 
 70 | ### Changed
 71 | 
 72 | * Some document comments.
 73 | * Improve performance of compiling regular expressions.
 74 | * Bumped dependency `colored` to version 2.1.0.
 75 | 
 76 | ### Fixed
 77 | 
 78 | * Issues about printing line information in `Span`.
 79 | 
 80 | ## 0.1.2 - 2023-07-13
 81 | 
 82 | ### Changed
 83 | 
 84 | * Made `Span` fully thread-safe (embarrassed).
 85 | 
 86 | ## 0.1.1 - 2023-07-13
 87 | 
 88 | ### Changed
 89 | 
 90 | * Made `Span` thread-safe.
 91 | * Enabled LTO for release mode.
 92 | * Supported transition table compression.
 93 | 
 94 | ## 0.1.0 - 2023-06-17
 95 | 
 96 | ### Added
 97 | 
 98 | * Sub-crate `laps_regex` for generating state-transition table for multiple regular expressions.
 99 | * Trait and derive macro `Tokenize`, allow users to get a lexer by deriving `Tokenize` for a token kind.
100 | 
101 | ### Changed
102 | 
103 | * New and more intuitive syntax for macro `token_ast`.
104 | * `Span` and `InputStream` now supports generic character types.
105 | * Removed trait `TokenBuilder` and struct `Ident`.
106 | * Removed some lexing methods in trait `InputStream`.
107 | 
108 | ## 0.0.2 - 2023-01-13
109 | 
110 | ### Added
111 | 
112 | * `derive` syntax for macro `token_ast`.
113 | * More examples, including `sexp`, `calc` and `json`.
114 | * More documentation comments.
115 | * Module `prelude` for some common traits and macros.
116 | * `token_kind` now implements `Clone`, `1ryFrom<Kind>` and `1ryFrom<&Kind>` for token kinds.
117 | * `token_ast` now implements `unwrap` and `unwrap_ref` methods for token ASTs.
118 | 
119 | ### Changed
120 | 
121 | * Updated version of some dependencies.
122 | * Feature `no-logger` to default feature `logger`.
123 | * License to either Apache 2.0 or MIT.
124 | 
125 | ### Fixed
126 | 
127 | * Fault about byte buffer offset (`byte_buf_offset`) in `Reader`.
128 | * Fault about namespace of some Rust preludes in procedure macros.
129 | * Fault about error message in method `TokenStream::expect`.
130 | * Fault about string width calculation in `Span`'s error logging related methods.
131 | 
132 | ## 0.0.1 - 2022-10-25
133 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [workspace]
 2 | members = [
 3 |   "laps_macros",
 4 |   "laps_regex",
 5 | ]
 6 | 
 7 | [package]
 8 | name = "laps"
 9 | version = "0.1.7"
10 | authors = ["MaxXing <x@MaxXSoft.net>"]
11 | edition = "2021"
12 | description = "Build lexers and parsers by deriving traits."
13 | repository = "https://github.com/MaxXSoft/laps"
14 | documentation = "https://docs.rs/laps"
15 | categories = ["parsing", "text-processing"]
16 | keywords = ["parser", "lexer", "ast"]
17 | readme = "README.md"
18 | license = "MIT OR Apache-2.0"
19 | 
20 | [package.metadata.docs.rs]
21 | all-features = true
22 | rustdoc-args = ["--cfg", "docsrs"]
23 | 
24 | [features]
25 | default = ["logger"]
26 | # Enable the front-end logger instead of returning error messages as strings.
27 | logger = ["dep:colored"]
28 | # Enable additional macros, such as derive macros, etc.
29 | macros = ["dep:laps_macros"]
30 | 
31 | [dependencies]
32 | colored = { version = "2.1.0", optional = true }
33 | laps_macros = { path = "./laps_macros", version = "0.1.5", optional = true }
34 | unicode-width = "0.1.10"
35 | 
36 | [dev-dependencies]
37 | criterion = "0.5.1"
38 | 
39 | [profile]
40 | release = { lto = true }
41 | bench = { lto = true }
42 | 
43 | [[example]]
44 | name = "sexp"
45 | required-features = ["macros"]
46 | 
47 | [[example]]
48 | name = "calc"
49 | required-features = ["macros"]
50 | 
51 | [[example]]
52 | name = "json"
53 | required-features = ["macros"]
54 | 
55 | [[example]]
56 | name = "clike"
57 | required-features = ["macros"]
58 | 
59 | [[bench]]
60 | name = "c_lexer"
61 | harness = false
62 | required-features = ["macros"]
63 | 


--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 MaxXing
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # laps
  2 | 
  3 | [<img alt="github" src="https://img.shields.io/badge/github-MaxXSoft/laps-8da0cb?style=for-the-badge&labelColor=555555&logo=github" height="20">](https://github.com/MaxXSoft/laps)
  4 | [<img alt="crates.io" src="https://img.shields.io/crates/v/laps.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/laps)
  5 | [<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-laps-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=data:image/svg+xml;base64,PHN2ZyByb2xlPSJpbWciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDUxMiA1MTIiPjxwYXRoIGZpbGw9IiNmNWY1ZjUiIGQ9Ik00ODguNiAyNTAuMkwzOTIgMjE0VjEwNS41YzAtMTUtOS4zLTI4LjQtMjMuNC0zMy43bC0xMDAtMzcuNWMtOC4xLTMuMS0xNy4xLTMuMS0yNS4zIDBsLTEwMCAzNy41Yy0xNC4xIDUuMy0yMy40IDE4LjctMjMuNCAzMy43VjIxNGwtOTYuNiAzNi4yQzkuMyAyNTUuNSAwIDI2OC45IDAgMjgzLjlWMzk0YzAgMTMuNiA3LjcgMjYuMSAxOS45IDMyLjJsMTAwIDUwYzEwLjEgNS4xIDIyLjEgNS4xIDMyLjIgMGwxMDMuOS01MiAxMDMuOSA1MmMxMC4xIDUuMSAyMi4xIDUuMSAzMi4yIDBsMTAwLTUwYzEyLjItNi4xIDE5LjktMTguNiAxOS45LTMyLjJWMjgzLjljMC0xNS05LjMtMjguNC0yMy40LTMzLjd6TTM1OCAyMTQuOGwtODUgMzEuOXYtNjguMmw4NS0zN3Y3My4zek0xNTQgMTA0LjFsMTAyLTM4LjIgMTAyIDM4LjJ2LjZsLTEwMiA0MS40LTEwMi00MS40di0uNnptODQgMjkxLjFsLTg1IDQyLjV2LTc5LjFsODUtMzguOHY3NS40em0wLTExMmwtMTAyIDQxLjQtMTAyLTQxLjR2LS42bDEwMi0zOC4yIDEwMiAzOC4ydi42em0yNDAgMTEybC04NSA0Mi41di03OS4xbDg1LTM4Ljh2NzUuNHptMC0xMTJsLTEwMiA0MS40LTEwMi00MS40di0uNmwxMDItMzguMiAxMDIgMzguMnYuNnoiPjwvcGF0aD48L3N2Zz4K" height="20">](https://docs.rs/laps)
  6 | [<img alt="build status" src="https://img.shields.io/github/actions/workflow/status/MaxXSoft/laps/build-test.yml?branch=master&style=for-the-badge" height="20">](https://github.com/MaxXSoft/laps/actions?query=branch%3Amaster)
  7 | 
  8 | Lexer and parser collections.
  9 | 
 10 | With `laps`, you can build lexers/parsers by just defining tokens/ASTs and deriving `Tokenize`/`Parse` trait for them.
 11 | 
 12 | ## Usage
 13 | 
 14 | Add `laps` to your project by running `cargo add`:
 15 | 
 16 | ```
 17 | cargo add laps --features macros
 18 | ```
 19 | 
 20 | ## Example
 21 | 
 22 | Implement a lexer for [S-expression](https://en.wikipedia.org/wiki/S-expression):
 23 | 
 24 | ```rust
 25 | use laps::prelude::*;
 26 | 
 27 | #[token_kind]
 28 | #[derive(Debug, Tokenize)]
 29 | enum TokenKind {
 30 |   // This token will be skipped.
 31 |   #[skip(r"\s+")]
 32 |   _Skip,
 33 |   /// Parentheses.
 34 |   #[regex(r"[()]")]
 35 |   Paren(char),
 36 |   /// Atom.
 37 |   #[regex(r"[^\s()]+")]
 38 |   Atom(String),
 39 |   /// End-of-file.
 40 |   #[eof]
 41 |   Eof,
 42 | }
 43 | ```
 44 | 
 45 | And the parser and [ASTs](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (or actually [CSTs](https://en.wikipedia.org/wiki/Parse_tree)):
 46 | 
 47 | ```rust
 48 | type Token = laps::token::Token<TokenKind>;
 49 | 
 50 | token_ast! {
 51 |   macro Token<TokenKind> {
 52 |     [atom] => { kind: TokenKind::Atom(_), prompt: "atom" },
 53 |     [lpr] => { kind: TokenKind::Paren('(') },
 54 |     [rpr] => { kind: TokenKind::Paren(')') },
 55 |     [eof] => { kind: TokenKind::Eof },
 56 |   }
 57 | }
 58 | 
 59 | #[derive(Parse)]
 60 | #[token(Token)]
 61 | enum Statement {
 62 |   Elem(Elem),
 63 |   End(Token![eof]),
 64 | }
 65 | 
 66 | #[derive(Parse)]
 67 | #[token(Token)]
 68 | struct SExp(Token![lpr], Vec<Elem>, Token![rpr]);
 69 | 
 70 | #[derive(Parse)]
 71 | #[token(Token)]
 72 | enum Elem {
 73 |   Atom(Token![atom]),
 74 |   SExp(SExp),
 75 | }
 76 | ```
 77 | 
 78 | The above implementation is very close in form to the corresponding EBNF representation of the S-expression:
 79 | 
 80 | ```ebnf
 81 | Statement ::= Elem | EOF;
 82 | SExp      ::= "(" {Elem} ")";
 83 | Elem      ::= ATOM | SExp;
 84 | ```
 85 | 
 86 | ## More Examples
 87 | 
 88 | See the [`examples` directory](examples), which contains the following examples:
 89 | 
 90 | * [`sexp`](examples/sexp): a [S-expression](https://en.wikipedia.org/wiki/S-expression) parser.
 91 | * [`calc`](examples/calc): a simple expression calculator.
 92 | * [`json`](examples/json): a simple JSON parser.
 93 | * [`clike`](examples/clike): interpreter for a C-like programming language.
 94 | 
 95 | ## Accelerating Code Completion for IDEs
 96 | 
 97 | By default, Cargo does not enable optimizations for procedural macros, which may result in slower code completion if you are using `laps` to generate lexers. To avoid this, you can add the following configuration to `Cargo.toml`:
 98 | 
 99 | ```toml
100 | [profile.dev.build-override]
101 | opt-level = 3
102 | ```
103 | 
104 | You can also try to manually enable/disable parallelization for lexer generation by adding:
105 | 
106 | ```rust
107 | #[derive(Tokenize)]
108 | #[enable_par(true)] // or #[enable_par(false)]
109 | enum TokenKind {
110 |   // ...
111 | }
112 | ```
113 | 
114 | The parallelization setting only affects compilation speed and has no effect at runtime, it's set automatically by `laps` by default.
115 | 
116 | ## Changelog
117 | 
118 | See [CHANGELOG.md](CHANGELOG.md).
119 | 
120 | ## License
121 | 
122 | Copyright (C) 2022-2023 MaxXing. Licensed under either of [Apache 2.0](LICENSE-APACHE) or [MIT](LICENSE-MIT) at your option.
123 | 


--------------------------------------------------------------------------------
/benches/c_lexer.rs:
--------------------------------------------------------------------------------
  1 | use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
  2 | use laps::{lexer::int_literal, prelude::*, reader::Reader};
  3 | use std::{fmt, fs::read_to_string, str::FromStr};
  4 | 
  5 | #[token_kind]
  6 | #[derive(Debug, Tokenize)]
  7 | enum TokenKind {
  8 |   #[skip(r"\s+")]
  9 |   _Skip,
 10 |   /// Keyword.
 11 |   #[regex(r"int|void|if|else|while|break|continue|return")]
 12 |   Keyword(Keyword),
 13 |   /// Identifier.
 14 |   #[regex(r"[_a-zA-Z][_a-zA-Z0-9]*")]
 15 |   Ident(String),
 16 |   /// Integer-literal.
 17 |   #[regex(r"[0-9]|[1-9][0-9]+|0x[0-9a-fA-F]+", int_literal)]
 18 |   Int(u64),
 19 |   /// Operator.
 20 |   #[regex(r"\+|-|\*|/|%|<|>|<=|>=|==|!=|&&|\|\||!|=")]
 21 |   Operator(Operator),
 22 |   /// Other character.
 23 |   #[regex(r".")]
 24 |   Other(char),
 25 |   /// End-of-file.
 26 |   #[eof]
 27 |   Eof,
 28 | }
 29 | 
 30 | #[derive(Clone, Copy, Debug, PartialEq)]
 31 | enum Keyword {
 32 |   Int,
 33 |   Void,
 34 |   If,
 35 |   Else,
 36 |   While,
 37 |   Break,
 38 |   Continue,
 39 |   Return,
 40 | }
 41 | 
 42 | impl FromStr for Keyword {
 43 |   type Err = ();
 44 | 
 45 |   fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
 46 |     match s {
 47 |       "int" => Ok(Keyword::Int),
 48 |       "void" => Ok(Keyword::Void),
 49 |       "if" => Ok(Keyword::If),
 50 |       "else" => Ok(Keyword::Else),
 51 |       "while" => Ok(Keyword::While),
 52 |       "break" => Ok(Keyword::Break),
 53 |       "continue" => Ok(Keyword::Continue),
 54 |       "return" => Ok(Keyword::Return),
 55 |       _ => Err(()),
 56 |     }
 57 |   }
 58 | }
 59 | 
 60 | impl fmt::Display for Keyword {
 61 |   fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 62 |     match self {
 63 |       Self::Int => write!(f, "int"),
 64 |       Self::Void => write!(f, "void"),
 65 |       Self::If => write!(f, "if"),
 66 |       Self::Else => write!(f, "else"),
 67 |       Self::While => write!(f, "while"),
 68 |       Self::Break => write!(f, "break"),
 69 |       Self::Continue => write!(f, "continue"),
 70 |       Self::Return => write!(f, "return"),
 71 |     }
 72 |   }
 73 | }
 74 | 
 75 | #[derive(Clone, Copy, Debug, PartialEq)]
 76 | enum Operator {
 77 |   Add,
 78 |   Sub,
 79 |   Mul,
 80 |   Div,
 81 |   Mod,
 82 |   Lt,
 83 |   Gt,
 84 |   Le,
 85 |   Ge,
 86 |   Eq,
 87 |   Ne,
 88 |   And,
 89 |   Or,
 90 |   Not,
 91 |   Assign,
 92 | }
 93 | 
 94 | impl FromStr for Operator {
 95 |   type Err = ();
 96 | 
 97 |   fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
 98 |     match s {
 99 |       "+" => Ok(Self::Add),
100 |       "-" => Ok(Self::Sub),
101 |       "*" => Ok(Self::Mul),
102 |       "/" => Ok(Self::Div),
103 |       "%" => Ok(Self::Mod),
104 |       "<" => Ok(Self::Lt),
105 |       ">" => Ok(Self::Gt),
106 |       "<=" => Ok(Self::Le),
107 |       ">=" => Ok(Self::Ge),
108 |       "==" => Ok(Self::Eq),
109 |       "!=" => Ok(Self::Ne),
110 |       "&&" => Ok(Self::And),
111 |       "||" => Ok(Self::Or),
112 |       "!" => Ok(Self::Not),
113 |       "=" => Ok(Self::Assign),
114 |       _ => Err(()),
115 |     }
116 |   }
117 | }
118 | 
119 | impl fmt::Display for Operator {
120 |   fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
121 |     match self {
122 |       Self::Add => write!(f, "+"),
123 |       Self::Sub => write!(f, "-"),
124 |       Self::Mul => write!(f, "*"),
125 |       Self::Div => write!(f, "/"),
126 |       Self::Mod => write!(f, "%"),
127 |       Self::Lt => write!(f, "<"),
128 |       Self::Gt => write!(f, ">"),
129 |       Self::Le => write!(f, "<="),
130 |       Self::Ge => write!(f, ">="),
131 |       Self::Eq => write!(f, "=="),
132 |       Self::Ne => write!(f, "!="),
133 |       Self::And => write!(f, "&&"),
134 |       Self::Or => write!(f, "||"),
135 |       Self::Not => write!(f, "!"),
136 |       Self::Assign => write!(f, "="),
137 |     }
138 |   }
139 | }
140 | 
141 | fn tokenize(s: &str) {
142 |   let mut lexer = TokenKind::lexer(Reader::from(s));
143 |   loop {
144 |     let token = lexer.next_token().unwrap();
145 |     match token.kind {
146 |       TokenKind::Eof => break,
147 |       t => black_box(t),
148 |     };
149 |   }
150 | }
151 | 
152 | fn bench_tokenize(c: &mut Criterion) {
153 |   let mut group = c.benchmark_group("c_lexer");
154 |   for src in ["sort", "spaces", "long"] {
155 |     let input = read_to_string(format!("benches/{src}.c")).unwrap();
156 |     group.throughput(Throughput::Bytes(input.as_bytes().len() as u64));
157 |     group.bench_with_input(src, &input, |b, s| b.iter(|| tokenize(s)));
158 |   }
159 |   group.finish();
160 | }
161 | 
162 | criterion_group!(benches, bench_tokenize);
163 | criterion_main!(benches);
164 | 


--------------------------------------------------------------------------------
/benches/sort.c:
--------------------------------------------------------------------------------
 1 | int buf[2][100];
 2 | 
 3 | // sort [l, r)
 4 | void merge_sort(int l, int r)
 5 | {
 6 |     if (l + 1 >= r)
 7 |         return;
 8 | 
 9 |     int mid = (l + r) / 2;
10 |     merge_sort(l, mid);
11 |     merge_sort(mid, r);
12 | 
13 |     int i = l, j = mid, k = l;
14 |     while (i < mid && j < r) {
15 |         if (buf[0][i] < buf[0][j]) {
16 |             buf[1][k] = buf[0][i];
17 |             i = i + 1;
18 |         } else {
19 |             buf[1][k] = buf[0][j];
20 |             j = j + 1;
21 |         }
22 |         k = k + 1;
23 |     }
24 |     while (i < mid) {
25 |         buf[1][k] = buf[0][i];
26 |         i = i + 1;
27 |         k = k + 1;
28 |     }
29 |     while (j < r) {
30 |         buf[1][k] = buf[0][j];
31 |         j = j + 1;
32 |         k = k + 1;
33 |     }
34 | 
35 |     while (l < r) {
36 |         buf[0][l] = buf[1][l];
37 |         l = l + 1;
38 |     }
39 | }
40 | 
41 | int main()
42 | {
43 |     int n = getarray(buf[0]);
44 |     merge_sort(0, n);
45 |     putarray(n, buf[0]);
46 |     return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/benches/spaces.c:
--------------------------------------------------------------------------------
  1 | int __HELLO [
  2 | 
  3 | 	
  4 | 100
  5 | 		]	
  6 | = {
  7 | 87, 101, 108, 99,
  8 | 111, 109, 101,	 
  9 | 32, 116, 111,	32,
 10 | 116,	 104,
 11 | 101, 32, 	74,
 12 | 97,
 13 | 	
 14 | 112, 97,
 15 |  
 16 | 	114, 	105,	32,	80, 97,	
 17 | 	
 18 | 	
 19 | 
 20 | 
 21 | 114, 107,	 33, 10 }; /* Names of	
 22 | kemono
 23 | friends */ int	N4__mE___[6][50]		= { { 83, 97,	97, 98,
 24 | 97, 
 25 | 114,
 26 | 117  }, 	{	75, 97, 98,	
 27 | 
 28 | 97,  110
 29 |  
 30 | }, { 
 31 |  
 32 | 
 33 | 72,
 34 | 
 35 | 	  97,
 36 | 115, 104, 105,
 37 | 98, 105, 114, 111,
 38 | 
 39 | 	
 40 | 
 41 | 	
 42 | 		
 43 | 
 44 | 107,
 45 | 	 111,
 46 |  
 47 | 
 48 | 117
 49 | 
 50 | }, { 65, 114,
 51 | 
 52 | 97, 
 53 | 
 54 | 105,
 55 | 103,	
 56 | 
 57 | 117,
 58 | 109, 
 59 | 
 60 | 
 61 | 			97 },	
 62 | 	{ 72, 117,
 63 | 110, 98, 111, 114,
 64 | 117,
 65 | 
 66 | 116, 111,	  32, 80,
 67 | 101, 110,
 68 |  
 69 | 	103, 105, 	110
 70 | },
 71 |   {	84, 97, 105, 114, 105, 107, 117, 32, 79,
 72 | 	
 73 | 	 
 74 | 111, 107,
 75 | 97, 
 76 | 109,
 77 | 	
 78 | 	
 79 |  
 80 | 	 
 81 | 
 82 | 
 83 |  105	} };
 84 | 	int
 85 | 
 86 | saY_HeI10_To[40] = { 32,
 87 | 115, 97,  121,  
 88 | 	
 89 | 		115,
 90 | 
 91 | 32,
 92 | 104,
 93 | 
 94 |  101, 108, 108,	111, 
 95 | 
 96 |  32,
 97 |  
 98 | 	
 99 | 116, 111, 
100 | 32 };	int
101 | 		RET[5]
102 | =
103 | {10}; int putstr(  
104 | int str[	] )  { 
105 |   int
106 | 
107 |  iNd__1X ;	iNd__1X		= 0 ; while ( str
108 | 		[	iNd__1X
109 | 	] ) { 
110 | 	
111 |  putch (
112 | 
113 | str[ iNd__1X 
114 | ]  
115 | 	) ; iNd__1X
116 | =
117 | iNd__1X
118 |  	
119 |  + 1 	
120 | 
121 | ;	} return	iNd__1X
122 |  ; } int main( /* no param */ )	{
123 | putstr(
124 | __HELLO	)  ;	int i =	
125 |  0 ; /* say
126 | 
127 |  
128 | 	hello to
129 |  kemono friends 
130 | ~ */ while (  
131 |  
132 |  1 ) {
133 | 
134 | int _  
135 |  = i
136 |  
137 | / 6
138 | 	
139 | ; int __
140 | = 	
141 | i % 6
142 | 
143 | ;
144 |      
145 | 	if 
146 | ( 
147 | 
148 | _ 
149 | 
150 | !=
151 | 
152 | 
153 | 	__ )
154 | 	{ putstr(
155 | 
156 | N4__mE___
157 | 
158 | 	[ 	_	
159 |  ] ) 
160 | ; putstr(	
161 | saY_HeI10_To	) ;
162 | putstr(
163 | N4__mE___ [ 
164 | 		
165 |  
166 | 
167 | __ ]	 )
168 | 
169 | 	
170 | ;
171 | 
172 | 		putstr(	
173 | RET
174 | ) ;
175 | }
176 | /*
177 | 	 do
178 | 	
179 | 	linear
180 | modulo 
181 | to find 	the 	 next pair of friends  */ i = ( i
182 | *
183 |  	
184 | 
185 | 17
186 | 
187 | +	 23
188 | )
189 | %
190 | 
191 | 
192 | 32	 
193 | 
194 |  
195 |  ;
196 | if ( i	
197 | ==
198 | 	0	) { break ;		}
199 | 
200 |  
201 |  } return 0; }


--------------------------------------------------------------------------------
/examples/calc/README.md:
--------------------------------------------------------------------------------
 1 | # calc
 2 | 
 3 | A simple expression calculator, with a front-end built with `laps`.
 4 | 
 5 | Supporting addition, subtraction, multiplication, division, modulo and brackets.
 6 | 
 7 | ## Usage
 8 | 
 9 | Run in the repository root:
10 | 
11 | ```
12 | echo '-10 * (2 + 5) * 2 - 5.3' | cargo run --example calc --features=macros
13 | ```
14 | 
15 | The output will be:
16 | 
17 | ```
18 | -145.3
19 | ```
20 | 


--------------------------------------------------------------------------------
/examples/calc/main.rs:
--------------------------------------------------------------------------------
  1 | use laps::{ast::NonEmptySepList, prelude::*, reader::Reader, span::Result, token::TokenBuffer};
  2 | 
  3 | /// Kinds of the token.
  4 | ///
  5 | /// The tokenizer (lexer) will read user input and turn it into a stream of
  6 | /// tokens based on regular expressions.
  7 | #[token_kind]
  8 | #[derive(Tokenize)]
  9 | enum TokenKind {
 10 |   // This token will be skipped.
 11 |   #[skip(r"\s+")]
 12 |   _Skip,
 13 |   /// Floating-point number.
 14 |   #[regex(r"[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?")]
 15 |   Float(f64),
 16 |   /// Other character.
 17 |   #[regex(r".")]
 18 |   Other(char),
 19 |   /// End-of-file.
 20 |   #[eof]
 21 |   Eof,
 22 | }
 23 | 
 24 | /// Type of token.
 25 | ///
 26 | /// [`laps::token::Token`] has two fields, one is the token kind and
 27 | /// the other is the span of this token, representing the location of
 28 | /// the token in the input.
 29 | type Token = laps::token::Token<TokenKind>;
 30 | 
 31 | token_ast! {
 32 |   /// Macro for referencing ASTs corresponding to tokens.
 33 |   ///
 34 |   /// The [`token_ast`] macro defines ASTs for tokens, and automatically
 35 |   /// implements methods for parsing them.
 36 |   macro Token<TokenKind> {
 37 |     [float] => { kind: TokenKind::Float(_), prompt: "floating-point" },
 38 |     [+] => { kind: TokenKind::Other('+') },
 39 |     [-] => { kind: TokenKind::Other('-') },
 40 |     [*] => { kind: TokenKind::Other('*') },
 41 |     [/] => { kind: TokenKind::Other('/') },
 42 |     [%] => { kind: TokenKind::Other('%') },
 43 |     [lpr] => { kind: TokenKind::Other('(') },
 44 |     [rpr] => { kind: TokenKind::Other(')') },
 45 |     [eof] => { kind: TokenKind::Eof },
 46 |   }
 47 | }
 48 | 
 49 | // EBNF of arithmetic expression:
 50 | //
 51 | // Expr    ::= AddExpr EOF;
 52 | // AddExpr ::= MulExpr {AddOps MulExpr};
 53 | // AddOps  ::= "+" | "-";
 54 | // MulExpr ::= Value {MulOps Value};
 55 | // MulOps  ::= "*" | "/" | "%";
 56 | // Value   ::= FLOAT | "-" Value | "(" AddExpr ")";
 57 | //
 58 | // So we define the following ASTs, and implement there parsers by deriving
 59 | // the `Parse` trait.
 60 | 
 61 | #[derive(Parse)]
 62 | #[token(Token)]
 63 | struct Expr {
 64 |   add: AddExpr,
 65 |   _eof: Token![eof],
 66 | }
 67 | 
 68 | type AddExpr = NonEmptySepList<MulExpr, AddOps>;
 69 | 
 70 | #[derive(Parse)]
 71 | #[token(Token)]
 72 | enum AddOps {
 73 |   Add(Token![+]),
 74 |   Sub(Token![-]),
 75 | }
 76 | 
 77 | type MulExpr = NonEmptySepList<Value, MulOps>;
 78 | 
 79 | #[derive(Parse)]
 80 | #[token(Token)]
 81 | enum MulOps {
 82 |   Mul(Token![*]),
 83 |   Div(Token![/]),
 84 |   Mod(Token![%]),
 85 | }
 86 | 
 87 | #[derive(Parse)]
 88 | #[token(Token)]
 89 | enum Value {
 90 |   Num(Token![float]),
 91 |   Neg(Token![-], Box<Self>),
 92 |   Paren(Token![lpr], Box<AddExpr>, Token![rpr]),
 93 | }
 94 | 
 95 | // Some implementations for calculating the parsed expression.
 96 | 
 97 | trait Calculate {
 98 |   fn calc(&self) -> Result<f64>;
 99 | }
100 | 
101 | impl Calculate for Expr {
102 |   fn calc(&self) -> Result<f64> {
103 |     self.add.calc()
104 |   }
105 | }
106 | 
107 | impl Calculate for AddExpr {
108 |   fn calc(&self) -> Result<f64> {
109 |     match self {
110 |       Self::One(e) => e.calc(),
111 |       Self::More(l, op, r) => {
112 |         let (l, r) = (l.calc()?, r.calc()?);
113 |         Ok(match op {
114 |           AddOps::Add(_) => l + r,
115 |           AddOps::Sub(_) => l - r,
116 |         })
117 |       }
118 |     }
119 |   }
120 | }
121 | 
122 | impl Calculate for MulExpr {
123 |   fn calc(&self) -> Result<f64> {
124 |     match self {
125 |       Self::One(e) => e.calc(),
126 |       Self::More(l, op, r) => {
127 |         let (l, r) = (l.calc()?, r.calc()?);
128 |         Ok(match op {
129 |           MulOps::Mul(_) => l * r,
130 |           MulOps::Div(_) => l / r,
131 |           MulOps::Mod(_) => l % r,
132 |         })
133 |       }
134 |     }
135 |   }
136 | }
137 | 
138 | impl Calculate for Value {
139 |   fn calc(&self) -> Result<f64> {
140 |     match self {
141 |       Self::Num(num) => Ok(*num.unwrap_ref::<&f64, _>()),
142 |       Self::Neg(_, value) => Ok(-value.calc()?),
143 |       Self::Paren(_, add, _) => add.calc(),
144 |     }
145 |   }
146 | }
147 | 
148 | fn main() -> Result<()> {
149 |   // Create a reader and a lexer.
150 |   let reader = Reader::from_stdin();
151 |   let lexer = TokenKind::lexer(reader);
152 |   // Create a token buffer for parsing.
153 |   // Token buffer can temporarily hold tokens to help the parser perform
154 |   // some look-ahead operations.
155 |   let mut tokens = TokenBuffer::new(lexer);
156 |   // Parse and calculate expression, and print the result.
157 |   println!("{}", tokens.parse::<Expr>()?.calc()?);
158 |   Ok(())
159 | }
160 | 


--------------------------------------------------------------------------------
/examples/clike/README.md:
--------------------------------------------------------------------------------
 1 | # clike
 2 | 
 3 | Interpreter for a C-like programming language, with a front-end built with `laps`.
 4 | 
 5 | ## Usage
 6 | 
 7 | Run in the repository root:
 8 | 
 9 | ```
10 | cargo run --example clike --features=macros -- examples/clike/fib.c
11 | ```
12 | 
13 | Input:
14 | 
15 | ```
16 | 10
17 | ```
18 | 
19 | Output:
20 | 
21 | ```
22 | 55
23 | ```
24 | 


--------------------------------------------------------------------------------
/examples/clike/fib.c:
--------------------------------------------------------------------------------
 1 | int fib(int n) {
 2 |   if (n <= 2) {
 3 |     return 1;
 4 |   } else {
 5 |     return fib(n - 1) + fib(n - 2);
 6 |   }
 7 | }
 8 | 
 9 | int main() {
10 |   return putint(fib(getint()));
11 | }
12 | 


--------------------------------------------------------------------------------
/examples/clike/sort.c:
--------------------------------------------------------------------------------
 1 | int arr[20];
 2 | 
 3 | int qsort(int l, int r) {
 4 |   int i = l;
 5 |   int j = r;
 6 |   int p = arr[(l + r) / 2];
 7 |   while (i <= j) {
 8 |     while (arr[i] < p) i = i + 1;
 9 |     while (arr[j] > p) j = j - 1;
10 |     if (i > j) break;
11 |     int u = arr[i];
12 |     arr[i] = arr[j];
13 |     arr[j] = u;
14 |     i = i + 1;
15 |     j = j - 1;
16 |   }
17 |   if (i < r) qsort(i, r);
18 |   if (j > l) qsort(l, j);
19 |   return 0;
20 | }
21 | 
22 | int main() {
23 |   int i = 0;
24 |   while (i < 20) {
25 |     arr[i] = getint();
26 |     i = i + 1;
27 |   }
28 |   qsort(0, 19);
29 |   i = 0;
30 |   while (i < 20) {
31 |     putint(arr[i]);
32 |     i = i + 1;
33 |   }
34 |   return 0;
35 | }
36 | 


--------------------------------------------------------------------------------
/examples/json/README.md:
--------------------------------------------------------------------------------
 1 | # json
 2 | 
 3 | A simple JSON parser, with a front-end built with `laps`.
 4 | 
 5 | ## Usage
 6 | 
 7 | Run in the repository root:
 8 | 
 9 | ```
10 | cargo run --example json --features=macros -- examples/json/example.json
11 | ```
12 | 
13 | The structure of the parsed JSON will be printed.
14 | 


--------------------------------------------------------------------------------
/examples/json/main.rs:
--------------------------------------------------------------------------------
  1 | use laps::ast::SepSeq;
  2 | use laps::prelude::*;
  3 | use laps::reader::Reader;
  4 | use laps::return_error;
  5 | use laps::token::TokenBuffer;
  6 | use std::{collections::HashMap, env, fmt, io::Read, process, str::FromStr};
  7 | 
  8 | // ==============================
  9 | // Token definitions.
 10 | // ==============================
 11 | 
 12 | type Token = laps::token::Token<TokenKind>;
 13 | 
 14 | #[token_kind]
 15 | #[derive(Tokenize)]
 16 | enum TokenKind {
 17 |   #[skip(r"[ \r\n\t]+")]
 18 |   _Skip,
 19 |   /// Keyword.
 20 |   #[regex(r"true|false|null")]
 21 |   Keyword(Keyword),
 22 |   /// Number.
 23 |   #[regex(r"-?([0-9]|[1-9][0-9]+)(\.[0-9]+)?([Ee][+-]?[0-9]+)?")]
 24 |   Number(f64),
 25 |   /// String.
 26 |   #[regex(r#""([^\x00-\x1f"\\]|\\(["\\/bfnrt]|u[0-9a-fA-F]{4}))*""#, json_str)]
 27 |   String(String),
 28 |   /// Other character.
 29 |   #[regex(r".")]
 30 |   Other(char),
 31 |   /// End-of-file.
 32 |   #[eof]
 33 |   Eof,
 34 | }
 35 | 
 36 | #[derive(Clone, PartialEq)]
 37 | enum Keyword {
 38 |   True,
 39 |   False,
 40 |   Null,
 41 | }
 42 | 
 43 | impl FromStr for Keyword {
 44 |   type Err = ();
 45 | 
 46 |   fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
 47 |     match s {
 48 |       "true" => Ok(Self::True),
 49 |       "false" => Ok(Self::False),
 50 |       "null" => Ok(Self::Null),
 51 |       _ => Err(()),
 52 |     }
 53 |   }
 54 | }
 55 | 
 56 | impl fmt::Display for Keyword {
 57 |   fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 58 |     match self {
 59 |       Self::True => write!(f, "true"),
 60 |       Self::False => write!(f, "false"),
 61 |       Self::Null => write!(f, "null"),
 62 |     }
 63 |   }
 64 | }
 65 | 
 66 | fn json_str(s: &str) -> Option<String> {
 67 |   let mut buf = String::new();
 68 |   let mut escape = false;
 69 |   let mut hex_num = 0;
 70 |   let mut hex = 0;
 71 |   for c in s[1..s.len() - 1].chars() {
 72 |     if escape {
 73 |       if hex_num > 0 && c.is_ascii_digit() {
 74 |         hex = hex * 16 + c.to_digit(16)?;
 75 |         hex_num -= 1;
 76 |         if hex_num == 0 {
 77 |           buf.push(char::from_u32(hex)?);
 78 |           hex = 0;
 79 |           escape = false;
 80 |         }
 81 |       } else if c == 'u' {
 82 |         hex_num = 4;
 83 |       } else {
 84 |         match c {
 85 |           '"' => buf.push('"'),
 86 |           '\\' => buf.push('\\'),
 87 |           '/' => buf.push('/'),
 88 |           'b' => buf.push('\x08'),
 89 |           'f' => buf.push('\x0c'),
 90 |           'n' => buf.push('\n'),
 91 |           'r' => buf.push('\r'),
 92 |           't' => buf.push('\t'),
 93 |           _ => return None,
 94 |         }
 95 |         escape = false;
 96 |       }
 97 |     } else {
 98 |       match c {
 99 |         '\\' => escape = true,
100 |         c => buf.push(c),
101 |       }
102 |     }
103 |   }
104 |   Some(buf)
105 | }
106 | 
107 | // ==============================
108 | // AST definitions.
109 | // ==============================
110 | 
111 | token_ast! {
112 |   macro Token<TokenKind> {
113 |     [true] => { kind: TokenKind::Keyword(Keyword::True) },
114 |     [false] => { kind: TokenKind::Keyword(Keyword::False) },
115 |     [null] => { kind: TokenKind::Keyword(Keyword::Null) },
116 |     [num] => { kind: TokenKind::Number(_), prompt: "number" },
117 |     [str] => { kind: TokenKind::String(_), prompt: "string" },
118 |     [:] => { kind: TokenKind::Other(':') },
119 |     [,] => { kind: TokenKind::Other(',') },
120 |     [lbk] => { kind: TokenKind::Other('{') },
121 |     [rbk] => { kind: TokenKind::Other('}') },
122 |     [lbc] => { kind: TokenKind::Other('[') },
123 |     [rbc] => { kind: TokenKind::Other(']') },
124 |     [eof] => { kind: TokenKind::Eof },
125 |   }
126 | }
127 | 
128 | #[derive(Parse)]
129 | #[token(Token)]
130 | struct JsonDef {
131 |   value: ValueDef,
132 |   _eof: Token![eof],
133 | }
134 | 
135 | #[derive(Parse)]
136 | #[token(Token)]
137 | enum ValueDef {
138 |   ObjectDef(ObjectDef),
139 |   ArrayDef(ArrayDef),
140 |   String(Token![str]),
141 |   Number(Token![num]),
142 |   True(Token![true]),
143 |   False(Token![false]),
144 |   Null(Token![null]),
145 | }
146 | 
147 | #[derive(Parse)]
148 | #[token(Token)]
149 | struct ObjectDef {
150 |   _lbk: Token![lbk],
151 |   members: SepSeq<Member, Token![,]>,
152 |   _rbk: Token![rbk],
153 | }
154 | 
155 | #[derive(Parse)]
156 | #[token(Token)]
157 | struct Member {
158 |   name: Token![str],
159 |   _colon: Token![:],
160 |   value: ValueDef,
161 | }
162 | 
163 | #[derive(Parse)]
164 | #[token(Token)]
165 | struct ArrayDef {
166 |   _lbc: Token![lbc],
167 |   values: SepSeq<ValueDef, Token![,]>,
168 |   _rbc: Token![rbc],
169 | }
170 | 
171 | // ==============================
172 | // Converter.
173 | // ==============================
174 | 
175 | #[derive(Debug)]
176 | enum Value {
177 |   Object(HashMap<String, Value>),
178 |   Array(Vec<Value>),
179 |   String(String),
180 |   Number(f64),
181 |   Bool(bool),
182 |   Null,
183 | }
184 | 
185 | impl From<JsonDef> for Value {
186 |   fn from(json: JsonDef) -> Self {
187 |     json.value.into()
188 |   }
189 | }
190 | 
191 | impl From<ValueDef> for Value {
192 |   fn from(value: ValueDef) -> Self {
193 |     match value {
194 |       ValueDef::ObjectDef(obj) => obj.into(),
195 |       ValueDef::ArrayDef(arr) => arr.into(),
196 |       ValueDef::String(s) => Self::String(s.unwrap()),
197 |       ValueDef::Number(n) => Self::Number(n.unwrap()),
198 |       ValueDef::True(_) => Self::Bool(true),
199 |       ValueDef::False(_) => Self::Bool(false),
200 |       ValueDef::Null(_) => Self::Null,
201 |     }
202 |   }
203 | }
204 | 
205 | impl From<ObjectDef> for Value {
206 |   fn from(obj: ObjectDef) -> Self {
207 |     Self::Object(
208 |       obj
209 |         .members
210 |         .into_iter()
211 |         .map(|Member { name, value, .. }| (name.unwrap(), value.into()))
212 |         .collect(),
213 |     )
214 |   }
215 | }
216 | 
217 | impl From<ArrayDef> for Value {
218 |   fn from(arr: ArrayDef) -> Self {
219 |     Self::Array(arr.values.into_iter().map(From::from).collect())
220 |   }
221 | }
222 | 
223 | fn main() {
224 |   let mut args = env::args();
225 |   args.next();
226 |   match args.next() {
227 |     Some(path) => parse_and_dump(Reader::from_path(path).expect("invalid path")),
228 |     None => parse_and_dump(Reader::from_stdin()),
229 |   }
230 | }
231 | 
232 | fn parse_and_dump<T>(reader: Reader<T>)
233 | where
234 |   T: Read,
235 | {
236 |   let span = reader.span().clone();
237 |   let lexer = TokenKind::lexer(reader);
238 |   let mut tokens = TokenBuffer::new(lexer);
239 |   if let Ok(json) = tokens.parse::<JsonDef>() {
240 |     let value = Value::from(json);
241 |     println!("{value:#?}");
242 |   } else {
243 |     span.log_summary();
244 |     process::exit(span.error_num() as i32);
245 |   }
246 | }
247 | 


--------------------------------------------------------------------------------
/examples/sexp/README.md:
--------------------------------------------------------------------------------
 1 | # sexp
 2 | 
 3 | A [S-expression](https://en.wikipedia.org/wiki/S-expression) parser built with `laps`.
 4 | 
 5 | ## Usage
 6 | 
 7 | Run in the repository root:
 8 | 
 9 | ```
10 | cat examples/sexp/example.sexp | cargo run --example sexp --features=macros
11 | ```
12 | 
13 | The structure of the parsed S-expression AST will be printed.
14 | 


--------------------------------------------------------------------------------
/examples/sexp/example.sexp:
--------------------------------------------------------------------------------
 1 | atom
 2 | 
 3 | ()
 4 | 
 5 | (() (()))
 6 | 
 7 | (The (quick (brown fox))
 8 |  jumps over ((the) lazy) dog.)
 9 | 
10 | (defun factorial (x)
11 |   (if (zerop x)
12 |     1
13 |     (* x (factorial (- x 1)))))
14 | 


--------------------------------------------------------------------------------
/examples/sexp/main.rs:
--------------------------------------------------------------------------------
 1 | use laps::{prelude::*, reader::Reader, span::Result, token::TokenBuffer};
 2 | 
 3 | /// Kinds of the token.
 4 | ///
 5 | /// The tokenizer (lexer) will read user input and turn it into a stream of
 6 | /// tokens based on regular expressions.
 7 | #[token_kind]
 8 | #[derive(Debug, Tokenize)]
 9 | enum TokenKind {
10 |   // This token will be skipped.
11 |   #[skip(r"\s+")]
12 |   _Skip,
13 |   /// Parentheses.
14 |   #[regex(r"[()]")]
15 |   Paren(char),
16 |   /// Atom.
17 |   #[regex(r"[^\s()]+")]
18 |   Atom(String),
19 |   /// End-of-file.
20 |   #[eof]
21 |   Eof,
22 | }
23 | 
24 | /// Type of token.
25 | ///
26 | /// [`laps::token::Token`] has two fields, one is the token kind and
27 | /// the other is the span of this token, representing the location of
28 | /// the token in the input.
29 | type Token = laps::token::Token<TokenKind>;
30 | 
31 | token_ast! {
32 |   /// Macro for referencing ASTs corresponding to tokens.
33 |   ///
34 |   /// The [`token_ast`] macro defines ASTs for tokens, and automatically
35 |   /// implements methods for parsing them.
36 |   #[derive(Clone, Debug, PartialEq)]
37 |   macro Token<TokenKind> {
38 |     [atom] => { kind: TokenKind::Atom(_), prompt: "atom" },
39 |     [lpr] => { kind: TokenKind::Paren('(') },
40 |     [rpr] => { kind: TokenKind::Paren(')') },
41 |     [eof] => { kind: TokenKind::Eof },
42 |   }
43 | }
44 | 
45 | // EBNF of S-expression:
46 | //
47 | // Statement ::= Elem | EOF;
48 | // SExp      ::= "(" {Elem} ")";
49 | // Elem      ::= ATOM | SExp;
50 | //
51 | // So we define the following ASTs, and implement there parsers by deriving
52 | // the `Parse` trait.
53 | 
54 | #[derive(Parse, Debug)]
55 | #[token(Token)]
56 | enum Statement {
57 |   Elem(Elem),
58 |   End(Token![eof]),
59 | }
60 | 
61 | #[derive(Parse, Debug)]
62 | #[token(Token)]
63 | struct SExp(Token![lpr], Vec<Elem>, Token![rpr]);
64 | 
65 | #[derive(Parse, Debug)]
66 | #[token(Token)]
67 | enum Elem {
68 |   Atom(Token![atom]),
69 |   SExp(SExp),
70 | }
71 | 
72 | fn main() -> Result<()> {
73 |   // Create a reader and a lexer.
74 |   let reader = Reader::from_stdin();
75 |   let lexer = TokenKind::lexer(reader);
76 |   // Create a token buffer for parsing.
77 |   // Token buffer can temporarily hold tokens to help the parser perform
78 |   // some look-ahead operations.
79 |   let mut tokens = TokenBuffer::new(lexer);
80 |   // Parse S-expressions and print them until the end of the input.
81 |   loop {
82 |     match tokens.parse::<Statement>()? {
83 |       Statement::End(_) => break Ok(()),
84 |       stmt => println!("{stmt:#?}"),
85 |     }
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/laps_macros/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "laps_macros"
 3 | version = "0.1.5"
 4 | authors = ["MaxXing <x@MaxXSoft.net>"]
 5 | edition = "2021"
 6 | description = "Macros for crate `laps`."
 7 | repository = "https://github.com/MaxXSoft/laps"
 8 | documentation = "https://docs.rs/laps_macros"
 9 | categories = ["parsing", "development-tools::procedural-macro-helpers"]
10 | keywords = ["laps", "parser", "lexer", "derive", "proc_macro"]
11 | readme = "README.md"
12 | license = "MIT OR Apache-2.0"
13 | 
14 | [lib]
15 | proc-macro = true
16 | 
17 | [dependencies]
18 | proc-macro2 = "1.0"
19 | quote = "1.0"
20 | syn = { version = "2.0", features = ["full"] }
21 | laps_regex = { path = "../laps_regex", version = "0.1.1" }
22 | 


--------------------------------------------------------------------------------
/laps_macros/README.md:
--------------------------------------------------------------------------------
1 | # laps_macros
2 | 
3 | Macros for crate [`laps`](https://crates.io/crates/laps), including derive macros and other helper macros.
4 | 
5 | ## License
6 | 
7 | Copyright (C) 2022-2023 MaxXing. Licensed under either of Apache 2.0 or MIT at your option.
8 | 


--------------------------------------------------------------------------------
/laps_macros/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! Macros for crate [`laps`](https://crates.io/crates/laps),
  2 | //! including derive macros and other helper macros.
  3 | 
  4 | mod parse;
  5 | mod spanned;
  6 | mod token_ast;
  7 | mod token_kind;
  8 | mod tokenize;
  9 | mod utils;
 10 | 
 11 | use proc_macro::TokenStream;
 12 | use utils::result_to_tokens;
 13 | 
 14 | /// Generates the `Parse` trait implementation.
 15 | ///
 16 | /// # Helper Attributes
 17 | ///
 18 | /// * `#[token(type)]`: implements `Parse` trait for token streams that
 19 | ///   produce tokens of the given type.
 20 | /// * `#[starts_with(token_ast0, token_ast1, ...)]`: specifies which tokens
 21 | ///   the current AST may start with. This will affect the implementation of
 22 | ///   method `maybe` of the `Parse` trait.
 23 | #[proc_macro_derive(Parse, attributes(token, starts_with))]
 24 | pub fn derive_parse(item: TokenStream) -> TokenStream {
 25 |   result_to_tokens!(parse::derive_parse(item))
 26 | }
 27 | 
 28 | /// Generates the `Spanned` trait implementation.
 29 | ///
 30 | /// # `#[try_span]`
 31 | ///
 32 | /// Tells the macro that a field implements the `TrySpan` trait.
 33 | /// This may be helpful when:
 34 | ///
 35 | /// ```
 36 | /// # use laps_macros::Spanned;
 37 | /// # mod laps {
 38 | /// #   pub mod span {
 39 | /// #     pub type Result<T> = std::result::Result<T, ()>;
 40 | /// #     pub struct Span;
 41 | /// #     impl Span {
 42 | /// #       pub fn into_end_updated(self, span: Self) -> Self { todo!() }
 43 | /// #     }
 44 | /// #     pub trait Spanned {
 45 | /// #       fn span(&self) -> Span;
 46 | /// #     }
 47 | /// #     pub trait TrySpan {
 48 | /// #       fn try_span(&self) -> Option<Span>;
 49 | /// #     }
 50 | /// #     impl<T> TrySpan for T where T: Spanned {
 51 | /// #       fn try_span(&self) -> Option<Span> { todo!() }
 52 | /// #     }
 53 | /// #     impl<T> TrySpan for Option<T> where T: TrySpan {
 54 | /// #       fn try_span(&self) -> Option<Span> { todo!() }
 55 | /// #     }
 56 | /// #   }
 57 | /// # }
 58 | /// # struct Atom;
 59 | /// # impl laps::span::Spanned for Atom {
 60 | /// #   fn span(&self) -> laps::span::Span { todo!() }
 61 | /// # }
 62 | /// # type ReturnKeyword = Atom;
 63 | /// # type Value = Atom;
 64 | /// #[derive(Spanned)]
 65 | /// struct Return {
 66 | ///   ret: ReturnKeyword,
 67 | ///   #[try_span]
 68 | ///   value: Option<Value>,
 69 | /// }
 70 | /// ```
 71 | ///
 72 | /// The following deriving fails to compile:
 73 | ///
 74 | /// ```compile_fail
 75 | /// # use laps_macros::Spanned;
 76 | /// # mod laps {
 77 | /// #   pub mod span {
 78 | /// #     pub type Result<T> = std::result::Result<T, ()>;
 79 | /// #     pub struct Span;
 80 | /// #     impl Span {
 81 | /// #       pub fn into_end_updated(self, span: Self) -> Self { todo!() }
 82 | /// #     }
 83 | /// #     pub trait Spanned {
 84 | /// #       fn span(&self) -> Span;
 85 | /// #     }
 86 | /// #     pub trait TrySpan {
 87 | /// #       fn try_span(&self) -> Option<Span>;
 88 | /// #     }
 89 | /// #     impl<T> TrySpan for T where T: Spanned {
 90 | /// #       fn try_span(&self) -> Option<Span> { todo!() }
 91 | /// #     }
 92 | /// #     impl<T> TrySpan for Option<T> where T: TrySpan {
 93 | /// #       fn try_span(&self) -> Option<Span> { todo!() }
 94 | /// #     }
 95 | /// #   }
 96 | /// # }
 97 | /// # struct Atom;
 98 | /// # impl laps::span::Spanned for Atom {
 99 | /// #   fn span(&self) -> laps::span::Span { todo!() }
100 | /// # }
101 | /// # type ReturnKeyword = Atom;
102 | /// # type Value = Atom;
103 | /// #[derive(Spanned)]
104 | /// struct Return {
105 | ///   ret: ReturnKeyword,
106 | ///   value: Option<Value>,
107 | /// }
108 | /// ```
109 | #[proc_macro_derive(Spanned, attributes(try_span))]
110 | pub fn derive_spanned(item: TokenStream) -> TokenStream {
111 |   result_to_tokens!(spanned::derive_spanned(item))
112 | }
113 | 
114 | /// Generates the `Tokenize` trait implementation for token kinds. This macro
115 | /// can only be applied to `enum`s.
116 | ///
117 | /// # Helper Attributes
118 | ///
119 | /// * `#[char_type(type)]`: optional, specifies `CharType` of `Tokenize` trait.
120 | ///   Defaults to [`char`], and can only be [`char`] or [`u8`].
121 | /// * `#[enable_par(true/false)]`: optional, set to `true` to generate lexer in
122 | ///   parallel, `false` to disable parallelization. Defaults to automatic
123 | ///   selection.
124 | /// * `#[regex(regex [, parser])]`: marks a enum variant can be matched by the
125 | ///   given regular expression. The `parser` parameter is optional, which is a
126 | ///   function that converts a <code>&[str]</code> (`char_type` = [`char`]) or
127 | ///   a <code>&[[u8]]</code> (`char_type` = [`u8`]) to [`Option<T>`], which `T`
128 | ///   is type of the tuple field of this variant. If `parser` is omitted,
129 | ///   [`std::str::FromStr`] will be called.
130 | /// * `#[skip(regex)]`: marks a enum variant can be matched by the
131 | ///   given regular expression, and it should be skipped.
132 | /// * `#[eof]`: marks a enum variant should be returned when the tokenizer
133 | ///   encountered end-of-file.
134 | ///
135 | /// # Notes
136 | ///
137 | /// The variants that appear first will be matched first.
138 | #[proc_macro_derive(Tokenize, attributes(char_type, enable_par, regex, skip, eof))]
139 | pub fn derive_tokenize(item: TokenStream) -> TokenStream {
140 |   result_to_tokens!(tokenize::derive_tokenize(item))
141 | }
142 | 
143 | /// Implements [`From`], [`TryFrom`] and [`Display`](std::fmt::Display)
144 | /// trait for token kind enums.
145 | ///
146 | /// The [`From`] and [`TryFrom`] trait will only be implemented for variants
147 | /// with single unnamed field.
148 | ///
149 | /// # Examples
150 | ///
151 | /// ```
152 | /// # use laps_macros::token_kind;
153 | /// #[token_kind]
154 | /// enum TokenKind {
155 | ///   /// String literal.
156 | ///   Str(String),
157 | ///   /// Integer literal.
158 | ///   Int(i32),
159 | ///   /// End-of-file.
160 | ///   Eof,
161 | /// }
162 | /// ```
163 | ///
164 | /// will be expanded to:
165 | ///
166 | /// ```
167 | /// #[derive(Clone, PartialEq)]
168 | /// enum TokenKind {
169 | ///   // ...
170 | /// # Str(String),
171 | /// # Int(i32),
172 | /// # Eof,
173 | /// }
174 | ///
175 | /// impl From<String> for TokenKind {
176 | ///   fn from(s: String) -> Self {
177 | ///     Self::Str(s)
178 | ///   }
179 | /// }
180 | ///
181 | /// impl TryFrom<TokenKind> for String {
182 | ///   type Error = ();
183 | ///   fn try_from(kind: TokenKind) -> Result<Self, Self::Error> {
184 | ///     match kind {
185 | ///       TokenKind::Str(s) => Ok(s),
186 | ///       _ => Err(()),
187 | ///     }
188 | ///   }
189 | /// }
190 | ///
191 | /// impl<'a> TryFrom<&'a TokenKind> for &'a String {
192 | ///   type Error = ();
193 | ///   fn try_from(kind: &'a TokenKind) -> Result<Self, Self::Error> {
194 | ///     match kind {
195 | ///       TokenKind::Str(s) => Ok(s),
196 | ///       _ => Err(()),
197 | ///     }
198 | ///   }
199 | /// }
200 | ///
201 | /// // Same for `TokenKind::Int`.
202 | /// // ...
203 | ///
204 | /// impl std::fmt::Display for TokenKind {
205 | ///   fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
206 | ///     match self {
207 | ///       Self::Str(s) => write!(f, "string literal `{s}`"),
208 | ///       Self::Int(i) => write!(f, "integer literal `{i}`"),
209 | ///       Self::Eof => write!(f, "end-of-file"),
210 | ///     }
211 | ///   }
212 | /// }
213 | /// ```
214 | #[proc_macro_attribute]
215 | pub fn token_kind(attr: TokenStream, item: TokenStream) -> TokenStream {
216 |   result_to_tokens!(token_kind::token_kind(attr, item))
217 | }
218 | 
219 | /// Generates ASTs for tokens, also generates a macro
220 | /// for referencing AST types.
221 | ///
222 | /// The generated ASTs can be parsed from token stream that produces
223 | /// `laps::tokens::Token` with the given type as its kind.
224 | ///
225 | /// # Examples
226 | ///
227 | /// ```
228 | /// # use laps_macros::token_ast;
229 | /// # mod laps {
230 | /// #   pub mod span {
231 | /// #     pub type Result<T> = std::result::Result<T, ()>;
232 | /// #     pub struct Span;
233 | /// #     pub trait Spanned {
234 | /// #       fn span(&self) -> Span;
235 | /// #     }
236 | /// #   }
237 | /// #   pub mod token {
238 | /// #     #[derive(Clone, Debug, PartialEq, Eq, Hash)]
239 | /// #     pub struct Token<Kind> {
240 | /// #       pub kind: Kind,
241 | /// #       pub span: (),
242 | /// #     }
243 | /// #     impl<Kind> super::span::Spanned for Token<Kind> {
244 | /// #       fn span(&self) -> super::span::Span { super::span::Span }
245 | /// #     }
246 | /// #     impl<Kind> AsRef<Kind> for Token<Kind> {
247 | /// #       fn as_ref(&self) -> &Kind {
248 | /// #         &self.kind
249 | /// #       }
250 | /// #     }
251 | /// #     pub trait TokenStream {
252 | /// #       type Token;
253 | /// #       fn next_token(&mut self) -> super::span::Result<Self::Token>;
254 | /// #       fn peek(&mut self) -> super::span::Result<Self::Token>;
255 | /// #       fn expect<T>(&mut self, _: T) -> super::span::Result<Self::Token>;
256 | /// #     }
257 | /// #   }
258 | /// #   pub mod parse {
259 | /// #     pub trait Parse<TS>: Sized {
260 | /// #       fn parse(_: &mut TS) -> super::span::Result<Self>;
261 | /// #       fn maybe(_: &mut TS) -> super::span::Result<bool>;
262 | /// #     }
263 | /// #   }
264 | /// #   macro_rules! return_error {
265 | /// #     ($span:expr, $($arg:tt)+) => {
266 | /// #       return Err(())
267 | /// #     };
268 | /// #   }
269 | /// #   pub(crate) use return_error;
270 | /// # }
271 | /// # fn main() {}
272 | /// #[derive(Clone, Debug, PartialEq, Eq, Hash)]
273 | /// enum TokenKind {
274 | ///   /// String literal.
275 | ///   Str(String),
276 | ///   /// Integer literal.
277 | ///   Int(i32),
278 | ///   /// Other character.
279 | ///   Other(char),
280 | ///   /// End-of-file.
281 | ///   Eof,
282 | /// }
283 | ///
284 | /// // Declare ASTs and there name, define macro `Token` for referencing ASTs.
285 | /// // You can use `Token![..]` to represent the generated ASTs,
286 | /// // such as `Token![str]`, `Token![+]`, ...
287 | /// // All of the generated ASTs are single-field structures, you can access
288 | /// // the inner token by using `ast.0`.
289 | /// token_ast! {
290 | ///   // optional, all derives will be applied to the generated AST structures.
291 | ///   #[derive(Debug, PartialEq)]
292 | ///   pub(crate) macro Token</* specify the token kind type here */ TokenKind> {
293 | ///     // pattern, and prompt for error messages
294 | ///     [str] => { kind: TokenKind::Str(_), prompt: "string literal" },
295 | ///     [int] => { kind: TokenKind::Int(_), prompt: "integer literal" },
296 | ///     [0] => { kind: TokenKind::Int(i) if *i == 0, prompt: "zero" },
297 | ///     // use default prompt of the token kind
298 | ///     [+] => { kind: TokenKind::Other('+') },
299 | ///     [-] => { kind: TokenKind::Other('-') },
300 | ///     [*] => { kind: TokenKind::Other('*') },
301 | ///     [/] => { kind: TokenKind::Other('/') },
302 | ///     [eof] => { kind: TokenKind::Eof },
303 | ///   }
304 | /// }
305 | /// ```
306 | #[proc_macro]
307 | pub fn token_ast(item: TokenStream) -> TokenStream {
308 |   result_to_tokens!(token_ast::token_ast(item))
309 | }
310 | 


--------------------------------------------------------------------------------
/laps_macros/src/parse.rs:
--------------------------------------------------------------------------------
  1 | use crate::utils::{ident, match_attr, return_error};
  2 | use proc_macro::TokenStream;
  3 | use proc_macro2::{Ident, TokenStream as TokenStream2};
  4 | use quote::{quote, ToTokens, TokenStreamExt};
  5 | use std::iter;
  6 | use syn::{
  7 |   parse::Parser, punctuated::Punctuated, AttrStyle, Attribute, Data, DataEnum, DataStruct,
  8 |   DeriveInput, Expr, Field, Fields, GenericParam, Generics, Path, PredicateType, Result, Token,
  9 |   Type, TypePath, WhereClause, WherePredicate,
 10 | };
 11 | 
 12 | /// Entry function of `#[derive(Parse)]`.
 13 | pub fn derive_parse(item: TokenStream) -> Result<TokenStream> {
 14 |   // parse input tokens and check
 15 |   let input: DeriveInput = syn::parse(item)?;
 16 |   if !matches!(&input.data, Data::Struct(_) | Data::Enum(_)) {
 17 |     return_error!("`#[derive(Parse)]` only supports structs and enums");
 18 |   }
 19 |   // parse attributes
 20 |   let token = parse_token(&input.attrs)?;
 21 |   let starts_with = parse_starts_with(&input.attrs)?;
 22 |   // get generic related stuffs
 23 |   let ts_type = ident("__LAPS_MACROS_TS");
 24 |   let (_, ty_generics, where_clause) = input.generics.split_for_impl();
 25 |   let impl_generics = gen_impl_generics(&input.generics, &ts_type);
 26 |   let where_clause = gen_where_clause(&ts_type, token, where_clause)?;
 27 |   // get method implementations
 28 |   let (parse, maybe) = match &input.data {
 29 |     Data::Struct(s) => gen_struct_methods(s, &ts_type, starts_with),
 30 |     Data::Enum(e) => gen_enum_methods(e, &ts_type, starts_with),
 31 |     _ => unreachable!(),
 32 |   }?;
 33 |   // generate implementations
 34 |   let name = input.ident;
 35 |   Ok(TokenStream::from(quote! {
 36 |     impl #impl_generics laps::parse::Parse<#ts_type>
 37 |     for #name #ty_generics #where_clause {
 38 |       #parse
 39 |       #maybe
 40 |     }
 41 |   }))
 42 | }
 43 | 
 44 | /// Parses attribute `#[token(...)]`.
 45 | fn parse_token(attrs: &Vec<Attribute>) -> Result<Option<Path>> {
 46 |   let mut token = None;
 47 |   match_attr! {
 48 |     for meta in attrs if "token" && token.is_none() => {
 49 |       token = Some(syn::parse2(meta.tokens.clone())?);
 50 |     }
 51 |   }
 52 |   Ok(token)
 53 | }
 54 | 
 55 | /// Parses attribute `#[starts_with(...)]`.
 56 | fn parse_starts_with(attrs: &Vec<Attribute>) -> Result<Vec<Expr>> {
 57 |   let mut starts_with = Vec::new();
 58 |   match_attr! {
 59 |     for meta in attrs if "starts_with" && starts_with.is_empty() => {
 60 |       let exprs: Punctuated<Expr, Token![,]> = Punctuated::parse_separated_nonempty.parse2(meta.tokens.clone())?;
 61 |       starts_with = exprs.into_iter().collect();
 62 |     }
 63 |   }
 64 |   Ok(starts_with)
 65 | }
 66 | 
 67 | /// Generates `impl` generics.
 68 | fn gen_impl_generics(generics: &Generics, ts_type: &Ident) -> TokenStream2 {
 69 |   let mut tokens = TokenStream2::new();
 70 |   <Token![<]>::default().to_tokens(&mut tokens);
 71 |   // generate lifetimes
 72 |   for param in &generics.params {
 73 |     if let GenericParam::Lifetime(_) = param {
 74 |       param.to_tokens(&mut tokens);
 75 |       <Token![,]>::default().to_tokens(&mut tokens);
 76 |     }
 77 |   }
 78 |   // generate other parameters
 79 |   let is_outer = |attr: &&Attribute| matches!(attr.style, AttrStyle::Outer);
 80 |   for param in &generics.params {
 81 |     match param {
 82 |       GenericParam::Lifetime(_) => continue,
 83 |       GenericParam::Type(param) => {
 84 |         tokens.append_all(param.attrs.iter().filter(is_outer));
 85 |         param.ident.to_tokens(&mut tokens);
 86 |         if !param.bounds.is_empty() {
 87 |           <Token![:]>::default().to_tokens(&mut tokens);
 88 |           param.bounds.to_tokens(&mut tokens);
 89 |         }
 90 |       }
 91 |       GenericParam::Const(param) => {
 92 |         tokens.append_all(param.attrs.iter().filter(is_outer));
 93 |         param.const_token.to_tokens(&mut tokens);
 94 |         param.ident.to_tokens(&mut tokens);
 95 |         param.colon_token.to_tokens(&mut tokens);
 96 |         param.ty.to_tokens(&mut tokens);
 97 |       }
 98 |     }
 99 |     <Token![,]>::default().to_tokens(&mut tokens);
100 |   }
101 |   // generate token stream type name
102 |   ts_type.to_tokens(&mut tokens);
103 |   <Token![>]>::default().to_tokens(&mut tokens);
104 |   tokens
105 | }
106 | 
107 | /// Generates `where` clause.
108 | fn gen_where_clause(
109 |   ts_type: &Ident,
110 |   token: Option<Path>,
111 |   where_clause: Option<&WhereClause>,
112 | ) -> Result<WhereClause> {
113 |   // `TokenStream` trait bound
114 |   let mut ts_trait = Punctuated::new();
115 |   let ts_trait_tokens = match token {
116 |     Some(token) => quote!(laps::token::TokenStream<Token = #token>),
117 |     None => quote!(laps::token::TokenStream),
118 |   };
119 |   ts_trait.push(syn::parse2(ts_trait_tokens).unwrap());
120 |   // generate where predicates for token stream type
121 |   let param_ty = Type::Path(TypePath {
122 |     qself: None,
123 |     path: ts_type.clone().into(),
124 |   });
125 |   let pred = WherePredicate::Type(PredicateType {
126 |     lifetimes: None,
127 |     bounded_ty: param_ty,
128 |     colon_token: Default::default(),
129 |     bounds: ts_trait,
130 |   });
131 |   // create where clause
132 |   let mut predicates = Punctuated::new();
133 |   if let Some(wc) = where_clause {
134 |     predicates.extend(wc.predicates.iter().cloned());
135 |   }
136 |   predicates.push(pred);
137 |   Ok(WhereClause {
138 |     where_token: Default::default(),
139 |     predicates,
140 |   })
141 | }
142 | 
143 | /// Generates trait methods for the given struct data.
144 | fn gen_struct_methods(
145 |   data: &DataStruct,
146 |   ts_type: &Ident,
147 |   starts_with: Vec<Expr>,
148 | ) -> Result<(TokenStream2, TokenStream2)> {
149 |   // generate `parse` method
150 |   let constructor = gen_constructor(&data.fields);
151 |   let parse = quote! {
152 |     fn parse(tokens: &mut #ts_type) -> laps::span::Result<Self> {
153 |       std::result::Result::Ok(Self #constructor)
154 |     }
155 |   };
156 |   // generate `maybe` method
157 |   let result = if !starts_with.is_empty() {
158 |     gen_maybe(starts_with)
159 |   } else if let Some(Field { ty, .. }) = first_field(&data.fields) {
160 |     quote!(<#ty>::maybe(tokens))
161 |   } else {
162 |     quote!(std::result::Result::Ok(true))
163 |   };
164 |   let maybe = quote! {
165 |     fn maybe(tokens: &mut #ts_type) -> laps::span::Result<bool> {
166 |       #result
167 |     }
168 |   };
169 |   Ok((parse, maybe))
170 | }
171 | 
172 | /// Generates trait methods for the given enum data.
173 | fn gen_enum_methods(
174 |   data: &DataEnum,
175 |   ts_type: &Ident,
176 |   starts_with: Vec<Expr>,
177 | ) -> Result<(TokenStream2, TokenStream2)> {
178 |   // generate `parse` method
179 |   let mut branches = TokenStream2::new();
180 |   for (i, variant) in data.variants.iter().enumerate() {
181 |     if i != 0 {
182 |       <Token![else]>::default().to_tokens(&mut branches);
183 |     }
184 |     if i != data.variants.len() - 1 {
185 |       <Token![if]>::default().to_tokens(&mut branches);
186 |       branches.append_all(match first_field(&variant.fields) {
187 |         Some(Field { ty, .. }) => quote!(<#ty>::maybe(tokens)?),
188 |         None => quote!(true),
189 |       });
190 |     }
191 |     let ident = &variant.ident;
192 |     let constructor = gen_constructor(&variant.fields);
193 |     branches.append_all(quote!({ Self::#ident #constructor }));
194 |   }
195 |   let parse = quote! {
196 |     fn parse(tokens: &mut #ts_type) -> laps::span::Result<Self> {
197 |       std::result::Result::Ok(#branches)
198 |     }
199 |   };
200 |   // generate `maybe` method
201 |   let result = if !starts_with.is_empty() {
202 |     gen_maybe(starts_with)
203 |   } else if data.variants.is_empty() {
204 |     quote!(std::result::Result::Ok(true))
205 |   } else {
206 |     let mut tokens = TokenStream2::new();
207 |     for (i, variant) in data.variants.iter().enumerate() {
208 |       if i != 0 {
209 |         <Token![||]>::default().to_tokens(&mut tokens);
210 |       }
211 |       tokens.append_all(match first_field(&variant.fields) {
212 |         Some(Field { ty, .. }) => quote!(<#ty>::maybe(tokens)?),
213 |         None => quote!(true),
214 |       });
215 |     }
216 |     quote!(std::result::Result::Ok(#tokens))
217 |   };
218 |   let maybe = quote! {
219 |     fn maybe(tokens: &mut #ts_type) -> laps::span::Result<bool> {
220 |       #result
221 |     }
222 |   };
223 |   Ok((parse, maybe))
224 | }
225 | 
226 | /// Generates the constructor for the given fields.
227 | fn gen_constructor(fields: &Fields) -> TokenStream2 {
228 |   match fields {
229 |     Fields::Named(f) => {
230 |       let fields = f
231 |         .named
232 |         .iter()
233 |         .map(|Field { ident, .. }| quote!(#ident: tokens.parse()?,));
234 |       quote!({#(#fields)*})
235 |     }
236 |     Fields::Unnamed(f) => {
237 |       let fields = iter::repeat(quote!(tokens.parse()?,)).take(f.unnamed.len());
238 |       quote!((#(#fields)*))
239 |     }
240 |     Fields::Unit => quote!(),
241 |   }
242 | }
243 | 
244 | /// Generates the body of the `maybe` method by the given tokens.
245 | fn gen_maybe(starts_with: Vec<Expr>) -> TokenStream2 {
246 |   let maybe_chain: TokenStream2 = starts_with
247 |     .into_iter()
248 |     .flat_map(|expr| quote!(.maybe(#expr)?))
249 |     .collect();
250 |   quote!(tokens.lookahead()#maybe_chain.result())
251 | }
252 | 
253 | /// Returns the first field of the given fields.
254 | fn first_field(fields: &Fields) -> Option<&Field> {
255 |   match fields {
256 |     Fields::Named(f) => f.named.first(),
257 |     Fields::Unnamed(f) => f.unnamed.first(),
258 |     Fields::Unit => None,
259 |   }
260 | }
261 | 


--------------------------------------------------------------------------------
/laps_macros/src/spanned.rs:
--------------------------------------------------------------------------------
  1 | use crate::utils::{error, return_error};
  2 | use proc_macro::TokenStream;
  3 | use proc_macro2::{Ident, Literal, TokenStream as TokenStream2};
  4 | use quote::quote;
  5 | use syn::{
  6 |   punctuated::Punctuated, spanned::Spanned, Attribute, Data, DataEnum, DataStruct, DeriveInput,
  7 |   Field, Fields, Meta, Result, Token, Variant,
  8 | };
  9 | 
 10 | /// Entry function of `#[derive(Spanned)]`.
 11 | pub fn derive_spanned(item: TokenStream) -> Result<TokenStream> {
 12 |   // parse input tokens
 13 |   let input: DeriveInput = syn::parse(item)?;
 14 |   // generate trait implementation
 15 |   let name = &input.ident;
 16 |   let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
 17 |   let body = match &input.data {
 18 |     Data::Struct(DataStruct {
 19 |       fields: Fields::Named(f),
 20 |       ..
 21 |     }) if !f.named.is_empty() => gen_struct_body(&f.named)?,
 22 |     Data::Struct(DataStruct {
 23 |       fields: Fields::Unnamed(f),
 24 |       ..
 25 |     }) if !f.unnamed.is_empty() => gen_struct_body(&f.unnamed)?,
 26 |     Data::Enum(DataEnum { variants, .. }) if !variants.is_empty() => gen_enum_body(variants)?,
 27 |     _ => {
 28 |       return_error!("`#[derive(Spanned)]` only supports non-unit and non-empty structs and enums");
 29 |     }
 30 |   };
 31 |   Ok(TokenStream::from(quote! {
 32 |     impl #impl_generics laps::span::Spanned
 33 |     for #name #ty_generics #where_clause {
 34 |       fn span(&self) -> laps::span::Span {
 35 |         use laps::span::TrySpan;
 36 |         #body
 37 |       }
 38 |     }
 39 |   }))
 40 | }
 41 | 
 42 | /// Generates body of the `span` method for struct fields.
 43 | fn gen_struct_body(fields: &Punctuated<Field, Token![,]>) -> Result<TokenStream2> {
 44 |   let arm = gen_fields_span(quote!(Self), fields)?;
 45 |   Ok(quote!(match self { #arm }))
 46 | }
 47 | 
 48 | /// Generates body of the `span` method for enum variants.
 49 | fn gen_enum_body(variants: &Punctuated<Variant, Token![,]>) -> Result<TokenStream2> {
 50 |   let mut arms = TokenStream2::new();
 51 |   for variant in variants {
 52 |     let name = &variant.ident;
 53 |     let name = quote!(Self::#name);
 54 |     let arm = match &variant.fields {
 55 |       Fields::Named(f) if !f.named.is_empty() => gen_fields_span(name, &f.named)?,
 56 |       Fields::Unnamed(f) if !f.unnamed.is_empty() => gen_fields_span(name, &f.unnamed)?,
 57 |       _ => return_error!(
 58 |         variant.span(),
 59 |         "`#[derive(Spanned)]` only supports non-unit and non-empty variants in enums"
 60 |       ),
 61 |     };
 62 |     arms.extend(arm);
 63 |   }
 64 |   Ok(quote!(match self { #arms }))
 65 | }
 66 | 
 67 | /// Generates span of the given fields.
 68 | fn gen_fields_span(
 69 |   name: TokenStream2,
 70 |   fields: &Punctuated<Field, Token![,]>,
 71 | ) -> Result<TokenStream2> {
 72 |   let (exts, ts_ids) = gen_fields_extract(name, fields)?;
 73 |   let first = gen_first_span(ts_ids.iter())?;
 74 |   let last = gen_first_span(ts_ids.iter().rev())?;
 75 |   Ok(quote!(#exts => #first.into_end_updated(#last),))
 76 | }
 77 | 
 78 | /// Generates the extraction of the given fields.
 79 | fn gen_fields_extract(
 80 |   name: TokenStream2,
 81 |   fields: &Punctuated<Field, Token![,]>,
 82 | ) -> Result<(TokenStream2, Vec<(bool, Ident)>)> {
 83 |   let mut exts = TokenStream2::new();
 84 |   let mut ts_ids = Vec::new();
 85 |   for (i, field) in fields.iter().enumerate() {
 86 |     let ts = has_try_span(&field.attrs)?;
 87 |     let span = field.span();
 88 |     let (ext, ts_id) = if let Some(id) = &field.ident {
 89 |       let new_id = Ident::new(&format!("_{id}"), span);
 90 |       (quote!(#id: #new_id,), (ts, new_id))
 91 |     } else {
 92 |       let index = Literal::usize_unsuffixed(i);
 93 |       let id = Ident::new(&format!("_f{i}"), span);
 94 |       (quote!(#index: #id,), (ts, id))
 95 |     };
 96 |     exts.extend(ext);
 97 |     ts_ids.push(ts_id);
 98 |   }
 99 |   Ok((quote!(#name { #exts }), ts_ids))
100 | }
101 | 
102 | /// Returns `true` if the given attributes contains `try_span`.
103 | fn has_try_span(attrs: &[Attribute]) -> Result<bool> {
104 |   let mut result = false;
105 |   for attr in attrs {
106 |     match &attr.meta {
107 |       Meta::Path(path) if path.is_ident("try_span") => {
108 |         if result {
109 |           return_error!(attr.span(), "attribute `try_span` is bound more than once");
110 |         }
111 |         result = true;
112 |       }
113 |       _ => {}
114 |     }
115 |   }
116 |   Ok(result)
117 | }
118 | 
119 | /// Generates the first span of the given iterator of `try_span` flag
120 | /// and identifier.
121 | fn gen_first_span<'a, I>(mut ts_ids: I) -> Result<TokenStream2>
122 | where
123 |   I: Iterator<Item = &'a (bool, Ident)>,
124 | {
125 |   let (ts, id) = ts_ids.next().ok_or(error!(
126 |     "attribute `try_span` can not be applied to all the fields"
127 |   ))?;
128 |   Ok(if *ts {
129 |     let span = gen_first_span(ts_ids)?;
130 |     quote!(match #id.try_span() {
131 |       std::option::Option::Some(span) => span,
132 |       std::option::Option::None => #span,
133 |     })
134 |   } else {
135 |     quote!(#id.span())
136 |   })
137 | }
138 | 


--------------------------------------------------------------------------------
/laps_macros/src/token_ast.rs:
--------------------------------------------------------------------------------
  1 | use crate::utils::{ident, return_error};
  2 | use proc_macro::TokenStream;
  3 | use proc_macro2::{Ident, TokenStream as TokenStream2};
  4 | use quote::quote;
  5 | use syn::{
  6 |   braced, bracketed,
  7 |   parse::{Parse, ParseStream},
  8 |   punctuated::{Pair, Punctuated},
  9 |   spanned::Spanned,
 10 |   Attribute, Expr, GenericArgument, LitStr, Meta, Pat, Path, PathArguments, PathSegment, Result,
 11 |   Token, Type, Visibility,
 12 | };
 13 | 
 14 | struct TokenAst {
 15 |   attrs: Vec<Attribute>,
 16 |   derives: Vec<Attribute>,
 17 |   vis: Visibility,
 18 |   current_mod: Path,
 19 |   name: Ident,
 20 |   token_kind: Type,
 21 |   arms: Punctuated<TokenAstArm, Token![,]>,
 22 | }
 23 | 
 24 | impl Parse for TokenAst {
 25 |   fn parse(input: ParseStream) -> Result<Self> {
 26 |     // parse attributes and derives
 27 |     let (derives, attrs) = input
 28 |       .call(Attribute::parse_outer)?
 29 |       .into_iter()
 30 |       .partition(|attr| matches!(&attr.meta, Meta::List(l) if l.path.is_ident("derive")));
 31 |     // parse visibility and `macro`
 32 |     let vis = input.parse()?;
 33 |     input.parse::<Token![macro]>()?;
 34 |     // parse current module, name and token kind
 35 |     let mut current_mod: Path = input.parse()?;
 36 |     let (name, token_kind) = match current_mod.segments.pop() {
 37 |       Some(Pair::End(PathSegment {
 38 |         ident,
 39 |         arguments: PathArguments::AngleBracketed(mut a),
 40 |       })) => match a.args.pop() {
 41 |         Some(Pair::End(GenericArgument::Type(ty))) if a.args.is_empty() => (ident, ty),
 42 |         _ => return_error!(a.span(), "must have only one type parameter"),
 43 |       },
 44 |       _ => return_error!(current_mod.span(), "invalid path"),
 45 |     };
 46 |     // parse arms
 47 |     let brace_content;
 48 |     braced!(brace_content in input);
 49 |     let arms = Punctuated::parse_terminated(&brace_content)?;
 50 |     Ok(Self {
 51 |       attrs,
 52 |       derives,
 53 |       vis,
 54 |       current_mod,
 55 |       name,
 56 |       token_kind,
 57 |       arms,
 58 |     })
 59 |   }
 60 | }
 61 | 
 62 | struct TokenAstArm {
 63 |   token: TokenStream2,
 64 |   pat: Pat,
 65 |   guard: Option<Expr>,
 66 |   prompt: Option<LitStr>,
 67 | }
 68 | 
 69 | impl Parse for TokenAstArm {
 70 |   fn parse(input: ParseStream) -> Result<Self> {
 71 |     // parse token
 72 |     let bracket_content;
 73 |     bracketed!(bracket_content in input);
 74 |     let token = bracket_content.parse()?;
 75 |     // parse arm
 76 |     input.parse::<Token![=>]>()?;
 77 |     let brace_content;
 78 |     braced!(brace_content in input);
 79 |     // parse `kind:`
 80 |     let kind: Ident = brace_content.parse()?;
 81 |     if kind != "kind" {
 82 |       return_error!(kind.span(), "must be `kind`");
 83 |     }
 84 |     brace_content.parse::<Token![:]>()?;
 85 |     // parse pattern
 86 |     let pat = Pat::parse_multi_with_leading_vert(&brace_content)?;
 87 |     // parse if guard
 88 |     let guard = if brace_content.peek(Token![if]) {
 89 |       brace_content.parse::<Token![if]>()?;
 90 |       Some(brace_content.parse()?)
 91 |     } else {
 92 |       None
 93 |     };
 94 |     // parse the optional prompt part
 95 |     let prompt = if brace_content.peek(Token![,]) && brace_content.peek2(syn::Ident) {
 96 |       brace_content.parse::<Token![,]>()?;
 97 |       // parse `prompt:`
 98 |       let prompt_ident: Ident = brace_content.parse()?;
 99 |       if prompt_ident != "prompt" {
100 |         return_error!(prompt_ident.span(), "must be `prompt`");
101 |       }
102 |       brace_content.parse::<Token![:]>()?;
103 |       // parse prompt
104 |       let prompt = brace_content.parse()?;
105 |       // parse the optional comma
106 |       if brace_content.peek(Token![,]) {
107 |         brace_content.parse::<Token![,]>()?;
108 |       }
109 |       Some(prompt)
110 |     } else {
111 |       None
112 |     };
113 |     Ok(Self {
114 |       token,
115 |       pat,
116 |       guard,
117 |       prompt,
118 |     })
119 |   }
120 | }
121 | 
122 | /// Entry function of `token_ast`.
123 | pub fn token_ast(item: TokenStream) -> Result<TokenStream> {
124 |   // parse macro input
125 |   let input: TokenAst = syn::parse(item)?;
126 |   // generate AST definitions
127 |   let (ast_defs, ast_names) = gen_ast_defs(&input)?;
128 |   // generate macro definition
129 |   let macro_def = gen_macro_def(&input, ast_names);
130 |   Ok(TokenStream::from(quote!(#ast_defs #macro_def)))
131 | }
132 | 
133 | /// Generates AST definitions.
134 | ///
135 | /// Returns definitions and AST names.
136 | fn gen_ast_defs(input: &TokenAst) -> Result<(TokenStream2, Vec<TokenStream2>)> {
137 |   // generate AST names
138 |   let names = (0..input.arms.len()).map(|i| ident(&format!("Token{i}")));
139 |   // generate AST definitions
140 |   let kind = &input.token_kind;
141 |   let field_vis = match &input.vis {
142 |     Visibility::Inherited => quote!(pub(super)),
143 |     Visibility::Restricted(res) => {
144 |       let path = res.path.as_ref();
145 |       match path.segments.first() {
146 |         Some(p) if p.arguments.is_none() && path.leading_colon.is_none() => {
147 |           if p.ident == "self" {
148 |             quote!(pub(super))
149 |           } else if p.ident == "crate" {
150 |             quote!(pub(in #path))
151 |           } else {
152 |             quote!(pub(in super::#path))
153 |           }
154 |         }
155 |         _ => return_error!(path.span(), "invalid path in visibility"),
156 |       }
157 |     }
158 |     vis => quote!(#vis),
159 |   };
160 |   let token = quote!(laps::token::Token<#kind>);
161 |   let derive = if input.derives.is_empty() {
162 |     quote!(#[derive(PartialEq)])
163 |   } else {
164 |     let derives = &input.derives;
165 |     quote!(#(#derives)*)
166 |   };
167 |   let defs: Vec<_> = names
168 |     .clone()
169 |     .zip(&input.arms)
170 |     .map(|(name, TokenAstArm { pat, guard, prompt, .. })| {
171 |       let if_guard = guard.as_ref().map(|e| quote!(if #e));
172 |       let parse_body = match prompt {
173 |         Some(prompt) => quote! {
174 |           let token = tokens.next_token()?;
175 |           match &token.kind {
176 |             #[allow(unused_parens)]
177 |             #pat #if_guard => std::result::Result::Ok(Self(token)),
178 |             _ => laps::return_error!(token.span, std::concat!("expected ", #prompt, ", found {}"), token),
179 |           }
180 |         },
181 |         None => match if_guard {
182 |           Some(e) => return_error!(e.span(), "if-guard must be used with `prompt`"),
183 |           None => quote!(tokens.expect(#pat).map(Self)),
184 |         },
185 |       };
186 |       Ok(quote! {
187 |         #derive
188 |         pub struct #name(#field_vis #token);
189 |         impl #name {
190 |           /// Unwraps the inner token kind and returns its value.
191 |           ///
192 |           /// # Panics
193 |           ///
194 |           /// Panics if the inner token kind does not contain a value of
195 |           /// the type `T`.
196 |           #field_vis fn unwrap<T, E>(self) -> T
197 |           where
198 |             T: std::convert::TryFrom<#kind, Error = E>,
199 |             E: std::fmt::Debug,
200 |           {
201 |             self.0.kind.try_into().unwrap()
202 |           }
203 | 
204 |           /// Unwraps the inner token kind and returns its value.
205 |           ///
206 |           /// # Panics
207 |           ///
208 |           /// Panics if the inner token kind does not contain a value of
209 |           /// the type `T`.
210 |           #field_vis fn unwrap_ref<'a, T, E>(&'a self) -> T
211 |           where
212 |             T: std::convert::TryFrom<&'a #kind, Error = E>,
213 |             E: std::fmt::Debug,
214 |           {
215 |             self.0.as_ref().try_into().unwrap()
216 |           }
217 |         }
218 |         impl<TS> laps::parse::Parse<TS> for #name
219 |         where
220 |           TS: laps::token::TokenStream<Token = #token>
221 |         {
222 |           fn parse(tokens: &mut TS) -> laps::span::Result<Self> {
223 |             #parse_body
224 |           }
225 |           fn maybe(tokens: &mut TS) -> laps::span::Result<bool> {
226 |             #[allow(unused_parens)]
227 |             std::result::Result::Ok(matches!(&tokens.peek()?.kind, #pat #if_guard))
228 |           }
229 |         }
230 |         impl laps::span::Spanned for #name {
231 |           fn span(&self) -> laps::span::Span {
232 |             self.0.span()
233 |           }
234 |         }
235 |       })
236 |     })
237 |     .collect::<Result<_>>()?;
238 |   let vis = &input.vis;
239 |   let mod_name = ident(&format!("__token_ast_{}", input.name));
240 |   let ast_defs = quote! {
241 |     #[doc(hidden)]
242 |     #[allow(non_snake_case)]
243 |     #vis mod #mod_name {
244 |       use super::*;
245 |       #(#defs)*
246 |     }
247 |   };
248 |   // generate full paths for all ASTs
249 |   let current_mod = &input.current_mod;
250 |   let ast_names = names.map(|ident| quote!(#current_mod #mod_name::#ident));
251 |   Ok((ast_defs, ast_names.collect()))
252 | }
253 | 
254 | /// Generates the macro definition.
255 | fn gen_macro_def(input: &TokenAst, ast_names: Vec<TokenStream2>) -> TokenStream2 {
256 |   // generate arms
257 |   let arms = ast_names
258 |     .into_iter()
259 |     .zip(&input.arms)
260 |     .map(|(name, TokenAstArm { token, .. })| quote!([#token] => {#name};));
261 |   // generate definition
262 |   let attrs = &input.attrs;
263 |   let name = &input.name;
264 |   let macro_def = quote! {
265 |     #(#attrs)*
266 |     macro_rules! #name {
267 |       #(#arms)*
268 |     }
269 |   };
270 |   // generate definition with visibility
271 |   match &input.vis {
272 |     Visibility::Inherited => quote!(#macro_def),
273 |     Visibility::Public(_) => quote!(#[macro_export] #macro_def),
274 |     vis => quote! {
275 |       #macro_def
276 |       #vis use #name;
277 |     },
278 |   }
279 | }
280 | 


--------------------------------------------------------------------------------
/laps_macros/src/token_kind.rs:
--------------------------------------------------------------------------------
  1 | use crate::utils::{camel_to_lower, parse_doc_comments, return_error};
  2 | use proc_macro::TokenStream;
  3 | use proc_macro2::TokenStream as TokenStream2;
  4 | use quote::quote;
  5 | use syn::{Fields, ItemEnum, Result};
  6 | 
  7 | /// Entry function of `#[token_kind]`.
  8 | pub fn token_kind(attr: TokenStream, item: TokenStream) -> Result<TokenStream> {
  9 |   // parse input
 10 |   if !attr.is_empty() {
 11 |     return_error!("only `#[token_kind]` can be used");
 12 |   }
 13 |   let input = syn::parse(item)?;
 14 |   // generate trait implementations
 15 |   let froms = gen_from_impls(&input);
 16 |   let display = gen_display_impl(&input);
 17 |   Ok(TokenStream::from(quote! {
 18 |     #[derive(Clone, PartialEq)]
 19 |     #input
 20 |     #froms #display
 21 |   }))
 22 | }
 23 | 
 24 | /// Generates `From` and `TryFrom` trait implementations.
 25 | fn gen_from_impls(input: &ItemEnum) -> TokenStream2 {
 26 |   let mut impls = TokenStream2::new();
 27 |   let ident = &input.ident;
 28 |   // for all variants
 29 |   for variant in &input.variants {
 30 |     let variant_name = &variant.ident;
 31 |     // check if is unnamed, and has only one field
 32 |     match &variant.fields {
 33 |       Fields::Unnamed(f) if f.unnamed.len() == 1 => {
 34 |         let ty = &f.unnamed.first().unwrap().ty;
 35 |         impls.extend(quote! {
 36 |           impl std::convert::From<#ty> for #ident {
 37 |             fn from(v: #ty) -> Self {
 38 |               Self::#variant_name(v)
 39 |             }
 40 |           }
 41 |           impl std::convert::TryFrom<#ident> for #ty {
 42 |             type Error = ();
 43 |             fn try_from(v: #ident) -> std::result::Result<Self, Self::Error> {
 44 |               match v {
 45 |                 #ident::#variant_name(v) => std::result::Result::Ok(v),
 46 |                 _ => std::result::Result::Err(()),
 47 |               }
 48 |             }
 49 |           }
 50 |           impl<'a> std::convert::TryFrom<&'a #ident> for &'a #ty {
 51 |             type Error = ();
 52 |             fn try_from(v: &'a #ident) -> std::result::Result<Self, Self::Error> {
 53 |               match v {
 54 |                 #ident::#variant_name(v) => std::result::Result::Ok(v),
 55 |                 _ => std::result::Result::Err(()),
 56 |               }
 57 |             }
 58 |           }
 59 |         });
 60 |       }
 61 |       _ => {}
 62 |     }
 63 |   }
 64 |   impls
 65 | }
 66 | 
 67 | /// Generates `Display` trait implementation.
 68 | fn gen_display_impl(input: &ItemEnum) -> TokenStream2 {
 69 |   let ident = &input.ident;
 70 |   // generate match arms
 71 |   let mut arms = TokenStream2::new();
 72 |   for variant in &input.variants {
 73 |     let ident = &variant.ident;
 74 |     let prompt = parse_doc_comments(&variant.attrs).map_or_else(
 75 |       || camel_to_lower(ident.to_string()),
 76 |       |mut p| {
 77 |         p.make_ascii_lowercase();
 78 |         if p.ends_with('.') {
 79 |           p.pop();
 80 |         }
 81 |         p
 82 |       },
 83 |     );
 84 |     arms.extend(match &variant.fields {
 85 |       Fields::Unnamed(f) if f.unnamed.len() == 1 => {
 86 |         let prompt = prompt + " `{}`";
 87 |         quote!(Self::#ident(v) => std::write!(f, #prompt, v),)
 88 |       }
 89 |       Fields::Named(_) => quote!(Self::#ident { .. } => std::write!(f, #prompt),),
 90 |       Fields::Unnamed(_) => quote!(Self::#ident(..) => std::write!(f, #prompt),),
 91 |       Fields::Unit => quote!(Self::#ident => std::write!(f, #prompt),),
 92 |     });
 93 |   }
 94 |   quote! {
 95 |     impl std::fmt::Display for #ident {
 96 |       fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
 97 |         match self {
 98 |           #arms
 99 |         }
100 |       }
101 |     }
102 |   }
103 | }
104 | 


--------------------------------------------------------------------------------
/laps_macros/src/utils.rs:
--------------------------------------------------------------------------------
  1 | use proc_macro2::{Ident, Span};
  2 | use syn::parse::{Parse, ParseStream};
  3 | use syn::{parenthesized, Attribute, Expr, ExprLit, Lit, Meta, MetaNameValue, Result};
  4 | 
  5 | /// Generates a compile error.
  6 | macro_rules! error {
  7 |   ($msg:expr) => {
  8 |     syn::Error::new(proc_macro2::Span::call_site(), $msg)
  9 |   };
 10 |   ($span:expr, $msg:expr) => {
 11 |     syn::Error::new($span, $msg)
 12 |   };
 13 | }
 14 | pub(crate) use error;
 15 | 
 16 | /// Generates a compile error and returns.
 17 | macro_rules! return_error {
 18 |   ($msg:expr) => {
 19 |     return Err(crate::utils::error!($msg))
 20 |   };
 21 |   ($span:expr, $msg:expr) => {
 22 |     return Err(crate::utils::error!($span, $msg))
 23 |   };
 24 | }
 25 | pub(crate) use return_error;
 26 | 
 27 | /// Converts `Result<TokenStream>` to `TokenStream`.
 28 | macro_rules! result_to_tokens {
 29 |   ($result:expr) => {
 30 |     match $result {
 31 |       Ok(data) => data,
 32 |       Err(err) => err.to_compile_error().into(),
 33 |     }
 34 |   };
 35 | }
 36 | pub(crate) use result_to_tokens;
 37 | 
 38 | /// Helper macro for handling attributes like `#[name(...)]`.
 39 | macro_rules! match_attr {
 40 |   (for $id:ident in $attrs:ident if $name:literal && $cond:expr => $body:block) => {
 41 |     for $id in $attrs {
 42 |       match &$id.meta {
 43 |         syn::Meta::List($id) if $id.path.is_ident($name) => {
 44 |           if $cond $body else {
 45 |             use syn::spanned::Spanned;
 46 |             crate::utils::return_error!(
 47 |               $id.span(),
 48 |               concat!("attribute `", $name, "` is bound more than once")
 49 |             );
 50 |           }
 51 |         }
 52 |         _ => {}
 53 |       }
 54 |     }
 55 |   };
 56 | }
 57 | pub(crate) use match_attr;
 58 | 
 59 | /// Data of `(...)`.
 60 | pub struct Parenthesized<T>(pub T);
 61 | 
 62 | impl<T: Parse> Parse for Parenthesized<T> {
 63 |   fn parse(input: ParseStream) -> Result<Self> {
 64 |     let content;
 65 |     parenthesized!(content in input);
 66 |     Ok(Self(content.parse()?))
 67 |   }
 68 | }
 69 | 
 70 | /// Creates a new identifier by the given string.
 71 | pub fn ident(s: &str) -> Ident {
 72 |   Ident::new(s, Span::call_site())
 73 | }
 74 | 
 75 | /// Parses doc comments.
 76 | pub fn parse_doc_comments(attrs: &[Attribute]) -> Option<String> {
 77 |   attrs
 78 |     .iter()
 79 |     .filter_map(|attr| match &attr.meta {
 80 |       Meta::NameValue(MetaNameValue {
 81 |         path,
 82 |         value: Expr::Lit(ExprLit {
 83 |           lit: Lit::Str(s), ..
 84 |         }),
 85 |         ..
 86 |       }) if path.is_ident("doc") => Some(s.value().trim().to_string()),
 87 |       _ => None,
 88 |     })
 89 |     .reduce(|mut s, cur| {
 90 |       s.reserve(cur.len() + 1);
 91 |       s.push(' ');
 92 |       s.push_str(&cur);
 93 |       s
 94 |     })
 95 |     .and_then(|s| {
 96 |       let s = s.trim().to_string();
 97 |       (!s.is_empty()).then_some(s)
 98 |     })
 99 | }
100 | 
101 | /// Converts the given camel case string to lower case space-delimited string.
102 | pub fn camel_to_lower(s: String) -> String {
103 |   let mut ans = String::new();
104 |   for c in s.chars() {
105 |     if c.is_ascii_uppercase() {
106 |       if !ans.is_empty() {
107 |         ans.push(' ');
108 |       }
109 |       ans.push(c.to_ascii_lowercase());
110 |     } else {
111 |       ans.push(c);
112 |     }
113 |   }
114 |   ans
115 | }
116 | 


--------------------------------------------------------------------------------
/laps_regex/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "laps_regex"
 3 | version = "0.1.1"
 4 | authors = ["MaxXing <x@MaxXSoft.net>"]
 5 | edition = "2021"
 6 | description = "Tools for generating NFAs, DFAs and state-transition tables from regular expressions."
 7 | repository = "https://github.com/MaxXSoft/laps"
 8 | documentation = "https://docs.rs/laps_regex"
 9 | categories = ["parsing", "text-processing"]
10 | keywords = ["laps", "regex", "lexer", "parser", "automaton"]
11 | readme = "README.md"
12 | license = "MIT OR Apache-2.0"
13 | 
14 | [dependencies]
15 | rayon = "1.8.0"
16 | regex-syntax = "0.7.2"
17 | 


--------------------------------------------------------------------------------
/laps_regex/README.md:
--------------------------------------------------------------------------------
 1 | # laps_regex
 2 | 
 3 | Tools for generating NFAs, DFAs and state-transition tables from regular expressions.
 4 | 
 5 | This library is built for crate [`laps`](https://crates.io/crates/laps).
 6 | 
 7 | ## Example: Matching UTF-8 Strings
 8 | 
 9 | ```rust
10 | use laps_regex::re::{RegexBuilder, CharsMatcher};
11 | 
12 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
13 | enum Token {
14 |   Keyword,
15 |   Identifier,
16 |   Number,
17 | }
18 | 
19 | let matcher: CharsMatcher<_> = RegexBuilder::new()
20 |   .add("if|else|while", Token::Keyword)
21 |   .add("[_a-zA-Z][_a-zA-Z0-9]*", Token::Identifier)
22 |   .add("[0-9]|[1-9][0-9]+", Token::Number)
23 |   .build()
24 |   .unwrap();
25 | 
26 | assert_eq!(matcher.is_str_match("if"), Some(&Token::Keyword));
27 | assert_eq!(matcher.is_str_match("while1"), Some(&Token::Identifier));
28 | assert_eq!(matcher.is_str_match("42"), Some(&Token::Number));
29 | assert_eq!(matcher.is_str_match("?"), None);
30 | ```
31 | 
32 | ## Example: Matching Bytes
33 | 
34 | ```rust
35 | use laps_regex::re::{RegexBuilder, BytesMatcher};
36 | 
37 | let matcher: BytesMatcher<_> = RegexBuilder::new()
38 |   .add("hello|hi", 0)
39 |   .add("goodbye|bye", 1)
40 |   .build_bytes()
41 |   .unwrap();
42 | 
43 | assert_eq!(matcher.is_match(b"hello"), Some(&0));
44 | assert_eq!(matcher.is_match(&[0x62, 0x79, 0x65]), Some(&1));
45 | ```
46 | 
47 | ## License
48 | 
49 | Copyright (C) 2022-2023 MaxXing. Licensed under either of Apache 2.0 or MIT at your option.
50 | 


--------------------------------------------------------------------------------
/laps_regex/src/dfa.rs:
--------------------------------------------------------------------------------
  1 | //! Deterministic finite automaton ([`DFA`]) related implementations.
  2 | //!
  3 | //! A DFA can be built from a nondeterministic finite automaton ([`NFA`]).
  4 | 
  5 | use crate::fa::{CachedClosures, Closure, ClosureBuilder, DenseFA, State};
  6 | use crate::nfa::NFA;
  7 | use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
  8 | use std::hash::Hash;
  9 | use std::{fmt, io};
 10 | 
 11 | /// Helper macro for finding the first matching tag of the given states.
 12 | macro_rules! first_tag {
 13 |   ($nfa_tags:expr, $states:expr) => {
 14 |     $nfa_tags
 15 |       .iter()
 16 |       .find_map(|(tag, id)| $states.contains(id).then(|| tag.clone()))
 17 |   };
 18 | }
 19 | 
 20 | /// A deterministic finite automaton (DFA)
 21 | /// with symbol type `S` and tag type `T`.
 22 | #[derive(Debug)]
 23 | pub struct DFA<S, T> {
 24 |   fa: DenseFA<Vec<(S, S)>>,
 25 |   tags: HashMap<usize, T>,
 26 | }
 27 | 
 28 | impl<S, T> DFA<S, T> {
 29 |   /// Creates a new DFA from the given [`NFA`].
 30 |   ///
 31 |   /// Set `enable_par` to [`Some(true)`] to construct the DFA in parallel,
 32 |   /// [`Some(false)`] to disable parallelization, and [`None`] to choose
 33 |   /// automatically.
 34 |   pub fn new(nfa: NFA<S, T>, enable_par: Option<bool>) -> Self
 35 |   where
 36 |     S: Clone + Hash + Eq + Ord + Sync,
 37 |     T: Clone + Hash + Eq + Ord,
 38 |   {
 39 |     let (dfa, syms) = Self::new_from_nfa(nfa, enable_par);
 40 |     let partition = Self::minimize(&dfa, &syms);
 41 |     Self::rebuild(dfa, syms, partition)
 42 |   }
 43 | 
 44 |   /// Creates a new DFA from the given [`NFA`]. Returns the created DFA
 45 |   /// and its symbol set.
 46 |   ///
 47 |   /// The created DFA is not minimal.
 48 |   fn new_from_nfa(nfa: NFA<S, T>, enable_par: Option<bool>) -> (Self, Vec<Vec<(S, S)>>)
 49 |   where
 50 |     S: Clone + Hash + Eq + Sync,
 51 |     T: Clone + Ord,
 52 |   {
 53 |     let (nfa, nfa_tags) = nfa.into_fa_tags();
 54 |     // stuffs for maintaining tags mappings between NFA and DFA
 55 |     let mut nfa_tags: Vec<_> = nfa_tags.into_iter().map(|(id, tag)| (tag, id)).collect();
 56 |     nfa_tags.sort_unstable();
 57 |     // create DFA, update the initial state
 58 |     let mut init_cached = CachedClosures::new();
 59 |     let init_id = nfa.init_id();
 60 |     let cb = ClosureBuilder::from(nfa);
 61 |     let init = cb.epsilon_closure(&mut init_cached, [init_id]);
 62 |     let mut fa = DenseFA::new();
 63 |     let mut tags = HashMap::new();
 64 |     if let Some(tag) = first_tag!(nfa_tags, init) {
 65 |       fa.set_final_state(fa.init_id());
 66 |       tags.insert(fa.init_id(), tag);
 67 |     }
 68 |     // create other states
 69 |     let syms: Vec<_> = cb.symbol_set().into_iter().collect();
 70 |     let constructor = Constructor {
 71 |       nfa_tags,
 72 |       cb,
 73 |       tags,
 74 |       states: vec![init.clone()],
 75 |       ids: HashMap::from([(init, fa.init_id())]),
 76 |       fa,
 77 |       enable_par,
 78 |     };
 79 |     (constructor.construct(init_cached, &syms).into_dfa(), syms)
 80 |   }
 81 | 
 82 |   /// Creates a minimal DFA by the given DFA and symbol set.
 83 |   fn minimize(dfa: &Self, syms: &[Vec<(S, S)>]) -> VecDeque<HashSet<usize>>
 84 |   where
 85 |     S: Ord + Hash,
 86 |     T: Hash + Eq,
 87 |   {
 88 |     let Self { fa, tags } = dfa;
 89 |     // get the initial partition
 90 |     let mut partition = tags
 91 |       .iter()
 92 |       .fold(
 93 |         HashMap::new(),
 94 |         |mut m: HashMap<_, HashSet<_>>, (id, tag)| {
 95 |           m.entry(tag).or_default().insert(*id);
 96 |           m
 97 |         },
 98 |       )
 99 |       .into_values()
100 |       .collect::<VecDeque<_>>();
101 |     let others: HashSet<_> = fa
102 |       .states()
103 |       .keys()
104 |       .filter_map(|id| (!fa.finals().contains(id)).then_some(*id))
105 |       .collect();
106 |     if !others.is_empty() {
107 |       partition.push_back(others);
108 |     }
109 |     // get new partition until there are no changes
110 |     let mut num_states = partition.len();
111 |     loop {
112 |       // create mapping from state IDs to partition index
113 |       let index_map: HashMap<_, _> = partition
114 |         .iter()
115 |         .enumerate()
116 |         .flat_map(|(i, ids)| ids.iter().map(move |id| (*id, i)))
117 |         .collect();
118 |       for _ in 0..num_states {
119 |         let states = partition.pop_front().unwrap();
120 |         // check if can be divided
121 |         if states.len() <= 1 {
122 |           partition.push_back(states);
123 |           continue;
124 |         }
125 |         // get a new division
126 |         let mut division: HashMap<_, HashSet<usize>> = HashMap::new();
127 |         for id in states {
128 |           // get division ID set
129 |           let div_id: BTreeSet<_> = syms
130 |             .iter()
131 |             .filter_map(|s| {
132 |               // get the next state after accepting symbol `s`
133 |               let next = fa.state(id).unwrap().next_state(s);
134 |               // get partition index of the next state
135 |               let index = next.and_then(|next| index_map.get(&next).copied());
136 |               index.map(|i| (s, i))
137 |             })
138 |             .collect();
139 |           // update division
140 |           division.entry(div_id).or_default().insert(id);
141 |         }
142 |         // add to the partition
143 |         partition.extend(division.into_values());
144 |       }
145 |       // check and update the number of states
146 |       if partition.len() == num_states {
147 |         break;
148 |       }
149 |       num_states = partition.len();
150 |     }
151 |     partition
152 |   }
153 | 
154 |   /// Rebuilds a DFA by the given partition.
155 |   fn rebuild(dfa: Self, syms: Vec<Vec<(S, S)>>, partition: VecDeque<HashSet<usize>>) -> Self
156 |   where
157 |     S: Clone + Eq + Hash,
158 |     T: Clone,
159 |   {
160 |     let Self {
161 |       fa: dfa,
162 |       tags: dfa_tags,
163 |     } = dfa;
164 |     let mut fa = DenseFA::new();
165 |     // rebuild mapping of states
166 |     let mut tags = HashMap::new();
167 |     let partition: Vec<_> = partition
168 |       .into_iter()
169 |       .map(|ids| {
170 |         // add new state
171 |         let id = if ids.contains(&dfa.init_id()) {
172 |           fa.init_id()
173 |         } else {
174 |           fa.add_state()
175 |         };
176 |         // check if is a final state
177 |         if let Some(tag) = ids.iter().find_map(|id| dfa_tags.get(id)) {
178 |           fa.set_final_state(id);
179 |           tags.insert(id, tag.clone());
180 |         }
181 |         (ids, id)
182 |       })
183 |       .collect();
184 |     let states: HashMap<_, _> = partition
185 |       .iter()
186 |       .flat_map(|(ids, cur_id)| ids.iter().map(|id| (*id, *cur_id)))
187 |       .collect();
188 |     // rebuild edges
189 |     for (ids, cur_id) in &partition {
190 |       let state = fa.state_mut(*cur_id).unwrap();
191 |       let mut added_edges = HashSet::new();
192 |       for id in ids {
193 |         for s in &syms {
194 |           if added_edges.contains(s) {
195 |             continue;
196 |           }
197 |           // get the next state after accepting symbol `s`
198 |           let next = dfa.state(*id).unwrap().next_state(s);
199 |           if let Some(next) = next {
200 |             // add a new edge
201 |             state.add(s.clone(), states[&next]);
202 |             added_edges.insert(s.clone());
203 |           }
204 |         }
205 |       }
206 |     }
207 |     Self { fa, tags }
208 |   }
209 | 
210 |   /// Converts the current NFA into a
211 |   /// [`FiniteAutomaton`](crate::fa::FiniteAutomaton) and a tag set.
212 |   pub fn into_fa_tags(self) -> FATags<S, T> {
213 |     (self.fa, self.tags)
214 |   }
215 | 
216 |   /// Dumps the current finite automaton to the given writer as Graphviz.
217 |   pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
218 |   where
219 |     S: fmt::Debug,
220 |     W: io::Write,
221 |   {
222 |     self.fa.dump(writer)
223 |   }
224 | }
225 | 
226 | impl<S, T> From<NFA<S, T>> for DFA<S, T>
227 | where
228 |   S: Clone + Hash + Eq + Ord + Sync,
229 |   T: Clone + Hash + Eq + Ord,
230 | {
231 |   fn from(nfa: NFA<S, T>) -> Self {
232 |     Self::new(nfa, None)
233 |   }
234 | }
235 | 
236 | /// A pair of [`DFA`]'s internal finite automaton and the tag map.
237 | ///
238 | /// Used by method [`into_fa_tags`](DFA#method.into_fa_tags) of [`DFA`].
239 | pub type FATags<S, T> = (DenseFA<Vec<(S, S)>>, HashMap<usize, T>);
240 | 
241 | /// A [`NFA`] to [`DFA`] constructor.
242 | struct Constructor<S, T> {
243 |   nfa_tags: Vec<(T, usize)>,
244 |   cb: ClosureBuilder<Vec<(S, S)>>,
245 |   fa: DenseFA<Vec<(S, S)>>,
246 |   tags: HashMap<usize, T>,
247 |   states: Vec<Closure>,
248 |   ids: HashMap<Closure, usize>,
249 |   enable_par: Option<bool>,
250 | }
251 | 
252 | impl<S, T> Constructor<S, T>
253 | where
254 |   S: Clone + Hash + Eq + Sync,
255 |   T: Clone,
256 | {
257 |   /// Consumes the current constructor, constructs a [`DFA`] using
258 |   /// the powerset construction algorithm.
259 |   fn construct(self, cached: CachedClosures, syms: &[Vec<(S, S)>]) -> Self {
260 |     let enable_par = self.enable_par.unwrap_or_else(|| {
261 |       let parallelism = std::thread::available_parallelism()
262 |         .map(Into::into)
263 |         .unwrap_or(1);
264 |       parallelism > 1 && syms.len() > parallelism * 8
265 |     });
266 |     if enable_par {
267 |       self.construct_par(cached, syms)
268 |     } else {
269 |       self.construct_normal(cached, syms)
270 |     }
271 |   }
272 | 
273 |   /// Consumes the current constructor, constructs a [`DFA`] using
274 |   /// the powerset construction algorithm.
275 |   ///
276 |   /// This method runs serially.
277 |   fn construct_normal(mut self, mut cached: CachedClosures, syms: &[Vec<(S, S)>]) -> Self {
278 |     while let Some(cur) = self.states.pop() {
279 |       let cur_id = self.ids[&cur];
280 |       for s in syms {
281 |         // get next states in parallel
282 |         let next = self.cb.state_closure(&mut cached, &cur, s);
283 |         if next.is_empty() {
284 |           continue;
285 |         }
286 |         self.add_to_fa(cur_id, s.clone(), next);
287 |       }
288 |     }
289 |     self
290 |   }
291 | 
292 |   /// Consumes the current constructor, constructs a [`DFA`] using
293 |   /// the powerset construction algorithm.
294 |   ///
295 |   /// This method runs in parallel.
296 |   fn construct_par(mut self, cached: CachedClosures, syms: &[Vec<(S, S)>]) -> Self {
297 |     use rayon::prelude::*;
298 |     let mut nexts = Vec::new();
299 |     let mut cached_epsilons = vec![cached; syms.len()];
300 |     while let Some(cur) = self.states.pop() {
301 |       let cur_id = self.ids[&cur];
302 |       // get next states in parallel
303 |       syms
304 |         .par_iter()
305 |         .zip(&mut cached_epsilons)
306 |         .map(|(s, c)| self.cb.state_closure(c, &cur, s))
307 |         .collect_into_vec(&mut nexts);
308 |       // add to the finite automanton
309 |       for (s, next) in syms.iter().zip(nexts.drain(..)) {
310 |         if next.is_empty() {
311 |           continue;
312 |         }
313 |         self.add_to_fa(cur_id, s.clone(), next);
314 |       }
315 |     }
316 |     self
317 |   }
318 | 
319 |   fn add_to_fa(&mut self, cur_id: usize, s: Vec<(S, S)>, next: Closure) {
320 |     // get the ID of the next state
321 |     let id = if let Some(id) = self.ids.get(&next) {
322 |       *id
323 |     } else {
324 |       // add a new state
325 |       let id = if let Some(tag) = first_tag!(self.nfa_tags, next) {
326 |         let id = self.fa.add_final_state();
327 |         self.tags.insert(id, tag);
328 |         id
329 |       } else {
330 |         self.fa.add_state()
331 |       };
332 |       // update states and ID map
333 |       self.states.push(next.clone());
334 |       self.ids.insert(next, id);
335 |       id
336 |     };
337 |     // add an edge to the next state
338 |     self.fa.state_mut(cur_id).unwrap().add(s, id);
339 |   }
340 | 
341 |   /// Converts the current constructor into a [`DFA`].
342 |   fn into_dfa(self) -> DFA<S, T> {
343 |     DFA {
344 |       fa: self.fa,
345 |       tags: self.tags,
346 |     }
347 |   }
348 | }
349 | 


--------------------------------------------------------------------------------
/laps_regex/src/fa.rs:
--------------------------------------------------------------------------------
  1 | //! Finite automaton representations.
  2 | //!
  3 | //! This module contains [`FiniteAutomaton`], which is a simple finite
  4 | //! automaton implementation, and [`State`], which represents a state in
  5 | //! the automaton.
  6 | 
  7 | use std::collections::{BTreeSet, HashMap, HashSet};
  8 | use std::hash::Hash;
  9 | use std::marker::PhantomData;
 10 | use std::sync::{Mutex, MutexGuard, OnceLock};
 11 | use std::{fmt, io};
 12 | 
 13 | /// The next state ID.
 14 | static NEXT_STATE_ID: OnceLock<Mutex<usize>> = OnceLock::new();
 15 | 
 16 | /// Acquires and returns the next state ID.
 17 | fn next_state_id() -> MutexGuard<'static, usize> {
 18 |   NEXT_STATE_ID
 19 |     .get_or_init(|| Mutex::new(0))
 20 |     .lock()
 21 |     .expect("failed to acquire the next state ID")
 22 | }
 23 | 
 24 | /// Returns a new state ID and updates the ID counter.
 25 | fn get_and_update_state_id() -> usize {
 26 |   let mut id = next_state_id();
 27 |   let cur = *id;
 28 |   *id += 1;
 29 |   cur
 30 | }
 31 | 
 32 | /// Trait for state of finite automaton.
 33 | pub trait State<S> {
 34 |   /// Creates a new empty state.
 35 |   fn new() -> Self;
 36 | 
 37 |   /// Adds a new edge to the current state.
 38 |   fn add(&mut self, sym: S, state: usize);
 39 | 
 40 |   /// Dumps the current state to the given writer as Graphviz.
 41 |   fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
 42 |   where
 43 |     S: fmt::Debug,
 44 |     W: io::Write;
 45 | }
 46 | 
 47 | /// A state of the finite automaton with symbol type `S`.
 48 | ///
 49 | /// This state uses [`Vec`] to store edges internally.
 50 | #[derive(Debug)]
 51 | pub struct DenseState<S> {
 52 |   outs: Vec<(S, usize)>,
 53 | }
 54 | 
 55 | impl<S> DenseState<S> {
 56 |   /// Returns the output edges.
 57 |   pub fn outs(&self) -> &[(S, usize)] {
 58 |     &self.outs
 59 |   }
 60 | 
 61 |   /// Returns ID of the next state after accepting the given symbol `sym`.
 62 |   ///
 63 |   /// This method will return only the first matching state.
 64 |   /// Returns [`None`] if no matching state.
 65 |   pub fn next_state(&self, sym: &S) -> Option<usize>
 66 |   where
 67 |     S: PartialEq,
 68 |   {
 69 |     self
 70 |       .outs
 71 |       .iter()
 72 |       .find_map(|(s, id)| (s == sym).then_some(*id))
 73 |   }
 74 | }
 75 | 
 76 | impl<S> State<S> for DenseState<S> {
 77 |   fn new() -> Self {
 78 |     Self { outs: Vec::new() }
 79 |   }
 80 | 
 81 |   fn add(&mut self, sym: S, state: usize) {
 82 |     self.outs.push((sym, state));
 83 |   }
 84 | 
 85 |   fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
 86 |   where
 87 |     S: fmt::Debug,
 88 |     W: io::Write,
 89 |   {
 90 |     for (s, to) in &self.outs {
 91 |       writeln!(writer, "  {id} -> {to} [label = \"{s:?}\"]")?;
 92 |     }
 93 |     Ok(())
 94 |   }
 95 | }
 96 | 
 97 | /// A state of the finite automaton with symbol type `S`.
 98 | ///
 99 | /// This state uses [`HashMap<S, HashSet<_>>`] to store edges
100 | /// and all their output states.
101 | #[derive(Debug)]
102 | pub struct MultiState<S> {
103 |   outs: HashMap<S, HashSet<usize>>,
104 | }
105 | 
106 | impl<S> MultiState<S> {
107 |   /// Returns the map of output edges.
108 |   pub fn outs(&self) -> &HashMap<S, HashSet<usize>> {
109 |     &self.outs
110 |   }
111 | }
112 | 
113 | impl<S> State<S> for MultiState<S>
114 | where
115 |   S: Eq + Hash,
116 | {
117 |   fn new() -> Self {
118 |     Self {
119 |       outs: HashMap::new(),
120 |     }
121 |   }
122 | 
123 |   fn add(&mut self, sym: S, state: usize) {
124 |     self.outs.entry(sym).or_default().insert(state);
125 |   }
126 | 
127 |   fn dump<W>(&self, writer: &mut W, id: usize) -> io::Result<()>
128 |   where
129 |     S: fmt::Debug,
130 |     W: io::Write,
131 |   {
132 |     for (s, to_ids) in &self.outs {
133 |       for to in to_ids {
134 |         writeln!(writer, "  {id} -> {to} [label = \"{s:?}\"]")?;
135 |       }
136 |     }
137 |     Ok(())
138 |   }
139 | }
140 | 
141 | /// A finite automaton with symbol type `S`.
142 | #[derive(Debug)]
143 | pub struct FiniteAutomaton<Sym, State: self::State<Sym>> {
144 |   states: HashMap<usize, State>,
145 |   init: usize,
146 |   finals: HashSet<usize>,
147 |   sym: PhantomData<Sym>,
148 | }
149 | 
150 | impl<Sym, State: self::State<Sym>> FiniteAutomaton<Sym, State> {
151 |   /// Creates an empty finite automaton.
152 |   pub fn new() -> Self {
153 |     let init = get_and_update_state_id();
154 |     Self {
155 |       states: [(init, State::new())].into(),
156 |       init,
157 |       finals: HashSet::new(),
158 |       sym: PhantomData,
159 |     }
160 |   }
161 | 
162 |   /// Creates a new state in the current finite automaton.
163 |   ///
164 |   /// Returns the state ID.
165 |   pub fn add_state(&mut self) -> usize {
166 |     let id = get_and_update_state_id();
167 |     self.states.insert(id, State::new());
168 |     id
169 |   }
170 | 
171 |   /// Creates a new final state in the current finite automaton.
172 |   ///
173 |   /// Returns the state ID.
174 |   pub fn add_final_state(&mut self) -> usize {
175 |     let id = self.add_state();
176 |     self.finals.insert(id);
177 |     id
178 |   }
179 | 
180 |   /// Sets the given state as a final state.
181 |   ///
182 |   /// Returns [`false`](bool) if the given state does not exist.
183 |   pub fn set_final_state(&mut self, id: usize) -> bool {
184 |     if self.states.contains_key(&id) {
185 |       self.finals.insert(id);
186 |       true
187 |     } else {
188 |       false
189 |     }
190 |   }
191 | 
192 |   /// Sets the given state as a normal state.
193 |   ///
194 |   /// Returns [`false`](bool) if the given state does not exist.
195 |   pub fn set_normal_state(&mut self, id: usize) -> bool {
196 |     if self.states.contains_key(&id) {
197 |       self.finals.remove(&id);
198 |       true
199 |     } else {
200 |       false
201 |     }
202 |   }
203 | 
204 |   /// Unions the current finite automaton with the given finite automaton.
205 |   ///
206 |   /// The initial state of the given finite automaton will be added to
207 |   /// the current finite automaton as normal states. All final states of
208 |   /// the given finite automaton will be kept.
209 |   pub fn union(&mut self, fa: Self) {
210 |     self.states.extend(fa.states);
211 |     self.finals.extend(fa.finals);
212 |   }
213 | 
214 |   /// Returns a reference to the state map.
215 |   pub fn states(&self) -> &HashMap<usize, State> {
216 |     &self.states
217 |   }
218 | 
219 |   /// Returns a reference to the given state.
220 |   ///
221 |   /// Returns [`None`] if the given state does not exist.
222 |   pub fn state(&self, id: usize) -> Option<&State> {
223 |     self.states.get(&id)
224 |   }
225 | 
226 |   /// Returns a mutable reference to the given state.
227 |   ///
228 |   /// Returns [`None`] if the given state does not exist.
229 |   pub fn state_mut(&mut self, id: usize) -> Option<&mut State> {
230 |     self.states.get_mut(&id)
231 |   }
232 | 
233 |   /// Returns a reference to the initial state.
234 |   pub fn init(&self) -> &State {
235 |     self.states.get(&self.init).unwrap()
236 |   }
237 | 
238 |   /// Returns a mutable reference to the given initial state.
239 |   pub fn init_mut(&mut self) -> &mut State {
240 |     self.states.get_mut(&self.init).unwrap()
241 |   }
242 | 
243 |   /// Returns the ID of the initial state.
244 |   pub fn init_id(&self) -> usize {
245 |     self.init
246 |   }
247 | 
248 |   /// Returns a reference to the ID set of the final states.
249 |   pub fn finals(&self) -> &HashSet<usize> {
250 |     &self.finals
251 |   }
252 | 
253 |   /// Returns the ID of the final state.
254 |   ///
255 |   /// Returns [`None`] if there is no final state or more than one final state.
256 |   pub fn final_id(&self) -> Option<usize> {
257 |     if self.finals().len() > 1 {
258 |       None
259 |     } else {
260 |       self.finals().iter().next().copied()
261 |     }
262 |   }
263 | 
264 |   /// Dumps the current finite automaton to the given writer as Graphviz.
265 |   pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
266 |   where
267 |     Sym: fmt::Debug,
268 |     W: io::Write,
269 |   {
270 |     writeln!(writer, "digraph finite_automaton {{")?;
271 |     writeln!(writer, "  rankdir = LR")?;
272 |     writeln!(writer, "  node [shape = doublecircle];")?;
273 |     write!(writer, " ")?;
274 |     for id in &self.finals {
275 |       write!(writer, " {id}")?;
276 |     }
277 |     writeln!(writer, ";")?;
278 |     writeln!(writer, "  node [shape = circle];")?;
279 |     for (id, state) in &self.states {
280 |       state.dump(writer, *id)?;
281 |     }
282 |     writeln!(writer, "}}")?;
283 |     Ok(())
284 |   }
285 | }
286 | 
287 | impl<Sym, State: self::State<Sym>> Default for FiniteAutomaton<Sym, State> {
288 |   fn default() -> Self {
289 |     Self::new()
290 |   }
291 | }
292 | 
293 | /// Finite automaton which state type is [`DenseState`].
294 | pub type DenseFA<S> = FiniteAutomaton<S, DenseState<S>>;
295 | 
296 | /// Finite automaton which state type is [`MultiState`].
297 | pub type MultiFA<S> = FiniteAutomaton<S, MultiState<S>>;
298 | 
299 | /// Builder for calculating closures from a finite automation.
300 | pub struct ClosureBuilder<S> {
301 |   empty_edges: HashMap<usize, HashSet<usize>>,
302 |   normal_edges: HashMap<usize, MultiState<S>>,
303 | }
304 | 
305 | impl<S> From<MultiFA<Option<S>>> for ClosureBuilder<S>
306 | where
307 |   S: Eq + Hash,
308 | {
309 |   fn from(fa: MultiFA<Option<S>>) -> Self {
310 |     let mut empty_edges = HashMap::new();
311 |     let mut normal_edges: HashMap<_, MultiState<S>> = HashMap::new();
312 |     for (id, s) in fa.states {
313 |       for (s, to) in s.outs {
314 |         match s {
315 |           Some(s) => normal_edges
316 |             .entry(id)
317 |             .or_insert_with(|| State::new())
318 |             .outs
319 |             .insert(s, to),
320 |           None => empty_edges.insert(id, to),
321 |         };
322 |       }
323 |     }
324 |     Self {
325 |       empty_edges,
326 |       normal_edges,
327 |     }
328 |   }
329 | }
330 | 
331 | impl<S> ClosureBuilder<S> {
332 |   /// Returns the symbol set of the current finite automaton.
333 |   pub fn symbol_set(&self) -> HashSet<S>
334 |   where
335 |     S: Clone + Eq + Hash,
336 |   {
337 |     self
338 |       .normal_edges
339 |       .values()
340 |       .flat_map(|s| s.outs().keys().cloned())
341 |       .collect()
342 |   }
343 | 
344 |   /// Returns the epsilon closure of the given state.
345 |   pub fn epsilon_closure<Ids>(&self, cached: &mut CachedClosures, ids: Ids) -> Closure
346 |   where
347 |     Ids: Into<Closure>,
348 |   {
349 |     let mut closure = ids.into();
350 |     if closure.is_empty() {
351 |       closure
352 |     } else if let Some(c) = cached.get(&closure) {
353 |       c.clone()
354 |     } else {
355 |       let ids = closure.clone();
356 |       let mut next_ids: Vec<_> = closure.iter().copied().collect();
357 |       while let Some(id) = next_ids.pop() {
358 |         if let Some(to_ids) = self.empty_edges.get(&id) {
359 |           for id in to_ids {
360 |             if closure.insert(*id) {
361 |               next_ids.push(*id);
362 |             }
363 |           }
364 |         }
365 |       }
366 |       cached.insert(ids, closure.clone());
367 |       closure
368 |     }
369 |   }
370 | 
371 |   /// Returns a set of all possible states that can be reached
372 |   /// after accepting symbol `s` on the given states.
373 |   pub fn state_closure(&self, cached: &mut CachedClosures, states: &Closure, s: &S) -> Closure
374 |   where
375 |     S: Eq + Hash,
376 |   {
377 |     let mut next_states = Closure::new();
378 |     for id in states {
379 |       if let Some(ids) = self.normal_edges.get(id).and_then(|st| st.outs().get(s)) {
380 |         next_states.extend(ids);
381 |       }
382 |     }
383 |     self.epsilon_closure(cached, next_states)
384 |   }
385 | }
386 | 
387 | /// Closure of a state of finite automaton.
388 | pub type Closure = BTreeSet<usize>;
389 | 
390 | /// Cached closures.
391 | pub type CachedClosures = HashMap<Closure, Closure>;
392 | 


--------------------------------------------------------------------------------
/laps_regex/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! # laps_regex
 2 | //!
 3 | //! Tools for generating NFAs, DFAs and state-transition tables from
 4 | //! regular expressions.
 5 | //!
 6 | //! This library is built for crate [`laps`](https://crates.io/crates/laps).
 7 | //!
 8 | //! ## Example: Matching UTF-8 Strings
 9 | //!
10 | //! ```
11 | //! use laps_regex::re::{RegexBuilder, CharsMatcher};
12 | //!
13 | //! #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
14 | //! enum Token {
15 | //!   Keyword,
16 | //!   Identifier,
17 | //!   Number,
18 | //! }
19 | //!
20 | //! let matcher: CharsMatcher<_> = RegexBuilder::new()
21 | //!   .add("if|else|while", Token::Keyword)
22 | //!   .add("[_a-zA-Z][_a-zA-Z0-9]*", Token::Identifier)
23 | //!   .add("[0-9]|[1-9][0-9]+", Token::Number)
24 | //!   .build()
25 | //!   .unwrap();
26 | //!
27 | //! assert_eq!(matcher.is_str_match("if"), Some(&Token::Keyword));
28 | //! assert_eq!(matcher.is_str_match("while1"), Some(&Token::Identifier));
29 | //! assert_eq!(matcher.is_str_match("42"), Some(&Token::Number));
30 | //! assert_eq!(matcher.is_str_match("?"), None);
31 | //! ```
32 | //!
33 | //! ## Example: Matching Bytes
34 | //!
35 | //! ```
36 | //! use laps_regex::re::{RegexBuilder, BytesMatcher};
37 | //!
38 | //! let matcher: BytesMatcher<_> = RegexBuilder::new()
39 | //!   .add("hello|hi", 0)
40 | //!   .add("goodbye|bye", 1)
41 | //!   .build_bytes()
42 | //!   .unwrap();
43 | //!
44 | //! assert_eq!(matcher.is_match("hello".as_bytes()), Some(&0));
45 | //! assert_eq!(matcher.is_match(&[0x62, 0x79, 0x65]), Some(&1));
46 | //! ```
47 | 
48 | pub mod dfa;
49 | pub mod fa;
50 | pub mod mir;
51 | pub mod nfa;
52 | pub mod re;
53 | pub mod table;
54 | 


--------------------------------------------------------------------------------
/laps_regex/src/nfa.rs:
--------------------------------------------------------------------------------
  1 | //! Nondeterministic finite automaton ([`NFA`]) related implementations.
  2 | //!
  3 | //! An NFA can be built from a mid-level intermediate represention ([`Mir`]).
  4 | 
  5 | use crate::fa::{MultiFA, State};
  6 | use crate::mir::Mir;
  7 | use std::collections::HashMap;
  8 | use std::hash::Hash;
  9 | use std::{fmt, io};
 10 | 
 11 | /// A nondeterministic finite automaton (NFA)
 12 | /// with symbol type `S` and tag type `T`.
 13 | #[derive(Debug)]
 14 | pub struct NFA<S, T>
 15 | where
 16 |   S: Eq + Hash,
 17 | {
 18 |   fa: MultiFA<Option<Vec<(S, S)>>>,
 19 |   tags: HashMap<usize, T>,
 20 | }
 21 | 
 22 | impl<S, T> NFA<S, T>
 23 | where
 24 |   S: Eq + Hash,
 25 | {
 26 |   /// Creates a new NFA from [`Mir`].
 27 |   pub fn new(mir: Mir<S, T>) -> Self {
 28 |     match mir {
 29 |       Mir::Empty => Self::new_nfa_with_symbol(None),
 30 |       Mir::Ranges(rs) => Self::new_nfa_with_symbol(Some(rs)),
 31 |       Mir::Concat(c) => c.into_iter().map(Self::new).reduce(Self::concat).unwrap(),
 32 |       Mir::Alter(mut a) => {
 33 |         if a.len() == 1 {
 34 |           let (mir, tag) = a.swap_remove(0);
 35 |           let mut nfa = Self::new(mir);
 36 |           if let Some(tag) = tag {
 37 |             let fs = nfa.normalize();
 38 |             nfa.fa.set_final_state(fs);
 39 |             nfa.tags.insert(fs, tag);
 40 |           }
 41 |           nfa
 42 |         } else {
 43 |           a.into_iter()
 44 |             .map(|(mir, tag)| (Self::new(mir), tag))
 45 |             .reduce(Self::alter)
 46 |             .unwrap()
 47 |             .0
 48 |         }
 49 |       }
 50 |       Mir::Kleene(k) => {
 51 |         // create NFA and normalize
 52 |         let mut nfa = Self::new(*k);
 53 |         let id = nfa.normalize();
 54 |         // create a edge to the initial state
 55 |         let init = nfa.fa.init_id();
 56 |         nfa.fa.state_mut(id).unwrap().add(None, init);
 57 |         // set the initial state as a final state
 58 |         nfa.fa.set_final_state(init);
 59 |         nfa
 60 |       }
 61 |     }
 62 |   }
 63 | 
 64 |   /// Creates a new NFA which matches the given symbol.
 65 |   fn new_nfa_with_symbol(sym: Option<Vec<(S, S)>>) -> Self {
 66 |     let mut fa = MultiFA::new();
 67 |     let fs = fa.add_final_state();
 68 |     fa.init_mut().add(sym, fs);
 69 |     Self {
 70 |       fa,
 71 |       tags: HashMap::new(),
 72 |     }
 73 |   }
 74 | 
 75 |   /// Creates an alternation of the given two NFA-tag pairs.
 76 |   fn alter(
 77 |     (mut nfa1, tag1): (Self, Option<T>),
 78 |     (mut nfa2, tag2): (Self, Option<T>),
 79 |   ) -> (Self, Option<T>) {
 80 |     // create final state and tag mapping for `nfa1`
 81 |     let fs1 = nfa1.normalize();
 82 |     nfa1.fa.set_final_state(fs1);
 83 |     if let Some(tag1) = tag1 {
 84 |       nfa1.tags.insert(fs1, tag1);
 85 |     }
 86 |     // add empty edge to the initial state of `nfa2`
 87 |     nfa1.fa.init_mut().add(None, nfa2.fa.init_id());
 88 |     // create final state and tag mapping for `nfa2` if it has a tag
 89 |     if let Some(tag2) = tag2 {
 90 |       let fs2 = nfa2.normalize();
 91 |       nfa2.fa.set_final_state(fs2);
 92 |       nfa1.tags.insert(fs2, tag2);
 93 |     }
 94 |     // union states and tags of two NFAs
 95 |     nfa1.fa.union(nfa2.fa);
 96 |     nfa1.tags.extend(nfa2.tags);
 97 |     (nfa1, None)
 98 |   }
 99 | 
100 |   /// Concatenates the given two NFAs into a new NFA.
101 |   fn concat(mut nfa1: Self, nfa2: Self) -> Self {
102 |     let fs1 = nfa1.normalize();
103 |     nfa1.fa.state_mut(fs1).unwrap().add(None, nfa2.fa.init_id());
104 |     nfa1.fa.union(nfa2.fa);
105 |     nfa1.tags.extend(nfa2.tags);
106 |     nfa1
107 |   }
108 | 
109 |   /// Normalizes the current NFA.
110 |   ///
111 |   /// Keeps only final states with tags, set all other final states as
112 |   /// normal states, and route them to a new normal state with an empty edge.
113 |   ///
114 |   /// Returns the normal state ID.
115 |   fn normalize(&mut self) -> usize {
116 |     // try to get an untagged final state
117 |     let untagged = self
118 |       .fa
119 |       .finals()
120 |       .iter()
121 |       .copied()
122 |       .find(|id| !self.tags.contains_key(id));
123 |     // get the target state id
124 |     let target = if let Some(untagged) = untagged {
125 |       self.fa.set_normal_state(untagged);
126 |       untagged
127 |     } else {
128 |       self.fa.add_state()
129 |     };
130 |     // add edges to the target state
131 |     for id in self.fa.finals().clone() {
132 |       if id != target {
133 |         self.fa.state_mut(id).unwrap().add(None, target);
134 |         if !self.tags.contains_key(&id) {
135 |           self.fa.set_normal_state(id);
136 |         }
137 |       }
138 |     }
139 |     target
140 |   }
141 | 
142 |   /// Converts the current NFA into a
143 |   /// [`FiniteAutomaton`](crate::fa::FiniteAutomaton) and a tag set.
144 |   pub fn into_fa_tags(self) -> FATags<S, T> {
145 |     (self.fa, self.tags)
146 |   }
147 | 
148 |   /// Dumps the current finite automaton to the given writer as Graphviz.
149 |   pub fn dump<W>(&self, writer: &mut W) -> io::Result<()>
150 |   where
151 |     S: fmt::Debug,
152 |     W: io::Write,
153 |   {
154 |     self.fa.dump(writer)
155 |   }
156 | }
157 | 
158 | impl<S, T> From<Mir<S, T>> for NFA<S, T>
159 | where
160 |   S: Eq + Hash,
161 | {
162 |   fn from(mir: Mir<S, T>) -> Self {
163 |     Self::new(mir)
164 |   }
165 | }
166 | 
167 | /// A pair of [`NFA`]'s internal finite automaton and the tag map.
168 | ///
169 | /// Used by method [`into_fa_tags`](NFA#method.into_fa_tags) of [`NFA`].
170 | pub type FATags<S, T> = (MultiFA<Option<Vec<(S, S)>>>, HashMap<usize, T>);
171 | 


--------------------------------------------------------------------------------
/laps_regex/src/re.rs:
--------------------------------------------------------------------------------
  1 | //! User interfaces for building and matching regular expressions.
  2 | //!
  3 | //! This module contains the regular expression builder [`RegexBuilder`]
  4 | //! and the regular expression matcher [`RegexMatcher`].
  5 | 
  6 | use crate::dfa::DFA;
  7 | use crate::mir::{Error as MirError, Mir, MirBuilder, SymbolOp};
  8 | use crate::nfa::NFA;
  9 | use crate::table::StateTransTable;
 10 | use regex_syntax::hir::Hir;
 11 | use regex_syntax::{parse, Error as RegexError, ParserBuilder};
 12 | use std::fmt;
 13 | use std::hash::Hash;
 14 | 
 15 | /// A builder for regular expressions with tag type `T`.
 16 | pub struct RegexBuilder<T> {
 17 |   re_tags: Vec<(String, T)>,
 18 |   enable_par: Option<bool>,
 19 | }
 20 | 
 21 | impl<T> RegexBuilder<T> {
 22 |   /// Creates a new regular expression builder.
 23 |   pub fn new() -> Self {
 24 |     Self {
 25 |       re_tags: Vec::new(),
 26 |       enable_par: None,
 27 |     }
 28 |   }
 29 | 
 30 |   /// Adds a new regular expression to the builder, with the given tag.
 31 |   pub fn add(mut self, re: &str, tag: T) -> Self {
 32 |     self.re_tags.push((re.into(), tag));
 33 |     self
 34 |   }
 35 | 
 36 |   /// Sets to [`Some(true)`] to construct the DFA in parallel,
 37 |   /// [`Some(false)`] to disable parallelization, and [`None`] to
 38 |   /// choose automatically.
 39 |   ///
 40 |   /// Defaults to [`None`].
 41 |   pub fn enable_par(mut self, enable_par: Option<bool>) -> Self {
 42 |     self.enable_par = enable_par;
 43 |     self
 44 |   }
 45 | }
 46 | 
 47 | impl<T> RegexBuilder<T>
 48 | where
 49 |   T: Clone + Hash + Eq + Ord,
 50 | {
 51 |   /// Builds all regular expressions in the current builder as UTF-8 mode.
 52 |   ///
 53 |   /// Returns a [`RegexMatcher`], or an error.
 54 |   pub fn build<S>(self) -> Result<RegexMatcher<S, T>, Error<T>>
 55 |   where
 56 |     S: Hash + Eq + Clone + Ord + SymbolOp + Sync + Send,
 57 |     Mir<S, T>: MirBuilder,
 58 |   {
 59 |     self.build_impl(parse)
 60 |   }
 61 | 
 62 |   /// Builds all regular expressions in the current builder as bytes mode.
 63 |   ///
 64 |   /// Returns a [`RegexMatcher`], or an error.
 65 |   pub fn build_bytes<S>(self) -> Result<RegexMatcher<S, T>, Error<T>>
 66 |   where
 67 |     S: Hash + Eq + Clone + Ord + SymbolOp + Sync + Send,
 68 |     Mir<S, T>: MirBuilder,
 69 |   {
 70 |     self.build_impl(|re| ParserBuilder::new().utf8(false).build().parse(re))
 71 |   }
 72 | 
 73 |   /// Implementation of all building methods.
 74 |   fn build_impl<R, S>(self, re_parse: R) -> Result<RegexMatcher<S, T>, Error<T>>
 75 |   where
 76 |     R: Fn(&str) -> Result<Hir, RegexError>,
 77 |     S: Hash + Eq + Clone + Ord + SymbolOp + Sync + Send,
 78 |     Mir<S, T>: MirBuilder,
 79 |   {
 80 |     if self.re_tags.is_empty() {
 81 |       Err(Error::EmptyBuilder)
 82 |     } else {
 83 |       Mir::Alter(
 84 |         self
 85 |           .re_tags
 86 |           .into_iter()
 87 |           .map(|(re, tag)| {
 88 |             re_parse(&re)
 89 |               .map_err(|e| Error::Regex(Box::new(e), tag.clone()))
 90 |               .and_then(|hir| Mir::new(hir).map_err(Error::Mir))
 91 |               .map(|mir| (mir, Some(tag)))
 92 |           })
 93 |           .collect::<Result<_, _>>()?,
 94 |       )
 95 |       .optimize()
 96 |       .map(|mir| {
 97 |         RegexMatcher::new(StateTransTable::new(DFA::new(
 98 |           NFA::new(mir),
 99 |           self.enable_par,
100 |         )))
101 |       })
102 |       .map_err(Error::Mir)
103 |     }
104 |   }
105 | }
106 | 
107 | impl<T> Default for RegexBuilder<T> {
108 |   fn default() -> Self {
109 |     Self::new()
110 |   }
111 | }
112 | 
113 | /// Possible errors in building of regular expressions with tag type `T`.
114 | #[derive(Debug)]
115 | pub enum Error<T> {
116 |   /// There is no regular expressions in [`RegexBuilder`].
117 |   EmptyBuilder,
118 |   /// An error occurred during parsing the regular expression with the tag `T`.
119 |   Regex(Box<RegexError>, T),
120 |   /// An error occurred during compiling or optimizing regular expressions.
121 |   Mir(MirError),
122 | }
123 | 
124 | impl<T> fmt::Display for Error<T> {
125 |   fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 |     match self {
127 |       Self::EmptyBuilder => write!(f, "no regular expressions in the builder"),
128 |       Self::Regex(e, _) => write!(f, "{e}"),
129 |       Self::Mir(e) => write!(f, "{e}"),
130 |     }
131 |   }
132 | }
133 | 
134 | /// A matcher for matching regular expressions.
135 | #[derive(Debug)]
136 | pub struct RegexMatcher<S, T> {
137 |   table: StateTransTable<S, T>,
138 |   state: usize,
139 | }
140 | 
141 | impl<S, T> RegexMatcher<S, T> {
142 |   /// Creates a new matcher from the given [`StateTransTable`].
143 |   fn new(table: StateTransTable<S, T>) -> Self {
144 |     Self {
145 |       state: table.init_id(),
146 |       table,
147 |     }
148 |   }
149 | 
150 |   /// Returns the current state ID.
151 |   pub fn state(&self) -> usize {
152 |     self.state
153 |   }
154 | 
155 |   /// Checks if the given bytes can be matched.
156 |   /// If so, returns a reference to the corresponding tag.
157 |   /// Otherwise, returns [`None`].
158 |   ///
159 |   /// Smaller tags have higher precedence.
160 |   pub fn is_match(&self, seq: &[S]) -> Option<&T>
161 |   where
162 |     S: Ord,
163 |   {
164 |     let mut id = self.table.init_id();
165 |     for s in seq {
166 |       if let Some(next) = self.table.next_state(id, s) {
167 |         id = next;
168 |       } else {
169 |         return None;
170 |       }
171 |     }
172 |     self.table.is_final(id)
173 |   }
174 | 
175 |   /// Returns true if the given symbol can be accepted.
176 |   ///
177 |   /// This method will update the internal state.
178 |   pub fn is_accept(&mut self, s: &S) -> bool
179 |   where
180 |     S: Ord,
181 |   {
182 |     if let Some(next) = self.table.next_state(self.state, s) {
183 |       self.state = next;
184 |       true
185 |     } else {
186 |       false
187 |     }
188 |   }
189 | 
190 |   /// Checks if the current state is a final state.
191 |   /// If so, returns a reference to the corresponding tag.
192 |   /// Otherwise, returns [`None`].
193 |   ///
194 |   /// Smaller tags have higher precedence.
195 |   pub fn is_final(&self) -> Option<&T> {
196 |     self.table.is_final(self.state)
197 |   }
198 | 
199 |   /// Checks if the given state is a final state.
200 |   /// If so, returns a reference to the corresponding tag.
201 |   /// Otherwise, returns [`None`].
202 |   ///
203 |   /// Smaller tags have higher precedence.
204 |   pub fn is_state_final(&self, id: usize) -> Option<&T> {
205 |     self.table.is_final(id)
206 |   }
207 | 
208 |   /// Resets the internal state of the current matcher to initial state.
209 |   pub fn reset(&mut self) {
210 |     self.state = self.table.init_id();
211 |   }
212 | }
213 | 
214 | impl<S, T> From<RegexMatcher<S, T>> for StateTransTable<S, T> {
215 |   fn from(matcher: RegexMatcher<S, T>) -> Self {
216 |     matcher.table
217 |   }
218 | }
219 | 
220 | /// A regular expression matcher for matching characters.
221 | pub type CharsMatcher<T> = RegexMatcher<char, T>;
222 | 
223 | impl<T> CharsMatcher<T> {
224 |   /// Checks if the given string can be matched.
225 |   /// If so, returns a reference to the corresponding tag.
226 |   /// Otherwise, returns [`None`].
227 |   ///
228 |   /// Smaller tags have higher precedence.
229 |   pub fn is_str_match(&self, s: &str) -> Option<&T> {
230 |     let mut id = self.table.init_id();
231 |     for c in s.chars() {
232 |       if let Some(next) = self.table.next_state(id, &c) {
233 |         id = next;
234 |       } else {
235 |         return None;
236 |       }
237 |     }
238 |     self.table.is_final(id)
239 |   }
240 | }
241 | 
242 | /// A regular expression matcher for matching bytes.
243 | pub type BytesMatcher<T> = RegexMatcher<u8, T>;
244 | 
245 | impl<T> BytesMatcher<T> {
246 |   /// Checks if the given string can be matched.
247 |   /// If so, returns a reference to the corresponding tag.
248 |   /// Otherwise, returns [`None`].
249 |   ///
250 |   /// Smaller tags have higher precedence.
251 |   pub fn is_str_match(&self, s: &str) -> Option<&T> {
252 |     let mut id = self.table.init_id();
253 |     for c in s.bytes() {
254 |       if let Some(next) = self.table.next_state(id, &c) {
255 |         id = next;
256 |       } else {
257 |         return None;
258 |       }
259 |     }
260 |     self.table.is_final(id)
261 |   }
262 | }
263 | 
264 | #[cfg(test)]
265 | mod test {
266 |   use super::*;
267 |   use Token::*;
268 | 
269 |   #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
270 |   enum Token {
271 |     Keyword,
272 |     Identifier,
273 |     Number,
274 |     Str,
275 |     Operator,
276 |     Skip,
277 |     Other,
278 |   }
279 | 
280 |   #[test]
281 |   fn match_string() {
282 |     let matcher: CharsMatcher<_> = RegexBuilder::new()
283 |       .add("if|else|while", Keyword)
284 |       .add("[_a-zA-Z][_a-zA-Z0-9]*", Identifier)
285 |       .add("[0-9]|[1-9][0-9]+", Number)
286 |       .build()
287 |       .unwrap();
288 |     assert_eq!(matcher.is_str_match("if"), Some(&Keyword));
289 |     assert_eq!(matcher.is_str_match("else"), Some(&Keyword));
290 |     assert_eq!(matcher.is_str_match("while"), Some(&Keyword));
291 |     assert_eq!(matcher.is_str_match("ifi"), Some(&Identifier));
292 |     assert_eq!(matcher.is_str_match("else1"), Some(&Identifier));
293 |     assert_eq!(matcher.is_str_match("_while"), Some(&Identifier));
294 |     assert_eq!(matcher.is_str_match("a_8"), Some(&Identifier));
295 |     assert_eq!(matcher.is_str_match("_"), Some(&Identifier));
296 |     assert_eq!(matcher.is_str_match("A_good_id"), Some(&Identifier));
297 |     assert_eq!(matcher.is_str_match("A_b@d_id"), None);
298 |     assert_eq!(matcher.is_str_match("0"), Some(&Number));
299 |     assert_eq!(matcher.is_str_match("5"), Some(&Number));
300 |     assert_eq!(matcher.is_str_match("12450"), Some(&Number));
301 |     assert_eq!(matcher.is_str_match("10"), Some(&Number));
302 |     assert_eq!(matcher.is_str_match("01"), None);
303 |     assert_eq!(matcher.is_str_match(""), None);
304 |     assert_eq!(matcher.is_str_match("?"), None);
305 |   }
306 | 
307 |   #[test]
308 |   fn match_bytes() {
309 |     let matcher: BytesMatcher<_> = RegexBuilder::new()
310 |       .add("hello|hi", 0)
311 |       .add("goodbye|bye", 1)
312 |       .build_bytes()
313 |       .unwrap();
314 |     assert_eq!(matcher.is_str_match("hello"), Some(&0));
315 |     assert_eq!(matcher.is_match(b"hello"), Some(&0));
316 |     assert_eq!(matcher.is_match(b"hi"), Some(&0));
317 |     assert_eq!(matcher.is_match(b"goodbye"), Some(&1));
318 |     assert_eq!(matcher.is_match(&[0x62, 0x79, 0x65]), Some(&1));
319 |   }
320 | 
321 |   #[test]
322 |   fn match_stream() {
323 |     use std::io::{Cursor, Read};
324 | 
325 |     struct Lexer<R> {
326 |       reader: R,
327 |       matcher: CharsMatcher<Token>,
328 |       last_char: Option<char>,
329 |     }
330 | 
331 |     impl<R> Lexer<R> {
332 |       fn new(reader: R) -> Self {
333 |         Self {
334 |           reader,
335 |           matcher: RegexBuilder::new()
336 |             .add("if|else|while", Keyword)
337 |             .add("[_a-zA-Z][_a-zA-Z0-9]*", Identifier)
338 |             .add("[0-9]|[1-9][0-9]+", Number)
339 |             .add("\"[^\"\r\n]*\"", Str)
340 |             .add(r"==|>|-=|\+=", Operator)
341 |             .add(r"\s+", Skip)
342 |             .add(".", Other)
343 |             .build()
344 |             .unwrap(),
345 |           last_char: None,
346 |         }
347 |       }
348 | 
349 |       fn unread(&mut self, c: char) {
350 |         self.last_char = Some(c);
351 |       }
352 |     }
353 | 
354 |     impl<R> Lexer<R>
355 |     where
356 |       R: Read,
357 |     {
358 |       fn read(&mut self) -> Option<char> {
359 |         let mut buf = [0];
360 |         match self.last_char.take() {
361 |           None => match self.reader.read(&mut buf) {
362 |             Ok(1) => Some(buf[0] as char),
363 |             _ => None,
364 |           },
365 |           c => c,
366 |         }
367 |       }
368 | 
369 |       fn next_token_impl(&mut self) -> Option<(Token, String)> {
370 |         let mut last_state;
371 |         let mut buf = String::new();
372 |         self.matcher.reset();
373 |         loop {
374 |           let c = self.read()?;
375 |           last_state = self.matcher.state();
376 |           if !self.matcher.is_accept(&c) {
377 |             self.unread(c);
378 |             break;
379 |           }
380 |           buf.push(c);
381 |         }
382 |         self.matcher.is_state_final(last_state).map(|t| (*t, buf))
383 |       }
384 | 
385 |       fn next_token(&mut self) -> Option<(Token, String)> {
386 |         loop {
387 |           let ts = self.next_token_impl();
388 |           if !matches!(ts, Some((Skip, _))) {
389 |             return ts;
390 |           }
391 |         }
392 |       }
393 |     }
394 | 
395 |     let mut lexer = Lexer::new(Cursor::new(
396 |       r#"
397 |       while (test(b) =="hello!") {
398 |         if (b> 5){
399 |           b-=1;
400 |         } else {
401 |           b += 2;
402 |         }
403 |       }
404 |     "#,
405 |     ));
406 | 
407 |     assert_eq!(lexer.next_token(), Some((Keyword, "while".into())));
408 |     assert_eq!(lexer.next_token(), Some((Other, "(".into())));
409 |     assert_eq!(lexer.next_token(), Some((Identifier, "test".into())));
410 |     assert_eq!(lexer.next_token(), Some((Other, "(".into())));
411 |     assert_eq!(lexer.next_token(), Some((Identifier, "b".into())));
412 |     assert_eq!(lexer.next_token(), Some((Other, ")".into())));
413 |     assert_eq!(lexer.next_token(), Some((Operator, "==".into())));
414 |     assert_eq!(lexer.next_token(), Some((Str, "\"hello!\"".into())));
415 |     assert_eq!(lexer.next_token(), Some((Other, ")".into())));
416 |     assert_eq!(lexer.next_token(), Some((Other, "{".into())));
417 |     assert_eq!(lexer.next_token(), Some((Keyword, "if".into())));
418 |     assert_eq!(lexer.next_token(), Some((Other, "(".into())));
419 |     assert_eq!(lexer.next_token(), Some((Identifier, "b".into())));
420 |     assert_eq!(lexer.next_token(), Some((Operator, ">".into())));
421 |     assert_eq!(lexer.next_token(), Some((Number, "5".into())));
422 |     assert_eq!(lexer.next_token(), Some((Other, ")".into())));
423 |     assert_eq!(lexer.next_token(), Some((Other, "{".into())));
424 |     assert_eq!(lexer.next_token(), Some((Identifier, "b".into())));
425 |     assert_eq!(lexer.next_token(), Some((Operator, "-=".into())));
426 |     assert_eq!(lexer.next_token(), Some((Number, "1".into())));
427 |     assert_eq!(lexer.next_token(), Some((Other, ";".into())));
428 |     assert_eq!(lexer.next_token(), Some((Other, "}".into())));
429 |     assert_eq!(lexer.next_token(), Some((Keyword, "else".into())));
430 |     assert_eq!(lexer.next_token(), Some((Other, "{".into())));
431 |     assert_eq!(lexer.next_token(), Some((Identifier, "b".into())));
432 |     assert_eq!(lexer.next_token(), Some((Operator, "+=".into())));
433 |     assert_eq!(lexer.next_token(), Some((Number, "2".into())));
434 |     assert_eq!(lexer.next_token(), Some((Other, ";".into())));
435 |     assert_eq!(lexer.next_token(), Some((Other, "}".into())));
436 |     assert_eq!(lexer.next_token(), Some((Other, "}".into())));
437 |     assert_eq!(lexer.next_token(), None);
438 |   }
439 | 
440 |   #[test]
441 |   fn match_word() {
442 |     let matcher: CharsMatcher<_> = RegexBuilder::new().add(r"\w+", 0).build().unwrap();
443 |     assert_eq!(matcher.is_str_match("if"), Some(&0));
444 |     assert_eq!(matcher.is_str_match("hello"), Some(&0));
445 |     assert_eq!(matcher.is_str_match(".hello"), None);
446 |     assert_eq!(matcher.is_str_match("??"), None);
447 |   }
448 | 
449 |   #[test]
450 |   fn match_xeno_tokens() {
451 |     let res = [
452 |       r"\s+",
453 |       r"(0b[01]+|0o[0-7]+|0x[0-9a-fA-F]+|[0-9]+)([iIuU](8|16|32|64))?",
454 |       r"[0-9]+(\.([0-9]+([eE][+-]?[0-9]+)?([fF](32|64))?)?|([eE][+-]?[0-9]+)([fF](32|64))?|([fF](32|64)))",
455 |       r#"'([^'\\\n\r\t]|\\'|\\"|\\x[0-7][0-9a-fA-F]|\\n|\\r|\\t|\\\\|\\0|\\u\{[0-9a-fA-F]{1,6}\})'"#,
456 |       r#"b'([\x20-\x26\x28-\x5b\x5d-\x7e]|\\x[0-9a-fA-F]{2}|\\n|\\r|\\t|\\\\|\\0|\\'|\\")'"#,
457 |       r#""([^'\\\n\r\t]|\\'|\\"|\\x[0-7][0-9a-fA-F]|\\n|\\r|\\t|\\\\|\\0|\\u\{[0-9a-fA-F]{1,6}\})*""#,
458 |       r####"r"[^"]*"|r#"([^"]|"[^#])*"#|r##"([^"]|"[^#]|"#[^#])*"##|r###"([^"]|"[^#]|"#[^#]|"##[^#])*"###"####,
459 |       r#"b"([\x20-\x26\x28-\x5b\x5d-\x7e]|\\x[0-9a-fA-F]{2}|\\n|\\r|\\t|\\\\|\\0|\\'|\\")*""#,
460 |       r"\+|-|\*|/|%|&|\||!|\^|<<|>>|&&|\|\||==|!=|<|<=|>|>=|=|\+=|-=|\*=|/=|%=|&=|\|=|\^=|<<=|>>=|\(|\)|\[|\]|\{|\}|\.|\.\.|\.\.\.|->|,|:|@|_|\?",
461 |       r"[~!@#$%^&*()_\-+={}\[\]|\\:;<,>.?/]+",
462 |       r#"[^\s~!@#$%^&*()_\-+={}\[\]|\\:;<,>.?/0-9][^\s~!@#$%^&*()\-+={}\[\]|\\:;<,>.?/]*"#,
463 |     ];
464 |     let matcher: CharsMatcher<_> = res
465 |       .iter()
466 |       .enumerate()
467 |       .fold(RegexBuilder::new(), |b, (i, re)| b.add(re, i))
468 |       .build()
469 |       .unwrap();
470 |     assert_eq!(matcher.is_str_match("123"), Some(&1));
471 |   }
472 | }
473 | 


--------------------------------------------------------------------------------
/laps_regex/src/table.rs:
--------------------------------------------------------------------------------
  1 | //! State-transition table ([`StateTransTable`]) related implementations.
  2 | //!
  3 | //! A state-transition table can be built from a deterministic finite
  4 | //! automaton ([`DFA`]).
  5 | 
  6 | use crate::dfa::DFA;
  7 | use crate::mir::SymbolOp;
  8 | use std::collections::{BTreeMap, HashMap};
  9 | use std::hash::Hash;
 10 | 
 11 | /// A state-transition table with symbol type `S` and tag type `T`.
 12 | #[derive(Debug)]
 13 | pub struct StateTransTable<S, T> {
 14 |   /// State-transition table, which is a `num_equivs * num_states` 2d array.
 15 |   table: Box<[usize]>,
 16 |   /// Initial state ID.
 17 |   init_id: usize,
 18 |   /// Number of states.
 19 |   num_states: usize,
 20 |   /// Mapping between symbol ranges and equivalence class ID.
 21 |   ///
 22 |   /// The key of the map is the right bound of the range, and
 23 |   /// the value is `(left_bound, equiv_id)`.
 24 |   sym_map: BTreeMap<S, (S, usize)>,
 25 |   /// Mapping between state IDs and tags.
 26 |   ///
 27 |   /// Only the state presents in this map are final states.
 28 |   tags: HashMap<usize, T>,
 29 | }
 30 | 
 31 | impl<S, T> StateTransTable<S, T> {
 32 |   /// Creates a new state-transition table from the given [`DFA`].
 33 |   pub fn new(dfa: DFA<S, T>) -> Self
 34 |   where
 35 |     S: Clone + Hash + Eq + Ord + SymbolOp,
 36 |   {
 37 |     let (equivs, trans_table, init_id, tags) = TempTable::new(dfa).into_optimized();
 38 |     // get number of states
 39 |     let num_states = trans_table[0].len();
 40 |     // get the final table
 41 |     let table = trans_table
 42 |       .into_iter()
 43 |       .flat_map(|s| s.into_iter())
 44 |       .collect::<Vec<_>>()
 45 |       .into_boxed_slice();
 46 |     // get symbol map
 47 |     let sym_map = equivs
 48 |       .into_iter()
 49 |       .enumerate()
 50 |       .flat_map(|(i, es)| es.into_iter().map(move |(l, r)| (r, (l, i))))
 51 |       .collect();
 52 |     Self {
 53 |       table,
 54 |       init_id,
 55 |       num_states,
 56 |       sym_map,
 57 |       tags,
 58 |     }
 59 |   }
 60 | 
 61 |   /// Returns a reference to the internal transition table,
 62 |   /// which is a `num_equivs * num_states` 2d array.
 63 |   pub fn table(&self) -> &[usize] {
 64 |     &self.table
 65 |   }
 66 | 
 67 |   /// Returns the ID of the initial state.
 68 |   pub fn init_id(&self) -> usize {
 69 |     self.init_id
 70 |   }
 71 | 
 72 |   /// Returns number of states.
 73 |   pub fn num_states(&self) -> usize {
 74 |     self.num_states
 75 |   }
 76 | 
 77 |   /// Returns a reference to the mapping between symbol ranges
 78 |   /// and equivalence class ID.
 79 |   ///
 80 |   /// The key of the map is the right bound of the range, and
 81 |   /// the value is `(left_bound, equiv_id)`.
 82 |   pub fn sym_map(&self) -> &BTreeMap<S, (S, usize)> {
 83 |     &self.sym_map
 84 |   }
 85 | 
 86 |   /// Returns a reference to the mapping between state IDs and tags.
 87 |   ///
 88 |   /// Only the state presents in this map are final states.
 89 |   pub fn tags(&self) -> &HashMap<usize, T> {
 90 |     &self.tags
 91 |   }
 92 | 
 93 |   /// Returns the ID of the next state after
 94 |   /// accepting symbol `s` on the given state.
 95 |   ///
 96 |   /// Returns [`None`] if the given state ID is invalid,
 97 |   /// or the given state can not accept symbol `s`.
 98 |   pub fn next_state(&self, id: usize, s: &S) -> Option<usize>
 99 |   where
100 |     S: Ord,
101 |   {
102 |     // check if the ID is valid
103 |     if id >= self.num_states {
104 |       return None;
105 |     }
106 |     // get equivalence class ID
107 |     let equiv = match self.sym_map.range(s..).next() {
108 |       Some((_, (l, id))) if s >= l => *id,
109 |       _ => return None,
110 |     };
111 |     // get the next state
112 |     let next = self.table[equiv * self.num_states + id];
113 |     (next < self.num_states).then_some(next)
114 |   }
115 | 
116 |   /// Checks if the given state ID corresponds to a final state.
117 |   ///
118 |   /// Returns [`Some(tag)`] which `tag` corresponds to a user-input
119 |   /// regular expression, otherwise returns [`None`].
120 |   pub fn is_final(&self, id: usize) -> Option<&T> {
121 |     self.tags.get(&id)
122 |   }
123 | }
124 | 
125 | impl<S, T> From<DFA<S, T>> for StateTransTable<S, T>
126 | where
127 |   S: Clone + Hash + Eq + Ord + SymbolOp,
128 | {
129 |   fn from(dfa: DFA<S, T>) -> Self {
130 |     Self::new(dfa)
131 |   }
132 | }
133 | 
134 | /// A temporary state-transition table.
135 | ///
136 | /// This structure will be constructed during the creation of
137 | /// [`StateTransTable`].
138 | struct TempTable<S, T> {
139 |   table: HashMap<Vec<(S, S)>, Vec<usize>>,
140 |   tags: HashMap<usize, T>,
141 |   init_id: usize,
142 | }
143 | 
144 | impl<S, T> TempTable<S, T> {
145 |   /// Creates a new temporary state-transition table from the given [`DFA`].
146 |   fn new(dfa: DFA<S, T>) -> Self
147 |   where
148 |     S: Clone + Hash + Eq,
149 |   {
150 |     let (fa, tags) = dfa.into_fa_tags();
151 |     let num_states = fa.states().len();
152 |     // assign IDs for all states
153 |     let mut ids = HashMap::new();
154 |     for id in fa.states().keys() {
155 |       let next_id = ids.len();
156 |       ids.insert(*id, next_id);
157 |     }
158 |     // build the table
159 |     let mut table = HashMap::new();
160 |     for (id, state) in fa.states() {
161 |       let id = ids[id];
162 |       for (sym, next) in state.outs() {
163 |         // create or get a state table
164 |         let states = table.entry(sym.clone()).or_insert_with(|| {
165 |           let mut v = Vec::new();
166 |           v.resize(num_states, num_states);
167 |           v
168 |         });
169 |         // update it
170 |         states[id] = ids[next];
171 |       }
172 |     }
173 |     // build the tag map
174 |     let tags = tags.into_iter().map(|(id, tag)| (ids[&id], tag)).collect();
175 |     Self {
176 |       table,
177 |       tags,
178 |       init_id: ids[&fa.init_id()],
179 |     }
180 |   }
181 | 
182 |   /// Optimizes the current table.
183 |   ///
184 |   /// Returns equivalence classes, state-transition table,
185 |   /// initial state ID and tags.
186 |   fn into_optimized(self) -> OptimizedTable<S, T>
187 |   where
188 |     S: Ord + SymbolOp,
189 |   {
190 |     // sort the table
191 |     let mut table: Vec<_> = self.table.into_iter().map(|(s, t)| (t, s)).collect();
192 |     table.sort_unstable();
193 |     // get equivalence classes and the state-transition table
194 |     let mut equivs: Vec<Vec<(S, S)>> = Vec::new();
195 |     let mut trans_table = Vec::new();
196 |     for (states, sym) in table {
197 |       match trans_table.last() {
198 |         Some(t) if t == &states => {
199 |           // get the last equivalence classes
200 |           let equiv = equivs.last_mut().unwrap();
201 |           // get the last symbol of the last equivalence classes
202 |           // and the first symbol of the current range
203 |           let (_, last_r) = equiv.last_mut().unwrap();
204 |           let mut iter = sym.into_iter();
205 |           let first_sym = iter.next().unwrap();
206 |           // check if the current symbol can be merged into the last one
207 |           if last_r.next().as_ref() == Some(&first_sym.0) {
208 |             *last_r = first_sym.1;
209 |           } else {
210 |             equiv.push(first_sym);
211 |           }
212 |           // add the rest symbols
213 |           equiv.extend(iter);
214 |         }
215 |         _ => {
216 |           equivs.push(sym);
217 |           trans_table.push(states);
218 |         }
219 |       }
220 |     }
221 |     (equivs, trans_table, self.init_id, self.tags)
222 |   }
223 | }
224 | 
225 | /// Intermediate result of an optimized state-transition table.
226 | ///
227 | /// Contains equivalence classes, optimized state-transition table,
228 | /// initial state ID and tags.
229 | type OptimizedTable<S, T> = (Vec<Vec<(S, S)>>, Vec<Vec<usize>>, usize, HashMap<usize, T>);
230 | 


--------------------------------------------------------------------------------
/src/ast.rs:
--------------------------------------------------------------------------------
  1 | //! Some common predefined AST structures that can be used in parser.
  2 | 
  3 | use crate::parse::Parse;
  4 | use crate::span::{Result, Span, Spanned, TrySpan};
  5 | use crate::token::TokenStream;
  6 | use std::marker::PhantomData;
  7 | use std::slice::{Iter, IterMut};
  8 | use std::vec::IntoIter;
  9 | 
 10 | /// Implements [`IntoIterator`] trait for the given wrapper type.
 11 | macro_rules! impl_into_iterator {
 12 |   ($t:ident<$($generic:ident),+>, $item:ident) => {
 13 |     impl<'a, $($generic),+> IntoIterator for &'a $t<$($generic),+> {
 14 |       type Item = &'a $item;
 15 |       type IntoIter = Iter<'a, $item>;
 16 |       fn into_iter(self) -> Self::IntoIter {
 17 |         self.0.as_slice().into_iter()
 18 |       }
 19 |     }
 20 |     impl<'a, $($generic),+> IntoIterator for &'a mut $t<$($generic),+> {
 21 |       type Item = &'a mut $item;
 22 |       type IntoIter = IterMut<'a, $item>;
 23 |       fn into_iter(self) -> Self::IntoIter {
 24 |         self.0.as_mut_slice().into_iter()
 25 |       }
 26 |     }
 27 |     impl<$($generic),+> IntoIterator for $t<$($generic),+> {
 28 |       type Item = $item;
 29 |       type IntoIter = IntoIter<$item>;
 30 |       fn into_iter(self) -> Self::IntoIter {
 31 |         self.0.into_iter()
 32 |       }
 33 |     }
 34 |   };
 35 | }
 36 | 
 37 | /// A non-empty sequence of AST `T`, which `T` can occur one or more times,
 38 | /// like `T`, `T T`, `T T T`, ...
 39 | ///
 40 | /// The inner [`Vec`] is guaranteed not to be empty.
 41 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 42 | pub struct NonEmptySeq<T>(pub Vec<T>);
 43 | impl_into_iterator!(NonEmptySeq<T>, T);
 44 | 
 45 | impl<TS, T> Parse<TS> for NonEmptySeq<T>
 46 | where
 47 |   TS: TokenStream,
 48 |   T: Parse<TS>,
 49 | {
 50 |   fn parse(tokens: &mut TS) -> Result<Self> {
 51 |     let mut ts = vec![tokens.parse()?];
 52 |     while T::maybe(tokens)? {
 53 |       ts.push(tokens.parse()?);
 54 |     }
 55 |     Ok(Self(ts))
 56 |   }
 57 | 
 58 |   fn maybe(tokens: &mut TS) -> Result<bool> {
 59 |     T::maybe(tokens)
 60 |   }
 61 | }
 62 | 
 63 | impl<T> Spanned for NonEmptySeq<T>
 64 | where
 65 |   T: Spanned,
 66 | {
 67 |   fn span(&self) -> Span {
 68 |     if self.0.len() == 1 {
 69 |       self.0.first().unwrap().span()
 70 |     } else {
 71 |       self
 72 |         .0
 73 |         .first()
 74 |         .unwrap()
 75 |         .span()
 76 |         .into_end_updated(self.0.last().unwrap().span())
 77 |     }
 78 |   }
 79 | }
 80 | 
 81 | /// A sequence of AST `T`, separated by AST `S`,
 82 | /// like `<empty>`, `T`, `T S T`, `T S T S T`, ...
 83 | ///
 84 | /// The delimiter will not be stored.
 85 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 86 | pub struct SepSeq<T, S>(pub Vec<T>, PhantomData<S>);
 87 | impl_into_iterator!(SepSeq<T, S>, T);
 88 | 
 89 | impl<TS, T, S> Parse<TS> for SepSeq<T, S>
 90 | where
 91 |   TS: TokenStream,
 92 |   T: Parse<TS>,
 93 |   S: Parse<TS>,
 94 | {
 95 |   fn parse(tokens: &mut TS) -> Result<Self> {
 96 |     let mut ts = Vec::new();
 97 |     if T::maybe(tokens)? {
 98 |       loop {
 99 |         ts.push(tokens.parse()?);
100 |         if !S::maybe(tokens)? {
101 |           break;
102 |         }
103 |         S::parse(tokens)?;
104 |       }
105 |     }
106 |     Ok(Self(ts, PhantomData))
107 |   }
108 | 
109 |   fn maybe(_: &mut TS) -> Result<bool> {
110 |     Ok(true)
111 |   }
112 | }
113 | 
114 | impl<T, S> TrySpan for SepSeq<T, S>
115 | where
116 |   T: TrySpan,
117 | {
118 |   fn try_span(&self) -> Option<Span> {
119 |     self.0.try_span()
120 |   }
121 | }
122 | 
123 | /// A non-empty sequence of AST `T`, separated by AST `S`,
124 | /// like `T`, `T S T`, `T S T S T`, ...
125 | ///
126 | /// The delimiter will not be stored, and the inner [`Vec`]
127 | /// is guaranteed not to be empty.
128 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
129 | pub struct NonEmptySepSeq<T, S>(pub Vec<T>, PhantomData<S>);
130 | impl_into_iterator!(NonEmptySepSeq<T, S>, T);
131 | 
132 | impl<TS, T, S> Parse<TS> for NonEmptySepSeq<T, S>
133 | where
134 |   TS: TokenStream,
135 |   T: Parse<TS>,
136 |   S: Parse<TS>,
137 | {
138 |   fn parse(tokens: &mut TS) -> Result<Self> {
139 |     let mut ts = vec![tokens.parse()?];
140 |     while S::maybe(tokens)? {
141 |       S::parse(tokens)?;
142 |       ts.push(tokens.parse()?);
143 |     }
144 |     Ok(Self(ts, PhantomData))
145 |   }
146 | 
147 |   fn maybe(tokens: &mut TS) -> Result<bool> {
148 |     T::maybe(tokens)
149 |   }
150 | }
151 | 
152 | impl<T, S> Spanned for NonEmptySepSeq<T, S>
153 | where
154 |   T: Spanned,
155 | {
156 |   fn span(&self) -> Span {
157 |     let span = self.0.first().unwrap().span();
158 |     if self.0.len() == 1 {
159 |       span
160 |     } else {
161 |       span.into_end_updated(self.0.last().unwrap().span())
162 |     }
163 |   }
164 | }
165 | 
166 | /// A sequence of AST `T`, separated by AST `S`, ending with an optional `S`,
167 | /// like `<empty>`, `T`, `T S`, `T S T`, `T S T S`, `T S T S T`, ...
168 | ///
169 | /// The delimiter will not be stored.
170 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
171 | pub struct OptSepSeq<T, S>(pub Vec<T>, PhantomData<S>);
172 | impl_into_iterator!(OptSepSeq<T, S>, T);
173 | 
174 | impl<TS, T, S> Parse<TS> for OptSepSeq<T, S>
175 | where
176 |   TS: TokenStream,
177 |   T: Parse<TS>,
178 |   S: Parse<TS>,
179 | {
180 |   fn parse(tokens: &mut TS) -> Result<Self> {
181 |     let mut ts = Vec::new();
182 |     while T::maybe(tokens)? {
183 |       ts.push(tokens.parse()?);
184 |       if !S::maybe(tokens)? {
185 |         break;
186 |       }
187 |       S::parse(tokens)?;
188 |     }
189 |     Ok(Self(ts, PhantomData))
190 |   }
191 | 
192 |   fn maybe(_: &mut TS) -> Result<bool> {
193 |     Ok(true)
194 |   }
195 | }
196 | 
197 | impl<T, S> TrySpan for OptSepSeq<T, S>
198 | where
199 |   T: TrySpan,
200 | {
201 |   fn try_span(&self) -> Option<Span> {
202 |     self.0.try_span()
203 |   }
204 | }
205 | 
206 | /// A non-empty sequence of AST `T`, separated by AST `S`, ending with an
207 | /// optional `S`, like `T`, `T S`, `T S T`, `T S T S`, `T S T S T`, ...
208 | ///
209 | /// The delimiter will not be stored, and the inner [`Vec`]
210 | /// is guaranteed not to be empty.
211 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
212 | pub struct NonEmptyOptSepSeq<T, S>(pub Vec<T>, PhantomData<S>);
213 | impl_into_iterator!(NonEmptyOptSepSeq<T, S>, T);
214 | 
215 | impl<TS, T, S> Parse<TS> for NonEmptyOptSepSeq<T, S>
216 | where
217 |   TS: TokenStream,
218 |   T: Parse<TS>,
219 |   S: Parse<TS>,
220 | {
221 |   fn parse(tokens: &mut TS) -> Result<Self> {
222 |     let mut ts = vec![tokens.parse()?];
223 |     while S::maybe(tokens)? {
224 |       S::parse(tokens)?;
225 |       if !T::maybe(tokens)? {
226 |         break;
227 |       }
228 |       ts.push(tokens.parse()?);
229 |     }
230 |     Ok(Self(ts, PhantomData))
231 |   }
232 | 
233 |   fn maybe(tokens: &mut TS) -> Result<bool> {
234 |     T::maybe(tokens)
235 |   }
236 | }
237 | 
238 | impl<T, S> Spanned for NonEmptyOptSepSeq<T, S>
239 | where
240 |   T: Spanned,
241 | {
242 |   fn span(&self) -> Span {
243 |     let span = self.0.first().unwrap().span();
244 |     if self.0.len() == 1 {
245 |       span
246 |     } else {
247 |       span.into_end_updated(self.0.last().unwrap().span())
248 |     }
249 |   }
250 | }
251 | 
252 | /// A non-empty linked list of AST `T`, separated by AST `S`,
253 | /// like `T`, `T S T`, `T S T S T`, ...
254 | ///
255 | /// The delimiter will be stored.
256 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
257 | pub enum NonEmptySepList<T, S> {
258 |   /// One element.
259 |   One(T),
260 |   /// More than one element.
261 |   More(T, S, Box<Self>),
262 | }
263 | 
264 | impl<TS, T, S> Parse<TS> for NonEmptySepList<T, S>
265 | where
266 |   TS: TokenStream,
267 |   T: Parse<TS>,
268 |   S: Parse<TS>,
269 | {
270 |   fn parse(tokens: &mut TS) -> Result<Self> {
271 |     let t = tokens.parse()?;
272 |     Ok(if S::maybe(tokens)? {
273 |       Self::More(t, tokens.parse()?, tokens.parse()?)
274 |     } else {
275 |       Self::One(t)
276 |     })
277 |   }
278 | 
279 |   fn maybe(tokens: &mut TS) -> Result<bool> {
280 |     T::maybe(tokens)
281 |   }
282 | }
283 | 
284 | impl<T, S> Spanned for NonEmptySepList<T, S>
285 | where
286 |   T: Spanned,
287 | {
288 |   fn span(&self) -> Span {
289 |     match self {
290 |       Self::One(t) => t.span(),
291 |       Self::More(t, _, l) => t.span().into_end_updated(l.span()),
292 |     }
293 |   }
294 | }
295 | 
296 | /// A linked list of AST `T`, separated by AST `S`,
297 | /// like `<empty>`, `T`, `T S T`, `T S T S T`, ...
298 | ///
299 | /// The delimiter will be stored.
300 | pub type SepList<T, S> = Option<NonEmptySepList<T, S>>;
301 | 
302 | /// A non-empty linked list of AST `T`, separated by AST `S`, ending with
303 | /// an optional `S`, like `T`, `T S`, `T S T`, `T S T S`, `T S T S T`, ...
304 | ///
305 | /// The delimiter will be stored.
306 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
307 | pub enum NonEmptyOptSepList<T, S> {
308 |   /// One element.
309 |   One(T),
310 |   /// One element with a separator.
311 |   OneWithSep(T, S),
312 |   /// More than one element.
313 |   More(T, S, Box<Self>),
314 | }
315 | 
316 | impl<TS, T, S> Parse<TS> for NonEmptyOptSepList<T, S>
317 | where
318 |   TS: TokenStream,
319 |   T: Parse<TS>,
320 |   S: Parse<TS>,
321 | {
322 |   fn parse(tokens: &mut TS) -> Result<Self> {
323 |     let t = tokens.parse()?;
324 |     Ok(if S::maybe(tokens)? {
325 |       let s = tokens.parse()?;
326 |       if T::maybe(tokens)? {
327 |         Self::More(t, s, tokens.parse()?)
328 |       } else {
329 |         Self::OneWithSep(t, s)
330 |       }
331 |     } else {
332 |       Self::One(t)
333 |     })
334 |   }
335 | 
336 |   fn maybe(tokens: &mut TS) -> Result<bool> {
337 |     T::maybe(tokens)
338 |   }
339 | }
340 | 
341 | impl<T, S> Spanned for NonEmptyOptSepList<T, S>
342 | where
343 |   T: Spanned,
344 |   S: Spanned,
345 | {
346 |   fn span(&self) -> Span {
347 |     match self {
348 |       Self::One(t) => t.span(),
349 |       Self::OneWithSep(t, s) => t.span().into_end_updated(s.span()),
350 |       Self::More(t, _, l) => t.span().into_end_updated(l.span()),
351 |     }
352 |   }
353 | }
354 | 
355 | /// A linked list of AST `T`, separated by AST `S`, ending with
356 | /// an optional `S`, like `<empty>`, `T`, `T S`, `T S T`, `T S T S`,
357 | /// `T S T S T`, ...
358 | ///
359 | /// The delimiter will be stored.
360 | pub type OptSepList<T, S> = Option<NonEmptyOptSepList<T, S>>;
361 | 
362 | /// An AST `T` quoted by AST `L` and AST `R`, like `L T R`.
363 | #[deprecated(
364 |   since = "0.1.6",
365 |   note = "will be removed in 0.2.0, please use tuple `(L, T, R)` instead"
366 | )]
367 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
368 | pub struct Quoted<L, T, R>(pub L, pub T, pub R);
369 | 
370 | #[allow(deprecated)]
371 | impl<TS, L, T, R> Parse<TS> for Quoted<L, T, R>
372 | where
373 |   TS: TokenStream,
374 |   L: Parse<TS>,
375 |   T: Parse<TS>,
376 |   R: Parse<TS>,
377 | {
378 |   fn parse(tokens: &mut TS) -> Result<Self> {
379 |     Ok(Self(tokens.parse()?, tokens.parse()?, tokens.parse()?))
380 |   }
381 | 
382 |   fn maybe(tokens: &mut TS) -> Result<bool> {
383 |     L::maybe(tokens)
384 |   }
385 | }
386 | 
387 | #[allow(deprecated)]
388 | impl<L, T, R> Spanned for Quoted<L, T, R>
389 | where
390 |   L: Spanned,
391 |   R: Spanned,
392 | {
393 |   fn span(&self) -> Span {
394 |     self.0.span().into_end_updated(self.2.span())
395 |   }
396 | }
397 | 
398 | /// An AST `T` with an optional prefix `P`, like `T` or `P T`.
399 | ///
400 | /// The `maybe` method of AST returns `true` when either `P::maybe` returns
401 | /// `true` or `T::maybe` returns `true`. This may not work in the following
402 | /// example:
403 | ///
404 | /// ```
405 | /// # use laps::{prelude::*, span::Result, ast::OptPrefix, token::{Tokenizer, TokenBuffer}};
406 | /// # struct Prefix;
407 | /// # impl<TS> Parse<TS> for Prefix
408 | /// # where
409 | /// #   TS: TokenStream,
410 | /// # {
411 | /// #   fn parse(_: &mut TS) -> Result<Self> { Ok(Self) }
412 | /// #   fn maybe(_: &mut TS) -> Result<bool> { Ok(true) }
413 | /// # }
414 | /// # struct Item1;
415 | /// # impl<TS> Parse<TS> for Item1
416 | /// # where
417 | /// #   TS: TokenStream,
418 | /// # {
419 | /// #   fn parse(_: &mut TS) -> Result<Self> { Ok(Self) }
420 | /// #   fn maybe(_: &mut TS) -> Result<bool> { Ok(true) }
421 | /// # }
422 | /// # struct Item2;
423 | /// # impl<TS> Parse<TS> for Item2
424 | /// # where
425 | /// #   TS: TokenStream,
426 | /// # {
427 | /// #   fn parse(_: &mut TS) -> Result<Self> { Ok(Self) }
428 | /// #   fn maybe(_: &mut TS) -> Result<bool> { Ok(true) }
429 | /// # }
430 | /// # struct Lexer;
431 | /// # impl Tokenizer for Lexer {
432 | /// #   type Token = ();
433 | /// #   fn next_token(&mut self) -> Result<()> { Ok(()) }
434 | /// # }
435 | /// # let mut tokens = TokenBuffer::new(Lexer);
436 | /// # impl<TS> Parse<TS> for Items
437 | /// # where
438 | /// #   TS: TokenStream,
439 | /// # {
440 | /// #   fn parse(_: &mut TS) -> Result<Self> { Ok(Self::Item1(OptPrefix(None, Item1))) }
441 | /// #   fn maybe(_: &mut TS) -> Result<bool> { Ok(true) }
442 | /// # }
443 | /// enum Items {
444 | ///   Item1(OptPrefix<Prefix, Item1>),
445 | ///   Item2(OptPrefix<Prefix, Item2>),
446 | /// }
447 | ///
448 | /// let items: Items = tokens.parse().unwrap();
449 | /// ```
450 | ///
451 | /// The `items` may always be `Items::Item1` whether the input is
452 | /// `Prefix Item1` or `Prefix Item2` with a naive implementation of trait
453 | /// `Parse` for `Items` (like `#[derive(Parse)]`).
454 | ///
455 | /// For more precise implementation of `maybe` method, please use
456 | /// [`OptTokenPrefix`] if possible.
457 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
458 | pub struct OptPrefix<P, T>(pub Option<P>, pub T);
459 | 
460 | impl<TS, P, T> Parse<TS> for OptPrefix<P, T>
461 | where
462 |   TS: TokenStream,
463 |   P: Parse<TS>,
464 |   T: Parse<TS>,
465 | {
466 |   fn parse(tokens: &mut TS) -> Result<Self> {
467 |     Ok(Self(tokens.parse()?, tokens.parse()?))
468 |   }
469 | 
470 |   fn maybe(tokens: &mut TS) -> Result<bool> {
471 |     Ok(P::maybe(tokens)? || T::maybe(tokens)?)
472 |   }
473 | }
474 | 
475 | impl<P, T> Spanned for OptPrefix<P, T>
476 | where
477 |   P: Spanned,
478 |   T: Spanned,
479 | {
480 |   fn span(&self) -> Span {
481 |     match &self.0 {
482 |       Some(p) => p.span().into_end_updated(self.1.span()),
483 |       None => self.1.span(),
484 |     }
485 |   }
486 | }
487 | 
488 | /// An AST `T` with an optional prefix `P`, like `T` or `P T`.
489 | ///
490 | /// The `maybe` method of AST treats `P` as a single token, and returns
491 | /// `true` if both `P::maybe` returns `true` and `T::maybe` returns `true`,
492 | /// otherwise returns the result of `T::maybe`.
493 | ///
494 | /// # Notes
495 | ///
496 | /// Do not use this AST type if `P` is not a single token.
497 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
498 | pub struct OptTokenPrefix<P, T>(pub Option<P>, pub T);
499 | 
500 | impl<TS, P, T> Parse<TS> for OptTokenPrefix<P, T>
501 | where
502 |   TS: TokenStream,
503 |   P: Parse<TS>,
504 |   T: Parse<TS>,
505 | {
506 |   fn parse(tokens: &mut TS) -> Result<Self> {
507 |     Ok(Self(tokens.parse()?, tokens.parse()?))
508 |   }
509 | 
510 |   fn maybe(tokens: &mut TS) -> Result<bool> {
511 |     if P::maybe(tokens)? {
512 |       let token = tokens.next_token()?;
513 |       let result = T::maybe(tokens)?;
514 |       tokens.unread(token);
515 |       Ok(result)
516 |     } else {
517 |       T::maybe(tokens)
518 |     }
519 |   }
520 | }
521 | 
522 | impl<P, T> Spanned for OptTokenPrefix<P, T>
523 | where
524 |   P: Spanned,
525 |   T: Spanned,
526 | {
527 |   fn span(&self) -> Span {
528 |     match &self.0 {
529 |       Some(p) => p.span().into_end_updated(self.1.span()),
530 |       None => self.1.span(),
531 |     }
532 |   }
533 | }
534 | 
535 | /// An AST `T` with a prefix `P`, like `T` or `P T`.
536 | ///
537 | /// The `maybe` method of AST treats `P` as a single token, and returns
538 | /// `true` if both `P::maybe` returns `true` and `T::maybe` returns `true`.
539 | ///
540 | /// # Notes
541 | ///
542 | /// Do not use this AST type if `P` is not a single token.
543 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
544 | pub struct TokenPrefix<P, T>(pub P, pub T);
545 | 
546 | impl<TS, P, T> Parse<TS> for TokenPrefix<P, T>
547 | where
548 |   TS: TokenStream,
549 |   P: Parse<TS>,
550 |   T: Parse<TS>,
551 | {
552 |   fn parse(tokens: &mut TS) -> Result<Self> {
553 |     Ok(Self(tokens.parse()?, tokens.parse()?))
554 |   }
555 | 
556 |   fn maybe(tokens: &mut TS) -> Result<bool> {
557 |     if P::maybe(tokens)? {
558 |       let token = tokens.next_token()?;
559 |       let result = T::maybe(tokens)?;
560 |       tokens.unread(token);
561 |       Ok(result)
562 |     } else {
563 |       Ok(false)
564 |     }
565 |   }
566 | }
567 | 
568 | impl<P, T> Spanned for TokenPrefix<P, T>
569 | where
570 |   P: Spanned,
571 |   T: Spanned,
572 | {
573 |   fn span(&self) -> Span {
574 |     self.0.span().into_end_updated(self.1.span())
575 |   }
576 | }
577 | 


--------------------------------------------------------------------------------
/src/input.rs:
--------------------------------------------------------------------------------
  1 | //! Utilities for constructing lexers.
  2 | //!
  3 | //! This module conntains the [`InputStream`] trait, which can be
  4 | //! implemented for input streams, i.e. streams that return characters.
  5 | //! This trait has already been implemented for
  6 | //! [`Reader`](crate::reader::Reader) and
  7 | //! [`ByteReader`](crate::reader::ByteReader).
  8 | //!
  9 | //! The [`InputStream`] trait provides many useful utility methods for
 10 | //! reading characters and the corresponding [`Span`]s from the input stream.
 11 | 
 12 | use crate::span::{Location, Result, Span};
 13 | 
 14 | /// Trait for input streams.
 15 | pub trait InputStream {
 16 |   /// The type of the character produced by the input stream.
 17 |   type CharType;
 18 | 
 19 |   /// Reads the next character from the input stream.
 20 |   ///
 21 |   /// Returns the character and the last location (location before reading
 22 |   /// the character) if successful, or <code>[Ok]&#40;[None]&#41;</code>
 23 |   /// if EOF was encountered, or [`Err`] if something wrong.
 24 |   fn next_char_loc(&mut self) -> Result<(Option<Self::CharType>, Location)>;
 25 | 
 26 |   /// Unreads the given character and the last location
 27 |   /// and put it back to the input stream.
 28 |   fn unread(&mut self, last: (Option<Self::CharType>, Location));
 29 | 
 30 |   /// Returns a reference to the current span in the lexer.
 31 |   fn span(&self) -> &Span;
 32 | 
 33 |   /// Sets the line and column of the current span.
 34 |   ///
 35 |   /// This could be useful if something like the C preprocessor
 36 |   /// is to be supported:
 37 |   ///
 38 |   /// ```text
 39 |   /// # 37 "<stdin>"
 40 |   /// ...
 41 |   /// ```
 42 |   fn set_line_col(&mut self, line: u32, col: u32);
 43 | 
 44 |   /// Reads the next character from the input stream.
 45 |   ///
 46 |   /// Returns the character if successful,
 47 |   /// or <code>[Ok]&#40;[None]&#41;</code> if EOF was encountered,
 48 |   /// or [`Err`] if something wrong.
 49 |   fn next_char(&mut self) -> Result<Option<Self::CharType>> {
 50 |     self.next_char_loc().map(|(c, _)| c)
 51 |   }
 52 | 
 53 |   /// Reads the next character from the input stream.
 54 |   ///
 55 |   /// Returns the character and its span if successful,
 56 |   /// or <code>[Ok]&#40;([None], _)&#41;</code> if EOF was encountered,
 57 |   /// or [`Err`] if something wrong.
 58 |   fn next_char_span(&mut self) -> Result<(Option<Self::CharType>, Span)> {
 59 |     self.next_char_loc().map(|(c, _)| (c, self.span().clone()))
 60 |   }
 61 | 
 62 |   /// Reads the next character from the input stream.
 63 |   ///
 64 |   /// Returns a reference to the span of the read character if successful,
 65 |   /// or [`Err`] if something wrong.
 66 |   fn next_span(&mut self) -> Result<&Span> {
 67 |     self.next_char_loc()?;
 68 |     Ok(self.span())
 69 |   }
 70 | 
 71 |   /// Peeks the next character from the input stream.
 72 |   ///
 73 |   /// Does not advance the position of the input stream.
 74 |   fn peek(&mut self) -> Result<Option<Self::CharType>>
 75 |   where
 76 |     Self::CharType: Clone,
 77 |   {
 78 |     let (c, loc) = self.next_char_loc()?;
 79 |     self.unread((c.clone(), loc));
 80 |     Ok(c)
 81 |   }
 82 | 
 83 |   /// Peeks the next character from the input stream.
 84 |   /// Returns the peeked character and its span.
 85 |   ///
 86 |   /// Does not advance the position of the input stream.
 87 |   fn peek_with_span(&mut self) -> Result<(Option<Self::CharType>, Span)>
 88 |   where
 89 |     Self::CharType: Clone,
 90 |   {
 91 |     let (c, loc) = self.next_char_loc()?;
 92 |     let span = self.span().clone();
 93 |     self.unread((c.clone(), loc));
 94 |     Ok((c, span))
 95 |   }
 96 | 
 97 |   /// Skips characters until a character specified by the predicate is encountered.
 98 |   fn skip_until<F>(&mut self, mut f: F) -> Result<()>
 99 |   where
100 |     Self::CharType: Clone,
101 |     F: FnMut(Self::CharType) -> bool,
102 |   {
103 |     while self.peek()?.map_or(false, |c| !f(c)) {
104 |       self.next_char()?;
105 |     }
106 |     Ok(())
107 |   }
108 | 
109 |   /// Collects characters into a vector until a character specified by the
110 |   /// predicate is encountered.
111 |   fn collect_until<F>(&mut self, mut f: F) -> Result<Vec<Self::CharType>>
112 |   where
113 |     Self::CharType: Clone,
114 |     F: FnMut(&Self::CharType) -> bool,
115 |   {
116 |     let mut v = Vec::new();
117 |     while let Some(c) = self.peek()? {
118 |       if f(&c) {
119 |         break;
120 |       }
121 |       v.push(c);
122 |       self.next_char()?;
123 |     }
124 |     Ok(v)
125 |   }
126 | 
127 |   /// Collects characters into a vector until a character specified by the
128 |   /// predicate is encountered.
129 |   ///
130 |   /// Returns the collected vector and its span.
131 |   fn collect_with_span_until<F>(&mut self, mut f: F) -> Result<(Vec<Self::CharType>, Span)>
132 |   where
133 |     Self::CharType: Clone,
134 |     F: FnMut(&Self::CharType) -> bool,
135 |   {
136 |     let mut v = Vec::new();
137 |     let mut span = match self.peek_with_span()? {
138 |       (Some(c), span) if !f(&c) => span,
139 |       (_, span) => return Ok((v, span)),
140 |     };
141 |     while let Some(c) = self.peek()? {
142 |       if f(&c) {
143 |         break;
144 |       }
145 |       v.push(c);
146 |       span.update_end(self.next_span()?);
147 |     }
148 |     Ok((v, span))
149 |   }
150 | }
151 | 
152 | #[cfg(test)]
153 | mod test {
154 |   use super::*;
155 |   use crate::reader::Reader;
156 | 
157 |   #[test]
158 |   fn next_char_or_span() {
159 |     let mut reader = Reader::from("123 abc");
160 |     assert_eq!(reader.next_char(), Ok(Some('1')));
161 |     assert_eq!(reader.next_char(), Ok(Some('2')));
162 |     let (c, span) = reader.next_char_span().unwrap();
163 |     assert_eq!(c, Some('3'));
164 |     assert_eq!(format!("{span}"), "1:3-1:3");
165 |     let (c, span) = reader.next_char_span().unwrap();
166 |     assert_eq!(c, Some(' '));
167 |     assert_eq!(format!("{span}"), "1:4-1:4");
168 |     assert_eq!(format!("{}", reader.next_span().unwrap()), "1:5-1:5");
169 |     assert_eq!(format!("{}", reader.next_span().unwrap()), "1:6-1:6");
170 |     assert_eq!(reader.next_char(), Ok(Some('c')));
171 |     assert_eq!(reader.next_char(), Ok(None));
172 |     assert_eq!(reader.next_char(), Ok(None));
173 |   }
174 | 
175 |   #[test]
176 |   fn skip_until() {
177 |     let mut reader = Reader::from("123  abc");
178 |     assert_eq!(reader.skip_until(|c| c.is_whitespace()), Ok(()));
179 |     assert_eq!(reader.next_char(), Ok(Some(' ')));
180 |     assert_eq!(reader.next_char(), Ok(Some(' ')));
181 |     assert_eq!(reader.next_char(), Ok(Some('a')));
182 |     assert_eq!(reader.next_char(), Ok(Some('b')));
183 |     assert_eq!(reader.next_char(), Ok(Some('c')));
184 |     assert_eq!(reader.next_char(), Ok(None));
185 |     assert_eq!(reader.next_char(), Ok(None));
186 |   }
187 | 
188 |   #[test]
189 |   fn collect_until() {
190 |     let mut reader = Reader::from("123 abc");
191 |     assert_eq!(reader.collect_until(|c| *c == '1'), Ok(vec![]));
192 |     assert_eq!(
193 |       reader.collect_with_span_until(|c| *c == '1').unwrap().0,
194 |       vec![]
195 |     );
196 |     assert_eq!(
197 |       reader.collect_until(|c| c.is_whitespace()),
198 |       Ok("123".chars().collect())
199 |     );
200 |     assert_eq!(reader.next_char(), Ok(Some(' ')));
201 |     let (s, span) = reader.collect_with_span_until(|_| false).unwrap();
202 |     assert_eq!(s, "abc".chars().collect::<Vec<_>>());
203 |     assert_eq!(format!("{span}"), "1:5-1:7");
204 |     assert_eq!(reader.next_char(), Ok(None));
205 |     assert_eq!(reader.next_char(), Ok(None));
206 |   }
207 | }
208 | 


--------------------------------------------------------------------------------
/src/lexer.rs:
--------------------------------------------------------------------------------
  1 | //! Implementations for constructing lexers.
  2 | //!
  3 | //! This module contains:
  4 | //!
  5 | //! * [`Tokenize`]: trait for tokenizing token kinds. With feature `macros`
  6 | //!   enabled, you can derive this trait for token kinds.
  7 | //! * [`Lexer`]: a lexer implementation for token kinds that implemented
  8 | //!   [`Tokenize`] trait.
  9 | //! * Some helper functions for constructing lexers.
 10 | 
 11 | use crate::input::InputStream;
 12 | use crate::token::{Token, Tokenizer};
 13 | use std::marker::PhantomData;
 14 | use std::num::ParseIntError;
 15 | 
 16 | #[cfg(feature = "macros")]
 17 | pub use laps_macros::Tokenize;
 18 | 
 19 | /// Trait for token kinds that can be tokenized from an input stream.
 20 | pub trait Tokenize: Sized {
 21 |   /// The type of the character produced by the input stream.
 22 |   type CharType;
 23 | 
 24 |   /// Reads the next token from the given input stream.
 25 |   ///
 26 |   /// Returns the token ([`Token<Self>`]) if successful, otherwise [`Err`].
 27 |   fn next_token<I>(input: &mut I) -> crate::span::Result<Token<Self>>
 28 |   where
 29 |     I: InputStream<CharType = Self::CharType>;
 30 | 
 31 |   /// Creates a lexer from the given input stream that
 32 |   /// produces the current token kind.
 33 |   fn lexer<I>(input: I) -> Lexer<I, Self> {
 34 |     Lexer {
 35 |       input,
 36 |       token: PhantomData,
 37 |     }
 38 |   }
 39 | }
 40 | 
 41 | /// A lexer with input stream type `I` and token kind type `K`.
 42 | ///
 43 | /// This lexer will produce tokens of type [`Token<K>`].
 44 | pub struct Lexer<I, K> {
 45 |   input: I,
 46 |   token: PhantomData<K>,
 47 | }
 48 | 
 49 | impl<I, K> Lexer<I, K> {
 50 |   /// Converts the lexer into its inner input stream.
 51 |   pub fn into_input(self) -> I {
 52 |     self.input
 53 |   }
 54 | 
 55 |   /// Returns a reference to the inner input stream.
 56 |   pub fn input(&self) -> &I {
 57 |     &self.input
 58 |   }
 59 | 
 60 |   /// Returns a mutable reference to the inner input stream.
 61 |   pub fn input_mut(&mut self) -> &mut I {
 62 |     &mut self.input
 63 |   }
 64 | }
 65 | 
 66 | impl<I, K, C> Tokenizer for Lexer<I, K>
 67 | where
 68 |   I: InputStream<CharType = C>,
 69 |   K: Tokenize<CharType = C>,
 70 | {
 71 |   type Token = Token<K>;
 72 | 
 73 |   fn next_token(&mut self) -> crate::span::Result<Self::Token> {
 74 |     K::next_token(&mut self.input)
 75 |   }
 76 | }
 77 | 
 78 | /// Parses integer literals from the given string.
 79 | /// Supports decimal, binary, hexadecimal and octal.
 80 | ///
 81 | /// Returns the integer if successful, otherwise returns [`None`].
 82 | ///
 83 | /// # Examples
 84 | ///
 85 | /// ```
 86 | /// use laps::lexer::int_literal;
 87 | ///
 88 | /// assert_eq!(int_literal("0"), Some(0));
 89 | /// assert_eq!(int_literal("00"), Some(0));
 90 | /// assert_eq!(int_literal("42"), Some(42));
 91 | /// assert_eq!(int_literal("0x1a"), Some(26));
 92 | /// assert_eq!(int_literal("0b0110"), Some(6));
 93 | /// assert_eq!(int_literal("0o777"), Some(511));
 94 | /// assert_eq!(int_literal::<i32>("z"), None);
 95 | /// assert_eq!(int_literal::<i32>("0f"), None);
 96 | /// assert_eq!(int_literal::<i32>("0b777"), None);
 97 | /// ```
 98 | pub fn int_literal<T>(s: &str) -> Option<T>
 99 | where
100 |   T: IntLiteral,
101 | {
102 |   // check if is a valid integer literal
103 |   let mut chars = s.chars();
104 |   let (radix, starts_from) = match (chars.next(), chars.next()) {
105 |     (Some('0'), Some(c)) if "box".contains(c) => (
106 |       match c {
107 |         'b' => 2,
108 |         'o' => 8,
109 |         'x' => 16,
110 |         _ => unreachable!(),
111 |       },
112 |       2,
113 |     ),
114 |     (Some(c), None) if c.is_ascii_digit() => (10, 0),
115 |     (Some(c1), Some(c2)) if c1.is_ascii_digit() && c2.is_ascii_digit() => (10, 0),
116 |     _ => return None,
117 |   };
118 |   if !chars.all(|c| c.is_digit(radix)) {
119 |     return None;
120 |   }
121 |   // convert to integer
122 |   T::from_str_radix(&s[starts_from..], radix).ok()
123 | }
124 | 
125 | /// Parses integer literals with an optional sign from the given string.
126 | /// Supports decimal, binary, hexadecimal and octal.
127 | ///
128 | /// Returns the integer if successful, otherwise returns [`None`].
129 | ///
130 | /// # Examples
131 | ///
132 | /// ```
133 | /// use laps::lexer::signed_int_literal;
134 | ///
135 | /// assert_eq!(signed_int_literal("0"), Some(0));
136 | /// assert_eq!(signed_int_literal("+00"), Some(0));
137 | /// assert_eq!(signed_int_literal("-42"), Some(-42));
138 | /// assert_eq!(signed_int_literal("-0x1a"), Some(-26));
139 | /// assert_eq!(signed_int_literal("0b0110"), Some(6));
140 | /// assert_eq!(signed_int_literal("+0o777"), Some(511));
141 | /// assert_eq!(signed_int_literal::<u32>("-1"), Some(u32::MAX));
142 | /// assert_eq!(signed_int_literal::<i32>("+"), None);
143 | /// assert_eq!(signed_int_literal::<i32>("--1"), None);
144 | /// assert_eq!(signed_int_literal::<i32>("-0b777"), None);
145 | /// ```
146 | pub fn signed_int_literal<T>(s: &str) -> Option<T>
147 | where
148 |   T: IntLiteral,
149 | {
150 |   let first = s.chars().next()?;
151 |   if first == '+' || first == '-' {
152 |     int_literal(&s[1..]).map(|n: T| if first == '-' { n.wrapping_neg() } else { n })
153 |   } else {
154 |     int_literal(s)
155 |   }
156 | }
157 | 
158 | /// A helper trait for function [`int_literal`].
159 | ///
160 | /// Users are not allowed to implement this trait for other types.
161 | pub trait IntLiteral: Sized + sealed_traits::SealedIntLiteral {
162 |   /// Converts a string slice in a given base to an integer.
163 |   ///
164 |   /// This is identical to `from_str_radix` method of primitive integer types,
165 |   /// such as [`i32::from_str_radix`](i32#method.from_str_radix).
166 |   fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError>;
167 | 
168 |   /// Wrapping negates the current number.
169 |   fn wrapping_neg(self) -> Self;
170 | }
171 | 
172 | /// Helper macro for implementing `IntLiteral` for integers.
173 | macro_rules! impl_int_literal {
174 |   ($ty:ty) => {
175 |     impl IntLiteral for $ty {
176 |       fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
177 |         <$ty>::from_str_radix(s, radix)
178 |       }
179 | 
180 |       fn wrapping_neg(self) -> Self {
181 |         self.wrapping_neg()
182 |       }
183 |     }
184 |   };
185 | }
186 | 
187 | impl_int_literal!(i8);
188 | impl_int_literal!(i16);
189 | impl_int_literal!(i32);
190 | impl_int_literal!(i64);
191 | impl_int_literal!(i128);
192 | impl_int_literal!(isize);
193 | impl_int_literal!(u8);
194 | impl_int_literal!(u16);
195 | impl_int_literal!(u32);
196 | impl_int_literal!(u64);
197 | impl_int_literal!(u128);
198 | impl_int_literal!(usize);
199 | 
200 | /// Sealed trait for trait `IntLiteral`.
201 | mod sealed_traits {
202 |   pub trait SealedIntLiteral {}
203 |   impl SealedIntLiteral for i8 {}
204 |   impl SealedIntLiteral for i16 {}
205 |   impl SealedIntLiteral for i32 {}
206 |   impl SealedIntLiteral for i64 {}
207 |   impl SealedIntLiteral for i128 {}
208 |   impl SealedIntLiteral for isize {}
209 |   impl SealedIntLiteral for u8 {}
210 |   impl SealedIntLiteral for u16 {}
211 |   impl SealedIntLiteral for u32 {}
212 |   impl SealedIntLiteral for u64 {}
213 |   impl SealedIntLiteral for u128 {}
214 |   impl SealedIntLiteral for usize {}
215 | }
216 | 
217 | /// Parses string literals (`"..."`) from the given string.
218 | ///
219 | /// Supported escapes:
220 | /// * `\r`, `\n`, `\t`, `\0`, `\\`.
221 | /// * `\'`, `\"`.
222 | /// * `\x00`-`\xff` (`\xFF`).
223 | /// * `\u{0}`-`\u{d7ff}` and `\u{e000}`-`\u{10ffff}` (`\u{10FFFF}`).
224 | ///
225 | /// Returns the string if successful, otherwise returns [`None`].
226 | ///
227 | /// # Examples
228 | ///
229 | /// ```
230 | /// use laps::lexer::str_literal;
231 | ///
232 | /// assert_eq!(str_literal(r#""hello""#), Some("hello".into()));
233 | /// assert_eq!(str_literal(r#""你好""#), Some("你好".into()));
234 | /// assert_eq!(str_literal(r#""""#), Some("".into()));
235 | /// assert_eq!(str_literal(r#""\"\n\t\\""#), Some("\"\n\t\\".into()));
236 | /// assert_eq!(str_literal(r#""#), None);
237 | /// assert_eq!(str_literal(r#""hello"#), None);
238 | /// ```
239 | pub fn str_literal(s: &str) -> Option<String> {
240 |   let mut chars = s.chars();
241 |   // check the first quote
242 |   if chars.next()? != '"' {
243 |     return None;
244 |   }
245 |   // get string literal
246 |   let mut s = String::new();
247 |   loop {
248 |     match parse_char_literal(&mut chars, '"') {
249 |       ParseResult::Char(c) => s.push(c),
250 |       ParseResult::Quote => break,
251 |       ParseResult::Error => return None,
252 |     }
253 |   }
254 |   // check the last quote
255 |   chars.next().is_none().then_some(s)
256 | }
257 | 
258 | /// Parses character literals (`'...'`) from the given string.
259 | ///
260 | /// Supported escapes:
261 | /// * `\r`, `\n`, `\t`, `\0`, `\\`.
262 | /// * `\'`, `\"`.
263 | /// * `\x00`-`\xff` (`\xFF`).
264 | /// * `\u{0}`-`\u{d7ff}` and `\u{e000}`-`\u{10ffff}` (`\u{10FFFF}`).
265 | ///
266 | /// Returns the character if successful, otherwise returns [`None`].
267 | ///
268 | /// # Examples
269 | ///
270 | /// ```
271 | /// use laps::lexer::char_literal;
272 | ///
273 | /// assert_eq!(char_literal(r#"'a'"#), Some('a'));
274 | /// assert_eq!(char_literal(r#"'😂'"#), Some('😂'));
275 | /// assert_eq!(char_literal(r#"'\n'"#), Some('\n'));
276 | /// assert_eq!(char_literal(r#""#), None);
277 | /// assert_eq!(char_literal(r#"''"#), None);
278 | /// assert_eq!(char_literal(r#"'a"#), None);
279 | /// ```
280 | pub fn char_literal(s: &str) -> Option<char> {
281 |   let mut chars = s.chars();
282 |   // check the first quote
283 |   if chars.next()? != '\'' {
284 |     return None;
285 |   }
286 |   // get character literal
287 |   match parse_char_literal(&mut chars, '\'') {
288 |     ParseResult::Char(c) => ((chars.next(), chars.next()) == (Some('\''), None)).then_some(c),
289 |     _ => None,
290 |   }
291 | }
292 | 
293 | /// Parses a char literal (do not include quotes)
294 | /// from the given character iterator.
295 | fn parse_char_literal<I>(iter: &mut I, quote: char) -> ParseResult
296 | where
297 |   I: Iterator<Item = char>,
298 | {
299 |   match iter.next() {
300 |     Some('\n') | Some('\r') | Some('\t') => ParseResult::Error,
301 |     Some('\\') => match iter.next() {
302 |       Some('r') => ParseResult::Char('\r'),
303 |       Some('n') => ParseResult::Char('\n'),
304 |       Some('t') => ParseResult::Char('\t'),
305 |       Some('0') => ParseResult::Char('\0'),
306 |       Some('\\') => ParseResult::Char('\\'),
307 |       Some('\'') => ParseResult::Char('\''),
308 |       Some('\"') => ParseResult::Char('\"'),
309 |       Some('x') => {
310 |         // get escaped char
311 |         let c = iter
312 |           .next()
313 |           .and_then(|c| c.to_digit(16))
314 |           .zip(iter.next().and_then(|c| c.to_digit(16)))
315 |           .map(|(c1, c2)| (c1 * 16 + c2) as u8 as char);
316 |         match c {
317 |           Some(c) => ParseResult::Char(c),
318 |           None => ParseResult::Error,
319 |         }
320 |       }
321 |       Some('u') => {
322 |         // check '{'
323 |         if iter.next() != Some('{') {
324 |           return ParseResult::Error;
325 |         }
326 |         // get hex value
327 |         let mut hex = 0u32;
328 |         for c in iter {
329 |           match c.to_digit(16) {
330 |             Some(h) => match hex.checked_mul(16) {
331 |               Some(h16) => hex = h16 + h,
332 |               None => break,
333 |             },
334 |             None if c == '}' => match char::from_u32(hex) {
335 |               Some(c) => return ParseResult::Char(c),
336 |               None => break,
337 |             },
338 |             None => break,
339 |           }
340 |         }
341 |         ParseResult::Error
342 |       }
343 |       _ => ParseResult::Error,
344 |     },
345 |     Some(c) if c == quote => ParseResult::Quote,
346 |     Some(c) => ParseResult::Char(c),
347 |     None => ParseResult::Error,
348 |   }
349 | }
350 | 
351 | /// Result type of `parse_char_literal`.
352 | enum ParseResult {
353 |   Char(char),
354 |   Quote,
355 |   Error,
356 | }
357 | 
358 | #[cfg(test)]
359 | mod test {
360 |   use super::*;
361 | 
362 |   #[test]
363 |   fn parse_int() {
364 |     assert_eq!(int_literal("123"), Some(123));
365 |     assert_eq!(int_literal("0"), Some(0));
366 |     assert_eq!(int_literal("000"), Some(0));
367 |     assert_eq!(int_literal("0x0"), Some(0x0));
368 |     assert_eq!(int_literal("0xFf"), Some(0xff));
369 |     assert_eq!(int_literal("0b110"), Some(0b110));
370 |     assert_eq!(int_literal("0o765"), Some(0o765));
371 |     assert_eq!(int_literal::<i32>(""), None);
372 |     assert_eq!(int_literal::<i32>("?"), None);
373 |     assert_eq!(int_literal::<i32>("0x?"), None);
374 |     assert_eq!(int_literal::<i32>("99999999999999999999999999999999"), None);
375 |   }
376 | 
377 |   #[test]
378 |   fn parse_str() {
379 |     assert_eq!(str_literal(r#""""#), Some("".into()));
380 |     assert_eq!(str_literal(r#""a""#), Some("a".into()));
381 |     assert_eq!(str_literal(r#""🤡👈""#), Some("🤡👈".into()));
382 |     assert_eq!(str_literal(r#""\t""#), Some("\t".into()));
383 |     assert_eq!(str_literal(r#""\n""#), Some("\n".into()));
384 |     assert_eq!(str_literal(r#""\r""#), Some("\r".into()));
385 |     assert_eq!(str_literal(r#""\\r""#), Some("\\r".into()));
386 |     assert_eq!(str_literal(r#""\'""#), Some("\'".into()));
387 |     assert_eq!(str_literal(r#""\"""#), Some("\"".into()));
388 |     assert_eq!(str_literal(r#""\x4a""#), Some("\x4a".into()));
389 |     assert_eq!(str_literal(r#""\u{1234}""#), Some("\u{1234}".into()));
390 |     assert_eq!(
391 |       str_literal(r#""\u{1234}\u{5678}""#),
392 |       Some("\u{1234}\u{5678}".into())
393 |     );
394 |     assert_eq!(str_literal(r#""\u{10ffff}""#), Some("\u{10ffff}".into()));
395 |     assert_eq!(str_literal(r#""a\x4aa""#), Some("a\x4aa".into()));
396 |     assert_eq!(str_literal(r#""'""#), Some("'".into()));
397 |     assert_eq!(str_literal(r#"?"#), None);
398 |     assert_eq!(str_literal(r#"""#), None);
399 |     assert_eq!(str_literal(r#""aa"#), None);
400 |     assert_eq!(str_literal(r#""\"#), None);
401 |     assert_eq!(
402 |       str_literal(
403 |         r#""
404 | ""#
405 |       ),
406 |       None,
407 |     );
408 |     assert_eq!(
409 |       str_literal(
410 |         r#""aa
411 | ""#
412 |       ),
413 |       None,
414 |     );
415 |     assert_eq!(str_literal(r#""\?""#), None);
416 |     assert_eq!(str_literal(r#""\x""#), None);
417 |     assert_eq!(str_literal(r#""\x4""#), None);
418 |     assert_eq!(str_literal(r#""\u""#), None);
419 |     assert_eq!(str_literal(r#""\u{""#), None);
420 |     assert_eq!(str_literal(r#""\u{111111111""#), None);
421 |     assert_eq!(str_literal(r#""\u{111111111}""#), None);
422 |     assert_eq!(str_literal(r#""\u{d800}""#), None);
423 |     assert_eq!(str_literal(r#""\u{dfff}""#), None);
424 |   }
425 | 
426 |   #[test]
427 |   fn parse_char() {
428 |     assert_eq!(char_literal("'a'"), Some('a'));
429 |     assert_eq!(char_literal("'🤔'"), Some('🤔'));
430 |     assert_eq!(char_literal(r"'\t'"), Some('\t'));
431 |     assert_eq!(char_literal(r"'\n'"), Some('\n'));
432 |     assert_eq!(char_literal(r"'\r'"), Some('\r'));
433 |     assert_eq!(char_literal(r"'\\'"), Some('\\'));
434 |     assert_eq!(char_literal(r"'\''"), Some('\''));
435 |     assert_eq!(char_literal(r#"'\"'"#), Some('\"'));
436 |     assert_eq!(char_literal(r"'\x4a'"), Some('\x4a'));
437 |     assert_eq!(char_literal(r"'\u{1234}'"), Some('\u{1234}'));
438 |     assert_eq!(char_literal(r"'\u{10ffff}'"), Some('\u{10ffff}'));
439 |     assert_eq!(char_literal(r#"'"'"#), Some('"'));
440 |     assert_eq!(char_literal("?"), None);
441 |     assert_eq!(char_literal("'"), None);
442 |     assert_eq!(char_literal("''"), None);
443 |     assert_eq!(char_literal("'a"), None);
444 |     assert_eq!(char_literal("'ab"), None);
445 |     assert_eq!(char_literal("'ab'"), None);
446 |     assert_eq!(
447 |       char_literal(
448 |         r#"'
449 | '"#
450 |       ),
451 |       None,
452 |     );
453 |     assert_eq!(
454 |       char_literal(
455 |         r#"'a
456 | '"#
457 |       ),
458 |       None,
459 |     );
460 |     assert_eq!(char_literal(r"'\'"), None);
461 |     assert_eq!(char_literal(r"'\?'"), None);
462 |     assert_eq!(char_literal(r"'\x'"), None);
463 |     assert_eq!(char_literal(r"'\x4'"), None);
464 |     assert_eq!(char_literal(r"'\u'"), None);
465 |     assert_eq!(char_literal(r"'\u{'"), None);
466 |     assert_eq!(char_literal(r"'\u{111111111'"), None);
467 |     assert_eq!(char_literal(r"'\u{111111111}'"), None);
468 |     assert_eq!(str_literal(r"'\u{d800}'"), None);
469 |     assert_eq!(str_literal(r"'\u{dfff}'"), None);
470 |   }
471 | }
472 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | #![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2 | 
  3 | //! Lexer and parser collections.
  4 | //!
  5 | //! With `laps`, you can build lexers/parsers by just defining tokens/ASTs
  6 | //! and deriving [`Tokenize`](lexer::Tokenize)/[`Parse`](parse::Parse)
  7 | //! trait for them.
  8 | //!
  9 | //! # Example
 10 | //!
 11 | //! Implement a lexer for
 12 | //! [S-expression](https://en.wikipedia.org/wiki/S-expression):
 13 | //!
 14 | #![cfg_attr(not(feature = "macros"), doc = " ```ignore")]
 15 | #![cfg_attr(feature = "macros", doc = " ```")]
 16 | //! # fn main() {}
 17 | //! use laps::prelude::*;
 18 | //!
 19 | //! #[token_kind]
 20 | //! #[derive(Debug, Tokenize)]
 21 | //! enum TokenKind {
 22 | //!   // This token will be skipped.
 23 | //!   #[skip(r"\s+")]
 24 | //!   _Skip,
 25 | //!   /// Parentheses.
 26 | //!   #[regex(r"[()]")]
 27 | //!   Paren(char),
 28 | //!   /// Atom.
 29 | //!   #[regex(r"[^\s()]+")]
 30 | //!   Atom(String),
 31 | //!   /// End-of-file.
 32 | //!   #[eof]
 33 | //!   Eof,
 34 | //! }
 35 | //! ```
 36 | //!
 37 | //! And the parser and [ASTs](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
 38 | //! (or actually [CSTs](https://en.wikipedia.org/wiki/Parse_tree)):
 39 | //!
 40 | #![cfg_attr(not(feature = "macros"), doc = " ```ignore")]
 41 | #![cfg_attr(feature = "macros", doc = " ```")]
 42 | //! # fn main() {}
 43 | //! # use laps::prelude::*;
 44 | //! # #[token_kind]
 45 | //! # #[derive(Debug, Tokenize)]
 46 | //! # enum TokenKind {
 47 | //! #   // This token will be skipped.
 48 | //! #   #[skip(r"\s+")]
 49 | //! #   _Skip,
 50 | //! #   /// Parentheses.
 51 | //! #   #[regex(r"[()]")]
 52 | //! #   Paren(char),
 53 | //! #   /// Atom.
 54 | //! #   #[regex(r"[^\s()]+")]
 55 | //! #   Atom(String),
 56 | //! #   /// End-of-file.
 57 | //! #   #[eof]
 58 | //! #   Eof,
 59 | //! # }
 60 | //! type Token = laps::token::Token<TokenKind>;
 61 | //!
 62 | //! token_ast! {
 63 | //!   macro Token<TokenKind> {
 64 | //!     [atom] => { kind: TokenKind::Atom(_), prompt: "atom" },
 65 | //!     [lpr] => { kind: TokenKind::Paren('(') },
 66 | //!     [rpr] => { kind: TokenKind::Paren(')') },
 67 | //!     [eof] => { kind: TokenKind::Eof },
 68 | //!   }
 69 | //! }
 70 | //!
 71 | //! #[derive(Parse)]
 72 | //! #[token(Token)]
 73 | //! enum Statement {
 74 | //!   Elem(Elem),
 75 | //!   End(Token![eof]),
 76 | //! }
 77 | //!
 78 | //! #[derive(Parse)]
 79 | //! #[token(Token)]
 80 | //! struct SExp(Token![lpr], Vec<Elem>, Token![rpr]);
 81 | //!
 82 | //! #[derive(Parse)]
 83 | //! #[token(Token)]
 84 | //! enum Elem {
 85 | //!   Atom(Token![atom]),
 86 | //!   SExp(SExp),
 87 | //! }
 88 | //! ```
 89 | //!
 90 | //! The above implementation is very close in form to the corresponding
 91 | //! EBNF representation of the S-expression:
 92 | //!
 93 | //! ```text
 94 | //! Statement ::= Elem | EOF;
 95 | //! SExp      ::= "(" {Elem} ")";
 96 | //! Elem      ::= ATOM | SExp;
 97 | //! ```
 98 | //!
 99 | //! # More Examples
100 | //!
101 | //! See the
102 | //! [`examples` directory](https://github.com/MaxXSoft/laps/tree/master/examples),
103 | //! which contains the following examples:
104 | //!
105 | //! * [`sexp`](https://github.com/MaxXSoft/laps/tree/master/examples/sexp):
106 | //!   a [S-expression](https://en.wikipedia.org/wiki/S-expression) parser.
107 | //! * [`calc`](https://github.com/MaxXSoft/laps/tree/master/examples/calc):
108 | //!   a simple expression calculator.
109 | //! * [`json`](https://github.com/MaxXSoft/laps/tree/master/examples/json):
110 | //!   a simple JSON parser.
111 | //! * [`clike`](https://github.com/MaxXSoft/laps/tree/master/examples/clike):
112 | //!   interpreter for a C-like programming language.
113 | //!
114 | //! # Accelerating Code Completion for IDEs
115 | //!
116 | //! By default, Cargo does not enable optimizations for procedural macros,
117 | //! which may result in slower code completion if you are using `laps` to
118 | //! generate lexers. To avoid this, you can add the following configuration
119 | //! to `Cargo.toml`:
120 | //!
121 | //! ```toml
122 | //! [profile.dev.build-override]
123 | //! opt-level = 3
124 | //! ```
125 | //!
126 | //! You can also try to manually enable/disable parallelization for lexer
127 | //! generation by adding:
128 | //!
129 | #![cfg_attr(not(feature = "macros"), doc = " ```ignore")]
130 | #![cfg_attr(feature = "macros", doc = " ```")]
131 | //! # fn main() {}
132 | //! # use laps::prelude::*;
133 | //! #[derive(Tokenize)]
134 | //! #[enable_par(true)] // or #[enable_par(false)]
135 | //! enum TokenKind {
136 | //!   // ...
137 | //! # #[regex(r"[^\s()]+")]
138 | //! # Atom(String),
139 | //! # #[eof]
140 | //! # Eof,
141 | //! }
142 | //! ```
143 | 
144 | pub mod ast;
145 | pub mod input;
146 | pub mod lexer;
147 | pub mod parse;
148 | pub mod reader;
149 | pub mod span;
150 | pub mod token;
151 | 
152 | /// A prelude of some common traits and macros (if enabled feature `macros`)
153 | /// in [`laps`](crate).
154 | ///
155 | /// ```
156 | /// use laps::prelude::*;
157 | /// ```
158 | pub mod prelude {
159 |   pub use crate::input::InputStream;
160 |   pub use crate::lexer::Tokenize;
161 |   pub use crate::parse::Parse;
162 |   pub use crate::span::Spanned;
163 |   pub use crate::token::{TokenStream, Tokenizer};
164 | 
165 |   #[cfg(feature = "macros")]
166 |   pub use crate::token::{token_ast, token_kind};
167 | }
168 | 


--------------------------------------------------------------------------------
/src/parse.rs:
--------------------------------------------------------------------------------
  1 | //! Implementations for constructing parsers.
  2 | //!
  3 | //! This module contains the [`Parse`] trait, which can be implemented
  4 | //! for all types that can be parsed from a token stream, such as ASTs.
  5 | 
  6 | use crate::span::Result;
  7 | use crate::token::TokenStream;
  8 | 
  9 | #[cfg(feature = "macros")]
 10 | pub use laps_macros::Parse;
 11 | 
 12 | /// Parsing trait for all types that can be parsed from a token stream.
 13 | pub trait Parse<TS>: Sized
 14 | where
 15 |   TS: TokenStream,
 16 | {
 17 |   /// Parses a instance of the current type from the given token stream.
 18 |   fn parse(tokens: &mut TS) -> Result<Self>;
 19 | 
 20 |   /// Checks if the current type may be parsed from the given token stream.
 21 |   ///
 22 |   /// Does not advance the position of the token stream.
 23 |   fn maybe(tokens: &mut TS) -> Result<bool>;
 24 | }
 25 | 
 26 | impl<TS, T> Parse<TS> for Box<T>
 27 | where
 28 |   TS: TokenStream,
 29 |   T: Parse<TS>,
 30 | {
 31 |   fn parse(tokens: &mut TS) -> Result<Self> {
 32 |     tokens.parse().map(Box::new)
 33 |   }
 34 | 
 35 |   fn maybe(tokens: &mut TS) -> Result<bool> {
 36 |     T::maybe(tokens)
 37 |   }
 38 | }
 39 | 
 40 | impl<TS, T> Parse<TS> for Option<T>
 41 | where
 42 |   TS: TokenStream,
 43 |   T: Parse<TS>,
 44 | {
 45 |   fn parse(tokens: &mut TS) -> Result<Self> {
 46 |     T::maybe(tokens)?.then(|| tokens.parse()).transpose()
 47 |   }
 48 | 
 49 |   fn maybe(_: &mut TS) -> Result<bool> {
 50 |     Ok(true)
 51 |   }
 52 | }
 53 | 
 54 | impl<TS, T> Parse<TS> for Vec<T>
 55 | where
 56 |   TS: TokenStream,
 57 |   T: Parse<TS>,
 58 | {
 59 |   fn parse(tokens: &mut TS) -> Result<Self> {
 60 |     let mut ts = Vec::new();
 61 |     while T::maybe(tokens)? {
 62 |       ts.push(tokens.parse()?);
 63 |     }
 64 |     Ok(ts)
 65 |   }
 66 | 
 67 |   fn maybe(_: &mut TS) -> Result<bool> {
 68 |     Ok(true)
 69 |   }
 70 | }
 71 | 
 72 | /// Helper macro for implementing [`Parse`] trait for tuples.
 73 | macro_rules! impl_for_tuple {
 74 |   ($t:ident $($ts:ident)*) => {
 75 |     impl<TS, $t $(,$ts)*> Parse<TS> for ($t, $($ts,)*)
 76 |     where
 77 |       TS: TokenStream,
 78 |       $t: Parse<TS>,
 79 |       $($ts: Parse<TS>,)*
 80 |     {
 81 |       fn parse(tokens: &mut TS) -> Result<Self> {
 82 |         Ok((tokens.parse()?, $(tokens.parse::<$ts>()?,)*))
 83 |       }
 84 | 
 85 |       fn maybe(tokens: &mut TS) -> Result<bool> {
 86 |         $t::maybe(tokens)
 87 |       }
 88 |     }
 89 |   };
 90 | }
 91 | 
 92 | impl_for_tuple!(A);
 93 | impl_for_tuple!(A B);
 94 | impl_for_tuple!(A B C);
 95 | impl_for_tuple!(A B C D);
 96 | impl_for_tuple!(A B C D E);
 97 | impl_for_tuple!(A B C D E F);
 98 | impl_for_tuple!(A B C D E F G);
 99 | impl_for_tuple!(A B C D E F G H);
100 | impl_for_tuple!(A B C D E F G H I);
101 | impl_for_tuple!(A B C D E F G H I J);
102 | impl_for_tuple!(A B C D E F G H I J K);
103 | impl_for_tuple!(A B C D E F G H I J K L);
104 | impl_for_tuple!(A B C D E F G H I J K L M);
105 | impl_for_tuple!(A B C D E F G H I J K L M N);
106 | impl_for_tuple!(A B C D E F G H I J K L M N O);
107 | impl_for_tuple!(A B C D E F G H I J K L M N O P);
108 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q);
109 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R);
110 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S);
111 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T);
112 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U);
113 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U V);
114 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W);
115 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W X);
116 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W X Y);
117 | impl_for_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
118 | 


--------------------------------------------------------------------------------
/src/reader.rs:
--------------------------------------------------------------------------------
  1 | //! Reader related implementations for lexers.
  2 | //!
  3 | //! Reader implements [`InputStream`] trait, and it can read and buffer
  4 | //! characters and their corresponding spans from any types that implement
  5 | //! the [`Read`] trait.
  6 | //!
  7 | //! This module contains two kinds of readers: [`Reader`] will try to read
  8 | //! UTF-8 characters from the stream, and will report fatal error if there are
  9 | //! no valid UTF-8 characters. [`ByteReader`] will read bytes from the stream.
 10 | 
 11 | use crate::input::InputStream;
 12 | use crate::log_raw_fatal_error;
 13 | use crate::span::{FileType, Location, Result, Span};
 14 | use std::fs::File;
 15 | use std::io::{self, stdin, Cursor, Read, Stdin};
 16 | use std::path::Path;
 17 | use std::str::{from_utf8, from_utf8_unchecked};
 18 | 
 19 | /// Size of the byte buffer.
 20 | const BYTE_BUFFER_SIZE: usize = 1024;
 21 | 
 22 | /// A generic UTF-8 character reader for lexers.
 23 | ///
 24 | /// The generic parameter `BUFFER_SIZE` specifies the size of the internal
 25 | /// buffer of [`Reader`].
 26 | ///
 27 | /// [`Reader`] always tries to read UTF-8 characters from the stream.
 28 | /// If there are no valid UTF-8 characters, [`Reader`] will return a
 29 | /// fatal error ([`Error::Fatal`](crate::span::Error::Fatal)).
 30 | pub struct Reader<T, const BUFFER_SIZE: usize = BYTE_BUFFER_SIZE> {
 31 |   reader: T,
 32 |   span: Span,
 33 | 
 34 |   // Buffers in `Reader`:
 35 |   // Read bytes to buffer `byte_buf`, start at offset `byte_buf_offset`,
 36 |   // then convert bytes to UTF-8 characters and stores them into `char_buf`.
 37 |   // If there are some remaining bytes can not be converted, move them to the
 38 |   // begining of the `byte_buf`, and update `byte_buf_offset`.
 39 |   char_buf: Vec<char>,
 40 |   byte_buf: Box<[u8; BUFFER_SIZE]>,
 41 |   byte_buf_offset: usize,
 42 | }
 43 | 
 44 | impl<T, const BUFFER_SIZE: usize> Reader<T, BUFFER_SIZE> {
 45 |   /// Creates a new reader with the given inner reader and file type.
 46 |   pub fn new(reader: T, file_type: FileType) -> Self {
 47 |     Self {
 48 |       reader,
 49 |       span: Span::new(file_type),
 50 |       char_buf: Vec::new(),
 51 |       byte_buf: Box::new([0; BUFFER_SIZE]),
 52 |       byte_buf_offset: 0,
 53 |     }
 54 |   }
 55 | 
 56 |   /// Returns the next character and the last location from the reader.
 57 |   fn next_char_loc_from_reader(&mut self) -> Result<(Option<char>, Location)>
 58 |   where
 59 |     T: Read,
 60 |   {
 61 |     // get the current location
 62 |     let loc = self.span.start();
 63 |     // read bytes to buffer
 64 |     let count = self
 65 |       .reader
 66 |       .read(&mut self.byte_buf[self.byte_buf_offset..])
 67 |       .map_err(|e| log_raw_fatal_error!(self.span, "{e}"))?
 68 |       + self.byte_buf_offset;
 69 |     // handle EOF
 70 |     if count == 0 {
 71 |       return Ok((None, loc));
 72 |     }
 73 |     // converts bytes to UTF-8 string
 74 |     let (s, end) = match from_utf8(&self.byte_buf[..count]) {
 75 |       Ok(s) => (s, None),
 76 |       Err(e) => {
 77 |         let end = e.valid_up_to();
 78 |         // safe due to the above check
 79 |         let s = unsafe { from_utf8_unchecked(&self.byte_buf[..end]) };
 80 |         (s, Some(end))
 81 |       }
 82 |     };
 83 |     // get the character and fill the char buffer
 84 |     let mut chars = s.chars();
 85 |     let c = if let Some(c) = chars.next() {
 86 |       self.char_buf.extend(chars.rev());
 87 |       c
 88 |     } else {
 89 |       return log_raw_fatal_error!(self.span, "invalid UTF-8 character").into();
 90 |     };
 91 |     // update byte buffer and its offset
 92 |     if let Some(end) = end {
 93 |       self.byte_buf.copy_within(end..count, 0);
 94 |       self.byte_buf_offset = count - end;
 95 |     } else {
 96 |       self.byte_buf_offset = 0;
 97 |     }
 98 |     // update the span
 99 |     self.span.update(&c);
100 |     Ok((Some(c), loc))
101 |   }
102 | }
103 | 
104 | /// A generic byte reader for lexers.
105 | ///
106 | /// The generic parameter `BUFFER_SIZE` specifies the size of the internal
107 | /// buffer of [`ByteReader`].
108 | pub struct ByteReader<T, const BUFFER_SIZE: usize = BYTE_BUFFER_SIZE> {
109 |   reader: T,
110 |   span: Span,
111 |   char_buf: Vec<u8>,
112 | }
113 | 
114 | impl<T, const BUFFER_SIZE: usize> ByteReader<T, BUFFER_SIZE> {
115 |   /// Creates a new reader with the given inner reader and file type.
116 |   pub fn new(reader: T, file_type: FileType) -> Self {
117 |     Self {
118 |       reader,
119 |       span: Span::new(file_type),
120 |       char_buf: Vec::new(),
121 |     }
122 |   }
123 | 
124 |   /// Returns the next byte and the last location from the reader.
125 |   fn next_char_loc_from_reader(&mut self) -> Result<(Option<u8>, Location)>
126 |   where
127 |     T: Read,
128 |   {
129 |     // get the current location
130 |     let loc = self.span.start();
131 |     // read bytes to buffer
132 |     let mut buf = [0; BUFFER_SIZE];
133 |     let count = self
134 |       .reader
135 |       .read(&mut buf)
136 |       .map_err(|e| log_raw_fatal_error!(self.span, "{e}"))?;
137 |     // handle EOF
138 |     if count == 0 {
139 |       return Ok((None, loc));
140 |     }
141 |     // get the byte and fill the char buffer
142 |     let b = buf[0];
143 |     self.char_buf.extend(buf[1..count].iter().rev());
144 |     // update the span
145 |     self.span.update(&b);
146 |     Ok((Some(b), loc))
147 |   }
148 | }
149 | 
150 | /// Implements necessary methods for the given reader.
151 | macro_rules! impl_reader {
152 |   ($name:ident, $char:ty) => {
153 |     impl<T> $name<T> {
154 |       /// Converts the reader into its inner reader.
155 |       pub fn into_inner(self) -> T {
156 |         self.reader
157 |       }
158 |     }
159 | 
160 |     impl $name<File> {
161 |       /// Creates a new reader from the file at the given path.
162 |       pub fn from_path<P>(path: P) -> io::Result<Self>
163 |       where
164 |         P: AsRef<Path> + Clone,
165 |       {
166 |         File::open(path.clone()).map(|f| Self::new(f, FileType::File(Box::from(path.as_ref()))))
167 |       }
168 |     }
169 | 
170 |     impl $name<Stdin> {
171 |       /// Creates a new reader from the standard input.
172 |       pub fn from_stdin() -> Self {
173 |         stdin().into()
174 |       }
175 |     }
176 | 
177 |     impl From<Stdin> for $name<Stdin> {
178 |       /// Creates a new reader from the standard input.
179 |       fn from(stdin: Stdin) -> Self {
180 |         Self::new(stdin, FileType::Stdin)
181 |       }
182 |     }
183 | 
184 |     impl From<String> for $name<Cursor<String>> {
185 |       /// Creates a new reader from the given [`String`].
186 |       fn from(s: String) -> Self {
187 |         Self::new(Cursor::new(s), FileType::Buffer)
188 |       }
189 |     }
190 | 
191 |     impl<'a> From<&'a str> for $name<Cursor<&'a str>> {
192 |       /// Creates a new reader from the given <code>&amp;[str]</code>.
193 |       fn from(s: &'a str) -> Self {
194 |         Self::new(Cursor::new(s), FileType::Buffer)
195 |       }
196 |     }
197 | 
198 |     impl<'a> From<&'a [u8]> for $name<&'a [u8]> {
199 |       /// Creates a new reader from the given <code>&amp;[[u8]]</code>.
200 |       fn from(b: &'a [u8]) -> Self {
201 |         Self::new(b, FileType::Buffer)
202 |       }
203 |     }
204 | 
205 |     impl<T> InputStream for $name<T>
206 |     where
207 |       T: Read,
208 |     {
209 |       type CharType = $char;
210 | 
211 |       fn next_char_loc(&mut self) -> Result<(Option<$char>, Location)> {
212 |         if let Some(c) = self.char_buf.pop() {
213 |           let loc = self.span.start();
214 |           self.span.update(&c);
215 |           Ok((Some(c), loc))
216 |         } else {
217 |           self.next_char_loc_from_reader()
218 |         }
219 |       }
220 | 
221 |       fn unread(&mut self, last: (Option<$char>, Location)) {
222 |         self.span.update_loc(last.1);
223 |         if let Some(c) = last.0 {
224 |           self.char_buf.push(c);
225 |         }
226 |       }
227 | 
228 |       fn span(&self) -> &Span {
229 |         &self.span
230 |       }
231 | 
232 |       fn set_line_col(&mut self, line: u32, col: u32) {
233 |         self.span.update_loc(Location { line, col });
234 |       }
235 | 
236 |       fn peek(&mut self) -> Result<Option<$char>> {
237 |         if let Some(c) = self.char_buf.last() {
238 |           Ok(Some(*c))
239 |         } else {
240 |           let char_loc = self.next_char_loc_from_reader()?;
241 |           self.unread(char_loc);
242 |           Ok(char_loc.0)
243 |         }
244 |       }
245 | 
246 |       fn peek_with_span(&mut self) -> Result<(Option<$char>, Span)> {
247 |         if let Some(c) = self.char_buf.last() {
248 |           Ok((Some(*c), self.span.clone().into_updated(c)))
249 |         } else {
250 |           let char_loc = self.next_char_loc_from_reader()?;
251 |           let span = self.span.clone();
252 |           self.unread(char_loc);
253 |           Ok((char_loc.0, span))
254 |         }
255 |       }
256 |     }
257 |   };
258 | }
259 | 
260 | impl_reader!(Reader, char);
261 | impl_reader!(ByteReader, u8);
262 | 
263 | #[cfg(test)]
264 | mod test {
265 |   use super::*;
266 | 
267 |   #[test]
268 |   fn next_char_loc_unread() {
269 |     let mut reader = Reader::from("123 abc");
270 |     assert_eq!(reader.next_char_loc().unwrap().0, Some('1'));
271 |     let last = reader.next_char_loc().unwrap();
272 |     assert_eq!(last.0, Some('2'));
273 |     reader.unread(last);
274 |     let loc = last.1;
275 |     assert_eq!(reader.next_char_loc().unwrap(), (Some('2'), loc));
276 |     assert_eq!(reader.next_char_loc().unwrap().0, Some('3'));
277 |     assert_eq!(reader.next_char_loc().unwrap().0, Some(' '));
278 |     assert_eq!(reader.next_char_loc().unwrap().0, Some('a'));
279 |     assert_eq!(reader.next_char_loc().unwrap().0, Some('b'));
280 |     assert_eq!(reader.next_char_loc().unwrap().0, Some('c'));
281 |     let last = reader.next_char_loc().unwrap();
282 |     assert_eq!(last.0, None);
283 |     reader.unread(last);
284 |     let loc = last.1;
285 |     assert_eq!(reader.next_char_loc().unwrap(), (None, loc));
286 |     assert_eq!(reader.next_char_loc().unwrap().0, None);
287 |   }
288 | 
289 |   #[test]
290 |   fn peek_span() {
291 |     let mut reader = Reader::from("123 abc");
292 |     assert_eq!(reader.peek(), Ok(Some('1')));
293 |     assert_eq!(format!("{}", reader.span()), "1:0-1:0");
294 |     assert_eq!(reader.peek(), Ok(Some('1')));
295 |     assert_eq!(format!("{}", reader.span()), "1:0-1:0");
296 |     reader.next_char_loc().unwrap();
297 |     assert_eq!(reader.peek(), Ok(Some('2')));
298 |     assert_eq!(format!("{}", reader.span()), "1:1-1:1");
299 |   }
300 | 
301 |   #[test]
302 |   fn peek_with_span() {
303 |     let mut reader = Reader::from("123 abc");
304 |     let (c, span) = reader.peek_with_span().unwrap();
305 |     assert_eq!(c, Some('1'));
306 |     assert_eq!(format!("{span}"), "1:1-1:1");
307 |     let (c, span) = reader.peek_with_span().unwrap();
308 |     assert_eq!(c, Some('1'));
309 |     assert_eq!(format!("{span}"), "1:1-1:1");
310 |     reader.next_char_loc().unwrap();
311 |     let (c, span) = reader.peek_with_span().unwrap();
312 |     assert_eq!(c, Some('2'));
313 |     assert_eq!(format!("{span}"), "1:2-1:2");
314 |   }
315 | 
316 |   #[test]
317 |   fn unicode_chars() {
318 |     let mut bytes: Vec<_> = "你好, abc✨".into();
319 |     bytes.push(0xff);
320 |     bytes.push(b'z');
321 |     let mut reader = Reader::from(bytes.as_slice());
322 |     assert_eq!(reader.next_char(), Ok(Some('你')));
323 |     assert_eq!(reader.next_char(), Ok(Some('好')));
324 |     assert_eq!(reader.next_char(), Ok(Some(',')));
325 |     assert_eq!(reader.next_char(), Ok(Some(' ')));
326 |     assert_eq!(reader.next_char(), Ok(Some('a')));
327 |     assert_eq!(reader.next_char(), Ok(Some('b')));
328 |     assert_eq!(reader.next_char(), Ok(Some('c')));
329 |     assert_eq!(reader.next_char(), Ok(Some('✨')));
330 |     assert!(reader.next_char().is_err());
331 |     assert!(reader.next_char().is_err());
332 |     let mut reader = ByteReader::from(bytes.as_slice());
333 |     assert_eq!(reader.next_char(), Ok(Some(0xe4)));
334 |     assert_eq!(reader.next_char(), Ok(Some(0xbd)));
335 |     assert_eq!(reader.next_char(), Ok(Some(0xa0)));
336 |     assert_eq!(reader.next_char(), Ok(Some(0xe5)));
337 |     assert_eq!(reader.next_char(), Ok(Some(0xa5)));
338 |     assert_eq!(reader.next_char(), Ok(Some(0xbd)));
339 |     assert_eq!(reader.next_char(), Ok(Some(b',')));
340 |     assert_eq!(reader.next_char(), Ok(Some(b' ')));
341 |     assert_eq!(reader.next_char(), Ok(Some(b'a')));
342 |     assert_eq!(reader.next_char(), Ok(Some(b'b')));
343 |     assert_eq!(reader.next_char(), Ok(Some(b'c')));
344 |     assert_eq!(reader.next_char(), Ok(Some(0xe2)));
345 |     assert_eq!(reader.next_char(), Ok(Some(0x9c)));
346 |     assert_eq!(reader.next_char(), Ok(Some(0xa8)));
347 |     assert_eq!(reader.next_char(), Ok(Some(0xff)));
348 |     assert_eq!(reader.next_char(), Ok(Some(b'z')));
349 |     assert_eq!(reader.next_char(), Ok(None));
350 |     assert_eq!(reader.next_char(), Ok(None));
351 |   }
352 | }
353 | 


--------------------------------------------------------------------------------
/src/token.rs:
--------------------------------------------------------------------------------
  1 | //! Token ([`Token`]) related implementations, including
  2 | //! tokenizer ([`Tokenizer`]) and token stream ([`TokenStream`]).
  3 | //!
  4 | //! All of these implementations can be used in lexers and parsers,
  5 | //! specifically:
  6 | //!
  7 | //! * [`Token`]: generic token representations, can be produced by lexers.
  8 | //! * [`Tokenizer`]: trait for tokenizers (structures that can produce
  9 | //!   tokens), all lexers should implement this trait.
 10 | //! * [`TokenStream`]: a tokenizer wrapper trait, provides several helper
 11 | //!   methods for parsing, can be used in parsers.
 12 | //! * [`TokenBuffer`]: a structure that implements the [`TokenStream`] trait,
 13 | //!   can be used in parsers.
 14 | 
 15 | use crate::log_error;
 16 | use crate::parse::Parse;
 17 | use crate::span::{Result, Span, Spanned};
 18 | use std::borrow::{Borrow, BorrowMut};
 19 | use std::collections::VecDeque;
 20 | use std::{fmt, hash};
 21 | 
 22 | #[cfg(feature = "macros")]
 23 | pub use laps_macros::{token_ast, token_kind};
 24 | 
 25 | /// A generic token.
 26 | #[derive(Clone, Debug)]
 27 | pub struct Token<Kind> {
 28 |   /// Kind of the token.
 29 |   pub kind: Kind,
 30 |   /// Span of the token.
 31 |   pub span: Span,
 32 | }
 33 | 
 34 | impl<Kind> Token<Kind> {
 35 |   /// Creates a new token from the given value and span.
 36 |   pub fn new<T>(value: T, span: Span) -> Self
 37 |   where
 38 |     Kind: From<T>,
 39 |   {
 40 |     Self {
 41 |       kind: value.into(),
 42 |       span,
 43 |     }
 44 |   }
 45 | }
 46 | 
 47 | impl<Kind> Spanned for Token<Kind> {
 48 |   fn span(&self) -> Span {
 49 |     self.span.clone()
 50 |   }
 51 | }
 52 | 
 53 | impl<Kind> PartialEq<Kind> for Token<Kind>
 54 | where
 55 |   Kind: PartialEq,
 56 | {
 57 |   fn eq(&self, other: &Kind) -> bool {
 58 |     self.kind.eq(other)
 59 |   }
 60 | }
 61 | 
 62 | impl<Kind> PartialEq for Token<Kind>
 63 | where
 64 |   Kind: PartialEq,
 65 | {
 66 |   fn eq(&self, other: &Self) -> bool {
 67 |     self.kind.eq(&other.kind)
 68 |   }
 69 | }
 70 | 
 71 | impl<Kind> Eq for Token<Kind> where Kind: Eq {}
 72 | 
 73 | impl<Kind> hash::Hash for Token<Kind>
 74 | where
 75 |   Kind: hash::Hash,
 76 | {
 77 |   fn hash<H: hash::Hasher>(&self, state: &mut H) {
 78 |     self.kind.hash(state)
 79 |   }
 80 | }
 81 | 
 82 | impl<Kind> fmt::Display for Token<Kind>
 83 | where
 84 |   Kind: fmt::Display,
 85 | {
 86 |   fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 87 |     self.kind.fmt(f)
 88 |   }
 89 | }
 90 | 
 91 | impl<Kind> Borrow<Kind> for Token<Kind> {
 92 |   fn borrow(&self) -> &Kind {
 93 |     &self.kind
 94 |   }
 95 | }
 96 | 
 97 | impl<Kind> BorrowMut<Kind> for Token<Kind> {
 98 |   fn borrow_mut(&mut self) -> &mut Kind {
 99 |     &mut self.kind
100 |   }
101 | }
102 | 
103 | impl<Kind> AsRef<Kind> for Token<Kind> {
104 |   fn as_ref(&self) -> &Kind {
105 |     &self.kind
106 |   }
107 | }
108 | 
109 | impl<Kind> AsMut<Kind> for Token<Kind> {
110 |   fn as_mut(&mut self) -> &mut Kind {
111 |     &mut self.kind
112 |   }
113 | }
114 | 
115 | /// Trait for tokenizers.
116 | pub trait Tokenizer {
117 |   /// Type of the token produced by the tokenizer.
118 |   type Token;
119 | 
120 |   /// Reads the next token from the token stream.
121 |   ///
122 |   /// Returns the token if successful, otherwise [`Err`].
123 |   fn next_token(&mut self) -> Result<Self::Token>;
124 | }
125 | 
126 | /// Trait for token streams.
127 | pub trait TokenStream: Tokenizer {
128 |   /// Unreads the given token and put it back to the token stream.
129 |   fn unread(&mut self, token: Self::Token);
130 | 
131 |   /// Parses an AST of type `T` from the token stream.
132 |   fn parse<T>(&mut self) -> Result<T>
133 |   where
134 |     T: Parse<Self>,
135 |     Self: Sized,
136 |   {
137 |     T::parse(self)
138 |   }
139 | 
140 |   /// Peeks the next token from the token stream.
141 |   ///
142 |   /// Does not advance the position of the token stream.
143 |   fn peek(&mut self) -> Result<Self::Token>
144 |   where
145 |     Self::Token: Clone,
146 |   {
147 |     let token = self.next_token()?;
148 |     self.unread(token.clone());
149 |     Ok(token)
150 |   }
151 | 
152 |   /// Peeks the next 2 tokens from the token stream.
153 |   ///
154 |   /// Does not advance the position of the token stream.
155 |   fn peek2(&mut self) -> Result<(Self::Token, Self::Token)>
156 |   where
157 |     Self::Token: Clone,
158 |   {
159 |     let token1 = self.next_token()?;
160 |     let token2 = self.next_token()?;
161 |     self.unread(token2.clone());
162 |     self.unread(token1.clone());
163 |     Ok((token1, token2))
164 |   }
165 | 
166 |   /// Peeks the next N tokens from the token stream.
167 |   ///
168 |   /// Does not advance the position of the token stream.
169 |   fn peek_n(&mut self, n: usize) -> Result<Vec<Self::Token>>
170 |   where
171 |     Self::Token: Clone,
172 |   {
173 |     let v = (0..n)
174 |       .map(|_| self.next_token())
175 |       .collect::<Result<Vec<_>>>()?;
176 |     v.iter().rev().for_each(|t| self.unread(t.clone()));
177 |     Ok(v)
178 |   }
179 | 
180 |   /// Skips tokens untils a token specified by the predicate is encountered.
181 |   fn skip_until<F>(&mut self, mut f: F) -> Result<()>
182 |   where
183 |     F: FnMut(&Self::Token) -> bool,
184 |   {
185 |     loop {
186 |       let token = self.next_token()?;
187 |       if f(&token) {
188 |         self.unread(token);
189 |         break Ok(());
190 |       }
191 |     }
192 |   }
193 | 
194 |   /// Collects tokens into a [`Vec`] until a token specified by the predicate
195 |   /// is encountered.
196 |   fn collect_until<F>(&mut self, mut f: F) -> Result<Vec<Self::Token>>
197 |   where
198 |     F: FnMut(&Self::Token) -> bool,
199 |   {
200 |     let mut v = Vec::new();
201 |     loop {
202 |       let token = self.next_token()?;
203 |       if f(&token) {
204 |         self.unread(token);
205 |         break Ok(v);
206 |       }
207 |       v.push(token);
208 |     }
209 |   }
210 | 
211 |   /// Checks if the next token is the same as the given token,
212 |   /// and returns the token if it is, otherwise returns an error.
213 |   fn expect<T>(&mut self, token: T) -> Result<Self::Token>
214 |   where
215 |     Self::Token: PartialEq<T> + Spanned + fmt::Display,
216 |     T: fmt::Display,
217 |   {
218 |     let next = self.next_token()?;
219 |     if next == token {
220 |       Ok(next)
221 |     } else {
222 |       let err = log_error!(next.span(), "expected {token}, found {next}");
223 |       self.unread(next);
224 |       Err(err)
225 |     }
226 |   }
227 | 
228 |   /// Constructs a helper for peeking a sequence of tokens.
229 |   fn lookahead(&mut self) -> Lookahead<Self, Self::Token>
230 |   where
231 |     Self: Sized,
232 |   {
233 |     Lookahead {
234 |       tokens: self,
235 |       buf: Vec::new(),
236 |       #[cfg(feature = "macros")]
237 |       last_result: true,
238 |     }
239 |   }
240 | }
241 | 
242 | /// Support for checking token sequences without
243 | /// advancing the position of the token stream.
244 | pub struct Lookahead<'ts, TS, T>
245 | where
246 |   TS: TokenStream<Token = T>,
247 | {
248 |   tokens: &'ts mut TS,
249 |   buf: Vec<T>,
250 |   #[cfg(feature = "macros")]
251 |   last_result: bool,
252 | }
253 | 
254 | impl<'ts, TS, T> Lookahead<'ts, TS, T>
255 | where
256 |   TS: TokenStream<Token = T>,
257 | {
258 |   /// Peeks the next token from the token stream.
259 |   pub fn peek_next(&mut self) -> Result<T>
260 |   where
261 |     T: Clone,
262 |   {
263 |     let token = self.tokens.next_token()?;
264 |     self.buf.push(token.clone());
265 |     Ok(token)
266 |   }
267 | 
268 |   #[cfg(feature = "macros")]
269 |   /// Checks if the next token maybe the given token.
270 |   ///
271 |   /// Accepts token AST types only, see [`token_ast`].
272 |   pub fn maybe<F, TA>(mut self, _: F) -> Result<Self>
273 |   where
274 |     F: FnOnce(T) -> TA,
275 |     TA: Parse<TS>,
276 |   {
277 |     if self.last_result {
278 |       self.last_result = TA::maybe(self.tokens)?;
279 |     }
280 |     self.buf.push(self.tokens.next_token()?);
281 |     Ok(self)
282 |   }
283 | 
284 |   #[cfg(feature = "macros")]
285 |   /// Consumes and returns the final result of the
286 |   /// [`maybe`](Lookahead#method.maybe) chain.
287 |   pub fn result(self) -> Result<bool> {
288 |     Ok(self.last_result)
289 |   }
290 | }
291 | 
292 | impl<'ts, TS, T> Drop for Lookahead<'ts, TS, T>
293 | where
294 |   TS: TokenStream<Token = T>,
295 | {
296 |   fn drop(&mut self) {
297 |     while let Some(token) = self.buf.pop() {
298 |       self.tokens.unread(token)
299 |     }
300 |   }
301 | }
302 | 
303 | /// A token buffer that implements trait [`TokenStream`].
304 | ///
305 | /// Contains a tokenizer of type `TN`, produces tokens of type `T`.
306 | pub struct TokenBuffer<TN, T> {
307 |   tokenizer: TN,
308 |   token_buf: VecDeque<T>,
309 | }
310 | 
311 | impl<TN, T> TokenBuffer<TN, T> {
312 |   /// Creates a new token buffer by the given tokenizer.
313 |   pub fn new(tokenizer: TN) -> Self {
314 |     Self {
315 |       tokenizer,
316 |       token_buf: VecDeque::new(),
317 |     }
318 |   }
319 | 
320 |   /// Converts the token buffer into the inner tokenizer.
321 |   pub fn into_inner(self) -> TN {
322 |     self.tokenizer
323 |   }
324 | 
325 |   /// Returns a reference to the inner tokenizer.
326 |   pub fn inner(&self) -> &TN {
327 |     &self.tokenizer
328 |   }
329 | 
330 |   /// Returns a mutable reference to the inner tokenizer.
331 |   pub fn inner_mut(&mut self) -> &mut TN {
332 |     &mut self.tokenizer
333 |   }
334 | 
335 |   /// Extends the token buffer by `n` new tokens.
336 |   fn extend_by(&mut self, n: usize) -> Result<()>
337 |   where
338 |     TN: Tokenizer<Token = T>,
339 |   {
340 |     for _ in 0..n {
341 |       self.token_buf.push_back(self.tokenizer.next_token()?);
342 |     }
343 |     Ok(())
344 |   }
345 | }
346 | 
347 | impl<TN, T> From<TN> for TokenBuffer<TN, T> {
348 |   /// Converts the given tokenizer to a token buffer.
349 |   fn from(tokenizer: TN) -> Self {
350 |     Self::new(tokenizer)
351 |   }
352 | }
353 | 
354 | impl<TN, T> Tokenizer for TokenBuffer<TN, T>
355 | where
356 |   TN: Tokenizer<Token = T>,
357 | {
358 |   type Token = T;
359 | 
360 |   fn next_token(&mut self) -> Result<Self::Token> {
361 |     match self.token_buf.pop_front() {
362 |       Some(t) => Ok(t),
363 |       None => self.tokenizer.next_token(),
364 |     }
365 |   }
366 | }
367 | 
368 | impl<TN, T> TokenStream for TokenBuffer<TN, T>
369 | where
370 |   TN: Tokenizer<Token = T>,
371 | {
372 |   fn unread(&mut self, token: Self::Token) {
373 |     self.token_buf.push_front(token)
374 |   }
375 | 
376 |   fn peek(&mut self) -> Result<Self::Token>
377 |   where
378 |     Self::Token: Clone,
379 |   {
380 |     if let Some(t) = self.token_buf.front() {
381 |       Ok(t.clone())
382 |     } else {
383 |       let t = self.tokenizer.next_token()?;
384 |       self.token_buf.push_front(t.clone());
385 |       Ok(t)
386 |     }
387 |   }
388 | 
389 |   fn peek2(&mut self) -> Result<(Self::Token, Self::Token)>
390 |   where
391 |     Self::Token: Clone,
392 |   {
393 |     if self.token_buf.len() < 2 {
394 |       self.extend_by(2 - self.token_buf.len())?;
395 |     }
396 |     let mut iter = self.token_buf.iter();
397 |     match (iter.next(), iter.next()) {
398 |       (Some(t1), Some(t2)) => Ok((t1.clone(), t2.clone())),
399 |       _ => unreachable!(),
400 |     }
401 |   }
402 | 
403 |   fn peek_n(&mut self, n: usize) -> Result<Vec<Self::Token>>
404 |   where
405 |     Self::Token: Clone,
406 |   {
407 |     if self.token_buf.len() < n {
408 |       self.extend_by(n - self.token_buf.len())?;
409 |     }
410 |     Ok(self.token_buf.iter().take(n).cloned().collect())
411 |   }
412 | }
413 | 


--------------------------------------------------------------------------------