├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── hitori-examples ├── Cargo.toml └── src │ ├── all_patterns.rs │ ├── all_patterns │ ├── bad_password.rs │ ├── hello.rs │ └── true_.rs │ ├── any_patterns.rs │ ├── any_patterns │ ├── false_.rs │ └── float_type.rs │ ├── capturing.rs │ ├── capturing │ ├── fraction.rs │ ├── rectangle.rs │ └── shopping_list.rs │ ├── generics.rs │ ├── generics │ └── all_in.rs │ ├── lib.rs │ ├── positions.rs │ ├── positions │ └── train_cars.rs │ ├── putting_everything_together.rs │ ├── putting_everything_together │ ├── email.rs │ ├── ipv4.rs │ └── uri.rs │ ├── repetitions.rs │ └── repetitions │ ├── binary_u32.rs │ ├── identifier.rs │ └── would_you_kindly.rs ├── hitori-macros ├── Cargo.toml └── src │ ├── expand.rs │ ├── expand │ ├── starts_with_block.rs │ └── starts_with_block │ │ ├── cache.rs │ │ ├── repeat.rs │ │ └── state.rs │ ├── lib.rs │ ├── parse.rs │ ├── parse │ ├── args.rs │ ├── position.rs │ └── repeat.rs │ └── utils.rs └── hitori ├── Cargo.toml ├── benches └── hitori_vs_regex.rs └── src ├── generic.rs ├── lib.rs ├── string.rs └── traits.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | env: 9 | RUSTFLAGS: -Dwarnings 10 | 11 | jobs: 12 | build: 13 | name: ${{format('Build ({0})', matrix.name || matrix.rust)}} 14 | runs-on: ${{matrix.os || 'ubuntu'}}-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | rust: [stable, beta, 1.64.0] 19 | include: 20 | - rust: nightly 21 | components: rustc-dev 22 | - rust: nightly 23 | name: WebAssembly 24 | target: wasm32-unknown-unknown 
25 | - rust: nightly 26 | name: WASI 27 | target: wasm32-wasi 28 | - rust: nightly 29 | name: Windows 30 | os: windows 31 | env: 32 | target: ${{matrix.target && format('--target={0}', matrix.target)}} 33 | timeout-minutes: 45 34 | steps: 35 | - uses: actions/checkout@v3 36 | - uses: dtolnay/rust-toolchain@master 37 | with: 38 | toolchain: ${{matrix.rust}} 39 | targets: ${{matrix.target}} 40 | components: ${{matrix.components}} 41 | - run: cargo check -p hitori ${{env.target}} --no-default-features 42 | - run: cargo check -p hitori ${{env.target}} --no-default-features --features macros 43 | - run: cargo check -p hitori ${{env.target}} --no-default-features --features alloc 44 | - if: matrix.os != 'windows' 45 | run: "! cargo check -p hitori ${{env.target}} --no-default-features --features find-hitori" 46 | - run: cargo check -p hitori ${{env.target}} 47 | - run: cargo check -p hitori ${{env.target}} --features find-hitori 48 | - if: matrix.components == 'rustc-dev' 49 | run: "git submodule init && git submodule update && cargo check -p hitori --benches --all-features --release" 50 | - run: cargo check -p hitori-examples ${{env.target}} 51 | 52 | test: 53 | name: Test 54 | runs-on: ubuntu-latest 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | working-directory: [hitori, hitori-examples] 59 | timeout-minutes: 45 60 | steps: 61 | - uses: actions/checkout@v3 62 | - uses: dtolnay/rust-toolchain@nightly 63 | with: 64 | components: llvm-tools, rustc-dev 65 | - working-directory: ${{matrix.working-directory}} 66 | run: cargo test --all-features --release --tests 67 | - working-directory: hitori 68 | run: "git submodule init && git submodule update && cargo test --bench hitori_vs_regex" 69 | 70 | doc: 71 | name: Doc 72 | runs-on: ubuntu-latest 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | working-directory: [hitori, hitori-examples] 77 | env: 78 | RUSTDOCFLAGS: --cfg=doc_cfg -Dbroken_intra_doc_links 79 | timeout-minutes: 45 80 | steps: 81 | - uses: 
actions/checkout@v3 82 | - uses: dtolnay/rust-toolchain@nightly 83 | - working-directory: ${{matrix.working-directory}} 84 | run: cargo test --all-features --doc 85 | - working-directory: ${{matrix.working-directory}} 86 | run: cargo doc --all-features 87 | 88 | msrv: 89 | name: Minimal versions 90 | runs-on: ubuntu-latest 91 | strategy: 92 | fail-fast: false 93 | matrix: 94 | working-directory: [hitori, hitori-examples] 95 | timeout-minutes: 45 96 | steps: 97 | - uses: actions/checkout@v3 98 | - uses: dtolnay/rust-toolchain@nightly 99 | - working-directory: ${{matrix.working-directory}} 100 | run: cargo update -Z minimal-versions 101 | - working-directory: ${{matrix.working-directory}} 102 | run: cargo check --all-features 103 | 104 | miri: 105 | name: Miri 106 | runs-on: ubuntu-latest 107 | strategy: 108 | fail-fast: false 109 | matrix: 110 | working-directory: [hitori, hitori-examples] 111 | timeout-minutes: 45 112 | steps: 113 | - uses: actions/checkout@v3 114 | - uses: dtolnay/rust-toolchain@miri 115 | - working-directory: ${{matrix.working-directory}} 116 | env: 117 | MIRIFLAGS: -Zmiri-strict-provenance 118 | run: cargo miri test --all-features 119 | 120 | clippy: 121 | name: Clippy 122 | runs-on: ubuntu-latest 123 | strategy: 124 | fail-fast: false 125 | matrix: 126 | working-directory: [hitori, hitori-examples] 127 | timeout-minutes: 45 128 | steps: 129 | - uses: actions/checkout@v3 130 | - uses: dtolnay/rust-toolchain@nightly 131 | with: 132 | components: clippy, rustc-dev 133 | - working-directory: ${{matrix.working-directory}} 134 | run: "git submodule init && git submodule update && cargo clippy --all-features --tests --benches -- -Dclippy::all -Dclippy::pedantic" 135 | 136 | fmt: 137 | name: Fmt 138 | runs-on: ubuntu-latest 139 | timeout-minutes: 45 140 | steps: 141 | - uses: actions/checkout@v3 142 | - uses: dtolnay/rust-toolchain@nightly 143 | with: 144 | components: rustfmt, rustc-dev 145 | - run: cargo fmt --all -- --check 146 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | /Cargo.lock 3 | 4 | .vscode/ 5 | .idea/ 6 | *.vim 7 | *.vi 8 | 9 | .env 10 | 11 | .DS_Store 12 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "hitori/benches/regex-benchmark"] 2 | path = hitori/benches/regex-benchmark 3 | url = https://github.com/mariomka/regex-benchmark.git 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["hitori", "hitori-examples", "hitori-macros"] 3 | 4 | [workspace.package] 5 | version = "0.2.3" 6 | authors = ["Artur Helmanau "] 7 | license = "MIT OR Apache-2.0" 8 | edition = "2021" 9 | repository = "https://github.com/30bit/hitori" 10 | rust-version = "1.64" 11 | readme = "README.md" 12 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![hitori crate](https://img.shields.io/crates/v/hitori.svg)](https://crates.io/crates/hitori) 2 | [![hitori documentation](https://docs.rs/hitori/badge.svg)](https://docs.rs/hitori) 3 | [![Build status](https://github.com/30bit/hitori/workflows/ci/badge.svg)](https://github.com/30bit/hitori/actions) 4 | 5 | Hitori is a generic compile-time regular expressions library. 6 | It works by creating a series of if-statements and for-loops for each expression.
7 | 8 | *See code samples along with the traits, impls and structs they expand to in [examples].* 9 | 10 | # Limitations 11 | 12 | Pattern matching is step-by-step. It is impossible to detach the last element of a repetition. 13 | For example, using [regex] one can rewrite `a+` as `a*a` and it would still match any 14 | sequence of `a`s longer than zero. With [hitori], however, `a*` would consume 15 | all the `a`s, and the expression won't match. 16 | 17 | Step-by-step pattern matching also leads to diminished performance when matching 18 | large texts with an expression that contains repetitions of frequent characters. 19 | 20 | # Crate features 21 | 22 | - **`alloc`** *(enabled by default)* – string replace functions and blanket implementations 23 | of [hitori] traits for boxes using the alloc crate. 24 | - **`macros`** *(enabled by default)* – [`impl_expr_mut`] and [`impl_expr`] macros. 25 | - **`find-hitori`** – finds hitori package to be used in macros 26 | even if it has been renamed in Cargo.toml. **`macros`** feature is required. 27 | - **`examples`** – includes [examples] module into the build. 28 | 29 | # License 30 | 31 | Licensed under either of [Apache License, Version 2.0](LICENSE-APACHE) or [MIT license](LICENSE-MIT) at your option. 32 | 33 | Unless you explicitly state otherwise, any contribution intentionally submitted 34 | for inclusion in this crate by you, as defined in the Apache-2.0 license, shall 35 | be dual licensed as above, without any additional terms or conditions.
36 | 37 | [examples]: https://docs.rs/hitori-examples 38 | [regex]: https://docs.rs/regex 39 | [hitori]: https://docs.rs/hitori 40 | [`impl_expr_mut`]: https://docs.rs/hitori/latest/hitori/attr.impl_expr_mut.html 41 | [`impl_expr`]: https://docs.rs/hitori/latest/hitori/attr.impl_expr.html -------------------------------------------------------------------------------- /hitori-examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hitori-examples" 3 | description = "Examples for hitori" 4 | documentation = "https://docs.rs/hitori-examples" 5 | version.workspace = true 6 | authors.workspace = true 7 | edition.workspace = true 8 | license.workspace = true 9 | repository.workspace = true 10 | rust-version.workspace = true 11 | readme.workspace = true 12 | 13 | [dependencies] 14 | hitori = { version = "=0.2.3", path = "../hitori" } 15 | -------------------------------------------------------------------------------- /hitori-examples/src/all_patterns.rs: -------------------------------------------------------------------------------- 1 | //! Sequences of subpatterns can be matched using an all-pattern. 2 | //! In [hitori] syntax it is represented as a tuple of its subpatterns. 3 | //! 4 | //! ``` 5 | #![doc = include_str!("all_patterns/hello.rs")] 6 | //! 7 | //! assert!(hitori::string::starts_with(Hello, "hello").is_some()); 8 | //! assert!(hitori::string::starts_with(Hello, "world").is_none()); 9 | //! ``` 10 | //! *equivalent to `hello` in [regex] syntax* 11 | //! 12 | //! ### Trailing comma 13 | //! 14 | //! The only way to apply attributes such as `#[hitori::capture]` or 15 | //! `#[hitori::repeat]` to a single character test is by wrapping it 16 | //! inside of an all-pattern. In that case trailing comma 17 | //! is **not** optional. 18 | //! 19 | //! ``` 20 | #![doc = include_str!("all_patterns/bad_password.rs")] 21 | //! 22 | //!
assert!(hitori::string::starts_with(BadPassword, "12345").is_some()); 23 | //! assert!(hitori::string::starts_with(BadPassword, "cUFK^06#43Gs").is_none()); 24 | //! ``` 25 | //! *equivalent to `\d{1, 8}` in [regex] syntax* 26 | //! 27 | //! ### Empty all-pattern 28 | //! 29 | //! An empty all-pattern is always true. 30 | //! 31 | //! ``` 32 | #![doc = include_str!("all_patterns/true_.rs")] 33 | //! 34 | //! for s in ["Hello, world!", "34", "hitori"] { 35 | //! assert!(hitori::string::starts_with(True, s).is_some()); 36 | //! } 37 | //! ``` 38 | //! 39 | //! [hitori]: https://docs.rs/hitori 40 | //! [regex]: https://docs.rs/regex 41 | 42 | mod bad_password; 43 | mod hello; 44 | mod true_; 45 | 46 | pub use bad_password::{BadPassword, BadPasswordCapture}; 47 | pub use hello::{Hello, HelloCapture}; 48 | pub use true_::{True, TrueCapture}; 49 | -------------------------------------------------------------------------------- /hitori-examples/src/all_patterns/bad_password.rs: -------------------------------------------------------------------------------- 1 | /// Numeric-only password with up to 8 characters 2 | pub struct BadPassword; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for BadPassword { 6 | const PATTERN: _ = #[hitori::repeat(gt = 0, le = 8)] 7 | (|ch: char| ch.is_ascii_digit(),); // removing a comma in this line won't compile 8 | } 9 | -------------------------------------------------------------------------------- /hitori-examples/src/all_patterns/hello.rs: -------------------------------------------------------------------------------- 1 | /// "hello" 2 | pub struct Hello; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Hello { 6 | const PATTERN: _ = ( 7 | |ch| ch == 'h', 8 | |ch| ch == 'e', 9 | |ch| ch == 'l', 10 | |ch| ch == 'l', 11 | |ch| ch == 'o', 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /hitori-examples/src/all_patterns/true_.rs: 
-------------------------------------------------------------------------------- 1 | /// An empty all-pattern 2 | pub struct True; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for True { 6 | const PATTERN: _ = (); 7 | } 8 | -------------------------------------------------------------------------------- /hitori-examples/src/any_patterns.rs: -------------------------------------------------------------------------------- 1 | //! An any-pattern matches if one of its subpatterns matches. 2 | //! In [hitori] syntax it is represented as an array of its subpatterns. 3 | //! 4 | //! ``` 5 | #![doc = include_str!("any_patterns/float_type.rs")] 6 | //! 7 | //! assert!(hitori::string::starts_with(FloatType, "f64").is_some()); 8 | //! assert!(hitori::string::starts_with(FloatType, "f128").is_none()); 9 | //! ``` 10 | //! *equivalent to `f(32|64)` in [regex] syntax* 11 | //! 12 | //! ### Empty any-pattern 13 | //! 14 | //! An empty any-pattern is always false. 15 | //! 16 | //! ``` 17 | #![doc = include_str!("any_patterns/false_.rs")] 18 | //! 19 | //! for s in ["Hello, world!", "34", "hitori"] { 20 | //! assert!(hitori::string::starts_with(False, s).is_none()); 21 | //! } 22 | //! ``` 23 | //! 24 | //! [hitori]: https://docs.rs/hitori 25 | //! 
[regex]: https://docs.rs/regex 26 | 27 | mod false_; 28 | mod float_type; 29 | 30 | pub use false_::{False, FalseCapture}; 31 | pub use float_type::{FloatType, FloatTypeCapture}; 32 | -------------------------------------------------------------------------------- /hitori-examples/src/any_patterns/false_.rs: -------------------------------------------------------------------------------- 1 | /// An empty any-pattern 2 | pub struct False; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for False { 6 | const PATTERN: _ = []; 7 | } 8 | -------------------------------------------------------------------------------- /hitori-examples/src/any_patterns/float_type.rs: -------------------------------------------------------------------------------- 1 | /// `f32` or `f64` 2 | pub struct FloatType; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for FloatType { 6 | const PATTERN: _ = ( 7 | |ch| ch == 'f', 8 | [ 9 | (|ch| ch == '3', |ch| ch == '2'), 10 | (|ch| ch == '6', |ch| ch == '4'), 11 | ], 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /hitori-examples/src/capturing.rs: -------------------------------------------------------------------------------- 1 | //! Capturing is done by annotating an all-pattern or an any-pattern with 2 | //! `#[hitori::capture]`. 3 | //! 4 | //! Captured index ranges are returned 5 | //! as fields of [`ExprMut::Capture`] struct that is generated by the macro. 6 | //! 7 | //! The name of the struct is a concatenation of implementor's name and 8 | //! `Capture` by default. 9 | //! 10 | //! ``` 11 | #![doc = include_str!("capturing/fraction.rs")] 12 | //! 13 | //! let s = "4/5"; 14 | //! let matched = hitori::string::starts_with(Fraction, s).unwrap(); 15 | //! assert_eq!(&s[matched.capture.numerator.unwrap()], "4"); 16 | //! assert_eq!(&s[matched.capture.denominator.unwrap()], "5"); 17 | //! 18 | //! assert!(hitori::string::starts_with(Fraction, "1/0").is_none()); 19 | //! ``` 20 | //! 
*equivalent to `(?P<numerator>\d)/(?P<denominator>[1-9])` in [regex] syntax* 21 | //! 22 | //! ### Reused capture names 23 | //! 24 | //! Unlike in [regex], non-unique capture names are allowed. The last capture into 25 | //! the same destination is the one that is returned in [`ExprMut::Capture`] struct. 26 | //! 27 | //! ``` 28 | #![doc = include_str!("capturing/shopping_list.rs")] 29 | //! 30 | //! let s = "🍄🧀"; 31 | //! let matched = hitori::string::starts_with(ShoppingList, s).unwrap(); 32 | //! assert_eq!(&s[matched.capture.last_item.unwrap()], "🧀"); 33 | //! ``` 34 | //! *equivalent to `(?P<last_item>🍄)?(?P<last_item>🫑)?(?P<last_item>🧀)?(?P<last_item>🥚)?` 35 | //! in [regex] syntax* 36 | //! 37 | //! ### Multiple destinations 38 | //! 39 | //! An expression can be captured into multiple destinations using 40 | //! `#[hitori::capture(dest1, dest2, dest3)]` syntax. 41 | //! 42 | //! ``` 43 | #![doc = include_str!("capturing/rectangle.rs")] 44 | //! 45 | //! let s = "◾ 9"; 46 | //! let matched = hitori::string::starts_with(Rectangle, s).unwrap(); 47 | //! assert_eq!(&s[matched.capture.width.unwrap()], "9"); 48 | //! assert_eq!(&s[matched.capture.height.unwrap()], "9"); 49 | //! 50 | //! let s = "▬ 5 6"; 51 | //! let matched = hitori::string::starts_with(Rectangle, s).unwrap(); 52 | //! assert_eq!(&s[matched.capture.width.unwrap()], "5"); 53 | //! assert_eq!(&s[matched.capture.height.unwrap()], "6"); 54 | //! ``` 55 | //! *equivalent to `◾ (?P<width>(?P<height>\d))|▬ (?P<width>\d) (?P<height>\d)` 56 | //! in [regex] syntax* 57 | //! 58 | //! [`ExprMut::Capture`]: hitori::ExprMut::Capture 59 | //!
[regex]: https://docs.rs/regex 60 | 61 | mod fraction; 62 | mod rectangle; 63 | mod shopping_list; 64 | 65 | pub use fraction::{Fraction, FractionCapture}; 66 | pub use rectangle::{Rectangle, RectangleCapture}; 67 | pub use shopping_list::{ShoppingList, ShoppingListCapture}; 68 | -------------------------------------------------------------------------------- /hitori-examples/src/capturing/fraction.rs: -------------------------------------------------------------------------------- 1 | /// Single-digit numerator and denominator 2 | pub struct Fraction; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Fraction { 6 | const PATTERN: _ = ( 7 | // Capture into `FractionCapture.numerator` 8 | #[hitori::capture(numerator)] 9 | (|ch: char| ch.is_ascii_digit(),), 10 | |ch| ch == '/', 11 | // Capture into `FractionCapture.denominator` 12 | #[hitori::capture(denominator)] 13 | (|ch| ch > '0' && ch <= '9',), 14 | ); 15 | } 16 | -------------------------------------------------------------------------------- /hitori-examples/src/capturing/rectangle.rs: -------------------------------------------------------------------------------- 1 | /// Either a square with a side length, or a rectangle with width and height 2 | pub struct Rectangle; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Rectangle { 6 | const PATTERN: _ = [ 7 | ( 8 | |ch| ch == '◾', 9 | char::is_whitespace, 10 | // Capture into both `RectangleCapture.width` and `RectangleCapture.height` 11 | #[hitori::capture(width, height)] 12 | (|ch: char| ch.is_ascii_digit(),), 13 | ), 14 | ( 15 | |ch| ch == '▬', 16 | char::is_whitespace, 17 | #[hitori::capture(width)] 18 | (|ch: char| ch.is_ascii_digit(),), 19 | char::is_whitespace, 20 | #[hitori::capture(height)] 21 | (|ch: char| ch.is_ascii_digit(),), 22 | ), 23 | ]; 24 | } 25 | -------------------------------------------------------------------------------- /hitori-examples/src/capturing/shopping_list.rs:
-------------------------------------------------------------------------------- 1 | /// Sequence of 🍄🫑🧀🥚, where each item may or may not be present, 2 | /// capturing the last item in the list 3 | pub struct ShoppingList; 4 | 5 | #[hitori::impl_expr] 6 | impl Expr for ShoppingList { 7 | const PATTERN: _ = ( 8 | #[hitori::repeat(le = 1)] 9 | ( 10 | #[hitori::capture(last_item)] 11 | (|ch| ch == '🍄',), 12 | ), 13 | #[hitori::repeat(le = 1)] 14 | ( 15 | #[hitori::capture(last_item)] 16 | (|ch| ch == '🫑',), 17 | ), 18 | #[hitori::repeat(le = 1)] 19 | ( 20 | #[hitori::capture(last_item)] 21 | (|ch| ch == '🧀',), 22 | ), 23 | #[hitori::repeat(le = 1)] 24 | ( 25 | #[hitori::capture(last_item)] 26 | (|ch| ch == '🥚',), 27 | ), 28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /hitori-examples/src/generics.rs: -------------------------------------------------------------------------------- 1 | //! [`ExprMut`] implementor type can be generic and implementation 2 | //! of the trait can be blanket. 3 | //! 4 | //! ``` 5 | #![doc = include_str!("generics/all_in.rs")] 6 | //! 7 | //! let lang = AllIn(&['+', '-', '<', '>', '.', ',', '[', ']', '\t', '\n', '\r']); 8 | //! let prog = ">++++++++[<+++++++++>-]<."; 9 | //! assert!(hitori::string::starts_with(lang, prog).is_some()) 10 | //! ``` 11 | //! 12 | //! 
[`ExprMut`]: hitori::ExprMut 13 | 14 | mod all_in; 15 | 16 | pub use all_in::{AllIn, AllInCapture}; 17 | -------------------------------------------------------------------------------- /hitori-examples/src/generics/all_in.rs: -------------------------------------------------------------------------------- 1 | /// Checks that all characters are contained in `self.0` 2 | pub struct AllIn<'a, Ch>(pub &'a [Ch]); 3 | 4 | #[hitori::impl_expr] 5 | impl<'a, Idx: Clone, Ch: PartialEq> Expr for AllIn<'a, Ch> { 6 | const PATTERN: _ = #[hitori::repeat(ge = 0)] 7 | (|ch| self.0.contains(&ch),); 8 | } 9 | -------------------------------------------------------------------------------- /hitori-examples/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Structs and trait implementations generated by 2 | //! [`impl_expr`] and [`impl_expr_mut`] along with the source code. 3 | //! 4 | //! [`impl_expr`]: hitori::impl_expr 5 | //! [`impl_expr_mut`]: hitori::impl_expr_mut 6 | 7 | pub mod all_patterns; 8 | pub mod any_patterns; 9 | pub mod capturing; 10 | pub mod generics; 11 | pub mod positions; 12 | pub mod putting_everything_together; 13 | pub mod repetitions; 14 | -------------------------------------------------------------------------------- /hitori-examples/src/positions.rs: -------------------------------------------------------------------------------- 1 | //! Annotating an all-pattern or an any-pattern with `#[hitori::position]` checks 2 | //! the position of the matched subpattern relative to the start and end of the input. 3 | //! 4 | //! There are 2 possible arguments: 5 | //! 6 | //! - **`first`** – subpattern matched from the beginning of an input 7 | //! - **`last`** – subpattern matched to the end of an input 8 | //! 9 | //! ``` 10 | #![doc = include_str!("positions/train_cars.rs")] 11 | //! 12 | //! assert!(hitori::string::starts_with(TrainCars, "🚃").is_some()); 13 | //! 
assert!(hitori::string::starts_with(TrainCars, "🚃🚃🚃🚃🚃").is_some()); 14 | //! assert!(hitori::string::starts_with(TrainCars, " 🚃").is_none()); 15 | //! assert!(hitori::string::starts_with(TrainCars, "🚃 ").is_none()); 16 | //! assert!(hitori::string::starts_with(TrainCars, "🚃🚃🚃🚃🚃 ").is_none()); 17 | //! assert!(hitori::string::starts_with(TrainCars, " 🚃🚃🚃🚃🚃").is_none()); 18 | //! ``` 19 | //! *equivalent to 20 | //! `^(?P<first_car>(?P<last_car>🚃))$|^(?P<first_car>🚃)🚃{3}(?P<last_car>🚃)$` 21 | //! in [regex] syntax* 22 | //! 23 | //! [regex]: https://docs.rs/regex 24 | 25 | mod train_cars; 26 | 27 | pub use train_cars::{TrainCars, TrainCarsCapture}; 28 | -------------------------------------------------------------------------------- /hitori-examples/src/positions/train_cars.rs: -------------------------------------------------------------------------------- 1 | /// String of either one 🚃, or five 🚃, capturing first and last 🚃 2 | pub struct TrainCars; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for TrainCars { 6 | const PATTERN: _ = [ 7 | #[hitori::capture(first_car, last_car)] 8 | ( 9 | #[hitori::position(first, last)] 10 | (|ch| ch == '🚃',), 11 | ), 12 | ( 13 | #[hitori::capture(first_car)] 14 | ( 15 | #[hitori::position(first)] 16 | (|ch| ch == '🚃',), 17 | ), 18 | #[hitori::repeat(eq = 3)] 19 | (|ch| ch == '🚃',), 20 | #[hitori::capture(last_car)] 21 | ( 22 | #[hitori::position(last)] 23 | (|ch| ch == '🚃',), 24 | ), 25 | ), 26 | ]; 27 | } 28 | -------------------------------------------------------------------------------- /hitori-examples/src/putting_everything_together.rs: -------------------------------------------------------------------------------- 1 | //! More complex examples 2 | //! 3 | //! ### Email 4 | //! 5 | //! ``` 6 | #![doc = include_str!("putting_everything_together/email.rs")] 7 | //! 8 | //! let s = "user@example.com"; 9 | //! let matched = hitori::string::starts_with(Email, s).unwrap(); 10 | //! assert_eq!(&s[matched.capture.user.unwrap()], "user"); 11 | //! 
assert_eq!(&s[matched.capture.domain_with_extension.unwrap()], "example.com"); 12 | //! assert_eq!(&s[matched.capture.domain_extension.unwrap()], "com"); 13 | //! ``` 14 | //! *equivalent to `[\w\.+-]+@[\w\.-]+\.[\w\.-]+` in [regex] syntax* 15 | //! 16 | //! ### Uri 17 | //! 18 | //! ``` 19 | #![doc = include_str!("putting_everything_together/uri.rs")] 20 | //! 21 | //! let s = "postgres://user@localhost:5432/my_db"; 22 | //! let matched = hitori::string::starts_with(Uri, s).unwrap(); 23 | //! assert_eq!(&s[matched.capture.schema.unwrap()], "postgres"); 24 | //! assert_eq!(&s[matched.capture.path.unwrap()], "user@localhost:5432/my_db"); 25 | //! assert!(matched.capture.query.is_none()); 26 | //! assert!(matched.capture.fragment.is_none()); 27 | //! ``` 28 | //! *equivalent to `[\w]+://[^/\s?#][^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?` 29 | //! in [regex] syntax* 30 | //! 31 | //! ### IpV4 32 | //! 33 | //! ``` 34 | #![doc = include_str!("putting_everything_together/ipv4.rs")] 35 | //! 36 | //! assert!(hitori::string::starts_with(IpV4, "255.240.111.255").is_some()); 37 | //! assert!(hitori::string::starts_with(IpV4, "66.249.64.13").is_some()); 38 | //! assert!(hitori::string::starts_with(IpV4, "216.58.214.14").is_some()); 39 | //! assert!(hitori::string::starts_with(IpV4, "255.256.111.255").is_none()); 40 | //! ``` 41 | //! *equivalent to 42 | //! `(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])` 43 | //! in [regex] syntax* 44 | //! 45 | //! [hitori]: https://docs.rs/hitori 46 | //! 
[regex]: https://docs.rs/regex 47 | 48 | mod email; 49 | mod ipv4; 50 | mod uri; 51 | 52 | pub use email::{Email, EmailCapture}; 53 | pub use ipv4::{IpV4, IpV4Capture}; 54 | pub use uri::{Uri, UriCapture}; 55 | -------------------------------------------------------------------------------- /hitori-examples/src/putting_everything_together/email.rs: -------------------------------------------------------------------------------- 1 | /// Email address 2 | pub struct Email; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Email { 6 | const PATTERN: _ = ( 7 | #[hitori::capture(user)] 8 | ( 9 | #[hitori::repeat(ge = 1)] 10 | (|ch: char| { 11 | ch == '.' || ch == '+' || ch == '-' || ch == '_' || ch.is_ascii_alphanumeric() 12 | },), 13 | ), 14 | |ch| ch == '@', 15 | #[hitori::capture(domain_with_extension)] 16 | ( 17 | #[hitori::repeat(ge = 0)] 18 | (|ch: char| ch == '-' || ch == '_' || ch.is_ascii_alphanumeric(),), 19 | #[hitori::repeat(ge = 1)] 20 | ( 21 | |ch| ch == '.', 22 | #[hitori::capture(domain_extension)] 23 | ( 24 | #[hitori::repeat(ge = 1)] 25 | (|ch: char| ch == '-' || ch == '_' || ch.is_ascii_alphanumeric(),), 26 | ), 27 | ), 28 | ), 29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /hitori-examples/src/putting_everything_together/ipv4.rs: -------------------------------------------------------------------------------- 1 | /// Internet Protocol v4 address 2 | pub struct IpV4; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for IpV4 { 6 | const PATTERN: _ = ( 7 | #[hitori::repeat(eq = 3)] 8 | ( 9 | [ 10 | ( 11 | |ch| ch == '2', 12 | |ch| ch == '5', 13 | |ch| ('0'..='5').contains(&ch), 14 | ), 15 | ( 16 | |ch| ch == '2', 17 | |ch| ('0'..='4').contains(&ch), 18 | |ch: char| ch.is_ascii_digit(), 19 | ), 20 | ( 21 | |ch| ch == '0' || ch == '1', 22 | |ch: char| ch.is_ascii_digit(), 23 | |ch: char| ch.is_ascii_digit(), 24 | ), 25 | ( 26 | |ch: char| ch.is_ascii_digit(), 27 | |ch: char| ch.is_ascii_digit(), 28 | ), 29 | ], 
30 | |ch| ch == '.', 31 | ), 32 | [ 33 | ( 34 | |ch| ch == '2', 35 | |ch| ch == '5', 36 | |ch| ('0'..='5').contains(&ch), 37 | ), 38 | ( 39 | |ch| ch == '2', 40 | |ch| ('0'..='4').contains(&ch), 41 | |ch: char| ch.is_ascii_digit(), 42 | ), 43 | ( 44 | |ch| ch == '0' || ch == '1', 45 | |ch: char| ch.is_ascii_digit(), 46 | |ch: char| ch.is_ascii_digit(), 47 | ), 48 | ( 49 | |ch: char| ch.is_ascii_digit(), 50 | |ch: char| ch.is_ascii_digit(), 51 | ), 52 | ], 53 | ); 54 | } 55 | -------------------------------------------------------------------------------- /hitori-examples/src/putting_everything_together/uri.rs: -------------------------------------------------------------------------------- 1 | /// Uniform Resource Identifier 2 | pub struct Uri; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Uri { 6 | const PATTERN: _ = ( 7 | #[hitori::capture(schema)] 8 | ( 9 | #[hitori::repeat(ge = 1)] 10 | (|ch: char| ch == '_' || ch.is_ascii_alphanumeric(),), 11 | ), 12 | |ch| ch == ':', 13 | |ch| ch == '/', 14 | |ch| ch == '/', 15 | #[hitori::capture(path)] 16 | ( 17 | |ch: char| ch != '/' && ch != '?' && ch != '#' && !ch.is_ascii_whitespace(), 18 | #[hitori::repeat(ge = 1)] 19 | (|ch: char| ch != '?' && ch != '#' && !ch.is_ascii_whitespace(),), 20 | ), 21 | #[hitori::repeat(le = 1)] 22 | ( 23 | |ch| ch == '?', 24 | #[hitori::capture(query)] 25 | ( 26 | #[hitori::repeat(ge = 0)] 27 | (|ch: char| ch != '#' && !ch.is_ascii_whitespace(),), 28 | ), 29 | ), 30 | #[hitori::repeat(le = 1)] 31 | ( 32 | |ch| ch == '#', 33 | #[hitori::capture(fragment)] 34 | ( 35 | #[hitori::repeat(ge = 0)] 36 | (|ch: char| !ch.is_ascii_whitespace(),), 37 | ), 38 | ), 39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /hitori-examples/src/repetitions.rs: -------------------------------------------------------------------------------- 1 | //! Repetition is done by annotating an all-pattern or an any-pattern with 2 | //! `#[hitori::repeat]`. 3 | //! 4 | //! 
There are 5 possible name-value arguments: 5 | //! 6 | //! - **`eq = x`** – exactly `x` times. 7 | //! - **`lt = x`** – less than `x` times. 8 | //! - **`le = x`** – less than or equal to `x` times. 9 | //! - **`gt = x`** – greater than `x` times. 10 | //! - **`ge = x`** – greater than or equal to `x` times. 11 | //! 12 | //! The value assigned to a bound must be either a literal 13 | //! [`usize`] (like `lt = 410` or `ge = 20usize`) 14 | //! or a literal string containing an expression that evaluates to 15 | //! [`usize`] (like `eq = "self.name.len()"`). 16 | //! 17 | //! ``` 18 | #![doc = include_str!("repetitions/identifier.rs")] 19 | //! 20 | //! for s in ["_", "x1", "my_var32"] { 21 | //! assert!(hitori::string::starts_with(Identifier, s).is_some()); 22 | //! } 23 | //! ``` 24 | //! *equivalent to `[[:alpha:]_]\w*` in [regex] syntax* 25 | //! 26 | //! ### Combining bounds 27 | //! 28 | //! Lower bounds (`gt` and `ge`) can be combined with upper bounds (`lt` and `le`). 29 | //! The default lower bound is `ge = 0`, while the upper bound is unbounded by default. 30 | //! 31 | //! ``` 32 | #![doc = include_str!("repetitions/binary_u32.rs")] 33 | //! 34 | //! assert!(hitori::string::starts_with(BinaryU32, "0b110011010").is_some()); 35 | //! ``` 36 | //! *equivalent to `0b[01]{1,32}` in [regex] syntax* 37 | //! 38 | //! ### Expression bounds 39 | //! 40 | //! Expression bounds can be used when the number of times to repeat 41 | //! is not a literal [`usize`] (e.g. constants, function outputs 42 | //! and [`ExprMut`] implementor's fields and methods). 43 | //! 44 | //! ``` 45 | #![doc = include_str!("repetitions/would_you_kindly.rs")] 46 | //! 47 | //! let s = "Would you kindly lower that weapon for a moment?"; 48 | //! let expr = WouldYouKindly::default(); 49 | //! let matched = hitori::string::starts_with(expr, s).unwrap(); 50 | //! assert_eq!(&s[matched.capture.request.unwrap()], "lower that weapon for a moment"); 51 | //! ``` 52 | //! 
*equivalent to `Would you kindly (?P<request>[^?!]+)[?!]` in [regex] syntax* 53 | //! 54 | //! [regex]: https://docs.rs/regex 55 | //! [`ExprMut`]: hitori::ExprMut 56 | 57 | mod binary_u32; 58 | mod identifier; 59 | mod would_you_kindly; 60 | 61 | pub use binary_u32::{BinaryU32, BinaryU32Capture}; 62 | pub use identifier::{Identifier, IdentifierCapture}; 63 | pub use would_you_kindly::{WouldYouKindly, WouldYouKindlyCapture}; 64 | -------------------------------------------------------------------------------- /hitori-examples/src/repetitions/binary_u32.rs: -------------------------------------------------------------------------------- 1 | /// Literal [`u32`] binary notation (e.g. `0b110011010`) 2 | pub struct BinaryU32; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for BinaryU32 { 6 | const PATTERN: _ = ( 7 | |ch| ch == '0', 8 | |ch| ch == 'b', 9 | #[hitori::repeat(ge = 1, le = 32)] 10 | (|ch| ch == '0' || ch == '1',), 11 | ); 12 | } 13 | -------------------------------------------------------------------------------- /hitori-examples/src/repetitions/identifier.rs: -------------------------------------------------------------------------------- 1 | /// Rust identifier such as `my_var32` 2 | pub struct Identifier; 3 | 4 | #[hitori::impl_expr] 5 | impl Expr for Identifier { 6 | const PATTERN: _ = ( 7 | |ch: char| ch == '_' || ch.is_alphabetic(), 8 | #[hitori::repeat(ge = 0)] 9 | (|ch: char| ch == '_' || ch.is_alphanumeric(),), 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /hitori-examples/src/repetitions/would_you_kindly.rs: -------------------------------------------------------------------------------- 1 | const PHRASE: &str = "Would you kindly "; 2 | 3 | /// A captured request that is prefixed with `Would you kindly` and suffixed with a `?` or `!` 
4 | pub struct WouldYouKindly { 5 | phrase_chars: core::str::Chars<'static>, 6 | } 7 | 8 | impl Default for WouldYouKindly { 9 | fn default() -> Self { 10 | Self { 11 | phrase_chars: PHRASE.chars(), 12 | } 13 | } 14 | } 15 | 16 | #[hitori::impl_expr_mut] 17 | impl ExprMut for WouldYouKindly { 18 | const PATTERN: _ = ( 19 | #[hitori::repeat(eq = "PHRASE.len()")] 20 | (|ch| ch == self.phrase_chars.next().unwrap(),), 21 | #[hitori::capture(request)] 22 | ( 23 | #[hitori::repeat(ge = 1)] 24 | (|ch| ch != '?' && ch != '!',), 25 | ), 26 | |ch| ch == '?' || ch == '!', 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /hitori-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hitori-macros" 3 | description = "Macros for hitori" 4 | documentation = "https://docs.rs/hitori-macros" 5 | version.workspace = true 6 | authors.workspace = true 7 | edition.workspace = true 8 | license.workspace = true 9 | repository.workspace = true 10 | rust-version.workspace = true 11 | readme.workspace = true 12 | 13 | [lib] 14 | proc-macro = true 15 | 16 | [features] 17 | find-hitori = ["proc-macro-crate"] 18 | debug = ["rust-format"] 19 | 20 | [dependencies] 21 | proc-macro-crate = { version = "1.3.1", optional = true } 22 | proc-macro2 = "1.0.51" 23 | quote = { version = "1.0.23", default-features = false } 24 | syn = { version = "2.0.15", default-features = false, features = [ 25 | "full", 26 | "parsing", 27 | "printing", 28 | ] } 29 | rust-format = { version = "0.3.4", features = [ 30 | "token_stream", 31 | ], optional = true } 32 | -------------------------------------------------------------------------------- /hitori-macros/src/expand.rs: -------------------------------------------------------------------------------- 1 | mod starts_with_block; 2 | 3 | use crate::{parse, utils::hitori_ident}; 4 | use proc_macro2::{Ident, TokenStream}; 5 | use quote::{format_ident, 
quote, ToTokens}; 6 | use std::collections::BTreeSet; 7 | use syn::{punctuated::Punctuated, GenericParam, Token, Type, Visibility, WhereClause}; 8 | 9 | fn impl_decl( 10 | hitori_ident: &Ident, 11 | self_ty: &Type, 12 | trait_ident: &Ident, 13 | idx_ty: &Type, 14 | ch_ty: &Type, 15 | generic_params: &Punctuated, 16 | where_clause: Option<&WhereClause>, 17 | ) -> TokenStream { 18 | quote! { 19 | impl<#generic_params> #hitori_ident::#trait_ident<#idx_ty, #ch_ty> for #self_ty 20 | #where_clause 21 | } 22 | } 23 | 24 | fn starts_with_sig( 25 | hitori_ident: &Ident, 26 | is_mut: bool, 27 | starts_with_ident: &Ident, 28 | iter_ident: &Ident, 29 | idx_ty: &Type, 30 | ch_ty: &Type, 31 | inline: bool, 32 | ) -> TokenStream { 33 | let inline = inline.then(|| quote! { #[inline] }); 34 | let mut_ = is_mut.then(::default); 35 | quote! { 36 | #inline 37 | fn #starts_with_ident<#iter_ident>( 38 | &#mut_ self, 39 | mut start: #idx_ty, 40 | is_first: bool, 41 | iter: #iter_ident, 42 | ) -> ::core::option::Option<#hitori_ident::Match< 43 | #idx_ty, 44 | >::Capture, 45 | #iter_ident::IntoIter, 46 | >> 47 | where 48 | #iter_ident: ::core::iter::IntoIterator, 49 | #iter_ident::IntoIter: ::core::clone::Clone, 50 | } 51 | } 52 | 53 | fn type_capture(capture_ident: &Ident, idx_ty: &Type) -> TokenStream { 54 | quote! { type Capture = #capture_ident<#idx_ty>; } 55 | } 56 | 57 | fn capture( 58 | vis: &Visibility, 59 | ident: &Ident, 60 | idx_ident: &Ident, 61 | default_idx_ty: Option<&Type>, 62 | field_idents: &BTreeSet, 63 | ) -> TokenStream { 64 | let (members, default_block, doc) = if field_idents.is_empty() { 65 | ( 66 | quote! {( ::core::marker::PhantomData<#idx_ident> );}, 67 | quote! {( ::core::marker::PhantomData )}, 68 | Some(quote! { #[doc = "This is an empty placeholder-struct"] }), 69 | ) 70 | } else { 71 | ( 72 | quote! {{ 73 | #( 74 | #vis #field_idents: ::core::option::Option<::core::ops::Range<#idx_ident>>, 75 | )* 76 | }}, 77 | quote! 
{{ 78 | #( 79 | #field_idents: ::core::option::Option::None, 80 | )* 81 | }}, 82 | None, 83 | ) 84 | }; 85 | let idx_bound = default_idx_ty.map_or_else( 86 | || idx_ident.to_token_stream(), 87 | |ty| quote! { #idx_ident = #ty }, 88 | ); 89 | quote! { 90 | #doc 91 | #[derive( 92 | ::core::clone::Clone, 93 | ::core::cmp::Eq, 94 | ::core::cmp::PartialEq, 95 | ::core::fmt::Debug, 96 | )] 97 | #vis struct #ident<#idx_bound> #members 98 | impl<#idx_ident> ::core::default::Default for #ident<#idx_ident> { 99 | fn default() -> Self { 100 | Self #default_block 101 | } 102 | } 103 | } 104 | } 105 | 106 | fn derived_impl_expr_mut_starts_with_block( 107 | hitori_ident: &Ident, 108 | idx_ty: &Type, 109 | ch_ty: &Type, 110 | ) -> TokenStream { 111 | quote! { 112 | >::starts_with(self, start, is_first, iter) 113 | } 114 | } 115 | 116 | pub fn expand(parsed: parse::Output) -> syn::Result { 117 | let hitori_ident = hitori_ident(); 118 | let impl_decl = |trait_ident| { 119 | impl_decl( 120 | &hitori_ident, 121 | &parsed.self_ty, 122 | trait_ident, 123 | &parsed.idx_ty, 124 | &parsed.ch_ty, 125 | &parsed.generic_params, 126 | parsed.where_clause.as_ref(), 127 | ) 128 | }; 129 | let starts_with_sig = |is_mut, inline| { 130 | let starts_with_ident = if is_mut { 131 | format_ident!("starts_with_mut") 132 | } else { 133 | format_ident!("starts_with") 134 | }; 135 | starts_with_sig( 136 | &hitori_ident, 137 | is_mut, 138 | &starts_with_ident, 139 | &parsed.iter_ident, 140 | &parsed.idx_ty, 141 | &parsed.ch_ty, 142 | inline, 143 | ) 144 | }; 145 | 146 | let type_capture = type_capture(&parsed.capture_ident, &parsed.idx_ty); 147 | let (mut output, impl_decl, type_capture, starts_with_sig) = if parsed.is_mut { 148 | ( 149 | TokenStream::new(), 150 | impl_decl(&parsed.trait_ident), 151 | Some(type_capture), 152 | starts_with_sig(true, false), 153 | ) 154 | } else { 155 | let impl_expr_decl = impl_decl(&parsed.trait_ident); 156 | let impl_expr_mut_decl = impl_decl(&format_ident!("ExprMut")); 
157 | let impl_expr_mut_starts_with_sig = starts_with_sig(true, true); 158 | let impl_expr_mut_starts_with_block = 159 | derived_impl_expr_mut_starts_with_block(&hitori_ident, &parsed.idx_ty, &parsed.ch_ty); 160 | ( 161 | quote! { 162 | #impl_expr_mut_decl { 163 | #type_capture 164 | #impl_expr_mut_starts_with_sig { #impl_expr_mut_starts_with_block } 165 | } 166 | }, 167 | impl_expr_decl, 168 | None, 169 | starts_with_sig(false, false), 170 | ) 171 | }; 172 | 173 | let starts_with_block::Output { 174 | tokens: starts_with_block, 175 | inner_capture_idents, 176 | } = starts_with_block::Input { 177 | hitori_ident: &hitori_ident, 178 | is_mut: parsed.is_mut, 179 | capture_ident: &parsed.capture_ident, 180 | self_ty: &parsed.self_ty, 181 | iter_ident: &parsed.iter_ident, 182 | idx_ty: &parsed.idx_ty, 183 | ch_ty: &parsed.ch_ty, 184 | expr: &parsed.expr, 185 | wrapper_ident: &parsed.wrapper_ident, 186 | generic_params: parsed.generic_params, 187 | where_clause: parsed.where_clause.as_ref(), 188 | } 189 | .expand()?; 190 | 191 | output.extend(quote! 
{ 192 | #impl_decl { 193 | #type_capture 194 | #starts_with_sig { #starts_with_block } 195 | } 196 | }); 197 | 198 | output.extend(capture( 199 | &parsed.capture_vis, 200 | &parsed.capture_ident, 201 | &parsed.capture_idx_ident, 202 | (!parsed.is_idx_generic).then_some(&parsed.idx_ty), 203 | &inner_capture_idents, 204 | )); 205 | 206 | Ok(output) 207 | } 208 | -------------------------------------------------------------------------------- /hitori-macros/src/expand/starts_with_block.rs: -------------------------------------------------------------------------------- 1 | mod cache; 2 | mod repeat; 3 | mod state; 4 | 5 | use crate::{ 6 | parse::{position::Position, repeat::Repeat}, 7 | utils::{ 8 | eq_by_fmt, find_le_one_hitori_attr, hitori_attr_ident_eq_str, 9 | lifetimes_into_punctuated_unit_refs, remove_generic_params_bounds, 10 | }, 11 | }; 12 | use proc_macro2::{Ident, TokenStream}; 13 | use quote::{quote, ToTokens as _}; 14 | use state::State; 15 | use std::collections::BTreeSet; 16 | use syn::{punctuated::Punctuated, Attribute, Expr, GenericParam, Token, Type, WhereClause}; 17 | 18 | #[allow(clippy::too_many_arguments)] 19 | fn partial_impl_wrapper( 20 | is_mut: bool, 21 | capture_ident: &Ident, 22 | self_ty: &Type, 23 | iter_ident: &Ident, 24 | idx_ty: &Type, 25 | ch_ty: &Type, 26 | wrapper_ident: &Ident, 27 | mut generic_params: Punctuated, 28 | where_clause: Option<&WhereClause>, 29 | ) -> TokenStream { 30 | fn wrapper_params( 31 | generic_params: &Punctuated, 32 | iter_ident: &Ident, 33 | ) -> TokenStream { 34 | let mut output = quote! 
{ '__self, #generic_params }; 35 | if !generic_params.empty_or_trailing() { 36 | ::default().to_tokens(&mut output); 37 | } 38 | iter_ident.to_tokens(&mut output); 39 | output 40 | } 41 | 42 | let maybe_bounds_wrapper_params = wrapper_params(&generic_params, iter_ident); 43 | 44 | let mut phantom_data_params = lifetimes_into_punctuated_unit_refs( 45 | generic_params 46 | .iter() 47 | .take_while(|param| matches!(param, GenericParam::Lifetime(_))) 48 | .map(|param| match param { 49 | GenericParam::Lifetime(l) => l, 50 | _ => unreachable!(), 51 | }), 52 | ); 53 | 54 | remove_generic_params_bounds(&mut generic_params); 55 | let no_bounds_wrapper_params = wrapper_params(&generic_params, iter_ident); 56 | 57 | for pair in generic_params.pairs() { 58 | if matches!(pair.value(), GenericParam::Type(_)) { 59 | pair.to_tokens(&mut phantom_data_params); 60 | } 61 | } 62 | 63 | let where_clause = { 64 | let mut output = where_clause.as_ref().map_or_else( 65 | || quote! { where }, 66 | |existing| { 67 | if existing.predicates.empty_or_trailing() { 68 | quote! { #where_clause } 69 | } else { 70 | quote! { #where_clause, } 71 | } 72 | }, 73 | ); 74 | output.extend(quote! { 75 | #iter_ident: ::core::iter::Iterator + ::core::clone::Clone, 76 | }); 77 | output 78 | }; 79 | 80 | let mut_ = is_mut.then_some(::default()); 81 | 82 | let mut output = quote! { 83 | struct #wrapper_ident<#maybe_bounds_wrapper_params> #where_clause { 84 | __target: &'__self #mut_ #self_ty, 85 | __capture: #capture_ident<#idx_ty>, 86 | __end: #idx_ty, 87 | __is_first: bool, 88 | __iter: #iter_ident, 89 | __phantom: ::core::marker::PhantomData<(#phantom_data_params)>, 90 | }; 91 | 92 | impl<#maybe_bounds_wrapper_params> ::core::ops::Deref 93 | for #wrapper_ident<#no_bounds_wrapper_params> 94 | #where_clause 95 | { 96 | type Target = #self_ty; 97 | 98 | fn deref(&self) -> &Self::Target { 99 | self.__target 100 | } 101 | } 102 | }; 103 | 104 | if is_mut { 105 | output.extend(quote! 
{ 106 | impl<#maybe_bounds_wrapper_params> ::core::ops::DerefMut 107 | for #wrapper_ident<#no_bounds_wrapper_params> 108 | #where_clause 109 | { 110 | fn deref_mut(&mut self) -> &mut Self::Target { 111 | self.__target 112 | } 113 | } 114 | }); 115 | } 116 | 117 | output.extend(quote! { 118 | impl<#maybe_bounds_wrapper_params> #wrapper_ident<#no_bounds_wrapper_params> #where_clause 119 | }); 120 | 121 | output 122 | } 123 | 124 | enum HitoriAttribute { 125 | Repeat(Repeat), 126 | Capture(Punctuated), 127 | Position(Position), 128 | } 129 | 130 | impl HitoriAttribute { 131 | fn find(attrs: &[Attribute]) -> syn::Result> { 132 | match find_le_one_hitori_attr(attrs) { 133 | Ok(Some(attr)) => Ok(Some(if hitori_attr_ident_eq_str(attr, "capture") { 134 | let capture_idents = attr.parse_args_with(Punctuated::parse_terminated)?; 135 | if capture_idents.is_empty() { 136 | return Err(syn::Error::new_spanned( 137 | attr, 138 | "capture must contain at least one identifier \ 139 | (e.g. `#[hitori::capture(this)]`)", 140 | )); 141 | } 142 | Self::Capture(capture_idents) 143 | } else if hitori_attr_ident_eq_str(attr, "repeat") { 144 | let tokens = &attr.meta.require_list()?.tokens; 145 | if tokens.is_empty() || eq_by_fmt(tokens, quote! { () }) { 146 | return Err(syn::Error::new_spanned( 147 | attr, 148 | "repeat must contain at least one bound \ 149 | (e.g. `#[hitori::repeat(ge = 0)]`)", 150 | )); 151 | } 152 | Self::Repeat(attr.parse_args()?) 153 | } else if hitori_attr_ident_eq_str(attr, "position") { 154 | Self::Position(attr.parse_args()?) 
155 | } else { 156 | return Err(syn::Error::new_spanned( 157 | attr, 158 | "expected `capture`, or `repeat`, or `position`", 159 | )); 160 | })), 161 | Ok(None) => Ok(None), 162 | Err([first, second]) => Err(syn::Error::new_spanned( 163 | first, 164 | if hitori_attr_ident_eq_str(first, "capture") 165 | && hitori_attr_ident_eq_str(second, "capture") 166 | { 167 | "to capture group into multiple destinations, \ 168 | use single `capture` attribute and \ 169 | add each identifier to its argument list \ 170 | (e.g. `#[hitori::capture(a, b, c)] _group`)" 171 | } else if hitori_attr_ident_eq_str(first, "position") 172 | && hitori_attr_ident_eq_str(second, "position") 173 | && { 174 | matches!( 175 | (first.parse_args(), second.parse_args()), 176 | (Ok(Position::First), Ok(Position::Last)) 177 | | (Ok(Position::Last), Ok(Position::First)) 178 | ) 179 | } 180 | { 181 | "to check that a group is both `first` and `last` \ 182 | use `#[hitori::position(first, last)]`" 183 | } else { 184 | "there cannot be two `hitori` attributes on a single group" 185 | }, 186 | )), 187 | } 188 | } 189 | } 190 | 191 | enum Group<'a> { 192 | All(&'a Punctuated), 193 | Any(&'a Punctuated), 194 | } 195 | 196 | enum Tree<'a> { 197 | Group(Group<'a>, Option), 198 | Test(&'a Expr), 199 | } 200 | 201 | impl<'a> TryFrom<&'a Expr> for Tree<'a> { 202 | type Error = syn::Error; 203 | 204 | fn try_from(expr: &'a Expr) -> syn::Result { 205 | Ok(match &expr { 206 | Expr::Tuple(tuple) => Tree::Group( 207 | Group::All(&tuple.elems), 208 | HitoriAttribute::find(&tuple.attrs)?, 209 | ), 210 | Expr::Array(arr) => { 211 | Tree::Group(Group::Any(&arr.elems), HitoriAttribute::find(&arr.attrs)?) 
212 | } 213 | Expr::Paren(paren) => { 214 | return Err(syn::Error::new_spanned( 215 | paren, 216 | "add a trailing comma inside of the parenthesis", 217 | )); 218 | } 219 | _ => Tree::Test(expr), 220 | }) 221 | } 222 | } 223 | 224 | pub struct Output { 225 | pub tokens: TokenStream, 226 | pub inner_capture_idents: BTreeSet, 227 | } 228 | 229 | pub struct Input<'a> { 230 | pub hitori_ident: &'a Ident, 231 | pub is_mut: bool, 232 | pub capture_ident: &'a Ident, 233 | pub self_ty: &'a Type, 234 | pub iter_ident: &'a Ident, 235 | pub idx_ty: &'a Type, 236 | pub ch_ty: &'a Type, 237 | pub expr: &'a Expr, 238 | pub wrapper_ident: &'a Ident, 239 | pub generic_params: Punctuated, 240 | pub where_clause: Option<&'a WhereClause>, 241 | } 242 | 243 | impl<'a> Input<'a> { 244 | pub fn expand(self) -> syn::Result { 245 | let mut st = State::default(); 246 | let inner_capture_idents = st.push_tree(self.expr.try_into()?)?; 247 | let hitori_ident = self.hitori_ident; 248 | let partial_impl_wrapper = partial_impl_wrapper( 249 | self.is_mut, 250 | self.capture_ident, 251 | self.self_ty, 252 | self.iter_ident, 253 | self.idx_ty, 254 | self.ch_ty, 255 | self.wrapper_ident, 256 | self.generic_params, 257 | self.where_clause, 258 | ); 259 | let impl_wrapper_block = st.impl_wrapper_block; 260 | let total_matches_ident = st.prev_subexpr_matches_ident.unwrap(); 261 | let wrapper_ident = self.wrapper_ident; 262 | let tokens = quote! 
{ 263 | #partial_impl_wrapper { 264 | #impl_wrapper_block 265 | } 266 | let mut wrapper = #wrapper_ident { 267 | __target: self, 268 | __capture: ::core::default::Default::default(), 269 | __end: start.clone(), 270 | __is_first: is_first, 271 | __iter: ::core::iter::IntoIterator::into_iter(iter), 272 | __phantom: ::core::marker::PhantomData, 273 | }; 274 | if wrapper.#total_matches_ident() { 275 | ::core::option::Option::Some(#hitori_ident::Match { 276 | range: start..wrapper.__end, 277 | capture: wrapper.__capture, 278 | iter_remainder: wrapper.__iter, 279 | is_iter_advanced: !wrapper.__is_first, 280 | }) 281 | } else { 282 | ::core::option::Option::None 283 | } 284 | }; 285 | Ok(Output { 286 | tokens, 287 | inner_capture_idents, 288 | }) 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /hitori-macros/src/expand/starts_with_block/cache.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::unique_ident; 2 | use proc_macro2::{Ident, TokenStream}; 3 | use quote::{format_ident, quote}; 4 | 5 | pub struct Capture(C); 6 | 7 | impl<'a, C: Iterator + Clone> Capture { 8 | pub fn new>(capture_idents: I) -> Self { 9 | Self(capture_idents.into_iter()) 10 | } 11 | 12 | pub fn cache(&self) -> TokenStream { 13 | let idents = self.0.clone(); 14 | quote! { 15 | #( 16 | let mut #idents = 17 | ::core::clone::Clone::clone(&self.__capture.#idents); 18 | )* 19 | } 20 | } 21 | 22 | pub fn restore(&self) -> TokenStream { 23 | let idents = self.0.clone(); 24 | quote! 
{ 25 | #( 26 | self.__capture.#idents = #idents; 27 | )* 28 | } 29 | } 30 | } 31 | 32 | pub struct Vars { 33 | iter: Ident, 34 | is_first: Ident, 35 | end: Ident, 36 | } 37 | 38 | impl Default for Vars { 39 | fn default() -> Self { 40 | Self { 41 | iter: format_ident!("iter"), 42 | is_first: format_ident!("is_first"), 43 | end: format_ident!("end"), 44 | } 45 | } 46 | } 47 | 48 | impl Vars { 49 | pub fn unique_in<'a, I>(idents: I) -> Self 50 | where 51 | I: IntoIterator, 52 | I::IntoIter: Clone, 53 | { 54 | let capture_idents = idents.into_iter(); 55 | Self { 56 | iter: unique_ident(&capture_idents, "iter".into()), 57 | is_first: unique_ident(&capture_idents, "is_first".into()), 58 | end: unique_ident(&capture_idents, "end".into()), 59 | } 60 | } 61 | 62 | pub fn cache(&self) -> TokenStream { 63 | let iter = &self.iter; 64 | let is_first = &self.is_first; 65 | let end = &self.end; 66 | quote! { 67 | let mut #iter = ::core::clone::Clone::clone(&self.__iter); 68 | let mut #is_first = self.__is_first; 69 | let mut #end = ::core::clone::Clone::clone(&self.__end); 70 | } 71 | } 72 | 73 | pub fn update(&self) -> TokenStream { 74 | let iter = &self.iter; 75 | let is_first = &self.is_first; 76 | let end = &self.end; 77 | quote! { 78 | #iter = ::core::clone::Clone::clone(&self.__iter); 79 | #is_first = self.__is_first; 80 | #end = ::core::clone::Clone::clone(&self.__end); 81 | } 82 | } 83 | 84 | pub fn restore(&self) -> TokenStream { 85 | let iter = &self.iter; 86 | let is_first = &self.is_first; 87 | let end = &self.end; 88 | quote! { 89 | self.__iter = #iter; 90 | self.__is_first = #is_first; 91 | self.__end = #end; 92 | } 93 | } 94 | 95 | pub fn restore_clone(&self) -> TokenStream { 96 | let iter = &self.iter; 97 | let is_first = &self.is_first; 98 | let end = &self.end; 99 | quote! 
{ 100 | self.__iter = ::core::clone::Clone::clone(&#iter); 101 | self.__is_first = #is_first; 102 | self.__end = ::core::clone::Clone::clone(&#end); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /hitori-macros/src/expand/starts_with_block/repeat.rs: -------------------------------------------------------------------------------- 1 | use super::cache; 2 | use crate::parse::repeat::Repeat; 3 | use proc_macro2::{Ident, TokenStream}; 4 | use quote::quote; 5 | use std::collections::BTreeSet; 6 | 7 | fn bounds_decl(repeat: &Repeat) -> TokenStream { 8 | match repeat { 9 | Repeat::Exact(lo_included) 10 | | Repeat::InInclusive { 11 | lo_included, 12 | hi_excluded: None, 13 | } => { 14 | quote! { 15 | let lo: usize = #lo_included; 16 | } 17 | } 18 | Repeat::InInclusive { 19 | lo_included, 20 | hi_excluded: Some(hi_excluded), 21 | } => { 22 | quote! { 23 | let lo: usize = #lo_included; 24 | let hi: usize = #hi_excluded; 25 | if lo >= hi { 26 | return false; 27 | } 28 | } 29 | } 30 | } 31 | } 32 | 33 | fn lo_test(inner_matches_ident: &Ident, inner_capture_idents: &BTreeSet) -> TokenStream { 34 | let capture = cache::Capture::new(inner_capture_idents); 35 | let capture_cache = capture.cache(); 36 | let capture_restore = capture.restore(); 37 | quote! { 38 | #capture_cache 39 | for _ in 0..lo { 40 | if !self.#inner_matches_ident() { 41 | #capture_restore 42 | return false; 43 | } 44 | } 45 | } 46 | } 47 | 48 | fn vars_cache_update_restore(inner_capture_idents: &BTreeSet) -> [TokenStream; 3] { 49 | let vars = cache::Vars::unique_in(inner_capture_idents); 50 | [vars.cache(), vars.update(), vars.restore()] 51 | } 52 | 53 | fn some_hi_test( 54 | inner_matches_ident: &Ident, 55 | [vars_cache, vars_update, vars_restore]: &[TokenStream; 3], 56 | ) -> TokenStream { 57 | quote! 
{ 58 | if lo + 1 == hi { 59 | return true; 60 | } 61 | #vars_cache 62 | for _ in lo + 2..hi { 63 | if self.#inner_matches_ident() { 64 | #vars_update 65 | } else { 66 | #vars_restore 67 | return true; 68 | } 69 | } 70 | if !self.#inner_matches_ident() { 71 | #vars_restore 72 | } 73 | } 74 | } 75 | 76 | fn none_hi_test( 77 | inner_matches_ident: &Ident, 78 | [vars_cache, vars_update, vars_restore]: &[TokenStream; 3], 79 | ) -> TokenStream { 80 | quote! { 81 | #vars_cache 82 | while self.#inner_matches_ident() { 83 | #vars_update 84 | } 85 | #vars_restore 86 | } 87 | } 88 | 89 | pub fn expand_block( 90 | repeat: &Repeat, 91 | inner_matches_ident: &Ident, 92 | inner_capture_idents: &BTreeSet, 93 | ) -> TokenStream { 94 | let mut output = bounds_decl(repeat); 95 | output.extend(lo_test(inner_matches_ident, inner_capture_idents)); 96 | if let Repeat::InInclusive { hi_excluded, .. } = repeat { 97 | let vars_streams = vars_cache_update_restore(inner_capture_idents); 98 | output.extend(if hi_excluded.is_some() { 99 | some_hi_test(inner_matches_ident, &vars_streams) 100 | } else { 101 | none_hi_test(inner_matches_ident, &vars_streams) 102 | }); 103 | } 104 | output.extend(quote! 
{ true }); 105 | output 106 | } 107 | -------------------------------------------------------------------------------- /hitori-macros/src/expand/starts_with_block/state.rs: -------------------------------------------------------------------------------- 1 | use super::{cache, repeat, Group, HitoriAttribute, Tree}; 2 | use crate::parse::{position::Position, repeat::Repeat}; 3 | use proc_macro2::{Ident, TokenStream}; 4 | use quote::{format_ident, quote}; 5 | use std::collections::BTreeSet; 6 | use syn::{punctuated::Punctuated, Expr, Token}; 7 | 8 | #[derive(Default)] 9 | pub struct State { 10 | pub impl_wrapper_block: TokenStream, 11 | pub prev_subexpr_matches_ident: Option, 12 | next_subexpr_index: usize, 13 | } 14 | 15 | impl State { 16 | fn set_next_subexpr(&mut self, prefix: &str) { 17 | self.prev_subexpr_matches_ident = Some(format_ident!( 18 | "__{prefix}_subexpr{}_matches", 19 | self.next_subexpr_index 20 | )); 21 | self.next_subexpr_index += 1; 22 | } 23 | 24 | pub fn unwrap_prev_subexpr_matches_ident(&self) -> &Ident { 25 | self.prev_subexpr_matches_ident.as_ref().unwrap() 26 | } 27 | 28 | fn push_subexpr_matches(&mut self, prefix: &str, block: &TokenStream) { 29 | self.set_next_subexpr(prefix); 30 | let ident = self.unwrap_prev_subexpr_matches_ident(); 31 | self.impl_wrapper_block.extend(quote! 
{ 32 | fn #ident(&mut self) -> bool { #block } 33 | }); 34 | } 35 | 36 | fn push_group_all( 37 | &mut self, 38 | all: &Punctuated, 39 | ) -> syn::Result> { 40 | let mut inner_capture_idents = BTreeSet::new(); 41 | let mut block = TokenStream::new(); 42 | let mut new_branch_capture_idents = vec![]; 43 | 44 | for expr in all { 45 | let branch_capture_idents = self.push_tree(expr.try_into()?)?; 46 | let branch_matches_ident = self.unwrap_prev_subexpr_matches_ident(); 47 | 48 | new_branch_capture_idents.clear(); 49 | for ident in branch_capture_idents { 50 | if inner_capture_idents.insert(ident.clone()) { 51 | new_branch_capture_idents.push(ident); 52 | } 53 | } 54 | block.extend(cache::Capture::new(&new_branch_capture_idents).cache()); 55 | 56 | let capture_restore = cache::Capture::new(&inner_capture_idents).restore(); 57 | block.extend(quote! { 58 | if !self.#branch_matches_ident() { 59 | #capture_restore 60 | return false; 61 | } 62 | }); 63 | } 64 | 65 | block.extend(quote! { true }); 66 | self.push_subexpr_matches("all", &block); 67 | Ok(inner_capture_idents) 68 | } 69 | 70 | fn push_group_any( 71 | &mut self, 72 | any: &Punctuated, 73 | ) -> syn::Result> { 74 | let vars = cache::Vars::default(); 75 | 76 | let mut block = TokenStream::new(); 77 | if any.len() > 1 { 78 | block.extend(vars.cache()); 79 | } 80 | let mut inner_capture_idents = BTreeSet::new(); 81 | 82 | let mut restoring_branch = 83 | |expr: &Expr, cache_other_vars_restore: &TokenStream| -> syn::Result<()> { 84 | inner_capture_idents.append(&mut self.push_tree(expr.try_into()?)?); 85 | let branch_subexpr_matches = self.unwrap_prev_subexpr_matches_ident(); 86 | block.extend(quote! 
{ 87 | if self.#branch_subexpr_matches() { 88 | return true; 89 | } 90 | #cache_other_vars_restore 91 | }); 92 | Ok(()) 93 | }; 94 | 95 | if any.len() > 2 { 96 | let vars_restore = vars.restore_clone(); 97 | for expr in any.iter().take(any.len() - 2) { 98 | restoring_branch(expr, &vars_restore)?; 99 | } 100 | } 101 | if any.len() > 1 { 102 | restoring_branch(&any[any.len() - 2], &vars.restore())?; 103 | } 104 | if !any.is_empty() { 105 | inner_capture_idents.append(&mut self.push_tree(any.last().unwrap().try_into()?)?); 106 | let branch_subexpr_matches = self.unwrap_prev_subexpr_matches_ident(); 107 | block.extend(quote! { 108 | if self.#branch_subexpr_matches() { 109 | return true; 110 | } 111 | }); 112 | } 113 | 114 | block.extend(quote! { false }); 115 | self.push_subexpr_matches("any", &block); 116 | Ok(inner_capture_idents) 117 | } 118 | 119 | fn push_group(&mut self, group: &Group) -> syn::Result> { 120 | match group { 121 | Group::All(exactly_one) | Group::Any(exactly_one) if exactly_one.len() == 1 => { 122 | self.push_tree((&exactly_one[0]).try_into()?) 
123 | } 124 | Group::All(all) => self.push_group_all(all), 125 | Group::Any(any) => self.push_group_any(any), 126 | } 127 | } 128 | 129 | fn push_repeated_group( 130 | &mut self, 131 | group: &Group, 132 | repeat: &Repeat, 133 | ) -> syn::Result> { 134 | let inner_capture_idents = self.push_group(group)?; 135 | self.push_subexpr_matches( 136 | "repeat", 137 | &repeat::expand_block( 138 | repeat, 139 | self.unwrap_prev_subexpr_matches_ident(), 140 | &inner_capture_idents, 141 | ), 142 | ); 143 | Ok(inner_capture_idents) 144 | } 145 | 146 | fn push_captured_group( 147 | &mut self, 148 | group: &Group, 149 | capture_idents: Punctuated, 150 | ) -> syn::Result> { 151 | let mut inner_capture_idents = self.push_group(group)?; 152 | if capture_idents.is_empty() { 153 | return Ok(inner_capture_idents); 154 | } 155 | 156 | let inner_matches_ident = self.unwrap_prev_subexpr_matches_ident(); 157 | let capture_idents_xcpt_last_iter = capture_idents.iter().take(capture_idents.len() - 1); 158 | let last_capture_ident = capture_idents.last().unwrap(); 159 | 160 | self.push_subexpr_matches("capture", "e! 
{ 161 | let start = ::core::clone::Clone::clone(&self.__end); 162 | if !self.#inner_matches_ident() { 163 | return false; 164 | } 165 | #( 166 | self.__capture.#capture_idents_xcpt_last_iter = 167 | Some(::core::clone::Clone::clone(&start)..::core::clone::Clone::clone(&self.__end)); 168 | )* 169 | self.__capture.#last_capture_ident = 170 | Some(start..::core::clone::Clone::clone(&self.__end)); 171 | true 172 | }); 173 | 174 | inner_capture_idents.extend(capture_idents); 175 | Ok(inner_capture_idents) 176 | } 177 | 178 | fn push_positioned_group( 179 | &mut self, 180 | group: &Group, 181 | position: &Position, 182 | ) -> syn::Result> { 183 | let inner_capture_idents = self.push_group(group)?; 184 | if matches!(position, Position::First | Position::FirstAndLast) { 185 | let inner_subexpr_matches = self.unwrap_prev_subexpr_matches_ident(); 186 | self.push_subexpr_matches( 187 | "first", 188 | "e! { 189 | self.__is_first && self.#inner_subexpr_matches() 190 | }, 191 | ); 192 | } 193 | if matches!(position, Position::Last | Position::FirstAndLast) { 194 | let inner_subexpr_matches = self.unwrap_prev_subexpr_matches_ident(); 195 | self.push_subexpr_matches( 196 | "last", 197 | "e! { 198 | if !self.#inner_subexpr_matches() { 199 | return false 200 | } 201 | let iter = ::core::clone::Clone::clone(&self.__iter); 202 | let is_last = self.__iter.next().is_none(); 203 | self.__iter = iter; 204 | is_last 205 | }, 206 | ); 207 | } 208 | Ok(inner_capture_idents) 209 | } 210 | 211 | fn push_test(&mut self, test: &Expr) { 212 | self.push_subexpr_matches( 213 | "test", 214 | "e! 
{ 215 | let next = if let ::core::option::Option::Some(next) = 216 | ::core::iter::Iterator::next(&mut self.__iter) 217 | { 218 | next 219 | } else { 220 | return false; 221 | }; 222 | if (#test)(next.1) { 223 | self.__is_first = false; 224 | self.__end = next.0; 225 | true 226 | } else { 227 | false 228 | } 229 | }, 230 | ); 231 | } 232 | 233 | pub(super) fn push_tree(&mut self, tree: Tree) -> syn::Result> { 234 | match tree { 235 | Tree::Group(group, maybe_attr) => match maybe_attr { 236 | Some(attr) => match attr { 237 | HitoriAttribute::Repeat(repeat) => self.push_repeated_group(&group, &repeat), 238 | HitoriAttribute::Capture(capture_idents) => { 239 | self.push_captured_group(&group, capture_idents) 240 | } 241 | HitoriAttribute::Position(position) => { 242 | self.push_positioned_group(&group, &position) 243 | } 244 | }, 245 | None => self.push_group(&group), 246 | }, 247 | Tree::Test(test) => { 248 | self.push_test(test); 249 | Ok(BTreeSet::new()) 250 | } 251 | } 252 | } 253 | } 254 | -------------------------------------------------------------------------------- /hitori-macros/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod expand; 2 | mod parse; 3 | mod utils; 4 | 5 | use proc_macro::TokenStream; 6 | use syn::Error; 7 | 8 | fn parse_expand(is_mut: bool, attr: TokenStream, item: TokenStream) -> TokenStream { 9 | let output = parse::parse(is_mut, attr.into(), item.into()) 10 | .and_then(expand::expand) 11 | .unwrap_or_else(Error::into_compile_error); 12 | #[cfg(feature = "debug")] 13 | utils::debug(output.clone()).unwrap(); 14 | output.into() 15 | } 16 | 17 | #[proc_macro_attribute] 18 | pub fn impl_expr(attr: TokenStream, item: TokenStream) -> TokenStream { 19 | parse_expand(false, attr, item) 20 | } 21 | 22 | #[proc_macro_attribute] 23 | pub fn impl_expr_mut(attr: TokenStream, item: TokenStream) -> TokenStream { 24 | parse_expand(true, attr, item) 25 | } 26 | 
-------------------------------------------------------------------------------- /hitori-macros/src/parse.rs: -------------------------------------------------------------------------------- 1 | pub mod position; 2 | pub mod repeat; 3 | 4 | mod args; 5 | 6 | use crate::utils::{ 7 | eq_by_fmt, generic_arg_try_into_type, has_type_any_generic_params, ident_not_in_generic_params, 8 | type_as_type_path, 9 | }; 10 | use args::Args; 11 | use proc_macro2::{Ident, TokenStream}; 12 | use quote::{format_ident, ToTokens as _}; 13 | use syn::{ 14 | parse2, 15 | punctuated::{self, Punctuated}, 16 | Expr, GenericParam, ImplItem, ImplItemConst, ItemImpl, Path, PathArguments, PathSegment, Token, 17 | Type, TypePath, Visibility, WhereClause, 18 | }; 19 | 20 | fn trait_ident_and_args(mut path: Path) -> syn::Result<(Ident, [Type; 2])> { 21 | Err( 22 | if path.segments.len() != 1 || path.leading_colon.is_some() { 23 | syn::Error::new_spanned(path, "expected identifier") 24 | } else if let Some(punctuated::Pair::End(PathSegment { ident, arguments })) = 25 | path.segments.pop() 26 | { 27 | match arguments { 28 | PathArguments::AngleBracketed(args) => { 29 | if args.args.len() == 2 { 30 | let mut args = args.args.into_iter(); 31 | let idx_arg = generic_arg_try_into_type(args.next().unwrap())?; 32 | let ch_arg = generic_arg_try_into_type(args.next().unwrap())?; 33 | return Ok((ident, [idx_arg, ch_arg])); 34 | } 35 | syn::Error::new_spanned(args, "expected 2 arguments") 36 | } 37 | PathArguments::Parenthesized(args) => { 38 | syn::Error::new_spanned(args, "expected angle brackets around arguments") 39 | } 40 | PathArguments::None => syn::Error::new_spanned(ident, "expected 2 arguments"), 41 | } 42 | } else { 43 | unreachable!() 44 | }, 45 | ) 46 | } 47 | 48 | fn const_expr(items: Vec) -> syn::Result { 49 | fn error(result: syn::Result) -> syn::Error { 50 | match result { 51 | Ok(const_) => syn::Error::new_spanned(const_, "multiple const items"), 52 | Err(err) => err, 53 | } 54 | } 55 | 56 | 
fn combine_errors( 57 | mut init: syn::Error, 58 | iter: impl Iterator>, 59 | ) -> syn::Error { 60 | for result in iter { 61 | init.combine(error(result)); 62 | } 63 | init 64 | } 65 | 66 | let mut const_iter = items.into_iter().map(|item| { 67 | Err(syn::Error::new_spanned( 68 | match item { 69 | ImplItem::Const(const_) => { 70 | return Err(if const_.ident != "PATTERN" { 71 | syn::Error::new_spanned(const_.ident, "not `PATTERN`") 72 | } else if !eq_by_fmt(&const_.ty, ::default()) { 73 | syn::Error::new_spanned(const_.ty, "not an underscore") 74 | } else { 75 | return Ok(const_); 76 | }); 77 | } 78 | item => item.into_token_stream(), 79 | }, 80 | "not a const item", 81 | )) 82 | }); 83 | 84 | Err(match const_iter.next() { 85 | Some(Ok(ImplItemConst { expr, .. })) => match const_iter.next() { 86 | Some(next) => combine_errors(error(next), const_iter), 87 | None => return Ok(expr), 88 | }, 89 | Some(Err(err)) => combine_errors(err, const_iter), 90 | None => syn::Error::new_spanned(TokenStream::new(), "empty impl"), 91 | }) 92 | } 93 | 94 | pub struct Output { 95 | pub is_mut: bool, 96 | pub capture_vis: Visibility, 97 | pub capture_ident: Ident, 98 | pub capture_idx_ident: Ident, 99 | pub self_ty: Box, 100 | pub trait_ident: Ident, 101 | pub iter_ident: Ident, 102 | pub idx_ty: Type, 103 | pub is_idx_generic: bool, 104 | pub ch_ty: Type, 105 | pub expr: Expr, 106 | pub wrapper_ident: Ident, 107 | pub generic_params: Punctuated, 108 | pub where_clause: Option, 109 | } 110 | 111 | impl Output { 112 | fn new(is_mut: bool, args: Args, item: ItemImpl) -> syn::Result { 113 | let iter_ident = ident_not_in_generic_params(&item.generics.params, "I".into()); 114 | let wrapper_ident = ident_not_in_generic_params(&item.generics.params, "Self_".into()); 115 | 116 | let (trait_ident, [idx_ty, ch_ty]) = trait_ident_and_args( 117 | item.trait_ 118 | .ok_or_else(|| syn::Error::new_spanned(&item.self_ty, "not a trait impl"))? 
119 | .1, 120 | )?; 121 | 122 | if is_mut { 123 | if trait_ident != "ExprMut" { 124 | return Err(syn::Error::new_spanned(trait_ident, "not `ExprMut`")); 125 | } 126 | } else if trait_ident != "Expr" { 127 | return Err(syn::Error::new_spanned(trait_ident, "not `Expr`")); 128 | } 129 | 130 | let is_idx_generic = has_type_any_generic_params(&item.generics.params, &idx_ty); 131 | 132 | let vis = args 133 | .capture_vis 134 | .unwrap_or_else(|| Visibility::Public(::default())); 135 | 136 | let capture_ident = if let Some(ident) = args.capture_ident { 137 | ident 138 | } else { 139 | match type_as_type_path(&item.self_ty) { 140 | Some(TypePath { 141 | path: Path { segments, .. }, 142 | .. 143 | }) if !segments.is_empty() => { 144 | let self_ident = &segments.last().unwrap().ident; 145 | format_ident!("{self_ident}Capture") 146 | } 147 | _ => format_ident!("Capture"), 148 | } 149 | }; 150 | 151 | let capture_idx_ident = if is_idx_generic 152 | && type_as_type_path(&idx_ty) 153 | .and_then(|type_path| type_path.path.get_ident()) 154 | .map(|idx_ident| idx_ident == "Idx") 155 | .unwrap_or_default() 156 | { 157 | format_ident!("Idx_") 158 | } else { 159 | format_ident!("Idx") 160 | }; 161 | 162 | const_expr(item.items).map(|expr| Output { 163 | is_mut, 164 | capture_vis: vis, 165 | capture_ident, 166 | capture_idx_ident, 167 | self_ty: item.self_ty, 168 | trait_ident, 169 | iter_ident, 170 | idx_ty, 171 | is_idx_generic, 172 | ch_ty, 173 | expr, 174 | wrapper_ident, 175 | generic_params: item.generics.params, 176 | where_clause: item.generics.where_clause, 177 | }) 178 | } 179 | } 180 | 181 | pub fn parse(is_mut: bool, attr: TokenStream, item: TokenStream) -> syn::Result { 182 | Output::new(is_mut, parse2(attr)?, parse2(item)?) 
183 | } 184 | -------------------------------------------------------------------------------- /hitori-macros/src/parse/args.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::path_eq_ident_str; 2 | use proc_macro2::Ident; 3 | use syn::{ 4 | parse::{Parse, ParseStream}, 5 | punctuated::Punctuated, 6 | Expr, ExprLit, Lit, Meta, MetaNameValue, Token, Visibility, 7 | }; 8 | 9 | pub struct Args { 10 | pub capture_vis: Option, 11 | pub capture_ident: Option, 12 | } 13 | 14 | impl TryFrom> for Args { 15 | type Error = syn::Error; 16 | 17 | fn try_from(args: Punctuated) -> syn::Result { 18 | let mut capture_ident = None; 19 | let mut capture_vis = None; 20 | 21 | for arg in &args { 22 | match arg { 23 | Meta::NameValue(MetaNameValue { 24 | path, 25 | value: 26 | Expr::Lit(ExprLit { 27 | lit: Lit::Str(s), .. 28 | }), 29 | .. 30 | }) => { 31 | if path_eq_ident_str(path, "with_capture") { 32 | if capture_ident.is_none() { 33 | capture_ident = Some(s.parse()?); 34 | } else { 35 | return Err(syn::Error::new_spanned(path, "duplicate `with_capture`")); 36 | } 37 | } else if path_eq_ident_str(path, "with_capture_vis") { 38 | if capture_vis.is_none() { 39 | capture_vis = Some(s.parse()?); 40 | } else { 41 | return Err(syn::Error::new_spanned( 42 | path, 43 | "duplicate `with_capture_vis`", 44 | )); 45 | } 46 | } 47 | } 48 | _ => { 49 | return Err(syn::Error::new_spanned( 50 | arg, 51 | "expected `with_capture` or `with_capture_vis` and literal string value", 52 | )) 53 | } 54 | } 55 | } 56 | 57 | Ok(Self { 58 | capture_vis, 59 | capture_ident, 60 | }) 61 | } 62 | } 63 | 64 | impl Parse for Args { 65 | fn parse(input: ParseStream) -> syn::Result { 66 | input 67 | .parse_terminated(Meta::parse, Token![,]) 68 | .and_then(TryInto::try_into) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /hitori-macros/src/parse/position.rs: 
-------------------------------------------------------------------------------- 1 | use proc_macro2::Ident; 2 | use syn::{parse::Parse, punctuated::Punctuated, Token}; 3 | 4 | pub enum Position { 5 | First, 6 | Last, 7 | FirstAndLast, 8 | } 9 | 10 | impl Parse for Position { 11 | fn parse(input: syn::parse::ParseStream) -> syn::Result { 12 | let idents = Punctuated::::parse_terminated(input)?; 13 | let mut first = false; 14 | let mut last = false; 15 | 16 | for ident in &idents { 17 | if ident == "first" { 18 | if first { 19 | return Err(syn::Error::new_spanned(ident, "duplicate")); 20 | } 21 | first = true; 22 | } else if ident == "last" { 23 | if last { 24 | return Err(syn::Error::new_spanned(ident, "duplicate")); 25 | } 26 | last = true; 27 | } 28 | } 29 | 30 | match (first, last) { 31 | (true, true) => Ok(Self::FirstAndLast), 32 | (true, false) => Ok(Self::First), 33 | (false, true) => Ok(Self::Last), 34 | (false, false) => Err(syn::Error::new_spanned( 35 | idents, 36 | "expected `first`, or `last`, or both", 37 | )), 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /hitori-macros/src/parse/repeat.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::{expr_add_one_usize, expr_try_from_lit_int_or_lit_str_expr, path_eq_ident_str}; 2 | use proc_macro2::Literal; 3 | use std::ops::Bound; 4 | use syn::{parse::Parse, punctuated::Punctuated, Expr, ExprLit, Lit, MetaNameValue, Token}; 5 | 6 | enum Internal { 7 | Exact(Expr), 8 | In { lo: Bound, hi: Bound }, 9 | } 10 | 11 | impl Internal { 12 | fn set_parse_exact( 13 | repeat: &mut Option, 14 | name_value: MetaNameValue, 15 | ) -> syn::Result<()> { 16 | if repeat.is_none() { 17 | *repeat = Some(Internal::Exact(expr_try_from_lit_int_or_lit_str_expr( 18 | name_value.value, 19 | )?)); 20 | Ok(()) 21 | } else { 22 | Err(syn::Error::new_spanned( 23 | &name_value.path, 24 | "must be the only bound", 25 | )) 26 | } 27 
| } 28 | 29 | fn set_parse_in_lo( 30 | repeat: &mut Option, 31 | name_value: MetaNameValue, 32 | bound: fn(Expr) -> Bound, 33 | err_msg: &str, 34 | ) -> syn::Result<()> { 35 | if repeat.is_none() { 36 | *repeat = Some(Internal::In { 37 | lo: bound(expr_try_from_lit_int_or_lit_str_expr(name_value.value)?), 38 | hi: Bound::Unbounded, 39 | }); 40 | } else if let Some(Internal::In { 41 | lo: lo @ Bound::Unbounded, 42 | hi: _, 43 | }) = repeat 44 | { 45 | *lo = bound(expr_try_from_lit_int_or_lit_str_expr(name_value.value)?); 46 | } else { 47 | return Err(syn::Error::new_spanned(&name_value.path, err_msg)); 48 | } 49 | Ok(()) 50 | } 51 | 52 | fn set_parse_in_hi( 53 | repeat: &mut Option, 54 | name_value: MetaNameValue, 55 | bound: fn(Expr) -> Bound, 56 | err_msg: &str, 57 | ) -> syn::Result<()> { 58 | if repeat.is_none() { 59 | *repeat = Some(Internal::In { 60 | lo: Bound::Unbounded, 61 | hi: bound(expr_try_from_lit_int_or_lit_str_expr(name_value.value)?), 62 | }); 63 | } else if let Some(Internal::In { 64 | lo: _, 65 | hi: hi @ Bound::Unbounded, 66 | }) = repeat 67 | { 68 | *hi = bound(expr_try_from_lit_int_or_lit_str_expr(name_value.value)?); 69 | } else { 70 | return Err(syn::Error::new_spanned(&name_value.path, err_msg)); 71 | } 72 | Ok(()) 73 | } 74 | } 75 | 76 | impl Parse for Internal { 77 | fn parse(input: syn::parse::ParseStream) -> syn::Result { 78 | let meta = Punctuated::::parse_terminated(input)?; 79 | let mut output = None; 80 | 81 | for name_value in meta { 82 | if path_eq_ident_str(&name_value.path, "eq") { 83 | Self::set_parse_exact(&mut output, name_value)?; 84 | } else if path_eq_ident_str(&name_value.path, "lt") { 85 | Self::set_parse_in_hi( 86 | &mut output, 87 | name_value, 88 | Bound::Excluded, 89 | "cannot be combined with itself or `le`", 90 | )?; 91 | } else if path_eq_ident_str(&name_value.path, "le") { 92 | Self::set_parse_in_hi( 93 | &mut output, 94 | name_value, 95 | Bound::Included, 96 | "cannot be combined with itself or `lt`", 97 | )?; 98 
| } else if path_eq_ident_str(&name_value.path, "gt") { 99 | Self::set_parse_in_lo( 100 | &mut output, 101 | name_value, 102 | Bound::Excluded, 103 | "cannot be combined with itself or `ge`", 104 | )?; 105 | } else if path_eq_ident_str(&name_value.path, "ge") { 106 | Self::set_parse_in_lo( 107 | &mut output, 108 | name_value, 109 | Bound::Included, 110 | "cannot be combined with itself or `gt`", 111 | )?; 112 | } 113 | } 114 | 115 | Ok(output.unwrap_or_else(|| Internal::In { 116 | lo: Bound::Unbounded, 117 | hi: Bound::Unbounded, 118 | })) 119 | } 120 | } 121 | 122 | pub enum Repeat { 123 | Exact(Expr), 124 | InInclusive { 125 | lo_included: Expr, 126 | hi_excluded: Option, 127 | }, 128 | } 129 | 130 | impl From for Repeat { 131 | fn from(repeat: Internal) -> Self { 132 | match repeat { 133 | Internal::Exact(exact) => Self::Exact(exact), 134 | Internal::In { lo, hi } => Self::InInclusive { 135 | lo_included: match lo { 136 | Bound::Included(lo) => lo, 137 | Bound::Excluded(lo) => expr_add_one_usize(lo), 138 | Bound::Unbounded => Expr::Lit(ExprLit { 139 | attrs: vec![], 140 | lit: Lit::Int(Literal::usize_unsuffixed(0).into()), 141 | }), 142 | }, 143 | hi_excluded: match hi { 144 | Bound::Included(hi) => Some(expr_add_one_usize(hi)), 145 | Bound::Excluded(hi) => Some(hi), 146 | Bound::Unbounded => None, 147 | }, 148 | }, 149 | } 150 | } 151 | } 152 | 153 | impl Parse for Repeat { 154 | fn parse(input: syn::parse::ParseStream) -> syn::Result { 155 | Internal::parse(input).map(Into::into) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /hitori-macros/src/utils.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::{Ident, Literal, TokenStream}; 2 | use quote::{format_ident, quote, ToTokens}; 3 | use std::{convert, fmt::Write as _, mem}; 4 | use syn::{ 5 | punctuated::Punctuated, AssocType, Attribute, BinOp, Expr, ExprBinary, ExprLit, 6 | GenericArgument, 
GenericParam, LifetimeParam, Lit, ParenthesizedGenericArguments, Path, 7 | PathArguments, ReturnType, Token, Type, TypeImplTrait, TypeParam, TypeParamBound, TypeParen, 8 | TypePath, TypePtr, TypeReference, TypeTraitObject, 9 | }; 10 | 11 | pub fn hitori_ident() -> Ident { 12 | #[cfg(feature = "find-hitori")] 13 | match proc_macro_crate::crate_name("hitori").expect("expected `hitori` package in `Cargo.toml`") 14 | { 15 | proc_macro_crate::FoundCrate::Itself => format_ident!("hitori"), 16 | proc_macro_crate::FoundCrate::Name(name) => format_ident!("{name}"), 17 | } 18 | #[cfg(not(feature = "find-hitori"))] 19 | format_ident!("hitori") 20 | } 21 | 22 | pub fn hitori_attr_ident_eq_str(attr: &Attribute, s: &str) -> bool { 23 | let segments = &attr.path().segments; 24 | assert!(segments.len() == 2, "bug"); 25 | assert_eq!(segments[0].ident, "hitori", "bug"); 26 | segments[1].ident == s 27 | } 28 | 29 | fn is_hitori_attr_path(attr_path: &Path) -> bool { 30 | attr_path.leading_colon.is_none() 31 | && !attr_path.segments.empty_or_trailing() 32 | && attr_path.segments[0].arguments.is_empty() 33 | && attr_path.segments[0].ident == "hitori" 34 | } 35 | 36 | fn find_hitori_attr_index(attrs: &[Attribute]) -> Option { 37 | attrs 38 | .iter() 39 | .position(|attr| is_hitori_attr_path(attr.path()) && attr.path().segments.len() == 2) 40 | } 41 | 42 | struct FindHitoriAttrsIndices<'a>(&'a [Attribute]); 43 | 44 | impl<'a> Iterator for FindHitoriAttrsIndices<'a> { 45 | type Item = usize; 46 | 47 | fn next(&mut self) -> Option { 48 | if self.0.is_empty() { 49 | return None; 50 | } 51 | if let Some(index) = find_hitori_attr_index(self.0) { 52 | self.0 = &self.0[(index + 1)..]; 53 | Some(index) 54 | } else { 55 | self.0 = &[]; 56 | None 57 | } 58 | } 59 | } 60 | 61 | pub fn find_le_one_hitori_attr(attrs: &[Attribute]) -> Result, [&Attribute; 2]> { 62 | let mut indices = FindHitoriAttrsIndices(attrs); 63 | if let Some(mut first_index) = indices.next() { 64 | if let Some(mut second_index) = 
indices.next() { 65 | for next_index in indices { 66 | first_index = mem::replace(&mut second_index, next_index); 67 | } 68 | Err([&attrs[first_index], &attrs[second_index]]) 69 | } else { 70 | Ok(Some(&attrs[first_index])) 71 | } 72 | } else { 73 | Ok(None) 74 | } 75 | } 76 | 77 | pub fn eq_by_fmt(lhs: Lhs, rhs: Rhs) -> bool { 78 | let mut buf = String::with_capacity(128); 79 | write!(buf, "{}", lhs.into_token_stream()).unwrap(); 80 | let lhs_end = buf.len(); 81 | write!(buf, "{}", rhs.into_token_stream()).unwrap(); 82 | buf[..lhs_end] == buf[lhs_end..] 83 | } 84 | 85 | pub fn path_eq_ident_str(path: &Path, ident_str: &str) -> bool { 86 | path.get_ident() 87 | .map(|ident| ident == ident_str) 88 | .unwrap_or_default() 89 | } 90 | 91 | pub fn lifetimes_into_punctuated_unit_refs<'a>( 92 | iter: impl IntoIterator, 93 | ) -> TokenStream { 94 | let mut output = TokenStream::new(); 95 | for LifetimeParam { lifetime, .. } in iter { 96 | output.extend(quote! { & #lifetime (), }); 97 | } 98 | output 99 | } 100 | 101 | pub fn generic_arg_try_into_type(arg: GenericArgument) -> syn::Result { 102 | match &arg { 103 | GenericArgument::Type(_) => match arg { 104 | GenericArgument::Type(ty) => Ok(ty), 105 | _ => unreachable!(), 106 | }, 107 | _ => Err(syn::Error::new_spanned(arg, "expected type")), 108 | } 109 | } 110 | 111 | pub fn ident_not_in_generic_params( 112 | params: &Punctuated, 113 | init: String, 114 | ) -> Ident { 115 | unique_ident( 116 | ¶ms.iter().filter_map(|param| match param { 117 | GenericParam::Type(TypeParam { ident, .. }) => Some(ident), 118 | _ => None, 119 | }), 120 | init, 121 | ) 122 | } 123 | 124 | pub fn unique_ident<'a>( 125 | idents: &(impl Iterator + Clone), 126 | mut init: String, 127 | ) -> Ident { 128 | while idents.clone().any(|ident| ident == &init) { 129 | init.push('_'); 130 | } 131 | 132 | format_ident!("{init}") 133 | } 134 | 135 | pub fn type_as_type_path(ty: &Type) -> Option<&TypePath> { 136 | macro_rules! 
next { 137 | ($ty:expr) => { 138 | match $ty { 139 | Type::Paren(TypeParen { elem, .. }) 140 | | Type::Reference(TypeReference { elem, .. }) 141 | | Type::Ptr(TypePtr { elem, .. }) => elem, 142 | Type::Path(path) => return Some(path), 143 | _ => return None, 144 | } 145 | }; 146 | } 147 | let mut ty = next!(ty); 148 | loop { 149 | ty = next!(ty.as_ref()); 150 | } 151 | } 152 | 153 | pub fn expr_add_one_usize(expr: Expr) -> Expr { 154 | Expr::Binary(ExprBinary { 155 | attrs: vec![], 156 | left: Box::new(expr), 157 | op: BinOp::Add(::default()), 158 | right: Box::new(Expr::Lit(ExprLit { 159 | attrs: vec![], 160 | lit: Lit::Int(Literal::usize_suffixed(1).into()), 161 | })), 162 | }) 163 | } 164 | 165 | pub fn expr_try_from_lit_int_or_lit_str_expr(expr: Expr) -> syn::Result { 166 | if let Expr::Lit(lit) = &expr { 167 | match &lit.lit { 168 | Lit::Int(_) => return Ok(expr), 169 | Lit::Str(s) => return s.parse(), 170 | _ => (), 171 | } 172 | } 173 | Err(syn::Error::new_spanned( 174 | expr, 175 | "expected either a literal `usize` or an expression \ 176 | within literal string", 177 | )) 178 | } 179 | 180 | fn is_any_generic_param_eq_ident( 181 | params: &Punctuated, 182 | ident: &Ident, 183 | ) -> bool { 184 | params 185 | .iter() 186 | .filter_map(|param| match param { 187 | GenericParam::Type(TypeParam { ident, .. 
}) => Some(ident), 188 | _ => None, 189 | }) 190 | .any(|param_ident| ident == param_ident) 191 | } 192 | 193 | fn is_any_generic_param_eq_path_prefix( 194 | params: &Punctuated, 195 | path: &Path, 196 | ) -> bool { 197 | if params.is_empty() { 198 | false 199 | } else { 200 | is_any_generic_param_eq_ident(params, &path.segments[0].ident) 201 | } 202 | } 203 | 204 | fn is_any_generic_param_in_path_args( 205 | params: &Punctuated, 206 | path: &Path, 207 | ) -> bool { 208 | path.segments 209 | .iter() 210 | .map(|segment| match &segment.arguments { 211 | PathArguments::AngleBracketed(args) => args 212 | .args 213 | .iter() 214 | .any(|arg| has_generic_arg_any_generic_params(params, arg)), 215 | PathArguments::Parenthesized(ParenthesizedGenericArguments { 216 | inputs, 217 | output: ReturnType::Type(_, output), 218 | .. 219 | }) => { 220 | has_type_any_generic_params(params, output) 221 | && inputs 222 | .iter() 223 | .any(|input| has_type_any_generic_params(params, input)) 224 | } 225 | _ => false, 226 | }) 227 | .any(convert::identity) 228 | } 229 | 230 | pub fn has_path_any_generic_params( 231 | params: &Punctuated, 232 | path: &Path, 233 | ) -> bool { 234 | is_any_generic_param_eq_path_prefix(params, path) 235 | || is_any_generic_param_in_path_args(params, path) 236 | } 237 | 238 | pub fn has_type_path_any_generic_params( 239 | params: &Punctuated, 240 | ty: &TypePath, 241 | ) -> bool { 242 | if has_path_any_generic_params(params, &ty.path) { 243 | true 244 | } else if let Some(qself) = &ty.qself { 245 | has_type_any_generic_params(params, &qself.ty) 246 | } else { 247 | false 248 | } 249 | } 250 | 251 | pub fn has_type_param_bound_any_generic_params( 252 | params: &Punctuated, 253 | bound: &TypeParamBound, 254 | ) -> bool { 255 | match bound { 256 | TypeParamBound::Trait(bound) => is_any_generic_param_in_path_args(params, &bound.path), 257 | _ => false, 258 | } 259 | } 260 | 261 | pub fn has_type_any_generic_params( 262 | params: &Punctuated, 263 | ty: &Type, 264 
| ) -> bool { 265 | if let Some(path) = type_as_type_path(ty) { 266 | has_type_path_any_generic_params(params, path) 267 | } else if let Type::ImplTrait(TypeImplTrait { bounds, .. }) 268 | | Type::TraitObject(TypeTraitObject { bounds, .. }) = ty 269 | { 270 | bounds 271 | .iter() 272 | .any(|bound| has_type_param_bound_any_generic_params(params, bound)) 273 | } else { 274 | false 275 | } 276 | } 277 | 278 | pub fn has_generic_arg_any_generic_params( 279 | params: &Punctuated, 280 | arg: &GenericArgument, 281 | ) -> bool { 282 | match arg { 283 | GenericArgument::Type(ty) | GenericArgument::AssocType(AssocType { ty, .. }) => { 284 | has_type_any_generic_params(params, ty) 285 | } 286 | GenericArgument::Constraint(constraint) => constraint 287 | .bounds 288 | .iter() 289 | .any(|bound| has_type_param_bound_any_generic_params(params, bound)), 290 | _ => false, 291 | } 292 | } 293 | 294 | pub fn remove_generic_params_bounds(params: &mut Punctuated) { 295 | for param in params { 296 | if let GenericParam::Type(ty) = param { 297 | ty.bounds = Punctuated::new(); 298 | } else if let GenericParam::Lifetime(l) = param { 299 | l.bounds = Punctuated::new(); 300 | } 301 | } 302 | } 303 | 304 | #[cfg(feature = "debug")] 305 | pub fn debug(tokens: proc_macro2::TokenStream) -> Result<(), Box> { 306 | use rust_format::{Formatter as _, RustFmt}; 307 | use std::{env, fs, path::PathBuf}; 308 | let dir = if let Ok(out_dir) = env::var("CARGO_TARGET_DIR") { 309 | out_dir.into() 310 | } else { 311 | let dir = PathBuf::from("target/hitori"); 312 | fs::create_dir_all(&dir)?; 313 | dir 314 | }; 315 | fs::write( 316 | dir.join("macros_debug.rs"), 317 | RustFmt::default().format_tokens(tokens)?, 318 | ) 319 | .map_err(Into::into) 320 | } 321 | -------------------------------------------------------------------------------- /hitori/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hitori" 3 | categories = ["text-processing"] 4 
| description = "Generic compile-time regular expressions" 5 | documentation = "https://docs.rs/hitori" 6 | keywords = ["regex"] 7 | readme = "../README.md" 8 | version.workspace = true 9 | authors.workspace = true 10 | edition.workspace = true 11 | license.workspace = true 12 | repository.workspace = true 13 | rust-version.workspace = true 14 | 15 | [features] 16 | default = ["alloc", "macros"] 17 | # string replace functions and blanket implementations 18 | # of hitori traits for boxes using alloc crate. 19 | alloc = [] 20 | # `impl_expr_mut` and `impl_expr` macros. 21 | macros = ["hitori-macros"] 22 | # finds hitori package to be used in macros even if 23 | # it has been renamed in Cargo.toml. `macros` is required. 24 | find-hitori = ["hitori-macros?/find-hitori"] 25 | 26 | [dependencies] 27 | hitori-macros = { version = "=0.2.3", path = "../hitori-macros", optional = true } 28 | 29 | [dev-dependencies] 30 | regex = "1.7.3" 31 | -------------------------------------------------------------------------------- /hitori/benches/hitori_vs_regex.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | mod hitori_examples { 6 | include!("../../hitori-examples/src/putting_everything_together/email.rs"); 7 | include!("../../hitori-examples/src/putting_everything_together/ipv4.rs"); 8 | include!("../../hitori-examples/src/putting_everything_together/uri.rs"); 9 | } 10 | 11 | use hitori::Expr; 12 | use hitori_examples::{Email as HitoriEmail, IpV4 as HitoriIpV4, Uri as HitoriUri}; 13 | use regex::Regex; 14 | use test::Bencher; 15 | 16 | const TEXT: &str = include_str!("regex-benchmark/input-text.txt"); 17 | 18 | const REGEX_EMAIL_PATTERN: &str = r"[\w\.+-]+@[\w\.-]+\.[\w\.-]+"; 19 | 20 | const REGEX_URI_PATTERN: &str = r"[\w]+://[^/\s?#][^\s?#]+(?:\?[^\s#]*)?(?:#[^\s]*)?"; 21 | 22 | const REGEX_IPV4_PATTERN: &str = 23 | 
r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])"; 24 | 25 | fn hitori_find_count>(expr: &E) -> usize { 26 | hitori::string::FindIter::new(expr, TEXT).count() 27 | } 28 | 29 | fn regex_find_count(re: &Regex) -> usize { 30 | re.find_iter(TEXT).count() 31 | } 32 | 33 | #[test] 34 | fn hitori_email_count_eq_regex_email_count() { 35 | assert_eq!( 36 | hitori_find_count(&HitoriEmail), 37 | regex_find_count(&Regex::new(REGEX_EMAIL_PATTERN).unwrap()) 38 | ); 39 | } 40 | 41 | #[test] 42 | fn hitori_uri_count_eq_regex_uri_count() { 43 | assert_eq!( 44 | hitori_find_count(&HitoriUri), 45 | regex_find_count(&Regex::new(REGEX_URI_PATTERN).unwrap()) 46 | ); 47 | } 48 | 49 | #[test] 50 | fn hitori_ipv4_count_eq_regex_ipv4_count() { 51 | assert_eq!( 52 | hitori_find_count(&HitoriIpV4), 53 | regex_find_count(&Regex::new(REGEX_IPV4_PATTERN).unwrap()) 54 | ); 55 | } 56 | 57 | #[bench] 58 | fn hitori_email(b: &mut Bencher) { 59 | b.iter(|| hitori_find_count(&HitoriEmail)); 60 | } 61 | 62 | #[bench] 63 | fn regex_email(b: &mut Bencher) { 64 | b.iter(|| regex_find_count(&Regex::new(REGEX_EMAIL_PATTERN).unwrap())); 65 | } 66 | 67 | #[bench] 68 | fn regex_email_precompiled(b: &mut Bencher) { 69 | let re = Regex::new(REGEX_EMAIL_PATTERN).unwrap(); 70 | b.iter(|| regex_find_count(&re)); 71 | } 72 | 73 | #[bench] 74 | fn hitori_uri(b: &mut Bencher) { 75 | b.iter(|| hitori_find_count(&HitoriUri)); 76 | } 77 | 78 | #[bench] 79 | fn regex_uri(b: &mut Bencher) { 80 | b.iter(|| regex_find_count(&Regex::new(REGEX_URI_PATTERN).unwrap())); 81 | } 82 | 83 | #[bench] 84 | fn regex_uri_precompiled(b: &mut Bencher) { 85 | let re = Regex::new(REGEX_URI_PATTERN).unwrap(); 86 | b.iter(|| regex_find_count(&re)); 87 | } 88 | 89 | #[bench] 90 | fn hitori_ipv4(b: &mut Bencher) { 91 | b.iter(|| hitori_find_count(&HitoriIpV4)); 92 | } 93 | 94 | #[bench] 95 | fn regex_ipv4(b: &mut Bencher) { 96 | b.iter(|| regex_find_count(&Regex::new(REGEX_IPV4_PATTERN).unwrap())); 97 | 
} 98 | 99 | #[bench] 100 | fn regex_ipv4_precompiled(b: &mut Bencher) { 101 | let re = Regex::new(REGEX_IPV4_PATTERN).unwrap(); 102 | b.iter(|| regex_find_count(&re)); 103 | } 104 | -------------------------------------------------------------------------------- /hitori/src/generic.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{ExprMut, Match}; 2 | 3 | /// Checks if an [`Iterator`] starts with [`ExprMut`]-matched characters. 4 | /// 5 | /// # Arguments 6 | /// 7 | /// - **`start`** – this should be the start of the first character in the `iter`. 8 | /// - **`is_first`** – tells `expr` whether it is a start of an input. 9 | /// This affects `#[hitori::position(first)]` attribute. 10 | /// - **`iter`** – an iterator over the characters and indices of their **ends**. 11 | /// This is unlike what [`CharIndices`] produces, as the indices there are the 12 | /// starts of the characters. [`string`] module provides [`CharEnds`] iterator 13 | /// that could be used for strings instead. 14 | /// 15 | /// [`CharIndices`]: core::str::CharIndices 16 | /// [`string`]: crate::string 17 | /// [`CharEnds`]: crate::string::CharEnds 18 | #[inline] 19 | pub fn starts_with( 20 | mut expr: E, 21 | start: Idx, 22 | is_first: bool, 23 | iter: I, 24 | ) -> Option> 25 | where 26 | E: ExprMut, 27 | I: IntoIterator, 28 | I::IntoIter: Clone, 29 | { 30 | expr.starts_with_mut(start, is_first, iter) 31 | } 32 | 33 | /// Finds the first subsequence of characters that is matched by [`ExprMut`]. 
34 | /// 35 | /// *See [`starts_with`] for arguments description* 36 | pub fn find( 37 | mut expr: E, 38 | mut start: Idx, 39 | is_first: bool, 40 | iter: I, 41 | ) -> Option> 42 | where 43 | E: ExprMut, 44 | Idx: Clone, 45 | I: IntoIterator, 46 | I::IntoIter: Clone, 47 | { 48 | let mut iter = iter.into_iter(); 49 | loop { 50 | if let Some(matched) = expr.starts_with_mut(start.clone(), is_first, iter.clone()) { 51 | return Some(matched); 52 | } else if let Some((new_start, _)) = iter.next() { 53 | start = new_start; 54 | } else { 55 | return None; 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /hitori/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Hitori is a generic compile-time regular expressions library. 2 | //! It works by creating a series of if-statements and for-loops for each expression. 3 | //! 4 | //! *See code samples along with the traits, impls and structs they expand to in [examples].* 5 | //! 6 | //! # Limitations 7 | //! 8 | //! Pattern matching is step-by-step. It is impossible to detach the last element of a repetition. 9 | //! For example, using [regex] one can rewrite `a+` as `a*a` and it would still match any 10 | //! sequence of `a`s longer than zero. With [hitori], however, `a*` would consume 11 | //! all the `a`s, and the expression won't match. 12 | //! 13 | //! Step-by-step pattern matching also leads to diminished performance when matching 14 | //! large texts with an expression that contains repetitions of characters frequent in the text. 15 | //! 16 | //! # Crate features 17 | //! 18 | //! - **`alloc`** *(enabled by default)* – string replace functions and blanket implementations 19 | //! of [hitori] traits for boxes using alloc crate. 20 | //! - **`macros`** *(enabled by default)* – [`impl_expr_mut`] and [`impl_expr`] macros. 21 | //! - **`find-hitori`** – finds hitori package to be used in macros 22 | //! 
even if it has been renamed in Cargo.toml. **`macros`** feature is required. 23 | //! 24 | //! [examples]: https://docs.rs/hitori-examples 25 | //! [hitori]: https://docs.rs/hitori 26 | //! [regex]: https://docs.rs/regex 27 | 28 | #![no_std] 29 | #![cfg_attr( 30 | doc, 31 | feature(doc_cfg), 32 | allow(mixed_script_confusables, confusable_idents) 33 | )] 34 | 35 | #[cfg(all(feature = "find-hitori", not(feature = "macros")))] 36 | core::compile_error!( 37 | r#""find-hitori" feature doesn't do anything unless "macros" feature is enabled"# 38 | ); 39 | 40 | #[cfg(feature = "alloc")] 41 | extern crate alloc; 42 | 43 | pub mod string; 44 | 45 | mod generic; 46 | mod traits; 47 | 48 | pub use generic::{find, starts_with}; 49 | pub use traits::{Expr, ExprMut, Match}; 50 | 51 | /// Implements [`Expr`] and [`ExprMut`] for the type. 52 | /// 53 | /// *See [examples] for code samples along with impls and structs they expand to.* 54 | /// 55 | /// # Arguments 56 | /// 57 | /// - **`with_capture`** – sets the name of [`ExprMut::Capture`] struct. 58 | /// - **`with_capture_vis`** – sets visibility of [`ExprMut::Capture`] struct. 59 | /// 60 | /// [examples]: https://docs.rs/hitori-examples 61 | /// [`ExprMut::Capture`]: ExprMut::Capture 62 | #[cfg(feature = "macros")] 63 | #[cfg_attr(doc, doc(cfg(feature = "macros")))] 64 | pub use hitori_macros::impl_expr; 65 | 66 | /// Implements [`ExprMut`] for the type. 67 | /// 68 | /// *See [examples] for code samples along with impls and structs they expand to.* 69 | /// 70 | /// *See [`impl_expr`] for arguments description.* 71 | /// 72 | /// [examples]: https://docs.rs/hitori-examples 73 | #[cfg(feature = "macros")] 74 | #[cfg_attr(doc, doc(cfg(feature = "macros")))] 75 | pub use hitori_macros::impl_expr_mut; 76 | -------------------------------------------------------------------------------- /hitori/src/string.rs: -------------------------------------------------------------------------------- 1 | //! 
Items specific to [`ExprMut`] 2 | 3 | use crate::{ 4 | generic, 5 | traits::{ExprMut, Match}, 6 | }; 7 | #[cfg(feature = "alloc")] 8 | use alloc::{borrow::Cow, string::String}; 9 | use core::{iter::FusedIterator, mem, str::CharIndices}; 10 | 11 | /// Like [`CharIndices`], but tuples contain exclusive [`char`] ends 12 | /// instead of [`char`] starts 13 | #[derive(Clone)] 14 | pub struct CharEnds<'a> { 15 | next: char, 16 | indices: CharIndices<'a>, 17 | len: usize, 18 | } 19 | 20 | impl<'a> CharEnds<'a> { 21 | #[must_use] 22 | pub fn new(s: &'a str) -> Self { 23 | let mut indices = s.char_indices(); 24 | let (next, len) = match indices.next() { 25 | Some((_, next)) => (next, s.len()), 26 | None => (char::default(), 0), 27 | }; 28 | Self { next, indices, len } 29 | } 30 | } 31 | 32 | impl<'a> From<&'a str> for CharEnds<'a> { 33 | #[inline] 34 | fn from(s: &'a str) -> Self { 35 | Self::new(s) 36 | } 37 | } 38 | 39 | impl<'a> Iterator for CharEnds<'a> { 40 | type Item = (usize, char); 41 | 42 | #[inline] 43 | fn next(&mut self) -> Option { 44 | if self.len == 0 { 45 | None 46 | } else if let Some((end, next)) = self.indices.next() { 47 | Some((end, mem::replace(&mut self.next, next))) 48 | } else { 49 | Some((mem::replace(&mut self.len, 0), self.next)) 50 | } 51 | } 52 | } 53 | 54 | impl<'a> FusedIterator for CharEnds<'a> {} 55 | 56 | /// Shorthand for [`CharEnds::new`] 57 | #[inline] 58 | #[must_use] 59 | pub fn char_ends(s: &str) -> CharEnds { 60 | CharEnds::new(s) 61 | } 62 | 63 | /// Checks if a [`str`] starts with [`ExprMut`]-matched characters 64 | #[inline] 65 | pub fn starts_with(expr: E, s: &str) -> Option> 66 | where 67 | E: ExprMut, 68 | { 69 | generic::starts_with(expr, 0, true, CharEnds::from(s)) 70 | } 71 | 72 | /// An iterator of successive non-overlapping [`Match`]es 73 | /// that start where previous [`Match`] ends 74 | #[derive(Clone)] 75 | pub struct Repeat<'a, E> { 76 | expr: E, 77 | start: usize, 78 | iter: CharEnds<'a>, 79 | } 80 | 81 | impl<'a, 
E> Repeat<'a, E> { 82 | pub fn new(expr: E, s: &'a str) -> Self { 83 | Self { 84 | expr, 85 | start: 0, 86 | iter: s.into(), 87 | } 88 | } 89 | } 90 | 91 | impl<'a, E> Iterator for Repeat<'a, E> 92 | where 93 | E: ExprMut, 94 | { 95 | type Item = Match>; 96 | 97 | fn next(&mut self) -> Option { 98 | let matched = generic::starts_with( 99 | &mut self.expr, 100 | self.start, 101 | self.start == 0, 102 | self.iter.clone(), 103 | )?; 104 | self.start = matched.range.end; 105 | self.iter = matched.iter_remainder.clone(); 106 | Some(matched) 107 | } 108 | } 109 | 110 | /// Shorthand for [`Repeat::new`] 111 | #[inline] 112 | pub fn repeat(expr: E, s: &str) -> Repeat { 113 | Repeat::new(expr, s) 114 | } 115 | 116 | /// Finds the first substring that is matched by an [`ExprMut`] 117 | #[inline] 118 | pub fn find(expr: E, s: &str) -> Option> 119 | where 120 | E: ExprMut, 121 | { 122 | generic::find(expr, 0, true, CharEnds::from(s)) 123 | } 124 | 125 | /// Iterator of successive non-overlapping [`find`]s 126 | #[derive(Clone)] 127 | pub struct FindIter<'a, E> { 128 | expr: E, 129 | start: usize, 130 | iter: CharEnds<'a>, 131 | } 132 | 133 | impl<'a, E> FindIter<'a, E> { 134 | pub fn new(expr: E, s: &'a str) -> Self { 135 | Self { 136 | expr, 137 | start: 0, 138 | iter: s.into(), 139 | } 140 | } 141 | } 142 | 143 | impl<'a, E> Iterator for FindIter<'a, E> 144 | where 145 | E: ExprMut, 146 | { 147 | type Item = Match>; 148 | 149 | fn next(&mut self) -> Option { 150 | let found = generic::find( 151 | &mut self.expr, 152 | self.start, 153 | self.start == 0, 154 | self.iter.clone(), 155 | )?; 156 | self.start = found.range.end; 157 | self.iter = found.iter_remainder.clone(); 158 | Some(found) 159 | } 160 | } 161 | 162 | /// Shorthand for [`FindIter::new`] 163 | #[inline] 164 | pub fn find_iter(expr: E, s: &str) -> FindIter { 165 | FindIter::new(expr, s) 166 | } 167 | 168 | #[cfg(feature = "alloc")] 169 | fn find_iter_replace<'a, I, C, F>(find_iter: I, s: &'a str, mut rep: F) -> 
Cow<'a, str> 170 | where 171 | I: IntoIterator>>, 172 | F: FnMut(&mut String, I::Item), 173 | { 174 | let mut replaced = String::new(); 175 | let mut start = 0; 176 | for found in find_iter { 177 | replaced.push_str(&s[start..mem::replace(&mut start, found.range.start)]); 178 | rep(&mut replaced, found); 179 | } 180 | if replaced.is_empty() { 181 | s.into() 182 | } else { 183 | replaced.push_str(&s[start..]); 184 | replaced.into() 185 | } 186 | } 187 | 188 | /// Replaces every matched substring using `rep` closure 189 | /// 190 | /// First argument of `rep` is the current [`String`] accumulator. 191 | /// Second is the current [`Match`]. 192 | /// 193 | /// Writing to the accumulator could be done using [`write!`]. 194 | #[cfg(feature = "alloc")] 195 | #[cfg_attr(doc, doc(cfg(feature = "alloc")))] 196 | #[inline] 197 | pub fn replace<'a, E, F>(expr: E, s: &'a str, rep: F) -> Cow<'a, str> 198 | where 199 | E: ExprMut, 200 | F: FnMut(&mut String, Match>), 201 | { 202 | find_iter_replace(FindIter::new(expr, s), s, rep) 203 | } 204 | 205 | /// Replaces first `limit` matched substrings using `rep` closure. 206 | /// 207 | /// *See [`replace`] for `rep` argument description* 208 | #[cfg(feature = "alloc")] 209 | #[cfg_attr(doc, doc(cfg(feature = "alloc")))] 210 | pub fn replacen<'a, E, F>(expr: E, limit: usize, s: &'a str, rep: F) -> Cow<'a, str> 211 | where 212 | E: ExprMut, 213 | F: FnMut(&mut String, Match>), 214 | { 215 | find_iter_replace(FindIter::new(expr, s).take(limit), s, rep) 216 | } 217 | -------------------------------------------------------------------------------- /hitori/src/traits.rs: -------------------------------------------------------------------------------- 1 | use core::ops::Range; 2 | 3 | /// Single [`ExprMut`] match 4 | #[derive(Clone)] 5 | pub struct Match { 6 | /// Index [`Range`] of matched subsequence of characters 7 | pub range: Range, 8 | /// Captured ranges 9 | pub capture: C, 10 | /// The rest of the `iter` argument (i.e. 
with the matched part skipped) 11 | pub iter_remainder: I, 12 | /// Was the `iter` advanced before or during the match 13 | pub is_iter_advanced: bool, 14 | } 15 | 16 | /// Expression with a mutable state 17 | pub trait ExprMut { 18 | type Capture; 19 | 20 | /// *See [`starts_with`](crate::generic::starts_with)* 21 | fn starts_with_mut( 22 | &mut self, 23 | start: Idx, 24 | is_first: bool, 25 | iter: I, 26 | ) -> Option> 27 | where 28 | I: IntoIterator, 29 | I::IntoIter: Clone; 30 | } 31 | 32 | /// Expression with an immutable state 33 | pub trait Expr: ExprMut { 34 | /// *See [`starts_with`](crate::generic::starts_with)* 35 | fn starts_with( 36 | &self, 37 | start: Idx, 38 | is_first: bool, 39 | iter: I, 40 | ) -> Option> 41 | where 42 | I: IntoIterator, 43 | I::IntoIter: Clone; 44 | } 45 | 46 | macro_rules! impl_mut_for_mut { 47 | ($ty:ty) => { 48 | impl<'a, Idx, Ch, E: ExprMut> ExprMut for $ty { 49 | type Capture = E::Capture; 50 | 51 | #[inline] 52 | fn starts_with_mut( 53 | &mut self, 54 | start: Idx, 55 | is_first: bool, 56 | iter: I, 57 | ) -> Option> 58 | where 59 | I: IntoIterator, 60 | I::IntoIter: Clone, 61 | { 62 | E::starts_with_mut(self, start, is_first, iter) 63 | } 64 | } 65 | }; 66 | } 67 | 68 | impl_mut_for_mut!(&mut E); 69 | 70 | #[cfg(feature = "alloc")] 71 | #[cfg_attr(doc, doc(cfg(feature = "alloc")))] 72 | impl_mut_for_mut!(alloc::boxed::Box); 73 | 74 | macro_rules! impl_for_const { 75 | ($ty:ty: ExprMut) => { 76 | impl_for_const!($ty: ExprMut::starts_with_mut(mut, Capture)); 77 | }; 78 | ($ty:ty: Expr) => { 79 | impl_for_const!($ty: Expr::starts_with); 80 | }; 81 | ($ty:ty: $trait:ident::$starts_with:ident$(($mut:ident, $capture:ident))?) => { 82 | impl<'a, Idx, Ch, E: Expr> $trait for $ty { 83 | $(type $capture = E::Capture;)? 
84 | 85 | #[inline] 86 | fn $starts_with( 87 | &$($mut)?self, 88 | start: Idx, 89 | is_first: bool, 90 | iter: I 91 | ) -> Option> 92 | where 93 | I: IntoIterator, 94 | I::IntoIter: Clone, 95 | { 96 | E::starts_with(self, start, is_first, iter) 97 | } 98 | } 99 | }; 100 | } 101 | 102 | impl_for_const!(&E: ExprMut); 103 | 104 | impl_for_const!(&E: Expr); 105 | 106 | #[cfg(feature = "alloc")] 107 | #[cfg_attr(doc, doc(cfg(feature = "alloc")))] 108 | impl_for_const!(alloc::boxed::Box: Expr); 109 | --------------------------------------------------------------------------------