├── .github
└── FUNDING.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE
├── README.md
├── bacon.toml
├── examples
└── regexes
│ ├── .gitignore
│ ├── Cargo.toml
│ ├── README.md
│ └── src
│ └── main.rs
├── src
├── lib.rs
└── proc_macros
│ ├── Cargo.toml
│ ├── args.rs
│ ├── mod.rs
│ └── regex_code.rs
└── tests
├── regex_if.rs
├── regex_switch.rs
└── replace.rs
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [Canop]
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ### v3.4.1 - 2024-12-27
3 | - regex_captures_iter! macro - Fix #37
4 |
5 |
6 | ### v3.3.0 - 2024-08-23
7 | - revert a change regarding dependencies & features, brought with 3.2.0, which proved to be inconvenient for configuration with no default features - Fix #36
8 |
9 |
10 | ### v3.2.0 - 2024-07-25
11 | - `regex_if!` and `bytes_regex_if!`
12 | - `regex_switch!` and `bytes_regex_switch!`
13 |
14 |
15 | ### v3.1.0 - 2023-11-09
16 | - bytes_ prefixed macros create instances of `bytes::Regex` - Fix #30
17 |
18 |
19 | ### v3.0.2 - 2023-09-12
20 | - replace macros now accept a mut closure as replacer - Fix #27
21 |
22 |
23 | ### v3.0.1 - 2023-07-28
24 | - syn dependency updated to 2.0
25 |
26 |
27 | ### v3.0.0 - 2023-07-07
28 | - the `lite` feature switches the engine to `regex-lite` instead of `regex`. The whole regex|regex-lite crate is reexported under `lazy_regex::regex`
29 | - regex crate upgraded to 1.9
30 |
31 |
32 | ### v2.5.0 - 2023-03-09
33 | - `replace!` and `replace_all!` now supports non closure replacers - Fix #19
34 |
35 |
36 | ### v2.4.1 - 2023-01-05
37 | - rustc minimal version downgraded from 1.65 to 1.56 by popular demand
38 |
39 |
40 | ### v2.4.0 - 2023-01-04
41 | - allow building with `--no-default-features`
42 | - regex crate upgraded from 1.5 to 1.7 (minor Unicode changes)
43 | - rustc minimal version now 1.65
44 |
45 |
46 | ### v2.3.1 - 2022-11-03
47 | - better error messages on bad regexes - thanks @necauqua
48 |
49 |
50 | ### v2.3.0 - 2022-03-05
51 | - support for [bytes](https://docs.rs/regex/latest/regex/bytes/index.html) regexes with the `B` suffix notation - thanks @bnoctis - Fix #11
52 |
53 |
54 | ### v2.2.2 - 2021-10-20
55 | Reexpose features of the regex crate
56 |
57 |
58 | ### v2.2.1 - 2021-06-07
59 | Add the `regex_replace!` macro for when you only want to replace one match
60 | Reexports more types of the regex crates
61 |
62 |
63 | ### v2.2.0 - 2021-06-04
64 | Add the `regex_replace_all!` macro to do replacements with a closure taking the right number of `&str` arguments according to the number of groups in the regular expression
65 |
66 |
67 | ### v2.1.0 - 2021-06-02
68 | Add the `lazy_regex!` macro returning a `Lazy` for easy use in a `pub static` shared declaration.
69 |
70 |
71 | ### v2.0.2 - 2021-05-31
72 | Fix a cross compilation problem, thanks @AlephAlpha - Fix #5
73 |
74 |
75 | ### v2.0.1 - 2021-05-20
76 | Improved documentation
77 |
78 |
79 | ### v2.0.0 - 2021-05-17
80 | - regular expressions are now checked at compile time
81 | - regex_is_match!
82 | - regex_find!
83 | - regex_captures!
84 |
85 |
86 | ### v1.1.0 - 2021-05-08
87 | - no more complementary import needed
88 | - now based on once_cell instead of lazy_static
89 |
90 |
91 | ### v1.0.0 - 2021-05-04
92 | - first public release
93 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "lazy-regex"
3 | version = "3.4.1"
4 | authors = ["Canop "]
5 | edition = "2021"
6 | description = "lazy static regular expressions checked at compile time"
7 | keywords = ["macro", "lazy", "static", "regex"]
8 | license = "MIT"
9 | categories = ["text-processing"]
10 | repository = "https://github.com/Canop/lazy-regex"
11 | readme = "README.md"
12 | rust-version = "1.65"
13 |
14 | [dependencies]
15 | once_cell = "1.20"
16 | regex = {version = "1.11", default-features = false, optional = true}
17 | regex-lite = {version = "0.1", optional = true}
18 |
19 | [dependencies.lazy-regex-proc_macros]
20 | path = "src/proc_macros"
21 | version = "3.4.1"
22 |
23 | [features]
24 | default = ["regex/default"]
25 | std = ["regex/std"]
26 | lite = ["regex-lite"]
27 | perf = ["regex/perf"]
28 | perf-cache = ["regex/perf-cache"]
29 | perf-dfa = ["regex/perf-dfa"]
30 | perf-inline = ["regex/perf-inline"]
31 | perf-literal = ["regex/perf-literal"]
32 | unicode = ["regex/unicode"]
33 | unicode-age = ["regex/unicode-age"]
34 | unicode-bool = ["regex/unicode-bool"]
35 | unicode-case = ["regex/unicode-case"]
36 | unicode-gencat = ["regex/unicode-gencat"]
37 | unicode-perl = ["regex/unicode-perl"]
38 | unicode-script = ["regex/unicode-script"]
39 | unicode-segment = ["regex/unicode-segment"]
40 |
41 | [workspace]
42 | members = ["src/proc_macros", "examples/regexes"]
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Canop
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![MIT][s2]][l2] [![Latest Version][s1]][l1] [![docs][s3]][l3] [![Chat on Miaou][s4]][l4]
2 |
3 | [s1]: https://img.shields.io/crates/v/lazy-regex.svg
4 | [l1]: https://crates.io/crates/lazy-regex
5 |
6 | [s2]: https://img.shields.io/badge/license-MIT-blue.svg
7 | [l2]: LICENSE
8 |
9 | [s3]: https://docs.rs/lazy-regex/badge.svg
10 | [l3]: https://docs.rs/lazy-regex/
11 |
12 | [s4]: https://miaou.dystroy.org/static/shields/room.svg
13 | [l4]: https://miaou.dystroy.org/3
14 |
15 |
16 | # lazy-regex
17 |
18 | With lazy-regex macros, regular expressions
19 |
20 | * are checked at compile time, with clear error messages
21 | * are wrapped in `once_cell` lazy static initializers so that they're compiled only once
22 | * can hold flags as suffix: `let case_insensitive_regex = regex!("ab*"i);`
23 | * are defined in a less verbose way
24 |
25 | The `regex!` macro returns references to normal instances of `regex::Regex` or `regex::bytes::Regex` so all the usual features are available.
26 |
27 | Other macros are specialized for testing a match, replacing with concise closures, or capturing groups as substrings in some common situations:
28 |
29 | * `regex_is_match!`
30 | * `regex_find!`
31 | * `regex_captures!`
32 | * `regex_replace!`
33 | * `regex_replace_all!`
34 | * `regex_switch!`
35 |
36 | They support the `B` flag for the `regex::bytes::Regex` variant.
37 |
38 | All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have `bytes_regex!`, `bytes_regex_is_match!`, `bytes_regex_find!`, `bytes_regex_captures!`, `bytes_regex_replace!`, `bytes_regex_replace_all!`, and `bytes_regex_switch!`.
39 |
40 | Some structs of the regex crate are reexported to ease dependency management.
41 | The regex crate itself is also reexported, to avoid the need to synchronize the versions/flavor (see [Features](#features-and-reexport) below)
42 |
43 | # Build Regexes
44 |
45 | ```rust
46 | use lazy_regex::regex;
47 |
48 | // build a simple regex
49 | let r = regex!("sa+$");
50 | assert_eq!(r.is_match("Saa"), false);
51 |
52 | // build a regex with flag(s)
53 | let r = regex!("sa+$"i);
54 | assert_eq!(r.is_match("Saa"), true);
55 |
56 | // you can use a raw literal
57 | let r = regex!(r#"^"+$"#);
58 | assert_eq!(r.is_match("\"\""), true);
59 |
60 | // or a raw literal with flag(s)
61 | let r = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
62 | assert_eq!(r.is_match(r#" "Aristote" "Platon" "#), true);
63 |
64 | // build a regex that operates on &[u8]
65 | let r = regex!("(byte)?string$"B);
66 | assert_eq!(r.is_match(b"bytestring"), true);
67 |
68 | // there's no problem using the multiline definition syntax
69 | let r = regex!(r#"(?x)
70 | (?P<name>\w+)
71 | -
72 | (?P<version>[0-9.]+)
73 | "#);
74 | assert_eq!(r.find("This is lazy_regex-2.2!").unwrap().as_str(), "lazy_regex-2.2");
75 | // (look at the regex_captures! macro to easily extract the groups)
76 |
77 | ```
78 | ```compile_fail
79 | // this line doesn't compile because the regex is invalid:
80 | let r = regex!("(unclosed");
81 |
82 | ```
83 | Supported regex flags: `i`, `m`, `s`, `x`, `U`.
84 |
85 | See [regex::RegexBuilder](https://docs.rs/regex/latest/regex/struct.RegexBuilder.html).
86 |
87 | # Test a match
88 |
89 | ```rust
90 | use lazy_regex::regex_is_match;
91 |
92 | let b = regex_is_match!("[ab]+", "car");
93 | assert_eq!(b, true);
94 | ```
95 |
96 |
97 | # Extract a value
98 |
99 | ```rust
100 | use lazy_regex::regex_find;
101 |
102 | let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
103 | assert_eq!(f_word, Some("fox"));
104 | let f_word = regex_find!(r#"\bf\w+\b"#B, b"The forest is silent.");
105 | assert_eq!(f_word, Some(b"forest" as &[u8]));
106 | ```
107 |
108 | # Capture
109 |
110 | ```rust
111 | use lazy_regex::regex_captures;
112 |
113 | let (_, letter) = regex_captures!("([a-z])[0-9]+"i, "form A42").unwrap();
114 | assert_eq!(letter, "A");
115 |
116 | let (whole, name, version) = regex_captures!(
117 | r#"(\w+)-([0-9.]+)"#, // a literal regex
118 | "This is lazy_regex-2.0!", // any expression
119 | ).unwrap();
120 | assert_eq!(whole, "lazy_regex-2.0");
121 | assert_eq!(name, "lazy_regex");
122 | assert_eq!(version, "2.0");
123 | ```
124 |
125 | There's no limit to the size of the tuple.
126 | It's checked at compile time to ensure you have the right number of capturing groups.
127 |
128 | You receive `""` for optional groups with no value.
129 |
130 | # Replace with captured groups
131 |
132 | The `regex_replace!` and `regex_replace_all!` macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions.
133 |
134 | ## Replace with a closure
135 |
136 | ```rust
137 | use lazy_regex::regex_replace_all;
138 |
139 | let text = "Foo8 fuu3";
140 | let text = regex_replace_all!(
141 | r#"\bf(\w+)(\d)"#i,
142 | text,
143 | |_, name, digit| format!("F<{}>{}", name, digit),
144 | );
145 | assert_eq!(text, "F<oo>8 F<uu>3");
146 | ```
147 | The number of arguments given to the closure is checked at compilation time to match the number of groups in the regular expression.
148 |
149 | If it doesn't match you get, at compilation time, a clear error message.
150 |
151 | ## Replace with another kind of Replacer
152 |
153 | ```rust
154 | use lazy_regex::regex_replace_all;
155 | let text = "UwU";
156 | let output = regex_replace_all!("U", text, "O");
157 | assert_eq!(&output, "OwO");
158 | ```
159 |
160 | # Switch over regexes
161 |
162 | Execute the expression bound to the first matching regex, with named captured groups declared as variables:
163 |
164 | ```rust
165 | use lazy_regex::regex_switch;
166 | pub enum ScrollCommand {
167 | Top,
168 | Bottom,
169 | Lines(i32),
170 | Pages(i32),
171 | }
172 | impl std::str::FromStr for ScrollCommand {
173 | type Err = ();
174 | fn from_str(s: &str) -> Result<Self, Self::Err> {
175 | regex_switch!(s,
176 | "^scroll-to-top$" => Self::Top,
177 | "^scroll-to-bottom$" => Self::Bottom,
178 | r#"^scroll-lines?\((?<n>[+-]?\d{1,4})\)$"# => Self::Lines(n.parse().unwrap()),
179 | r#"^scroll-pages?\((?<n>[+-]?\d{1,4})\)$"# => Self::Pages(n.parse().unwrap()),
180 | ).ok_or(())
181 | }
182 | }
183 | ```
184 |
185 | # Shared lazy static
186 |
187 | When a regular expression is used in several functions, you sometimes don't want
188 | to repeat it but have a shared static instance.
189 |
190 | The `regex!` macro, while being backed by a lazy static regex, returns a reference.
191 |
192 | If you want to have a shared lazy static regex, use the `lazy_regex!` macro:
193 |
194 | ```rust
195 | use lazy_regex::*;
196 |
197 | pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
198 | ```
199 |
200 | Like for the other macros, the regex is static, checked at compile time, and lazily built at first use.
201 |
202 | # Features and reexport
203 |
204 | With default features, `lazy-regex` uses the `regex` crate with its default features, tailored for performance and complete Unicode support.
205 |
206 | You may enable a different set of regex features by directly enabling them when importing `lazy-regex`.
207 |
208 | It's also possible to use the [regex-lite](https://docs.rs/regex-lite/) crate instead of the [regex](https://docs.rs/regex/) crate by declaring the ``lite`` feature:
209 |
210 | ```TOML
211 | lazy-regex = { version = "3.0", default-features = false, features = ["lite"] }
212 | ```
213 |
214 | The `lite` flavor comes with slightly lower performance and reduced Unicode support (see crate documentation) but also a much smaller binary size.
215 |
216 | If you need to refer to the regex crate in your code, prefer to use the reexport (i.e. `use lazy_regex::regex;`) so that you don't have a version or flavor conflict. When the `lite` feature is enabled, `lazy_regex::regex` refers to `regex_lite` so you don't have to change your code when switching regex engine.
217 |
218 |
--------------------------------------------------------------------------------
/bacon.toml:
--------------------------------------------------------------------------------
1 | # This is a configuration file for the bacon tool
2 | #
3 | # Bacon repository: https://github.com/Canop/bacon
4 | # Complete help on configuration: https://dystroy.org/bacon/config/
5 | # You can also check bacon's own bacon.toml file
6 | # as an example: https://github.com/Canop/bacon/blob/main/bacon.toml
7 |
8 | default_job = "check"
9 |
10 | [jobs.check]
11 | command = ["cargo", "check", "--color", "always"]
12 | need_stdout = false
13 |
14 | [jobs.check-all]
15 | command = ["cargo", "check", "--all-targets", "--color", "always"]
16 | need_stdout = false
17 |
18 | [jobs.lite]
19 | command = ["cargo", "check", "--all-targets", "--color", "always", "--no-default-features", "--features", "lite"]
20 | need_stdout = false
21 |
22 | [jobs.clippy]
23 | command = [
24 | "cargo", "clippy",
25 | "--all-targets",
26 | "--color", "always",
27 | ]
28 | need_stdout = false
29 |
30 | [jobs.test]
31 | command = [
32 | "cargo", "test", "--color", "always",
33 | "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124
34 | ]
35 | need_stdout = true
36 |
37 | [jobs.doc]
38 | command = ["cargo", "doc", "--color", "always", "--no-deps"]
39 | need_stdout = false
40 |
41 | # If the doc compiles, then it opens in your browser and bacon switches
42 | # to the previous job
43 | [jobs.doc-open]
44 | command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"]
45 | need_stdout = false
46 | on_success = "back" # so that we don't open the browser at each change
47 |
48 | [keybindings]
49 | # alt-m = "job:my-job"
50 |
--------------------------------------------------------------------------------
/examples/regexes/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | /target/
4 |
5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
7 | Cargo.lock
8 |
9 | # These are backup files generated by rustfmt
10 | **/*.rs.bk
11 |
--------------------------------------------------------------------------------
/examples/regexes/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "regexes"
3 | version = "3.0.0"
4 | authors = ["dystroy "]
5 | edition = "2018"
6 | description = "An example for lazy-regex"
7 | license = "MIT"
8 | readme = "README.md"
9 |
10 | [dependencies]
11 | lazy-regex = { path = "../.."}
12 |
--------------------------------------------------------------------------------
/examples/regexes/README.md:
--------------------------------------------------------------------------------
1 |
2 | This example displays a few compilation regexes.
3 |
4 | To demonstrate compile time checks
5 |
6 | - uncomment line 23
7 | - run `cargo run`
8 |
9 | The program should fail to check with a clear error.
10 |
--------------------------------------------------------------------------------
/examples/regexes/src/main.rs:
--------------------------------------------------------------------------------
1 | use lazy_regex::*;
2 | /// Regex shared by several functions; like the other macros' regexes it is lazily built at first use.
3 | pub static SHARED: Lazy<Regex> = lazy_regex!("^test$");
4 | /// Builds regexes with each supported syntax: plain and raw literals, flag suffixes, bytes variants.
5 | fn example_builds() {
6 |     // build a simple regex
7 |     let r = regex!("sa+$");
8 |     assert!(!r.is_match("Saa"));
9 |
10 |     // build a regex with flag(s)
11 |     let r = regex!("sa+b?$"i);
12 |     assert!(r.is_match("Saa"));
13 |
14 |     // you can use a raw literal
15 |     let r = regex!(r#"^"+$"#);
16 |     assert!(r.is_match("\"\""));
17 |
18 |     // and a raw literal with flag(s)
19 |     let r = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
20 |     assert!(r.is_match(r#" "Aristote" "Platon" "#));
21 |
22 |     // Try to uncomment the following line to see the compilation error
23 |     // let r = regex!("(unclosed");
24 |
25 |     // build a bytes::Regex with the bytes_regex! macro
26 |     let rb = bytes_regex!("b+");
27 |     assert!(rb.is_match(b"abcd"));
28 |     let rb = bytes_regex!("sa+b?$"i);
29 |     assert!(rb.is_match(b"Saa"));
30 |
31 |     // build a bytes::Regex using the `B` suffix syntax
32 |     let rb = regex!("b+"B);
33 |     assert!(rb.is_match(b"abcd"));
34 |
35 |     // 4 equivalent ways to build a case insensitive bytes::Regex
36 |     let case_insensitive_regex = bytes_regex!("^ab+$"i);
37 |     assert!(case_insensitive_regex.is_match(b"abB"));
38 |     let case_insensitive_regex = bytes_regex!("(?i)^ab+$");
39 |     assert!(case_insensitive_regex.is_match(b"abB"));
40 |     let case_insensitive_regex = regex!("^ab+$"iB);
41 |     assert!(case_insensitive_regex.is_match(b"abB"));
42 |     let case_insensitive_regex = regex!("(?i)^ab+$"B);
43 |     assert!(case_insensitive_regex.is_match(b"abB"));
44 | }
45 | /// Tests a match without keeping a handle on the regex.
46 | fn example_is_match() {
47 |     let found = regex_is_match!("[ab]+", "car");
48 |     assert!(found);
49 | }
50 | /// Uses the `SHARED` static regex, which is anchored on the exact string "test".
51 | fn example_using_shared_static() {
52 |     let found = SHARED.is_match("not test");
53 |     assert!(!found);
54 | }
55 | /// Extracts the whole match and the two capturing groups as a tuple.
56 | fn example_captures() {
57 |     let captured = regex_captures!(
58 |         r#"(\w+)-([0-9.]+)"#,      // a literal regex
59 |         "This is lazy_regex-2.0!", // any expression
60 |     );
61 |     let (whole, name, version) = captured.unwrap();
62 |     assert_eq!(whole, "lazy_regex-2.0");
63 |     assert_eq!(name, "lazy_regex");
64 |     assert_eq!(version, "2.0");
65 | }
66 | /// Replaces all matches with closures taking the whole match, then one argument per group.
67 | fn examples_replace_all() {
68 |     let text = "Foo fuu";
69 |     let text = regex_replace_all!(
70 |         r#"\bf(\w+)"#i,
71 |         text,
72 |         |_, suffix| format!("F<{}>", suffix),
73 |     );
74 |     assert_eq!(text, "F<oo> F<uu>");
75 |
76 |     let text = "A = 5 + 3 and B=27+4";
77 |     let text = regex_replace_all!(
78 |         r#"(?x)
79 |             (\d+)
80 |             \s*
81 |             \+
82 |             \s*
83 |             (\d+)
84 |         "#,
85 |         text,
86 |         |_, a: &str, b: &str| {
87 |             let a: u64 = a.parse().unwrap();
88 |             let b: u64 = b.parse().unwrap();
89 |             (a + b).to_string()
90 |         },
91 |     );
92 |     assert_eq!(text, "A = 8 and B=31");
93 | }
94 | /// Runs every example, repeating most of them several times.
95 | fn main() {
96 |     const ROUNDS: usize = 10;
97 |
98 |     // the regular expressions will be built only once
99 |     for _ in 0..ROUNDS {
100 |         example_builds();
101 |     }
102 |     example_is_match();
103 |     for _ in 0..ROUNDS {
104 |         example_captures();
105 |         example_using_shared_static();
106 |         examples_replace_all();
107 |     }
108 | }
109 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | /*!
2 |
3 | With lazy-regex macros, regular expressions
4 |
5 | * are checked at compile time, with clear error messages
6 | * are wrapped in `once_cell` lazy static initializers so that they're compiled only once
7 | * can hold flags as suffix: `let case_insensitive_regex = regex!("ab*"i);`
8 | * are defined in a less verbose way
9 |
10 | The [regex!] macro returns references to normal instances of [regex::Regex] or [regex::bytes::Regex] so all the usual features are available.
11 |
12 | But most often, you won't even use the `regex!` macro but the other macros which are specialized for testing a match, replacing, or capturing groups in some common situations:
13 |
14 | * [Test a match](#test-a-match) with [regex_is_match!]
15 | * [Extract a value](#extract-a-value) with [regex_find!]
16 | * [Capture](#capture) with [regex_captures!] and [regex_captures_iter!]
17 | * [Replace with captured groups](#replace-with-captured-groups) with [regex_replace!] and [regex_replace_all!]
18 | * [Switch over patterns](#switch-over-patterns) with [regex_switch!]
19 |
20 | They support the `B` flag for the `regex::bytes::Regex` variant.
21 |
22 | All macros exist with a `bytes_` prefix for building `bytes::Regex`, so you also have [bytes_regex!], [bytes_regex_is_match!], [bytes_regex_find!], [bytes_regex_captures!], [bytes_regex_replace!], [bytes_regex_replace_all!], and [bytes_regex_switch!].
23 |
24 | Some structs of the regex crate are reexported to ease dependency management.
25 |
26 | # Build Regexes
27 |
28 | ```rust
29 | use lazy_regex::regex;
30 |
31 | // build a simple regex
32 | let r = regex!("sa+$");
33 | assert_eq!(r.is_match("Saa"), false);
34 |
35 | // build a regex with flag(s)
36 | let r = regex!("sa+$"i);
37 | assert_eq!(r.is_match("Saa"), true);
38 |
39 | // you can use a raw literal
40 | let r = regex!(r#"^"+$"#);
41 | assert_eq!(r.is_match("\"\""), true);
42 |
43 | // or a raw literal with flag(s)
44 | let r = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
45 | assert_eq!(r.is_match(r#" "Aristote" "Platon" "#), true);
46 |
47 | // build a regex that operates on &[u8]
48 | let r = regex!("(byte)?string$"B);
49 | assert_eq!(r.is_match(b"bytestring"), true);
50 |
51 | // there's no problem using the multiline definition syntax
52 | let r = regex!(r"(?x)
53 | (?P<name>\w+)
54 | -
55 | (?P<version>[0-9.]+)
56 | ");
57 | assert_eq!(r.find("This is lazy_regex-2.2!").unwrap().as_str(), "lazy_regex-2.2");
58 | // (look at the regex_captures! macro to easily extract the groups)
59 |
60 | ```
61 | ```compile_fail
62 | // this line doesn't compile because the regex is invalid:
63 | let r = regex!("(unclosed");
64 |
65 | ```
66 | Supported regex flags: [`i`, `m`, `s`, `x`, `U`][regex::RegexBuilder], and you may also use `B` to build a bytes regex.
67 |
68 | The following regexes are equivalent:
69 | * `bytes_regex!("^ab+$"i)`
70 | * `bytes_regex!("(?i)^ab+$")`
71 | * `regex!("^ab+$"iB)`
72 | * `regex!("(?i)^ab+$"B)`
73 |
74 | They're all case insensitive instances of `regex::bytes::Regex`.
75 |
76 |
77 | # Test a match
78 |
79 | ```rust
80 | use lazy_regex::*;
81 |
82 | let b = regex_is_match!("[ab]+", "car");
83 | assert_eq!(b, true);
84 | let b = bytes_regex_is_match!("[ab]+", b"car");
85 | assert_eq!(b, true);
86 | ```
87 |
88 | doc: [regex_is_match!]
89 |
90 |
91 | # Extract a value
92 |
93 | ```rust
94 | use lazy_regex::regex_find;
95 |
96 | let f_word = regex_find!(r"\bf\w+\b", "The fox jumps.");
97 | assert_eq!(f_word, Some("fox"));
98 | let f_word = regex_find!(r"\bf\w+\b"B, b"The forest is silent.");
99 | assert_eq!(f_word, Some(b"forest" as &[u8]));
100 | ```
101 |
102 | doc: [regex_find!]
103 |
104 | # Capture
105 |
106 | ```rust
107 | use lazy_regex::regex_captures;
108 |
109 | let (_, letter) = regex_captures!("([a-z])[0-9]+"i, "form A42").unwrap();
110 | assert_eq!(letter, "A");
111 |
112 | let (whole, name, version) = regex_captures!(
113 | r"(\w+)-([0-9.]+)", // a literal regex
114 | "This is lazy_regex-2.0!", // any expression
115 | ).unwrap();
116 | assert_eq!(whole, "lazy_regex-2.0");
117 | assert_eq!(name, "lazy_regex");
118 | assert_eq!(version, "2.0");
119 | ```
120 |
121 | There's no limit to the size of the tuple.
122 | It's checked at compile time to ensure you have the right number of capturing groups.
123 |
124 | You receive `""` for optional groups with no value.
125 |
126 | See [regex_captures!] and [regex_captures_iter!]
127 |
128 | # Replace with captured groups
129 |
130 | The [regex_replace!] and [regex_replace_all!] macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions.
131 |
132 | ## Replace with a closure
133 |
134 | ```rust
135 | use lazy_regex::regex_replace_all;
136 |
137 | let text = "Foo8 fuu3";
138 | let text = regex_replace_all!(
139 | r"\bf(\w+)(\d)"i,
140 | text,
141 | |_, name, digit| format!("F<{}>{}", name, digit),
142 | );
143 | assert_eq!(text, "F<oo>8 F<uu>3");
144 | ```
145 | The number of arguments given to the closure is checked at compilation time to match the number of groups in the regular expression.
146 |
147 | If it doesn't match you get a clear error message at compilation time.
148 |
149 | ## Replace with another kind of Replacer
150 |
151 | ```rust
152 | use lazy_regex::regex_replace_all;
153 | let text = "UwU";
154 | let output = regex_replace_all!("U", text, "O");
155 | assert_eq!(&output, "OwO");
156 | ```
157 |
158 | # Switch over patterns
159 |
160 | Execute the expression bound to the first matching regex, with named captured groups declared as variables:
161 |
162 | ```rust
163 | use lazy_regex::regex_switch;
164 | #[derive(Debug, PartialEq)]
165 | pub enum ScrollCommand {
166 | Top,
167 | Bottom,
168 | Lines(i32),
169 | Pages(i32),
170 | JumpTo(String),
171 | }
172 | impl std::str::FromStr for ScrollCommand {
173 | type Err = &'static str;
174 | fn from_str(s: &str) -> Result<Self, Self::Err> {
175 | regex_switch!(s,
176 | "^scroll-to-top$" => Self::Top,
177 | "^scroll-to-bottom$" => Self::Bottom,
178 | r"^scroll-lines?\((?<n>[+-]?\d{1,4})\)$" => Self::Lines(n.parse().unwrap()),
179 | r"^scroll-pages?\((?<n>[+-]?\d{1,4})\)$" => Self::Pages(n.parse().unwrap()),
180 | r"^jump-to\((?<name>\w+)\)$" => Self::JumpTo(name.to_string()),
181 | ).ok_or("unknown command")
182 | }
183 | }
184 | assert_eq!("scroll-lines(42)".parse(), Ok(ScrollCommand::Lines(42)));
185 | assert_eq!("scroll-lines(XLII)".parse::<ScrollCommand>(), Err("unknown command"));
186 | ```
187 |
188 | doc: [regex_switch!]
189 |
190 | # Shared lazy static
191 |
192 | When a regular expression is used in several functions, you sometimes don't want
193 | to repeat it but have a shared static instance.
194 |
195 | The [regex!] macro, while being backed by a lazy static regex, returns a reference.
196 |
197 | If you want to have a shared lazy static regex, use the [lazy_regex!] macro:
198 |
199 | ```rust
200 | use lazy_regex::*;
201 |
202 | pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
203 | ```
204 |
205 | Like for the other macros, the regex is static, checked at compile time, and lazily built at first use.
206 |
207 | doc: [lazy_regex!]
208 |
209 | */
210 |
211 | pub use {
212 | lazy_regex_proc_macros::{
213 | lazy_regex,
214 | regex,
215 | regex_captures,
216 | regex_captures_iter,
217 | regex_find,
218 | regex_if,
219 | regex_is_match,
220 | regex_replace,
221 | regex_replace_all,
222 | regex_switch,
223 | bytes_lazy_regex,
224 | bytes_regex,
225 | bytes_regex_captures,
226 | bytes_regex_find,
227 | bytes_regex_if,
228 | bytes_regex_is_match,
229 | bytes_regex_replace,
230 | bytes_regex_replace_all,
231 | bytes_regex_switch,
232 | },
233 | once_cell::sync::Lazy,
234 | };
235 |
236 | #[cfg(not(feature = "lite"))]
237 | pub use {
238 | regex::{
239 | self,
240 | Captures, Regex, RegexBuilder,
241 | bytes::{
242 | Regex as BytesRegex,
243 | RegexBuilder as BytesRegexBuilder
244 | },
245 | },
246 | };
247 |
248 | #[cfg(feature = "lite")]
249 | pub use {
250 | regex_lite::{
251 | self as regex,
252 | Captures, Regex, RegexBuilder,
253 | },
254 | };
255 |
256 |
257 |
--------------------------------------------------------------------------------
/src/proc_macros/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "lazy-regex-proc_macros"
3 | version = "3.4.1"
4 | authors = ["Canop "]
5 | description = "proc macros for the lazy_regex crate"
6 | license = "MIT"
7 | edition = "2018"
8 | repository = "https://github.com/Canop/lazy-regex/tree/main/src/proc_macros"
9 |
10 | [dependencies]
11 | syn = { version = "2.0", features = ["full"] }
12 | proc-macro2 = "1.0"
13 | quote = "1.0"
14 | regex = "1.11"
15 |
16 | [lib]
17 | proc-macro = true
18 | path = "mod.rs"
19 |
--------------------------------------------------------------------------------
/src/proc_macros/args.rs:
--------------------------------------------------------------------------------
1 | use syn::{
2 | parse::{
3 | Parse,
4 | ParseStream,
5 | Result,
6 | },
7 | Expr,
8 | ExprClosure,
9 | LitStr,
10 | Token,
11 | };
12 |
13 | /// Wrapping of the two arguments given to one of the
14 | /// `regex_is_match`, `regex_find`, or `regex_captures`
15 | /// macros: a literal regex, then the value to search
16 | pub(crate) struct RexValArgs {
17 |     pub regex_str: LitStr,
18 |     pub value: Expr, // this expression is (or produces) the text to search or check
19 | }
20 | // Parses `"regex", value` with an optional trailing comma
21 | impl Parse for RexValArgs {
22 |     fn parse(input: ParseStream<'_>) -> Result<Self> {
23 |         let regex_str = input.parse::<LitStr>()?;
24 |         input.parse::<Token![,]>()?;
25 |         let value = input.parse::<Expr>()?;
26 |         let _ = input.parse::<Token![,]>(); // allow a trailing comma
27 |         Ok(RexValArgs { regex_str, value })
28 |     }
29 | }
30 |
31 | /// Wrapping of the three arguments given to the
32 | /// `regex_replace` and `regex_replace_all` macros
33 | pub(crate) struct ReplaceArgs {
34 |     pub regex_str: LitStr,
35 |     pub value: Expr,
36 |     pub replacer: MaybeFun,
37 | }
38 | /// The replacer argument: a closure literal when it parses as one, any other expression otherwise
39 | pub(crate) enum MaybeFun {
40 |     Fun(ExprClosure),
41 |     Expr(Expr),
42 | }
43 | // Parses `"regex", value, replacer` with an optional trailing comma
44 | impl Parse for ReplaceArgs {
45 |     fn parse(input: ParseStream<'_>) -> Result<Self> {
46 |         let regex_str = input.parse::<LitStr>()?;
47 |         input.parse::<Token![,]>()?;
48 |         let value = input.parse::<Expr>()?;
49 |         input.parse::<Token![,]>()?;
50 |         // we try as a closure before, and as a general expr if
51 |         // it doesn't work out
52 |         let replacer = if let Ok(fun) = input.parse::<ExprClosure>() {
53 |             MaybeFun::Fun(fun)
54 |         } else {
55 |             MaybeFun::Expr(input.parse::<Expr>()?)
56 |         };
57 |         let _ = input.parse::<Token![,]>(); // allow a trailing comma
58 |         Ok(ReplaceArgs {
59 |             regex_str,
60 |             value,
61 |             replacer,
62 |         })
63 |     }
64 | }
65 |
66 | /// Wrapping of the arguments given to a `regex_if` macro
67 | pub(crate) struct RexIfArgs {
68 |     pub regex_str: LitStr,
69 |     pub value: Expr, // this expression is (or produces) the text to search or check
70 |     pub then: Expr,
71 | }
72 | // Parses `"regex", value, then` with an optional trailing comma
73 | impl Parse for RexIfArgs {
74 |     fn parse(input: ParseStream<'_>) -> Result<Self> {
75 |         let regex_str = input.parse::<LitStr>()?;
76 |         input.parse::<Token![,]>()?;
77 |         let value = input.parse::<Expr>()?;
78 |         input.parse::<Token![,]>()?;
79 |         let then = input.parse::<Expr>()?;
80 |         let _ = input.parse::<Token![,]>(); // allow a trailing comma
81 |         Ok(Self {
82 |             regex_str,
83 |             value,
84 |             then,
85 |         })
86 |     }
87 | }
88 |
89 | /// Wrapping of the arguments given to a regex_switch macro
90 | pub(crate) struct RexSwitchArgs {
91 | pub value: Expr, // this expression is (or produces) the text to search or check
92 | pub arms: Vec<RexSwitchArmArgs>,
93 | }
94 | pub(crate) struct RexSwitchArmArgs {
95 | pub regex_str: LitStr,
96 | pub then: Expr,
97 | }
98 |
99 | impl Parse for RexSwitchArgs {
100 | fn parse(input: ParseStream<'_>) -> Result<Self> {
101 | let value = input.parse::<Expr>()?;
102 | input.parse::<Token![,]>()?;
103 | let mut arms = Vec::new();
104 | loop {
105 | let lookahead = input.lookahead1();
106 | if lookahead.peek(LitStr) {
107 | let arm = input.parse::<RexSwitchArmArgs>()?;
108 | arms.push(arm);
109 | } else {
110 | break;
111 | }
112 | }
113 | Ok(Self {
114 | value,
115 | arms,
116 | })
117 | }
118 | }
119 | impl Parse for RexSwitchArmArgs {
120 | fn parse(input: ParseStream<'_>) -> Result<Self> {
121 | let regex_str = input.parse::<LitStr>()?;
122 | input.parse::<Token![=>]>()?;
123 | let then = input.parse::<Expr>()?;
124 | let _ = input.parse::<Token![,]>(); // allow a trailing comma
125 | Ok(Self {
126 | regex_str,
127 | then,
128 | })
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/src/proc_macros/mod.rs:
--------------------------------------------------------------------------------
1 | mod args;
2 | mod regex_code;
3 |
4 | use {
5 | crate::{args::*, regex_code::*},
6 | proc_macro::TokenStream,
7 | quote::quote,
8 | syn::{parse_macro_input, Expr},
9 | };
10 |
11 | // The following `process*` functions are convenience funcs
12 | // to reduce boilerplate in macro implementations below.
13 | fn process<T, F>(input: TokenStream, as_bytes: bool, f: F) -> TokenStream
14 | where
15 | T: Into<TokenStream>,
16 | F: Fn(RegexCode) -> T,
17 | {
18 | match RegexCode::from_token_stream(input, as_bytes) {
19 | Ok(r) => f(r).into(),
20 | Err(e) => e.to_compile_error().into(),
21 | }
22 | }
23 |
24 | fn process_with_value<T, F>(input: TokenStream, as_bytes: bool, f: F) -> TokenStream
25 | where
26 | T: Into<TokenStream>,
27 | F: Fn(RegexCode, Expr) -> T,
28 | {
29 | let parsed = parse_macro_input!(input as RexValArgs);
30 | match RegexCode::from_lit_str(parsed.regex_str, as_bytes) {
31 | Ok(r) => f(r, parsed.value).into(),
32 | Err(e) => e.to_compile_error().into(),
33 | }
34 | }
35 |
36 | /// Return a lazy static Regex checked at compilation time and
37 | /// built at first use.
38 | ///
39 | /// Flags can be specified as suffix:
40 | /// ```
41 | /// let case_insensitive_regex = regex!("^ab+$"i);
42 | /// ```
43 | ///
44 | /// The macro returns a reference to a [regex::Regex]
45 | /// or a [regex::bytes::Regex] instance,
46 | /// differentiated by the `B` flag:
47 | /// ```
48 | /// let verbose = regex!(r#"_([\d\.]+)"#)
49 | /// .replace("This is lazy-regex_2.2", " (version $1)");
50 | /// assert_eq!(verbose, "This is lazy-regex (version 2.2)");
51 | /// ```
52 | #[proc_macro]
53 | pub fn regex(input: TokenStream) -> TokenStream {
54 | process(input, false, |regex_code| regex_code.lazy_static())
55 | }
56 |
57 | /// Return a lazy static `regex::bytes::Regex` checked at compilation time and
58 | /// built at first use.
59 | ///
60 | /// Flags can be specified as suffix:
61 | /// ```
62 | /// let case_insensitive_regex = bytes_regex!("^ab+$"i);
63 | /// assert!(case_insensitive_regex.is_match(b"abB"));
64 | /// ```
65 | #[proc_macro]
66 | pub fn bytes_regex(input: TokenStream) -> TokenStream {
67 | process(input, true, |regex_code| regex_code.lazy_static())
68 | }
69 |
70 | /// Return an instance of `once_cell::sync::Lazy<regex::Regex>` or
71 | /// `once_cell::sync::Lazy<regex::bytes::Regex>` that
72 | /// you can use in a public static declaration.
73 | ///
74 | /// Example:
75 | ///
76 | /// ```
77 | /// pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
78 | /// ```
79 | ///
80 | /// As for other macros, the regex is checked at compilation time.
81 | #[proc_macro]
82 | pub fn lazy_regex(input: TokenStream) -> TokenStream {
83 | process(input, false, |regex_code| regex_code.build)
84 | }
85 |
86 | /// Return an instance of `once_cell::sync::Lazy<regex::bytes::Regex>` that
87 | /// you can use in a public static declaration.
88 | ///
89 | /// Example:
90 | ///
91 | /// ```
92 | /// pub static GLOBAL_REX: Lazy<BytesRegex> = bytes_lazy_regex!("^ab+$"i);
93 | /// ```
94 | ///
95 | /// As for other macros, the regex is checked at compilation time.
96 | #[proc_macro]
97 | pub fn bytes_lazy_regex(input: TokenStream) -> TokenStream {
98 | process(input, true, |regex_code| regex_code.build)
99 | }
100 |
101 | /// Test whether an expression matches a lazy static
102 | /// regular expression (the regex is checked at compile
103 | /// time)
104 | ///
105 | /// Example:
106 | /// ```
107 | /// let b = regex_is_match!("[ab]+", "car");
108 | /// assert_eq!(b, true);
109 | /// ```
110 | #[proc_macro]
111 | pub fn regex_is_match(input: TokenStream) -> TokenStream {
112 | process_with_value(input, false, |regex_code, value| {
113 | let statick = regex_code.statick();
114 | quote! {{
115 | #statick;
116 | RE.is_match(#value)
117 | }}
118 | })
119 | }
120 |
121 | /// Test whether an expression matches a lazy static
122 | /// bytes::Regex regular expression (the regex is checked
123 | /// at compile time)
124 | ///
125 | /// Example:
126 | /// ```
127 | /// let b = bytes_regex_is_match!("[ab]+", b"car");
128 | /// assert_eq!(b, true);
129 | /// ```
130 | #[proc_macro]
131 | pub fn bytes_regex_is_match(input: TokenStream) -> TokenStream {
132 | process_with_value(input, true, |regex_code, value| {
133 | let statick = regex_code.statick();
134 | quote! {{
135 | #statick;
136 | RE.is_match(#value)
137 | }}
138 | })
139 | }
140 |
141 | /// Extract the leftmost match of the regex in the
142 | /// second argument, as a `&str`, or a `&[u8]` if the `B` flag is set.
143 | ///
144 | /// Example:
145 | /// ```
146 | /// let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
147 | /// assert_eq!(f_word, Some("fox"));
148 | /// let f_word = regex_find!(r#"\bf\w+\b"#B, "The forest is silent.");
149 | /// assert_eq!(f_word, Some(b"forest" as &[u8]));
150 | /// ```
151 | #[proc_macro]
152 | pub fn regex_find(input: TokenStream) -> TokenStream {
153 | process_with_value(input, false, |regex_code, value| {
154 | let statick = regex_code.statick();
155 | let as_method = match regex_code.regex {
156 | RegexInstance::Regex(..) => quote!(as_str),
157 | RegexInstance::Bytes(..) => quote!(as_bytes),
158 | };
159 | quote! {{
160 | #statick;
161 | RE.find(#value).map(|mat| mat. #as_method ())
162 | }}
163 | })
164 | }
165 |
166 | /// Extract the leftmost match of the regex in the
167 | /// second argument as a `&[u8]`
168 | ///
169 | /// Example:
170 | /// ```
171 | /// let f_word = bytes_regex_find!(r#"\bf\w+\b"#, b"The fox jumps.");
172 | /// assert_eq!(f_word, Some("fox".as_bytes()));
173 | /// ```
174 | #[proc_macro]
175 | pub fn bytes_regex_find(input: TokenStream) -> TokenStream {
176 | process_with_value(input, true, |regex_code, value| {
177 | let statick = regex_code.statick();
178 | let as_method = match regex_code.regex {
179 | RegexInstance::Regex(..) => quote!(as_str),
180 | RegexInstance::Bytes(..) => quote!(as_bytes),
181 | };
182 | quote! {{
183 | #statick;
184 | RE.find(#value).map(|mat| mat. #as_method ())
185 | }}
186 | })
187 | }
188 |
189 | /// Extract captured groups as a tuple of &str.
190 | ///
191 | /// If there's no match, the macro returns `None`.
192 | ///
193 | /// If an optional group has no value, the tuple
194 | /// will contain `""` instead.
195 | ///
196 | /// Example:
197 | /// ```
198 | /// let (whole, name, version) = regex_captures!(
199 | /// r#"(\w+)-([0-9.]+)"#, // a literal regex
200 | /// "This is lazy_regex-2.0!", // any expression
201 | /// ).unwrap();
202 | /// assert_eq!(whole, "lazy_regex-2.0");
203 | /// assert_eq!(name, "lazy_regex");
204 | /// assert_eq!(version, "2.0");
205 | /// ```
206 | #[proc_macro]
207 | pub fn regex_captures(input: TokenStream) -> TokenStream {
208 | process_with_value(input, false, |regex_code, value| {
209 | let statick = regex_code.statick();
210 | let n = regex_code.captures_len();
211 | let groups = (0..n).map(|i| {
212 | quote! {
213 | caps.get(#i).map_or("", |c| c.as_str())
214 | }
215 | });
216 | quote! {{
217 | #statick;
218 | RE.captures(#value)
219 | .map(|caps| (
220 | #(#groups),*
221 | ))
222 | }}
223 | })
224 | }
225 |
226 | /// Extract captured groups as a tuple of &[u8]
227 | ///
228 | /// If there's no match, the macro returns `None`.
229 | ///
230 | /// If an optional group has no value, the tuple
231 | /// will contain `b""` instead.
232 | ///
233 | /// Example:
234 | /// ```
235 | /// let (whole, name, version) = bytes_regex_captures!(
236 | /// r#"(\w+)-([0-9.]+)"#, // a literal regex
237 | /// b"This is lazy_regex-2.0!", // any expression
238 | /// ).unwrap();
239 | /// assert_eq!(whole, b"lazy_regex-2.0");
240 | /// assert_eq!(name, b"lazy_regex");
241 | /// assert_eq!(version, "2.0".as_bytes());
242 | /// ```
243 | #[proc_macro]
244 | pub fn bytes_regex_captures(input: TokenStream) -> TokenStream {
245 | process_with_value(input, true, |regex_code, value| {
246 | let statick = regex_code.statick();
247 | let n = regex_code.captures_len();
248 | let groups = (0..n).map(|i| {
249 | quote! {
250 | caps.get(#i).map_or(&b""[..], |c| c.as_bytes())
251 | }
252 | });
253 | quote! {{
254 | #statick;
255 | RE.captures(#value)
256 | .map(|caps| (
257 | #(#groups),*
258 | ))
259 | }}
260 | })
261 | }
262 |
263 | /// Returns an iterator that yields successive non-overlapping matches in the given haystack.
264 | /// The iterator yields values of type `regex::Captures`.
265 | ///
266 | /// Example (adapted from the regex crate):
267 | /// ```
268 | /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
269 | /// let mut movies = vec![];
270 | /// let iter = regex_captures_iter!(r"'([^']+)'\s+\(([0-9]{4})\)", hay);
271 | /// for (_, [title, year]) in iter.map(|c| c.extract()) {
272 | /// movies.push((title, year.parse::<u32>().unwrap()));
273 | /// }
274 | /// assert_eq!(movies, vec![
275 | /// ("Citizen Kane", 1941),
276 | /// ("The Wizard of Oz", 1939),
277 | /// ("M", 1931),
278 | /// ]);
279 | /// ```
280 | #[proc_macro]
281 | pub fn regex_captures_iter(input: TokenStream) -> TokenStream {
282 | process_with_value(input, false, |regex_code, value| {
283 | let statick = regex_code.statick();
284 | quote! {{
285 | #statick;
286 | RE.captures_iter(#value)
287 | }}
288 | })
289 | }
290 |
291 | /// Returns an iterator that yields successive non-overlapping matches in the given haystack.
292 | #[proc_macro]
293 | pub fn bytes_regex_captures_iter(input: TokenStream) -> TokenStream {
294 | process_with_value(input, true, |regex_code, value| {
295 | let statick = regex_code.statick();
296 | quote! {{
297 | #statick;
298 | RE.captures_iter(#value)
299 | }}
300 | })
301 | }
302 |
303 | /// common implementation of regex_replace and regex_replace_all
304 | fn replacen(input: TokenStream, limit: usize) -> TokenStream {
305 | let parsed = parse_macro_input!(input as ReplaceArgs);
306 | let ReplaceArgs { regex_str, value, replacer } = parsed;
307 | let regex_code = match RegexCode::from_lit_str(regex_str, false) {
308 | Ok(r) => r,
309 | Err(e) => {
310 | return e.to_compile_error().into();
311 | }
312 | };
313 | let statick = regex_code.statick();
314 | let stream = match replacer {
315 | MaybeFun::Fun(fun) => {
316 | let n = regex_code.captures_len();
317 | let groups = (0..n).map(|i| {
318 | quote! {
319 | caps.get(#i).map_or("", |c| c.as_str())
320 | }
321 | });
322 | quote! {{
323 | #statick;
324 | RE.replacen(
325 | #value,
326 | #limit,
327 | |caps: &lazy_regex::Captures<'_>| {
328 | let mut fun = #fun;
329 | fun(
330 | #(#groups),*
331 | )
332 | })
333 | }}
334 | }
335 | MaybeFun::Expr(expr) => {
336 | quote! {{
337 | #statick;
338 | RE.replacen(#value, #limit, #expr)
339 | }}
340 | }
341 | };
342 | stream.into()
343 | }
344 |
345 | /// common implementation of bytes_regex_replace and bytes_regex_replace_all
346 | fn bytes_replacen(input: TokenStream, limit: usize) -> TokenStream {
347 | let parsed = parse_macro_input!(input as ReplaceArgs);
348 | let ReplaceArgs { regex_str, value, replacer } = parsed;
349 | let regex_code = match RegexCode::from_lit_str(regex_str, true) {
350 | Ok(r) => r,
351 | Err(e) => {
352 | return e.to_compile_error().into();
353 | }
354 | };
355 | let statick = regex_code.statick();
356 | let stream = match replacer {
357 | MaybeFun::Fun(fun) => {
358 | let n = regex_code.captures_len();
359 | let groups = (0..n).map(|i| {
360 | quote! {
361 | caps.get(#i).map_or(&b""[..], |c| c.as_bytes())
362 | }
363 | });
364 | quote! {{
365 | #statick;
366 | RE.replacen(
367 | #value,
368 | #limit,
369 | |caps: &lazy_regex::regex::bytes::Captures<'_>| {
370 | let mut fun = #fun;
371 | fun(
372 | #(#groups),*
373 | )
374 | })
375 | }}
376 | }
377 | MaybeFun::Expr(expr) => {
378 | quote! {{
379 | #statick;
380 | RE.replacen(#value, #limit, #expr)
381 | }}
382 | }
383 | };
384 | stream.into()
385 | }
386 |
387 | /// Replaces the leftmost match in the second argument
388 | /// using the replacer given as third argument.
389 | ///
390 | /// When the replacer is a closure, it is given one or more `&str`,
391 | /// the first one for the whole match and the following ones for
392 | /// the groups.
393 | /// Any optional group with no value is replaced with `""`.
394 | ///
395 | /// Example:
396 | /// ```
397 | /// let text = "Fuu fuuu";
398 | /// let text = regex_replace!(
399 | /// "f(u*)"i,
400 | /// text,
401 | /// |_, suffix: &str| format!("F{}", suffix.len()),
402 | /// );
403 | /// assert_eq!(text, "F2 fuuu");
404 | /// ```
405 | #[proc_macro]
406 | pub fn regex_replace(input: TokenStream) -> TokenStream {
407 | replacen(input, 1)
408 | }
409 |
410 | /// Replaces the leftmost match in the second argument
411 | /// using the replacer given as third argument.
412 | ///
413 | /// When the replacer is a closure, it is given one or more `&[u8]`,
414 | /// the first one for the whole match and the following ones for
415 | /// the groups.
416 | /// Any optional group with no value is replaced with `b""`.
417 | ///
418 | /// Example:
419 | /// ```
420 | /// println!("{:?}", "ck ck".as_bytes());
421 | /// let text = b"Fuu fuuu";
422 | /// let text = bytes_regex_replace!(
423 | /// "f(u*)"i,
424 | /// text,
425 | /// b"ck",
426 | /// );
427 | /// assert_eq!(text, "ck fuuu".as_bytes());
428 | /// ```
429 | #[proc_macro]
430 | pub fn bytes_regex_replace(input: TokenStream) -> TokenStream {
431 | bytes_replacen(input, 1)
432 | }
433 |
434 | /// Replaces all non-overlapping matches in the second argument
435 | /// using the replacer given as third argument.
436 | ///
437 | /// When the replacer is a closure, it is given one or more `&str`,
438 | /// the first one for the whole match and the following ones for
439 | /// the groups.
440 | /// Any optional group with no value is replaced with `""`.
441 | ///
442 | /// Example:
443 | /// ```
444 | /// let text = "Foo fuu";
445 | /// let text = regex_replace_all!(
446 | /// r#"\bf(?P<suffix>\w+)"#i,
447 | /// text,
448 | /// |_, suffix| format!("F<{}>", suffix),
449 | /// );
450 | /// assert_eq!(text, "F<oo> F<uu>");
451 | /// ```
452 | #[proc_macro]
453 | pub fn regex_replace_all(input: TokenStream) -> TokenStream {
454 | replacen(input, 0)
455 | }
456 |
457 | /// Replaces all non-overlapping matches in the second argument
458 | /// using the replacer given as third argument.
459 | ///
460 | /// When the replacer is a closure, it is given one or more `&[u8]`,
461 | /// the first one for the whole match and the following ones for
462 | /// the groups.
463 | /// Any optional group with no value is replaced with `b""`.
464 | ///
465 | /// Example:
466 | /// ```
467 | /// let text = b"Foo fuu";
468 | /// let text = bytes_regex_replace_all!(
469 | /// r#"\bf(?P<suffix>\w+)"#i,
470 | /// text,
471 | /// b"H",
472 | /// );
473 | /// assert_eq!(text, "H H".as_bytes());
474 | /// ```
475 | #[proc_macro]
476 | pub fn bytes_regex_replace_all(input: TokenStream) -> TokenStream {
477 | bytes_replacen(input, 0)
478 | }
479 |
480 | /// Return an `Option<T>`, with T being the type returned by the block or expression
481 | /// given as third argument.
482 | ///
483 | /// If the regex matches, executes the expression and return it as Some.
484 | /// Return None if the regex doesn't match.
485 | ///
486 | /// ```
487 | /// let grey = regex_if!(r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#, "grey(22)", {
488 | /// level.parse().unwrap()
489 | /// });
490 | /// assert_eq!(grey, Some(22));
491 | /// ```
492 | #[proc_macro]
493 | pub fn regex_if(input: TokenStream) -> TokenStream {
494 | let RexIfArgs {
495 | regex_str,
496 | value,
497 | then,
498 | } = parse_macro_input!(input as RexIfArgs);
499 | let regex_code = match RegexCode::from_lit_str(regex_str, false) {
500 | Ok(r) => r,
501 | Err(e) => {
502 | return e.to_compile_error().into();
503 | }
504 | };
505 | let statick = regex_code.statick();
506 | let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
507 | let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
508 | quote! {
509 | let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str());
510 | }
511 | });
512 | quote! {{
513 | #statick;
514 | match RE.captures(#value) {
515 | Some(caps) => {
516 | #(#assigns);*
517 | Some(#then)
518 | }
519 | None => None,
520 | }
521 | }}.into()
522 | }
523 |
524 | #[proc_macro]
525 | pub fn bytes_regex_if(input: TokenStream) -> TokenStream {
526 | let RexIfArgs {
527 | regex_str,
528 | value,
529 | then,
530 | } = parse_macro_input!(input as RexIfArgs);
531 | let regex_code = match RegexCode::from_lit_str(regex_str, true) {
532 | Ok(r) => r,
533 | Err(e) => {
534 | return e.to_compile_error().into();
535 | }
536 | };
537 | let statick = regex_code.statick();
538 | let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
539 | let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
540 | quote! {
541 | let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes());
542 | }
543 | });
544 | quote! {{
545 | #statick;
546 | match RE.captures(#value) {
547 | Some(caps) => {
548 | #(#assigns);*
549 | Some(#then)
550 | }
551 | None => None,
552 | }
553 | }}.into()
554 | }
555 |
556 | /// Define a set of lazy static statically compiled regexes, with a block
557 | /// or expression for each one. The first matching expression is computed
558 | /// with the named capture groups declaring `&str` variables available for this
559 | /// computation.
560 | /// If no regex matches, return `None`.
561 | ///
562 | /// Example:
563 | /// ```
564 | /// #[derive(Debug, PartialEq)]
565 | /// enum Color {
566 | /// Grey(u8),
567 | /// Pink,
568 | /// Rgb(u8, u8, u8),
569 | /// }
570 | ///
571 | /// let input = "rgb(1, 2, 3)";
572 | /// let color = regex_switch!(input,
573 | /// r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#i => {
574 | /// Color::Grey(level.parse()?)
575 | /// }
576 | /// "^pink"i => Color::Pink,
577 | /// r#"^rgb\((?<r>\d+),\s*(?<g>\d+),\s*(?<b>\d+),?\)$"#i => Color::Rgb (
578 | /// r.parse()?,
579 | /// g.parse()?,
580 | /// b.parse()?,
581 | /// ),
582 | /// );
583 | /// assert_eq!(color, Some(Color::Rgb(1, 2, 3)));
584 | ///
585 | /// ```
586 | #[proc_macro]
587 | pub fn regex_switch(input: TokenStream) -> TokenStream {
588 | let RexSwitchArgs {
589 | value,
590 | arms,
591 | } = parse_macro_input!(input as RexSwitchArgs);
592 | let mut q_arms = Vec::new();
593 | for RexSwitchArmArgs { regex_str, then } in arms.into_iter() {
594 | let regex_code = match RegexCode::from_lit_str(regex_str, false) {
595 | Ok(r) => r,
596 | Err(e) => {
597 | return e.to_compile_error().into();
598 | }
599 | };
600 | let statick = regex_code.statick();
601 | let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
602 | let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
603 | quote! {
604 | let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str());
605 | }
606 | });
607 | q_arms.push(
608 | quote! {{
609 | #statick;
610 | if let Some(caps) = RE.captures(#value) {
611 | #(#assigns);*
612 | let output = Some(#then);
613 | break 'switch output;
614 | }
615 | }}
616 | );
617 | }
618 | quote! {{
619 | 'switch: {
620 | #(#q_arms)*
621 | None
622 | }
623 | }}.into()
624 | }
625 |
626 | /// Define a set of lazy static statically compiled regexes, with a block
627 | /// or expression for each one. The first matching expression is computed
628 | /// with the named capture groups declaring `&[u8]` variables available for this
629 | /// computation.
630 | /// If no regex matches, return `None`.
631 | ///
632 | /// Example:
633 | /// ```
634 | /// #[derive(Debug, PartialEq)]
635 | /// enum Color {
636 | /// Grey(u8),
637 | /// Pink,
638 | /// Rgb(u8, u8, u8),
639 | /// }
640 | ///
641 | /// let input = "rgb(1, 2, 3)";
642 | /// let color = regex_switch!(input,
643 | /// r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#i => {
644 | /// Color::Grey(level.parse()?)
645 | /// }
646 | /// "^pink"i => Color::Pink,
647 | /// r#"^rgb\((?<r>\d+),\s*(?<g>\d+),\s*(?<b>\d+),?\)$"#i => Color::Rgb (
648 | /// r.parse()?,
649 | /// g.parse()?,
650 | /// b.parse()?,
651 | /// ),
652 | /// );
653 | /// assert_eq!(color, Some(Color::Rgb(1, 2, 3)));
654 | ///
655 | /// ```
656 | #[proc_macro]
657 | pub fn bytes_regex_switch(input: TokenStream) -> TokenStream {
658 | let RexSwitchArgs {
659 | value,
660 | arms,
661 | } = parse_macro_input!(input as RexSwitchArgs);
662 | let mut q_arms = Vec::new();
663 | for RexSwitchArmArgs { regex_str, then } in arms.into_iter() {
664 | let regex_code = match RegexCode::from_lit_str(regex_str, true) {
665 | Ok(r) => r,
666 | Err(e) => {
667 | return e.to_compile_error().into();
668 | }
669 | };
670 | let statick = regex_code.statick();
671 | let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
672 | let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
673 | quote! {
674 | let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes());
675 | }
676 | });
677 | q_arms.push(
678 | quote! {{
679 | #statick;
680 | if let Some(caps) = RE.captures(#value) {
681 | #(#assigns);*
682 | let output = Some(#then);
683 | break 'switch output;
684 | }
685 | }}
686 | );
687 | }
688 | quote! {{
689 | 'switch: {
690 | #(#q_arms)*
691 | None
692 | }
693 | }}.into()
694 | }
695 |
--------------------------------------------------------------------------------
/src/proc_macros/regex_code.rs:
--------------------------------------------------------------------------------
1 | use {
2 | proc_macro::TokenStream,
3 | proc_macro2::TokenStream as TokenStream2,
4 | quote::quote,
5 | syn::LitStr,
6 | };
7 |
8 | /// The lazy static regex building code, which is produced and
9 | /// inserted by all lazy-regex macros
10 | pub(crate) struct RegexCode {
11 | pub build: TokenStream2,
12 | pub regex: RegexInstance,
13 | }
14 |
15 | pub(crate) enum RegexInstance {
16 | Regex(regex::Regex),
17 | Bytes(regex::bytes::Regex),
18 | }
19 |
20 | impl RegexCode {
21 | pub fn from_token_stream(token_stream: TokenStream, is_bytes: bool) -> Result<Self, syn::Error> {
22 | Self::from_lit_str(syn::parse::<syn::LitStr>(token_stream)?, is_bytes)
23 | }
24 | pub fn from_lit_str(lit_str: LitStr, mut is_bytes: bool) -> Result<Self, syn::Error> {
25 | let pattern = lit_str.value();
26 | let mut case_insensitive = false;
27 | let mut multi_line = false;
28 | let mut dot_matches_new_line = false;
29 | let mut ignore_whitespace = false;
30 | let mut swap_greed = false;
31 | for (i, ch) in lit_str.suffix().chars().enumerate() {
32 | match ch {
33 | 'i' => case_insensitive = true,
34 | 'm' => multi_line = true,
35 | 's' => dot_matches_new_line = true,
36 | 'x' => ignore_whitespace = true,
37 | 'U' => swap_greed = true,
38 | 'B' => is_bytes = true, // non-standard!
39 | _ => {
40 | let lit = lit_str.token();
41 | let pos = lit.to_string().len() - i;
42 | // subspan only works on nightly
43 | return Err(syn::Error::new(
44 | lit.subspan(pos - 1..pos).unwrap_or_else(|| lit.span()),
45 | format!("unrecognized regex flag {:?}", ch),
46 | ));
47 | }
48 | };
49 | }
50 |
51 | let regex = if is_bytes {
52 | regex::bytes::Regex::new(&pattern).map(RegexInstance::Bytes)
53 | } else {
54 | regex::Regex::new(&pattern).map(RegexInstance::Regex)
55 | };
56 | let regex = regex.map_err(|e| syn::Error::new(lit_str.span(), e.to_string()))?;
57 |
58 | let builder_token = if is_bytes {
59 | quote!(BytesRegexBuilder)
60 | } else {
61 | quote!(RegexBuilder)
62 | };
63 | let build = quote! {
64 | lazy_regex::Lazy::new(|| {
65 | //println!("compiling regex {:?}", #pattern);
66 | lazy_regex:: #builder_token ::new(#pattern)
67 | .case_insensitive(#case_insensitive)
68 | .multi_line(#multi_line)
69 | .dot_matches_new_line(#dot_matches_new_line)
70 | .ignore_whitespace(#ignore_whitespace)
71 | .swap_greed(#swap_greed)
72 | .build()
73 | .unwrap()
74 | })
75 | };
76 | Ok(Self { build, regex })
77 | }
78 | }
79 |
80 | impl RegexCode {
81 | pub fn statick(&self) -> TokenStream2 {
82 | let build = &self.build;
83 | let regex_token = match self.regex {
84 | RegexInstance::Regex(..) => quote!(Regex),
85 | RegexInstance::Bytes(..) => quote!(BytesRegex),
86 | };
87 | quote! {
88 | static RE: lazy_regex::Lazy<lazy_regex:: #regex_token > = #build;
89 | }
90 | }
91 |
92 | pub fn lazy_static(&self) -> TokenStream2 {
93 | let statick = self.statick();
94 | quote! {{
95 | #statick;
96 | &RE
97 | }}
98 | }
99 |
100 | pub fn captures_len(&self) -> usize {
101 | match &self.regex {
102 | RegexInstance::Regex(regex) => regex.captures_len(),
103 | RegexInstance::Bytes(regex) => regex.captures_len(),
104 | }
105 | }
106 | pub fn named_groups(&self) -> Vec<(usize, &str)> {
107 | match &self.regex {
108 | RegexInstance::Regex(regex) => regex
109 | .capture_names()
110 | .enumerate()
111 | .filter_map(|(i, n)| Some((i, n?)))
112 | .collect(),
113 | RegexInstance::Bytes(regex) => regex
114 | .capture_names()
115 | .enumerate()
116 | .filter_map(|(i, n)| Some((i, n?)))
117 | .collect(),
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/tests/regex_if.rs:
--------------------------------------------------------------------------------
1 | use {
2 | lazy_regex::{
3 | bytes_regex_if,
4 | regex_if,
5 | },
6 | std::num::ParseIntError,
7 | };
8 |
9 | #[test]
10 | fn test_regex_if() {
11 | fn extract_grey_level(s: &str) -> Option<u16> {
12 | regex_if!(
13 | r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#,
14 | s,
15 | level.parse().unwrap(),
16 | )
17 | }
18 | assert_eq!(extract_grey_level("gray(15)"), Some(15));
19 | assert_eq!(extract_grey_level("grey(22)"), Some(22));
20 | assert_eq!(extract_grey_level("grey(268)"), None);
21 | assert_eq!(extract_grey_level("red"), None);
22 | }
23 |
24 | #[test]
25 | fn test_regex_if_with_error_handling() {
26 | fn extract_grey_level(s: &str) -> Result