├── .gitignore ├── .vscode └── settings.json ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── ROADMAP.md ├── ctreg-macro ├── Cargo.toml └── src │ ├── lib.rs │ └── render.rs └── ctreg ├── Cargo.toml ├── src └── lib.rs └── tests └── tests.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # These are backup files generated by rustfmt 6 | **/*.rs.bk 7 | 8 | # MSVC Windows builds of rustc generate these, which store debugging information 9 | *.pdb 10 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": ["ctreg", "tredge"] 3 | } 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 4 | 5 | ## 1.0.3 6 | 7 | Changelog started 8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.3" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "cool_asserts" 16 | version = "2.0.3" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "ee9f254e53f61e2688d3677fa2cbe4e9b950afd56f48819c98817417cf6b28ec" 19 | dependencies = [ 20 | "indent_write", 21 | ] 22 | 23 | [[package]] 24 | name = "ctreg" 25 | version = "1.0.3" 26 | dependencies = [ 27 | "cool_asserts", 28 | "ctreg-macro", 29 | "regex-automata", 30 | "regex-syntax", 31 | ] 32 | 33 | [[package]] 34 | name = "ctreg-macro" 35 | version = "1.0.1" 36 | dependencies = [ 37 | "lazy_format", 38 | "proc-macro2", 39 | "quote", 40 | "regex-automata", 41 | "regex-syntax", 42 | "syn", 43 | "thiserror", 44 | ] 45 | 46 | [[package]] 47 | name = "indent_write" 48 | version = "2.2.0" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "0cfe9645a18782869361d9c8732246be7b410ad4e919d3609ebabdac00ba12c3" 51 | 52 | [[package]] 53 | name = "lazy_format" 54 | version = "2.0.3" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "e479e99b287d578ed5f6cd4c92cdf48db219088adb9c5b14f7c155b71dfba792" 57 | 58 | [[package]] 59 | name = "memchr" 60 | version = "2.7.2" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 63 | 64 | [[package]] 65 | name = "proc-macro2" 66 | version = "1.0.81" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" 69 | dependencies = [ 70 | "unicode-ident", 71 | ] 72 | 73 | [[package]] 74 | name = "quote" 75 | version = "1.0.36" 76 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 78 | dependencies = [ 79 | "proc-macro2", 80 | ] 81 | 82 | [[package]] 83 | name = "regex-automata" 84 | version = "0.4.6" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 87 | dependencies = [ 88 | "aho-corasick", 89 | "memchr", 90 | "regex-syntax", 91 | ] 92 | 93 | [[package]] 94 | name = "regex-syntax" 95 | version = "0.8.3" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 98 | 99 | [[package]] 100 | name = "syn" 101 | version = "2.0.60" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" 104 | dependencies = [ 105 | "proc-macro2", 106 | "quote", 107 | "unicode-ident", 108 | ] 109 | 110 | [[package]] 111 | name = "thiserror" 112 | version = "1.0.59" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" 115 | dependencies = [ 116 | "thiserror-impl", 117 | ] 118 | 119 | [[package]] 120 | name = "thiserror-impl" 121 | version = "1.0.59" 122 | source = "registry+https://github.com/rust-lang/crates.io-index" 123 | checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" 124 | dependencies = [ 125 | "proc-macro2", 126 | "quote", 127 | "syn", 128 | ] 129 | 130 | [[package]] 131 | name = "unicode-ident" 132 | version = "1.0.12" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 135 | -------------------------------------------------------------------------------- /Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["ctreg", "ctreg-macro"] 3 | resolver = "2" 4 | 5 | [workspace.package] 6 | edition = "2021" 7 | authors = ["Nathan West "] 8 | 9 | readme = "README.md" 10 | license = "MPL-2.0" 11 | repository = "https://github.com/Lucretiel/ctreg" 12 | 13 | [workspace.dependencies] 14 | cool_asserts = { version = "2.0.3" } 15 | thiserror = { version = "1.0.59" } 16 | 17 | # TODO: In the near future, add feature flags mirroring those in regex and 18 | # disable default features 19 | regex-automata = { version = "0.4.6" } 20 | regex-syntax = { version = "0.8.2" } 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. 
"Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. 
"You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. 
You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. 
Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. 
Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ctreg 2 | 3 | 4 | 5 | `ctreg` (pronounced cuh-tredge, in the style of Cthulhu) is a macro providing 6 | static typing to your regular expressions, allowing syntax errors to be detected 7 | at compile time and capture groups to be matched infallibly. 8 | 9 | ```rust 10 | use ctreg::regex; 11 | 12 | // Create a regular expression with the macro. This regular expression is 13 | // analyzed at compile time and its parsed, normalized representation is 14 | // emitted as the `HelloWorld` type. 15 | regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" } 16 | 17 | // Create an instance of the regular expression. 18 | let regex = HelloWorld::new(); 19 | 20 | // Use `is_match` to test if there was a match 21 | assert!(regex.is_match("Hello, World!")); 22 | assert!(regex.is_match("Goodbye!")); 23 | assert!(!regex.is_match("Nothing to see here.")); 24 | 25 | // Use `find` to find the location of a match 26 | let cap = regex.find("abc Greetings, Rustacean! 123").unwrap(); 27 | assert_eq!(cap.content, "Greetings, Rustacean!"); 28 | assert_eq!(cap.start, 4); 29 | assert_eq!(cap.end, 25); 30 | 31 | assert!(regex.find("Nothing to see here.").is_none()); 32 | 33 | // Use `captures` to find all of the named capture groups of a match (`greeting` 34 | // and `target`, in this case). Capture groups are emitted at compile time and 35 | // evaluated infallibly.
36 | let groups = regex.captures("ah, Bonjour, reader!").unwrap(); 37 | assert_eq!(groups.greeting.content, "Bonjour"); 38 | assert_eq!(groups.target.unwrap().content, "reader"); 39 | 40 | let groups = regex.captures("This is goodbye!").unwrap(); 41 | assert_eq!(groups.greeting.content, "goodbye"); 42 | assert!(groups.target.is_none()); 43 | 44 | assert!(regex.captures("nothing to see here.").is_none()); 45 | ``` 46 | 47 | 48 | -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Things we still need 2 | 3 | - Attributes for the generated type 4 | - `cfg` attr 5 | - docs 6 | - other arbitrary attributes 7 | - More regex search methods: iteration, anchored search 8 | - Introspection? Unnecessary in principle, but might be nice for eg serializing 9 | - Regex value handling via OnceLock or something similar. 10 | - Enums for alternations. 11 | - `nom` integration? 
12 | - Tests covering all of the regex syntaxes 13 | - Unicode and performance feature flags 14 | -------------------------------------------------------------------------------- /ctreg-macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ctreg-macro" 3 | version = "1.0.1" 4 | description = "Proc-macro implementation for ctreg" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | readme.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | 12 | [dependencies] 13 | regex-syntax = { workspace = true } 14 | regex-automata = { workspace = true, features = ["meta"] } 15 | 16 | thiserror = { workspace = true } 17 | lazy_format = "2.0.3" 18 | 19 | quote = "1.0.36" 20 | syn = { version = "2.0.60", default-features = false, features = [ 21 | "parsing", 22 | "proc-macro", 23 | ] } 24 | proc-macro2 = { version = "1.0.81", default-features = false } 25 | 26 | [lib] 27 | proc-macro = true 28 | 29 | [features] 30 | -------------------------------------------------------------------------------- /ctreg-macro/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | Implementation of the proc macro for `ctreg`. You should never use this crate 3 | directly. 
4 | */ 5 | 6 | mod render; 7 | 8 | extern crate proc_macro; 9 | use proc_macro::TokenStream; 10 | 11 | use lazy_format::lazy_format; 12 | use proc_macro2::TokenStream as TokenStream2; 13 | use quote::{format_ident, quote}; 14 | use regex_automata::meta::Regex; 15 | use regex_syntax::{ 16 | hir::{self, Capture, Hir, HirKind, Repetition}, 17 | parse as parse_regex, 18 | }; 19 | use render::hir_expression; 20 | use syn::{ 21 | parse::{Parse, ParseStream}, 22 | parse_macro_input, 23 | spanned::Spanned, 24 | Ident, Token, 25 | }; 26 | use thiserror::Error; 27 | 28 | use self::render::{CaptureType, HirType, InputType, RegexType}; 29 | 30 | struct Request { 31 | public: Option, 32 | type_name: syn::Ident, 33 | regex: syn::LitStr, 34 | } 35 | 36 | impl Parse for Request { 37 | fn parse(input: ParseStream) -> syn::Result { 38 | let public = input.parse()?; 39 | let type_name = input.parse()?; 40 | let _eq: Token![=] = input.parse()?; 41 | let regex = input.parse()?; 42 | 43 | Ok(Self { 44 | public, 45 | type_name, 46 | regex, 47 | }) 48 | } 49 | } 50 | 51 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 52 | enum HirRepState { 53 | Definite, 54 | Optional, 55 | Repeating, 56 | } 57 | 58 | impl HirRepState { 59 | fn from_reps(repetition: &Repetition) -> Self { 60 | match (repetition.min, repetition.max) { 61 | (1, Some(1)) => Self::Definite, 62 | (0, Some(1)) => Self::Optional, 63 | _ => Self::Repeating, 64 | } 65 | } 66 | 67 | fn and(self, other: HirRepState) -> Self { 68 | Ord::max(self, other) 69 | } 70 | 71 | fn with(self, repetition: &Repetition) -> Self { 72 | self.and(Self::from_reps(repetition)) 73 | } 74 | } 75 | 76 | #[derive(Debug, Clone, Copy)] 77 | struct GroupInfo<'a> { 78 | name: &'a str, 79 | optional: bool, 80 | index: u32, 81 | } 82 | 83 | fn get_group_index(groups: &[GroupInfo<'_>]) -> u32 { 84 | groups.last().map(|group| group.index).unwrap_or(0) + 1 85 | } 86 | 87 | #[derive(Debug, Error)] 88 | enum HirError { 89 | #[error("duplicate group 
name: {0:?}")] 90 | DuplicateGroupName(String), 91 | 92 | #[error("capture group {0:?} is repeating; capture groups can't repeat")] 93 | RepeatingCaptureGroup(String), 94 | 95 | #[error("capture group name {0:?} is not a valid rust identifier")] 96 | BadName(String), 97 | } 98 | 99 | /// Analyze and rewrite the syntax tree 100 | /// 101 | /// - Collect information about the capture groups we'll be using 102 | /// - Erase anonymous capture groups 103 | fn process_hir_recurse<'a>( 104 | hir: &'a Hir, 105 | groups: &mut Vec>, 106 | state: HirRepState, 107 | ) -> Result { 108 | match *hir.kind() { 109 | // Literals and their equivalents are passed verbatim 110 | HirKind::Empty => Ok(Hir::empty()), 111 | HirKind::Literal(hir::Literal(ref lit)) => Ok(Hir::literal(lit.clone())), 112 | HirKind::Class(ref class) => Ok(Hir::class(class.clone())), 113 | HirKind::Look(look) => Ok(Hir::look(look)), 114 | 115 | // Need to compute the repetition state for repetitions 116 | HirKind::Repetition(ref repetition) => { 117 | let state = state.with(repetition); 118 | let sub = process_hir_recurse(&repetition.sub, groups, state)?; 119 | 120 | Ok(Hir::repetition(Repetition { 121 | sub: Box::new(sub), 122 | ..*repetition 123 | })) 124 | } 125 | 126 | // Capture groups are the most complicated. Need to remove anonymous 127 | // groups, renumber other groups, and check repetition / optional states. 
128 | HirKind::Capture(ref capture) => { 129 | let Some(name) = capture.name.as_deref() else { 130 | // Anonymous groups don't capture in ctreg 131 | return process_hir_recurse(&capture.sub, groups, state); 132 | }; 133 | 134 | // Let syn do the work for us of validating that this is a correct 135 | // rust identifier 136 | let _ident: Ident = 137 | syn::parse_str(name).map_err(|_| HirError::BadName(name.to_owned()))?; 138 | 139 | // Check duplicate groups 140 | if groups.iter().any(|group| group.name == name) { 141 | return Err(HirError::DuplicateGroupName(name.to_owned())); 142 | } 143 | 144 | // Check repeating groups 145 | if state == HirRepState::Repeating { 146 | return Err(HirError::RepeatingCaptureGroup(name.to_owned())); 147 | } 148 | 149 | let group_index = get_group_index(groups); 150 | 151 | groups.push(GroupInfo { 152 | name, 153 | optional: matches!(state, HirRepState::Optional), 154 | index: group_index, 155 | }); 156 | 157 | let sub = process_hir_recurse(&capture.sub, groups, state)?; 158 | 159 | Ok(Hir::capture(Capture { 160 | index: group_index, 161 | name: Some(name.into()), 162 | sub: Box::new(sub), 163 | })) 164 | } 165 | 166 | // Concatenations are trivial 167 | HirKind::Concat(ref concat) => concat 168 | .iter() 169 | .map(|sub| process_hir_recurse(sub, groups, state)) 170 | .collect::>() 171 | .map(Hir::concat), 172 | 173 | // regex syntax guarantees that alternations have at least 2 variants, 174 | // so each one is unconditionally optional. 
In the future we could 175 | // produce an enum, to reflect that at least one variant will exist 176 | HirKind::Alternation(ref alt) => alt 177 | .iter() 178 | .map(|sub| process_hir_recurse(sub, groups, state.and(HirRepState::Optional))) 179 | .collect::>() 180 | .map(Hir::alternation), 181 | } 182 | } 183 | 184 | fn process_hir(hir: &Hir) -> Result<(Hir, Vec>), HirError> { 185 | let mut groups = Vec::new(); 186 | 187 | process_hir_recurse(hir, &mut groups, HirRepState::Definite).map(|hir| (hir, groups)) 188 | } 189 | 190 | fn regex_impl_result(input: &Request) -> Result { 191 | let hir = parse_regex(&input.regex.value()).map_err(|error| { 192 | syn::Error::new( 193 | input.regex.span(), 194 | lazy_format!("error compiling regex:\n{error}"), 195 | ) 196 | })?; 197 | 198 | let (hir, groups) = 199 | process_hir(&hir).map_err(|error| syn::Error::new(input.regex.span(), error))?; 200 | 201 | // We don't actually use the compiled regex for anything, we just need to 202 | // ensure that the `hir` does compile correctly. 203 | let _compiled_regex = Regex::builder().build_from_hir(&hir).map_err(|error| { 204 | syn::Error::new( 205 | input.regex.span(), 206 | lazy_format!("error compiling regex:\n{error}"), 207 | ) 208 | })?; 209 | 210 | let public = input.public; 211 | let type_name = &input.type_name; 212 | 213 | let slots_ident = Ident::new("slots", type_name.span()); 214 | let haystack_ident = Ident::new("haystack", type_name.span()); 215 | 216 | let mod_name = format_ident!("Mod{type_name}"); 217 | let matches_type_name = format_ident!("{type_name}Captures"); 218 | 219 | let matches_fields_definitions = groups.iter().map(|&GroupInfo { name, optional, .. }| { 220 | let type_name = match optional { 221 | false => quote! { #CaptureType<'a> }, 222 | true => quote! { ::core::option::Option<#CaptureType<'a>> }, 223 | }; 224 | 225 | let field_name = format_ident!("{name}", span = type_name.span()); 226 | 227 | quote! 
{ #field_name : #type_name } 228 | }); 229 | 230 | let matches_field_populators = groups.iter().map( 231 | |&GroupInfo { 232 | name, 233 | optional, 234 | index, 235 | }| { 236 | let slot_start = (index as usize) * 2; 237 | let slot_end = slot_start + 1; 238 | 239 | let field_name = format_ident!("{name}", span = type_name.span()); 240 | 241 | let populate = quote! {{ 242 | let slot_start = #slots_ident[#slot_start]; 243 | let slot_end = #slots_ident[#slot_end]; 244 | 245 | match slot_start { 246 | None => None, 247 | Some(start) => { 248 | let start = start.get(); 249 | let end = unsafe { slot_end.unwrap_unchecked() }.get(); 250 | let content = unsafe { #haystack_ident.get_unchecked(start..end) }; 251 | 252 | Some(#CaptureType {start, end, content}) 253 | } 254 | } 255 | }}; 256 | 257 | let expr = match optional { 258 | true => populate, 259 | false => quote! { 260 | match #populate { 261 | Some(capture) => capture, 262 | None => unsafe { ::core::hint::unreachable_unchecked() }, 263 | } 264 | }, 265 | }; 266 | 267 | quote! { #field_name : #expr } 268 | }, 269 | ); 270 | 271 | let num_capture_groups = groups.len(); 272 | 273 | let captures_impl = (num_capture_groups > 0).then(|| quote! { 274 | impl #type_name { 275 | #[inline] 276 | #[must_use] 277 | pub fn captures<'i>(&self, #haystack_ident: &'i str) -> ::core::option::Option<#matches_type_name<'i>> { 278 | let mut #slots_ident = [::core::option::Option::None; (#num_capture_groups + 1) * 2]; 279 | let _ = self.regex.search_slots(&#InputType::new(#haystack_ident), &mut #slots_ident)?; 280 | 281 | ::core::option::Option::Some(#matches_type_name { 282 | #(#matches_field_populators ,)* 283 | }) 284 | } 285 | } 286 | 287 | #[derive(Debug, Clone, Copy)] 288 | pub struct #matches_type_name<'a> { 289 | #(pub #matches_fields_definitions,)* 290 | } 291 | }); 292 | 293 | let captures_export = captures_impl.is_some().then(|| { 294 | quote! 
{ 295 | #public use #mod_name::#matches_type_name 296 | } 297 | }); 298 | 299 | let rendered_hir = hir_expression(&hir); 300 | 301 | Ok(quote! { 302 | // The implementations are put into a submodule to ensure that the 303 | // caller of the regex macro doesn't have access to the internals 304 | // of these types 305 | #[doc(hidden)] 306 | #[allow(non_snake_case)] 307 | mod #mod_name { 308 | #[derive(Debug, Clone)] 309 | pub struct #type_name { 310 | regex: #RegexType, 311 | } 312 | 313 | impl #type_name { 314 | #[inline] 315 | #[must_use] 316 | pub fn new() -> Self { 317 | let hir: #HirType = #rendered_hir; 318 | let regex = #RegexType::builder() 319 | .build_from_hir(&hir) 320 | .expect("regex compilation failed, despite compile-time verification"); 321 | Self { regex } 322 | } 323 | 324 | #[inline] 325 | #[must_use] 326 | pub fn is_match(&self, haystack: &str) -> bool { 327 | self.regex.is_match(haystack) 328 | } 329 | 330 | #[inline] 331 | #[must_use] 332 | pub fn find<'i>(&self, haystack: &'i str) -> ::core::option::Option<#CaptureType<'i>> { 333 | let capture = self.regex.find(haystack)?; 334 | let span = capture.span(); 335 | 336 | let start = span.start; 337 | let end = span.end; 338 | let content = unsafe { haystack.get_unchecked(start..end) }; 339 | 340 | Some(#CaptureType { start, end, content }) 341 | } 342 | } 343 | 344 | impl ::core::default::Default for #type_name { 345 | fn default() -> Self { 346 | Self::new() 347 | } 348 | } 349 | 350 | #captures_impl 351 | } 352 | 353 | #public use #mod_name::#type_name; 354 | #captures_export; 355 | 356 | }) 357 | } 358 | 359 | #[proc_macro] 360 | pub fn regex_impl(input: TokenStream) -> TokenStream { 361 | let input = parse_macro_input!(input as Request); 362 | 363 | regex_impl_result(&input) 364 | .unwrap_or_else(|error| error.into_compile_error()) 365 | .into() 366 | } 367 | -------------------------------------------------------------------------------- /ctreg-macro/src/render.rs: 
-------------------------------------------------------------------------------- 1 | use proc_macro2::{Literal as LiteralToken, TokenStream as TokenStream2}; 2 | use quote::{quote, ToTokens}; 3 | use regex_syntax::hir::{Capture, Class, Hir, HirKind, Literal, Look, Repetition}; 4 | 5 | macro_rules! quote_push { 6 | ($tokens:ident, { $($t:tt)* }) => { { quote::quote_each_token! { $tokens $($t)* }; } }; 7 | } 8 | 9 | macro_rules! prefixes { 10 | ($( 11 | $Name:ident = {$($t:tt)*} 12 | )*) => {$( 13 | pub struct $Name; 14 | 15 | impl ToTokens for $Name { 16 | fn to_tokens(&self, mut tokens: &mut TokenStream2) { 17 | quote_push!(tokens, {$($t)*}) 18 | } 19 | } 20 | )*} 21 | } 22 | 23 | prefixes! { 24 | CaptureType = { ::ctreg::Capture } 25 | Private = { ::ctreg::ඞ } 26 | 27 | AutomataMod = { #Private::regex_automata } 28 | RegexType = { #AutomataMod::meta::Regex } 29 | InputType = { #AutomataMod::Input } 30 | 31 | HirMod = { #Private::regex_syntax::hir } 32 | HirType = { #HirMod::Hir } 33 | } 34 | 35 | #[inline] 36 | #[must_use] 37 | fn render_class(class: &Class) -> TokenStream2 { 38 | match *class { 39 | Class::Unicode(ref class) => { 40 | let class = class.ranges().iter().map(|range| { 41 | let start = range.start(); 42 | let end = range.end(); 43 | 44 | quote! { #HirMod ::ClassUnicodeRange::new(#start, #end) } 45 | }); 46 | 47 | quote! { 48 | #HirMod ::Class::Unicode(#HirMod ::ClassUnicode::new([#(#class,)*])) 49 | } 50 | } 51 | Class::Bytes(ref class) => { 52 | let class = class.ranges().iter().map(|range| { 53 | let start = range.start(); 54 | let end = range.end(); 55 | 56 | quote! { #HirMod ::ClassBytesRange::new(#start, #end) } 57 | }); 58 | 59 | quote! { 60 | #HirMod ::Class::Bytes(#HirMod ::ClassBytes::new([#(#class,)*])) 61 | } 62 | } 63 | } 64 | } 65 | 66 | macro_rules! render_look { 67 | ($($Variant:ident)*) => { 68 | #[inline] 69 | #[must_use] 70 | fn render_look(look: Look) -> TokenStream2 { 71 | match look {$( 72 | Look::$Variant => quote! 
{ #HirMod ::Look::$Variant }, 73 | )*} 74 | } 75 | } 76 | } 77 | 78 | render_look! { 79 | Start 80 | End 81 | StartLF 82 | EndLF 83 | StartCRLF 84 | EndCRLF 85 | WordAscii 86 | WordAsciiNegate 87 | WordUnicode 88 | WordUnicodeNegate 89 | WordStartAscii 90 | WordEndAscii 91 | WordStartUnicode 92 | WordEndUnicode 93 | WordStartHalfAscii 94 | WordEndHalfAscii 95 | WordStartHalfUnicode 96 | WordEndHalfUnicode 97 | } 98 | 99 | #[inline] 100 | #[must_use] 101 | pub fn hir_expression(hir: &Hir) -> TokenStream2 { 102 | match *hir.kind() { 103 | HirKind::Empty => { 104 | quote! { #HirType::empty() } 105 | } 106 | HirKind::Literal(Literal(ref literal)) => { 107 | let literal = LiteralToken::byte_string(literal); 108 | 109 | quote! { #HirType::literal(*#literal) } 110 | } 111 | HirKind::Class(ref class) => { 112 | let class = render_class(class); 113 | 114 | quote! { #HirType::class(#class) } 115 | } 116 | HirKind::Look(look) => { 117 | let look = render_look(look); 118 | 119 | quote! { #HirType::look(#look) } 120 | } 121 | HirKind::Repetition(Repetition { 122 | min, 123 | max, 124 | greedy, 125 | ref sub, 126 | }) => { 127 | let max = match max { 128 | None => quote! { ::core::option::Option::None }, 129 | Some(max) => quote! { ::core::option::Option::Some(#max) }, 130 | }; 131 | 132 | let sub = hir_expression(sub); 133 | 134 | quote! { 135 | #HirType::repetition(#HirMod::Repetition { 136 | min: #min, 137 | max: #max, 138 | greedy: #greedy, 139 | sub: ::std::boxed::Box::new(#sub), 140 | }) 141 | } 142 | } 143 | HirKind::Capture(Capture { 144 | index, 145 | ref name, 146 | ref sub, 147 | }) => { 148 | let name = match name.as_deref() { 149 | None => quote! { ::core::option::Option::None }, 150 | Some(name) => quote! { 151 | ::core::option::Option::Some( 152 | ::core::convert::From::from(#name) 153 | ) 154 | }, 155 | }; 156 | 157 | let sub = hir_expression(sub); 158 | 159 | quote! 
{ 160 | #HirType::capture(#HirMod::Capture { 161 | index: #index, 162 | name: #name, 163 | sub: ::std::boxed::Box::new(#sub), 164 | }) 165 | } 166 | } 167 | HirKind::Concat(ref concat) => { 168 | let concat = concat.iter().map(hir_expression); 169 | 170 | quote! { 171 | #HirType::concat(::std::vec::Vec::from([#(#concat,)*])) 172 | } 173 | } 174 | HirKind::Alternation(ref alternation) => { 175 | let alternation = alternation.iter().map(hir_expression); 176 | 177 | quote! { 178 | #HirType::alternation(::std::vec::Vec::from([#(#alternation,)*])) 179 | } 180 | } 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /ctreg/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ctreg" 3 | version = "1.0.3" 4 | description = "Compile-time regular expressions the way they were always meant to be" 5 | 6 | edition.workspace = true 7 | authors.workspace = true 8 | readme.workspace = true 9 | license.workspace = true 10 | repository.workspace = true 11 | 12 | [dependencies] 13 | ctreg-macro = { path = "../ctreg-macro", version = "1.0.0" } 14 | regex-syntax = { workspace = true } 15 | regex-automata = { workspace = true, features = ["meta"] } 16 | 17 | [dev-dependencies] 18 | cool_asserts = { workspace = true } 19 | 20 | [features] 21 | # If enabled, the demo module will be included. This should really only be 22 | # needed when generating docs. 23 | demo = [] 24 | 25 | [package.metadata.docs.rs] 26 | features = ["demo"] 27 | -------------------------------------------------------------------------------- /ctreg/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | `ctreg` (pronounced cuh-tredge, in the style of Cthulhu) is a macro providing 3 | static typing to your regular expressions, allowing syntax errors to be 4 | detected at compile time and capture groups to be matched infallibly. 
5 | 6 | ``` 7 | use ctreg::regex; 8 | 9 | // Create a regular expression with the macro. This regular expression is 10 | // analyzed at compile time and its normalized representation is emitted as the 11 | // `HelloWorld` type. 12 | regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" } 13 | 14 | // Create an instance of the regular expression. 15 | let regex = HelloWorld::new(); 16 | 17 | // Use `is_match` to test if there was a match 18 | assert!(regex.is_match("Hello, World!")); 19 | assert!(regex.is_match("Goodbye!")); 20 | assert!(!regex.is_match("Nothing to see here.")); 21 | 22 | // Use `find` to find the location of a match 23 | let cap = regex.find("abc Greetings, Rustacean! 123").unwrap(); 24 | assert_eq!(cap.content, "Greetings, Rustacean!"); 25 | assert_eq!(cap.start, 4); 26 | assert_eq!(cap.end, 25); 27 | 28 | assert!(regex.find("Nothing to see here.").is_none()); 29 | 30 | // Use `captures` to find all of the named capture groups of a match (`greeting` 31 | // and `target`, in this case). Capture groups are emitted at compile time and 32 | // evaluated infallibly. 33 | let groups = regex.captures("ah, Bonjour, reader!").unwrap(); 34 | assert_eq!(groups.greeting.content, "Bonjour"); 35 | assert_eq!(groups.target.unwrap().content, "reader"); 36 | 37 | let groups = regex.captures("This is goodbye!").unwrap(); 38 | assert_eq!(groups.greeting.content, "goodbye"); 39 | assert!(groups.target.is_none()); 40 | 41 | assert!(regex.captures("nothing to see here.").is_none()); 42 | ``` 43 | 44 | # Syntax Checking 45 | 46 | If the regular expression includes any syntax errors, this will appear as a 47 | compile error, rather than a runtime panic. 48 | ```compile_fail 49 | use ctreg::regex; 50 | 51 | regex!
{ HelloWorld = "(?Mismatched Parenthesis" }; 52 | 53 | let regex = HelloWorld::new(); 54 | ``` 55 | */ 56 | 57 | #[doc(hidden)] 58 | pub mod ඞ { 59 | pub use ::regex_automata; 60 | pub use ::regex_syntax; 61 | } 62 | 63 | #[doc(hidden)] 64 | pub use ctreg_macro::regex_impl; 65 | 66 | /** 67 | Create a type representing a regular expression. See the [module docs][crate] 68 | for an example. 69 | 70 | This macro creates a type, called `$Type`, representing the given `$regex`. The 71 | regular expression is analyzed at compile time, and the `$Type` is emitted 72 | containing its normalized representation, with a regex-like API for searching 73 | and matching capture groups. See the [`demo::HelloWorld`] type for an example 74 | of the methods it generates. 75 | 76 | Additionally, it creates a type called `${Type}Captures`, which contains a 77 | [`Capture`] field for each named capture group in the regular expression. See 78 | the [`demo::HelloWorldCaptures`] type for an example of this. The 79 | [`captures`][demo::HelloWorld::captures] method performs a capturing search, 80 | which returns this type. This search is evaluated infallibly: all groups that 81 | are unconditionally present in the regular expression are also present in the 82 | captures type. Any groups that are optional or part of an alternation appear as 83 | an `Option`. Named capture groups cannot be part of repetitions, since 84 | there isn't a sensible thing to capture. 85 | 86 | To keep the API and output types simple, anonymous capture groups are not 87 | present in the capture groups, and are treated identically to non-capturing 88 | groups. 89 | 90 | If the regex has no named capture groups, no `captures` method or `Captures` 91 | type is generated. 
92 | 93 | Because it is not currently possible to create a regular expression in a 94 | `const` context, this macro operates by creating a type instead of an object; 95 | this type's constructor builds a regex at runtime using the post-parse 96 | [normalized form](https://docs.rs/regex-syntax/latest/regex_syntax/hir/struct.Hir.html) 97 | of the expression. In the spirit of 0-cost abstraction, we currently ask the 98 | caller to use their own `OnceLock` or whatever other abstraction is appropriate 99 | to manage the creation and lifespan of this object. This may change in the 100 | future. 101 | */ 102 | #[macro_export] 103 | macro_rules! regex { 104 | ($Type:ident = $regex:literal) => { 105 | $crate::regex_impl! { $Type = $regex } 106 | }; 107 | 108 | (pub $Type:ident = $regex:literal) => { 109 | $crate::regex_impl! { pub $Type = $regex } 110 | }; 111 | } 112 | 113 | /** 114 | Represents a single match of a regex in a haystack. It contains `start` and 115 | `end`, which are byte offsets of the location of the match, as well as the 116 | actual `content` of the match. 117 | 118 | This type is used by [`find`](demo::HelloWorld::find) to indicate the overall 119 | location of the match, and by [`captures`](demo::HelloWorld::captures), which 120 | returns a separate [`Capture`] for each named capture group that matched. 121 | 122 | This type is equivalent to the [`Match` 123 | ](https://docs.rs/regex/latest/regex/struct.Match.html) type from the `regex` 124 | crate. 125 | */ 126 | #[derive(Debug, Clone, Copy)] 127 | pub struct Capture<'a> { 128 | pub start: usize, 129 | pub end: usize, 130 | pub content: &'a str, 131 | } 132 | 133 | /** 134 | Demo module, showing the types created by the [`regex`] macro. 135 | 136 | This module contains the output of: 137 | 138 | ``` 139 | # use ctreg::regex; 140 | regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!"
} 141 | ``` 142 | 143 | It is intended to provide a comprehensive demonstration of the types and 144 | methods generated by [`regex`]. 145 | */ 146 | #[cfg(feature = "demo")] 147 | pub mod demo { 148 | use super::Capture; 149 | 150 | /** 151 | Example regular expression object. 152 | 153 | This type is the output of the [`regex`] macro, compiling the regular expression: 154 | 155 | ```text 156 | (?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?! 157 | ``` 158 | 159 | It matches strings like `"Hello, World!"` and `"Goodbye!"`. It is provided 160 | here as an example of the type and methods created by [`regex`]. 161 | 162 | See also the [`HelloWorldCaptures`] type, which is the generated type for 163 | getting capture groups. 164 | */ 165 | #[derive(Debug, Clone)] 166 | pub struct HelloWorld { 167 | regex: ::regex_automata::meta::Regex, 168 | } 169 | 170 | impl HelloWorld { 171 | /** 172 | Construct a new instance of this regular expression object 173 | */ 174 | #[inline] 175 | #[must_use] 176 | pub fn new() -> Self { 177 | let hir: ::regex_syntax::hir::Hir = ::regex_syntax::hir::Hir::concat(Vec::from([ 178 | ::regex_syntax::hir::Hir::capture(::regex_syntax::hir::Capture { 179 | index: 1u32, 180 | name: Some(From::from("greeting")), 181 | sub: Box::new(::regex_syntax::hir::Hir::repetition( 182 | ::regex_syntax::hir::Repetition { 183 | min: 1u32, 184 | max: None, 185 | greedy: true, 186 | sub: Box::new(::regex_syntax::hir::Hir::class( 187 | ::regex_syntax::hir::Class::Unicode( 188 | ::regex_syntax::hir::ClassUnicode::new([ 189 | ::regex_syntax::hir::ClassUnicodeRange::new('-', '.'), 190 | ::regex_syntax::hir::ClassUnicodeRange::new('0', '9'), 191 | ::regex_syntax::hir::ClassUnicodeRange::new('A', 'Z'), 192 | ::regex_syntax::hir::ClassUnicodeRange::new('_', '_'), 193 | ::regex_syntax::hir::ClassUnicodeRange::new('a', 'z'), 194 | ]), 195 | ), 196 | )), 197 | }, 198 | )), 199 | }), 200 | ::regex_syntax::hir::Hir::repetition(::regex_syntax::hir::Repetition { 201 | min: 0u32, 202
| max: Some(1u32), 203 | greedy: true, 204 | sub: Box::new(::regex_syntax::hir::Hir::concat(Vec::from([ 205 | ::regex_syntax::hir::Hir::literal(*b", "), 206 | ::regex_syntax::hir::Hir::capture(::regex_syntax::hir::Capture { 207 | index: 2u32, 208 | name: Some(From::from("target")), 209 | sub: Box::new(::regex_syntax::hir::Hir::repetition( 210 | ::regex_syntax::hir::Repetition { 211 | min: 1u32, 212 | max: None, 213 | greedy: true, 214 | sub: Box::new(::regex_syntax::hir::Hir::class( 215 | ::regex_syntax::hir::Class::Unicode( 216 | ::regex_syntax::hir::ClassUnicode::new([ 217 | ::regex_syntax::hir::ClassUnicodeRange::new( 218 | '-', '.', 219 | ), 220 | ::regex_syntax::hir::ClassUnicodeRange::new( 221 | '0', '9', 222 | ), 223 | ::regex_syntax::hir::ClassUnicodeRange::new( 224 | 'A', 'Z', 225 | ), 226 | ::regex_syntax::hir::ClassUnicodeRange::new( 227 | '_', '_', 228 | ), 229 | ::regex_syntax::hir::ClassUnicodeRange::new( 230 | 'a', 'z', 231 | ), 232 | ]), 233 | ), 234 | )), 235 | }, 236 | )), 237 | }), 238 | ]))), 239 | }), 240 | ::regex_syntax::hir::Hir::literal(*b"!"), 241 | ])); 242 | let regex = ::regex_automata::meta::Regex::builder() 243 | .build_from_hir(&hir) 244 | .expect("regex compilation failed, despite compile-time verification"); 245 | Self { regex } 246 | } 247 | 248 | /** 249 | Test if this regular expression matches the `haystack` string, without 250 | getting any information about the location of the match. 251 | 252 | Prefer this method if you only care *that* there was a match, as it 253 | might be faster than [`find`][HelloWorld::find] or 254 | [`captures`][HelloWorld::captures]. 255 | */ 256 | #[inline] 257 | #[must_use] 258 | pub fn is_match(&self, haystack: &str) -> bool { 259 | self.regex.is_match(haystack) 260 | } 261 | 262 | /** 263 | Find the first match of this regex in the `haystack`, and return it as a 264 | [`Capture`]. 
265 | 266 | Prefer this method if you only care about the overall location of a match 267 | in the haystack, without regard for the specific capture groups. 268 | */ 269 | #[inline] 270 | #[must_use] 271 | pub fn find<'i>(&self, haystack: &'i str) -> Option> { 272 | let capture = self.regex.find(haystack)?; 273 | let span = capture.span(); 274 | let start = span.start; 275 | let end = span.end; 276 | let content = unsafe { haystack.get_unchecked(start..end) }; 277 | Some(Capture { 278 | start, 279 | end, 280 | content, 281 | }) 282 | } 283 | } 284 | 285 | impl Default for HelloWorld { 286 | fn default() -> Self { 287 | Self::new() 288 | } 289 | } 290 | 291 | impl HelloWorld { 292 | /** 293 | Search for the first match of this regex in the `haystack`, and return 294 | an object containing all of the named capture groups that were found. 295 | */ 296 | #[inline] 297 | #[must_use] 298 | pub fn captures<'i>(&self, haystack: &'i str) -> Option> { 299 | let mut slots = [None; (2usize + 1) * 2]; 300 | let _ = self 301 | .regex 302 | .search_slots(&::regex_automata::Input::new(haystack), &mut slots)?; 303 | 304 | Some(HelloWorldCaptures { 305 | #[allow(clippy::blocks_in_conditions)] 306 | greeting: match { 307 | let slot_start = slots[2usize]; 308 | let slot_end = slots[3usize]; 309 | match slot_start { 310 | None => None, 311 | Some(start) => { 312 | let start = start.get(); 313 | let end = unsafe { slot_end.unwrap_unchecked() }.get(); 314 | let content = unsafe { haystack.get_unchecked(start..end) }; 315 | Some(Capture { 316 | start, 317 | end, 318 | content, 319 | }) 320 | } 321 | } 322 | } { 323 | Some(capture) => capture, 324 | None => unsafe { core::hint::unreachable_unchecked() }, 325 | }, 326 | target: { 327 | let slot_start = slots[4usize]; 328 | let slot_end = slots[5usize]; 329 | match slot_start { 330 | None => None, 331 | Some(start) => { 332 | let start = start.get(); 333 | let end = unsafe { slot_end.unwrap_unchecked() }.get(); 334 | let content = unsafe { 
haystack.get_unchecked(start..end) }; 335 | Some(Capture { 336 | start, 337 | end, 338 | content, 339 | }) 340 | } 341 | } 342 | }, 343 | }) 344 | } 345 | } 346 | 347 | /** 348 | Example captures object. 349 | 350 | This type is the output of the [`regex`] macro for the capture groups 351 | returned by the [`HelloWorld`] expression. 352 | */ 353 | #[derive(Debug, Clone, Copy)] 354 | pub struct HelloWorldCaptures<'a> { 355 | /** 356 | The greeting is a non-optional [`Capture`], because there will always 357 | be a greeting when the expression matches. 358 | */ 359 | pub greeting: Capture<'a>, 360 | 361 | /** 362 | The target is an optional [`Capture`], because the group is inside an 363 | `()?` optional group, so it may not be present even if the expression 364 | matched. Optional groups are also created by alternations. 365 | */ 366 | pub target: Option>, 367 | } 368 | } 369 | -------------------------------------------------------------------------------- /ctreg/tests/tests.rs: -------------------------------------------------------------------------------- 1 | use cool_asserts::assert_matches; 2 | use ctreg::{regex, Capture}; 3 | 4 | regex! { 5 | HelloWorld = r"(?[a-zA-Z0-9-_.]+)(, (?[a-zA-Z0-9-_.]+))?!" 6 | } 7 | 8 | #[test] 9 | fn test_is_match() { 10 | let regex = HelloWorld::new(); 11 | 12 | assert!(regex.is_match("Hello, World!")); 13 | assert!(!regex.is_match("nothing to see here")); 14 | assert!(regex.is_match("Panic! At the Repo")); 15 | } 16 | 17 | #[test] 18 | fn test_find() { 19 | let regex = HelloWorld::new(); 20 | 21 | let found = regex.find("abc Hello, World! Def").unwrap(); 22 | assert_matches!( 23 | found, 24 | Capture { 25 | start: 4, 26 | end: 17, 27 | content: "Hello, World!"
28 | } 29 | ); 30 | } 31 | 32 | #[test] 33 | fn test_capture_miss() { 34 | let regex = HelloWorld::new(); 35 | 36 | assert!(regex.captures("Nothing to see here.").is_none()); 37 | } 38 | 39 | #[test] 40 | fn test_captures() { 41 | let regex = HelloWorld::new(); 42 | 43 | let caps = regex.captures("Hello, World!").unwrap(); 44 | 45 | assert_matches!( 46 | caps, 47 | HelloWorldCaptures { 48 | greeting: Capture { 49 | start: 0, 50 | end: 5, 51 | content: "Hello" 52 | }, 53 | target: Some(Capture { 54 | start: 7, 55 | end: 12, 56 | content: "World", 57 | }) 58 | } 59 | ) 60 | } 61 | 62 | #[test] 63 | fn test_opt_capture() { 64 | let regex = HelloWorld::new(); 65 | 66 | let caps = regex.captures("12344321!").unwrap(); 67 | 68 | assert_matches!( 69 | caps, 70 | HelloWorldCaptures { 71 | greeting: Capture { 72 | start: 0, 73 | end: 8, 74 | content: "12344321" 75 | }, 76 | target: None, 77 | } 78 | ) 79 | } 80 | --------------------------------------------------------------------------------