├── .gitignore ├── .travis.sh ├── .travis.yml ├── CHANGES.md ├── COPYING ├── COPYING.LESSER ├── COPYING.LINKING ├── README.md ├── common ├── regexp.ml └── regexp.mli ├── dune-project ├── dune-workspace.dev ├── ppx_regexp.opam ├── ppx_regexp ├── dune └── ppx_regexp.ml ├── ppx_tyre.opam ├── ppx_tyre ├── dune ├── ppx_tyre.ml └── ppx_tyre.mli └── tests ├── dune ├── main.ml ├── test_ppx_regexp.ml ├── test_ppx_regexp_unused.ml ├── test_ppx_tyre.ml └── test_regexp.ml /.gitignore: -------------------------------------------------------------------------------- 1 | .merlin 2 | /_build 3 | /ppx_regexp.install 4 | /ppx_tyre.install 5 | -------------------------------------------------------------------------------- /.travis.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | set -ex 3 | cd `dirname $0` 4 | sudo apt -y install m4 5 | opam pin add -yn ${PKG_NAME} . 6 | opam depext -y ${PKG_NAME} 7 | opam install -yt ${PKG_NAME} 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | sudo: required 3 | 4 | services: 5 | - docker 6 | 7 | env: 8 | matrix: 9 | - IMAGE_NAME=ocaml/opam2:4.02 PKG_NAME=ppx_regexp 10 | - IMAGE_NAME=ocaml/opam2:4.02 PKG_NAME=ppx_tyre 11 | - IMAGE_NAME=ocaml/opam2:4.04 PKG_NAME=ppx_regexp 12 | - IMAGE_NAME=ocaml/opam2:4.04 PKG_NAME=ppx_tyre 13 | - IMAGE_NAME=ocaml/opam2:4.07 PKG_NAME=ppx_regexp 14 | - IMAGE_NAME=ocaml/opam2:4.07 PKG_NAME=ppx_tyre 15 | - IMAGE_NAME=ocaml/opam2:4.08 PKG_NAME=ppx_regexp 16 | - IMAGE_NAME=ocaml/opam2:4.08 PKG_NAME=ppx_tyre 17 | - IMAGE_NAME=ocaml/opam2:4.09 PKG_NAME=ppx_regexp 18 | - IMAGE_NAME=ocaml/opam2:4.09 PKG_NAME=ppx_tyre 19 | 20 | before_install: 21 | - docker pull $IMAGE_NAME 22 | 23 | script: 24 | - docker run --privileged -v `pwd`:/mnt:ro --env PKG_NAME=$PKG_NAME 25 | $IMAGE_NAME /mnt/.travis.sh 26 | 
-------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## v0.5.1 - 2022-06-09 2 | 3 | - Fix invalid AST due to empty binding list in `ppx_regexp`. 4 | 5 | ## v0.5.0 - 2022-06-06 6 | 7 | - Migrate `ppx_regexp` to ppxlib. 8 | - Change license exception for `ppx_regexp` to the LGPL-3.0 Linking 9 | Exception. 10 | 11 | ## v0.4.3 - 2019-11-25 12 | 13 | - Fixed nested `[%pcre]` usage for `ppx_regexp`. 14 | - Extended compiler support to 4.02.3 up to 4.09.0 (at least) for both PPXes. 15 | - Upgrade to AST 4.09 to support newer compiler features. 16 | 17 | ## v0.4.2 - 2019-03-24 18 | 19 | - Fix top level group elimination for `ppx_regexp` (#8). 20 | 21 | ## v0.4.1 - 2018-09-04 22 | 23 | - Fix multi-group top level regexp for `ppx_tyre`. 24 | 25 | ## v0.4.0 - 2018-08-20 26 | 27 | - Switched to internal regexp parser. 28 | - Added syntax extension for `tyre` (Gabriel Radanne). 29 | - Fixed type of captures under alternatives for `%pcre`. 30 | - Better error reporting, including locations. 31 | - The PPX now declares its runtime libraries. 32 | 33 | ## v0.3.2 - 2018-03-01 34 | 35 | - Prepare for re 1.7.2. 36 | 37 | ## v0.3.1 - 2017-08-21 38 | 39 | - Fix accidental shadowing of open from another interface-less module using 40 | `ppx_regexp`. 41 | - Support binding of group 0 and the universal pattern. 42 | - Switch to `ppx_tools_versioned`. This provides support for 4.02.3 in the 43 | main branch. 44 | 45 | ## v0.3.0 - 2017-06-04 46 | 47 | - Initial release for OCaml 4.03.0 and 4.04.1. 48 | 49 | ## v0.2.0 - 2017-06-04 50 | 51 | - Initial release for OCaml 4.02.3. 52 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 
5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 
39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 
76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 
113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. 
For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 
174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 
209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. 
Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /COPYING.LESSER: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions.
14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /COPYING.LINKING: -------------------------------------------------------------------------------- 1 | LGPL-3.0 LINKING EXCEPTION 2 | 3 | As a special exception to the GNU Lesser General Public License 4 | version 3 ("LGPL3"), the copyright holders of this Library give you 5 | permission to convey to a third party a Combined Work that links 6 | statically or dynamically to this Library without providing any 7 | Minimal Corresponding Source or Minimal Application Code as set out in 8 | 4d or providing the installation information set out in section 4e, 9 | provided that you comply with the other provisions of LGPL3 and 10 | provided that you meet, for the Application the terms and conditions 11 | of the license(s) which apply to the Application. 12 | 13 | Except as stated in this special exception, the provisions of LGPL3 14 | will continue to comply in full to this Library. If you modify this 15 | Library, you may apply this exception to your version of this Library, 16 | but you are not obliged to do so. If you do not wish to do so, delete 17 | this exception statement from your version. This exception does not 18 | (and cannot) modify any license terms which apply to the Application, 19 | with which you must still comply. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status][ci-build-status]][ci] 2 | 3 | # Two PPXes for Working with Regular Expressions 4 | 5 | This repo provides two PPXes providing regular expression-based routing: 6 | 7 | - `ppx_regexp` maps to [re][] with the conventional last-match extraction 8 | into `string` and `string option`. 9 | - `ppx_tyre` maps to [Tyre][tyre] providing typed extraction into options, 10 | lists, tuples, objects, and polymorphic variants. 11 | 12 | Another difference is that `ppx_regexp` works directly on strings 13 | essentially hiding the library calls, while `ppx_tyre` provides `Tyre.t` and 14 | `Tyre.route` which can be composed and applied using the Tyre library. 15 | 16 | ## `ppx_regexp` - Regular Expression Matching with OCaml Patterns 17 | 18 | This syntax extension turns 19 | ```ocaml 20 | function%pcre 21 | | {|re1|} -> e1 22 | ... 23 | | {|reN|} -> eN 24 | | _ -> e0 25 | ``` 26 | into suitable invocations of the [Re library][re], and similar for 27 | `match%pcre`. The patterns are plain strings of the form accepted by 28 | `Re_pcre`, with the following additions: 29 | 30 | - `(?<var>...)` defines a group and binds whatever it matches as `var`. 31 | The type of `var` will be `string` if the match is guaranteed given that 32 | the whole pattern matches, and `string option` if the variable is bound 33 | to or nested below an optionally matched group. 34 | 35 | - `?<var>` at the start of a pattern binds group 0 as `var : string`. 36 | This may not be the full string if the pattern is unanchored. 37 | 38 | A variable is allowed for the universal case and is bound to the matched 39 | string. A regular alias is currently not allowed for patterns, since it is 40 | not obvious whether it should bind the full string or group 0.
41 | 42 | ### Example 43 | 44 | The following prints out times and hosts for SMTP connections to the Postfix 45 | daemon: 46 | ```ocaml 47 | (* Link with re, re.pcre, lwt, lwt.unix. 48 | Preprocess with ppx_regexp. 49 | Adjust to your OS. *) 50 | 51 | open Lwt.Infix 52 | 53 | let check_line = 54 | (function%pcre 55 | | {|(?<t>.*:\d\d) .* postfix/smtpd\[[0-9]+\]: connect from (?<host>[a-z0-9.-]+)|} -> 56 | Lwt_io.printlf "%s %s" t host 57 | | _ -> 58 | Lwt.return_unit) 59 | 60 | let () = Lwt_main.run begin 61 | Lwt_io.printl "SMTP connections from:" >>= fun () -> 62 | Lwt_stream.iter_s check_line (Lwt_io.lines_of_file "/var/log/syslog") 63 | end 64 | ``` 65 | 66 | ## `ppx_tyre` - Syntax Support for Tyre Routes 67 | 68 | ### Typed regular expressions 69 | 70 | This PPX compiles 71 | ```ocaml 72 | [%tyre {|re|}] 73 | ``` 74 | into `'a Tyre.t`. 75 | 76 | For instance, we can define a pattern that recognizes strings of the form "dim:3x5" like so: 77 | 78 | ```ocaml 79 | # open Tyre ;; 80 | # let dim = [%tyre "dim:(?&int)x(?&int)"] ;; 81 | val dim : (int * int) Tyre.t 82 | ``` 83 | 84 | The syntax `(?&id)` calls a typed regular expression named `id` of type `'a Tyre.t`, such as `Tyre.int`. 85 | 86 | For convenience, you can also use *named* capture groups to name the captured elements. 87 | ```ocaml 88 | # let dim = [%tyre "dim:(?<x>(?&int))x(?&y:int)"] ;; 89 | val dim : < x : int; y : int > Tyre.t 90 | ``` 91 | 92 | Names given using the syntax `(?<name>re)` will be used for the fields 93 | of the results. `(?&y:int)` is a shortcut for `(?<y>(?&int))`. 94 | This can also be used for alternatives, for instance: 95 | 96 | ```ocaml 97 | # let id_or_name = [%tyre "id:(?&id:int)|name:(?<name>[[:alnum:]]+)"] ;; 98 | val id_or_name : [ `id of int | `name of string ] Tyre.t 99 | ``` 100 | 101 | Expressions of type `Tyre.t` can then be composed as part of bigger regular 102 | expressions, or compiled with `Tyre.compile`. 103 | See [tyre][]'s documentation for details.
104 | 105 | ### Routes 106 | 107 | `ppx_tyre` can also be used for routing, in the style of `ppx_regexp`: 108 | 109 | ```ocaml 110 | function%tyre 111 | | {|re1|} -> e1 112 | ... 113 | | {|reN|} -> eN 114 | ``` 115 | 116 | is turned into a `'a Tyre.route`, where `re`, `re1`, ... are regular expressions 117 | using the same syntax as above. `"re" as v` is considered like `(?<v>re)` and 118 | `"re1" | "re2"` is turned into a regular expression alternative. 119 | 120 | Once routes are defined, matching is done with `Tyre.exec`. 121 | 122 | ### Details 123 | 124 | The syntax follows Perl's syntax: 125 | 126 | - `re?` extracts an option of what `re` extracts. 127 | - `re+`, `re*`, `re{n,m}` extract a list of what `re` extracts. 128 | - `(?&qname)` refers to any identifier bound to a typed regular expression 129 | of type `'a Tyre.t`. 130 | - Normal parens are *non-capturing*. 131 | - There are two ways to capture: 132 | - Anonymous capture `(+re)` 133 | - Named capture `(?<v>re)` 134 | - One or more `(?<v>re)` at the top level can be used to bind variables 135 | instead of `as ...`. 136 | - One or more `(?<v>re)` in a sequence extracts an object where each method 137 | `v` is bound to what `re` extracts. 138 | - An alternative with one `(?<v>re)` per branch extracts a polymorphic 139 | variant where each constructor `` `v`` receives what `re` extracts as its 140 | argument. 141 | - `(?&v:qname)` is a shortcut for `(?<v>(?&qname))`. 142 | 143 | ## Limitations 144 | 145 | ### No Pattern Guards 146 | 147 | Pattern guards are not supported. This is due to the fact that all match 148 | cases are combined into a single regular expression, so if one of the 149 | patterns succeeds, the match is committed before we can check the guard 150 | condition. 151 | 152 | ### No Exhaustiveness Check 153 | 154 | The syntax extension will always warn if no catch-all case is provided. No 155 | exhaustiveness check is attempted.
Doing it right would require 156 | reimplementing full regular expression parsing and an algorithm which would 157 | ideally produce a counter-example. 158 | 159 | ## Bug Reports 160 | 161 | The processor is currently new and not well tested. Please break it and 162 | file bug reports in the GitHub issue tracker. Any exception raised by 163 | generated code except for `Match_failure` is a bug. 164 | 165 | 166 | [ci]: https://travis-ci.org/paurkedal/ppx_regexp 167 | [ci-build-status]: https://travis-ci.org/paurkedal/ppx_regexp.svg?branch=master 168 | [re]: https://github.com/ocaml/ocaml-re 169 | [tyre]: https://github.com/Drup/tyre 170 | -------------------------------------------------------------------------------- /common/regexp.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2018--2022 Petter A. Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the OCaml static compilation exception or (at 7 | * your option) the LGPL-3.0 Linking Exception. 8 | * 9 | * This library is distributed in the hope that it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 12 | * License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public License 15 | * along with this library. If not, see <https://www.gnu.org/licenses/>.
*)

(* Shorthand for attaching a location to a value. *)
let mkloc = Location.mkloc

(* Function composition: [(f % g) x] is [f (g x)]. *)
let (%) f g x = f (g x)

(* Located syntax tree of a regular expression.  The parameter ['a] is the
   payload of [Code] leaves; [parse_exn] below produces [string] leaves, each
   holding a fragment of the input that [Re.Perl.re] accepted. *)
type 'a t = 'a node Location.loc
and 'a node =
  | Code of 'a
    (* Leaf fragment. *)
  | Seq of 'a t list
    (* Concatenation; [Seq []] stands for the empty regular expression. *)
  | Alt of 'a t list
    (* Alternation of the branches separated by a vertical bar. *)
  | Opt of 'a t
    (* Operand followed by [?]. *)
  | Repeat of (int * int option) Location.loc * 'a t
    (* Operand followed by a star, a plus, or a brace range.  The pair is
       (minimum, maximum) where a maximum of [None] means unbounded. *)
  | Nongreedy of 'a t
    (* A repetition or option followed by an extra [?]. *)
  | Capture of 'a t
    (* Anonymous capture, written [(+re)]. *)
  | Capture_as of string Location.loc * 'a t
    (* Named capture, written [(?<name>re)] or [(?&name:qname)]. *)
  | Call of Longident.t Location.loc
    (* Reference to another expression, written [(?&qname)]. *)
  (* TODO: | Case_sense of t | Case_blind of t *)

(* [nonepsilon e] is [false] exactly when [e] is the empty sequence. *)
let nonepsilon = function {Location.txt = Seq []; _} -> false | _ -> true

(* Build a sequence from [es] after dropping empty sequences.  When a single
   element remains it is returned unchanged, keeping its own location;
   otherwise the result is a [Seq] node located at [loc]. *)
let simplify_seq ~loc es =
  (match List.filter nonepsilon es with
   | [e] -> e
   | es -> mkloc (Seq es) loc)

(* Build an (unlocated) alternation node from [es]; a single branch is
   unwrapped instead of being put into an [Alt]. *)
let simplify_alt es =
  (match es with
   | [e] -> e.Location.txt
   | es -> Alt es)

module Int_map = struct
  module M = Map.Make (struct type t = int let compare = compare end)

  (* [find_last f m] returns the binding with the largest key satisfying [f]
     and raises [Not_found] when there is none.
     NOTE(review): presumably a fallback for standard libraries whose [Map]
     has no [find_last]; where [M] provides one, the [include] below shadows
     this definition, which would explain the disabled warning 32. *)
  [@@@ocaml.warning "-32"]
  let find_last f m = List.find (fun (k, _) -> f k) (List.rev (M.bindings m))

  include M
end

(* [parse_exn ?pos s] parses the regular expression [s] into a syntax tree
   with [string] leaves.

   [pos] is the source position of the first character of [s].  When it is
   left at [Lexing.dummy_pos], every location in the result is a dummy
   location; otherwise locations are computed from [pos] under the assumption
   that each character of [s] occupies exactly one byte of the source.

   Syntax errors are reported through [Location.raise_errorf]. *)
let parse_exn ?(pos = Lexing.dummy_pos) s =
  let l = String.length s in
  (* Reading at the end of the input yields a virtual ')', so that the
     scanners below treat the end of the string like the end of a group. *)
  let get i = if i = l then ')' else s.[i] in

  (* Location Tracking *)
  let position_of_index =
    if pos = Lexing.dummy_pos then (fun _ -> Lexing.dummy_pos) else
    (* Maps the index in [s] at which each line starts to its line number. *)
    let newlines =
      let rec loop acc lnum i =
        if i = l then acc else
        if s.[i] <> '\n' then loop acc lnum (i + 1) else
        loop (Int_map.add (i + 1) (lnum + 1) acc) (lnum + 1) (i + 1)
      in
      loop (Int_map.singleton 0 pos.pos_lnum) pos.pos_lnum 0
    in
    fun i ->
      let j, pos_lnum = Int_map.find_last (fun j -> j <= i) newlines in
      { pos with
        pos_lnum;
        (* [pos_bol] is the absolute offset of the start of the line.  The
           first line (j = 0) started before the string did, so the offset
           from [pos] is kept; every later line starts inside the string, at
           absolute offset [pos.pos_cnum + j].  Using [pos.pos_bol + j] there
           would shift all columns by the starting column of the string. *)
        pos_bol = (if j = 0 then pos.pos_bol else pos.pos_cnum + j);
        pos_cnum = pos.pos_cnum + i; }
  in
  (* Location of the half-open index range from [i] up to [j]. *)
  let make_loc (i, j) =
    let open Location in
    if pos = Lexing.dummy_pos then Location.none else
    { loc_start = position_of_index i;
      loc_end = position_of_index j;
      loc_ghost = false }
  in
  let wrap_loc (i, j) x = Location.{txt = x; loc = make_loc (i, j)} in
  (* Run the scanner [f] at [i] and locate its result at the consumed range. *)
  let with_loc f i = let j, e = f i in j, wrap_loc (i, j) e in
  (* Wrap [e] using [f], with a location running from the start of [e] to
     index [j]; used for postfix operators. *)
  let suffix_loc j f (e : _ Location.loc) =
    let e' = f e in
    if pos = Lexing.dummy_pos then Location.mknoloc e' else
    let loc = Location.{
      loc_start = e.loc.loc_start;
      loc_end = position_of_index j;
      loc_ghost = false;
    } in
    mkloc e' loc
  in

  let fail (i, j) msg = Location.raise_errorf ~loc:(make_loc (i, j)) "%s" msg in

  (* Identifiers *)
  let scan_ident i =
    let rec scan_cont j =
      (match get j with
       | 'A'..'Z' | 'a'..'z' | '0'..'9' | '_' | '\'' -> scan_cont (j + 1)
       | _ -> (j, String.sub s i (j - i)))
    in
    (match get i with
     | 'A'..'Z' | 'a'..'z' | '_' -> scan_cont (i + 1)
     | _ -> fail (i, i) "Expecting an identifier.")
  in
  (* Extend the long identifier [lidr] with any dot-separated components
     found at [i]. *)
  let rec scan_longident_cont lidr i =
    if get i <> '.' then (i, lidr) else
    let j, idr = scan_ident (i + 1) in
    scan_longident_cont (Longident.Ldot (lidr, idr)) j
  in
  let scan_longident i =
    let j, idr = scan_ident i in
    scan_longident_cont (Longident.Lident idr) j
  in
  (* From here on the identifier scanners return located results. *)
  let scan_ident = with_loc scan_ident in
  let scan_longident = with_loc scan_longident in
  let scan_longident_cont idr =
    with_loc (scan_longident_cont (Longident.Lident idr)) in

  (* Non-Nested Parts *)
  (* Turn the substring from [i] up to [j] into a [Code] leaf after checking
     that [Re.Perl.re] accepts it. *)
  let re_perl (i, j) =
    let sij = String.sub s i (j - i) in
    try ignore (Re.Perl.re sij); wrap_loc (i, j) (Code sij)
    with Re.Perl.Parse_error | Re.Perl.Not_supported ->
      fail (i, j) "Rejected by Re.Perl."
  in
  (* A backslash and the character after it form one leaf, whatever that
     character is. *)
  let scan_escape i =
    if i + 1 = l then fail (i, i+1) "Escape at end of regular expression." else
    (i + 2, re_perl (i, i + 2))
  in
  (* Scan a character set opened by the bracket at [i]; [j] is the current
     scan position. *)
  let rec scan_cset i j =
    if j = l then fail (i, i + 1) "Unbalanced '['." else
    (match s.[j] with
     | '\\' ->
        if j + 1 = l then
          fail (j, j + 1) "Backslash at end of RE while scanning character set."
        else
          scan_cset i (j + 2)
     | '[' when get (j + 1) = ':' ->
        (* Skip to the closing bracket of a class such as [:alnum:]. *)
        (match String.index_from s (j + 1) ']' with
         | exception Not_found ->
            fail (j + 1, j + 2) "Unbalanced '[' in character set."
         | k -> scan_cset i (k + 1))
     (* A closing bracket directly after the opening bracket, or directly
        after a leading caret, is a member of the set and not its end. *)
     | ']' when j <> i + 1 && (j <> i + 2 || s.[i + 1] <> '^') ->
        (j + 1, re_perl (i, j + 1))
     | _ -> scan_cset i (j + 1))
  in

  (* Repeat and Opt *)
  (* Scan an optional run of decimal digits at [i]. *)
  let scan_int_opt i =
    let rec loop i n =
      if i = l then (i, n) else
      (match s.[i] with
       | '0'..'9' as ch -> loop (i + 1) (10 * n + (Char.code ch - 48))
       | _ -> (i, n))
    in
    let j, n = loop i 0 in
    (j, (if i = j then None else Some n))
  in
  (* Scan the inside of a brace range: a mandatory lower bound, optionally
     followed by a comma and an optional upper bound.  Without a comma the
     upper bound equals the lower bound. *)
  let scan_range i =
    let j, n_min = scan_int_opt i in
    let n_min =
      (match n_min with
       | None -> fail (i, i) "Missing lower bound for range."
       | Some n -> n) in
    (match get j with
     | ',' ->
        let j, n_max = scan_int_opt (j + 1) in
        (match n_max with
         | Some n_max when n_max < n_min -> fail (i, j) "Reversed repeat range."
         | _ -> ());
        (j, n_min, n_max)
     | _ ->
        (j, n_min, (Some n_min)))
  in
  (* Apply the postfix operator [f] to the most recently scanned item, which
     is the head of the reversed accumulator. *)
  let apply_to_head (i, j) f = function
   | [] -> fail (i, j) "Operator must follow an operand."
   | e :: es -> f e :: es
  in
  (* Scan an optional non-greedy marker after a repetition operator and
     reject a directly following second repetition operator. *)
  let scan_greedyness i =
    let j, greedyness =
      (match get i with
       | '?' -> (i + 1, suffix_loc (i + 1) (fun e -> Nongreedy e))
       | '+' -> fail (i, i + 1) "Possessive modifier not supported."
       | _ -> (i, (fun e -> e))) in
    (match get j with
     | '?' | '*' | '+' | '{' ->
        fail (j, j + 1) "Nested repetition must be parenthesized."
     | _ -> (j, greedyness))
  in
  (* Build the wrapper for a repetition whose operator spans [i] up to [j]. *)
  let repeat (i, j) (n_min, n_max) =
    suffix_loc j (fun e -> Repeat (wrap_loc (i, j) (n_min, n_max), e))
  in

  (* Sequences and Groups *)
  let
    rec scan_alt i =
      let j, e = scan_alt_item i [] in
      (j, simplify_alt e)
    and scan_alt_item i acc =
      let j, e = scan_seq i in
      (match get j with
       | ')' -> (j, List.rev (e :: acc))
       | '|' -> scan_alt_item (j + 1) (e :: acc)
       (* [scan_seq] only stops at one of the two characters above. *)
       | _ -> assert false)

    and scan_seq i =
      let j, e = scan_seq_item i [] in
      (j, simplify_seq ~loc:(make_loc (i, j)) e)
    and scan_seq_item i acc =
      (match get i with
       | ')' | '|' -> (i, List.rev acc)
       | '[' ->
          let j, e = scan_cset i (i + 1) in
          scan_seq_item j (e :: acc)
    (* TODO: Reject repetition of ε and zero-width assertions. *)
       | '?' ->
          let j = i + 1 in
          let f = suffix_loc j (fun e -> Opt e) in
          let k, g = scan_greedyness j in
          scan_seq_item k (apply_to_head (i, k) (g % f) acc)
       | '*' ->
          let j = i + 1 in
          let f = repeat (i, j) (0, None) in
          let k, g = scan_greedyness j in
          scan_seq_item k (apply_to_head (i, k) (g % f) acc)
       | '+' ->
          let j = i + 1 in
          let f = repeat (i, j) (1, None) in
          let k, g = scan_greedyness j in
          scan_seq_item k (apply_to_head (i, k) (g % f) acc)
       | '{' ->
          let j, n_min, n_max = scan_range (i + 1) in
          if j = l || s.[j] <> '}' then fail (i, i + 1) "Unbalanced '{'." else
          (* [j] is the index of the closing brace; pass [j + 1] so that the
             locations cover the whole operator, as for star and plus. *)
          let f = repeat (i, j + 1) (n_min, n_max) in
          let k, g = scan_greedyness (j + 1) in
          scan_seq_item k (apply_to_head (i, k) (g % f) acc)
       | '(' ->
          let j, e = scan_group (i + 1) in
          if j = l || s.[j] <> ')' then fail (i, i + 1) "Unbalanced '('." else
          scan_seq_item (j + 1) (wrap_loc (i, j + 1) e :: acc)
       | '^' -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc)
       | '$' -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc)
       | '\\' ->
          let j, e = scan_escape i in
          scan_seq_item j (e :: acc)
       | _ -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc))

    (* Scan the inside of a parenthesized group; [i] is the index just after
       the opening parenthesis.  Returns the index where scanning stopped and
       the unlocated node. *)
    and scan_group i =
      (match get i with
       | '?' ->
          if i + 1 = l then fail (i - 1, i) "Unbalanced '('." else
          (match s.[i + 1] with
           | '&' ->
              let j, idr = scan_ident (i + 2) in
              if get j = ':' then
                (* Named call: the identifier before the colon is the
                   capture name, the long identifier after it the callee. *)
                let k, lidr = scan_longident (j + 1) in
                (k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
              else
                let k, lidr = scan_longident_cont idr.Location.txt j in
                (k, Call lidr)
           | '<' ->
              let j, idr = scan_ident (i + 2) in
              if get j <> '>' then fail (i, i + 1) "Unbalanced '<'." else
              let k, e = with_loc scan_alt (j + 1) in
              (k, Capture_as (idr, e))
           | ':' ->
              scan_alt (i + 2)
           | '#' ->
              (* Comment group: skipped and represented as the empty
                 sequence. *)
              (try (String.index_from s (i + 2) ')', Seq []) with
               | Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
           | _ ->
              fail (i, i + 2) "Invalid group modifier.")
       | '+' -> let j, e = with_loc scan_alt (i + 1) in (j, Capture e)
       | '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
       | _ -> scan_alt i)
  in

  (* Top-Level *)
  (* A leading [?<name>] names the whole pattern without needing an
     enclosing pair of parentheses. *)
  let scan_toplevel i =
    if get i = '?' && get (i + 1) = '<' then scan_group i else scan_alt i
  in
  let j, e = with_loc scan_toplevel 0 in
  (* Stopping before the end means a closing parenthesis had no opener. *)
  if j <> l then fail (j, j + 1) "Unbalanced ')'." else e

-------------------------------------------------------------------------------- /common/regexp.mli: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2018 Petter A.
Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the OCaml static compilation exception or (at 7 | * your option) the LGPL-3.0 Linking Exception. 8 | * 9 | * This library is distributed in the hope that it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 12 | * License for more details. 13 | * 14 | * You should have received a copy of the GNU Lesser General Public License 15 | * along with this library. If not, see . 16 | *) 17 | 18 | type 'a t = 'a node Location.loc 19 | and 'a node = 20 | | Code of 'a 21 | | Seq of 'a t list 22 | | Alt of 'a t list 23 | | Opt of 'a t 24 | | Repeat of (int * int option) Location.loc * 'a t 25 | | Nongreedy of 'a t 26 | | Capture of 'a t 27 | | Capture_as of string Location.loc * 'a t 28 | | Call of Longident.t Location.loc 29 | (* TODO: | Case_sense of t | Case_blind of t *) 30 | 31 | val parse_exn : ?pos: Lexing.position -> string -> string t 32 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.11) 2 | (name ppx_regexp) 3 | (allow_approximate_merlin) 4 | -------------------------------------------------------------------------------- /dune-workspace.dev: -------------------------------------------------------------------------------- 1 | (lang dune 1.11) 2 | (context (opam (switch 4.04.2))) 3 | (context (opam (switch 4.08.1))) 4 | (context (opam (switch 4.09.1))) 5 | (context (opam (switch 4.11.2))) 6 | (context (opam (switch 4.13.1))) 7 | (context (opam (switch 4.14.1))) 8 | (context (opam (switch 5.0.0))) 9 | 
-------------------------------------------------------------------------------- /ppx_regexp.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Petter A. Urkedal " 3 | authors: [ 4 | "Petter A. Urkedal " 5 | "Gabriel Radanne " 6 | ] 7 | license: "LGPL-3.0-or-later WITH LGPL-3.0-linking-exception" 8 | homepage: "https://github.com/paurkedal/ppx_regexp" 9 | bug-reports: "https://github.com/paurkedal/ppx_regexp/issues" 10 | depends: [ 11 | "ocaml" {>= "4.02.3"} 12 | "dune" {>= "1.11"} 13 | "ppxlib" {>= "0.9.0"} 14 | "re" {>= "1.7.2"} 15 | "qcheck" {with-test} 16 | ] 17 | build: ["dune" "build" "-p" name "-j" jobs] 18 | dev-repo: "git+https://github.com/paurkedal/ppx_regexp.git" 19 | synopsis: "Matching Regular Expressions with OCaml Patterns" 20 | description: """ 21 | This syntax extension turns 22 | 23 | match%pcre x with 24 | | {|re1|} -> e1 25 | ... 26 | | {|reN|} -> eN 27 | | _ -> e0 28 | 29 | into suitable invocations to the ocaml-re library. The patterns are plain 30 | strings of the form accepted by `Re_pcre`, except groups can be bound to 31 | variables using the syntax `(?<var>...)`. The type of `var` will be 32 | `string` if a match of the group is guaranteed given a match of the 33 | whole pattern, and `string option` if the variable is bound to or nested 34 | below an optionally matched group.
35 | """ 36 | -------------------------------------------------------------------------------- /ppx_regexp/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_regexp) 3 | (public_name ppx_regexp) 4 | (kind ppx_rewriter) 5 | (modules ppx_regexp regexp) 6 | (preprocess (pps ppxlib.metaquot)) 7 | (libraries ppxlib re re.perl) 8 | (ppx_runtime_libraries re re.perl)) 9 | 10 | (rule (copy ../common/regexp.mli regexp.mli)) 11 | (rule (copy ../common/regexp.ml regexp.ml)) 12 | -------------------------------------------------------------------------------- /ppx_regexp/ppx_regexp.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2017--2023 Petter A. Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the LGPL-3.0 Linking Exception. 7 | * 8 | * This library is distributed in the hope that it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 11 | * License for more details. 12 | * 13 | * You should have received a copy of the GNU Lesser General Public License 14 | * along with this library. If not, see . 
15 | *) 16 | 17 | open Ppxlib 18 | open Ast_builder.Default 19 | 20 | let error = Location.raise_errorf 21 | 22 | let warn ~loc msg e = 23 | let e_msg = estring ~loc msg in 24 | let name = {txt = "ocaml.ppwarning"; loc} in 25 | let payload = PStr [{pstr_desc = Pstr_eval (e_msg, []); pstr_loc = loc}] in 26 | {e with pexp_attributes = attribute ~loc ~name ~payload :: e.pexp_attributes} 27 | 28 | module List = struct 29 | include List 30 | 31 | let rec fold f = function 32 | | [] -> fun acc -> acc 33 | | x :: xs -> fun acc -> fold f xs (f x acc) 34 | end 35 | 36 | module Regexp = struct 37 | include Regexp 38 | 39 | let bindings = 40 | let rec recurse must_match (e' : _ Location.loc) = 41 | let loc = e'.Location.loc in 42 | (match e'.Location.txt with 43 | | Code _ -> fun acc -> acc 44 | | Seq es -> List.fold (recurse must_match) es 45 | | Alt es -> List.fold (recurse false) es 46 | | Opt e -> recurse false e 47 | | Repeat ({Location.txt = (i, _); _}, e) -> 48 | recurse (must_match && i > 0) e 49 | | Nongreedy e -> recurse must_match e 50 | | Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre." 51 | | Capture_as (idr, e) -> 52 | fun (nG, bs) -> 53 | recurse must_match e (nG + 1, (idr, Some nG, must_match) :: bs) 54 | | Call _ -> error ~loc "(&...) is not implemented for %%pcre.") 55 | in 56 | (function 57 | | {Location.txt = Capture_as (idr, e); _} -> 58 | recurse true e (0, [idr, None, true]) 59 | | e -> 60 | recurse true e (0, [])) 61 | 62 | let to_string = 63 | let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in 64 | let delimit_if b s = if b then "(?:" ^ s ^ ")" else s in 65 | let rec recurse p (e' : _ Location.loc) = 66 | let loc = e'.Location.loc in 67 | (match e'.Location.txt with 68 | | Code s -> 69 | (* Delimiters not needed as Regexp.parse_exn only returns single 70 | * chars, csets, and escape sequences. 
*) 71 | s 72 | | Seq es -> 73 | delimit_if (p > p_seq) 74 | (String.concat "" (List.map (recurse p_seq) es)) 75 | | Alt es -> 76 | delimit_if (p > p_alt) 77 | (String.concat "|" (List.map (recurse p_alt) es)) 78 | | Opt e -> 79 | delimit_if (p > p_suffix) (recurse p_atom e ^ "?") 80 | | Repeat ({Location.txt = (i, j_opt); _}, e) -> 81 | let j_str = match j_opt with None -> "" | Some j -> string_of_int j in 82 | delimit_if (p > p_suffix) 83 | (Printf.sprintf "%s{%d,%s}" (recurse p_atom e) i j_str) 84 | | Nongreedy e -> (* The operand is printed at suffix precedence so the lazy "?" attaches directly to its repetition operator; the result is itself a suffix form and must be delimited like Opt and Repeat when it appears as the operand of another suffix operator, e.g. (a*?)+ must print as (?:a{0,}?){1,}. *) delimit_if (p > p_suffix) (recurse p_suffix e ^ "?") 85 | | Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre." 86 | | Capture_as (_, e) -> "(" ^ recurse p_alt e ^ ")" 87 | | Call _ -> error ~loc "(&...) is not implemented for %%pcre.") 88 | in 89 | (function 90 | | {Location.txt = Capture_as (_, e); _} -> 91 | recurse 0 e 92 | | e -> 93 | recurse 0 e) 94 | end 95 | 96 | let fresh_var = 97 | let c = ref 0 in 98 | fun () -> incr c; Printf.sprintf "_ppx_regexp_%d" !c 99 | 100 | let rec is_zero p k = 101 | (match p.[k] with 102 | | '0' -> is_zero p (k + 1) 103 | | '1'..'9' -> false 104 | | _ -> true) 105 | 106 | let rec must_match p i = 107 | let l = String.length p in 108 | if i = l then true else 109 | if p.[i] = '?'
|| p.[i] = '*' then false else 110 | if p.[i] = '{' then 111 | let j = String.index_from p (i + 1) '}' in 112 | not (is_zero p (i + 1)) && must_match p (j + 1) 113 | else 114 | true 115 | 116 | let extract_bindings ~pos s = 117 | let r = Regexp.parse_exn ~pos s in 118 | let nG, bs = Regexp.bindings r in 119 | let re_str = Regexp.to_string r in 120 | let loc = Location.none in 121 | (estring ~loc re_str, bs, nG) 122 | 123 | let rec wrap_group_bindings ~loc rhs offG = function 124 | | [] -> rhs 125 | | (varG, iG, mustG) :: bs -> 126 | let eG = match iG with 127 | | None -> 128 | [%expr Re.Group.get _g 0] 129 | | Some iG -> 130 | [%expr Re.Group.get _g [%e eint ~loc (offG + iG + 1)]] 131 | in 132 | let eG = 133 | if mustG then eG else 134 | [%expr try Some [%e eG] with Not_found -> None] 135 | in 136 | [%expr 137 | let [%p ppat_var ~loc varG] = [%e eG] in 138 | [%e wrap_group_bindings ~loc rhs offG bs]] 139 | 140 | let transform_cases ~loc cases = 141 | let aux case = 142 | if case.pc_guard <> None then 143 | error ~loc "Guards are not implemented for match%%pcre." 
144 | else 145 | Ast_pattern.(parse (pstring __')) loc case.pc_lhs 146 | begin fun {txt = re_src; loc = {loc_start; loc_end; _}} -> 147 | let re_offset = 148 | (loc_end.pos_cnum - loc_start.pos_cnum - String.length re_src) / 2 149 | in 150 | let pos = {loc_start with pos_cnum = loc_start.pos_cnum + re_offset} in 151 | let re, bs, nG = extract_bindings ~pos re_src in 152 | (re, nG, bs, case.pc_rhs) 153 | end 154 | in 155 | let cases, default_rhs = 156 | (match List.rev (*_map rewrite_case*) cases with 157 | | {pc_lhs = {ppat_desc = Ppat_any; _}; pc_rhs; pc_guard = None} :: cases -> 158 | (cases, pc_rhs) 159 | | {pc_lhs = {ppat_desc = Ppat_var var; _}; pc_rhs; pc_guard = None} :: 160 | cases -> 161 | let rhs = 162 | [%expr let [%p ppat_var ~loc var] = _ppx_regexp_v in [%e pc_rhs]] in 163 | (cases, rhs) 164 | | cases -> 165 | let open Lexing in 166 | let pos = loc.Location.loc_start in 167 | let e0 = estring ~loc pos.pos_fname in 168 | let e1 = eint ~loc pos.pos_lnum in 169 | let e2 = eint ~loc (pos.pos_cnum - pos.pos_bol) in 170 | let e = [%expr raise (Match_failure ([%e e0], [%e e1], [%e e2]))] in 171 | (cases, warn ~loc "A universal case is recommended for %pcre." 
e)) 172 | in 173 | let cases = List.rev_map aux cases in 174 | let res = pexp_array ~loc (List.map (fun (re, _, _, _) -> re) cases) in 175 | let comp = [%expr 176 | let a = Array.map (fun s -> Re.mark (Re.Perl.re s)) [%e res] in 177 | let marks = Array.map fst a in 178 | let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in 179 | (re, marks) 180 | ] in 181 | let var = fresh_var () in 182 | let re_binding = 183 | value_binding ~loc ~pat:(ppat_var ~loc {txt = var; loc}) ~expr:comp 184 | in 185 | let e_comp = pexp_ident ~loc {txt = Lident var; loc} in 186 | 187 | let rec handle_cases i offG = function 188 | | [] -> [%expr assert false] 189 | | (_, nG, bs, rhs) :: cases -> 190 | [%expr 191 | if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then 192 | [%e wrap_group_bindings ~loc rhs offG bs] 193 | else 194 | [%e handle_cases (i + 1) (offG + nG) cases]] 195 | in 196 | let cases = 197 | [%expr 198 | (match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with 199 | | None -> [%e default_rhs] 200 | | Some _g -> [%e handle_cases 0 0 cases])] 201 | in 202 | (cases, re_binding) 203 | 204 | let transformation = object 205 | inherit [value_binding list] Ast_traverse.fold_map as super 206 | 207 | method! 
expression e_ext acc = 208 | (* Children are rewritten first, so nested [%pcre] nodes are already expanded when the enclosing node is inspected; acc collects the compiled-regexp bindings that impl hoists to the top of the structure. *) let e_ext, acc = super#expression e_ext acc in 209 | (match e_ext.pexp_desc with 210 | | Pexp_extension 211 | ({txt = "pcre"; _}, PStr [{pstr_desc = Pstr_eval (e, _); _}]) -> 212 | let loc = e.pexp_loc in 213 | (match e.pexp_desc with 214 | | Pexp_match (e, cases) -> 215 | let cases, binding = transform_cases ~loc cases in 216 | ([%expr let _ppx_regexp_v = [%e e] in [%e cases]], binding :: acc) 217 | | Pexp_function (cases) -> 218 | let cases, binding = transform_cases ~loc cases in 219 | ([%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc) 220 | | _ -> 221 | error ~loc "[%%pcre] only applies to match and function.") 222 | | _ -> (e_ext, acc)) 223 | end 224 | 225 | let impl str = 226 | let str, rev_bindings = transformation#structure str [] in 227 | if rev_bindings = [] then str else 228 | let re_str = 229 | let loc = Location.none in 230 | [%str open (struct [%%i pstr_value ~loc Nonrecursive rev_bindings] end)] 231 | in 232 | re_str @ str 233 | 234 | let () = Driver.register_transformation ~impl "ppx_regexp" 235 | -------------------------------------------------------------------------------- /ppx_tyre.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Petter A. Urkedal " 3 | authors: [ 4 | "Gabriel Radanne " 5 | "Petter A.
Urkedal " 6 | ] 7 | license: "LGPL-3 with OCaml linking exception" 8 | homepage: "https://github.com/paurkedal/ppx_regexp" 9 | bug-reports: "https://github.com/paurkedal/ppx_regexp/issues" 10 | depends: [ 11 | "ocaml" {>= "4.02.3"} 12 | "dune" {>= "1.11"} 13 | "ocaml-migrate-parsetree" {>= "1.4.0"} 14 | "re" {>= "1.7.2"} 15 | "ppx_tools_versioned" {>= "5.2.3"} 16 | "tyre" {>= "0.4.1"} 17 | "qcheck" {with-test} 18 | ] 19 | build: ["dune" "build" "-p" name "-j" jobs] 20 | dev-repo: "git+https://github.com/paurkedal/ppx_regexp.git" 21 | synopsis: "PPX syntax for tyre regular expressions and routes" 22 | description: """ 23 | This PPX compiles 24 | 25 | [%tyre {|re|}] 26 | 27 | into `'a Tyre.t` and 28 | 29 | function%tyre 30 | | {|re1|} as x1 -> e1 31 | ... 32 | | {|reN|} as xN -> eN 33 | 34 | into `'a Tyre.route`, where `re`, `re1`, ... are regular expressions 35 | expressed in a slightly extended subset of PCRE. The interpretations are: 36 | 37 | - `re?` extracts an option of what `re` extracts. 38 | - `re+`, `re*`, `re{n,m}` extracts a list of what `re` extracts. 39 | - `(?&qname)` refers to any identifier bound to a typed regular expression 40 | of type `'a Tyre.t`. 41 | - One or more `(?<v>re)` at the top level can be used to bind variables 42 | instead of `as ...`. 43 | - One or more `(?<v>re)` in a sequence extracts an object where each method 44 | `v` is bound to what `re` extracts. 45 | - An alternative with one `(?<v>re)` per branch extracts a polymorphic 46 | variant where each constructor `` `v`` receives what `re` extracts as its 47 | argument. 48 | - `(?&v:qname)` is a shortcut for `(?<v>(?&qname))`.
49 | """ 50 | -------------------------------------------------------------------------------- /ppx_tyre/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name ppx_tyre) 3 | (public_name ppx_tyre) 4 | (kind ppx_rewriter) 5 | (modules ppx_tyre regexp) 6 | (preprocess (pps ppx_tools_versioned.metaquot_409)) 7 | (libraries 8 | ocaml-migrate-parsetree 9 | ppx_tools_versioned 10 | re re.perl 11 | tyre) 12 | (ppx_runtime_libraries re re.perl tyre)) 13 | 14 | (rule (copy ../common/regexp.mli regexp.mli)) 15 | (rule (copy ../common/regexp.ml regexp.ml)) 16 | -------------------------------------------------------------------------------- /ppx_tyre/ppx_tyre.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2018 Gabriel Radanne 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the OCaml static compilation exception. 7 | * 8 | * This library is distributed in the hope that it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 11 | * License for more details. 12 | * 13 | * You should have received a copy of the GNU Lesser General Public License 14 | * along with this library. If not, see . 
15 | *) 16 | 17 | 18 | open Migrate_parsetree 19 | open Ast_409 20 | let ocaml_version = Versions.ocaml_409 21 | module AC = Ast_convenience_409 22 | 23 | module A = Ast_helper 24 | module Loc = Location 25 | 26 | module List = struct 27 | include List 28 | let init n f = (* for compatibility with OCaml < 4.6.0 *) 29 | let rec loop acc i = if i < 0 then acc else loop (f i :: acc) (i - 1) in 30 | loop [] (n - 1) 31 | end 32 | 33 | let internal_error ~loc = Loc.raise_errorf ~loc "Internal error@." 34 | 35 | let mk_gen s = 36 | let c = ref 0 in 37 | fun () -> incr c; Printf.sprintf "%s%d" s !c 38 | 39 | module Tyre = struct 40 | 41 | let mk ~loc s = AC.evar ~loc ("Tyre."^s) 42 | 43 | let mkf ~loc s l = 44 | A.Exp.apply ~loc (mk ~loc s) l 45 | 46 | let conv ~loc to_ from_ t = 47 | mkf ~loc "conv" [Nolabel, to_ ; Nolabel, from_ ; Nolabel, t] 48 | 49 | let bin ~loc s a b = mkf ~loc s [Nolabel, a; Nolabel, b] 50 | 51 | end 52 | 53 | module Re = struct 54 | 55 | let mk ~loc s = AC.evar ~loc ("Re."^s) 56 | 57 | let mkf ~loc s l = 58 | A.Exp.apply ~loc (mk ~loc s) l 59 | 60 | let mkfl ~loc s l = mkf ~loc s [Nolabel, AC.list ~loc l] 61 | 62 | end 63 | 64 | (** Utilities for captures *) 65 | 66 | type ('a, 'b) capture = 67 | | No 68 | | Named of 'a 69 | | Unnamed of 'b 70 | 71 | let rec capture e = 72 | let open Regexp in 73 | match e.Loc.txt with 74 | | Code _ -> No 75 | | Seq l -> 76 | let cs = List.map capture l in 77 | let l = List.filter (function No -> false | _ -> true) cs in 78 | begin match l with 79 | | [] -> No 80 | | [ c ] -> c 81 | | _ -> Unnamed () 82 | end 83 | | Alt l -> 84 | if List.exists (fun x -> capture x = No) l then 85 | No 86 | else 87 | Unnamed () 88 | | Opt t -> capture t 89 | | Repeat (_,t) -> capture t 90 | | Nongreedy t -> capture t 91 | | Capture _ -> Unnamed () 92 | | Capture_as (s,_) -> Named s 93 | | Call _ -> Unnamed () 94 | 95 | let capture_singleton = function 96 | | No -> No 97 | | Unnamed () -> Unnamed 1 98 | | Named s -> Named [s] 99 | 
100 | (** Simplification of regexps *) 101 | 102 | let flatten_seq = 103 | let rec f e = 104 | match e.Loc.txt with 105 | | Regexp.Seq l -> flatten l 106 | | _ -> [e] 107 | and flatten l = List.flatten @@ List.map f l 108 | in 109 | flatten 110 | 111 | let flatten_alt = 112 | let rec f e = 113 | match e.Loc.txt with 114 | | Regexp.Alt l -> flatten l 115 | | _ -> [e] 116 | and flatten l = List.flatten @@ List.map f l 117 | in 118 | flatten 119 | 120 | let extract_re_list ~loc l = 121 | let is_re = function {Loc.txt = Regexp.Code _; _} -> true | _ -> false in 122 | let get = 123 | function {Loc.txt = Regexp.Code r; _} -> r | _ -> internal_error ~loc in 124 | if List.for_all is_re l then Some (List.map get l) else None 125 | 126 | let collapse_ungrouped_seq ~loc l = 127 | let mkseq = function 128 | | [] -> [] 129 | | rl -> [Loc.mkloc (Regexp.Code (Re.mkfl "seq" ~loc @@ List.rev rl)) loc] 130 | in 131 | let rec aux acc = function 132 | | [] -> mkseq acc 133 | | {Loc.txt = Regexp.Code r ; _ } :: l -> aux (r :: acc) l 134 | | h :: t -> 135 | mkseq acc @ h :: aux [] t 136 | in 137 | match aux [] l with 138 | | [] -> Regexp.Code (Re.mk ~loc "epsilon") 139 | | [ x ] -> x.txt 140 | | l -> Seq l 141 | 142 | let rec collapse_ungrouped (t : string Regexp.t) = 143 | let loc = t.Loc.loc in 144 | let e : _ Regexp.node = match t.Loc.txt with 145 | | Regexp.Code e -> 146 | let f = AC.evar ~loc "Re.Perl.re" in 147 | let s = A.Exp.constant ~loc (A.Const.string e) in 148 | Code (A.Exp.apply ~loc f [Nolabel, s]) 149 | | Call lid -> 150 | Call lid 151 | | Capture t -> 152 | Capture (collapse_ungrouped t) 153 | | Capture_as (s, t) -> 154 | Capture_as (s, collapse_ungrouped t) 155 | | Seq l -> 156 | let l = flatten_seq @@ List.map collapse_ungrouped l in 157 | collapse_ungrouped_seq ~loc l 158 | | Alt l -> 159 | let l = flatten_alt @@ List.map collapse_ungrouped l in 160 | begin match extract_re_list ~loc l with 161 | | Some r -> Code (Re.mkfl "alt" ~loc r) 162 | | None -> Alt l 163 | end 
164 | | Opt t -> 165 | begin match collapse_ungrouped t with 166 | | {Loc.txt = Code r; _} -> 167 | Code (Re.mkf ~loc "opt" [Nolabel, r]) 168 | | t -> Opt t 169 | end 170 | | Repeat ({Loc.txt = (i, j); _} as ij, t) -> 171 | begin match collapse_ungrouped t with 172 | | {Loc.txt = Code r; _} -> 173 | let i = A.Exp.constant (A.Const.int i) in 174 | let j = 175 | match j with 176 | | None -> AC.constr "None" [] 177 | | Some j -> AC.constr "Some" [A.Exp.constant (A.Const.int j)] 178 | in 179 | Code (Re.mkf ~loc "repn" [Nolabel, r; Nolabel, i; Nolabel, j]) 180 | | t -> Repeat (ij, t) 181 | end 182 | | Nongreedy t -> 183 | begin match collapse_ungrouped t with 184 | | {Loc.txt = Code r; _} -> 185 | Code (Re.mkf ~loc "non_greedy" [Nolabel, r]) 186 | | t -> Nongreedy t 187 | end 188 | in 189 | Loc.mkloc e loc 190 | 191 | let simplify = collapse_ungrouped 192 | 193 | (** Converters to/from nested tuples *) 194 | 195 | let rec make_nested_tuple_pat ~loc ids = 196 | match ids with 197 | | [] -> internal_error ~loc 198 | | [ v ] -> AC.pvar ~loc v 199 | | v :: ids -> 200 | let pat = make_nested_tuple_pat ~loc ids in 201 | A.Pat.tuple ~loc [AC.pvar ~loc v;pat] 202 | let rec make_nested_tuple_expr ~loc exprs = 203 | match exprs with 204 | | [] -> internal_error ~loc 205 | | [e] -> e 206 | | e :: exprs -> 207 | let tuples = make_nested_tuple_expr ~loc exprs in 208 | A.Exp.tuple ~loc [e; tuples] 209 | let make_object_expr ~loc expr meths = 210 | let rec f expr meths = match expr, meths with 211 | | [], [] -> [] 212 | | expr :: exprs, meth :: meths -> 213 | let decls = f exprs meths in 214 | let decl = 215 | A.Cf.method_ ~loc meth 216 | Public 217 | (Cfk_concrete (Fresh, expr)) 218 | in 219 | decl :: decls 220 | | _, _ -> internal_error ~loc 221 | in 222 | A.Exp.object_ ~loc (A.Cstr.mk (A.Pat.any ~loc ()) @@ f expr meths) 223 | 224 | let make_conv_of_nested_tuple ~loc ~make_pat ~make_expr ~ids tyre_expr = 225 | let fun_to = 226 | let tuple_pat = make_nested_tuple_pat ~loc ids in 227 
| let lids = List.map (AC.evar ~loc) ids in 228 | let expr = make_expr ~loc lids in 229 | A.Exp.fun_ ~loc Nolabel None tuple_pat expr 230 | in 231 | let fun_from = 232 | let obj_pat, subexprs = make_pat ~loc () in 233 | let expr = make_nested_tuple_expr ~loc subexprs in 234 | A.Exp.fun_ ~loc Nolabel None obj_pat expr 235 | in 236 | Tyre.conv ~loc fun_to fun_from tyre_expr 237 | 238 | let make_conv_object ~loc meths tyre_expr = 239 | let obj_var = "v" in 240 | let gen = mk_gen obj_var in 241 | let ids = List.init (List.length meths) (fun _ -> gen ()) in 242 | let make_expr ~loc lids = 243 | make_object_expr ~loc lids meths 244 | in 245 | let make_pat ~loc () = 246 | let obj = AC.evar ~loc obj_var in 247 | let obj_pat = AC.pvar ~loc obj_var in 248 | let methsends = List.map (fun m -> A.Exp.send ~loc obj m) meths in 249 | obj_pat, methsends 250 | in 251 | make_conv_of_nested_tuple ~loc ~ids ~make_expr ~make_pat tyre_expr 252 | 253 | let make_conv_tuple ~loc n tyre_expr = 254 | let gen = mk_gen "v" in 255 | let ids = List.init n (fun _ -> gen ()) in 256 | let make_expr ~loc l = A.Exp.tuple ~loc l in 257 | let make_pat ~loc () = 258 | let plids = List.map (AC.pvar ~loc) ids in 259 | let elids = List.map (AC.evar ~loc) ids in 260 | let ptuple = A.Pat.tuple ~loc plids in 261 | ptuple, elids 262 | in 263 | make_conv_of_nested_tuple ~loc ~ids ~make_expr ~make_pat tyre_expr 264 | 265 | (** Converters to/from nested either types *) 266 | 267 | let ppoly s ~loc x = A.Pat.(variant ~loc s (Some x)) 268 | let epoly s ~loc x = A.Exp.(variant ~loc s (Some x)) 269 | let make_nested_either_constr ~loc ~length ~mk n x = 270 | let rec nested_rights ~loc n expr = 271 | if n = 0 then expr 272 | else mk "Right" ~loc (nested_rights ~loc (n-1) expr) 273 | in 274 | if n = length - 1 then nested_rights ~loc n x 275 | else nested_rights ~loc n (mk "Left" ~loc x) 276 | 277 | let make_match_from_nested ~loc mk_exprs = 278 | let length = List.length mk_exprs in 279 | let make_case n mk_expr = 280 
| let id = "v" in 281 | A.Exp.case 282 | (make_nested_either_constr ~loc ~length ~mk:ppoly n @@ AC.pvar ~loc id) 283 | (mk_expr @@ AC.evar ~loc id) 284 | in 285 | A.Exp.function_ ~loc @@ List.mapi make_case mk_exprs 286 | 287 | let make_match_to_nested ~loc mk_pats = 288 | let length = List.length mk_pats in 289 | let make_case n mk_pat = 290 | let id = "v" in 291 | A.Exp.case 292 | (mk_pat @@ AC.pvar ~loc id) 293 | (make_nested_either_constr ~loc ~length ~mk:epoly n @@ AC.evar ~loc id) 294 | in 295 | A.Exp.function_ ~loc @@ List.mapi make_case mk_pats 296 | 297 | let make_conv_sum ~loc captures tyre_expr = 298 | let name_from_capture i = function 299 | | No -> 300 | Loc.raise_errorf ~loc 301 | "All alternatives branches must have a capturing group." 302 | | Unnamed _ -> Location.mkloc ("Alt"^string_of_int i) loc 303 | | Named s -> s 304 | in 305 | let branchnames = List.mapi name_from_capture captures in 306 | let fun_to = 307 | let expr_branchs = 308 | List.map (fun {Loc.loc;txt} -> epoly ~loc txt) branchnames 309 | in 310 | make_match_from_nested ~loc expr_branchs 311 | in 312 | let fun_from = 313 | let pat_branchs = 314 | List.map (fun {Loc.loc;txt} -> ppoly ~loc txt) branchnames 315 | in 316 | make_match_to_nested ~loc pat_branchs 317 | in 318 | Tyre.conv ~loc fun_to fun_from tyre_expr 319 | 320 | (** Alternatives *) 321 | 322 | let rec alt_to_expr ~loc = function 323 | | [] -> internal_error ~loc 324 | | [ e ] -> e 325 | | (e) :: exprs -> 326 | let exprs = alt_to_expr ~loc exprs in 327 | Tyre.bin ~loc "alt" e exprs 328 | 329 | let alt_to_conv ~loc captures exprs = 330 | let alt_expr = alt_to_expr ~loc exprs in 331 | make_conv_sum ~loc captures alt_expr 332 | 333 | (** Sequences *) 334 | 335 | let rec seq_to_expr ~loc = function 336 | | [] -> internal_error ~loc 337 | | [ capture, e ] -> capture_singleton capture, e 338 | | (capture, e) :: exprs -> 339 | let captures, exprs = seq_to_expr ~loc exprs in 340 | let captures, (<&>) = match capture, captures with 
341 | | c, No -> capture_singleton c, Tyre.bin ~loc "suffix" 342 | | No, c -> c, Tyre.bin ~loc "prefix" 343 | | Unnamed (), Unnamed i -> Unnamed (i+1), Tyre.bin ~loc "seq" 344 | | Named s, Named l -> Named (s :: l), Tyre.bin ~loc "seq" 345 | | Unnamed _, Named _ | Named _, Unnamed _ -> 346 | Loc.raise_errorf ~loc 347 | "The same sequence must not mix unnamed and named capture groups@." 348 | in 349 | captures, e <&> exprs 350 | (* seq_to_conv: translate a sequence with seq_to_expr, then choose the conversion from the captures it reports. One or two unnamed groups, or a single named group, need no conversion; more unnamed groups go through make_conv_tuple and several named groups through make_conv_object. A sequence without captures, or with an empty capture list, is an internal error. *) 351 | let seq_to_conv ~loc l = 352 | let seq_capture, seq_expr = seq_to_expr ~loc l in 353 | match seq_capture with 354 | | No -> 355 | (* This case should not happen: If simplification was run, 356 | sequence of ungrouped regex would have been collapsed. *) 357 | internal_error ~loc 358 | | Unnamed 0 | Named [] -> 359 | internal_error ~loc (* No. *) 360 | | Unnamed 1 | Unnamed 2 | Named [_] -> 361 | seq_expr 362 | | Unnamed i -> make_conv_tuple ~loc i seq_expr 363 | | Named l -> make_conv_object ~loc l seq_expr 364 | 365 | (** Put everything together *) 366 | (* expr_of_regex: translate one Regexp.t node into an expression, using Tyre.mkf with the names regex, opt, rep, rep1 and non_greedy. Sequences are delegated to seq_to_conv and alternatives to alt_to_conv; capture nodes translate their content only, and a Call becomes a reference to the identifier. Only the repetition bounds (0, None) and (1, None) are handled; any other bound raises an error located at the repetition. *) 367 | let rec expr_of_regex (t : _ Regexp.t) = 368 | let loc = t.Loc.loc in 369 | match t.Loc.txt with 370 | | Regexp.Code r -> 371 | Tyre.mkf ~loc "regex" [Nolabel, r] 372 | | Seq l -> 373 | let seq_item re = capture re, expr_of_regex re in 374 | seq_to_conv ~loc @@ List.map seq_item l 375 | | Alt l -> 376 | let exprs = List.map expr_of_regex l in 377 | let captures = List.map capture l in 378 | alt_to_conv ~loc captures exprs 379 | | Opt t -> 380 | Tyre.mkf ~loc "opt" [Nolabel, expr_of_regex t] 381 | | Repeat ({Loc.txt = (0, None); _}, t) -> 382 | Tyre.mkf ~loc "rep" [Nolabel, expr_of_regex t] 383 | | Repeat ({Loc.txt = (1, None); _}, t) -> 384 | Tyre.mkf ~loc "rep1" [Nolabel, expr_of_regex t] 385 | | Repeat ({loc; _}, _) -> 386 | Loc.raise_errorf ~loc "Repetitions other than + and * are not implemented."
387 | | Nongreedy t -> 388 | Tyre.mkf ~loc "non_greedy" [Nolabel, expr_of_regex t] 389 | | Capture t -> expr_of_regex t 390 | | Capture_as (_, t) -> expr_of_regex t 391 | | Call lid -> A.Exp.ident lid 392 | 393 | (* adjust_position: position of the first character of a string literal's contents, obtained by shifting pos_cnum of loc_start by 1 when there is no delimiter, or by String.length s + 2 for a quoted literal with delimiter s. Only pos_cnum is changed. *) 394 | let adjust_position ~loc delim = 395 | let (+~) pos i = Lexing.{pos with pos_cnum = pos.pos_cnum + i } in 396 | match delim with 397 | | None -> loc.Loc.loc_start +~ 1 398 | | Some s -> loc.Loc.loc_start +~ (String.length s + 2) 399 | (* expr_of_string: parse the literal s as a regexp, with positions starting at adjust_position, then simplify it and translate it with expr_of_regex. *) let expr_of_string ~loc s delim = 400 | let pos = adjust_position ~loc delim in 401 | expr_of_regex @@ simplify @@ Regexp.parse_exn ~pos s 402 | 403 | (* regexp_of_pattern: read an OCaml pattern as a regexp. A string constant is parsed, an alias pattern becomes a capture under the alias name, an or-pattern becomes a two-way alternative, the wildcard is the code for any string, and a variable captures any string under its own name. Every other pattern raises a located error. The result carries the location of the pattern. *) 404 | let rec regexp_of_pattern pat = 405 | let open Parsetree in 406 | let loc = pat.ppat_loc in 407 | let re = match pat.ppat_desc with 408 | | Ppat_constant (Pconst_string (s, delim)) -> 409 | let pos = adjust_position ~loc delim in 410 | (Regexp.parse_exn ~pos s).txt 411 | | Ppat_alias (pat, s) -> 412 | Regexp.(Capture_as (s, regexp_of_pattern pat)) 413 | | Ppat_or (pat1, pat2) -> 414 | Regexp.(Alt [ regexp_of_pattern pat1 ; regexp_of_pattern pat2 ]) 415 | | Ppat_any -> 416 | Regexp.Code ".*" 417 | | Ppat_var id -> 418 | Regexp.(Capture_as (id, {loc; txt = Code ".*"})) 419 | | _ -> 420 | Loc.raise_errorf ~loc 421 | "This pattern is not a valid tyre pattern." 422 | in 423 | Loc.mkloc re loc 424 | (* expr_of_pattern: simplified regexp of a pattern, returned as a pair of captures and expression. A top-level sequence goes through seq_to_expr directly, so seq_to_conv is not applied at this level; anything else is handled as a single item via capture_singleton. *) 425 | let expr_of_pattern pat = 426 | let re = simplify @@ regexp_of_pattern pat in 427 | match re.txt with 428 | | Seq l -> 429 | let f_item re = capture re, expr_of_regex re in 430 | let capture_seq, expr = seq_to_expr ~loc:re.loc @@ List.map f_item l in 431 | capture_seq, expr 432 | | _ -> 433 | capture_singleton (capture re), expr_of_regex re 434 | 435 | (* expr_of_function: turn the cases of a function into a route. Each case becomes the constructor Tyre.Route applied to the regexp expression of its pattern and a function from the captured value to the right-hand side; the list of routes is passed to route via Tyre.mkf. A case with a guard raises an error located at the guard. *) 436 | let expr_of_function ~loc l = 437 | let err_on_guard = function 438 | | None -> () 439 | | Some e -> 440 | Loc.raise_errorf ~loc:e.Parsetree.pexp_loc 441 | "Tyre patterns can not have guards." 442 | in 443 | let route_of_case {Parsetree.
pc_rhs ; pc_guard ; pc_lhs } = 444 | err_on_guard pc_guard; 445 | let loc = pc_lhs.ppat_loc in 446 | let capture, re = expr_of_pattern pc_lhs in 447 | let pvar_of_lid {Loc.loc; txt} = AC.pvar ~loc txt in 448 | (* Pattern for the handler argument: a single named capture is bound as a variable, several named captures are destructured with make_nested_tuple_pat, and absent or unnamed captures are ignored with a wildcard. *) let arg = match capture with 449 | | Named [] | Unnamed 0 -> internal_error ~loc 450 | | No | Unnamed _ -> A.Pat.any ~loc () 451 | | Named [lid] -> pvar_of_lid lid 452 | | Named l -> 453 | make_nested_tuple_pat ~loc @@ List.map (fun {Loc.txt ; _} -> txt) l 454 | in 455 | let e = AC.func ~loc [arg, pc_rhs] in 456 | AC.constr ~loc "Tyre.Route" [re; e] 457 | in 458 | let l = List.map route_of_case l in 459 | Tyre.mkf ~loc "route" [Nolabel, AC.list ~loc l] 460 | 461 | open Ast_mapper 462 | (* expr: expression hook of the mapper. An extension node named tyre whose payload is a single expression is expanded: a string constant goes to expr_of_string and a function to expr_of_function; any other payload expression raises a located error. Every other expression is handed to default_mapper. *) 463 | let expr mapper e_ext = 464 | let open Parsetree in 465 | match e_ext.pexp_desc with 466 | | Pexp_extension ({txt = "tyre"; _}, 467 | PStr [{pstr_desc = Pstr_eval (e, _); _}]) -> 468 | let loc = e.pexp_loc in 469 | (match e.pexp_desc with 470 | | Pexp_constant (Pconst_string (s, delim)) -> 471 | expr_of_string ~loc s delim 472 | | Pexp_function l -> 473 | expr_of_function ~loc l 474 | | _ -> 475 | Loc.raise_errorf ~loc 476 | "[%%tyre] is only allowed on constant strings and functions.") 477 | | _ -> default_mapper.expr mapper e_ext 478 | (* Register the rewriter with the driver under the name ppx_regexp.tyre; only the expr hook differs from default_mapper. *) 479 | let () = 480 | Driver.register 481 | ~name:"ppx_regexp.tyre" ocaml_version 482 | (fun _config _cookies -> {default_mapper with expr}) 483 | -------------------------------------------------------------------------------- /ppx_tyre/ppx_tyre.mli: -------------------------------------------------------------------------------- 1 | (* Nothing to see here *) 2 | -------------------------------------------------------------------------------- /tests/dune: -------------------------------------------------------------------------------- 1 | ; Tests run for both packages 2 | 3 | (test 4 | (name test_regexp) 5 | (modules regexp test_regexp) 6 | (libraries ppxlib qcheck re re.perl)) 7 | (rule (copy ../common/regexp.mli regexp.mli)) 8 | (rule
(copy ../common/regexp.ml regexp.ml)) 9 | 10 | ; Tests for ppx_regexp 11 | 12 | (tests 13 | (names test_ppx_regexp test_ppx_regexp_unused) 14 | (modules test_ppx_regexp test_ppx_regexp_unused) 15 | (package ppx_regexp) 16 | (libraries re re.perl) 17 | (preprocess (pps ppx_regexp))) 18 | 19 | ; Tests for ppx_tyre 20 | 21 | (executable 22 | (name test_ppx_tyre) 23 | (modules test_ppx_tyre) 24 | (libraries re re.perl) 25 | (preprocess (pps ppx_tyre))) 26 | (alias 27 | (name runtest) 28 | (package ppx_tyre) 29 | (deps test_ppx_tyre.exe) 30 | (action (run %{deps}))) 31 | 32 | ; Combined preprocessor 33 | 34 | (executable 35 | (name main) 36 | (modules Main) 37 | (libraries ppx_regexp ppx_tyre ocaml-migrate-parsetree)) 38 | -------------------------------------------------------------------------------- /tests/main.ml: -------------------------------------------------------------------------------- 1 | Migrate_parsetree.Driver.run_as_ppx_rewriter () 2 | -------------------------------------------------------------------------------- /tests/test_ppx_regexp.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2017 Petter A. Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the LGPL-3.0 Linking Exception. 7 | * 8 | * This library is distributed in the hope that it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 11 | * License for more details. 12 | * 13 | * You should have received a copy of the GNU Lesser General Public License 14 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
15 | *) 16 | (* The wildcard and a bare variable must be accepted as patterns by both match%pcre and function%pcre; the variable is checked to equal the matched string. *) 17 | let () = 18 | (match%pcre "%" with _ -> ()); 19 | (match%pcre "%" with s -> assert (s = "%")); 20 | (function%pcre _ -> ()) "%"; 21 | (function%pcre s -> assert (s = "%")) "%" 22 | (* test1: classify a line as attribute, comment, even run of sigils, odd run of sigils, or unknown. NOTE(review): the named groups appear to have lost their angle-bracketed names in this copy (the branches use k, v, comment and last, which no pattern binds) - restore them from the upstream file before compiling. *) 23 | let test1 = 24 | (function%pcre 25 | | {|^(?.*): *(?.+)?$|} -> `Attr (k, v) 26 | | {|^# (?.+)$|} -> `Comment comment 27 | | {|^((?[@%]){2}){0,2}$|} -> `Even_sigils last 28 | | {|^[@%]|} -> `Odd_sigils 29 | | _ -> `Unknown) 30 | (* Expected classification for one input per branch of test1 except the fallback. *) 31 | let () = 32 | assert (test1 "x: 1" = `Attr ("x", Some "1")); 33 | assert (test1 "# Kommentar" = `Comment "Kommentar"); 34 | assert (test1 "" = `Even_sigils None); 35 | assert (test1 "%%%@" = `Even_sigils (Some "@")); 36 | assert (test1 "%%@" = `Odd_sigils) 37 | (* last_elt: asserts that s ends with a semicolon and returns the text between the previous semicolon (or the start of s when there is none) and that final semicolon. *) 38 | let last_elt s = 39 | let n = String.length s in 40 | assert (s.[n - 1] = ';'); 41 | let i = try String.rindex_from s (n - 2) ';' + 1 with Not_found -> 0 in 42 | String.sub s i (n - i - 1) 43 | (* test2: every branch asserts that its captures reassemble to the input; the brace branch recurses on the captured content, and an input matching no branch fails. *) 44 | let rec test2 s = 45 | (match%pcre s with 46 | | {|^<>$|} -> assert (s = "<>") 47 | | {|^<(?[^<>]+)>$|} -> assert (s = "<" ^ x ^ ">") 48 | | {|^<(?[^<>]+)><(?[^<>]+)>$|} -> assert (s = "<" ^ x ^ "><" ^ y ^ ">") 49 | | {|^((?[^;<>]);)+$|} -> assert (elt = last_elt s) 50 | | {|^[^{}]*\{(?.*)\}|} -> test2 s' 51 | | {|^(?one)|(?two)$|} -> 52 | assert (a = Some "one" && b = None || a = None && b = Some "two") 53 | | _ -> assert false) 54 | (* test3: patterns mixing unnamed groups with named ones, ending with a variable pattern that must bind the whole input. *) 55 | let test3 s = 56 | (match%pcre s with 57 | | {|no(is)((e)) (?is) (g(oo)d)|} -> assert (is = "is") 58 | | {|?&()[a-zA-Z0-9_-]+(;)|} -> 59 | let i, j = String.index s '&', String.rindex s ';' in 60 | assert (s' = String.sub s i (j - i + 1)) 61 | | {|m(o+)re re(gular)?
no(is)e, (no )*be(t+)?er|} -> () 62 | | s' -> assert (s = s')) 63 | (* test4: regression test tagged Issue 8; CHANGES lists the fix for top level group elimination (#8) under v0.4.2. *) 64 | let test4 = function%pcre (* Issue 8 *) 65 | | {|(?[-+]?[[:digit:]]+.[[:digit:]]*)|} -> [x] 66 | | {|(?(abc))[[:space:]]*(?(xyz))|} -> [x; y] 67 | | _ -> assert false 68 | (* test5: match%pcre nested inside a function%pcre branch, each level matching on the capture of the enclosing one. *) 69 | let test5 = function%pcre 70 | | {|^.(?.+)|} -> 71 | (match%pcre x with 72 | | {|^.(?.+)|} -> 73 | (match%pcre y with 74 | | {|^.(?.+)|} -> (x, y, z) 75 | | _ -> assert false) 76 | | _ -> assert false) 77 | | _ -> assert false 78 | (* Run test2 to test5 on sample inputs. NOTE(review): several test2 inputs are empty strings in this copy, and no branch of test2 accepts the empty string, so they would reach the failing fallback; they presumably lost angle-bracketed text - TODO confirm against upstream. *) 79 | let () = 80 | test2 "<>"; 81 | test2 ""; 82 | test2 ""; 83 | test2 ""; 84 | test2 ""; 85 | test2 "a;"; 86 | test2 "a;b;c;d;"; 87 | test2 ""; 88 | test2 "Xx{--{a;b;c;}--}yY."; 89 | test2 "one"; 90 | test2 "two"; 91 | test3 "- +   + -"; 92 | test3 "catch-all"; 93 | assert (test4 "::123.456::" = ["123.456"]); 94 | assert (test4 "::abc xyz::" = ["abc"; "xyz"]); 95 | assert (test5 "abcd" = ("bcd", "cd", "d")) 96 | 97 | (* It should work in a functor, and Re_pcre.regexp should be lifted to the 98 | * top-level. *) 99 | module F (M : Map.OrderedType) = struct 100 | let f x = 101 | (match%pcre x with 102 | | {|#(?\s)?(?.*)|} -> Some (space <> None, comment) 103 | | _ -> None) 104 | end 105 | 106 | (* It should work as a top-level eval. *) 107 | let r = ref false 108 | ;;(match%pcre "" with 109 | | {|^$|} -> r := true 110 | | _ -> assert false) 111 | ;;assert (!r = true) 112 | -------------------------------------------------------------------------------- /tests/test_ppx_regexp_unused.ml: -------------------------------------------------------------------------------- 1 | let () = () 2 | -------------------------------------------------------------------------------- /tests/test_ppx_tyre.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2017 Petter A.
Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the OCaml static compilation exception. 7 | * 8 | * This library is distributed in the hope that it will be useful, but WITHOUT 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 11 | * License for more details. 12 | * 13 | * You should have received a copy of the GNU Lesser General Public License 14 | * along with this library. If not, see . 15 | *) 16 | (* test: run re on s with Tyre.exec and return the matched value; on an error, print it to stderr with Tyre.pp_error and fail the assertion. *) 17 | let test re s = 18 | match Tyre.exec re s with 19 | | Ok b -> b 20 | | Error e -> 21 | Format.eprintf "Error: %a@." Tyre.pp_error e; 22 | assert false 23 | (* Infix shorthands: the two-percent operator is test itself, the three-percent operator additionally asserts that the result is true. *) let (%%) = test 24 | let (%%%) a b = assert (a %% b) 25 | (* The wildcard and a bare variable must be accepted as patterns by function%tyre. *) 26 | let () = 27 | (function%tyre _ -> true) %%% "%"; 28 | (function%tyre s -> s = "%") %%%"%" 29 | (* Result type of the classifier test1 below. *) 30 | type t = [ 31 | | `Attr of string * string option 32 | | `Comment of string 33 | | `Even_sigils of string option 34 | | `Odd_sigils 35 | | `Id of string * int * string 36 | | `Unknown ] 37 | (* test1: route-based classifier of a line; the Id branch embeds the converter Tyre.pos_int, so num is used as an int. NOTE(review): the named groups appear to have lost their angle-bracketed names in this copy (the branches use k, v, comment, sigil, id and sym, which no pattern binds) - restore them from the upstream file before compiling. *) 38 | let test1 : t Tyre.re = 39 | (function%tyre 40 | | {|^(?.*): *(?.+)?$|} -> `Attr (k, v) 41 | | {|^# (?.+)$|} -> `Comment comment 42 | | {|^(?([@%]{2})+)?$|} -> `Even_sigils sigil 43 | | {|^[@%]|} -> `Odd_sigils 44 | | {|^(?[a-z]+)(?&num:Tyre.pos_int)(?[^[:alnum:]]+)$|} 45 | -> `Id (id, num, sym) 46 | | _ -> `Unknown) 47 | (* Expected classification for one input per branch of test1 except the fallback. *) 48 | let () = 49 | assert (test1 %% "x: 1" = `Attr ("x", Some "1")); 50 | assert (test1 %% "# Kommentar" = `Comment "Kommentar"); 51 | assert (test1 %% "" = `Even_sigils None); 52 | assert (test1 %% "%%%@" = `Even_sigils (Some "%%%@")); 53 | assert (test1 %% "%%@" = `Odd_sigils); 54 | assert (test1 %% "abc42#@" = `Id ("abc", 42, "#@")) 55 | (* concat_seq: concatenate the strings of a sequence, appending sep after every element including the last; the empty sequence gives the empty string. *) 56 | let concat_seq sep seq = 57 | let rec f seq = 58 | match
seq () with 59 | | Seq.Nil -> "" 60 | | Cons (s,seq) -> s ^ sep ^ f seq 61 | in 62 | f seq 63 | (* test2: every branch returns a predicate on the original input that checks the captures reassemble to it; the repeated group is rebuilt with concat_seq, and the last branch binds the alternation as a polymorphic variant with one tag per alternative. *) 64 | let test2 = function%tyre 65 | | {|^<>$|} -> (=) "<>" 66 | | {|^<(?[^<>]+)>$|} -> fun s -> s = "<" ^ x ^ ">" 67 | | {|^<(?[^<>]+)><(?[^<>]+)>$|} -> fun s -> s = "<" ^ x ^ "><" ^ y ^ ">" 68 | | {|^((?[^;<>]);)*$|} -> fun s -> concat_seq ";" elt = s 69 | | {|^(?one)|(?two)$|} as x -> 70 | (match x with 71 | | `a a -> fun s -> a = s && a = "one" 72 | | `b b -> fun s -> b = s && b = "two") 73 | (* Four-percent operator: match s against re, then apply the resulting predicate to s itself. *) 74 | let (%%%%) re s = (re %% s) s 75 | (* Run test2 on sample inputs; each predicate must hold. *) 76 | let () = 77 | assert (test2 %%%%"<>"); 78 | assert (test2 %%%%""); 79 | assert (test2 %%%%""); 80 | assert (test2 %%%%""); 81 | assert (test2 %%%%""); 82 | assert (test2 %%%%"a;"); 83 | assert (test2 %%%%"a;b;c;d;"); 84 | assert (test2 %%%%""); 85 | assert (test2 %%%%"one"); 86 | assert (test2 %%%%"two") 87 | 88 | (* It should work in a functor, and Re_pcre.regexp should be lifted to the 89 | * top-level. *) 90 | module F (M : Map.OrderedType) = struct 91 | let f = function%tyre 92 | | {|#(?\s)?(?.*)|} -> Some (space <> None, comment) 93 | | _ -> None 94 | end 95 | -------------------------------------------------------------------------------- /tests/test_regexp.ml: -------------------------------------------------------------------------------- 1 | (* Copyright (C) 2018--2021 Petter A. Urkedal 2 | * 3 | * This library is free software; you can redistribute it and/or modify it 4 | * under the terms of the GNU Lesser General Public License as published by 5 | * the Free Software Foundation, either version 3 of the License, or (at your 6 | * option) any later version, with the OCaml static compilation exception or (at 7 | * your option) the LGPL-3.0 Linking Exception. 8 | * 9 | * This library is distributed in the hope that it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 12 | * License for more details.
13 | * 14 | * You should have received a copy of the GNU Lesser General Public License 15 | * along with this library. If not, see . 16 | *) 17 | 18 | open Printf 19 | module Loc = Location 20 | module Q = QCheck 21 | (* Location helpers; map_loc applies f to the payload and keeps the location unchanged. *) 22 | let mkloc = Loc.mkloc 23 | let mknoloc = Loc.mknoloc 24 | let map_loc f {Loc.txt = x; loc} = {Loc.txt = f x; loc} 25 | 26 | (* Dummy implementation for compatibility with OCaml < 4.08.0; swap in the 27 | * real version (commented out below) if needed. *) 28 | let pp_location_error ppf _ = Format.pp_print_string ppf "parse error" 29 | (* 30 | let pp_location_error = Loc.print_report 31 | *) 32 | (* Local result type; its constructors Ok and Error shadow any earlier ones for the rest of this file. NOTE(review): presumably kept for old compilers - confirm before removing. *) 33 | type ('a,'b) result = Ok of 'a | Error of 'b 34 | (* Regexp: the module under test, extended with the helpers the properties below rely on (simplification, equality up to locations, printing, debug output, conversion to Re). *) 35 | module Regexp = struct 36 | include Regexp 37 | (* nonepsilon: false exactly for the empty sequence. *) 38 | let nonepsilon = function {Loc.txt = Seq []; _} -> false | _ -> true 39 | (* collect_code: while the list starts with two Code nodes, merge them into one whose location spans both. NOTE(review): it stops at the first position where the two leading elements are not both Code and never looks further into the list; it is also not referenced in the visible part of this file. *) 40 | let rec collect_code = function 41 | | {Loc.txt = Code s1; loc = loc1} :: {Loc.txt = Code s2; loc = loc2} :: es -> 42 | let e12 = 43 | Loc.{ 44 | txt = Code (s1 ^ s2); 45 | loc = { 46 | loc_start = loc1.loc_start; 47 | loc_end = loc2.loc_end; 48 | loc_ghost = loc1.loc_ghost || loc2.loc_ghost; 49 | } 50 | } 51 | in 52 | collect_code (e12 :: es) 53 | | es -> es 54 | (* simplify: normal form used when comparing regexps. Empty Code becomes the empty sequence, nested Seq and Alt are flattened, empty sequences are dropped from a Seq, a Seq or Alt with a single element is replaced by that element, and Opt of Opt collapses to a single Opt. The other constructors only simplify their argument. *) 55 | let rec simplify e = map_loc simplify' e 56 | and simplify' = function 57 | | Code "" -> Seq [] 58 | | Seq es -> 59 | let es = es 60 | |> List.map simplify 61 | |> List.map (function {Loc.txt = Seq es; _} -> es | e -> [e]) 62 | |> List.flatten 63 | |> List.filter nonepsilon 64 | in 65 | (match es with 66 | | [e] -> e.Loc.txt 67 | | es -> Seq es) 68 | | Alt es -> 69 | let es = es 70 | |> List.map simplify 71 | |> List.map (function {Loc.txt = Alt es; _} -> es | e -> [e]) 72 | |> List.flatten 73 | in 74 | (match es with 75 | | [e] -> e.Loc.txt 76 | | es -> Alt es) 77 | | Opt e -> 78 | (match simplify e with 79 | | {Loc.txt = Opt _; _} as e' -> e'.Loc.txt 80 | | e' -> Opt e') 81 | | Repeat (ij, e) -> Repeat (ij, simplify e) 82 | | Nongreedy e -> Nongreedy (simplify e) 83 | | Capture e -> Capture (simplify e) 84 | |
Capture_as (name, e) -> Capture_as (name, simplify e) 85 | | Code _ | Call _ as e -> e 86 | (* equal' (primed): structural equality that ignores locations; lists of different lengths and different constructors compare unequal. *) 87 | let rec equal' e1 e2 = 88 | (match e1.Loc.txt, e2.Loc.txt with 89 | | Code s1, Code s2 -> s1 = s2 90 | | Seq es1, Seq es2 | Alt es1, Alt es2 -> 91 | (try List.for_all2 equal' es1 es2 with Invalid_argument _ -> false) 92 | | Opt e1, Opt e2 -> equal' e1 e2 93 | | Repeat ({Loc.txt = ij1; _}, e1), Repeat ({Loc.txt = ij2; _}, e2) -> 94 | ij1 = ij2 && equal' e1 e2 95 | | Nongreedy e1, Nongreedy e2 -> equal' e1 e2 96 | | Capture e1, Capture e2 -> equal' e1 e2 97 | | Capture_as (name1, e1), Capture_as (name2, e2) -> 98 | name1.Loc.txt = name2.Loc.txt && equal' e1 e2 99 | | Call name1, Call name2 -> name1.Loc.txt = name2.Loc.txt 100 | | _, _ -> false (* We'll notice. *)) 101 | (* equal: compare two regexps after simplifying both. *) let equal e1 e2 = equal' (simplify e1) (simplify e2) 102 | (* to_string: print a regexp back to concrete syntax. The first argument of aux is the precedence of the context (p_bottom, p_alt, p_seq, p_suffix, from 0 to 3); delimit_if wraps the text in parentheses when the context binds tighter than the node being printed. *) 103 | let to_string = 104 | let p_bottom, p_alt, p_seq, p_suffix = 0, 1, 2, 3 in 105 | let delimit_if b s = if b then "(" ^ s ^ ")" else s in 106 | let rec aux p e = 107 | (match e.Loc.txt with 108 | | Code s -> 109 | delimit_if (p > p_seq) s 110 | | Seq es -> 111 | delimit_if (p > p_seq) (String.concat "" (List.map (aux p_seq) es)) 112 | | Alt es -> 113 | delimit_if (p > p_alt) (String.concat "|" (List.map (aux p_alt) es)) 114 | | Opt e -> 115 | delimit_if (p >= p_suffix) (aux p_suffix e ^ "?") 116 | | Repeat ({Loc.txt = (i, j_opt); _}, e) -> 117 | let j_str = match j_opt with None -> "" | Some j -> string_of_int j in 118 | delimit_if (p >= p_suffix) 119 | (sprintf "%s{%d,%s}" (aux p_suffix e) i j_str) 120 | | Nongreedy e -> 121 | aux (p_suffix - 1) e ^ "?" 122 | | Capture e -> 123 | "(+" ^ aux p_bottom e ^ ")" 124 | | Capture_as ({Loc.txt = name; _}, e) -> 125 | "(?<" ^ name ^ ">" ^ aux p_bottom e ^ ")" 126 | | Call {Loc.txt = idr; _} -> 127 | "(&" ^ String.concat "."
(Longident.flatten idr) ^ ")") 128 | in 129 | aux 0 130 | (* pp_debug: print the constructor tree of a regexp on the formatter ppf, followed by the location of the node. pp_loc prints nothing for Loc.none; otherwise it prints the start position as line:column and then either the end column (same line) or the full end position. *) 131 | let rec pp_debug ppf self = 132 | let open Regexp in 133 | let open Format in 134 | let open Loc in 135 | 136 | let pp_pos ppf pos = 137 | let open Lexing in 138 | Format.fprintf ppf "%d:%d" pos.pos_lnum (pos.pos_cnum - pos.pos_bol) 139 | in 140 | let pp_loc ppf loc = 141 | let open Loc in 142 | let open Lexing in 143 | if loc <> none then begin 144 | if loc.loc_start.pos_lnum = loc.loc_end.pos_lnum then 145 | Format.fprintf ppf "@%a-%d" pp_pos loc.loc_start 146 | (loc.loc_end.pos_cnum - loc.loc_end.pos_bol) 147 | else 148 | Format.fprintf ppf "@%a-%a" pp_pos loc.loc_start pp_pos loc.loc_end 149 | end 150 | in 151 | (match self.txt with 152 | | Code s -> 153 | fprintf ppf "(Code %S)" s 154 | | Seq es -> 155 | fprintf ppf "(Seq "; 156 | List.iter (pp_debug ppf) es; 157 | fprintf ppf ")"; 158 | | Alt es -> 159 | fprintf ppf "(Alt "; 160 | List.iter (pp_debug ppf) es; 161 | fprintf ppf ")"; 162 | | Opt e -> 163 | fprintf ppf "(Opt %a)" pp_debug e 164 | | Repeat ({txt = (i, j); loc}, e) -> 165 | let pp_option f ppf = function None -> () | Some e -> f ppf e in 166 | fprintf ppf "(Repeat {%d,%a}%a %a)" 167 | i (pp_option Format.pp_print_int) j pp_loc loc pp_debug e 168 | | Nongreedy e -> 169 | fprintf ppf "(Nongreedy %a)" pp_debug e 170 | | Capture e -> 171 | fprintf ppf "(Capture %a)" pp_debug e 172 | | Capture_as (name, e) -> 173 | fprintf ppf "(Capture_as %s%a %a)" name.txt pp_loc name.loc pp_debug e 174 | | Call name -> 175 | fprintf ppf "(Call %s%a)" 176 | (String.concat "."
(Longident.flatten name.txt)) pp_loc name.loc); 177 | pp_loc ppf self.loc 178 | (* show_debug: the pp_debug output followed by an arrow and the to_string rendering, returned as a string. *) 179 | let show_debug e = 180 | let buf = Buffer.create 64 in 181 | let ppf = Format.formatter_of_buffer buf in 182 | pp_debug ppf e; 183 | Format.fprintf ppf " => %S" (to_string e); 184 | Format.pp_print_flush ppf (); 185 | Buffer.contents buf 186 | (* to_re: convert to Re.t. Code fragments are parsed by Re.Perl.re, both kinds of capture become Re.group, and Call raises Re.Perl.Not_supported. *) 187 | let rec to_re e = 188 | (match e.Loc.txt with 189 | | Code re -> Re.Perl.re re 190 | | Seq es -> Re.seq (List.map to_re es) 191 | | Alt es -> Re.alt (List.map to_re es) 192 | | Opt e -> Re.opt (to_re e) 193 | | Repeat ({Loc.txt = (i, j); _}, e) -> Re.repn (to_re e) i j 194 | | Nongreedy e -> Re.non_greedy (to_re e) 195 | | Capture e -> Re.group (to_re e) 196 | | Capture_as (_, e) -> Re.group (to_re e) 197 | | Call _ -> raise Re.Perl.Not_supported) 198 | (* has_anon_capture: true when an unnamed Capture occurs anywhere in the tree. *) 199 | let rec has_anon_capture e = 200 | (match e.Loc.txt with 201 | | Code _ | Call _ -> false 202 | | Seq es | Alt es -> List.exists has_anon_capture es 203 | | Opt e | Repeat (_, e) | Capture_as (_, e) | Nongreedy e -> 204 | has_anon_capture e 205 | | Capture _ -> true) 206 | 207 | end 208 | (* gen_name: generator of identifiers. idrletter maps 0 to an underscore, 1..26 to lowercase letters, 27..52 to uppercase letters and 53..62 to digits. The first character is drawn with bound 26 and the rest with bound 62 - assuming int_bound is inclusive, i.e. underscore or lowercase first, then any of the 63 characters; TODO confirm. *) 209 | let gen_name = 210 | let open Q.Gen in 211 | let idrletter i = 212 | if i = 0 then '_' else let i = i - 1 in 213 | if i < 26 then Char.chr (0x61 + i) else let i = i - 26 in 214 | if i < 26 then Char.chr (0x41 + i) else let i = i - 26 in 215 | (assert (i < 10); Char.chr (0x30 + i)) 216 | in 217 | let idrfst = map idrletter (int_bound (27 - 1)) in 218 | let idrcnt = map idrletter (int_bound (63 - 1)) in 219 | map2 (fun c s -> String.make 1 c ^ s) idrfst (string ~gen:idrcnt) 220 | (* gen_regexp: sized generator of simplified regexps. Leaves (weight 1 each) are single characters from numeral, backslash classes and backslash-escaped punctuation; inner nodes are Seq and Alt (weight n*(n-1)) and Opt, Repeat without upper bound, Capture and Capture_as (weight n). Nongreedy and Call are never generated. NOTE(review): the quotable string below contains = twice and no semicolon, although a semicolon sits between colon and less-than in ASCII order - confirm whether that is intended. *) 221 | let gen_regexp = 222 | let open Q.Gen in 223 | let open Regexp in 224 | let gen_char = map (fun c -> mknoloc (Code (String.make 1 c))) numeral in 225 | let gen_backlash_op = 226 | let backslash_ops = "wWsSdDbBAZzG" in 227 | map (fun i -> mknoloc (Code (sprintf "\\%c" backslash_ops.[i]))) 228 | (int_bound (String.length backslash_ops - 1)) in 229 | let gen_quoted_op = 230 | let quotable =
"!\"#$%&'()*+,-./:=<=>?@[\\]^`{|}~" in 231 | map (fun i -> mknoloc (Code (sprintf "\\%c" quotable.[i]))) 232 | (int_bound (String.length quotable - 1)) in 233 | map Regexp.simplify @@ sized @@ 234 | fix @@ fun self n -> 235 | let gen_seq = 236 | map (fun es -> mknoloc (Seq es)) 237 | ((0 -- 10) >>= fun k -> list_size (return k) (self (n / (max 1 k)))) in 238 | let gen_alt = 239 | map (fun es -> mknoloc (Alt es)) 240 | ((2 -- 10) >>= fun k -> list_size (return k) (self (n / (max 1 k)))) in 241 | let gen_opt = 242 | map (fun e -> mknoloc (Opt e)) (self n) in 243 | let gen_repeat = 244 | map2 (fun i e -> mknoloc (Repeat (mknoloc (i, None), e))) nat (self n) in 245 | let gen_capture = 246 | map (fun e -> mknoloc (Capture e)) (self n) in 247 | let gen_capture_as = 248 | map2 (fun a e -> mknoloc (Capture_as (mknoloc a, e))) gen_name (self n) in 249 | frequency [ 250 | 1, gen_char; 251 | 1, gen_backlash_op; 252 | 1, gen_quoted_op; 253 | n*(n - 1), gen_seq; 254 | n*(n - 1), gen_alt; 255 | n, gen_opt; 256 | n, gen_repeat; 257 | n, gen_capture; 258 | n, gen_capture_as; 259 | ] 260 | (* shrink_regexp: shrinker for generated regexps. Each node shrinks its payload (strings, lists, repetition bounds, names, sub-expressions) and every candidate is simplified again. Nodes not listed in the match, i.e. Nongreedy, Call and an empty Alt, yield no candidates. *) 261 | let shrink_regexp = 262 | let open Q.Shrink in 263 | let open Q.Iter in 264 | let open Regexp in 265 | let rec shrink e = 266 | (match e.Loc.txt with 267 | | Code s -> map (fun s -> mknoloc (Code s)) (string s) 268 | | Seq es -> map (fun es -> mknoloc (Seq es)) (list ~shrink es) 269 | | Alt (e :: es) -> 270 | map2 (fun e es -> mknoloc (Alt (e :: es))) (shrink e) (list ~shrink es) 271 | | Opt e -> map (fun e -> mknoloc (Opt e)) (shrink e) 272 | | Repeat ({Loc.txt = (i, j); _}, e) -> 273 | map2 (fun (i, j) e -> mknoloc (Repeat (mknoloc (i, j), e))) 274 | (pair (int i) (option int j)) (shrink e) 275 | | Capture e -> map (fun e -> mknoloc (Capture e)) (shrink e) 276 | | Capture_as (name, e) -> 277 | map2 (fun name e -> mknoloc (Capture_as (mknoloc name, e))) 278 | (string name.Loc.txt) (shrink e) 279 | | _ -> empty) 280 | in 281 | fun e -> map Regexp.simplify (shrink e) 282 | (* arb_regexp: arbitrary built from gen_regexp, with shrink_regexp as shrinker and Regexp.show_debug as printer. *) 283 | let
arb_regexp = 284 | Q.make ~print:Regexp.show_debug ~shrink:shrink_regexp gen_regexp 285 | (* test_parse: property on an arbitrary string s, comparing Regexp.parse_exn followed by Regexp.to_re with Re.Perl.re applied to s directly. It passes when both sides fail, when both succeed, when Re.Perl reports Not_supported, or when only Re.Perl reports a parse error and the parsed regexp contains an anonymous capture. Every other disagreement is reported as a failure. *) 286 | let test_parse s = 287 | let r = 288 | (match Regexp.parse_exn s with 289 | | exception Loc.Error err -> Error err 290 | | e -> 291 | Ok (e, 292 | (try Ok (Regexp.to_re e) with 293 | | Re.Perl.Parse_error -> Error `Parse_error 294 | | Re.Perl.Not_supported -> Error `Not_supported))) 295 | in 296 | let r' = 297 | try Ok (Re.Perl.re s) with 298 | | Re.Perl.Parse_error -> Error `Parse_error 299 | | Re.Perl.Not_supported -> Error `Not_supported 300 | in 301 | (match r, r' with 302 | | (Error _ | Ok (_, Error _)), Error _ -> true 303 | | Ok _, Error `Not_supported -> true 304 | | Ok (e, Ok _), Error `Parse_error -> 305 | if Regexp.has_anon_capture e then true else 306 | Q.Test.fail_reportf "Parsed to %a and converted to Re.t, \ 307 | but should be invalid" Regexp.pp_debug e 308 | | Error err, Ok _ -> 309 | Q.Test.fail_reportf "Failed to parse valid %s: %a" s 310 | pp_location_error err 311 | | Ok (e, Error _), Ok _ -> 312 | Q.Test.fail_reportf "Parsed to %a but conversion to Re.t failed" 313 | Regexp.pp_debug e 314 | | Ok (_, Ok _), Ok _ -> 315 | (* TODO: Would have been nice to compare the two Re.t here. *) 316 | true) 317 | (* tests: the first property prints a generated regexp with to_string, parses it back and compares with Regexp.equal; the second runs test_parse on printable strings. NOTE(review): the second test is named as a to_string-after-parse round trip, but test_parse never calls to_string. *) 318 | let tests = [ 319 | Q.Test.make ~long_factor:100 ~name:"parse ∘ to_string" arb_regexp 320 | (fun e -> 321 | (match Regexp.parse_exn (Regexp.to_string e) with 322 | | exception Loc.Error err -> 323 | Q.Test.fail_reportf "%a" pp_location_error err 324 | | e' -> Regexp.equal e' e)); 325 | Q.Test.make ~long_factor:100 ~name:"to_string ∘ parse" 326 | (Q.string_gen Q.Gen.printable) test_parse; 327 | ] 328 | (* Entry point: run all properties with the QCheck runner. *) 329 | let () = QCheck_runner.run_tests_main tests 330 | --------------------------------------------------------------------------------