├── .gitignore
├── .travis.sh
├── .travis.yml
├── CHANGES.md
├── COPYING
├── COPYING.LESSER
├── COPYING.LINKING
├── README.md
├── common
│   ├── regexp.ml
│   └── regexp.mli
├── dune-project
├── dune-workspace.dev
├── ppx_regexp.opam
├── ppx_regexp
│   ├── dune
│   └── ppx_regexp.ml
├── ppx_tyre.opam
├── ppx_tyre
│   ├── dune
│   ├── ppx_tyre.ml
│   └── ppx_tyre.mli
└── tests
    ├── dune
    ├── main.ml
    ├── test_ppx_regexp.ml
    ├── test_ppx_regexp_unused.ml
    ├── test_ppx_tyre.ml
    └── test_regexp.ml
/.gitignore:
--------------------------------------------------------------------------------
1 | .merlin
2 | /_build
3 | /ppx_regexp.install
4 | /ppx_tyre.install
5 |
--------------------------------------------------------------------------------
/.travis.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | # CI entry point: from the directory holding this script, pin the opam
3 | # package named by PKG_NAME to the local checkout, install its external
4 | # dependencies, then build, install and test it.
5 | set -ex
6 | # Fail early with a clear message instead of passing an empty package name to opam.
7 | : "${PKG_NAME:?PKG_NAME must be set}"
8 | # Quoted so a checkout path containing spaces is not word-split.
9 | cd "$(dirname "$0")"
10 | # apt-get is the interface intended for scripts; plain apt targets interactive use.
11 | sudo apt-get -y install m4
12 | opam pin add -yn "${PKG_NAME}" .
13 | opam depext -y "${PKG_NAME}"
14 | opam install -yt "${PKG_NAME}"
15 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 | sudo: required
3 | # Each job runs .travis.sh inside the $IMAGE_NAME container for $PKG_NAME.
4 | services:
5 |   - docker
6 |
7 | env:
8 |   matrix:  # one job per entry: compiler image paired with a package
9 |     - IMAGE_NAME=ocaml/opam2:4.02 PKG_NAME=ppx_regexp
10 |     - IMAGE_NAME=ocaml/opam2:4.02 PKG_NAME=ppx_tyre
11 |     - IMAGE_NAME=ocaml/opam2:4.04 PKG_NAME=ppx_regexp
12 |     - IMAGE_NAME=ocaml/opam2:4.04 PKG_NAME=ppx_tyre
13 |     - IMAGE_NAME=ocaml/opam2:4.07 PKG_NAME=ppx_regexp
14 |     - IMAGE_NAME=ocaml/opam2:4.07 PKG_NAME=ppx_tyre
15 |     - IMAGE_NAME=ocaml/opam2:4.08 PKG_NAME=ppx_regexp
16 |     - IMAGE_NAME=ocaml/opam2:4.08 PKG_NAME=ppx_tyre
17 |     - IMAGE_NAME=ocaml/opam2:4.09 PKG_NAME=ppx_regexp
18 |     - IMAGE_NAME=ocaml/opam2:4.09 PKG_NAME=ppx_tyre
19 |
20 | before_install:
21 |   - docker pull $IMAGE_NAME
22 |
23 | script:  # the checkout is mounted read-only at /mnt inside the container
24 |   - docker run --privileged -v `pwd`:/mnt:ro --env PKG_NAME=$PKG_NAME
25 |       $IMAGE_NAME /mnt/.travis.sh
26 |
--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
1 | ## v0.5.1 - 2022-06-09
2 |
3 | - Fix invalid AST due to empty binding list in `ppx_regexp`.
4 |
5 | ## v0.5.0 - 2022-06-06
6 |
7 | - Migrate `ppx_regexp` to ppxlib.
8 | - Change license exception for `ppx_regexp` to the LGPL-3.0 Linking
9 | Exception.
10 |
11 | ## v0.4.3 - 2019-11-25
12 |
13 | - Fixed nested `[%pcre]` usage for `ppx_regexp`.
14 | - Extended compiler support to 4.02.3 up to 4.09.0 (at least) for both PPXes.
15 | - Upgrade to AST 4.09 to support newer compiler features.
16 |
17 | ## v0.4.2 - 2019-03-24
18 |
19 | - Fix top level group elimination for `ppx_regexp` (#8).
20 |
21 | ## v0.4.1 - 2018-09-04
22 |
23 | - Fix multi-group top level regexp for `ppx_tyre`.
24 |
25 | ## v0.4.0 - 2018-08-20
26 |
27 | - Switched to internal regexp parser.
28 | - Added syntax extension for `tyre` (Gabriel Radanne).
29 | - Fixed type of captures under alternatives for `%pcre`.
30 | - Better error reporting, including locations.
31 | - The PPX now declares its runtime libraries.
32 |
33 | ## v0.3.2 - 2018-03-01
34 |
35 | - Prepare for re 1.7.2.
36 |
37 | ## v0.3.1 - 2017-08-21
38 |
39 | - Fix accidental shadowing of open from another interface-less module using
40 | `ppx_regexp`.
41 | - Support binding of group 0 and the universal pattern.
42 | - Switch to `ppx_tools_versioned`. This provides support for 4.02.3 in the
43 | main branch.
44 |
45 | ## v0.3.0 - 2017-06-04
46 |
47 | - Initial release for OCaml 4.03.0 and 4.04.1.
48 |
49 | ## v0.2.0 - 2017-06-04
50 |
51 | - Initial release for OCaml 4.02.3.
52 |
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/COPYING.LESSER:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/COPYING.LINKING:
--------------------------------------------------------------------------------
1 | LGPL-3.0 LINKING EXCEPTION
2 |
3 | As a special exception to the GNU Lesser General Public License
4 | version 3 ("LGPL3"), the copyright holders of this Library give you
5 | permission to convey to a third party a Combined Work that links
6 | statically or dynamically to this Library without providing any
7 | Minimal Corresponding Source or Minimal Application Code as set out in
8 | 4d or providing the installation information set out in section 4e,
9 | provided that you comply with the other provisions of LGPL3 and
10 | provided that you meet, for the Application the terms and conditions
11 | of the license(s) which apply to the Application.
12 |
13 | Except as stated in this special exception, the provisions of LGPL3
14 | will continue to comply in full to this Library. If you modify this
15 | Library, you may apply this exception to your version of this Library,
16 | but you are not obliged to do so. If you do not wish to do so, delete
17 | this exception statement from your version. This exception does not
18 | (and cannot) modify any license terms which apply to the Application,
19 | with which you must still comply.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status][ci-build-status]][ci]
2 |
3 | # Two PPXes for Working with Regular Expressions
4 |
5 | This repo provides two PPXes providing regular expression-based routing:
6 |
7 | - `ppx_regexp` maps to [re][] with the conventional last-match extraction
8 | into `string` and `string option`.
9 | - `ppx_tyre` maps to [Tyre][tyre] providing typed extraction into options,
10 | lists, tuples, objects, and polymorphic variants.
11 |
12 | Another difference is that `ppx_regexp` works directly on strings
13 | essentially hiding the library calls, while `ppx_tyre` provides `Tyre.t` and
14 | `Tyre.route` which can be composed and applied using the Tyre library.
15 |
16 | ## `ppx_regexp` - Regular Expression Matching with OCaml Patterns
17 |
18 | This syntax extension turns
19 | ```ocaml
20 | function%pcre
21 | | {|re1|} -> e1
22 | ...
23 | | {|reN|} -> eN
24 | | _ -> e0
25 | ```
26 | into suitable invocations of the [Re library][re], and similar for
27 | `match%pcre`. The patterns are plain strings of the form accepted by
28 | `Re_pcre`, with the following additions:
29 |
30 | - `(?<var>...)` defines a group and binds whatever it matches as `var`.
31 | The type of `var` will be `string` if the match is guaranteed given that
32 | the whole pattern matches, and `string option` if the variable is bound
33 | to or nested below an optionally matched group.
34 |
35 | - `?<var>` at the start of a pattern binds group 0 as `var : string`.
36 | This may not be the full string if the pattern is unanchored.
37 |
38 | A variable is allowed for the universal case and is bound to the matched
39 | string. A regular alias is currently not allowed for patterns, since it is
40 | not obvious whether it should bind the full string or group 0.
41 |
42 | ### Example
43 |
44 | The following prints out times and hosts for SMTP connections to the Postfix
45 | daemon:
46 | ```ocaml
47 | (* Link with re, re.pcre, lwt, lwt.unix.
48 | Preprocess with ppx_regexp.
49 | Adjust to your OS. *)
50 |
51 | open Lwt.Infix
52 |
53 | let check_line =
54 | (function%pcre
55 | | {|(?<t>.*:\d\d) .* postfix/smtpd\[[0-9]+\]: connect from (?<host>[a-z0-9.-]+)|} ->
56 | Lwt_io.printlf "%s %s" t host
57 | | _ ->
58 | Lwt.return_unit)
59 |
60 | let () = Lwt_main.run begin
61 | Lwt_io.printl "SMTP connections from:" >>= fun () ->
62 | Lwt_stream.iter_s check_line (Lwt_io.lines_of_file "/var/log/syslog")
63 | end
64 | ```
65 |
66 | ## `ppx_tyre` - Syntax Support for Tyre Routes
67 |
68 | ### Typed regular expressions
69 |
70 | This PPX compiles
71 | ```ocaml
72 | [%tyre {|re|}]
73 | ```
74 | into `'a Tyre.t`.
75 |
76 | For instance, we can define a pattern that recognizes strings of the form "dim:3x5" like so:
77 |
78 | ```ocaml
79 | # open Tyre ;;
80 | # let dim = [%tyre "dim:(?&int)x(?&int)"] ;;
81 | val dim : (int * int) Tyre.t
82 | ```
83 |
84 | The syntax `(?&id)` allows calling a typed regular expression named `id` of type `'a Tyre.t`, such as `Tyre.int`.
85 |
86 | For convenience, you can also use *named* capture groups to name the captured elements.
87 | ```ocaml
88 | # let dim = [%tyre "dim:(?<x>(?&int))x(?&y:int)"] ;;
89 | val dim : < x : int; y : int > Tyre.t
90 | ```
91 |
92 | Names given using the syntax `(?<name>re)` will be used for the fields
93 | of the results. `(?&y:int)` is a shortcut for `(?<y>(?&int))`.
94 | This can also be used for alternatives, for instance:
95 |
96 | ```ocaml
97 | # let id_or_name = [%tyre "id:(?&id:int)|name:(?<name>[[:alnum:]]+)"] ;;
98 | val id_or_name : [ `id of int | `name of string ] Tyre.t
99 | ```
100 |
101 | Expressions of type `Tyre.t` can then be composed as part of bigger regular
102 | expressions, or compiled with `Tyre.compile`.
103 | See [tyre][]'s documentation for details.
104 |
105 | ### Routes
106 |
107 | `ppx_tyre` can also be used for routing, in the style of `ppx_regexp`:
108 |
109 | ```ocaml
110 | function%tyre
111 | | {|re1|} -> e1
112 | ...
113 | | {|reN|} -> eN
114 | ```
115 |
116 | is turned into a `'a Tyre.route`, where `re`, `re1`, ... are regular expressions
117 | using the same syntax as above. `"re" as v` is considered like `(?<v>re)` and
118 | `"re1" | "re2"` is turned into a regular expression alternative.
119 |
120 | Once routes are defined, matching is done with `Tyre.exec`.
121 |
122 | ### Details
123 |
124 | The syntax follows Perl's syntax:
125 |
126 | - `re?` extracts an option of what `re` extracts.
127 | - `re+`, `re*`, `re{n,m}` extracts a list of what `re` extracts.
128 | - `(?&qname)` refers to any identifier bound to a typed regular expression
129 | of type `'a Tyre.t`.
130 | - Normal parens are *non-capturing*.
131 | - There are two ways to capture:
132 | - Anonymous capture `(+re)`
133 | - Named capture `(?<v>re)`
134 | - One or more `(?<v>re)` at the top level can be used to bind variables
135 | instead of `as ...`.
136 | - One or more `(?<v>re)` in a sequence extracts an object where each method
137 | `v` is bound to what `re` extracts.
138 | - An alternative with one `(?<v>re)` per branch extracts a polymorphic
139 | variant where each constructor `` `v`` receives what `re` extracts as its
140 | argument.
141 | - `(?&v:qname)` is a shortcut for `(?<v>(?&qname))`.
142 |
143 | ## Limitations
144 |
145 | ### No Pattern Guards
146 |
147 | Pattern guards are not supported. This is due to the fact that all match
148 | cases are combined into a single regular expression, so if one of the
149 | patterns succeeds, the match is committed before we can check the guard
150 | condition.
151 |
152 | ### No Exhaustiveness Check
153 |
154 | The syntax extension will always warn if no catch-all case is provided. No
155 | exhaustiveness check is attempted. Doing it right would require
156 | reimplementing full regular expression parsing and an algorithm which would
157 | ideally produce a counter-example.
158 |
159 | ## Bug Reports
160 |
161 | The processor is currently new and not well tested. Please break it and
162 | file bug reports in the GitHub issue tracker. Any exception raised by
163 | generated code except for `Match_failure` is a bug.
164 |
165 |
166 | [ci]: https://travis-ci.org/paurkedal/ppx_regexp
167 | [ci-build-status]: https://travis-ci.org/paurkedal/ppx_regexp.svg?branch=master
168 | [re]: https://github.com/ocaml/ocaml-re
169 | [tyre]: https://github.com/Drup/tyre
170 |
--------------------------------------------------------------------------------
/common/regexp.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2018--2022 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the OCaml static compilation exception or (at
7 | * your option) the LGPL-3.0 Linking Exception.
8 | *
9 | * This library is distributed in the hope that it will be useful, but WITHOUT
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
12 | * License for more details.
13 | *
14 | * You should have received a copy of the GNU Lesser General Public License
15 | * along with this library. If not, see <https://www.gnu.org/licenses/>.
16 | *)
17 |
18 | let mkloc = Location.mkloc (* local alias for [Location.mkloc] *)
19 |
20 | let (%) f g = fun x -> f (g x) (* function composition: [(f % g) x] is [f (g x)] *)
21 |
22 | type 'a t = 'a node Location.loc (* a regexp node paired with its source location *)
23 | and 'a node =
24 | | Code of 'a (* leaf payload; [parse_exn] stores the source substring accepted by [Re.Perl.re] here *)
25 | | Seq of 'a t list (* sequence of parts; [Seq []] is the empty sequence tested by [nonepsilon] *)
26 | | Alt of 'a t list (* alternation; built by [simplify_alt] when there is not exactly one branch *)
27 | | Opt of 'a t (* operand followed by a question mark *)
28 | | Repeat of (int * int option) Location.loc * 'a t (* located (min, max) bounds; the star operator yields (0, None) *)
29 | | Nongreedy of 'a t (* repetition followed by a question-mark modifier *)
30 | | Capture of 'a t (* NOTE(review): presumably an anonymous capture group; constructed outside this view *)
31 | | Capture_as of string Location.loc * 'a t (* NOTE(review): presumably a capture bound to the located name; confirm *)
32 | | Call of Longident.t Location.loc (* NOTE(review): presumably a reference to an externally bound identifier; confirm *)
33 | (* TODO: | Case_sense of t | Case_blind of t *)
34 |
35 | let nonepsilon e = (match e.Location.txt with Seq [] -> false | _ -> true) (* [false] exactly for the empty sequence [Seq []] *)
36 |
37 | (* Drops empty sequences from [es]; a lone survivor is returned unchanged, otherwise the survivors are wrapped in a [Seq] node located at [loc]. *)
38 | let simplify_seq ~loc es =
39 |   let kept = List.filter nonepsilon es in
40 |   (match kept with [single] -> single | _ -> mkloc (Seq kept) loc)
41 |
42 | (* A singleton list yields that element's node, discarding its location; any other list is wrapped in [Alt]. *)
43 | let simplify_alt = function
44 |   | [only] -> only.Location.txt
45 |   | branches -> Alt branches
46 |
47 | module Int_map = struct
48 |   module M = Map.Make (struct type t = int let compare = compare end)
49 |   (* [find_last pred m] returns the binding with the greatest key satisfying [pred]; raises [Not_found] if there is none. *)
50 |   [@@@ocaml.warning "-32"]
51 |   let find_last pred m = M.bindings m |> List.rev |> List.find (fun (key, _) -> pred key)
52 |   (* NOTE(review): when [M] itself provides [find_last], the include below shadows the definition above; presumably that is why warning 32 (unused value) is disabled. *)
53 |   include M
54 | end
55 |
56 | let parse_exn ?(pos = Lexing.dummy_pos) s =
57 | let l = String.length s in
58 | let get i = if i = l then ')' else s.[i] in
59 |
60 | (* Location Tracking *)
61 | let position_of_index =
62 | if pos = Lexing.dummy_pos then (fun _ -> Lexing.dummy_pos) else
63 | let newlines =
64 | let rec loop acc lnum i =
65 | if i = l then acc else
66 | if s.[i] <> '\n' then loop acc lnum (i + 1) else
67 | loop (Int_map.add (i + 1) (lnum + 1) acc) (lnum + 1) (i + 1)
68 | in
69 | loop (Int_map.singleton 0 pos.pos_lnum) pos.pos_lnum 0
70 | in
71 | fun i ->
72 | let j, pos_lnum = Int_map.find_last (fun j -> j <= i) newlines in
73 | { pos with
74 | pos_lnum;
75 | pos_bol = pos.pos_bol + j;
76 | pos_cnum = pos.pos_cnum + i; }
77 | in
78 | let make_loc (i, j) =
79 | let open Location in
80 | if pos = Lexing.dummy_pos then Location.none else
81 | { loc_start = position_of_index i;
82 | loc_end = position_of_index j;
83 | loc_ghost = false }
84 | in
85 | let wrap_loc (i, j) x = Location.{txt = x; loc = make_loc (i, j)} in
86 | let with_loc f i = let j, e = f i in j, wrap_loc (i, j) e in
87 | let suffix_loc j f (e : _ Location.loc) =
88 | let e' = f e in
89 | if pos = Lexing.dummy_pos then Location.mknoloc e' else
90 | let loc = Location.{
91 | loc_start = e.loc.loc_start;
92 | loc_end = position_of_index j;
93 | loc_ghost = false;
94 | } in
95 | mkloc e' loc
96 | in
97 |
98 | let fail (i, j) msg = Location.raise_errorf ~loc:(make_loc (i, j)) "%s" msg in
99 |
100 | (* Identifiers *)
101 | let scan_ident i =
102 | let rec scan_cont j =
103 | (match get j with
104 | | 'A'..'Z' | 'a'..'z' | '0'..'9' | '_' | '\'' -> scan_cont (j + 1)
105 | | _ -> (j, String.sub s i (j - i)))
106 | in
107 | (match get i with
108 | | 'A'..'Z' | 'a'..'z' | '_' -> scan_cont (i + 1)
109 | | _ -> fail (i, i) "Expecting an identifier.")
110 | in
111 | let rec scan_longident_cont lidr i =
112 | if get i <> '.' then (i, lidr) else
113 | let j, idr = scan_ident (i + 1) in
114 | scan_longident_cont (Longident.Ldot (lidr, idr)) j
115 | in
116 | let scan_longident i =
117 | let j, idr = scan_ident i in
118 | scan_longident_cont (Longident.Lident idr) j
119 | in
120 | let scan_ident = with_loc scan_ident in
121 | let scan_longident = with_loc scan_longident in
122 | let scan_longident_cont idr =
123 | with_loc (scan_longident_cont (Longident.Lident idr)) in
124 |
125 | (* Non-Nested Parts *)
126 | let re_perl (i, j) =
127 | let sij = String.sub s i (j - i) in
128 | try ignore (Re.Perl.re sij); wrap_loc (i, j) (Code sij)
129 | with Re.Perl.Parse_error | Re.Perl.Not_supported ->
130 | fail (i, j) "Rejected by Re.Perl."
131 | in
132 | let scan_escape i =
133 | if i + 1 = l then fail (i, i+1) "Escape at end of regular expression." else
134 | (match s.[i + 1] with
135 | | 'a'..'z' | 'A'..'Z' -> (i + 2, re_perl (i, i + 2))
136 | | _ -> (i + 2, re_perl (i, i + 2)))
137 | in
138 | let rec scan_cset i j =
139 | if j = l then fail (i, i + 1) "Unbalanced '['." else
140 | (match s.[j] with
141 | | '\\' ->
142 | if j + 1 = l then
143 | fail (j, j + 1) "Backslash at end of RE while scanning character set."
144 | else
145 | scan_cset i (j + 2)
146 | | '[' when get (j + 1) = ':' ->
147 | (match String.index_from s (j + 1) ']' with
148 | | exception Not_found ->
149 | fail (j + 1, j + 2) "Unbalanced '[' in character set."
150 | | k -> scan_cset i (k + 1))
151 | | ']' when j <> i + 1 && (j <> i + 2 || s.[i + 1] <> '^') ->
152 | (j + 1, re_perl (i, j + 1))
153 | | _ -> scan_cset i (j + 1))
154 | in
155 |
156 | (* Repeat and Opt *)
157 | let scan_int_opt i =
158 | let rec loop i n =
159 | if i = l then (i, n) else
160 | (match s.[i] with
161 | | '0'..'9' as ch -> loop (i + 1) (10 * n + (Char.code ch - 48))
162 | | _ -> (i, n))
163 | in
164 | let j, n = loop i 0 in
165 | (j, (if i = j then None else Some n))
166 | in
167 | let scan_range i =
168 | let j, n_min = scan_int_opt i in
169 | let n_min =
170 | (match n_min with
171 | | None -> fail (i, i) "Missing lower bound for range."
172 | | Some n -> n) in
173 | (match get j with
174 | | ',' ->
175 | let j, n_max = scan_int_opt (j + 1) in
176 | (match n_max with
177 | | Some n_max when n_max < n_min -> fail (i, j) "Reversed repeat range."
178 | | _ -> ());
179 | (j, n_min, n_max)
180 | | _ ->
181 | (j, n_min, (Some n_min)))
182 | in
183 | let apply_to_head (i, j) f = function
184 | | [] -> fail (i, j) "Operator must follow an operand."
185 | | e :: es -> f e :: es
186 | in
187 | let scan_greedyness i =
188 | let j, greedyness =
189 | (match get i with
190 | | '?' -> (i + 1, suffix_loc (i + 1) (fun e -> Nongreedy e))
191 | | '+' -> fail (i, i + 1) "Possessive modifier not supported."
192 | | _ -> (i, (fun e -> e))) in
193 | (match get j with
194 | | '?' | '*' | '+' | '{' ->
195 | fail (j, j + 1) "Nested repetition must be parenthesized."
196 | | _ -> (j, greedyness))
197 | in
198 | let repeat (i, j) (n_min, n_max) =
199 | suffix_loc j (fun e -> Repeat (wrap_loc (i, j) (n_min, n_max), e))
200 | in
201 |
202 | (* Sequences and Groups *)
203 | let
204 | rec scan_alt i =
205 | let j, e = scan_alt_item i [] in
206 | (j, simplify_alt e)
207 | and scan_alt_item i acc =
208 | let j, e = scan_seq i in
209 | (match get j with
210 | | ')' -> (j, List.rev (e :: acc))
211 | | '|' -> scan_alt_item (j + 1) (e :: acc)
212 | | _ -> assert false)
213 |
214 | and scan_seq i =
215 | let j, e = scan_seq_item i [] in
216 | (j, simplify_seq ~loc:(make_loc (i, j)) e)
217 | and scan_seq_item i acc =
218 | (match get i with
219 | | ')' | '|' -> (i, List.rev acc)
220 | | '[' ->
221 | let j, e = scan_cset i (i + 1) in
222 | scan_seq_item j (e :: acc)
223 | (* TODO: Reject repetition of ε and zero-width assertions. *)
224 | | '?' ->
225 | let j = i + 1 in
226 | let f = suffix_loc j (fun e -> Opt e) in
227 | let k, g = scan_greedyness j in
228 | scan_seq_item k (apply_to_head (i, k) (g % f) acc)
229 | | '*' ->
230 | let j = i + 1 in
231 | let f = repeat (i, j) (0, None) in
232 | let k, g = scan_greedyness j in
233 | scan_seq_item k (apply_to_head (i, k) (g % f) acc)
234 | | '+' ->
235 | let j = i + 1 in
236 | let f = repeat (i, j) (1, None) in
237 | let k, g = scan_greedyness j in
238 | scan_seq_item k (apply_to_head (i, k) (g % f) acc)
239 | | '{' ->
240 | let j, n_min, n_max = scan_range (i + 1) in
241 | if j = l || s.[j] <> '}' then fail (i, i + 1) "Unbalanced '{'." else
242 | let f = repeat (i, j) (n_min, n_max) in
243 | let k, g = scan_greedyness (j + 1) in
244 | scan_seq_item k (apply_to_head (i, k) (g % f) acc)
245 | | '(' ->
246 | let j, e = scan_group (i + 1) in
247 | if j = l || s.[j] <> ')' then fail (i, i + 1) "Unbalanced '('." else
248 | scan_seq_item (j + 1) (wrap_loc (i, j + 1) e :: acc)
249 | | '^' -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc)
250 | | '$' -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc)
251 | | '\\' ->
252 | let j, e = scan_escape i in
253 | scan_seq_item j (e :: acc)
254 | | _ -> scan_seq_item (i + 1) (re_perl (i, i + 1) :: acc))
255 |
256 | and scan_group i =
257 | (match get i with
258 | | '?' ->
259 | if i + 1 = l then fail (i - 1, i) "Unbalanced '('." else
260 | (match s.[i + 1] with
261 | | '&' ->
262 | let j, idr = scan_ident (i + 2) in
263 | if get j = ':' then
264 | let k, lidr = scan_longident (j + 1) in
265 | (k, Capture_as (idr, wrap_loc (j + 1, k) (Call lidr)))
266 | else
267 | let k, lidr = scan_longident_cont idr.Location.txt j in
268 | (k, Call lidr)
269 | | '<' ->
270 | let j, idr = scan_ident (i + 2) in
271 | if get j <> '>' then fail (i, i + 1) "Unbalanced '<'." else
272 | let k, e = with_loc scan_alt (j + 1) in
273 | (k, Capture_as (idr, e))
274 | | ':' ->
275 | scan_alt (i + 2)
276 | | '#' ->
277 | (try (String.index_from s (i + 2) ')', Seq []) with
278 | | Not_found -> fail (i - 1, i + 1) "Unterminated comment.")
279 | | _ ->
280 | fail (i, i + 2) "Invalid group modifier.")
281 | | '+' -> let j, e = with_loc scan_alt (i + 1) in (j, Capture e)
282 | | '*' | '{' -> fail (i, i + 1) "Invalid group modifier."
283 | | _ -> scan_alt i)
284 | in
285 |
286 | (* Top-Level *)
287 | let scan_toplevel i =
288 | if get i = '?' && get (i + 1) = '<' then scan_group i else scan_alt i
289 | in
290 | let j, e = with_loc scan_toplevel 0 in
291 | if j <> l then fail (j, j + 1) "Unbalanced ')'." else e
292 |
--------------------------------------------------------------------------------
/common/regexp.mli:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2018 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the OCaml static compilation exception or (at
7 | * your option) the LGPL-3.0 Linking Exception.
8 | *
9 | * This library is distributed in the hope that it will be useful, but WITHOUT
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
12 | * License for more details.
13 | *
14 | * You should have received a copy of the GNU Lesser General Public License
15 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
16 | *)
17 |
18 | type 'a t = 'a node Location.loc (* A syntax node paired with its source location. *)
19 | and 'a node =
20 | | Code of 'a (* Leaf carrying a payload of type ['a]. *)
21 | | Seq of 'a t list (* Sequence of sub-expressions. *)
22 | | Alt of 'a t list (* Alternation between sub-expressions. *)
23 | | Opt of 'a t (* Optional sub-expression. *)
24 | | Repeat of (int * int option) Location.loc * 'a t (* Repetition with located (minimum, optional maximum) bounds. *)
25 | | Nongreedy of 'a t (* Non-greedy variant of the wrapped sub-expression. *)
26 | | Capture of 'a t (* Unnamed capture group. *)
27 | | Capture_as of string Location.loc * 'a t (* Capture group bound to a located name. *)
28 | | Call of Longident.t Location.loc (* Reference to a located long identifier. *)
29 | (* TODO: | Case_sense of t | Case_blind of t *)
30 |
31 | val parse_exn : ?pos: Lexing.position -> string -> string t (* Parses a regular expression source string into a located tree; presumably [?pos] is the source position of the first character and malformed input raises a located error -- TODO confirm against regexp.ml. *)
32 |
--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
1 | (lang dune 1.11)
2 | (name ppx_regexp)
3 | (allow_approximate_merlin)
4 |
--------------------------------------------------------------------------------
/dune-workspace.dev:
--------------------------------------------------------------------------------
1 | (lang dune 1.11)
2 | (context (opam (switch 4.04.2)))
3 | (context (opam (switch 4.08.1)))
4 | (context (opam (switch 4.09.1)))
5 | (context (opam (switch 4.11.2)))
6 | (context (opam (switch 4.13.1)))
7 | (context (opam (switch 4.14.1)))
8 | (context (opam (switch 5.0.0)))
9 |
--------------------------------------------------------------------------------
/ppx_regexp.opam:
--------------------------------------------------------------------------------
1 | opam-version: "2.0"
2 | maintainer: "Petter A. Urkedal "
3 | authors: [
4 | "Petter A. Urkedal "
5 | "Gabriel Radanne "
6 | ]
7 | license: "LGPL-3.0-or-later WITH LGPL-3.0-linking-exception"
8 | homepage: "https://github.com/paurkedal/ppx_regexp"
9 | bug-reports: "https://github.com/paurkedal/ppx_regexp/issues"
10 | depends: [
11 | "ocaml" {>= "4.02.3"}
12 | "dune" {>= "1.11"}
13 | "ppxlib" {>= "0.9.0"}
14 | "re" {>= "1.7.2"}
15 | "qcheck" {with-test}
16 | ]
17 | build: ["dune" "build" "-p" name "-j" jobs]
18 | dev-repo: "git+https://github.com/paurkedal/ppx_regexp.git"
19 | synopsis: "Matching Regular Expressions with OCaml Patterns"
20 | description: """
21 | This syntax extension turns
22 |
23 | match%pcre x with
24 | | {|re1|} -> e1
25 | ...
26 | | {|reN|} -> eN
27 | | _ -> e0
28 |
29 | into suitable invocations to the ocaml-re library. The patterns are plain
30 | strings of the form accepted by `Re_pcre`, except groups can be bound to
31 | variables using the syntax `(?<var>...)`. The type of `var` will be
32 | `string` if a match of the group is guaranteed given a match of the
33 | whole pattern, and `string option` if the variable is bound to or nested
34 | below an optionally matched group.
35 | """
36 |
--------------------------------------------------------------------------------
/ppx_regexp/dune:
--------------------------------------------------------------------------------
1 | (library
2 | (name ppx_regexp)
3 | (public_name ppx_regexp)
4 | (kind ppx_rewriter)
5 | (modules ppx_regexp regexp)
6 | (preprocess (pps ppxlib.metaquot))
7 | (libraries ppxlib re re.perl)
8 | (ppx_runtime_libraries re re.perl))
9 |
10 | (rule (copy ../common/regexp.mli regexp.mli))
11 | (rule (copy ../common/regexp.ml regexp.ml))
12 |
--------------------------------------------------------------------------------
/ppx_regexp/ppx_regexp.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2017--2023 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the LGPL-3.0 Linking Exception.
7 | *
8 | * This library is distributed in the hope that it will be useful, but WITHOUT
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11 | * License for more details.
12 | *
13 | * You should have received a copy of the GNU Lesser General Public License
14 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
15 | *)
16 |
17 | open Ppxlib
18 | open Ast_builder.Default
19 |
20 | let error = Location.raise_errorf
21 |
(* [warn ~loc msg e] returns [e] with an [ocaml.ppwarning] attribute carrying
   the string [msg] put in front of its existing attributes. *)
let warn ~loc msg e =
  let warning =
    attribute ~loc
      ~name:{txt = "ocaml.ppwarning"; loc}
      ~payload:
        (PStr [{pstr_desc = Pstr_eval (estring ~loc msg, []); pstr_loc = loc}])
  in
  {e with pexp_attributes = warning :: e.pexp_attributes}
27 |
(* Standard [List] extended with [fold], a left-to-right fold whose callback
   takes the element first and the accumulator second. *)
module List = struct
  include List

  (* [fold f [x1; ...; xn] acc] is [f xn (... (f x1 acc))]. *)
  let fold f xs acc = fold_left (fun state x -> f x state) acc xs
end
35 |
(* [Regexp] extended with the two passes needed by [%pcre]: collecting the
   named groups of a parsed pattern and printing the pattern back as a
   string. *)
module Regexp = struct
  include Regexp

  (* [bindings re] returns [(nG, bs)], where [nG] is the number of capture
     groups numbered during the walk and [bs] holds one triple
     [(name, index, must_match)] per named group, most recently visited
     first.  [index] is [None] for a [Capture_as] at the very top, which
     stands for the whole match, and [Some i] otherwise, with [i] counted
     from zero in order of appearance.  [must_match] becomes false below an
     alternation, an option, or a repetition whose lower bound is zero.
     Unnamed captures and calls raise a located error. *)
  let bindings =
    let rec recurse must_match (e' : _ Location.loc) =
      let loc = e'.Location.loc in
      (match e'.Location.txt with
       | Code _ -> fun acc -> acc
       | Seq es -> List.fold (recurse must_match) es
       | Alt es -> List.fold (recurse false) es
       | Opt e -> recurse false e
       | Repeat ({Location.txt = (i, _); _}, e) ->
          recurse (must_match && i > 0) e
       | Nongreedy e -> recurse must_match e
       | Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre."
       | Capture_as (idr, e) ->
          fun (nG, bs) ->
            recurse must_match e (nG + 1, (idr, Some nG, must_match) :: bs)
       | Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
    in
    (function
     | {Location.txt = Capture_as (idr, e); _} ->
        recurse true e (0, [idr, None, true])
     | e ->
        recurse true e (0, []))

  (* [to_string re] prints [re] back as a regular expression string.  Named
     groups are printed as plain groups, except for a [Capture_as] at the
     very top, which is dropped because it denotes the whole match.  The
     argument [p] of [recurse] is the precedence required by the context;
     anything binding more loosely is wrapped in a non-capturing group. *)
  let to_string =
    let p_alt, p_seq, p_suffix, p_atom = 0, 1, 2, 3 in
    let delimit_if b s = if b then "(?:" ^ s ^ ")" else s in
    let rec recurse p (e' : _ Location.loc) =
      let loc = e'.Location.loc in
      (match e'.Location.txt with
       | Code s ->
          (* Delimiters not needed as Regexp.parse_exn only returns single
           * chars, csets, and escape sequences. *)
          s
       | Seq es ->
          delimit_if (p > p_seq)
            (String.concat "" (List.map (recurse p_seq) es))
       | Alt es ->
          delimit_if (p > p_alt)
            (String.concat "|" (List.map (recurse p_alt) es))
       | Opt e ->
          delimit_if (p > p_suffix) (recurse p_atom e ^ "?")
       | Repeat ({Location.txt = (i, j_opt); _}, e) ->
          let j_str = match j_opt with None -> "" | Some j -> string_of_int j in
          delimit_if (p > p_suffix)
            (Printf.sprintf "%s{%d,%s}" (recurse p_atom e) i j_str)
       | Nongreedy e ->
          (* A lazy suffix is itself a suffix expression: delimit it when an
             atom is required, so that an outer suffix is not appended
             directly to the lazy marker. *)
          delimit_if (p > p_suffix) (recurse p_suffix e ^ "?")
       | Capture _ -> error ~loc "Unnamed capture is not allowed for %%pcre."
       | Capture_as (_, e) -> "(" ^ recurse p_alt e ^ ")"
       | Call _ -> error ~loc "(&...) is not implemented for %%pcre.")
    in
    (function
     | {Location.txt = Capture_as (_, e); _} ->
        recurse 0 e
     | e ->
        recurse 0 e)
end
95 |
(* [fresh_var ()] returns a new identifier of the form [_ppx_regexp_N], where
   [N] starts at 1 and grows by one on every call. *)
let fresh_var =
  let counter = ref 0 in
  fun () ->
    counter := !counter + 1;
    "_ppx_regexp_" ^ string_of_int !counter
99 |
(* [is_zero p k] tells whether the run of decimal digits of [p] starting at
   index [k] denotes zero: it is true when the run is empty or made only of
   zero digits, false as soon as a non-zero digit is met.  Reaching the end
   of [p] ends the run instead of raising [Invalid_argument]. *)
let rec is_zero p k =
  if k >= String.length p then true else
  (match p.[k] with
   | '0' -> is_zero p (k + 1)
   | '1'..'9' -> false
   | _ -> true)
105 |
(* [must_match p i] inspects the suffix operators of [p] found at index [i]:
   it is false for an option or star, and for a brace range whose lower
   bound is zero; after a brace range with a non-zero lower bound the check
   continues behind the closing brace; it is true at the end of [p] or on
   any other character.  Raises [Not_found] when a brace is not closed. *)
let rec must_match p i =
  if i = String.length p then true
  else
    match p.[i] with
    | '?' | '*' -> false
    | '{' ->
      let closing = String.index_from p (i + 1) '}' in
      not (is_zero p (i + 1)) && must_match p (closing + 1)
    | _ -> true
115 |
(* [extract_bindings ~pos s] parses the pattern [s] located at [pos] and
   returns [(re, bs, nG)]: the pattern printed back as a string constant
   expression, its named-group bindings, and its number of groups. *)
let extract_bindings ~pos s =
  let parsed = Regexp.parse_exn ~pos s in
  let group_count, group_bindings = Regexp.bindings parsed in
  let printed = Regexp.to_string parsed in
  (estring ~loc:Location.none printed, group_bindings, group_count)
122 |
(* [wrap_group_bindings ~loc rhs offG bs] wraps [rhs] in one let-binding per
   entry [(var, index, must)] of [bs], the first entry being outermost.
   Each variable is read from the match result [_g]: group 0 when [index]
   is [None], group [offG + i + 1] when it is [Some i].  When [must] is
   false the lookup is turned into an option by catching [Not_found]. *)
let wrap_group_bindings ~loc rhs offG bs =
  let bind (varG, iG, mustG) body =
    let lookup =
      match iG with
      | None -> [%expr Re.Group.get _g 0]
      | Some iG -> [%expr Re.Group.get _g [%e eint ~loc (offG + iG + 1)]]
    in
    let value =
      if mustG then lookup
      else [%expr try Some [%e lookup] with Not_found -> None]
    in
    [%expr
      let [%p ppat_var ~loc varG] = [%e value] in
      [%e body]]
  in
  List.fold_right bind bs rhs
139 |
(* [transform_cases ~loc cases] compiles the cases of a [%pcre] match or
   function.  It returns [(body, re_binding)]: [body] matches the scrutinee
   [_ppx_regexp_v] against the compiled alternation and dispatches on the
   mark of the case that matched, and [re_binding] binds a fresh variable to
   the pair made of the compiled regexp and the array of marks.  A final
   wildcard or variable case becomes the default; without one, the default
   raises [Match_failure] and carries a warning. *)
let transform_cases ~loc cases =
  (* One regexp case becomes (regexp string, group count, bindings, rhs). *)
  let compile_case case =
    match case.pc_guard with
    | Some _ -> error ~loc "Guards are not implemented for match%%pcre."
    | None ->
      Ast_pattern.(parse (pstring __')) loc case.pc_lhs
        (fun {txt = re_src; loc = {loc_start; loc_end; _}} ->
          (* Half of what the literal spans beyond its contents is taken as
             the width of the opening delimiter. *)
          let literal_width = loc_end.pos_cnum - loc_start.pos_cnum in
          let re_offset = (literal_width - String.length re_src) / 2 in
          let pos =
            {loc_start with pos_cnum = loc_start.pos_cnum + re_offset} in
          let re, bs, nG = extract_bindings ~pos re_src in
          (re, nG, bs, case.pc_rhs))
  in
  (* The regexp cases are kept in reverse order until compiled below. *)
  let rev_regexp_cases, default_rhs =
    match List.rev cases with
    | {pc_lhs = {ppat_desc = Ppat_any; _}; pc_rhs; pc_guard = None} :: rest ->
      (rest, pc_rhs)
    | {pc_lhs = {ppat_desc = Ppat_var var; _}; pc_rhs; pc_guard = None}
      :: rest ->
      let rhs =
        [%expr let [%p ppat_var ~loc var] = _ppx_regexp_v in [%e pc_rhs]] in
      (rest, rhs)
    | all ->
      let open Lexing in
      let pos = loc.Location.loc_start in
      let e0 = estring ~loc pos.pos_fname in
      let e1 = eint ~loc pos.pos_lnum in
      let e2 = eint ~loc (pos.pos_cnum - pos.pos_bol) in
      let e = [%expr raise (Match_failure ([%e e0], [%e e1], [%e e2]))] in
      (all, warn ~loc "A universal case is recommended for %pcre." e)
  in
  let compiled = List.rev_map compile_case rev_regexp_cases in
  let res = pexp_array ~loc (List.map (fun (re, _, _, _) -> re) compiled) in
  let comp = [%expr
    let a = Array.map (fun s -> Re.mark (Re.Perl.re s)) [%e res] in
    let marks = Array.map fst a in
    let re = Re.compile (Re.alt (Array.to_list (Array.map snd a))) in
    (re, marks)
  ] in
  let var = fresh_var () in
  let re_binding =
    value_binding ~loc ~pat:(ppat_var ~loc {txt = var; loc}) ~expr:comp
  in
  let e_comp = pexp_ident ~loc {txt = Lident var; loc} in

  (* Test the marks in case order; [offG] is the number of groups taken by
     the cases already tested. *)
  let rec dispatch i offG = function
    | [] -> [%expr assert false]
    | (_, nG, bs, rhs) :: rest ->
      [%expr
        if Re.Mark.test _g (snd [%e e_comp]).([%e eint ~loc i]) then
          [%e wrap_group_bindings ~loc rhs offG bs]
        else
          [%e dispatch (i + 1) (offG + nG) rest]]
  in
  let body =
    [%expr
      (match Re.exec_opt (fst [%e e_comp]) _ppx_regexp_v with
       | None -> [%e default_rhs]
       | Some _g -> [%e dispatch 0 0 compiled])]
  in
  (body, re_binding)
203 |
(* AST traversal that rewrites every [%pcre] extension applied to a match or
   a function.  The accumulator collects, most recent first, the bindings of
   the compiled regexps produced by [transform_cases].  Sub-expressions are
   rewritten before the node that contains them. *)
let transformation = object
  inherit [value_binding list] Ast_traverse.fold_map as super

  method! expression e_ext acc =
    let e_ext, acc = super#expression e_ext acc in
    (match e_ext.pexp_desc with
     | Pexp_extension
         ({txt = "pcre"; _}, PStr [{pstr_desc = Pstr_eval (e, _); _}]) ->
        let loc = e.pexp_loc in
        (match e.pexp_desc with
         | Pexp_match (e, cases) ->
            let cases, binding = transform_cases ~loc cases in
            ([%expr let _ppx_regexp_v = [%e e] in [%e cases]], binding :: acc)
         | Pexp_function (cases) ->
            let cases, binding = transform_cases ~loc cases in
            ([%expr fun _ppx_regexp_v -> [%e cases]], binding :: acc)
         | _ ->
            error ~loc "[%%pcre] only applies to match and function.")
     | _ -> (e_ext, acc))
end
224 |
(* [impl str] rewrites the structure [str].  When at least one regexp was
   compiled, the collected bindings are put in an anonymous module that is
   opened in front of the rewritten structure; otherwise the rewritten
   structure is returned as is. *)
let impl str =
  match transformation#structure str [] with
  | rewritten, [] -> rewritten
  | rewritten, rev_bindings ->
    let loc = Location.none in
    let prelude =
      [%str open (struct [%%i pstr_value ~loc Nonrecursive rev_bindings] end)]
    in
    prelude @ rewritten
233 |
234 | let () = Driver.register_transformation ~impl "ppx_regexp"
235 |
--------------------------------------------------------------------------------
/ppx_tyre.opam:
--------------------------------------------------------------------------------
1 | opam-version: "2.0"
2 | maintainer: "Petter A. Urkedal "
3 | authors: [
4 | "Gabriel Radanne "
5 | "Petter A. Urkedal "
6 | ]
7 | license: "LGPL-3 with OCaml linking exception"
8 | homepage: "https://github.com/paurkedal/ppx_regexp"
9 | bug-reports: "https://github.com/paurkedal/ppx_regexp/issues"
10 | depends: [
11 | "ocaml" {>= "4.02.3"}
12 | "dune" {>= "1.11"}
13 | "ocaml-migrate-parsetree" {>= "1.4.0"}
14 | "re" {>= "1.7.2"}
15 | "ppx_tools_versioned" {>= "5.2.3"}
16 | "tyre" {>= "0.4.1"}
17 | "qcheck" {with-test}
18 | ]
19 | build: ["dune" "build" "-p" name "-j" jobs]
20 | dev-repo: "git+https://github.com/paurkedal/ppx_regexp.git"
21 | synopsis: "PPX syntax for tyre regular expressions and routes"
22 | description: """
23 | This PPX compiles
24 |
25 | [%tyre {|re|}]
26 |
27 | into `'a Tyre.t` and
28 |
29 | function%tyre
30 | | {|re1|} as x1 -> e1
31 | ...
32 | | {|reN|} as xN -> eN
33 |
34 | into `'a Tyre.route`, where `re`, `re1`, ... are regular expressions
35 | expressed in a slightly extended subset of PCRE. The interpretations are:
36 |
37 | - `re?` extracts an option of what `re` extracts.
38 | - `re+`, `re*`, `re{n,m}` extracts a list of what `re` extracts.
39 | - `(?&qname)` refers to any identifier bound to a typed regular expression
40 | of type `'a Tyre.t`.
41 | - One or more `(?<v>re)` at the top level can be used to bind variables
42 | instead of `as ...`.
43 | - One or more `(?<v>re)` in a sequence extracts an object where each method
44 | `v` is bound to what `re` extracts.
45 | - An alternative with one `(?<v>re)` per branch extracts a polymorphic
46 | variant where each constructor `` `v`` receives what `re` extracts as its
47 | argument.
48 | - `(?&v:qname)` is a shortcut for `(?<v>(?&qname))`.
49 | """
50 |
--------------------------------------------------------------------------------
/ppx_tyre/dune:
--------------------------------------------------------------------------------
1 | (library
2 | (name ppx_tyre)
3 | (public_name ppx_tyre)
4 | (kind ppx_rewriter)
5 | (modules ppx_tyre regexp)
6 | (preprocess (pps ppx_tools_versioned.metaquot_409))
7 | (libraries
8 | ocaml-migrate-parsetree
9 | ppx_tools_versioned
10 | re re.perl
11 | tyre)
12 | (ppx_runtime_libraries re re.perl tyre))
13 |
14 | (rule (copy ../common/regexp.mli regexp.mli))
15 | (rule (copy ../common/regexp.ml regexp.ml))
16 |
--------------------------------------------------------------------------------
/ppx_tyre/ppx_tyre.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2018 Gabriel Radanne
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the OCaml static compilation exception.
7 | *
8 | * This library is distributed in the hope that it will be useful, but WITHOUT
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11 | * License for more details.
12 | *
13 | * You should have received a copy of the GNU Lesser General Public License
14 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
15 | *)
16 |
17 |
18 | open Migrate_parsetree
19 | open Ast_409
20 | let ocaml_version = Versions.ocaml_409
21 | module AC = Ast_convenience_409
22 |
23 | module A = Ast_helper
24 | module Loc = Location
25 |
(* Standard [List] with [init] provided for compatibility with OCaml < 4.6.0. *)
module List = struct
  include List

  (* [init n f] is [[f 0; ...; f (n - 1)]], and the empty list when [n] is
     not positive.  [f] is applied from the highest index down to 0. *)
  let init n f =
    let rec build i acc =
      if i < 0 then acc else build (i - 1) (f i :: acc)
    in
    build (n - 1) []
end
32 |
33 | let internal_error ~loc = Loc.raise_errorf ~loc "Internal error@."
34 |
(* [mk_gen s] returns a generator with its own counter: each call yields [s]
   followed by the next integer, starting from 1. *)
let mk_gen s =
  let count = ref 0 in
  fun () ->
    count := !count + 1;
    s ^ string_of_int !count
38 |
(* Builders for expressions that refer to values of the [Tyre] library. *)
module Tyre = struct

  (* The identifier [Tyre.name]. *)
  let mk ~loc name = AC.evar ~loc ("Tyre." ^ name)

  (* [Tyre.name] applied to the labelled argument list [args]. *)
  let mkf ~loc name args = A.Exp.apply ~loc (mk ~loc name) args

  (* [Tyre.conv to_ from_ t]. *)
  let conv ~loc to_ from_ t =
    mkf ~loc "conv" [Nolabel, to_; Nolabel, from_; Nolabel, t]

  (* The binary combinator [Tyre.name] applied to [lhs] and [rhs]. *)
  let bin ~loc name lhs rhs = mkf ~loc name [Nolabel, lhs; Nolabel, rhs]

end
52 |
(* Builders for expressions that refer to values of the [Re] library. *)
module Re = struct

  (* The identifier [Re.name]. *)
  let mk ~loc name = AC.evar ~loc ("Re." ^ name)

  (* [Re.name] applied to the labelled argument list [args]. *)
  let mkf ~loc name args = A.Exp.apply ~loc (mk ~loc name) args

  (* [Re.name] applied to a single list expression built from [items]. *)
  let mkfl ~loc name items = mkf ~loc name [Nolabel, AC.list ~loc items]

end
63 |
64 | (** Utilities for captures *)
65 |
66 | type ('a, 'b) capture =
67 | | No
68 | | Named of 'a
69 | | Unnamed of 'b
70 |
(* [capture e] classifies what [e] extracts: [No] when it captures nothing,
   [Named s] when it is, or reduces to, a single group named [s], and
   [Unnamed ()] otherwise.  A sequence takes the classification of its only
   capturing item, or [Unnamed ()] when several items capture.  An
   alternation counts as capturing only when every branch captures. *)
let rec capture e =
  match e.Loc.txt with
  | Regexp.Code _ -> No
  | Regexp.Seq items ->
    let capturing =
      List.filter
        (function No -> false | _ -> true)
        (List.map capture items)
    in
    (match capturing with
     | [] -> No
     | [ only ] -> only
     | _ -> Unnamed ())
  | Regexp.Alt branches ->
    if List.exists (fun branch -> capture branch = No) branches
    then No
    else Unnamed ()
  | Regexp.Opt t | Regexp.Repeat (_, t) | Regexp.Nongreedy t -> capture t
  | Regexp.Capture _ | Regexp.Call _ -> Unnamed ()
  | Regexp.Capture_as (name, _) -> Named name
94 |
(* Lifts the classification of one item to that of a one-item sequence: a
   name becomes a singleton list and an unnamed capture a count of 1. *)
let capture_singleton c =
  match c with
  | Named name -> Named [name]
  | Unnamed () -> Unnamed 1
  | No -> No
99 |
100 | (** Simplification of regexps *)
101 |
(* [flatten_seq l] replaces, recursively, every [Seq] node of [l] by its
   items; other nodes are kept as they are. *)
let flatten_seq =
  let rec expand nodes = List.flatten (List.map items_of nodes)
  and items_of node =
    match node.Loc.txt with
    | Regexp.Seq inner -> expand inner
    | _ -> [node]
  in
  expand
110 |
(* [flatten_alt l] replaces, recursively, every [Alt] node of [l] by its
   branches; other nodes are kept as they are. *)
let flatten_alt =
  let rec expand nodes = List.flatten (List.map branches_of nodes)
  and branches_of node =
    match node.Loc.txt with
    | Regexp.Alt inner -> expand inner
    | _ -> [node]
  in
  expand
119 |
(* [extract_re_list ~loc l] is [Some payloads] when every node of [l] is a
   [Code] leaf, and [None] otherwise. *)
let extract_re_list ~loc l =
  let payload = function
    | {Loc.txt = Regexp.Code r; _} -> r
    | _ -> internal_error ~loc
  in
  let only_code =
    List.for_all
      (function {Loc.txt = Regexp.Code _; _} -> true | _ -> false)
      l
  in
  if only_code then Some (List.map payload l) else None
125 |
(* [collapse_ungrouped_seq ~loc l] merges each maximal run of consecutive
   [Code] items of [l] into a single [Code] node holding [Re.seq] of the
   run.  The result is [Re.epsilon] for an empty list, the node itself when
   a single one remains, and a [Seq] otherwise. *)
let collapse_ungrouped_seq ~loc l =
  (* [pending] holds the current run of payloads, most recent first. *)
  let flush pending =
    match pending with
    | [] -> []
    | _ ->
      [Loc.mkloc (Regexp.Code (Re.mkfl "seq" ~loc @@ List.rev pending)) loc]
  in
  let rec group pending = function
    | [] -> flush pending
    | {Loc.txt = Regexp.Code r; _} :: rest -> group (r :: pending) rest
    | other :: rest -> flush pending @ (other :: group [] rest)
  in
  match group [] l with
  | [] -> Regexp.Code (Re.mk ~loc "epsilon")
  | [ single ] -> single.Loc.txt
  | items -> Regexp.Seq items
141 |
(* [collapse_ungrouped t] turns a parsed regexp whose leaves are source
   strings into one whose leaves are expressions building [Re] values.
   Every subtree made only of leaves is folded into a single [Code] leaf:
   leaves become calls to [Re.Perl.re], and operators applied to a folded
   operand become calls to the matching [Re] function.  Captures and calls
   are kept, so the remaining structure is the one that extracts values.
   Every generated node, including the repetition bounds, carries the
   location of the node it comes from. *)
let rec collapse_ungrouped (t : string Regexp.t) =
  let loc = t.Loc.loc in
  let e : _ Regexp.node = match t.Loc.txt with
    | Regexp.Code e ->
      let f = AC.evar ~loc "Re.Perl.re" in
      let s = A.Exp.constant ~loc (A.Const.string e) in
      Code (A.Exp.apply ~loc f [Nolabel, s])
    | Call lid ->
      Call lid
    | Capture t ->
      Capture (collapse_ungrouped t)
    | Capture_as (s, t) ->
      Capture_as (s, collapse_ungrouped t)
    | Seq l ->
      let l = flatten_seq @@ List.map collapse_ungrouped l in
      collapse_ungrouped_seq ~loc l
    | Alt l ->
      let l = flatten_alt @@ List.map collapse_ungrouped l in
      begin match extract_re_list ~loc l with
        | Some r -> Code (Re.mkfl "alt" ~loc r)
        | None -> Alt l
      end
    | Opt t ->
      begin match collapse_ungrouped t with
        | {Loc.txt = Code r; _} ->
          Code (Re.mkf ~loc "opt" [Nolabel, r])
        | t -> Opt t
      end
    | Repeat ({Loc.txt = (i, j); _} as ij, t) ->
      begin match collapse_ungrouped t with
        | {Loc.txt = Code r; _} ->
          (* Bounds are passed as [Re.repn r i j] with [j] an int option. *)
          let i = A.Exp.constant ~loc (A.Const.int i) in
          let j =
            match j with
            | None -> AC.constr ~loc "None" []
            | Some j ->
              AC.constr ~loc "Some" [A.Exp.constant ~loc (A.Const.int j)]
          in
          Code (Re.mkf ~loc "repn" [Nolabel, r; Nolabel, i; Nolabel, j])
        | t -> Repeat (ij, t)
      end
    | Nongreedy t ->
      begin match collapse_ungrouped t with
        | {Loc.txt = Code r; _} ->
          Code (Re.mkf ~loc "non_greedy" [Nolabel, r])
        | t -> Nongreedy t
      end
  in
  Loc.mkloc e loc
190 |
191 | let simplify = collapse_ungrouped
192 |
193 | (** Converters to/from nested tuples *)
194 |
(* [make_nested_tuple_pat ~loc [a; b; c]] is the pattern [(a, (b, c))]; a
   single name gives a variable pattern and an empty list is an internal
   error. *)
let rec make_nested_tuple_pat ~loc = function
  | [] -> internal_error ~loc
  | [ last ] -> AC.pvar ~loc last
  | first :: rest ->
    let nested = make_nested_tuple_pat ~loc rest in
    A.Pat.tuple ~loc [AC.pvar ~loc first; nested]
(* [make_nested_tuple_expr ~loc [a; b; c]] is the expression [(a, (b, c))];
   a single expression is returned unchanged and an empty list is an
   internal error. *)
let rec make_nested_tuple_expr ~loc = function
  | [] -> internal_error ~loc
  | [ last ] -> last
  | first :: rest ->
    let nested = make_nested_tuple_expr ~loc rest in
    A.Exp.tuple ~loc [first; nested]
(* [make_object_expr ~loc exprs meths] builds an object expression with one
   public method per name of [meths], each returning the expression found at
   the same position in [exprs].  Lists of different lengths are an internal
   error. *)
let make_object_expr ~loc expr meths =
  let fields =
    if List.length expr <> List.length meths then internal_error ~loc
    else
      List.map2
        (fun body meth ->
          A.Cf.method_ ~loc meth Public (Cfk_concrete (Fresh, body)))
        expr meths
  in
  A.Exp.object_ ~loc (A.Cstr.mk (A.Pat.any ~loc ()) fields)
223 |
(* [make_conv_of_nested_tuple] wraps [tyre_expr] in a [Tyre.conv] between
   right-nested tuples and another shape.  The forward function binds [ids]
   with a nested tuple pattern and builds its result with [make_expr]; the
   backward function takes the pattern given by [make_pat] and rebuilds the
   nested tuple from the expressions returned alongside it. *)
let make_conv_of_nested_tuple ~loc ~make_pat ~make_expr ~ids tyre_expr =
  let fun_to =
    let nested_pat = make_nested_tuple_pat ~loc ids in
    let built = make_expr ~loc (List.map (AC.evar ~loc) ids) in
    A.Exp.fun_ ~loc Nolabel None nested_pat built
  in
  let fun_from =
    let source_pat, components = make_pat ~loc () in
    let nested = make_nested_tuple_expr ~loc components in
    A.Exp.fun_ ~loc Nolabel None source_pat nested
  in
  Tyre.conv ~loc fun_to fun_from tyre_expr
237 |
(* [make_conv_object ~loc meths tyre_expr] converts the nested tuple
   extracted by [tyre_expr] into an object with one method per name of
   [meths], and back by sending each method to the object. *)
let make_conv_object ~loc meths tyre_expr =
  let obj_var = "v" in
  let next_id = mk_gen obj_var in
  let ids = List.init (List.length meths) (fun _ -> next_id ()) in
  let make_expr ~loc lids = make_object_expr ~loc lids meths in
  let make_pat ~loc () =
    let receiver = AC.evar ~loc obj_var in
    let sends = List.map (fun m -> A.Exp.send ~loc receiver m) meths in
    (AC.pvar ~loc obj_var, sends)
  in
  make_conv_of_nested_tuple ~loc ~ids ~make_expr ~make_pat tyre_expr
252 |
(* [make_conv_tuple ~loc n tyre_expr] converts the nested tuple extracted by
   [tyre_expr] into a flat tuple of [n] components, and back. *)
let make_conv_tuple ~loc n tyre_expr =
  let next_id = mk_gen "v" in
  let ids = List.init n (fun _ -> next_id ()) in
  let make_expr ~loc components = A.Exp.tuple ~loc components in
  let make_pat ~loc () =
    let flat_pat = A.Pat.tuple ~loc (List.map (AC.pvar ~loc) ids) in
    (flat_pat, List.map (AC.evar ~loc) ids)
  in
  make_conv_of_nested_tuple ~loc ~ids ~make_expr ~make_pat tyre_expr
264 |
265 | (** Converters to/from nested either types *)
266 |
(* Polymorphic variant with tag [s] and argument [x], as a pattern and as an
   expression. *)
let ppoly s ~loc x = A.Pat.variant ~loc s (Some x)
let epoly s ~loc x = A.Exp.variant ~loc s (Some x)
(* [make_nested_either_constr ~loc ~length ~mk n x] selects branch [n] of a
   right-nested either of [length] branches: [x] is wrapped in [Left],
   unless [n] is the last branch, and then in [n] layers of [Right].  [mk]
   builds one variant layer. *)
let make_nested_either_constr ~loc ~length ~mk n x =
  let rec under_rights ~loc depth inner =
    if depth = 0 then inner
    else mk "Right" ~loc (under_rights ~loc (depth - 1) inner)
  in
  let innermost = if n = length - 1 then x else mk "Left" ~loc x in
  under_rights ~loc n innermost
276 |
(* [make_match_from_nested ~loc mk_exprs] builds a function with one case
   per element of [mk_exprs]: case [n] matches branch [n] of the nested
   either, binds its content to [v], and returns the n-th builder applied
   to [v]. *)
let make_match_from_nested ~loc mk_exprs =
  let length = List.length mk_exprs in
  let case_of index mk_expr =
    let id = "v" in
    let lhs =
      make_nested_either_constr ~loc ~length ~mk:ppoly index (AC.pvar ~loc id)
    in
    A.Exp.case lhs (mk_expr (AC.evar ~loc id))
  in
  A.Exp.function_ ~loc (List.mapi case_of mk_exprs)
286 |
(* [make_match_to_nested ~loc mk_pats] builds a function with one case per
   element of [mk_pats]: case [n] matches the n-th pattern builder applied
   to [v] and returns [v] placed in branch [n] of the nested either. *)
let make_match_to_nested ~loc mk_pats =
  let length = List.length mk_pats in
  let case_of index mk_pat =
    let id = "v" in
    let rhs =
      make_nested_either_constr ~loc ~length ~mk:epoly index (AC.evar ~loc id)
    in
    A.Exp.case (mk_pat (AC.pvar ~loc id)) rhs
  in
  A.Exp.function_ ~loc (List.mapi case_of mk_pats)
296 |
(* [make_conv_sum ~loc captures tyre_expr] converts the nested either
   extracted by [tyre_expr] into a polymorphic variant, and back.  The tag
   of branch [i] is the name of its capture, or [Alt] followed by [i] for an
   unnamed one; a branch without capture is an error. *)
let make_conv_sum ~loc captures tyre_expr =
  let tag_of index = function
    | Named s -> s
    | Unnamed _ -> Location.mkloc ("Alt" ^ string_of_int index) loc
    | No ->
      Loc.raise_errorf ~loc
        "All alternatives branches must have a capturing group."
  in
  let branchnames = List.mapi tag_of captures in
  let fun_to =
    make_match_from_nested ~loc
      (List.map (fun {Loc.loc; txt} -> epoly ~loc txt) branchnames)
  in
  let fun_from =
    make_match_to_nested ~loc
      (List.map (fun {Loc.loc; txt} -> ppoly ~loc txt) branchnames)
  in
  Tyre.conv ~loc fun_to fun_from tyre_expr
319 |
320 | (** Alternatives *)
321 |
(* [alt_to_expr ~loc [a; b; c]] is [Tyre.alt a (Tyre.alt b c)]; a single
   expression is returned unchanged and an empty list is an internal
   error. *)
let rec alt_to_expr ~loc exprs =
  match exprs with
  | [] -> internal_error ~loc
  | [ only ] -> only
  | first :: rest ->
    let others = alt_to_expr ~loc rest in
    Tyre.bin ~loc "alt" first others
328 |
(* Chains [exprs] with [Tyre.alt] and converts the result into a polymorphic
   variant named after [captures]. *)
let alt_to_conv ~loc captures exprs =
  make_conv_sum ~loc captures (alt_to_expr ~loc exprs)
332 |
333 | (** Sequences *)
334 |
(* [seq_to_expr ~loc items] chains the [(capture, expr)] items from right to
   left and returns the combined capture with the combined expression.  An
   item is attached with [Tyre.suffix] when the rest captures nothing, with
   [Tyre.prefix] when the item itself captures nothing, and with [Tyre.seq]
   when both capture.  Mixing named and unnamed captures is an error, and so
   is an empty list. *)
let rec seq_to_expr ~loc = function
  | [] -> internal_error ~loc
  | [ capture, e ] -> capture_singleton capture, e
  | (head_capture, head_expr) :: rest ->
    let rest_capture, rest_expr = seq_to_expr ~loc rest in
    let merged, combinator =
      match head_capture, rest_capture with
      | c, No -> capture_singleton c, "suffix"
      | No, c -> c, "prefix"
      | Unnamed (), Unnamed count -> Unnamed (count + 1), "seq"
      | Named name, Named names -> Named (name :: names), "seq"
      | Unnamed _, Named _ | Named _, Unnamed _ ->
        Loc.raise_errorf ~loc
          "The same sequence must not mix unnamed and named capture groups@."
    in
    merged, Tyre.bin ~loc combinator head_expr rest_expr
350 |
(* [seq_to_conv ~loc l] builds the expression for a sequence and converts
   what it extracts: one or two unnamed captures and a single named one are
   left as they are, three or more unnamed captures become a flat tuple, and
   several named captures become an object. *)
let seq_to_conv ~loc l =
  match seq_to_expr ~loc l with
  | No, _ ->
    (* This case should not happen: If simplification was run,
       sequence of ungrouped regex would have been collapsed. *)
    internal_error ~loc
  | (Unnamed 0 | Named []), _ ->
    internal_error ~loc (* No. *)
  | (Unnamed 1 | Unnamed 2 | Named [_]), seq_expr -> seq_expr
  | Unnamed count, seq_expr -> make_conv_tuple ~loc count seq_expr
  | Named names, seq_expr -> make_conv_object ~loc names seq_expr
364 |
365 | (** Put everything together *)
366 |
(* [expr_of_regex t] builds the expression of the typed regular expression
   denoted by the simplified tree [t].  Leaves are wrapped with
   [Tyre.regex], sequences and alternations go through [seq_to_conv] and
   [alt_to_conv], captures are transparent, and a call is the identifier it
   names.  Only the repetitions [(0, None)] and [(1, None)] are supported. *)
let rec expr_of_regex (t : _ Regexp.t) =
  let loc = t.Loc.loc in
  (* [Tyre.name] applied to the translation of [sub]. *)
  let unary name sub = Tyre.mkf ~loc name [Nolabel, expr_of_regex sub] in
  match t.Loc.txt with
  | Regexp.Code r ->
    Tyre.mkf ~loc "regex" [Nolabel, r]
  | Regexp.Seq l ->
    seq_to_conv ~loc (List.map (fun re -> capture re, expr_of_regex re) l)
  | Regexp.Alt l ->
    let exprs = List.map expr_of_regex l in
    let captures = List.map capture l in
    alt_to_conv ~loc captures exprs
  | Regexp.Opt sub -> unary "opt" sub
  | Regexp.Repeat ({Loc.txt = (0, None); _}, sub) -> unary "rep" sub
  | Regexp.Repeat ({Loc.txt = (1, None); _}, sub) -> unary "rep1" sub
  | Regexp.Repeat ({Loc.loc; _}, _) ->
    Loc.raise_errorf ~loc "Repetitions other than + and * are not implemented."
  | Regexp.Nongreedy sub -> unary "non_greedy" sub
  | Regexp.Capture sub | Regexp.Capture_as (_, sub) -> expr_of_regex sub
  | Regexp.Call lid -> A.Exp.ident lid
392 |
393 |
(** [adjust_position ~loc delim] returns the start position of [loc] with its
    character offset advanced by 1 when [delim] is [None], and by the length
    of the delimiter plus 2 otherwise.

    NOTE(review): presumably this skips the opening quote of the string
    literal so that positions inside the regexp line up with the source;
    confirm against the callers. *)
let adjust_position ~loc delim =
  let offset =
    match delim with
    | None -> 1
    | Some s -> String.length s + 2
  in
  let start = loc.Loc.loc_start in
  { start with Lexing.pos_cnum = start.Lexing.pos_cnum + offset }
(** [expr_of_string ~loc s delim] parses the regexp source [s], simplifies
    it and converts it with [expr_of_regex].  [loc] and [delim] describe the
    string literal the source came from and are used to position errors. *)
let expr_of_string ~loc s delim =
  let pos = adjust_position ~loc delim in
  Regexp.parse_exn ~pos s |> simplify |> expr_of_regex
402 |
403 |
(** [regexp_of_pattern pat] reads an OCaml pattern as a regexp.

    - a string constant is parsed as regexp source;
    - [p as x] captures the regexp of [p] under the name [x];
    - [p1 | p2] is an alternative;
    - [_] matches anything, and a variable [x] captures anything as [x].

    Every other pattern is rejected with a located error.  The resulting
    node is always located at the pattern itself. *)
let rec regexp_of_pattern pat =
  let open Parsetree in
  let loc = pat.ppat_loc in
  let at_loc txt = Loc.mkloc txt loc in
  match pat.ppat_desc with
  | Ppat_constant (Pconst_string (s, delim)) ->
    let pos = adjust_position ~loc delim in
    (* Keep the parsed node, but relocate it to the whole pattern. *)
    at_loc (Regexp.parse_exn ~pos s).Loc.txt
  | Ppat_alias (inner, name) ->
    at_loc (Regexp.Capture_as (name, regexp_of_pattern inner))
  | Ppat_or (pat1, pat2) ->
    at_loc (Regexp.Alt [ regexp_of_pattern pat1 ; regexp_of_pattern pat2 ])
  | Ppat_any ->
    at_loc (Regexp.Code ".*")
  | Ppat_var id ->
    at_loc (Regexp.Capture_as (id, Loc.mkloc (Regexp.Code ".*") loc))
  | _ ->
    Loc.raise_errorf ~loc
      "This pattern is not a valid tyre pattern."
424 |
(** [expr_of_pattern pat] converts a pattern into a pair made of its capture
    summary and its expression.

    A top-level sequence goes through [seq_to_expr] rather than
    [seq_to_conv], so no tuple or object converter is wrapped around it. *)
let expr_of_pattern pat =
  let re = regexp_of_pattern pat |> simplify in
  match re.txt with
  | Seq items ->
    let item sub = capture sub, expr_of_regex sub in
    seq_to_expr ~loc:re.loc (List.map item items)
  | _ ->
    capture_singleton (capture re), expr_of_regex re
434 |
435 |
(** [expr_of_function ~loc cases] turns the cases of a [function] into a
    ["route"] application over a list of [Tyre.Route] constructors, one per
    case.

    Each route pairs the expression of the case pattern with a function
    whose parameter binds the captures and whose body is the case
    right-hand side.  Guards are rejected with a located error. *)
let expr_of_function ~loc l =
  let reject_guard = function
    | Some guard ->
      Loc.raise_errorf ~loc:guard.Parsetree.pexp_loc
        "Tyre patterns can not have guards."
    | None -> ()
  in
  (* Parameter pattern of the handler, chosen from the capture summary. *)
  let binder ~loc = function
    | Named [] | Unnamed 0 -> internal_error ~loc
    | No | Unnamed _ -> A.Pat.any ~loc ()
    | Named [ {Loc.loc; txt} ] -> AC.pvar ~loc txt
    | Named lids ->
      make_nested_tuple_pat ~loc (List.map (fun {Loc.txt ; _} -> txt) lids)
  in
  let route_of_case {Parsetree. pc_lhs ; pc_guard ; pc_rhs } =
    reject_guard pc_guard;
    let loc = pc_lhs.Parsetree.ppat_loc in
    let captured, re = expr_of_pattern pc_lhs in
    let handler = AC.func ~loc [binder ~loc captured, pc_rhs] in
    AC.constr ~loc "Tyre.Route" [re; handler]
  in
  let routes = List.map route_of_case l in
  Tyre.mkf ~loc "route" [Nolabel, AC.list ~loc routes]
460 |
461 | open Ast_mapper
462 |
(** Expression mapper: expands an extension node named [tyre] whose payload
    is a single expression, and defers everything else to [default_mapper].

    The payload must be a string constant or a [function]; anything else is
    a located error. *)
let expr mapper e_ext =
  let open Parsetree in
  let expand payload =
    let loc = payload.pexp_loc in
    match payload.pexp_desc with
    | Pexp_constant (Pconst_string (s, delim)) ->
      expr_of_string ~loc s delim
    | Pexp_function cases ->
      expr_of_function ~loc cases
    | _ ->
      Loc.raise_errorf ~loc
        "[%%tyre] is only allowed on constant strings and functions."
  in
  match e_ext.pexp_desc with
  | Pexp_extension ({txt = "tyre"; _},
                    PStr [{pstr_desc = Pstr_eval (payload, _); _}]) ->
    expand payload
  | _ -> default_mapper.expr mapper e_ext
478 |
(* Register the rewriter with the driver; only expressions are overridden. *)
let () =
  let rewriter _config _cookies = {default_mapper with expr} in
  Driver.register ~name:"ppx_regexp.tyre" ocaml_version rewriter
483 |
--------------------------------------------------------------------------------
/ppx_tyre/ppx_tyre.mli:
--------------------------------------------------------------------------------
1 | (* Nothing to see here *)
2 |
--------------------------------------------------------------------------------
/tests/dune:
--------------------------------------------------------------------------------
; Tests run for both packages

; QCheck tests of the shared regexp module.  Its sources are copied from
; ../common by the two rules below.
(test
 (name test_regexp)
 (modules regexp test_regexp)
 (libraries ppxlib qcheck re re.perl))

(rule
 (copy ../common/regexp.mli regexp.mli))

(rule
 (copy ../common/regexp.ml regexp.ml))

; Tests for ppx_regexp

(tests
 (names test_ppx_regexp test_ppx_regexp_unused)
 (modules test_ppx_regexp test_ppx_regexp_unused)
 (package ppx_regexp)
 (libraries re re.perl)
 (preprocess
  (pps ppx_regexp)))

; Tests for ppx_tyre

(executable
 (name test_ppx_tyre)
 (modules test_ppx_tyre)
 (libraries re re.perl)
 (preprocess
  (pps ppx_tyre)))

; Run the ppx_tyre test executable as part of the runtest alias.
(alias
 (name runtest)
 (package ppx_tyre)
 (deps test_ppx_tyre.exe)
 (action
  (run %{deps})))

; Combined preprocessor

(executable
 (name main)
 (modules Main)
 (libraries ppx_regexp ppx_tyre ocaml-migrate-parsetree))
38 |
--------------------------------------------------------------------------------
/tests/main.ml:
--------------------------------------------------------------------------------
(* Entry point of the combined preprocessor: run every rewriter linked into
   this executable as a ppx rewriter. *)
let () = Migrate_parsetree.Driver.run_as_ppx_rewriter ()
2 |
--------------------------------------------------------------------------------
/tests/test_ppx_regexp.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2017 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the LGPL-3.0 Linking Exception.
7 | *
8 | * This library is distributed in the hope that it will be useful, but WITHOUT
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11 | * License for more details.
12 | *
13 | * You should have received a copy of the GNU Lesser General Public License
14 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
15 | *)
16 |
(* Smoke tests: a catch-all pattern and a variable pattern, each in the
   [match%pcre] and in the [function%pcre] form. *)
let () =
  begin match%pcre "%" with
    | _ -> ()
  end;
  begin match%pcre "%" with
    | s -> assert (s = "%")
  end;
  let accept_any = function%pcre _ -> () in
  accept_any "%";
  let check_whole = function%pcre s -> assert (s = "%") in
  check_whole "%"
22 |
(* Classifies a line.  Each named group binds the variable of the same name
   on the right-hand side; a group that may not take part in the match
   ([v], [last]) is bound as an option. *)
let test1 =
  (function%pcre
   | {|^(?<k>.*): *(?<v>.+)?$|} -> `Attr (k, v)
   | {|^# (?<comment>.+)$|} -> `Comment comment
   | {|^((?<last>[@%]){2}){0,2}$|} -> `Even_sigils last
   | {|^[@%]|} -> `Odd_sigils
   | _ -> `Unknown)
30 |
(* Expected classification by [test1] of a few sample lines. *)
let () =
  List.iter
    (fun (input, expected) -> assert (test1 input = expected))
    [ "x: 1", `Attr ("x", Some "1");
      "# Kommentar", `Comment "Kommentar";
      "", `Even_sigils None;
      "%%%@", `Even_sigils (Some "@");
      "%%@", `Odd_sigils ]
37 |
(* [last_elt s] returns the text between the last two semicolons of [s], or
   everything before the final semicolon when there is only one.  [s] must
   end with a semicolon. *)
let last_elt s =
  let n = String.length s in
  assert (s.[n - 1] = ';');
  let start =
    match String.rindex_from s (n - 2) ';' with
    | i -> i + 1
    | exception Not_found -> 0
  in
  String.sub s start (n - start - 1)
43 |
(* Checks that the groups bound for [s] are consistent with [s] itself.
   The case with braces recurses on the text captured between the first
   opening and the last closing brace. *)
let rec test2 s =
  (match%pcre s with
   | {|^<>$|} -> assert (s = "<>")
   | {|^<(?<x>[^<>]+)>$|} -> assert (s = "<" ^ x ^ ">")
   | {|^<(?<x>[^<>]+)><(?<y>[^<>]+)>$|} -> assert (s = "<" ^ x ^ "><" ^ y ^ ">")
   | {|^((?<elt>[^;<>]);)+$|} -> assert (elt = last_elt s)
   | {|^[^{}]*\{(?<s'>.*)\}|} -> test2 s'
   | {|^(?<a>one)|(?<b>two)$|} ->
      assert (a = Some "one" && b = None || a = None && b = Some "two")
   | _ -> assert false)
54 |
(* Unnamed groups must not bind anything; only [is] and [s'] are bound.
   NOTE(review): the leading [?<s'>] of the second pattern is reconstructed
   from the use of [s'] in its right-hand side; presumably it binds the
   whole match.  Confirm against the upstream test. *)
let test3 s =
  (match%pcre s with
   | {|no(is)((e)) (?<is>is) (g(oo)d)|} -> assert (is = "is")
   | {|?<s'>&()[a-zA-Z0-9_-]+(;)|} ->
      let i, j = String.index s '&', String.rindex s ';' in
      assert (s' = String.sub s i (j - i + 1))
   | {|m(o+)re re(gular)? no(is)e, (no )*be(t+)?er|} -> ()
   | s' -> assert (s = s'))
63 |
(* Issue 8: a named group that directly wraps an unnamed group. *)
let test4 = function%pcre
  | {|(?<x>[-+]?[[:digit:]]+.[[:digit:]]*)|} -> [x]
  | {|(?<x>(abc))[[:space:]]*(?<y>(xyz))|} -> [x; y]
  | _ -> assert false
68 |
(* Nested matches: each level drops the first character of the previous
   capture, and all three captures stay in scope in the innermost case. *)
let test5 = function%pcre
  | {|^.(?<x>.+)|} ->
     (match%pcre x with
      | {|^.(?<y>.+)|} ->
         (match%pcre y with
          | {|^.(?<z>.+)|} -> (x, y, z)
          | _ -> assert false)
      | _ -> assert false)
  | _ -> assert false
78 |
(* Drive test2 to test5.
   NOTE(review): five of the test2 inputs had been reduced to the empty
   string, which no test2 pattern accepts.  The bracketed inputs below are
   reconstructions chosen to exercise the one-bracket and two-bracket cases;
   confirm against the upstream test. *)
let () =
  test2 "<>";
  test2 "<a>";
  test2 "<ab>";
  test2 "<a><b>";
  test2 "<ab><cde>";
  test2 "a;";
  test2 "a;b;c;d;";
  test2 "<a;b;c;d;>";
  test2 "Xx{--{a;b;c;}--}yY.";
  test2 "one";
  test2 "two";
  test3 "- + + -";
  test3 "catch-all";
  assert (test4 "::123.456::" = ["123.456"]);
  assert (test4 "::abc xyz::" = ["abc"; "xyz"]);
  assert (test5 "abcd" = ("bcd", "cd", "d"))
96 |
(* It should work in a functor, and Re_pcre.regexp should be lifted to the
 * top-level. *)
module F (M : Map.OrderedType) = struct
  let f x =
    (match%pcre x with
     | {|#(?<space>\s)?(?<comment>.*)|} -> Some (space <> None, comment)
     | _ -> None)
end
105 |
(* The extension must also work in a top-level evaluation phrase: the match
   below sets [r], and the following phrase checks that it ran. *)
let r = ref false

;;
begin match%pcre "" with
  | {|^$|} -> r := true
  | _ -> assert false
end

;;
assert !r
112 |
--------------------------------------------------------------------------------
/tests/test_ppx_regexp_unused.ml:
--------------------------------------------------------------------------------
(* Intentionally empty: this module is preprocessed with ppx_regexp but uses
   none of its extensions.
   NOTE(review): presumably this checks that the rewriter leaves such a
   module untouched; confirm. *)
let () = ()
2 |
--------------------------------------------------------------------------------
/tests/test_ppx_tyre.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2017 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the OCaml static compilation exception.
7 | *
8 | * This library is distributed in the hope that it will be useful, but WITHOUT
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
11 | * License for more details.
12 | *
13 | * You should have received a copy of the GNU Lesser General Public License
14 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
15 | *)
16 |
(* [test re s] runs [re] on [s] and returns the converted value.  A failed
   execution is printed on stderr and aborts through [assert false]. *)
let test re s =
  match Tyre.exec re s with
  | Error err ->
    Format.eprintf "Error: %a@." Tyre.pp_error err;
    assert false
  | Ok value -> value
(* [re %% s] is [test re s]; [re %%% s] asserts that the result is true. *)
let ( %% ) re s = test re s
let ( %%% ) re s = assert (test re s)
25 |
(* Smoke tests: a catch-all route and a route binding the whole input. *)
let () =
  let any_route = function%tyre _ -> true in
  let whole_route = function%tyre s -> s = "%" in
  any_route %%% "%";
  whole_route %%% "%"
29 |
(* Classification produced by [test1]. *)
type t =
  [ `Attr of string * string option
  | `Comment of string
  | `Even_sigils of string option
  | `Odd_sigils
  | `Id of string * int * string
  | `Unknown
  ]
37 |
(* Classifies a line.  Each named group binds the variable of the same name
   on the right-hand side; [num] is converted by [Tyre.pos_int]. *)
let test1 : t Tyre.re =
  (function%tyre
   | {|^(?<k>.*): *(?<v>.+)?$|} -> `Attr (k, v)
   | {|^# (?<comment>.+)$|} -> `Comment comment
   | {|^(?<sigil>([@%]{2})+)?$|} -> `Even_sigils sigil
   | {|^[@%]|} -> `Odd_sigils
   | {|^(?<id>[a-z]+)(?&num:Tyre.pos_int)(?<sym>[^[:alnum:]]+)$|}
     -> `Id (id, num, sym)
   | _ -> `Unknown)
47 |
(* Expected classification by [test1] of a few sample lines. *)
let () =
  let cases : (string * t) list =
    [ "x: 1", `Attr ("x", Some "1");
      "# Kommentar", `Comment "Kommentar";
      "", `Even_sigils None;
      "%%%@", `Even_sigils (Some "%%%@");
      "%%@", `Odd_sigils;
      "abc42#@", `Id ("abc", 42, "#@") ]
  in
  List.iter (fun (input, expected) -> assert (test1 %% input = expected)) cases
55 |
(* [concat_seq sep seq] concatenates the strings of [seq], each one followed
   by [sep]; a separator therefore also follows the last element. *)
let concat_seq sep seq =
  Seq.fold_left (fun acc s -> acc ^ s ^ sep) "" seq
63 |
(* Each route returns a predicate checking that the captured groups are
   consistent with the original input. *)
let test2 = function%tyre
  | {|^<>$|} -> (=) "<>"
  | {|^<(?<x>[^<>]+)>$|} -> fun s -> s = "<" ^ x ^ ">"
  | {|^<(?<x>[^<>]+)><(?<y>[^<>]+)>$|} -> fun s -> s = "<" ^ x ^ "><" ^ y ^ ">"
  | {|^((?<elt>[^;<>]);)*$|} -> fun s -> concat_seq ";" elt = s
  | {|^(?<a>one)|(?<b>two)$|} as x ->
    (match x with
     | `a a -> fun s -> a = s && a = "one"
     | `b b -> fun s -> b = s && b = "two")
73 |
(* [re %%%% s] runs [re] on [s] and applies the resulting predicate to [s]. *)
let ( %%%% ) re s =
  let predicate = re %% s in
  predicate s
75 |
(* NOTE(review): five of these inputs had been reduced to the empty string,
   which left the two bracket routes untested.  The bracketed inputs below
   are reconstructions; confirm against the upstream test. *)
let () =
  assert (test2 %%%%"<>");
  assert (test2 %%%%"<a>");
  assert (test2 %%%%"<ab>");
  assert (test2 %%%%"<a><b>");
  assert (test2 %%%%"<ab><cde>");
  assert (test2 %%%%"a;");
  assert (test2 %%%%"a;b;c;d;");
  assert (test2 %%%%"<a;b;c;d;>");
  assert (test2 %%%%"one");
  assert (test2 %%%%"two")
87 |
(* It should work in a functor, and Re_pcre.regexp should be lifted to the
 * top-level. *)
module F (M : Map.OrderedType) = struct
  let f = function%tyre
    | {|#(?<space>\s)?(?<comment>.*)|} -> Some (space <> None, comment)
    | _ -> None
end
95 |
--------------------------------------------------------------------------------
/tests/test_regexp.ml:
--------------------------------------------------------------------------------
1 | (* Copyright (C) 2018--2021 Petter A. Urkedal
2 | *
3 | * This library is free software; you can redistribute it and/or modify it
4 | * under the terms of the GNU Lesser General Public License as published by
5 | * the Free Software Foundation, either version 3 of the License, or (at your
6 | * option) any later version, with the OCaml static compilation exception or (at
7 | * your option) the LGPL-3.0 Linking Exception.
8 | *
9 | * This library is distributed in the hope that it will be useful, but WITHOUT
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
12 | * License for more details.
13 | *
14 | * You should have received a copy of the GNU Lesser General Public License
15 | * along with this library. If not, see <http://www.gnu.org/licenses/>.
16 | *)
17 |
18 | open Printf
19 | module Loc = Location
20 | module Q = QCheck
21 |
(* Short names for the location constructors. *)
let mkloc txt loc = Loc.mkloc txt loc
let mknoloc txt = Loc.mknoloc txt

(* [map_loc f x] applies [f] to the payload of [x] and keeps its location. *)
let map_loc f node = {Loc.txt = f node.Loc.txt; loc = node.Loc.loc}
25 |
(* Dummy implementation for compatibility with OCaml < 4.08.0.  Replace it
 * with the real version below if needed. *)
let pp_location_error ppf _err = Format.pp_print_string ppf "parse error"
(*
let pp_location_error = Loc.print_report
*)
32 |
(* Local two-case result type used by the tests below.
   NOTE(review): presumably declared here for compilers whose standard
   library has no result type; confirm. *)
type ('a, 'b) result =
  | Ok of 'a
  | Error of 'b
34 |
35 | module Regexp = struct
36 | include Regexp
37 |
(* [nonepsilon e] is false exactly when [e] is the empty sequence. *)
let nonepsilon e =
  match e.Loc.txt with
  | Seq [] -> false
  | _ -> true
39 |
(* [collect_code es] merges the first two elements of [es] for as long as
   both are [Code] nodes: their strings are concatenated and the merged
   location spans from the start of the first to the end of the second.
   The list is returned unchanged from the first pair that does not
   qualify. *)
let rec collect_code = function
  | {Loc.txt = Code s1; loc = l1} :: {Loc.txt = Code s2; loc = l2} :: rest ->
    let span = {
      Loc.loc_start = l1.Loc.loc_start;
      loc_end = l2.Loc.loc_end;
      loc_ghost = l1.Loc.loc_ghost || l2.Loc.loc_ghost;
    } in
    collect_code (Loc.mkloc (Code (s1 ^ s2)) span :: rest)
  | es -> es
54 |
(* [simplify e] normalises a regexp, keeping locations:
   - an empty [Code] becomes the empty sequence;
   - nested sequences are flattened and empty sequences dropped;
   - nested alternatives are flattened;
   - a sequence or alternative with exactly one element is replaced by that
     element;
   - an option of an option collapses to the inner option.
   The remaining nodes are simplified recursively. *)
let rec simplify e = map_loc simplify' e
and simplify' node =
  (* Simplify every element, then let [inline] replace it by a sub-list. *)
  let simplify_and_splice ~inline es =
    List.concat (List.map (fun e -> inline (simplify e)) es)
  in
  let unwrap ~wrap = function
    | [only] -> only.Loc.txt
    | es -> wrap es
  in
  match node with
  | Code "" -> Seq []
  | Seq es ->
    es
    |> simplify_and_splice
         ~inline:(function {Loc.txt = Seq sub; _} -> sub | e -> [e])
    |> List.filter nonepsilon
    |> unwrap ~wrap:(fun es -> Seq es)
  | Alt es ->
    es
    |> simplify_and_splice
         ~inline:(function {Loc.txt = Alt sub; _} -> sub | e -> [e])
    |> unwrap ~wrap:(fun es -> Alt es)
  | Opt e ->
    (match simplify e with
     | {Loc.txt = Opt _ as nested; _} -> nested
     | e' -> Opt e')
  | Repeat (ij, e) -> Repeat (ij, simplify e)
  | Nongreedy e -> Nongreedy (simplify e)
  | Capture e -> Capture (simplify e)
  | Capture_as (name, e) -> Capture_as (name, simplify e)
  | (Code _ | Call _) as leaf -> leaf
86 |
(* [equal' e1 e2] compares two regexps structurally, ignoring locations.
   Nodes of different kinds, and lists of different lengths, are unequal. *)
let rec equal' e1 e2 =
  let same_children xs ys =
    List.length xs = List.length ys && List.for_all2 equal' xs ys
  in
  match e1.Loc.txt, e2.Loc.txt with
  | Code s1, Code s2 -> s1 = s2
  | Seq es1, Seq es2 | Alt es1, Alt es2 -> same_children es1 es2
  | Opt e1, Opt e2
  | Nongreedy e1, Nongreedy e2
  | Capture e1, Capture e2 -> equal' e1 e2
  | Repeat ({Loc.txt = ij1; _}, e1), Repeat ({Loc.txt = ij2; _}, e2) ->
    ij1 = ij2 && equal' e1 e2
  | Capture_as (name1, e1), Capture_as (name2, e2) ->
    name1.Loc.txt = name2.Loc.txt && equal' e1 e2
  | Call name1, Call name2 -> name1.Loc.txt = name2.Loc.txt
  | _, _ -> false (* We'll notice. *)

(* [equal e1 e2] compares the simplified forms of [e1] and [e2]. *)
let equal e1 e2 = equal' (simplify e1) (simplify e2)
102 |
(* [to_string e] prints [e] as regexp source.  A sub-expression is wrapped
   in parentheses when the precedence required by its context is higher
   than what it provides. *)
let to_string e =
  let prec_bottom = 0 and prec_alt = 1 and prec_seq = 2 and prec_suffix = 3 in
  let parens_if cond s = if cond then "(" ^ s ^ ")" else s in
  let rec render prec e =
    match e.Loc.txt with
    | Code s ->
      parens_if (prec > prec_seq) s
    | Seq es ->
      parens_if (prec > prec_seq)
        (String.concat "" (List.map (render prec_seq) es))
    | Alt es ->
      parens_if (prec > prec_alt)
        (String.concat "|" (List.map (render prec_alt) es))
    | Opt e ->
      parens_if (prec >= prec_suffix) (render prec_suffix e ^ "?")
    | Repeat ({Loc.txt = (i, j_opt); _}, e) ->
      let upper = match j_opt with None -> "" | Some j -> string_of_int j in
      parens_if (prec >= prec_suffix)
        (sprintf "%s{%d,%s}" (render prec_suffix e) i upper)
    | Nongreedy e ->
      render (prec_suffix - 1) e ^ "?"
    | Capture e ->
      "(+" ^ render prec_bottom e ^ ")"
    | Capture_as ({Loc.txt = name; _}, e) ->
      "(?<" ^ name ^ ">" ^ render prec_bottom e ^ ")"
    | Call {Loc.txt = idr; _} ->
      "(&" ^ String.concat "." (Longident.flatten idr) ^ ")"
  in
  render prec_bottom e
130 |
(* [pp_debug ppf e] prints the structure of [e] in a parenthesised form,
   followed by its location.  Locations equal to [Loc.none] are omitted. *)
let rec pp_debug ppf self =
  let pp_pos ppf pos =
    Format.fprintf ppf "%d:%d"
      pos.Lexing.pos_lnum (pos.Lexing.pos_cnum - pos.Lexing.pos_bol)
  in
  let pp_loc ppf loc =
    if loc <> Loc.none then begin
      let first = loc.Loc.loc_start and last = loc.Loc.loc_end in
      if first.Lexing.pos_lnum = last.Lexing.pos_lnum then
        (* Same line: print the end as a column only. *)
        Format.fprintf ppf "@%a-%d" pp_pos first
          (last.Lexing.pos_cnum - last.Lexing.pos_bol)
      else
        Format.fprintf ppf "@%a-%a" pp_pos first pp_pos last
    end
  in
  let pp_option f ppf = function None -> () | Some x -> f ppf x in
  let pp_children es = List.iter (pp_debug ppf) es in
  begin match self.Loc.txt with
    | Code s ->
      Format.fprintf ppf "(Code %S)" s
    | Seq es ->
      Format.fprintf ppf "(Seq ";
      pp_children es;
      Format.fprintf ppf ")"
    | Alt es ->
      Format.fprintf ppf "(Alt ";
      pp_children es;
      Format.fprintf ppf ")"
    | Opt e ->
      Format.fprintf ppf "(Opt %a)" pp_debug e
    | Repeat ({Loc.txt = (i, j); loc}, e) ->
      Format.fprintf ppf "(Repeat {%d,%a}%a %a)"
        i (pp_option Format.pp_print_int) j pp_loc loc pp_debug e
    | Nongreedy e ->
      Format.fprintf ppf "(Nongreedy %a)" pp_debug e
    | Capture e ->
      Format.fprintf ppf "(Capture %a)" pp_debug e
    | Capture_as (name, e) ->
      Format.fprintf ppf "(Capture_as %s%a %a)"
        name.Loc.txt pp_loc name.Loc.loc pp_debug e
    | Call name ->
      Format.fprintf ppf "(Call %s%a)"
        (String.concat "." (Longident.flatten name.Loc.txt))
        pp_loc name.Loc.loc
  end;
  pp_loc ppf self.Loc.loc
178 |
(* [show_debug e] returns the debug form of [e] followed by its source form
   as a quoted string. *)
let show_debug e =
  let out = Buffer.create 64 in
  let ppf = Format.formatter_of_buffer out in
  pp_debug ppf e;
  let source = to_string e in
  Format.fprintf ppf " => %S" source;
  Format.pp_print_flush ppf ();
  Buffer.contents out
186 |
(* [to_re e] converts [e] to a [Re.t].  [Code] fragments are parsed with
   [Re.Perl.re]; both kinds of capture become plain groups.  A [Call] node
   raises [Re.Perl.Not_supported]. *)
let rec to_re e =
  match e.Loc.txt with
  | Code src -> Re.Perl.re src
  | Seq es -> Re.seq (List.map to_re es)
  | Alt es -> Re.alt (List.map to_re es)
  | Opt inner -> Re.opt (to_re inner)
  | Repeat ({Loc.txt = (i, j); _}, inner) -> Re.repn (to_re inner) i j
  | Nongreedy inner -> Re.non_greedy (to_re inner)
  | Capture inner | Capture_as (_, inner) -> Re.group (to_re inner)
  | Call _ -> raise Re.Perl.Not_supported
198 |
(* [has_anon_capture e] tells whether [e] contains a [Capture] node.  The
   search does not look inside a [Capture] once one is found. *)
let rec has_anon_capture e =
  match e.Loc.txt with
  | Capture _ -> true
  | Code _ | Call _ -> false
  | Seq es | Alt es -> List.exists has_anon_capture es
  | Opt inner | Repeat (_, inner) | Capture_as (_, inner) | Nongreedy inner ->
    has_anon_capture inner
206 |
207 | end
208 |
(* Generator of identifiers: the first character is an underscore or a
   lowercase letter, the following ones are underscores, letters or
   digits. *)
let gen_name =
  let open Q.Gen in
  (* Index 0 is the underscore, 1-26 lowercase, 27-52 uppercase, 53-62
     digits. *)
  let alphabet =
    "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" in
  let idrletter i = alphabet.[i] in
  let idrfst = map idrletter (int_bound (27 - 1)) in
  let idrcnt = map idrletter (int_bound (63 - 1)) in
  map2 (fun first rest -> String.make 1 first ^ rest)
    idrfst (string ~gen:idrcnt)
220 |
(* Sized generator of simplified regexps.  Leaves are a digit, a backslash
   operator or a quoted punctuation character; the weight of the composite
   nodes grows with the size [n], and at size 0 only leaves are produced. *)
let gen_regexp =
  let open Q.Gen in
  let open Regexp in
  let code s = mknoloc (Code s) in
  (* A backslash followed by one character drawn from [chars]. *)
  let gen_escaped chars =
    map (fun i -> code (sprintf "\\%c" chars.[i]))
      (int_bound (String.length chars - 1))
  in
  let gen_char = map (fun c -> code (String.make 1 c)) numeral in
  let gen_backslash_op = gen_escaped "wWsSdDbBAZzG" in
  let gen_quoted_op = gen_escaped "!\"#$%&'()*+,-./:=<=>?@[\\]^`{|}~" in
  let gen_node self n =
    let sub = self n in
    (* Between [lo] and 10 children, the size being shared among them. *)
    let gen_children lo =
      (lo -- 10) >>= fun k -> list_size (return k) (self (n / (max 1 k)))
    in
    let gen_seq = map (fun es -> mknoloc (Seq es)) (gen_children 0) in
    let gen_alt = map (fun es -> mknoloc (Alt es)) (gen_children 2) in
    let gen_opt = map (fun e -> mknoloc (Opt e)) sub in
    let gen_repeat =
      map2 (fun i e -> mknoloc (Repeat (mknoloc (i, None), e))) nat sub in
    let gen_capture = map (fun e -> mknoloc (Capture e)) sub in
    let gen_capture_as =
      map2 (fun a e -> mknoloc (Capture_as (mknoloc a, e))) gen_name sub in
    frequency [
      1, gen_char;
      1, gen_backslash_op;
      1, gen_quoted_op;
      n*(n - 1), gen_seq;
      n*(n - 1), gen_alt;
      n, gen_opt;
      n, gen_repeat;
      n, gen_capture;
      n, gen_capture_as;
    ]
  in
  map Regexp.simplify (sized (fix gen_node))
260 |
(* Shrinker for regexps: shrinks strings, bounds, names and sub-expressions
   in place, then simplifies every candidate.  [Nongreedy], [Call] and empty
   alternatives are not shrunk. *)
let shrink_regexp e =
  let open Q.Shrink in
  let open Q.Iter in
  let open Regexp in
  let rec go e =
    match e.Loc.txt with
    | Code s -> map (fun s -> mknoloc (Code s)) (string s)
    | Seq es -> map (fun es -> mknoloc (Seq es)) (list ~shrink:go es)
    | Alt (e :: es) ->
      map2 (fun e es -> mknoloc (Alt (e :: es))) (go e) (list ~shrink:go es)
    | Opt e -> map (fun e -> mknoloc (Opt e)) (go e)
    | Repeat ({Loc.txt = (i, j); _}, e) ->
      map2 (fun (i, j) e -> mknoloc (Repeat (mknoloc (i, j), e)))
        (pair (int i) (option int j)) (go e)
    | Capture e -> map (fun e -> mknoloc (Capture e)) (go e)
    | Capture_as (name, e) ->
      map2 (fun name e -> mknoloc (Capture_as (mknoloc name, e)))
        (string name.Loc.txt) (go e)
    | _ -> empty
  in
  map Regexp.simplify (go e)
282 |
(* Arbitrary regexps: generated by [gen_regexp], shrunk by [shrink_regexp]
   and printed with [Regexp.show_debug]. *)
let arb_regexp =
  Q.make
    ~shrink:shrink_regexp
    ~print:Regexp.show_debug
    gen_regexp
285 |
(* [test_parse s] checks that [Regexp.parse_exn] followed by [Regexp.to_re]
   accepts [s] whenever [Re.Perl.re] does.

   A string that only our parser accepts is tolerated when the reference
   reports [Not_supported], or when the parsed regexp contains an anonymous
   capture.  Every other disagreement fails the test with a report. *)
let test_parse s =
  (* Run [build], mapping the two [Re.Perl] exceptions to errors. *)
  let guard_re build =
    try Ok (build ()) with
    | Re.Perl.Parse_error -> Error `Parse_error
    | Re.Perl.Not_supported -> Error `Not_supported
  in
  let ours =
    match Regexp.parse_exn s with
    | exception Loc.Error err -> Error err
    | e -> Ok (e, guard_re (fun () -> Regexp.to_re e))
  in
  let reference = guard_re (fun () -> Re.Perl.re s) in
  match ours, reference with
  | (Error _ | Ok (_, Error _)), Error _ -> true
  | Ok _, Error `Not_supported -> true
  | Ok (e, Ok _), Error `Parse_error ->
    if Regexp.has_anon_capture e then true else
    Q.Test.fail_reportf
      "Parsed to %a and converted to Re.t, but should be invalid"
      Regexp.pp_debug e
  | Error err, Ok _ ->
    Q.Test.fail_reportf "Failed to parse valid %s: %a" s
      pp_location_error err
  | Ok (e, Error _), Ok _ ->
    Q.Test.fail_reportf "Parsed to %a but conversion to Re.t failed"
      Regexp.pp_debug e
  | Ok (_, Ok _), Ok _ ->
    (* TODO: Would have been nice to compare the two Re.t here. *)
    true
317 |
(* The two properties: printing a generated regexp and parsing it back gives
   an equal regexp, and [test_parse] holds for printable strings. *)
let tests =
  let roundtrip e =
    match Regexp.parse_exn (Regexp.to_string e) with
    | exception Loc.Error err ->
      Q.Test.fail_reportf "%a" pp_location_error err
    | e' -> Regexp.equal e' e
  in
  [
    Q.Test.make ~long_factor:100 ~name:"parse ∘ to_string"
      arb_regexp roundtrip;
    Q.Test.make ~long_factor:100 ~name:"to_string ∘ parse"
      (Q.string_gen Q.Gen.printable) test_parse;
  ]

let () = QCheck_runner.run_tests_main tests
330 |
--------------------------------------------------------------------------------