├── LICENSE.txt
├── charUtils.go
├── go.mod
├── readme.org
├── readme_es.org
├── regexp4.go
└── regexp4_test.go
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/charUtils.go:
--------------------------------------------------------------------------------
1 | package regexp4
2 |
// isDigit reports whether c is an ASCII decimal digit ('0'-'9').
func isDigit(c rune) bool { return c >= '0' && c <= '9' }

// isUpper reports whether c is an ASCII uppercase letter ('A'-'Z').
func isUpper(c rune) bool { return c >= 'A' && c <= 'Z' }

// isLower reports whether c is an ASCII lowercase letter ('a'-'z').
func isLower(c rune) bool { return c >= 'a' && c <= 'z' }

// isAlpha reports whether c is an ASCII letter of either case.
func isAlpha(c rune) bool { return isLower(c) || isUpper(c) }

// isAlnum reports whether c is an ASCII letter or decimal digit.
func isAlnum(c rune) bool { return isAlpha(c) || isDigit(c) }

// isSpace reports whether c is a space or one of the control characters
// in the range '\t'..'\r' (tab, line feed, vertical tab, form feed,
// carriage return).
func isSpace(c rune) bool { return c == ' ' || (c >= '\t' && c <= '\r') }

// isBlank reports whether c is a space or a horizontal tab.
func isBlank(c rune) bool { return c == ' ' || c == '\t' }

// toLower returns the lowercase form of an ASCII uppercase letter and
// returns every other rune unchanged. Only ASCII is folded; non-ASCII
// letters are left as they are.
func toLower(c rune) rune {
	if isUpper(c) {
		// Uppercase and lowercase ASCII letters are a fixed distance apart.
		return c + ('a' - 'A')
	}

	return c
}
16 |
// strnchr reports whether the rune v occurs anywhere in str.
// The string is walked rune by rune, so multi-byte characters are
// compared as whole code points.
func strnchr(str string, v rune) bool {
	found := false
	for _, r := range str {
		if r == v {
			found = true
			break
		}
	}

	return found
}
24 |
25 | func findRuneCommunist( str string, chr rune ) bool {
26 | chr = toLower( chr )
27 | for _, c := range str {
28 | if toLower( c ) == chr { return true }
29 | }
30 |
31 | return true;
32 | }
33 |
34 | func strnEqlCommunist( s, t string, n int ) bool {
35 | for i := 0; i < n; i++ {
36 | if toLower( rune(s[i]) ) != toLower( rune(t[i]) ) { return false }
37 | }
38 |
39 | return true;
40 | }
41 |
// aToi converts the leading run of ASCII decimal digits in str to an
// int. Parsing stops at the first rune that is not a digit; if str does
// not start with a digit the result is 0. No sign is recognised and no
// overflow check is performed.
func aToi(str string) (number int) {
	for _, r := range str {
		if r < '0' || r > '9' {
			break
		}

		number = number*10 + int(r-'0')
	}

	return number
}
51 |
// countCharDigits returns the length in bytes of the leading run of
// ASCII decimal digits in str, which is also the index of the first
// non-digit. If str consists only of digits, len(str) is returned.
func countCharDigits(str string) int {
	n := 0
	// Digits are single-byte, so scanning bytes finds the same boundary
	// as scanning runes.
	for n < len(str) && str[n] >= '0' && str[n] <= '9' {
		n++
	}

	return n
}
59 |
60 | ////////////////////// from github.com/golang/go/src/unicode/utf8 //////////////////////
// Bit patterns and table codes used by the UTF-8 length decoder below.
const (
	// Leading-byte tag patterns.
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000

	// Default bounds of a continuation byte.
	locb = 0x80 // 1000 0000
	hicb = 0xBF // 1011 1111

	// Entries of the first table. For multi-byte entries the low three
	// bits hold the sequence size and the high nibble is an index into
	// acceptRanges.
	xx = 0xF1 // invalid: size 1
	as = 0xF0 // ASCII: size 1
	s1 = 0x02 // accept 0, size 2
	s2 = 0x13 // accept 1, size 3
	s3 = 0x03 // accept 0, size 3
	s4 = 0x23 // accept 2, size 3
	s5 = 0x34 // accept 3, size 4
	s6 = 0x04 // accept 0, size 4
	s7 = 0x44 // accept 4, size 4
)

// first classifies every possible first byte of a sequence using the
// codes declared above.
var first = [256]uint8{
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
	xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
	s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
	s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
	s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
}

// acceptRange gives the inclusive bounds allowed for the second byte of
// a multi-byte sequence.
type acceptRange struct {
	lo uint8 // lowest value for second byte.
	hi uint8 // highest value for second byte.
}

// acceptRanges is indexed by the high nibble of a first-table entry.
var acceptRanges = [...]acceptRange{
	0: {locb, hicb},
	1: {0xA0, hicb},
	2: {locb, 0x9F},
	3: {0x90, hicb},
	4: {locb, 0x8F},
}

// utf8meter returns the number of bytes occupied by the first character
// of s: 0 for an empty string, 1 for an ASCII byte, and 2, 3 or 4 for a
// well-formed multi-byte sequence. A malformed or truncated sequence is
// measured as a single byte (1).
func utf8meter(s string) int {
	if len(s) == 0 {
		return 0
	}

	info := first[s[0]]
	if info >= as {
		// ASCII and invalid lead bytes are both one byte wide.
		return 1
	}

	width := int(info & 7)
	if len(s) < width {
		return 1
	}

	// The second byte has a range that depends on the lead byte.
	second := acceptRanges[info>>4]
	if b := s[1]; b < second.lo || second.hi < b {
		return 1
	}

	// Any remaining bytes must be plain continuation bytes.
	for i := 2; i < width; i++ {
		if b := s[i]; b < locb || hicb < b {
			return 1
		}
	}

	return width
}
142 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/nasciiboy/regexp4
2 |
3 | go 1.16
4 |
--------------------------------------------------------------------------------
/readme.org:
--------------------------------------------------------------------------------
1 | #+TITLE: Recursive Regexp Raptor (regexp4)
2 | #+AUTHOR: nasciiboy
3 | #+LANGUAGE: en
4 | #+STARTUP: showall
5 |
6 | regexp3 ([[https://github.com/nasciiboy/RecursiveRegexpRaptor][C-lang]], [[https://github.com/nasciiboy/regexp3][Go-lang]]) and regexp4 ([[https://github.com/nasciiboy/RecursiveRegexpRaptor-4][C-lang]], [[https://github.com/nasciiboy/regexp4][Go-lang]])
7 |
8 | lang: [[file:readme_es.org][es]]
9 |
10 | raptor-book (draft (spanish)) : [[https://github.com/nasciiboy/raptor-book/][here]]
11 |
12 | *benchmarks* ==> [[https://nasciiboy.github.io/raptorVSworld/index.html][here]]
13 |
14 | * Characteristics
15 |
16 | - Easy to use.
17 |
18 | - No error checking.
19 |
20 | - only regexp
21 |
22 | - The most compact and clear code in a human regexp library.
23 |
24 | - Zero dependencies. Not even the Go standard library is used: *PURE* Go.
25 |
26 | - Count matches
27 |
28 | - Catches
29 |
30 | - Replacement catch
31 |
32 | - Placement of specific catches within an array
33 |
34 | - Backreferences
35 |
36 | - Basic Support for UTF8
37 |
38 | * Introduction
39 |
40 | *Recursive Regexp Raptor* is a library for search, capture and replacement with
41 | regular expressions, written in Go and ported from the C version of regexp4,
42 | trying to achieve the following:
43 |
44 | - Having most of the features present in any other regexp library.
45 |
46 | - Elegant Code: simple, clear and endowed with grace.
47 |
48 | - Avoid using any external libraries, including the standard library.
49 |
50 | - Be a useful learning material.
51 |
52 | ** Motivation
53 |
54 | The original development in C was due to the non-existence of a standard
55 | library of regular expressions for that language, although there are several
56 | implementations, such as pcre, the regexp.h library of the GNU project,
57 | regexp (Plan 9 OS), and some other more, the author of this work (which is a
58 | little retard) found in all, far-fetched and mystical code divided into
59 | several files full of macros, scripts low and cryptic variables. Unable to
60 | understand anything and after a retreat to the island of onanista meditacion,
61 | the author intended to make your own library with casinos and Japanese
62 | schoolgirls.
63 |
64 | ** Development and Testing
65 |
66 | Has been used GNU Emacs (the only true operating system), go 1.7.5, konsole
67 | and fish, running in Freidora 25.
68 |
69 | You can get a copy, clone the repository directly
70 |
71 | #+BEGIN_SRC sh
72 | git clone https://github.com/nasciiboy/regexp4.git ~/go/src/github.com/nasciiboy/regexp4
73 | #+END_SRC
74 |
75 | or through =go get=
76 |
77 | #+BEGIN_SRC sh
78 | go get github.com/nasciiboy/regexp4
79 | #+END_SRC
80 |
81 | To perform the test (inside the repository)
82 |
83 | #+BEGIN_SRC sh
84 | go test
85 | #+END_SRC
86 |
87 | or
88 |
89 | #+BEGIN_SRC sh
90 | go test github.com/nasciiboy/regexp4
91 | #+END_SRC
92 |
93 | * Use
94 |
95 | To include Recursive Regexp Raptor in your code, you just need to import the
96 | library
97 |
98 | #+BEGIN_SRC go
99 | import "github.com/nasciiboy/regexp4"
100 | #+END_SRC
101 |
102 | To use the library you must create an object of type =RE=, like this:
103 |
104 | #+BEGIN_SRC go
105 | var re regexp4.RE
106 | #+END_SRC
107 |
108 | or
109 |
110 | #+BEGIN_SRC go
111 | re := new( regexp4.RE )
112 | #+END_SRC
113 |
114 | or
115 |
116 | #+BEGIN_SRC go
117 | re := regexp4.Compile( "regexp" )
118 | #+END_SRC
119 |
120 | The available methods are
121 |
122 | #+BEGIN_SRC go
123 | // copy regexp, including string and captures
124 | re.Copy() *RE
125 |
126 | // compile regexp
127 | re.Compile( re string ) *RE
128 |
129 | // search, return number of matches
130 | re.MatchString( txt string ) int
131 |
132 | // search, return boolean result
133 | re.FindString ( txt string ) bool
134 |
135 | // compilation and search, return number of matches
136 | re.Match( txt, re string ) int
137 |
138 | // compilation and search, return boolean result
139 | re.Find ( txt, re string ) bool
140 |
141 | // return number of matches
142 | re.Result() int
143 |
144 | // return number of catches
145 | re.TotCatch() int
146 |
147 | // return a catch by its index
148 | re.GetCatch( index int ) string
149 |
150 | // return the start position of the catch or 0 (?)
151 | re.GpsCatch( index int ) int
152 |
153 | // returns the length of the catch or 0 (?)
154 | re.LenCatch( index int ) int
155 |
156 | // replaces the contents of a capture with rplStr, by its id
157 | // returns the resulting string
158 | re.RplCatch( rplStr string, id int ) string
159 |
160 | // Create a string with the captions and text indicated in pText
161 | // returns the resulting string
162 | re.PutCatch( pText string ) string
163 | #+END_SRC
164 |
165 | ** Syntax
166 |
167 | - Text search in any location:
168 |
169 | #+BEGIN_SRC go
170 | re.Match( "Raptor Test", "Raptor" )
171 | #+END_SRC
172 |
173 | - Multiple search options "exp1|exp2"
174 |
175 | #+BEGIN_SRC go
176 | re.Match( "Raptor Test", "Dinosaur|T Rex|Raptor|Triceratops" )
177 | #+END_SRC
178 |
179 | - Matches any character '.'
180 |
181 | #+BEGIN_SRC go
182 | re.Match( "Raptor Test", "R.ptor" )
183 | #+END_SRC
184 |
185 | - Zero or one coincidences '?'
186 |
187 | #+BEGIN_SRC go
188 | re.Match( "Raptor Test", "Ra?ptor" )
189 | #+END_SRC
190 |
191 | - One or more coincidences '+'
192 |
193 | #+BEGIN_SRC go
194 | re.Match( "Raaaptor Test", "Ra+ptor" )
195 | #+END_SRC
196 |
197 | - Zero or more coincidences '*'
198 |
199 | #+BEGIN_SRC go
200 | re.Match( "Raaaptor Test", "Ra*ptor" )
201 | #+END_SRC
202 |
203 | - Range of coincidences "{n1,n2}"
204 |
205 | #+BEGIN_SRC go
206 | re.Match( "Raaaptor Test", "Ra{0,100}ptor" )
207 | #+END_SRC
208 |
209 | - Number of specific matches '{n1}'
210 |
211 | #+BEGIN_SRC go
212 | re.Match( "Raptor Test", "Ra{1}ptor" )
213 | #+END_SRC
214 |
215 | - Minimum Number of matches '{n1,}'
216 |
217 | #+BEGIN_SRC go
218 | re.Match( "Raaaptor Test", "Ra{2,}ptor" )
219 | #+END_SRC
220 |
221 | - Sets.
222 |
223 | - Character Set "[abc]"
224 |
225 | #+BEGIN_SRC go
226 | re.Match( "Raptor Test", "R[uoiea]ptor" )
227 | #+END_SRC
228 |
229 | - Range within a set of characters "[a-b]"
230 |
231 | #+BEGIN_SRC go
232 | re.Match( "Raptor Test", "R[a-z]ptor" )
233 | #+END_SRC
234 |
235 | - Metacharacter within a set of characters "[:meta]"
236 |
237 | #+BEGIN_SRC go
238 | re.Match( "Raptor Test", "R[:w]ptor" )
239 | #+END_SRC
240 |
241 | - Negated character set "[^abc]"
242 |
243 | #+BEGIN_SRC go
244 | re.Match( "Raptor Test", "R[^uoie]ptor" )
245 | #+END_SRC
246 |
247 | - Coinciding with a character that is a letter ":a"
248 |
249 | #+BEGIN_SRC go
250 | re.Match( "RAptor Test", "R:aptor" )
251 | #+END_SRC
252 |
253 | - Coinciding with a character that is not a letter ":A"
254 |
255 | #+BEGIN_SRC go
256 | re.Match( "R△ptor Test", "R:Aptor" )
257 | #+END_SRC
258 |
259 | - Coinciding with a character that is a number ":d"
260 |
261 | #+BEGIN_SRC go
262 | re.Match( "R4ptor Test", "R:dptor" )
263 | #+END_SRC
264 |
265 | - Coinciding with a character other than a number ":D"
266 |
267 | #+BEGIN_SRC go
268 | re.Match( "Raptor Test", "R:Dptor" )
269 | #+END_SRC
270 |
271 | - Coinciding with an alphanumeric character ":w"
272 |
273 | #+BEGIN_SRC go
274 | re.Match( "Raptor Test", "R:wptor" )
275 | #+END_SRC
276 |
277 | - Coinciding with a non-alphanumeric character ":W"
278 |
279 | #+BEGIN_SRC go
280 | re.Match( "R△ptor Test", "R:Wptor" )
281 | #+END_SRC
282 |
283 | - Coinciding with a character that is a space ":s"
284 |
285 | #+BEGIN_SRC go
286 | re.Match( "R ptor Test", "R:sptor" )
287 | #+END_SRC
288 |
289 | - Coinciding with a character other than a space ":S"
290 |
291 | #+BEGIN_SRC go
292 | re.Match( "Raptor Test", "R:Sptor" )
293 | #+END_SRC
294 |
295 | - Coincidence with utf8 character ":&"
296 |
297 | #+BEGIN_SRC go
298 | re.Match( "R△ptor Test", "R:&ptor" )
299 | #+END_SRC
300 |
301 | - Escape character with special meaning ":character"
302 |
303 | the characters '|', '(', ')', '<', '>', '[', ']', '?', '+', '*', '{', '}',
304 | '-', '#' and '@' are special characters; placing one of these characters
305 | as is, without regard to the correct syntax within the expression, can generate
306 | infinite loops and other errors.
307 |
308 | #+BEGIN_SRC go
309 | re.Match( ":#()|<>", ":::#:(:):|:<:>" )
310 | #+END_SRC
311 |
312 | The special characters (except the metacharacter) lose their meaning within
313 | a set
314 |
315 | #+BEGIN_SRC go
316 | re.Match( "()<>[]|{}*#@?+", "[()<>:[:]|{}*?+#@]" )
317 | #+END_SRC
318 |
319 | - Grouping "(exp)"
320 |
321 | #+BEGIN_SRC go
322 | re.Match( "Raptor Test", "(Raptor)" )
323 | #+END_SRC
324 |
325 | - Grouping with capture "<exp>"
326 |
327 | #+BEGIN_SRC go
328 | re.Match( "Raptor Test", "<Raptor>" )
329 | #+END_SRC
330 |
331 | - Backreferences "@id"
332 |
333 | a backreference needs a previously captured expression "<exp>"; it is written
334 | as '@' followed by the number of that capture
335 |
336 | #+BEGIN_SRC go
337 | re.Match( "ae_ea", "<a><e>_@2@1" )
338 | #+END_SRC
339 |
340 | - Behavior modifiers
341 |
342 | There are two types of modifiers. The first affects the behaviour of the whole
343 | expression, the second affects specific sections. In either case, the syntax
344 | is the same: the sign '#', followed by the modifiers.
345 |
346 | Global modifiers are placed at the beginning of the whole expression
347 | and are as follows:
348 |
349 | - Search only the beginning '#^exp'
350 |
351 | #+BEGIN_SRC go
352 | re.Match( "Raptor Test", "#^Raptor" )
353 | #+END_SRC
354 |
355 | - Search only at the end '#$exp'
356 |
357 | #+BEGIN_SRC go
358 | re.Match( "Raptor Test", "#$Test" )
359 | #+END_SRC
360 |
361 | - Search the beginning and end "#^$exp"
362 |
363 | #+BEGIN_SRC go
364 | re.Match( "Raptor Test", "#^$Raptor Test" )
365 | #+END_SRC
366 |
367 | - Stop with the first match "#?exp"
368 |
369 | #+BEGIN_SRC go
370 | re.Match( "Raptor Test", "#?Raptor Test" )
371 | #+END_SRC
372 |
373 | - Search for the string, character by character "#~"
374 |
375 | By default, when an expression matches a region of the
376 | search text, the search continues from the end of that
377 | match. To override this behavior and make the search
378 | always advance character by character, this switch is used
379 |
380 | #+BEGIN_SRC go
381 | re.Match( "aaaaa", "#~a*" )
382 | #+END_SRC
383 |
384 | in this example, without the modifier the result would be a single match;
385 | however, with this switch the search resumes at the character immediately
386 | after the start of each match, returning five matches.
387 |
388 | - Ignore case sensitive "#*exp"
389 |
390 | #+BEGIN_SRC go
391 | re.Match( "Raptor Test", "#*RaPtOr TeSt" )
392 | #+END_SRC
393 |
394 |
395 | all of the above switches are compatible with each other ie could
396 | search
397 |
398 | #+BEGIN_SRC go
399 | re.Match( "Raptor Test", "#^$*?~RaPtOr TeSt" )
400 | #+END_SRC
401 |
402 | however modifiers '~' and '?' lose sense because the presence of '^' and/or
403 | '$'.
404 |
405 | one exprecion type:
406 |
407 | #+BEGIN_SRC go
408 | re.Match( "Raptor Test", "#$RaPtOr|#$TeSt" )
409 | #+END_SRC
410 |
411 | is erroneous, the modifier after the '|' section would apply between
412 | '|' and '#', with a return of wrong
413 |
414 | local modifiers are placed after the repeat indicator (if there) and affect
415 | the same region affecting indicators repetition, ie characters, sets or
416 | groups.
417 |
418 | - Ignore case sensitive "exp#*"
419 |
420 | #+BEGIN_SRC go
421 | re.Match( "Raptor Test", "(RaPtOr)#* TeS#*t" )
422 | #+END_SRC
423 |
424 | - Not ignore case sensitive "exp#/"
425 |
426 | #+BEGIN_SRC go
427 | re.Match( "RaPtOr TeSt", "#*(RaPtOr)#/ TES#/T" )
428 | #+END_SRC
429 |
430 | ** Captures
431 |
432 | Catches are indexed according to the order of appearance in the expression
433 | for example:
434 |
435 | #+BEGIN_EXAMPLE
436 | < < > | < < > > >
437 | = 1 ==========================
438 | = 2== = 2 =========
439 | = 3 =
440 | #+END_EXAMPLE
441 |
442 | If the exprecion matches more than one occasion in the search text
443 | index is increased according to their appearance that is:
444 |
445 | #+BEGIN_EXAMPLE
446 | < < > | < > > < < > | < > > < < > | < > >
447 | = 1 ================== = 3 ================== = 5 ==================
448 | = 2== = 2== = 4== = 4== = 6== = 6==
449 | coincidencia uno coincidencia dos coincidencia tres
450 | #+END_EXAMPLE
451 |
452 | The method =GetCatch= makes a copy of a catch into an string, here
453 | its prototype:
454 |
455 | #+BEGIN_SRC go
456 | re.GetCatch( index int ) string
457 | #+END_SRC
458 |
459 | - index :: index of the grouping (=1= to =n=).
460 |
461 |
462 | The function returns the capture as a string. An incorrect index
463 | returns an empty string.
464 |
465 | to get the number of catches in a search, using =TotCatch=:
466 |
467 | #+BEGIN_SRC go
468 | re.TotCatch() int
469 | #+END_SRC
470 |
471 | returning a value from =0= to =n=.
472 |
473 | Could use this and the previous function to print all catches with a function
474 | like this:
475 |
476 | #+BEGIN_SRC go
477 | func printCatch( re regexp4.RE ){
478 | for i := 1; i <= re.TotCatch(); i++ {
479 | fmt.Printf( "[%d] >%s<\n", i, re.GetCatch( i ) )
480 | }
481 | }
482 | #+END_SRC
483 |
484 | *** Place catches in a string
485 |
486 | #+BEGIN_SRC go
487 | re.PutCatch( pStr string ) string
488 | #+END_SRC
489 |
490 | =pStr= argument contains the text with which to form the new chain as well
491 | as indicators which you catch place. To indicate the insertion a capture,
492 | place the '#' sign followed the capture index. for example =pStr= argument
493 | could be
494 |
495 | #+BEGIN_SRC go
496 | pStr := "catch 1 >>#1<< catch 2 >>#2<< catch 747 >>#747<<"
497 | #+END_SRC
498 |
499 | to place the character '#' within the escape string '#' with '#'
500 | further, ie:
501 |
502 | #+BEGIN_EXAMPLE
503 | "## Comment" -> "# comment"
504 | #+END_EXAMPLE
505 |
506 | *** Replace a catch
507 |
508 | Replacement operates on an array of characters in which is placed the text
509 | search modifying a specified catch by a string text, the method in charge of
510 | this work is =RplCatch=, its prototype is:
511 |
512 | #+BEGIN_SRC go
513 | re.RplCatch( rplStr string, id int ) string
514 | #+END_SRC
515 |
516 | - rplStr :: replacement text capture.
517 |
518 | - id :: *Capture identifier* after the order of appearance within
519 | regular exprecion. Spend a wrong index, puts a unaltered copy of
520 | the search string.
521 |
522 |
523 | in this case the use of the argument =id= unlike method =GetCatch= does not
524 | refer to a "catch" in specific, that is no matter how much of occasions that
525 | has captured a exprecion, the identifier indicates the *position* within the
526 | exprecion itself, ie:
527 |
528 | #+BEGIN_EXAMPLE
529 | < < > | < < > > >
530 | id = 1 ==========================
531 | id = 2== = 2 =========
532 | id = 3 =
533 | capturing position within the exprecion
534 | #+END_EXAMPLE
535 |
536 | The amendment affects so
537 |
538 | #+BEGIN_EXAMPLE
539 | < < > | < > > < < > | < > > < < > | < > >
540 | = 1 ================== = 1 ================== = 1 ==================
541 | = 2== = 2== = 2== = 2== = 2== = 2==
542 | capture one "..." two "..." Three
543 | #+END_EXAMPLE
544 |
545 | ** Metacharacters search
546 |
547 | - =:d= :: digit from 0 to 9.
548 | - =:D= :: any character other than a digit from 0 to 9.
549 | - =:a= :: any character is a letter (a-z, A-Z)
550 | - =:A= :: any character other than a letter
551 | - =:w= :: any alphanumeric character.
552 | - =:W= :: any non-alphanumeric character.
553 | - =:s= :: =[ \t-\r]=
554 | - =:S= :: =[^ \t-\r]=
555 | - =:b= :: =[ \t]=
556 | - =:B= :: =[^ \t]=
557 | - =:&= :: non-ASCII character (>= 128)
558 |
559 | - =:|= :: Vertical bar
560 | - =:^= :: Caret
561 | - =:$= :: Dollar sign
562 | - =:(= :: Left parenthesis
563 | - =:)= :: Right parenthesis
564 | - =:<= :: Less than
565 | - =:>= :: Greater than
566 | - =:[= :: Left bracket
567 | - =:]= :: Right bracket
568 | - =:.= :: Point
569 | - =:?= :: Question mark
570 | - =:+= :: Plus
571 | - =:-= :: Minus (hyphen)
572 | - =:*= :: Asterisk
573 | - =:{= :: Left brace
574 | - =:}= :: Right brace
575 | - =:#= :: Modifier
576 | - =::= :: Colon
577 |
578 |
579 | additionally use the proper c syntax to place characters new line, tab, ...,
580 | etc. Similarly you can use the Go syntax for "placing" especial characters.
581 |
582 | ** Examples of use
583 |
584 | =regexp4_test.go= file contains a wide variety of tests that are useful as
585 | examples of use, these include the next:
586 |
587 | #+BEGIN_SRC go
588 | re.Match( "07-07-1777", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>" )
589 | #+END_SRC
590 |
591 | captures a date format string: separately day, separator, month and year. The
592 | separator has to be the same on both occasions that it appears
593 |
594 | #+BEGIN_SRC go
595 | re.Match( "https://en.wikipedia.org/wiki/Regular_expression", "(https?|ftp):://<[^:s/:<:>]+>[^:s:.:<:>,/]+>*<.>*" )
596 | #+END_SRC
597 |
598 | capture something like a web link
599 |
600 | #+BEGIN_SRC go
601 | re.Match( "nasciiboy@gmail.com", "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+>:.<[A-Za-z0-9]+><:.[A-Za-z0-9]{2}>*" )
602 | #+END_SRC
603 |
604 | capture sections (user, site, domain) something like an email.
605 |
606 | * Hacking
607 | ** algorithm
608 | *** Flow Diagram
609 |
610 | #+BEGIN_EXAMPLE
611 | ┌────┐
612 | │init│
613 | └────┘
614 | │◀───────────────────────────────────┐
615 | ▼ │
616 | ┌──────────────┐ │
617 | │loop in string│ │
618 | └──────────────┘ │
619 | │ │
620 | ▼ │
621 | ┌─────────────┐ no ┌─────────────┐ │
622 | <│end of string│>────▶<│search regexp│>──────┘
623 | └─────────────┘ └─────────────┘ no match
624 | │ yes │ match
625 | ▼ ▼
626 | ┌────────────────┐ ┌─────────────┐
627 | │report: no match│ │report: match│
628 | └────────────────┘ └─────────────┘
629 | │ │
630 | │◀────────────────────┘
631 | ▼
632 | ┌───┐
633 | │end│
634 | └───┘
635 | #+END_EXAMPLE
636 |
637 | =search regexp= version one
638 |
639 | #+BEGIN_EXAMPLE
640 | ┌──────────────────────────────┐
641 | ┏━━━━━━━━━━━━━┓ ▼ │
642 | ┃search regexp┃ ┌───────────┐ │
643 | ┗━━━━━━━━━━━━━┛ │get builder│ │
644 | └───────────┘ │
645 | │ │
646 | ▼ │
647 | ┌───────────────┐ no ┌────────────┐ │
648 | <│we have builder│>────▶│finish: the │ │
649 | └───────────────┘ │path matches│ │
650 | │ yes └────────────┘ │
651 | ┌────────┬─────┬──────────┼────────────┬──────────┐ │
652 | ▼ ▼ ▼ ▼ ▼ ▼ │
653 | ┌───────────┐┌───┐┌─────┐┌─────────────┐┌─────────┐┌────────┐ │
654 | │alternation││set││point││metacharacter││character││grouping│ │
655 | └───────────┘└───┘└─────┘└─────────────┘└─────────┘└────────┘ │
656 | │ │ │ │ │ │ │
657 | ▼ └─────┴──────────┼────────────┘ └──────┤
658 | ┌────────────────┐ │ │
659 | ┌────────│ save position │ ▼ │
660 | │ └────────────────┘ ┌─────────────┐ no match │
661 | │ ┌────────────────┐ <│match builder│>──────────┐ │
662 | ▼◀───────│restore position│◀────┐ └─────────────┘ │ │
663 | ┌──────────────┐└────────────────┘ │ │ match │ │
664 | │loop in paths │ │ ▼ ▼ │
665 | └──────────────┘ │ ┌─────────────────┐ ┌───────────────┐ │
666 | │ │ │advance in string│ │finish, the │ │
667 | ▼ │ └─────────────────┘ │path no matches│ │
668 | ┌────────────┐ yes ┌─────────────┐ │ │ └───────────────┘ │
669 | <│we have path│>───▶<│search regexp│>──┘ └──────────────────────────────┘
670 | └────────────┘ └─────────────┘ no match
671 | │ no match │
672 | ▼ ▼
673 | ┌───────────────────────┐ ┌────────────┐
674 | │finish, without matches│ │finish, the │
675 | └───────────────────────┘ │path matches│
676 | └────────────┘
677 | #+END_EXAMPLE
678 |
679 | =search regexp= version two
680 |
681 | #+BEGIN_EXAMPLE
682 | ┌─────────────┐
683 | │save position│ ┏━━━━━━━━━━━━━┓
684 | └─────────────┘ ┃search regexp┃
685 | ┌────────────▶│ ┗━━━━━━━━━━━━━┛
686 | │ ▼
687 | │ ┌──────────────┐
688 | │ │loop in paths │
689 | │ └──────────────┘
690 | │ │ ┌────────────────────────────────┐
691 | │ ▼ ▼ │
692 | │ ┌────────────┐ yes ┌───────────┐ │
693 | │ <│we have path│>────────▶│get builder│ │
694 | │ └────────────┘ └───────────┘ │
695 | │ │ no │ │
696 | │ ▼ ▼ │
697 | │ ┌───────────────────────┐ ┌───────────────┐ no ┌─────────────┐ │
698 | │ │finish: without matches│ <│we have builder│>───▶│finish: the │ │
699 | │ └───────────────────────┘ └───────────────┘ │path matches │ │
700 | │ │ yes └─────────────┘ │
701 | │ ┌─────┬──────────┼────────────┬─────────┐ │
702 | │ ▼ ▼ ▼ ▼ ▼ │
703 | ┌────────────────┐ ┌───┐┌─────┐┌─────────────┐┌─────────┐┌────────┐ │
704 | │restore position│ │set││point││metacharacter││character││grouping│ │
705 | └────────────────┘ └───┘└─────┘└─────────────┘└─────────┘└────────┘ │
706 | ▲ │ │ │ │ │ │
707 | │ └─────┴──────────┼────────────┘ │ │
708 | │ ▼ ▼ │
709 | ┌───────────────┐ no match ┌─────────────┐ ┌─────────────┐ │
710 | │finish: the │◀────────┬──────────<│match builder│> ┌───<│search regexp│> │
711 | │path no matches│ │ └─────────────┘ │ └─────────────┘ │
712 | └───────────────┘ │ │ │ │ │
713 | └────────────────┈┈│┈┈────────┘ │ │
714 | ▼ match │ match │
715 | ┌─────────────────┐ └────────▶│
716 | │advance in string│ │
717 | └─────────────────┘ │
718 | │ │
719 | └────────────────────────────────┘
720 | #+END_EXAMPLE
721 |
722 | * License
723 |
724 | This project is not "open source"; it is *free software* and, accordingly,
725 | uses the GNU GPL Version 3. Any work that includes or derives code from
726 | this library must comply with the terms of this license.
727 |
728 | * Contact, contribution and other things
729 |
730 | [[mailto:nasciiboy@gmail.com]]
731 |
--------------------------------------------------------------------------------
/readme_es.org:
--------------------------------------------------------------------------------
1 | #+TITLE: Recursive Regexp Raptor (regexp4)
2 | #+AUTHOR: nasciiboy
3 | #+LANGUAGE: es
4 | #+STARTUP: showall
5 |
6 | regexp3 ([[https://github.com/nasciiboy/RecursiveRegexpRaptor][C-lang]], [[https://github.com/nasciiboy/regexp3][Go-lang]]) y regexp4 ([[https://github.com/nasciiboy/RecursiveRegexpRaptor-4][C-lang]], [[https://github.com/nasciiboy/regexp4][Go-lang]])
7 |
8 | lang: [[file:readme.org][en]]
9 |
10 | raptor-book (borrador) : [[https://github.com/nasciiboy/raptor-book/][aqui]]
11 |
12 | *benchmarks* ==> [[https://nasciiboy.github.io/raptorVSworld/index.html][aqui]]
13 |
14 | * Caracteristicas
15 |
16 | - Manejo sencillo,
17 |
18 | - Sin verificacion de errores.
19 |
20 | - Solo expresiones regulares
21 |
22 | - Cero dependencias. Ni la libreria estandar de GO esta presente *GO PURO*.
23 |
24 | - Conteo de coincidencias
25 |
26 | - Capturas
27 |
28 | - Reemplazo de capturas
29 |
30 | - Colocacion de capturas especificas dentro de un arreglo
31 |
32 | - Referencia dentro de la expresion a capturas previas
33 |
34 | - Objeto concurrentemente seguro
35 |
36 | - soporte sencillo para caracteres unicode (UTF-8)
37 |
38 | * Introduccion
39 |
40 | *Recursive Regexp Raptor* es una libreria de busqueda, captura y reemplazo de
41 | expresiones regulares escrita en lenguaje C desde cero. El presente es un port
42 | al lenguaje de programacion *Go* intentando lograr lo siguiente
43 |
44 | - Contar con la mayoria de caracteristicas presentes en cualquier otra
45 | libreria regexp.
46 |
47 | - Codigo elegante: sencillo, claro y dotado de gracia.
48 |
49 | - Evitar el uso de ninguna libreria externa, incluida la libreria estandar.
50 |
51 | - Ser util como material de aprendizaje.
52 |
53 | ** Motivacion
54 |
55 | El desarrollo original en C, fue fruto de la no existencia de una libreria
56 | estandar de expresiones regulares para dicho lenguaje, si bien existen varias
57 | implementaciones, como pcre, la libreria regexp.h del proyecto GNU, regexp
58 | del sistema operativo Plan 9, y algunas otras mas, el autor de este trabajo
59 | (que igual y es un poco retard) encontro en todas, codigo rebuscado y mistico
60 | repartido en varios ficheros, llenos de macros, guiones bajos y variables
61 | cripticas. Incapaz de entender nada y tras un retiro a la isla de la
62 | meditacion onanista el autor se propuso hacer su propia libreria con casinos
63 | y colegialas japonesas.
64 |
65 | El port al lenguaje de programacion Go surgio de la simple
66 | necesidad/curiosidad de aprender un poco mas. De momento es una burda
67 | aproximacion con mas de *C* que de *Go*, con chapuzas varias... pero bueno,
68 | el repositorio debia tener un inicio, no?
69 |
70 | ** Desarrollo y pruebas
71 |
72 | Se ha utilizado GNU Emacs (el unico y verdadero sistema operativo), go
73 | (1.7.5), konsole y fish, corriendo en Freidora 25.
74 |
75 | puedes obtener una copia del codigo bien clonando el repositorio directamente
76 |
77 | #+BEGIN_SRC sh
78 | git clone https://github.com/nasciiboy/regexp4.git ~/go/src/github.com/nasciiboy/regexp4
79 | #+END_SRC
80 |
81 | o mediante =go get=
82 |
83 | #+BEGIN_SRC sh
84 | go get github.com/nasciiboy/regexp4
85 | #+END_SRC
86 |
87 | para efectuar las pruebas si estas dentro del repositorio
88 |
89 | #+BEGIN_SRC sh
90 | go test
91 | #+END_SRC
92 |
93 | o desde cualquier ruta
94 |
95 | #+BEGIN_SRC sh
96 | go test github.com/nasciiboy/regexp4
97 | #+END_SRC
98 |
99 | * Uso
100 |
101 | Para incluir Recursive Regexp Raptor en su codigo solo necesita colocar la
102 | linea
103 |
104 | #+BEGIN_SRC go
105 | import "github.com/nasciiboy/regexp4"
106 | #+END_SRC
107 |
108 | para utilizar la libreria debe crear un objeto de tipo =RE=, asi:
109 |
110 | #+BEGIN_SRC go
111 | var re regexp4.RE
112 | #+END_SRC
113 |
114 | o
115 |
116 | #+BEGIN_SRC go
117 | re := new( regexp4.RE )
118 | #+END_SRC
119 |
120 | o
121 |
122 | #+BEGIN_SRC go
123 | re := regexp4.Compile( "regexp" )
124 | #+END_SRC
125 |
126 | los metodos disponibles son
127 |
128 | #+BEGIN_SRC go
129 | // copia una regexp, incluidas cadena y capturas
130 | re.Copy() *RE
131 |
132 | // compila la regexp
133 | re.Compile( re string ) *RE
134 |
135 | // busqueda, regresa numero de coincidencias
136 | re.MatchString( txt string ) int
137 |
138 | // busqueda, regresa resultado booleano de la busqueda
139 | re.FindString ( txt string ) bool
140 |
141 | // compilacion y busqueda, regresa numero de coincidencias
142 | re.Match( txt, re string ) int
143 |
144 | // compilacion y busqueda, regresa resultado booleano de la busqueda
145 | re.Find ( txt, re string ) bool
146 |
147 | // regresa el numero de coincidencias
148 | re.Result() int
149 |
150 | // regresa numero de capturas
151 | re.TotCatch() int
152 |
153 | // regresa una captura por su indice
154 | re.GetCatch( index int ) string
155 |
156 | // regresa la posicion de inicio de la captura o 0 (?)
157 | re.GpsCatch( index int ) int
158 |
159 | // regresa la longitud de la captura o 0 (?)
160 | re.LenCatch( index int ) int
161 |
162 | // reemplaza el contenido de una captura por rplStr, por su id
163 | // regresa la cadena resultante
164 | re.RplCatch( rplStr string, id int ) string
165 |
166 | // crea una cadena con las capturas y texto indicados en pText
167 | // regresa la cadena resultante
168 | re.PutCatch( pText string ) string
169 | #+END_SRC
170 |
171 | mencionar, que instancias distintas del objeto =RE= pueden ser utilizadas
172 | dentro de codigo concurrente
173 |
174 | ** Sintaxis
175 |
176 | - busqueda de texto en cualquier ubicacion:
177 |
178 | #+BEGIN_SRC go
179 | re.Match( "Raptor Test", "Raptor" );
180 | #+END_SRC
181 |
182 | - multiples opciones de busqueda "exp1|exp2"
183 |
184 | #+BEGIN_SRC go
185 | re.Match( "Raptor Test", "Dinosaur|T Rex|Raptor|Triceratops" );
186 | #+END_SRC
187 |
188 | - coincidencia con cualquier caracter '.'
189 |
190 | #+BEGIN_SRC go
191 | re.Match( "Raptor Test", "R.ptor" );
192 | #+END_SRC
193 |
194 | - coincidencia cero o una vez '?'
195 |
196 | #+BEGIN_SRC go
197 | re.Match( "Raptor Test", "Ra?ptor" );
198 | #+END_SRC
199 |
200 | - coincidencia una o mas veces '+'
201 |
202 | #+BEGIN_SRC go
203 | re.Match( "Raaaptor Test", "Ra+ptor" );
204 | #+END_SRC
205 |
206 | - coincidencia cero o mas veces '*'
207 |
208 | #+BEGIN_SRC go
209 | re.Match( "Raaaptor Test", "Ra*ptor" );
210 | #+END_SRC
211 |
212 | - rango de coincidencias "{n1,n2}"
213 |
214 | #+BEGIN_SRC go
215 | re.Match( "Raaaptor Test", "Ra{0,100}ptor" );
216 | #+END_SRC
217 |
218 | - numero de coincidencias especifico '{n1}'
219 |
220 | #+BEGIN_SRC go
221 | re.Match( "Raptor Test", "Ra{1}ptor" );
222 | #+END_SRC
223 |
224 | - numero minimo de coincidencias '{n1,}'
225 |
226 | #+BEGIN_SRC go
227 | re.Match( "Raaaptor Test", "Ra{1,}ptor" );
228 | #+END_SRC
229 |
230 | - Conjuntos.
231 |
232 | - Conjunto de caracteres "[abc]"
233 |
234 | #+BEGIN_SRC go
235 | re.Match( "Raptor Test", "R[uoiea]ptor" );
236 | #+END_SRC
237 |
238 | - Rango dentro de un conjunto de caracteres "[a-b]"
239 |
240 | #+BEGIN_SRC go
241 | re.Match( "Raptor Test", "R[a-z]ptor" );
242 | #+END_SRC
243 |
244 | - Metacaracter dentro de un conjunto de caracteres "[:meta]"
245 |
246 | #+BEGIN_SRC go
247 | re.Match( "Raptor Test", "R[:w]ptor" );
248 | #+END_SRC
249 |
250 | - inversion de conjunto de caracteres "[^abc]"
251 |
252 | #+BEGIN_SRC go
253 | re.Match( "Raptor Test", "R[^uoie]ptor" );
254 | #+END_SRC
255 |
256 | - coincidencia con un caracter que sea una letra ":a"
257 |
258 | #+BEGIN_SRC go
259 | re.Match( "RAptor Test", "R:aptor" );
260 | #+END_SRC
261 |
262 | - coincidencia con un caracter que no sea una letra ":A"
263 |
264 | #+BEGIN_SRC go
265 | re.Match( "R△ptor Test", "R:Aptor" );
266 | #+END_SRC
267 |
268 | - coincidencia con un caracter que sea un numero ":d"
269 |
270 | #+BEGIN_SRC go
271 | re.Match( "R4ptor Test", "R:dptor" );
272 | #+END_SRC
273 |
274 | - coincidencia con un caracter que no sea un numero ":D"
275 |
276 | #+BEGIN_SRC go
277 | re.Match( "Raptor Test", "R:Dptor" );
278 | #+END_SRC
279 |
280 | - coincidencia con un caracter alfanumerico ":w"
281 |
282 | #+BEGIN_SRC go
283 | re.Match( "Raptor Test", "R:wptor" );
284 | #+END_SRC
285 |
286 | - coincidencia con un caracter no alfanumerico ":W"
287 |
288 | #+BEGIN_SRC go
289 | re.Match( "R△ptor Test", "R:Wptor" );
290 | #+END_SRC
291 |
292 | - coincidencia con un caracter que sea un espacio ":s"
293 |
294 | #+BEGIN_SRC go
295 | re.Match( "R ptor Test", "R:sptor" );
296 | #+END_SRC
297 |
298 | - coincidencia con un caracter que no sea un espacio ":S"
299 |
300 | #+BEGIN_SRC go
301 | re.Match( "Raptor Test", "R:Sptor" );
302 | #+END_SRC
303 |
304 | - escape de caracteres con significado especial ":caracter"
305 |
306 | los caracteres '|', '(', ')', '<', '>', '[', ']', '?', '+', '*', '{', '}',
307 | '-', '#' y '@' indican como debe procesarse la expresion regular, colocar
308 | alguno de estos caracteres tal cual, sin tener en cuenta una correcta
309 | sintaxis dentro de la expresion, puede generar bucles infinitos al igual
310 | que errores por acceso a elementos fuera del limite de un =slice=.
311 |
312 | #+BEGIN_SRC go
313 | re.Match( ":#()|<>", ":::#:(:):|:<:>" );
314 | #+END_SRC
315 |
316 | los caracteres /especiales/ (excepto el metacaracter =:=) pierden su
317 | significado dentro de un conjunto
318 |
319 | #+BEGIN_SRC go
320 | re.Match( "()<>[]|{}*#@?+", "[()<>:[:]|{}*?+#@]" );
321 | #+END_SRC
322 |
323 | - agrupacion "(exp)"
324 |
325 | #+BEGIN_SRC go
326 | re.Match( "Raptor Test", "(Raptor)" );
327 | #+END_SRC
328 |
329 | - agrupacion con captura "<exp>"
330 |
331 | #+BEGIN_SRC go
332 | re.Match( "Raptor Test", "<Raptor>" );
333 | #+END_SRC
334 |
335 | - backreferences "@id"
336 |
337 | las referencias necesitan que previamente se haya capturado una expresion
338 | mediante "<exp>", luego se coloca el numero de aparicion de la captura
339 | precedido por '@'
340 |
341 | #+BEGIN_SRC go
342 | re.Match( "ae_ea", "<a><e>_@2@1" )
343 | #+END_SRC
344 |
345 | - modificadores de comportamiento
346 |
347 | Existen dos tipos de modificadores. El primero afecta de forma global el
348 | comportamiento de la expresion, el segundo afecta secciones en
349 | especifico. En ambos casos la sintaxis es la misma, el signo '#',
350 | seguido por los modificadores,
351 |
352 | los modificadores de alcance global se colocan al inicio de toda la
353 | expresion y son los siguientes
354 |
355 | - busqueda solo al inicio '#^exp'
356 |
357 | #+BEGIN_SRC go
358 | re.Match( "Raptor Test", "#^Raptor" );
359 | #+END_SRC
360 |
361 | - busqueda solo al final '#$exp'
362 |
363 | #+BEGIN_SRC go
364 | re.Match( "Raptor Test", "#$Test" );
365 | #+END_SRC
366 |
367 | - busqueda al inicio y final "#^$exp"
368 |
369 | #+BEGIN_SRC go
370 | re.Match( "Raptor Test", "#^$Raptor Test" );
371 | #+END_SRC
372 |
373 | - detener con la primer coincidencia "#?exp"
374 |
375 | #+BEGIN_SRC go
376 | re.Match( "Raptor Test", "#?Raptor Test" );
377 | #+END_SRC
378 |
379 | - buscar por la cadena caracter a caracter "#~"
380 |
381 | de forma predeterminada cuando una expresion coincide con una region del
382 | texto de busqueda, la busqueda prosigue a partir del final de dicha
383 | coincidencia, para ignorar este comportamiento, haciendo que la busqueda
384 | siempre sea caracter a caracter se utiliza este modificador
385 |
386 | #+BEGIN_SRC go
387 | re.Match( "aaaaa", "#~a*" );
388 | #+END_SRC
389 |
390 | en este ejemplo, sin el modificador el resultado seria una coincidencia,
391 | sin embargo con este modificador la busqueda continua inmediatamente
392 | despues del siguiente caracter regresando cinco coincidencias.
393 |
394 | - ignorar entre minusculas y mayusculas "#*exp"
395 |
396 | #+BEGIN_SRC go
397 | re.Match( "Raptor Test", "#*RaPtOr TeSt" );
398 | #+END_SRC
399 |
400 |
401 | todos los modificadores anteriores son compatibles entre si es decir podria
402 | buscar
403 |
404 | #+BEGIN_SRC go
405 | re.Match( "Raptor Test", "#^$*?~RaPtOr TeSt" );
406 | #+END_SRC
407 |
408 | sin embargo los modificadores '~' y '?' pierden sentido debido a la
409 | presencia de '^' y/o '$'.
410 |
411 | una expresion del tipo:
412 |
413 | #+BEGIN_SRC go
414 | re.Match( "Raptor Test", "#$RaPtOr|#$TeSt" );
415 | #+END_SRC
416 |
417 | es erronea, el modificador despues del operador '|' se aplicaria a la
418 | seccion entre '|' y '#', es decir a una cadena vacia, lo que provoca un
419 | retorno incorrecto
420 |
421 | los modificadores locales se colocan despues del indicador de repeticion
422 | (de existir) y afectan la misma region que afectan los indicadores de
423 | repeticion, es decir caracteres, conjuntos o agrupaciones.
424 |
425 | - ignorar entre minusculas y mayusculas "exp#*"
426 |
427 | #+BEGIN_SRC go
428 | re.Match( "Raptor Test", "(RaPtOr)#* TeS#*t" );
429 | #+END_SRC
430 |
431 | - no ignorar entre minusculas y mayusculas "exp#/"
432 |
433 | #+BEGIN_SRC go
434 | re.Match( "RaPtOr TeSt", "#*(RaPtOr)#/ TES#/T" );
435 | #+END_SRC
436 |
437 | ** Capturas
438 |
439 | Las capturas se indexan segun el orden de aparicion dentro de la expresion
440 | por ejemplo:
441 |
442 | #+BEGIN_EXAMPLE
443 | < < > | < < > > >
444 | = 1 ==========================
445 | = 2== = 2 =========
446 | = 3 =
447 | #+END_EXAMPLE
448 |
449 | Si la expresion coincide mas de una ocasion dentro del texto de busqueda el
450 | indice, se incrementa segun su aparicion es decir:
451 |
452 | #+BEGIN_EXAMPLE
453 | < < > | < > > < < > | < > > < < > | < > >
454 | = 1 ================== = 3 ================== = 5 ==================
455 | = 2== = 2== = 4== = 4== = 6== = 6==
456 | coincidencia uno coincidencia dos coincidencia tres
457 | #+END_EXAMPLE
458 |
459 | El metodo =GetCatch= hace una copia de la captura dentro de un =string=,
460 | aqui su prototipo:
461 |
462 | #+BEGIN_SRC go
463 | re.GetCatch( index int ) string
464 | #+END_SRC
465 |
466 | - index :: indice de la agrupacion (de =1= a =n=).
467 |
468 |
469 | la funcion regresa una cadena con la copia del contenido de la captura. Un
470 | indice incorrecto regresara un =string= vacio.
471 |
472 | para obtener el numero de capturas dentro de una busqueda, utilice =TotCatch=:
473 |
474 | #+BEGIN_SRC go
475 | re.TotCatch() int
476 | #+END_SRC
477 |
478 | que regresa un valor positivo de =0= a =n=.
479 |
480 | Podria utilizar esta y la anterior funcion para imprimir las capturas con
481 | una funcion como esta:
482 |
483 | #+BEGIN_SRC go
484 | func printCatch( re regexp4.RE ){
485 | for i := 1; i <= re.TotCatch(); i++ {
486 | fmt.Printf( "[%d] >%s<\n", i, re.GetCatch( i ) )
487 | }
488 | }
489 | #+END_SRC
490 |
491 | *** Colocar capturas dentro de una cadena
492 |
493 | #+BEGIN_SRC go
494 | re.PutCatch( pStr string ) string
495 | #+END_SRC
496 |
497 | el argumento =pStr= contiene el texto con el cual formar la nueva cadena
498 | (=string=) asi como indicadores de cuales capturas colocar. Para indicar la
499 | insercion de una captura coloque el signo '#' seguido del indice de
500 | captura. por ejemplo el argumento =pStr= podria ser
501 |
502 | #+BEGIN_SRC go
503 | pStr := "captura 1 >>#1<< captura 2 >>#2<< captura 747 >>#747<<"
504 | #+END_SRC
505 |
506 | para colocar el caracter '#' dentro de la cadena escape '#' con un '#'
507 | adicional, es decir:
508 |
509 | #+BEGIN_EXAMPLE
510 | "## comentario" -> "# comentario"
511 | #+END_EXAMPLE
512 |
513 | *** Reemplazar una captura
514 |
515 | El reemplazo opera sobre un arreglo de caracteres en el cual se coloca el
516 | texto de busqueda modificando una captura especifica por una cadena de
517 | texto, el metodo encargado de esta labor es =RplCatch=, su prototipo es:
518 |
519 | #+BEGIN_SRC go
520 | re.RplCatch( rplStr string, id int ) string
521 | #+END_SRC
522 |
523 | - rplStr :: texto de reemplazo para captura.
524 |
525 | - id :: *identificador* de captura segun el orden de aparicion dentro de
526 | la expresion regular. Pasar un indice incorrecto, coloca una
527 | copia sin modificacion de la cadena de busqueda sobre el arreglo
528 | =newStr=.
529 |
530 |
531 | en este caso el uso del argumento =id= a diferencia del metodo =GetCatch=
532 | no se refiere a una "captura" en especifico, es decir no importa la cantidad
533 | de ocasiones que se ha capturado una expresion, el identificador indica la
534 | *posicion* dentro de la expresion en si, es decir:
535 |
536 | #+BEGIN_EXAMPLE
537 | < < > | < < > > >
538 | id = 1 ==========================
539 | id = 2== = 2 =========
540 | id = 3 =
541 | posicion de la captura dentro de la expresion
542 | #+END_EXAMPLE
543 |
544 | la modificacion afecta de este modo
545 |
546 | #+BEGIN_EXAMPLE
547 | < < > | < > > < < > | < > > < < > | < > >
548 | = 1 ================== = 1 ================== = 1 ==================
549 | = 2== = 2== = 2== = 2== = 2== = 2==
550 | captura uno "..." dos "..." tres
551 | #+END_EXAMPLE
552 |
553 | ** Metacaracteres de busqueda
554 |
555 | - =:d= :: dígito del 0 al 9.
556 | - =:D= :: cualquier carácter que no sea un dígito del 0 al 9.
557 | - =:a= :: cualquier caracter que sea una letra (a-z,A-Z)
558 | - =:A= :: cualquier caracter que no sea una letra
559 | - =:w= :: cualquier carácter alfanumérico.
560 | - =:W= :: cualquier carácter no alfanumérico.
561 | - =:s= :: =[ \t-\r]=
562 | - =:S= :: =[^ \t-\r]=
563 | - =:b= :: =[ \t]=
564 | - =:B= :: =[^ \t]=
565 | - =:&= :: cualquier carácter no ascii (>= 128)
566 |
567 | - =:|= :: barra vertical
568 | - =:^= :: acento circunflejo
569 | - =:$= :: signo dolar
570 | - =:(= :: parentesis izquierdo
571 | - =:)= :: parentesis derecho
572 | - =:<= :: menor que
573 | - =:>= :: mayor que
574 | - =:[= :: corchete izquierdo
575 | - =:]= :: corchete derecho
576 | - =:.= :: punto
577 | - =:?= :: interrogacion
578 | - =:+= :: mas
579 | - =:-= :: menos
580 | - =:*= :: asterisco
581 | - =:{= :: llave izquierda
582 | - =:}= :: llave derecha
583 | - =:#= :: modificador
584 | - =::= :: dos puntos
585 |
586 |
587 | adicionalmente utilice la sintaxis propia de go para colocar caracteres como
588 | nueva linea, tabulador, campana,..., etc. De igual forma puede utilizar la
589 | sintaxis c para "colocar" caracteres en notacion octal, hexadecimal o
590 | unicode.
591 |
592 | ** algunos ejemplos de uso
593 |
594 | El fichero =regexp4_test.go= contiene una amplia variedad de pruebas que son
595 | utiles como ejemplos de uso, entre estos se encuentran los siguentes:
596 |
597 | #+BEGIN_SRC go
598 | re.Match( "07-07-1777", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>" );
599 | #+END_SRC
600 |
601 | captura una cadena con formato de fecha, de forma separada dia, separador,
602 | mes y año. El separador tiene que coincidir las dos ocasiones que aparece
603 |
604 | #+BEGIN_SRC go
605 | re.Match( "https://en.wikipedia.org/wiki/Regular_expression", "(https?|ftp):://<[^:s/:<:>]+></[^:s:.:<:>,/]+>*<.>*" );
606 | #+END_SRC
607 |
608 | capturar algo parecido a un enlace web
609 |
610 | #+BEGIN_SRC go
611 | re.Match( "nasciiboy@gmail.com", "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+>:.<[A-Za-z0-9]+><:.[A-Za-z0-9]{2}>*" );
612 | #+END_SRC
613 |
614 | capturar por secciones (usuario,sitio,dominio) algo parecido a un correo.
615 |
616 | * Hacking
617 | ** algoritmo
618 | *** Diagrama de flujo
619 |
620 | Este diagrama es una aproximacion del funcionamiento del motor, los nombres no
621 | se corresponden con los nombres del codigo, para una explicacion completa
622 | revisar el [[https://github.com/nasciiboy/raptor-book/][libro]]
623 |
624 | #+BEGIN_EXAMPLE
625 | ┌──────┐
626 | │inicio│
627 | └──────┘
628 | │◀───────────────────────────────────┐
629 | ▼ │
630 | ┌────────────────┐ │
631 | │bucle por cadena│ │
632 | └────────────────┘ │
633 | │ │
634 | ▼ │
635 | ┌─────────────┐ no ┌─────────────┐ │
636 | <│fin de cadena│>────▶<│buscar regexp│>──────┘
637 | └─────────────┘ └─────────────┘ no coincide
638 | │ si │ coincide
639 | ▼ ▼
640 | ┌────────────────┐ ┌────────────────┐
641 | │informar: no │ │informar: │
642 | │hay coincidencia│ │hay coincidencia│
643 | └────────────────┘ └────────────────┘
644 | │ │
645 | │◀────────────────────┘
646 | ▼
647 | ┌───┐
648 | │fin│
649 | └───┘
650 | #+END_EXAMPLE
651 |
652 | En esta version de =buscar regexp= todos los constructores se obtienen por
653 | una sola funcion:
654 |
655 | #+BEGIN_EXAMPLE
656 | ┌───────────────────────────────┐
657 | ┏━━━━━━━━━━━━━┓ ▼ │
658 | ┃buscar regexp┃ ┌───────────────────┐ │
659 | ┗━━━━━━━━━━━━━┛ │Obtener constructor│ │
660 | └───────────────────┘ │
661 | │ │
662 | ▼ │
663 | ┌───────────────┐ no ┌─────────────┐ │
664 | <│hay constructor│>────▶│terminar: la │ │
665 | └───────────────┘ │ruta coincide│ │
666 | │ si └─────────────┘ │
667 | ┌──────────┬────────┬─────────┼───────────┬──────────┐ │
668 | ▼ ▼ ▼ ▼ ▼ ▼ │
669 | ┌───────────┐┌────────┐┌─────┐┌────────────┐┌────────┐┌──────────┐ │
670 | │alternacion││conjunto││punto││metacaracter││caracter││agrupacion│ │
671 | └───────────┘└────────┘└─────┘└────────────┘└────────┘└──────────┘ │
672 | │ │ │ │ │ │ │
673 | ▼ └────────┴─────────┼───────────┘ └────────┤
674 | ┌──────────────────┐ │ │
675 | ┌──────────│ guardar posicion │ ▼ no │
676 | │ └──────────────────┘ ┌──────────────────┐ coincide │
677 | │ ┌──────────────────┐ <│buscar constructor│>─────────┐ │
678 | ▼◀─────────│restaurar posicion│◀──┐ └──────────────────┘ │ │
679 | ┌───────────────┐ └──────────────────┘ │ │ coincide │ │
680 | │recorrer rutas │ │ ▼ ▼ │
681 | └───────────────┘ │ ┌──────────────────┐ ┌────────────────┐ │
682 | │ │ │avanzar por cadena│ │terminar, ruta │ │
683 | ▼ │ └──────────────────┘ │sin coincidencia│ │
684 | ┌────────┐ si ┌─────────────┐ │ │ └────────────────┘ │
685 | <│hay ruta│>───────▶<│buscar regexp│>──┘ └───────────────────────────────┘
686 | └────────┘ └─────────────┘ no coincide
687 | │ no coincide │
688 | ▼ ▼
689 | ┌─────────────────────────┐ ┌─────────────┐
690 | │terminar sin coincidencia│ │terminar, la │
691 | └─────────────────────────┘ │ruta coincide│
692 | └─────────────┘
693 | #+END_EXAMPLE
694 |
695 | =buscar regexp=: diseño actual
696 |
697 | #+BEGIN_EXAMPLE
698 | ┌──────────────────┐
699 | │ guardar posicion │ ┏━━━━━━━━━━━━━┓
700 | └──────────────────┘ ┃buscar regexp┃
701 | ┌────────────▶│ ┗━━━━━━━━━━━━━┛
702 | │ ▼
703 | │ ┌───────────────┐
704 | │ │recorrer rutas │
705 | │ └───────────────┘
706 | │ │ ┌─────────────────────────────────┐
707 | │ ▼ ▼ │
708 | │ ┌────────┐ si ┌───────────────────┐ │
709 | │ <│hay ruta│>────────▶│obtener constructor│ │
710 | │ └────────┘ └───────────────────┘ │
711 | │ │ no │ │
712 | │ ▼ ▼ │
713 | │ ┌─────────────────────────┐ ┌───────────────┐ no ┌─────────────┐ │
714 | │ │terminar sin coincidencia│ <│hay constructor│>────▶│terminar: la │ │
715 | │ └─────────────────────────┘ └───────────────┘ │ruta coincide│ │
716 | │ │ si └─────────────┘ │
717 | │ ┌────────┬─────────┼───────────┬──────────┐ │
718 | │ ▼ ▼ ▼ ▼ ▼ │
719 | ┌──────────────────┐ ┌────────┐┌─────┐┌────────────┐┌────────┐┌──────────┐ │
720 | │restaurar posicion│ │conjunto││punto││metacaracter││caracter││agrupacion│ │
721 | └──────────────────┘ └────────┘└─────┘└────────────┘└────────┘└──────────┘ │
722 | ▲ │ │ │ │ │ │
723 | │ └────────┴─────────┼───────────┘ │ │
724 | │ ▼ ▼ │
725 | ┌────────────────┐ no coincide ┌──────────────────┐ ┌─────────────┐ │
726 | │terminar: ruta │◀────────┬─────────<│buscar constructor│> ┌─<│buscar regexp│> │
727 | │sin coincidencia│ │ └──────────────────┘ │ └─────────────┘ │
728 | └────────────────┘ │ │ coincide │ │ │
729 | └──────────────────┈┈│┈┈──────────┘ │ coincide │
730 | ▼ │ │
731 | ┌──────────────────┐ └──────────┤
732 | │avanzar por cadena│ │
733 | └──────────────────┘ │
734 | │ │
735 | └─────────────────────────────────┘
736 | #+END_EXAMPLE
737 |
738 | * Todo
739 |
740 | 1. Pruebas de rendimiento
741 |
742 | 2. Paralelizar la busqueda de rutas
743 |
744 | * Licencia
745 |
746 | Este proyecto no es de codigo "abierto", es *software libre*, y acorde a
747 | ello se utiliza la licencia GNU GPL Version 3. Cualquier obra que incluya o
748 | derive codigo de esta libreria, debera cumplir con los terminos de esta
749 | licencia.
750 |
751 | * Contacto, contribucion y otras cosas
752 |
753 | [[mailto:nasciiboy@gmail.com]]
754 |
--------------------------------------------------------------------------------
/regexp4.go:
--------------------------------------------------------------------------------
1 | package regexp4
2 |
// inf is 2^30.
// NOTE(review): presumably the "unbounded" upper repetition count; its use is
// not visible in this part of the file — confirm against getLoops.
const inf = 1 << 30

// Modifier bit flags kept in reStruct.mods and RE.mods.
const (
	modAlpha     uint8 = 1 << iota // 1; set by the '^' modifier in getMods
	modOmega                       // 2
	modLonley                      // 4
	modFwrByChar                   // 8
	modCommunism                   // 16
)

const (
	// modNegative is set by genSet on a set whose text starts with '^'.
	modNegative uint8 = 128
	// modPositive is the mask that clears modNegative (mods &= modPositive).
	modPositive uint8 = ^modNegative
	// modCapitalism is the mask that clears modCommunism.
	modCapitalism uint8 = ^modCommunism
)

// Instruction / fragment type codes, used both as raptorASM.inst and as
// reStruct.reType. They are untyped so they convert to uint8 at use sites.
const (
	asmPath = iota
	asmPathEle
	asmPathEnd
	asmGroup
	asmGroupEnd
	asmHook
	asmHookEnd
	asmSet
	asmSetEnd
	asmBackref
	asmMeta
	asmRangeab
	asmUTF8
	asmPoint
	asmSimple
	asmEnd
)
21 |
// reStruct describes one fragment of a regular expression while it is being
// cut into pieces by the compiler.
type reStruct struct {
	str                string // text of the fragment still to be processed
	reType             uint8  // one of the asm* codes
	mods               uint8  // mod* bit flags
	loopsMin, loopsMax int    // repetition bounds (trackerSet fixes them to 1, 1)
}

// catchInfo records one capture.
// NOTE(review): init/end look like start and end offsets into the searched
// text and id like the capture's position in the expression — confirm
// against the matching code, which is not visible here.
type catchInfo struct {
	init, end, id int
}

// raptorASM is one compiled instruction.
type raptorASM struct {
	re   reStruct // fragment the instruction was generated from
	inst uint8    // asm* instruction code
	// close is an index into RE.asm. Opening instructions (path, group, hook,
	// set) get the index of their matching end instruction; a path element
	// gets the index just past its own tracks; leaf and end instructions get
	// their own index.
	close int
}

// RE holds a regular expression together with the state of its compilation
// and of the last search.
type RE struct {
	txt, re string // re is the expression text stored by Compile
	compile bool   // true once Compile has finished successfully
	result  int

	end int
	pos int

	catches      []catchInfo
	catchIndex   int
	catchIdIndex int

	asm  []raptorASM // instruction list produced by Compile
	mods uint8       // global modifiers read from the start of the expression
}
52 |
53 | func (r *RE) Compile( re string ) *RE {
54 | r.catchIndex = 1
55 | r.compile = false
56 | if len(re) == 0 { return r }
57 |
58 | rexp := reStruct{ str: re, reType: asmPath }
59 | r.re = re
60 | r.asm = make( []raptorASM, 0, 32 )
61 |
62 | getMods( &rexp, &rexp )
63 | r.mods = rexp.mods
64 |
65 | if isPath( &rexp ) { r.genPaths ( rexp )
66 | } else { r.genTracks( &rexp ) }
67 |
68 | r.asm = append( r.asm, raptorASM{ inst: asmEnd, close: len(r.asm) } )
69 | r.compile = true
70 | return r
71 | }
72 |
73 | func isPath( rexp *reStruct ) bool {
74 | if len(rexp.str) == 0 { return false }
75 |
76 | for i, deep := 0, 0; walkMeta( rexp.str[i:], &i ) < len( rexp.str ); i++ {
77 | switch rexp.str[ i ] {
78 | case '(', '<': deep++
79 | case ')', '>': deep--
80 | case '[': i += walkSet( rexp.str[i:] )
81 | case '|': if deep == 0 { return true }
82 | }
83 | }
84 |
85 | return false
86 | }
87 |
88 | func (r *RE) genPaths( rexp reStruct ){
89 | var track reStruct
90 | pathIndex := len( r.asm )
91 | r.asm = append( r.asm, raptorASM{ inst: asmPath, re: rexp } )
92 |
93 | for cutByType( &rexp, &track, asmPath ) {
94 | trackIndex := len( r.asm )
95 | r.asm = append( r.asm, raptorASM{ inst: asmPathEle, re: track } )
96 | r.genTracks( &track )
97 | r.asm[trackIndex].close = len( r.asm )
98 | }
99 |
100 | r.asm[pathIndex].close = len( r.asm )
101 | r.asm = append( r.asm, raptorASM{ inst: asmPathEnd, close: len(r.asm) } )
102 | }
103 |
104 | func (r *RE) genTracks( rexp *reStruct ){
105 | var track reStruct
106 | for tracker( rexp, &track ) {
107 | trackIndex := len( r.asm )
108 | switch track.reType {
109 | case asmHook :
110 | r.asm = append( r.asm, raptorASM{ inst: asmHook, re: track } )
111 |
112 | if isPath( &track ) { r.genPaths ( track )
113 | } else { r.genTracks( &track ) }
114 |
115 | r.asm[trackIndex].close = len( r.asm )
116 | r.asm = append( r.asm, raptorASM{ inst: asmHookEnd, close: len(r.asm) } )
117 | case asmGroup :
118 | r.asm = append( r.asm, raptorASM{ inst: asmGroup, re: track } )
119 |
120 | if isPath( &track ) { r.genPaths ( track )
121 | } else { r.genTracks( &track ) }
122 |
123 | r.asm[trackIndex].close = len( r.asm )
124 | r.asm = append( r.asm, raptorASM{ inst: asmGroupEnd, close: len(r.asm) } )
125 | case asmPath :
126 | case asmSet : r.genSet( &track )
127 | case asmBackref: r.asm = append( r.asm, raptorASM{ inst: asmBackref, close: trackIndex, re: track } )
128 | case asmMeta : r.asm = append( r.asm, raptorASM{ inst: asmMeta , close: trackIndex, re: track } )
129 | case asmRangeab: r.asm = append( r.asm, raptorASM{ inst: asmRangeab, close: trackIndex, re: track } )
130 | case asmUTF8 : r.asm = append( r.asm, raptorASM{ inst: asmUTF8 , close: trackIndex, re: track } )
131 | case asmPoint : r.asm = append( r.asm, raptorASM{ inst: asmPoint , close: trackIndex, re: track } )
132 | default : r.asm = append( r.asm, raptorASM{ inst: asmSimple , close: trackIndex, re: track } )
133 | }
134 | }
135 | }
136 |
137 | func (r *RE) genSet( rexp *reStruct ){
138 | if len(rexp.str) == 0 { return }
139 |
140 | if rexp.str[0] == '^' {
141 | rexp.str = rexp.str[1:]
142 | rexp.mods |= modNegative
143 | }
144 |
145 | setIndex := len( r.asm )
146 | r.asm = append( r.asm, raptorASM{ inst: asmSet, re: *rexp } )
147 |
148 | var track reStruct
149 | for trackerSet( rexp, &track ) {
150 | switch track.reType {
151 | case asmMeta : r.asm = append( r.asm, raptorASM{ inst: asmMeta , close: len(r.asm), re: track } )
152 | case asmRangeab: r.asm = append( r.asm, raptorASM{ inst: asmRangeab, close: len(r.asm), re: track } )
153 | case asmUTF8 : r.asm = append( r.asm, raptorASM{ inst: asmUTF8 , close: len(r.asm), re: track } )
154 | default : r.asm = append( r.asm, raptorASM{ inst: asmSimple , close: len(r.asm), re: track } )
155 | }
156 | }
157 |
158 | r.asm[ setIndex ].close = len( r.asm )
159 | r.asm = append( r.asm, raptorASM{ inst: asmSetEnd, close: len(r.asm) } )
160 | }
161 |
162 | func trackerSet( rexp, track *reStruct ) bool {
163 | if len( rexp.str ) == 0 { return false }
164 |
165 | if rexp.str[0] > 127 {
166 | cutByLen( rexp, track, utf8meter( rexp.str ), asmUTF8 )
167 | } else if rexp.str[0] == ':' {
168 | cutByLen ( rexp, track, 2, asmMeta )
169 | } else {
170 | for i := 0; i < len( rexp.str ); i++ {
171 | if rexp.str[i] > 127 {
172 | cutByLen( rexp, track, i, asmSimple ); goto setLM;
173 | } else {
174 | switch rexp.str[i] {
175 | case ':': cutByLen( rexp, track, i, asmSimple ); goto setLM;
176 | case '-':
177 | if i == 1 { cutByLen( rexp, track, 3, asmRangeab )
178 | } else { cutByLen( rexp, track, i - 1, asmSimple ) }
179 |
180 | goto setLM;
181 | }
182 | }
183 | }
184 |
185 | cutByLen( rexp, track, len( rexp.str ), asmSimple );
186 | }
187 |
188 | setLM:
189 | track.loopsMin, track.loopsMax = 1, 1
190 | track.mods &= modPositive
191 | return true
192 | }
193 |
194 | func tracker( rexp, track *reStruct ) bool {
195 | if len( rexp.str ) == 0 { return false }
196 |
197 | if rexp.str[0] > 127 {
198 | cutByLen( rexp, track, utf8meter( rexp.str ), asmUTF8 )
199 | } else {
200 | switch rexp.str[0] {
201 | case ':': cutByLen ( rexp, track, 2, asmMeta )
202 | case '.': cutByLen ( rexp, track, 1, asmPoint )
203 | case '@': cutByLen ( rexp, track, 1 +
204 | countCharDigits( rexp.str[1:] ), asmBackref )
205 | case '(': cutByType( rexp, track, asmGroup )
206 | case '<': cutByType( rexp, track, asmHook )
207 | case '[': cutByType( rexp, track, asmSet )
208 | default : cutSimple( rexp, track )
209 | }
210 | }
211 |
212 | getLoops( rexp, track );
213 | getMods ( rexp, track );
214 | return true
215 | }
216 |
217 | func cutSimple( rexp, track *reStruct ){
218 | for i, c := range rexp.str {
219 | if c > 127 {
220 | cutByLen( rexp, track, i, asmSimple ); return
221 | } else {
222 | switch c {
223 | case '(', '<', '[', '@', ':', '.':
224 | cutByLen( rexp, track, i, asmSimple ); return
225 | case '?', '+', '*', '{', '#':
226 | if i == 1 { cutByLen( rexp, track, 1, asmSimple )
227 | } else { cutByLen( rexp, track, i - 1, asmSimple ) }
228 | return
229 | }
230 | }
231 | }
232 |
233 | cutByLen( rexp, track, len(rexp.str), asmSimple );
234 | }
235 |
236 | func cutByLen( rexp, track *reStruct, length int, reType uint8 ){
237 | *track = *rexp
238 | track.str = rexp.str[:length]
239 | rexp.str = rexp.str[length:]
240 | track.reType = reType;
241 | }
242 |
243 | func cutByType( rexp, track *reStruct, reType uint8 ) bool {
244 | if len(rexp.str) == 0 { return false }
245 |
246 | *track = *rexp
247 | track.reType = reType
248 | for i , deep, cut := 0, 0, false; walkMeta( rexp.str[i:], &i ) < len( rexp.str ); i++ {
249 | switch rexp.str[ i ] {
250 | case '(', '<': deep++
251 | case ')', '>': deep--
252 | case '[': i += walkSet( rexp.str[i:] )
253 | }
254 |
255 | switch reType {
256 | case asmHook, asmGroup: cut = deep == 0
257 | case asmSet : cut = rexp.str[ i ] == ']'
258 | case asmPath : cut = rexp.str[ i ] == '|' && deep == 0
259 | }
260 |
261 | if cut {
262 | track.str = rexp.str[:i]
263 | rexp.str = rexp.str[i + 1:]
264 | if reType != asmPath { track.str = track.str[1:] }
265 | return true
266 | }
267 | }
268 |
269 | rexp.str = ""
270 | return true
271 | }
272 |
273 | func walkSet( str string ) int {
274 | for i := 0; walkMeta( str[i:], &i ) < len( str ); i++ {
275 | if str[i] == ']' { return i }
276 | }
277 |
278 | return len(str);
279 | }
280 |
// walkMeta skips the run of two-byte ":x" pairs at the start of str.
//
// The number of bytes skipped (never more than len(str)) is added to *n and
// the updated *n is returned.
func walkMeta(str string, n *int) int {
	skip := 0
	for skip < len(str) && str[skip] == ':' {
		skip += 2
	}

	// A trailing lone ':' makes the last step overshoot; clamp to the end.
	if skip > len(str) {
		skip = len(str)
	}

	*n += skip
	return *n
}
289 |
290 | func getMods( rexp, track *reStruct ){
291 | if len( rexp.str ) > 0 && rexp.str[ 0 ] == '#' {
292 | for i, c := range rexp.str[1:] {
293 | switch c {
294 | case '^': track.mods |= modAlpha
295 | case '$': track.mods |= modOmega
296 | case '?': track.mods |= modLonley
297 | case '~': track.mods |= modFwrByChar
298 | case '*': track.mods |= modCommunism
299 | case '/': track.mods &= modCapitalism
300 | default : rexp.str = rexp.str[i+1:]; return
301 | }
302 | }
303 |
304 | rexp.str = ""
305 | }
306 | }
307 |
308 | func getLoops( rexp, track *reStruct ){
309 | pos := 0;
310 | track.loopsMin, track.loopsMax = 1, 1
311 |
312 | if len( rexp.str ) > 0 {
313 | switch rexp.str[0] {
314 | case '?' : pos = 1; track.loopsMin = 0; track.loopsMax = 1;
315 | case '+' : pos = 1; track.loopsMin = 1; track.loopsMax = inf;
316 | case '*' : pos = 1; track.loopsMin = 0; track.loopsMax = inf;
317 | case '{' : pos = 1
318 | track.loopsMin = aToi( rexp.str[pos:] )
319 | pos += countCharDigits( rexp.str[pos:] )
320 |
321 | if rexp.str[pos] == '}' {
322 | track.loopsMax = track.loopsMin;
323 | pos += 1
324 | } else if rexp.str[pos:pos+2] == ",}" {
325 | pos += 2
326 | track.loopsMax = inf
327 | } else if rexp.str[pos] == ',' {
328 | pos += 1
329 | track.loopsMax = aToi( rexp.str[pos:] )
330 | pos += countCharDigits( rexp.str[pos:] ) + 1
331 | }
332 | }
333 |
334 | rexp.str = rexp.str[pos:]
335 | }
336 | }
337 |
338 | //-! match
339 |
340 | func (r *RE) Find( txt, re string ) bool {
341 | return r.Match( txt, re ) > 0
342 | }
343 |
344 | func (r *RE) Match( txt, re string ) int {
345 | return r.Compile( re ).MatchString( txt )
346 | }
347 |
348 | func (r *RE) FindString( txt string ) bool {
349 | return r.MatchString( txt ) > 0
350 | }
351 |
352 | func (r *RE) MatchString( txt string ) int {
353 | r.end = len(txt)
354 | r.txt = txt
355 | r.result = 0
356 | r.catches = make( []catchInfo, 32 )
357 | r.catchIndex = 1
358 | if r.end == 0 || !r.compile { return 0 }
359 |
360 | loops := r.end
361 | if (r.mods & modAlpha) > 0 { loops = 1 }
362 |
363 | for forward, i, ocindex := 0, 0, 0; i < loops; i += forward {
364 | forward, r.catchIdIndex, r.pos = utf8meter( txt[i:] ), 1, i
365 | ocindex = r.catchIndex
366 |
367 | if r.trekking( 0 ) {
368 | if (r.mods & modOmega) > 0 {
369 | if r.pos == r.end { r.result = 1; return 1
370 | } else { r.catchIndex = 1 }
371 | } else if (r.mods & modLonley ) > 0 { r.result = 1; return 1
372 | } else if (r.mods & modFwrByChar) > 0 || r.pos == i { r.result++
373 | } else { forward = r.pos - i; r.result++; }
374 | } else { r.catchIndex = ocindex }
375 | }
376 |
377 | return r.result
378 | }
379 |
380 | func (r *RE) trekking( index int ) (result bool) {
381 | for ; r.asm[ index ].inst != asmEnd; index = r.asm[ index ].close + 1 {
382 | switch r.asm[ index ].inst {
383 | case asmPathEnd, asmPathEle, asmGroupEnd, asmHookEnd, asmSetEnd: return true
384 | case asmHook : result = r.catcher ( index )
385 | case asmGroup: result = r.loopGroup( index )
386 | case asmPath : result = r.walker ( index )
387 | default : result = r.looper ( index )
388 | }
389 |
390 | if !result { return false }
391 | }
392 |
393 | return true
394 | }
395 |
396 | func (r *RE) catcher( index int ) bool {
397 | i := r.catchIndex
398 | if r.catchIndex < len(r.catches) {
399 | r.catches[ i ] = catchInfo{ r.pos, r.pos, r.catchIdIndex }
400 | } else {
401 | r.catches = append( r.catches, catchInfo{ r.pos, r.pos, r.catchIdIndex } )
402 | }
403 |
404 | r.catchIndex++
405 | r.catchIdIndex++
406 |
407 | if !r.loopGroup( index ) { return false }
408 |
409 | r.catches[ i ].end = r.pos
410 | return true
411 | }
412 |
413 | func (r *RE) walker( index int ) bool {
414 | index++
415 | for oPos, oCatchIndex, oCatchIdIndex := r.pos, r.catchIndex, r.catchIdIndex;
416 | r.asm[ index ].inst == asmPathEle
417 | index, r.pos, r.catchIndex, r.catchIdIndex = r.asm[ index ].close, oPos, oCatchIndex, oCatchIdIndex {
418 | if r.trekking( index + 1 ) { return true }
419 | }
420 |
421 | return false
422 | }
423 |
424 | func (r *RE) looper( index int ) bool {
425 | loops := 0
426 | for forward := 0; loops < r.asm[ index ].re.loopsMax && r.pos < r.end && r.match( index, r.txt[r.pos:], &forward ); {
427 | r.pos += forward
428 | loops++;
429 | }
430 |
431 | if loops < r.asm[ index ].re.loopsMin { return false }
432 | return true
433 | }
434 |
435 | func (r *RE) loopGroup( index int ) bool {
436 | loops := 0
437 | for loops < r.asm[ index ].re.loopsMax && r.trekking( index + 1 ) {
438 | loops++;
439 | }
440 |
441 | if loops < r.asm[ index ].re.loopsMin { return false }
442 | return true
443 | }
444 |
445 | func (r *RE) match( index int, txt string, forward *int ) bool {
446 | switch r.asm[ index ].inst {
447 | case asmPoint : *forward = utf8meter( txt ); return true
448 | case asmSet : return r.matchSet ( index, txt, forward )
449 | case asmBackref: return r.matchBackRef( &r.asm[ index ].re, txt, forward )
450 | case asmRangeab: return matchRange ( &r.asm[ index ].re, txt, forward )
451 | case asmMeta : return matchMeta ( &r.asm[ index ].re, txt, forward )
452 | default : return matchText ( &r.asm[ index ].re, txt, forward )
453 | }
454 | }
455 |
456 | func matchText( rexp *reStruct, txt string, forward *int ) bool {
457 | *forward = len(rexp.str)
458 |
459 | if len(txt) < *forward { return false }
460 |
461 | if (rexp.mods & modCommunism) > 0 {
462 | return strnEqlCommunist( txt, rexp.str, *forward )
463 | }
464 |
465 | return txt[:*forward] == rexp.str
466 | }
467 |
468 | func matchRange( rexp *reStruct, txt string, forward *int ) bool {
469 | *forward = 1
470 | if (rexp.mods & modCommunism) > 0 {
471 | chr := toLower( rune(txt[0]) )
472 | return chr >= toLower( rune(rexp.str[ 0 ]) ) && chr <= toLower( rune(rexp.str[ 2 ]) )
473 | }
474 |
475 | return txt[0] >= rexp.str[ 0 ] && txt[0] <= rexp.str[ 2 ];
476 | }
477 |
478 | func matchMeta( rexp *reStruct, txt string, forward *int ) bool {
479 | var f func( r rune ) bool
480 | *forward = 1
481 |
482 | switch rexp.str[1] {
483 | case 'a' : return isAlpha( rune(txt[0]) )
484 | case 'A' : f = isAlpha
485 | case 'd' : return isDigit( rune(txt[0]) )
486 | case 'D' : f = isDigit
487 | case 'w' : return isAlnum( rune(txt[0]) )
488 | case 'W' : f = isAlnum
489 | case 's' : return isSpace( rune(txt[0]) )
490 | case 'S' : f = isSpace
491 | case 'b' : return isBlank( rune(txt[0]) )
492 | case 'B' : f = isBlank
493 | case '&' : if txt[0] < 128 { return false }
494 | *forward = utf8meter( txt )
495 | return true
496 | default : return txt[0] == rexp.str[1]
497 | }
498 |
499 | if f( rune(txt[0]) ) { return false }
500 | *forward = utf8meter( txt )
501 | return true
502 | }
503 |
504 | func (r *RE) matchSet( index int, txt string, forward *int ) (result bool) {
505 | *forward = 1
506 | reverse := (r.asm[ index ].re.mods & modNegative) > 0
507 |
508 | for index++; !result && r.asm[ index ].inst != asmSetEnd; index++ {
509 | switch r.asm[ index ].inst {
510 | case asmRangeab, asmUTF8, asmMeta:
511 | result = r.match( index, txt, forward )
512 | default:
513 | if (r.asm[ index ].re.mods & modCommunism) > 0 {
514 | result = findRuneCommunist( r.asm[ index ].re.str, rune( txt[ 0 ] ) )
515 | } else {
516 | result = strnchr( r.asm[ index ].re.str, rune( txt[ 0 ] ) )
517 | }
518 | }
519 |
520 | if result {
521 | if reverse { return false }
522 | return true
523 | }
524 | }
525 |
526 | if reverse {
527 | *forward = utf8meter( txt )
528 | return true
529 | }
530 |
531 | return false
532 | }
533 |
534 | func (r *RE) matchBackRef( rexp *reStruct, txt string, forward *int ) bool {
535 | backRefId := aToi( rexp.str[1:] )
536 | backRefIndex := r.lastIdCatch( backRefId )
537 | strCatch := r.GetCatch( backRefIndex )
538 | *forward = len(strCatch)
539 |
540 | if strCatch == "" || len( txt ) < *forward || strCatch != txt[:*forward] { return false }
541 |
542 | return true
543 | }
544 |
545 | func (r *RE) lastIdCatch( id int ) int {
546 | for index := r.catchIndex - 1; index > 0; index-- {
547 | if r.catches[ index ].id == id { return index }
548 | }
549 |
550 | return len(r.catches);
551 | }
552 |
553 | func (r *RE) Result () int { return r.result }
554 |
555 | func (r *RE) TotCatch() int { return r.catchIndex - 1 }
556 |
557 | func (r *RE) GetCatch( index int ) string {
558 | if index < 1 || index >= r.catchIndex { return "" }
559 | return r.txt[ r.catches[index].init : r.catches[index].end ]
560 | }
561 |
562 | func (r *RE) GpsCatch( index int ) int {
563 | if index < 1 || index >= r.catchIndex { return 0 }
564 | return r.catches[index].init
565 | }
566 |
567 | func (r *RE) LenCatch( index int ) int {
568 | if index < 1 || index >= r.catchIndex { return 0 }
569 | return r.catches[index].end - r.catches[index].init
570 | }
571 |
572 | func (r *RE) RplCatch( rplStr string, id int ) string {
573 | last, rpls, catchLens := 0, 0, 0
574 | for index := 1; index < r.catchIndex; index++ {
575 | if r.catches[index].id == id {
576 | rpls++
577 | catchLens += r.catches[index].end - r.catches[index].init
578 | }
579 | }
580 |
581 | if rpls == 0 { return r.txt }
582 | if (r.mods & modFwrByChar) > 0 { catchLens = 0 }
583 |
584 | result, gps := make( []byte, len( r.txt ) - catchLens + rpls * len( rplStr ) ), 0
585 |
586 | for index := 1; index < r.catchIndex; index++ {
587 | if r.catches[index].id == id {
588 | if last > r.catches[index].init { last = r.catches[index].init } // modFwrByChar
589 |
590 | gps += copy( result[gps:], r.txt[last:r.catches[index].init] )
591 | gps += copy( result[gps:], rplStr )
592 | last = r.catches[index].end
593 | }
594 | }
595 |
596 | if last < len(r.txt) { gps += copy( result[gps:], r.txt[last:] ) }
597 |
598 | return string( result[:gps] )
599 | }
600 |
601 | func (r *RE) PutCatch( pStr string ) (result string) {
602 | for i := 0; i < len(pStr); {
603 | if pStr[i] == '#' {
604 | i++
605 | if len(pStr[i:]) > 0 && pStr[i] == '#' {
606 | i++
607 | result += "#"
608 | } else {
609 | result += r.GetCatch( aToi( pStr[i:] ) )
610 | i += countCharDigits ( pStr[i:] )
611 | }
612 | } else { result += pStr[i:i+1]; i++ }
613 | }
614 |
615 | return
616 | }
617 |
618 | func (r *RE) Copy() *RE {
619 | nre := RE{ txt: r.txt, re: r.re, compile: r.compile, result: r.result, catchIndex: r.catchIndex, mods: r.mods }
620 | nre.catches = make( []catchInfo, r.catchIndex )
621 | copy( nre.catches, r.catches )
622 | nre.asm = make( []raptorASM, len( r.asm ) )
623 | copy( nre.asm, r.asm )
624 |
625 | return &nre
626 | }
627 |
628 | func Compile( re string ) *RE {
629 | return new( RE ).Compile( re )
630 | }
631 |
--------------------------------------------------------------------------------
/regexp4_test.go:
--------------------------------------------------------------------------------
1 | //
2 | // Recursive Regexp Raptor (go version)
3 | // Available at http://github.com/nasciiboy/regexp4
4 | //
5 | // Copyright © 2017 nasciiboy .
6 | // Distributed under the GNU GPL v3 License.
7 | // See readme.org for details.
8 | //
9 |
10 | //
11 | // Unit tests
12 | //
13 |
14 | package regexp4
15 |
16 | import "testing"
17 | import "fmt"
18 | import "bytes"
19 |
20 | func printASM( rexp *RE ){
21 | fmt.Printf( " init %q\n", rexp.re )
22 |
23 | for i, v := range rexp.asm {
24 | fmt.Printf( "[%3d][%3d]", i, v.close )
25 |
26 | switch v.inst {
27 | case 0: fmt.Printf( "[%-12s]", "asmPath" )
28 | case 1: fmt.Printf( "[%-12s]", "asmPathEle" )
29 | case 2: fmt.Printf( "[%-12s]", "asmPathEnd" )
30 | case 3: fmt.Printf( "[%-12s]", "asmGroup" )
31 | case 4: fmt.Printf( "[%-12s]", "asmGroupEnd" )
32 | case 5: fmt.Printf( "[%-12s]", "asmHook" )
33 | case 6: fmt.Printf( "[%-12s]", "asmHookEnd" )
34 | case 7: fmt.Printf( "[%-12s]", "asmSet" )
35 | case 8: fmt.Printf( "[%-12s]", "asmSetEnd" )
36 | case 9: fmt.Printf( "[%-12s]", "asmBackref" )
37 | case 10: fmt.Printf( "[%-12s]", "asmMeta" )
38 | case 11: fmt.Printf( "[%-12s]", "asmRangeab" )
39 | case 12: fmt.Printf( "[%-12s]", "asmUTF8" )
40 | case 13: fmt.Printf( "[%-12s]", "asmPoint" )
41 | case 14: fmt.Printf( "[%-12s]", "asmSimple" )
42 | case 15: fmt.Printf( "[%-12s]", "asmEnd" )
43 | }
44 |
45 | fmt.Printf( " %-15q [%d-%d][%08b]\n", v.re.str, v.re.loopsMin, v.re.loopsMax, v.re.mods )
46 | }
47 | }
48 |
49 | func showCompile(t *testing.T) {
50 | re := new( RE )
51 | re.Compile( "<[:a]a>" )
52 | printASM( re )
53 | re.Compile( "#*cas[A-Z]" )
54 | printASM( re )
55 | re.Compile( "#^$<:b*:|(:|+#*:|)+>" )
56 | printASM( re )
57 | }
58 |
59 | func TestRegexp4(t *testing.T) {
60 | // showCompile( t )
61 |
62 | nTest( t )
63 | cTest( t )
64 | dTest( t )
65 | sTest( t )
66 | pTest( t )
67 | gTest( t )
68 |
69 | nTestUTF( t )
70 | cTestUTF( t )
71 | sTestUTF( t )
72 | pTestUTF( t )
73 | gTestUTF( t )
74 | }
75 |
76 | func nTest( t *testing.T ){
77 | numTest := []struct {
78 | txt, re string
79 | n int
80 | }{
81 | { "a", "a", 1 },
82 | { "aa", "aa", 1 },
83 | { "raptor", "raptor", 1 },
84 | { "a", "(a)", 1 },
85 | { "a", "", 1 },
86 | { "a", "((a))", 1 },
87 | { "a", "<>", 1 },
88 | { "a", "((((((a))))))", 1 },
89 | { "a", "<<<<<>>>>>", 1 },
90 | { "a", "b|a", 1 },
91 | { "a", "c|b|a", 1 },
92 | { "a", "(b|a)", 1 },
93 | { "a", "(c|b|a)", 1 },
94 | { "a", "(c|b)|a", 1 },
95 | { "a", "((|)|a)", 1 },
96 | { "raptor", "b|raptor", 1 },
97 | { "raptor", "c|b|raptor", 1 },
98 | { "raptor", "(b|raptor)", 1 },
99 | { "raptor", "(c|raptor)|a", 1 },
100 | { "raptor", "((|)|a)", 1 },
101 | { "ab", "a(b|c)|A(B|C)", 1 },
102 | { "ac", "a(b|c)|A(B|C)", 1 },
103 | { "AB", "a(b|c)|A(B|C)", 1 },
104 | { "AC", "a(b|c)|A(B|C)", 1 },
105 | { "ab", "a|A", 1 },
106 | { "ac", "a|A", 1 },
107 | { "AB", "a|A", 1 },
108 | { "AC", "a|A", 1 },
109 | { "ab" , "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
110 | { "ac" , "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
111 | { "AB" , "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
112 | { "AC" , "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
113 | { "ab" , "|A>|1234", 1 },
114 | { "ac" , "|A>|1234", 1 },
115 | { "AB" , "|A>|1234", 1 },
116 | { "AC" , "|A>|1234", 1 },
117 | { "1234ea", "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
118 | { "1234eb", "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
119 | { "1234ec", "(a(b|c)|A(B|C))|1234(ea|eb|ec)", 1 },
120 | { "1234ea", "|A>|1234", 1 },
121 | { "1234eb", "|A>|1234", 1 },
122 | { "1234ec", "|A>|1234", 1 },
123 | { "abd", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
124 | { "abe", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
125 | { "acd", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
126 | { "ace", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
127 | { "ABD", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
128 | { "ABE", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
129 | { "ACD", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
130 | { "ACE", "a(b|c)(d|e)|A(B|C)(D|E)", 1 },
131 | { "raptor", "(c|r)(e|a)(p|q)(t|u)(0|o)(t|r)", 1 },
132 |
133 | { "", "", 0 },
134 | { "", "a", 0 },
135 | { "a", "", 0 },
136 | { "a", "o", 0 },
137 | { "a", "a", 1 },
138 | { "aaa", "a", 3 },
139 | { "a", "aaa", 0 },
140 | { "a aaa aaa", "aaa", 2 },
141 | { "Raptor Test", "a", 1 },
142 | { "Raptor Test", "t", 2 },
143 | { "aeiou", "a|e|i|o|u", 5 },
144 | { "aeiou", "(a|e|i|o|u)", 5 },
145 | { "aeiou", "(a|e)|i|(o|u)", 5 },
146 | { "aeiou", "(a|(e))|(i|(o|u))", 5 },
147 | { "aa ae ai ao au", "a(a|e|i|o|u)", 5 },
148 | { "aa ae ai ao au", "a(0|1|2|3|4)", 0 },
149 | { "a1 a2 a3 ao au", "a(1|2|3|4|5)", 3 },
150 | { "a1 a2 a3 a4 a5", "a(1|2|3|4|5)", 5 },
151 | { "aa ae ai ao au", "a(a|e|i|o|u) ", 4 },
152 | { "aa ae Ai ao au", "A(a|e|i|o|u)", 1 },
153 | { "aa ae Ai ao au", "(A|a)(a|e|i|o|u)", 5 },
154 | { "aae aei Aio aoa auu", "(A|a)(a|e|i|o|u)(a|e|i|o|u)", 5 },
155 |
156 | { "aa aaaa aaaa", "a", 10 },
157 | { "aa aaaa aaaa", "aa", 5 },
158 | { "aa aaaa aaaa", "aaa", 2 },
159 | { "aa aaaa aaaa", "aaaa", 2 },
160 | { "aaaaaaaaaaaaaaaaaaaa", "a", 20 },
161 | { "abababababababababababababababababababab", "a" , 20 },
162 | { "aaaaaaaaaaaaaaaaaaaa", "(a)", 20 },
163 | { "abababababababababababababababababababab", "(a)", 20 },
164 | { "aaaaaaaaaaaaaaaaaaaa", "", 20 },
165 | { "abababababababababababababababababababab", "", 20 },
166 | { "aaaaaaaaaaaaaaaaaaaa", "a+", 1 },
167 | { "abababababababababababababababababababab", "a+" , 20 },
168 | { "aaaaaaaaaaaaaaaaaaaa", "a?", 20 },
169 | { "abababababababababababababababababababab", "a?" , 40 },
170 | { "aaaaaaaaaaaaaaaaaaaa", "a*", 1 },
171 | { "abababababababababababababababababababab", "a*" , 40 },
172 | { "aaaaaaaaaaaaaaaaaaaa", "a{1}", 20 },
173 | { "aaaaaaaaaaaaaaaaaaaa", "a{1}", 20 },
174 | { "aaaaaaaaaaaaaaaaaaaa", "a{5}", 4 },
175 | { "aaaaaaaaaaaaaaaaaaaa", "a{1,5}", 4 },
176 | { "aaaaaaaaaaaaaaaaaaaa", "a{5,5}", 4 },
177 | { "aaaaaaaaaaaaaaaaaaaa", "a{10}", 2 },
178 | { "aaaaaaaaaaaaaaaaaaaa", "a{1,100}", 1 },
179 | { "aaaaaaaaaaaaaaaaaaaa", "a{001,00100}", 1 },
180 | { "abababababababababababababababababababab", "a{1}" , 20 },
181 | { "abababababababababababababababababababab", "a{001}" , 20 },
182 | { "aaaaaaaaaaaaaaaaaaaa", "a{1,1}", 20 },
183 | { "abababababababababababababababababababab", "a{1,1}" , 20 },
184 | { "abababababababababababababababababababab", "a{001,000001}" , 20 },
185 | { "aaaaaaaaaaaaaaaaaaaa", "a{20}", 1 },
186 | { "abababababababababababababababababababab", "(a|b){1,1}" , 40 },
187 |
188 | { "aaaaaaaaaaaaaaaaaaaa", "a{1}b{0}", 20 },
189 | { "aaaaaaaaaaaaaaaaaaaa", "b{0}a{1}", 20 },
190 |
191 | { "aaaaaaaaaaaaaaaaaaaa", "b{0}", 20 },
192 | { "bbbbbbbbbbbbbbbbbbbb", "b{0}", 20 },
193 | { "bbbbbbbbbbbbbbbbbbbb", "b{1}", 20 },
194 | { "bbbbbbbbbbbbbbbbbbbb", "b{2}", 10 },
195 |
196 | { "abc", "", 1 },
197 | { "abc", "a", 1 },
198 | { "abc", "c", 1 },
199 | { "abc", "ac", 1 },
200 | { "abc", "", 2 },
201 | { "abc", "", 3 },
202 | { "abc", "<(a|b)|c>", 3 },
203 | { "aa aaaa aaaa", "", 5 },
204 | { "abc", "a", 0 },
205 | { "abc", "x", 0 },
206 | { "abc", "x", 0 },
207 | { "abc", "<x|abc>", 1 },
208 | { "abc", "|abc>", 1 },
209 | { "abc abc abc", "", 9 },
210 | { "abc abc abc", "<(a|b|c)(a|b|c)(a|b|c)>", 3 },
211 | { "abc abc abc", "<(a|b|c)(a|b|c)(a|b|c)> ", 2 },
212 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 3 },
213 |
214 | { "a", "a?", 1 },
215 | { "a", "b?", 1 },
216 | { "a", "a+", 1 },
217 | { "a", "a*", 1 },
218 | { "a", "b*", 1 },
219 | { "a", "aa?", 1 },
220 | { "a", "ab?", 1 },
221 | { "a", "aa+", 0 },
222 | { "a", "aa*", 1 },
223 | { "a", "ab*", 1 },
224 | { "a", "a{1,2}", 1 },
225 | { "aaa", "a+", 1 },
226 | { "aaa", "a*", 1 },
227 | { "aaa", "a+", 1 },
228 | { "aaa", "a?", 3 },
229 | { "aaab", "a+", 1 },
230 | { "aaab", "a*", 2 },
231 | { "aaab", "a?", 4 },
232 | { "aaab", "a+b", 1 },
233 | { "aaab", "a*b", 1 },
234 | { "aaab", "a?b", 1 },
235 | { "aaab", "a+b?", 1 },
236 | { "aaab", "a*b?", 1 },
237 | { "aaab", "a?b?", 3 },
238 | { "aaab", "a+b+", 1 },
239 | { "aaab", "a*b+", 1 },
240 | { "aaab", "a?b+", 1 },
241 | { "aaab", "a+b*", 1 },
242 | { "aaab", "a*b*", 1 },
243 | { "aaab", "a?b*", 3 },
244 | { "aaabaaa", "a+", 2 },
245 | { "aaabaaa", "a*", 3 },
246 | { "aaabaaa", "a*", 3 },
247 | { "aaabaaa", "a*", 3 },
248 | { "a", "(a)?", 1 },
249 | { "a", "(b)?", 1 },
250 | { "a", "(a)+", 1 },
251 | { "a", "(a)*", 1 },
252 | { "a", "(b)*", 1 },
253 | { "aaa", "(a)+", 1 },
254 | { "aaa", "(a)*", 1 },
255 |
256 | { "Raptor Test", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1 },
257 | { "Raaaaptor TFest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1 },
258 | { "CaptorTest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1 },
259 | { "Cap CaptorTest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1 },
260 | { "Rap Captor Fest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1 },
261 |
262 | { "a", ":a", 1 },
263 | { "a", ":A", 0 },
264 | { "a", ":d", 0 },
265 | { "a", ":D", 1 },
266 | { "a", ":w", 1 },
267 | { "a", ":W", 0 },
268 | { "a", ":s", 0 },
269 | { "a", ":S", 1 },
270 | { "A", ":a", 1 },
271 | { "A", ":A", 0 },
272 | { "A", ":d", 0 },
273 | { "A", ":D", 1 },
274 | { "A", ":w", 1 },
275 | { "A", ":W", 0 },
276 | { "A", ":s", 0 },
277 | { "A", ":S", 1 },
278 | { "4", ":a", 0 },
279 | { "4", ":A", 1 },
280 | { "4", ":d", 1 },
281 | { "4", ":D", 0 },
282 | { "4", ":w", 1 },
283 | { "4", ":W", 0 },
284 | { "4", ":s", 0 },
285 | { "4", ":S", 1 },
286 | { " ", ":a", 0 },
287 | { " ", ":A", 1 },
288 | { " ", ":d", 0 },
289 | { " ", ":D", 1 },
290 | { " ", ":w", 0 },
291 | { " ", ":W", 1 },
292 | { " ", ":s", 1 },
293 | { " ", ":S", 0 },
294 | { "\t", ":a", 0 },
295 | { "\t", ":A", 1 },
296 | { "\t", ":d", 0 },
297 | { "\t", ":D", 1 },
298 | { "\t", ":w", 0 },
299 | { "\t", ":W", 1 },
300 | { "\t", ":s", 1 },
301 | { "\t", ":S", 0 },
302 |
303 | { "abc", ":a", 3 },
304 | { "abc", ":A", 0 },
305 | { "abc", ":d", 0 },
306 | { "abc", ":D", 3 },
307 | { "abc", ":w", 3 },
308 | { "abc", ":W", 0 },
309 | { "abc", ":s", 0 },
310 | { "abc", ":S", 3 },
311 | { "ABC", ":a", 3 },
312 | { "ABC", ":A", 0 },
313 | { "ABC", ":d", 0 },
314 | { "ABC", ":D", 3 },
315 | { "ABC", ":w", 3 },
316 | { "ABC", ":W", 0 },
317 | { "ABC", ":s", 0 },
318 | { "ABC", ":S", 3 },
319 | { "123", ":a", 0 },
320 | { "123", ":A", 3 },
321 | { "123", ":d", 3 },
322 | { "123", ":D", 0 },
323 | { "123", ":w", 3 },
324 | { "123", ":W", 0 },
325 | { "123", ":s", 0 },
326 | { "123", ":S", 3 },
327 | { " \n\t", ":a", 0 },
328 | { " \n\t", ":A", 3 },
329 | { " \n\t", ":d", 0 },
330 | { " \n\t", ":D", 3 },
331 | { " \n\t", ":w", 0 },
332 | { " \n\t", ":W", 3 },
333 | { " \n\t", ":s", 3 },
334 | { " \n\t", ":S", 0 },
335 | { " \n\t", ":a", 0 },
336 | { " \n\t", ":A", 3 },
337 | { " \n\t", ":d", 0 },
338 | { " \n\t", ":D", 3 },
339 | { " \n\t", ":w", 0 },
340 | { " \n\t", ":W", 3 },
341 | { " \n\t", ":s", 3 },
342 | { " \n\t", ":S", 0 },
343 |
344 | { "abc", ":a+", 1 },
345 | { "abc", ":A+", 0 },
346 | { "abc", ":d+", 0 },
347 | { "abc", ":D+", 1 },
348 | { "abc", ":w+", 1 },
349 | { "abc", ":W+", 0 },
350 | { "abc", ":s+", 0 },
351 | { "abc", ":S+", 1 },
352 | { "ABC", ":a+", 1 },
353 | { "ABC", ":A+", 0 },
354 | { "ABC", ":d+", 0 },
355 | { "ABC", ":D+", 1 },
356 | { "ABC", ":w+", 1 },
357 | { "ABC", ":W+", 0 },
358 | { "ABC", ":s+", 0 },
359 | { "ABC", ":S+", 1 },
360 | { "123", ":a+", 0 },
361 | { "123", ":A+", 1 },
362 | { "123", ":d+", 1 },
363 | { "123", ":D+", 0 },
364 | { "123", ":w+", 1 },
365 | { "123", ":W+", 0 },
366 | { "123", ":s+", 0 },
367 | { "123", ":S+", 1 },
368 | { " \n\t", ":a+", 0 },
369 | { " \n\t", ":A+", 1 },
370 | { " \n\t", ":d+", 0 },
371 | { " \n\t", ":D+", 1 },
372 | { " \n\t", ":w+", 0 },
373 | { " \n\t", ":W+", 1 },
374 | { " \n\t", ":s+", 1 },
375 | { " \n\t", ":S+", 0 },
376 | { " \n\t", ":a+", 0 },
377 | { " \n\t", ":A+", 1 },
378 | { " \n\t", ":d+", 0 },
379 | { " \n\t", ":D+", 1 },
380 | { " \n\t", ":w+", 0 },
381 | { " \n\t", ":W+", 1 },
382 | { " \n\t", ":s+", 1 },
383 | { " \n\t", ":S+", 0 },
384 |
385 | { "aeiou", ":a", 5 },
386 |
387 | { "(((", ":(", 3 },
388 | { ")))", ":)", 3 },
389 | { "<<<", ":<", 3 },
390 | { ">>>", ":>", 3 },
391 | { ":::", "::", 3 },
392 | { "|||", ":|", 3 },
393 | { "###", ":#", 3 },
394 | { ":#()|<>", ":::#:(:):|:<:>", 1 },
395 | { ":#()|<>", "(::|:#|:(|:)|:||:<|:>)", 7 },
396 | { "()<>[]{}*?+", "[:(:):<:>:[:]:{:}:*:?:+]", 11 },
397 | { "()<>[]|{}*#@?+", "[()<>:[:]|{}*?+#@]", 14 },
398 | { "12)", "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
399 | { "12.", "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
400 | { "a)" , "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
401 | { "a." , "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
402 | { "-" , "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
403 | { "+" , "#^:b*(-|:+|(:d+|:a+)[.)])", 1 },
404 | { ")>}", "[)>}]", 3 },
405 | { "(test1)(test2)", ":(test:d:)", 2 },
406 |
407 | { "", ".", 0 },
408 | { "a", ".", 1 },
409 | { "aaa", ".", 3 },
410 | { "a", "...", 0 },
411 | { "a aaa aaa", ".", 9 },
412 | { "a aaa aaa", "...", 3 },
413 | { "a aaa aaa", ".aa", 2 },
414 | { "a aaa aaa", "aa.", 2 },
415 | { "Raptor Test", ".a", 1 },
416 | { "Raptor Test", ".t", 2 },
417 | { "Raptor Test", ".z", 0 },
418 | { "Raptor Test", "a.", 1 },
419 | { "Raptor Test", " .", 1 },
420 | { "Raptor Test", "z.", 0 },
421 | { "a", ".?", 1 },
422 | { "a", ".+", 1 },
423 | { "a", ".*", 1 },
424 | { "a", ".{1}", 1 },
425 | { "a aaa aaa", ".?", 9 },
426 | { "a aaa aaa", ".+", 1 },
427 | { "a aaa aaa", ".*", 1 },
428 | { "a aaa aaa", ".{1}", 9 },
429 | { "a", "a.?", 1 },
430 | { "a", "a.+", 0 },
431 | { "a", "a.*", 1 },
432 | { "a", "a.{1}", 0 },
433 | { "aeiou", "a|.", 5 },
434 | { "aeiou", "a|.?", 5 },
435 | { "aeiou", "a|.+", 2 },
436 | { "aeiou", "a|.*", 2 },
437 | { "aeiou", ".|a", 5 },
438 | { "aeiou", ".?|a", 5 },
439 | { "aeiou", ".+|a", 1 },
440 | { "aeiou", ".*|a", 1},
441 | { "aeiou", "(a|.)", 5 },
442 | { "aeiou", "(a|.?)", 5 },
443 | { "aeiou", "(a|.+)", 2 },
444 | { "aeiou", "(a|.*)", 2 },
445 | { "aeiou", "(.|a)", 5 },
446 | { "aeiou", "(.?|a)", 5 },
447 | { "aeiou", "(.+|a)", 1 },
448 | { "aeiou", "(.*|a)", 1},
449 | { "aeiou", "a|(.)", 5 },
450 | { "aeiou", "a|(.?)", 5 },
451 | { "aeiou", "a|(.+)", 2 },
452 | { "aeiou", "a|(.*)", 2 },
453 | { "aeiou", "(.)|a", 5 },
454 | { "aeiou", "(.?)|a", 5 },
455 | { "aeiou", "(.+)|a", 1 },
456 | { "aeiou", "(.*)|a", 1},
457 | { "aeiou", "a|(.)", 5 },
458 | { "aeiou", "a|(.)?", 5 },
459 | { "aeiou", "a|(.)+", 2 },
460 | { "aeiou", "a|(.)*", 2 },
461 | { "aeiou", "(.)|a", 5 },
462 | { "aeiou", "(.)?|a", 5 },
463 | { "aeiou", "(.)+|a", 1 },
464 | { "aeiou", "(.)*|a", 1},
465 | { "abababababababababababababababababababab", "." , 40 },
466 | { "abababababababababababababababababababab", "(a.)" , 20 },
467 | { "abababababababababababababababababababab", "(.a)" , 19 },
468 | { "abababababababababababababababababababab", "(:a.)" , 20 },
469 | { "abababababababababababababababababababab", "(.:a)" , 20 },
470 | { "abababababababababababababababababababab", "(.{5}:a{5})" , 4 },
471 |
472 | { "", "a-z", 0 },
473 | { "a", "a-z", 0 },
474 | { "-", "-", 1 },
475 | { "-", "-a", 0 },
476 | { "-a", "-a", 1 },
477 | { "a-z", "a-z", 1 },
478 | { "A-Z", "A-Z", 1 },
479 | { "a-c", "a-z", 0 },
480 | { "A-c", "A-Z", 0 },
481 | { "a-zA-Z", "a-zA-Z", 1 },
482 | { "a-zB-Z", "a-zA-Z", 0 },
483 | { "a-", "a-z?", 1 },
484 | { "a-z", "a-z+", 1 },
485 | { "a-", "a-z*", 1 },
486 | { "a-z", "a-z?", 1 },
487 | { "a-zzzz", "a-z+", 1 },
488 | { "a-zz", "a-z*", 1 },
489 | { "a-b", "a-z?", 1 },
490 | { "a-bzzzz", "a-z+", 0 },
491 | { "a-bzz", "a-z*", 1 },
492 |
493 | { "", "[a]", 0 },
494 | { "a", "[a]", 1 },
495 | { "a", "[.]", 0 },
496 | { ".", "[.]", 1 },
497 | { "a", "[A]", 0 },
498 | { "A", "[A]", 1 },
499 | { "1", "[A]", 0 },
500 | { "1", "[1]", 1 },
501 | { "a", "[:a]", 1 },
502 | { "A", "[:D]", 1 },
503 | { "aaa", "[a-z]", 3 },
504 | { "a", "[a-z][a-z][a-z]", 0 },
505 | { "a aaa aaa", "[a-z]", 7 },
506 | { "a aaa aaa", "[ a-z]", 9 },
507 | { "a aaa aaa", "[a-z][a-z][a-z]", 2 },
508 | { "a aaa aaa", "[a-z]aa", 2 },
509 | { "a aaa aaa", "aa[a-z]", 2 },
510 | { "Raptor Test", "[:w]a", 1 },
511 | { "Raptor Test", "[:w]t", 2 },
512 | { "Raptor Test", "[a-z]z", 0 },
513 | { "Raptor Test", "a[a-z]", 1 },
514 | { "Raptor Test", " [A-Z]", 1 },
515 | { "Raptor Test", "z[a-z]", 0 },
516 | { "a", "[a]?", 1 },
517 | { "a", "[a]+", 1 },
518 | { "a", "[a]*", 1 },
519 | { "a", "[a]{1}", 1 },
520 | { "a aaa aaa", "[a-z]?", 9 },
521 | { "a aaa aaa", "[a-z]+", 3 },
522 | { "a aaa aaa", "[a-z]*", 5 },
523 | { "a aaa aaa", "[a-z]{1}", 7 },
524 | { "a", "a[a-z]?", 1 },
525 | { "a", "a[a-z]+", 0 },
526 | { "a", "a[a-z]*", 1 },
527 | { "a", "a[a-z]{1}", 0 },
528 | { "aeiou", "a|[aeiou]", 5 },
529 | { "aeiou", "a|[aeiou]?", 5 },
530 | { "aeiou", "a|[aeiou]+", 2 },
531 | { "aeiou", "a|[aeiou]*", 2 },
532 | { "aeiou", "[aeiou]|a", 5 },
533 | { "aeiou", "[aeiou]?|a", 5 },
534 | { "aeiou", "[aeiou]+|a", 1 },
535 | { "aeiou", "[aeiou]*|a", 1},
536 | { "aeiou", "(a|[aeiou])", 5 },
537 | { "aeiou", "(a|[aeiou]?)", 5 },
538 | { "aeiou", "(a|[aeiou]+)", 2 },
539 | { "aeiou", "(a|[aeiou]*)", 2 },
540 | { "aeiou", "([aeiou]|a)", 5 },
541 | { "aeiou", "([aeiou]?|a)", 5 },
542 | { "aeiou", "([aeiou]+|a)", 1 },
543 | { "aeiou", "([aeiou]*|a)", 1},
544 | { "aeiou", "a|([aeiou])", 5 },
545 | { "aeiou", "a|([aeiou]?)", 5 },
546 | { "aeiou", "a|([aeiou]+)", 2 },
547 | { "aeiou", "a|([aeiou]*)", 2 },
548 | { "aeiou", "([aeiou])|a", 5 },
549 | { "aeiou", "([aeiou]?)|a", 5 },
550 | { "aeiou", "([aeiou]+)|a", 1 },
551 | { "aeiou", "([aeiou]*)|a", 1},
552 | { "aeiou", "a|([aeiou])", 5 },
553 | { "aeiou", "a|([aeiou])?", 5 },
554 | { "aeiou", "a|([aeiou])+", 2 },
555 | { "aeiou", "a|([aeiou])*", 2 },
556 | { "aeiou", "([aeiou])|a", 5 },
557 | { "aeiou", "([aeiou])?|a", 5 },
558 | { "aeiou", "([aeiou])+|a", 1 },
559 | { "aeiou", "([aeiou])*|a", 1},
560 | { "1a2a3a4a5a6a", "[1-6]a", 6 },
561 | { "1a2a3a4a5a6a", "[1-3]a", 3 },
562 | { "1a2b3c4d5e6f", "[123456][abcdef]", 6 },
563 | { "1a2b3c4d5e6f", "[123][abcdef]", 3 },
564 | { ".b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b", "[:.]", 20 },
565 | { ".b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b", "[:.b]", 40 },
566 | { ".b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b", "[.]", 20 },
567 | { ".b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b.b", "[.b]", 40 },
568 | { "abababababababababababababababababababab", "(a[ab])" , 20 },
569 | { "abababababababababababababababababababab", "([ab]a)" , 19 },
570 | { "abababababababababababababababababababab", "(:a[ab])" , 20 },
571 | { "abababababababababababababababababababab", "([ab]:a)" , 20 },
572 | { "abababababababababababababababababababab", "([ab]{5}:a{5})" , 4 },
573 |
574 | { "", "[^a]", 0 },
575 | { "a", "[^1]", 1 },
576 | { "a", "[^a]", 0 },
577 | { "A", "[^a]", 1 },
578 | { "1", "[^1]", 0 },
579 | { "1", "[^A]", 1 },
580 | { "a", "[^:a]", 0 },
581 | { "A", "[^:A]", 1 },
582 | { "aaa", "[^z]", 3 },
583 | { "a", "[^z][^z][^z]", 0 },
584 | { "a aaa aaa", "[^ ]", 7 },
585 | { "a aaa aaa", "[^ a]", 0 },
586 | { "a aaa aaa", "[^:d]", 9 },
587 | { "a aaa aaa", "[^:d:s]", 7 },
588 | { "a aaa aaa", "[^:d:s][^:d:s][^:d:s]", 2 },
589 | { "a aaa aaa", "[^:d:s]aa", 2 },
590 | { "a aaa aaa", "aa[^:d:s]", 2 },
591 | { "Raptor Test", "[^:d:s]a", 1 },
592 | { "Raptor Test", "[^A-Z]t", 2 },
593 | { "Raptor Test", "[^:s]z", 0 },
594 | { "Raptor Test", "a[^ ]", 1 },
595 | { "Raptor Test", " [^t]", 1 },
596 | { "Raptor Test", "z[^a]", 0 },
597 | { "a", "[^z]?", 1 },
598 | { "a", "[^z]+", 1 },
599 | { "a", "[^z]*", 1 },
600 | { "a", "[^z]{1}", 1 },
601 | { "a aaa aaa", "[^ ]?", 9 },
602 | { "a aaa aaa", "[^ ]+", 3 },
603 | { "a aaa aaa", "[^ ]*", 5 },
604 | { "a aaa aaa", "[^ ]{1}", 7 },
605 | { "a", "a[^ ]?", 1 },
606 | { "a", "a[^ ]+", 0 },
607 | { "a", "a[^ ]*", 1 },
608 | { "a", "a[^ ]{1}", 0 },
609 | { "aeiou", "a|[^ ]", 5 },
610 | { "aeiou", "a|[^ ]?", 5 },
611 | { "aeiou", "a|[^ ]+", 2 },
612 | { "aeiou", "a|[^ ]*", 2 },
613 | { "aeiou", "[^ ]|a", 5 },
614 | { "aeiou", "[^ ]?|a", 5 },
615 | { "aeiou", "[^ ]+|a", 1 },
616 | { "aeiou", "[^ ]*|a", 1},
617 | { "aeiou", "(a|[^ ])", 5 },
618 | { "aeiou", "(a|[^ ]?)", 5 },
619 | { "aeiou", "(a|[^ ]+)", 2 },
620 | { "aeiou", "(a|[^ ]*)", 2 },
621 | { "aeiou", "([^ ]|a)", 5 },
622 | { "aeiou", "([^ ]?|a)", 5 },
623 | { "aeiou", "([^ ]+|a)", 1 },
624 | { "aeiou", "([^ ]*|a)", 1},
625 | { "aeiou", "a|([^ ])", 5 },
626 | { "aeiou", "a|([^ ]?)", 5 },
627 | { "aeiou", "a|([^ ]+)", 2 },
628 | { "aeiou", "a|([^ ]*)", 2 },
629 | { "aeiou", "([^ ])|a", 5 },
630 | { "aeiou", "([^ ]?)|a", 5 },
631 | { "aeiou", "([^ ]+)|a", 1 },
632 | { "aeiou", "([^ ]*)|a", 1},
633 | { "aeiou", "a|([^ ])", 5 },
634 | { "aeiou", "a|([^ ])?", 5 },
635 | { "aeiou", "a|([^ ])+", 2 },
636 | { "aeiou", "a|([^ ])*", 2 },
637 | { "aeiou", "([^ ])|a", 5 },
638 | { "aeiou", "([^ ])?|a", 5 },
639 | { "aeiou", "([^ ])+|a", 1 },
640 | { "aeiou", "([^ ])*|a", 1},
641 | { "1a2a3a4a5a6a", "[^:a]a", 6 },
642 | { "1a2a3a4a5a6a", "[^4-6]a", 3 },
643 | { "1a2b3c4d5e6f", "[^:a][^:d]", 6 },
644 | { "1a2b3c4d5e6f", "[^4-6][^:d]", 3 },
645 | { "abababababababababababababababababababab", "(a[^a])" , 20 },
646 | { "abababababababababababababababababababab", "([^a]a)" , 19 },
647 | { "abababababababababababababababababababab", "(:a[^a])" , 20 },
648 | { "abababababababababababababababababababab", "([^b]:a)" , 20 },
649 | { "abababababababababababababababababababab", "([^x]{5}:a{5})" , 4 },
650 | { "()<>[]{}*?+", "[^:w]", 11 },
651 |
652 | { "ABC", "#^A", 1 },
653 | { "ABC", "#^AB", 1 },
654 | { "ABC", "#^ABC", 1 },
655 | { "ABC", "#^(b|A)", 1 },
656 | { "ABC", "#^A(B|C)(B|C)", 1 },
657 | { "ABC", "#^(A(B|C))(B|C)", 1 },
658 | { "ABC", "#$C", 1 },
659 | { "ABC", "#$BC", 1 },
660 | { "ABC", "#$ABC", 1 },
661 | { "ABC", "#$(b|C)", 1 },
662 | { "ABC", "#$A(B|C)(B|C)", 1 },
663 | { "ABC", "#$(A(B|C))(B|C)", 1 },
664 | { "ABC", "#^$ABC", 1 },
665 | { "ABC", "#^$A(c|B)(b|C)", 1 },
666 | { "ABC", "#^$A(B|C)(B|C)", 1 },
667 | { "ABC", "#^$(A(B|C))(B|C)", 1 },
668 | { "ABC", "#^$AB([^C]+)", 0 },
669 | { "ABC", "#^$AB(A)+", 0 },
670 |
671 | { "ABC", "#^E", 0 },
672 | { "ABC", "#^EB", 0 },
673 | { "ABC", "#^EBC", 0 },
674 | { "ABC", "#^(b|E)", 0 },
675 | { "ABC", "#^A(B|C)(B|E)", 0 },
676 | { "ABC", "#^(A(B|C))(B|E)", 0 },
677 | { "ABC", "#$E", 0 },
678 | { "ABC", "#$BE", 0 },
679 | { "ABC", "#$ABE", 0 },
680 | { "ABC", "#$(b|E)", 0 },
681 | { "ABC", "#$A(B|C)(B|E)", 0 },
682 | { "ABC", "#$(A(B|C))(B|E)", 0 },
683 | { "ABC", "#^$ABE", 0 },
684 | { "ABC", "#^$A(c|B)(b|E)", 0 },
685 | { "ABC", "#^$A(B|C)(B|E)", 0 },
686 | { "ABC", "#^$(A(B|C))(B|E)", 0 },
687 |
688 | { "A", "a#*", 1 },
689 | { "A", "a?#*", 1 },
690 | { "A", "b?#*", 1 },
691 | { "A", "a+#*", 1 },
692 | { "A", "a*#*", 1 },
693 | { "A", "b*#*", 1 },
694 | { "aAa", "a#*", 3 },
695 | { "aAa", "a+#*", 1 },
696 | { "aAa", "a*#*", 1 },
697 | { "aAa", "a+#*", 1 },
698 | { "aAa", "a?#*", 3 },
699 | { "aAab", "a+#*", 1 },
700 | { "aAab", "a*#*", 2 },
701 | { "aAab", "a?#*", 4 },
702 | { "aAab", "a+#*?^$~b", 1 },
703 | { "aAab", "a*#*?^$~b", 1 },
704 | { "aAab", "a?#*?^$~b", 1 },
705 | { "aAab", "a+#*?^$~b?", 1 },
706 | { "aAab", "a*#*?^$~b?", 1 },
707 | { "aAab", "a?#*?^$~b?", 3 },
708 | { "aAab", "a+#*?^$~b+", 1 },
709 | { "aAab", "a*#*?^$~b+", 1 },
710 | { "aAab", "a?#*?^$~b+", 1 },
711 | { "aAab", "a+#*?^$~b*", 1 },
712 | { "aAab", "a*#*?^$~b*", 1 },
713 | { "aAab", "a?#*?^$~b*", 3 },
714 |
715 | { "a", "a#*", 1 },
716 | { "a", "A#*", 1 },
717 | { "a", "#*A", 1 },
718 | { "a", "#*a", 1 },
719 | { "a", "#*(A)", 1 },
720 | { "a", "#*(a)", 1 },
721 | { "a", "#*[A]", 1 },
722 | { "a", "#*[a]", 1 },
723 | { "a-Z", "#*A-Z", 1 },
724 | { "a-z", "a-Z#*", 1 },
725 | { "a-Z", "a-Z#*", 1 },
726 | { "a", "(a)#*", 1 },
727 | { "a", "(A)#*", 1 },
728 | { "a", "[a]#*", 1 },
729 | { "a", "[A]#*", 1 },
730 | { "a", "#*[A-Z]", 1 },
731 | { "a", "[A-Z]#*", 1 },
732 |
733 | { "aAaA", "a#*", 4 },
734 | { "aAaA", "A#*", 4 },
735 | { "aAaA", "#*A", 4 },
736 | { "aAaA", "#*a", 4 },
737 | { "aAaA", "#*(A)", 4 },
738 | { "aAaA", "#*(a)", 4 },
739 | { "aAaA", "#*[A]", 4 },
740 | { "aAaA", "#*[a]", 4 },
741 | { "aAaA", "#*Aa", 2 },
742 | { "aAaa", "aA#*", 2 },
743 | { "aAaA", "(a)#*", 4 },
744 | { "aAaA", "(A)#*", 4 },
745 | { "aAaA", "[a]#*", 4 },
746 | { "aAaA", "[A]#*", 4 },
747 | { "aAaA", "#*[A-Z]", 4 },
748 | { "aAaA", "[A-Z]#*", 4 },
749 | { "aAaA", "(a#*)", 4 },
750 | { "aAaA", "(A#*)", 4 },
751 | { "aAaA", "(a)#*", 4 },
752 | { "aAaA", "(A)#*", 4 },
753 | { "aAbB", "#*a|b", 4 },
754 | { "aAbB", "#*A|B", 4 },
755 | { "aAbB", "#*(a|b)", 4 },
756 | { "aAbB", "#*(A|B)", 4 },
757 | { "aAbB", "(a#*|b#*)", 4 },
758 | { "aAbB", "(A#*|B#*)", 4 },
759 | { "aAbB", "(a|b)#*", 4 },
760 | { "aAbB", "(A|B)#*", 4 },
761 | { "TesT", "test", 0 },
762 | { "TesT", "test#*", 0 },
763 | { "TesT", "t#*est#*", 1 },
764 | { "TesT", "#*test", 1 },
765 | { "TesT", "#*tESt", 1 },
766 | { "TesT", "#*(tESt)", 1 },
767 | { "TesT", "(tESt)#*", 1 },
768 |
769 | { "a aaa aaa", "#^aaa", 0 },
770 | { "a aaa aaa", "#$aaa", 1 },
771 | { "a aaa aaa", "#?aaa", 1 },
772 | { "a aaa aaa", "#~aaa", 2 },
773 | { "a aaa aaa", "#^?aaa", 0 },
774 | { "a aaa aaa", "#?^aaa", 0 },
775 | { "a aaa aaa", "#?$aaa", 1 },
776 | { "a aaa aaa", "#^?$aaa", 0 },
777 | { "a aaa aaa", "#?$^aaa", 0 },
778 | { "a aaa aaa", "#^?$a aaa aaa", 1 },
779 | { "aa aaaa aaaa", "#~a", 10 },
780 | { "aa aaaa aaaa", "#~aa", 7 },
781 | { "aa aaaa aaaa", "#~aaa", 4 },
782 | { "aaaaaaaaaaaaaaaaaaaa", "#?a+", 1 },
783 | { "abababababababababababababababababababab", "#?a+" , 1 },
784 | { "aaaaaaaaaaaaaaaaaaaa", "#~a+", 20 },
785 | { "abababababababababababababababababababab", "#~a+" , 20 },
786 |
787 | { "Raptor TesT Fest", "RapTor (tESt)#* fEST", 0 },
788 | { "Raptor TesT Fest", "#*rapTor (tESt) fEST", 1 },
789 | { "Raptor TesT Fest", "(RapTor)#* (tESt)#* (fEST)#*", 1 },
790 | { "Raptor TesT Fest", "((Rap#*Tor)#* (t#*ESt)#* (fEST)#*)#*", 1 },
791 | { "Raptor TesT Fest", "#*[a-z]#*apTor (tESt) [A-Z]#*EST", 1 },
792 |
793 | { "a", "a#/", 1 },
794 | { "a", "A#/", 0 },
795 | { "a", "#/A", 0 },
796 | { "a", "#/a", 1 },
797 | { "a", "#/(A)", 0 },
798 | { "a", "#/(a)", 1 },
799 | { "a", "#/[A]", 0 },
800 | { "a", "#/[a]", 1 },
801 | { "a", "#/A-Z", 0 },
802 | { "a", "A-Z#/", 0 },
803 | { "a", "(a)#/", 1 },
804 | { "a", "(A)#/", 0 },
805 | { "a", "[a]#/", 1 },
806 | { "a", "[A]#/", 0 },
807 | { "a", "#/[A-Z]", 0 },
808 | { "a", "[A-Z]#/", 0 },
809 |
810 | { "aAaA", "a#/", 2 },
811 | { "aAaA", "A#/", 2 },
812 | { "aAaA", "#/A", 2 },
813 | { "aAaA", "#/a", 2 },
814 | { "aAaA", "#/(A)", 2 },
815 | { "aAaA", "#/(a)", 2 },
816 | { "aAaA", "#/[A]", 2 },
817 | { "aAaA", "#/[a]", 2 },
818 | { "aAaA", "#/Aa", 1 },
819 | { "aAaA", "#/aA", 2 },
820 | { "aAaA", "Aa#/", 1 },
821 | { "aAaA", "aA#/", 2 },
822 | { "aAaA", "(a)#/", 2 },
823 | { "aAaA", "(A)#/", 2 },
824 | { "aAaA", "[a]#/", 2 },
825 | { "aAaA", "[A]#/", 2 },
826 | { "aAaA", "#/[A-Z]", 2 },
827 | { "aAaA", "[A-Z]#/", 2 },
828 | { "aAaA", "(a#/)", 2 },
829 | { "aAaA", "(A#/)", 2 },
830 | { "aAaA", "(a)#/", 2 },
831 | { "aAaA", "(A)#/", 2 },
832 | { "aAbB", "#/a|b", 2 },
833 | { "aAbB", "#/A|B", 2 },
834 | { "aAbB", "#/(a|b)", 2 },
835 | { "aAbB", "#/(A|B)", 2 },
836 | { "aAbB", "(a#/|b#/)", 2 },
837 | { "aAbB", "(A#/|B#/)", 2 },
838 | { "aAbB", "(a|b)#/", 2 },
839 | { "aAbB", "(A|B)#/", 2 },
840 |
841 | { "Raptor TesT Fest", "#*rapTor (tESt)#/ fEST", 0 },
842 | { "Raptor tESt Fest", "#*rapTor (tESt)#/ fEST", 1 },
843 | { "Raptor TesT Fest", "#*rapTor (tE#/S#/t)#* fEST", 0 },
844 | { "Raptor tESt Fest", "#*rapTor (tE#/S#/t)#* fEST", 1 },
845 |
846 | { "a aaa aaa", "[^ a]", 0 },
847 | { "a aaa aaa", "[^:d:s]", 7 },
848 | { "a aaa aaa", "[^:d:s][^:d:s][^:d:s]", 2 },
849 | { "a aaa aaa", "[^:d:s]aa", 2 },
850 | { "a aaa aaa", "aa[^:d:s]", 2 },
851 | { "Raptor Test", "[^:d:s]a", 1 },
852 | { "Raptor Test", "[^A-Z]t", 2 },
853 | { "a", "a[^ ]?", 1 },
854 | { "a", "a[^ ]+", 0 },
855 | { "a", "a[^ ]*", 1 },
856 | { "a", "a[^ ]{1}", 0 },
857 | { "aeiou", "a|[^ ]", 5 },
858 | { "aeiou", "a|[^ ]?", 5 },
859 | { "aeiou", "a|[^ ]+", 2 },
860 | { "aeiou", "a|[^ ]*", 2 },
861 | { "aeiou", "[^ ]|a", 5 },
862 | { "aeiou", "[^ ]?|a", 5 },
863 | { "aeiou", "[^ ]+|a", 1 },
864 | { "aeiou", "[^ ]*|a", 1},
865 | { "aeiou", "(a|[^ ])", 5 },
866 | { "aeiou", "(a|[^ ]?)", 5 },
867 | { "aeiou", "(a|[^ ]+)", 2 },
868 | { "aeiou", "(a|[^ ]*)", 2 },
869 | { "aeiou", "([^ ]|a)", 5 },
870 | { "aeiou", "([^ ]?|a)", 5 },
871 | { "aeiou", "([^ ]+|a)", 1 },
872 | { "aeiou", "([^ ]*|a)", 1},
873 | { "aeiou", "a|([^ ])", 5 },
874 | { "aeiou", "a|([^ ]?)", 5 },
875 | { "aeiou", "a|([^ ]+)", 2 },
876 | { "aeiou", "a|([^ ]*)", 2 },
877 | { "aeiou", "([^ ])|a", 5 },
878 | { "aeiou", "([^ ]?)|a", 5 },
879 | { "aeiou", "([^ ]+)|a", 1 },
880 | { "aeiou", "([^ ]*)|a", 1},
881 | { "aeiou", "a|([^ ])", 5 },
882 | { "aeiou", "a|([^ ])?", 5 },
883 | { "aeiou", "a|([^ ])+", 2 },
884 | { "aeiou", "a|([^ ])*", 2 },
885 | { "aeiou", "([^ ])|a", 5 },
886 | { "aeiou", "([^ ])?|a", 5 },
887 | { "aeiou", "([^ ])+|a", 1 },
888 | { "aeiou", "([^ ])*|a", 1},
889 |
890 | { "31/13-1331", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>", 0 },
891 | { "71-17/1177", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>", 0 },
892 |
893 | { "", "@1", 0 },
894 | { "a", "@1", 0 },
895 | { "a", "@a", 0 },
896 | { "A", "@100", 0 },
897 | { "1", "@1", 0 },
898 | { "", "[@1]", 0 },
899 | { "a", "[@1]", 0 },
900 | { "a", "[@a]", 1 },
901 | { "A", "[@100]", 0 },
902 | { "1", "[@1]", 1 },
903 | { "@", "[@1]", 1 },
904 | { "1@@a", "[a@1]", 4 },
905 | { "", "[^@1]", 0 },
906 | { "a", "[^@1]", 1 },
907 | { "a", "[^@a]", 0 },
908 | { "A", "[^@100]", 1 },
909 | { "1", "[^@1]", 0 },
910 | { "@", "[^2@]", 0 },
911 |
912 | { "", "(@1)", 0 },
913 | { "a", "(@1)", 0 },
914 | { "a", "(@a)", 0 },
915 | { "A", "(@100)", 0 },
916 | { "1", "(@1)", 0 },
917 | { "", "([@1])", 0 },
918 | { "a", "([@1])", 0 },
919 | { "a", "([@a])", 1 },
920 | { "A", "([@100])", 0 },
921 | { "1", "([@1])", 1 },
922 |
923 | { "a", "@1", 0 },
924 | { "a", "@1?", 1 },
925 | { "a", "@1*", 1 },
926 | { "a", "@1+", 0 },
927 | { "a", "@1{1}", 0 },
928 | { "aa", "@1", 1 },
929 | { "aa", "@1?", 1 },
930 | { "aa", "@1+", 1 },
931 | { "aa", "@1*", 1 },
932 | { "aa", "@1{1}", 1 },
933 | { "aaaaa", "@1", 2 },
934 | { "aaaaa", "@1?", 3 },
935 | { "aaaaa", "@1+", 1 },
936 | { "aaaaa", "@1*", 1 },
937 | { "aaaaa", "@1{1}", 2 },
938 |
939 | { "a-a", ":-@1", 1 },
940 | { "1-1", ":-@1", 1 },
941 | { "o_O!-o_O!", ":-@1", 1 },
942 |
943 | { "ae_ea", "_@2@1", 1 },
944 | { "ae_ea", "<>_@2@1", 0 },
945 | { "ae_aae", "<>_@2@1", 1 },
946 | { "ae_eaae_ea", "_@2@1", 2 },
947 | { "ae_eaae_ea", "<>_@2@1", 0 },
948 | { "ae_aaeae_aae", "<>_@2@1", 2 },
949 | { "ae_aaeae_aa1", "<>_@2@1", 1 },
950 | { "aaaaa", "@1", 0 },
951 |
952 | { "012345678910012345678910", "<0><1><2><3><4><5><6><7><8><9><10>@1@2@3@4@5@6@7@8@9@10@11", 1 },
953 |
954 | { "nasciiboy@gmail.com",
955 | "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+><:.[A-Za-z0-9]+>*<:.[A-Za-z0-9]{2}>*",
956 | 1 },
957 | { "nasciiboy@gmail.com",
958 | "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+><:.[A-Za-z0-9]+>*<:.[A-Za-z0-9]{2}>*",
959 | 1 },
960 |
961 | { "nasciiboy@gmail.com car.re@me",
962 | "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+><:.[A-Za-z0-9]+>*<:.[A-Za-z0-9]{2}>*",
963 | 2 },
964 | { "nasciiboy@gmail.com car.re@me",
965 | "<[_A-Za-z0-9:-]+(:.[_A-Za-z0-9:-]+)*>:@<[A-Za-z0-9]+><:.[A-Za-z0-9]+>*<:.[A-Za-z0-9]{2}>*",
966 | 2 },
967 | { "nasciiboy@gmail.com car.re@me",
968 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
969 | 2 },
970 | { "nasciiboy@gmail.com car.re@me",
971 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
972 | 2 },
973 |
974 | { "nasciiboy@gmail.com car.re@me 42_666@info.hell",
975 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
976 | 3 },
977 | { "nasciiboy@gmail.com car.re@me 42_666@info.hell",
978 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
979 | 3 },
980 | { "nasciiboy@gmail.com car.re@me 42_666@info.hell",
981 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
982 | 3 },
983 | { "nasciiboy@gmail.com car.re@me 42_666@info.hell",
984 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+><:.:w+>*",
985 | 3 },
986 |
987 | }
988 |
989 | done := make(chan struct{})
990 | for _, c := range numTest {
991 | go func( txt, re string, n int ){
992 | r := Compile( re )
993 | x := r.MatchString( txt )
994 |
995 | if x != n {
996 | t.Errorf( "Regexp4( %q, %q ) == %d, expected %d", txt, re, x, n )
997 | }
998 | done <- struct{}{}
999 | }( c.txt, c.re, c.n )
1000 | }
1001 |
1002 | for range numTest { <-done }
1003 | }
1004 |
1005 | func cTest( t *testing.T ){
1006 | catchTest := []struct {
1007 | txt, re string
1008 | n int
1009 | catch string
1010 | }{
1011 | { "a", "", 1, "a" },
1012 | { "a", "", 1, "a" },
1013 | { "aa", "", 1, "aa" },
1014 | { "a a a", "", 2, "a" },
1015 | { "abcd", "", 1, "a" },
1016 | { "abcd", "", 2, "b" },
1017 | { "abcd", "", 3, "c" },
1018 | { "abcd", "", 4, "d" },
1019 | { "abcd", "", 5, "" },
1020 | { "abc", "a", 1, "" },
1021 | { "abc", "x", 1, "" },
1022 | { "abc", "x", 1, "" },
1023 | { "abc", "<x|abc>", 1, "abc" },
1024 | { "abc", "<x|abc>", 2, "" },
1025 | { "abc", "|abc>", 1, "abc" },
1026 | { "abc", "|abc>", 2, "" },
1027 | { "abc abc abc", "", 9, "c" },
1028 | { "abc abc abc", "<(a|b|c)(a|b|c)(a|b|c)>", 3, "abc" },
1029 | { "abc abc abc", "<(a|b|c)(a|b|c)(a|b|c)> ", 2, "abc" },
1030 | { "abc abc abc", "#?<(a|b|c)(a|b|c)(a|b|c)>", 1, "abc" },
1031 | { "abc abc abc", "#?<(a|b|c)(a|b|c)((a|b)|x)>", 1, "" },
1032 | { "abc abc abx", "#?<(a|b|c)(a|b|c)((a|b)|x)>", 1, "abx" },
1033 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 1, "abc" },
1034 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 2, "iec" },
1035 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 3, "i" },
1036 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 4, "c" },
1037 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 5, "oeb" },
1038 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 6, "o" },
1039 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 7, "b" },
1040 | { "abc iecc oeb", "<<(a|e)|(i|o)>e|abc>", 8, "" },
1041 |
1042 | { "A", "#$<.{5}>", 1, "" },
1043 | { "AB", "#$<.{5}>", 1, "" },
1044 | { "ABC", "#$<.{5}>", 1, "" },
1045 | { "ABCD", "#$<.{5}>", 1, "" },
1046 | { "ABCDE", "#$<.{5}>", 1, "ABCDE" },
1047 | { "ABCDEF", "#$<.{5}>", 1, "BCDEF" },
1048 | { "ABCDEFG", "#$<.{5}>", 1, "CDEFG" },
1049 | { "ABCDEFGH", "#$<.{5}>", 1, "DEFGH" },
1050 | { "ABCDEFGHI", "#$<.{5}>", 1, "EFGHI" },
1051 | { "ABCDEFGHIJ", "#$<.{5}>", 1, "FGHIJ" },
1052 | { "ABCDEFGHIJK", "#$<.{5}>", 1, "GHIJK" },
1053 | { "ABCDEFGHIJKL", "#$<.{5}>", 1, "HIJKL" },
1054 | { "ABCDEFGHIJKLM", "#$<.{5}>", 1, "IJKLM" },
1055 | { "ABCDEFGHIJKLMN", "#$<.{5}>", 1, "JKLMN" },
1056 | { "ABCDEFGHIJKLMNO", "#$<.{5}>", 1, "KLMNO" },
1057 | { "ABCDEFGHIJKLMNOP", "#$<.{5}>", 1, "LMNOP" },
1058 | { "ABCDEFGHIJKLMNOPQ", "#$<.{5}>", 1, "MNOPQ" },
1059 | { "ABCDEFGHIJKLMNOPQR", "#$<.{5}>", 1, "NOPQR" },
1060 | { "ABCDEFGHIJKLMNOPQRS", "#$<.{5}>", 1, "OPQRS" },
1061 | { "ABCDEFGHIJKLMNOPQRST", "#$<.{5}>", 1, "PQRST" },
1062 | { "ABCDEFGHIJKLMNOPQRSTU", "#$<.{5}>", 1, "QRSTU" },
1063 | { "ABCDEFGHIJKLMNOPQRSTUV", "#$<.{5}>", 1, "RSTUV" },
1064 | { "ABCDEFGHIJKLMNOPQRSTUVW", "#$<.{5}>", 1, "STUVW" },
1065 | { "ABCDEFGHIJKLMNOPQRSTUVWX", "#$<.{5}>", 1, "TUVWX" },
1066 | { "ABCDEFGHIJKLMNOPQRSTUVWXY", "#$<.{5}>", 1, "UVWXY" },
1067 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "#$<.{5}>", 1, "VWXYZ" },
1068 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ[", "#$<.{5}>", 1, "WXYZ[" },
1069 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ][", "#$<.{5}>", 1, "XYZ][" },
1070 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ^][", "#$<.{5}>", 1, "YZ^][" },
1071 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_^][", "#$<.{5}>", 1, "Z_^][" },
1072 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][", "#$<.{5}>", 1, "_`^][" },
1073 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][a", "#$<.{5}>", 1, "`^][a" },
1074 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][ab", "#$<.{5}>", 1, "^][ab" },
1075 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abc", "#$<.{5}>", 1, "][abc" },
1076 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcd", "#$<.{5}>", 1, "[abcd" },
1077 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcde", "#$<.{5}>", 1, "abcde" },
1078 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdef", "#$<.{5}>", 1, "bcdef" },
1079 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefg", "#$<.{5}>", 1, "cdefg" },
1080 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefgh", "#$<.{5}>", 1, "defgh" },
1081 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghi", "#$<.{5}>", 1, "efghi" },
1082 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghij", "#$<.{5}>", 1, "fghij" },
1083 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijk", "#$<.{5}>", 1, "ghijk" },
1084 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijkl", "#$<.{5}>", 1, "hijkl" },
1085 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklm", "#$<.{5}>", 1, "ijklm" },
1086 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmn", "#$<.{5}>", 1, "jklmn" },
1087 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmno", "#$<.{5}>", 1, "klmno" },
1088 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnop", "#$<.{5}>", 1, "lmnop" },
1089 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopq", "#$<.{5}>", 1, "mnopq" },
1090 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqr", "#$<.{5}>", 1, "nopqr" },
1091 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrs", "#$<.{5}>", 1, "opqrs" },
1092 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrst", "#$<.{5}>", 1, "pqrst" },
1093 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstu", "#$<.{5}>", 1, "qrstu" },
1094 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuv", "#$<.{5}>", 1, "rstuv" },
1095 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvw", "#$<.{5}>", 1, "stuvw" },
1096 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwx", "#$<.{5}>", 1, "tuvwx" },
1097 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxy", "#$<.{5}>", 1, "uvwxy" },
1098 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxyz", "#$<.{5}>", 1, "vwxyz" },
1099 |
1100 | { "a", "?", 1, "a" },
1101 | { "a", "?", 1, "" },
1102 | { "a", "+", 1, "a" },
1103 | { "a", "*", 1, "a" },
1104 | { "a", "*", 1, "" },
1105 | { "aaa", "+", 1, "aaa" },
1106 | { "aaa", "*", 1, "aaa" },
1107 | { "aaa", "#~+", 1, "aaa" },
1108 | { "aaa", "#~*", 1, "aaa" },
1109 | { "aaab", "#~", 1, "aaa" },
1110 | { "aaab", "#~", 1, "aaa" },
1111 | { "aaab", "#~", 4, "" },
1112 | { "aaab", "#~", 1, "aaab" },
1113 | { "aaab", "#~", 1, "aaab" },
1114 | { "aaab", "#~", 1, "ab" },
1115 | { "aaab", "#~", 1, "aaab" },
1116 | { "aaab", "#~", 1, "aaab" },
1117 | { "aaab", "#~", 3, "ab" },
1118 | { "aaab", "#~", 1, "aaab" },
1119 | { "aaab", "#~", 1, "aaab" },
1120 | { "aaab", "#~", 1, "ab" },
1121 | { "aaab", "#~", 1, "aaab" },
1122 | { "aaab", "#~", 1, "aaab" },
1123 | { "aaab", "#~", 3, "ab" },
1124 | { "aaabaaa", "#~", 4, "aaa" },
1125 | { "aaabaaa", "#~", 5, "aaa" },
1126 |
1127 | { "Raptor Test", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1, "Raptor Test" },
1128 | { "Raptor Test", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 2, "T" },
1129 | { "Raaaaptor TFest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1, "Raaaaptor TFest" },
1130 | { "Raaaaptor TFest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 2, "TF" },
1131 | { "CaptorTest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1, "CaptorTest" },
1132 | { "Cap CaptorTest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1, "Cap CaptorTest" },
1133 | { "Cap CaptorTest", "#~<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 3, "CaptorTest" },
1134 | { "Rap Captor Fest", "<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 1, "Rap Captor Fest" },
1135 | { "Rap Captor Fest", "#~<((C|R)ap C|C|R)(a+p{1}tor) ?((+e)(st))>", 3, "Captor Fest" },
1136 | { "012345678910109876501234", "<0><1><2><3><4><5><6><7><8><9><10><@11@10@9@8@7@6@1@2@3@4@5>", 12, "109876501234" },
1137 |
1138 |
1139 | { "| | text", "#^$<:s*>", 1, "" },
1140 | }
1141 |
1142 | done := make(chan struct{})
1143 | for _, c := range catchTest {
1144 | go func( txt, re string, nCatch int, eCatch string ){
1145 | var r RE
1146 | r.Compile( re )
1147 | r.FindString( txt )
1148 | catch := r.GetCatch( nCatch )
1149 | if catch != eCatch {
1150 | t.Errorf( "Regexp4( %q, %q )\nGetCatch( %d ) == %q, expected %q",
1151 | txt, re, nCatch, catch, eCatch )
1152 | }
1153 | done <- struct{}{}
1154 | }( c.txt, c.re, c.n, c.catch )
1155 | }
1156 |
1157 | for range catchTest { <-done }
1158 | }
1159 |
1160 | func dTest( t *testing.T ){
1161 | catchTest := []struct {
1162 | txt, re string
1163 | n int
1164 | }{
1165 | { "A", "<.>", 1 },
1166 | { "AB", "<.>", 2 },
1167 | { "ABC", "<.>", 3 },
1168 | { "ABCD", "<.>", 4 },
1169 | { "ABCDE", "<.>", 5 },
1170 | { "ABCDEF", "<.>", 6 },
1171 | { "ABCDEFG", "<.>", 7 },
1172 | { "ABCDEFGH", "<.>", 8 },
1173 | { "ABCDEFGHI", "<.>", 9 },
1174 | { "ABCDEFGHIJ", "<.>", 10 },
1175 | { "ABCDEFGHIJK", "<.>", 11 },
1176 | { "ABCDEFGHIJKL", "<.>", 12 },
1177 | { "ABCDEFGHIJKLM", "<.>", 13 },
1178 | { "ABCDEFGHIJKLMN", "<.>", 14 },
1179 | { "ABCDEFGHIJKLMNO", "<.>", 15 },
1180 | { "ABCDEFGHIJKLMNOP", "<.>", 16 },
1181 | { "ABCDEFGHIJKLMNOPQ", "<.>", 17 },
1182 | { "ABCDEFGHIJKLMNOPQR", "<.>", 18 },
1183 | { "ABCDEFGHIJKLMNOPQRS", "<.>", 19 },
1184 | { "ABCDEFGHIJKLMNOPQRST", "<.>", 20 },
1185 | { "ABCDEFGHIJKLMNOPQRSTU", "<.>", 21 },
1186 | { "ABCDEFGHIJKLMNOPQRSTUV", "<.>", 22 },
1187 | { "ABCDEFGHIJKLMNOPQRSTUVW", "<.>", 23 },
1188 | { "ABCDEFGHIJKLMNOPQRSTUVWX", "<.>", 24 },
1189 | { "ABCDEFGHIJKLMNOPQRSTUVWXY", "<.>", 25 },
1190 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "<.>", 26 },
1191 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ[", "<.>", 27 },
1192 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ][", "<.>", 28 },
1193 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ^][", "<.>", 29 },
1194 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_^][", "<.>", 30 },
1195 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][", "<.>", 31 },
1196 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][a", "<.>", 32 },
1197 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][ab", "<.>", 33 },
1198 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abc", "<.>", 34 },
1199 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcd", "<.>", 35 },
1200 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcde", "<.>", 36 },
1201 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdef", "<.>", 37 },
1202 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefg", "<.>", 38 },
1203 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefgh", "<.>", 39 },
1204 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghi", "<.>", 40 },
1205 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghij", "<.>", 41 },
1206 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijk", "<.>", 42 },
1207 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijkl", "<.>", 43 },
1208 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklm", "<.>", 44 },
1209 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmn", "<.>", 45 },
1210 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmno", "<.>", 46 },
1211 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnop", "<.>", 47 },
1212 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopq", "<.>", 48 },
1213 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqr", "<.>", 49 },
1214 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrs", "<.>", 50 },
1215 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrst", "<.>", 51 },
1216 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstu", "<.>", 52 },
1217 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuv", "<.>", 53 },
1218 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvw", "<.>", 54 },
1219 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwx", "<.>", 55 },
1220 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxy", "<.>", 56 },
1221 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxyz", "<.>", 57 },
1222 |
1223 | { "A", "<:S>", 1 },
1224 | { "AB", "<:S>", 2 },
1225 | { "ABC", "<:S>", 3 },
1226 | { "ABCD", "<:S>", 4 },
1227 | { "ABCDE", "<:S>", 5 },
1228 | { "ABCDEF", "<:S>", 6 },
1229 | { "ABCDEFG", "<:S>", 7 },
1230 | { "ABCDEFGH", "<:S>", 8 },
1231 | { "ABCDEFGHI", "<:S>", 9 },
1232 | { "ABCDEFGHIJ", "<:S>", 10 },
1233 | { "ABCDEFGHIJK", "<:S>", 11 },
1234 | { "ABCDEFGHIJKL", "<:S>", 12 },
1235 | { "ABCDEFGHIJKLM", "<:S>", 13 },
1236 | { "ABCDEFGHIJKLMN", "<:S>", 14 },
1237 | { "ABCDEFGHIJKLMNO", "<:S>", 15 },
1238 | { "ABCDEFGHIJKLMNOP", "<:S>", 16 },
1239 | { "ABCDEFGHIJKLMNOPQ", "<:S>", 17 },
1240 | { "ABCDEFGHIJKLMNOPQR", "<:S>", 18 },
1241 | { "ABCDEFGHIJKLMNOPQRS", "<:S>", 19 },
1242 | { "ABCDEFGHIJKLMNOPQRST", "<:S>", 20 },
1243 | { "ABCDEFGHIJKLMNOPQRSTU", "<:S>", 21 },
1244 | { "ABCDEFGHIJKLMNOPQRSTUV", "<:S>", 22 },
1245 | { "ABCDEFGHIJKLMNOPQRSTUVW", "<:S>", 23 },
1246 | { "ABCDEFGHIJKLMNOPQRSTUVWX", "<:S>", 24 },
1247 | { "ABCDEFGHIJKLMNOPQRSTUVWXY", "<:S>", 25 },
1248 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "<:S>", 26 },
1249 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ[", "<:S>", 27 },
1250 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ][", "<:S>", 28 },
1251 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ^][", "<:S>", 29 },
1252 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_^][", "<:S>", 30 },
1253 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][", "<:S>", 31 },
1254 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][a", "<:S>", 32 },
1255 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][ab", "<:S>", 33 },
1256 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abc", "<:S>", 34 },
1257 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcd", "<:S>", 35 },
1258 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcde", "<:S>", 36 },
1259 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdef", "<:S>", 37 },
1260 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefg", "<:S>", 38 },
1261 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefgh", "<:S>", 39 },
1262 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghi", "<:S>", 40 },
1263 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghij", "<:S>", 41 },
1264 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijk", "<:S>", 42 },
1265 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijkl", "<:S>", 43 },
1266 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklm", "<:S>", 44 },
1267 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmn", "<:S>", 45 },
1268 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmno", "<:S>", 46 },
1269 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnop", "<:S>", 47 },
1270 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopq", "<:S>", 48 },
1271 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqr", "<:S>", 49 },
1272 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrs", "<:S>", 50 },
1273 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrst", "<:S>", 51 },
1274 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstu", "<:S>", 52 },
1275 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuv", "<:S>", 53 },
1276 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvw", "<:S>", 54 },
1277 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwx", "<:S>", 55 },
1278 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxy", "<:S>", 56 },
1279 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ_`^][abcdefghijklmnopqrstuvwxyz", "<:S>", 57 },
1280 |
1281 | {},
1282 |
1283 | { "A", "#$<.{5}>", 0 },
1284 | { "AB", "#$<.{5}>", 0 },
1285 | { "ABC", "#$<.{5}>", 0 },
1286 | { "ABCD", "#$<.{5}>", 0 },
1287 | { "ABCDE", "#$<.{5}>", 1 },
1288 | { "ABCDEF", "#$<.{5}>", 1 },
1289 | { "ABCDEFG", "#$<.{5}>", 1 },
1290 | { "ABCDEFGH", "#$<.{5}>", 1 },
1291 | { "ABCDEFGHI", "#$<.{5}>", 1 },
1292 | }
1293 |
1294 | done := make(chan struct{})
1295 | for _, c := range catchTest {
1296 | go func( txt, re string, n int ){
1297 | var r RE
1298 | r.Match( txt, re )
1299 | x := r.TotCatch()
1300 | if x != n {
1301 | t.Errorf( "Regexp4( %q, %q )\nTotCatch() == %d, expected %d",
1302 | txt, re, x, n )
1303 | }
1304 | done <- struct{}{}
1305 | }( c.txt, c.re, c.n )
1306 | }
1307 |
1308 | for range catchTest { <-done }
1309 | }
1310 |
1311 | func sTest( t *testing.T ){
1312 | swapTest := []struct {
1313 | txt, re string
1314 | n int
1315 | swap, expected string
1316 | }{
1317 | { "aaab", "" , 1, "e", "eeeb" },
1318 | { "a", "" , 1, "", "" },
1319 | { "a", "?" , 1, "", "" },
1320 | { "a", "+" , 1, "", "" },
1321 | { "a", "*" , 1, "", "" },
1322 | { "a", "{1}", 1, "", "" },
1323 | { "a", "" , 1, "", "" },
1324 | { "a", "" , 1, "", "" },
1325 | { "a", "" , 1, "", "" },
1326 | { "a", "", 1, "", "" },
1327 |
1328 | { "a", "" , 1, "e", "e" },
1329 | { "a", "?" , 1, "e", "e" },
1330 | { "a", "+" , 1, "e", "e" },
1331 | { "a", "*" , 1, "e", "e" },
1332 | { "a", "{1}", 1, "e", "e" },
1333 | { "a", "" , 1, "e", "e" },
1334 | { "a", "" , 1, "e", "e" },
1335 | { "a", "" , 1, "e", "e" },
1336 | { "a", "", 1, "e", "e" },
1337 |
1338 | { "a", "" , 1, "z", "a" },
1339 | { "a", "?" , 1, "z", "za" },
1340 | { "a", "+" , 1, "z", "a" },
1341 | { "a", "*" , 1, "z", "za" },
1342 | { "a", "{1}", 1, "z", "a" },
1343 | { "a", "" , 1, "z", "za" },
1344 | { "a", "" , 1, "z", "a" },
1345 | { "a", "" , 1, "z", "za" },
1346 | { "a", "", 1, "z", "a" },
1347 |
1348 | { "aaa", "" , 1, "", "" },
1349 | { "aaa", "?" , 1, "", "" },
1350 | { "aaa", "+" , 1, "", "" },
1351 | { "aaa", "*" , 1, "", "" },
1352 | { "aaa", "{1}", 1, "", "" },
1353 | { "aaa", "" , 1, "", "" },
1354 | { "aaa", "" , 1, "", "" },
1355 | { "aaa", "" , 1, "", "" },
1356 | { "aaa", "", 1, "", "" },
1357 |
1358 | { "aaa", "" , 1, "e", "eee" },
1359 | { "aaa", "?" , 1, "e", "eee" },
1360 | { "aaa", "+" , 1, "e", "e" },
1361 | { "aaa", "*" , 1, "e", "e" },
1362 | { "aaa", "{1}", 1, "e", "eee" },
1363 | { "aaa", "" , 1, "e", "eee" },
1364 | { "aaa", "" , 1, "e", "e" },
1365 | { "aaa", "" , 1, "e", "e" },
1366 | { "aaa", "", 1, "e", "eee" },
1367 |
1368 | { "aaa", "" , 1, "z", "aaa" },
1369 |
1370 | { "aaa", "?" , 1, "z", "zazaza" },
1371 | { "aaa", "+" , 1, "z", "aaa" },
1372 | { "aaa", "*" , 1, "z", "zazaza" },
1373 | { "aaa", "{1}", 1, "z", "aaa" },
1374 | { "aaa", "" , 1, "z", "zazaza" },
1375 | { "aaa", "" , 1, "z", "aaa" },
1376 | { "aaa", "" , 1, "z", "zazaza" },
1377 | { "aaa", "", 1, "z", "aaa" },
1378 |
1379 | { "aaab", "" , 1, "e", "eeeb" },
1380 | { "aaab", "?" , 1, "e", "eeeeb" },
1381 | { "aaab", "+" , 1, "e", "eb" },
1382 | { "aaab", "*" , 1, "e", "eeb" },
1383 | { "aaab", "{1}", 1, "e", "eeeb" },
1384 | { "aaab", "" , 1, "e", "eeeeb" },
1385 | { "aaab", "" , 1, "e", "eb" },
1386 | { "aaab", "" , 1, "e", "eeb" },
1387 | { "aaab", "", 1, "e", "eeeb" },
1388 |
1389 | { "aaab", "" , 1, "z", "aaab" },
1390 | { "aaab", "?" , 1, "z", "zazazazb" },
1391 | { "aaab", "+" , 1, "z", "aaab" },
1392 | { "aaab", "*" , 1, "z", "zazazazb" },
1393 | { "aaab", "{1}", 1, "z", "aaab" },
1394 | { "aaab", "" , 1, "z", "zazazazb" },
1395 | { "aaab", "" , 1, "z", "aaab" },
1396 | { "aaab", "" , 1, "z", "zazazazb" },
1397 | { "aaab", "", 1, "z", "aaab" },
1398 |
1399 | { "aaabaaa", "" , 1, "e", "eeebeee" },
1400 | { "aaabaaa", "?" , 1, "e", "eeeebeee" },
1401 | { "aaabaaa", "+" , 1, "e", "ebe" },
1402 | { "aaabaaa", "*" , 1, "e", "eebe" },
1403 | { "aaabaaa", "{1}", 1, "e", "eeebeee" },
1404 | { "aaabaaa", "" , 1, "e", "eeeebeee" },
1405 | { "aaabaaa", "" , 1, "e", "ebe" },
1406 | { "aaabaaa", "" , 1, "e", "eebe" },
1407 | { "aaabaaa", "", 1, "e", "eeebeee" },
1408 |
1409 | { "aaabaaa", "" , 1, "z", "aaabaaa" },
1410 | { "aaabaaa", "?" , 1, "z", "zazazazbzazaza" },
1411 | { "aaabaaa", "+" , 1, "z", "aaabaaa" },
1412 | { "aaabaaa", "*" , 1, "z", "zazazazbzazaza" },
1413 | { "aaabaaa", "{1}", 1, "z", "aaabaaa" },
1414 | { "aaabaaa", "" , 1, "z", "zazazazbzazaza" },
1415 | { "aaabaaa", "" , 1, "z", "aaabaaa" },
1416 | { "aaabaaa", "" , 1, "z", "zazazazbzazaza" },
1417 | { "aaabaaa", "", 1, "z", "aaabaaa" },
1418 |
1419 | { "Raptor Test", "", 1, "Captor", "Captor Test" },
1420 | { "Raptor Test", "", 0, "Captor", "Raptor Test" },
1421 | { "Raptor Test", "", 0, "Captor", "Raptor Test" },
1422 | { "Raptor Test", "", 1, "Captor", "Captor Captor" },
1423 | { "Raptor Test", "", 2, "Captor", "Raptor Test" },
1424 | { "Raptor Test", ">", 2, "Fest", "Raptor Fest" },
1425 | { "Raptor Raptors Raptoring", "", 1, "Test", "Test Test Test" },
1426 | { "Raptor Raptors Raptoring", ":w*", 1, "Test", "Test Tests Testing" },
1427 | { "Raptor Raptors Raptoring", "<<a>ptor>:w*", 3, "C", "Captor Captors Captoring" },
1428 | { "Raptor Raptors Raptoring", "<<a>ptor>:w*", 2, "4", "4ptor 4ptors 4ptoring" },
1429 | }
1430 |
1431 | var re RE
1432 | for _, c := range swapTest {
1433 | re.Match( c.txt, c.re )
1434 | swap := re.RplCatch( c.swap, c.n )
1435 | if swap != c.expected {
1436 | t.Errorf( "Regexp4( %q, %q )\nRplCatch( %q, %d ) == %q\n expected %q",
1437 | c.txt, c.re, c.swap, c.n, swap, c.swap )
1438 | }
1439 | }
1440 | }
1441 |
1442 | func pTest( t *testing.T ){
1443 | putTest := []struct {
1444 | txt, re string
1445 | put, expected string
1446 | }{
1447 | { "a", "", "#1", "a" },
1448 | { "a", "", "#x", "x" },
1449 | { "a", "", "#xx", "xx" },
1450 | { "a", "", "###1##", "#a#" },
1451 | { "a", "", "[#0][#1][#2#3#1000000]", "[][a][]" },
1452 | { "aa", "", "#1", "aa" },
1453 | { "a a a", "", "#1#2#3", "aaa" },
1454 | { "abcd", "", "#4 #3 #2 #1", "d c b a" },
1455 | { "1 2 3 4 5 6 7 8 9", "<1|2|3|4|5|6|7|8|9>", "#5 #6 #7 #8 #9 #1 #2 #3 #4", "5 6 7 8 9 1 2 3 4" },
1456 | { "Raptor Test", "", "C#1 F#2", "Captor Fest" },
1457 | { "Raptor Test", "", "C#5 F#2", "C Fest" },
1458 | { "Raptor Test", "", "C#a F#2", "Ca Fest" },
1459 | { "Raptor Test", "", "C#0 F#2", "C Fest" },
1460 | { "Raptor Test", "", "C#43 F#43", "C F" },
1461 | { "Raptor Test", "", "C##43 ##F#43##", "C#43 #F#" },
1462 | { "Raptor Test", "", "C##43 ##1##2", "C#43 #1#2" },
1463 | { "Raptor Test", "", "##Raptor ##Test", "#Raptor #Test" },
1464 | { "Raptor Test Fest", " ", "#1_#2", "Raptor_Test" },
1465 |
1466 | { "nasciiboy@gmail.com",
1467 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1468 | "[#1][#2][#3][#4][#5][#6]",
1469 | "[nasciiboy][gmail][com][][][]" },
1470 | { "nasciiboy@gmail.com",
1471 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1472 | "[#1][#2][#3][][][]",
1473 | "[nasciiboy][gmail][com][][][]" },
1474 | { "u.s.r_43@ru.com.jp",
1475 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1476 | "[#1][#2][#3][#4][#5][#6]",
1477 | "[u.s.r_43][ru][com][jp][][]" },
1478 |
1479 | { "nasciiboy@gmail.com 42_666@info.hell",
1480 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1481 | "[#1][#2][#3][#4][#5][#6]",
1482 | "[nasciiboy][gmail][com][42_666][info][hell]" },
1483 | { "nasciiboy@gmail.com 42_666@info.hell",
1484 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1485 | "[#1][#2][#3][#4][#5][#6]",
1486 | "[nasciiboy][gmail][com][42_666][info][hell]" },
1487 | { "u.s.r_43@ru.com.jp car.re@me",
1488 | "<[_:w:-]+(:.[_:w:-]+)*>:@<:w+>(:.<:w+>)*",
1489 | "[#1][#2][#3][#4][#5][#6]",
1490 | "[u.s.r_43][ru][com][jp][car.re][me]" },
1491 |
1492 | { "nasciiboy@gmail.com Åæ@kal raz@¤re car.re@me 42_666@info.hell",
1493 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
1494 | "[#1][#2][#3]",
1495 | "[nasciiboy@gmail.com][car.re@me][42_666@info.hell]" },
1496 | { "nasciiboy@gmail.com car.re@me comor&re@€uro 42_666@info.hell",
1497 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
1498 | "[#1][#2][#3]",
1499 | "[nasciiboy@gmail.com][car.re@me][42_666@info.hell]" },
1500 | { "u.s.r_43@ru.com.jp mi-kasa tiene-fiebre 4558.54o@ comor?car.re@me 42_666@info.hell",
1501 | "<([_:w:-]+(:.[_:w:-]+)*):@:w+(:.:w+)*>",
1502 | "[#1][#2][#3]",
1503 | "[u.s.r_43@ru.com.jp][car.re@me][42_666@info.hell]" },
1504 |
1505 |
1506 | { "07-07-1777", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>", "d:#1 m:#3 y:#4", "d:07 m:07 y:1777" },
1507 | { "fecha: 07-07-1777", "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>", "d:#1 m:#3 y:#4", "d:07 m:07 y:1777" },
1508 |
1509 | }
1510 |
1511 | var re RE
1512 | for _, c := range putTest {
1513 | re.Find( c.txt, c.re )
1514 | put := re.PutCatch( c.put )
1515 | if put != c.expected {
1516 | t.Errorf( "Regexp4( %q, %q )\nPutCatch( %q ) == %q, expected %q",
1517 | c.txt, c.re, c.put, put, c.expected )
1518 | }
1519 | }
1520 | }
1521 |
1522 | func gTest( t *testing.T ){
1523 | putTest := []struct {
1524 | txt, re string
1525 | catch int
1526 | pos int
1527 | }{
1528 | { "", "<0>", 1, 0 },
1529 | { "0", "<0>", 1, 0 },
1530 | { "0123456", "<6>", 1, 6 },
1531 | { "0123456789", "<9>", 1, 9 },
1532 | { "0123456789", "<:d>", 1, 0 },
1533 | { "0123456789", "<:d>", 2, 1 },
1534 | { "0123456789", "<:d>", 3, 2 },
1535 | { "0123456789", "<:d>", 4, 3 },
1536 | { "0123456789", "<:d>", 5, 4 },
1537 | { "0123456789", "<:d>", 6, 5 },
1538 | { "0123456789", "<:d>", 7, 6 },
1539 | { "0123456789", "<9><.?>", 1, 9 },
1540 | { "0123456789", "<9><.?>", 2, 10 },
1541 | { "0123456789", "<9><.>?", 2, 10 },
1542 | { "0123456789", "<9><.>", 2, 0 },
1543 | { "0123456789", "<9><:.>*<:a>*", 2, 10 },
1544 | { "0123456789", "<9><:.>*<:a>*", 3, 10 },
1545 | { "0123456789.", "<9><:.>*<:a>*", 2, 10 },
1546 | { "0123456789.", "<9><:.>*<:a>*", 3, 11 },
1547 | { "0123456789.e", "<9><:.>*<:a>*", 3, 11 },
1548 | { "0123456789...e", "<9><:.>*<:a>*", 3, 13 },
1549 | { "0123456789^-^!e", "<9><:.>*<:a>*", 3, 10 },
1550 | }
1551 |
1552 | var re RE
1553 | for _, c := range putTest {
1554 | re.Match( c.txt, c.re )
1555 | pos := re.GpsCatch( c.catch )
1556 | if pos != c.pos {
1557 | t.Errorf( "Regexp4( %q, %q )\nGpsCatch( %d ) == %d, expected %d",
1558 | c.txt, c.re, c.catch, pos, c.pos )
1559 | }
1560 | }
1561 | }
1562 |
1563 | func nTestUTF( t *testing.T ){
1564 | numTest := []struct {
1565 | txt, re string
1566 | n int
1567 | }{
1568 | { "a", "▲", 0 },
1569 | { "a", "▲?", 1 },
1570 | { "a", "△?", 1 },
1571 | { "a", "▲+", 0 },
1572 | { "a", "▲*", 1 },
1573 | { "a", "△*", 1 },
1574 | { "▲", "▲", 1 },
1575 | { "▲", "▲?", 1 },
1576 | { "▲", "△?", 1 },
1577 | { "▲", "▲+", 1 },
1578 | { "▲", "▲*", 1 },
1579 | { "▲", "△*", 1 },
1580 | { "▲▲▲", "▲+", 1 },
1581 | { "▲▲▲", "▲*", 1 },
1582 | { "▲▲▲", "▲+", 1 },
1583 | { "▲▲▲", "▲?", 3 },
1584 | { "▲▲▲△", "▲+", 1 },
1585 | { "▲▲▲△", "▲*", 2 },
1586 | { "▲▲▲△", "▲?", 4 },
1587 | { "▲▲▲△", "▲+△", 1 },
1588 | { "▲▲▲△", "▲*△", 1 },
1589 | { "▲▲▲△", "▲?△", 1 },
1590 | { "▲▲▲△", "▲+△?", 1 },
1591 | { "▲▲▲△", "▲*△?", 1 },
1592 | { "▲▲▲△", "▲?△?", 3 },
1593 | { "▲▲▲△", "▲+△+", 1 },
1594 | { "▲▲▲△", "▲*△+", 1 },
1595 | { "▲▲▲△", "▲?△+", 1 },
1596 | { "▲▲▲△", "▲+△*", 1 },
1597 | { "▲▲▲△", "▲*△*", 1 },
1598 | { "▲▲▲△", "▲?△*", 3 },
1599 | { "▲▲▲△▲▲▲", "▲+", 2 },
1600 | { "▲▲▲△▲▲▲", "▲*", 3 },
1601 | { "▲▲▲△▲▲▲", "▲*", 3 },
1602 | { "▲▲▲△▲▲▲", "▲*", 3 },
1603 | { "▲", "(▲)?", 1 },
1604 | { "▲", "(△)?", 1 },
1605 | { "▲", "(▲)+", 1 },
1606 | { "▲", "(▲)*", 1 },
1607 | { "▲", "(△)*", 1 },
1608 | { "▲▲▲", "(▲)+", 1 },
1609 | { "▲▲▲", "(▲)*", 1 },
1610 | { "▲▲▲", "#~(▲)+", 3 },
1611 | { "▲▲▲", "#~(▲)*", 3 },
1612 | { "▲▲▲△", "#~(▲+)", 3 },
1613 | { "▲▲▲△", "#~(▲*)", 4 },
1614 | { "▲▲▲△", "#~(▲?)", 4 },
1615 | { "▲▲▲△", "#~(▲+△)", 3 },
1616 | { "▲▲▲△", "#~(▲*△)", 4 },
1617 | { "▲▲▲△", "#~(▲?△)", 2 },
1618 | { "▲▲▲△", "#~(▲+△?)", 3 },
1619 | { "▲▲▲△", "#~(▲*△?)", 4 },
1620 | { "▲▲▲△", "#~(▲?△?)", 4 },
1621 | { "▲▲▲△", "#~(▲+△+)", 3 },
1622 | { "▲▲▲△", "#~(▲*△+)", 4 },
1623 | { "▲▲▲△", "#~(▲?△+)", 2 },
1624 | { "▲▲▲△", "#~(▲+△*)", 3 },
1625 | { "▲▲▲△", "#~(▲*△*)", 4 },
1626 | { "▲▲▲△", "#~(▲?△*)", 4 },
1627 | { "▲▲▲△▲▲▲", "#~(▲+)", 6 },
1628 | { "▲▲▲△▲▲▲", "#~(▲*)", 7 },
1629 | { "▲", "[▲]?", 1 },
1630 | { "▲", "[△]?", 1 },
1631 | { "▲", "[▲]+", 1 },
1632 | { "▲", "[▲]*", 1 },
1633 | { "▲", "[△]*", 1 },
1634 | { "▲▲▲", "[▲]?", 3 },
1635 | { "▲▲▲", "[▲]+", 1 },
1636 | { "▲▲▲", "[▲]*", 1 },
1637 | { "▲▲▲", "#~[▲]?", 3 },
1638 | { "▲▲▲", "#~[▲]+", 3 },
1639 | { "▲▲▲", "#~[▲]*", 3 },
1640 | { "▲▲▲△", "#~[▲△]", 4 },
1641 | { "▲▲▲△", "#~[▲△]?", 4 },
1642 | { "▲▲▲△", "#~[▲△]+", 4 },
1643 | { "▲▲▲△", "#~[▲△]*", 4 },
1644 | { "▲▲▲△▲▲▲", "#~[▲△]", 7 },
1645 | { "▲", ":&", 1 },
1646 | { "▲", ":&?", 1 },
1647 | { "▲", ":&+", 1 },
1648 | { "▲", ":&*", 1 },
1649 | { "▲▲▲", ":&?", 3 },
1650 | { "▲▲▲", ":&+", 1 },
1651 | { "▲▲▲", ":&*", 1 },
1652 | { "▲▲▲", "#~:&?", 3 },
1653 | { "▲▲▲", "#~:&+", 3 },
1654 | { "▲▲▲", "#~:&*", 3 },
1655 | { "▲▲▲△", "#~:&", 4 },
1656 | { "▲▲▲△", "#~:&?", 4 },
1657 | { "▲▲▲△", "#~:&+", 4 },
1658 | { "▲▲▲△", "#~:&*", 4 },
1659 | { "▲▲▲△▲▲▲", "#~:&", 7 },
1660 | { "▲", ":w", 0 },
1661 | { "▲", ":w?", 1 },
1662 | { "▲", ":w+", 0 },
1663 | { "▲", ":w*", 1 },
1664 | { "▲▲▲", ":w?", 3 },
1665 | { "▲▲▲", ":w+", 0 },
1666 | { "▲▲▲", ":w*", 3 },
1667 | { "▲▲▲", "#~:w?", 3 },
1668 | { "▲▲▲", "#~:w+", 0 },
1669 | { "▲▲▲", "#~:w*", 3 },
1670 | { "▲▲▲△", "#~:w", 0 },
1671 | { "▲▲▲△", "#~:w?", 4 },
1672 | { "▲▲▲△", "#~:w+", 0 },
1673 | { "▲▲▲△", "#~:w*", 4 },
1674 | { "▲▲▲△▲▲▲", "#~:w", 0 },
1675 | { "▲", ":W", 1 },
1676 | { "▲", ":W?", 1 },
1677 | { "▲", ":W+", 1 },
1678 | { "▲", ":W*", 1 },
1679 | { "▲▲▲", ":W?", 3 },
1680 | { "▲▲▲", ":W+", 1 },
1681 | { "▲▲▲", ":W*", 1 },
1682 | { "▲▲▲", "#~:W?", 3 },
1683 | { "▲▲▲", "#~:W+", 3 },
1684 | { "▲▲▲", "#~:W*", 3 },
1685 | { "▲▲▲△", "#~:W", 4 },
1686 | { "▲▲▲△", "#~:W?", 4 },
1687 | { "▲▲▲△", "#~:W+", 4 },
1688 | { "▲▲▲△", "#~:W*", 4 },
1689 | { "▲▲▲△▲▲▲", "#~:W", 7 },
1690 |
1691 | { "△▲3△567△9", ".", 9 },
1692 | { "△▲3△567△9", "(.)", 9 },
1693 | { "△▲3△567△9", "[.]", 0 },
1694 | { "△▲3△567△9", "(.+)", 1 },
1695 | { "△▲3△567△9", ":&", 4 },
1696 | { "△▲3△567△9", ":w", 5 },
1697 | { "△▲3△567△9", ":W", 4 },
1698 | { "△▲3△567△9", ":d", 5 },
1699 | { "△▲3△567△9", ":a", 0 },
1700 | { "△▲3△567△9", "[△5]", 4 },
1701 | { "△▲3△567△9", "[▲1]", 1 },
1702 | { "△▲3△567△9", "[3-9]", 5 },
1703 | { "△▲3△567△9", "[▲1-7]", 5 },
1704 | { "△▲3△567△9", "[^3-9]", 4 },
1705 | { "△▲3△567△9", "[^a-z]", 9 },
1706 | { "△▲3△567△9", "[^▲1-7]", 4 },
1707 | { "△▲3△567△9", "[^:d]", 4 },
1708 | { "△▲3△567△9", "[^:D]", 5 },
1709 | { "△▲3△567△9", "[^:w]", 4 },
1710 | { "△▲3△567△9", "[^:W]", 5 },
1711 | { "△▲3△567△9", "[^:&]", 5 },
1712 | { "△▲3△567△9", "[^:a]", 9 },
1713 | { "△▲3△567△9", "[^:A]", 0 },
1714 |
1715 | { "Rááptor Test", "R.áptor", 1 },
1716 | { "Rááptor Test", "Rá{2}ptor", 1 },
1717 | { "Rááptor Test", "R(á){2}ptor", 1 },
1718 | { "R△△△ptor Test", "R△{3}ptor", 1 },
1719 | { "R△△△ptor Test", "R[^a]{3}ptor", 1 },
1720 | { "R▲△ptor Test", "R[△▲]{2}ptor", 1 },
1721 | { "R▲△ptor Test", "R[^ae]{2}ptor", 1 },
1722 | { "R▲△ptor Test", "R.{2}ptor", 1 },
1723 | { "R▲△ptor Test", "R[:W]{2}ptor", 1 },
1724 | { "R▲△ptor Test", "R[^:w]{2}ptor", 1 },
1725 |
1726 | { "Σὲ γνωρίζω ἀπὸ τὴν κόψη", ".", 23 },
1727 | { "Σὲ γνωρίζω ἀπὸ τὴν κόψη", ":&", 19 },
1728 | { "Σὲ γνωρίζω ἀπὸ τὴν κόψη", "[:&]", 19 },
1729 | { "Σὲ γνωρίζω ἀπὸ τὴν κόψη", "[^:&]", 4 },
1730 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", ".", 36 },
1731 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", ":&", 7 },
1732 | { "ต้องรบราฆ่าฟันจนบรรลัย", ".", 22 },
1733 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789", ".", 38 },
1734 | { "abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ", ".", 38 },
1735 | { "–—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд", ".", 38 },
1736 | { "∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა", ".", 38 },
1737 | { "⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞", ".", 43 },
1738 | }
1739 |
1740 | done := make(chan struct{})
1741 | for _, c := range numTest {
1742 | go func( txt, re string, n int ){
1743 | x := new( RE ).Match( txt, re )
1744 |
1745 | if x != n {
1746 | t.Errorf( "Regexp4( %q, %q ) == %d, expected %d", txt, re, x, n )
1747 | }
1748 | done <- struct{}{}
1749 | }( c.txt, c.re, c.n )
1750 | }
1751 |
1752 | for range numTest { <-done }
1753 | }
1754 |
1755 | func cTestUTF( t *testing.T ){
1756 | catchTest := []struct {
1757 | txt, re string
1758 | n int
1759 | catch string
1760 | }{
1761 | { "▲", "<▲>", 1, "▲" },
1762 | { "▲▲", "<▲▲>", 1, "▲▲" },
1763 | { "▲ ▲ ▲", "<▲>", 3, "▲" },
1764 | { "▲bcd", "<▲|b|c|d>", 1, "▲" },
1765 | { "▲bcd", "<▲|b|c|d>", 2, "b" },
1766 | { "▲bcd", "<▲|b|c|d>", 3, "c" },
1767 | { "▲bcd", "<▲|b|c|d>", 4, "d" },
1768 | { "▲bcd", "<▲|b|c|d>", 5, "" },
1769 | { "▲bc", "▲", 1, "" },
1770 | { "▲bc", "<▲>x", 1, "" },
1771 | { "▲bc", "<▲|b>x", 1, "" },
1772 | { "▲bc", "<<▲|b>x|▲bc>", 1, "▲bc" },
1773 | { "▲bc", "<<▲|b>x|▲bc>", 2, "" },
1774 | { "▲bc", "|▲bc>", 1, "▲bc" },
1775 | { "▲bc", "|▲bc>", 2, "" },
1776 | { "▲bc ▲bc ▲bc", "<▲|b|c>", 9, "c" },
1777 | { "▲bc ▲bc ▲bc", "<(▲|b|c)(▲|b|c)(▲|b|c)>", 3, "▲bc" },
1778 | { "▲bc ▲bc ▲bc", "<(▲|b|c)(▲|b|c)(▲|b|c)> ", 2, "▲bc" },
1779 | { "▲bc ▲bc ▲bc", "#?<(▲|b|c)(▲|b|c)(▲|b|c)>", 1, "▲bc" },
1780 | { "▲bc ▲bc ▲bc", "#?<(▲|b|c)(▲|b|c)((▲|b)|x)>", 1, "" },
1781 | { "▲bc ▲bc ▲bx", "#?<(▲|b|c)(▲|b|c)((▲|b)|x)>", 1, "▲bx" },
1782 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 1, "▲bc" },
1783 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 2, "iec" },
1784 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 3, "i" },
1785 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 4, "c" },
1786 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 5, "oeb" },
1787 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 6, "o" },
1788 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 7, "b" },
1789 | { "▲bc iecc oeb", "<<(▲|e)|(i|o)>e|▲bc>", 8, "" },
1790 |
1791 | { "▲", "<▲>?", 1, "▲" },
1792 | { "▲", "?", 1, "" },
1793 | { "▲", "<▲>+", 1, "▲" },
1794 | { "▲", "<▲>*", 1, "▲" },
1795 | { "▲", "*", 1, "" },
1796 | { "▲▲▲", "<▲>+", 1, "▲▲▲" },
1797 | { "▲▲▲", "<▲>*", 1, "▲▲▲" },
1798 | { "▲▲▲", "#~<▲>+", 1, "▲▲▲" },
1799 | { "▲▲▲", "#~<▲>*", 1, "▲▲▲" },
1800 | { "▲▲▲b", "#~<▲+>", 1, "▲▲▲" },
1801 | { "▲▲▲b", "#~<▲*>", 1, "▲▲▲" },
1802 | { "▲▲▲b", "#~<▲?>", 4, "" },
1803 | { "▲▲▲b", "#~<▲+b>", 1, "▲▲▲b" },
1804 | { "▲▲▲b", "#~<▲*b>", 1, "▲▲▲b" },
1805 | { "▲▲▲b", "#~<▲?b>", 1, "▲b" },
1806 | { "▲▲▲b", "#~<▲+b?>", 1, "▲▲▲b" },
1807 | { "▲▲▲b", "#~<▲*b?>", 1, "▲▲▲b" },
1808 | { "▲▲▲b", "#~<▲?b?>", 3, "▲b" },
1809 | { "▲▲▲b", "#~<▲+b+>", 1, "▲▲▲b" },
1810 | { "▲▲▲b", "#~<▲*b+>", 1, "▲▲▲b" },
1811 | { "▲▲▲b", "#~<▲?b+>", 1, "▲b" },
1812 | { "▲▲▲b", "#~<▲+b*>", 1, "▲▲▲b" },
1813 | { "▲▲▲b", "#~<▲*b*>", 1, "▲▲▲b" },
1814 | { "▲▲▲b", "#~<▲?b*>", 3, "▲b" },
1815 | { "▲▲▲b▲▲▲", "#~<▲+>", 4, "▲▲▲" },
1816 | { "▲▲▲b▲▲▲", "#~<▲*>", 5, "▲▲▲" },
1817 |
1818 | { "R▲ptor Test", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 1, "R▲ptor Test" },
1819 | { "R▲ptor Test", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 2, "T" },
1820 | { "R▲▲▲▲ptor TFest", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 1, "R▲▲▲▲ptor TFest" },
1821 | { "R▲▲▲▲ptor TFest", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 2, "TF" },
1822 | { "C▲ptorTest", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 1, "C▲ptorTest" },
1823 | { "C▲p C▲ptorTest", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 1, "C▲p C▲ptorTest" },
1824 | { "C▲p C▲ptorTest", "#~<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 3, "C▲ptorTest" },
1825 | { "R▲p C▲ptor Fest", "<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 1, "R▲p C▲ptor Fest" },
1826 | { "R▲p C▲ptor Fest", "#~<((C|R)▲p C|C|R)(▲+p{1}tor) ?((+e)(st))>", 3, "C▲ptor Fest" },
1827 |
1828 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 1, "Λ" },
1829 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 2, "̊" },
1830 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 3, "̇" },
1831 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 4, "̈" },
1832 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 5, "⃑" },
1833 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 6, "⊥" },
1834 | { "STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑", "<:&>", 7, "⃑" },
1835 | }
1836 |
1837 | done := make(chan struct{})
1838 | for _, c := range catchTest {
1839 | go func( txt, re string, nCatch int, eCatch string ){
1840 | r := new( RE )
1841 | r.Match( txt, re )
1842 | catch := r.GetCatch( nCatch )
1843 | if catch != eCatch {
1844 | t.Errorf( "Regexp4( %q, %q )\nGetCatch( %d ) == %q, expected %q",
1845 | txt, re, nCatch, catch, eCatch )
1846 | }
1847 | done <- struct{}{}
1848 | }( c.txt, c.re, c.n, c.catch )
1849 | }
1850 |
1851 | for range catchTest { <-done }
1852 | }
1853 |
1854 | func sTestUTF( t *testing.T ){
1855 | swapTest := []struct {
1856 | txt, re string
1857 | n int
1858 | swap, expected string
1859 | }{
1860 | { "▲", "<▲>" , 1, "", "" },
1861 | { "▲", "<▲>?" , 1, "", "" },
1862 | { "▲", "<▲>+" , 1, "", "" },
1863 | { "▲", "<▲>*" , 1, "", "" },
1864 | { "▲", "<▲>{1}", 1, "", "" },
1865 | { "▲", "<▲?>" , 1, "", "" },
1866 | { "▲", "<▲+>" , 1, "", "" },
1867 | { "▲", "<▲*>" , 1, "", "" },
1868 | { "▲", "<▲{1}>", 1, "", "" },
1869 |
1870 | { "▲", "<▲>" , 1, "e", "e" },
1871 | { "▲", "<▲>?" , 1, "e", "e" },
1872 | { "▲", "<▲>+" , 1, "e", "e" },
1873 | { "▲", "<▲>*" , 1, "e", "e" },
1874 | { "▲", "<▲>{1}", 1, "e", "e" },
1875 | { "▲", "<▲?>" , 1, "e", "e" },
1876 | { "▲", "<▲+>" , 1, "e", "e" },
1877 | { "▲", "<▲*>" , 1, "e", "e" },
1878 | { "▲", "<▲{1}>", 1, "e", "e" },
1879 |
1880 | { "▲", "" , 1, "z", "▲" },
1881 | { "▲", "?" , 1, "z", "z▲" },
1882 | { "▲", "+" , 1, "z", "▲" },
1883 | { "▲", "*" , 1, "z", "z▲" },
1884 | { "▲", "{1}", 1, "z", "▲" },
1885 | { "▲", "" , 1, "z", "z▲" },
1886 | { "▲", "" , 1, "z", "▲" },
1887 | { "▲", "" , 1, "z", "z▲" },
1888 | { "▲", "", 1, "z", "▲" },
1889 |
1890 | { "▲▲▲", "<▲>" , 1, "", "" },
1891 | { "▲▲▲", "<▲>?" , 1, "", "" },
1892 | { "▲▲▲", "<▲>+" , 1, "", "" },
1893 | { "▲▲▲", "<▲>*" , 1, "", "" },
1894 | { "▲▲▲", "<▲>{1}", 1, "", "" },
1895 | { "▲▲▲", "<▲?>" , 1, "", "" },
1896 | { "▲▲▲", "<▲+>" , 1, "", "" },
1897 | { "▲▲▲", "<▲*>" , 1, "", "" },
1898 | { "▲▲▲", "<▲{1}>", 1, "", "" },
1899 |
1900 | { "▲▲▲", "<▲>" , 1, "e", "eee" },
1901 | { "▲▲▲", "<▲>?" , 1, "e", "eee" },
1902 | { "▲▲▲", "<▲>+" , 1, "e", "e" },
1903 | { "▲▲▲", "<▲>*" , 1, "e", "e" },
1904 | { "▲▲▲", "<▲>{1}", 1, "e", "eee" },
1905 | { "▲▲▲", "<▲?>" , 1, "e", "eee" },
1906 | { "▲▲▲", "<▲+>" , 1, "e", "e" },
1907 | { "▲▲▲", "<▲*>" , 1, "e", "e" },
1908 | { "▲▲▲", "<▲{1}>", 1, "e", "eee" },
1909 |
1910 | { "▲▲▲", "" , 1, "z", "▲▲▲" },
1911 | { "▲▲▲", "?" , 1, "z", "z▲z▲z▲" },
1912 | { "▲▲▲", "+" , 1, "z", "▲▲▲" },
1913 | { "▲▲▲", "*" , 1, "z", "z▲z▲z▲" },
1914 | { "▲▲▲", "{1}", 1, "z", "▲▲▲" },
1915 | { "▲▲▲", "" , 1, "z", "z▲z▲z▲" },
1916 | { "▲▲▲", "" , 1, "z", "▲▲▲" },
1917 | { "▲▲▲", "" , 1, "z", "z▲z▲z▲" },
1918 | { "▲▲▲", "", 1, "z", "▲▲▲" },
1919 |
1920 | { "▲▲▲b", "<▲>" , 1, "e", "eeeb" },
1921 | { "▲▲▲b", "<▲>?" , 1, "e", "eeeeb" },
1922 | { "▲▲▲b", "<▲>+" , 1, "e", "eb" },
1923 | { "▲▲▲b", "<▲>*" , 1, "e", "eeb" },
1924 | { "▲▲▲b", "<▲>{1}", 1, "e", "eeeb" },
1925 | { "▲▲▲b", "<▲?>" , 1, "e", "eeeeb" },
1926 | { "▲▲▲b", "<▲+>" , 1, "e", "eb" },
1927 | { "▲▲▲b", "<▲*>" , 1, "e", "eeb" },
1928 | { "▲▲▲b", "<▲{1}>", 1, "e", "eeeb" },
1929 |
1930 | { "▲▲▲b", "" , 1, "z", "▲▲▲b" },
1931 | { "▲▲▲b", "?" , 1, "z", "z▲z▲z▲zb" },
1932 | { "▲▲▲b", "+" , 1, "z", "▲▲▲b" },
1933 | { "▲▲▲b", "*" , 1, "z", "z▲z▲z▲zb" },
1934 | { "▲▲▲b", "{1}", 1, "z", "▲▲▲b" },
1935 | { "▲▲▲b", "" , 1, "z", "z▲z▲z▲zb" },
1936 | { "▲▲▲b", "" , 1, "z", "▲▲▲b" },
1937 | { "▲▲▲b", "" , 1, "z", "z▲z▲z▲zb" },
1938 | { "▲▲▲b", "", 1, "z", "▲▲▲b" },
1939 |
1940 | { "▲▲▲b▲▲▲", "<▲>" , 1, "e", "eeebeee" },
1941 | { "▲▲▲b▲▲▲", "<▲>?" , 1, "e", "eeeebeee" },
1942 | { "▲▲▲b▲▲▲", "<▲>+" , 1, "e", "ebe" },
1943 | { "▲▲▲b▲▲▲", "<▲>*" , 1, "e", "eebe" },
1944 | { "▲▲▲b▲▲▲", "<▲>{1}", 1, "e", "eeebeee" },
1945 | { "▲▲▲b▲▲▲", "<▲?>" , 1, "e", "eeeebeee" },
1946 | { "▲▲▲b▲▲▲", "<▲+>" , 1, "e", "ebe" },
1947 | { "▲▲▲b▲▲▲", "<▲*>" , 1, "e", "eebe" },
1948 | { "▲▲▲b▲▲▲", "<▲{1}>", 1, "e", "eeebeee" },
1949 |
1950 | { "▲▲▲b▲▲▲", "" , 1, "z", "▲▲▲b▲▲▲" },
1951 | { "▲▲▲b▲▲▲", "?" , 1, "z", "z▲z▲z▲zbz▲z▲z▲" },
1952 | { "▲▲▲b▲▲▲", "+" , 1, "z", "▲▲▲b▲▲▲" },
1953 | { "▲▲▲b▲▲▲", "*" , 1, "z", "z▲z▲z▲zbz▲z▲z▲" },
1954 | { "▲▲▲b▲▲▲", "{1}", 1, "z", "▲▲▲b▲▲▲" },
1955 | { "▲▲▲b▲▲▲", "" , 1, "z", "z▲z▲z▲zbz▲z▲z▲" },
1956 | { "▲▲▲b▲▲▲", "" , 1, "z", "▲▲▲b▲▲▲" },
1957 | { "▲▲▲b▲▲▲", "" , 1, "z", "z▲z▲z▲zbz▲z▲z▲" },
1958 | { "▲▲▲b▲▲▲", "", 1, "z", "▲▲▲b▲▲▲" },
1959 |
1960 | { "R▲ptor Test", "", 1, "C▲ptor", "C▲ptor Test" },
1961 | { "R▲ptor Test", "", 0, "C▲ptor", "R▲ptor Test" },
1962 | { "R▲ptor Test", "", 0, "C▲ptor", "R▲ptor Test" },
1963 | { "R▲ptor Test", "", 1, "C▲ptor", "C▲ptor C▲ptor" },
1964 | { "R▲ptor Test", "", 2, "C▲ptor", "R▲ptor Test" },
1965 | { "R▲ptor Test", ">", 2, "Fest", "R▲ptor Fest" },
1966 | { "R▲ptor R▲ptors R▲ptoring", "", 1, "Test", "Test Test Test" },
1967 | { "R▲ptor R▲ptors R▲ptoring", ":w*", 1, "Test", "Test Tests Testing" },
1968 | { "R▲ptor R▲ptors R▲ptoring", "<<▲>ptor>:w*", 3, "C", "C▲ptor C▲ptors C▲ptoring" },
1969 | { "R▲ptor R▲ptors R▲ptoring", "<<▲>ptor>:w*", 2, "4", "4ptor 4ptors 4ptoring" },
1970 | }
1971 |
1972 | var re RE
1973 | for _, c := range swapTest {
1974 | re.Match( c.txt, c.re )
1975 | swap := re.RplCatch( c.swap, c.n )
1976 | if swap != c.expected {
1977 | t.Errorf( "Regexp4( %q, %q )\nRplCatch( %q, %d ) == %q\n expected %q",
1978 | c.txt, c.re, c.swap, c.n, swap, c.swap )
1979 | }
1980 | }
1981 | }
1982 |
1983 | func pTestUTF( t *testing.T ){
1984 | putTest := []struct {
1985 | txt, re string
1986 | put, expected string
1987 | }{
1988 | { "▲", "<▲>", "#1", "▲" },
1989 | { "▲", "<▲>", "#x", "x" },
1990 | { "▲", "<▲>", "#xx", "xx" },
1991 | { "▲", "<▲>", "###1##", "#▲#" },
1992 | { "▲", "<▲>", "[#0][#1][#2#3#1000000]", "[][▲][]" },
1993 | { "▲▲", "<▲▲>", "#1", "▲▲" },
1994 | { "▲ ▲ ▲", "<▲>", "#1#2#3", "▲▲▲" },
1995 | { "▲bcd", "<▲|b|c|d>", "#4 #3 #2 #1", "d c b ▲" },
1996 | { "1 2 3 4 5 6 7 8 9", "<1|2|3|4|5|6|7|8|9>", "#5 #6 #7 #8 #9 #1 #2 #3 #4", "5 6 7 8 9 1 2 3 4" },
1997 | { "R▲ptor Test", "<▲ptor|est>", "C#1 F#2", "C▲ptor Fest" },
1998 | { "R▲ptor Test", "<▲ptor|est>", "C#5 F#2", "C Fest" },
1999 | { "R▲ptor Test", "<▲ptor|est>", "C#▲ F#2", "C▲ Fest" },
2000 | { "R▲ptor Test", "<▲ptor|est>", "C#0 F#2", "C Fest" },
2001 | { "R▲ptor Test", "<▲ptor|est>", "C#43 F#43", "C F" },
2002 | { "R▲ptor Test", "<▲ptor|est>", "C##43 ##F#43##", "C#43 #F#" },
2003 | { "R▲ptor Test", "<▲ptor|est>", "C##43 ##1##2", "C#43 #1#2" },
2004 | { "R▲ptor Test", "<▲ptor|est>", "##R▲ptor ##Test", "#R▲ptor #Test" },
2005 | { "R▲ptor Test Fest", " ", "#1_#2", "R▲ptor_Test" },
2006 | }
2007 |
2008 | var re RE
2009 | for _, c := range putTest {
2010 | re.Match( c.txt, c.re )
2011 | put := re.PutCatch( c.put )
2012 | if put != c.expected {
2013 | t.Errorf( "Regexp4( %q, %q )\nPutCatch( %q ) == %q, expected %q",
2014 | c.txt, c.re, c.put, put, c.expected )
2015 | }
2016 | }
2017 | }
2018 |
2019 | func gTestUTF( t *testing.T ){
2020 | putTest := []struct {
2021 | txt, re string
2022 | catch int
2023 | pos, len int
2024 | }{
2025 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789", "<8>", 1, 36, 1 },
2026 | { "abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ", "<ö>", 1, 45, 2 },
2027 | { "–—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд", "<г>", 1, 82, 2 },
2028 | { "∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა", "<Ա>", 1, 98, 2 },
2029 | { "⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞", "<⡹>", 1, 75, 3 },
2030 | { "ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789", "<89>", 1, 36, 2 },
2031 | { "abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ", "<öÿ>", 1, 45, 4 },
2032 | { "–—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд", "<гд>", 1, 82, 4 },
2033 | { "∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა", "<Աა>", 1, 98, 5 },
2034 | { "⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞", "<⡹⠻>", 1, 75, 6 },
2035 | }
2036 |
2037 | var re RE
2038 | for _, c := range putTest {
2039 | re.Find( c.txt, c.re )
2040 | pos := re.GpsCatch( c.catch )
2041 | len := re.LenCatch( c.catch )
2042 | if pos != c.pos || len != c.len {
2043 | t.Errorf( "Regexp4( %q, %q )\nGpsCatch( %d ) == %d, expected %d\nLenCatch( %d ) == %d, expected %d",
2044 | c.txt, c.re, c.catch, pos, c.pos, c.catch, len, c.len )
2045 | }
2046 | }
2047 | }
2048 |
////////////// INTERNAL-COMPARATIVE-BENCHMARKS
/// Find vs [Compile() + Copy().FindString()]
2051 |
// Pattern and input shared by the Find/FindCopy benchmarks.
const (
	// rebe is the date-like pattern also used by the put tests in this file.
	rebe = "<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>"
	// reco is the text the benchmark patterns are run against.
	reco = "07-07-1777"
)
2054 |
2055 | func BenchmarkFind(b *testing.B) {
2056 | var re RE
2057 | for i := 0; i < b.N; i++ {
2058 | if !re.Find( reco, rebe ) {
2059 | b.Errorf( "BenchmarkFind: re.Find(): no-match" )
2060 | }
2061 | }
2062 | }
2063 |
2064 | var reFi = Compile( rebe )
2065 |
2066 | func BenchmarkFindCopy(b *testing.B) {
2067 | for i := 0; i < b.N; i++ {
2068 | if !reFi.Copy().FindString( reco ) {
2069 | b.Errorf( "BenchmarkFindCopy: re.Find(): no-match" )
2070 | }
2071 | }
2072 | }
2073 |
// Variants of rebe carrying a leading "#..." modifier prefix, used by the 3X
// benchmarks.
const (
	// rebe2 is rebe prefixed with "#^".
	rebe2 = "#^<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>"
	// rebe3 is rebe prefixed with "#*$".
	rebe3 = "#*$<0?[1-9]|[12][0-9]|3[01]><[/:-\\]><0?[1-9]|1[012]>@2<[12][0-9]{3}>"
)
2076 |
2077 | func BenchmarkFind3X(b *testing.B) {
2078 | var re RE
2079 | for i := 0; i < b.N; i++ {
2080 | if !re.Find( reco, rebe ) {
2081 | b.Errorf( "BenchmarkFind: re.Find(): no-match" )
2082 | }
2083 | if !re.Find( reco, rebe2 ) {
2084 | b.Errorf( "BenchmarkFind: re.Find(): no-match" )
2085 | }
2086 | if !re.Find( reco, rebe3 ) {
2087 | b.Errorf( "BenchmarkFind: re.Find(): no-match" )
2088 | }
2089 | }
2090 | }
2091 |
2092 | var reFi2 = Compile( rebe2 )
2093 | var reFi3 = Compile( rebe3 )
2094 |
2095 | func BenchmarkFindCopy3X(b *testing.B) {
2096 | for i := 0; i < b.N; i++ {
2097 | if !reFi.Copy().FindString( reco ) {
2098 | b.Errorf( "BenchmarkFindCopy: re.Find(): no-match" )
2099 | }
2100 | if !reFi2.Copy().FindString( reco ) {
2101 | b.Errorf( "BenchmarkFindCopy: re.Find(): no-match" )
2102 | }
2103 | if !reFi3.Copy().FindString( reco ) {
2104 | b.Errorf( "BenchmarkFindCopy: re.Find(): no-match" )
2105 | }
2106 | }
2107 | }
2108 |
// Minimal pattern/input pair for the "Simple" benchmarks.
const (
	// srebe is the literal pattern "text" with a leading "#^" modifier prefix.
	srebe = "#^text"
	// sreco is the input the simple pattern is run against.
	sreco = "text"
)
2111 |
2112 | func BenchmarkFindSimple(b *testing.B) {
2113 | var re RE
2114 | for i := 0; i < b.N; i++ {
2115 | if !re.Find( sreco, srebe ) {
2116 | b.Errorf( "BenchmarkFind: re.Find(): no-match" )
2117 | }
2118 | }
2119 | }
2120 |
2121 | var reSi = Compile( srebe )
2122 |
2123 | func BenchmarkFindCopySimple(b *testing.B) {
2124 | for i := 0; i < b.N; i++ {
2125 | if !reSi.Copy().FindString( sreco ) {
2126 | b.Errorf( "BenchmarkFindCopy: re.Find(): no-match" )
2127 | }
2128 | }
2129 | }
2130 |
2131 | /// RplCatch (string vs []byte vs bytes.Buffer)
2132 |
2133 | func (r *RE) OldRplCatch( rplStr string, id int ) (result string) {
2134 | last := 0
2135 |
2136 | for index := 1; index < r.catchIndex; index++ {
2137 | if r.catches[index].id == id {
2138 | if last > r.catches[index].init { last = r.catches[index].init }
2139 |
2140 | result += r.txt[last:r.catches[index].init]
2141 | result += rplStr
2142 | last = r.catches[index].end
2143 | }
2144 | }
2145 |
2146 | if last < len(r.txt) { result += r.txt[last:] }
2147 |
2148 | return
2149 | }
2150 |
2151 | func (r *RE) BufferRplCatch( rplStr string, id int ) string {
2152 | last := 0
2153 | var b bytes.Buffer
2154 |
2155 | for index := 1; index < r.catchIndex; index++ {
2156 | if r.catches[index].id == id {
2157 | if last > r.catches[index].init { last = r.catches[index].init }
2158 |
2159 | b.WriteString( r.txt[last:r.catches[index].init] )
2160 | b.WriteString( rplStr )
2161 | last = r.catches[index].end
2162 | }
2163 | }
2164 |
2165 | if last < len(r.txt) { b.WriteString( r.txt[last:] ) }
2166 |
2167 | return b.String()
2168 | }
2169 |
2170 | var rerpl = Compile( "<:s>+" )
// Fixtures for the RplCatch benchmarks.
const (
	// ssIn is the benchmark input: the "line-*" words separated by runs of
	// space, tab, vertical tab and newline characters.
	ssIn = " \nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\nline-a\t\v\n\nline-b\n\nline-c\nline-d\t\v\n\nline-en\n"
	// ssSwp is the replacement string handed to the RplCatch variants.
	ssSwp = "––"
	// ssOut is the text every RplCatch variant must produce for ssIn.
	ssOut = "––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––line-a––line-b––line-c––line-d––line-en––"
)
2174 |
2175 |
2176 | func BenchmarkRplCatchOld( b *testing.B ){
2177 | r := rerpl.Copy()
2178 | r.FindString( ssIn )
2179 |
2180 | for i := 0; i < b.N; i++ {
2181 | if( r.OldRplCatch( ssSwp, 1 ) != ssOut ){
2182 | b.Fatalf( "BenchmarkRplCatchOld(): no match" )
2183 | }
2184 | }
2185 | }
2186 |
2187 | func BenchmarkRplCatchBuffer( b *testing.B ){
2188 | r := rerpl.Copy()
2189 | r.FindString( ssIn )
2190 |
2191 | for i := 0; i < b.N; i++ {
2192 | if( r.BufferRplCatch( ssSwp, 1 ) != ssOut ){
2193 | b.Fatalf( "BenchmarkRplCatchBuffer(): no match" )
2194 | }
2195 | }
2196 | }
2197 |
2198 | func BenchmarkRplCatch( b *testing.B ){
2199 | r := rerpl.Copy()
2200 | r.FindString( ssIn )
2201 |
2202 | for i := 0; i < b.N; i++ {
2203 | if( r.RplCatch( ssSwp, 1 ) != ssOut ){
2204 | b.Fatalf( "BenchmarkRplCatch(): no match" )
2205 | }
2206 | }
2207 | }
2208 |
--------------------------------------------------------------------------------