├── .clang-format ├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── .markdownlint.json ├── .ocamlformat ├── CHANGELOG.md ├── LICENSE.md ├── Makefile ├── README.md ├── dune ├── dune-project ├── examples ├── Makefile ├── README.md ├── cloc.ml ├── count_hash.ml ├── dfa_restart.ml ├── dune ├── pcregrep.ml └── subst.ml ├── lib ├── Makefile ├── config │ ├── Makefile │ ├── discover.ml │ └── dune ├── dune ├── pcre.ml ├── pcre.mli └── pcre_stubs.c ├── pa_ppx_test ├── Makefile └── pcre_tests.ml ├── pcre.opam └── test ├── dune └── pcre_tests.ml /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig: https://EditorConfig.org 2 | 3 | # Top-most EditorConfig file 4 | root = true 5 | 6 | # Default settings for all files 7 | [*] 8 | charset = utf-8 9 | end_of_line = lf 10 | insert_final_newline = true 11 | trim_trailing_whitespace = true 12 | indent_style = space 13 | indent_size = 2 14 | max_line_length = 80 15 | 16 | # Makefile 17 | [Makefile] 18 | # Makefiles require tabs instead of spaces 19 | indent_style = tab 20 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: github-actions 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Builds, tests & co 2 | 3 | on: 4 | - pull_request 5 | - push 6 | - workflow_dispatch 7 | 8 | permissions: read-all 9 | 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | os: 16 | - ubuntu-latest 17 | - macos-latest 18 | - windows-latest 19 | 20 | runs-on: ${{ matrix.os }} 21 | 22 | steps: 23 | - name: Checkout tree 24 | uses: actions/checkout@v4 25 | 26 | - name: Set-up OCaml 27 | uses: ocaml/setup-ocaml@v3 28 | with: 29 | ocaml-compiler: 5 30 | 31 | - run: opam install . --deps-only --with-test 32 | 33 | - run: opam exec -- dune build 34 | 35 | - run: opam exec -- dune runtest 36 | 37 | lint-doc: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Checkout tree 41 | uses: actions/checkout@v4 42 | - name: Set-up OCaml 43 | uses: ocaml/setup-ocaml@v3 44 | with: 45 | ocaml-compiler: 5 46 | - uses: ocaml/setup-ocaml/lint-doc@v3 47 | 48 | lint-fmt: 49 | runs-on: ubuntu-latest 50 | steps: 51 | - name: Checkout tree 52 | uses: actions/checkout@v4 53 | - name: Set-up OCaml 54 | uses: ocaml/setup-ocaml@v3 55 | with: 56 | ocaml-compiler: 5 57 | - uses: ocaml/setup-ocaml/lint-fmt@v3 58 | 59 | lint-opam: 60 | runs-on: ubuntu-latest 61 | steps: 62 | - name: Checkout tree 63 | uses: actions/checkout@v4 64 | - name: Set-up OCaml 65 | uses: ocaml/setup-ocaml@v3 66 | with: 67 | ocaml-compiler: 5 68 | - uses: ocaml/setup-ocaml/lint-opam@v3 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | .merlin 3 | *.install 4 | _build 5 | -------------------------------------------------------------------------------- /.markdownlint.json: -------------------------------------------------------------------------------- 1 | { 2 | "no-duplicate-heading": { 3 | "siblings_only": true 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version = 0.27.0 2 | profile = conventional 3 | 4 | # Default overrides 5 | wrap-comments = true 6 | parse-docstrings = true 7 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [8.0.3] - 2025-02-18 4 | 5 | - Removed export on `caml_alloc_some` to prevent linking issues. 6 | - Added OUnit2-based test suite. 7 | 8 | Thanks to Chet Murthy for these contribution. 9 | 10 | ## [8.0.2] - 2025-01-06 11 | 12 | ### Added 13 | 14 | - Support for OCaml 4.08. Thanks to Chet Murthy for this contribution. 15 | 16 | ## [8.0.1] - 2024-12-20 17 | 18 | ### Fixed 19 | 20 | - Fixed a bug in the `full_split` function where non-capturing groups were not 21 | identified as such. 22 | 23 | ### Removed 24 | 25 | - Removed obsolete base-bytes dependency. 26 | 27 | ## [7.5.1] - 2024-12-07 28 | 29 | ### Added 30 | 31 | - GitHub workflow for automated CI/CD. 32 | 33 | ### Changed 34 | 35 | - Reformatted all OCaml and C-files for consistency. 36 | - Improved and fixed documentation. 37 | - Enhanced README and example README for clarity. 38 | 39 | ### Fixed 40 | 41 | - Fixed macro instantiation formatting and minor C preprocessing issues. 42 | - Corrected license typo. 43 | 44 | ## [7.5.0] - 2021-07-22 45 | 46 | ### Added 47 | 48 | - Support for JIT-compilation of patterns to improve matching performance. Users 49 | need to add the `jit_compile` flag to request JIT-compilation when creating 50 | regular expressions. 51 | 52 | ## [7.4.6] - 2020-08-04 53 | 54 | ### Removed 55 | 56 | - Mistakenly kept base library configuration dependency. 57 | 58 | ## [7.4.5] - 2020-08-04 59 | 60 | ### Removed 61 | 62 | - Excessive build dependency on `base` package. 63 | 64 | ## [7.4.4] - 2020-07-30 65 | 66 | ### Added 67 | 68 | - Missing dune-configurator dependency. 69 | - Support for const char strings in stubs due to stricter handling in newer 70 | OCaml runtimes, eliminating C-compiler warnings. 71 | 72 | ## [7.4.3] - 2019-10-27 73 | 74 | ### Changed 75 | 76 | - Switched from `caml_alloc_custom` to `caml_alloc_custom_mem` to improve memory 77 | usage and GC performance. 78 | - Switched to OPAM file generation via `dune-project`. 79 | 80 | ## [7.4.2] - 2019-10-11 81 | 82 | ### Fixed 83 | 84 | - Warnings in C-stubs. 85 | 86 | ## [7.4.1] - 2019-02-21 87 | 88 | ### Fixed 89 | 90 | - Pattern execution bug due to DFA implementation. 91 | 92 | ## [7.4.0] - 2019-02-05 93 | 94 | ### Added 95 | 96 | - DFA support with new functions: 97 | 98 | - `pcre_dfa_exec` 99 | - `unsafe_pcre_dfa_exec` 100 | 101 | Thanks to Chas Emerick for this contribution. 102 | 103 | ## [7.3.5] - 2018-10-25 104 | 105 | ### Changed 106 | 107 | - Switched to dune, dune-release, and OPAM 2.0. 108 | 109 | ## [7.3.4] - 2017-11-22 110 | 111 | ### Improved 112 | 113 | - Finalization of regular expressions and tables for better performance. 114 | 115 | ## [7.3.3] - 2017-10-17 116 | 117 | ### Fixed 118 | 119 | - External declaration bug in internal regexp compile function. 120 | 121 | ## [7.3.2] - 2017-10-10 122 | 123 | ### Improved 124 | 125 | - Compatibility with MSVC. 126 | 127 | ## [7.3.1] - 2017-10-08 128 | 129 | ### Changed 130 | 131 | - Used untagged integers when declaring external functions. 132 | 133 | ## [7.3.0] - 2017-07-27 134 | 135 | ### Changed 136 | 137 | - Switched to jbuilder and topkg. 138 | 139 | ## Changes Before Version 7.3.0 140 | 141 | ```text 142 | 2016-02-25: Minor version release v7.2.3: 143 | 144 | Fixed callout bug introduced with v7.2.0. 145 | 146 | Thanks to Raman Varabets for the bug report. 147 | 148 | 2016-02-23: Fixed linking problem with old versions of PCRE (< 8.20). 149 | 150 | Fixed backward compatibility issue with OCaml <= 3.12. 151 | 152 | 2016-02-22: Fixed a subgroup matching bug. 153 | 154 | Thanks to Cheng Lou for the bug report. 155 | 156 | 2015-08-21: Made GC less aggressive reclaiming regexps and chartables. 157 | 158 | 2014-12-10: Fixed another limit handling bug in the full_split function. 159 | 160 | 2014-12-02: Fixed a limit handling bug in the full_split function. 161 | 162 | Thanks to Rudi Grinberg for the report. 163 | 164 | 2014-10-23: Fixed string handling for new OCaml version 4.02 (String/Bytes 165 | modules). Requires new findlib version (>= 1.5). 166 | 167 | 2014-07-06: Moved to GitHub. 168 | 169 | 2014-06-04: Bug fixes: 170 | 171 | * Allcation bug when performing callouts 172 | * Unprotected root when performing callouts 173 | * More portable offset copying in the C-stubs 174 | * Fixed a PERL-compatibility bug in the splitting routines 175 | 176 | The bug fixes required a minor API-change in an unsafe function, 177 | which is almost surely not directly called by any users. 178 | 179 | 2012-07-20: Downgraded findlib version constraint to support the Debian 180 | testing branch. 181 | 182 | Added --with-pcre-config flag to configure location of PCRE 183 | configuration generator. 184 | 185 | 2012-07-15: New major release version 7.0.0: 186 | 187 | * Upgraded to OCaml 4.00 188 | * Switched to Oasis for packaging 189 | * Switched to OCamlBuild for the build process 190 | * Rewrote README in Markdown 191 | * Added stricter compilation flags 192 | * Minor bugfixes 193 | 194 | 2012-01-04: Fixed native code debug build target by updating OCamlMakefile. 195 | 196 | Thanks to Stéphane Glondu for the patch. 197 | 198 | 2011-12-15: Fixed a Windows portability bug in the C-bindings. 199 | 200 | Thanks to Evgenii Lepikhin for the patch. 201 | 202 | 2011-11-09: Updated OCamlMakefile to fix linking order. 203 | 204 | 2011-01-16: Added support for limit recursion flag. 205 | 206 | Thanks to Delphin Lecucq for the patch. 207 | 208 | 2010-10-31: Improved Windows support with MSVC. 209 | 210 | Thanks to Sylvain Le Gall for the patch. 211 | 212 | 2010-04-01: Added new function: 213 | 214 | * regexp_or 215 | 216 | 2009-06-20: Fixed bug in configuration functions that could lead to a segfault. 217 | 218 | Thanks to Gerd Stolpmann for the patch. 219 | 220 | 2009-05-07: Changed API wrt. error handling and thus made a major release. 221 | 222 | Improved behavior in the presence of recursion limit errors. 223 | Thanks to Martin Jambon for this patch. 224 | 225 | 2009-04-23: Fixed build problem on MinGW. 226 | 227 | Thanks to Gerd Stolpmann for the patch. 228 | 229 | 2009-03-08: Fixed build problem on Mac OS X with macports. 230 | 231 | Thanks to Ralph Douglass for the 232 | initial patch. 233 | 234 | Update OCamlMakefile. 235 | 236 | Improved Godi-distribution. 237 | 238 | 2008-05-06: Fixed build problem with newer versions of PCRE. 239 | 240 | 2008-03-14: Synced with Jane Street tree. 241 | 242 | 2008-01-25: Added new function: 243 | 244 | * names 245 | 246 | This function returns the names of all named substrings in a 247 | regular expression. 248 | 249 | Thanks to Benedikt Grundmann 250 | for the patch. 251 | 252 | 2007-07-12: Improved build scripts for Windows. 253 | 254 | Thanks to Christophe Troestler 255 | for the patch. 256 | 257 | 2007-07-12: Improved documentation for Win32 builds, and added some build 258 | scripts usable on Windows. 259 | 260 | Thanks to Christophe Troestler 261 | for this contribution. 262 | 263 | 2007-04-23: callback_exn -> caml_callback_exn. 264 | 265 | Updated OCamlMakefile. 266 | 267 | 2006-11-22: Updated OCamlMakefile. 268 | 269 | 2006-06-11: Updated to pcre-5.0. 270 | 271 | New representation for callbacks: they now take one 272 | argument (a record of the callback data). 273 | 274 | Added partial matching and auto callouts. 275 | 276 | 2006-01-16: Updated OCamlMakefile. 277 | 278 | Removed a superfluous binding. 279 | 280 | 2005-08-18: Fixed a small compilation problem on rare platforms by 281 | upgrading OCamlMakefile. 282 | 283 | 2005-06-08: Relaxed license. Fixed copyright headers. 284 | 285 | 2005-05-31: Fixed some uncleanliness reported by Saffire (FFI-type checker). 286 | 287 | 2004-09-17: Fixed a bug for null patterns in exec_all (extract_all 288 | and extract_all_opt are also affected). 289 | 290 | Updated OCamlMakefile. 291 | 292 | 2004-05-19: Updated Makefile.mingw. 293 | Thanks to Jeff Henrikson for the patch. 294 | 295 | Updated OCamlMakefile. 296 | 297 | 2004-04-29: *** Changed behaviour of "get_substring" *** 298 | 299 | It now does not return the empty string anymore if an 300 | accessed substring was not captured. It raises the exception 301 | "Not_found" instead. 302 | 303 | Three new functions: 304 | 305 | * get_opt_substrings 306 | * extract_opt 307 | * extract_all_opt 308 | 309 | These behave like the functions without "opt", but return 310 | "Some substring" for captured substrings, and "None" 311 | otherwise. 312 | 313 | 2004-04-27: Updated OCamlMakefile. 314 | 315 | 2004-04-24: Updated OCamlMakefile. 316 | 317 | 2004-03-28: Changed interface to build-time configuration functions 318 | of PCRE. 319 | 320 | Updated OCamlMakefile. 321 | 322 | 2004-02-21: Added CAMLprim in the C-interface where appropriate. 323 | 324 | 2004-02-08: Fixed a minor bug with returning unit values from C. 325 | 326 | 2004-01-31: Added pcre_make.win32/Makefile.mingw. Thanks to Jeffrey 327 | Henrikson for this contribution. 328 | 329 | Update OCamlMakefile. 330 | 331 | 2004-01-13: Updated `pcre_make.win32/pcre.h`. 332 | 333 | 2003-12-30: Fixed documentation. 334 | 335 | 2003-12-21: Updated to pcre-4.5. 336 | 337 | New function: 338 | 339 | * config_stackrecurse 340 | 341 | New exception: 342 | 343 | * BadUTF8Offset 344 | 345 | Updated OCamlMakefile. 346 | 347 | 2003-12-19: Fixed a small (but probably unnoticable) bug with 348 | allocation of optional values in the C-stubs. 349 | 350 | 2003-12-12: Updated OCamlMakefile. Renamed stubs (invisible to users). 351 | 352 | 2003-11-16: Updated `pcre_make.win32/pcre.h` to reflect newest PCRE-version. 353 | May help Windows users. 354 | 355 | 2003-10-08: Upgraded to pcre-4.4. 356 | 357 | New flag for compiling patterns: NO_UTF8_CHECK 358 | New exception: BadUTF8 359 | 360 | Updated OCamlMakefile. 361 | 362 | 2003-09-30: Fixed a bug in the documentation. 363 | Updated OCamlMakefile. 364 | 365 | 2003-06-17: Fixed a bug in the documentation. 366 | Updated OCamlMakefile. 367 | 368 | 2003-05-29: Updated to pcre-4.3. 369 | 370 | Major change: callouts are now fully supported. This allows 371 | the matching engine call OCaml-code while matching. 372 | Please see the interface specification for more information. 373 | 374 | Small changes (improvements) in C-code. Updated 375 | documentation. 376 | 377 | 2003-04-08: Updated OCamlMakefile. Reformatted documentation. 378 | 379 | 2003-03-20: Added new function `get_subject`. Patched OCamlMakefile. 380 | 381 | 2003-03-18: Major update: upgraded to pcre-4.1. 382 | 383 | Better UTF8-support. 384 | New flag "NO_AUTO_CAPTURE". 385 | 386 | New values: 387 | 388 | * config_utf8 389 | * config_newline 390 | * config_link_size 391 | * config_match_limit 392 | 393 | Renamed all occurrences of "firstchar" to "firstbyte". 394 | 395 | New functions: 396 | 397 | * studysize 398 | * namecount 399 | * nameentrysize 400 | * get_stringnumber 401 | 402 | Updated OCamlMakefile. 403 | 404 | 2003-01-07: Updated OCamlMakefile to make use of "findlib". 405 | 406 | Added support for UTF-8 character encodings. 407 | 408 | Better installation and documentation for Win32. 409 | 410 | Thanks to Artem Prisyznuk for the above 411 | patches. 412 | 413 | 2002-12-14: Fixed a bug with zero-sized matches effecting 414 | `replace`, `qreplace`, `substitute_substrings` and 415 | `substitute`. 416 | 417 | Updated OCamlMakefile. 418 | 419 | 2002-12-08: Improved documentation of `pcre_exec`. 420 | 421 | 2002-11-24: Fixed a bug in `full_split` with matched subgroups. 422 | 423 | 2002-11-12: Added a new function `extract_all` (see interface 424 | documentation). 425 | 426 | 2002-08-16: Fixed a bug in the `split`-function: 427 | 428 | The Perl-splitting semantics was not fully adhered to: 429 | we stripped leading whitespace after the matching process 430 | rather than before, which lead to incompatible behaviour 431 | with limits. 432 | 433 | Thanks to Yutaka Oiwa for the 434 | bug report. 435 | 436 | Updated OCamlMakefile. 437 | 438 | 2002-07-31: Fixed a bug in the following functions: 439 | 440 | * replace 441 | * qreplace 442 | * substitute_substrings 443 | 444 | Transformed most part of the library to make it slightly 445 | more efficient and simple. The interface is still the same. 446 | 447 | Updated OCamlMakefile + documentation. 448 | 449 | 2002-07-15: Fixed a mistake in the documentation. 450 | 451 | 2002-05-05: Fixed a bug with the generation of byte-code libraries that 452 | dynamically link the PCRE. 453 | 454 | 2002-05-01: Removed C-library from distribution. Users must install it 455 | on their own now. Reorganized whole distribution and updated 456 | OCamlMakefile again for better support of dynamic and/or 457 | static libraries. 458 | 459 | 2002-04-30: Updated OcamlMakefile: it does not ask for confirmation 460 | during installation anymore. 461 | 462 | 2002-03-06: Upgraded to pcre-3.9. This should not change anything for 463 | OCaml-users. 464 | 465 | 2002-03-01: Updated OcamlMakefile. 466 | 467 | 2002-02-24: Separated compilation of library and examples to prevent 468 | confusions when the library requires installation before 469 | one can build the examples. 470 | 471 | 2002-02-15: Fixed a bug in the `pcre_exec`-function introduced 472 | ten days ago during correction of another bug (thanks to 473 | Gerd Stolpmann for the report). 474 | 475 | 2002-02-15: Added the option to compile the library statically. 476 | 477 | Updated INSTALL-notes to explain possible installation 478 | problems associated with support of dynamic linking. 479 | 480 | Added META-file for findlib. 481 | 482 | 2002-02-12: Rewrote interface documentation to support OCamldoc. 483 | 484 | Fixed a portability bug with shared libraries. 485 | 486 | 2002-02-10: Removed project from Sourceforge for simpler maintainance. 487 | 488 | 2002-02-07: Important news: library linked dynamically now by default. 489 | 490 | 2002-02-06: Fixed a stupid bug affecting the following functions 491 | (thanks to Jacek Chrzaszcz for the bug report): 492 | 493 | * num_of_subs 494 | * get_substring 495 | * get_substring_ofs 496 | * get_substrings 497 | * extract 498 | 499 | Also done: converted literal pattern strings in the library 500 | and the cloc-example so that the escape char (backslash) does 501 | not cause warnings anymore with the new OCaml-release. The 502 | latter is more paranoid about unknown escape combinations, 503 | requiring the user to add extra backslashes. 504 | 505 | 2002-01-07: Fixed a stupid bug: the position argument (offset) was 506 | incorrectly handled in replacement and substitution 507 | functions, leading to wrong results. As it seems, people 508 | seldom use arguments other than zero... 509 | 510 | The "subst"-example now assumes that offsets other than zero 511 | should not lead to an error if it exceeds the line length. 512 | We copy the line instead. 513 | 514 | 2001-12-28: Added README.win32 (courtesy of John W. Small). 515 | 516 | Updated README so that Hevea generates it. 517 | 518 | 2001-11-19: Upgraded to the newest release of the underlying C-library 519 | (PCRE-3.7). 520 | 521 | Added a patch to compile with the Visual C++ compiler under 522 | W2K (thanks to John W. Small). 523 | 524 | Updated contact address. 525 | 526 | 2001-11-17: Updated OcamlMakefile. 527 | 528 | 2001-09-15: Upgraded to the newest release of the underlying C-library 529 | (PCRE-3.5). 530 | 531 | Added a new function: substitute_substrings 532 | 533 | Like "substitute", but takes the full substring information of the 534 | match rather than the matching string. 535 | Thanks to Patrick M. Doane for proposing this missing feature. 536 | 537 | 2001-09-07: Updated OcamlMakefile 538 | 539 | 2001-08-27: Fixed a bug in the splitting function: leading whitespace was 540 | always removed incorrectly when using a regular expression 541 | rather than a pattern. This behaviour should happen 542 | with the default whitespace pattern, which gets used if you 543 | do not specify any pattern or regexp in the function call. 544 | 545 | 2001-06-30: Removed "Printexc.catch" from examples: is going to be 546 | deprecated in upcoming OCaml-release. 547 | 548 | 2001-05-22: Fixed typo in documentation. 549 | 550 | 2001-04-25: Added a new function: asplit 551 | 552 | Identical to "split" with the exception that it returns a string 553 | array instead of a string list. This makes it easier for the user 554 | to access strings by index. 555 | 556 | Added a new option to "get_substrings" and "extract": full_match 557 | 558 | When "full_match" is true (default: yes), then the resulting 559 | string array will contain the full match at index 0, 560 | otherwise the result will contain captured substrings. 561 | 562 | Removed superfluous comments in "pcre.ml": they are already 563 | present in the interface documentation anyway. 564 | 565 | 2001-04-08: Small patch that makes this library compile on OpenBSD, too. 566 | 567 | 2001-01-30: Made Makefile more general (allows simpler addition of 568 | further examples). 569 | 570 | 2001-01-24: Updated OcamlMakefile: made default definition of 571 | "OCAMLLIBPATH" backwards compatible again: some people 572 | do not use the CVS-version of OCaml, which supports the 573 | "-where"-option. People with the new compiler will not 574 | notice any effect. 575 | 576 | 2001-01-06: Added a new function: `exec_all` 577 | 578 | It allows you to execute pattern matching over a whole 579 | string until we find no more matches: then it returns 580 | the array of all matching "substrings". You can extract 581 | subpatterns of each of those matching substrings again with 582 | the usual functions. 583 | 584 | Fixed a minor inconsistency in "next_match". 585 | 586 | 2000-12-23: Updated OcamlMakefile: makes use of the new "-where"-keyword 587 | to find the path to the standard library if undefined. 588 | 589 | 2000-12-14: Pedantry in the C-interface: added "const" qualifiers. 590 | 591 | 2000-12-09: Made some functions tail-recursive (unlikely to cause 592 | any noticable effect for most people). 593 | 594 | 2000-12-02: Cleaned up the code a bit for distribution on SourceForge. 595 | Speed of some operations should be slightly improved, too. 596 | 597 | 2000-11-16: Fixed a stupid bug that could crash your programs under rare 598 | circumstances (when you use faulty regular expressions). 599 | 600 | Added an internally used exception (InternalError) to the 601 | interface of the library. This allows the user to handle 602 | the case when the C-engine exhibits undefined behaviour 603 | (should never happen, anyway). 604 | 605 | 2000-09-27: Upgraded to the newest release of the underlying C-library 606 | (PCRE-3.4). 607 | 608 | See "pcre-C/ChangeLog" for more information (minor bugfixes). 609 | 610 | Renamed "pgrep" to "pcregrep" to prevent name hiding on 611 | Solaris (change as suggested in the C-library distribution). 612 | 613 | 2000-06-24: Updated OcamlMakefile 614 | 615 | 2000-06-13: Updated OcamlMakefile 616 | 617 | 2000-06-12: Mini-optimisation: lifted a pure value out of a function body. 618 | 619 | 2000-06-11: Updated OcamlMakefile 620 | 621 | 2000-06-08: Added installation routine + updated OcamlMakefile again: 622 | 623 | This upgrade makes installation much easier. Read the 624 | updated INSTALL-file. 625 | 626 | 2000-06-07: Updated to new OcamlMakefile 627 | 628 | 2000-06-05: Added a new function: 629 | 630 | get_substring_ofs substrings n 631 | 632 | This allows you to get the offset positions of the matching 633 | pattern and the substrings directly from a value of type 634 | "substring". See the interface documentation for details. 635 | 636 | 2000-05-15: Upgraded to the newest release of the underlying C-library 637 | (PCRE-3.2). 638 | 639 | This does not add new features, but should fix some 640 | PERL-compatibility bugs and improves portability. 641 | 642 | 2000-05-04: Minor update of C-interface: 643 | 644 | Use the new "hash_variant"-function to compute the hash value of 645 | variants (used Callback.register before - a bit clumsy). 646 | 647 | 2000-04-24: New release: compiles with OCaml-3.00. 648 | 649 | Lots of changes in interface. OCaml-3.00 introduced some syntax 650 | changes for labels. No keywords allowed for them. 651 | All this required renamings. I tried to stick to 652 | the new labels of the "str"-library as close as makes sense. 653 | Sorry for this inconvenience, but I think that adhering to some 654 | "standard" is a Good Thing... 655 | 656 | For further information on the changes, look at the documentation 657 | of the interface file. 658 | 659 | 2000-04-23: Minor cleanup of C-interface: 660 | 661 | Made local functions + variables static and moved a check for 662 | error codes (lint should be happier now). 663 | 664 | 2000-04-01: Changed linking of the library again (marginally): 665 | 666 | The new linking semantics for byte code libraries allows passing 667 | of now remembered flags. This then requires linking executables 668 | against "pcre.cma" - the C-library ("libpcre.a") links 669 | automatically. 670 | 671 | 2000-03-30: Cleaned up the C-interface: 672 | 673 | Removed a possible bug in the allocation of firstchar values. 674 | 675 | Saner handling of polymorphic variants (not hard-wired 676 | integers anymore - registered via callbacks). 677 | 678 | Better GC-settings: much nicer to the GC now (fewer full 679 | cycles - marginally more memory consumption). Should make 680 | programs run slightly faster when regular expressions are 681 | often allocated. 682 | 683 | Changed linking of the library to suit the semantics of 684 | the upcoming OCaml-major release (backward compatible). 685 | 686 | 2000-02-07: Changed interface of function `foreach_file`: 687 | 688 | No idea why I implemented a tuple in the interface instead of a 689 | curried function. Must have been the drugs... ;-) 690 | 691 | Former interface: 692 | 693 | val foreach_file : 694 | string list -> (string * in_channel -> unit) -> unit 695 | 696 | Current interface: 697 | 698 | val foreach_file : 699 | string list -> (string -> in_channel -> unit) -> unit 700 | 701 | This function now also closes the file channel in case of an 702 | exception. The exception gets reraised, of course. 703 | 704 | 2000-02-05: Eliminated a PERL-incompatibility of the "split"-function: 705 | 706 | When we specify neither the pattern nor the compiled regular 707 | expression, "split" defaults to a whitespace pattern. 708 | PERL also strips leading whitespace when in "default" mode. 709 | 710 | This last "feature" (?) was overseen during the introduction 711 | of labels and default arguments. 712 | 713 | 2000-02-05: Updated the C-library to the newest release (pcre-3.0). 714 | 715 | This is a major release, but the changes to the OCaml-part 716 | are rather small: the functionality for information on compiled 717 | patterns has changed. 718 | 719 | Instead of the former "info"-function, we provide more than one. 720 | I think this is the most convenient way to handle 721 | this, because it does not force the user to do explicit 722 | pattern matching on results: due to the number of new 723 | info-options this would have been rather confusing. 724 | 725 | Because the old PCRE-function "pcre_info" is obsolete, I chose to 726 | drop its OCaml-interface. This part is infrequently used anyway. 727 | 728 | See the interface file and the ChangeLog + documentation 729 | of the C-library to see, what has changed in detail. 730 | 731 | The other changes marginally effect efficiency. One 732 | bug was present in the last release in the "info"-part 733 | (polymorphic variants were incorrectly represented). 734 | This has changed anyway and should work fine now. 735 | 736 | 2000-01-10: `foreach_line` uses stdin as labeled (label: 'in') default 737 | argument now. 738 | 739 | 1999-12-29: Lots of changes: 740 | 741 | * Uses the new features of OCaml 2.99: 742 | 743 | Labeled parameters and default arguments for much more 744 | convenience. E.g. write 745 | 746 | split pos:1 "foo bar" 747 | 748 | to get the list ["oo"; "bar"]. 749 | 750 | Polymorphic variants for passing options: this change allows, 751 | for example, using the data constructor "ANCHORED" for both 752 | compiling flags and at matching time instead of "C_ANCHORED" 753 | and "R_ANCHORED". 754 | 755 | * Got rid of all the superfluous shortcuts, like "bounded_psplit" 756 | and the like. Labeled parameters are much more readable and 757 | convenient. 758 | 759 | * Removed the functions compatible to the "Str"-module. It's 760 | probably rather confusing for people to see two different kinds 761 | of implementations. 762 | 763 | * Renamed functions due to the use of labels and the removing of 764 | the obsolete compatibility functions. 765 | 766 | * Updated documentation with more details on using the library. 767 | 768 | 1999-12-21: Small change (2 chars...) to make the C-interface compile with 769 | the newest release (OCaml 2.99). 770 | 771 | 1999-09-27: Fixed a bug which occurs, when replacements or substitutions 772 | in strings are longer than the subject string itself. 773 | This concerns functions `replace_all`, `qreplace_all` and 774 | `substitute_all` + their variants. 775 | 776 | THANKS to GERD STOLPMANN (Gerd.Stolpmann@darmstadt.netsurf.de) 777 | for finding another bug. 778 | 779 | 1999-09-21: Fixed some bugs in `pcre_intf.c`: 780 | 781 | We used `Store_field` inappropriately throughout the file, 782 | which could lead to segfaults in the garbage collector. We 783 | corrected this by using the "Field"-macro where appropriate. 784 | 785 | Protected an input parameter from untimely reclamation. 786 | 787 | Initialized a field before throwing an exception to please 788 | the GC. 789 | 790 | The GC should be happy now... 791 | 792 | THANKS to GERD STOLPMANN (Gerd.Stolpmann@darmstadt.netsurf.de) 793 | for the patch. 794 | 795 | 1999-08-31: Updated to pcre-2.08 - this should fix some bugs. See 796 | "pcre-C/ChangeLog" for details. 797 | 798 | Renamed directory "pcre-C-2.07" to "pcre-C" to allow for 799 | easier upgrading. 800 | 801 | Added CVS-info to sources. 802 | 803 | 1999-08-24: Updated to pcre-2.07. Splitting is 100% PERL-compatible now. 804 | New runtime option: NOTEMPTY (see interface for details). 805 | 806 | Two more examples: cloc and count_hash. 807 | 808 | New functions: sregexpo, sregexp, sregexp_case_fold. 809 | 810 | 1999-08-02: Exception handling more regular now: 811 | 812 | Raises [Invalid_argument] instead of [Failure] where 813 | appropriate. 814 | Raises [InternalError] if C-library exhibits undefined 815 | behaviour (has never happened so far). 816 | 817 | Updated comments. 818 | 819 | 1999-07-30: Bugfix in `pcre_intf.c`: 820 | 821 | `pcre_ocaml` should work now on 64-bit architectures... 822 | 823 | 1999-07-29: Small fix in `pcre_intf.c`: 824 | 825 | Strict compilers (gcc is not strict) otherwise complain 826 | about undefined behaviour in a certain line. 827 | 828 | Explicitely mention all include files. 829 | Removed unused variable. 830 | 831 | Also moved a line for efficiency... 832 | 833 | 1999-07-28: First release. 834 | ``` 835 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 1999- Markus Mottl 2 | 3 | The Library is distributed under the terms of the GNU Lesser General 4 | Public License version 2.1 (included below). 5 | 6 | As a special exception to the GNU Lesser General Public License, you 7 | may link, statically or dynamically, a "work that uses the Library" 8 | with a publicly distributed version of the Library to produce an 9 | executable file containing portions of the Library, and distribute that 10 | executable file under terms of your choice, without any of the additional 11 | requirements listed in clause 6 of the GNU Lesser General Public License. 12 | By "a publicly distributed version of the Library", we mean either the 13 | unmodified Library as distributed by the authors, or a modified version 14 | of the Library that is distributed under the conditions defined in clause 15 | 2 of the GNU Lesser General Public License. This exception does not 16 | however invalidate any other reasons why the executable file might be 17 | covered by the GNU Lesser General Public License. 18 | 19 | --------------------------------------------------------------------------- 20 | 21 | ### GNU LESSER GENERAL PUBLIC LICENSE 22 | 23 | Version 2.1, February 1999 24 | 25 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 26 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 27 | 28 | Everyone is permitted to copy and distribute verbatim copies 29 | of this license document, but changing it is not allowed. 30 | 31 | [This is the first released version of the Lesser GPL. It also counts 32 | as the successor of the GNU Library Public License, version 2, hence 33 | the version number 2.1.] 34 | 35 | ### Preamble 36 | 37 | The licenses for most software are designed to take away your freedom 38 | to share and change it. By contrast, the GNU General Public Licenses 39 | are intended to guarantee your freedom to share and change free 40 | software--to make sure the software is free for all its users. 41 | 42 | This license, the Lesser General Public License, applies to some 43 | specially designated software packages--typically libraries--of the 44 | Free Software Foundation and other authors who decide to use it. You 45 | can use it too, but we suggest you first think carefully about whether 46 | this license or the ordinary General Public License is the better 47 | strategy to use in any particular case, based on the explanations 48 | below. 49 | 50 | When we speak of free software, we are referring to freedom of use, 51 | not price. Our General Public Licenses are designed to make sure that 52 | you have the freedom to distribute copies of free software (and charge 53 | for this service if you wish); that you receive source code or can get 54 | it if you want it; that you can change the software and use pieces of 55 | it in new free programs; and that you are informed that you can do 56 | these things. 57 | 58 | To protect your rights, we need to make restrictions that forbid 59 | distributors to deny you these rights or to ask you to surrender these 60 | rights. These restrictions translate to certain responsibilities for 61 | you if you distribute copies of the library or if you modify it. 62 | 63 | For example, if you distribute copies of the library, whether gratis 64 | or for a fee, you must give the recipients all the rights that we gave 65 | you. You must make sure that they, too, receive or can get the source 66 | code. If you link other code with the library, you must provide 67 | complete object files to the recipients, so that they can relink them 68 | with the library after making changes to the library and recompiling 69 | it. And you must show them these terms so they know their rights. 70 | 71 | We protect your rights with a two-step method: (1) we copyright the 72 | library, and (2) we offer you this license, which gives you legal 73 | permission to copy, distribute and/or modify the library. 74 | 75 | To protect each distributor, we want to make it very clear that there 76 | is no warranty for the free library. Also, if the library is modified 77 | by someone else and passed on, the recipients should know that what 78 | they have is not the original version, so that the original author's 79 | reputation will not be affected by problems that might be introduced 80 | by others. 81 | 82 | Finally, software patents pose a constant threat to the existence of 83 | any free program. We wish to make sure that a company cannot 84 | effectively restrict the users of a free program by obtaining a 85 | restrictive license from a patent holder. Therefore, we insist that 86 | any patent license obtained for a version of the library must be 87 | consistent with the full freedom of use specified in this license. 88 | 89 | Most GNU software, including some libraries, is covered by the 90 | ordinary GNU General Public License. This license, the GNU Lesser 91 | General Public License, applies to certain designated libraries, and 92 | is quite different from the ordinary General Public License. We use 93 | this license for certain libraries in order to permit linking those 94 | libraries into non-free programs. 95 | 96 | When a program is linked with a library, whether statically or using a 97 | shared library, the combination of the two is legally speaking a 98 | combined work, a derivative of the original library. The ordinary 99 | General Public License therefore permits such linking only if the 100 | entire combination fits its criteria of freedom. The Lesser General 101 | Public License permits more lax criteria for linking other code with 102 | the library. 103 | 104 | We call this license the "Lesser" General Public License because it 105 | does Less to protect the user's freedom than the ordinary General 106 | Public License. It also provides other free software developers Less 107 | of an advantage over competing non-free programs. These disadvantages 108 | are the reason we use the ordinary General Public License for many 109 | libraries. However, the Lesser license provides advantages in certain 110 | special circumstances. 111 | 112 | For example, on rare occasions, there may be a special need to 113 | encourage the widest possible use of a certain library, so that it 114 | becomes a de-facto standard. To achieve this, non-free programs must 115 | be allowed to use the library. A more frequent case is that a free 116 | library does the same job as widely used non-free libraries. In this 117 | case, there is little to gain by limiting the free library to free 118 | software only, so we use the Lesser General Public License. 119 | 120 | In other cases, permission to use a particular library in non-free 121 | programs enables a greater number of people to use a large body of 122 | free software. For example, permission to use the GNU C Library in 123 | non-free programs enables many more people to use the whole GNU 124 | operating system, as well as its variant, the GNU/Linux operating 125 | system. 126 | 127 | Although the Lesser General Public License is Less protective of the 128 | users' freedom, it does ensure that the user of a program that is 129 | linked with the Library has the freedom and the wherewithal to run 130 | that program using a modified version of the Library. 131 | 132 | The precise terms and conditions for copying, distribution and 133 | modification follow. Pay close attention to the difference between a 134 | "work based on the library" and a "work that uses the library". The 135 | former contains code derived from the library, whereas the latter must 136 | be combined with the library in order to run. 137 | 138 | ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 139 | 140 | **0.** This License Agreement applies to any software library or other 141 | program which contains a notice placed by the copyright holder or 142 | other authorized party saying it may be distributed under the terms of 143 | this Lesser General Public License (also called "this License"). Each 144 | licensee is addressed as "you". 145 | 146 | A "library" means a collection of software functions and/or data 147 | prepared so as to be conveniently linked with application programs 148 | (which use some of those functions and data) to form executables. 149 | 150 | The "Library", below, refers to any such software library or work 151 | which has been distributed under these terms. A "work based on the 152 | Library" means either the Library or any derivative work under 153 | copyright law: that is to say, a work containing the Library or a 154 | portion of it, either verbatim or with modifications and/or translated 155 | straightforwardly into another language. (Hereinafter, translation is 156 | included without limitation in the term "modification".) 157 | 158 | "Source code" for a work means the preferred form of the work for 159 | making modifications to it. For a library, complete source code means 160 | all the source code for all modules it contains, plus any associated 161 | interface definition files, plus the scripts used to control 162 | compilation and installation of the library. 163 | 164 | Activities other than copying, distribution and modification are not 165 | covered by this License; they are outside its scope. The act of 166 | running a program using the Library is not restricted, and output from 167 | such a program is covered only if its contents constitute a work based 168 | on the Library (independent of the use of the Library in a tool for 169 | writing it). Whether that is true depends on what the Library does and 170 | what the program that uses the Library does. 171 | 172 | **1.** You may copy and distribute verbatim copies of the Library's 173 | complete source code as you receive it, in any medium, provided that 174 | you conspicuously and appropriately publish on each copy an 175 | appropriate copyright notice and disclaimer of warranty; keep intact 176 | all the notices that refer to this License and to the absence of any 177 | warranty; and distribute a copy of this License along with the 178 | Library. 179 | 180 | You may charge a fee for the physical act of transferring a copy, and 181 | you may at your option offer warranty protection in exchange for a 182 | fee. 183 | 184 | **2.** You may modify your copy or copies of the Library or any 185 | portion of it, thus forming a work based on the Library, and copy and 186 | distribute such modifications or work under the terms of Section 1 187 | above, provided that you also meet all of these conditions: 188 | 189 | - **a)** The modified work must itself be a software library. 190 | - **b)** You must cause the files modified to carry prominent 191 | notices stating that you changed the files and the date of 192 | any change. 193 | - **c)** You must cause the whole of the work to be licensed at no 194 | charge to all third parties under the terms of this License. 195 | - **d)** If a facility in the modified Library refers to a function 196 | or a table of data to be supplied by an application program that 197 | uses the facility, other than as an argument passed when the 198 | facility is invoked, then you must make a good faith effort to 199 | ensure that, in the event an application does not supply such 200 | function or table, the facility still operates, and performs 201 | whatever part of its purpose remains meaningful. 202 | 203 | (For example, a function in a library to compute square roots has 204 | a purpose that is entirely well-defined independent of 205 | the application. Therefore, Subsection 2d requires that any 206 | application-supplied function or table used by this function must 207 | be optional: if the application does not supply it, the square 208 | root function must still compute square roots.) 209 | 210 | These requirements apply to the modified work as a whole. If 211 | identifiable sections of that work are not derived from the Library, 212 | and can be reasonably considered independent and separate works in 213 | themselves, then this License, and its terms, do not apply to those 214 | sections when you distribute them as separate works. But when you 215 | distribute the same sections as part of a whole which is a work based 216 | on the Library, the distribution of the whole must be on the terms of 217 | this License, whose permissions for other licensees extend to the 218 | entire whole, and thus to each and every part regardless of who wrote 219 | it. 220 | 221 | Thus, it is not the intent of this section to claim rights or contest 222 | your rights to work written entirely by you; rather, the intent is to 223 | exercise the right to control the distribution of derivative or 224 | collective works based on the Library. 225 | 226 | In addition, mere aggregation of another work not based on the Library 227 | with the Library (or with a work based on the Library) on a volume of 228 | a storage or distribution medium does not bring the other work under 229 | the scope of this License. 230 | 231 | **3.** You may opt to apply the terms of the ordinary GNU General 232 | Public License instead of this License to a given copy of the Library. 233 | To do this, you must alter all the notices that refer to this License, 234 | so that they refer to the ordinary GNU General Public License, version 235 | 2, instead of to this License. (If a newer version than version 2 of 236 | the ordinary GNU General Public License has appeared, then you can 237 | specify that version instead if you wish.) Do not make any other 238 | change in these notices. 239 | 240 | Once this change is made in a given copy, it is irreversible for that 241 | copy, so the ordinary GNU General Public License applies to all 242 | subsequent copies and derivative works made from that copy. 243 | 244 | This option is useful when you wish to copy part of the code of the 245 | Library into a program that is not a library. 246 | 247 | **4.** You may copy and distribute the Library (or a portion or 248 | derivative of it, under Section 2) in object code or executable form 249 | under the terms of Sections 1 and 2 above provided that you accompany 250 | it with the complete corresponding machine-readable source code, which 251 | must be distributed under the terms of Sections 1 and 2 above on a 252 | medium customarily used for software interchange. 253 | 254 | If distribution of object code is made by offering access to copy from 255 | a designated place, then offering equivalent access to copy the source 256 | code from the same place satisfies the requirement to distribute the 257 | source code, even though third parties are not compelled to copy the 258 | source along with the object code. 259 | 260 | **5.** A program that contains no derivative of any portion of the 261 | Library, but is designed to work with the Library by being compiled or 262 | linked with it, is called a "work that uses the Library". Such a work, 263 | in isolation, is not a derivative work of the Library, and therefore 264 | falls outside the scope of this License. 265 | 266 | However, linking a "work that uses the Library" with the Library 267 | creates an executable that is a derivative of the Library (because it 268 | contains portions of the Library), rather than a "work that uses the 269 | library". The executable is therefore covered by this License. Section 270 | 6 states terms for distribution of such executables. 271 | 272 | When a "work that uses the Library" uses material from a header file 273 | that is part of the Library, the object code for the work may be a 274 | derivative work of the Library even though the source code is not. 275 | Whether this is true is especially significant if the work can be 276 | linked without the Library, or if the work is itself a library. The 277 | threshold for this to be true is not precisely defined by law. 278 | 279 | If such an object file uses only numerical parameters, data structure 280 | layouts and accessors, and small macros and small inline functions 281 | (ten lines or less in length), then the use of the object file is 282 | unrestricted, regardless of whether it is legally a derivative work. 283 | (Executables containing this object code plus portions of the Library 284 | will still fall under Section 6.) 285 | 286 | Otherwise, if the work is a derivative of the Library, you may 287 | distribute the object code for the work under the terms of Section 6. 288 | Any executables containing that work also fall under Section 6, 289 | whether or not they are linked directly with the Library itself. 290 | 291 | **6.** As an exception to the Sections above, you may also combine or 292 | link a "work that uses the Library" with the Library to produce a work 293 | containing portions of the Library, and distribute that work under 294 | terms of your choice, provided that the terms permit modification of 295 | the work for the customer's own use and reverse engineering for 296 | debugging such modifications. 297 | 298 | You must give prominent notice with each copy of the work that the 299 | Library is used in it and that the Library and its use are covered by 300 | this License. You must supply a copy of this License. If the work 301 | during execution displays copyright notices, you must include the 302 | copyright notice for the Library among them, as well as a reference 303 | directing the user to the copy of this License. Also, you must do one 304 | of these things: 305 | 306 | - **a)** Accompany the work with the complete corresponding 307 | machine-readable source code for the Library including whatever 308 | changes were used in the work (which must be distributed under 309 | Sections 1 and 2 above); and, if the work is an executable linked 310 | with the Library, with the complete machine-readable "work that 311 | uses the Library", as object code and/or source code, so that the 312 | user can modify the Library and then relink to produce a modified 313 | executable containing the modified Library. (It is understood that 314 | the user who changes the contents of definitions files in the 315 | Library will not necessarily be able to recompile the application 316 | to use the modified definitions.) 317 | - **b)** Use a suitable shared library mechanism for linking with 318 | the Library. A suitable mechanism is one that (1) uses at run time 319 | a copy of the library already present on the user's computer 320 | system, rather than copying library functions into the executable, 321 | and (2) will operate properly with a modified version of the 322 | library, if the user installs one, as long as the modified version 323 | is interface-compatible with the version that the work was 324 | made with. 325 | - **c)** Accompany the work with a written offer, valid for at least 326 | three years, to give the same user the materials specified in 327 | Subsection 6a, above, for a charge no more than the cost of 328 | performing this distribution. 329 | - **d)** If distribution of the work is made by offering access to 330 | copy from a designated place, offer equivalent access to copy the 331 | above specified materials from the same place. 332 | - **e)** Verify that the user has already received a copy of these 333 | materials or that you have already sent this user a copy. 334 | 335 | For an executable, the required form of the "work that uses the 336 | Library" must include any data and utility programs needed for 337 | reproducing the executable from it. However, as a special exception, 338 | the materials to be distributed need not include anything that is 339 | normally distributed (in either source or binary form) with the major 340 | components (compiler, kernel, and so on) of the operating system on 341 | which the executable runs, unless that component itself accompanies 342 | the executable. 343 | 344 | It may happen that this requirement contradicts the license 345 | restrictions of other proprietary libraries that do not normally 346 | accompany the operating system. Such a contradiction means you cannot 347 | use both them and the Library together in an executable that you 348 | distribute. 349 | 350 | **7.** You may place library facilities that are a work based on the 351 | Library side-by-side in a single library together with other library 352 | facilities not covered by this License, and distribute such a combined 353 | library, provided that the separate distribution of the work based on 354 | the Library and of the other library facilities is otherwise 355 | permitted, and provided that you do these two things: 356 | 357 | - **a)** Accompany the combined library with a copy of the same work 358 | based on the Library, uncombined with any other 359 | library facilities. This must be distributed under the terms of 360 | the Sections above. 361 | - **b)** Give prominent notice with the combined library of the fact 362 | that part of it is a work based on the Library, and explaining 363 | where to find the accompanying uncombined form of the same work. 364 | 365 | **8.** You may not copy, modify, sublicense, link with, or distribute 366 | the Library except as expressly provided under this License. Any 367 | attempt otherwise to copy, modify, sublicense, link with, or 368 | distribute the Library is void, and will automatically terminate your 369 | rights under this License. However, parties who have received copies, 370 | or rights, from you under this License will not have their licenses 371 | terminated so long as such parties remain in full compliance. 372 | 373 | **9.** You are not required to accept this License, since you have not 374 | signed it. However, nothing else grants you permission to modify or 375 | distribute the Library or its derivative works. These actions are 376 | prohibited by law if you do not accept this License. Therefore, by 377 | modifying or distributing the Library (or any work based on the 378 | Library), you indicate your acceptance of this License to do so, and 379 | all its terms and conditions for copying, distributing or modifying 380 | the Library or works based on it. 381 | 382 | **10.** Each time you redistribute the Library (or any work based on 383 | the Library), the recipient automatically receives a license from the 384 | original licensor to copy, distribute, link with or modify the Library 385 | subject to these terms and conditions. You may not impose any further 386 | restrictions on the recipients' exercise of the rights granted herein. 387 | You are not responsible for enforcing compliance by third parties with 388 | this License. 389 | 390 | **11.** If, as a consequence of a court judgment or allegation of 391 | patent infringement or for any other reason (not limited to patent 392 | issues), conditions are imposed on you (whether by court order, 393 | agreement or otherwise) that contradict the conditions of this 394 | License, they do not excuse you from the conditions of this License. 395 | If you cannot distribute so as to satisfy simultaneously your 396 | obligations under this License and any other pertinent obligations, 397 | then as a consequence you may not distribute the Library at all. For 398 | example, if a patent license would not permit royalty-free 399 | redistribution of the Library by all those who receive copies directly 400 | or indirectly through you, then the only way you could satisfy both it 401 | and this License would be to refrain entirely from distribution of the 402 | Library. 403 | 404 | If any portion of this section is held invalid or unenforceable under 405 | any particular circumstance, the balance of the section is intended to 406 | apply, and the section as a whole is intended to apply in other 407 | circumstances. 408 | 409 | It is not the purpose of this section to induce you to infringe any 410 | patents or other property right claims or to contest validity of any 411 | such claims; this section has the sole purpose of protecting the 412 | integrity of the free software distribution system which is 413 | implemented by public license practices. Many people have made 414 | generous contributions to the wide range of software distributed 415 | through that system in reliance on consistent application of that 416 | system; it is up to the author/donor to decide if he or she is willing 417 | to distribute software through any other system and a licensee cannot 418 | impose that choice. 419 | 420 | This section is intended to make thoroughly clear what is believed to 421 | be a consequence of the rest of this License. 422 | 423 | **12.** If the distribution and/or use of the Library is restricted in 424 | certain countries either by patents or by copyrighted interfaces, the 425 | original copyright holder who places the Library under this License 426 | may add an explicit geographical distribution limitation excluding 427 | those countries, so that distribution is permitted only in or among 428 | countries not thus excluded. In such case, this License incorporates 429 | the limitation as if written in the body of this License. 430 | 431 | **13.** The Free Software Foundation may publish revised and/or new 432 | versions of the Lesser General Public License from time to time. Such 433 | new versions will be similar in spirit to the present version, but may 434 | differ in detail to address new problems or concerns. 435 | 436 | Each version is given a distinguishing version number. If the Library 437 | specifies a version number of this License which applies to it and 438 | "any later version", you have the option of following the terms and 439 | conditions either of that version or of any later version published by 440 | the Free Software Foundation. If the Library does not specify a 441 | license version number, you may choose any version ever published by 442 | the Free Software Foundation. 443 | 444 | **14.** If you wish to incorporate parts of the Library into other 445 | free programs whose distribution conditions are incompatible with 446 | these, write to the author to ask for permission. For software which 447 | is copyrighted by the Free Software Foundation, write to the Free 448 | Software Foundation; we sometimes make exceptions for this. Our 449 | decision will be guided by the two goals of preserving the free status 450 | of all derivatives of our free software and of promoting the sharing 451 | and reuse of software generally. 452 | 453 | **NO WARRANTY** 454 | 455 | **15.** BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 456 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 457 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 458 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 459 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 460 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 461 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 462 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 463 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 464 | 465 | **16.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 466 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 467 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 468 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 469 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 470 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 471 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 472 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 473 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 474 | DAMAGES. 475 | 476 | ### END OF TERMS AND CONDITIONS 477 | 478 | ### How to Apply These Terms to Your New Libraries 479 | 480 | If you develop a new library, and you want it to be of the greatest 481 | possible use to the public, we recommend making it free software that 482 | everyone can redistribute and change. You can do so by permitting 483 | redistribution under these terms (or, alternatively, under the terms 484 | of the ordinary General Public License). 485 | 486 | To apply these terms, attach the following notices to the library. It 487 | is safest to attach them to the start of each source file to most 488 | effectively convey the exclusion of warranty; and each file should 489 | have at least the "copyright" line and a pointer to where the full 490 | notice is found. 491 | 492 | one line to give the library's name and an idea of what it does. 493 | Copyright (C) year name of author 494 | 495 | This library is free software; you can redistribute it and/or 496 | modify it under the terms of the GNU Lesser General Public 497 | License as published by the Free Software Foundation; either 498 | version 2.1 of the License, or (at your option) any later version. 499 | 500 | This library is distributed in the hope that it will be useful, 501 | but WITHOUT ANY WARRANTY; without even the implied warranty of 502 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 503 | Lesser General Public License for more details. 504 | 505 | You should have received a copy of the GNU Lesser General Public 506 | License along with this library; if not, write to the Free Software 507 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 508 | 509 | Also add information on how to contact you by electronic and paper 510 | mail. 511 | 512 | You should also get your employer (if you work as a programmer) or 513 | your school, if any, to sign a "copyright disclaimer" for the library, 514 | if necessary. Here is a sample; alter the names: 515 | 516 | Yoyodyne, Inc., hereby disclaims all copyright interest in 517 | the library `Frob' (a library for tweaking knobs) written 518 | by James Random Hacker. 519 | 520 | signature of Ty Coon, 1 April 1990 521 | Ty Coon, President of Vice 522 | 523 | That's all there is to it! 524 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all clean doc 2 | 3 | all: 4 | dune build @install 5 | 6 | clean: 7 | dune clean 8 | 9 | doc: 10 | dune build @doc 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PCRE-OCaml - Perl Compatibility Regular Expressions for OCaml 2 | 3 | This [OCaml](http://www.ocaml.org) library interfaces with the C library 4 | [PCRE](http://www.pcre.org), providing Perl-compatible regular expressions 5 | for string matching. 6 | 7 | ## Features 8 | 9 | PCRE-OCaml offers: 10 | 11 | - Pattern searching 12 | - Subpattern extraction 13 | - String splitting by patterns 14 | - Pattern substitution 15 | 16 | Reasons to choose PCRE-OCaml: 17 | 18 | - The PCRE library by Philip Hazel is mature and stable, implementing nearly 19 | all Perl regular expression features. High-level OCaml functions (split, 20 | replace, etc.) are compatible with Perl functions, as much as OCaml allows. 21 | Some developers find Perl-style regex syntax more intuitive and powerful 22 | than the Emacs-style regex used in OCaml's `Str` module. 23 | 24 | - PCRE-OCaml is reentrant and thread-safe, unlike the `Str` module. This 25 | reentrancy offers convenience, eliminating concerns about library state. 26 | 27 | - High-level replacement and substitution functions in OCaml are faster than 28 | those in the `Str` module. When compiled to native code, they can even 29 | outperform Perl's C-based functions. 30 | 31 | - Returned data is unique, allowing safe destructive updates without side 32 | effects. 33 | 34 | - The library interface uses labels and default arguments for enhanced 35 | programming comfort. 36 | 37 | ## Usage 38 | 39 | Please run: 40 | 41 | ```sh 42 | odig odoc pcre2 43 | ``` 44 | 45 | Or: 46 | 47 | ```sh 48 | dune build @doc 49 | ``` 50 | 51 | Consult the [API](https://mmottl.github.io/pcre-ocaml/api/pcre) for details. 52 | 53 | Functions support two flag types: 54 | 55 | 1. **Convenience flags**: Readable and concise, translated internally on each 56 | call. Example: 57 | 58 | ```ocaml 59 | let rex = Pcre.regexp ~flags:[`ANCHORED; `CASELESS] "some pattern" in 60 | (* ... *) 61 | ``` 62 | 63 | These are easy to use but may incur overhead in loops. For performance 64 | optimization, consider the next approach. 65 | 66 | 2. **Internal flags**: Predefined and translated from convenience flags for 67 | optimal loop performance. Example: 68 | 69 | ```ocaml 70 | let iflags = Pcre.cflags [`ANCHORED; `CASELESS] in 71 | for i = 1 to 1000 do 72 | let rex = Pcre.regexp ~iflags "some pattern constructed at runtime" in 73 | (* ... *) 74 | done 75 | ``` 76 | 77 | Translating flags outside loops saves cycles. Avoid creating regex in 78 | loops: 79 | 80 | ```ocaml 81 | for i = 1 to 1000 do 82 | let chunks = Pcre.split ~pat:"[ \t]+" "foo bar" in 83 | (* ... *) 84 | done 85 | ``` 86 | 87 | Instead, predefine the regex: 88 | 89 | ```ocaml 90 | let rex = Pcre.regexp "[ \t]+" in 91 | for i = 1 to 1000 do 92 | let chunks = Pcre.split ~rex "foo bar" in 93 | (* ... *) 94 | done 95 | ``` 96 | 97 | Functions use optional arguments with intuitive defaults. For instance, 98 | `Pcre.split` defaults to whitespace as the pattern. The `examples` directory 99 | contains applications demonstrating PCRE-OCaml's functionality. 100 | 101 | ## Restartable (Partial) Pattern Matching 102 | 103 | PCRE includes a DFA match function for restarting partial matches with new 104 | input, exposed via `pcre_dfa_exec`. While not suitable for extracting 105 | submatches or splitting strings, it's useful for streaming and search tasks. 106 | 107 | Example of a partial match restarted: 108 | 109 | ```ocaml 110 | utop # open Pcre;; 111 | utop # let rex = regexp "12+3";; 112 | val rex : regexp = 113 | utop # let workspace = Array.make 40 0;; 114 | val workspace : int array = 115 | [| ... |] 116 | utop # pcre_dfa_exec ~rex ~flags:[`PARTIAL] ~workspace "12222";; 117 | Exception: Pcre.Error Partial. 118 | utop # pcre_dfa_exec ~rex ~flags:[`PARTIAL; `DFA_RESTART] ~workspace "2222222";; 119 | Exception: Pcre.Error Partial. 120 | utop # pcre_dfa_exec ~rex ~flags:[`PARTIAL; `DFA_RESTART] ~workspace "2222222";; 121 | Exception: Pcre.Error Partial. 122 | utop # pcre_dfa_exec ~rex ~flags:[`PARTIAL; `DFA_RESTART] ~workspace "223xxxx";; 123 | - : int array = [|0; 3; 0|] 124 | ``` 125 | 126 | Refer to the `pcre_dfa_exec` documentation and the `dfa_restart` example for 127 | more information. 128 | 129 | ## Contact Information and Contributing 130 | 131 | Submit bug reports, feature requests, and contributions via the 132 | [GitHub issue tracker](https://github.com/mmottl/pcre-ocaml/issues). 133 | 134 | For the latest information, visit: 135 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (env 2 | (dev 3 | (flags 4 | (:standard -w -9 -principal)) 5 | (c_flags 6 | (:standard -Wall -pedantic -Wextra -Wunused))) 7 | (release 8 | (ocamlopt_flags 9 | (:standard -O3)))) 10 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.7) 2 | 3 | (name pcre) 4 | 5 | (generate_opam_files true) 6 | 7 | (source 8 | (github mmottl/pcre-ocaml)) 9 | 10 | (license "LGPL-2.1-or-later WITH OCaml-LGPL-linking-exception") 11 | 12 | (homepage "https://mmottl.github.io/pcre-ocaml") 13 | 14 | (documentation "https://mmottl.github.io/pcre-ocaml/api") 15 | 16 | (maintainers "Markus Mottl ") 17 | 18 | (authors "Markus Mottl ") 19 | 20 | (package 21 | (name pcre) 22 | (synopsis "Bindings to the Perl Compatibility Regular Expressions library") 23 | (description 24 | "pcre-ocaml offers library functions for string pattern matching and\nsubstitution, similar to the functionality offered by the Perl language.") 25 | (depends 26 | (ocaml 27 | (>= 4.08)) 28 | dune-configurator 29 | (conf-libpcre :build) 30 | (ounit2 :with-test))) 31 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS = $(addsuffix .bc, cloc count_hash dfa_restart pcregrep subst) 2 | 3 | .PHONY: all clean 4 | 5 | all: 6 | @dune build $(TARGETS) 7 | 8 | clean: 9 | @dune clean 10 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## `cloc` 4 | 5 | This program reads C source code from `stdin` and outputs it to `stdout` 6 | with comments and empty lines removed. It's useful for counting lines of code. 7 | 8 | ## `count_hash` 9 | 10 | This program reads text from `stdin`, counts occurrences of identical words 11 | separated by whitespace, and prints the result to `stdout`. 12 | 13 | ## `pcregrep` 14 | 15 | A grep-like program using Perl-compatible regular expressions. Start the 16 | program with the `-help` argument to see its functionality. 17 | 18 | ## `subst` 19 | 20 | Substitutes text in files using Perl-compatible regular expressions and 21 | substitution patterns. Start the program with the `-help` argument to see 22 | its functionality. 23 | 24 | Example invocation: 25 | 26 | ```sh 27 | subst '([Tt])ermcap' '$1ermCap' < /etc/termcap 28 | ``` 29 | 30 | ## `dfa_restart` 31 | 32 | Tests the DFA matching function and its partial match restart capability. 33 | Given a pattern, it accepts input incrementally, restarting the prior 34 | partial match until the pattern either succeeds or fails. 35 | 36 | Example interaction: 37 | 38 | ```sh 39 | $ dfa_restart.exe 'abc12+3' 40 | > abc 41 | partial match, provide more input: 42 | > 122222 43 | partial match, provide more input: 44 | > 222 45 | partial match, provide more input: 46 | > 3 47 | match completed: "[|0;1;0|]" 48 | ``` 49 | -------------------------------------------------------------------------------- /examples/cloc.ml: -------------------------------------------------------------------------------- 1 | open Pcre 2 | 3 | let read_whole_channel ch = 4 | let size = 4096 in 5 | let strbuf = Bytes.create size in 6 | let buf = Buffer.create 65536 in 7 | let len = ref size in 8 | while !len <> 0 do 9 | len := input ch strbuf 0 size; 10 | Buffer.add_subbytes buf strbuf 0 !len 11 | done; 12 | Buffer.contents buf 13 | 14 | let () = 15 | let str = read_whole_channel stdin in 16 | let str = qreplace ~pat:"/\\*(.|\n)*?\\*/" str in 17 | let str = qreplace_first ~pat:"^(\n|\\s)+" str in 18 | let str = qreplace ~pat:"\n+((\n|\\s)\n)*" ~templ:"\n" str in 19 | print_string str 20 | -------------------------------------------------------------------------------- /examples/count_hash.ml: -------------------------------------------------------------------------------- 1 | open Hashtbl 2 | 3 | let hash = create 1973 4 | let add_string s = try incr (find hash s) with Not_found -> add hash s (ref 1) 5 | ;; 6 | 7 | Pcre.foreach_line (fun line -> List.iter add_string (Pcre.split line)); 8 | iter (fun k v -> Printf.printf "%4d\t%s\n" !v k) hash 9 | -------------------------------------------------------------------------------- /examples/dfa_restart.ml: -------------------------------------------------------------------------------- 1 | open Pcre 2 | open Printf 3 | 4 | let show_array arr = 5 | Array.map string_of_int arr 6 | |> Array.to_list |> String.concat ";" |> sprintf "[|%s|]" 7 | 8 | let new_workspace () = Array.make 50 0 9 | 10 | let () = 11 | let pat = 12 | if Array.length Sys.argv > 1 then Sys.argv.(1) 13 | else ( 14 | eprintf "%s: expected pattern argument\n" Sys.argv.(0); 15 | exit 1) 16 | in 17 | let rex = regexp pat in 18 | let rec find_match flags workspace = 19 | print_string "> "; 20 | let line, eof = 21 | try (read_line (), false) with End_of_file -> ("", true) 22 | in 23 | match pcre_dfa_exec ~rex ~flags ~workspace line with 24 | | res -> 25 | printf "match completed: %S\n" (show_array res); 26 | if not eof then ( 27 | printf "\n *input & workspace reset*\n"; 28 | find_match [ `PARTIAL ] (new_workspace ())) 29 | | exception Error Partial -> 30 | printf "partial match, provide more input:\n"; 31 | find_match [ `DFA_RESTART; `PARTIAL ] workspace 32 | | exception exn -> 33 | (match exn with 34 | | Not_found -> eprintf "pattern match failed\n" 35 | | Error WorkspaceSize -> eprintf "need larger workspace vector\n" 36 | | Error (InternalError s) -> eprintf "internal error: %s\n" s 37 | | exn -> raise exn); 38 | exit 1 39 | in 40 | find_match [ `PARTIAL ] (new_workspace ()) 41 | -------------------------------------------------------------------------------- /examples/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (names cloc count_hash pcregrep subst dfa_restart) 3 | (libraries pcre) 4 | (modes byte exe)) 5 | -------------------------------------------------------------------------------- /examples/pcregrep.ml: -------------------------------------------------------------------------------- 1 | open Pcre 2 | open Printf 3 | 4 | let filenames = ref true 5 | and filenames_only = ref false 6 | and count_only = ref false 7 | and invert = ref false 8 | and number = ref false 9 | and silent = ref false 10 | and whole_lines = ref false 11 | 12 | let parse_args () = 13 | let ignore_case = ref false and pat = ref None and files = ref [] in 14 | 15 | let c = ("-c", Arg.Set count_only, "Count lines only.") 16 | and h = 17 | ( "-h", 18 | Arg.Clear filenames, 19 | "Suppress printing of filenames when searching multiple files." ) 20 | and i = ("-i", Arg.Set ignore_case, "Ignore case.") 21 | and l = 22 | ( "-l", 23 | Arg.Set filenames_only, 24 | "Only print names of files containing matching lines (once)." ) 25 | and n = 26 | ("-n", Arg.Set number, "Precede each line by its line number in the file.") 27 | and s = 28 | ( "-s", 29 | Arg.Set silent, 30 | "Display nothing but error messages. Exit status indicates match." ) 31 | and v = 32 | ("-v", Arg.Set invert, "Invert sense of the match: finds nonmatching lines.") 33 | and x = 34 | ( "-x", 35 | Arg.Set whole_lines, 36 | "Force the pattern to be anchored and to match the entire line." ) 37 | and usage = 38 | "Usage: pcregrep [options] pattern [file] ...\n\n\ 39 | Searches files for character patterns.\n" 40 | and anon_arg arg = 41 | if !pat = None then pat := Some arg else files := arg :: !files 42 | in 43 | 44 | let args = [ c; h; i; l; n; s; v; x ] in 45 | Arg.parse args anon_arg usage; 46 | 47 | let flags = 48 | let flag_list = if !ignore_case then [ `CASELESS ] else [] in 49 | if !whole_lines then `ANCHORED :: flag_list else flag_list 50 | in 51 | 52 | let rex = 53 | match !pat with 54 | | Some pat -> regexp ~flags pat 55 | | None -> 56 | eprintf "%s: not enough arguments!\n" Sys.argv.(0); 57 | Arg.usage args usage; 58 | exit 2 59 | in 60 | (rex, List.rev !files) 61 | 62 | let _ = 63 | let rex, files = parse_args () and rfl = rflags [] in 64 | 65 | let _, ovector = make_ovector rex in 66 | 67 | let pcregrep file name = 68 | let ret_code = ref 1 69 | and linenumber = ref 0 70 | and count = ref 0 71 | and stdin_print_name () = 72 | match name with 73 | | Some filename -> print_endline filename 74 | | None -> print_endline "" 75 | and print_name () = 76 | match name with Some name -> printf "%s:" name | None -> () 77 | in 78 | 79 | let try_match line = 80 | let matched = 81 | try 82 | unsafe_pcre_exec rfl rex ~pos:0 ~subj_start:0 ~subj:line ovector None; 83 | if !whole_lines && ovector.(1) <> String.length line then false 84 | else true 85 | with Not_found -> false 86 | in 87 | 88 | incr linenumber; 89 | 90 | if matched <> !invert then ( 91 | if !count_only then incr count 92 | else if !filenames_only then ( 93 | stdin_print_name (); 94 | raise Exit) 95 | else if !silent then raise Exit 96 | else ( 97 | print_name (); 98 | if !number then printf "%d:" !linenumber; 99 | print_endline line); 100 | ret_code := 0) 101 | in 102 | 103 | try 104 | foreach_line ~ic:file try_match; 105 | if !count_only then ( 106 | print_name (); 107 | printf "%d\n" !count); 108 | !ret_code 109 | with Exit -> 0 110 | in 111 | 112 | if files = [] then exit (pcregrep stdin None); 113 | 114 | if List.length files = 1 then filenames := false; 115 | if !filenames_only then filenames := true; 116 | 117 | let collect ret_code filename = 118 | try 119 | let file = open_in filename in 120 | let frc = pcregrep file (if !filenames then Some filename else None) in 121 | close_in file; 122 | if frc = 0 && ret_code = 1 then 0 else ret_code 123 | with Sys_error msg -> 124 | prerr_endline msg; 125 | 2 126 | in 127 | exit (List.fold_left collect 1 files) 128 | -------------------------------------------------------------------------------- /examples/subst.ml: -------------------------------------------------------------------------------- 1 | open Pcre 2 | 3 | let parse_args () = 4 | let quick = ref false 5 | and first = ref false 6 | and ignore_case = ref false 7 | and offset = ref 0 8 | and pat = ref None 9 | and substr = ref None in 10 | 11 | let q = 12 | ( "-q", 13 | Arg.Set quick, 14 | "Quick replacement. Interpretes substitution as plain text." ) 15 | and f = ("-f", Arg.Set first, "Replace first occurrence in line only.") 16 | and i = ("-i", Arg.Set ignore_case, "Ignore case.") 17 | and ofs = 18 | ("-ofs", Arg.Int (fun n -> offset := n), "Start matching at column n.") 19 | and usage = 20 | "Usage: subst [-q] [-f] [-i] [-ofs offset] pattern substitution\n\n\ 21 | Reads lines from standard input and replaces occurrences of\n\ 22 | the PERL-style regular expression \"pattern\" with \"substitution\",\n\ 23 | printing the result to standard output.\n\ 24 | In default mode the contents of \"substitution\" will be interpreted\n\ 25 | similarly to its equivalent in PERL.\n" 26 | and anon_arg arg = 27 | match (!pat, !substr) with 28 | | None, _ -> pat := Some arg 29 | | _, None -> substr := Some arg 30 | | _ -> raise (Arg.Bad "too many arguments!") 31 | in 32 | 33 | let args = [ q; f; i; ofs ] in 34 | Arg.parse args anon_arg usage; 35 | 36 | let flags = if !ignore_case then [ `CASELESS ] else [] in 37 | 38 | let rex, sstr = 39 | match (!pat, !substr) with 40 | | Some rex, Some sstr -> (regexp ~flags rex, sstr) 41 | | _ -> 42 | prerr_endline (Sys.argv.(0) ^ ": not enough arguments!"); 43 | Arg.usage args usage; 44 | exit 1 45 | in 46 | 47 | match (!quick, !first) with 48 | | false, false -> fun s -> replace ~rex ~pos:!offset ~templ:sstr s 49 | | true, false -> fun s -> qreplace ~rex ~pos:!offset ~templ:sstr s 50 | | false, true -> fun s -> replace_first ~rex ~pos:!offset ~templ:sstr s 51 | | true, true -> fun s -> qreplace_first ~rex ~pos:!offset ~templ:sstr s 52 | 53 | let _ = 54 | let substitute = parse_args () in 55 | foreach_line (fun line -> 56 | try print_endline (substitute line) 57 | with Invalid_argument _ -> print_endline line) 58 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS = pcre.cma libpcre_stubs.a 2 | 3 | .PHONY: all clean 4 | 5 | all: 6 | @dune build $(TARGETS) 7 | 8 | clean: 9 | @dune clean 10 | -------------------------------------------------------------------------------- /lib/config/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS = discover.bc 2 | 3 | .PHONY: all clean 4 | 5 | all: 6 | @dune build $(TARGETS) 7 | 8 | clean: 9 | @dune clean 10 | -------------------------------------------------------------------------------- /lib/config/discover.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | let module C = Configurator.V1 in 3 | C.main ~name:"pcre" (fun c -> 4 | let default : C.Pkg_config.package_conf = 5 | { libs = [ "-lpcre" ]; cflags = [] } 6 | in 7 | let conf = 8 | match C.Pkg_config.get c with 9 | | None -> default 10 | | Some pc -> 11 | Option.value (C.Pkg_config.query pc ~package:"libpcre") ~default 12 | in 13 | C.Flags.write_sexp "c_flags.sexp" conf.cflags; 14 | C.Flags.write_sexp "c_library_flags.sexp" conf.libs) 15 | -------------------------------------------------------------------------------- /lib/config/dune: -------------------------------------------------------------------------------- 1 | (executables 2 | (names discover) 3 | (libraries dune.configurator) 4 | (modes byte exe)) 5 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (public_name pcre) 3 | (foreign_stubs 4 | (language c) 5 | (names pcre_stubs) 6 | (flags 7 | (:standard) 8 | (:include c_flags.sexp) 9 | -O2 10 | -fPIC 11 | -DPIC)) 12 | (c_library_flags 13 | (:include c_library_flags.sexp))) 14 | 15 | (rule 16 | (targets c_flags.sexp c_library_flags.sexp) 17 | (action 18 | (run ./config/discover.exe))) 19 | -------------------------------------------------------------------------------- /lib/pcre.ml: -------------------------------------------------------------------------------- 1 | (* PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml 2 | 3 | Copyright © 1999- Markus Mottl 4 | 5 | This library is free software; you can redistribute it and/or modify it under 6 | the terms of the GNU Lesser General Public License as published by the Free 7 | Software Foundation; either version 2.1 of the License, or (at your option) 8 | any later version. 9 | 10 | This library is distributed in the hope that it will be useful, but WITHOUT 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | details. 14 | 15 | You should have received a copy of the GNU Lesser General Public License 16 | along with this library; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *) 18 | 19 | (* Public exceptions and their registration with the C runtime *) 20 | 21 | open Printf 22 | 23 | type error = 24 | | Partial 25 | | BadPartial 26 | | BadPattern of string * int 27 | | BadUTF8 28 | | BadUTF8Offset 29 | | MatchLimit 30 | | RecursionLimit 31 | | WorkspaceSize 32 | | InternalError of string 33 | 34 | let string_of_error = function 35 | | Partial -> "Partial" 36 | | BadPartial -> "BadPartial" 37 | | BadPattern (msg, pos) -> sprintf "Pcre.BadPattern(%S, pos=%i)" msg pos 38 | | BadUTF8 -> "BadUTF8" 39 | | BadUTF8Offset -> "BadUTF8Offset" 40 | | MatchLimit -> "MatchLimit" 41 | | RecursionLimit -> "RecursionLimit" 42 | | WorkspaceSize -> "WorkspaceSize" 43 | | InternalError msg -> sprintf "InternalError(%S)" msg 44 | 45 | exception Error of error 46 | exception Backtrack 47 | exception Regexp_or of string * error 48 | 49 | let string_of_exn = function 50 | | Error error -> Some (sprintf "Pcre.Error(%s)" (string_of_error error)) 51 | | Backtrack -> Some "Pcre.Backtrack" 52 | | Regexp_or (pat, error) -> 53 | Some (sprintf "Pcre.Regexp_or(pat=%S, %s)" pat (string_of_error error)) 54 | | _not_from_pcre -> None 55 | 56 | let () = Printexc.register_printer string_of_exn 57 | 58 | (* Puts exceptions into global C-variables for fast retrieval *) 59 | external pcre_ocaml_init : unit -> unit = "pcre_ocaml_init" 60 | 61 | (* Registers exceptions with the C runtime and caches polymorphic variants *) 62 | let () = 63 | Callback.register_exception "Pcre.Error" (Error (InternalError "")); 64 | Callback.register_exception "Pcre.Backtrack" Backtrack; 65 | pcre_ocaml_init () 66 | 67 | (* Compilation and runtime flags and their conversion functions *) 68 | 69 | type icflag = int 70 | type irflag = int 71 | 72 | (* Compilation flags *) 73 | 74 | type cflag = 75 | [ `CASELESS 76 | | `MULTILINE 77 | | `DOTALL 78 | | `EXTENDED 79 | | `ANCHORED 80 | | `DOLLAR_ENDONLY 81 | | `EXTRA 82 | | `UNGREEDY 83 | | `UTF8 84 | | `NO_UTF8_CHECK 85 | | `NO_AUTO_CAPTURE 86 | | `AUTO_CALLOUT 87 | | `FIRSTLINE ] 88 | 89 | let int_of_cflag = function 90 | | `CASELESS -> 0x0001 91 | | `MULTILINE -> 0x0002 92 | | `DOTALL -> 0x0004 93 | | `EXTENDED -> 0x0008 94 | | `ANCHORED -> 0x0010 95 | | `DOLLAR_ENDONLY -> 0x0020 96 | | `EXTRA -> 0x0040 97 | | `UNGREEDY -> 0x0200 98 | | `UTF8 -> 0x0800 99 | | `NO_AUTO_CAPTURE -> 0x1000 100 | | `NO_UTF8_CHECK -> 0x2000 101 | | `AUTO_CALLOUT -> 0x4000 102 | | `FIRSTLINE -> 0x40000 103 | 104 | let coll_icflag icflag flag = int_of_cflag flag lor icflag 105 | let cflags flags = List.fold_left coll_icflag 0 flags 106 | 107 | let cflag_of_int = function 108 | | 0x0001 -> `CASELESS 109 | | 0x0002 -> `MULTILINE 110 | | 0x0004 -> `DOTALL 111 | | 0x0008 -> `EXTENDED 112 | | 0x0010 -> `ANCHORED 113 | | 0x0020 -> `DOLLAR_ENDONLY 114 | | 0x0040 -> `EXTRA 115 | | 0x0200 -> `UNGREEDY 116 | | 0x0800 -> `UTF8 117 | | 0x1000 -> `NO_AUTO_CAPTURE 118 | | 0x2000 -> `NO_UTF8_CHECK 119 | | 0x4000 -> `AUTO_CALLOUT 120 | | 0x40000 -> `FIRSTLINE 121 | | _ -> failwith "Pcre.cflag_list: unknown compilation flag" 122 | 123 | let all_cflags = 124 | [ 125 | 0x0001; 126 | 0x0002; 127 | 0x0004; 128 | 0x0008; 129 | 0x0010; 130 | 0x0020; 131 | 0x0040; 132 | 0x0200; 133 | 0x0800; 134 | 0x1000; 135 | 0x2000; 136 | 0x4000; 137 | 0x40000; 138 | ] 139 | 140 | let cflag_list icflags = 141 | let coll flag_list flag = 142 | if icflags land flag <> 0 then cflag_of_int flag :: flag_list else flag_list 143 | in 144 | List.fold_left coll [] all_cflags 145 | 146 | (* Runtime flags *) 147 | 148 | type rflag = 149 | [ `ANCHORED | `NOTBOL | `NOTEOL | `NOTEMPTY | `PARTIAL | `DFA_RESTART ] 150 | 151 | let int_of_rflag = function 152 | | `ANCHORED -> 0x00010 153 | | `NOTBOL -> 0x00080 154 | | `NOTEOL -> 0x00100 155 | | `NOTEMPTY -> 0x00400 156 | | `PARTIAL -> 0x08000 157 | | `DFA_RESTART -> 0x20000 158 | 159 | let coll_irflag irflag flag = int_of_rflag flag lor irflag 160 | let rflags flags = List.fold_left coll_irflag 0 flags 161 | 162 | let rflag_of_int = function 163 | | 0x00010 -> `ANCHORED 164 | | 0x00080 -> `NOTBOL 165 | | 0x00100 -> `NOTEOL 166 | | 0x00400 -> `NOTEMPTY 167 | | 0x08000 -> `PARTIAL 168 | | 0x20000 -> `DFA_RESTART 169 | | _ -> failwith "Pcre.rflag_list: unknown runtime flag" 170 | 171 | let all_rflags = [ 0x0010; 0x0080; 0x0100; 0x0400; 0x8000; 0x20000 ] 172 | 173 | let rflag_list irflags = 174 | let coll flag_list flag = 175 | if irflags land flag <> 0 then rflag_of_int flag :: flag_list else flag_list 176 | in 177 | List.fold_left coll [] all_rflags 178 | 179 | (* Information on the PCRE-configuration (build-time options) *) 180 | 181 | external pcre_version : unit -> string = "pcre_version_stub" 182 | external pcre_config_utf8 : unit -> bool = "pcre_config_utf8_stub" [@@noalloc] 183 | 184 | external pcre_config_newline : unit -> char = "pcre_config_newline_stub" 185 | [@@noalloc] 186 | 187 | external pcre_config_link_size : unit -> (int[@untagged]) 188 | = "pcre_config_link_size_stub_bc" "pcre_config_link_size_stub" 189 | [@@noalloc] 190 | 191 | external pcre_config_match_limit : unit -> (int[@untagged]) 192 | = "pcre_config_match_limit_stub_bc" "pcre_config_match_limit_stub" 193 | [@@noalloc] 194 | 195 | external pcre_config_match_limit_recursion : unit -> (int[@untagged]) 196 | = "pcre_config_match_limit_recursion_stub_bc" 197 | "pcre_config_match_limit_recursion_stub" 198 | [@@noalloc] 199 | 200 | external pcre_config_stackrecurse : unit -> bool 201 | = "pcre_config_stackrecurse_stub" 202 | [@@noalloc] 203 | 204 | let version = pcre_version () 205 | let config_utf8 = pcre_config_utf8 () 206 | let config_newline = pcre_config_newline () 207 | let config_link_size = pcre_config_link_size () 208 | let config_match_limit = pcre_config_match_limit () 209 | let config_match_limit_recursion = pcre_config_match_limit_recursion () 210 | let config_stackrecurse = pcre_config_stackrecurse () 211 | 212 | (* Information on patterns *) 213 | 214 | type firstbyte_info = [ `Char of char | `Start_only | `ANCHORED ] 215 | type study_stat = [ `Not_studied | `Studied | `Optimal ] 216 | type regexp 217 | 218 | external options : regexp -> (icflag[@untagged]) 219 | = "pcre_options_stub_bc" "pcre_options_stub" 220 | 221 | external size : regexp -> (int[@untagged]) 222 | = "pcre_size_stub_bc" "pcre_size_stub" 223 | 224 | external studysize : regexp -> (int[@untagged]) 225 | = "pcre_studysize_stub_bc" "pcre_studysize_stub" 226 | 227 | external capturecount : regexp -> (int[@untagged]) 228 | = "pcre_capturecount_stub_bc" "pcre_capturecount_stub" 229 | 230 | external backrefmax : regexp -> (int[@untagged]) 231 | = "pcre_backrefmax_stub_bc" "pcre_backrefmax_stub" 232 | 233 | external namecount : regexp -> (int[@untagged]) 234 | = "pcre_namecount_stub_bc" "pcre_namecount_stub" 235 | 236 | external nameentrysize : regexp -> (int[@untagged]) 237 | = "pcre_nameentrysize_stub_bc" "pcre_nameentrysize_stub" 238 | 239 | external names : regexp -> string array = "pcre_names_stub" 240 | external firstbyte : regexp -> firstbyte_info = "pcre_firstbyte_stub" 241 | external firsttable : regexp -> string option = "pcre_firsttable_stub" 242 | external lastliteral : regexp -> char option = "pcre_lastliteral_stub" 243 | external study_stat : regexp -> study_stat = "pcre_study_stat_stub" [@@noalloc] 244 | 245 | (* Compilation of patterns *) 246 | 247 | type chtables 248 | 249 | external maketables : unit -> chtables = "pcre_maketables_stub" 250 | 251 | (* Internal use only! *) 252 | external pcre_study : regexp -> jit_compile:bool -> unit = "pcre_study_stub" 253 | 254 | external compile : (icflag[@untagged]) -> chtables option -> string -> regexp 255 | = "pcre_compile_stub_bc" "pcre_compile_stub" 256 | 257 | external get_match_limit : regexp -> int option = "pcre_get_match_limit_stub" 258 | 259 | external get_match_limit_recursion : regexp -> int option 260 | = "pcre_get_match_limit_recursion_stub" 261 | 262 | (* Internal use only! *) 263 | external set_imp_match_limit : regexp -> (int[@untagged]) -> regexp 264 | = "pcre_set_imp_match_limit_stub_bc" "pcre_set_imp_match_limit_stub" 265 | [@@noalloc] 266 | 267 | (* Internal use only! *) 268 | external set_imp_match_limit_recursion : regexp -> (int[@untagged]) -> regexp 269 | = "pcre_set_imp_match_limit_recursion_stub_bc" 270 | "pcre_set_imp_match_limit_recursion_stub" 271 | [@@noalloc] 272 | 273 | let regexp ?(study = true) ?(jit_compile = false) ?limit ?limit_recursion 274 | ?(iflags = 0) ?flags ?chtables pat = 275 | let rex = 276 | match flags with 277 | | Some flag_list -> compile (cflags flag_list) chtables pat 278 | | _ -> compile iflags chtables pat 279 | in 280 | if study then pcre_study ~jit_compile rex; 281 | let rex = 282 | match limit with None -> rex | Some lim -> set_imp_match_limit rex lim 283 | in 284 | match limit_recursion with 285 | | None -> rex 286 | | Some lim -> set_imp_match_limit_recursion rex lim 287 | 288 | let regexp_or ?study ?jit_compile ?limit ?limit_recursion ?(iflags = 0) ?flags 289 | ?chtables pats = 290 | let check pat = 291 | try ignore (regexp ~study:false ~iflags ?flags ?chtables pat) 292 | with Error error -> raise (Regexp_or (pat, error)) 293 | in 294 | List.iter check pats; 295 | let big_pat = 296 | let cnv pat = "(?:" ^ pat ^ ")" in 297 | String.concat "|" (List.rev (List.rev_map cnv pats)) 298 | in 299 | regexp ?study ?jit_compile ?limit ?limit_recursion ~iflags ?flags ?chtables 300 | big_pat 301 | 302 | let bytes_unsafe_blit_string str str_ofs bts bts_ofs len = 303 | let str_bts = Bytes.unsafe_of_string str in 304 | Bytes.unsafe_blit str_bts str_ofs bts bts_ofs len 305 | 306 | let string_unsafe_sub str ofs len = 307 | let res = Bytes.create len in 308 | bytes_unsafe_blit_string str ofs res 0 len; 309 | Bytes.unsafe_to_string res 310 | 311 | let quote s = 312 | let len = String.length s in 313 | let buf = Bytes.create (len lsl 1) in 314 | let pos = ref 0 in 315 | for i = 0 to len - 1 do 316 | match String.unsafe_get s i with 317 | | ('\\' | '^' | '$' | '.' | '[' | '|' | '(' | ')' | '?' | '*' | '+' | '{') 318 | as c -> 319 | Bytes.unsafe_set buf !pos '\\'; 320 | incr pos; 321 | Bytes.unsafe_set buf !pos c; 322 | incr pos 323 | | c -> 324 | Bytes.unsafe_set buf !pos c; 325 | incr pos 326 | done; 327 | string_unsafe_sub (Bytes.unsafe_to_string buf) 0 !pos 328 | 329 | (* Matching of patterns and subpattern extraction *) 330 | 331 | (* Default regular expression when none is provided by the user *) 332 | let def_rex = regexp ~jit_compile:true "\\s+" 333 | 334 | type substrings = string * int array 335 | 336 | type callout_data = { 337 | callout_number : int; 338 | substrings : substrings; 339 | start_match : int; 340 | current_position : int; 341 | capture_top : int; 342 | capture_last : int; 343 | pattern_position : int; 344 | next_item_length : int; 345 | } 346 | 347 | type callout = callout_data -> unit 348 | 349 | let get_subject (subj, _) = subj 350 | let num_of_subs (_, ovector) = Array.length ovector / 3 351 | 352 | let get_offset_start ovector str_num = 353 | if str_num < 0 || str_num >= Array.length ovector / 3 then 354 | invalid_arg "Pcre.get_offset_start: illegal offset"; 355 | let offset = str_num lsl 1 in 356 | (offset, Array.unsafe_get ovector offset) 357 | 358 | let get_substring_aux (subj, ovector) offset start = 359 | if start < 0 then raise Not_found 360 | else 361 | string_unsafe_sub subj start (Array.unsafe_get ovector (offset + 1) - start) 362 | 363 | let get_substring ((_, ovector) as substrings) str_num = 364 | let offset, start = get_offset_start ovector str_num in 365 | get_substring_aux substrings offset start 366 | 367 | let get_substring_ofs (_subj, ovector) str_num = 368 | let offset, start = get_offset_start ovector str_num in 369 | if start < 0 then raise Not_found 370 | else (start, Array.unsafe_get ovector (offset + 1)) 371 | 372 | let unsafe_get_substring ((_, ovector) as substrings) str_num = 373 | let offset = str_num lsl 1 in 374 | try get_substring_aux substrings offset (Array.unsafe_get ovector offset) 375 | with Not_found -> "" 376 | 377 | let get_substrings ?(full_match = true) ((_, ovector) as substrings) = 378 | if full_match then 379 | Array.init (Array.length ovector / 3) (unsafe_get_substring substrings) 380 | else 381 | let len = (Array.length ovector / 3) - 1 in 382 | Array.init len (fun n -> unsafe_get_substring substrings (n + 1)) 383 | 384 | let unsafe_get_opt_substring ((_, ovector) as substrings) str_num = 385 | let offset = str_num lsl 1 in 386 | try 387 | let start = Array.unsafe_get ovector offset in 388 | let str = get_substring_aux substrings offset start in 389 | Some str 390 | with Not_found -> None 391 | 392 | let get_opt_substrings ?(full_match = true) ((_, ovector) as substrings) = 393 | if full_match then 394 | Array.init (Array.length ovector / 3) (unsafe_get_opt_substring substrings) 395 | else 396 | let len = (Array.length ovector / 3) - 1 in 397 | Array.init len (fun n -> unsafe_get_opt_substring substrings (n + 1)) 398 | 399 | external get_stringnumber : regexp -> string -> (int[@untagged]) 400 | = "pcre_get_stringnumber_stub_bc" "pcre_get_stringnumber_stub" 401 | 402 | let get_named_substring rex name substrings = 403 | get_substring substrings (get_stringnumber rex name) 404 | 405 | let get_named_substring_ofs rex name substrings = 406 | get_substring_ofs substrings (get_stringnumber rex name) 407 | 408 | external unsafe_pcre_exec : 409 | (irflag[@untagged]) -> 410 | regexp -> 411 | pos:(int[@untagged]) -> 412 | subj_start:(int[@untagged]) -> 413 | subj:string -> 414 | int array -> 415 | callout option -> 416 | unit = "pcre_exec_stub_bc" "pcre_exec_stub" 417 | 418 | let make_ovector rex = 419 | let subgroups1 = capturecount rex + 1 in 420 | let subgroups2 = subgroups1 lsl 1 in 421 | (subgroups2, Array.make (subgroups1 + subgroups2) 0) 422 | 423 | external unsafe_pcre_dfa_exec : 424 | (irflag[@untagged]) -> 425 | regexp -> 426 | pos:(int[@untagged]) -> 427 | subj_start:(int[@untagged]) -> 428 | subj:string -> 429 | int array -> 430 | callout option -> 431 | workspace:int array -> 432 | unit = "pcre_dfa_exec_stub_bc" "pcre_exec_stub0" 433 | 434 | let pcre_dfa_exec ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) ?callout 435 | ?(workspace = Array.make 20 0) subj = 436 | let rex = match pat with Some str -> regexp str | _ -> rex in 437 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 438 | let _, ovector = make_ovector rex in 439 | unsafe_pcre_dfa_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout 440 | ~workspace; 441 | ovector 442 | 443 | let pcre_exec ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) ?callout 444 | subj = 445 | let rex = match pat with Some str -> regexp str | _ -> rex in 446 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 447 | let _, ovector = make_ovector rex in 448 | unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout; 449 | ovector 450 | 451 | let exec ?iflags ?flags ?rex ?pat ?pos ?callout subj = 452 | (subj, pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj) 453 | 454 | let next_match ?iflags ?flags ?rex ?pat ?(pos = 0) ?callout (subj, ovector) = 455 | let pos = Array.unsafe_get ovector 1 + pos in 456 | let subj_len = String.length subj in 457 | if pos < 0 || pos > subj_len then 458 | invalid_arg "Pcre.next_match: illegal offset"; 459 | (subj, pcre_exec ?iflags ?flags ?rex ?pat ~pos ?callout subj) 460 | 461 | let rec copy_lst ar n = function 462 | | [] -> ar 463 | | h :: t -> 464 | Array.unsafe_set ar n h; 465 | copy_lst ar (n - 1) t 466 | 467 | let exec_all ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj = 468 | let rex = match pat with Some str -> regexp str | _ -> rex in 469 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 470 | let ((_, ovector) as sstrs) = exec ~iflags ~rex ?pos ?callout subj in 471 | let null_flags = iflags lor 0x0400 in 472 | let subj_len = String.length subj in 473 | let rec loop pos ((subj, ovector) as sstrs) n lst = 474 | let maybe_ovector = 475 | try 476 | let first = Array.unsafe_get ovector 0 in 477 | if first = pos && Array.unsafe_get ovector 1 = pos then 478 | if pos = subj_len then None 479 | else Some (pcre_exec ~iflags:null_flags ~rex ~pos ?callout subj) 480 | else Some (pcre_exec ~iflags ~rex ~pos ?callout subj) 481 | with Not_found -> None 482 | in 483 | match maybe_ovector with 484 | | Some ovector -> 485 | let new_pos = Array.unsafe_get ovector 1 in 486 | loop new_pos (subj, ovector) (n + 1) (sstrs :: lst) 487 | | None -> copy_lst (Array.make (n + 1) sstrs) (n - 1) lst 488 | in 489 | loop (Array.unsafe_get ovector 1) sstrs 0 [] 490 | 491 | let extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj = 492 | get_substrings ?full_match (exec ?iflags ?flags ?rex ?pat ?pos ?callout subj) 493 | 494 | let extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj = 495 | get_opt_substrings ?full_match 496 | (exec ?iflags ?flags ?rex ?pat ?pos ?callout subj) 497 | 498 | let extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj = 499 | let many_sstrs = exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj in 500 | Array.map (get_substrings ?full_match) many_sstrs 501 | 502 | let extract_all_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj = 503 | let many_sstrs = exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj in 504 | Array.map (get_opt_substrings ?full_match) many_sstrs 505 | 506 | let pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj = 507 | try 508 | ignore (pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj); 509 | true 510 | with Not_found -> false 511 | 512 | (* String substitution *) 513 | 514 | (* Elements of a substitution pattern *) 515 | type subst = 516 | | SubstString of int * int (* Denotes a substring in the substitution *) 517 | | Backref of int (* nth backreference ($0 is program name!) *) 518 | | Match (* The whole matched string *) 519 | | PreMatch (* The string before the match *) 520 | | PostMatch (* The string after the match *) 521 | | LastParenMatch (* The last matched group *) 522 | 523 | (* Information on substitution patterns *) 524 | type substitution = 525 | string (* The substitution string *) 526 | * int (* Highest group number of backreferences *) 527 | * bool (* Makes use of "LastParenMatch" *) 528 | * subst list (* The list of substitution elements *) 529 | 530 | (* Only used internally in "subst" *) 531 | exception FoundAt of int 532 | 533 | let zero = Char.code '0' 534 | 535 | let subst str = 536 | let max_br = ref 0 in 537 | let with_lp = ref false in 538 | let lix = String.length str - 1 in 539 | let rec loop acc n = 540 | if lix < n then acc 541 | else 542 | try 543 | for i = n to lix do 544 | if String.unsafe_get str i = '$' then raise (FoundAt i) 545 | done; 546 | SubstString (n, lix - n + 1) :: acc 547 | with FoundAt i -> ( 548 | if i = lix then SubstString (n, lix - n + 1) :: acc 549 | else 550 | let i1 = i + 1 in 551 | let acc = if n = i then acc else SubstString (n, i - n) :: acc in 552 | match String.unsafe_get str i1 with 553 | | '0' .. '9' as c -> ( 554 | let subpat_nr = ref (Char.code c - zero) in 555 | try 556 | for j = i1 + 1 to lix do 557 | let c = String.unsafe_get str j in 558 | if c >= '0' && c <= '9' then 559 | subpat_nr := (10 * !subpat_nr) + Char.code c - zero 560 | else raise (FoundAt j) 561 | done; 562 | max_br := max !subpat_nr !max_br; 563 | Backref !subpat_nr :: acc 564 | with FoundAt j -> 565 | max_br := max !subpat_nr !max_br; 566 | loop (Backref !subpat_nr :: acc) j) 567 | | '!' -> loop acc (i1 + 1) 568 | | '$' -> loop (SubstString (i1, 1) :: acc) (i1 + 1) 569 | | '&' -> loop (Match :: acc) (i1 + 1) 570 | | '`' -> loop (PreMatch :: acc) (i1 + 1) 571 | | '\'' -> loop (PostMatch :: acc) (i1 + 1) 572 | | '+' -> 573 | with_lp := true; 574 | loop (LastParenMatch :: acc) (i1 + 1) 575 | | _ -> loop acc i1) 576 | in 577 | let subst_lst = loop [] 0 in 578 | (str, !max_br, !with_lp, subst_lst) 579 | 580 | let def_subst = subst "" 581 | 582 | (* Calculates a list of tuples (str, offset, len) which contain substrings to be 583 | copied on substitutions. Internal use only! *) 584 | let calc_trans_lst subgroups2 ovector subj templ subst_lst = 585 | let prefix_len = Array.unsafe_get ovector 0 in 586 | let last = Array.unsafe_get ovector 1 in 587 | let coll ((res_len, trans_lst) as accu) = 588 | let return_lst ((_str, _ix, len) as el) = 589 | if len = 0 then accu else (res_len + len, el :: trans_lst) 590 | in 591 | function 592 | | SubstString (ix, len) -> return_lst (templ, ix, len) 593 | | Backref 0 -> 594 | let prog_name = Sys.argv.(0) in 595 | return_lst (prog_name, 0, String.length prog_name) 596 | | Backref n -> 597 | let offset = n lsl 1 in 598 | let start = Array.unsafe_get ovector offset in 599 | let len = Array.unsafe_get ovector (offset + 1) - start in 600 | return_lst (subj, start, len) 601 | | Match -> return_lst (subj, prefix_len, last - prefix_len) 602 | | PreMatch -> return_lst (subj, 0, prefix_len) 603 | | PostMatch -> return_lst (subj, last, String.length subj - last) 604 | | LastParenMatch -> 605 | let subgroups2_2 = subgroups2 - 2 in 606 | let pos = ref subgroups2_2 in 607 | let ix = ref (Array.unsafe_get ovector subgroups2_2) in 608 | while !ix < 0 do 609 | let pos_2 = !pos - 2 in 610 | pos := pos_2; 611 | ix := Array.unsafe_get ovector pos_2 612 | done; 613 | return_lst (subj, !ix, Array.unsafe_get ovector (!pos + 1) - !ix) 614 | in 615 | List.fold_left coll (0, []) subst_lst 616 | 617 | let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) 618 | ?(itempl = def_subst) ?templ ?callout subj = 619 | let rex = match pat with Some str -> regexp str | _ -> rex in 620 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 621 | let templ, max_br, with_lp, subst_lst = 622 | match templ with Some str -> subst str | _ -> itempl 623 | in 624 | let subj_len = String.length subj in 625 | if pos < 0 || pos > subj_len then invalid_arg "Pcre.replace: illegal offset"; 626 | let subgroups2, ovector = make_ovector rex in 627 | let nsubs = (subgroups2 lsr 1) - 1 in 628 | if max_br > nsubs then 629 | failwith "Pcre.replace: backreference denotes nonexistent subpattern"; 630 | if with_lp && nsubs = 0 then failwith "Pcre.replace: no backreferences"; 631 | let rec loop full_len trans_lsts cur_pos = 632 | if 633 | cur_pos > subj_len 634 | || 635 | try 636 | unsafe_pcre_exec iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector 637 | callout; 638 | false 639 | with Not_found -> true 640 | then ( 641 | let postfix_len = max (subj_len - cur_pos) 0 in 642 | let left = pos + full_len in 643 | let res = Bytes.create (left + postfix_len) in 644 | bytes_unsafe_blit_string subj 0 res 0 pos; 645 | bytes_unsafe_blit_string subj cur_pos res left postfix_len; 646 | let inner_coll ofs (templ, ix, len) = 647 | bytes_unsafe_blit_string templ ix res ofs len; 648 | ofs + len 649 | in 650 | let coll ofs (res_len, trans_lst) = 651 | let new_ofs = ofs - res_len in 652 | let _ = List.fold_left inner_coll new_ofs trans_lst in 653 | new_ofs 654 | in 655 | let _ = List.fold_left coll left trans_lsts in 656 | Bytes.unsafe_to_string res) 657 | else 658 | let first = Array.unsafe_get ovector 0 in 659 | let len = first - cur_pos in 660 | let ((res_len, _) as trans_lst_el) = 661 | calc_trans_lst subgroups2 ovector subj templ subst_lst 662 | in 663 | let trans_lsts = 664 | if len > 0 then 665 | trans_lst_el :: (len, [ (subj, cur_pos, len) ]) :: trans_lsts 666 | else trans_lst_el :: trans_lsts 667 | in 668 | let full_len = full_len + len + res_len in 669 | let next = first + 1 in 670 | let last = Array.unsafe_get ovector 1 in 671 | if last < next then 672 | if first < subj_len then 673 | let new_trans_lsts = 674 | (1, [ (subj, cur_pos + len, 1) ]) :: trans_lsts 675 | in 676 | loop (full_len + 1) new_trans_lsts next 677 | else loop full_len trans_lsts next 678 | else loop full_len trans_lsts last 679 | in 680 | loop 0 [] pos 681 | 682 | let qreplace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) ?(templ = "") 683 | ?callout subj = 684 | let rex = match pat with Some str -> regexp str | _ -> rex in 685 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 686 | let subj_len = String.length subj in 687 | if pos < 0 || pos > subj_len then invalid_arg "Pcre.qreplace: illegal offset"; 688 | let templ_len = String.length templ in 689 | let _, ovector = make_ovector rex in 690 | let rec loop full_len subst_lst cur_pos = 691 | if 692 | cur_pos > subj_len 693 | || 694 | try 695 | unsafe_pcre_exec iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector 696 | callout; 697 | false 698 | with Not_found -> true 699 | then ( 700 | let postfix_len = max (subj_len - cur_pos) 0 in 701 | let left = pos + full_len in 702 | let res = Bytes.create (left + postfix_len) in 703 | bytes_unsafe_blit_string subj 0 res 0 pos; 704 | bytes_unsafe_blit_string subj cur_pos res left postfix_len; 705 | let coll ofs = function 706 | | Some (substr, ix, len) -> 707 | let new_ofs = ofs - len in 708 | bytes_unsafe_blit_string substr ix res new_ofs len; 709 | new_ofs 710 | | None -> 711 | let new_ofs = ofs - templ_len in 712 | bytes_unsafe_blit_string templ 0 res new_ofs templ_len; 713 | new_ofs 714 | in 715 | let _ = List.fold_left coll left subst_lst in 716 | Bytes.unsafe_to_string res) 717 | else 718 | let first = Array.unsafe_get ovector 0 in 719 | let len = first - cur_pos in 720 | let subst_lst = 721 | if len > 0 then None :: Some (subj, cur_pos, len) :: subst_lst 722 | else None :: subst_lst 723 | in 724 | let last = Array.unsafe_get ovector 1 in 725 | let full_len = full_len + len + templ_len in 726 | let next = first + 1 in 727 | if last < next then 728 | if first < subj_len then 729 | loop (full_len + 1) (Some (subj, cur_pos + len, 1) :: subst_lst) next 730 | else loop full_len subst_lst next 731 | else loop full_len subst_lst last 732 | in 733 | loop 0 [] pos 734 | 735 | let substitute_substrings ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) 736 | ?callout ~subst subj = 737 | let rex = match pat with Some str -> regexp str | _ -> rex in 738 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 739 | let subj_len = String.length subj in 740 | if pos < 0 || pos > subj_len then 741 | invalid_arg "Pcre.substitute: illegal offset"; 742 | let _, ovector = make_ovector rex in 743 | let rec loop full_len subst_lst cur_pos = 744 | if 745 | cur_pos > subj_len 746 | || 747 | try 748 | unsafe_pcre_exec iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector 749 | callout; 750 | false 751 | with Not_found -> true 752 | then ( 753 | let postfix_len = max (subj_len - cur_pos) 0 in 754 | let left = pos + full_len in 755 | let res = Bytes.create (left + postfix_len) in 756 | bytes_unsafe_blit_string subj 0 res 0 pos; 757 | bytes_unsafe_blit_string subj cur_pos res left postfix_len; 758 | let coll ofs (templ, ix, len) = 759 | let new_ofs = ofs - len in 760 | bytes_unsafe_blit_string templ ix res new_ofs len; 761 | new_ofs 762 | in 763 | let _ = List.fold_left coll left subst_lst in 764 | Bytes.unsafe_to_string res) 765 | else 766 | let first = Array.unsafe_get ovector 0 in 767 | let len = first - cur_pos in 768 | let templ = subst (subj, ovector) in 769 | let templ_len = String.length templ in 770 | let subst_lst = 771 | if len > 0 then 772 | (templ, 0, templ_len) :: (subj, cur_pos, len) :: subst_lst 773 | else (templ, 0, templ_len) :: subst_lst 774 | in 775 | let last = Array.unsafe_get ovector 1 in 776 | let full_len = full_len + len + templ_len in 777 | let next = first + 1 in 778 | if last < next then 779 | if first < subj_len then 780 | loop (full_len + 1) ((subj, cur_pos + len, 1) :: subst_lst) next 781 | else loop full_len subst_lst next 782 | else loop full_len subst_lst last 783 | in 784 | loop 0 [] pos 785 | 786 | let substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst:str_subst subj = 787 | let subst (subj, ovector) = 788 | let first = Array.unsafe_get ovector 0 in 789 | let last = Array.unsafe_get ovector 1 in 790 | str_subst (string_unsafe_sub subj first (last - first)) 791 | in 792 | substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj 793 | 794 | let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) 795 | ?(itempl = def_subst) ?templ ?callout subj = 796 | let rex = match pat with Some str -> regexp str | _ -> rex in 797 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 798 | let templ, max_br, with_lp, subst_lst = 799 | match templ with Some str -> subst str | _ -> itempl 800 | in 801 | let subgroups2, ovector = make_ovector rex in 802 | let nsubs = (subgroups2 lsr 1) - 1 in 803 | if max_br > nsubs then 804 | failwith "Pcre.replace_first: backreference denotes nonexistent subpattern"; 805 | if with_lp && nsubs = 0 then failwith "Pcre.replace_first: no backreferences"; 806 | try 807 | unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout; 808 | let res_len, trans_lst = 809 | calc_trans_lst subgroups2 ovector subj templ subst_lst 810 | in 811 | let first = Array.unsafe_get ovector 0 in 812 | let last = Array.unsafe_get ovector 1 in 813 | let rest = String.length subj - last in 814 | let res = Bytes.create (first + res_len + rest) in 815 | bytes_unsafe_blit_string subj 0 res 0 first; 816 | let coll ofs (templ, ix, len) = 817 | bytes_unsafe_blit_string templ ix res ofs len; 818 | ofs + len 819 | in 820 | let ofs = List.fold_left coll first trans_lst in 821 | bytes_unsafe_blit_string subj last res ofs rest; 822 | Bytes.unsafe_to_string res 823 | with Not_found -> subj 824 | 825 | let qreplace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) 826 | ?(templ = "") ?callout subj = 827 | let rex = match pat with Some str -> regexp str | _ -> rex in 828 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 829 | let _, ovector = make_ovector rex in 830 | try 831 | unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout; 832 | let first = Array.unsafe_get ovector 0 in 833 | let last = Array.unsafe_get ovector 1 in 834 | let len = String.length templ in 835 | let rest = String.length subj - last in 836 | let postfix_start = first + len in 837 | let res = Bytes.create (postfix_start + rest) in 838 | bytes_unsafe_blit_string subj 0 res 0 first; 839 | bytes_unsafe_blit_string templ 0 res first len; 840 | bytes_unsafe_blit_string subj last res postfix_start rest; 841 | Bytes.unsafe_to_string res 842 | with Not_found -> subj 843 | 844 | let substitute_substrings_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat 845 | ?(pos = 0) ?callout ~subst subj = 846 | let rex = match pat with Some str -> regexp str | _ -> rex in 847 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 848 | let _, ovector = make_ovector rex in 849 | try 850 | unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout; 851 | let subj_len = String.length subj in 852 | let prefix_len = Array.unsafe_get ovector 0 in 853 | let last = Array.unsafe_get ovector 1 in 854 | let templ = subst (subj, ovector) in 855 | let postfix_len = subj_len - last in 856 | let templ_len = String.length templ in 857 | let postfix_start = prefix_len + templ_len in 858 | let res = Bytes.create (postfix_start + postfix_len) in 859 | bytes_unsafe_blit_string subj 0 res 0 prefix_len; 860 | bytes_unsafe_blit_string templ 0 res prefix_len templ_len; 861 | bytes_unsafe_blit_string subj last res postfix_start postfix_len; 862 | Bytes.unsafe_to_string res 863 | with Not_found -> subj 864 | 865 | let substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst:str_subst 866 | subj = 867 | let subst (subj, ovector) = 868 | let first = Array.unsafe_get ovector 0 in 869 | let last = Array.unsafe_get ovector 1 in 870 | str_subst (string_unsafe_sub subj first (last - first)) 871 | in 872 | substitute_substrings_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj 873 | 874 | (* Splitting *) 875 | 876 | let internal_psplit flags rex max pos callout subj = 877 | let subj_len = String.length subj in 878 | if subj_len = 0 then [] 879 | else if max = 1 then [ subj ] 880 | else 881 | let subgroups2, ovector = make_ovector rex in 882 | 883 | (* Adds contents of subgroups to the string accumulator *) 884 | let handle_subgroups strs = 885 | let strs = ref strs in 886 | let i = ref 2 in 887 | while !i < subgroups2 do 888 | let first = Array.unsafe_get ovector !i in 889 | incr i; 890 | let last = Array.unsafe_get ovector !i in 891 | let str = 892 | if first < 0 then "" else string_unsafe_sub subj first (last - first) 893 | in 894 | strs := str :: !strs; 895 | incr i 896 | done; 897 | !strs 898 | in 899 | 900 | (* Performs the recursive split *) 901 | let rec loop strs cnt pos prematch = 902 | let len = subj_len - pos in 903 | if len < 0 then strs 904 | else if 905 | (* Checks termination due to max restriction *) 906 | cnt = 0 907 | then 908 | if 909 | prematch 910 | && 911 | try 912 | unsafe_pcre_exec flags rex ~pos ~subj_start:pos ~subj ovector 913 | callout; 914 | true 915 | with Not_found -> false 916 | then 917 | let last = Array.unsafe_get ovector 1 in 918 | let strs = handle_subgroups strs in 919 | string_unsafe_sub subj last (subj_len - last) :: strs 920 | else string_unsafe_sub subj pos len :: strs 921 | (* Calculates next accumulator state for splitting *) 922 | else if 923 | try 924 | unsafe_pcre_exec flags rex ~pos ~subj_start:pos ~subj ovector callout; 925 | false 926 | with Not_found -> true 927 | then string_unsafe_sub subj pos len :: strs 928 | else 929 | let first = Array.unsafe_get ovector 0 in 930 | let last = Array.unsafe_get ovector 1 in 931 | if first = pos then 932 | if last = pos then 933 | let strs = if prematch then handle_subgroups strs else strs in 934 | if len = 0 then "" :: strs 935 | else if 936 | try 937 | unsafe_pcre_exec (flags lor 0x0410) rex ~pos ~subj_start:pos 938 | ~subj ovector callout; 939 | true 940 | with Not_found -> false 941 | then 942 | let new_strs = handle_subgroups ("" :: strs) in 943 | loop new_strs (cnt - 1) (Array.unsafe_get ovector 1) false 944 | else 945 | let new_strs = string_unsafe_sub subj pos 1 :: strs in 946 | loop new_strs (cnt - 1) (pos + 1) true 947 | else if prematch then loop (handle_subgroups strs) cnt last false 948 | else loop (handle_subgroups ("" :: strs)) (cnt - 1) last false 949 | else 950 | let new_strs = string_unsafe_sub subj pos (first - pos) :: strs in 951 | loop (handle_subgroups new_strs) (cnt - 1) last false 952 | in 953 | loop [] (max - 1) pos false 954 | 955 | let rec strip_all_empty = function "" :: t -> strip_all_empty t | l -> l 956 | 957 | external isspace : char -> bool = "pcre_isspace_stub" [@@noalloc] 958 | 959 | let rec find_no_space ix len str = 960 | if ix = len || not (isspace (String.unsafe_get str ix)) then ix 961 | else find_no_space (ix + 1) len str 962 | 963 | let split ?(iflags = 0) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj = 964 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 965 | let res = 966 | match (pat, rex) with 967 | | Some str, _ -> internal_psplit iflags (regexp str) max pos callout subj 968 | | _, Some rex -> internal_psplit iflags rex max pos callout subj 969 | | _ -> 970 | (* special case for Perl-splitting semantics *) 971 | let len = String.length subj in 972 | if pos > len || pos < 0 then failwith "Pcre.split: illegal offset"; 973 | let new_pos = find_no_space pos len subj in 974 | internal_psplit iflags def_rex max new_pos callout subj 975 | in 976 | List.rev (if max = 0 then strip_all_empty res else res) 977 | 978 | let asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj = 979 | Array.of_list (split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj) 980 | 981 | (* Full splitting *) 982 | 983 | type split_result = 984 | | Text of string 985 | | Delim of string 986 | | Group of int * string 987 | | NoGroup 988 | 989 | let rec strip_all_empty_full = function 990 | | Delim _ :: rest -> strip_all_empty_full rest 991 | | l -> l 992 | 993 | let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0) ?(max = 0) 994 | ?callout subj = 995 | let rex = match pat with Some str -> regexp str | _ -> rex in 996 | let iflags = match flags with Some flags -> rflags flags | _ -> iflags in 997 | let subj_len = String.length subj in 998 | if subj_len = 0 then [] 999 | else if max = 1 then [ Text subj ] 1000 | else 1001 | let subgroups2, ovector = make_ovector rex in 1002 | 1003 | (* Adds contents of subgroups to the string accumulator *) 1004 | let handle_subgroups strs = 1005 | let strs = ref strs in 1006 | let i = ref 2 in 1007 | while !i < subgroups2 do 1008 | let group_nr = !i lsr 1 in 1009 | let first = Array.unsafe_get ovector !i in 1010 | incr i; 1011 | let last = Array.unsafe_get ovector !i in 1012 | let str = 1013 | if first < 0 then NoGroup 1014 | else 1015 | let group_str = string_unsafe_sub subj first (last - first) in 1016 | Group (group_nr, group_str) 1017 | in 1018 | strs := str :: !strs; 1019 | incr i 1020 | done; 1021 | !strs 1022 | in 1023 | 1024 | (* Performs the recursive split *) 1025 | let rec loop strs cnt pos prematch = 1026 | let len = subj_len - pos in 1027 | if len < 0 then strs 1028 | else if 1029 | (* Checks termination due to max restriction *) 1030 | cnt = 0 1031 | then 1032 | if 1033 | prematch 1034 | && 1035 | try 1036 | unsafe_pcre_exec iflags rex ~pos ~subj_start:pos ~subj ovector 1037 | callout; 1038 | true 1039 | with Not_found -> false 1040 | then 1041 | let first = Array.unsafe_get ovector 0 in 1042 | let last = Array.unsafe_get ovector 1 in 1043 | let delim = Delim (string_unsafe_sub subj first (last - first)) in 1044 | Text (string_unsafe_sub subj last (subj_len - last)) 1045 | :: handle_subgroups (delim :: strs) 1046 | else if len = 0 then strs 1047 | else Text (string_unsafe_sub subj pos len) :: strs 1048 | (* Calculates next accumulator state for splitting *) 1049 | else if 1050 | try 1051 | unsafe_pcre_exec iflags rex ~pos ~subj_start:pos ~subj ovector callout; 1052 | false 1053 | with Not_found -> true 1054 | then 1055 | if len = 0 then strs else Text (string_unsafe_sub subj pos len) :: strs 1056 | else 1057 | let first = Array.unsafe_get ovector 0 in 1058 | let last = Array.unsafe_get ovector 1 in 1059 | if first = pos then 1060 | if last = pos then 1061 | if len = 0 then handle_subgroups (Delim "" :: strs) 1062 | else 1063 | let empty_groups = handle_subgroups [] in 1064 | if 1065 | try 1066 | unsafe_pcre_exec (iflags lor 0x0410) rex ~pos ~subj_start:pos 1067 | ~subj ovector callout; 1068 | true 1069 | with Not_found -> false 1070 | then 1071 | let first = Array.unsafe_get ovector 0 in 1072 | let last = Array.unsafe_get ovector 1 in 1073 | let delim = 1074 | Delim (string_unsafe_sub subj first (last - first)) 1075 | in 1076 | let new_strs = 1077 | let tmp_strs = 1078 | if prematch then strs else empty_groups @ (Delim "" :: strs) 1079 | in 1080 | handle_subgroups (delim :: tmp_strs) 1081 | in 1082 | loop new_strs (cnt - 1) last false 1083 | else 1084 | let new_strs = 1085 | (Text (string_unsafe_sub subj pos 1) :: empty_groups) 1086 | @ (Delim "" :: strs) 1087 | in 1088 | loop new_strs (cnt - 1) (pos + 1) true 1089 | else 1090 | let delim = Delim (string_unsafe_sub subj first (last - first)) in 1091 | loop (handle_subgroups (delim :: strs)) cnt last false 1092 | else 1093 | let delim = Delim (string_unsafe_sub subj first (last - first)) in 1094 | let pre_strs = 1095 | Text (string_unsafe_sub subj pos (first - pos)) :: strs 1096 | in 1097 | loop (handle_subgroups (delim :: pre_strs)) (cnt - 1) last false 1098 | in 1099 | let res = loop [] (max - 1) pos true in 1100 | List.rev (if max = 0 then strip_all_empty_full res else res) 1101 | 1102 | (* Additional convenience functions useful in combination with this library *) 1103 | 1104 | let foreach_line ?(ic = stdin) f = 1105 | try 1106 | while true do 1107 | f (input_line ic) 1108 | done 1109 | with End_of_file -> () 1110 | 1111 | let foreach_file filenames f = 1112 | let do_with_file filename = 1113 | let file = open_in filename in 1114 | try 1115 | f filename file; 1116 | close_in file 1117 | with exn -> 1118 | close_in file; 1119 | raise exn 1120 | in 1121 | List.iter do_with_file filenames 1122 | -------------------------------------------------------------------------------- /lib/pcre.mli: -------------------------------------------------------------------------------- 1 | (* PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml 2 | 3 | Copyright © 1999- Markus Mottl 4 | 5 | This library is free software; you can redistribute it and/or modify it under 6 | the terms of the GNU Lesser General Public License as published by the Free 7 | Software Foundation; either version 2.1 of the License, or (at your option) 8 | any later version. 9 | 10 | This library is distributed in the hope that it will be useful, but WITHOUT 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 13 | details. 14 | 15 | You should have received a copy of the GNU Lesser General Public License 16 | along with this library; if not, write to the Free Software Foundation, Inc., 17 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA *) 18 | 19 | (** Perl Compatibility Regular Expressions for OCaml 20 | 21 | {e %%VERSION%% - {{:%%PKG_HOMEPAGE%%} homepage}} *) 22 | 23 | (** {1 Exceptions} *) 24 | 25 | type error = 26 | | Partial (** String only matched the pattern partially *) 27 | | BadPartial 28 | (** Pattern contains items that cannot be used together with partial 29 | matching. *) 30 | | BadPattern of string * int 31 | (** [BadPattern (msg, pos)] regular expression is malformed. The reason is 32 | in [msg], the position of the error in the pattern in [pos]. *) 33 | | BadUTF8 (** UTF8 string being matched is invalid *) 34 | | BadUTF8Offset 35 | (** Gets raised when a UTF8 string being matched with offset is invalid. 36 | *) 37 | | MatchLimit 38 | (** Maximum allowed number of match attempts with backtracking or 39 | recursion is reached during matching. ALL FUNCTIONS CALLING THE 40 | MATCHING ENGINE MAY RAISE IT!!! *) 41 | | RecursionLimit 42 | | WorkspaceSize 43 | (** Raised by {!pcre_dfa_exec} when the provided workspace array is too 44 | small. See documention on {!pcre_dfa_exec} for details on workspace 45 | array sizing. *) 46 | | InternalError of string 47 | (** [InternalError msg] C-library exhibits unknown/undefined behaviour. 48 | The reason is in [msg]. *) 49 | 50 | exception Error of error 51 | (** Exception indicating PCRE errors. *) 52 | 53 | exception Backtrack 54 | (** [Backtrack] used in callout functions to force backtracking. *) 55 | 56 | exception Regexp_or of string * error 57 | (** [Regexp_or (pat, error)] gets raised for sub-pattern [pat] by [regexp_or] if 58 | it failed to compile. *) 59 | 60 | (** {1 Compilation and runtime flags and their conversion functions} *) 61 | 62 | type icflag 63 | (** Internal representation of compilation flags *) 64 | 65 | and irflag 66 | (** Internal representation of runtime flags *) 67 | 68 | and cflag = 69 | [ `CASELESS (** Case insensitive matching *) 70 | | `MULTILINE 71 | (** '^' and '$' match before/after newlines, not just at the beginning/end 72 | of a string *) 73 | | `DOTALL (** '.' matches all characters (newlines, too) *) 74 | | `EXTENDED 75 | (** Ignores whitespace and PERL-comments. Behaves like the '/x'-option in 76 | PERL *) 77 | | `ANCHORED (** Pattern matches only at start of string *) 78 | | `DOLLAR_ENDONLY (** '$' in pattern matches only at end of string *) 79 | | `EXTRA (** Reserved for future extensions of PCRE *) 80 | | `UNGREEDY (** Quantifiers not greedy anymore, only if followed by '?' *) 81 | | `UTF8 (** Treats patterns and strings as UTF8 characters. *) 82 | | `NO_UTF8_CHECK 83 | (** Turns off validity checks on UTF8 strings for efficiency reasons. 84 | WARNING: invalid UTF8 strings may cause a crash then! *) 85 | | `NO_AUTO_CAPTURE (** Disables the use of numbered capturing parentheses *) 86 | | `AUTO_CALLOUT 87 | (** Automatically inserts callouts with id 255 before each pattern item *) 88 | | `FIRSTLINE (** Unanchored patterns must match before/at first NL *) ] 89 | (** Compilation flags *) 90 | 91 | val cflags : cflag list -> icflag 92 | (** [cflags cflag_list] converts a list of compilation flags to their internal 93 | representation. *) 94 | 95 | val cflag_list : icflag -> cflag list 96 | (** [cflag_list cflags] converts internal representation of compilation flags to 97 | a list. *) 98 | 99 | type rflag = 100 | [ `ANCHORED (** Treats pattern as if it were anchored *) 101 | | `NOTBOL (** Beginning of string is not treated as beginning of line *) 102 | | `NOTEOL (** End of string is not treated as end of line *) 103 | | `NOTEMPTY (** Empty strings are not considered to be a valid match *) 104 | | `PARTIAL (** Turns on partial matching *) 105 | | `DFA_RESTART 106 | (** Causes matching to proceed presuming the subject string is further to 107 | one partially matched previously using the same int-array working set. 108 | May only be used with {!pcre_dfa_exec} or {!unsafe_pcre_dfa_exec}, and 109 | should always be paired with [`PARTIAL]. *) ] 110 | (** Runtime flags *) 111 | 112 | val rflags : rflag list -> irflag 113 | (** [rflags rflag_list] converts a list of runtime flags to their internal 114 | representation. *) 115 | 116 | val rflag_list : irflag -> rflag list 117 | (** [rflag_list rflags] converts internal representation of runtime flags to a 118 | list. *) 119 | 120 | (** {1 Information on the PCRE-configuration (build-time options)} *) 121 | 122 | (** Version information *) 123 | val version : string 124 | (** Version of the PCRE-C-library *) 125 | 126 | val config_utf8 : bool 127 | (** Indicates whether UTF8-support is enabled *) 128 | 129 | val config_newline : char 130 | (** Character used as newline *) 131 | 132 | val config_link_size : int 133 | (** Number of bytes used for internal linkage of regular expressions *) 134 | 135 | val config_match_limit : int 136 | (** Default limit for calls to internal matching function *) 137 | 138 | val config_match_limit_recursion : int 139 | (** Default limit recursion for calls to internal matching function *) 140 | 141 | val config_stackrecurse : bool 142 | (** Indicates use of stack recursion in matching function *) 143 | 144 | (** {1 Information on patterns} *) 145 | 146 | type firstbyte_info = 147 | [ `Char of char (** Fixed first character *) 148 | | `Start_only (** Pattern matches at beginning and end of newlines *) 149 | | `ANCHORED (** Pattern is anchored *) ] 150 | (** Information on matching of "first chars" in patterns *) 151 | 152 | type study_stat = 153 | [ `Not_studied (** Pattern has not yet been studied *) 154 | | `Studied (** Pattern has been studied successfully *) 155 | | `Optimal (** Pattern could not be improved by studying *) ] 156 | (** Information on the study status of patterns *) 157 | 158 | type regexp 159 | (** Compiled regular expressions *) 160 | 161 | val options : regexp -> icflag 162 | (** [options regexp] 163 | 164 | @return compilation flags of [regexp]. *) 165 | 166 | val size : regexp -> int 167 | (** [size regexp] 168 | 169 | @return memory size of [regexp]. *) 170 | 171 | val studysize : regexp -> int 172 | (** [studysize regexp] 173 | 174 | @return memory size of study information of [regexp]. *) 175 | 176 | val capturecount : regexp -> int 177 | (** [capturecount regexp] 178 | 179 | @return number of capturing subpatterns in [regexp]. *) 180 | 181 | val backrefmax : regexp -> int 182 | (** [backrefmax regexp] 183 | 184 | @return number of highest backreference in [regexp]. *) 185 | 186 | val namecount : regexp -> int 187 | (** [namecount regexp] 188 | 189 | @return number of named subpatterns in [regexp]. *) 190 | 191 | val nameentrysize : regexp -> int 192 | (** [nameentrysize regexp] 193 | 194 | @return size of longest name of named subpatterns in [regexp] + 3. *) 195 | 196 | val names : regexp -> string array 197 | (** [names regex] 198 | 199 | @return array of names of named substrings in [regexp]. *) 200 | 201 | val firstbyte : regexp -> firstbyte_info 202 | (** [firstbyte regexp] 203 | 204 | @return firstbyte info on [regexp]. *) 205 | 206 | val firsttable : regexp -> string option 207 | (** [firsttable regexp] 208 | 209 | @return 210 | some 256-bit (32-byte) fixed set table in form of a string for [regexp] if 211 | available, [None] otherwise. *) 212 | 213 | val lastliteral : regexp -> char option 214 | (** [lastliteral regexp] 215 | 216 | @return 217 | some last matching character of [regexp] if available, [None] otherwise. 218 | *) 219 | 220 | val study_stat : regexp -> study_stat 221 | (** [study_stat regexp] 222 | 223 | @return study status of [regexp]. *) 224 | 225 | val get_stringnumber : regexp -> string -> int 226 | (** [get_stringnumber rex name] 227 | 228 | @return 229 | the index of the named substring [name] in regular expression [rex]. This 230 | index can then be used with [get_substring]. 231 | 232 | @raise Invalid_arg if there is no such named substring. *) 233 | 234 | val get_match_limit : regexp -> int option 235 | (** [get_match_limit rex] 236 | 237 | @return some match limit of regular expression [rex] or [None]. *) 238 | 239 | val get_match_limit_recursion : regexp -> int option 240 | (** [get_match_limit_recursion rex] 241 | 242 | @return some recursion match limit of regular expression [rex] or [None]. *) 243 | 244 | (** {1 Compilation of patterns} *) 245 | 246 | type chtables 247 | (** Alternative set of char tables for pattern matching *) 248 | 249 | val maketables : unit -> chtables 250 | (** Generates new set of char tables for the current locale. *) 251 | 252 | val regexp : 253 | ?study:bool -> 254 | ?jit_compile:bool -> 255 | ?limit:int -> 256 | ?limit_recursion:int -> 257 | ?iflags:icflag -> 258 | ?flags:cflag list -> 259 | ?chtables:chtables -> 260 | string -> 261 | regexp 262 | (** [regexp ?jit_compile ?study ?limit ?limit_recursion ?iflags ?flags ?chtables 263 | pattern] compiles [pattern] with [flags] when given, with [iflags] 264 | otherwise, and with char tables [chtables]. If [study] is true, then the 265 | resulting regular expression will be studied. If [jit_compile] is true, 266 | studying will also perform JIT-compilation of the pattern. If [limit] is 267 | specified, this sets a limit to the amount of recursion and backtracking 268 | (only lower than the builtin default!). If this limit is exceeded, 269 | [MatchLimit] will be raised during matching. 270 | 271 | @param study default = true 272 | @param jit_compile default = false 273 | @param limit default = no extra limit other than default 274 | @param limit_recursion default = no extra limit_recursion other than default 275 | @param iflags default = no extra flags 276 | @param flags default = ignored 277 | @param chtables default = builtin char tables 278 | 279 | @return the regular expression. 280 | 281 | For detailed documentation on how you can specify PERL-style regular 282 | expressions (= patterns), please consult the PCRE-documentation ("man 283 | pcrepattern") or PERL-manuals. 284 | 285 | @see www.perl.com *) 286 | 287 | val regexp_or : 288 | ?study:bool -> 289 | ?jit_compile:bool -> 290 | ?limit:int -> 291 | ?limit_recursion:int -> 292 | ?iflags:icflag -> 293 | ?flags:cflag list -> 294 | ?chtables:chtables -> 295 | string list -> 296 | regexp 297 | (** [regexp_or ?study ?limit ?limit_recursion ?iflags ?flags ?chtables patterns] 298 | like {!val-regexp}, but combines [patterns] as alternatives (or-patterns) 299 | into one regular expression. *) 300 | 301 | val quote : string -> string 302 | (** [quote str] 303 | 304 | @return the quoted string of [str]. *) 305 | 306 | (** {1 Subpattern extraction} *) 307 | 308 | type substrings 309 | (** Information on substrings after pattern matching *) 310 | 311 | val get_subject : substrings -> string 312 | (** [get_subject substrings] 313 | 314 | @return the subject string of [substrings]. *) 315 | 316 | val num_of_subs : substrings -> int 317 | (** [num_of_subs substrings] 318 | 319 | @return number of strings in [substrings] (whole match inclusive). *) 320 | 321 | val get_substring : substrings -> int -> string 322 | (** [get_substring substrings n] 323 | 324 | @return the [n]th substring (0 is whole match) of [substrings]. 325 | 326 | @raise Invalid_argument 327 | if [n] is not in the range of the number of substrings. 328 | @raise Not_found 329 | if the corresponding subpattern did not capture a substring. *) 330 | 331 | val get_substring_ofs : substrings -> int -> int * int 332 | (** [get_substring_ofs substrings n] 333 | 334 | @return 335 | the offset tuple of the [n]th substring of [substrings] (0 is whole 336 | match). 337 | 338 | @raise Invalid_argument 339 | if [n] is not in the range of the number of substrings. 340 | @raise Not_found 341 | if the corresponding subpattern did not capture a substring. *) 342 | 343 | val get_substrings : ?full_match:bool -> substrings -> string array 344 | (** [get_substrings ?full_match substrings] 345 | 346 | @return 347 | the array of substrings in [substrings]. It includes the full match at 348 | index 0 when [full_match] is [true], the captured substrings only when it 349 | is [false]. If a subpattern did not capture a substring, the empty string 350 | is returned in the corresponding position instead. 351 | 352 | @param full_match default = true *) 353 | 354 | val get_opt_substrings : ?full_match:bool -> substrings -> string option array 355 | (** [get_opt_substrings ?full_match substrings] 356 | 357 | @return 358 | the array of optional substrings in [substrings]. It includes 359 | [Some full_match_str] at index 0 when [full_match] is [true], 360 | [Some captured_substrings] only when it is [false]. If a subpattern did 361 | not capture a substring, [None] is returned in the corresponding position 362 | instead. 363 | 364 | @param full_match default = true *) 365 | 366 | val get_named_substring : regexp -> string -> substrings -> string 367 | (** [get_named_substring rex name substrings] 368 | 369 | @return 370 | the named substring [name] in regular expression [rex] and [substrings]. 371 | 372 | @raise Invalid_argument if there is no such named substring. 373 | @raise Not_found 374 | if the corresponding subpattern did not capture a substring. *) 375 | 376 | val get_named_substring_ofs : regexp -> string -> substrings -> int * int 377 | (** [get_named_substring_ofs rex name substrings] 378 | 379 | @return 380 | the offset tuple of the named substring [name] in regular expression [rex] 381 | and [substrings]. 382 | 383 | @raise Invalid_argument if there is no such named substring. 384 | @raise Not_found 385 | if the corresponding subpattern did not capture a substring. *) 386 | 387 | (** {1 Callouts} *) 388 | 389 | type callout_data = { 390 | callout_number : int; (** Callout number *) 391 | substrings : substrings; (** Substrings matched so far *) 392 | start_match : int; (** Subject start offset of current match attempt *) 393 | current_position : int; (** Subject offset of current match pointer *) 394 | capture_top : int; (** Number of the highest captured substring so far *) 395 | capture_last : int; (** Number of the most recently captured substring *) 396 | pattern_position : int; (** Offset of next match item in pattern string *) 397 | next_item_length : int; (** Length of next match item in pattern string *) 398 | } 399 | 400 | (** Type of callout functions *) 401 | type callout = callout_data -> unit 402 | (** Callouts are referred to in patterns as "(?Cn)" where "n" is a 403 | [callout_number] ranging from 0 to 255. Substrings captured so far are 404 | accessible as usual via [substrings]. You will have to consider 405 | [capture_top] and [capture_last] to know about the current state of valid 406 | substrings. 407 | 408 | By raising exception [Backtrack] within a callout function, the user can 409 | force the pattern matching engine to backtrack to other possible solutions. 410 | Other exceptions will terminate matching immediately and return control to 411 | OCaml. *) 412 | 413 | (** {1 Matching of patterns and subpattern extraction} *) 414 | 415 | val pcre_exec : 416 | ?iflags:irflag -> 417 | ?flags:rflag list -> 418 | ?rex:regexp -> 419 | ?pat:string -> 420 | ?pos:int -> 421 | ?callout:callout -> 422 | string -> 423 | int array 424 | (** [pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] 425 | 426 | @return 427 | an array of offsets that describe the position of matched subpatterns in 428 | the string [subj] starting at position [pos] with pattern [pat] when 429 | given, regular expression [rex] otherwise. The array also contains 430 | additional workspace needed by the match engine. Uses [flags] when given, 431 | the precompiled [iflags] otherwise. Callouts are handled by [callout]. 432 | 433 | @param iflags default = no extra flags 434 | @param flags default = ignored 435 | @param rex default = matches whitespace 436 | @param pat default = ignored 437 | @param pos default = 0 438 | @param callout default = ignore callouts 439 | 440 | @raise Not_found if pattern does not match. *) 441 | 442 | val pcre_dfa_exec : 443 | ?iflags:irflag -> 444 | ?flags:rflag list -> 445 | ?rex:regexp -> 446 | ?pat:string -> 447 | ?pos:int -> 448 | ?callout:callout -> 449 | ?workspace:int array -> 450 | string -> 451 | int array 452 | (** [pcre_dfa_exec ?iflags ?flags ?rex ?pat ?pos ?callout ?workspace subj] 453 | invokes the "alternative" DFA matching function. 454 | 455 | @return 456 | an array of offsets that describe the position of matched subpatterns in 457 | the string [subj] starting at position [pos] with pattern [pat] when 458 | given, regular expression [rex] otherwise. The array also contains 459 | additional workspace needed by the match engine. Uses [flags] when given, 460 | the precompiled [iflags] otherwise. Requires a sufficiently-large 461 | [workspace] array. Callouts are handled by [callout]. 462 | 463 | Note that the returned array of offsets are quite different from those 464 | returned by {!pcre_exec} et al. The motivating use case for the DFA match 465 | function is to be able to restart a partial match with N additional input 466 | segments. Because the match function/workspace does not store segments seen 467 | previously, the offsets returned when a match completes will refer only to 468 | the matching portion of the last subject string provided. Thus, returned 469 | offsets from this function should not be used to support extracting captured 470 | submatches. If you need to capture submatches from a series of inputs 471 | incrementally matched with this function, you'll need to concatenate those 472 | inputs that yield a successful match here and re-run the same pattern 473 | against that single subject string. 474 | 475 | Aside from an absolute minimum of [20], PCRE does not provide any guidance 476 | regarding the size of workspace array needed by any given pattern. 477 | Therefore, it is wise to appropriately handle the possible [WorkspaceSize] 478 | error. If raised, you can allocate a new, larger workspace array and begin 479 | the DFA matching process again. 480 | 481 | @param iflags default = no extra flags 482 | @param flags default = ignored 483 | @param rex default = matches whitespace 484 | @param pat default = ignored 485 | @param pos default = 0 486 | @param callout default = ignore callouts 487 | @param workspace default = fresh array of length [20] 488 | 489 | @raise Not_found if the pattern match has failed 490 | @raise Error 491 | Partial if the pattern has matched partially; a subsequent exec call with 492 | the same pattern and workspace (adding the [DFA_RESTART] flag) be made to 493 | either further advance or complete the partial match. 494 | @raise Error 495 | WorkspaceSize if the workspace array is too small to accommodate the DFA 496 | state required by the supplied pattern *) 497 | 498 | val exec : 499 | ?iflags:irflag -> 500 | ?flags:rflag list -> 501 | ?rex:regexp -> 502 | ?pat:string -> 503 | ?pos:int -> 504 | ?callout:callout -> 505 | string -> 506 | substrings 507 | (** [exec ?iflags ?flags ?rex ?pat ?pos ?callout subj] 508 | 509 | @return 510 | substring information on string [subj] starting at position [pos] with 511 | pattern [pat] when given, regular expression [rex] otherwise. Uses [flags] 512 | when given, the precompiled [iflags] otherwise. Callouts are handled by 513 | [callout]. 514 | 515 | @param iflags default = no extra flags 516 | @param flags default = ignored 517 | @param rex default = matches whitespace 518 | @param pat default = ignored 519 | @param pos default = 0 520 | @param callout default = ignore callouts 521 | 522 | @raise Not_found if pattern does not match. *) 523 | 524 | val exec_all : 525 | ?iflags:irflag -> 526 | ?flags:rflag list -> 527 | ?rex:regexp -> 528 | ?pat:string -> 529 | ?pos:int -> 530 | ?callout:callout -> 531 | string -> 532 | substrings array 533 | (** [exec_all ?iflags ?flags ?rex ?pat ?pos ?callout subj] 534 | 535 | @return 536 | an array of substring information of all matching substrings in string 537 | [subj] starting at position [pos] with pattern [pat] when given, regular 538 | expression [rex] otherwise. Uses [flags] when given, the precompiled 539 | [iflags] otherwise. Callouts are handled by [callout]. 540 | 541 | @param iflags default = no extra flags 542 | @param flags default = ignored 543 | @param rex default = matches whitespace 544 | @param pat default = ignored 545 | @param pos default = 0 546 | @param callout default = ignore callouts 547 | 548 | @raise Not_found if pattern does not match. *) 549 | 550 | val next_match : 551 | ?iflags:irflag -> 552 | ?flags:rflag list -> 553 | ?rex:regexp -> 554 | ?pat:string -> 555 | ?pos:int -> 556 | ?callout:callout -> 557 | substrings -> 558 | substrings 559 | (** [next_match ?iflags ?flags ?rex ?pat ?pos ?callout substrs] 560 | 561 | @return 562 | substring information on the match that follows on the last match denoted 563 | by [substrs], jumping over [pos] characters (also backwards!), using 564 | pattern [pat] when given, regular expression [rex] otherwise. Uses [flags] 565 | when given, the precompiled [iflags] otherwise. Callouts are handled by 566 | [callout]. 567 | 568 | @param iflags default = no extra flags 569 | @param flags default = ignored 570 | @param rex default = matches whitespace 571 | @param pat default = ignored 572 | @param pos default = 0 573 | @param callout default = ignore callouts 574 | 575 | @raise Not_found if pattern does not match. 576 | @raise Invalid_arg 577 | if [pos] let matching start outside of the subject string. *) 578 | 579 | val extract : 580 | ?iflags:irflag -> 581 | ?flags:rflag list -> 582 | ?rex:regexp -> 583 | ?pat:string -> 584 | ?pos:int -> 585 | ?full_match:bool -> 586 | ?callout:callout -> 587 | string -> 588 | string array 589 | (** [extract ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj] 590 | 591 | @return 592 | the array of substrings that match [subj] starting at position [pos], 593 | using pattern [pat] when given, regular expression [rex] otherwise. Uses 594 | [flags] when given, the precompiled [iflags] otherwise. It includes the 595 | full match at index 0 when [full_match] is [true], the captured substrings 596 | only when it is [false]. Callouts are handled by [callout]. If a 597 | subpattern did not capture a substring, the empty string is returned in 598 | the corresponding position instead. 599 | 600 | @param iflags default = no extra flags 601 | @param flags default = ignored 602 | @param rex default = matches whitespace 603 | @param pat default = ignored 604 | @param pos default = 0 605 | @param full_match default = true 606 | @param callout default = ignore callouts 607 | 608 | @raise Not_found if pattern does not match. *) 609 | 610 | val extract_opt : 611 | ?iflags:irflag -> 612 | ?flags:rflag list -> 613 | ?rex:regexp -> 614 | ?pat:string -> 615 | ?pos:int -> 616 | ?full_match:bool -> 617 | ?callout:callout -> 618 | string -> 619 | string option array 620 | (** [extract_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj] 621 | 622 | @return 623 | the array of optional substrings that match [subj] starting at position 624 | [pos], using pattern [pat] when given, regular expression [rex] otherwise. 625 | Uses [flags] when given, the precompiled [iflags] otherwise. It includes 626 | [Some full_match_str] at index 0 when [full_match] is [true], 627 | [Some captured-substrings] only when it is [false]. Callouts are handled 628 | by [callout]. If a subpattern did not capture a substring, [None] is 629 | returned in the corresponding position instead. 630 | 631 | @param iflags default = no extra flags 632 | @param flags default = ignored 633 | @param rex default = matches whitespace 634 | @param pat default = ignored 635 | @param pos default = 0 636 | @param full_match default = true 637 | @param callout default = ignore callouts 638 | 639 | @raise Not_found if pattern does not match. *) 640 | 641 | val extract_all : 642 | ?iflags:irflag -> 643 | ?flags:rflag list -> 644 | ?rex:regexp -> 645 | ?pat:string -> 646 | ?pos:int -> 647 | ?full_match:bool -> 648 | ?callout:callout -> 649 | string -> 650 | string array array 651 | (** [extract_all ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj] 652 | 653 | @return 654 | an array of arrays of all matching substrings that match [subj] starting 655 | at position [pos], using pattern [pat] when given, regular expression 656 | [rex] otherwise. Uses [flags] when given, the precompiled [iflags] 657 | otherwise. It includes the full match at index 0 of the extracted string 658 | arrays when [full_match] is [true], the captured substrings only when it 659 | is [false]. Callouts are handled by [callout]. 660 | 661 | @param iflags default = no extra flags 662 | @param flags default = ignored 663 | @param rex default = matches whitespace 664 | @param pat default = ignored 665 | @param pos default = 0 666 | @param full_match default = true 667 | @param callout default = ignore callouts 668 | 669 | @raise Not_found if pattern does not match. *) 670 | 671 | val extract_all_opt : 672 | ?iflags:irflag -> 673 | ?flags:rflag list -> 674 | ?rex:regexp -> 675 | ?pat:string -> 676 | ?pos:int -> 677 | ?full_match:bool -> 678 | ?callout:callout -> 679 | string -> 680 | string option array array 681 | (** [extract_all_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj] 682 | 683 | @return 684 | an array of arrays of all optional matching substrings that match [subj] 685 | starting at position [pos], using pattern [pat] when given, regular 686 | expression [rex] otherwise. Uses [flags] when given, the precompiled 687 | [iflags] otherwise. It includes [Some full_match_str] at index 0 of the 688 | extracted string arrays when [full_match] is [true], 689 | [Some captured_substrings] only when it is [false]. Callouts are handled 690 | by [callout]. If a subpattern did not capture a substring, [None] is 691 | returned in the corresponding position instead. 692 | 693 | @param iflags default = no extra flags 694 | @param flags default = ignored 695 | @param rex default = matches whitespace 696 | @param pat default = ignored 697 | @param pos default = 0 698 | @param full_match default = true 699 | @param callout default = ignore callouts 700 | 701 | @raise Not_found if pattern does not match. *) 702 | 703 | val pmatch : 704 | ?iflags:irflag -> 705 | ?flags:rflag list -> 706 | ?rex:regexp -> 707 | ?pat:string -> 708 | ?pos:int -> 709 | ?callout:callout -> 710 | string -> 711 | bool 712 | (** [pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj] 713 | 714 | @return 715 | [true] if [subj] is matched by pattern [pat] when given, regular 716 | expression [rex] otherwise, starting at position [pos]. Uses [flags] when 717 | given, the precompiled [iflags] otherwise. Callouts are handled by 718 | [callout]. 719 | 720 | @param iflags default = no extra flags 721 | @param flags default = ignored 722 | @param rex default = matches whitespace 723 | @param pat default = ignored 724 | @param pos default = 0 725 | @param callout default = ignore callouts *) 726 | 727 | (** {1 String substitution} *) 728 | 729 | type substitution 730 | (** Information on substitution patterns *) 731 | 732 | val subst : string -> substitution 733 | (** [subst str] converts the string [str] representing a substitution pattern to 734 | the internal representation 735 | 736 | The contents of the substitution string [str] can be normal text mixed with 737 | any of the following (mostly as in PERL): 738 | 739 | - {e $\[0-9\]+} - a "$" immediately followed by an arbitrary number. "$0" 740 | stands for the name of the executable, any other number for the n-th 741 | backreference. 742 | - {e $&} - the whole matched pattern 743 | - {e $`} - the text before the match 744 | - {e $'} - the text after the match 745 | - {e $+} - the last group that matched 746 | - {e $$} - a single "$" 747 | - {e $!} - delimiter which does not appear in the substitution. Can be used 748 | to part "$[0-9]+" from an immediately following other number. *) 749 | 750 | val replace : 751 | ?iflags:irflag -> 752 | ?flags:rflag list -> 753 | ?rex:regexp -> 754 | ?pat:string -> 755 | ?pos:int -> 756 | ?itempl:substitution -> 757 | ?templ:string -> 758 | ?callout:callout -> 759 | string -> 760 | string 761 | (** [replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj] 762 | replaces all substrings of [subj] matching pattern [pat] when given, regular 763 | expression [rex] otherwise, starting at position [pos] with the substitution 764 | string [templ] when given, [itempl] otherwise. Uses [flags] when given, the 765 | precompiled [iflags] otherwise. Callouts are handled by [callout]. 766 | 767 | @param iflags default = no extra flags 768 | @param flags default = ignored 769 | @param rex default = matches whitespace 770 | @param pat default = ignored 771 | @param pos default = 0 772 | @param itempl default = empty string 773 | @param templ default = ignored 774 | @param callout default = ignore callouts 775 | 776 | @raise Failure if there are backreferences to nonexistent subpatterns. *) 777 | 778 | val qreplace : 779 | ?iflags:irflag -> 780 | ?flags:rflag list -> 781 | ?rex:regexp -> 782 | ?pat:string -> 783 | ?pos:int -> 784 | ?templ:string -> 785 | ?callout:callout -> 786 | string -> 787 | string 788 | (** [qreplace ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj] replaces all 789 | substrings of [subj] matching pattern [pat] when given, regular expression 790 | [rex] otherwise, starting at position [pos] with the string [templ]. Uses 791 | [flags] when given, the precompiled [iflags] otherwise. Callouts are handled 792 | by [callout]. 793 | 794 | @param iflags default = no extra flags 795 | @param flags default = ignored 796 | @param rex default = matches whitespace 797 | @param pat default = ignored 798 | @param pos default = 0 799 | @param templ default = ignored 800 | @param callout default = ignore callouts *) 801 | 802 | val substitute_substrings : 803 | ?iflags:irflag -> 804 | ?flags:rflag list -> 805 | ?rex:regexp -> 806 | ?pat:string -> 807 | ?pos:int -> 808 | ?callout:callout -> 809 | subst:(substrings -> string) -> 810 | string -> 811 | string 812 | (** [substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj] 813 | replaces all substrings of [subj] matching pattern [pat] when given, regular 814 | expression [rex] otherwise, starting at position [pos] with the result of 815 | function [subst] applied to the substrings of the match. Uses [flags] when 816 | given, the precompiled [iflags] otherwise. Callouts are handled by 817 | [callout]. 818 | 819 | @param iflags default = no extra flags 820 | @param flags default = ignored 821 | @param rex default = matches whitespace 822 | @param pat default = ignored 823 | @param pos default = 0 824 | @param callout default = ignore callouts *) 825 | 826 | val substitute : 827 | ?iflags:irflag -> 828 | ?flags:rflag list -> 829 | ?rex:regexp -> 830 | ?pat:string -> 831 | ?pos:int -> 832 | ?callout:callout -> 833 | subst:(string -> string) -> 834 | string -> 835 | string 836 | (** [substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj] replaces all 837 | substrings of [subj] matching pattern [pat] when given, regular expression 838 | [rex] otherwise, starting at position [pos] with the result of function 839 | [subst] applied to the match. Uses [flags] when given, the precompiled 840 | [iflags] otherwise. Callouts are handled by [callout]. 841 | 842 | @param iflags default = no extra flags 843 | @param flags default = ignored 844 | @param rex default = matches whitespace 845 | @param pat default = ignored 846 | @param pos default = 0 847 | @param callout default = ignore callouts *) 848 | 849 | val replace_first : 850 | ?iflags:irflag -> 851 | ?flags:rflag list -> 852 | ?rex:regexp -> 853 | ?pat:string -> 854 | ?pos:int -> 855 | ?itempl:substitution -> 856 | ?templ:string -> 857 | ?callout:callout -> 858 | string -> 859 | string 860 | (** [replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ ?callout subj] 861 | replaces the first substring of [subj] matching pattern [pat] when given, 862 | regular expression [rex] otherwise, starting at position [pos] with the 863 | substitution string [templ] when given, [itempl] otherwise. Uses [flags] 864 | when given, the precompiled [iflags] otherwise. Callouts are handled by 865 | [callout]. 866 | 867 | @param iflags default = no extra flags 868 | @param flags default = ignored 869 | @param rex default = matches whitespace 870 | @param pat default = ignored 871 | @param pos default = 0 872 | @param itempl default = empty string 873 | @param templ default = ignored 874 | @param callout default = ignore callouts 875 | 876 | @raise Failure if there are backreferences to nonexistent subpatterns. *) 877 | 878 | val qreplace_first : 879 | ?iflags:irflag -> 880 | ?flags:rflag list -> 881 | ?rex:regexp -> 882 | ?pat:string -> 883 | ?pos:int -> 884 | ?templ:string -> 885 | ?callout:callout -> 886 | string -> 887 | string 888 | (** [qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ ?callout subj] replaces 889 | the first substring of [subj] matching pattern [pat] when given, regular 890 | expression [rex] otherwise, starting at position [pos] with the string 891 | [templ]. Uses [flags] when given, the precompiled [iflags] otherwise. 892 | Callouts are handled by [callout]. 893 | 894 | @param iflags default = no extra flags 895 | @param flags default = ignored 896 | @param rex default = matches whitespace 897 | @param pat default = ignored 898 | @param pos default = 0 899 | @param templ default = ignored 900 | @param callout default = ignore callouts *) 901 | 902 | val substitute_substrings_first : 903 | ?iflags:irflag -> 904 | ?flags:rflag list -> 905 | ?rex:regexp -> 906 | ?pat:string -> 907 | ?pos:int -> 908 | ?callout:callout -> 909 | subst:(substrings -> string) -> 910 | string -> 911 | string 912 | (** [substitute_substrings_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst 913 | subj] replaces the first substring of [subj] matching pattern [pat] when 914 | given, regular expression [rex] otherwise, starting at position [pos] with 915 | the result of function [subst] applied to the substrings of the match. Uses 916 | [flags] when given, the precompiled [iflags] otherwise. Callouts are handled 917 | by [callout]. 918 | 919 | @param iflags default = no extra flags 920 | @param flags default = ignored 921 | @param rex default = matches whitespace 922 | @param pat default = ignored 923 | @param pos default = 0 924 | @param callout default = ignore callouts *) 925 | 926 | val substitute_first : 927 | ?iflags:irflag -> 928 | ?flags:rflag list -> 929 | ?rex:regexp -> 930 | ?pat:string -> 931 | ?pos:int -> 932 | ?callout:callout -> 933 | subst:(string -> string) -> 934 | string -> 935 | string 936 | (** [substitute_first ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj] 937 | replaces the first substring of [subj] matching pattern [pat] when given, 938 | regular expression [rex] otherwise, starting at position [pos] with the 939 | result of function [subst] applied to the match. Uses [flags] when given, 940 | the precompiled [iflags] otherwise. Callouts are handled by [callout]. 941 | 942 | @param iflags default = no extra flags 943 | @param flags default = ignored 944 | @param rex default = matches whitespace 945 | @param pat default = ignored 946 | @param pos default = 0 947 | @param callout default = ignore callouts *) 948 | 949 | (** {1 Splitting} *) 950 | 951 | val split : 952 | ?iflags:irflag -> 953 | ?flags:rflag list -> 954 | ?rex:regexp -> 955 | ?pat:string -> 956 | ?pos:int -> 957 | ?max:int -> 958 | ?callout:callout -> 959 | string -> 960 | string list 961 | (** [split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits [subj] into 962 | a list of at most [max] strings, using as delimiter pattern [pat] when 963 | given, regular expression [rex] otherwise, starting at position [pos]. Uses 964 | [flags] when given, the precompiled [iflags] otherwise. If [max] is zero, 965 | trailing empty fields are stripped. If it is negative, it is treated as 966 | arbitrarily large. If neither [pat] nor [rex] are specified, leading 967 | whitespace will be stripped! Should behave exactly as in PERL. Callouts are 968 | handled by [callout]. 969 | 970 | @param iflags default = no extra flags 971 | @param flags default = ignored 972 | @param rex default = matches whitespace 973 | @param pat default = ignored 974 | @param pos default = 0 975 | @param max default = 0 976 | @param callout default = ignore callouts *) 977 | 978 | val asplit : 979 | ?iflags:irflag -> 980 | ?flags:rflag list -> 981 | ?rex:regexp -> 982 | ?pat:string -> 983 | ?pos:int -> 984 | ?max:int -> 985 | ?callout:callout -> 986 | string -> 987 | string array 988 | (** [asplit ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] same as 989 | {!Pcre.split} but return an array instead of a list. *) 990 | 991 | (** Result of a {!Pcre.full_split} *) 992 | type split_result = 993 | | Text of string (** Text part of split string *) 994 | | Delim of string (** Delimiter part of split string *) 995 | | Group of int * string 996 | (** Subgroup of matched delimiter (subgroup_nr, subgroup_str) *) 997 | | NoGroup (** Unmatched subgroup *) 998 | 999 | val full_split : 1000 | ?iflags:irflag -> 1001 | ?flags:rflag list -> 1002 | ?rex:regexp -> 1003 | ?pat:string -> 1004 | ?pos:int -> 1005 | ?max:int -> 1006 | ?callout:callout -> 1007 | string -> 1008 | split_result list 1009 | (** [full_split ?iflags ?flags ?rex ?pat ?pos ?max ?callout subj] splits [subj] 1010 | into a list of at most [max] elements of type "split_result", using as 1011 | delimiter pattern [pat] when given, regular expression [rex] otherwise, 1012 | starting at position [pos]. Uses [flags] when given, the precompiled 1013 | [iflags] otherwise. If [max] is zero, trailing empty fields are stripped. If 1014 | it is negative, it is treated as arbitrarily large. Should behave exactly as 1015 | in PERL. Callouts are handled by [callout]. 1016 | 1017 | @param iflags default = no extra flags 1018 | @param flags default = ignored 1019 | @param rex default = matches whitespace 1020 | @param pat default = ignored 1021 | @param pos default = 0 1022 | @param max default = 0 1023 | @param callout default = ignore callouts *) 1024 | 1025 | (** {1 Additional convenience functions} *) 1026 | 1027 | val foreach_line : ?ic:in_channel -> (string -> unit) -> unit 1028 | (** [foreach_line ?ic f] applies [f] to each line in inchannel [ic] until the 1029 | end-of-file is reached. 1030 | 1031 | @param ic default = stdin *) 1032 | 1033 | val foreach_file : string list -> (string -> in_channel -> unit) -> unit 1034 | (** [foreach_file filenames f] opens each file in the list [filenames] for input 1035 | and applies [f] to each filename and the corresponding channel. Channels are 1036 | closed after each operation (even when exceptions occur - they get reraised 1037 | afterwards!). *) 1038 | 1039 | (** {1 {b UNSAFE STUFF - USE WITH CAUTION!}} *) 1040 | 1041 | val unsafe_pcre_exec : 1042 | irflag -> 1043 | regexp -> 1044 | pos:int -> 1045 | subj_start:int -> 1046 | subj:string -> 1047 | int array -> 1048 | callout option -> 1049 | unit 1050 | (** [unsafe_pcre_exec flags rex ~pos ~subj_start ~subj offset_vector callout]. 1051 | You should read the C-source to know what happens. If you do not understand 1052 | it - {b don't use this function!} *) 1053 | 1054 | val make_ovector : regexp -> int * int array 1055 | (** [make_ovector regexp] calculates the tuple (subgroups2, ovector) which is 1056 | the number of subgroup offsets and the offset array. *) 1057 | 1058 | val unsafe_pcre_dfa_exec : 1059 | irflag -> 1060 | regexp -> 1061 | pos:int -> 1062 | subj_start:int -> 1063 | subj:string -> 1064 | int array -> 1065 | callout option -> 1066 | workspace:int array -> 1067 | unit 1068 | (** [unsafe_pcre_dfa_exec flags rex ~pos ~subj_start ~subj offset_vector callout 1069 | ~workpace]. You should read the C-source to know what happens. If you do 1070 | not understand it - {b don't use this function!} *) 1071 | -------------------------------------------------------------------------------- /lib/pcre_stubs.c: -------------------------------------------------------------------------------- 1 | /* 2 | PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml 3 | 4 | Copyright © 1999- Markus Mottl 5 | 6 | This library is free software; you can redistribute it and/or 7 | modify it under the terms of the GNU Lesser General Public 8 | License as published by the Free Software Foundation; either 9 | version 2.1 of the License, or (at your option) any later version. 10 | 11 | This library is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | Lesser General Public License for more details. 15 | 16 | You should have received a copy of the GNU Lesser General Public 17 | License along with this library; if not, write to the Free Software 18 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 | */ 20 | 21 | #if defined(_WIN32) 22 | #define snprintf _snprintf 23 | #if defined(_DLL) 24 | #define PCREextern __declspec(dllexport) 25 | #else 26 | #define PCREextern 27 | #endif 28 | #endif 29 | 30 | #if defined(_WIN64) 31 | typedef long long *caml_int_ptr; 32 | #else 33 | typedef long *caml_int_ptr; 34 | #endif 35 | 36 | #if __GNUC__ >= 3 37 | #define __unused __attribute__((unused)) 38 | #else 39 | #define __unused 40 | #endif 41 | 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | 54 | #if (OCAML_VERSION_MAJOR == 4) && (OCAML_VERSION_MINOR < 12) 55 | #define Val_none (Val_long(0)) 56 | #define Is_none(v) ((v) == Val_none) 57 | 58 | static value caml_alloc_some(value v) { 59 | CAMLparam1(v); 60 | value some = caml_alloc_small(1, 0); 61 | Field(some, 0) = v; 62 | CAMLreturn(some); 63 | } 64 | #endif 65 | 66 | #include 67 | 68 | /* Error codes as defined for pcre 7.9, undefined in pcre 4.5 */ 69 | #ifndef PCRE_ERROR_PARTIAL 70 | #define PCRE_ERROR_PARTIAL (-12) 71 | #endif 72 | #ifndef PCRE_ERROR_BADPARTIAL 73 | #define PCRE_ERROR_BADPARTIAL (-13) 74 | #endif 75 | #ifndef PCRE_ERROR_RECURSIONLIMIT 76 | #define PCRE_ERROR_RECURSIONLIMIT (-21) 77 | #endif 78 | 79 | /* Make sure to define JIT-compilation flag appropriately if unsupported */ 80 | #ifndef PCRE_STUDY_JIT_COMPILE 81 | #define PCRE_STUDY_JIT_COMPILE 0 82 | #endif 83 | 84 | typedef const unsigned char *chartables; /* Type of chartable sets */ 85 | 86 | /* Contents of callout data */ 87 | struct cod { 88 | long subj_start; /* Start of subject string */ 89 | value *v_substrings_p; /* Pointer to substrings matched so far */ 90 | value *v_cof_p; /* Pointer to callout function */ 91 | value v_exn; /* Possible exception raised by callout function */ 92 | }; 93 | 94 | /* Cache for exceptions */ 95 | static const value *pcre_exc_Error = NULL; /* Exception [Error] */ 96 | static const value *pcre_exc_Backtrack = NULL; /* Exception [Backtrack] */ 97 | 98 | /* Cache for polymorphic variants */ 99 | static value var_Start_only; /* Variant [`Start_only] */ 100 | static value var_ANCHORED; /* Variant [`ANCHORED] */ 101 | static value var_Char; /* Variant [`Char char] */ 102 | static value var_Not_studied; /* Variant [`Not_studied] */ 103 | static value var_Studied; /* Variant [`Studied] */ 104 | static value var_Optimal; /* Variant [`Optimal] */ 105 | 106 | /* Data associated with OCaml values of PCRE regular expression */ 107 | struct pcre_ocaml_regexp { 108 | pcre *rex; 109 | pcre_extra *extra; 110 | int studied; 111 | }; 112 | 113 | #define Pcre_ocaml_regexp_val(v) \ 114 | ((struct pcre_ocaml_regexp *)Data_custom_val(v)) 115 | 116 | #define get_rex(v) Pcre_ocaml_regexp_val(v)->rex 117 | #define get_extra(v) Pcre_ocaml_regexp_val(v)->extra 118 | #define get_studied(v) Pcre_ocaml_regexp_val(v)->studied 119 | 120 | #define set_rex(v, r) Pcre_ocaml_regexp_val(v)->rex = r 121 | #define set_extra(v, e) Pcre_ocaml_regexp_val(v)->extra = e 122 | #define set_studied(v, s) Pcre_ocaml_regexp_val(v)->studied = s 123 | 124 | /* Data associated with OCaml values of PCRE tables */ 125 | struct pcre_ocaml_tables { 126 | chartables tables; 127 | }; 128 | 129 | #define Pcre_ocaml_tables_val(v) \ 130 | ((struct pcre_ocaml_tables *)Data_custom_val(v)) 131 | 132 | #define get_tables(v) Pcre_ocaml_tables_val(v)->tables 133 | #define set_tables(v, t) Pcre_ocaml_tables_val(v)->tables = t 134 | 135 | /* Converts subject offsets from C-integers to OCaml-Integers. 136 | 137 | This is a bit tricky, because there are 32- and 64-bit platforms around 138 | and OCaml chooses the larger possibility for representing integers when 139 | available (also in arrays) - not so the PCRE! 140 | */ 141 | static inline void copy_ovector(long subj_start, const int *ovec_src, 142 | caml_int_ptr ovec_dst, int subgroups2) { 143 | if (subj_start == 0) 144 | while (subgroups2--) { 145 | *ovec_dst = Val_int(*ovec_src); 146 | --ovec_src; 147 | --ovec_dst; 148 | } 149 | else 150 | while (subgroups2--) { 151 | *ovec_dst = 152 | (*ovec_src == -1) ? Val_int(-1) : Val_long(*ovec_src + subj_start); 153 | --ovec_src; 154 | --ovec_dst; 155 | } 156 | } 157 | 158 | /* Callout handler */ 159 | static int pcre_callout_handler(pcre_callout_block *cb) { 160 | struct cod *cod = (struct cod *)cb->callout_data; 161 | 162 | if (cod != NULL) { 163 | /* Callout is available */ 164 | value v_res; 165 | 166 | /* Set up parameter array */ 167 | value v_callout_data = caml_alloc_small(8, 0); 168 | 169 | const value v_substrings = *cod->v_substrings_p; 170 | 171 | const int capture_top = cb->capture_top; 172 | int subgroups2 = capture_top << 1; 173 | const int subgroups2_1 = subgroups2 - 1; 174 | 175 | const int *ovec_src = cb->offset_vector + subgroups2_1; 176 | caml_int_ptr ovec_dst = 177 | (long *)&Field(Field(v_substrings, 1), 0) + subgroups2_1; 178 | long subj_start = cod->subj_start; 179 | 180 | copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2); 181 | 182 | Field(v_callout_data, 0) = Val_int(cb->callout_number); 183 | Field(v_callout_data, 1) = v_substrings; 184 | Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start); 185 | Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start); 186 | Field(v_callout_data, 4) = Val_int(capture_top); 187 | Field(v_callout_data, 5) = Val_int(cb->capture_last); 188 | Field(v_callout_data, 6) = Val_int(cb->pattern_position); 189 | Field(v_callout_data, 7) = Val_int(cb->next_item_length); 190 | 191 | /* Perform callout */ 192 | v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data); 193 | 194 | if (Is_exception_result(v_res)) { 195 | /* Callout raised an exception */ 196 | const value v_exn = Extract_exception(v_res); 197 | if (Field(v_exn, 0) == *pcre_exc_Backtrack) 198 | return 1; 199 | cod->v_exn = v_exn; 200 | return PCRE_ERROR_CALLOUT; 201 | } 202 | } 203 | 204 | return 0; 205 | } 206 | 207 | /* Fetches the named OCaml-values + caches them and 208 | calculates + caches the variant hash values */ 209 | CAMLprim value pcre_ocaml_init(value __unused v_unit) { 210 | pcre_exc_Error = caml_named_value("Pcre.Error"); 211 | pcre_exc_Backtrack = caml_named_value("Pcre.Backtrack"); 212 | 213 | var_Start_only = caml_hash_variant("Start_only"); 214 | var_ANCHORED = caml_hash_variant("ANCHORED"); 215 | var_Char = caml_hash_variant("Char"); 216 | var_Not_studied = caml_hash_variant("Not_studied"); 217 | var_Studied = caml_hash_variant("Studied"); 218 | var_Optimal = caml_hash_variant("Optimal"); 219 | 220 | pcre_callout = &pcre_callout_handler; 221 | 222 | return Val_unit; 223 | } 224 | 225 | /* Finalizing deallocation function for chartable sets */ 226 | static void pcre_dealloc_tables(value v_tables) { 227 | (pcre_free)((void *)get_tables(v_tables)); 228 | } 229 | 230 | /* Finalizing deallocation function for compiled regular expressions */ 231 | static void pcre_dealloc_regexp(value v_rex) { 232 | void *extra = get_extra(v_rex); 233 | if (extra != NULL) 234 | #ifdef PCRE_STUDY_JIT_COMPILE 235 | pcre_free_study(extra); 236 | #else 237 | pcre_free(extra); 238 | #endif 239 | (pcre_free)(get_rex(v_rex)); 240 | } 241 | 242 | /* Raising exceptions */ 243 | 244 | CAMLnoreturn_start static inline void 245 | raise_pcre_error(value v_arg) CAMLnoreturn_end; 246 | 247 | CAMLnoreturn_start static inline void raise_partial(void) CAMLnoreturn_end; 248 | 249 | CAMLnoreturn_start static inline void raise_bad_partial(void) CAMLnoreturn_end; 250 | 251 | CAMLnoreturn_start static inline void raise_bad_utf8(void) CAMLnoreturn_end; 252 | 253 | CAMLnoreturn_start static inline void 254 | raise_bad_utf8_offset(void) CAMLnoreturn_end; 255 | 256 | CAMLnoreturn_start static inline void raise_match_limit(void) CAMLnoreturn_end; 257 | 258 | CAMLnoreturn_start static inline void 259 | raise_recursion_limit(void) CAMLnoreturn_end; 260 | 261 | CAMLnoreturn_start static inline void 262 | raise_workspace_size(void) CAMLnoreturn_end; 263 | 264 | CAMLnoreturn_start static inline void 265 | raise_bad_pattern(const char *msg, int pos) CAMLnoreturn_end; 266 | 267 | CAMLnoreturn_start static inline void 268 | raise_internal_error(char *msg) CAMLnoreturn_end; 269 | 270 | static inline void raise_pcre_error(value v_arg) { 271 | caml_raise_with_arg(*pcre_exc_Error, v_arg); 272 | } 273 | 274 | static inline void raise_partial(void) { raise_pcre_error(Val_int(0)); } 275 | static inline void raise_bad_partial(void) { raise_pcre_error(Val_int(1)); } 276 | static inline void raise_bad_utf8(void) { raise_pcre_error(Val_int(2)); } 277 | static inline void raise_bad_utf8_offset(void) { raise_pcre_error(Val_int(3)); } 278 | static inline void raise_match_limit(void) { raise_pcre_error(Val_int(4)); } 279 | static inline void raise_recursion_limit(void) { raise_pcre_error(Val_int(5)); } 280 | static inline void raise_workspace_size(void) { raise_pcre_error(Val_int(6)); } 281 | 282 | static inline void raise_bad_pattern(const char *msg, int pos) { 283 | CAMLparam0(); 284 | CAMLlocal1(v_msg); 285 | value v_arg; 286 | v_msg = caml_copy_string(msg); 287 | v_arg = caml_alloc_small(2, 0); 288 | Field(v_arg, 0) = v_msg; 289 | Field(v_arg, 1) = Val_int(pos); 290 | raise_pcre_error(v_arg); 291 | CAMLnoreturn; 292 | } 293 | 294 | static inline void raise_internal_error(char *msg) { 295 | CAMLparam0(); 296 | CAMLlocal1(v_msg); 297 | value v_arg; 298 | v_msg = caml_copy_string(msg); 299 | v_arg = caml_alloc_small(1, 1); 300 | Field(v_arg, 0) = v_msg; 301 | raise_pcre_error(v_arg); 302 | CAMLnoreturn; 303 | } 304 | 305 | /* PCRE pattern compilation */ 306 | 307 | static struct custom_operations regexp_ops = { 308 | "pcre_ocaml_regexp", pcre_dealloc_regexp, 309 | custom_compare_default, custom_hash_default, 310 | custom_serialize_default, custom_deserialize_default, 311 | custom_compare_ext_default, custom_fixed_length_default}; 312 | 313 | /* Makes compiled regular expression from compilation options, an optional 314 | value of chartables and the pattern string */ 315 | 316 | CAMLprim value pcre_compile_stub(intnat v_opt, value v_tables, value v_pat) { 317 | value v_rex; /* Final result -> value of type [regexp] */ 318 | size_t regexp_size, ocaml_regexp_size = sizeof(struct pcre_ocaml_regexp); 319 | const char *error = NULL; /* pointer to possible error message */ 320 | int error_ofs = 0; /* offset in the pattern at which error occurred */ 321 | 322 | /* If v_tables = [None], then pointer to tables is NULL, otherwise 323 | set it to the appropriate value */ 324 | chartables tables = Is_none(v_tables) ? NULL : get_tables(Field(v_tables, 0)); 325 | 326 | /* Compiles the pattern */ 327 | pcre *regexp = 328 | pcre_compile(String_val(v_pat), v_opt, &error, &error_ofs, tables); 329 | 330 | /* Raises appropriate exception with [BadPattern] if the pattern 331 | could not be compiled */ 332 | if (regexp == NULL) 333 | raise_bad_pattern(error, error_ofs); 334 | 335 | /* It's unknown at this point whether the user will study the pattern 336 | later (probably), or if JIT compilation is going to be used, but we 337 | have to decide on a size. Tests with some simple patterns indicate a 338 | roughly 50% increase in size when studying without JIT. A factor of 339 | two times hence seems like a reasonable bound to use here. */ 340 | pcre_fullinfo(regexp, NULL, PCRE_INFO_SIZE, ®exp_size); 341 | v_rex = 342 | caml_alloc_custom_mem(®exp_ops, ocaml_regexp_size, 2 * regexp_size); 343 | 344 | set_rex(v_rex, regexp); 345 | set_extra(v_rex, NULL); 346 | set_studied(v_rex, 0); 347 | 348 | return v_rex; 349 | } 350 | 351 | CAMLprim value pcre_compile_stub_bc(value v_opt, value v_tables, value v_pat) { 352 | return pcre_compile_stub(Int_val(v_opt), v_tables, v_pat); 353 | } 354 | 355 | /* Studies a regexp */ 356 | CAMLprim value pcre_study_stub(value v_rex, value v_jit_compile) { 357 | /* If it has not yet been studied */ 358 | if (!get_studied(v_rex)) { 359 | const char *error = NULL; 360 | int flags = Bool_val(v_jit_compile) ? PCRE_STUDY_JIT_COMPILE : 0; 361 | pcre_extra *extra = pcre_study(get_rex(v_rex), flags, &error); 362 | if (error != NULL) 363 | caml_invalid_argument((char *)error); 364 | set_extra(v_rex, extra); 365 | set_studied(v_rex, 1); 366 | } 367 | return v_rex; 368 | } 369 | 370 | /* Gets the match limit recursion of a regular expression if it exists */ 371 | CAMLprim value pcre_get_match_limit_recursion_stub(value v_rex) { 372 | pcre_extra *extra = get_extra(v_rex); 373 | if (extra == NULL) 374 | return Val_none; 375 | if (extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) { 376 | value v_lim = Val_int(extra->match_limit_recursion); 377 | value v_res = caml_alloc_small(1, 0); 378 | Field(v_res, 0) = v_lim; 379 | return v_res; 380 | } 381 | return Val_none; 382 | } 383 | 384 | /* Gets the match limit of a regular expression if it exists */ 385 | CAMLprim value pcre_get_match_limit_stub(value v_rex) { 386 | pcre_extra *extra = get_extra(v_rex); 387 | if (extra == NULL) 388 | return Val_none; 389 | if (extra->flags & PCRE_EXTRA_MATCH_LIMIT) { 390 | value v_lim = Val_int(extra->match_limit); 391 | value v_res = caml_alloc_small(1, 0); 392 | Field(v_res, 0) = v_lim; 393 | return v_res; 394 | } 395 | return Val_none; 396 | } 397 | 398 | /* Sets a match limit for a regular expression imperatively */ 399 | 400 | CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, intnat v_lim) { 401 | pcre_extra *extra = get_extra(v_rex); 402 | if (extra == NULL) { 403 | extra = pcre_malloc(sizeof(pcre_extra)); 404 | extra->flags = PCRE_EXTRA_MATCH_LIMIT; 405 | set_extra(v_rex, extra); 406 | } else { 407 | unsigned long *flags_ptr = &extra->flags; 408 | *flags_ptr = PCRE_EXTRA_MATCH_LIMIT | *flags_ptr; 409 | } 410 | extra->match_limit = v_lim; 411 | return v_rex; 412 | } 413 | 414 | CAMLprim value pcre_set_imp_match_limit_stub_bc(value v_rex, value v_lim) { 415 | return pcre_set_imp_match_limit_stub(v_rex, Int_val(v_lim)); 416 | } 417 | 418 | /* Sets a match limit recursion for a regular expression imperatively */ 419 | 420 | CAMLprim value pcre_set_imp_match_limit_recursion_stub(value v_rex, 421 | intnat v_lim) { 422 | pcre_extra *extra = get_extra(v_rex); 423 | if (extra == NULL) { 424 | extra = pcre_malloc(sizeof(pcre_extra)); 425 | extra->flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION; 426 | set_extra(v_rex, extra); 427 | } else { 428 | unsigned long *flags_ptr = &extra->flags; 429 | *flags_ptr = PCRE_EXTRA_MATCH_LIMIT_RECURSION | *flags_ptr; 430 | } 431 | extra->match_limit_recursion = v_lim; 432 | return v_rex; 433 | } 434 | 435 | CAMLprim value pcre_set_imp_match_limit_recursion_stub_bc(value v_rex, 436 | value v_lim) { 437 | return pcre_set_imp_match_limit_recursion_stub(v_rex, Int_val(v_lim)); 438 | } 439 | 440 | /* Performs the call to the pcre_fullinfo function */ 441 | static inline int pcre_fullinfo_stub(value v_rex, int what, void *where) { 442 | return pcre_fullinfo(get_rex(v_rex), get_extra(v_rex), what, where); 443 | } 444 | 445 | /* Some stubs for info-functions */ 446 | 447 | /* Generic macro for getting integer results from pcre_fullinfo */ 448 | #define MAKE_INTNAT_INFO(tp, name, option) \ 449 | CAMLprim intnat pcre_##name##_stub(value v_rex) { \ 450 | tp options; \ 451 | const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_##option, &options); \ 452 | if (ret != 0) \ 453 | raise_internal_error("pcre_##name##_stub"); \ 454 | return options; \ 455 | } \ 456 | \ 457 | CAMLprim value pcre_##name##_stub_bc(value v_rex) { \ 458 | return Val_int(pcre_##name##_stub(v_rex)); \ 459 | } 460 | 461 | MAKE_INTNAT_INFO(unsigned long, options, OPTIONS) 462 | MAKE_INTNAT_INFO(size_t, size, SIZE) 463 | MAKE_INTNAT_INFO(size_t, studysize, STUDYSIZE) 464 | MAKE_INTNAT_INFO(int, capturecount, CAPTURECOUNT) 465 | MAKE_INTNAT_INFO(int, backrefmax, BACKREFMAX) 466 | MAKE_INTNAT_INFO(int, namecount, NAMECOUNT) 467 | MAKE_INTNAT_INFO(int, nameentrysize, NAMEENTRYSIZE) 468 | 469 | CAMLprim value pcre_firstbyte_stub(value v_rex) { 470 | int firstbyte; 471 | const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte); 472 | 473 | if (ret != 0) 474 | raise_internal_error("pcre_firstbyte_stub"); 475 | 476 | switch (firstbyte) { 477 | case -1: 478 | return var_Start_only; 479 | break; /* [`Start_only] */ 480 | case -2: 481 | return var_ANCHORED; 482 | break; /* [`ANCHORED] */ 483 | default: 484 | if (firstbyte < 0) /* Should not happen */ 485 | raise_internal_error("pcre_firstbyte_stub"); 486 | else { 487 | value v_firstbyte; 488 | /* Allocates the non-constant constructor [`Char of char] and fills 489 | in the appropriate value */ 490 | v_firstbyte = caml_alloc_small(2, 0); 491 | Field(v_firstbyte, 0) = var_Char; 492 | Field(v_firstbyte, 1) = Val_int(firstbyte); 493 | return v_firstbyte; 494 | } 495 | } 496 | } 497 | 498 | CAMLprim value pcre_firsttable_stub(value v_rex) { 499 | CAMLparam1(v_rex); 500 | const unsigned char *ftable; 501 | int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *)&ftable); 502 | if (ret != 0) 503 | raise_internal_error("pcre_firsttable_stub"); 504 | CAMLreturn((ftable == NULL) ? Val_none 505 | : caml_alloc_some(caml_alloc_initialized_string( 506 | 32, (char *)ftable))); 507 | } 508 | 509 | CAMLprim value pcre_lastliteral_stub(value v_rex) { 510 | int lastliteral; 511 | const int ret = 512 | pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL, &lastliteral); 513 | if (ret != 0) 514 | raise_internal_error("pcre_lastliteral_stub"); 515 | if (lastliteral == -1) 516 | return Val_none; 517 | if (lastliteral < 0) 518 | raise_internal_error("pcre_lastliteral_stub"); 519 | else 520 | return caml_alloc_some(Val_int(lastliteral)); 521 | } 522 | 523 | CAMLprim value pcre_study_stat_stub(value v_rex) { 524 | /* Generates the appropriate constant constructor [`Optimal] or 525 | [`Studied] if regexp has already been studied */ 526 | if (get_studied(v_rex)) 527 | return (get_extra(v_rex) == NULL) ? var_Optimal : var_Studied; 528 | 529 | return var_Not_studied; /* otherwise [`Not_studied] */ 530 | } 531 | 532 | CAMLnoreturn_start static inline void 533 | handle_exec_error(char *loc, const int ret) CAMLnoreturn_end; 534 | 535 | static inline void handle_exec_error(char *loc, const int ret) { 536 | switch (ret) { 537 | /* Dedicated exceptions */ 538 | case PCRE_ERROR_NOMATCH: 539 | caml_raise_not_found(); 540 | case PCRE_ERROR_PARTIAL: 541 | raise_partial(); 542 | case PCRE_ERROR_MATCHLIMIT: 543 | raise_match_limit(); 544 | case PCRE_ERROR_BADPARTIAL: 545 | raise_bad_partial(); 546 | case PCRE_ERROR_BADUTF8: 547 | raise_bad_utf8(); 548 | case PCRE_ERROR_BADUTF8_OFFSET: 549 | raise_bad_utf8_offset(); 550 | case PCRE_ERROR_RECURSIONLIMIT: 551 | raise_recursion_limit(); 552 | case PCRE_ERROR_DFA_WSSIZE: 553 | raise_workspace_size(); 554 | /* Unknown error */ 555 | default: { 556 | char err_buf[100]; 557 | snprintf(err_buf, 100, "%s: unhandled PCRE error code: %d", loc, ret); 558 | raise_internal_error(err_buf); 559 | } 560 | } 561 | } 562 | 563 | static inline void handle_pcre_exec_result(int *ovec, value v_ovec, 564 | long ovec_len, long subj_start, 565 | int ret) { 566 | caml_int_ptr ocaml_ovec = (caml_int_ptr)&Field(v_ovec, 0); 567 | const int subgroups2 = ret * 2; 568 | const int subgroups2_1 = subgroups2 - 1; 569 | const int *ovec_src = ovec + subgroups2_1; 570 | caml_int_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3; 571 | caml_int_ptr ovec_dst = ocaml_ovec + subgroups2_1; 572 | copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2); 573 | while (++ovec_dst < ovec_clear_stop) 574 | *ovec_dst = -1; 575 | } 576 | 577 | /* Executes a pattern match with runtime options, a regular expression, a 578 | matching position, the start of the subject string, a subject string, 579 | a number of subgroup offsets, an offset vector and an optional callout 580 | function */ 581 | 582 | CAMLprim value pcre_exec_stub0(intnat v_opt, value v_rex, intnat v_pos, 583 | intnat v_subj_start, value v_subj, value v_ovec, 584 | value v_maybe_cof, value v_workspace) { 585 | int ret; 586 | int is_dfa = v_workspace != (value)NULL; 587 | long pos = v_pos, len = caml_string_length(v_subj), subj_start = v_subj_start; 588 | long ovec_len = Wosize_val(v_ovec); 589 | 590 | if (pos > len || pos < subj_start) 591 | caml_invalid_argument("Pcre.pcre_exec_stub: illegal position"); 592 | 593 | if (subj_start > len || subj_start < 0) 594 | caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start"); 595 | 596 | pos -= subj_start; 597 | len -= subj_start; 598 | 599 | { 600 | const pcre *code = get_rex(v_rex); /* Compiled pattern */ 601 | const pcre_extra *extra = get_extra(v_rex); /* Extra info */ 602 | const char *ocaml_subj = 603 | String_val(v_subj) + subj_start; /* Subject string */ 604 | const int opt = v_opt; /* Runtime options */ 605 | 606 | /* Special case when no callout functions specified */ 607 | if (Is_none(v_maybe_cof)) { 608 | int *ovec = (int *)&Field(v_ovec, 0); 609 | 610 | /* Performs the match */ 611 | if (is_dfa) 612 | ret = pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, 613 | ovec_len, (int *)&Field(v_workspace, 0), 614 | Wosize_val(v_workspace)); 615 | else 616 | ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len); 617 | 618 | if (ret < 0) 619 | handle_exec_error("pcre_exec_stub", ret); 620 | else 621 | handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); 622 | } 623 | 624 | /* There are callout functions */ 625 | else { 626 | value v_cof = Field(v_maybe_cof, 0); 627 | value v_substrings; 628 | char *subj = caml_stat_alloc(sizeof(char) * len); 629 | int *ovec = caml_stat_alloc(sizeof(int) * ovec_len); 630 | int workspace_len; 631 | int *workspace; 632 | struct cod cod = {0, (value *)NULL, (value *)NULL, (value)NULL}; 633 | struct pcre_extra new_extra = 634 | #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION 635 | #ifdef PCRE_EXTRA_MARK 636 | #ifdef PCRE_EXTRA_EXECUTABLE_JIT 637 | {PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL}; 638 | #else 639 | {PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL}; 640 | #endif 641 | #else 642 | {PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0}; 643 | #endif 644 | #else 645 | {PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL}; 646 | #endif 647 | 648 | cod.subj_start = subj_start; 649 | memcpy(subj, ocaml_subj, len); 650 | 651 | Begin_roots4(v_rex, v_cof, v_substrings, v_ovec); 652 | Begin_roots1(v_subj); 653 | v_substrings = caml_alloc_small(2, 0); 654 | End_roots(); 655 | 656 | Field(v_substrings, 0) = v_subj; 657 | Field(v_substrings, 1) = v_ovec; 658 | 659 | cod.v_substrings_p = &v_substrings; 660 | cod.v_cof_p = &v_cof; 661 | new_extra.callout_data = &cod; 662 | 663 | if (extra != NULL) { 664 | new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags; 665 | new_extra.study_data = extra->study_data; 666 | new_extra.match_limit = extra->match_limit; 667 | new_extra.tables = extra->tables; 668 | #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION 669 | new_extra.match_limit_recursion = extra->match_limit_recursion; 670 | #endif 671 | } 672 | 673 | if (is_dfa) { 674 | workspace_len = Wosize_val(v_workspace); 675 | workspace = caml_stat_alloc(sizeof(int) * workspace_len); 676 | ret = pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len, 677 | (int *)&Field(v_workspace, 0), workspace_len); 678 | } else 679 | ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len); 680 | 681 | caml_stat_free(subj); 682 | End_roots(); 683 | 684 | if (ret < 0) { 685 | if (is_dfa) 686 | caml_stat_free(workspace); 687 | caml_stat_free(ovec); 688 | if (ret == PCRE_ERROR_CALLOUT) 689 | caml_raise(cod.v_exn); 690 | else 691 | handle_exec_error("pcre_exec_stub(callout)", ret); 692 | } else { 693 | handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); 694 | if (is_dfa) { 695 | caml_int_ptr ocaml_workspace_dst = 696 | (caml_int_ptr)&Field(v_workspace, 0); 697 | const int *workspace_src = workspace; 698 | const int *workspace_src_stop = workspace + workspace_len; 699 | while (workspace_src != workspace_src_stop) { 700 | *ocaml_workspace_dst = *workspace_src; 701 | ocaml_workspace_dst++; 702 | workspace_src++; 703 | } 704 | caml_stat_free(workspace); 705 | } 706 | caml_stat_free(ovec); 707 | } 708 | } 709 | } 710 | 711 | return Val_unit; 712 | } 713 | 714 | CAMLprim value pcre_exec_stub(intnat v_opt, value v_rex, intnat v_pos, 715 | intnat v_subj_start, value v_subj, value v_ovec, 716 | value v_maybe_cof) { 717 | return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj, v_ovec, 718 | v_maybe_cof, (value)NULL); 719 | } 720 | 721 | /* Byte-code hook for pcre_exec_stub 722 | Needed, because there are more than 5 arguments */ 723 | CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn) { 724 | return pcre_exec_stub0(Int_val(argv[0]), argv[1], Int_val(argv[2]), 725 | Int_val(argv[3]), argv[4], argv[5], argv[6], 726 | (value)NULL); 727 | } 728 | 729 | /* Byte-code hook for pcre_dfa_exec_stub 730 | Needed, because there are more than 5 arguments */ 731 | CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn) { 732 | return pcre_exec_stub0(Int_val(argv[0]), argv[1], Int_val(argv[2]), 733 | Int_val(argv[3]), argv[4], argv[5], argv[6], argv[7]); 734 | } 735 | 736 | static struct custom_operations tables_ops = { 737 | "pcre_ocaml_tables", pcre_dealloc_tables, 738 | custom_compare_default, custom_hash_default, 739 | custom_serialize_default, custom_deserialize_default, 740 | custom_compare_ext_default, custom_fixed_length_default}; 741 | 742 | /* Generates a new set of chartables for the current locale (see man 743 | page of PCRE */ 744 | CAMLprim value pcre_maketables_stub(value __unused v_unit) { 745 | /* According to testing with `malloc_size`, it seems that a typical set of 746 | tables will require about 1536 bytes of memory. This may or may not 747 | be true on other platforms or for all versions of PCRE. Since there 748 | is apparently no reliable way of finding out, 1536 is probably a good 749 | default value. */ 750 | size_t tables_size = sizeof(struct pcre_ocaml_tables); 751 | const value v_tables = caml_alloc_custom_mem(&tables_ops, tables_size, 1536); 752 | set_tables(v_tables, pcre_maketables()); 753 | return v_tables; 754 | } 755 | 756 | /* Wraps around the isspace-function */ 757 | CAMLprim value pcre_isspace_stub(value v_c) { 758 | return Val_bool(isspace(Int_val(v_c))); 759 | } 760 | 761 | /* Returns number of substring associated with a name */ 762 | 763 | CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name) { 764 | const int ret = pcre_get_stringnumber(get_rex(v_rex), String_val(v_name)); 765 | if (ret == PCRE_ERROR_NOSUBSTRING) 766 | caml_invalid_argument("Named string not found"); 767 | 768 | return ret; 769 | } 770 | 771 | CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name) { 772 | return Val_int(pcre_get_stringnumber_stub(v_rex, v_name)); 773 | } 774 | 775 | /* Returns array of names of named substrings in a regexp */ 776 | CAMLprim value pcre_names_stub(value v_rex) { 777 | CAMLparam1(v_rex); 778 | CAMLlocal1(v_res); 779 | int name_count; 780 | int entry_size; 781 | const char *tbl_ptr; 782 | int i; 783 | 784 | int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count); 785 | if (ret != 0) 786 | raise_internal_error("pcre_names_stub: namecount"); 787 | 788 | ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size); 789 | if (ret != 0) 790 | raise_internal_error("pcre_names_stub: nameentrysize"); 791 | 792 | ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr); 793 | if (ret != 0) 794 | raise_internal_error("pcre_names_stub: nametable"); 795 | 796 | v_res = caml_alloc(name_count, 0); 797 | 798 | for (i = 0; i < name_count; ++i) { 799 | value v_name = caml_copy_string(tbl_ptr + 2); 800 | Store_field(v_res, i, v_name); 801 | tbl_ptr += entry_size; 802 | } 803 | 804 | CAMLreturn(v_res); 805 | } 806 | 807 | /* Generic stub for getting integer results from pcre_config */ 808 | static inline int pcre_config_int(int what) { 809 | int ret; 810 | pcre_config(what, (void *)&ret); 811 | return ret; 812 | } 813 | 814 | /* Generic stub for getting long integer results from pcre_config */ 815 | static inline int pcre_config_long(int what) { 816 | long ret; 817 | pcre_config(what, (void *)&ret); 818 | return ret; 819 | } 820 | 821 | /* Some stubs for config-functions */ 822 | 823 | /* Makes OCaml-string from PCRE-version */ 824 | CAMLprim value pcre_version_stub(value __unused v_unit) { 825 | return caml_copy_string((char *)pcre_version()); 826 | } 827 | 828 | /* Returns boolean indicating UTF8-support */ 829 | CAMLprim value pcre_config_utf8_stub(value __unused v_unit) { 830 | return Val_bool(pcre_config_int(PCRE_CONFIG_UTF8)); 831 | } 832 | 833 | /* Returns character used as newline */ 834 | CAMLprim value pcre_config_newline_stub(value __unused v_unit) { 835 | return Val_int(pcre_config_int(PCRE_CONFIG_NEWLINE)); 836 | } 837 | 838 | /* Returns number of bytes used for internal linkage of regular expressions */ 839 | 840 | CAMLprim intnat pcre_config_link_size_stub(value __unused v_unit) { 841 | return pcre_config_int(PCRE_CONFIG_LINK_SIZE); 842 | } 843 | 844 | CAMLprim value pcre_config_link_size_stub_bc(value v_unit) { 845 | return Val_int(pcre_config_link_size_stub(v_unit)); 846 | } 847 | 848 | /* Returns default limit for calls to internal matching function */ 849 | 850 | CAMLprim intnat pcre_config_match_limit_stub(value __unused v_unit) { 851 | return pcre_config_long(PCRE_CONFIG_MATCH_LIMIT); 852 | } 853 | 854 | CAMLprim value pcre_config_match_limit_stub_bc(value v_unit) { 855 | return Val_int(pcre_config_match_limit_stub(v_unit)); 856 | } 857 | 858 | /* Returns default limit for recursive calls to internal matching function */ 859 | 860 | CAMLprim intnat pcre_config_match_limit_recursion_stub(value __unused v_unit) { 861 | return pcre_config_long(PCRE_CONFIG_MATCH_LIMIT_RECURSION); 862 | } 863 | 864 | CAMLprim value pcre_config_match_limit_recursion_stub_bc(value v_unit) { 865 | return Val_int(pcre_config_match_limit_recursion_stub(v_unit)); 866 | } 867 | 868 | /* Returns boolean indicating use of stack recursion */ 869 | CAMLprim value pcre_config_stackrecurse_stub(value __unused v_unit) { 870 | return Val_bool(pcre_config_int(PCRE_CONFIG_STACKRECURSE)); 871 | } 872 | -------------------------------------------------------------------------------- /pa_ppx_test/Makefile: -------------------------------------------------------------------------------- 1 | NOT_OCAMLFIND=not-ocamlfind 2 | 3 | bootstrap: ../test/pcre_tests.ml 4 | 5 | ../test/%.ml: %.ml 6 | $(NOT_OCAMLFIND) preprocess \ 7 | -package pa_ppx_regexp,camlp5.pr_o -ppopt -pa_ppx_regexp-nostatic \ 8 | -syntax camlp5o $< > $@.NEW && \ 9 | mv $@.NEW $@ 10 | 11 | .SUFFIXES: .ml 12 | -------------------------------------------------------------------------------- /pa_ppx_test/pcre_tests.ml: -------------------------------------------------------------------------------- 1 | (**pp -syntax camlp5o -package pa_ppx.deriving_plugins.std *) 2 | 3 | open OUnit2 4 | 5 | let test_special_char_regexps ctxt = 6 | (); 7 | assert_equal "\n" ([%match {|\n$|} / s exc pcre strings] "\n"); 8 | assert_equal "" ([%subst {|\n+$|} / {||} / s pcre] "\n\n") 9 | 10 | let test_pcre_simple_match ctxt = 11 | (); 12 | assert_equal "abc" 13 | (Pcre.get_substring ([%match "abc" / exc raw pcre] "abc") 0); 14 | assert_equal (Some "abc") ([%match "abc" / pcre] "abc"); 15 | assert_equal (Some "abc") ([%match "abc" / strings pcre] "abc"); 16 | assert_equal true ([%match "abc" / pred pcre] "abc"); 17 | assert_equal false ([%match "abc" / pred pcre] "abd"); 18 | assert_equal None ([%match "abc" / pcre] "abd"); 19 | assert_raises Not_found (fun () -> [%match "abc" / exc pcre] "abd"); 20 | assert_raises Not_found (fun () -> [%match "abc" / exc strings pcre] "abd"); 21 | assert_equal None ([%match "abc" / strings pcre] "abd"); 22 | assert_equal "abc" ([%match "abc" / exc strings pcre] "abc"); 23 | assert_equal ("abc", Some "b") ([%match "a(b)c" / exc strings pcre] "abc"); 24 | assert_equal ("ac", None) ([%match "a(?:(b)?)c" / exc strings pcre] "ac"); 25 | assert_equal "abc" 26 | (Pcre.get_substring ([%match "ABC" / exc raw i pcre] "abc") 0); 27 | assert_equal 28 | ("abc", Some "a", Some "b", Some "c") 29 | ([%match "(a)(b)(c)" / exc strings pcre] "abc") 30 | 31 | let test_pcre_selective_match ctxt = 32 | (); 33 | assert_equal ("abc", Some "b") 34 | ([%match "a(b)c" / exc strings (!0, 1) pcre] "abc"); 35 | assert_equal ("abc", "b") ([%match "a(b)c" / exc strings (!0, !1) pcre] "abc"); 36 | assert_equal "b" ([%match "a(b)c" / exc strings !1 pcre] "abc"); 37 | assert_equal 38 | (Some ("abc", "b")) 39 | ([%match "a(b)c" / strings (!0, !1) pcre] "abc"); 40 | assert_equal ("ac", None) ([%match "a(b)?c" / exc strings (!0, 1) pcre] "ac"); 41 | assert_raises Not_found (fun _ -> 42 | [%match "a(b)?c" / exc strings (!0, !1) pcre] "ac"); 43 | assert_equal None ([%match "a(b)?c" / strings (!0, !1) pcre] "ac") 44 | 45 | let test_pcre_search ctxt = 46 | (); 47 | assert_equal "abc" ([%match "abc" / exc strings pcre] "zzzabc"); 48 | assert_equal None ([%match "^abc" / strings pcre] "zzzabc") 49 | 50 | let test_pcre_single ctxt = 51 | (); 52 | assert_equal None ([%match ".+" / pcre] "\n\n"); 53 | assert_equal "\n\n" ([%match ".+" / s exc pcre strings] "\n\n"); 54 | assert_equal None ([%match ".+" / m pcre strings] "\n\n"); 55 | 56 | assert_equal None ([%match ".+" / pcre strings] "\n\n"); 57 | assert_equal (Some "\n\n") ([%match ".+" / s pcre strings] "\n\n"); 58 | assert_equal None ([%match ".+" / m pcre strings] "\n\n"); 59 | 60 | assert_equal "<>\ndef" ([%subst ".+" / {|<<$0>>|} / pcre] "abc\ndef"); 61 | assert_equal "<>" ([%subst ".+" / {|<<$0>>|} / s pcre] "abc\ndef"); 62 | assert_equal "<>\ndef" ([%subst ".+" / {|<<$0>>|} / m pcre] "abc\ndef"); 63 | 64 | assert_equal "<>\ndef" ([%subst ".*" / {|<<$0>>|} / pcre] "abc\ndef"); 65 | assert_equal "<><<>>\n<><<>>" 66 | ([%subst ".*" / {|<<$0>>|} / g pcre] "abc\ndef"); 67 | assert_equal "<>\n<>" 68 | ([%subst ".+" / {|<<$0>>|} / g pcre] "abc\ndef"); 69 | assert_equal "<>a\nc<>" 70 | ([%subst "a.c" / {|<<$0>>|} / g pcre] "abca\ncaec"); 71 | assert_equal "<><><>" 72 | ([%subst "a.c" / {|<<$0>>|} / g s pcre] "abca\ncaec") 73 | 74 | let test_pcre_multiline ctxt = 75 | (); 76 | assert_equal (Some "bar") ([%match ".+$" / strings pcre] "foo\nbar"); 77 | assert_equal (Some "foo") ([%match ".+$" / m strings pcre] "foo\nbar") 78 | 79 | let test_pcre_simple_split ctxt = 80 | (); 81 | assert_equal [ "bb" ] ([%split "a" / pcre] "bb") 82 | 83 | let test_pcre_delim_split_raw ctxt = 84 | let open Pcre in 85 | (); 86 | assert_equal 87 | [ Delim "a"; Text "b"; Delim "a"; Text "b" ] 88 | ([%split "a" / pcre raw] "ababa"); 89 | assert_equal 90 | [ Delim "a"; Text "b"; Delim "a"; Delim "a"; Text "b" ] 91 | ([%split "a" / pcre raw] "abaaba"); 92 | assert_equal 93 | [ 94 | Delim "a"; 95 | NoGroup; 96 | Text "b"; 97 | Delim "ac"; 98 | Group (1, "c"); 99 | Text "b"; 100 | Delim "a"; 101 | NoGroup; 102 | ] 103 | ([%split "a(c)?" / pcre raw] "abacba"); 104 | assert_equal 105 | [ 106 | Delim "ac"; 107 | Group (1, "c"); 108 | Text "b"; 109 | Delim "ac"; 110 | Group (1, "c"); 111 | Text "b"; 112 | Delim "ac"; 113 | Group (1, "c"); 114 | ] 115 | ([%split "a(c)" / pcre raw] "acbacbac"); 116 | assert_equal 117 | [ 118 | Delim "ac"; 119 | Group (1, "c"); 120 | Text "b"; 121 | Delim "ac"; 122 | Group (1, "c"); 123 | Text "b"; 124 | Delim "ac"; 125 | Group (1, "c"); 126 | ] 127 | ([%split "a(c)" / pcre raw] "acbacbac"); 128 | assert_equal 129 | [ 130 | Delim "a"; 131 | NoGroup; 132 | Text "b"; 133 | Delim "ac"; 134 | Group (1, "c"); 135 | Text "b"; 136 | Delim "a"; 137 | NoGroup; 138 | ] 139 | ([%split "a(c)?" / pcre raw] "abacba"); 140 | assert_equal 141 | [ Text "ab"; Delim "x"; Group (1, "x"); NoGroup; Text "cd" ] 142 | ([%split {|(x)|(u)|} / raw pcre] "abxcd"); 143 | assert_equal 144 | [ 145 | Text "ab"; 146 | Delim "x"; 147 | Group (1, "x"); 148 | NoGroup; 149 | Text "cd"; 150 | Delim "u"; 151 | NoGroup; 152 | Group (2, "u"); 153 | ] 154 | ([%split {|(x)|(u)|} / raw pcre] "abxcdu") 155 | 156 | let test_pcre_subst ctxt = 157 | (); 158 | assert_equal "$b" ([%subst "a(b)c" / {|$$$1|} / pcre] "abc"); 159 | assert_equal "$b" ([%subst "A(B)C" / {|$$$1|} / i pcre] "abc"); 160 | assert_equal "$babc" ([%subst "A(B)C" / {|$$$1|} / i pcre] "abcabc"); 161 | assert_equal "$b$b" ([%subst "A(B)C" / {|$$$1|} / g i pcre] "abcabc"); 162 | assert_equal "$b$b" ([%subst "A(B)C" / {|"$" ^ $1$|} / e g i pcre] "abcabc"); 163 | assert_equal "$$" ([%subst "A(B)C" / {|"$"|} / e g i pcre] "abcabc"); 164 | assert_equal "$$" ([%subst "A(B)C" / {|$$|} / g i pcre] "abcabc") 165 | 166 | let show_string_option = function 167 | | None -> "None" 168 | | Some s -> Printf.sprintf "Some %s" s 169 | 170 | let test_pcre_ocamlfind_bits ctxt = 171 | (); 172 | assert_equal ~printer:show_string_option (Some "-syntax camlp5o ") 173 | (snd 174 | ([%match {|^\(\*\*pp (.*?)\*\)|} / exc strings pcre] 175 | {|(**pp -syntax camlp5o *) 176 | |})) 177 | 178 | let pcre_envsubst envlookup s = 179 | let f s1 s2 = 180 | if s1 <> "" then envlookup s1 181 | else if s2 <> "" then envlookup s2 182 | else assert false 183 | in 184 | 185 | [%subst {|(?:\$\(([^)]+)\)|\$\{([^}]+)\})|} / {| f $1$ $2$ |} / g e pcre] s 186 | 187 | let test_pcre_envsubst_via_replace ctxt = 188 | let f = function 189 | | "A" -> "res1" 190 | | "B" -> "res2" 191 | | _ -> failwith "unexpected arg in envsubst" 192 | in 193 | assert_equal "...res1...res2..." (pcre_envsubst f {|...$(A)...${B}...|}) 194 | 195 | let suite = 196 | "Test pa_ppx_regexp" 197 | >::: [ 198 | "pcre only_regexps" >:: test_special_char_regexps; 199 | "pcre simple_match" >:: test_pcre_simple_match; 200 | "pcre selective_match" >:: test_pcre_selective_match; 201 | "pcre search" >:: test_pcre_search; 202 | "pcre single" >:: test_pcre_single; 203 | "pcre multiline" >:: test_pcre_multiline; 204 | "pcre simple_split" >:: test_pcre_simple_split; 205 | "pcre delim_split raw" >:: test_pcre_delim_split_raw; 206 | "pcre subst" >:: test_pcre_subst; 207 | "pcre ocamlfind bits" >:: test_pcre_ocamlfind_bits; 208 | "pcre envsubst via replace" >:: test_pcre_envsubst_via_replace; 209 | ] 210 | 211 | let _ = if not !Sys.interactive then run_test_tt_main suite else () 212 | -------------------------------------------------------------------------------- /pcre.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "Bindings to the Perl Compatibility Regular Expressions library" 4 | description: """ 5 | pcre-ocaml offers library functions for string pattern matching and 6 | substitution, similar to the functionality offered by the Perl language.""" 7 | maintainer: ["Markus Mottl "] 8 | authors: ["Markus Mottl "] 9 | license: "LGPL-2.1-or-later WITH OCaml-LGPL-linking-exception" 10 | homepage: "https://mmottl.github.io/pcre-ocaml" 11 | doc: "https://mmottl.github.io/pcre-ocaml/api" 12 | bug-reports: "https://github.com/mmottl/pcre-ocaml/issues" 13 | depends: [ 14 | "dune" {>= "2.7"} 15 | "ocaml" {>= "4.08"} 16 | "dune-configurator" 17 | "conf-libpcre" {build} 18 | "ounit2" {with-test} 19 | "odoc" {with-doc} 20 | ] 21 | build: [ 22 | ["dune" "subst"] {dev} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "@install" 31 | "@runtest" {with-test} 32 | "@doc" {with-doc} 33 | ] 34 | ] 35 | dev-repo: "git+https://github.com/mmottl/pcre-ocaml.git" 36 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name pcre_tests) 3 | (libraries pcre ounit2)) 4 | 5 | (env 6 | (dev 7 | (flags 8 | (:standard -w -27)))) 9 | -------------------------------------------------------------------------------- /test/pcre_tests.ml: -------------------------------------------------------------------------------- 1 | (**pp -syntax camlp5o -package pa_ppx.deriving_plugins.std *) 2 | 3 | open OUnit2 4 | 5 | let test_special_char_regexps ctxt = 6 | (); 7 | assert_equal "\n" 8 | ((let __re__ = Pcre.regexp ~flags:[ `DOTALL ] "\\n$" in 9 | fun __subj__ -> 10 | (fun __g__ -> Pcre.get_substring __g__ 0) 11 | (Pcre.exec ~rex:__re__ __subj__)) 12 | "\n"); 13 | assert_equal "" 14 | (Pcre.substitute_substrings_first 15 | ~rex:(Pcre.regexp ~flags:[ `DOTALL ] "\\n+$") 16 | ~subst:(fun __g__ -> String.concat "" []) 17 | "\n\n") 18 | 19 | let test_pcre_simple_match ctxt = 20 | (); 21 | assert_equal "abc" 22 | (Pcre.get_substring 23 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 24 | fun __subj__ -> Pcre.exec ~rex:__re__ __subj__) 25 | "abc") 26 | 0); 27 | assert_equal (Some "abc") 28 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 29 | fun __subj__ -> 30 | match 31 | Option.map 32 | (fun __g__ -> Pcre.get_substring __g__ 0) 33 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 34 | with 35 | | exception Not_found -> None 36 | | rv -> rv) 37 | "abc"); 38 | assert_equal (Some "abc") 39 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 40 | fun __subj__ -> 41 | match 42 | Option.map 43 | (fun __g__ -> Pcre.get_substring __g__ 0) 44 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 45 | with 46 | | exception Not_found -> None 47 | | rv -> rv) 48 | "abc"); 49 | assert_equal true 50 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 51 | fun __subj__ -> Pcre.pmatch ~rex:__re__ __subj__) 52 | "abc"); 53 | assert_equal false 54 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 55 | fun __subj__ -> Pcre.pmatch ~rex:__re__ __subj__) 56 | "abd"); 57 | assert_equal None 58 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 59 | fun __subj__ -> 60 | match 61 | Option.map 62 | (fun __g__ -> Pcre.get_substring __g__ 0) 63 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 64 | with 65 | | exception Not_found -> None 66 | | rv -> rv) 67 | "abd"); 68 | assert_raises Not_found (fun () -> 69 | (let __re__ = Pcre.regexp ~flags:[] "abc" in 70 | fun __subj__ -> 71 | (fun __g__ -> Pcre.get_substring __g__ 0) 72 | (Pcre.exec ~rex:__re__ __subj__)) 73 | "abd"); 74 | assert_raises Not_found (fun () -> 75 | (let __re__ = Pcre.regexp ~flags:[] "abc" in 76 | fun __subj__ -> 77 | (fun __g__ -> Pcre.get_substring __g__ 0) 78 | (Pcre.exec ~rex:__re__ __subj__)) 79 | "abd"); 80 | assert_equal None 81 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 82 | fun __subj__ -> 83 | match 84 | Option.map 85 | (fun __g__ -> Pcre.get_substring __g__ 0) 86 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 87 | with 88 | | exception Not_found -> None 89 | | rv -> rv) 90 | "abd"); 91 | assert_equal "abc" 92 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 93 | fun __subj__ -> 94 | (fun __g__ -> Pcre.get_substring __g__ 0) 95 | (Pcre.exec ~rex:__re__ __subj__)) 96 | "abc"); 97 | assert_equal ("abc", Some "b") 98 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)c" in 99 | fun __subj__ -> 100 | (fun __g__ -> 101 | ( Pcre.get_substring __g__ 0, 102 | try Some (Pcre.get_substring __g__ 1) with Not_found -> None )) 103 | (Pcre.exec ~rex:__re__ __subj__)) 104 | "abc"); 105 | assert_equal ("ac", None) 106 | ((let __re__ = Pcre.regexp ~flags:[] "a(?:(b)?)c" in 107 | fun __subj__ -> 108 | (fun __g__ -> 109 | ( Pcre.get_substring __g__ 0, 110 | try Some (Pcre.get_substring __g__ 1) with Not_found -> None )) 111 | (Pcre.exec ~rex:__re__ __subj__)) 112 | "ac"); 113 | assert_equal "abc" 114 | (Pcre.get_substring 115 | ((let __re__ = Pcre.regexp ~flags:[ `CASELESS ] "ABC" in 116 | fun __subj__ -> Pcre.exec ~rex:__re__ __subj__) 117 | "abc") 118 | 0); 119 | assert_equal 120 | ("abc", Some "a", Some "b", Some "c") 121 | ((let __re__ = Pcre.regexp ~flags:[] "(a)(b)(c)" in 122 | fun __subj__ -> 123 | (fun __g__ -> 124 | ( Pcre.get_substring __g__ 0, 125 | (try Some (Pcre.get_substring __g__ 1) with Not_found -> None), 126 | (try Some (Pcre.get_substring __g__ 2) with Not_found -> None), 127 | try Some (Pcre.get_substring __g__ 3) with Not_found -> None )) 128 | (Pcre.exec ~rex:__re__ __subj__)) 129 | "abc") 130 | 131 | let test_pcre_selective_match ctxt = 132 | (); 133 | assert_equal ("abc", Some "b") 134 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)c" in 135 | fun __subj__ -> 136 | (fun __g__ -> 137 | ( Pcre.get_substring __g__ 0, 138 | try Some (Pcre.get_substring __g__ 1) with Not_found -> None )) 139 | (Pcre.exec ~rex:__re__ __subj__)) 140 | "abc"); 141 | assert_equal ("abc", "b") 142 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)c" in 143 | fun __subj__ -> 144 | (fun __g__ -> (Pcre.get_substring __g__ 0, Pcre.get_substring __g__ 1)) 145 | (Pcre.exec ~rex:__re__ __subj__)) 146 | "abc"); 147 | assert_equal "b" 148 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)c" in 149 | fun __subj__ -> 150 | (fun __g__ -> Pcre.get_substring __g__ 1) 151 | (Pcre.exec ~rex:__re__ __subj__)) 152 | "abc"); 153 | assert_equal 154 | (Some ("abc", "b")) 155 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)c" in 156 | fun __subj__ -> 157 | match 158 | Option.map 159 | (fun __g__ -> 160 | (Pcre.get_substring __g__ 0, Pcre.get_substring __g__ 1)) 161 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 162 | with 163 | | exception Not_found -> None 164 | | rv -> rv) 165 | "abc"); 166 | assert_equal ("ac", None) 167 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)?c" in 168 | fun __subj__ -> 169 | (fun __g__ -> 170 | ( Pcre.get_substring __g__ 0, 171 | try Some (Pcre.get_substring __g__ 1) with Not_found -> None )) 172 | (Pcre.exec ~rex:__re__ __subj__)) 173 | "ac"); 174 | assert_raises Not_found (fun _ -> 175 | (let __re__ = Pcre.regexp ~flags:[] "a(b)?c" in 176 | fun __subj__ -> 177 | (fun __g__ -> (Pcre.get_substring __g__ 0, Pcre.get_substring __g__ 1)) 178 | (Pcre.exec ~rex:__re__ __subj__)) 179 | "ac"); 180 | assert_equal None 181 | ((let __re__ = Pcre.regexp ~flags:[] "a(b)?c" in 182 | fun __subj__ -> 183 | match 184 | Option.map 185 | (fun __g__ -> 186 | (Pcre.get_substring __g__ 0, Pcre.get_substring __g__ 1)) 187 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 188 | with 189 | | exception Not_found -> None 190 | | rv -> rv) 191 | "ac") 192 | 193 | let test_pcre_search ctxt = 194 | (); 195 | assert_equal "abc" 196 | ((let __re__ = Pcre.regexp ~flags:[] "abc" in 197 | fun __subj__ -> 198 | (fun __g__ -> Pcre.get_substring __g__ 0) 199 | (Pcre.exec ~rex:__re__ __subj__)) 200 | "zzzabc"); 201 | assert_equal None 202 | ((let __re__ = Pcre.regexp ~flags:[] "^abc" in 203 | fun __subj__ -> 204 | match 205 | Option.map 206 | (fun __g__ -> Pcre.get_substring __g__ 0) 207 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 208 | with 209 | | exception Not_found -> None 210 | | rv -> rv) 211 | "zzzabc") 212 | 213 | let test_pcre_single ctxt = 214 | (); 215 | assert_equal None 216 | ((let __re__ = Pcre.regexp ~flags:[] ".+" in 217 | fun __subj__ -> 218 | match 219 | Option.map 220 | (fun __g__ -> Pcre.get_substring __g__ 0) 221 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 222 | with 223 | | exception Not_found -> None 224 | | rv -> rv) 225 | "\n\n"); 226 | assert_equal "\n\n" 227 | ((let __re__ = Pcre.regexp ~flags:[ `DOTALL ] ".+" in 228 | fun __subj__ -> 229 | (fun __g__ -> Pcre.get_substring __g__ 0) 230 | (Pcre.exec ~rex:__re__ __subj__)) 231 | "\n\n"); 232 | assert_equal None 233 | ((let __re__ = Pcre.regexp ~flags:[ `MULTILINE ] ".+" in 234 | fun __subj__ -> 235 | match 236 | Option.map 237 | (fun __g__ -> Pcre.get_substring __g__ 0) 238 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 239 | with 240 | | exception Not_found -> None 241 | | rv -> rv) 242 | "\n\n"); 243 | assert_equal None 244 | ((let __re__ = Pcre.regexp ~flags:[] ".+" in 245 | fun __subj__ -> 246 | match 247 | Option.map 248 | (fun __g__ -> Pcre.get_substring __g__ 0) 249 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 250 | with 251 | | exception Not_found -> None 252 | | rv -> rv) 253 | "\n\n"); 254 | assert_equal (Some "\n\n") 255 | ((let __re__ = Pcre.regexp ~flags:[ `DOTALL ] ".+" in 256 | fun __subj__ -> 257 | match 258 | Option.map 259 | (fun __g__ -> Pcre.get_substring __g__ 0) 260 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 261 | with 262 | | exception Not_found -> None 263 | | rv -> rv) 264 | "\n\n"); 265 | assert_equal None 266 | ((let __re__ = Pcre.regexp ~flags:[ `MULTILINE ] ".+" in 267 | fun __subj__ -> 268 | match 269 | Option.map 270 | (fun __g__ -> Pcre.get_substring __g__ 0) 271 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 272 | with 273 | | exception Not_found -> None 274 | | rv -> rv) 275 | "\n\n"); 276 | assert_equal "<>\ndef" 277 | (Pcre.substitute_substrings_first 278 | ~rex:(Pcre.regexp ~flags:[] ".+") 279 | ~subst:(fun __g__ -> 280 | String.concat "" 281 | [ 282 | "<<"; 283 | (match Pcre.get_substring __g__ 0 with 284 | | exception Not_found -> "" 285 | | s -> s); 286 | ">>"; 287 | ]) 288 | "abc\ndef"); 289 | assert_equal "<>" 290 | (Pcre.substitute_substrings_first 291 | ~rex:(Pcre.regexp ~flags:[ `DOTALL ] ".+") 292 | ~subst:(fun __g__ -> 293 | String.concat "" 294 | [ 295 | "<<"; 296 | (match Pcre.get_substring __g__ 0 with 297 | | exception Not_found -> "" 298 | | s -> s); 299 | ">>"; 300 | ]) 301 | "abc\ndef"); 302 | assert_equal "<>\ndef" 303 | (Pcre.substitute_substrings_first 304 | ~rex:(Pcre.regexp ~flags:[ `MULTILINE ] ".+") 305 | ~subst:(fun __g__ -> 306 | String.concat "" 307 | [ 308 | "<<"; 309 | (match Pcre.get_substring __g__ 0 with 310 | | exception Not_found -> "" 311 | | s -> s); 312 | ">>"; 313 | ]) 314 | "abc\ndef"); 315 | assert_equal "<>\ndef" 316 | (Pcre.substitute_substrings_first 317 | ~rex:(Pcre.regexp ~flags:[] ".*") 318 | ~subst:(fun __g__ -> 319 | String.concat "" 320 | [ 321 | "<<"; 322 | (match Pcre.get_substring __g__ 0 with 323 | | exception Not_found -> "" 324 | | s -> s); 325 | ">>"; 326 | ]) 327 | "abc\ndef"); 328 | assert_equal "<><<>>\n<><<>>" 329 | (Pcre.substitute_substrings 330 | ~rex:(Pcre.regexp ~flags:[] ".*") 331 | ~subst:(fun __g__ -> 332 | String.concat "" 333 | [ 334 | "<<"; 335 | (match Pcre.get_substring __g__ 0 with 336 | | exception Not_found -> "" 337 | | s -> s); 338 | ">>"; 339 | ]) 340 | "abc\ndef"); 341 | assert_equal "<>\n<>" 342 | (Pcre.substitute_substrings 343 | ~rex:(Pcre.regexp ~flags:[] ".+") 344 | ~subst:(fun __g__ -> 345 | String.concat "" 346 | [ 347 | "<<"; 348 | (match Pcre.get_substring __g__ 0 with 349 | | exception Not_found -> "" 350 | | s -> s); 351 | ">>"; 352 | ]) 353 | "abc\ndef"); 354 | assert_equal "<>a\nc<>" 355 | (Pcre.substitute_substrings 356 | ~rex:(Pcre.regexp ~flags:[] "a.c") 357 | ~subst:(fun __g__ -> 358 | String.concat "" 359 | [ 360 | "<<"; 361 | (match Pcre.get_substring __g__ 0 with 362 | | exception Not_found -> "" 363 | | s -> s); 364 | ">>"; 365 | ]) 366 | "abca\ncaec"); 367 | assert_equal "<><><>" 368 | (Pcre.substitute_substrings 369 | ~rex:(Pcre.regexp ~flags:[ `DOTALL ] "a.c") 370 | ~subst:(fun __g__ -> 371 | String.concat "" 372 | [ 373 | "<<"; 374 | (match Pcre.get_substring __g__ 0 with 375 | | exception Not_found -> "" 376 | | s -> s); 377 | ">>"; 378 | ]) 379 | "abca\ncaec") 380 | 381 | let test_pcre_multiline ctxt = 382 | (); 383 | assert_equal (Some "bar") 384 | ((let __re__ = Pcre.regexp ~flags:[] ".+$" in 385 | fun __subj__ -> 386 | match 387 | Option.map 388 | (fun __g__ -> Pcre.get_substring __g__ 0) 389 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 390 | with 391 | | exception Not_found -> None 392 | | rv -> rv) 393 | "foo\nbar"); 394 | assert_equal (Some "foo") 395 | ((let __re__ = Pcre.regexp ~flags:[ `MULTILINE ] ".+$" in 396 | fun __subj__ -> 397 | match 398 | Option.map 399 | (fun __g__ -> Pcre.get_substring __g__ 0) 400 | (try Some (Pcre.exec ~rex:__re__ __subj__) with Not_found -> None) 401 | with 402 | | exception Not_found -> None 403 | | rv -> rv) 404 | "foo\nbar") 405 | 406 | let test_pcre_simple_split ctxt = 407 | (); 408 | assert_equal [ "bb" ] 409 | ((let __re__ = Pcre.regexp ~flags:[] "a" in 410 | fun __subj__ -> Pcre.split ~rex:__re__ __subj__) 411 | "bb") 412 | 413 | let test_pcre_delim_split_raw ctxt = 414 | let open Pcre in 415 | (); 416 | assert_equal 417 | [ Delim "a"; Text "b"; Delim "a"; Text "b" ] 418 | ((let __re__ = Pcre.regexp ~flags:[] "a" in 419 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 420 | "ababa"); 421 | assert_equal 422 | [ Delim "a"; Text "b"; Delim "a"; Delim "a"; Text "b" ] 423 | ((let __re__ = Pcre.regexp ~flags:[] "a" in 424 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 425 | "abaaba"); 426 | assert_equal 427 | [ 428 | Delim "a"; 429 | NoGroup; 430 | Text "b"; 431 | Delim "ac"; 432 | Group (1, "c"); 433 | Text "b"; 434 | Delim "a"; 435 | NoGroup; 436 | ] 437 | ((let __re__ = Pcre.regexp ~flags:[] "a(c)?" in 438 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 439 | "abacba"); 440 | assert_equal 441 | [ 442 | Delim "ac"; 443 | Group (1, "c"); 444 | Text "b"; 445 | Delim "ac"; 446 | Group (1, "c"); 447 | Text "b"; 448 | Delim "ac"; 449 | Group (1, "c"); 450 | ] 451 | ((let __re__ = Pcre.regexp ~flags:[] "a(c)" in 452 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 453 | "acbacbac"); 454 | assert_equal 455 | [ 456 | Delim "ac"; 457 | Group (1, "c"); 458 | Text "b"; 459 | Delim "ac"; 460 | Group (1, "c"); 461 | Text "b"; 462 | Delim "ac"; 463 | Group (1, "c"); 464 | ] 465 | ((let __re__ = Pcre.regexp ~flags:[] "a(c)" in 466 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 467 | "acbacbac"); 468 | assert_equal 469 | [ 470 | Delim "a"; 471 | NoGroup; 472 | Text "b"; 473 | Delim "ac"; 474 | Group (1, "c"); 475 | Text "b"; 476 | Delim "a"; 477 | NoGroup; 478 | ] 479 | ((let __re__ = Pcre.regexp ~flags:[] "a(c)?" in 480 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 481 | "abacba"); 482 | assert_equal 483 | [ Text "ab"; Delim "x"; Group (1, "x"); NoGroup; Text "cd" ] 484 | ((let __re__ = Pcre.regexp ~flags:[] "(x)|(u)" in 485 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 486 | "abxcd"); 487 | assert_equal 488 | [ 489 | Text "ab"; 490 | Delim "x"; 491 | Group (1, "x"); 492 | NoGroup; 493 | Text "cd"; 494 | Delim "u"; 495 | NoGroup; 496 | Group (2, "u"); 497 | ] 498 | ((let __re__ = Pcre.regexp ~flags:[] "(x)|(u)" in 499 | fun __subj__ -> Pcre.full_split ~rex:__re__ __subj__) 500 | "abxcdu") 501 | 502 | let test_pcre_subst ctxt = 503 | (); 504 | assert_equal "$b" 505 | (Pcre.substitute_substrings_first 506 | ~rex:(Pcre.regexp ~flags:[] "a(b)c") 507 | ~subst:(fun __g__ -> 508 | String.concat "" 509 | [ 510 | "$"; 511 | ""; 512 | (match Pcre.get_substring __g__ 1 with 513 | | exception Not_found -> "" 514 | | s -> s); 515 | ]) 516 | "abc"); 517 | assert_equal "$b" 518 | (Pcre.substitute_substrings_first 519 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 520 | ~subst:(fun __g__ -> 521 | String.concat "" 522 | [ 523 | "$"; 524 | ""; 525 | (match Pcre.get_substring __g__ 1 with 526 | | exception Not_found -> "" 527 | | s -> s); 528 | ]) 529 | "abc"); 530 | assert_equal "$babc" 531 | (Pcre.substitute_substrings_first 532 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 533 | ~subst:(fun __g__ -> 534 | String.concat "" 535 | [ 536 | "$"; 537 | ""; 538 | (match Pcre.get_substring __g__ 1 with 539 | | exception Not_found -> "" 540 | | s -> s); 541 | ]) 542 | "abcabc"); 543 | assert_equal "$b$b" 544 | (Pcre.substitute_substrings 545 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 546 | ~subst:(fun __g__ -> 547 | String.concat "" 548 | [ 549 | "$"; 550 | ""; 551 | (match Pcre.get_substring __g__ 1 with 552 | | exception Not_found -> "" 553 | | s -> s); 554 | ]) 555 | "abcabc"); 556 | assert_equal "$b$b" 557 | (Pcre.substitute_substrings 558 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 559 | ~subst:(fun __g__ -> 560 | "$" 561 | ^ 562 | match Pcre.get_substring __g__ 1 with 563 | | exception Not_found -> "" 564 | | s -> s) 565 | "abcabc"); 566 | assert_equal "$$" 567 | (Pcre.substitute_substrings 568 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 569 | ~subst:(fun __g__ -> "$") 570 | "abcabc"); 571 | assert_equal "$$" 572 | (Pcre.substitute_substrings 573 | ~rex:(Pcre.regexp ~flags:[ `CASELESS ] "A(B)C") 574 | ~subst:(fun __g__ -> String.concat "" [ "$" ]) 575 | "abcabc") 576 | 577 | let show_string_option = function 578 | | None -> "None" 579 | | Some s -> Printf.sprintf "Some %s" s 580 | 581 | let test_pcre_ocamlfind_bits ctxt = 582 | (); 583 | assert_equal ~printer:show_string_option (Some "-syntax camlp5o ") 584 | (snd 585 | ((let __re__ = Pcre.regexp ~flags:[] "^\\(\\*\\*pp (.*?)\\*\\)" in 586 | fun __subj__ -> 587 | (fun __g__ -> 588 | ( Pcre.get_substring __g__ 0, 589 | try Some (Pcre.get_substring __g__ 1) with Not_found -> None )) 590 | (Pcre.exec ~rex:__re__ __subj__)) 591 | "(**pp -syntax camlp5o *)\n")) 592 | 593 | let pcre_envsubst envlookup s = 594 | let f s1 s2 = 595 | if s1 <> "" then envlookup s1 596 | else if s2 <> "" then envlookup s2 597 | else assert false 598 | in 599 | Pcre.substitute_substrings 600 | ~rex:(Pcre.regexp ~flags:[] "(?:\\$\\(([^)]+)\\)|\\$\\{([^}]+)\\})") 601 | ~subst:(fun __g__ -> 602 | f 603 | (match Pcre.get_substring __g__ 1 with 604 | | exception Not_found -> "" 605 | | s -> s) 606 | (match Pcre.get_substring __g__ 2 with 607 | | exception Not_found -> "" 608 | | s -> s)) 609 | s 610 | 611 | let test_pcre_envsubst_via_replace ctxt = 612 | let f = function 613 | | "A" -> "res1" 614 | | "B" -> "res2" 615 | | _ -> failwith "unexpected arg in envsubst" 616 | in 617 | assert_equal "...res1...res2..." (pcre_envsubst f "...$(A)...${B}...") 618 | 619 | let suite = 620 | "Test pa_ppx_regexp" 621 | >::: [ 622 | "pcre only_regexps" >:: test_special_char_regexps; 623 | "pcre simple_match" >:: test_pcre_simple_match; 624 | "pcre selective_match" >:: test_pcre_selective_match; 625 | "pcre search" >:: test_pcre_search; 626 | "pcre single" >:: test_pcre_single; 627 | "pcre multiline" >:: test_pcre_multiline; 628 | "pcre simple_split" >:: test_pcre_simple_split; 629 | "pcre delim_split raw" >:: test_pcre_delim_split_raw; 630 | "pcre subst" >:: test_pcre_subst; 631 | "pcre ocamlfind bits" >:: test_pcre_ocamlfind_bits; 632 | "pcre envsubst via replace" >:: test_pcre_envsubst_via_replace; 633 | ] 634 | 635 | let _ = if not !Sys.interactive then run_test_tt_main suite 636 | --------------------------------------------------------------------------------