├── .gitignore ├── ChangeLog ├── LICENSE.txt ├── README.rst ├── dune-project ├── mparser-pcre.opam ├── mparser-re.opam ├── mparser.opam └── src ├── dune ├── mParser.ml ├── mParser.mli ├── mParser_Char_Stream.ml ├── mParser_Char_Stream.mli ├── mParser_PCRE.ml ├── mParser_PCRE.mli ├── mParser_RE.ml ├── mParser_RE.mli ├── mParser_Sig.mli ├── mParser_Utils.ml └── mParser_Utils.mli /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.merlin 3 | *.install -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2021-03-09 Max Mouratov 2 | 3 | Version 1.3 4 | 5 | * New: Build system is changed from OASIS to Dune. 6 | * Break: Support for RE and PCRE was moved to separate OPAM packages: 7 | mparser-re, mparser-pcre. Findlib packages were renamed accordingly: 8 | mparser.re -> mparser-re, mparser.pcre -> mparser-pcre. 9 | * New: [MParser_RE.wrap], [MParser_PCRE.wrap]. 10 | * Fix: Cleaned up deprecation warnings. 11 | * Drop: Support for OCaml < 4.02. 12 | * Drop: Support for ocaml-re < 1.7.2. 13 | 14 | 15 | 2017-06-12 Max Mouratov 16 | 17 | Version 1.2.3 18 | 19 | * New: [MParser.is_not]. 20 | * New: [MParser.non_space]. 21 | * New: Some tweaks that should provide a minor performance boost. 22 | 23 | 24 | 2017-05-19 Max Mouratov 25 | 26 | Version 1.2.2 27 | 28 | A minor release that improves documentation. 29 | 30 | 31 | 2016-11-19 Max Mouratov 32 | 33 | Version 1.2.1 34 | 35 | * New: OASIS is set up to produce API reference (in HTML). 36 | * Fix: The library file names were made distinct from the corresponding 37 | module names, as name conflicts made OASIS confused on Windows. 38 | * New: Some tweaks that should provide a minor performance boost. 39 | 40 | 41 | 2016-01-20 Max Mouratov 42 | 43 | Version 1.2 44 | 45 | * New: [MParser.(>>)] and [MParser.(<<)] now have aliases 46 | [MParser.(>>>)] and [MParser.(<<<)], the names of which do not cause 47 | conflicts with the Camlp4 parser ("<<" is the start of a quotation). 48 | * Break: [MParser.skip p] is now an alias for [p |>> ignore]. The old 49 | [MParser.skip] is now available as [MParser.skip_nchars], which is a 50 | more suitable name for its semantics. 51 | * New (MParser_PCRE): [`Anchored] is now a compilation flag instead 52 | of a runtime one, which helps a little bit with performance. 53 | * Break: [CharStream] was renamed to [MParser_Char_Stream] in order 54 | to avoid link-time module name conflicts with other libraries. 55 | * Fix (MParser_Char_Stream): Mutable strings were replaced with 56 | [Bytes.t] in the implementation, as it should be in modern OCaml. 57 | 58 | 59 | 2014-07-04 Max Mouratov 60 | 61 | Version 1.1 62 | 63 | * New: Support for pluggable regular expression engines. 64 | * New (MParser_PCRE): PCRE-related functionality is split out into a 65 | separate, optional subpackage. 66 | * New (MParser_RE): support for RE-based regular expressions. 67 | * Fix: .cmxs files (native plugins) are built correctly now. 68 | * New: [MParser.set_pos]. 69 | 70 | 71 | 2013-05-02 Max Mouratov 72 | 73 | Version 1.0.1 74 | 75 | * Fix: "expecting" and "unexpected" error messages must not contain 76 | duplicate items. 77 | * New: A tutorial section in the README file. 78 | 79 | 80 | 2013-01-22 Max Mouratov 81 | 82 | Version 1.0 (initial version after ripping the code out of ocaml-base) 83 | 84 | * Fix: A bug in [MParser.option]. 85 | * New: [MParser.bind] (for the sake of compatibility with pa_monad). 86 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | This library is free software; you can redistribute it and/or modify 2 | it under the terms of the GNU Lesser General Public License (LGPL) as 3 | published by the Free Software Foundation; either version 2.1 of the 4 | License (see below), or (at your option) any later version. 5 | 6 | As a special exception to the GNU Lesser General Public License, you 7 | may link, statically or dynamically, a "work that uses the Library" 8 | with a publicly distributed version of the Library to produce an 9 | executable file containing portions of the Library, and distribute 10 | that executable file under terms of your choice, without any of the 11 | additional requirements listed in clause 6 of the GNU Lesser General 12 | Public License. By "a publicly distributed version of the Library", we 13 | mean either the unmodified Library as distributed, or a modified 14 | version of the Library that is distributed under the conditions 15 | defined in clause 2 of the GNU Lesser General Public License. This 16 | exception does not however invalidate any other reasons why the 17 | executable file might be covered by the GNU Lesser General Public 18 | License. 19 | 20 | This library is distributed in the hope that it will be useful, 21 | but WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 23 | 24 | ====================================================================== 25 | 26 | 27 | GNU LESSER GENERAL PUBLIC LICENSE 28 | Version 2.1, February 1999 29 | 30 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 31 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 32 | Everyone is permitted to copy and distribute verbatim copies 33 | of this license document, but changing it is not allowed. 34 | 35 | [This is the first released version of the Lesser GPL. It also counts 36 | as the successor of the GNU Library Public License, version 2, hence 37 | the version number 2.1.] 38 | 39 | Preamble 40 | 41 | The licenses for most software are designed to take away your 42 | freedom to share and change it. By contrast, the GNU General Public 43 | Licenses are intended to guarantee your freedom to share and change 44 | free software--to make sure the software is free for all its users. 45 | 46 | This license, the Lesser General Public License, applies to some 47 | specially designated software packages--typically libraries--of the 48 | Free Software Foundation and other authors who decide to use it. You 49 | can use it too, but we suggest you first think carefully about whether 50 | this license or the ordinary General Public License is the better 51 | strategy to use in any particular case, based on the explanations below. 52 | 53 | When we speak of free software, we are referring to freedom of use, 54 | not price. Our General Public Licenses are designed to make sure that 55 | you have the freedom to distribute copies of free software (and charge 56 | for this service if you wish); that you receive source code or can get 57 | it if you want it; that you can change the software and use pieces of 58 | it in new free programs; and that you are informed that you can do 59 | these things. 60 | 61 | To protect your rights, we need to make restrictions that forbid 62 | distributors to deny you these rights or to ask you to surrender these 63 | rights. These restrictions translate to certain responsibilities for 64 | you if you distribute copies of the library or if you modify it. 65 | 66 | For example, if you distribute copies of the library, whether gratis 67 | or for a fee, you must give the recipients all the rights that we gave 68 | you. You must make sure that they, too, receive or can get the source 69 | code. If you link other code with the library, you must provide 70 | complete object files to the recipients, so that they can relink them 71 | with the library after making changes to the library and recompiling 72 | it. And you must show them these terms so they know their rights. 73 | 74 | We protect your rights with a two-step method: (1) we copyright the 75 | library, and (2) we offer you this license, which gives you legal 76 | permission to copy, distribute and/or modify the library. 77 | 78 | To protect each distributor, we want to make it very clear that 79 | there is no warranty for the free library. Also, if the library is 80 | modified by someone else and passed on, the recipients should know 81 | that what they have is not the original version, so that the original 82 | author's reputation will not be affected by problems that might be 83 | introduced by others. 84 | 85 | Finally, software patents pose a constant threat to the existence of 86 | any free program. We wish to make sure that a company cannot 87 | effectively restrict the users of a free program by obtaining a 88 | restrictive license from a patent holder. Therefore, we insist that 89 | any patent license obtained for a version of the library must be 90 | consistent with the full freedom of use specified in this license. 91 | 92 | Most GNU software, including some libraries, is covered by the 93 | ordinary GNU General Public License. This license, the GNU Lesser 94 | General Public License, applies to certain designated libraries, and 95 | is quite different from the ordinary General Public License. We use 96 | this license for certain libraries in order to permit linking those 97 | libraries into non-free programs. 98 | 99 | When a program is linked with a library, whether statically or using 100 | a shared library, the combination of the two is legally speaking a 101 | combined work, a derivative of the original library. The ordinary 102 | General Public License therefore permits such linking only if the 103 | entire combination fits its criteria of freedom. The Lesser General 104 | Public License permits more lax criteria for linking other code with 105 | the library. 106 | 107 | We call this license the "Lesser" General Public License because it 108 | does Less to protect the user's freedom than the ordinary General 109 | Public License. It also provides other free software developers Less 110 | of an advantage over competing non-free programs. These disadvantages 111 | are the reason we use the ordinary General Public License for many 112 | libraries. However, the Lesser license provides advantages in certain 113 | special circumstances. 114 | 115 | For example, on rare occasions, there may be a special need to 116 | encourage the widest possible use of a certain library, so that it becomes 117 | a de-facto standard. To achieve this, non-free programs must be 118 | allowed to use the library. A more frequent case is that a free 119 | library does the same job as widely used non-free libraries. In this 120 | case, there is little to gain by limiting the free library to free 121 | software only, so we use the Lesser General Public License. 122 | 123 | In other cases, permission to use a particular library in non-free 124 | programs enables a greater number of people to use a large body of 125 | free software. For example, permission to use the GNU C Library in 126 | non-free programs enables many more people to use the whole GNU 127 | operating system, as well as its variant, the GNU/Linux operating 128 | system. 129 | 130 | Although the Lesser General Public License is Less protective of the 131 | users' freedom, it does ensure that the user of a program that is 132 | linked with the Library has the freedom and the wherewithal to run 133 | that program using a modified version of the Library. 134 | 135 | The precise terms and conditions for copying, distribution and 136 | modification follow. Pay close attention to the difference between a 137 | "work based on the library" and a "work that uses the library". The 138 | former contains code derived from the library, whereas the latter must 139 | be combined with the library in order to run. 140 | 141 | GNU LESSER GENERAL PUBLIC LICENSE 142 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 143 | 144 | 0. This License Agreement applies to any software library or other 145 | program which contains a notice placed by the copyright holder or 146 | other authorized party saying it may be distributed under the terms of 147 | this Lesser General Public License (also called "this License"). 148 | Each licensee is addressed as "you". 149 | 150 | A "library" means a collection of software functions and/or data 151 | prepared so as to be conveniently linked with application programs 152 | (which use some of those functions and data) to form executables. 153 | 154 | The "Library", below, refers to any such software library or work 155 | which has been distributed under these terms. A "work based on the 156 | Library" means either the Library or any derivative work under 157 | copyright law: that is to say, a work containing the Library or a 158 | portion of it, either verbatim or with modifications and/or translated 159 | straightforwardly into another language. (Hereinafter, translation is 160 | included without limitation in the term "modification".) 161 | 162 | "Source code" for a work means the preferred form of the work for 163 | making modifications to it. For a library, complete source code means 164 | all the source code for all modules it contains, plus any associated 165 | interface definition files, plus the scripts used to control compilation 166 | and installation of the library. 167 | 168 | Activities other than copying, distribution and modification are not 169 | covered by this License; they are outside its scope. The act of 170 | running a program using the Library is not restricted, and output from 171 | such a program is covered only if its contents constitute a work based 172 | on the Library (independent of the use of the Library in a tool for 173 | writing it). Whether that is true depends on what the Library does 174 | and what the program that uses the Library does. 175 | 176 | 1. You may copy and distribute verbatim copies of the Library's 177 | complete source code as you receive it, in any medium, provided that 178 | you conspicuously and appropriately publish on each copy an 179 | appropriate copyright notice and disclaimer of warranty; keep intact 180 | all the notices that refer to this License and to the absence of any 181 | warranty; and distribute a copy of this License along with the 182 | Library. 183 | 184 | You may charge a fee for the physical act of transferring a copy, 185 | and you may at your option offer warranty protection in exchange for a 186 | fee. 187 | 188 | 2. You may modify your copy or copies of the Library or any portion 189 | of it, thus forming a work based on the Library, and copy and 190 | distribute such modifications or work under the terms of Section 1 191 | above, provided that you also meet all of these conditions: 192 | 193 | a) The modified work must itself be a software library. 194 | 195 | b) You must cause the files modified to carry prominent notices 196 | stating that you changed the files and the date of any change. 197 | 198 | c) You must cause the whole of the work to be licensed at no 199 | charge to all third parties under the terms of this License. 200 | 201 | d) If a facility in the modified Library refers to a function or a 202 | table of data to be supplied by an application program that uses 203 | the facility, other than as an argument passed when the facility 204 | is invoked, then you must make a good faith effort to ensure that, 205 | in the event an application does not supply such function or 206 | table, the facility still operates, and performs whatever part of 207 | its purpose remains meaningful. 208 | 209 | (For example, a function in a library to compute square roots has 210 | a purpose that is entirely well-defined independent of the 211 | application. Therefore, Subsection 2d requires that any 212 | application-supplied function or table used by this function must 213 | be optional: if the application does not supply it, the square 214 | root function must still compute square roots.) 215 | 216 | These requirements apply to the modified work as a whole. If 217 | identifiable sections of that work are not derived from the Library, 218 | and can be reasonably considered independent and separate works in 219 | themselves, then this License, and its terms, do not apply to those 220 | sections when you distribute them as separate works. But when you 221 | distribute the same sections as part of a whole which is a work based 222 | on the Library, the distribution of the whole must be on the terms of 223 | this License, whose permissions for other licensees extend to the 224 | entire whole, and thus to each and every part regardless of who wrote 225 | it. 226 | 227 | Thus, it is not the intent of this section to claim rights or contest 228 | your rights to work written entirely by you; rather, the intent is to 229 | exercise the right to control the distribution of derivative or 230 | collective works based on the Library. 231 | 232 | In addition, mere aggregation of another work not based on the Library 233 | with the Library (or with a work based on the Library) on a volume of 234 | a storage or distribution medium does not bring the other work under 235 | the scope of this License. 236 | 237 | 3. You may opt to apply the terms of the ordinary GNU General Public 238 | License instead of this License to a given copy of the Library. To do 239 | this, you must alter all the notices that refer to this License, so 240 | that they refer to the ordinary GNU General Public License, version 2, 241 | instead of to this License. (If a newer version than version 2 of the 242 | ordinary GNU General Public License has appeared, then you can specify 243 | that version instead if you wish.) Do not make any other change in 244 | these notices. 245 | 246 | Once this change is made in a given copy, it is irreversible for 247 | that copy, so the ordinary GNU General Public License applies to all 248 | subsequent copies and derivative works made from that copy. 249 | 250 | This option is useful when you wish to copy part of the code of 251 | the Library into a program that is not a library. 252 | 253 | 4. You may copy and distribute the Library (or a portion or 254 | derivative of it, under Section 2) in object code or executable form 255 | under the terms of Sections 1 and 2 above provided that you accompany 256 | it with the complete corresponding machine-readable source code, which 257 | must be distributed under the terms of Sections 1 and 2 above on a 258 | medium customarily used for software interchange. 259 | 260 | If distribution of object code is made by offering access to copy 261 | from a designated place, then offering equivalent access to copy the 262 | source code from the same place satisfies the requirement to 263 | distribute the source code, even though third parties are not 264 | compelled to copy the source along with the object code. 265 | 266 | 5. A program that contains no derivative of any portion of the 267 | Library, but is designed to work with the Library by being compiled or 268 | linked with it, is called a "work that uses the Library". Such a 269 | work, in isolation, is not a derivative work of the Library, and 270 | therefore falls outside the scope of this License. 271 | 272 | However, linking a "work that uses the Library" with the Library 273 | creates an executable that is a derivative of the Library (because it 274 | contains portions of the Library), rather than a "work that uses the 275 | library". The executable is therefore covered by this License. 276 | Section 6 states terms for distribution of such executables. 277 | 278 | When a "work that uses the Library" uses material from a header file 279 | that is part of the Library, the object code for the work may be a 280 | derivative work of the Library even though the source code is not. 281 | Whether this is true is especially significant if the work can be 282 | linked without the Library, or if the work is itself a library. The 283 | threshold for this to be true is not precisely defined by law. 284 | 285 | If such an object file uses only numerical parameters, data 286 | structure layouts and accessors, and small macros and small inline 287 | functions (ten lines or less in length), then the use of the object 288 | file is unrestricted, regardless of whether it is legally a derivative 289 | work. (Executables containing this object code plus portions of the 290 | Library will still fall under Section 6.) 291 | 292 | Otherwise, if the work is a derivative of the Library, you may 293 | distribute the object code for the work under the terms of Section 6. 294 | Any executables containing that work also fall under Section 6, 295 | whether or not they are linked directly with the Library itself. 296 | 297 | 6. As an exception to the Sections above, you may also combine or 298 | link a "work that uses the Library" with the Library to produce a 299 | work containing portions of the Library, and distribute that work 300 | under terms of your choice, provided that the terms permit 301 | modification of the work for the customer's own use and reverse 302 | engineering for debugging such modifications. 303 | 304 | You must give prominent notice with each copy of the work that the 305 | Library is used in it and that the Library and its use are covered by 306 | this License. You must supply a copy of this License. If the work 307 | during execution displays copyright notices, you must include the 308 | copyright notice for the Library among them, as well as a reference 309 | directing the user to the copy of this License. Also, you must do one 310 | of these things: 311 | 312 | a) Accompany the work with the complete corresponding 313 | machine-readable source code for the Library including whatever 314 | changes were used in the work (which must be distributed under 315 | Sections 1 and 2 above); and, if the work is an executable linked 316 | with the Library, with the complete machine-readable "work that 317 | uses the Library", as object code and/or source code, so that the 318 | user can modify the Library and then relink to produce a modified 319 | executable containing the modified Library. (It is understood 320 | that the user who changes the contents of definitions files in the 321 | Library will not necessarily be able to recompile the application 322 | to use the modified definitions.) 323 | 324 | b) Use a suitable shared library mechanism for linking with the 325 | Library. A suitable mechanism is one that (1) uses at run time a 326 | copy of the library already present on the user's computer system, 327 | rather than copying library functions into the executable, and (2) 328 | will operate properly with a modified version of the library, if 329 | the user installs one, as long as the modified version is 330 | interface-compatible with the version that the work was made with. 331 | 332 | c) Accompany the work with a written offer, valid for at 333 | least three years, to give the same user the materials 334 | specified in Subsection 6a, above, for a charge no more 335 | than the cost of performing this distribution. 336 | 337 | d) If distribution of the work is made by offering access to copy 338 | from a designated place, offer equivalent access to copy the above 339 | specified materials from the same place. 340 | 341 | e) Verify that the user has already received a copy of these 342 | materials or that you have already sent this user a copy. 343 | 344 | For an executable, the required form of the "work that uses the 345 | Library" must include any data and utility programs needed for 346 | reproducing the executable from it. However, as a special exception, 347 | the materials to be distributed need not include anything that is 348 | normally distributed (in either source or binary form) with the major 349 | components (compiler, kernel, and so on) of the operating system on 350 | which the executable runs, unless that component itself accompanies 351 | the executable. 352 | 353 | It may happen that this requirement contradicts the license 354 | restrictions of other proprietary libraries that do not normally 355 | accompany the operating system. Such a contradiction means you cannot 356 | use both them and the Library together in an executable that you 357 | distribute. 358 | 359 | 7. You may place library facilities that are a work based on the 360 | Library side-by-side in a single library together with other library 361 | facilities not covered by this License, and distribute such a combined 362 | library, provided that the separate distribution of the work based on 363 | the Library and of the other library facilities is otherwise 364 | permitted, and provided that you do these two things: 365 | 366 | a) Accompany the combined library with a copy of the same work 367 | based on the Library, uncombined with any other library 368 | facilities. This must be distributed under the terms of the 369 | Sections above. 370 | 371 | b) Give prominent notice with the combined library of the fact 372 | that part of it is a work based on the Library, and explaining 373 | where to find the accompanying uncombined form of the same work. 374 | 375 | 8. You may not copy, modify, sublicense, link with, or distribute 376 | the Library except as expressly provided under this License. Any 377 | attempt otherwise to copy, modify, sublicense, link with, or 378 | distribute the Library is void, and will automatically terminate your 379 | rights under this License. However, parties who have received copies, 380 | or rights, from you under this License will not have their licenses 381 | terminated so long as such parties remain in full compliance. 382 | 383 | 9. You are not required to accept this License, since you have not 384 | signed it. However, nothing else grants you permission to modify or 385 | distribute the Library or its derivative works. These actions are 386 | prohibited by law if you do not accept this License. Therefore, by 387 | modifying or distributing the Library (or any work based on the 388 | Library), you indicate your acceptance of this License to do so, and 389 | all its terms and conditions for copying, distributing or modifying 390 | the Library or works based on it. 391 | 392 | 10. Each time you redistribute the Library (or any work based on the 393 | Library), the recipient automatically receives a license from the 394 | original licensor to copy, distribute, link with or modify the Library 395 | subject to these terms and conditions. You may not impose any further 396 | restrictions on the recipients' exercise of the rights granted herein. 397 | You are not responsible for enforcing compliance by third parties with 398 | this License. 399 | 400 | 11. If, as a consequence of a court judgment or allegation of patent 401 | infringement or for any other reason (not limited to patent issues), 402 | conditions are imposed on you (whether by court order, agreement or 403 | otherwise) that contradict the conditions of this License, they do not 404 | excuse you from the conditions of this License. If you cannot 405 | distribute so as to satisfy simultaneously your obligations under this 406 | License and any other pertinent obligations, then as a consequence you 407 | may not distribute the Library at all. For example, if a patent 408 | license would not permit royalty-free redistribution of the Library by 409 | all those who receive copies directly or indirectly through you, then 410 | the only way you could satisfy both it and this License would be to 411 | refrain entirely from distribution of the Library. 412 | 413 | If any portion of this section is held invalid or unenforceable under any 414 | particular circumstance, the balance of the section is intended to apply, 415 | and the section as a whole is intended to apply in other circumstances. 416 | 417 | It is not the purpose of this section to induce you to infringe any 418 | patents or other property right claims or to contest validity of any 419 | such claims; this section has the sole purpose of protecting the 420 | integrity of the free software distribution system which is 421 | implemented by public license practices. Many people have made 422 | generous contributions to the wide range of software distributed 423 | through that system in reliance on consistent application of that 424 | system; it is up to the author/donor to decide if he or she is willing 425 | to distribute software through any other system and a licensee cannot 426 | impose that choice. 427 | 428 | This section is intended to make thoroughly clear what is believed to 429 | be a consequence of the rest of this License. 430 | 431 | 12. If the distribution and/or use of the Library is restricted in 432 | certain countries either by patents or by copyrighted interfaces, the 433 | original copyright holder who places the Library under this License may add 434 | an explicit geographical distribution limitation excluding those countries, 435 | so that distribution is permitted only in or among countries not thus 436 | excluded. In such case, this License incorporates the limitation as if 437 | written in the body of this License. 438 | 439 | 13. The Free Software Foundation may publish revised and/or new 440 | versions of the Lesser General Public License from time to time. 441 | Such new versions will be similar in spirit to the present version, 442 | but may differ in detail to address new problems or concerns. 443 | 444 | Each version is given a distinguishing version number. If the Library 445 | specifies a version number of this License which applies to it and 446 | "any later version", you have the option of following the terms and 447 | conditions either of that version or of any later version published by 448 | the Free Software Foundation. If the Library does not specify a 449 | license version number, you may choose any version ever published by 450 | the Free Software Foundation. 451 | 452 | 14. If you wish to incorporate parts of the Library into other free 453 | programs whose distribution conditions are incompatible with these, 454 | write to the author to ask for permission. For software which is 455 | copyrighted by the Free Software Foundation, write to the Free 456 | Software Foundation; we sometimes make exceptions for this. Our 457 | decision will be guided by the two goals of preserving the free status 458 | of all derivatives of our free software and of promoting the sharing 459 | and reuse of software generally. 460 | 461 | NO WARRANTY 462 | 463 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 464 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 465 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 466 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 467 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 468 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 469 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 470 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 471 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 472 | 473 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 474 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 475 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 476 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 477 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 478 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 479 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 480 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 481 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 482 | DAMAGES. 483 | 484 | END OF TERMS AND CONDITIONS 485 | 486 | 487 | ====================================================================== 488 | 489 | How to Apply These Terms to Your New Libraries 490 | 491 | If you develop a new library, and you want it to be of the greatest 492 | possible use to the public, we recommend making it free software that 493 | everyone can redistribute and change. You can do so by permitting 494 | redistribution under these terms (or, alternatively, under the terms 495 | of the ordinary General Public License). 496 | 497 | To apply these terms, attach the following notices to the library. 498 | It is safest to attach them to the start of each source file to most 499 | effectively convey the exclusion of warranty; and each file should 500 | have at least the "copyright" line and a pointer to where the full 501 | notice is found. 502 | 503 | 504 | 505 | Copyright (C) 506 | 507 | This library is free software; you can redistribute it and/or 508 | modify it under the terms of the GNU Lesser General Public 509 | License as published by the Free Software Foundation; either 510 | version 2.1 of the License, or (at your option) any later version. 511 | 512 | This library is distributed in the hope that it will be useful, 513 | but WITHOUT ANY WARRANTY; without even the implied warranty of 514 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 515 | Lesser General Public License for more details. 516 | 517 | You should have received a copy of the GNU Lesser General Public 518 | License along with this library; if not, write to the Free Software 519 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 520 | 521 | Also add information on how to contact you by electronic and paper mail. 522 | 523 | You should also get your employer (if you work as a programmer) or 524 | your school, if any, to sign a "copyright disclaimer" for the library, 525 | if necessary. Here is a sample; alter the names: 526 | 527 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 528 | library `Frob' (a library for tweaking knobs) written by James 529 | Random Hacker. 530 | 531 | , 1 April 1990 532 | Ty Coon, President of Vice 533 | 534 | That's all there is to it! 535 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =================================================== 2 | MParser, a simple monadic parser combinator library 3 | =================================================== 4 | 5 | This library implements a rather complete and efficient monadic parser 6 | combinator library similar to the Parsec library for Haskell by Daan Leijen 7 | and the FParsec library for FSharp by Stephan Tolksdorf. 8 | 9 | See LICENSE.txt for copying conditions (LGPL with static linking exception). 10 | 11 | Home page: https://github.com/murmour/mparser. 12 | 13 | MParser used to be a part of ocaml-base, a collection of useful OCaml 14 | libraries by Holger Arnold [1]_. 15 | 16 | The monadic interface of MParser is compatible with pa_monad [2]_. 17 | 18 | 19 | Dependencies 20 | ------------ 21 | 22 | To build this package, you need: 23 | 24 | * OCaml (>= 4.02). 25 | * Dune (>= 1.11) [3]_. 26 | * Findlib [4]_. 27 | * Optionally: ``re`` [5]_ for ``mparser-re``. 28 | * Optionally: ``pcre-ocaml`` [6]_ for ``mparser-pcre``. 29 | 30 | 31 | Installing 32 | ---------- 33 | 34 | Either use OPAM [7]_ or build manually with Dune [3]_. 35 | 36 | Installing from OPAM: ``opam install [sub-library name]``. 37 | 38 | To build manually, ``cd`` to this folder and run ``dune build -p [sub-library name]``. Add ``@install`` for installation. Add ``@doc`` to produce API reference in ``_build/default/_doc/_html``. Consult Dune manual for more options. 39 | 40 | Available sub-libraries: 41 | 42 | - ``mparser``: base library; 43 | - ``mparser-re``: a plugin that adds support for regular expressions based on 44 | ``re`` [5]_ (``MParser_RE`` module, ``mparser-re`` findlib package); 45 | - ``mparser-pcre``: a plugin that adds support for regular expressions based on 46 | ``pcre-ocaml`` [6]_ (``MParser_PCRE`` module, ``mparser-pcre`` findlib package). 47 | 48 | 49 | Usage example 50 | ------------- 51 | 52 | Let's implement a simple expression evaluator. 53 | 54 | To save the typing effort, it is often handy to open the ``MParser`` module: 55 | 56 | .. sourcecode:: ocaml 57 | 58 | open MParser 59 | 60 | 61 | First, we define a parsing combinator ``expr``, which handles expression 62 | parsing, taking care of the operator precedence issues: 63 | 64 | .. sourcecode:: ocaml 65 | 66 | let infix p op = 67 | Infix (p |>> (fun _ a b -> (`Binop (op, a, b))), Assoc_left) 68 | 69 | let operators = 70 | [ 71 | [ 72 | infix (char '*') `Mul; 73 | infix (char '/') `Div; 74 | ]; 75 | [ 76 | infix (char '+') `Add; 77 | infix (char '-') `Sub; 78 | ]; 79 | ] 80 | 81 | let decimal = 82 | many1_chars digit |>> int_of_string 83 | 84 | let expr = 85 | expression operators (decimal |>> fun i -> `Int i) 86 | 87 | 88 | Next, we implement an interpreter for our expression tree: 89 | 90 | .. sourcecode:: ocaml 91 | 92 | let rec calc = function 93 | | `Int i -> i 94 | | `Binop (op, a, b) -> 95 | match op with 96 | | `Add -> calc a + calc b 97 | | `Sub -> calc a - calc b 98 | | `Mul -> calc a * calc b 99 | | `Div -> calc a / calc b 100 | 101 | 102 | The evaluator function: 103 | 104 | .. sourcecode:: ocaml 105 | 106 | let eval (s: string) : int = 107 | match MParser.parse_string expr s () with 108 | | Success e -> 109 | calc e 110 | | Failed (msg, e) -> 111 | failwith msg 112 | 113 | 114 | Using it: 115 | 116 | .. sourcecode:: ocaml 117 | 118 | eval "4*4+10/2" -> 21 119 | 120 | 121 | Have fun! 122 | 123 | 124 | References 125 | ---------- 126 | 127 | .. [1] http://www.holgerarnold.net/software 128 | .. [2] https://www.cas.mcmaster.ca/~carette/pa_monad 129 | .. [3] https://github.com/ocaml/dune 130 | .. [4] http://projects.camlcity.org/projects/findlib.html 131 | .. [5] https://github.com/ocaml/ocaml-re 132 | .. [6] https://mmottl.github.io/pcre-ocaml 133 | .. [7] https://opam.ocaml.org 134 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.11) 2 | 3 | (license "LGPL-2.1 with OCaml linking exception") 4 | (name mparser) 5 | (version 1.3) 6 | 7 | (authors "Holger Arnold " "Max Mouratov ") 8 | (maintainers "Max Mouratov ") 9 | (source (github murmour/mparser)) 10 | 11 | (package 12 | (name mparser) 13 | (synopsis "A simple monadic parser combinator library") 14 | (description " 15 | This library implements a rather complete and efficient monadic parser 16 | combinator library similar to the Parsec library for Haskell by Daan Leijen and 17 | the FParsec library for FSharp by Stephan Tolksdorf.") 18 | (depends 19 | (ocaml (>= 4.02)))) 20 | 21 | (package 22 | (name mparser-pcre) 23 | (synopsis "MParser plugin: PCRE-based regular expressions") 24 | (depends 25 | (ocaml (>= 4.02)) 26 | mparser 27 | pcre)) 28 | 29 | (package 30 | (name mparser-re) 31 | (synopsis "MParser plugin: RE-based regular expressions") 32 | (depends 33 | (ocaml (>= 4.02)) 34 | mparser 35 | (re (>= 1.7.2)))) 36 | 37 | (generate_opam_files true) 38 | -------------------------------------------------------------------------------- /mparser-pcre.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "1.3" 4 | synopsis: "MParser plugin: PCRE-based regular expressions" 5 | maintainer: ["Max Mouratov "] 6 | authors: [ 7 | "Holger Arnold " "Max Mouratov " 8 | ] 9 | license: "LGPL-2.1 with OCaml linking exception" 10 | homepage: "https://github.com/murmour/mparser" 11 | bug-reports: "https://github.com/murmour/mparser/issues" 12 | depends: [ 13 | "dune" {>= "1.11"} 14 | "ocaml" {>= "4.02"} 15 | "mparser" 16 | "pcre" 17 | ] 18 | build: [ 19 | ["dune" "subst"] {pinned} 20 | [ 21 | "dune" 22 | "build" 23 | "-p" 24 | name 25 | "-j" 26 | jobs 27 | "@install" 28 | "@runtest" {with-test} 29 | "@doc" {with-doc} 30 | ] 31 | ] 32 | dev-repo: "git+https://github.com/murmour/mparser.git" 33 | -------------------------------------------------------------------------------- /mparser-re.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "1.3" 4 | synopsis: "MParser plugin: RE-based regular expressions" 5 | maintainer: ["Max Mouratov "] 6 | authors: [ 7 | "Holger Arnold " "Max Mouratov " 8 | ] 9 | license: "LGPL-2.1 with OCaml linking exception" 10 | homepage: "https://github.com/murmour/mparser" 11 | bug-reports: "https://github.com/murmour/mparser/issues" 12 | depends: [ 13 | "dune" {>= "1.11"} 14 | "ocaml" {>= "4.02"} 15 | "mparser" 16 | "re" {>= "1.7.2"} 17 | ] 18 | build: [ 19 | ["dune" "subst"] {pinned} 20 | [ 21 | "dune" 22 | "build" 23 | "-p" 24 | name 25 | "-j" 26 | jobs 27 | "@install" 28 | "@runtest" {with-test} 29 | "@doc" {with-doc} 30 | ] 31 | ] 32 | dev-repo: "git+https://github.com/murmour/mparser.git" 33 | -------------------------------------------------------------------------------- /mparser.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "1.3" 4 | synopsis: "A simple monadic parser combinator library" 5 | description: """ 6 | 7 | This library implements a rather complete and efficient monadic parser 8 | combinator library similar to the Parsec library for Haskell by Daan Leijen and 9 | the FParsec library for FSharp by Stephan Tolksdorf.""" 10 | maintainer: ["Max Mouratov "] 11 | authors: [ 12 | "Holger Arnold " "Max Mouratov " 13 | ] 14 | license: "LGPL-2.1 with OCaml linking exception" 15 | homepage: "https://github.com/murmour/mparser" 16 | bug-reports: "https://github.com/murmour/mparser/issues" 17 | depends: [ 18 | "dune" {>= "1.11"} 19 | "ocaml" {>= "4.02"} 20 | ] 21 | build: [ 22 | ["dune" "subst"] {pinned} 23 | [ 24 | "dune" 25 | "build" 26 | "-p" 27 | name 28 | "-j" 29 | jobs 30 | "@install" 31 | "@runtest" {with-test} 32 | "@doc" {with-doc} 33 | ] 34 | ] 35 | dev-repo: "git+https://github.com/murmour/mparser.git" 36 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name mparser) 3 | (public_name mparser) 4 | (modules_without_implementation MParser_Sig) 5 | (modules MParser MParser_Sig MParser_Char_Stream MParser_Utils) 6 | (wrapped false)) 7 | 8 | (library 9 | (name mparser_pcre) 10 | (public_name mparser-pcre) 11 | (libraries mparser pcre) 12 | (modules MParser_PCRE) 13 | (wrapped false)) 14 | 15 | (library 16 | (name mparser_re) 17 | (libraries mparser re.perl) 18 | (public_name mparser-re) 19 | (modules MParser_RE) 20 | (wrapped false)) 21 | -------------------------------------------------------------------------------- /src/mParser.ml: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** For an introduction to monadic parser combinators see the following paper: 21 | 22 | - Graham Hutton and Erik Meijer, "Monadic Parser Combinators", Technical 23 | report NOTTCS-TR-96-4, Department of Computer Science, University of 24 | Nottingham, 1996. 25 | 26 | The internal design of this module is based on the one used in the FParsec 27 | library for F# by Stephan Tolksdorf and the Parsec library for Haskell by 28 | Daan Leijen, which is described in the following paper: 29 | 30 | - Daan Leijen and Erik Meijer, "Parsec: Direct-Style Monadic Parser 31 | Combinators For The Real World", Technical Report UU-CS-2001-35, 32 | Departement of Computer Science, Universiteit Utrecht, 2001. 33 | *) 34 | 35 | 36 | open Printf 37 | open MParser_Utils 38 | 39 | 40 | (* Parser state 41 | -------------------------------------------------------------------------- *) 42 | 43 | type 's state = { 44 | input: MParser_Char_Stream.t; 45 | index: int; 46 | line: int; 47 | line_begin: int; 48 | user: 's; 49 | } 50 | 51 | 52 | let init input user = { 53 | input; 54 | index = 0; 55 | line = 1; 56 | line_begin = 0; 57 | user; 58 | } 59 | 60 | let is_valid_index s = 61 | s.index >= 0 && s.index < MParser_Char_Stream.length s.input 62 | 63 | let is_eof s = 64 | not (is_valid_index s) 65 | 66 | let advance_state s n = 67 | if is_valid_index s then 68 | { s with index = s.index + n } 69 | else 70 | s 71 | 72 | let advance_state_nl s n = 73 | if is_valid_index s then 74 | { s with 75 | index = s.index + n; 76 | line = s.line + 1; 77 | line_begin = s.index + n; 78 | } 79 | else 80 | s 81 | 82 | let next_state s = 83 | advance_state s 1 84 | 85 | let read_char s = 86 | MParser_Char_Stream.read_char s.input s.index 87 | 88 | let read_index s i = 89 | MParser_Char_Stream.read_char s.input i 90 | 91 | let next_char s = 92 | MParser_Char_Stream.read_char s.input (s.index + 1) 93 | 94 | let prev_char s = 95 | MParser_Char_Stream.read_char s.input (s.index - 1) 96 | 97 | let read_string s n = 98 | MParser_Char_Stream.read_string s.input s.index n 99 | 100 | let match_char s c = 101 | MParser_Char_Stream.match_char s.input s.index c 102 | 103 | let match_string s str = 104 | MParser_Char_Stream.match_string s.input s.index str 105 | 106 | 107 | (* Error handling 108 | -------------------------------------------------------------------------- *) 109 | 110 | type pos = int * int * int 111 | 112 | let pos_of_state s = 113 | (s.index, s.line, s.index - s.line_begin + 1) 114 | 115 | type error_message = 116 | | Unexpected_error of string 117 | | Expected_error of string 118 | | Message_error of string 119 | | Compound_error of string * error 120 | | Backtrack_error of error 121 | | Unknown_error 122 | 123 | and error = 124 | | Parse_error of pos * error_message list 125 | | No_error 126 | 127 | 128 | let unexpected_error s msg = 129 | Parse_error (pos_of_state s, [ Unexpected_error msg ]) 130 | 131 | let expected_error s msg = 132 | Parse_error (pos_of_state s, [ Expected_error msg ]) 133 | 134 | let message_error s msg = 135 | Parse_error (pos_of_state s, [ Message_error msg ]) 136 | 137 | let compound_error s msg e = 138 | Parse_error (pos_of_state s, [ Compound_error (msg, e) ]) 139 | 140 | let backtrack_error s e = 141 | Parse_error (pos_of_state s, [ Backtrack_error e ]) 142 | 143 | let unknown_error s = 144 | Parse_error (pos_of_state s, [ Unknown_error ]) 145 | 146 | let merge_errors e1 e2 = 147 | match (e1, e2) with 148 | | (No_error, e2) -> 149 | e2 150 | | (e1, No_error) -> 151 | e1 152 | | (Parse_error (s, msg1), Parse_error (_, msg2)) -> 153 | Parse_error (s, List.append msg1 msg2) 154 | 155 | 156 | (* Error reporting 157 | -------------------------------------------------------------------------- *) 158 | 159 | let error_line input pos width indent = 160 | let rec find_nl i stop = 161 | if i >= stop then 162 | i 163 | else match MParser_Char_Stream.read_char input i with 164 | | None | Some '\n' | Some '\r' -> 165 | i 166 | | _ -> 167 | find_nl (i+1) stop 168 | in 169 | let space = width - indent in 170 | if space <= 10 then 171 | "\n" 172 | else 173 | let (index, _, column) = pos in 174 | let start = index - (min (column - 1) (space / 2)) in 175 | let stop = min (start + space) (MParser_Char_Stream.length input) in 176 | let length = (find_nl start stop) - start in 177 | let offset = index - start in 178 | if length <= 0 then 179 | "\n" 180 | else 181 | (String.make indent ' ') 182 | ^ (MParser_Char_Stream.read_string input start length) ^ "\n" 183 | ^ (String.make (indent + offset) ' ') ^ "^\n" 184 | 185 | (** [concat_conj sep conj strings] concatenates the elements of [strings] 186 | using the separator [sep] and the conjunction [conj] according to the 187 | rules of the English language. For example, [concat_conj "and" 188 | \["A"; "B"; "C"\]] results in the string ["A, B, and C"]. 189 | *) 190 | let rec concat_conj conj = function 191 | | [] -> 192 | "" 193 | | [ x ] -> 194 | x 195 | | x :: [ y ] -> 196 | sprintf "%s %s %s" x conj y 197 | | x :: xs -> 198 | sprintf "%s, %s" x (concat_conj conj xs) 199 | 200 | let rec error_message input pos messages width indent = 201 | let (_, line, column) = pos in 202 | 203 | let (unexp, exp, msg, comp, back, _unknowns) = 204 | List.fold_left 205 | (fun ((u, e, m, c, b, k) as msgs) msg -> 206 | match msg with 207 | | Unexpected_error s when s <> "" -> 208 | (s :: u, e, m, c, b, k) 209 | | Expected_error s when s <> "" -> 210 | (u, s :: e, m, c, b, k) 211 | | Message_error s when s <> "" -> 212 | (u, e, s :: m, c, b, k) 213 | | Compound_error (s, Parse_error (pos1, msg1)) -> 214 | (u, s :: e, m, (s, pos1, msg1) :: c, b, k) 215 | | Backtrack_error (Parse_error (pos1, msg1)) -> 216 | (u, e, m, c, (pos1, msg1) :: b, k) 217 | | Unknown_error -> 218 | (u, e, m, c, b, k+1) 219 | | _ -> 220 | msgs) 221 | ([], [], [], [], [], 0) messages 222 | in 223 | 224 | let ind = String.make indent ' ' in 225 | let buf = Buffer.create 160 in 226 | 227 | bprintf buf "%sError in line %d, column %d:\n%s" 228 | ind line column (error_line input pos width indent); 229 | 230 | if unexp <> [] then 231 | bprintf buf "%sUnexpected %s\n" 232 | ind (concat_conj "and" (String.unique unexp)); 233 | 234 | if exp <> [] then 235 | bprintf buf "%sExpecting %s\n" 236 | ind (concat_conj "or" (String.unique exp)); 237 | 238 | if msg <> [] then 239 | if unexp <> [] || exp <> [] then 240 | (bprintf buf "%sOther errors:\n" ind; 241 | msg |> List.iter (fun m -> 242 | bprintf buf "%s %s\n" ind m)) 243 | else 244 | msg |> List.iter (fun m -> 245 | bprintf buf "%s%s\n" ind m); 246 | 247 | comp |> List.iter (fun (s, p, m) -> 248 | bprintf buf "%s%s could not be parsed because:\n%s" 249 | ind s (error_message input p m width (indent + 2))); 250 | 251 | back |> List.iter (fun (p, m) -> 252 | bprintf buf "%sBacktracking occurred after:\n%s" 253 | ind (error_message input p m width (indent + 2))); 254 | 255 | Buffer.contents buf 256 | 257 | 258 | (* Parser type 259 | -------------------------------------------------------------------------- *) 260 | 261 | type ('a, 's) reply = 262 | | Empty_failed of error 263 | | Empty_ok of 'a * 's state * error 264 | | Consumed_failed of error 265 | | Consumed_ok of 'a * 's state * error 266 | 267 | type ('a, 's) t = 's state -> ('a, 's) reply 268 | type ('a, 's) parser = ('a, 's) t 269 | 270 | 271 | let make_ok consumed r s e = 272 | if consumed then 273 | Consumed_ok (r, s, e) 274 | else 275 | Empty_ok (r, s, e) 276 | 277 | let make_failed consumed e = 278 | if consumed then 279 | Consumed_failed e 280 | else 281 | Empty_failed e 282 | 283 | let is_consumed r = 284 | match r with 285 | | Consumed_failed _ | Consumed_ok _ -> 286 | true 287 | | Empty_failed _ | Empty_ok _ -> 288 | false 289 | 290 | let is_empty r = 291 | match r with 292 | | Consumed_failed _ | Consumed_ok _ -> 293 | false 294 | | Empty_failed _ | Empty_ok _ -> 295 | true 296 | 297 | let is_error r = 298 | match r with 299 | | Empty_failed _ | Consumed_failed _ -> 300 | true 301 | | Empty_ok _ | Consumed_ok _ -> 302 | false 303 | 304 | let is_ok r = 305 | match r with 306 | | Empty_failed _ | Consumed_failed _ -> 307 | false 308 | | Empty_ok _ | Consumed_ok _ -> 309 | true 310 | 311 | let get_error reply = 312 | match reply with 313 | | Empty_failed e | Empty_ok (_, _, e) 314 | | Consumed_failed e | Consumed_ok (_, _, e) -> e 315 | 316 | let set_error reply error = 317 | match reply with 318 | | Empty_failed _ -> 319 | Empty_failed error 320 | | Empty_ok (r, s, _) -> 321 | Empty_ok (r, s, error) 322 | | Consumed_failed _ -> 323 | Consumed_failed error 324 | | Consumed_ok (r, s, _) -> 325 | Consumed_ok (r, s, error) 326 | 327 | type 'a result = 328 | | Success of 'a 329 | | Failed of string * error 330 | 331 | let parse p input user = 332 | match p (init input user) with 333 | | Empty_ok (x, _, _) | Consumed_ok (x, _, _) -> 334 | Success x 335 | | Empty_failed e | Consumed_failed e -> 336 | (match e with 337 | | Parse_error (pos, messages) -> 338 | Failed (error_message input pos messages 78 0, e) 339 | | No_error -> 340 | Failed ("", e)) 341 | 342 | let parse_string p str user = 343 | let input = MParser_Char_Stream.from_string str in 344 | parse p input user 345 | 346 | let parse_channel p chn user = 347 | let input = MParser_Char_Stream.from_channel chn in 348 | parse p input user 349 | 350 | 351 | (* Parser combinators 352 | -------------------------------------------------------------------------- *) 353 | 354 | let return x s = 355 | Empty_ok (x, s, No_error) 356 | 357 | let try_return f x msg s0 s1 = 358 | try 359 | Empty_ok (f x, s1, No_error) 360 | with _ -> 361 | Empty_failed (message_error s0 msg) 362 | 363 | let try_return2 f x1 x2 msg s0 s1 = 364 | try 365 | Empty_ok (f x1 x2, s1, No_error) 366 | with _ -> 367 | Empty_failed (message_error s0 msg) 368 | 369 | let try_return3 f x1 x2 x3 msg s0 s1 = 370 | try 371 | Empty_ok (f x1 x2 x3, s1, No_error) 372 | with _ -> 373 | Empty_failed (message_error s0 msg) 374 | 375 | let fail msg s = 376 | Consumed_failed (message_error s msg) 377 | 378 | let message msg s = 379 | Empty_failed (message_error s msg) 380 | 381 | let zero s = 382 | Empty_failed (unknown_error s) 383 | 384 | let bind p f s = 385 | match p s with 386 | | (Empty_failed _ | Consumed_failed _) as failed -> 387 | failed 388 | | Empty_ok (r1, s1, e1) -> 389 | (match f r1 s1 with 390 | | Empty_failed e2 -> 391 | Empty_failed (merge_errors e2 e1) 392 | | Empty_ok (r2, s2, e2) -> 393 | Empty_ok (r2, s2, merge_errors e2 e1) 394 | | (Consumed_ok _ | Consumed_failed _) as consumed -> 395 | consumed) 396 | | Consumed_ok (r1, s1, e1) -> 397 | (match f r1 s1 with 398 | | Empty_failed e2 -> 399 | Consumed_failed (merge_errors e2 e1) 400 | | Empty_ok (r2, s2, e2) -> 401 | Consumed_ok (r2, s2, merge_errors e2 e1) 402 | | (Consumed_ok _ | Consumed_failed _) as consumed -> 403 | consumed) 404 | 405 | let (>>=) = bind 406 | 407 | let (>>) p q = 408 | p >>= fun _ -> q 409 | 410 | let (<<) p q = 411 | p >>= fun x -> q >> return x 412 | 413 | let (>>>) = (>>) 414 | 415 | let (<<<) = (<<) 416 | 417 | let (>>$) p x = 418 | p >> return x 419 | 420 | let (>>?) p q s = 421 | match p s with 422 | | (Empty_failed _ | Consumed_failed _) as failed -> 423 | failed 424 | | Empty_ok (_, s1, e1) -> 425 | (match q s1 with 426 | | Empty_failed e2 -> 427 | Empty_failed (merge_errors e2 e1) 428 | | Empty_ok (r2, s2, e2) -> 429 | Empty_ok (r2, s2, merge_errors e2 e1) 430 | | (Consumed_ok _ | Consumed_failed _) as consumed -> 431 | consumed) 432 | | Consumed_ok (_, s1, e1) -> 433 | (match q s1 with 434 | | Empty_failed e2 -> 435 | Empty_failed (backtrack_error s (merge_errors e2 e1)) 436 | | Empty_ok (r2, s2, e2) -> 437 | Consumed_ok (r2, s2, merge_errors e2 e1) 438 | | (Consumed_ok _ | Consumed_failed _) as consumed -> 439 | consumed) 440 | 441 | let (|>>) p f = 442 | p >>= fun x -> return (f x) 443 | 444 | let pipe2 p1 p2 f = 445 | p1 >>= fun x1 -> 446 | p2 >>= fun x2 -> 447 | return (f x1 x2) 448 | 449 | let pipe3 p1 p2 p3 f = 450 | p1 >>= fun x1 -> 451 | p2 >>= fun x2 -> 452 | p3 >>= fun x3 -> 453 | return (f x1 x2 x3) 454 | 455 | let pipe4 p1 p2 p3 p4 f = 456 | p1 >>= fun x1 -> 457 | p2 >>= fun x2 -> 458 | p3 >>= fun x3 -> 459 | p4 >>= fun x4 -> 460 | return (f x1 x2 x3 x4) 461 | 462 | let (<|>) p1 p2 s = 463 | match p1 s with 464 | | Empty_failed e1 -> 465 | (match p2 s with 466 | | Empty_failed e2 -> 467 | Empty_failed (merge_errors e2 e1) 468 | | Empty_ok (r2, s2, e2) -> 469 | Empty_ok (r2, s2, merge_errors e2 e1) 470 | | (Consumed_ok _ | Consumed_failed _) as consumed -> 471 | consumed) 472 | | other -> 473 | other 474 | 475 | let (<|>$) p x = 476 | p <|> return x 477 | 478 | let choice ps = 479 | List.fold_left (<|>) zero ps 480 | 481 | let attempt p s = 482 | match p s with 483 | | Consumed_failed e -> 484 | Empty_failed (backtrack_error s e) 485 | | other -> 486 | other 487 | 488 | let () p label s = 489 | let reply = p s in 490 | if is_empty reply then 491 | set_error reply (expected_error s label) 492 | else 493 | reply 494 | 495 | let () p label s = 496 | let reply = p s in 497 | if is_empty reply then 498 | if is_error reply then 499 | match get_error reply with 500 | | Parse_error (_, [ Backtrack_error error ]) -> 501 | set_error reply (compound_error s label error) 502 | | _ -> 503 | set_error reply (expected_error s label) 504 | else 505 | set_error reply (expected_error s label) 506 | else if is_error reply then 507 | set_error reply (compound_error s label (get_error reply)) 508 | else 509 | reply 510 | 511 | let look_ahead p s = 512 | match p s with 513 | | Empty_ok (r, _, _) | Consumed_ok (r, _, _) -> 514 | Empty_ok (r, s, No_error) 515 | | (Empty_failed _) as err -> 516 | err 517 | | Consumed_failed e -> 518 | Empty_failed (backtrack_error s e) 519 | 520 | let followed_by p msg s = 521 | match p s with 522 | | Empty_ok _ | Consumed_ok _ -> 523 | Empty_ok ((), s, No_error) 524 | | Empty_failed _ | Consumed_failed _ -> 525 | Empty_failed (expected_error s msg) 526 | 527 | let not_followed_by p msg s = 528 | match p s with 529 | | Empty_ok _ | Consumed_ok _ -> 530 | Empty_failed (unexpected_error s msg) 531 | | Empty_failed _ | Consumed_failed _ -> 532 | Empty_ok ((), s, No_error) 533 | 534 | let opt x p = 535 | p <|>$ x 536 | 537 | let option p = 538 | (p >>= fun r -> return (Some r)) <|> return None 539 | 540 | let optional p = 541 | p >>$ () <|>$ () 542 | 543 | let try_skip p = 544 | p >>$ true <|>$ false 545 | 546 | let pair p q = 547 | p >>= fun x -> 548 | q >>= fun y -> 549 | return (x, y) 550 | 551 | let many_fold_apply f a g p = 552 | let rec loop consumed a s e = 553 | match p s with 554 | | Consumed_ok (r, s1, e1) -> 555 | loop true (f a r) s1 e1 556 | | (Consumed_failed _) as err -> 557 | err 558 | | Empty_failed e1 -> 559 | make_ok consumed (g a) s (merge_errors e1 e) 560 | | Empty_ok _ -> 561 | failwith "MParser: the \"many\" combinator is applied to \ 562 | a parser that accepts an empty string" 563 | in 564 | fun s -> loop false a s No_error 565 | 566 | let many1_fold_apply f a g p s = 567 | match many_fold_apply f a g p s with 568 | | Empty_ok (_, _, e) -> 569 | Empty_failed e 570 | | other -> 571 | other 572 | 573 | let many p = 574 | many_fold_apply (fun xs x -> x :: xs) [] List.rev p 575 | 576 | let many1 p = 577 | many1_fold_apply (fun xs x -> x :: xs) [] List.rev p 578 | 579 | let many_rev p = 580 | many_fold_apply (fun xs x -> x :: xs) [] (fun x -> x) p 581 | 582 | let many1_rev p = 583 | many1_fold_apply (fun xs x -> x :: xs) [] (fun x -> x) p 584 | 585 | let skip p = 586 | p |>> ignore 587 | 588 | let skip_many p = 589 | many_fold_apply (fun _ _ -> ()) () (fun _ -> ()) p 590 | 591 | let skip_many1 p = 592 | many1_fold_apply (fun _ _ -> ()) () (fun _ -> ()) p 593 | 594 | let many_fold_left f a = 595 | many_fold_apply f a (fun x -> x) 596 | 597 | let many1_fold_left f a = 598 | many1_fold_apply f a (fun x -> x) 599 | 600 | let many_rev_fold_left f a = 601 | many_fold_apply (fun xs x -> x :: xs) [] (List.fold_left f a) 602 | 603 | let many1_rev_fold_left f a = 604 | many1_fold_apply (fun xs x -> x :: xs) [] (List.fold_left f a) 605 | 606 | let chain_left1 p op = 607 | p >>= fun x -> 608 | many_fold_left (fun x (f, y) -> f x y) x (pair op p) 609 | 610 | let chain_left p op x = 611 | chain_left1 p op <|>$ x 612 | 613 | let chain_right1 p op = 614 | let rec make_op a f y l = 615 | match l with 616 | | (g, x) :: r -> 617 | make_op a g (f x y) r 618 | | [] -> 619 | f a y 620 | in 621 | pipe2 p (many_rev (pair op p)) (fun x l -> 622 | match l with 623 | | [] -> x 624 | | (f, y) :: r -> make_op x f y r) 625 | 626 | let chain_right p op x = 627 | chain_right1 p op <|>$ x 628 | 629 | let count n p = 630 | let rec loop consumed n acc s e = 631 | if n <= 0 then 632 | make_ok consumed (List.rev acc) s e 633 | else match p s with 634 | | Empty_ok (r, s1, e1) -> 635 | loop consumed (n - 1) (r :: acc) s1 (merge_errors e1 e) 636 | | Consumed_ok (r, s1, e1) -> 637 | loop true (n - 1) (r :: acc) s1 e1 638 | | Empty_failed e1 -> 639 | make_failed consumed (merge_errors e1 e) 640 | | (Consumed_failed _) as failed -> 641 | failed 642 | in 643 | fun s -> loop false n [] s No_error 644 | 645 | let skip_count n p = 646 | let rec loop consumed n s e = 647 | if n <= 0 then 648 | make_ok consumed () s e 649 | else match p s with 650 | | Empty_ok (_, s1, e1) -> 651 | loop consumed (n - 1) s1 (merge_errors e1 e) 652 | | Consumed_ok (_, s1, e1) -> 653 | loop true (n - 1) s1 e1 654 | | Empty_failed e1 -> 655 | make_failed consumed (merge_errors e1 e) 656 | | (Consumed_failed _) as failed -> 657 | failed 658 | in 659 | fun s -> loop false n s No_error 660 | 661 | let between left right p = 662 | left >> p << right 663 | 664 | let sep_by1 p sep = 665 | p >>= fun x -> 666 | many (sep >> p) >>= fun xs -> 667 | return (x :: xs) 668 | 669 | let sep_by p sep = 670 | sep_by1 p sep <|>$ [] 671 | 672 | let sep_end_by1 p sep = 673 | p >>= fun x -> 674 | many (sep >>? p) >>= fun xs -> 675 | optional sep >> 676 | return (x :: xs) 677 | 678 | let sep_end_by p sep = 679 | sep_end_by1 p sep <|>$ [] 680 | 681 | let end_by p sep = 682 | many (p << sep) 683 | 684 | let end_by1 p sep = 685 | many1 (p << sep) 686 | 687 | let many_until p q = 688 | many (not_followed_by q "" >> p) << q 689 | 690 | let skip_many_until p q = 691 | skip_many (not_followed_by q "" >> p) << q 692 | 693 | 694 | (* Accessing state 695 | -------------------------------------------------------------------------- *) 696 | 697 | let get_user_state s = 698 | Empty_ok (s.user, s, No_error) 699 | 700 | let set_user_state user s = 701 | Empty_ok ((), { s with user }, No_error) 702 | 703 | let update_user_state f s = 704 | Empty_ok ((), { s with user = f (s.user) }, No_error) 705 | 706 | let get_input s = 707 | Empty_ok (s.input, s, No_error) 708 | 709 | let get_index s = 710 | Empty_ok (s.index, s, No_error) 711 | 712 | let get_pos s = 713 | Empty_ok (pos_of_state s, s, No_error) 714 | 715 | let register_nl lines chars_after_nl s = 716 | let s1 = 717 | { s with 718 | line = s.line + lines; 719 | line_begin = s.index - chars_after_nl; 720 | } 721 | in 722 | Empty_ok ((), s1, No_error) 723 | 724 | let set_pos (_, line, column) s = 725 | let s' = 726 | { s with 727 | line; 728 | line_begin = s.index - (column - 1); 729 | } 730 | in 731 | Empty_ok ((), s', No_error) 732 | 733 | 734 | (* Character parsers 735 | -------------------------------------------------------------------------- *) 736 | 737 | let skip_nchars n s = 738 | if n < 0 then 739 | invalid_arg "MParser.skip: negative offset"; 740 | let s' = advance_state s n in 741 | if s'.index <> s.index then 742 | Consumed_ok ((), s', No_error) 743 | else 744 | Empty_ok ((), s, No_error) 745 | 746 | let eof s = 747 | match read_char s with 748 | | Some _ -> 749 | Empty_failed (expected_error s "end of input") 750 | | None -> 751 | Empty_ok ((), s, No_error) 752 | 753 | let char c s = 754 | if match_char s c then 755 | Consumed_ok (c, advance_state s 1, No_error) 756 | else 757 | Empty_failed (expected_error s ("'" ^ (String.make 1 c) ^ "'")) 758 | 759 | let skip_char c s = 760 | if match_char s c then 761 | Consumed_ok ((), advance_state s 1, No_error) 762 | else 763 | Empty_failed (expected_error s ("'" ^ (String.make 1 c) ^ "'")) 764 | 765 | let any_char s = 766 | match read_char s with 767 | | Some c -> 768 | Consumed_ok (c, advance_state s 1, No_error) 769 | | None -> 770 | Empty_failed (expected_error s "any character") 771 | 772 | let skip_any_char s = 773 | match read_char s with 774 | | Some _ -> 775 | Consumed_ok ((), advance_state s 1, No_error) 776 | | None -> 777 | Empty_failed (expected_error s "any character") 778 | 779 | let any_char_or_nl s = 780 | match read_char s with 781 | | Some c -> 782 | if c <> '\n' && c <> '\r' then 783 | Consumed_ok (c, advance_state s 1, No_error) 784 | else 785 | let n = (if c = '\r' && next_char s = Some '\n' then 2 else 1) in 786 | Consumed_ok ('\n', advance_state_nl s n, No_error) 787 | | None -> 788 | Empty_failed (expected_error s "any character") 789 | 790 | let skip_any_char_or_nl s = 791 | match read_char s with 792 | | Some c -> 793 | if c <> '\n' && c <> '\r' then 794 | Consumed_ok ((), advance_state s 1, No_error) 795 | else 796 | let n = if c = '\r' && next_char s = Some '\n' then 2 else 1 in 797 | Consumed_ok ((), advance_state_nl s n, No_error) 798 | | None -> 799 | Empty_failed (expected_error s "any character") 800 | 801 | let peek_char s = 802 | match next_char s with 803 | | Some c -> 804 | Empty_ok (c, s, No_error) 805 | | None -> 806 | Empty_failed (unexpected_error s "end of input") 807 | 808 | let string str s = 809 | if match_string s str then 810 | Consumed_ok (str, advance_state s (String.length str), No_error) 811 | else 812 | Empty_failed (expected_error s ("\"" ^ str ^ "\"")) 813 | 814 | let skip_string str s = 815 | if match_string s str then 816 | Consumed_ok ((), advance_state s (String.length str), No_error) 817 | else 818 | Empty_failed (expected_error s ("\"" ^ str ^ "\"")) 819 | 820 | let any_string n s = 821 | if n = 0 then 822 | Empty_ok ("", s, No_error) 823 | else 824 | let r = read_string s n in 825 | if String.length r = n then 826 | Consumed_ok (r, advance_state s n, No_error) 827 | else 828 | let msg = sprintf "any sequence of %d characters" n in 829 | Empty_failed (expected_error s msg) 830 | 831 | let many_chars p s = 832 | many_fold_apply 833 | (fun b c -> Buffer.add_char b c; b) (Buffer.create 16) 834 | (Buffer.contents) p s 835 | 836 | let many1_chars p s = 837 | many1_fold_apply 838 | (fun b c -> Buffer.add_char b c; b) (Buffer.create 16) 839 | (Buffer.contents) p s 840 | 841 | let skip_many_chars = 842 | skip_many 843 | 844 | let skip_many1_chars = 845 | skip_many1 846 | 847 | let many_chars_until p q = 848 | many_chars (not_followed_by q "" >> p) << q 849 | 850 | let skip_many_chars_until p q = 851 | skip_many_chars (not_followed_by q "" >> p) << q 852 | 853 | let satisfy p s = 854 | match read_char s with 855 | | Some c -> 856 | if p c then 857 | Consumed_ok (c, advance_state s 1, No_error) 858 | else 859 | Empty_failed (unknown_error s) 860 | | None -> 861 | Empty_failed (unexpected_error s "end of input") 862 | 863 | let satisfy_l p label s = 864 | match read_char s with 865 | | Some c when p c -> 866 | Consumed_ok (c, advance_state s 1, No_error) 867 | | _ -> 868 | Empty_failed (expected_error s label) 869 | 870 | let skip_satisfy p = 871 | satisfy p |>> ignore 872 | 873 | let skip_satisfy_l p label = 874 | satisfy_l p label |>> ignore 875 | 876 | let nsatisfy n p s = 877 | if n = 0 then 878 | Empty_ok ("", s, No_error) 879 | else 880 | let r = read_string s n in 881 | if String.length r = n && String.for_all p r then 882 | Consumed_ok (r, advance_state s n, No_error) 883 | else 884 | Empty_failed (unknown_error s) 885 | 886 | let many_satisfy_loop p = 887 | let rec loop i s = 888 | match read_index s i with 889 | | Some c when p c -> 890 | loop (i+1) s 891 | | _ -> 892 | i - s.index 893 | in 894 | fun s -> loop s.index s 895 | 896 | let many_satisfy p s = 897 | let n = many_satisfy_loop p s in 898 | if n > 0 then 899 | Consumed_ok (read_string s n, advance_state s n, No_error) 900 | else 901 | Empty_ok ("", s, No_error) 902 | 903 | let many1_satisfy p s = 904 | match many_satisfy p s with 905 | | Consumed_ok _ as result -> 906 | result 907 | | _ -> 908 | Empty_failed (unknown_error s) 909 | 910 | let skip_many_satisfy p s = 911 | let n = many_satisfy_loop p s in 912 | if n > 0 then 913 | Consumed_ok ((), advance_state s n, No_error) 914 | else 915 | Empty_ok ((), s, No_error) 916 | 917 | let skip_many1_satisfy p s = 918 | match skip_many_satisfy p s with 919 | | Consumed_ok _ as result -> 920 | result 921 | | _ -> 922 | Empty_failed (unknown_error s) 923 | 924 | let next_char_satisfies p s = 925 | match next_char s with 926 | | Some c when p c -> 927 | Empty_ok ((), s, No_error) 928 | | _ -> 929 | Empty_failed (unknown_error s) 930 | 931 | let prev_char_satisfies p s = 932 | match prev_char s with 933 | | Some c when p c -> 934 | Empty_ok ((), s, No_error) 935 | | _ -> 936 | Empty_failed (unknown_error s) 937 | 938 | let any_of str = 939 | satisfy (String.contains str) 940 | 941 | let none_of str = 942 | satisfy (fun x -> not (String.contains str x)) 943 | 944 | let is_not p s = 945 | if is_ok (p s) then 946 | Empty_failed (unknown_error s) 947 | else match read_char s with 948 | | Some c -> 949 | Consumed_ok (c, advance_state s 1, No_error) 950 | | None -> 951 | Empty_failed (unknown_error s) 952 | 953 | let uppercase s = 954 | satisfy_l (function 'A'..'Z' -> true | _ -> false) 955 | "uppercase letter" s 956 | 957 | let lowercase s = 958 | satisfy_l (function 'a'..'z' -> true | _ -> false) 959 | "lowercase letter" s 960 | 961 | let letter s = 962 | satisfy_l (function 'a'..'z' | 'A'..'Z' -> true | _ -> false) 963 | "letter" s 964 | 965 | let digit s = 966 | satisfy_l (function '0'..'9' -> true | _ -> false) 967 | "digit" s 968 | 969 | let hex_digit s = 970 | satisfy_l (function 'a'..'f' | 'A'..'F' | '0'..'9' -> true | _ -> false) 971 | "hex digit" s 972 | 973 | let oct_digit s = 974 | satisfy_l (function '0'..'9' -> true | _ -> false) 975 | "oct digit" s 976 | 977 | let alphanum s = 978 | satisfy_l (function 'a'..'z' | 'A'..'Z' | '0'..'9' -> true | _ -> false) 979 | "letter or digit" s 980 | 981 | let tab s = 982 | satisfy_l (fun c -> c = '\t') 983 | "tab" s 984 | 985 | let blank s = 986 | satisfy_l (function '\t' | ' ' -> true | _ -> false) 987 | "space or tab" s 988 | 989 | let newline s = 990 | match read_char s with 991 | | Some c when c = '\n' || c = '\r' -> 992 | let k = if c = '\r' && next_char s = Some '\n' then 2 else 1 in 993 | Consumed_ok ('\n', advance_state_nl s k, No_error) 994 | | _ -> 995 | Empty_failed (expected_error s "newline") 996 | 997 | let space s = 998 | match read_char s with 999 | | Some ((' ' | '\t') as c) -> 1000 | Consumed_ok (c, advance_state s 1, No_error) 1001 | | Some '\r' when next_char s = Some '\n' -> 1002 | Consumed_ok ('\n', advance_state_nl s 2, No_error) 1003 | | Some ('\n' | '\r') -> 1004 | Consumed_ok ('\n', advance_state_nl s 1, No_error) 1005 | | _ -> 1006 | Empty_failed (expected_error s "whitespace") 1007 | 1008 | let non_space s = 1009 | match read_char s with 1010 | | None | Some (' ' | '\t' | '\r' | '\n') -> 1011 | Empty_failed (expected_error s "not whitespace") 1012 | | Some c -> 1013 | Consumed_ok (c, advance_state s 1, No_error) 1014 | 1015 | let spaces s = 1016 | let lines = ref 0 in 1017 | let line_begin = ref 0 in 1018 | 1019 | (* Performance hack: consuming whitespace in a loop *) 1020 | let rec loop i = 1021 | match read_index s i with 1022 | | Some (' ' | '\t') -> 1023 | loop (i+1) 1024 | | Some '\r' when read_index s (i+1) = Some '\n' -> 1025 | let i' = i + 2 in 1026 | lines := !lines + 1; 1027 | line_begin := i'; 1028 | loop i' 1029 | | Some ('\r' | '\n') -> 1030 | let i' = i + 1 in 1031 | lines := !lines + 1; 1032 | line_begin := i'; 1033 | loop i' 1034 | | _ -> 1035 | i - s.index 1036 | in 1037 | let n = loop s.index in 1038 | 1039 | if !lines > 0 then 1040 | let s1 = 1041 | { s with 1042 | index = s.index + n; 1043 | line = s.line + !lines; 1044 | line_begin = !line_begin; 1045 | } 1046 | in 1047 | Consumed_ok ((), s1, No_error) 1048 | else if n > 0 then 1049 | Consumed_ok ((), advance_state s n, No_error) 1050 | else 1051 | Empty_ok ((), s, No_error) 1052 | 1053 | let spaces1 s = 1054 | match spaces s with 1055 | | Consumed_ok _ as result -> 1056 | result 1057 | | _ -> 1058 | Empty_failed (expected_error s "whitespace") 1059 | 1060 | 1061 | (* Expressions 1062 | -------------------------------------------------------------------------- *) 1063 | 1064 | type assoc = 1065 | | Assoc_none 1066 | | Assoc_left 1067 | | Assoc_right 1068 | 1069 | type ('a, 's) operator = 1070 | | Infix of (('a -> 'a -> 'a, 's) t * assoc) 1071 | | Prefix of ('a -> 'a, 's) t 1072 | | Postfix of ('a -> 'a, 's) t 1073 | 1074 | 1075 | let make_expr_parser term (ops: ('a, 's) operator list) : ('a, 's) t = 1076 | let split_op (rassoc, lassoc, nassoc, prefix, postfix) op = 1077 | match op with 1078 | | Infix (p, Assoc_right) -> 1079 | (p :: rassoc, lassoc, nassoc, prefix, postfix) 1080 | | Infix (p, Assoc_left) -> 1081 | (rassoc, p :: lassoc, nassoc, prefix, postfix) 1082 | | Infix (p, Assoc_none) -> 1083 | (rassoc, lassoc, p :: nassoc, prefix, postfix) 1084 | | Prefix p -> 1085 | (rassoc, lassoc, nassoc, p :: prefix, postfix) 1086 | | Postfix p -> 1087 | (rassoc, lassoc, nassoc, prefix, p :: postfix) 1088 | in 1089 | 1090 | let (rassoc, lassoc, nassoc, prefix, postfix) = 1091 | List.fold_left split_op ([], [], [], [], []) ops 1092 | in 1093 | 1094 | let rassoc_op = choice rassoc in 1095 | let lassoc_op = choice lassoc in 1096 | let nassoc_op = choice nassoc in 1097 | let prefix_op = choice prefix in 1098 | let postfix_op = choice postfix in 1099 | let prefix_p = opt (fun x -> x) prefix_op in 1100 | let postfix_p = opt (fun x -> x) postfix_op in 1101 | 1102 | let term_p = 1103 | prefix_p >>= fun pre -> 1104 | term >>= fun x -> 1105 | postfix_p >>= fun post -> 1106 | return (post (pre x)) 1107 | in 1108 | 1109 | let rec rassoc_p x = 1110 | rassoc_op >>= fun f -> 1111 | (term_p >>= (fun z -> rassoc_p' z)) >>= fun y -> 1112 | return (f x y) 1113 | and rassoc_p' x = 1114 | opt x (rassoc_p x) 1115 | in 1116 | 1117 | let rec lassoc_p x = 1118 | lassoc_op >>= fun f -> 1119 | term_p >>= fun y -> 1120 | lassoc_p' (f x y) 1121 | and lassoc_p' x = 1122 | opt x (lassoc_p x) 1123 | in 1124 | 1125 | let nassoc_p x = 1126 | nassoc_op >>= fun f -> 1127 | term_p >>= fun y -> 1128 | return (f x y) 1129 | in 1130 | 1131 | term_p >>= fun x -> 1132 | (rassoc_p x <|> lassoc_p x <|> nassoc_p x <|>$ x) 1133 | 1134 | let expression operators term = 1135 | List.fold_left make_expr_parser term operators 1136 | 1137 | 1138 | (* Regexp-related features 1139 | -------------------------------------------------------------------------- *) 1140 | 1141 | module MakeRegexp (Regexp: MParser_Sig.Regexp) = struct 1142 | module CharStreamRx = MParser_Char_Stream.MakeRegexp (Regexp) 1143 | 1144 | 1145 | let match_regexp s r = 1146 | CharStreamRx.match_regexp s.input s.index r 1147 | 1148 | let make_regexp pat = 1149 | Regexp.make pat 1150 | 1151 | let regexp r s = 1152 | match match_regexp s r with 1153 | | None -> 1154 | zero s 1155 | | Some substrings -> 1156 | match Regexp.get_substring substrings 0 with 1157 | | None -> 1158 | zero s 1159 | | Some result -> 1160 | let n = String.length result in 1161 | if n > 0 then 1162 | Consumed_ok (result, advance_state s n, No_error) 1163 | else 1164 | Empty_ok (result, s, No_error) 1165 | 1166 | let regexp_substrings r s = 1167 | match match_regexp s r with 1168 | | None -> 1169 | zero s 1170 | | Some substrings -> 1171 | let result = Regexp.get_all_substrings substrings in 1172 | let n = String.length (Array.get result 0) in 1173 | if n > 0 then 1174 | Consumed_ok (result, advance_state s n, No_error) 1175 | else 1176 | Empty_ok (result, s, No_error) 1177 | 1178 | 1179 | (* Token parsers 1180 | ------------------------------------------------------------------------ *) 1181 | 1182 | module Tokens = struct 1183 | 1184 | let symbol s = 1185 | string s << spaces 1186 | 1187 | let skip_symbol s = 1188 | skip_string s << spaces 1189 | 1190 | let char_sp c = 1191 | char c << spaces 1192 | 1193 | let parens p = 1194 | between (char_sp '(') (char_sp ')') p 1195 | 1196 | let braces p = 1197 | between (char_sp '{') (char_sp '}') p 1198 | 1199 | let brackets p = 1200 | between (char_sp '<') (char_sp '>') p 1201 | 1202 | let squares p = 1203 | between (char_sp '[') (char_sp ']') p 1204 | 1205 | let semi s = 1206 | char_sp ';' s 1207 | 1208 | let comma s = 1209 | char_sp ',' s 1210 | 1211 | let colon s = 1212 | char_sp ':' s 1213 | 1214 | let dot s = 1215 | char_sp '.' s 1216 | 1217 | let semi_sep p = 1218 | sep_by p semi 1219 | 1220 | let semi_sep1 p = 1221 | sep_by1 p semi 1222 | 1223 | let semi_sep_end p = 1224 | sep_end_by p semi 1225 | 1226 | let semi_sep_end1 p = 1227 | sep_end_by1 p semi 1228 | 1229 | let semi_end p = 1230 | end_by p semi 1231 | 1232 | let semi_end1 p = 1233 | end_by1 p semi 1234 | 1235 | let comma_sep p = 1236 | sep_by p comma 1237 | 1238 | let comma_sep1 p = 1239 | sep_by1 p comma 1240 | 1241 | let escaped_char s = 1242 | (any_of "nrtb\\\"\'" |>> (function 1243 | | 'n' -> '\n' 1244 | | 'r' -> '\r' 1245 | | 't' -> '\t' 1246 | | 'b' -> '\b' 1247 | | c -> c)) s 1248 | 1249 | let escape_sequence_dec = 1250 | let int_of_dec c = (Char.code c) - (Char.code '0') in 1251 | let char_of_digits d2 d1 d0 = 1252 | char_of_int (100 * (int_of_dec d2) + 1253 | 10 * (int_of_dec d1) 1254 | + (int_of_dec d0)) 1255 | in 1256 | fun s -> 1257 | (digit >>= fun d2 -> 1258 | digit >>= fun d1 -> 1259 | digit >>= fun d0 -> 1260 | try_return3 char_of_digits d2 d1 d0 1261 | "Escape sequence is no valid character code" s) s 1262 | 1263 | let escape_sequence_hex = 1264 | let int_of_hex c = 1265 | match c with 1266 | | '0'..'9' -> 1267 | (Char.code c) - (Char.code '0') 1268 | | 'a'..'f' -> 1269 | (Char.code c) - (Char.code 'a') + 10 1270 | | 'A'..'F' -> 1271 | (Char.code c) - (Char.code 'A') + 10 1272 | | _ -> 1273 | failwith "MParser.int_of_hex: no hex digit" 1274 | in 1275 | let char_of_digits h1 h0 = 1276 | char_of_int (16 * (int_of_hex h1) + (int_of_hex h0)) 1277 | in 1278 | fun s -> 1279 | (char 'x' >> 1280 | hex_digit >>= fun h1 -> 1281 | hex_digit >>= fun h0 -> 1282 | try_return2 char_of_digits h1 h0 1283 | "Escape sequence is no valid character code" s) s 1284 | 1285 | let escape_sequence s = 1286 | (escape_sequence_dec 1287 | <|> escape_sequence_hex) s 1288 | 1289 | let char_token s = 1290 | ((char '\\' >> (escaped_char <|> escape_sequence)) 1291 | <|> any_char) s 1292 | 1293 | let char_literal s = 1294 | ((char '\'' >> char_token << char_sp '\'') 1295 | "character literal") s 1296 | 1297 | let string_literal s = 1298 | (char '"' >> (many_chars_until char_token (char_sp '"')) 1299 | "string literal") s 1300 | 1301 | let decimal_r = 1302 | make_regexp "\\d+" 1303 | 1304 | let hexadecimal_r = 1305 | make_regexp "0(x|X)[0-9a-fA-F]+" 1306 | 1307 | let octal_r = 1308 | make_regexp "0(o|O)[0-7]+" 1309 | 1310 | let binary_r = 1311 | make_regexp "0(b|B)[01]+" 1312 | 1313 | let integer_r = 1314 | make_regexp "-?\\d+" 1315 | 1316 | let float_r = 1317 | make_regexp "-?\\d+(\\.\\d*)?((e|E)?(\\+|-)?\\d+)?" 1318 | 1319 | let decimal s = 1320 | (regexp decimal_r >>= fun digits -> 1321 | spaces >> 1322 | try_return int_of_string digits "Decimal value out of range" s 1323 | "decimal value") s 1324 | 1325 | let hexadecimal s = 1326 | (regexp hexadecimal_r >>= fun digits -> 1327 | spaces >> 1328 | try_return int_of_string digits "Hexadecimal value out of range" s 1329 | "hexadecimal value") s 1330 | 1331 | let octal s = 1332 | (regexp octal_r >>= fun digits -> 1333 | spaces >> 1334 | try_return int_of_string digits "Octal value out of range" s 1335 | "octal value") s 1336 | 1337 | let binary s = 1338 | (regexp binary_r >>= fun digits -> 1339 | spaces >> 1340 | try_return int_of_string digits "Binary value out of range" s 1341 | "binary value") s 1342 | 1343 | let integer s = 1344 | (regexp integer_r >>= fun digits -> 1345 | spaces >> 1346 | try_return int_of_string digits "Integer value out of range" s 1347 | "integer value") s 1348 | 1349 | let float s = 1350 | (regexp float_r >>= fun digits -> 1351 | spaces >> 1352 | try_return float_of_string digits "Not a valid float value" s 1353 | "float value") s 1354 | 1355 | end 1356 | 1357 | end 1358 | -------------------------------------------------------------------------------- /src/mParser.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** A monadic parser combinator library. *) 21 | 22 | (** The parser combinators provided by this module can be used to build parsers 23 | for context-sensitive, infinite look-ahead grammars that are reasonably 24 | efficient and produce good error messages due to a controlled use of 25 | backtracking. The performance of the resulting parsers should be 26 | sufficient for most applications. The parsers get their input from 27 | character streams provided by the {!MParser_Char_Stream} module, which means 28 | that it is possible to parse files up to a size of at least 1GB. 29 | 30 | The [MParser] module is an OCaml version of the 31 | {{:http://www.quanttec.com/fparsec}FParsec} library for F# by Stephan 32 | Tolksdorf and the 33 | {{:http://research.microsoft.com/users/daan/parsec.html}Parsec} library 34 | for Haskell by Daan Leijen. The interface of the [MParser] module is very 35 | similar to the interfaces of Parsec and FParsec. For this reason, we keep 36 | the documentation here rather terse. See the excellent documentation of 37 | Parsec and FParsec for more information. Parsers should be easily 38 | portable from these two libraries to [MParser] (although some functions 39 | might behave subtly different). Where the behavior of Parsec and FParsec 40 | differs, [MParser] generally behaves like FParsec (but there might be 41 | exceptions). 42 | 43 | A significant drawback of the implementation is that it relies on the 44 | standard OCaml types [char] and [string] and therefore there is 45 | {e currently no support for Unicode}. 46 | *) 47 | 48 | 49 | (** {2 Parser state} 50 | 51 | The state of a parser consists of the input to be parsed, the current 52 | position in the input, the number of the current line, the position of the 53 | first character of the current line in the input, and an optional user 54 | state. A position [p] is valid if it satisfies [0 <= p && p < l], where [l] 55 | is the length of the input; all other positions are invalid. Characters can 56 | only be read from valid positions. 57 | 58 | The following functions that directly access or change the parser state 59 | should only be used to write very low-level parsers. All other parsers 60 | should be composed from parser combinators (see below). *) 61 | 62 | type 's state 63 | (** The type of parser states. *) 64 | 65 | 66 | val init: MParser_Char_Stream.t -> 's -> 's state 67 | (** [init input user] returns an initial parser state using the input stream 68 | [input] and the initial user state [user]. *) 69 | 70 | val is_eof: 's state -> bool 71 | (** [is_eof s] returns [true] if the current position of [s] is not a valid 72 | position, and [false] otherwise. If [is_eof] returns [false], a character 73 | can be read from the current position. *) 74 | 75 | val next_state: 's state -> 's state 76 | (** [advance s] returns the state [s] with the position advanced by one 77 | character if the current position of [s] is a valid position. Otherwise, 78 | the same state is returned. This function does not register newlines. If 79 | the current character is a newline, [advance_state_nl] should be used 80 | instead. *) 81 | 82 | val advance_state: 's state -> int -> 's state 83 | (** [advance_state s n] returns the state [s] with the position advanced by 84 | [n] characters if the current position of [s] is a valid position. 85 | Otherwise, the same state is returned. This function does not register 86 | newlines. If the current character is a newline, [advance_state_nl] 87 | should be used instead. *) 88 | 89 | val advance_state_nl: 's state -> int -> 's state 90 | (** [advance_state_nl s n] returns the state [s] with the position advanced by 91 | [n] characters and the line counter increased by one if the current 92 | position of [s] is a valid position. Otherwise, the same state is 93 | returned. *) 94 | 95 | val read_char: 's state -> char option 96 | (** [read_char s] returns [Some c] where [c] is the character at the current 97 | position, or [None] if this position is not a valid position. *) 98 | 99 | val read_index: 's state -> int -> char option 100 | (** [read_index s pos] returns [Some c] where [c] is the character at the 101 | position [pos], or [None] if this position is not a valid position. *) 102 | 103 | val next_char: 's state -> char option 104 | (** [next_char s] returns [Some c] where [c] is the character after the 105 | current position, or [None] if this position is not a valid position. *) 106 | 107 | val prev_char: 's state -> char option 108 | (** [prev_char s] returns [Some c] where [c] is the character before the 109 | current position, or [None] if this position is not a valid position. *) 110 | 111 | val read_string: 's state -> int -> string 112 | (** [read_string s maxlen] returns a string containing the next [n] 113 | characters, where [n] is the minimum of [maxlen] and the number of 114 | characters remaining from the current position. If the current position 115 | is not a valid position, the empty string is returned. *) 116 | 117 | val match_char: 's state -> char -> bool 118 | (** [match_char s c] returns [true] if [c] is the character at the current 119 | position, and [false] otherwise. *) 120 | 121 | val match_string: 's state -> string -> bool 122 | (** [match_string s str] returns [true] if the input starting at the current 123 | position matches the string [str], and [false] otherwise. *) 124 | 125 | 126 | (** {2 Error handling and reporting} 127 | 128 | When building parsers from the parser combinators and running them using 129 | the [parse] functions (see below), error handling and reporting is nearly 130 | automatic. If a parser run fails, the [parse] functions return a 131 | human-readable (plain English) error message that is generated from the 132 | labels attached to the parsers using the labelling operators [] and 133 | []. 134 | 135 | The following types and functions can be used for explicit creation of 136 | errors in parsers and for customizing the handling of errors returned by 137 | parser runs. For this purpose the [parse] functions also return the 138 | actual [error] value in the case of a failed parser run. Typical 139 | applications for customized error handling are the internationalization of 140 | error messages and the automatic processing of parse errors. *) 141 | 142 | type pos = int * int * int 143 | (** An input position, consisting of an index into the input, a line number, 144 | and a column number. *) 145 | 146 | type error_message = 147 | | Unexpected_error of string 148 | (** An unexpected symbol occurred in the input. *) 149 | | Expected_error of string 150 | (** A symbol that was expected in the input could not be parsed. *) 151 | | Message_error of string 152 | (** An error occurred that does not fit into any other category. *) 153 | | Compound_error of string * error 154 | (** An error occurred while parsing a part of a compound. *) 155 | | Backtrack_error of error 156 | (** The parser backtracked after an error occurred. *) 157 | | Unknown_error 158 | (** An unknown error occurred. *) 159 | (** The type of error messages returned by parsers. *) 160 | 161 | and error = 162 | | Parse_error of pos * error_message list 163 | | No_error 164 | (** The type of errors returned by parsers. *) 165 | 166 | 167 | val unexpected_error: 's state -> string -> error 168 | (** Creates an [Unexpected_error]. The argument should describe the 169 | unexpected symbol that occurred in the input. *) 170 | 171 | val expected_error: 's state -> string -> error 172 | (** Creates an [Expected_error]. The argument should describe the symbol that 173 | was expected but could not be parsed. *) 174 | 175 | val message_error: 's state -> string -> error 176 | (** Creates a [Message_error]. The argument should contain the complete error 177 | message. *) 178 | 179 | val compound_error: 's state -> string -> error -> error 180 | (** Creates a [Compound_error]. The string argument should describe the 181 | compound that could not be parsed; the error argument should be the 182 | error that caused to compound parser to fail. *) 183 | 184 | val backtrack_error: 's state -> error -> error 185 | (** Creates a [Backtrack_error]. The argument should be the error that caused 186 | the parser to backtrack. *) 187 | 188 | val unknown_error: 's state -> error 189 | (** Creates an [Unknown_error]. *) 190 | 191 | val merge_errors: error -> error -> error 192 | (** Merges two errors. The behavior of the error reporting is undefined if 193 | [Parse_error] values from different positions are merged. *) 194 | 195 | 196 | (** {2 The parser type} 197 | 198 | To make handling of parse errors possible, the reply of a parser must not 199 | only indicate whether the parser has failed or succeeded, but also whether 200 | the parser has consumed input. When a parser is run, the general rule is 201 | that when it fails, alternative parsers created using the [<|>] and 202 | [choice] combinators are only tried if the first parser did not consume 203 | input. Thus by default the resulting parsers are predictive 204 | (non-backtracking). This behavior can be changed by using combinators 205 | like [attempt] and [look_ahead]. By this means the [MParser] module can 206 | be used to build efficient parsers for a very large class of languages 207 | that provide nearly automatic handling of errors, which is virtually 208 | impossible with full-backtracking parsers (because the position causing 209 | the failure cannot be determined). 210 | 211 | This approach to combinator parsing has been pioneered by Daan Leijen's 212 | {{:http://research.microsoft.com/users/daan/parsec.html}Parsec} library. 213 | A more detailed presentation of it can be found in the following paper: 214 | Daan Leijen and Erik Meijer, {e Parsec: Direct-Style Monadic Parser 215 | Combinators For The Real World}, Technical Report UU-CS-2001-35, 216 | Departement of Computer Science, Universiteit Utrecht, 2001. *) 217 | 218 | type ('a, 's) reply = 219 | | Empty_failed of error 220 | (** The parser failed without consuming input. *) 221 | | Empty_ok of 'a * 's state * error 222 | (** The parser succeeded without consuming input. *) 223 | | Consumed_failed of error 224 | (** The parser failed after consuming input. *) 225 | | Consumed_ok of 'a * 's state * error 226 | (** The parser succeeded after consuming input. *) 227 | (** The type of replies returned by parsers. *) 228 | 229 | type ('a, 's) t = 's state -> ('a, 's) reply 230 | type ('a, 's) parser = ('a, 's) t 231 | (** The type of parsers with result type ['a] and user state type ['s]. *) 232 | 233 | 234 | val make_ok: bool -> 'a -> 's state -> error -> ('a, 's) reply 235 | (** [make_ok consumed result state error] returns [Empty_ok (result, state, 236 | error)] if [consumed = false], and [Consumed_ok (result, state, error)] 237 | if [consumed = true]. *) 238 | 239 | val make_failed: bool -> error -> ('a, 's) reply 240 | (** [make_failed consumed error] returns [Empty_failed error] if [consumed = 241 | false], and [Consumed_failed error] of [consumed = true]. *) 242 | 243 | val is_consumed: ('a, 's) reply -> bool 244 | (** [is_consumed reply] returns [true] if [reply] is [Consumed_failed] or 245 | [Consumed_ok], and [false] otherwise. *) 246 | 247 | val is_empty: ('a, 's) reply -> bool 248 | (** [is_consumed reply] returns [true] if [reply] is [Empty_failed] or 249 | [Empty_ok], and [false] otherwise. *) 250 | 251 | val is_error: ('a, 's) reply -> bool 252 | (** [is_error reply] returns [true] if [reply] is [Empty_failed] or 253 | [Consumed_failed], and [false] otherwise. *) 254 | 255 | val is_ok: ('a, 's) reply -> bool 256 | (** [is_ok reply] returns [true] if [reply] is [Empty_ok] or [Consumed_ok], 257 | and [false] otherwise. *) 258 | 259 | val set_error: ('a, 's) reply -> error -> ('a, 's) reply 260 | (** [set_error reply error] returns [reply] with the error message replaced by 261 | [error]. *) 262 | 263 | 264 | (** {2 Running a parser} *) 265 | 266 | (** The result of a parser run. In the case of [Failed], it contains a 267 | human-readable error message. *) 268 | type 'a result = 269 | | Success of 'a 270 | | Failed of string * error 271 | 272 | 273 | val parse: ('a, 's) t -> MParser_Char_Stream.t -> 's -> 'a result 274 | (** [parse p s user] runs the parser [p] on the input stream [s] using the 275 | initial user state [user]. *) 276 | 277 | val parse_string: ('a, 's) t -> string -> 's -> 'a result 278 | (** [parse_string p str user] runs the parser [p] on the input stream produced 279 | from the string [str] using the initial user state [user]. The stream is 280 | created with [MParser_Char_Stream.from_string]. *) 281 | 282 | val parse_channel: ('a, 's) t -> in_channel -> 's -> 'a result 283 | (** [parse_string p chn user] runs the parser [p] on the input stream produced 284 | from the channel [chn] using the initial user state [user]. The stream is 285 | created with [MParser_Char_Stream.from_channel]. *) 286 | 287 | 288 | (** {2 Parser combinators} 289 | 290 | {e Note:} A statement of the form "parser [p] is equivalent to [q]", where 291 | [q] is a compound parser, means that [p] is functionally equivalent to 292 | [q], that is, it behaves exactly the same as [q], although it might be 293 | implemented differently. Using [p] is generally more efficient than using 294 | the compound parser [q] and should therefore be preferred. *) 295 | 296 | val return: 'a -> ('a, 's) t 297 | (** [return x] always succeeds with the result [x] without consuming any 298 | input. *) 299 | 300 | val try_return: ('a -> 'b) -> 'a -> string -> 's state -> ('b, 's) t 301 | (** [try_return f x msg s0] succeeds with the result [f x] without consuming 302 | input if [f x] does not raise an exception. Otherwise, it fails with a 303 | [Message_error] with error message [msg] at state [s0]. This combinator 304 | is useful where a result must be computed from another parser result and 305 | where this computation may raise an exception. *) 306 | 307 | val try_return2: ('a -> 'b -> 'c) -> 'a -> 'b -> string -> 's state -> 308 | ('c, 's) t 309 | (** A variant of [try_return] for functions with two parameters. *) 310 | 311 | val try_return3: ('a -> 'b -> 'c -> 'd) -> 'a -> 'b -> 'c -> string -> 312 | 's state -> ('d, 's) t 313 | (** A variant of [try_return] for functions with three parameters. *) 314 | 315 | val fail: string -> ('a, 's) t 316 | (** [fail msg] always fails with a [Message_error] with error message [msg]. 317 | The [fail] parser pretends having consumed input, so that all error 318 | messages are overwritten. *) 319 | 320 | val message: string -> ('a, 's) t 321 | (** [message msg] always fails with a [Message_error] with error message [msg] 322 | without consuming input, so that the error message is merged with other 323 | errors generated for the same input position. *) 324 | 325 | val zero: ('a, 's) t 326 | (** [zero] always fails with an [Unknown_error] without consuming input. *) 327 | 328 | val bind: ('a, 's) t -> ('a -> ('b, 's) t) -> ('b, 's) t 329 | (** [p >>= f] first applies the parser [p], then applies [f] to the resulting 330 | value, and finally applies the resulting parser. Since the second 331 | parser can depend on the result of the first parser, it is possible to 332 | parse context-sensitive grammars. *) 333 | 334 | val (>>=): ('a, 's) t -> ('a -> ('b, 's) t) -> ('b, 's) t 335 | (** [p >>= f] is equivalent to [bind p f] *) 336 | 337 | val (>>): ('a, 's) t -> ('b, 's) t -> ('b, 's) t 338 | (** [p >> q] is equivalent to [p >>= (fun _ -> q)]. *) 339 | 340 | val (<<): ('a, 's) t -> ('b, 's) t -> ('a, 's) t 341 | (** [p << q] is equivalent to [p >>= (fun x -> q >> return x)]. *) 342 | 343 | val (>>>): ('a, 's) t -> ('b, 's) t -> ('b, 's) t 344 | (** Camlp4-compatible alternative to [>>]. *) 345 | 346 | val (<<<): ('a, 's) t -> ('b, 's) t -> ('a, 's) t 347 | (** Camlp4-compatible alternative to [<<]. *) 348 | 349 | val (>>$): ('a, 's) t -> 'b -> ('b, 's) t 350 | (** [p >>$ x] is equivalent to [p >> return x]. *) 351 | 352 | val (>>?): ('a, 's) t -> ('b, 's) t -> ('b, 's) t 353 | (** [p >>? q] behaves like [p >> q], but if [q] fails without consuming input, 354 | it backtracks and pretends not having consumed input, even if [p] has 355 | consumed input. *) 356 | 357 | val (|>>): ('a, 's) t -> ('a -> 'b) -> ('b, 's) t 358 | (** [p |>> f] is equivalent to [p >>= (fun x -> return (f x))]. *) 359 | 360 | val pipe2: ('a, 's) t -> ('b, 's) t -> ('a -> 'b -> 'c) -> ('c, 's) t 361 | (** A variant of [(|>>)] for functions with two parameters. *) 362 | 363 | val pipe3: ('a, 's) t -> ('b, 's) t -> ('c, 's) t -> 364 | ('a -> 'b -> 'c -> 'd) -> ('d, 's) t 365 | (** A variant of [(|>>)] for functions with three parameters. *) 366 | 367 | val pipe4: ('a, 's) t -> ('b, 's) t -> ('c, 's) t -> ('d, 's) t -> 368 | ('a -> 'b -> 'c -> 'd -> 'e) -> ('e, 's) t 369 | (** A variant of [(|>>)] for functions with four parameters. *) 370 | 371 | val (<|>): ('a, 's) t -> ('a, 's) t -> ('a, 's) t 372 | (** [p <|> q] first applies [p]. If [p] fails without consuming input, it 373 | applies [q]. *) 374 | 375 | val choice: ('a, 's) t list -> ('a, 's) t 376 | (** [choice \[p1; p2; ...; pn \]] is equivalent to [p1 <|> p2 <|> ... <|> pn 377 | <|> zero]. *) 378 | 379 | val attempt: ('a, 's) t -> ('a, 's) t 380 | (** [attempt p] behaves like [p], but if [p] fails after consuming input, it 381 | backtracks and pretends not having consumed input. The error message of 382 | [p] is wrapped inside a [Backtrack_error]. *) 383 | 384 | val (): ('a, 's) t -> string -> ('a, 's) t 385 | (** [p label] attaches the label [label] to [p]. If [p] fails without 386 | consuming input, the error message of [p] is replaced by an 387 | [Expected_error] with the label [label]. *) 388 | 389 | val (): ('a, 's) t -> string -> ('a, 's) t 390 | (** [p label] behaves like [p label], but if [p] fails after 391 | consuming input, the error message of [p] is wrapped inside a 392 | [Compound_error]. *) 393 | 394 | val look_ahead: ('a, 's) t -> ('a, 's) t 395 | (** [look_ahead p] behaves like [p], but restores the original state after 396 | parsing. It always returns an empty reply. *) 397 | 398 | val followed_by: ('a, 's) t -> string -> (unit, 's) t 399 | (** [followed_by p msg] succeeds without consuming input and returns [()] if 400 | [p] succeeds at the current position. Otherwise, it fails without 401 | consuming input and returns an [Expected_error] with error message 402 | [msg]. *) 403 | 404 | val not_followed_by: ('a, 's) t -> string -> (unit, 's) t 405 | (** [not_followed_by p msg] succeeds without consuming input and returns [()] 406 | if [p] does not succeed at the current position. Otherwise, it fails 407 | without consuming input and returns an [Unexpected_error] with error 408 | message [msg]. *) 409 | 410 | val opt: 'a -> ('a, 's) t -> ('a, 's) t 411 | (** [opt x p] is equivalent to [p <|>$ x]. *) 412 | 413 | val option: ('a, 's) t -> ('a option, 's) t 414 | (** [option p] is equivalent to [p >>= (fun r -> return (Some r)) <|>$ 415 | None]. *) 416 | 417 | val optional: ('a, 's) t -> (unit, 's) t 418 | (** [optional p] is equivalent to [p >>$ () <|>$ ()]. *) 419 | 420 | val try_skip: ('a, 's) t -> (bool, 's) t 421 | (** [try_skip p] is equivalent to [p >>$ true <|>$ false]. *) 422 | 423 | val pair: ('a, 's) t -> ('b, 's) t -> ('a * 'b, 's) t 424 | (** [pair p q] is equivalent to [ p >>= (fun x -> q >>= (fun y -> return (x, 425 | y)))]. *) 426 | 427 | val many: ('a, 's) t -> ('a list, 's) t 428 | (** [many p] parses zero or more occurrences of [p] and returns a list of the 429 | results returned by [p]. 430 | 431 | @raise Failure if [p] doesn't accept any input. *) 432 | 433 | val many1: ('a, 's) t -> ('a list, 's) t 434 | (** [many1 p] parses one or more occurrences of [p] and returns a list of the 435 | results returned by [p]. 436 | 437 | @raise Failure if [p] doesn't accept any input. *) 438 | 439 | val many_rev: ('a, 's) t -> ('a list, 's) t 440 | (** [many_rev p] is equivalent to [many p |>> List.rev]. 441 | 442 | @raise Failure if [p] doesn't accept any input. *) 443 | 444 | val many1_rev: ('a, 's) t -> ('a list, 's) t 445 | (** [many1_rev p] is equivalent to [many1 p |>> List.rev]. 446 | 447 | @raise Failure if [p] doesn't accept any input. *) 448 | 449 | val skip: ('a, 's) t -> (unit, 's) t 450 | (** [skip p] is equivalent to [p |>> ignore]. *) 451 | 452 | val skip_many: ('a, 's) t -> (unit, 's) t 453 | (** [skip_many p] is equivalent to [skip (many p)]. 454 | 455 | @raise Failure if [p] doesn't accept any input. *) 456 | 457 | val skip_many1: ('a, 's) t -> (unit, 's) t 458 | (** [skip_many1 p] is equivalent to [skip (many1 p)]. 459 | 460 | @raise Failure if [p] doesn't accept any input. *) 461 | 462 | val many_fold_left: ('a -> 'b -> 'a) -> 'a -> ('b, 's) t -> ('a, 's) t 463 | (** [many_fold_left f a p] is equivalent to 464 | [many p |>> List.fold_left f a]. 465 | 466 | @raise Failure if [p] doesn't accept any input. *) 467 | 468 | val many1_fold_left: ('a -> 'b -> 'a) -> 'a -> ('b, 's) t -> ('a, 's) t 469 | (** [many1_fold_left f a p] is equivalent to 470 | [many1 p |>> List.fold_left f a]. 471 | 472 | @raise Failure if [p] doesn't accept any input. *) 473 | 474 | val many_rev_fold_left: ('a -> 'b -> 'a) -> 'a -> ('b, 's) t -> ('a, 's) t 475 | (** [many_rev_fold_left f a p] is equivalent to 476 | [many p |>> List.rev |>> List.fold_left f a]. 477 | 478 | @raise Failure if [p] doesn't accept any input. *) 479 | 480 | val many1_rev_fold_left: ('a -> 'b -> 'a) -> 'a -> ('b, 's) t -> ('a, 's) t 481 | (** [many1_rev_fold_left f a p] is equivalent to 482 | [many1 p |>> List.rev |>> List.fold_left f a]. 483 | 484 | @raise Failure if [p] doesn't accept any input. *) 485 | 486 | val chain_left: ('a, 's) t -> ('a -> 'a -> 'a, 's) t -> 'a -> ('a, 's) t 487 | (** [chain_left p op x] parses zero or more occurrences of [p], separated by 488 | [op]. It returns the value obtained by the left-associative application 489 | of the functions returned by [op] to the results of [p]. If there are 490 | zero occurrences of [p], the value [x] is returned. *) 491 | 492 | val chain_left1: ('a, 's) t -> ('a -> 'a -> 'a, 's) t -> ('a, 's) t 493 | (** [chain_left1 p op] parses one or more occurrences of [p], separated by 494 | [op]. It returns the value obtained by the left-associative application 495 | of the functions returned by [op] to the results of [p]. *) 496 | 497 | val chain_right: ('a, 's) t -> ('a -> 'a -> 'a, 's) t -> 'a -> ('a, 's) t 498 | (** [chain_right p op x] parses zero or more occurrences of [p], separated by 499 | [op]. It returns the value obtained by the right-associative 500 | application of the functions returned by [op] to the results of [p]. If 501 | there are zero occurrences of [p], the value [x] is returned. *) 502 | 503 | val chain_right1: ('a, 's) t -> ('a -> 'a -> 'a, 's) t -> ('a, 's) t 504 | (** [chain_right1 p op] parses one or more occurrences of [p], separated by 505 | [op]. It returns the value obtained by the right-associative 506 | application of the functions returned by [op] to the results of [p]. *) 507 | 508 | val count: int -> ('a, 's) t -> ('a list, 's) t 509 | (** [count n p] parses exactly [n] occurrences of [p] and returns a list of 510 | the results returned by [p]. *) 511 | 512 | val skip_count: int -> ('a, 's) t -> (unit, 's) t 513 | (** [skip_count n p] is equivalent to [skip (count n p)]. *) 514 | 515 | val between: ('a, 's) t -> ('b, 's) t -> ('c, 's) t -> ('c, 's) t 516 | (** [between left right p] is equivalent to [left >> p << right]. *) 517 | 518 | val sep_by: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 519 | (** [sep_by p sep] parses zero or more occurrences of [p], separated by [sep]. 520 | It returns a list of the results returned by [p]. *) 521 | 522 | val sep_by1: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 523 | (** [sep_by1 p sep] parses one or more occurrences of [p], separated by [sep]. 524 | It returns a list of the results returned by [p]. *) 525 | 526 | val sep_end_by: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 527 | (** [sep_end_by p sep] parses zero or more occurrences of [p], separated and 528 | optionally ended by [sep]. It returns a list of the results returned by 529 | [p]. *) 530 | 531 | val sep_end_by1: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 532 | (** [sep_end_by1 p sep] parses one or more occurrences of [p], separated and 533 | optionally ended by [sep]. It returns a list of the results returned by 534 | [p]. *) 535 | 536 | val end_by: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 537 | (** [end_by p sep] parses zero or more occurrences of [p], separated and ended 538 | by [sep]. It returns a list of the results returned by [p]. *) 539 | 540 | val end_by1: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 541 | (** [end_by1 p sep] parses one or more occurrences of [p], separated and ended 542 | by [sep]. It returns a list of the results returned by [p]. *) 543 | 544 | val many_until: ('a, 's) t -> ('b, 's) t -> ('a list, 's) t 545 | (** [many_until p q] parses zero or more occurrences of [p] until [q] succeeds 546 | and returns a list of the results returned by [p]. It is equivalent to 547 | [many (not_followed_by q "" >> p) << q]. Note that [q] is parsed twice 548 | and should therefore not have side effects. *) 549 | 550 | val skip_many_until: ('a, 's) t -> ('b, 's) t -> (unit, 's) t 551 | (** [skip_many_until p q] is equivalent to [skip (many_until p q)]. *) 552 | 553 | 554 | (** {2 Parsers accessing the parser state} *) 555 | 556 | val get_input: (MParser_Char_Stream.t, 's) t 557 | (** Returns the input stream. *) 558 | 559 | val get_index: (int, 's) t 560 | (** Returns the current index into the input. *) 561 | 562 | val get_pos: (pos, 's) t 563 | (** Returns the current position. *) 564 | 565 | val register_nl: int -> int -> (unit, 's) t 566 | (** [register_nl lines chars_after_nl] increases the line counter by [lines] 567 | and sets the beginning of the current line to [chars_after_nl] characters 568 | before the current index. *) 569 | 570 | val set_pos: pos -> (unit, 's) t 571 | (** Sets the current position. *) 572 | 573 | val eof: (unit, 's) t 574 | (** Parses the end of the input. *) 575 | 576 | 577 | (** {2 Parsers accessing the user state} *) 578 | 579 | val get_user_state: ('s, 's) t 580 | (** Returns the current user state of the parser. *) 581 | 582 | val set_user_state: 's -> (unit, 's) t 583 | (** Sets the current user state of the parser. *) 584 | 585 | val update_user_state: ('s -> 's) -> (unit, 's) t 586 | (** [update_user_state f] applies [f] to the user state of the parser. *) 587 | 588 | 589 | (** {2 Character-based parsers} 590 | 591 | The following specialized parsers and parser combinators work directly on 592 | the characters of the input stream and are therefore more efficient than 593 | the general combinators. Generally, the basic character and string 594 | parsers only consume input when they succeed. *) 595 | 596 | val skip_nchars: int -> (unit, 's) t 597 | (** [skip_nchars n] skips [n] characters of the input. Newlines are not 598 | registered. This parser never fails, even if there are less than [n] 599 | characters left. 600 | 601 | @raise Invalid_argument if [n < 0]. *) 602 | 603 | val char: char -> (char, 's) t 604 | (** [char c] parses the character [c] and returns it. *) 605 | 606 | val skip_char: char -> (unit, 's) t 607 | (** [skip_char c] is equivalent to [skip (char c)]. *) 608 | 609 | val any_char: (char, 's) t 610 | (** Parses any character and returns it. This parser does not register 611 | newlines. Use [any_char_or_nl] if the current character can be a 612 | newline. *) 613 | 614 | val skip_any_char: (unit, 's) t 615 | (** [skip_any_char] is equivalent to [skip any_char]. *) 616 | 617 | val any_char_or_nl: (char, 's) t 618 | (** [any_char_or_nl] is equivalent to [newline <|> any_char]. *) 619 | 620 | val skip_any_char_or_nl: (unit, 's) t 621 | (** [skip_any_char_or_nl] is equivalent to [skip any_char_or_nl]. *) 622 | 623 | val peek_char: (char, 's) t 624 | (** Returns the character at the position after the current position or fails 625 | if this is not a valid position. This parser does not consume input. *) 626 | 627 | val string: string -> (string, 's) t 628 | (** [string s] parses the string [s] and returns it. *) 629 | 630 | val skip_string: string -> (unit, 's) t 631 | (** [skip_string s] is equivalent to [skip (string s)]. *) 632 | 633 | val any_string: int -> (string, 's) t 634 | (** [any_string n] parses any string of [n] characters and returns it. Fails 635 | if there are less than [n] characters left in the input. *) 636 | 637 | val many_chars: (char, 's) t -> (string, 's) t 638 | (** [many_chars p] parses zero or more occurrences of [p] and returns a string 639 | of the results returned by [p]. 640 | 641 | @raise Failure if [p] doesn't accept any input. *) 642 | 643 | val many1_chars: (char, 's) t -> (string, 's) t 644 | (** [many1_chars p] parses one or more occurrences of [p] and returns a string 645 | of the results returned by [p]. 646 | 647 | @raise Failure if [p] doesn't accept any input. *) 648 | 649 | val skip_many_chars: (char, 's) t -> (unit, 's) t 650 | (** [skip_many_chars p] is equivalent to [skip (many_chars p)]. *) 651 | 652 | val skip_many1_chars: (char, 's) t -> (unit, 's) t 653 | (** [skip_many1_chars p] is equivalent to [skip (many1_chars p)]. *) 654 | 655 | val many_chars_until: (char, 's) t -> (char, 's) t -> (string, 's) t 656 | (** [many_chars_until p q] parses zero or more occurrences of [p] until [q] 657 | succeeds and returns a string of the results returned by [p]. It is 658 | equivalent to [many_chars (not_followed_by q "" >> p) << q]. Note that 659 | [q] is parsed twice and should therefore not have side effects. *) 660 | 661 | val skip_many_chars_until: (char, 's) t -> (char, 's) t -> (unit, 's) t 662 | (** [skip_many_chars_until p q] is equivalent to 663 | [skip (many_chars_until p q)]. *) 664 | 665 | val satisfy: (char -> bool) -> (char, 's) t 666 | (** [satisfy p] parses a character for which [p] returns [true] and returns 667 | this character. It fails with an [Unknown_error] if the character at the 668 | current position does not satisfy [p]. *) 669 | 670 | val satisfy_l: (char -> bool) -> string -> (char, 's) t 671 | (** [satisfy_l p label] is equivalent to [satisfy p label]. *) 672 | 673 | val skip_satisfy: (char -> bool) -> (unit, 's) t 674 | (** [skip_satisfy p] is equivalent to [skip (satisfy p)]. *) 675 | 676 | val skip_satisfy_l: (char -> bool) -> string -> (unit, 's) t 677 | (** [skip_satisfy_l p label] is equivalent to [skip (satisfy_l p label)]. *) 678 | 679 | val nsatisfy: int -> (char -> bool) -> (string, 's) t 680 | (** [nsatisfy n p] parses the next [n] characters if [p] returns [true] for 681 | each of them. Otherwise it fails with an [Unknown_error] without 682 | consuming input. *) 683 | 684 | val many_satisfy: (char -> bool) -> (string, 's) t 685 | (** [many_satisfy p] is equivalent to [many_chars (satisfy p)]. *) 686 | 687 | val many1_satisfy: (char -> bool) -> (string, 's) t 688 | (** [many1_satisfy p] is equivalent to [many1_chars (satisfy p)]. *) 689 | 690 | val skip_many_satisfy: (char -> bool) -> (unit, 's) t 691 | (** [skip_many_satisfy p] is equivalent to [skip_many (satisfy p)]. *) 692 | 693 | val skip_many1_satisfy: (char -> bool) -> (unit, 's) t 694 | (** [skip_many1_satisfy p] is equivalent to [skip_many1 (satisfy p)]. *) 695 | 696 | val next_char_satisfies: (char -> bool) -> (unit, 's) t 697 | (** [next_char_satisfies p] succeeds without consuming input if [p] returns 698 | [true] for the character after the current position. Otherwise it fails 699 | with an [Unknown_error]. *) 700 | 701 | val prev_char_satisfies: (char -> bool) -> (unit, 's) t 702 | (** [prev_char_satisfies p] succeeds without consuming input if [p] returns 703 | [true] for the character before the current position. Otherwise it fails 704 | with an [Unknown_error]. *) 705 | 706 | val any_of: string -> (char, 's) t 707 | (** [any_of str] parses any character occurring in the string [str] and returns 708 | it. *) 709 | 710 | val none_of: string -> (char, 's) t 711 | (** [none_of str] parses any character not occurring in the string [str] and 712 | returns it. *) 713 | 714 | val is_not: (char, 's) t -> (char, 's) t 715 | (** [is_not c] parses any character that is not accepted by parser [c]. 716 | Fails with [Unknown_error] if the character is accepted by [c]. *) 717 | 718 | val uppercase: (char, 's) t 719 | (** Parses an English uppercase letter and returns it. *) 720 | 721 | val lowercase: (char, 's) t 722 | (** Parses an English lowercase letter and returns it. *) 723 | 724 | val letter: (char, 's) t 725 | (** Parses an English letter and returns it. *) 726 | 727 | val digit: (char, 's) t 728 | (** Parses a decimal digit and returns it. *) 729 | 730 | val hex_digit: (char, 's) t 731 | (** Parses a hexadecimal digit and returns it. *) 732 | 733 | val oct_digit: (char, 's) t 734 | (** Parses an octal digit and returns it. *) 735 | 736 | val alphanum: (char, 's) t 737 | (** Parses an English letter or a decimal digit and returns it. *) 738 | 739 | val tab: (char, 's) t 740 | (** Parses a tab character (['\t']) and returns it. *) 741 | 742 | val blank: (char, 's) t 743 | (** Parses a space or a tab character ([' '] or ['\t'] and returns it. *) 744 | 745 | val newline: (char, 's) t 746 | (** Parses a newline (['\n'], ['\r'], or the sequence ['\r', '\n']). If it 747 | succeeds, it always returns ['\n']. The position in the parser state is 748 | correctly updated. *) 749 | 750 | val space: (char, 's) t 751 | (** Parses a space ([' ']), a tab (['\t']) or a newline (['\n'], ['\r'], or 752 | the sequence ['\r', '\n']). If a newline is parsed, it returns ['\n'] and 753 | correctly updates the position in the parser state. Otherwise it returns 754 | the parsed character. *) 755 | 756 | val non_space: (char, 's) t 757 | (** [non_space] is equivalent to [is_not space], with a better error message. *) 758 | 759 | val spaces: (unit, 's) t 760 | (** [spaces] is equivalent to [skip_many_chars space]. *) 761 | 762 | val spaces1: (unit, 's) t 763 | (** [spaces] is equivalent to [skip_many_chars1 space]. *) 764 | 765 | 766 | (** {2 Expression parser} *) 767 | 768 | type assoc = 769 | | Assoc_none (** None-associative operator. *) 770 | | Assoc_left (** Left-associative operator. *) 771 | | Assoc_right (** Right-associative operator. *) 772 | (** The associativity of an operator. An operator [(#)] is left-associative 773 | if [a # b # c = (a # b) # c], right-associative if [a # b # c = a # (b # 774 | c)], and non-associative if applying [(#)] to an expression with head 775 | operator [(#)] is not allowed. Note that a value of this type specifies 776 | only how an expression like [a # b # c] is parsed, not how it is 777 | interpreted semantically. *) 778 | 779 | type ('a, 's) operator = 780 | | Infix of (('a -> 'a -> 'a, 's) t * assoc) (** Infix operator. *) 781 | | Prefix of ('a -> 'a, 's) t (** Prefix operator. *) 782 | | Postfix of ('a -> 'a, 's) t (** Postfix operator. *) 783 | (** The type of operators on type ['a]. The function returned by the parser 784 | argument to the [Infix], [Prefix], and [Postfix] constructor is used to 785 | build the result of applying the operator to its operands. *) 786 | 787 | 788 | val expression: (('a, 's) operator list) list -> ('a, 's) t -> ('a, 's) t 789 | (** [expression operators term] parses any well-formed expression that can 790 | built from the basic terms parsed by [term] and the operators specified in 791 | the operator table [operators]. The operator table is a list of 792 | [operator] lists that is ordered in descending precedence. All elements 793 | in one list of [operators] have the same precedence, but may have 794 | different associativities. 795 | 796 | Adjacent prefix and postfix operators of the same precedence are not 797 | well-formed. For example, if [(-)] denotes prefix negation, [--x] is not 798 | a well-formed expression (if [(--)] does not denote an operator on its 799 | own). If a prefix and a postfix operator of the same precedence are 800 | applied to an expression, the prefix operator is applied before the 801 | postfix operator. 802 | 803 | The following example demonstrates the usage of the [expression] parser. 804 | It implements a minimalistic calculator that can be used to evaluate 805 | expressions like [eval "(1 + 2 * 3) / -2"], which returns [-3]. 806 | {[ 807 | open MParser 808 | open Tokens 809 | 810 | exception Syntax_error 811 | 812 | let infix sym f assoc = Infix (skip_symbol sym >> return f, assoc) 813 | let prefix sym f = Prefix (skip_symbol sym >> return f) 814 | 815 | let negate x = -x 816 | 817 | let operators = 818 | [ 819 | [ prefix "-" negate ]; 820 | [ infix "*" ( * ) Assoc_left; infix "/" ( / ) Assoc_left ]; 821 | [ infix "+" ( + ) Assoc_left; infix "-" ( - ) Assoc_left ]; 822 | ] 823 | 824 | let rec term s = (parens expr <|> decimal) s 825 | 826 | and expr s = expression operators term s 827 | 828 | let eval s = 829 | match parse_string expr s () with 830 | | Success x -> x 831 | | Failed (msg, _) -> 832 | print_string msg; 833 | raise Syntax_error 834 | ]} 835 | *) 836 | 837 | 838 | (** {2 Regexp-related features} *) 839 | 840 | module MakeRegexp (Regexp: MParser_Sig.Regexp): sig 841 | 842 | val match_regexp: 's state -> Regexp.t -> Regexp.substrings option 843 | (** [match_regexp s rex] matches the regular expression [rex] against the 844 | input. It returns [Some substrings] if the match succeeds, where 845 | [substrings] contains the matched substrings. If the match fails or if 846 | the current position is already behind the last position of the input, it 847 | returns [None]. 848 | 849 | If the input is read from a (large) file, [rex] is not necessarily matched 850 | against the complete remaining substring. The minimum number of 851 | characters that are guaranteed to be used for matching is specified when 852 | creating the input character stream. See the documentation of the 853 | {!MParser_Char_Stream} module for more information. *) 854 | 855 | val make_regexp: string -> Regexp.t 856 | (** Creates a regular expression from a string. *) 857 | 858 | val regexp: Regexp.t -> (string, 's) t 859 | (** [regexp rex] parses any string matching the regular expression [rex] and 860 | returns it. 861 | 862 | If the input is read from a (large) file, [rex] is not necessarily matched 863 | against the complete remaining substring. The minimum number of 864 | characters that are guaranteed to be used for matching is specified when 865 | creating the input character stream. See the documentation of the 866 | {!MParser_Char_Stream} module for more information. *) 867 | 868 | val regexp_substrings: Regexp.t -> (string array, 's) t 869 | (** [regexp_substrings rex] parses any string matching the regular expression 870 | [rex] and returns an array containing all matched substrings. 871 | 872 | If the input is read from a (large) file, [rex] is not necessarily matched 873 | against the complete remaining substring. The minimum number of 874 | characters that are guaranteed to be used for matching is specified when 875 | creating the input character stream. See the documentation of the 876 | {!MParser_Char_Stream} module for more information. *) 877 | 878 | 879 | (** Predefined token parsers. 880 | 881 | This module provides parsers for tokens that are commonly used in parsing 882 | computer languages. All parsers in this module skip the spaces (as 883 | defined by the {!MParser.spaces} parser) that occur after a token. Where 884 | they are applied to a user-defined parser [p], however, they do not skip 885 | the spaces occurring after the characters parsed by [p]. For example, 886 | [parens p] is equivalent to [char '(' >> spaces >> p << char ')' << spaces]. 887 | *) 888 | module Tokens: sig 889 | 890 | val symbol: string -> (string, 's) t 891 | (** [symbol sym] parses the literal string [sym] and returns it. *) 892 | 893 | val skip_symbol: string -> (unit, 's) t 894 | (** [skip_symbol sym] is equivalent to [skip (symbol sym)]. *) 895 | 896 | val parens: ('a, 's) t -> ('a, 's) t 897 | (** [parens p] parses [p] between parentheses ['('] and [')']. *) 898 | 899 | val braces: ('a, 's) t -> ('a, 's) t 900 | (** [braces p] parses [p] between curly braces ['{'] and ['}']. *) 901 | 902 | val brackets: ('a, 's) t -> ('a, 's) t 903 | (** [brackets p] parses [p] between angle brackets ['<'] and ['>']. *) 904 | 905 | val squares: ('a, 's) t -> ('a, 's) t 906 | (** [squares p] parses [p] between square brackets ['\['] and ['\]']. *) 907 | 908 | val semi: (char, 's) t 909 | (** Parses a semicolon [';']. *) 910 | 911 | val comma: (char, 's) t 912 | (** Parses a comma [',']. *) 913 | 914 | val colon: (char, 's) t 915 | (** Parses a colon [':']. *) 916 | 917 | val dot: (char, 's) t 918 | (** Parses a dot ['.']. *) 919 | 920 | val semi_sep: ('a, 's) t -> ('a list, 's) t 921 | (** [semi_sep p] parses zero or more occurrences of [p], separated by [';']. 922 | It returns a list of the results returned by [p]. *) 923 | 924 | val semi_sep1: ('a, 's) t -> ('a list, 's) t 925 | (** [semi_sep1 p] parses one or more occurrences of [p], separated by [';']. 926 | It returns a list of the results returned by [p]. *) 927 | 928 | val semi_sep_end: ('a, 's) t -> ('a list, 's) t 929 | (** [semi_sep_end p] parses zero or more occurrences of [p], separated and 930 | optionally ended by [';']. It returns a list of the results returned by 931 | [p]. *) 932 | 933 | val semi_sep_end1: ('a, 's) t -> ('a list, 's) t 934 | (** [semi_sep_end1 p] parses one or more occurrences of [p], separated and 935 | optionally ended by [';']. It returns a list of the results returned by 936 | [p]. *) 937 | 938 | val semi_end: ('a, 's) t -> ('a list, 's) t 939 | (** [semi_end p] parses zero or more occurrences of [p], separated and ended 940 | by [';']. It returns a list of the results returned by [p]. *) 941 | 942 | val semi_end1: ('a, 's) t -> ('a list, 's) t 943 | (** [semi_sep_end1 p] parses one or more occurrences of [p], separated and 944 | ended by [';']. It returns a list of the results returned by [p]. *) 945 | 946 | val comma_sep: ('a, 's) t -> ('a list, 's) t 947 | (** [comma_sep p] parses zero or more occurrences of [p], separated by 948 | [',']. It returns a list of the results returned by [p]. *) 949 | 950 | val comma_sep1: ('a, 's) t -> ('a list, 's) t 951 | (** [comma_sep1 p] parses one or more occurrences of [p], separated by 952 | [',']. It returns a list of the results returned by [p]. *) 953 | 954 | val char_literal: (char, 's) t 955 | (** Parses a character literal as defined in the OCaml language and returns 956 | the character. The literal may contain an escape sequence. *) 957 | 958 | val string_literal: (string, 's) t 959 | (** Parses a string literal as defined in the OCaml language and returns the 960 | string. The literal may contain escape sequences. *) 961 | 962 | val decimal: (int, 's) t 963 | (** Parses a decimal natural number and returns it as an integer value. 964 | Fails with a [Message_error] if the parsed number is larger than 965 | [max_int]. *) 966 | 967 | val hexadecimal: (int, 's) t 968 | (** Parses a hexadecimal natural number as defined in the OCaml language 969 | (prefixed with ["0x"] or ["0X"]) and returns it as an integer value. 970 | Fails with a [Message_error] if the parsed number is larger than 971 | [max_int]. *) 972 | 973 | val octal: (int, 's) t 974 | (** Parses an octal natural number as defined in the OCaml language 975 | (prefixed with ["0o"] or ["0O"]) and returns it as an integer value. 976 | Fails with a [Message_error] if the parsed number is larger than 977 | [max_int]. *) 978 | 979 | val binary: (int, 's) t 980 | (** Parses a binary natural number as defined in the OCaml language 981 | (prefixed with ["0b"] or ["0B"]) and returns it as an integer value. 982 | Fails with a [Message_error] if the parsed number is larger than 983 | [max_int]. *) 984 | 985 | val integer: (int, 's) t 986 | (** Parses a decimal integer number and returns its value. Fails with a 987 | [Message_error] if the parsed number is smaller than [min_int] or larger 988 | than [max_int]. *) 989 | 990 | val float: (float, 's) t 991 | (** Parses floating-point literal as defined in the OCaml language and 992 | returns its value. Fails with a [Message_error] if the parsed number is 993 | not a valid representation of a [float] value. *) 994 | 995 | end 996 | 997 | end 998 | -------------------------------------------------------------------------------- /src/mParser_Char_Stream.ml: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | 21 | open MParser_Utils 22 | 23 | 24 | type t = { 25 | block_size: int; (** Size of a block in chars. *) 26 | block_overlap: int; (** Overlap between blocks in chars. *) 27 | min_rspace: int; (** Minimum space for regexp matching. *) 28 | length: int; (** Length of the stream in chars. *) 29 | input: in_channel; (** Input if created from a channel. *) 30 | buffer: Bytes.t; (** The block buffer. *) 31 | mutable buffer_pos: int; (** Stream position of the current block. *) 32 | } 33 | 34 | 35 | (** [read_block s pos length] reads a block of [length] characters from 36 | current position in the input channel and writes it to the block buffer 37 | starting at [pos]. 38 | 39 | The functions in this module use [read_block] exclusively to fill the 40 | block buffer. If the input channel is not modified after creating the 41 | char stream from it, there will be at least [length] characters left in 42 | the channel when [read_block] is called, but this condition cannot be 43 | enforced by this module. Therefore, an exception is raised if less than 44 | [length] characters are available. 45 | 46 | @raise Failure if less than [length] characters could be read. 47 | *) 48 | let read_block s pos length = 49 | if IO.input s.input s.buffer pos length <> length then 50 | failwith "MParser_Char_Stream.read_block: I/O error" 51 | 52 | let from_string str = 53 | let length = String.length str in 54 | { 55 | block_size = length; 56 | block_overlap = 0; 57 | min_rspace = 0; 58 | length; 59 | input = Obj.magic 0; 60 | buffer = Bytes.of_string str; 61 | buffer_pos = 0; 62 | } 63 | 64 | let from_channel ?(block_size = 1048576) ?block_overlap ?min_rspace input = 65 | 66 | let block_overlap = 67 | match block_overlap with 68 | | Some x -> x 69 | | None -> block_size / 16 70 | in 71 | 72 | let min_rspace = 73 | match min_rspace with 74 | | Some x -> x 75 | | None -> block_size / 64 76 | in 77 | 78 | if block_size < 1 || block_size > Sys.max_string_length then 79 | invalid_arg "MParser_Char_Stream.from_channel: invalid block size"; 80 | if block_overlap < 1 || block_overlap > block_size / 2 then 81 | invalid_arg "MParser_Char_Stream.from_channel: invalid block overlap"; 82 | if min_rspace < 1 || min_rspace > block_overlap then 83 | invalid_arg "MParser_Char_Stream.from_channel: invalid minimum rspace"; 84 | 85 | let length = in_channel_length input in 86 | let block_size = min block_size length in 87 | let buffer = Bytes.create block_size in 88 | let buffer_pos = pos_in input in 89 | 90 | let s = 91 | { 92 | block_size; 93 | block_overlap; 94 | min_rspace; 95 | length; 96 | input; 97 | buffer; 98 | buffer_pos; 99 | } 100 | in 101 | read_block s 0 block_size; 102 | s 103 | 104 | let length s = 105 | s.length 106 | 107 | let is_valid_pos s pos = 108 | pos >= 0 && pos < s.length 109 | 110 | let is_visible s pos = 111 | pos >= s.buffer_pos && pos < s.buffer_pos + s.block_size 112 | 113 | (** [perform_unsafe_seek s pos] sets the position in the input stream to 114 | [pos], unconditionally reads the corresponding block from the input 115 | channel, and writes it to the block buffer. [pos] must be a valid 116 | position in the input channel. The function ensures that the block buffer 117 | contains at least [(max 0 (min (pos - 1) s.block_overlap))] characters 118 | from the input before [pos] and at least [(max 0 (min (length s - (pos + 119 | 1)) s.block_overlap))] characters from the input after [pos]. 120 | *) 121 | let perform_unsafe_seek s pos = 122 | let new_buffer_pos = 123 | min (s.length - s.block_size) (max 0 (pos - s.block_overlap)) 124 | in 125 | let offset = new_buffer_pos - s.buffer_pos in 126 | if offset > 0 && offset < s.block_size then 127 | let overlap = s.block_size - offset in 128 | Bytes.unsafe_blit s.buffer (s.block_size - overlap) s.buffer 0 overlap; 129 | seek_in s.input (new_buffer_pos + overlap); 130 | read_block s overlap (s.block_size - overlap) 131 | else if offset < 0 && -offset < s.block_size then 132 | let overlap = s.block_size + offset in 133 | Bytes.unsafe_blit s.buffer 0 s.buffer (s.block_size - overlap) overlap; 134 | seek_in s.input new_buffer_pos; 135 | read_block s 0 (s.block_size - overlap) 136 | else 137 | (seek_in s.input new_buffer_pos; 138 | read_block s 0 s.block_size); 139 | 140 | s.buffer_pos <- new_buffer_pos 141 | 142 | (** [unsafe_seek s pos] sets the position in the input stream to [pos]. If 143 | this position is currently not visible, i.e., not covered by the block 144 | buffer, it reads the corresponding block from the input channel and writes 145 | it to the block buffer using [perform_unsafe_seek s pos]. [pos] must be a 146 | valid position in the input channel. 147 | *) 148 | let unsafe_seek s pos = 149 | if not (is_visible s pos) then 150 | perform_unsafe_seek s pos 151 | 152 | let seek s pos = 153 | if not (is_valid_pos s pos) then 154 | invalid_arg "MParser_Char_Stream.seek: invalid stream position"; 155 | unsafe_seek s pos 156 | 157 | let chars_left s pos = 158 | if is_valid_pos s pos then 159 | length s - pos 160 | else 161 | 0 162 | 163 | let read_char s pos = 164 | if not (is_valid_pos s pos) then 165 | None 166 | else 167 | (unsafe_seek s pos; 168 | Some (Bytes.unsafe_get s.buffer (pos - s.buffer_pos))) 169 | 170 | let read_string s pos maxlen = 171 | if not (is_valid_pos s pos) then 172 | "" 173 | else 174 | let sub: Bytes.t = 175 | let len = min maxlen (chars_left s pos) in 176 | if is_visible s pos && is_visible s (pos + len - 1) then 177 | Bytes.sub s.buffer (pos - s.buffer_pos) len 178 | else if len <= s.block_overlap then 179 | (perform_unsafe_seek s pos; 180 | Bytes.sub s.buffer (pos - s.buffer_pos) len) 181 | else 182 | (let result = Bytes.create len in 183 | let chars_left = ref len in 184 | seek_in s.input pos; 185 | while !chars_left > 0 do 186 | let nchars = min s.block_size !chars_left in 187 | read_block s 0 nchars; 188 | Bytes.unsafe_blit s.buffer 0 result (len - !chars_left) nchars; 189 | chars_left := !chars_left - nchars; 190 | done; 191 | result) 192 | in 193 | Bytes.unsafe_to_string sub 194 | 195 | let match_char s pos c = 196 | if not (is_valid_pos s pos) then 197 | false 198 | else 199 | (unsafe_seek s pos; 200 | c = Bytes.unsafe_get s.buffer (pos - s.buffer_pos)) 201 | 202 | let match_string s pos str = 203 | if not (is_valid_pos s pos) then 204 | str = "" 205 | else 206 | let len = String.length str in 207 | if len > chars_left s pos then 208 | false 209 | else if is_visible s pos && is_visible s (pos + len - 1) then 210 | Bytes.match_sub s.buffer (pos - s.buffer_pos) str 211 | else if len <= s.block_overlap then 212 | (perform_unsafe_seek s pos; 213 | Bytes.match_sub s.buffer (pos - s.buffer_pos) str) 214 | else 215 | (let result = ref true in 216 | let chars_left = ref len in 217 | seek_in s.input pos; 218 | while !chars_left > 0 do 219 | let nchars = min s.block_size !chars_left in 220 | read_block s 0 nchars; 221 | if Bytes.match_sub2 s.buffer 0 str (len - !chars_left) nchars then 222 | chars_left := !chars_left - nchars 223 | else 224 | (result := false; 225 | chars_left := 0) 226 | done; 227 | !result) 228 | 229 | 230 | (* Regexp-related features 231 | -------------------------------------------------------------------------- *) 232 | 233 | module MakeRegexp (Regexp: MParser_Sig.Regexp) = struct 234 | 235 | let match_regexp s pos rex = 236 | if not (is_valid_pos s pos) then 237 | None 238 | else 239 | (if not (is_visible s pos && is_visible s (pos + s.min_rspace)) then 240 | perform_unsafe_seek s pos; 241 | Regexp.exec ~rex ~pos:(pos - s.buffer_pos) s.buffer) 242 | 243 | end 244 | -------------------------------------------------------------------------------- /src/mParser_Char_Stream.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** A position-based interface to character streams. *) 21 | 22 | (** The streams are optimized for applications that mostly read a stream 23 | sequentially and occasionally backtrack over a bounded distance, which is 24 | a common usage pattern of backtracking parsers. 25 | 26 | The characters in a character stream provided by this module are accessed 27 | based on their position in the stream. A position [pos] is valid in the 28 | stream [s] if it satisfies [0 <= pos < length s]. Character streams can 29 | be created from input channels and from strings. 30 | *) 31 | 32 | 33 | type t 34 | (** A character stream. *) 35 | 36 | 37 | val from_string: string -> t 38 | (** [from_string s] creates a character stream that contains the characters of 39 | the string [s]. *) 40 | 41 | val from_channel: ?block_size:int -> ?block_overlap:int -> ?min_rspace:int -> in_channel -> t 42 | (** [from_channel ?block_size ?block_overlap ?min_rspace chn] creates a 43 | character stream that contains the characters of the input channel [chn]. 44 | The behavior of the stream is undefined if the channel is modified 45 | subsequently. 46 | 47 | When a character stream is created from an input channel, the characters 48 | in this channel are read in overlapping blocks, where [block_size] and 49 | [block_overlap] determine the size of a block and the amount of overlap. 50 | If the length of the channel is not greater than the block size, the whole 51 | channel is read at once. Otherwise, only a single block of the channel is 52 | kept in memory at a time. Whenever the current stream position leaves the 53 | part that is currently kept in memory, a new block is read from the 54 | channel. The channel must support seeking (i.e., must be created from a 55 | regular file) to enable this. If possible, blocks are read with the 56 | specified amount of overlap to minimize the re-reading of blocks due to 57 | backtracking. [min_rspace] specifies the minimum number of characters a 58 | regular expression is matched against (if possible) by [match_regexp]. 59 | 60 | @param block_size default: [1048576] characters, valid range: [1 <= 61 | block_size <= Sys.max_string_length]. 62 | 63 | @param block_overlap default: [block_size / 16], valid range: [1 <= 64 | block_overlap <= block_size / 2]. 65 | 66 | @param min_rspace default: [block_size / 64], valid range: [1 <= 67 | min_rspace <= block_overlap]. 68 | 69 | @raise Invalid_argument if the arguments are invalid. *) 70 | 71 | val length: t -> int 72 | (** [length s] returns the number of characters in the stream [s]. *) 73 | 74 | val seek: t -> int -> unit 75 | (** [seek s pos] prepares the stream for reading from position [pos]. If 76 | [pos] is outside the block currently held in memory, a block containing 77 | [pos] is read, replacing the old block. 78 | 79 | @raise Invalid_argument if [pos] is not a valid stream position. *) 80 | 81 | val read_char: t -> int -> char option 82 | (** [read_char s pos] returns [Some c] if [c] is the character at position 83 | [pos] in [s], or [None] if this position is not a valid position in 84 | [s]. *) 85 | 86 | val read_string: t -> int -> int -> string 87 | (** [read_string s pos maxlen] returns a string containing the next [n] 88 | characters in [s], where [n] is the minimum of [maxlen] and the number of 89 | characters remaining from position [pos]. If [pos] is not a valid 90 | position in [s], the empty string is returned. *) 91 | 92 | val match_char: t -> int -> char -> bool 93 | (** [match_char s pos c] is equivalent to [read_char s pos = Some c]. *) 94 | 95 | val match_string: t -> int -> string -> bool 96 | (** [match_string s pos str] is equivalent to [read_string s pos 97 | (String.length str) = str]. *) 98 | 99 | 100 | (** {2 Regexp-related features} *) 101 | 102 | module MakeRegexp (Regexp: MParser_Sig.Regexp): sig 103 | 104 | val match_regexp: t -> int -> Regexp.t -> Regexp.substrings option 105 | (** [match_regexp s pos rex] matches the regular expression [rex] against the 106 | characters in [s] starting at position [pos]. It returns [Some 107 | substrings] if the match succeeds, where [substrings] contains the matched 108 | substrings. If the match fails or if [pos] is not a valid position in 109 | [s], it returns [None]. 110 | 111 | It is not guaranteed that [rex] is matched against the complete substream 112 | starting at the current position. The [min_rspace] parameter of 113 | {!MParser_Char_Stream.from_channel} specifies the minimum number of 114 | characters avaliable for matching. *) 115 | 116 | end 117 | -------------------------------------------------------------------------------- /src/mParser_PCRE.ml: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | 21 | module Regexp: MParser_Sig.Regexp = struct 22 | 23 | type t = Pcre.regexp 24 | type substrings = Pcre.substrings 25 | 26 | 27 | let compile_flags = 28 | Pcre.cflags [ `ANCHORED ] 29 | 30 | let make pattern = 31 | Pcre.regexp ~iflags:compile_flags pattern 32 | 33 | let get_substring s idx = 34 | try 35 | Some (Pcre.get_substring s idx) 36 | with Not_found | Invalid_argument _ -> 37 | None 38 | 39 | let get_all_substrings s = 40 | Pcre.get_substrings s 41 | 42 | let exec ~rex ~pos b = 43 | try 44 | Some (Pcre.exec ~pos ~rex (Bytes.unsafe_to_string b)) 45 | with Not_found -> 46 | None 47 | 48 | end 49 | 50 | include MParser.MakeRegexp (Regexp) 51 | 52 | 53 | external wrap: Pcre.regexp -> Regexp.t = "%identity" 54 | -------------------------------------------------------------------------------- /src/mParser_PCRE.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** PCRE-based regular expression parsers. *) 21 | 22 | 23 | module Regexp: MParser_Sig.Regexp 24 | 25 | include module type of MParser.MakeRegexp (Regexp) 26 | 27 | 28 | val wrap: Pcre.regexp -> Regexp.t 29 | (** Wrap a compiled regular expression into an abstract regexp. 30 | 31 | Use this to circumvent limitations of [Regexp.make], which is defined as: 32 | 33 | let make (rx: string) : Regexp.t = 34 | Pcre.regexp ~iflags:(Pcre.cflags [ `ANCHORED ]) rx 35 | *) 36 | -------------------------------------------------------------------------------- /src/mParser_RE.ml: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | 21 | module Regexp: MParser_Sig.Regexp = struct 22 | 23 | type t = Re.re 24 | type substrings = Re.Group.t 25 | 26 | 27 | let compile_flags = 28 | [ `Anchored ] 29 | 30 | let make pattern = 31 | Re.Perl.(compile (re ~opts:compile_flags pattern)) 32 | 33 | let get_substring s idx = 34 | try 35 | Some (Re.Group.get s idx) 36 | with Not_found -> 37 | None 38 | 39 | let get_all_substrings s = 40 | Re.Group.all s 41 | 42 | let exec ~rex ~pos b = 43 | try 44 | Some (Re.exec ~pos rex (Bytes.unsafe_to_string b)) 45 | with Not_found -> 46 | None 47 | 48 | end 49 | 50 | include MParser.MakeRegexp (Regexp) 51 | 52 | 53 | external wrap: Re.re -> Regexp.t = "%identity" 54 | -------------------------------------------------------------------------------- /src/mParser_RE.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** RE-based regular expression parsers. 21 | The used syntax is re.perl (the one most similar to PCRE). 22 | *) 23 | 24 | 25 | module Regexp: MParser_Sig.Regexp 26 | 27 | include module type of MParser.MakeRegexp (Regexp) 28 | 29 | 30 | val wrap: Re.re -> Regexp.t 31 | (** Wrap a compiled regular expression into an abstract regexp. 32 | 33 | Use this to circumvent limitations of [Regexp.make], which is defined as: 34 | 35 | let make (rx: string) : Regexp.t = 36 | Re.Perl.(compile (re ~opts:[ `Anchored ] rx)) 37 | *) 38 | -------------------------------------------------------------------------------- /src/mParser_Sig.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** Common module signatures. *) 21 | 22 | 23 | module type Regexp = sig 24 | (** A pluggable regular expression engine. *) 25 | 26 | 27 | type t 28 | (** A compiled regular expression. *) 29 | 30 | type substrings 31 | (** Substrings matched by a regular expression. *) 32 | 33 | 34 | val make: string -> t 35 | (** Compiles a regular expression. *) 36 | 37 | val get_substring: substrings -> int -> string option 38 | (** Extracts a single substring. 39 | Returns None if the group did not match. *) 40 | 41 | val get_all_substrings: substrings -> string array 42 | (** Extracts all the matched substrings. 43 | Includes the full match at index 0. 44 | If a subpattern did not capture a substring, the empty 45 | string is returned in the corresponding position instead. *) 46 | 47 | val exec: rex: t -> pos: int -> Bytes.t -> substrings option 48 | (** Attempts to match the byte-buffer with a regular expression, starting 49 | from the position [pos]. Returns the matched substrings or [None] 50 | on failure. *) 51 | 52 | end 53 | -------------------------------------------------------------------------------- /src/mParser_Utils.ml: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | 21 | module IO = struct 22 | 23 | let input chn buffer pos length = 24 | if pos < 0 || pos + length > Bytes.length buffer then 25 | invalid_arg "MParser_Utils.IO.input: invalid substring"; 26 | let rec iter chars_read = 27 | let pos' = pos + chars_read in 28 | let length' = length - chars_read in 29 | let chars = (* Pervasives/Stdlib.*)input chn buffer pos' length' in 30 | if chars > 0 then 31 | iter (chars_read + chars) 32 | else 33 | chars_read 34 | in 35 | iter 0 36 | 37 | end 38 | 39 | 40 | module String = struct 41 | include String 42 | 43 | let unique = 44 | let module S = Set.Make (String) in 45 | fun l -> S.elements (List.fold_right S.add l S.empty) 46 | 47 | let for_all p a = 48 | let rec iter i = 49 | if i >= String.length a then 50 | true 51 | else if p (String.unsafe_get a i) then 52 | iter (i+1) 53 | else 54 | false 55 | in 56 | iter 0 57 | 58 | end 59 | 60 | 61 | module Bytes = struct 62 | include Bytes 63 | 64 | let match_sub b start pat = 65 | let len_b = Bytes.length b in 66 | let len_pat = String.length pat in 67 | if not (start >= 0 && start <= len_b) then 68 | invalid_arg "MParser_Utils.Bytes.match_sub: invalid index"; 69 | if start + len_pat > len_b then 70 | false 71 | else 72 | let rec iter i = 73 | if i >= len_pat then 74 | true 75 | else if String.unsafe_get pat i <> 76 | Bytes.unsafe_get b (start + i) then 77 | false 78 | else 79 | iter (i+1) 80 | in 81 | iter 0 82 | 83 | let match_sub2 b1 i1 s2 i2 n = 84 | if not (i1 >= 0 && i1 + n <= Bytes.length b1 && 85 | i2 >= 0 && i2 + n <= String.length s2) then 86 | invalid_arg "MParser_Utils.Bytes.match_sub2: invalid index"; 87 | let rec iter i = 88 | if i >= n then 89 | true 90 | else if Bytes.unsafe_get b1 (i1 + i) <> 91 | String.unsafe_get s2 (i2 + i) then 92 | false 93 | else 94 | iter (i+1) 95 | in 96 | iter 0 97 | 98 | end 99 | -------------------------------------------------------------------------------- /src/mParser_Utils.mli: -------------------------------------------------------------------------------- 1 | 2 | (* MParser, a simple monadic parser combinator library 3 | ----------------------------------------------------------------------------- 4 | Copyright (C) 2008, Holger Arnold 5 | 2014-2020, Max Mouratov 6 | 7 | License: 8 | This library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU Library General Public 10 | License version 2.1, as published by the Free Software Foundation. 11 | 12 | This library is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 | 16 | See the GNU Library General Public License version 2.1 for more details 17 | (enclosed in the file LICENSE.txt). 18 | *) 19 | 20 | (** General purpose utilities. *) 21 | 22 | 23 | module IO: sig 24 | 25 | val input: in_channel -> Bytes.t -> int -> int -> int 26 | (** [input chn b pos length] reads up to [length] characters from the 27 | channel [chn] and stores them in the byte-buffer [b], starting at position 28 | [pos]. It returns the actual number of characters read. A value less 29 | than [length] is only returned if there are less than [length] characters 30 | available from [chn] ([Pervasives.input] is allowed to read less than 31 | [length] characters if it "finds it convenient to do a partial read"). 32 | 33 | @raise Invalid_argument if [pos] and [length] do not specify a valid 34 | substring of [b]. *) 35 | 36 | end 37 | 38 | 39 | module String: sig 40 | include module type of String 41 | 42 | val unique: string list -> string list 43 | (** [unique l] returns the sorted list of unique strings in [l]. *) 44 | 45 | val for_all: (char -> bool) -> string -> bool 46 | (** [for_all p s] returns [true] if [p c = true] for all characters [c] of 47 | [s], and [false] otherwise. *) 48 | 49 | end 50 | 51 | 52 | module Bytes: sig 53 | include module type of Bytes 54 | 55 | val match_sub: Bytes.t -> int -> string -> bool 56 | (** [match_sub b i s] equals [Bytes.sub b i (String.length s) = s]. 57 | 58 | @raise Invalid_argument if [i] isn't a valid index in [b]. *) 59 | 60 | val match_sub2: Bytes.t -> int -> string -> int -> int -> bool 61 | (** [match_sub2 b i s j len] equals [Bytes.sub b i len = String.sub s j len]. 62 | 63 | @raise Invalid_argument if [i], [j], [len] do not specify valid 64 | substrings of [b] and [s]. *) 65 | 66 | end 67 | --------------------------------------------------------------------------------