├── VERSION ├── pcre ├── test2.ml ├── META ├── Makefile ├── run_mikmatch_pcre.ml ├── syntax_pcre.ml ├── test.ml └── pcre_lib.ml ├── common ├── Makefile ├── top_declaration.ml ├── global_def.mli ├── global_def.ml ├── select_lib.ml ├── constants.ml ├── charset.ml ├── messages.ml ├── mm_util.ml ├── syntax_common.ml ├── mikmatch.ml ├── regexp_ast.ml └── mikmatch.mli ├── doc ├── macros.hva ├── Makefile ├── ocamldoc.sty └── mikmatch-manual.tex.mlx ├── README.md ├── .gitignore ├── str ├── META ├── syntax_str.ml ├── run_mikmatch_str.ml ├── Makefile ├── test1.ml └── str_lib.ml ├── TODO ├── LICENSE ├── INSTALL ├── Makefile ├── Changes └── OCamlMakefile /VERSION: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | mikmatch_version="1.0.9" 3 | echo $mikmatch_version 4 | -------------------------------------------------------------------------------- /pcre/test2.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | ignore (FILTER "a") 4 | 5 | open Set 6 | 7 | module M = 8 | struct 9 | open Map 10 | 11 | ignore (FILTER "b") 12 | 13 | open Hashtbl 14 | end 15 | -------------------------------------------------------------------------------- /common/Makefile: -------------------------------------------------------------------------------- 1 | SOURCES = \ 2 | global_def.ml \ 3 | messages.ml charset.ml \ 4 | constants.ml \ 5 | regexp_ast.ml \ 6 | select_lib.ml 7 | 8 | OCAMLFLAGS = -dtypes 9 | USE_CAMLP4 = yes 10 | 11 | .PHONY: default 12 | default: bcnl 13 | 14 | TRASH = *~ 15 | 16 | OCAMLMAKEFILE = ../OCamlMakefile 17 | include $(OCAMLMAKEFILE) 18 | -------------------------------------------------------------------------------- /doc/macros.hva: -------------------------------------------------------------------------------- 1 | \let\oldmeta=\@meta 2 | \renewcommand{\@meta}{% 3 | \oldmeta 4 | \begin{rawhtml} 5 | 20 | \end{rawhtml}} 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Mikmatch 2 | ======== 3 | 4 | See `INSTALL` for the installation instructions. 5 | See https://mjambon.github.io/mjambon2016/micmatch.html for the tutorial. 6 | See `pcre/test1.ml` for a complete set of examples. 7 | 8 | For any question or problem, please create a Github "issue". 9 | 10 | Our contribution guidelines are here: 11 | https://github.com/mjambon/documents/blob/master/how-to-contribute.md 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.cm[ioxa] 3 | *.cmxa 4 | *.[oa] 5 | *.annot 6 | *.top 7 | ._d 8 | ._bcdi 9 | *.ppo 10 | 11 | bytecode 12 | nativecode 13 | 14 | pcre/global_def.ml 15 | pcre/global_def.mli 16 | pcre/match.ml 17 | pcre/mikmatch.ml 18 | pcre/mikmatch.mli 19 | pcre/mm_util.ml 20 | pcre/syntax_common.ml 21 | 22 | str/global_def.ml 23 | str/global_def.mli 24 | str/match.ml 25 | str/mikmatch.ml 26 | str/mikmatch.mli 27 | str/mm_util.ml 28 | str/syntax_common.ml 29 | 30 | /_opam 31 | -------------------------------------------------------------------------------- /str/META: -------------------------------------------------------------------------------- 1 | name = "mikmatch_str" 2 | description = "Pattern matching extended with regexps in Ocamllex syntax" 3 | 4 | requires = "camlp4 str unix" 5 | requires(toploop) += "tophide" 6 | 7 | archive(syntax,toploop) = "pa_mikmatch_str.cma run_mikmatch_str.cma" 8 | archive(syntax,create_toploop) = "pa_mikmatch_str.cma run_mikmatch_str.cma" 9 | archive(syntax,preprocessor) = "pa_mikmatch_str.cma" 10 | archive(byte) = "run_mikmatch_str.cma" 11 | archive(native) = "run_mikmatch_str.cmxa" 12 | -------------------------------------------------------------------------------- /common/top_declaration.ml: -------------------------------------------------------------------------------- 1 | let add, flush = 2 | let accu = ref [] in 3 | let add str_item = 4 | accu := str_item :: !accu 5 | in 6 | let flush () = 7 | let result = <:str_item< $ List.rev !accu $ >> in 8 | accu := []; 9 | result 10 | in 11 | add, flush 12 | 13 | 14 | let install_syntax () = 15 | 16 | EXTEND Camlp4.PreCast.Gram 17 | 18 | GLOBAL: str_item; 19 | 20 | str_item: FIRST [ 21 | [ si = NEXT -> add si; flush () ] 22 | ]; 23 | 24 | END 25 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | Documentation: 2 | - mention all the rules which are overridden, since it may 3 | cause incompatibilities with other extensions which do the same 4 | 5 | Compatibility: 6 | - (maybe) add an option for using another keyword than "match", and 7 | avoid to have to delete existing rules 8 | 9 | Features: 10 | - port to Str the macros which already exist for PCRE (if possible) 11 | 12 | Possible optimizations: 13 | - computation of only the substrings that are effectively used (or at 14 | least which name appears in some expression) 15 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | all: mikmatch-ocamldoc/index.html 2 | camlmix -o mikmatch-manual.tex -clean mikmatch-manual.tex.mlx 3 | pdflatex -interaction=nonstopmode mikmatch-manual 4 | pdflatex -interaction=nonstopmode mikmatch-manual 5 | hevea -fix macros.hva mikmatch-manual 6 | 7 | mikmatch-ocamldoc/index.html: ../common/mikmatch.mli 8 | ocamldoc -d mikmatch-ocamldoc -html $< 9 | ocamldoc $< -o mikmatch-ocamldoc.tex -latex -noheader -notoc -notrailer 10 | 11 | clean: 12 | rm -f *~ *.haux *.log *.aux *.toc *.pdf *.html *.out *.htoc \ 13 | mikmatch-ocamldoc/* 14 | -------------------------------------------------------------------------------- /pcre/META: -------------------------------------------------------------------------------- 1 | name = "mikmatch_pcre" 2 | description = "Pattern matching extended with regexps in Ocamllex syntax" 3 | 4 | requires = "camlp4 pcre unix" 5 | requires(toploop) += "tophide" 6 | 7 | archive(syntax,toploop) = "pa_mikmatch_pcre.cma run_mikmatch_pcre.cma" 8 | archive(syntax,create_toploop) = "pa_mikmatch_pcre.cma run_mikmatch_pcre.cma" 9 | archive(syntax,preprocessor) = "pa_mikmatch_pcre.cma" 10 | archive(byte) = "run_mikmatch_pcre.cma" 11 | archive(native) = "run_mikmatch_pcre.cmxa" 12 | 13 | package "run" ( 14 | description = "Subpackage providing only runtime support for mikmatch_pcre" 15 | requires = "pcre unix" 16 | archive(byte) = "run_mikmatch_pcre.cma" 17 | archive(native) = "run_mikmatch_pcre.cmxa" 18 | ) 19 | 20 | package "syntax" ( 21 | description = "Subpackage providing only syntax support for mikmatch_pcre" 22 | requires = "pcre camlp4" 23 | archive(syntax,toploop) = "pa_mikmatch_pcre.cma" 24 | archive(syntax,preprocessor) = "pa_mikmatch_pcre.cma" 25 | ) 26 | -------------------------------------------------------------------------------- /common/global_def.mli: -------------------------------------------------------------------------------- 1 | (* $Id$ *) 2 | 3 | open Camlp4.PreCast 4 | 5 | val init : (Ast.loc -> string -> Ast.str_item option) -> unit 6 | (* 7 | [init get] registers a new filter that will operate after the parsing phase. 8 | The [get] function will be called to look up all expression lowercase 9 | identifiers. It returns the structure items that are required by this 10 | expression. They will be inserted just before the current structure item, 11 | only once. 12 | 13 | Example: 14 | 15 | In your files that will extend the camlp4 preprocessor, define the 16 | following: 17 | 18 | 19 | let get _loc id = 20 | match id with 21 | "pi" -> Some <:str_item< value pi = acos (-1.) >> 22 | | _ -> None 23 | ;; 24 | 25 | let () = init get;; 26 | 27 | 28 | The preprocessed file: 29 | 30 | let x = 0.5 *. pi 31 | 32 | 33 | will be expanded into: 34 | 35 | let pi = acos (-1.) 36 | let x = 0.5 *. pi 37 | 38 | *) 39 | 40 | val init_from_table : (string, Ast.str_item) Hashtbl.t -> unit 41 | (* 42 | Same as [init], but uses the given hash table for its lookups. 43 | *) 44 | -------------------------------------------------------------------------------- /str/syntax_str.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | (* $Id$ *) 3 | 4 | open Printf 5 | 6 | open Camlp4.PreCast 7 | open Syntax 8 | 9 | open Syntax_common 10 | open Select_lib 11 | open Match 12 | 13 | let extend_common () = 14 | EXTEND Gram 15 | expr: [ 16 | [ "RE_STR"; re = regexp -> 17 | Regexp_ast.warnings re; 18 | let (re_args, re_source, named_groups, postbindings) = 19 | Str_lib.lib.process_regexp _loc ~sharing:true re "" in 20 | 21 | let re_fragments = Match.get_re_fragments _loc re_source in 22 | <:expr< ( $re_fragments$, 23 | $pp_named_groups _loc named_groups$ ) >> ] 24 | ]; 25 | 26 | Syntax_common.regexp: LEVEL "simple" [ 27 | [ "_" -> Regexp_ast.Characters (_loc, Charset.full) ] 28 | ]; 29 | 30 | END;; 31 | 32 | let extend_regular () = extend_common () 33 | (* 34 | let extend_revised () = extend_common () 35 | *) 36 | 37 | let _ = 38 | select_lib Str_lib.lib; 39 | 40 | (* Keeping it for backwards compatibility *) 41 | Camlp4.Options.add "-thread" 42 | (Arg.Unit ( 43 | fun () -> 44 | select_lib Str_lib.lib_mt; 45 | eprintf "Warning: -thread is deprecated.\n/%!" 46 | )) 47 | " Deprecated option that protects access to shared data with a mutex. \ 48 | Currently only patterns containing @ are concerned."; 49 | 50 | (* How to test if the current syntax is the regular or revised one? *) 51 | extend_regular () 52 | -------------------------------------------------------------------------------- /common/global_def.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | (* $Id$ *) 3 | 4 | open Camlp4.PreCast 5 | 6 | let init get = 7 | let rec rewrite si0 = 8 | let _loc = Ast.loc_of_str_item si0 in 9 | let tbl = Hashtbl.create 10 in 10 | let accu = ref <:str_item< >> in 11 | let map = 12 | (object 13 | inherit Ast.map as super 14 | 15 | method expr e = 16 | let _loc = Ast.loc_of_expr e in 17 | (match super#expr e with 18 | | <:expr< $lid:id$ >> -> 19 | (match get _loc id with 20 | Some si -> 21 | if not (Hashtbl.mem tbl id) then 22 | (Hashtbl.add tbl id (); 23 | accu := <:str_item< $!accu$ ; $si$ >>) 24 | 25 | | None -> ()) 26 | 27 | | _ -> ()); 28 | e 29 | 30 | method str_item si0 = 31 | let si = super#str_item si0 in 32 | let pending = !accu in 33 | accu := <:str_item< >>; 34 | match pending with 35 | <:str_item< >> -> 36 | (* Special workaround. 37 | Otherwise directives are not recognized and 38 | are skipped. *) 39 | si 40 | 41 | | _ -> <:str_item< $pending$ ; $si$ >> 42 | end) 43 | in 44 | map # str_item si0 45 | in 46 | 47 | AstFilters.register_str_item_filter rewrite; 48 | AstFilters.register_topphrase_filter rewrite 49 | 50 | 51 | let init_from_table tbl = 52 | let get _loc id = 53 | try Some (Hashtbl.find tbl id) 54 | with Not_found -> None 55 | in 56 | init get 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2004-2008 Martin Jambon 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 3. The name of the author may not be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | Installation of Mikmatch version 1.0.0 or higher 2 | ================================================ 3 | 4 | 5 | The recommended way of installing mikmatch and a bunch of other packages 6 | is from GODI (godi_console) available from http://godi.camlcity.org/ 7 | or from your favorite packaging system. 8 | 9 | 10 | Otherwise, there is the manual install: 11 | 12 | Prerequisites: 13 | ------------- 14 | 15 | You must have installed: 16 | 17 | - OCaml including Camlp4 (versions 3.10.2 and 3.11.0 should be OK; 18 | versions up to 3.09.3 will not work). 19 | 20 | - PCRE-OCaml (http://www.ocaml.info/home/ocaml_sources.html) 21 | 22 | 23 | Everything will run smoothly if you have: 24 | 25 | - Findlib, i.e. ocamlfind 26 | (http://projects.camlcity.org/projects/findlib.html) 27 | - Gnu make 28 | - an Sh-compatible shell 29 | 30 | 31 | Compilation: 32 | ----------- 33 | 34 | make 35 | 36 | Installation: 37 | ------------ 38 | 39 | make install 40 | 41 | 42 | Uninstallation: 43 | -------------- 44 | 45 | make uninstall 46 | 47 | Options: 48 | ------- 49 | 50 | By default, only mikmatch_pcre is built and installed. 51 | Mikmatch_str can be built and installed with the following commands: 52 | make str # compilation 53 | make install-str # installation 54 | make uninstall-str # uninstallation 55 | 56 | 57 | Problems? 58 | -------- 59 | 60 | Questions, comments and bug reports should be sent to: 61 | 62 | http://groups.google.com/group/micmatch (no subscription required) 63 | -------------------------------------------------------------------------------- /common/select_lib.ml: -------------------------------------------------------------------------------- 1 | (* $Id$ *) 2 | 3 | open Camlp4.PreCast 4 | open Ast 5 | 6 | let not_implemented _loc = 7 | Messages.failure _loc "not implemented" 8 | 9 | let fail _ = failwith "not implemented" 10 | 11 | type regexp_args = (string * Ast.expr) list 12 | type regexp_source = [ `String of string | `Expr of Ast.expr ] list 13 | 14 | type regexp_lib = 15 | { predefined_regexps : (string * Regexp_ast.ast) list; 16 | unfold_range : bool; 17 | process_regexp : 18 | loc -> sharing:bool -> Regexp_ast.ast -> string -> 19 | regexp_args * 20 | regexp_source * 21 | (Regexp_ast.named_groups * Regexp_ast.named_groups) * 22 | (string * expr) list; 23 | compile_regexp_match : loc -> regexp_args -> regexp_source -> expr; 24 | compile_regexp_search : loc -> regexp_args -> regexp_source -> expr; 25 | match_and_bind : 26 | loc -> string -> expr -> expr -> 27 | (Regexp_ast.named_groups * Regexp_ast.named_groups) -> 28 | expr -> expr -> expr; 29 | wrap_match : expr -> expr; 30 | wrap_user_case : expr -> expr; 31 | really_wrap_match : bool; 32 | really_wrap_user_case : bool } 33 | 34 | let dummy = 35 | { predefined_regexps = []; 36 | unfold_range = false; 37 | process_regexp = not_implemented; 38 | compile_regexp_match = not_implemented; 39 | compile_regexp_search = not_implemented; 40 | match_and_bind = not_implemented; 41 | wrap_match = fail; 42 | wrap_user_case = fail; 43 | really_wrap_match = false; 44 | really_wrap_user_case = false } 45 | -------------------------------------------------------------------------------- /common/constants.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | 3 | open Camlp4.PreCast 4 | 5 | let dummy_loc = Loc.ghost 6 | 7 | let debug_mode = ref false 8 | let reserved_prefix = if !debug_mode then "_" else "__mikmatch_" 9 | let uppercase_prefix = "C" ^ reserved_prefix 10 | let typevar_prefix = "a" ^ reserved_prefix 11 | 12 | let mod_runtime = ref "" 13 | let mod_runtime_mt = ref "" 14 | 15 | let exn_exit = "Mikmatch_exit" 16 | 17 | let any_exn = reserved_prefix ^ "any_exn" 18 | let any_target = reserved_prefix ^ "any_target" 19 | let any_result = reserved_prefix ^ "any_result" 20 | 21 | let expr_exit _loc = 22 | <:expr< $uid: !mod_runtime$.$uid:exn_exit$ >> 23 | 24 | let raise_exit _loc = 25 | <:expr< raise $expr_exit _loc$ >> 26 | 27 | let patt_exit _loc = 28 | <:patt< $uid: !mod_runtime$.$uid:exn_exit$ >> 29 | 30 | let shared re_name = re_name ^ "shared" 31 | let shared_ovector re_name = re_name ^ "shared_ovector" 32 | 33 | let regexp_prefix = reserved_prefix ^ "regexp_" 34 | let view_prefix = reserved_prefix ^ "view_" 35 | 36 | let new_regexp = 37 | let r = ref 0 in 38 | fun () -> incr r; (!r, regexp_prefix ^ string_of_int !r) 39 | 40 | let new_view = 41 | let r = ref 0 in 42 | fun () -> incr r; (!r, view_prefix ^ string_of_int !r) 43 | 44 | let new_target = 45 | let r = ref 0 in 46 | fun () -> incr r; reserved_prefix ^ "match_target_" ^ string_of_int !r 47 | 48 | let new_subpatt = 49 | let r = ref 0 in 50 | fun () -> incr r; reserved_prefix ^ "subpatt_" ^ string_of_int !r 51 | 52 | let new_var = 53 | let r = ref 0 in 54 | fun () -> incr r; reserved_prefix ^ "var_" ^ string_of_int !r 55 | 56 | let new_type_var = 57 | let r = ref 0 in 58 | fun () -> incr r; typevar_prefix ^ string_of_int !r 59 | -------------------------------------------------------------------------------- /common/charset.ml: -------------------------------------------------------------------------------- 1 | module C = Set.Make (Char) 2 | 3 | type t = C.t 4 | 5 | let empty = C.empty 6 | 7 | let add = C.add 8 | let singleton = C.singleton 9 | let union = C.union 10 | let diff = C.diff 11 | 12 | let add_range first last set = 13 | let r = ref set in 14 | for i = Char.code first to Char.code last do 15 | r := add (Char.chr i) !r 16 | done; 17 | !r 18 | 19 | let range c1 c2 = add_range c1 c2 empty 20 | let irange i j = range (Char.chr i) (Char.chr j) 21 | 22 | let full = range '\000' '\255' 23 | let full_for_C = C.remove '\000' full 24 | 25 | let of_string s = 26 | let accu = ref C.empty in 27 | String.iter (fun c -> accu := C.add c !accu) s; 28 | !accu 29 | 30 | let complement set = C.diff full set 31 | 32 | let list = C.elements 33 | 34 | let nocase set = 35 | C.fold 36 | (fun c set -> 37 | let c1 = Char.lowercase_ascii c 38 | and c2 = Char.uppercase_ascii c in 39 | let set1 = C.add c1 set in 40 | if c1 <> c2 then C.add c2 set1 41 | else set1) 42 | set 43 | C.empty 44 | 45 | module Posix = 46 | struct 47 | let lower = range 'a' 'z' 48 | let upper = range 'A' 'Z' 49 | let ascii = range '\x00' '\x7F' 50 | let alpha = union lower upper 51 | let digit = range '0' '9' 52 | let alnum = union alpha digit 53 | let punct = of_string "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" 54 | let graph = union alnum punct 55 | let print = union (singleton ' ') graph 56 | let blank = of_string " \t" 57 | let cntrl = union (range '\x00' '\x1F') (singleton '\x7F') 58 | let xdigit = of_string "0123456789abcdefABCDEF" 59 | let space = of_string " \t\n\x0B\x0C\r" 60 | 61 | let all = [ "lower", lower; 62 | "upper", upper; 63 | "ascii", ascii; 64 | "alpha", alpha; 65 | "digit", digit; 66 | "alnum", alnum; 67 | "punct", punct; 68 | "graph", graph; 69 | "print", print; 70 | "blank", blank; 71 | "cntrl", cntrl; 72 | "xdigit", xdigit; 73 | "space", space; ] 74 | end 75 | -------------------------------------------------------------------------------- /common/messages.ml: -------------------------------------------------------------------------------- 1 | (* $Id$ *) 2 | 3 | open Printf 4 | 5 | open Camlp4.PreCast 6 | 7 | let warning _loc s = 8 | (* Format.err_formatter _loc;*) 9 | let label = if !Sys.interactive then "" else "Warning: " in 10 | Format.eprintf "%a:@.%s%s@." Loc.print _loc label s 11 | 12 | let failure _loc s = 13 | (* does it print the error like Stdpp.raise_with_loc? *) 14 | Loc.raise _loc (Failure s) 15 | 16 | 17 | let list = function 18 | [] -> "" 19 | | [s] -> s 20 | | l -> 21 | let l' = List.rev l in 22 | String.concat ", " (List.rev (List.tl l')) ^ " and " ^ List.hd l' 23 | 24 | let invalid_backref _loc name = 25 | failure _loc 26 | (sprintf "Invalid backreference %s" name) 27 | 28 | let unbalanced_bindings _loc l = 29 | failure _loc 30 | (sprintf "Variable%s %s must occur on both sides of this | pattern" 31 | (if List.length l > 1 then "s" else "") 32 | (list l)) 33 | 34 | let multiple_binding _loc l = 35 | let s, are = 36 | if List.length l > 1 then "s", "are" 37 | else "", "is" in 38 | failure _loc 39 | (sprintf "Variable%s %s %s bound several times in this matching" 40 | s (list l) are) 41 | 42 | let invalid_range _loc = 43 | failure _loc "Invalid range" 44 | 45 | let invalid_pattern _loc = 46 | failure _loc "Invalid pattern" 47 | 48 | let invalid_lookbehind _loc kind adjective = 49 | failure _loc 50 | (sprintf "%s are disabled in %slookbehind assertions" kind adjective) 51 | 52 | let not_visible _loc who where = 53 | let s, are = 54 | if List.length who > 1 then "s", "are" 55 | else "", "is" in 56 | warning _loc 57 | (sprintf "identifier%s %s %s not visible \ 58 | out of this %s" 59 | s (list who) are where) 60 | 61 | let invalid_converter _loc name = 62 | failure _loc 63 | (sprintf "%s is not a valid converter" name) 64 | 65 | let reserved_identifier _loc prefix name = 66 | failure _loc 67 | (sprintf "%s is a reserved identifier: use another prefix than %s" 68 | name prefix) 69 | 70 | let misplaced_pattern p = 71 | failure (Ast.loc_of_patt p) 72 | ("patterns of this kind cannot appear in this context. \ 73 | Use match ... with if you are unsure.") 74 | 75 | let cannot_delete_rule name = 76 | eprintf "Warning: Cannot delete rule %s\n%!" name 77 | -------------------------------------------------------------------------------- /doc/ocamldoc.sty: -------------------------------------------------------------------------------- 1 | 2 | %% Support macros for LaTeX documentation generated by ocamldoc. 3 | %% This file is in the public domain; do what you want with it. 4 | 5 | \NeedsTeXFormat{LaTeX2e} 6 | \ProvidesPackage{ocamldoc} 7 | [2001/12/04 v1.0 ocamldoc support] 8 | 9 | \newenvironment{ocamldoccode}{% 10 | \bgroup 11 | \leftskip\@totalleftmargin 12 | \rightskip\z@skip 13 | \parindent\z@ 14 | \parfillskip\@flushglue 15 | \parskip\z@skip 16 | %\noindent 17 | \@@par\smallskip 18 | \@tempswafalse 19 | \def\par{% 20 | \if@tempswa 21 | \leavevmode\null\@@par\penalty\interlinepenalty 22 | \else 23 | \@tempswatrue 24 | \ifhmode\@@par\penalty\interlinepenalty\fi 25 | \fi} 26 | \obeylines 27 | \verbatim@font 28 | \let\org@prime~% 29 | \@noligs 30 | \let\org@dospecials\dospecials 31 | \g@remfrom@specials{\\} 32 | \g@remfrom@specials{\{} 33 | \g@remfrom@specials{\}} 34 | \let\do\@makeother 35 | \dospecials 36 | \let\dospecials\org@dospecials 37 | \frenchspacing\@vobeyspaces 38 | \everypar \expandafter{\the\everypar \unpenalty}} 39 | {\egroup\par} 40 | 41 | \def\g@remfrom@specials#1{% 42 | \def\@new@specials{} 43 | \def\@remove##1{% 44 | \ifx##1#1\else 45 | \g@addto@macro\@new@specials{\do ##1}\fi} 46 | \let\do\@remove\dospecials 47 | \let\dospecials\@new@specials 48 | } 49 | 50 | \newenvironment{ocamldocdescription} 51 | {\list{}{\rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax\ignorespaces} 52 | {\endlist\medskip} 53 | 54 | \newenvironment{ocamldoccomment} 55 | {\list{}{\leftmargin 2\leftmargini \rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax} 56 | {\endlist} 57 | 58 | \let \ocamldocparagraph \paragraph 59 | \def \paragraph #1{\ocamldocparagraph {#1}\noindent} 60 | \let \ocamldocsubparagraph \subparagraph 61 | \def \subparagraph #1{\ocamldocsubparagraph {#1}\noindent} 62 | 63 | \let\ocamldocvspace\vspace 64 | 65 | \newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist} 66 | \newenvironment{ocamldocsigend} 67 | {\noindent\quad\texttt{sig}\ocamldocindent} 68 | {\endocamldocindent\vskip -\lastskip 69 | \noindent\quad\texttt{end}\medskip} 70 | \newenvironment{ocamldocobjectend} 71 | {\noindent\quad\texttt{object}\ocamldocindent} 72 | {\endocamldocindent\vskip -\lastskip 73 | \noindent\quad\texttt{end}\medskip} 74 | 75 | \endinput 76 | -------------------------------------------------------------------------------- /str/run_mikmatch_str.ml: -------------------------------------------------------------------------------- 1 | exception Mikmatch_exit 2 | 3 | let nocase s = 4 | let len = String.length s in 5 | let buf = Buffer.create (4 * len) in 6 | for i = 0 to len - 1 do 7 | let c = s.[i] in 8 | let cl = Char.lowercase_ascii c 9 | and cu = Char.uppercase_ascii c in 10 | if cl <> cu then (* in this case, cl and cu are letters *) 11 | Printf.bprintf buf "[%c%c]" cl cu 12 | else 13 | Buffer.add_string buf (Str.quote (String.make 1 c)) 14 | done; 15 | Buffer.contents buf 16 | 17 | 18 | module Mem = 19 | struct 20 | (* memoization table with periodic removal of old items *) 21 | 22 | type ('a, 'b) t = 23 | { mutable date : float; 24 | mutable last_cleanup : float; 25 | opt_size : int; 26 | max_size : int; 27 | tbl : ('a, ('b * float ref)) Hashtbl.t } 28 | 29 | let create n = 30 | if n < 1 then invalid_arg "Memo.create" 31 | else 32 | { date = 0.; 33 | last_cleanup = 0.; 34 | opt_size = n; 35 | max_size = n + n; 36 | tbl = Hashtbl.create (n + n) } 37 | 38 | (* removal of anything which is too old *) 39 | let cleanup t = 40 | let t0 = t.last_cleanup in 41 | t.last_cleanup <- t.date; 42 | let tbl = t.tbl in 43 | let trash = 44 | Hashtbl.fold 45 | (fun key (data, last_access) trash -> 46 | if !last_access < t0 then 47 | key :: trash 48 | else trash) 49 | tbl 50 | [] in 51 | List.iter (Hashtbl.remove tbl) trash 52 | 53 | (* unsafe addition of data (key should not be in the table) *) 54 | let unsafe_add t key data = 55 | let date = t.date +. 1. in 56 | t.date <- date; 57 | Hashtbl.add t.tbl key (data, ref date); 58 | let size = Hashtbl.length t.tbl in 59 | if size > t.max_size then 60 | cleanup t 61 | else 62 | if size = t.opt_size + 1 then 63 | t.last_cleanup <- t.date 64 | 65 | let add t key data = 66 | if Hashtbl.mem t.tbl key then 67 | invalid_arg "Memo.add" 68 | else 69 | unsafe_add t key data 70 | 71 | let find t key = 72 | let (data, last_access) = Hashtbl.find t.tbl key in 73 | let date = t.date in 74 | last_access := date; 75 | t.date <- date +. 1.; 76 | data 77 | 78 | let get t key lazy_data = 79 | try find t key 80 | with Not_found -> 81 | let data = Lazy.force lazy_data in 82 | unsafe_add t key data; 83 | data 84 | 85 | let clear t = 86 | Hashtbl.clear t.tbl; 87 | t.date <- 0.; 88 | t.last_cleanup <- 0. 89 | 90 | end 91 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default install uninstall reinstall \ 2 | all opt mikmatch-pcre mikmatch-str \ 3 | common install-str install-pcre uninstall-str uninstall-pcre \ 4 | backup clean archive pcre str 5 | 6 | ifndef PREFIX 7 | BINDIR = $(shell dirname `which ocaml`) 8 | PREFIX = $(shell dirname $(BINDIR)) 9 | else 10 | BINDIR = $(PREFIX)/bin 11 | endif 12 | export PREFIX 13 | export BINDIR 14 | 15 | 16 | default: mikmatch-pcre 17 | install: install-pcre 18 | uninstall: uninstall-pcre 19 | reinstall: 20 | $(MAKE) uninstall 21 | $(MAKE) install 22 | 23 | ## GODIVA/GODI targets 24 | all: common 25 | cd pcre && $(MAKE) all-bc 26 | opt: common 27 | cd pcre && $(MAKE) all-nc 28 | 29 | ## end of GODIVA targets 30 | 31 | mikmatch-pcre: common pcre 32 | mikmatch-str: common str 33 | 34 | common: 35 | cd common && $(MAKE) 36 | 37 | str: common 38 | cd str && $(MAKE) 39 | pcre: common 40 | cd pcre && $(MAKE) 41 | 42 | install-str: 43 | cd str && $(MAKE) install 44 | install-pcre: 45 | cd pcre && $(MAKE) install 46 | 47 | uninstall-str: 48 | cd str && $(MAKE) uninstall 49 | uninstall-pcre: 50 | cd pcre && $(MAKE) uninstall 51 | 52 | 53 | backup: 54 | scp -r . $$BACKUP_DIR/mikmatch/ 55 | 56 | clean:: 57 | cd doc && $(MAKE) clean 58 | cd common && $(MAKE) clean 59 | cd str && $(MAKE) clean 60 | cd pcre && $(MAKE) clean 61 | 62 | 63 | 64 | VERSION = $(shell ./VERSION) 65 | export VERSION 66 | 67 | install: OCAMLFIND_INSTFLAGS = -patch-version $(VERSION) 68 | export OCAMLFIND_INSTFLAGS 69 | 70 | # Only for developers; requires camlmix, hevea, pdflatex 71 | # and maybe other things. 72 | archive: 73 | @echo "Making archive for version $(VERSION)" 74 | cd str && $(MAKE) version 75 | cd pcre && $(MAKE) version 76 | cd doc && $(MAKE) 77 | rm -rf /tmp/mikmatch /tmp/mikmatch-$(VERSION) && \ 78 | cp -r . /tmp/mikmatch && \ 79 | cd /tmp/mikmatch && \ 80 | $(MAKE) clean && \ 81 | rm -rf *~ mikmatch*.tar* `find . -name .svn` && \ 82 | cd /tmp && cp -r mikmatch mikmatch-$(VERSION) && \ 83 | tar czf mikmatch.tar.gz mikmatch && \ 84 | tar cjf mikmatch.tar.bz2 mikmatch && \ 85 | tar czf mikmatch-$(VERSION).tar.gz mikmatch-$(VERSION) && \ 86 | tar cjf mikmatch-$(VERSION).tar.bz2 mikmatch-$(VERSION) 87 | mv /tmp/mikmatch.tar.gz /tmp/mikmatch.tar.bz2 . 88 | mv /tmp/mikmatch-$(VERSION).tar.gz /tmp/mikmatch-$(VERSION).tar.bz2 . 89 | cp mikmatch.tar.gz mikmatch.tar.bz2 $$WWW/ 90 | cp mikmatch-$(VERSION).tar.gz mikmatch-$(VERSION).tar.bz2 $$WWW/ 91 | cp LICENSE $$WWW/mikmatch-license.txt 92 | cp VERSION $$WWW/mikmatch-version 93 | cp Changes $$WWW/mikmatch-changes.txt 94 | $(MAKE) install-www-doc 95 | 96 | install-www-doc: 97 | cp doc/mikmatch-manual.pdf $$WWW 98 | cp doc/mikmatch-manual.html $$WWW/mikmatch-manual-nocounter.html 99 | cp doc/mikmatch-ocamldoc/* $$WWW/mikmatch-ocamldoc 100 | touch -c $$WWW/mikmatch.html.mlx 101 | -------------------------------------------------------------------------------- /str/Makefile: -------------------------------------------------------------------------------- 1 | ifndef BINDIR 2 | BINDIR = $(shell dirname `which ocaml`) 3 | endif 4 | 5 | SOURCES = \ 6 | mm_util.ml \ 7 | global_def.mli global_def.ml \ 8 | match.ml \ 9 | str_lib.ml \ 10 | syntax_common.ml \ 11 | syntax_str.ml 12 | 13 | RESULT = mikmatch_str 14 | 15 | OCAMLFLAGS = -dtypes 16 | 17 | OCAMLLDFLAGS = \ 18 | messages.cmo charset.cmo \ 19 | constants.cmo \ 20 | regexp_ast.cmo \ 21 | select_lib.cmo 22 | 23 | INCDIRS = ../common 24 | 25 | USE_CAMLP4 = yes 26 | 27 | LIBINSTALL_FILES := \ 28 | pa_mikmatch_str.cma pa_mikmatch_str.cmo pa_mikmatch_str.cmi \ 29 | run_mikmatch_str.cma run_mikmatch_str.cmo run_mikmatch_str.cmi \ 30 | mikmatch.cmi mikmatch.cmo mikmatch.cmx mikmatch.mli \ 31 | run_mikmatch_str.cmxa run_mikmatch_str.cmx \ 32 | run_mikmatch_str.a run_mikmatch_str.o 33 | 34 | .PHONY: default force all links 35 | 36 | default: links pa_lib misc 37 | force: 38 | touch $(SOURCES) 39 | $(MAKE) 40 | 41 | all: 42 | 43 | links: mikmatch.mli mikmatch.ml match.ml syntax_common.ml mm_util.ml \ 44 | global_def.mli global_def.ml 45 | 46 | mikmatch.mli: ../common/mikmatch.mli 47 | ln -s $< $@ 48 | mikmatch.ml: ../common/mikmatch.ml 49 | ln -s $< $@ 50 | syntax_common.ml: ../common/syntax_common.ml 51 | ln -s $< $@ 52 | match.ml: ../common/match.ml 53 | ln -s $< $@ 54 | mm_util.ml: ../common/mm_util.ml 55 | ln -s $< $@ 56 | global_def.mli: ../common/global_def.mli 57 | ln -s $< $@ 58 | global_def.ml: ../common/global_def.ml 59 | ln -s $< $@ 60 | 61 | .PHONY: pa_lib install uninstall topinstall misc 62 | 63 | pa_lib: 64 | $(MAKE) RESULT=pa_mikmatch_str pabc bcl 65 | 66 | #install: libinstall topinstall 67 | install: libinstall 68 | uninstall: libuninstall 69 | rm -f $(BINDIR)/mikmatch_str.top $(BINDIR)/mikmatch_str 70 | 71 | topinstall: 72 | install -m 0755 mikmatch_str.top mikmatch_str $(BINDIR) 73 | 74 | misc: 75 | ocamlc -c mikmatch.mli 76 | ocamlc -a -o run_mikmatch_str.cma \ 77 | mikmatch.ml run_mikmatch_str.ml 78 | ocamlopt -a -o run_mikmatch_str.cmxa \ 79 | mikmatch.ml run_mikmatch_str.ml 80 | 81 | 82 | .PHONY: test1 test-install 83 | test: test1 84 | 85 | # (preinstall test) 86 | test1: 87 | camlp4o ./pa_mikmatch_str.cma -printer o test1.ml > test1.ppo 88 | ocamlopt -pp 'camlp4o ./pa_mikmatch_str.cma' \ 89 | str.cmxa unix.cmxa \ 90 | run_mikmatch_str.cmxa \ 91 | test1.ml -o test1 92 | ./test1 93 | # ocamlmktop -o mikmatch_str.test -I +camlp4 -I . camlp4o.cma \ 94 | # pa_mikmatch_str.cma str.cma unix.cma run_mikmatch_str.cma 95 | # ./mikmatch_str.test test1.ml 96 | 97 | 98 | # Compilation with ocamlfind (postinstall test) 99 | test-install: 100 | ocamlfind ocamlopt \ 101 | -syntax camlp4o \ 102 | -package mikmatch_str\ 103 | -linkpkg \ 104 | test1.ml -o test1_inst 105 | ./test1_inst 106 | 107 | 108 | TRASH = \ 109 | *~ *.ppo *.cm[ioxa] *.cmxa *.o *.a *.top \ 110 | *.test test1 test1.more mikmatch_str mikmatch_str.ml 111 | 112 | 113 | OCAMLMAKEFILE = ../OCamlMakefile 114 | include $(OCAMLMAKEFILE) 115 | -------------------------------------------------------------------------------- /common/mm_util.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | (* $Id$ *) 3 | 4 | open Camlp4.PreCast 5 | 6 | open Messages 7 | 8 | (* General Camlp4 utilities *) 9 | 10 | 11 | let debug s = 12 | if !Constants.debug_mode then 13 | Printf.eprintf "[debug] %s\n%!" s 14 | 15 | 16 | let list_of_comma_expr e = 17 | let rec aux e l = 18 | match e with 19 | <:expr< $e1$ , $e2$ >> -> aux e1 (aux e2 l) 20 | | <:expr< >> -> l 21 | | e -> e :: l 22 | in 23 | aux e [] 24 | 25 | let list_of_comma_patt p = 26 | let rec aux p l = 27 | match p with 28 | <:patt< $p1$ , $p2$ >> -> aux p1 (aux p2 l) 29 | | <:patt< >> -> l 30 | | p -> p :: l 31 | in 32 | aux p [] 33 | 34 | let list_of_semicolon_patt p = 35 | let rec aux p l = 36 | match p with 37 | <:patt< $p1$ ; $p2$ >> -> aux p1 (aux p2 l) 38 | | <:patt< >> -> l 39 | | p -> p :: l 40 | in 41 | aux p [] 42 | 43 | let list_of_record p = 44 | List.map ( 45 | fun field -> 46 | match field with 47 | <:patt< $p1$ = $p2$ >> -> `Normal (p1, p2) 48 | | p -> `Other p 49 | | _ -> assert false 50 | ) (list_of_semicolon_patt p) 51 | 52 | 53 | let comma_expr_of_list _loc = function 54 | hd :: tl -> 55 | debug "comma_expr_of_list"; 56 | List.fold_left ( 57 | fun accu e -> <:expr< $accu$ , $e$ >> 58 | ) hd tl 59 | | [] -> assert false 60 | 61 | 62 | let comma_patt_of_list _loc = function 63 | hd :: tl -> 64 | debug "comma_patt_of_list"; 65 | List.fold_left ( 66 | fun accu p -> <:patt< $accu$ , $p$ >> 67 | ) hd tl 68 | | [] -> assert false 69 | 70 | 71 | let semicolon_patt_of_list _loc = function 72 | hd :: tl -> 73 | debug "semicolon_patt_of_list"; 74 | List.fold_left ( 75 | fun accu p -> <:patt< $accu$ ; $p$ >> 76 | ) hd tl 77 | | [] -> assert false 78 | 79 | let record_of_list _loc l = 80 | debug "record_of_list"; 81 | semicolon_patt_of_list _loc 82 | (List.map ( 83 | function 84 | | `Normal (p1, p2) -> <:patt< $p1$ = $p2$ >> 85 | | `Other p -> p 86 | ) l) 87 | 88 | let meta_bool = function 89 | true -> Ast.BTrue 90 | | false -> Ast.BFalse 91 | 92 | let binding_of_pair _loc (p, e) = 93 | debug "binding_of_pair"; 94 | <:binding< $p$ = $e$ >> 95 | 96 | let pair_of_binding = function 97 | <:binding< $p$ = $e$ >> -> (p, e) 98 | | b -> 99 | let _loc = Ast.loc_of_binding b in 100 | failure _loc "Failed assertion in Mm_util.pair_of_binding" 101 | 102 | 103 | let list_of_binding b = 104 | let rec aux b l = 105 | match b with 106 | <:binding< $b1$ and $b2$ >> -> aux b1 (aux b2 l) 107 | | <:binding< >> -> l 108 | | <:binding< $p$ = $e$ >> -> (p, e) :: l 109 | | <:binding< $anti: _ $ >> -> 110 | failure (Ast.loc_of_binding b) 111 | "Antiquotations for let bindings are not supported by mikmatch" 112 | in 113 | aux b [] 114 | 115 | 116 | let match_case_of_tuple _loc (p, w, e) = 117 | debug "match_case_of_tuple"; 118 | match w with 119 | None -> <:match_case< $p$ -> $e$ >> 120 | | Some cond -> <:match_case< $p$ when $cond$ -> $e$ >> 121 | 122 | 123 | let eval_string s = Camlp4.Struct.Token.Eval.string ~strict:() s 124 | 125 | let eval_char s = Camlp4.Struct.Token.Eval.char s 126 | 127 | let rec_flag = function 128 | true -> Ast.ReRecursive 129 | | false -> Ast.ReNil 130 | -------------------------------------------------------------------------------- /pcre/Makefile: -------------------------------------------------------------------------------- 1 | PCRE_DIR = $(shell ocamlfind query pcre) 2 | # PCRE_DIR = something else 3 | 4 | ifndef BINDIR 5 | BINDIR = $(shell dirname `which ocaml`) 6 | endif 7 | 8 | SOURCES = \ 9 | mm_util.ml \ 10 | global_def.mli global_def.ml \ 11 | match.ml \ 12 | pcre_lib.ml \ 13 | syntax_common.ml \ 14 | syntax_pcre.ml 15 | 16 | RESULT = mikmatch_pcre 17 | 18 | OCAMLFLAGS = -dtypes 19 | 20 | OCAMLLDFLAGS = \ 21 | messages.cmo charset.cmo \ 22 | constants.cmo \ 23 | regexp_ast.cmo \ 24 | select_lib.cmo 25 | 26 | INCDIRS = ../common 27 | 28 | USE_CAMLP4 = yes 29 | 30 | COMMON_LIBINSTALL_FILES = \ 31 | pa_mikmatch_pcre.cma pa_mikmatch_pcre.cmo pa_mikmatch_pcre.cmi \ 32 | run_mikmatch_pcre.cmi mikmatch.cmi mikmatch.mli 33 | BC_LIBINSTALL_FILES = \ 34 | run_mikmatch_pcre.cma run_mikmatch_pcre.cmo 35 | NC_LIBINSTALL_FILES = \ 36 | run_mikmatch_pcre.cmxa run_mikmatch_pcre.cmx \ 37 | run_mikmatch_pcre.a run_mikmatch_pcre.o mikmatch.cmx 38 | ALL_LIBINSTALL_FILES = \ 39 | $(COMMON_LIBINSTALL_FILES) $(BC_LIBINSTALL_FILES) $(NC_LIBINSTALL_FILES) 40 | 41 | .PHONY: default all-bc all-nc all force links pa_lib install uninstall \ 42 | topinstall misc-bc misc-nc 43 | 44 | default: links pa_lib misc-bc misc-nc 45 | touch bytecode 46 | touch nativecode 47 | all-bc: links pa_lib misc-bc 48 | touch bytecode 49 | all-nc: links pa_lib misc-nc 50 | touch nativecode 51 | 52 | all: # needed by libinstall 53 | 54 | force: 55 | touch $(SOURCES) 56 | $(MAKE) 57 | 58 | links: mikmatch.mli mikmatch.ml match.ml syntax_common.ml mm_util.ml \ 59 | global_def.mli global_def.ml 60 | 61 | mikmatch.mli: ../common/mikmatch.mli 62 | ln -s $< $@ 63 | mikmatch.ml: ../common/mikmatch.ml 64 | ln -s $< $@ 65 | syntax_common.ml: ../common/syntax_common.ml 66 | ln -s $< $@ 67 | match.ml: ../common/match.ml 68 | ln -s $< $@ 69 | mm_util.ml: ../common/mm_util.ml 70 | ln -s $< $@ 71 | global_def.mli: ../common/global_def.mli 72 | ln -s $< $@ 73 | global_def.ml: ../common/global_def.ml 74 | ln -s $< $@ 75 | 76 | pa_lib: 77 | $(MAKE) RESULT=pa_mikmatch_pcre pabc bcl 78 | 79 | install: topinstall 80 | if test -f nativecode; \ 81 | then \ 82 | $(MAKE) "LIBINSTALL_FILES=$(ALL_LIBINSTALL_FILES)" libinstall;\ 83 | else \ 84 | $(MAKE) "LIBINSTALL_FILES=$(COMMON_LIBINSTALL_FILES) $(BC_LIBINSTALL_FILES)" libinstall; \ 85 | fi 86 | 87 | uninstall: 88 | $(MAKE) libuninstall 89 | 90 | misc-bc: 91 | ocamlc -c mikmatch.mli 92 | ocamlc -a -o run_mikmatch_pcre.cma -I $(PCRE_DIR) \ 93 | mikmatch.ml run_mikmatch_pcre.ml 94 | ocamlfind ocamlmktop -o mikmatch_pcre.top \ 95 | -linkpkg -package camlp-streams,camlp4.lib,pcre,unix \ 96 | pa_mikmatch_pcre.cma run_mikmatch_pcre.cma 97 | 98 | misc-nc: 99 | ocamlc -c mikmatch.mli 100 | ocamlopt -a -o run_mikmatch_pcre.cmxa -I $(PCRE_DIR) \ 101 | mikmatch.ml run_mikmatch_pcre.ml 102 | 103 | 104 | .PHONY: test simple-test more-tests test-install 105 | test: simple-test more-tests 106 | 107 | # Toplevel (preinstall test) 108 | simple-test: 109 | camlp4o ./pa_mikmatch_pcre.cma -printer o test.ml > test.ppo 110 | ocamlmktop -o mikmatch_pcre.test -I +camlp4 -I . -I $(PCRE_DIR) \ 111 | dynlink.cma \ 112 | camlp4o.cma \ 113 | pa_mikmatch_pcre.cma pcre.cma unix.cma run_mikmatch_pcre.cma 114 | 115 | # Preinstall test 116 | more-tests: 117 | camlp4o ./pa_mikmatch_pcre.cma -printer o -direct test.ml > test.ppo 118 | ocamlopt \ 119 | -pp 'camlp4o ./pa_mikmatch_pcre.cma' \ 120 | -I $(PCRE_DIR) pcre.cmxa run_mikmatch_pcre.cmxa \ 121 | test.ml -o test 122 | ./test 123 | 124 | # Postinstall test 125 | test-install: 126 | ocamlfind ocamlopt \ 127 | -syntax camlp4o \ 128 | -package pcre,mikmatch_pcre\ 129 | -linkpkg \ 130 | test.ml -o test 131 | ./test 132 | 133 | # Debugging 134 | .PHONY: test2 135 | test2: 136 | camlp4o ./pa_mikmatch_pcre.cma -printer o test2.ml > test2.ppo 137 | 138 | TRASH = \ 139 | *~ *.ppo *.cm[ioxa] *.cmxa *.o *.a *.top \ 140 | *.test test test.more mikmatch bytecode nativecode mikmatch_pcre.ml 141 | 142 | 143 | OCAMLMAKEFILE = ../OCamlMakefile 144 | include $(OCAMLMAKEFILE) 145 | -------------------------------------------------------------------------------- /pcre/run_mikmatch_pcre.ml: -------------------------------------------------------------------------------- 1 | (* $Id$ *) 2 | 3 | exception Mikmatch_exit 4 | 5 | open Pcre 6 | 7 | let irflags = rflags [] 8 | 9 | external make_substrings : string * int array -> substrings = "%identity" 10 | 11 | let search rex f ?(pos = 0) subj = 12 | let _, offset_vector = make_ovector rex in 13 | let substrings = make_substrings (subj, offset_vector) in 14 | let subj_len = String.length subj in 15 | let rec loop cur_pos = 16 | if 17 | try 18 | unsafe_pcre_exec 19 | irflags rex ~pos:cur_pos ~subj_start:0 ~subj offset_vector None; true 20 | with Not_found -> false 21 | then 22 | (f substrings; 23 | let first = offset_vector.(0) in 24 | let last = offset_vector.(1) in 25 | if first < subj_len then 26 | loop (max (first + 1) last)) in 27 | loop pos 28 | 29 | let scan ~full rex pos ~ftext ~fmatch subj = 30 | let _, offset_vector = make_ovector rex in 31 | let substrings = make_substrings (subj, offset_vector) in 32 | let subj_len = String.length subj in 33 | let rec loop previous_last cur_pos = 34 | if 35 | try 36 | unsafe_pcre_exec 37 | irflags rex ~pos:cur_pos ~subj_start:0 ~subj offset_vector None; true 38 | with Not_found -> 39 | let last = String.length subj in 40 | if full || last > previous_last then 41 | ftext (String.sub subj previous_last (last - previous_last)); 42 | false 43 | then 44 | (let first = offset_vector.(0) in 45 | let last = offset_vector.(1) in 46 | if full || first > pos then 47 | ftext (String.sub subj previous_last (first - previous_last)); 48 | fmatch substrings; 49 | if first < subj_len then 50 | loop last (max (first + 1) last) 51 | else if full then 52 | ftext "") in 53 | loop pos pos 54 | 55 | let map rex f ?(pos = 0) ?(full = true) subj = 56 | let l = ref [] in 57 | let ftext s = l := `Text s :: !l 58 | and fmatch substrings = l := f substrings :: !l in 59 | scan ~full rex pos ~ftext ~fmatch subj; 60 | List.rev !l 61 | 62 | let collect rex f ?(pos = 0) subj = 63 | let l = ref [] in 64 | let f substrings = l := f substrings :: !l in 65 | search rex f ~pos subj; 66 | List.rev !l 67 | 68 | let split rex ?(full = false) ?(pos = 0) subj = 69 | let l = ref [] in 70 | let ftext s = l := s :: !l 71 | and fmatch substrings = () in 72 | scan ~full rex pos ~ftext ~fmatch subj; 73 | List.rev !l 74 | 75 | let bquote_char buf c = 76 | match c with 77 | '\\' | '^' | '$' | '.' | '[' | ']' | '|' 78 | | '(' | ')' | '?' | '*' | '+' | '{' | '}' -> Printf.bprintf buf "\\%c" c 79 | | '\000' -> Buffer.add_string buf "\\000" 80 | | c -> Buffer.add_char buf c 81 | 82 | (* Pcre.quote does not escape null characters (which terminate C strings) *) 83 | let quote_string s = 84 | let len = String.length s in 85 | let buf = Buffer.create (2 * len) in 86 | for i = 0 to len - 1 do 87 | bquote_char buf (String.unsafe_get s i) 88 | done; 89 | Buffer.contents buf 90 | 91 | let nocase s = 92 | let len = String.length s in 93 | let buf = Buffer.create (2 * len) in 94 | for i = 0 to len - 1 do 95 | let c = s.[i] in 96 | let cl = Char.lowercase_ascii c 97 | and cu = Char.uppercase_ascii c in 98 | if cl <> cu then (* in this case, cl and cu are letters *) 99 | Printf.bprintf buf "[%c%c]" cl cu 100 | else 101 | bquote_char buf c 102 | done; 103 | Buffer.contents buf 104 | 105 | module Mem = 106 | struct 107 | (* memoization table with periodic removal of old items *) 108 | 109 | type ('a, 'b) t = 110 | { mutable date : float; 111 | mutable last_cleanup : float; 112 | opt_size : int; 113 | max_size : int; 114 | tbl : ('a, ('b * float ref)) Hashtbl.t } 115 | 116 | let create n = 117 | if n < 1 then invalid_arg "Memo.create" 118 | else 119 | { date = 0.; 120 | last_cleanup = 0.; 121 | opt_size = n; 122 | max_size = n + n; 123 | tbl = Hashtbl.create (n + n) } 124 | 125 | (* removal of anything which is too old *) 126 | let cleanup t = 127 | let t0 = t.last_cleanup in 128 | t.last_cleanup <- t.date; 129 | let tbl = t.tbl in 130 | let trash = 131 | Hashtbl.fold 132 | (fun key (data, last_access) trash -> 133 | if !last_access < t0 then 134 | key :: trash 135 | else trash) 136 | tbl 137 | [] in 138 | List.iter (Hashtbl.remove tbl) trash 139 | 140 | (* unsafe addition of data (key should not be in the table) *) 141 | let unsafe_add t key data = 142 | let date = t.date +. 1. in 143 | t.date <- date; 144 | Hashtbl.add t.tbl key (data, ref date); 145 | let size = Hashtbl.length t.tbl in 146 | if size > t.max_size then 147 | cleanup t 148 | else 149 | if size = t.opt_size + 1 then 150 | t.last_cleanup <- t.date 151 | 152 | let add t key data = 153 | if Hashtbl.mem t.tbl key then 154 | invalid_arg "Memo.add" 155 | else 156 | unsafe_add t key data 157 | 158 | let find t key = 159 | let (data, last_access) = Hashtbl.find t.tbl key in 160 | let date = t.date in 161 | last_access := date; 162 | t.date <- date +. 1.; 163 | data 164 | 165 | let get t key lazy_data = 166 | try find t key 167 | with Not_found -> 168 | let data = Lazy.force lazy_data in 169 | unsafe_add t key data; 170 | data 171 | 172 | let clear t = 173 | Hashtbl.clear t.tbl; 174 | t.date <- 0.; 175 | t.last_cleanup <- 0. 176 | 177 | end 178 | -------------------------------------------------------------------------------- /str/test1.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | let _ = function RE "" -> () 4 | 5 | let x = 1 in ();; 6 | let / alpha / = "a" in ();; 7 | 8 | (* Definition of regular expressions for further use *) 9 | RE space = [' ' '\t' '\n' '\r'] 10 | RE not_space = _ # space 11 | RE digit = ['0'-'9'] 12 | RE letter = ['A'-'Z' '_' 'a'-'z'] 13 | RE word = letter+ 14 | 15 | (* 16 | (* Inclusion of file in the same syntax, e.g. a library of user-defined 17 | regular expressions. (known problem with error location) *) 18 | USE "my_regexps.ml" (* defines `word' and `digit' *) 19 | *) 20 | 21 | (* Extended pattern-matching in the following constructs: 22 | match ... with ... 23 | try ... with ... 24 | function ... 25 | *) 26 | 27 | let test expected a b c = 28 | printf "[case %i] " expected; flush stdout; 29 | (match a, b, c with 30 | None, (None | Some (RE space* )), None -> printf "case 1\n" 31 | | Some ({ contents = [| RE (word as x); _; y |]}), 32 | (Some ("test" as z | RE word space (word as z))), 33 | None -> printf "case 2: %S %S %S\n" x y z 34 | | _, _, Some (RE space* (['0'-'9']+ as n)) -> printf "case 3: %s\n" n 35 | | _ -> printf "case 4\n"); 36 | flush stdout 37 | 38 | let _ = 39 | printf "Tests (match ... with):\n"; flush stdout; 40 | test 1 None (Some " ") None; 41 | 42 | test 2 43 | (Some (ref [| "alpha"; "beta"; "2 gamma" |])) 44 | (Some "Hello World!") 45 | None; 46 | 47 | test 3 None None (Some " 123 "); 48 | 49 | test 4 (Some (ref [| |])) (Some "") (Some "abc") 50 | 51 | let _ = 52 | match "" with 53 | (RE (("a" as a) | ("b" as a))) | a -> a 54 | 55 | let hello_who s = 56 | match s with 57 | RE _* ['h''H']"ello" ","? space* 58 | ((word | space)* word as someone) -> String.capitalize someone 59 | 60 | | _ -> "nobody" 61 | 62 | let _ = 63 | printf "Extraction of the recipient's name\n"; 64 | List.iter (fun s -> 65 | printf "Hello who: %S\n" s; 66 | printf " -> %S\n" (hello_who s); 67 | flush stdout) 68 | [ "Hello World!"; 69 | "First of all, hello everybody."; 70 | "*** hello world ***"; 71 | "Hello, Caml riders!" ] 72 | 73 | let _ = 74 | printf "Test (local and global bindings):\n"; flush stdout; 75 | match "" with 76 | (RE (word as x | space+ (word as x))* ) | _ -> 77 | printf "Passed.\n" 78 | 79 | let _ = 80 | printf "Test (repetition range + end of line):\n"; flush stdout; 81 | let f s = 82 | match s with 83 | RE '-'? digit{1-4} eol -> printf "%S has less than 5 digits.\n" s 84 | | RE '-'? digit{5-} eol -> printf "%S has at least 5 digits.\n" s 85 | | _ -> printf "%S is not a number.\n" s in 86 | List.iter f ["123"; "1234"; "12345"; "12345678"; "-1234"; "*1"; "1*"; 87 | "9\n*" ] 88 | 89 | let test f (expected, s) = 90 | let (success, result) = f s in 91 | let passed = expected = success in 92 | if passed then (printf "[OK] %s%s\n" s 93 | (match result with None -> "" 94 | | Some x -> sprintf " -> %s"x); 95 | flush stdout) 96 | else 97 | (print_endline (s ^ "Failed"); flush stdout; failwith s) 98 | 99 | let () = 100 | printf "Test (no case: the ~ operator):\n"; flush stdout; 101 | List.iter 102 | (test (function 103 | RE "hello"~ " World!" -> true, None 104 | | _ -> false, None)) 105 | [ true, "Hello World!"; 106 | true, "hElLO World!"; 107 | false, "hello WORLD!" ] 108 | 109 | let () = 110 | printf "Test (try ... with):\n"; flush stdout; 111 | try failwith "Hello World!" 112 | with 113 | Failure RE "Hello" space* (word as w) -> printf "OK: %s\n" w 114 | | Failure s -> printf "Failure: %s\n" s 115 | 116 | let () = 117 | printf "Test (function ... -> ...):\n"; flush stdout; 118 | let f = 119 | function 120 | RE "Hello" space* (word as w) -> printf "OK: %s\n" w 121 | | _ -> printf "Error\n" in 122 | f "Hello Everybody"; 123 | f "Hello Caml!" 124 | 125 | let () = 126 | printf "Test (backreferences):\n"; flush stdout; 127 | let f s = 128 | match s with 129 | RE 130 | (digit+ as x | (word as x)) (* x = global id *) 131 | (" " as sp !sp)* (* sp = local id *) 132 | !x -> true, Some x 133 | | _ -> false, None in 134 | List.iter (test f) 135 | [ true, "123123"; 136 | false, "123 123"; 137 | true, "123 123"; 138 | true, "aaaa"; 139 | false, "abc"; 140 | false, "ab1ab1" ] 141 | 142 | 143 | let print_named_groups l = 144 | List.iter 145 | (fun (name, positions) -> 146 | printf "%s:" name; 147 | List.iter (fun i -> printf " %i" i) positions; 148 | printf "\n") 149 | l 150 | 151 | (* Lower level feature: RE_STR returns the source of the regexp, 152 | to be used with specific compilation or search options. *) 153 | 154 | let _ = 155 | let (src, named_groups) = 156 | RE_STR 157 | (("a"|"b"+)? digit{2}) as start 158 | (space* word)+ ( digit{1} (word as last_word) 159 | | digit{3} (word as last_word)) in 160 | 161 | printf "Str regexp source: %S\n" src; 162 | printf "Named groups and their locations:\n"; 163 | print_named_groups named_groups; 164 | flush stdout 165 | 166 | let charset_union = RE_STR digit | space | "a" | ['A'-'Z'] 167 | 168 | 169 | let _ = 170 | printf "Debugging test 1:\n"; flush stdout; 171 | match ["Coucou Martin"] with 172 | [ (RE (word as x) space (word as y)) | ("zobi" as x as y) ] 173 | | ("abc" as x :: y :: _) -> printf "Trop cool x=%S y=%S\n" x y 174 | | _ -> printf "Bof bof...\n" 175 | 176 | let _ = 177 | printf "Debugging test 2:\n"; flush stdout; 178 | match ["Hello"; "!"], 123 with 179 | "***" :: _, _ -> printf "Hop!\n" 180 | 181 | | RE word as w :: RE _ as c :: _, (122|123) when w <> "Bye" -> 182 | printf "Cool: %S %S\n" w c 183 | 184 | | [RE ""], _ -> printf "Glouglou\n" 185 | | _ -> printf "Sorry\n" 186 | 187 | let _ = 188 | printf "Debugging test 3:\n"; flush stdout; 189 | (match "hello" with 190 | (RE ' '{10}) 191 | | RE _* ' '{10} eol -> () 192 | | _ -> ()); 193 | printf "Passed.\n" 194 | 195 | let _ = 196 | match Some "x" with 197 | Some ((RE "a") | ("b"|"c")) -> true 198 | | _ -> false 199 | 200 | let _ = 201 | match "axxxxyz", 333 with 202 | RE "a" as s ("bc" %pos | "x"+ %pos) (_* as s'), _ 203 | | (s as s', pos) -> 204 | printf "%s, %i, %s\n%!" s pos s' 205 | 206 | let _ = 207 | match "123" with 208 | RE digit+ as n := fun _ -> 1 -> n 209 | | _ -> 2 210 | 211 | 212 | 213 | (* Parametrized regexps *) 214 | let _ = 215 | let find s = 216 | match "abbcdefgghijkl" with 217 | RE _* @s @s -> assert true 218 | | _ -> assert false in 219 | find "b"; 220 | find "g" 221 | 222 | -------------------------------------------------------------------------------- /pcre/syntax_pcre.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | 3 | open Printf 4 | 5 | open Camlp4.PreCast 6 | open Syntax 7 | 8 | open Regexp_ast 9 | open Syntax_common 10 | open Select_lib 11 | open Match 12 | 13 | let expand_macro ?(sharing = false) ?(anchored = false) _loc re e f = 14 | warnings re; 15 | let (num, re_name) = Constants.new_regexp () in 16 | let var_name = var_of_regexp re_name in 17 | let (re_args, re_source, named_groups, postbindings) = 18 | (!lib).process_regexp ~sharing _loc re re_name in 19 | let get_re = Match.make_get_re _loc re_name re_args in 20 | add_compiled_regexp ~anchored postbindings 21 | _loc re_name num re_args re_source named_groups; 22 | 23 | !(lib).wrap_match 24 | (f _loc re_name get_re var_name named_groups 25 | ((!lib).wrap_user_case e)) 26 | 27 | let check_assertion ~lookahead positive re = 28 | let rec check ~branched = function 29 | Bind (_loc, e, name, conv) -> 30 | if not positive then 31 | Messages.not_visible _loc [name] "negative assertion"; 32 | check ~branched e 33 | | Bind_pos (_loc, name) -> 34 | if not positive then 35 | Messages.not_visible _loc [name] "negative assertion"; 36 | 0 37 | | Epsilon _ -> 0 38 | | Characters _ -> 1 39 | | Special (_loc, s, (name, Some len)) -> len 40 | | Special (_loc, s, (name, None)) -> 41 | if not lookahead then 42 | Messages.invalid_lookbehind _loc 43 | (sprintf "These patterns (%s)" name) "" 44 | else 0 45 | | Backref (_loc, _) -> 46 | if not lookahead then 47 | Messages.invalid_lookbehind _loc "Backreferences" "" 48 | else 0 49 | | Variable (_loc, _) | Nocase_variable (_loc, _) -> 50 | if not lookahead && branched then 51 | Messages.invalid_lookbehind _loc "Variables in optional branches" "" 52 | else 0 53 | | Sequence (_loc, e1, e2) -> check ~branched e1 + check ~branched e2 54 | | Alternative (_loc, e1, e2, _, _) -> 55 | let len1 = check ~branched:true e1 in 56 | let len2 = check ~branched:true e2 in 57 | if not lookahead && len1 <> len2 then 58 | Messages.invalid_lookbehind _loc 59 | "Alternatives of different length" "" 60 | else max len1 len2 61 | | Repetition (_loc, (kind, greediness), e) -> 62 | (match kind with 63 | Range (x, None) -> x * check ~branched e 64 | | _ -> 65 | if not lookahead then 66 | Messages.invalid_lookbehind _loc 67 | "Repetitions of variable length" "" 68 | else check ~branched e) 69 | | Lookahead (_loc, _, e) 70 | | Lookbehind (_loc, _, e) -> check ~branched e 71 | | Possessive (_, e) 72 | | Closed e -> check ~branched e in 73 | 74 | ignore (check ~branched:false re) 75 | 76 | 77 | 78 | 79 | let lookahead _loc bopt re = 80 | let positive = bopt = None in 81 | check_assertion ~lookahead:true positive re; 82 | Lookahead (_loc, positive, if positive then re else Closed re) 83 | 84 | let lookbehind _loc bopt re = 85 | let positive = bopt = None in 86 | check_assertion ~lookahead:false positive re; 87 | Lookbehind (_loc, positive, if positive then re else Closed re) 88 | 89 | let seq _loc e = <:expr< do { $e$ } >> 90 | 91 | let extend_common () = 92 | let expr_level = "top" in 93 | EXTEND Gram 94 | expr: LEVEL $expr_level$ [ 95 | [ "RE_PCRE"; re = regexp -> 96 | warnings re; 97 | let (re_args, re_source, named_groups, postbindings) = 98 | Pcre_lib.lib.process_regexp _loc ~sharing:false re "" in 99 | 100 | let re_fragments = Match.get_re_fragments _loc re_source in 101 | <:expr< ( $re_fragments$, 102 | $pp_named_groups _loc named_groups$ ) >> 103 | 104 | 105 | | "REPLACE"; re = regexp; "->"; e = sequence -> 106 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_replace 107 | 108 | | "SEARCH"; re = regexp; "->"; e = sequence -> 109 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_search 110 | 111 | | "MAP"; re = regexp; "->"; e = sequence -> 112 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_map 113 | 114 | | "COLLECT"; re = regexp; "->"; e = sequence -> 115 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_collect 116 | 117 | | "COLLECTOBJ"; re = regexp -> 118 | expand_macro _loc re <:expr< assert false >> Pcre_lib.macro_collectobj 119 | 120 | | "SPLIT"; re = regexp -> 121 | expand_macro _loc re <:expr< assert false >> Pcre_lib.macro_split 122 | 123 | | "REPLACE_FIRST"; re = regexp; "->"; e = sequence -> 124 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_replace_first 125 | 126 | | "SEARCH_FIRST"; re = regexp; "->"; e = sequence -> 127 | expand_macro ~sharing:true _loc re (seq _loc e) 128 | Pcre_lib.macro_search_first 129 | 130 | | "MATCH"; re = regexp; "->"; e = sequence -> 131 | expand_macro ~sharing:true ~anchored:true _loc re (seq _loc e) 132 | Pcre_lib.macro_match 133 | 134 | | "FILTER"; re = regexp -> 135 | expand_macro ~sharing:true ~anchored:true _loc re 136 | <:expr< assert false >> Pcre_lib.macro_filter 137 | 138 | | "CAPTURE"; re = regexp -> 139 | expand_macro ~sharing:true ~anchored:true _loc re 140 | <:expr< assert false >> Pcre_lib.macro_capture 141 | ] 142 | ]; 143 | 144 | regexp: LEVEL "postop" [ 145 | [ re = regexp; "*"; UIDENT "Lazy" -> 146 | Repetition (_loc, (Star, false), Closed re) 147 | | re = regexp; "+"; UIDENT "Lazy" -> 148 | Repetition (_loc, (Plus, false), Closed re) 149 | | re = regexp; "?"; UIDENT "Lazy" -> 150 | Repetition (_loc, (Option, false), Closed re) 151 | | r = regexp; "{"; (rng, rng_loc) = range; "}"; UIDENT "Lazy" -> 152 | Repetition (_loc, (Range rng, false), Closed r) 153 | | re = regexp; UIDENT "Possessive" -> 154 | Possessive (_loc, re) ] 155 | ]; 156 | 157 | regexp: LEVEL "simple" [ 158 | [ "_" -> Characters (_loc, Charset.full) 159 | | "<"; 160 | x = OPT [ b1 = OPT [ x = UIDENT "Not" -> x ]; 161 | re1 = regexp -> (b1, re1) ]; 162 | y = OPT [ "."; 163 | r2 = OPT [ b2 = OPT [ x = UIDENT "Not" -> x ]; 164 | re2 = regexp -> (b2, re2) ] -> r2 ]; 165 | ">" -> 166 | match x, y with 167 | None, None 168 | | None, Some None -> Epsilon _loc 169 | | None, Some (Some (b2, re2)) -> lookahead _loc b2 re2 170 | | Some (b1, re1), None -> lookahead _loc b1 re1 171 | | Some (b1, re1), Some None -> lookbehind _loc b1 re1 172 | | Some (b1, re1), Some (Some (b2, re2)) -> 173 | Sequence (_loc, lookbehind _loc b1 re1, lookahead _loc b2 re2) ] 174 | ]; 175 | 176 | END;; 177 | 178 | let extend_regular () = extend_common () 179 | (* 180 | let extend_revised () = extend_common () 181 | *) 182 | 183 | let _ = 184 | select_lib Pcre_lib.lib; 185 | 186 | Camlp4.Options.add "-thread" 187 | (Arg.Unit ( 188 | fun () -> 189 | select_lib Pcre_lib.lib_mt; 190 | eprintf "Warning: -thread is deprecated.\n/%!" 191 | ) 192 | ) 193 | " Deprecated option that protects access to shared data with a mutex. \ 194 | Currently only patterns containing @ are concerned."; 195 | 196 | (* How to test if the current syntax is the regular or revised one? *) 197 | extend_regular () 198 | -------------------------------------------------------------------------------- /common/syntax_common.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | 3 | 4 | open Printf 5 | 6 | open Camlp4.PreCast 7 | open Syntax 8 | 9 | open Mm_util 10 | open Regexp_ast 11 | open Select_lib 12 | open Match 13 | 14 | let regexp = Gram.Entry.mk "regexp";; 15 | let regexp_match_case = Gram.Entry.mk "regexp_match_case";; 16 | let range = Gram.Entry.mk "range";; 17 | 18 | let seq _loc e = 19 | match e with 20 | <:expr< $_$ ; $_$ >> -> <:expr< do { $e$ } >> 21 | | _ -> e 22 | 23 | 24 | let extend_common () = 25 | (try DELETE_RULE Gram patt: LIDENT END 26 | with _rule_not_found -> ()); 27 | 28 | (try 29 | DELETE_RULE Gram 30 | expr: "let"; opt_rec; binding; "in"; expr LEVEL ";" 31 | END 32 | with _rule_not_found -> 33 | Messages.cannot_delete_rule "(1)"); 34 | 35 | (try 36 | DELETE_RULE Gram 37 | str_item: "let"; opt_rec; binding; "in"; expr 38 | END 39 | with _rule_not_found -> 40 | Messages.cannot_delete_rule "(2)"); 41 | 42 | (try 43 | DELETE_RULE Gram 44 | str_item: "let"; opt_rec; binding 45 | END 46 | with _rule_not_found -> 47 | Messages.cannot_delete_rule "(3)"); 48 | 49 | 50 | EXTEND Gram 51 | GLOBAL: 52 | str_item patt expr 53 | regexp regexp_match_case range; 54 | 55 | str_item: [ 56 | [ "RE"; name = LIDENT; "="; re = regexp -> 57 | warnings re; 58 | Hashtbl.add named_regexps name re; 59 | <:str_item< >> ] 60 | ]; 61 | 62 | special_patt: [ 63 | [ "RE"; re = regexp -> `Regexp re 64 | | "/"; re = regexp; "/" -> `Regexp re 65 | | "%"; name = uid_path; 66 | arg = OPT patt LEVEL "simple" -> `View (name, arg) ] 67 | ]; 68 | 69 | uid_path: [ 70 | [ l = LIST1 [ x = UIDENT -> x ] SEP "." -> 71 | match List.rev l with 72 | basename :: modname -> (_loc, basename, List.rev modname) 73 | | _ -> assert false ] 74 | ]; 75 | 76 | patt: LEVEL "simple" [ 77 | [ x = special_patt -> handle_special_patt _loc x ] 78 | ]; 79 | 80 | expr: LEVEL "top" [ 81 | [ "let"; o = OPT "rec"; b = binding; "in"; e2 = sequence -> 82 | handle_let_bindings _loc (o <> None) (list_of_binding b) 83 | (seq _loc e2) ] 84 | ]; 85 | 86 | expr: LEVEL "top" [ 87 | [ "let"; LIDENT "view"; 88 | name = UIDENT; "="; e1 = expr; "in"; e2 = sequence -> 89 | <:expr< let $lid:"view_" ^ name$ = $e1$ in $seq _loc e2$ >> ] 90 | ]; 91 | 92 | expr: LEVEL "top" [ 93 | [ "let"; "try"; o = OPT "rec"; l = LIST1 let_binding SEP "and"; 94 | "in"; e2 = sequence; 95 | "with"; pwel = LIST1 lettry_case SEP "|" -> 96 | let_try_in _loc o (List.map pair_of_binding l) (seq _loc e2) pwel ] 97 | ]; 98 | 99 | str_item: LEVEL "top" [ 100 | [ 101 | "let"; o = OPT "rec"; b = binding -> 102 | handle_value_bindings _loc (o <> None) (list_of_binding b) 103 | 104 | | "let"; o = OPT "rec"; b = binding; "in"; e2 = sequence -> 105 | let e = 106 | handle_let_bindings _loc (o <> None) 107 | (list_of_binding b) 108 | (seq _loc e2) in 109 | <:str_item< $exp:e$ >> 110 | 111 | | "let"; LIDENT "view"; name = UIDENT; "="; e1 = expr -> 112 | <:str_item< value $lid:"view_" ^ name$ = $e1$ >> 113 | 114 | | "let"; "try"; o = OPT "rec"; b = binding; 115 | "in"; e2 = sequence; 116 | "with"; pwel = LIST1 lettry_case SEP "|" -> 117 | let e = 118 | let_try_in _loc o (list_of_binding b) (seq _loc e2) pwel in 119 | <:str_item< $exp:e$ >> 120 | ] 121 | ]; 122 | 123 | lettry_case: [ 124 | [ p = patt; 125 | w = OPT [ "when"; e = expr -> e ]; "->"; 126 | e = sequence -> 127 | match_case_of_tuple _loc (p, w, <:expr< fun () -> $seq _loc e$ >>) ] 128 | ]; 129 | 130 | regexp_match_case: [ 131 | [ x1 = patt; 132 | w = OPT [ "when"; e = expr -> e ]; "->"; 133 | x2 = sequence -> 134 | (_loc, x1, w, seq _loc x2) ] 135 | ]; 136 | 137 | regexp: [ 138 | [ r = regexp; "as"; i = LIDENT; 139 | conv = 140 | OPT [ ":"; s = LIDENT -> 141 | (match s with 142 | "int" -> `Int 143 | | "float" -> `Float 144 | | "option" -> `Option 145 | | s -> Messages.invalid_converter _loc s) 146 | | ":="; e = expr -> `Custom e 147 | | "="; e = expr -> `Value e ] -> 148 | Bind (_loc, r, i, conv) ] 149 | | [ r1 = regexp; "|"; r2 = regexp -> alternative _loc r1 r2 ] 150 | | [ r1 = regexp; r2 = regexp -> Sequence (_loc, r1, r2) ] 151 | 152 | | "postop" NONA 153 | [ r = regexp; "*" -> Repetition (_loc, (Star, true), Closed r) 154 | | r = regexp; "+" -> Repetition (_loc, (Plus, true), Closed r) 155 | | r = regexp; "?" -> Repetition (_loc, (Option, true), Closed r) 156 | | r = regexp; "~" -> nocase r 157 | | r = regexp; "{"; (rng, rng_loc) = range; "}" -> 158 | if !(lib).unfold_range then repeat rng_loc (Closed r) rng 159 | else Repetition (_loc, (Range rng, true), Closed r) ] 160 | 161 | | "binop" LEFTA 162 | [ r1 = regexp; "#"; r2 = regexp -> 163 | let msg = " term is not a set of characters" in 164 | let set1 = Regexp_ast.as_charset _loc ("left" ^ msg) r1 in 165 | let set2 = Regexp_ast.as_charset _loc ("right" ^ msg) r2 in 166 | Characters (_loc, Charset.diff set1 set2) ] 167 | 168 | | "preop" NONA 169 | [ "!"; ident = LIDENT -> Backref (_loc, ident) 170 | | "@"; e = expr LEVEL "." -> Variable (_loc, e) ] 171 | 172 | | "simple" NONA 173 | [ "["; set = charset; "]" -> Characters (_loc, set) 174 | | s = string -> s 175 | | name = LIDENT -> find_named_regexp _loc name 176 | | "%"; name = LIDENT -> Bind_pos (_loc, name) 177 | | "("; r = regexp; ")" -> r 178 | ] 179 | ]; 180 | 181 | string: [ 182 | [ s = STRING -> Regexp_ast.of_string _loc (eval_string s) ] 183 | | [ c = CHAR -> Characters (_loc, Charset.singleton (eval_char c)) ] 184 | ]; 185 | 186 | charset: [ 187 | [ "^"; x = charset -> Charset.complement x ] 188 | | [ c1 = CHAR; "-"; c2 = CHAR -> 189 | Charset.range (eval_char c1) (eval_char c2) 190 | 191 | | c = CHAR -> Charset.singleton (eval_char c) 192 | | s = STRING -> Charset.of_string (eval_string s) 193 | | name = LIDENT -> 194 | Regexp_ast.as_charset _loc "not a set of characters" 195 | (find_named_regexp _loc name) 196 | | set1 = charset; set2 = charset -> Charset.union set1 set2 197 | ] 198 | ]; 199 | 200 | range: [ 201 | [ mini = INT; 202 | maxi = 203 | OPT ["-"; maxi = 204 | OPT [ maxi = INT -> int_of_string maxi ] -> maxi] -> 205 | let mini = int_of_string mini in 206 | (mini, maxi), _loc 207 | | mini = INT; "+" -> (int_of_string mini, Some None), _loc ] 208 | ]; 209 | 210 | (* Reserved identifiers in patterns *) 211 | patt: LEVEL "simple" [ 212 | [ s = LIDENT -> 213 | if Match.is_reserved s then 214 | Messages.reserved_identifier _loc Constants.reserved_prefix s 215 | else <:patt< $lid:s$ >> ] 216 | ]; 217 | 218 | END;; 219 | 220 | let extend_regular () = 221 | extend_common (); 222 | (try 223 | DELETE_RULE Gram 224 | expr: "function"; match_case 225 | END 226 | with _rule_not_found -> 227 | Messages.cannot_delete_rule "(reg 1)"); 228 | 229 | (try 230 | DELETE_RULE Gram 231 | expr: "match"; sequence; "with"; match_case 232 | END 233 | with _rule_not_found -> 234 | Messages.cannot_delete_rule "(reg 2)"); 235 | 236 | EXTEND Gram 237 | expr: LEVEL "top" [ 238 | [ "match"; target = sequence; "with"; OPT "|"; 239 | cases = LIST1 regexp_match_case SEP "|" -> 240 | output_match _loc (seq _loc target) cases 241 | | "try"; e = expr; "with"; OPT "|"; 242 | cases = LIST1 regexp_match_case SEP "|" -> 243 | output_try _loc e cases 244 | | "function"; OPT "|"; cases = LIST1 regexp_match_case SEP "|" -> 245 | output_function _loc cases ] 246 | ]; 247 | END 248 | 249 | 250 | let () = 251 | init_named_regexps (); 252 | 253 | Camlp4.Options.add "-tailrec" 254 | (Arg.Set tailrec) 255 | " produce code that preserves tail-recursivity (default)"; 256 | 257 | Camlp4.Options.add "-direct" 258 | (Arg.Clear tailrec) 259 | " produce code that does not try to preserve tail-recursivity"; 260 | 261 | (* How to test if the current syntax is the regular or revised one? *) 262 | extend_regular () 263 | -------------------------------------------------------------------------------- /common/mikmatch.ml: -------------------------------------------------------------------------------- 1 | module Text = 2 | struct 3 | 4 | exception Internal_exit 5 | 6 | let iter_lines_of_channel f ic = 7 | try 8 | while true do 9 | let line = 10 | try input_line ic 11 | with End_of_file -> raise Internal_exit in 12 | f line 13 | done 14 | with Internal_exit -> () 15 | 16 | 17 | let iter_lines_of_file f file = 18 | let ic = open_in file in 19 | try 20 | iter_lines_of_channel f ic; 21 | close_in ic 22 | with exn -> 23 | close_in_noerr ic; 24 | raise exn 25 | 26 | 27 | let lines_of_channel ic = 28 | let l = ref [] in 29 | iter_lines_of_channel (fun line -> l := line :: !l) ic; 30 | List.rev !l 31 | 32 | let lines_of_file file = 33 | let l = ref [] in 34 | iter_lines_of_file (fun line -> l := line :: !l) file; 35 | List.rev !l 36 | 37 | let channel_contents ic = 38 | let len = 2048 in 39 | let buf = Bytes.create len in 40 | let rec loop size accu = 41 | match input ic buf 0 len with 42 | 0 -> (accu, size) 43 | | n when n = len -> loop (size + n) (Bytes.copy buf :: accu) 44 | | n -> loop (size + n) (Bytes.sub buf 0 n :: accu) in 45 | 46 | let accu, size = loop 0 [] in 47 | let result = Bytes.create size in 48 | let rec loop2 last_pos = function 49 | [] -> assert (last_pos = 0) 50 | | s :: rest -> 51 | let len = Bytes.length s in 52 | let pos = last_pos - len in 53 | Bytes.blit s 0 result pos len; 54 | loop2 pos rest in 55 | loop2 size accu; 56 | Bytes.unsafe_to_string result 57 | 58 | 59 | let file_contents ?(bin = false) file = 60 | let ic = open_in file in 61 | let s = 62 | try channel_contents ic 63 | with exn -> 64 | close_in_noerr ic; 65 | raise exn in 66 | close_in ic; 67 | s 68 | 69 | let save file data = 70 | let oc = open_out_bin file in 71 | (try 72 | output_string oc data; 73 | with exn -> 74 | close_out_noerr oc; 75 | raise exn); 76 | close_out oc 77 | 78 | let save_lines file lines = 79 | let oc = open_out_bin file in 80 | (try 81 | List.iter (fun s -> output_string oc s; output_char oc '\n') lines; 82 | with exn -> 83 | close_out_noerr oc; 84 | raise exn); 85 | close_out oc 86 | 87 | 88 | exception Skip 89 | 90 | let rev_map f l = 91 | let rec loop f accu = function 92 | [] -> accu 93 | | hd :: tl -> 94 | let accu' = 95 | try f hd :: accu 96 | with Skip -> accu in 97 | loop f accu' tl in 98 | loop f [] l 99 | 100 | let map f l = 101 | List.rev (rev_map f l) 102 | 103 | let rec fold_left f accu l = 104 | match l with 105 | [] -> accu 106 | | hd :: tl -> 107 | let accu' = 108 | try f accu hd 109 | with Skip -> accu in 110 | fold_left f accu' tl 111 | 112 | let rec rev_fold_right f l accu = 113 | match l with 114 | [] -> accu 115 | | hd :: tl -> 116 | let accu' = 117 | try f hd accu 118 | with Skip -> accu in 119 | rev_fold_right f tl accu' 120 | 121 | let rec fold_right f l accu = 122 | rev_fold_right f (List.rev l) accu 123 | 124 | let map_lines_of_channel f ic = 125 | let l = ref [] in 126 | iter_lines_of_channel (fun line -> 127 | try l := f line :: !l 128 | with Skip -> ()) ic; 129 | List.rev !l 130 | 131 | let map_lines_of_file f file = 132 | let l = ref [] in 133 | iter_lines_of_file (fun line -> 134 | try l := f line :: !l 135 | with Skip -> ()) file; 136 | List.rev !l 137 | 138 | end 139 | 140 | module Fixed = 141 | struct 142 | 143 | let chop_spaces str = 144 | let len = String.length str in 145 | let rec getfirst n = 146 | if n = len then len 147 | else 148 | if String.unsafe_get str n = ' ' 149 | then getfirst (n+1) 150 | else n 151 | and getlast n = 152 | if String.unsafe_get str n = ' ' 153 | then getlast (n-1) 154 | else n in 155 | let first = getfirst 0 in 156 | if first = len then "" 157 | else 158 | let last = getlast (len - 1) in 159 | String.sub str first (last-first+1) 160 | 161 | let int s = int_of_string (chop_spaces s) 162 | let float s = float_of_string (chop_spaces s) 163 | 164 | end 165 | 166 | module Directory = 167 | struct 168 | 169 | let list ?(absolute = false) ?path dir = 170 | let names = Sys.readdir dir in 171 | Array.sort String.compare names; 172 | 173 | let make_path, path_maker = 174 | match absolute, path with 175 | false, None 176 | | false, Some false -> false, (fun s -> s) 177 | | false, Some true -> true, Filename.concat dir 178 | | true, Some true 179 | | true, None -> 180 | let f = 181 | if Filename.is_relative dir then 182 | let cwd = Sys.getcwd () in 183 | Filename.concat (Filename.concat cwd dir) 184 | else Filename.concat dir in 185 | true, f 186 | | true, Some false -> invalid_arg "Directory.list" in 187 | 188 | let paths = 189 | if make_path then Array.map path_maker names 190 | else names in 191 | Array.to_list paths 192 | 193 | let is_dir ?(nofollow = false) x = 194 | let stat = if nofollow then Unix.lstat else Unix.stat in 195 | try (stat x).Unix.st_kind = Unix.S_DIR 196 | with Unix.Unix_error (Unix.ENOENT, _, _) -> 197 | false (* may be a bad symbolic link if nofollow is false *) 198 | 199 | end 200 | 201 | module Glob = 202 | struct 203 | (* Filename globbing utility *) 204 | 205 | (* 206 | Examples of use with mikmatch: 207 | 208 | let ml_files = list [FILTER _* ".ml" "i"? eos] 209 | let trash_files = 210 | list [ FILTER ""; 211 | FILTER _* (".cm" ("i"|"o"|"x"|"a"|"xa") | ".o" | ".a") eos ] 212 | *) 213 | 214 | let filter_array f a = 215 | Array.fold_right (fun x l -> if f x then x :: l else l) a [] 216 | 217 | let rec scan_gen ~cons ~real_dir ~dir ?nofollow action path_filter = 218 | let real_real_dir = 219 | if real_dir = "" then Filename.current_dir_name else real_dir in 220 | match path_filter with 221 | [] -> () 222 | | [f] -> 223 | List.iter (fun name -> action (cons dir name)) 224 | (filter_array f (Sys.readdir real_real_dir)) 225 | | f :: subpath_filter -> 226 | let filtered_files = 227 | filter_array f (Sys.readdir real_real_dir) in 228 | List.iter 229 | (fun name -> 230 | let subdir = cons dir name in 231 | let real_subdir = Filename.concat real_dir name in 232 | if Directory.is_dir ?nofollow real_subdir then 233 | scan_gen 234 | ~cons 235 | ~real_dir:real_subdir 236 | ~dir:subdir ?nofollow action subpath_filter) 237 | filtered_files 238 | 239 | let get_dir ~getcwd ~concat ~is_relative ~fun_name ~absolute ~path 240 | ~relative_root root = 241 | match absolute, path with 242 | false, None 243 | | false, Some false -> relative_root 244 | | false, Some true -> root 245 | | true, Some true 246 | | true, None -> 247 | if is_relative root then 248 | let cwd = getcwd () in 249 | concat cwd root 250 | else root 251 | | true, Some false -> invalid_arg fun_name 252 | 253 | let scan ?(absolute = false) ?path ?(root = "") ?nofollow 254 | action path_filter = 255 | let getcwd = Sys.getcwd in 256 | let cons = Filename.concat in 257 | let concat = Filename.concat in 258 | let is_relative = Filename.is_relative in 259 | let fun_name = "Glob.scan" in 260 | let relative_root = "" in 261 | let dir = 262 | get_dir ~getcwd ~concat ~is_relative ~fun_name 263 | ~absolute ~path ~relative_root root in 264 | scan_gen ~cons ~real_dir:root ~dir ?nofollow action path_filter 265 | 266 | 267 | let lscan ?(rev = false) ?(absolute = false) ?path ?(root = []) ?nofollow 268 | action path_filter = 269 | let getcwd () = [Sys.getcwd ()] in 270 | let cons = (fun l s -> s :: l) in 271 | let concat = (fun root rel_path -> rel_path @ root) in 272 | let rec is_relative = function 273 | [] -> true 274 | | [x] -> Filename.is_relative x 275 | | x :: l -> is_relative l in 276 | let fun_name = "Glob.lscan" in 277 | let relative_root = [] in 278 | let rev_root = if rev then root else List.rev root in 279 | let rev_dir = 280 | get_dir ~getcwd ~concat ~is_relative ~fun_name 281 | ~absolute ~path ~relative_root rev_root in 282 | let real_dir = List.fold_left Filename.concat "" (List.rev rev_dir) in 283 | let new_action = 284 | if rev then action 285 | else (fun l -> action (List.rev l)) in 286 | scan_gen ~cons ~real_dir ~dir:rev_dir ?nofollow new_action path_filter 287 | 288 | 289 | let list_gen scan ?absolute ?path ?root ?nofollow ?(sort = true) path_filter = 290 | let l = ref [] in 291 | scan ?absolute ?path ?root ?nofollow (fun x -> l := x :: !l) path_filter; 292 | if sort then List.sort compare !l 293 | else !l 294 | 295 | let list = list_gen scan 296 | let llist ?rev = list_gen (lscan ?rev) 297 | end 298 | -------------------------------------------------------------------------------- /common/regexp_ast.ml: -------------------------------------------------------------------------------- 1 | (* $Id$ *) 2 | (* Abstract syntax tree for regular expressions *) 3 | 4 | open Camlp4.PreCast 5 | 6 | type converter = [ `Int 7 | | `Float 8 | | `Option 9 | | `Custom of Ast.expr 10 | | `Value of Ast.expr ] 11 | 12 | module S = Set.Make (String) 13 | 14 | let list_named_groups set = List.sort String.compare (S.elements set) 15 | 16 | module Named_groups = 17 | struct 18 | module M = Map.Make (String) 19 | include M 20 | let list m = 21 | List.sort 22 | (fun (a, _) (b, _) -> String.compare a b) 23 | (fold 24 | (fun key data accu -> 25 | let positions = 26 | List.sort 27 | (fun (loc, i, conv1) (loc, j, conv2) -> 28 | Stdlib.compare i j) 29 | data in 30 | (key, positions) :: accu) 31 | m []) 32 | 33 | let list_keys m = 34 | List.sort String.compare (fold (fun key data accu -> key :: accu) m []) 35 | 36 | let keys m = fold (fun key data accu -> S.add key accu) m S.empty 37 | let equal m1 m2 = S.equal (keys m1) (keys m2) 38 | let inter m1 m2 = S.inter (keys m1) (keys m2) 39 | let union m1 m2 = S.union (keys m1) (keys m2) 40 | let diff m1 m2 = S.diff (keys m1) (keys m2) 41 | end 42 | 43 | type named_groups = (Ast.loc * int * converter option) list Named_groups.t 44 | 45 | 46 | let add_new loc name conv group_num set = 47 | if Named_groups.mem name set then 48 | Messages.multiple_binding loc [name]; 49 | Named_groups.add name [loc, group_num, conv] set 50 | 51 | let add_new_group loc name conv group_num (groups, positions) = 52 | (add_new loc name conv group_num groups, positions) 53 | 54 | let add_new_pos loc name group_num (groups, positions) = 55 | (groups, add_new loc name None group_num positions) 56 | 57 | let merge_lists l1 l2 = 58 | let tbl = Hashtbl.create (List.length l1 + List.length l2) in 59 | let add l = 60 | List.iter (fun ((_, n, conv) as x) -> Hashtbl.replace tbl n x) l in 61 | add l1; 62 | add l2; 63 | let l = Hashtbl.fold (fun _ x l -> x :: l) tbl [] in 64 | let cmp (_, x, _) (_, y, _) = compare x y in 65 | List.sort cmp l 66 | 67 | let really_add name l2 set = 68 | try 69 | let l1 = Named_groups.find name set in 70 | Named_groups.add 71 | name (merge_lists l1 l2) 72 | (Named_groups.remove name set) 73 | with Not_found -> 74 | Named_groups.add name l2 set 75 | 76 | let merge set1 set2 = 77 | Named_groups.fold really_add set1 set2 78 | 79 | type repetition_kind = 80 | Star 81 | | Option 82 | | Plus 83 | | Range of (int * int option option) 84 | 85 | type greediness = bool 86 | 87 | type ast = 88 | Epsilon of Ast.loc 89 | | Characters of Ast.loc * Charset.t 90 | | Sequence of Ast.loc * ast * ast 91 | | Alternative of Ast.loc 92 | * ast (* choice 1 *) 93 | * ast (* choice 2 *) 94 | * S.t (* group names *) 95 | * S.t (* position names *) 96 | | Repetition of Ast.loc * (repetition_kind * greediness) * ast 97 | | Possessive of Ast.loc * ast 98 | | Bind of Ast.loc * ast * string * converter option 99 | | Bind_pos of Ast.loc * string 100 | | Backref of Ast.loc * string 101 | | Variable of Ast.loc * Ast.expr 102 | | Nocase_variable of Ast.loc * Ast.expr 103 | | Special of Ast.loc * string * (string * int option) 104 | | Lookahead of Ast.loc * bool * ast 105 | | Lookbehind of Ast.loc * bool * ast 106 | | Closed of ast 107 | 108 | let rec loc_of_regexp = function 109 | Epsilon loc 110 | | Characters (loc, _) 111 | | Sequence (loc, _, _) 112 | | Alternative (loc, _, _, _, _) 113 | | Repetition (loc, _, _) 114 | | Possessive (loc, _) 115 | | Bind (loc, _, _, _) 116 | | Bind_pos (loc, _) 117 | | Backref (loc, _) 118 | | Variable (loc, _) 119 | | Nocase_variable (loc, _) 120 | | Special (loc, _, _) 121 | | Lookahead (loc, _, _) 122 | | Lookbehind (loc, _, _) -> loc 123 | | Closed ast -> loc_of_regexp ast 124 | 125 | let rec bindings : ast -> S.t = function 126 | Bind (loc, e, s, conv) -> S.add s (bindings e) 127 | | Bind_pos _ 128 | | Epsilon _ 129 | | Characters _ 130 | | Backref _ 131 | | Variable _ 132 | | Nocase_variable _ 133 | | Special _ -> S.empty 134 | | Sequence (loc, e1, e2) -> S.union (bindings e1) (bindings e2) 135 | | Alternative (loc, e1, e2, set, pos_set) -> set 136 | | Repetition (loc, kind, e) -> bindings e 137 | | Possessive (loc, e) 138 | | Lookahead (loc, _, e) 139 | | Lookbehind (loc, _, e) -> bindings e 140 | | Closed e -> S.empty 141 | 142 | let rec pos_bindings : ast -> S.t = function 143 | Bind_pos (loc, s) -> S.singleton s 144 | | Bind _ 145 | | Epsilon _ 146 | | Characters _ 147 | | Backref _ 148 | | Variable _ 149 | | Nocase_variable _ 150 | | Special _ -> S.empty 151 | | Sequence (loc, e1, e2) -> S.union (pos_bindings e1) (pos_bindings e2) 152 | | Alternative (loc, e1, e2, set, pos_set) -> pos_set 153 | | Repetition (loc, kind, e) -> pos_bindings e 154 | | Possessive (loc, e) 155 | | Lookahead (loc, _, e) 156 | | Lookbehind (loc, _, e) -> pos_bindings e 157 | | Closed _ -> S.empty 158 | 159 | 160 | 161 | let alternative loc e1 e2 = 162 | match e1, e2 with 163 | Characters (loc1, s1), Characters (loc2, s2) -> 164 | Characters (loc, Charset.union s1 s2) 165 | | _ -> 166 | let b1 = bindings e1 167 | and b2 = bindings e2 in 168 | let pb1 = pos_bindings e1 169 | and pb2 = pos_bindings e2 in 170 | Alternative (loc, e1, e2, S.union b1 b2, S.union pb1 pb2) 171 | 172 | let rec repeat loc e (mini, maxoptopt) = 173 | if mini < 0 then 174 | Messages.invalid_range loc 175 | else 176 | match maxoptopt with 177 | None -> 178 | (match mini with 179 | 0 -> Epsilon loc 180 | | n -> 181 | let rec loop i = 182 | if i > 1 then 183 | Sequence (loc, e, loop (i-1)) 184 | else e in 185 | loop n) 186 | | Some (Some maxi) -> 187 | let diff = maxi - mini in 188 | if diff < 0 then Messages.invalid_range loc 189 | else if diff = 0 then e 190 | else 191 | let rec loop i = 192 | alternative loc (Epsilon loc) 193 | (if i > 1 then 194 | (Sequence (loc, e, loop (i-1))) 195 | else e) in 196 | Sequence (loc, (repeat loc e (mini, None)), loop diff) 197 | | Some None -> 198 | Sequence (loc, repeat loc e (mini, None), 199 | Repetition (loc, (Star, true), e)) 200 | 201 | 202 | let rec nocase = function 203 | Bind (loc, e, s, conv) -> Bind (loc, nocase e, s, conv) 204 | | Bind_pos _ 205 | | Epsilon _ 206 | | Backref _ 207 | | Nocase_variable _ 208 | | Special _ as e -> e 209 | | Characters (loc, charset) -> Characters (loc, Charset.nocase charset) 210 | | Sequence (loc, e1, e2) -> Sequence (loc, nocase e1, nocase e2) 211 | | Alternative (loc, e1, e2, ids, pos_ids) -> 212 | Alternative (loc, nocase e1, nocase e2, ids, pos_ids) 213 | | Repetition (loc, kind, e) -> Repetition (loc, kind, nocase e) 214 | | Possessive (loc, e) -> Possessive (loc, nocase e) 215 | | Lookahead (loc, b, e) -> Lookahead (loc, b, nocase e) 216 | | Lookbehind (loc, b, e) -> Lookbehind (loc, b, nocase e) 217 | | Variable (loc, e) -> Nocase_variable (loc, e) 218 | | Closed ast -> Closed (nocase ast) 219 | 220 | 221 | (* Miscellaneous functions *) 222 | 223 | let explode s = 224 | let l = ref [] in 225 | for i = String.length s - 1 downto 0 do 226 | l := s.[i] :: !l 227 | done; 228 | !l 229 | 230 | let of_string loc s = 231 | let l = explode s in 232 | match l with 233 | [c] -> Characters (loc, Charset.singleton c) 234 | | _ -> 235 | List.fold_right 236 | (fun c re -> 237 | Sequence (loc, (Characters (loc, Charset.singleton c)), re)) 238 | l (Epsilon loc) 239 | 240 | let as_charset _loc msg = function 241 | Characters (_loc, set) -> set 242 | | _ -> Messages.failure _loc msg 243 | 244 | let rec warn_bindings w = function 245 | Bind (loc, e, s, conv) -> 246 | if w then Messages.not_visible loc [s] "context"; 247 | warn_bindings w e 248 | | Bind_pos (loc, s) -> if w then Messages.not_visible loc [s] "context" 249 | | Epsilon _ 250 | | Characters _ 251 | | Backref _ 252 | | Variable _ 253 | | Nocase_variable _ 254 | | Special _ -> () 255 | | Sequence (loc, e1, e2) -> warn_bindings w e1; warn_bindings w e2 256 | | Alternative (loc, e1, e2, set, pos_set) -> 257 | if w then 258 | (match list_named_groups (S.union set pos_set) with 259 | [] -> () 260 | | ignored -> Messages.not_visible loc ignored "context") 261 | | Repetition (loc, kind, e) -> warn_bindings w e 262 | | Possessive (loc, e) 263 | | Lookahead (loc, _, e) 264 | | Lookbehind (loc, _, e) -> warn_bindings w e 265 | | Closed e -> warn_bindings true e 266 | 267 | let warnings re = 268 | warn_bindings false re 269 | 270 | -------------------------------------------------------------------------------- /common/mikmatch.mli: -------------------------------------------------------------------------------- 1 | (** A small text-oriented library *) 2 | 3 | (** The [Mikmatch] module provides a submodule named [Text]. 4 | A normal usage is to place [open Mikmatch] at the beginning of 5 | user code that uses it. 6 | 7 | This module is part of the runtime environment of Mikmatch 8 | (the library run_mikmatch_pcre.cma or equivalent). 9 | *) 10 | 11 | module Text : 12 | sig 13 | (** This module provides some general functions which are especially 14 | useful for manipulating text and text files. 15 | *) 16 | 17 | val iter_lines_of_channel : (string -> unit) -> in_channel -> unit 18 | (** [iter_lines_of_channel f ic] reads input channel [ic] 19 | and applies successively the given function [f] to 20 | each line until the end of file is reached. *) 21 | 22 | val iter_lines_of_file : (string -> unit) -> string -> unit 23 | (** [iter_lines_of_file f file] reads file [file] 24 | and applies successively the given function [f] to 25 | each line until the end of file is reached. *) 26 | 27 | val lines_of_channel : in_channel -> string list 28 | (** [lines_of_channel ic] returns the list of the lines that can be 29 | read from input channel [ic]. *) 30 | 31 | val lines_of_file : string -> string list 32 | (** [lines_of_file file] returns the list of the lines that can be 33 | read from file [file]. *) 34 | 35 | val channel_contents : in_channel -> string 36 | (** [channel_contents ic] returns the string containing the bytes 37 | that can be read from the given input channel [ic]. *) 38 | 39 | val file_contents : ?bin:bool -> string -> string 40 | (** [file_contents file] returns the string containing the bytes 41 | that can be read from the given file. 42 | Option [bin] specifies if [Pervasives.open_in_bin] should be 43 | used instead of [Pervasives.open_in] to open the file. Default is 44 | [false]. *) 45 | 46 | val save : string -> string -> unit 47 | (** [save file data] stores the string [data] in [file]. 48 | If the file already exists, its contents is discarded silently. *) 49 | 50 | val save_lines : string -> string list -> unit 51 | (** [save_lines file l] saves the given list [l] of strings in [file] 52 | and adds a newline characters (['\n']) after each of them. 53 | If the file already exists, its contents is discarded silently. *) 54 | 55 | 56 | exception Skip 57 | (** This exception can be used to skip an element of a list being 58 | processed with [rev_map], [map], [fold_left], and [fold_right]. *) 59 | 60 | val map : ('a -> 'b) -> 'a list -> 'b list 61 | (** Like [List.map] but it is guaranteed that 62 | the elements of the input list are processed from left to right. 63 | Moreover the [Skip] exception can be used to skip an element 64 | of the list. 65 | This function runs in constant stack space. *) 66 | 67 | val rev_map : ('a -> 'b) -> 'a list -> 'b list 68 | (** Like [List.rev_map], but it is guaranteed that 69 | the elements of the input list are processed from left to right. 70 | Moreover the [Skip] exception can be used to skip an element 71 | of the list. 72 | This function runs in constant stack space and is slightly faster 73 | then [map]. *) 74 | 75 | val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b list -> 'a 76 | (** Like [List.fold_left] 77 | but the [Skip] exception can be used to skip an element 78 | of the list. 79 | This function runs in constant stack space. *) 80 | 81 | val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b 82 | (** Like [List.fold_right] 83 | but the [Skip] exception can be used to skip an element 84 | of the list. 85 | This function runs in constant stack space. *) 86 | 87 | val map_lines_of_channel : (string -> 'a) -> in_channel -> 'a list 88 | (** [map_lines_of_channel f ic] is equivalent to 89 | [map f (lines_of_channel ic)] but faster. *) 90 | 91 | val map_lines_of_file : (string -> 'a) -> string -> 'a list 92 | (** [map_lines_of_file f file] is equivalent to 93 | [map f (lines_of_file file)] but faster. *) 94 | 95 | end 96 | 97 | 98 | module Fixed : 99 | sig 100 | (** This module provides some functions which are useful 101 | for manipulating files with fields of fixed width. 102 | *) 103 | 104 | val chop_spaces : string -> string 105 | (** [chop_spaces s] returns a string where the leading and trailing 106 | spaces are removed. *) 107 | 108 | val int : string -> int 109 | (** [int s] reads an int from a string where leading and 110 | trailing spaces are allowed. 111 | Equivalent to [Pervasives.int_of_string (chop_spaces s)]. *) 112 | 113 | val float : string -> float 114 | (** [float s] reads an float from a string where leading and 115 | trailing spaces are allowed. 116 | Equivalent to [Pervasives.float_of_string (chop_spaces s)]. *) 117 | end 118 | 119 | module Directory : 120 | sig 121 | (** Basic operations on directories *) 122 | 123 | val list : ?absolute:bool -> ?path:bool -> string -> string list 124 | (** [list dir] returns the alphabetically sorted list 125 | of the names of the files contained in directory [dir]. 126 | The special names that refer to the parent directory (e.g. [..]) 127 | and the directory itself (e.g. [.]) are ignored. 128 | 129 | If the option [absolute] is set to [true], the result is a list 130 | of absolute file paths, i.e. that do not depend on the current directory 131 | which is associated to the process 132 | (default is false; implies [path = true]). 133 | 134 | If the option [path] is set to [true], the result is a list of paths 135 | instead of just the file names 136 | (default is [false] except if [absolute] is explicitely set to [true]). 137 | 138 | Exception [Invalid_argument "Directory.list"] is raised 139 | if there is an incompatibility between the options. 140 | Unspecified exceptions will be raised if the given directory does not 141 | exist or is not readable. 142 | *) 143 | 144 | val is_dir : ?nofollow:bool -> string -> bool 145 | (** [is_dir dir] returns true if [dir] is a directory, false otherwise. 146 | The [nofollow] option is false by default, but if true, 147 | a symbolic link will not be followed. In that case false is returned 148 | even if the link points to a valid directory. *) 149 | end 150 | 151 | module Glob : 152 | sig 153 | (** A generic file path matching utility *) 154 | 155 | val scan : 156 | ?absolute:bool -> 157 | ?path:bool -> 158 | ?root:string -> 159 | ?nofollow:bool -> 160 | (string -> unit) -> (string -> bool) list -> unit 161 | (** [scan action path_filter] returns all the file paths having a name 162 | that matches [path_filter]. [path_filter] is a list of filters that 163 | test whether a directory name or a file name should be selected. 164 | 165 | The path search starts from the current directory by default, or 166 | from the directory specified by the [root] option. The file names 167 | are examined in an undefined order. When a file path matches, 168 | [action] is applied to the string representing the path. 169 | Options [absolute] and [path] have the same meaning and the same 170 | default values as in {!Mikmatch.Directory.list}. 171 | 172 | [nofollow] can be used to prevent from considering symbolic links 173 | as directories. It is false by default. 174 | See also {!Mikmatch.Directory.is_dir}. 175 | *) 176 | 177 | val lscan : 178 | ?rev:bool -> 179 | ?absolute:bool -> 180 | ?path:bool -> 181 | ?root:string list -> 182 | ?nofollow:bool -> 183 | (string list -> unit) -> (string -> bool) list -> unit 184 | (** Same as {!Mikmatch.Glob.scan} but file paths are kept as a list 185 | of strings that form a valid path when concatenated using 186 | [Filename.concat]. Option [rev] can be set if the lists representing 187 | paths are in reversed order, i.e. the root comes last. 188 | 189 | In [lscan action path_filter], options [rev], [absolute], and [path] 190 | take their default values which are all false. 191 | In this situation, it is guaranteed that the paths that are passed 192 | to [action] have the same length as [path_filter]. 193 | *) 194 | 195 | val list : 196 | ?absolute:bool -> 197 | ?path:bool -> 198 | ?root:string -> 199 | ?nofollow:bool -> 200 | ?sort:bool -> 201 | (string -> bool) list -> string list 202 | (** [list path_filter] works like {!Mikmatch.Glob.scan} but returns a list 203 | of all file paths that match [path_filter]. 204 | 205 | An example in Mikmatch syntax is [list [FILTER _* ".ml" eos]]. 206 | It returns the list of ".ml" files in the current directory. 207 | It could have been written as 208 | [list [ fun s -> Filename.check_suffix s ".ml"]] and is equivalent 209 | to [*.ml] in shell syntax. 210 | *) 211 | 212 | val llist : 213 | ?rev:bool -> 214 | ?absolute:bool -> 215 | ?path:bool -> 216 | ?root:string list -> 217 | ?nofollow:bool -> 218 | ?sort:bool -> (string -> bool) list -> string list list 219 | (** [llist path_filter] works like {!Mikmatch.Glob.lscan} 220 | but returns a list 221 | of all file paths that match [path_filter]. *) 222 | end 223 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | Changes in the successive versions of Mikmatch 2 | ============================================== 3 | 4 | Mikmatch is the port of micmatch 0.700 to the "new" Camlp4 (3.10). 5 | Mikmatch is not compatible with Camlp4 <= 3.09 or Camlp5. 6 | 7 | Please send bug reports, comments or feature requests to 8 | Martin Jambon or to the public forum at 9 | http://groups.google.com/group/micmatch 10 | 11 | !!! = some incompatibilities 12 | opt = optimizations 13 | +ui = additions in the user interface 14 | -ui = restrictions in the user interface 15 | bug = bug or security fix 16 | doc = major changes in the documentation 17 | pkg = changes in the structure of the package or in the installation procedure 18 | 19 | 2012-11-04 20 | 1.0.6: [bug] Fix for camlp4 4.00.0 21 | 22 | 2012-02-03 23 | 1.0.5: [pkg] Subpackage "mikmatch_pcre.top" for toplevel usage 24 | 25 | 2011-10-21 26 | 1.0.4: [bug] Fixed bug consisting in Not_found exception being raised 27 | in an alternative capturing an empty string. 28 | (losing compatibility with pcre-ocaml versions from 2004 29 | or earlier) 30 | 31 | 1.0.3: [!!!] Builds with camlp4 3.12.0 but no longer with 3.11. 32 | Thanks to Jake Donham for the patch. 33 | 34 | 1.0.2: [bug] Fixed important name scoping bug 35 | (definitions were inserted too early). 36 | [bug] Fixed bug that was causing directives to be ignored. 37 | 38 | 1.0.1: [!!!] Support for Camlp4 3.11.0, including toplevel support which 39 | wasn't possible with Camlp4 3.10. 40 | Dropped support for Camlp4 3.10 (please use mikmatch 1.0.0 41 | instead). 42 | Will restore compatibility with Camlp4 3.10 only in case 43 | of significant new mikmatch features. 44 | [pkg] Added dependency towards tophide. It is not a strict requirement 45 | but improves the toplevel experience. 46 | 47 | 1.0.0: [*] First release of mikmatch, translation of micmatch 0.700 for 48 | the "new camlp4" 3.10. 49 | [pkg] License: all source code is now distributed under the BSD license. 50 | [pkg] All occurrences of "micmatch" have been replaced by "mikmatch". 51 | [-ui] Temporarily no toplevel support for the syntax features, waiting 52 | for the availability of camlp4 filters for toplevel phrases. 53 | [pkg] Custom toplevels are no longer built and installed. 54 | [+ui] No more polymorphic value restriction. Functions that 55 | use mikmatch patterns can now be polymorphic. 56 | Many identifiers with the "__mikmatch" prefix are now visible 57 | as module structure items. 58 | [-ui] Dropped support for Camlp4's revised syntax. 59 | [-ui] No more -thread option. Only matching patterns that contain @ 60 | use a table that is shared among threads. User code is responsible 61 | for using locks if needed. 62 | 63 | 64 | ------------------------- Micmatch (for camlp4 <= 3.09) ------------------ 65 | 66 | This is the history for micmatch before the port to Camlp4 3.10, renamed 67 | mikmatch. 68 | 69 | 0.700: [bug] It was not possible to use "Not" outside of micmatch_pcre 70 | regexps. This is now fixed. 71 | 72 | 0.699: [+ui] added CAPTURE and COLLECTOBJ 73 | 74 | 0.698: [bug] fixed dynamic linking problems on MacOS/NetBSD by removing 75 | version_filter. As a consequence, only recent versions of camlp4 76 | are now supported (starting from 3.08.4, maybe earlier). 77 | [bug] fixed missing dependency on "common" for "pcre" and "str" 78 | targets in main Makefile 79 | 80 | 0.697: [bug] installation of executables now correctly follows $BINDIR 81 | or $PREFIX/bin 82 | [+ui] new FILTER macro which returns true or false 83 | [+ui] changed grammar entry level of macros 84 | (now "expr1" instead of "top"). Allows for less parentheses. 85 | [pkg] added dependency to the Unix library 86 | [+ui] added filename globbing in the Micmatch library 87 | [+ui] added experimental support for views 88 | 89 | 0.696: [pkg] removed micmatch_pcre.godiva which is a big source of 90 | trouble 91 | 92 | 0.695: [pkg] minor changes for GODI 93 | [bug] added .PHONY targets in Makefile 94 | 95 | 0.694: [+ui] int and float predefined regexps are not experimental anymore 96 | [bug] fixed null-character related bugs 97 | (Pcre.quote does not escape them) 98 | [bug] fixed bug with ocaml 3.08.1 (and probably earlier): Not_found 99 | was raised during initialization of the preprocessing library 100 | (deletion of grammar rules which didn't exist) 101 | 102 | 0.693: [+ui] added support for regexps with arguments (gaps of the 103 | form @some_expr) which are evaluated at runtime. 104 | It uses a cache which stores the most recently used 105 | compiled regexps for different sets of arguments. 106 | [opt] optionally shared data structures are now created only where 107 | necessary (just reduces the code size a little) 108 | 109 | 0.692: [bug, +ui] added "nan" and "inf" (caseless, optional sign) 110 | to the set of strings recognized by the "float" pattern. 111 | 112 | 0.691: [+ui] experimental addition of predefined regexps "int" and "float" 113 | 114 | 0.690: [+ui] added support for global shortcut bindings: 115 | let / ... / = ... ;; 116 | 117 | 0.689: [+ui] 118 | - added support for shortcuts: 119 | let RE ... = ... in ... 120 | let / ... / = ... in ... 121 | - official support for / ... / as an equivalent of RE ... (patterns) 122 | - general support for: 123 | let try ... = ... in ... with ... 124 | [bug] code with misplaced RE patterns cannot compile anymore 125 | 126 | 0.688: [+ui] added support for automatic type conversions: 127 | (... as x : int) 128 | (... as x := int_of_string) 129 | (... as x = Zero) 130 | [bug] fixed bug which caused some alternative patterns to be 131 | ignored. Simplest example that caused the bug: 132 | match "c" with (RE "a") | (RE "b") | (RE "c") -> ();; 133 | 134 | 0.687: [doc] added warning against inaccessible named subgroups or positional 135 | markers such as in ("abc" as local !local)* 136 | [+ui] added option -do for command-line programs (like sh -c or perl -e) 137 | 138 | 0.686: [bug] fixed bugs related to patterns matching empty substrings 139 | (avoiding infinite loops in such cases; 140 | for instance (SPLIT "") and (SPLIT "") ~full:true both work). 141 | 142 | 0.685: [bug] Fixed the following bugs: 143 | - was counting special "any" as 0 characters instead of 1 144 | (lookbehind assertions) 145 | - "Not_found" problems with recent versions of Pcre due 146 | to the 2004-04-29 change in Pcre.get_substring 147 | - silent removal of bindings which don't make sense in assertions 148 | (BTW I don't know how to print a precise warning instead) 149 | 150 | 0.684: [+ui] added support for lookaround assertions (PCRE only): 151 | < lookbehind . lookahead > or < lookahead > 152 | 153 | 0.683: [+ui] added support for positional markers in regexps (e.g. %pos) 154 | 155 | 0.682: [bug] added compatibility with OCaml 3.09+dev14 156 | 157 | 0.681: [bug] fixed bug in REPLACE_FIRST 158 | 159 | 0.680: [bug] replaced the regexp-pp package with the newest version 160 | for compatibility with OCaml 3.09+dev6 161 | 162 | 0.679: [+ui] added ~share option to MATCH and SEARCH_FIRST 163 | [doc] updated and commented example/shootout.ml 164 | 165 | 0.678: [+ui] added experimental / ... / syntax in patterns 166 | 167 | 0.677: [pkg] fixed bugs in META files 168 | [+ui] added "save" and "save_lines" functions to the library 169 | [bug] now bos, eos, bol and eol assertions work (micmatch_pcre) 170 | 171 | 0.676: [pkg] added version ID in the name of the archives 172 | [bug] 173 | - fixed bug which prevented the use of the Camlp4 174 | syntax extension for stream parsers (was due to a wrong LEVEL) 175 | - fixed fatal bug in micmatch_str which was accidentally 176 | introduced in the last version (was due to an inexisting LEVEL) 177 | [+ui] added a short Micmatch.Fixed module for handling text 178 | with fixed-width columns. 179 | 180 | 0.675: [bug] several bugfixes in the installation procedure 181 | (correct clean, any name for gmake OK, camlmix not required) 182 | [pkg] separate installation of micmatch_pcre (default) 183 | and micmatch_str (now optional) 184 | 185 | 0.674: [+ui] 186 | - added full support for PCRE-OCaml, with 187 | many additional macros and specifications 188 | - POSIX characters classes are now predefined 189 | for both micmatch_str and micmatch_pcre 190 | - micmatch and micmatch_str are now binary executables 191 | so that micmatch scripts can be made self-executable 192 | on Unix-like systems 193 | [doc] updated the reference manual and the web page 194 | [-ui] deprecated use of {123-}. Use {123+} instead. 195 | 196 | 0.673: [bug] added support for OCaml 3.08.1 (replaced regexp-pp package) 197 | 198 | 0.672: [+ui,bug] 199 | - added checks for unbalanced or redundant local bindings 200 | - added support for local backreferences 201 | 202 | 0.671: [+ui] 203 | - added support for backreferences (!ident) 204 | - tries alternatives from left to right, and is greedy by 205 | default for optional matches (? operator). Not official, 206 | since these properties are not specified in the Str library. 207 | - function keyword replaced by fun in the revised syntax 208 | (still not tested though) 209 | 210 | 0.670: [+ui] addition of the ~ operator for ignoring case (uses the OCaml 211 | definition of case, i.e. the latin1 charset). 212 | [doc] created the reference manual 213 | 214 | 0.669: [opt] tail-recursivity is now preserved (options -direct/-tailrec) 215 | [-ui] regexpr{-12} is not valid anymore because it looks strange 216 | and ambiguous. regexpr{0-12} should be used instead. 217 | regexpr{12-} is still valid. 218 | 0.668: [bug] 219 | - fixed abusive simplification (missing try ... with) 220 | - added missing binding in multithreaded mode 221 | - fixed the buggy Makefile on the web page 222 | 0.667: [opt] alternatives between charsets handled like unions of charsets 223 | 0.666: [*] initial public release (2004-08-02) 224 | -------------------------------------------------------------------------------- /str/str_lib.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | (* $Id$ *) 3 | 4 | open Camlp4.PreCast 5 | 6 | open Mm_util 7 | open Constants 8 | 9 | let _ = 10 | mod_runtime := "Run_mikmatch_str"; 11 | mod_runtime_mt := "Run_mikmatch_str_mt" 12 | 13 | let str_mutex = "str_mutex" 14 | 15 | 16 | (* Emacs/Str syntax for regular expressions *) 17 | 18 | open Printf 19 | 20 | open Regexp_ast 21 | 22 | let special_regexps = 23 | let _loc = Constants.dummy_loc in 24 | [ "bol", Special (_loc, "^", ("bol", Some 0)); (* beginning of line *) 25 | "eol", Special (_loc, "$", ("eol", Some 0)); (* end of line *) 26 | "bnd", Special (_loc, "\\b", ("bnd", Some 0)); (* word boundary *) 27 | "any", Special (_loc, ".", ("any", Some 1)); (* any character 28 | except newline *) ] 29 | 30 | (* 31 | Note that the usual regexp special characters are not special inside 32 | a character set. A completely different set of special characters exists 33 | inside character sets: `]', `-' and `^'. 34 | 35 | To include a `]' in a character set, you must make it the first 36 | character. For example, `[]a]' matches `]' or `a'. To include a `-', 37 | write `-' as the first or last character of the set, or put it after 38 | a range. Thus, `[]-]' matches both `]' and `-'. 39 | 40 | To include `^', make it other than the first character in the set. 41 | *) 42 | 43 | let string c = String.make 1 c 44 | 45 | let quote_char = function 46 | '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' as c -> 47 | let s = Bytes.create 2 in 48 | Bytes.set s 0 '\\'; 49 | Bytes.set s 1 c; 50 | Bytes.unsafe_to_string s 51 | | c -> string c 52 | 53 | 54 | let reorder_charset l = 55 | if l = [] then 56 | invalid_arg "reorder_charset"; 57 | List.sort 58 | (fun c1 c2 -> 59 | if c1 = c2 then invalid_arg "reorder_charset: repeated char" 60 | else if c1 = ']' then -1 61 | else if c2 = ']' then 1 62 | else if c1 = '-' then 1 63 | else if c2 = '-' then -1 64 | else if c1 = '^' then 1 65 | else if c2 = '^' then -1 66 | else Char.compare c1 c2) 67 | l 68 | 69 | let compact l = 70 | let finish first last = 71 | match Char.code last - Char.code first with 72 | 0 -> string first 73 | | 1 -> string first ^ string last 74 | | _ -> string first ^ "-" ^ string last in 75 | 76 | let rec extend first last = 77 | function 78 | [] -> [finish first last] 79 | | c :: rest -> 80 | if Char.code c = Char.code last + 1 then 81 | extend first c rest 82 | else 83 | finish first last :: extend c c rest in 84 | 85 | match l with 86 | [] -> [] 87 | | hd :: tl -> extend hd hd tl 88 | 89 | 90 | let compact_charset l = 91 | let rbracket = ref false 92 | and dash = ref false 93 | and caret = ref false in 94 | let normal = 95 | List.filter (function 96 | ']' -> rbracket := true; false 97 | | '-' -> dash := true; false 98 | | '^' -> caret := true; false 99 | | _ -> true) l in 100 | let sorted = List.sort Char.compare normal in 101 | let special_tail = 102 | let tail = if !dash then ["-"] else [] in 103 | if !caret then "^" :: tail else tail in 104 | let tail = compact sorted @ special_tail in 105 | if !rbracket then "]" :: tail else tail 106 | 107 | 108 | let add_const accu s = 109 | accu := `String s :: !accu 110 | let add_var accu e = 111 | accu := `Var (e, false) :: !accu 112 | let add_var_nocase accu e = 113 | accu := `Var (e, true) :: !accu 114 | 115 | let rec rm_closed = function Closed ast -> rm_closed ast | ast -> ast 116 | 117 | let rec to_string ?(top = false) ((last_group, named_groups) as groups) accu = 118 | function 119 | Epsilon _loc -> groups 120 | | Special (_loc, s, _) -> add_const accu s; groups 121 | | Characters (_loc, set) -> 122 | let l = Charset.list set in 123 | (match l with 124 | [] -> groups 125 | | [c] -> 126 | add_const accu (quote_char c); 127 | groups 128 | | _ -> 129 | add_const accu "["; 130 | List.iter (add_const accu) (compact_charset l); 131 | add_const accu "]"; 132 | groups) 133 | 134 | | Sequence (_loc, re1, re2) -> 135 | let groups = to_string groups accu re1 in 136 | to_string groups accu re2 137 | 138 | | Alternative (_loc, re, Epsilon _, _, _) -> 139 | 140 | let must_group = 141 | not top && 142 | match rm_closed re with 143 | Characters _ | Special _ | Bind _ | Alternative _ -> false 144 | | _ -> true in 145 | let last_group = if must_group then succ last_group else last_group in 146 | if must_group then add_const accu "\$"; 147 | let (last_group, named_groups) as groups = 148 | to_string (last_group, named_groups) accu re in 149 | if must_group then add_const accu "\$"; 150 | add_const accu "?"; 151 | groups 152 | 153 | | Alternative (_loc, re1, re2, _, _) -> 154 | 155 | let must_group = not top in 156 | let last_group = if must_group then succ last_group else last_group in 157 | if must_group then add_const accu "\$"; 158 | let (last_group, named_groups1) = 159 | to_string (last_group, named_groups) accu re1 in 160 | add_const accu "\\|"; 161 | 162 | let (last_group, named_groups2) = 163 | to_string (last_group, named_groups) accu re2 in 164 | if must_group then add_const accu "\$"; 165 | 166 | let check_balance set1 set2 = 167 | if not (Named_groups.equal set1 set2) then 168 | (let missing = 169 | S.diff 170 | (Named_groups.union set1 set2) 171 | (Named_groups.inter set1 set2) in 172 | Messages.unbalanced_bindings _loc (list_named_groups missing)) in 173 | 174 | let (groups1, positions1) = named_groups1 175 | and (groups2, positions2) = named_groups2 in 176 | check_balance groups1 groups2; 177 | check_balance positions1 positions2; 178 | 179 | (last_group, (merge groups1 groups2, merge positions1 positions2)) 180 | 181 | | Repetition (_loc, (Star, true), 182 | (Repetition (_, (Star, true), _) as re)) -> 183 | to_string ~top groups accu re 184 | 185 | | Repetition (_loc, kind, re) -> 186 | let must_group = 187 | not top && 188 | match rm_closed re with 189 | Characters _ | Special _ | Bind _ | Alternative _ -> false 190 | | _ -> true in 191 | let last_group = 192 | if must_group then 193 | (add_const accu "\$"; 194 | succ last_group) 195 | else last_group in 196 | let groups = to_string (last_group, named_groups) accu re in 197 | if must_group then 198 | add_const accu "\$"; 199 | let op = 200 | match kind with 201 | (Star, true) -> "*" 202 | | (Plus, true) -> "+" 203 | | (Option, true) -> "?" 204 | | _ -> assert false in 205 | add_const accu op; 206 | groups 207 | 208 | | Bind (_loc, re, name, conv) -> 209 | let last_group = succ last_group in 210 | let named_groups = 211 | add_new_group _loc name conv last_group named_groups in 212 | add_const accu "\$"; 213 | let groups = to_string (last_group, named_groups) accu re in 214 | add_const accu "\$"; 215 | groups 216 | 217 | | Bind_pos (_loc, name) -> 218 | let last_group = succ last_group in 219 | let named_groups = add_new_pos _loc name last_group named_groups in 220 | add_const accu "\$\$"; 221 | (last_group, named_groups) 222 | 223 | | Backref (_loc, name) -> 224 | (try 225 | match Named_groups.find name (fst named_groups) with 226 | [] -> Messages.invalid_backref _loc name 227 | | [(_, n, conv)] -> add_const accu (sprintf "\\%i" n); groups 228 | | l -> 229 | add_const accu (sprintf "\$%s\$" 230 | (String.concat "\\|" 231 | (List.map (fun (_, n, conv) -> sprintf "\\%i" n) l))); 232 | (succ last_group, named_groups) 233 | with Not_found -> Messages.invalid_backref _loc name) 234 | 235 | | Variable (_loc, e) -> add_var accu e; groups 236 | | Nocase_variable (_loc, e) -> add_var_nocase accu e; groups 237 | 238 | | Closed ast -> 239 | let saved_named_groups = named_groups in 240 | let (last_group, named_groups) = to_string groups accu ast in 241 | (last_group, saved_named_groups) 242 | 243 | | Possessive _ -> assert false 244 | | Lookahead _ -> assert false 245 | | Lookbehind _ -> assert false 246 | 247 | let nocasify e = 248 | let _loc = Ast.loc_of_expr e in 249 | <:expr< $uid: !mod_runtime$.nocase $e$ >> 250 | 251 | let process_regexp _loc ~sharing re re_name = 252 | let accu = ref [] in 253 | let (last_group, named_groups) = 254 | to_string ~top:true (0, (Named_groups.empty, Named_groups.empty)) 255 | accu re in 256 | let re_args, re_source = 257 | Match.get_re_source ~quote_expr: <:expr< Str.quote >> 258 | ~nocasify accu in 259 | (re_args, re_source, named_groups, []) 260 | 261 | 262 | (* Syntax expanders *) 263 | 264 | open Constants 265 | 266 | let expr_mutex _loc = <:expr< $uid: !mod_runtime_mt$.$lid:str_mutex$ >> 267 | 268 | let unlock _loc = 269 | <:expr< Mutex.unlock $expr_mutex _loc$ >> 270 | 271 | let lock _loc = 272 | <:expr< Mutex.lock $expr_mutex _loc$ >> 273 | 274 | let lock_unlock e = 275 | let _loc = Ast.loc_of_expr e in 276 | <:expr< do { $lock _loc$; 277 | try let x = $e$ in do { $unlock _loc$; x } 278 | with [ exn -> do { $unlock _loc$; raise exn } ] } >> 279 | 280 | let unlock_lock e = 281 | let _loc = Ast.loc_of_expr e in 282 | <:expr< do { $unlock _loc$; 283 | try let x = $e$ in do { $lock _loc$; x } 284 | with [ exn -> do { $lock _loc$; raise exn } ] } >> 285 | 286 | let string_match _loc re_name get_re target pos = 287 | <:expr< Str.string_match $get_re$ $target$ $int:string_of_int pos$ >> 288 | 289 | let matched_group _loc n target = 290 | <:expr< Str.matched_group $int:string_of_int n$ $target$ >> 291 | 292 | let matched_position _loc n target = 293 | <:expr< Str.group_beginning $int:string_of_int n$ >> 294 | 295 | let compile_regexp ~mt _loc re_args re_source = 296 | let compile_string e = 297 | <:expr< Str.regexp $e$ >> in 298 | match re_args with 299 | [] -> 300 | let re_string = Match.compute_re_string _loc re_source in 301 | compile_string re_string 302 | | _ -> 303 | let key = 304 | match re_args with 305 | [name, _] -> <:expr< $lid:name$ >> 306 | | _ -> 307 | let expr_list = 308 | List.map (fun (name, _) -> <:expr< $lid:name$ >>) re_args in 309 | let tup = comma_expr_of_list _loc expr_list in 310 | <:expr< ( $tup: tup$ ) >> in 311 | let compile = 312 | let re_string = Match.compute_re_string _loc re_source in 313 | compile_string re_string in 314 | 315 | let find = 316 | Match.protect mt 317 | <:expr< $uid: !mod_runtime$.Mem.find tbl key >> in 318 | let add = 319 | Match.protect mt 320 | <:expr< $uid: !mod_runtime$.Mem.unsafe_add tbl key data >> in 321 | 322 | let check_cache = 323 | <:expr< 324 | let key = $key$ in 325 | try $find$ 326 | with [ Not_found -> 327 | let data = $compile$ in 328 | do { $add$; 329 | data } ] >> in 330 | 331 | let get_regexp = 332 | List.fold_right 333 | (fun (argname, _) e -> <:expr< fun $lid:argname$ -> $e$ >>) 334 | re_args 335 | check_cache in 336 | 337 | let result = 338 | <:expr< 339 | let tbl = $uid: !mod_runtime$.Mem.create 100 in 340 | $get_regexp$ >> in 341 | 342 | if mt then <:expr< let mutex = Mutex.create () in $result$ >> 343 | else result 344 | 345 | 346 | 347 | let convert _loc conv e = 348 | match conv with 349 | None -> e 350 | | Some f -> 351 | match f with 352 | `Int -> <:expr< Pervasives.int_of_string $e$ >> 353 | | `Float -> <:expr< Pervasives.float_of_string $e$ >> 354 | | `Option -> <:expr< let s = $e$ in 355 | if s = "" then None 356 | else Some s >> 357 | | `Custom f -> <:expr< $f$ $e$ >> 358 | | `Value e' -> <:expr< do { ignore $e$; $e'$ } >> 359 | 360 | let insert_bindings_poly make_expr _loc target set e = 361 | Named_groups.fold 362 | (fun name l e -> 363 | match l with 364 | [] -> assert false 365 | | (_loc, _, _) :: _ -> 366 | let find_it = 367 | List.fold_right 368 | (fun (_loc, n, conv) accu -> 369 | let expr = 370 | convert _loc conv (make_expr _loc n target) in 371 | match accu with 372 | None -> Some expr 373 | | Some e -> 374 | Some <:expr< 375 | try $expr$ with [ Not_found -> $e$ ] >>) 376 | l 377 | None in 378 | let result = 379 | match find_it with 380 | None -> assert false 381 | | Some e -> e in 382 | <:expr< let $lid:name$ = $result$ in $e$ >>) 383 | set 384 | e 385 | 386 | let insert_group_bindings = insert_bindings_poly matched_group 387 | let insert_position_bindings = insert_bindings_poly matched_position 388 | 389 | let insert_bindings _loc target (group_bindings, position_bindings) e = 390 | insert_group_bindings _loc target group_bindings 391 | (insert_position_bindings _loc target position_bindings e) 392 | 393 | 394 | let match_and_bind _loc re_name get_re target named_groups success failure = 395 | <:expr< 396 | if $string_match _loc re_name get_re target 0$ 397 | then $insert_bindings _loc target named_groups success$ 398 | else $failure$ >> 399 | 400 | 401 | let macro_replace _loc re_name target_name named_groups expr = 402 | let target = <:expr< $lid:target_name$ >> in 403 | <:expr< 404 | fun $lid:target_name$ -> 405 | Str.global_substitute $lid:re_name$ 406 | (fun _ -> $insert_bindings _loc target named_groups expr$) 407 | $target$ >> 408 | 409 | 410 | open Select_lib 411 | 412 | let lib = { predefined_regexps = special_regexps; 413 | unfold_range = true; 414 | process_regexp = process_regexp; 415 | compile_regexp_match = compile_regexp ~mt:false; 416 | compile_regexp_search = compile_regexp ~mt:false; 417 | match_and_bind = match_and_bind; 418 | wrap_match = (fun e -> e); 419 | wrap_user_case = (fun e -> e); 420 | really_wrap_match = false; 421 | really_wrap_user_case = false } 422 | 423 | let lib_mt = { predefined_regexps = special_regexps; 424 | unfold_range = true; 425 | process_regexp = process_regexp; 426 | compile_regexp_match = compile_regexp ~mt:true; 427 | compile_regexp_search = compile_regexp ~mt:true; 428 | match_and_bind = match_and_bind; 429 | wrap_match = lock_unlock; 430 | wrap_user_case = unlock_lock; 431 | really_wrap_match = true; 432 | really_wrap_user_case = true } 433 | -------------------------------------------------------------------------------- /pcre/test.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | (* Definition of regular expressions for further use *) 4 | RE space = [' ' '\t' '\n' '\r'] 5 | RE not_space = _ # space 6 | RE digit = ['0'-'9'] 7 | RE letter = ['A'-'Z' '_' 'a'-'z'] 8 | RE word = letter+ 9 | 10 | (* 11 | (* Inclusion of file in the same syntax, e.g. a library of user-defined 12 | regular expressions. (known problem with error location) *) 13 | USE "my_regexps.ml" (* defines `word' and `digit' *) 14 | *) 15 | 16 | (* Extended pattern-matching in the following constructs: 17 | match ... with ... 18 | try ... with ... 19 | function ... 20 | *) 21 | 22 | 23 | (* Doesn't work. Don't know how to make it work. *) 24 | (* Testing the Camlp4 support for stream parsers *) 25 | 26 | let _ = match Stream.of_list [] with parser [< >] -> () 27 | 28 | 29 | let test expected a b c = 30 | printf "[case %i] " expected; flush stdout; 31 | (match a, b, c with 32 | None, (None | Some (RE space* )), None -> printf "case 1\n" 33 | | Some ({ contents = [| RE (word as x); _; y |]}), 34 | (Some ("test" as z | RE word space (word as z))), 35 | None -> printf "case 2: %S %S %S\n" x y z 36 | | _, _, Some (RE space* (['0'-'9']+ as n)) -> printf "case 3: %s\n" n 37 | | _ -> printf "case 4\n"); 38 | flush stdout 39 | 40 | let _ = 41 | printf "Tests (match ... with):\n"; flush stdout; 42 | test 1 None (Some " ") None; 43 | 44 | test 2 45 | (Some (ref [| "alpha"; "beta"; "2 gamma" |])) 46 | (Some "Hello World!") 47 | None; 48 | 49 | test 3 None None (Some " 123 "); 50 | 51 | test 4 (Some (ref [| |])) (Some "") (Some "abc") 52 | 53 | let _ = 54 | match "" with 55 | (RE (("a" as a) | ("b" as a))) | a -> () 56 | 57 | let hello_who s = 58 | match s with 59 | RE _* ['h''H']"ello" ","? space* 60 | ((word | space)* word as someone) -> String.capitalize someone 61 | 62 | | _ -> "nobody" 63 | 64 | let _ = 65 | printf "Extraction of the recipient's name\n"; flush stdout; 66 | List.iter (fun s -> 67 | printf "Hello who: %S\n" s; 68 | printf " -> %S\n" (hello_who s); 69 | flush stdout) 70 | [ "Hello World!"; 71 | "First of all, hello everybody."; 72 | "*** hello world ***"; 73 | "Hello, Caml riders!" ] 74 | 75 | let _ = 76 | printf "Test (local and global bindings):\n"; flush stdout; 77 | match "" with 78 | (RE (word as x | space+ (word as x))* ) | _ -> 79 | printf "Passed.\n" 80 | 81 | let _ = 82 | printf "Test (repetition range + end of line):\n"; flush stdout; 83 | let f s = 84 | match s with 85 | RE '-'? digit{1-4} eol -> printf "%S has less than 5 digits.\n" s 86 | | RE '-'? digit{5+} eol -> printf "%S has at least 5 digits.\n" s 87 | | _ -> printf "%S is not a number.\n" s in 88 | List.iter f ["123"; "1234"; "12345"; "12345678"; "-1234"; "*1"; "1*"; 89 | "9\n*" ] 90 | 91 | let test f (expected, s) = 92 | let (success, result) = f s in 93 | let passed = expected = success in 94 | if passed then (printf "[OK] %s%s\n" s 95 | (match result with None -> "" 96 | | Some x -> sprintf " -> %s"x); 97 | flush stdout) 98 | else 99 | (print_endline (s ^ "Failed"); flush stdout; failwith s) 100 | 101 | let () = 102 | printf "Test (no case: the ~ operator):\n"; flush stdout; 103 | List.iter 104 | (test (function 105 | RE "hello"~ " World!" -> true, None 106 | | _ -> false, None)) 107 | [ true, "Hello World!"; 108 | true, "hElLO World!"; 109 | false, "hello WORLD!" ] 110 | 111 | let () = 112 | printf "Test (try ... with):\n"; flush stdout; 113 | try failwith "Hello World!" 114 | with 115 | Failure RE "Hello" space* (word as w) -> printf "OK: %s\n" w 116 | | Failure s -> printf "Failure: %s\n" s 117 | 118 | let () = 119 | printf "Test (function ... -> ...):\n"; flush stdout; 120 | let f = 121 | function 122 | RE "Hello" space* (word as w) -> printf "OK: %s\n" w 123 | | _ -> printf "Error\n" in 124 | f "Hello Everybody"; 125 | f "Hello Caml!" 126 | 127 | let () = 128 | printf "Test (backreferences):\n"; flush stdout; 129 | let f s = 130 | match s with 131 | RE 132 | (digit+ as x | (word as x)) (* x = global id *) 133 | (" " as sp !sp)* (* sp = local id *) 134 | !x -> true, Some x 135 | | _ -> false, None in 136 | List.iter (test f) 137 | [ true, "123123"; 138 | false, "123 123"; 139 | true, "123 123"; 140 | true, "aaaa"; 141 | false, "abc"; 142 | false, "ab1ab1" ] 143 | 144 | 145 | let print_named_groups l = 146 | List.iter 147 | (fun (name, positions) -> 148 | printf "%s:" name; 149 | List.iter (fun i -> printf " %i" i) positions; 150 | printf "\n") 151 | l 152 | 153 | (* Lower level feature: RE_PCRE returns the source of the regexp, 154 | to be used with specific compilation or search options. *) 155 | 156 | let _ = 157 | let (src, named_groups) = 158 | RE_PCRE 159 | (("a"|"b"+)? digit{2}) as start 160 | (space* word)+ ( digit{1} (word as last_word) 161 | | digit{1} Lazy (word as last_word) 162 | | digit{3} (word as last_word)) in 163 | 164 | printf "Regexp source: %S\n" src; 165 | printf "Named groups and their locations:\n"; 166 | print_named_groups named_groups; 167 | flush stdout 168 | 169 | let charset_union = RE_PCRE digit | space | "a" | ['A'-'Z'] 170 | 171 | 172 | (* Laziness *) 173 | 174 | let _ = 175 | printf "Laziness and backreferences:\n"; flush stdout; 176 | let f = function 177 | RE _* Lazy 178 | "<" (_* Lazy as tag) ">" (_* Lazy as contents) "" -> 179 | sprintf " -> (%S, %S)" tag contents 180 | | s -> "" in 181 | List.iter (fun s -> 182 | printf "%S%s\n" s (f s); 183 | flush stdout) 184 | [ "hello"; 185 | ""; 186 | "text" ] 187 | 188 | let _ = 189 | printf "Possessiveness + backreferences + laziness 190 | Take the first word, find its next occurence and return the text 191 | in the middle:\n"; flush stdout; 192 | let f = function 193 | RE letter* Possessive as x (_* Lazy as y) !x -> sprintf " -> %S" y 194 | | _ -> "" in 195 | List.iter (fun s -> 196 | printf "%S%s\n" s (f s); 197 | flush stdout) 198 | [ "abc,ab,abc,abc" ] 199 | 200 | 201 | 202 | (* Macros *) 203 | 204 | 205 | let swap = 206 | REPLACE 207 | "(" space* (word as x) space* "," 208 | space* (word as y) space* ")" -> "(" ^ y ^ "," ^ x ^ ")" 209 | 210 | let swap = 211 | REPLACE_FIRST 212 | "(" space* (word as x) space* "," 213 | space* (word as y) space* ")" -> "(" ^ y ^ "," ^ x ^ ")" 214 | 215 | let _ = 216 | let test s = 217 | let s1 = swap s in 218 | let s2 = swap s1 in 219 | printf "swap 0: %s\nswap 1: %s\nswap 2: %s\n" s s1 s2 in 220 | 221 | test "tuples: (x,y)/(1, 2)/(martin, jambon)" 222 | 223 | 224 | RE host = (['.' '-'] | letter | digit)+ 225 | 226 | let hide_email = 227 | flush stdout; 228 | REPLACE "@" (host as host) -> "@" ^ (String.make (String.length host) '.') 229 | 230 | let _ = 231 | let test s = 232 | printf "\ 233 | before: %s 234 | after : %s 235 | " s (hide_email s) in 236 | test "this is a list of email addresses: joe@sixpack.com, martin@home" 237 | 238 | 239 | let _ = 240 | let i = ref 0 in 241 | let f = SEARCH "a" -> incr i in 242 | f "aaa"; 243 | printf "This should be 3: %i\n" !i; 244 | flush stdout 245 | 246 | let _ = 247 | let f = MAP ['a'-'z']+ as w -> `Word (String.capitalize w) in 248 | f "hello world!" 249 | 250 | let _ = 251 | List.iter print_endline ((SPLIT ",") "a,b,c") 252 | 253 | let _ = 254 | let l = 255 | List.filter 256 | (fun s -> (FILTER _* ".ml" eos) s) 257 | (Array.to_list (Sys.readdir ".")) in 258 | printf "*.ml: %s\n%!" (String.concat " " l) 259 | 260 | 261 | (* Sharing the subgroups array *) 262 | 263 | let _ = 264 | let f ?share s = 265 | try (MATCH "+" (print* as x) -> print_endline (" Found " ^ x)) ?share s 266 | with Not_found -> print_endline " Not found" in 267 | let g ?share () = 268 | print_endline "2 found:"; 269 | flush stdout; 270 | List.iter 271 | (f ?share) 272 | [ "+a"; "b"; "+blop" ] in 273 | g ~share:true (); 274 | g ~share:false () 275 | 276 | 277 | (* Positions *) 278 | 279 | let _ = 280 | match "a1234yz", 333 with 281 | RE "a" as s ("bc" %pos | digit+ %pos), _ 282 | | (s, pos) -> 283 | printf "a = %s, 5 = %i\n%!" s pos; 284 | assert (s = "a" && pos = 5) 285 | 286 | let _ = 287 | let find = 288 | SEARCH_FIRST "(" %pos1 (_* Lazy as x) %pos2 ")" -> 289 | printf "%s %i-%i\n%!" x pos1 pos2; 290 | assert (pos1 = 11 && pos2 = 17 && x = "result") in 291 | find "0123456789(result)..." 292 | 293 | 294 | (* No real tuple (maybe not fixed yet) *) 295 | let _ = 296 | match "abc", "def" with 297 | (RE _, RE _) 298 | | "abc", _ -> ignore `Case1 299 | | _ -> ignore `Case2 300 | 301 | 302 | (* Assertions *) 303 | let _ = 304 | let search = 305 | SEARCH 306 | alpha{2}.> digit+ Possessive as m 307 | < %pos _ as x . Not (alpha | "_"+ !m)+ > as n -> 308 | printf "num = %s; pos = %i; x = %s\n%!" n pos x in 309 | search "abc1 23 456xyz 7. x8 33_33Y 34_35Y 36__36Y" 310 | 311 | 312 | let print_triplets_of_letters = SEARCH -> print_endline x 313 | let _ = print_triplets_of_letters "Hello World!" 314 | 315 | let _ = List.iter print_endline ((SPLIT "") "abc");; 316 | 317 | RE test_warnings = 318 | ("a" as plus_warning)+ 319 | 320 | let _ = match "" with RE ("a" as star_warning)* test_warnings -> () | _ -> () 321 | 322 | 323 | (* Converters *) 324 | let _ = 325 | let f s = 326 | printf "got it!\n"; 327 | int_of_string s in 328 | let n = 329 | match "(123)", 456 with 330 | (RE "(" (digit+ as n := f) ")"), _ 331 | | (RE _{1-3} as n = -1), _ 332 | | (RE digit+ as n : int), _ 333 | | _, n -> n in 334 | assert (n = 123); 335 | printf "123=%i\n%!" n 336 | 337 | 338 | (* debugging *) 339 | let _ = 340 | match "a" with 341 | (RE "b") | (RE "c") | (RE "a") -> () 342 | | _ -> failwith "test failed" 343 | 344 | let () = () in ();; 345 | 346 | 347 | 348 | (* General syntax for local exception handling (let try ... in ... with ...) *) 349 | let _ = 350 | try 351 | (let try x = () 352 | and z = () in 353 | raise Exit 354 | with Exit -> assert false) 355 | with Exit -> 356 | print_endline "OK for local exception handling (let-try-in-with)" 357 | ;; 358 | 359 | let RE (_* as x) = "hello" in assert ("" <> "hello");; 360 | 361 | 362 | (* Shortcut syntax *) 363 | let _ = 364 | let RE (alpha+ as x) = "abc def" in 365 | assert (x = "abc"); 366 | assert (x = "abc"); 367 | print_endline "shortcut is OK" 368 | 369 | (* Shortcut syntax with local exception handling *) 370 | let _ = 371 | try 372 | (let try /"xy" as x/ = "xyz" in 373 | ignore x; 374 | raise Exit 375 | with Exit -> assert false) 376 | with Exit -> 377 | print_endline "OK for local exception handling (RE)" 378 | 379 | let /alpha+ space+ (alpha+ as x)/ = "xyz abc " in 380 | assert (x = "abc");; 381 | 382 | 383 | (* Similar tests for str_item's let in *) 384 | let try /alpha+ space+ (alpha+ as x)/ = "" in 385 | assert false 386 | with Match_failure _ -> print_endline "OK for str_item let-try-in-with";; 387 | 388 | 389 | (* Global value bindings *) 390 | let /[digit "."]* as version/ = Sys.ocaml_version 391 | 392 | RE int = digit+ 393 | let /(int as major : int) "." (int as minor : int) 394 | ("." (int as patchlevel : int) | ("" as patchlevel = 0)) 395 | ("+" (_* as additional_info) | ("" as additional_info))/ = 396 | Sys.ocaml_version 397 | 398 | let _ = 399 | printf "OCaml version: major=%i minor=%i patchlevel=%i additional-info=%S\n%!" 400 | major minor patchlevel additional_info 401 | 402 | 403 | (* Parametrized regexps *) 404 | let _ = 405 | let find s = 406 | match "abcabcdefggghijkl" with 407 | RE _* Lazy ( @s+ as x) -> x 408 | | _ -> assert false in 409 | assert (find "abc" = "abcabc"); 410 | assert (find "g" = "ggg") 411 | 412 | let _ = 413 | let find_not_after x y = 414 | COLLECT < Not ( @x ) . > ":" @y "=" (alnum* as result) -> result in 415 | 416 | let text = "a:b=, xy:z=1, x:z=25, _:z=99" in 417 | assert (find_not_after "x" "z" text = ["1"; "99"]); 418 | assert (find_not_after "" "z" text = []); 419 | assert (find_not_after "a" "b" text = []); 420 | assert (find_not_after "a" "" text = []); 421 | assert (find_not_after "?" "b" text = [""]) 422 | 423 | let _ = 424 | let find_not_between ~before ~after ~label = 425 | COLLECT 426 | < Not < ( @before ) . > @label "=" alnum* @after > 427 | @label "=" (alnum* as result) -> result in 428 | 429 | let text = "(field=12) (field=OK, field=) (field=yes" in 430 | assert (find_not_between ~before:"(" ~after:")" ~label:"field" text = 431 | ["OK"; ""; "yes"]) 432 | 433 | 434 | let _ = 435 | let field key = 436 | printf "Case-insensitive search for field %S:\n%!" key; 437 | SEARCH @key~ "=" (alnum* as data) -> printf " %s=%s\n%!" key data in 438 | 439 | let text = "hello name=Martin, AGE=27, Name=Jambon" in 440 | printf "Text: %S\n" text; 441 | field "name" text; 442 | field "age" text 443 | 444 | (* Null character *) 445 | let _ = 446 | match "" with 447 | / "\000abc" / -> assert false 448 | | _ -> () 449 | 450 | let _ = 451 | let zero_abc = "\000abc" in 452 | match "" with 453 | / @zero_abc / -> assert false 454 | | _ -> () 455 | 456 | let ( % ) = (+) 457 | 458 | let view T = fun x -> true 459 | let view Lazy = fun x -> try Some (Lazy.force x) with _ -> None 460 | 461 | let _ = 462 | match "a", lazy (1+1), lazy (3, lazy 4) with 463 | %T, %Lazy (2 as x), %Lazy (y, %Lazy z) -> 464 | assert (x = 2); 465 | assert (y = 3); 466 | assert (z = 4); 467 | printf "Passed view test 1\n%!" 468 | | _ -> assert false 469 | 470 | 471 | 472 | type 'a lazy_list = Empty | Cons of ('a * 'a lazy_list lazy_t) 473 | 474 | let view Empty = 475 | fun l -> 476 | try Lazy.force l = Empty 477 | with _ -> false 478 | 479 | let view Cons = 480 | fun l -> 481 | try 482 | match Lazy.force l with 483 | Cons x -> Some x 484 | | Empty -> None 485 | with _ -> None 486 | 487 | let _ = 488 | let l = lazy (Cons (1, lazy (Cons (2, lazy Empty)))) in 489 | match l with 490 | %Empty 491 | | %Cons (_, %Empty) -> assert false 492 | | %Cons (x1, %Cons (x2, %Empty)) -> 493 | assert (x1 = 1); 494 | assert (x2 = 2); 495 | printf "Passed view test 2\n%!" 496 | | _ -> assert false 497 | 498 | let _ = 499 | let view XY = fun o -> Some (o#x, o#y) in 500 | let view S = fun o -> Some o#s in 501 | let o = 502 | (object 503 | method x = 0 504 | method y = 1 505 | method s = "abc" 506 | end) in 507 | match o with 508 | %XY (1, _) -> assert false 509 | | %S / "A" / -> assert false 510 | | %S ( / upper as c / | / lower as c / as s) -> 511 | assert (c = "a"); 512 | assert (s = "abc"); 513 | printf "Passed view test 3\n%!" 514 | 515 | module Test = 516 | struct 517 | let view Even = fun x -> x land 1 = 0 518 | end 519 | 520 | let _ = 521 | match 0 with 522 | %Test.Even -> printf "Passed view test 4\n%!" 523 | | _ -> assert false 524 | 525 | 526 | let _ = 527 | let f = COLLECTOBJ (letter+ as x) (digit+ as y : int) in 528 | match List.map (fun x -> (x#x, x#y)) (f "ab12ER5") with 529 | [ ("ab", 12); ("ER", 5) ] -> printf "Passed COLLECTOBJ test\n%!" 530 | | _ -> assert false 531 | 532 | let _ = 533 | match (CAPTURE (letter+ as x) (digit+ as y : int)) "ab12ER5" with 534 | Some o -> 535 | assert (o#x = "ab" && o#y = 12); 536 | printf "Passed CAPTURE test\n%!" 537 | | None -> assert false 538 | 539 | let _ = 540 | match (SPLIT "x" ) "axbxc" with 541 | [ "a"; "b"; "c" ] -> 542 | printf "Passed basic SPLIT test\n%!" 543 | | _ -> assert false 544 | 545 | let _ = 546 | match (SPLIT < "x" > ) "axbxc" with 547 | [ "a"; "xb"; "xc" ] -> 548 | printf "Passed zero-length SPLIT test (bug in versions <= 1.0.1)\n%!" 549 | | _ -> assert false 550 | 551 | let () = 552 | try 553 | match "a" with 554 | / "a" ("" as x) | ("b" as x) / -> 555 | ignore x; 556 | printf "Passed zero-length capture in alternative \ 557 | (bug in versions <= 1.0.3)\n%!" 558 | | _ -> assert false 559 | with Not_found -> 560 | assert false 561 | 562 | let () = 563 | match "1" with 564 | / ("0" as x = 0) | ("1" as x := int_of_string) / -> 565 | if x = 1 then 566 | printf "Passed alt/= test (bug in versions <= 1.0.4)\n%!" 567 | else 568 | assert false 569 | | _ -> assert false 570 | 571 | type test = { x : int; y : int } 572 | 573 | let f x = 574 | match x with 575 | | {x = 1; _} -> () 576 | | _ -> () 577 | 578 | let g x = 579 | match x with 580 | | {x} -> () 581 | | _ -> () 582 | -------------------------------------------------------------------------------- /pcre/pcre_lib.ml: -------------------------------------------------------------------------------- 1 | (*pp camlp4orf *) 2 | 3 | 4 | open Printf 5 | 6 | open Camlp4.PreCast 7 | 8 | open Mm_util 9 | open Regexp_ast 10 | 11 | let _ = 12 | Constants.mod_runtime := "Run_mikmatch_pcre"; 13 | Constants.mod_runtime_mt := "Run_mikmatch_pcre" 14 | 15 | (* output for PCRE: Perl Compatible Regular Expressions *) 16 | 17 | let special_regexps = 18 | let _loc = Constants.dummy_loc in 19 | [ "any", (* any character except newline *) 20 | Special (_loc, ".", ("any", Some 1)); 21 | 22 | "bol", (* beginning of line *) 23 | Special (_loc, "(?:^|(?<=\n))", ("bol", Some 0)); 24 | 25 | "eol", (* end of line *) 26 | Special (_loc, "(?:$|(?=\n))", ("eol", Some 0)); 27 | 28 | "bos", (* beginning of string *) 29 | Special (_loc, "^", ("bos", Some 0)); 30 | 31 | "eos", (* end of string *) 32 | Special (_loc, "$", ("eos", Some 0)); ] 33 | 34 | 35 | let string c = String.make 1 c 36 | 37 | let quote_char c = 38 | match c with 39 | '\\' | '^' | '$' | '.' | '[' | ']' | '|' 40 | | '(' | ')' | '?' | '*' | '+' | '{' | '}' -> 41 | let s = Bytes.create 2 in 42 | Bytes.set s 0 '\\'; 43 | Bytes.set s 1 c; 44 | Bytes.unsafe_to_string s 45 | | '\000' -> "\\000" 46 | | c -> string c 47 | 48 | let quote_char_in_class c = 49 | match c with 50 | '\\' -> "\\\\" 51 | | ']' -> "\\]" 52 | | '-' -> "\\-" 53 | | '^' -> "\\^" 54 | | '\000' -> "\\000" 55 | | c -> string c 56 | 57 | 58 | let reorder_charset l = 59 | if l = [] then 60 | invalid_arg "reorder_charset"; 61 | List.sort Char.compare l 62 | 63 | let compact l = 64 | let finish first last = 65 | match Char.code last - Char.code first with 66 | 0 -> quote_char_in_class first 67 | | 1 -> (quote_char_in_class first) ^ (quote_char_in_class last) 68 | | _ -> (quote_char_in_class first) ^ "-" ^ (quote_char_in_class last) in 69 | 70 | let rec extend first last = 71 | function 72 | [] -> [finish first last] 73 | | c :: rest -> 74 | if Char.code c = Char.code last + 1 then 75 | extend first c rest 76 | else 77 | finish first last :: extend c c rest in 78 | 79 | match l with 80 | [] -> [] 81 | | hd :: tl -> extend hd hd tl 82 | 83 | 84 | let compact_charset _loc l = 85 | let sorted = List.sort Char.compare l in 86 | let (zero, nozero) = 87 | match sorted with 88 | '\000' :: rest -> ("\\000", rest) 89 | | l -> ("", l) in 90 | String.concat "" (zero :: compact nozero) 91 | 92 | 93 | let rec rm_closed = function Closed ast -> rm_closed ast | ast -> ast 94 | 95 | 96 | let add_const accu s = 97 | accu := `String s :: !accu 98 | let add_var accu e = 99 | accu := `Var (e, false) :: !accu 100 | let add_var_nocase accu e = 101 | accu := `Var (e, true) :: !accu 102 | 103 | let rec to_string ?(top = false) ((last_group, named_groups) as groups) accu = 104 | function 105 | Epsilon _loc -> groups 106 | | Special (_loc, s, _) -> add_const accu s; groups 107 | | Characters (_loc, set) -> 108 | let l = Charset.list set in 109 | (match l with 110 | [] -> groups 111 | | [c] -> 112 | add_const accu (quote_char c); 113 | groups 114 | | _ -> 115 | add_const accu (sprintf "[%s]" (compact_charset _loc l)); 116 | groups) 117 | 118 | | Sequence (_loc, re1, re2) -> 119 | let groups = to_string groups accu re1 in 120 | to_string groups accu re2 121 | 122 | | Alternative (_loc, re, Epsilon _, _, _) -> 123 | 124 | let must_group = 125 | not top && 126 | match rm_closed re with 127 | Characters _ | Special _ | Bind _ | Alternative _ -> false 128 | | _ -> true in 129 | if must_group then add_const accu "(?:"; 130 | let (last_group, named_groups) as groups = 131 | to_string (last_group, named_groups) accu re in 132 | if must_group then add_const accu ")"; 133 | add_const accu "?"; 134 | groups 135 | 136 | | Alternative (_loc, re1, re2, _, _) -> 137 | 138 | let must_group = not top in 139 | if must_group then add_const accu "(?:"; 140 | let (last_group, named_groups1) = 141 | to_string (last_group, named_groups) accu re1 in 142 | add_const accu "|"; 143 | 144 | let (last_group, named_groups2) = 145 | to_string (last_group, named_groups) accu re2 in 146 | if must_group then add_const accu ")"; 147 | 148 | let check_balance set1 set2 = 149 | if not (Named_groups.equal set1 set2) then 150 | (let missing = 151 | S.diff 152 | (Named_groups.union set1 set2) 153 | (Named_groups.inter set1 set2) in 154 | Messages.unbalanced_bindings _loc (list_named_groups missing)) in 155 | 156 | let (groups1, positions1) = named_groups1 157 | and (groups2, positions2) = named_groups2 in 158 | check_balance groups1 groups2; 159 | check_balance positions1 positions2; 160 | 161 | (last_group, (merge groups1 groups2, merge positions1 positions2)) 162 | 163 | | Repetition (_loc, (kind, greedy), re) -> 164 | let must_group = 165 | not top && 166 | match rm_closed re with 167 | Characters _ | Special _ | Bind _ 168 | | Alternative _ | Possessive _ -> false 169 | | _ -> true in 170 | if must_group then 171 | add_const accu "(?:"; 172 | let groups = to_string (last_group, named_groups) accu re in 173 | if must_group then 174 | add_const accu ")"; 175 | let rec convert = function 176 | Star -> "*" 177 | | Plus -> "+" 178 | | Option -> "?" 179 | | Range (m, None) -> 180 | (match m with 181 | 1 -> "" 182 | | _ -> sprintf "{%i}" m) 183 | | Range (m, Some None) -> 184 | (match m with 185 | 0 -> "*" 186 | | 1 -> "+" 187 | | _ -> sprintf "{%i,}" m) 188 | | Range (m, Some (Some n)) -> 189 | if m = n then 190 | convert (Range (m, None)) 191 | else if m = 0 && n = 1 then "?" 192 | else sprintf "{%i,%i}" m n in 193 | let base_op = convert kind in 194 | add_const accu base_op; 195 | if not greedy && base_op <> "" then 196 | add_const accu "?"; 197 | groups 198 | 199 | | Possessive (_loc, re) -> 200 | add_const accu "(?>"; 201 | let groups = to_string groups accu re in 202 | add_const accu ")"; 203 | groups 204 | 205 | | Lookahead (_loc, positive, re) -> 206 | let start = if positive then "(?=" else "(?!" in 207 | add_const accu start; 208 | let groups = to_string groups accu re in 209 | add_const accu ")"; 210 | groups 211 | 212 | | Lookbehind (_loc, positive, re) -> 213 | let start = if positive then "(?<=" else "(? 220 | let last_group = succ last_group in 221 | let named_groups = 222 | add_new_group _loc name conv last_group named_groups in 223 | add_const accu "("; 224 | let groups = to_string (last_group, named_groups) accu re in 225 | add_const accu ")"; 226 | groups 227 | 228 | | Bind_pos (_loc, name) -> 229 | let last_group = succ last_group in 230 | let named_groups = add_new_pos _loc name last_group named_groups in 231 | add_const accu "()"; 232 | (last_group, named_groups) 233 | 234 | | Backref (_loc, name) -> 235 | (try 236 | match Named_groups.find name (fst named_groups) with 237 | [] -> Messages.invalid_backref _loc name 238 | | [(_, n, conv)] -> add_const accu (sprintf "\\%i" n); groups 239 | | l -> 240 | add_const accu (sprintf "(?:%s)" 241 | (String.concat "|" 242 | (List.map (fun (_, n, conv) -> sprintf "\\%d" n) l))); 243 | groups 244 | with Not_found -> Messages.invalid_backref _loc name) 245 | 246 | | Variable (_loc, e) -> add_var accu e; groups 247 | | Nocase_variable (_loc, e) -> add_var_nocase accu e; groups 248 | 249 | | Closed ast -> 250 | let saved_named_groups = named_groups in 251 | let (last_group, named_groups) = to_string groups accu ast in 252 | (last_group, saved_named_groups) 253 | 254 | 255 | (* Syntax expanders *) 256 | 257 | open Constants 258 | 259 | let nocasify e = 260 | let _loc = Ast.loc_of_expr e in 261 | <:expr< $uid: !mod_runtime$.nocase $e$ >> 262 | 263 | 264 | let make_get_re_noargs _loc re_name re_args = 265 | let empty = <:expr< "" >> in 266 | let empty_args = List.map (fun (name, arg) -> (name, empty)) re_args in 267 | Match.make_get_re _loc re_name empty_args 268 | 269 | 270 | let process_regexp _loc ~sharing re re_name = 271 | let accu = ref [] in 272 | let (last_group, named_groups) = 273 | to_string ~top:true (0, (Named_groups.empty, Named_groups.empty)) 274 | accu re in 275 | let re_args, re_source = 276 | Match.get_re_source 277 | ~quote_expr: <:expr< $uid: !mod_runtime$.quote_string >> 278 | ~nocasify accu in 279 | let shared_id = shared re_name in 280 | let get_re_noargs = make_get_re_noargs _loc re_name re_args in 281 | let postbindings = 282 | if sharing then 283 | [ shared_id, <:expr< Pcre.make_ovector $get_re_noargs$ >>; 284 | shared_ovector re_name, <:expr< snd $lid:shared_id$ >> ] 285 | else [] in 286 | (re_args, re_source, named_groups, postbindings) 287 | 288 | 289 | let raises_exn = function 290 | <:expr< raise $exn$ >> -> true 291 | | _ -> false 292 | 293 | let string_match _loc re_name get_re target substrings pos success failure = 294 | let match_it = 295 | <:expr< Pcre.exec ~rex:$get_re$ ~pos:$int:string_of_int pos$ $target$ >> in 296 | 297 | if raises_exn failure then (* shortcut *) 298 | <:expr< 299 | let $lid:substrings$ = 300 | try $match_it$ 301 | with [ Not_found -> $failure$ ] in 302 | $success$ 303 | >> 304 | 305 | else 306 | <:expr< 307 | try 308 | let $lid:substrings$ = 309 | try $match_it$ 310 | with [ Not_found -> $raise_exit _loc$ ] in 311 | $success$ 312 | with 313 | [ $patt_exit _loc$ -> $failure$ ] >> 314 | 315 | 316 | let matched_group _loc substrings n = 317 | <:expr< Pcre.get_substring $lid:substrings$ $int:string_of_int n$ >> 318 | 319 | let matched_position _loc substrings n = 320 | <:expr< Pcre.get_substring_ofs $lid:substrings$ $int:string_of_int n$ >> 321 | 322 | 323 | 324 | let compile_regexp_general ~anchored ~mt _loc re_args re_source = 325 | let default_flags = <:expr< [`DOLLAR_ENDONLY] >> in 326 | let anchored_flags = <:expr< [`ANCHORED; `DOLLAR_ENDONLY] >> in 327 | let flags = if anchored then anchored_flags else default_flags in 328 | let compile_string e = 329 | <:expr< Pcre.regexp ~flags:$flags$ $e$ >> in 330 | match re_args with 331 | [] -> 332 | let re_string = Match.compute_re_string _loc re_source in 333 | compile_string re_string 334 | | _ -> 335 | let key = 336 | match re_args with 337 | [name, _] -> <:expr< $id: <:ident< $lid:name$ >> $ >> 338 | | _ -> 339 | let expr_list = 340 | List.map ( 341 | fun (name, _) -> 342 | <:expr< $id: <:ident< $lid:name$ >> $ >> 343 | ) re_args in 344 | let tup = comma_expr_of_list _loc expr_list in 345 | <:expr< ( $tup: tup$ ) >> in 346 | let compile = 347 | let re_string = Match.compute_re_string _loc re_source in 348 | compile_string re_string in 349 | 350 | let find = 351 | Match.protect mt 352 | <:expr< $uid: !mod_runtime$.Mem.find tbl key >> in 353 | let add = 354 | Match.protect mt 355 | <:expr< $uid: !mod_runtime$.Mem.unsafe_add tbl key data >> in 356 | 357 | let check_cache = 358 | <:expr< 359 | let key = $key$ in 360 | try $find$ 361 | with [ Not_found -> 362 | let data = $compile$ in 363 | do { $add$; 364 | data } ] >> in 365 | 366 | let get_regexp = 367 | List.fold_right 368 | (fun (argname, _) e -> <:expr< fun $lid:argname$ -> $e$ >>) 369 | re_args 370 | check_cache in 371 | 372 | let result = 373 | <:expr< 374 | let tbl = $uid: !mod_runtime$.Mem.create 100 in 375 | $get_regexp$ >> in 376 | 377 | if mt then <:expr< let mutex = Mutex.create () in $result$ >> 378 | else result 379 | 380 | 381 | let compile_regexp_match = compile_regexp_general ~anchored:true 382 | let compile_regexp_search = compile_regexp_general ~anchored:false 383 | 384 | let convert _loc conv e = 385 | match conv with 386 | None -> e 387 | | Some f -> 388 | match f with 389 | `Int -> <:expr< Stdlib.int_of_string $e$ >> 390 | | `Float -> <:expr< Stdlib.float_of_string $e$ >> 391 | | `Option -> <:expr< let s = $e$ in 392 | if s = "" then None 393 | else Some s >> 394 | | `Custom f -> <:expr< $f$ $e$ >> 395 | | `Value e' -> <:expr< do { ignore $e$; $e'$ } >> 396 | 397 | let insert_bindings_poly 398 | ?(skip_empty_captures = false) (* for compatibility with 399 | old versions of Pcre (before 2004-04-29) *) 400 | ?(get_fst = false) 401 | make_expr _loc substrings set e = 402 | Named_groups.fold 403 | (fun name l e -> 404 | match l with 405 | [] -> assert false 406 | | (_loc, _, _) :: _ -> 407 | let find_it = 408 | List.fold_right 409 | (fun (_loc, n, conv) accu -> 410 | let expr = make_expr _loc substrings n in 411 | match accu with 412 | None -> Some (convert _loc conv expr) 413 | | Some e -> 414 | let result = 415 | if skip_empty_captures then 416 | <:expr< 417 | try 418 | match $expr$ with 419 | [ "" -> raise Not_found 420 | | s -> $convert _loc conv <:expr< s >>$ ] 421 | with [ Not_found -> $e$ ] >> 422 | else 423 | <:expr< 424 | try $convert _loc conv expr$ 425 | with [ Not_found -> $e$ ] >> in 426 | Some result) 427 | l 428 | None in 429 | let result = 430 | match find_it with 431 | None -> assert false 432 | | Some e -> e in 433 | let patt = 434 | if get_fst then <:patt< ( $lid:name$, _ ) >> 435 | else <:patt< $lid:name$ >> in 436 | <:expr< let $patt$ = $result$ in $e$ >>) 437 | set 438 | e 439 | 440 | let insert_group_bindings = 441 | insert_bindings_poly matched_group 442 | let insert_position_bindings = 443 | insert_bindings_poly ~get_fst:true matched_position 444 | 445 | let insert_bindings _loc substrings (group_bindings, position_bindings) e = 446 | insert_group_bindings _loc substrings group_bindings 447 | (insert_position_bindings _loc substrings position_bindings e) 448 | 449 | 450 | let substrings_of_target target = 451 | match target with 452 | <:expr< $lid:s$ >> -> s ^ "_result" 453 | | _ -> assert false 454 | 455 | let match_and_bind _loc re_name get_re target named_groups success failure = 456 | let substrings = substrings_of_target target in 457 | string_match _loc re_name get_re target substrings 0 458 | (insert_bindings _loc substrings named_groups success) 459 | failure 460 | 461 | let macro_replace_generic 462 | f _loc (re_name : string) get_re target_name named_groups expr = 463 | let target = <:expr< $lid:target_name$ >> in 464 | let substrings = substrings_of_target target in 465 | <:expr< 466 | fun ?pos $lid:target_name$ -> 467 | Pcre.$lid: f$ 468 | ~rex:$get_re$ 469 | ?pos 470 | ~subst:(fun $lid:substrings$ -> 471 | $insert_bindings _loc substrings named_groups expr$) 472 | $target$ >> 473 | 474 | let macro_replace = macro_replace_generic "substitute_substrings" 475 | let macro_replace_first = macro_replace_generic "substitute_substrings_first" 476 | 477 | let macro_match ?(ignore_bindings = false) 478 | _loc re_name get_re target_name named_groups expr = 479 | let target = <:expr< $lid:target_name$ >> in 480 | let substrings = substrings_of_target target in 481 | let sv = shared_ovector re_name in 482 | let result = 483 | if ignore_bindings then expr 484 | else insert_bindings _loc substrings named_groups expr in 485 | <:expr< 486 | fun ?(share = False) ?pos $lid:target_name$ -> 487 | let $lid:substrings$ = 488 | if not share then 489 | Pcre.exec ~rex:$get_re$ ?pos $target$ 490 | else 491 | (Obj.magic 492 | ($target$, 493 | do { Pcre.unsafe_pcre_exec 494 | (Obj.magic 0 : Pcre.irflag) 495 | $get_re$ (match pos with [ None -> 0 | Some n -> n]) 496 | 0 $target$ 497 | $lid:sv$ None; 498 | $lid:sv$ }) : Pcre.substrings) in 499 | $result$ >> 500 | 501 | let macro_search_first = macro_match 502 | 503 | let macro_filter _loc re_name get_re target_name named_groups _ = 504 | let expr = <:expr< True >> in 505 | let e = 506 | macro_match ~ignore_bindings:true 507 | _loc re_name get_re target_name named_groups expr in 508 | <:expr< fun ?share ?pos x -> try $e$ ?share ?pos x 509 | with [ Not_found -> False ] >> 510 | 511 | let make_object _loc (group_bindings, position_bindings) = 512 | let all_ids = 513 | Named_groups.list_keys group_bindings @ 514 | Named_groups.list_keys position_bindings in 515 | let methods = 516 | List.fold_right 517 | (fun id accu -> <:class_str_item< method $id$ = $lid:id$ ; $accu$ >>) 518 | all_ids <:class_str_item< >>in 519 | <:expr< object $methods$ end >> 520 | 521 | let macro_capture _loc re_name get_re target_name named_groups _ = 522 | let expr = make_object _loc named_groups in 523 | let e = macro_match _loc re_name get_re target_name named_groups expr in 524 | <:expr< fun ?share ?pos x -> try Some ($e$ ?share ?pos x) 525 | with [ Not_found -> None ] >> 526 | 527 | let macro_function 528 | fun_name _loc (re_name : string) get_re target_name named_groups expr = 529 | let target = <:expr< $lid:target_name$ >> in 530 | let substrings = substrings_of_target target in 531 | <:expr< 532 | $uid: !mod_runtime$.$lid:fun_name$ 533 | $get_re$ 534 | (fun $lid:substrings$ -> 535 | $insert_bindings _loc substrings named_groups expr$) 536 | >> 537 | 538 | 539 | let macro_search = macro_function "search" 540 | let macro_map = macro_function "map" 541 | let macro_collect = macro_function "collect" 542 | 543 | let macro_collectobj _loc re_name get_re target_name named_groups _ = 544 | let e = make_object _loc named_groups in 545 | macro_function "collect" _loc re_name get_re target_name named_groups e 546 | 547 | let macro_split _loc re_name get_re target_name named_groups _ = 548 | <:expr< $uid: !mod_runtime$.split $get_re$ >> 549 | 550 | open Select_lib 551 | 552 | let lib = { predefined_regexps = special_regexps; 553 | unfold_range = false; 554 | process_regexp = process_regexp; 555 | compile_regexp_match = compile_regexp_match ~mt:false; 556 | compile_regexp_search = compile_regexp_search ~mt:false; 557 | match_and_bind = match_and_bind; 558 | wrap_match = (fun e -> e); 559 | wrap_user_case = (fun e -> e); 560 | really_wrap_match = false; 561 | really_wrap_user_case = false } 562 | 563 | let lib_mt = { lib with 564 | compile_regexp_match = compile_regexp_match ~mt:true; 565 | compile_regexp_search = compile_regexp_search ~mt:true } 566 | -------------------------------------------------------------------------------- /doc/mikmatch-manual.tex.mlx: -------------------------------------------------------------------------------- 1 | % -*- mode: latex -*- 2 | \documentclass[a4paper,12pt]{article} 3 | 4 | \usepackage{ae} 5 | \usepackage{hyperref} 6 | \usepackage{hevea} 7 | 8 | \usepackage{alltt} 9 | 10 | \usepackage[latin1]{inputenc} 11 | \usepackage[T1]{fontenc} 12 | \usepackage{fullpage} 13 | \usepackage{url} 14 | \usepackage{ocamldoc} 15 | 16 | \title{Mikmatch Version~## Sys.command "../VERSION" ##\\ 17 | Reference Manual} 18 | \author{Martin Jambon} 19 | \date{October 21, 2011} 20 | 21 | \setcounter{secnumdepth}{5} 22 | \setcounter{tocdepth}{4} 23 | 24 | \newcommand{\toplevelwarning}[0]{% 25 | {\footnotesize [toplevel support not available in Camlp4 3.10]}% 26 | } 27 | 28 | 29 | \begin{document} 30 | 31 | \maketitle 32 | 33 | This manual is available online as a single HTML file at\\ 34 | \url{http://mjambon.com/mikmatch-manual.html}\\ 35 | and as a PDF document at\\ 36 | \url{http://mjambon.com/mikmatch-manual.pdf}.\\ 37 | The home page of Mikmatch is:\\ 38 | \url{http://mjambon.com/micmatch.html} 39 | 40 | \tableofcontents 41 | 42 | \section{Introduction} 43 | 44 | Mikmatch is an extension of the syntax of the Objective Caml 45 | programming language (OCaml). 46 | Its purpose it to make the use of regular expressions easier and more 47 | generally to provide a set of tools for using OCaml as a 48 | powerful scripting language. 49 | Mikmatch believes that regular expressions are just like any other 50 | program and deserve better than a cryptic sequence of symbols placed 51 | in a string of a master program. 52 | 53 | Mikmatch currently supports two different libraries that implement 54 | regular expressions: Str which comes with the original distribution of 55 | OCaml and PCRE-OCaml which is an interface to PCRE (Perl Compatible 56 | Regular Expressions) for OCaml. 57 | These two flavors will be referred as Mikmatch\_str and 58 | Mikmatch\_pcre. 59 | They share a large number of syntaxic features, 60 | but Mikmatch\_pcre provides several macros that cannot be implemented 61 | safely in Mikmatch\_str. Therefore, it is recommended to use 62 | Mikmatch\_pcre. 63 | 64 | 65 | \section{Language} 66 | 67 | 68 | \subsection{Regular expressions} 69 | 70 | \subsubsection{Grammar of the regular expressions} 71 | 72 | Regular expressions support the syntax of Ocamllex regular 73 | expressions as of version 3.08.1 of the Objective Caml system 74 | (\url{http://caml.inria.fr/pub/docs/manual-ocaml/}), and 75 | several additional features. 76 | A regular expression (\textit{regexp}) is defined by the grammar that 77 | follows. The associativity rules are given by priority levels. 0 is the 78 | strongest priority. 79 | \begin{itemize} 80 | \item \underline{\textit{char-literal}} 81 | Match the given character (priority 0). 82 | 83 | \item \underline{\textbf{\_}} (underscore) 84 | Match any character (priority 0). 85 | 86 | \item \underline{\textit{string-literal}} 87 | Match the given sequence of characters (priority 0). 88 | 89 | \item \underline{\textbf{$[$}\textit{set-of-characters}\textbf{$]$}} 90 | Match one of the characters given by \textit{set-of-characters} 91 | (priority 0). 92 | The grammar for \textit{set-of-characters} is the following: 93 | \begin{itemize} 94 | \item \underline{\textit{char-literal}\textbf{$-$}\textit{char-literal}} 95 | defines a range of characters according to the iso-8859-1 encoding 96 | (includes ASCII). 97 | \item \underline{\textit{char-literal}} 98 | defines a singleton (a set containing just this character). 99 | \item \underline{\textit{string-literal}} 100 | defines a set that contains all the characters present in the given string. 101 | \item \underline{\textit{lowercase-identifier}} 102 | is replaced by the corresponding predefined regular expression; this 103 | regular expression must be exactly of length 1 and therefore 104 | represents a set of characters. 105 | \item \underline{\textit{set-of-characters} \textit{set-of-characters}} 106 | defines the union of two sets of characters. 107 | \end{itemize} 108 | 109 | \item \underline{\textit{regexp} \textbf{\#} \textit{regexp}} 110 | Match any of the characters given by the first regular expression 111 | except those which are given by the second one. Both regular 112 | expressions must be of length 1 and thus stand for a set of characters 113 | (priority 0). 114 | 115 | \item \underline{\textbf{$[$\^{}}\textit{set-of-characters}\textbf{$]$}} 116 | Same as \textbf{\_ \#} 117 | \textbf{$[$}\textit{set-of-characters}\textbf{$]$} 118 | (priority 0). 119 | 120 | 121 | \item \underline{\textit{regexp} \textbf{*}} 122 | Match the pattern given by \textit{regexp} 0 time or more (priority 0). 123 | 124 | \item \underline{\textit{regexp} \textbf{+}} 125 | Match the pattern given by \textit{regexp} 1 time or more (priority 0). 126 | 127 | \item \underline{\textit{regexp} \textbf{?}} 128 | Match the pattern given by \textit{regexp} at most once (priority 0). 129 | 130 | \item \underline{\textit{regexp}\textbf{\{}\textit{m\textbf{$-$}n}\textbf{\}}} 131 | Match \textit{regexp} at least \textit{m}~times and up to 132 | \textit{n}~times. \textit{m} and~\textit{n} must be integer 133 | literals (priority 0). 134 | 135 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{\}}} 136 | Same as \textit{regexp}\textbf{\{}\textit{n\textbf{$-$}n}\textbf{\}} 137 | (priority 0). 138 | 139 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{$+$\}}} 140 | Same as 141 | \textit{regexp}\textbf{\{}\textit{n}\textbf{\}}\textit{regexp}\textbf{$*$} 142 | (priority 0). 143 | 144 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{$-$\}}} 145 | Deprecated. Same as \textit{regexp}\textbf{\{}\textit{n}\textbf{$+$\}} 146 | (priority 0). 147 | 148 | \item \underline{\textbf{(} \textit{regexp} \textbf{)}} 149 | Match \textit{regexp} (priority 0). 150 | 151 | \item \underline{\textit{regexp} \textbf{\~{}}} 152 | Case insensitive match of the given 153 | regular expression \textit{regexp} according to the conventions of 154 | Objective Caml, i.e. according to the representation of characters 155 | in the iso-8859-1 standard (latin1) (priority 0). 156 | 157 | \item \underline{\textit{regexp} \textit{regexp}} 158 | Match the first regular expressions and then the second one (priority 1). 159 | 160 | \item \underline{\textit{regexp} \textbf{|} \textit{regexp}} 161 | Match one of these two regular expressions (priority 2). 162 | 163 | \item \underline{\textit{regexp} \textbf{as} \textit{lowercase-identifier}} 164 | Give a name to the substring that will be matched by the given pattern. 165 | This string becomes available under this name (priority 3). 166 | In-place conversions of the matched substring can be performed using 167 | one these three mechanisms: 168 | \begin{itemize} 169 | \item \underline{\textit{regexp} \textbf{as} 170 | \textit{lowercase-identifier} \textbf{:} \textit{built-in-converter}} 171 | where \textit{built-in-converter} is one of \texttt{int}, 172 | \texttt{float} or \texttt{option}. \texttt{int} behaves as 173 | \texttt{int\_of\_string}, \texttt{float} behaves as 174 | \texttt{float\_of\_string}, and \texttt{option} encapsulate the 175 | substring in an object of type \texttt{string option} using 176 | an equivalent of \texttt{function "" -> None | s -> Some s} 177 | 178 | \item \underline{\textit{regexp} \textbf{as} 179 | \textit{lowercase-identifier} \textbf{:=} \textit{converter}} 180 | where \textit{converter} is any function which converts a string 181 | into something else. 182 | 183 | \item \underline{\textit{regexp} \textbf{as} 184 | \textit{lowercase-identifier} \textbf{=} \textit{expr}} 185 | where \textit{expr} is any OCaml expression, usually a constant, which 186 | assigns a value to \textit{lowercase-identifier} without knowing 187 | which substring it matches. 188 | \end{itemize} 189 | 190 | \item \underline{\textbf{\%} \textit{lowercase-identifier}} 191 | Give a name to the position in the string 192 | that is being matched. 193 | This position becomes available as an int under this name. 194 | 195 | \item \underline{\textbf{@} \textit{expr}} 196 | Match the string given by \textit{expr}. \textit{expr} can be any 197 | OCaml expression of type string. Parentheses will be needed around 198 | \textit{expr} if it is a function application, or any construct of 199 | equivalent or lower precedence (see the Objective Caml manual, chapter 200 | ``The Objective Caml language'', section ``Expressions''). 201 | Matching such patterns is not thread-safe in any of the current 202 | implementations. 203 | Expressions that contain 204 | @ patterns should be protected against concurrent accesses. 205 | 206 | 207 | \end{itemize} 208 | 209 | 210 | \subsubsection{Named regular expressions} 211 | 212 | Naming regular expressions is possible using the following toplevel 213 | construct:\\ 214 | \underline{\textbf{RE} \textit{ident} \textbf{=} \textit{regexp}}\\ 215 | where \textit{ident} is a lowercase identifier. 216 | Regular expressions share their own namespace. 217 | 218 | For instance, we can define a phone number as a sequence of 3~digits 219 | followed by a dash and followed by 4~digits: 220 | \begin{verbatim} 221 | RE digit = ['0'-'9'] 222 | RE phone = digit{3} '-' digit{4} 223 | \end{verbatim} 224 | 225 | 226 | \subsubsection{Predefined sets of characters} 227 | 228 | The POSIX character classes (sets of characters) are available as 229 | predefined regular expressions of length 1. 230 | Their definition is given in table~\ref{posix-classes}. 231 | \begin{table} 232 | \caption{\label{posix-classes} 233 | POSIX character classes and their 234 | definition in the Mikmatch syntax} 235 | \tt 236 | \begin{tabular}{l} 237 | RE lower = \verb!['a'-'z']!\\ 238 | RE upper = \verb!['A'-'Z']!\\ 239 | RE alpha = lower | upper\\ 240 | RE digit = \verb!['0'-'9']!\\ 241 | RE alnum = alpha | digit\\ 242 | RE punct = \verb=["!\"#$%&'()*+,-./:;<=\verb!=>?@[\\]^_`{|}~"]! \\ %$ 243 | RE graph = alnum | punct\\ 244 | RE print = graph | ' '\\ 245 | RE blank = \verb!' ' | '\t'!\\ 246 | RE cntrl = \verb!['\x00'-'\x1F' '\x7F']!\\ 247 | RE xdigit = \verb![digit 'a'-'f' 'A'-'F']!\\ 248 | RE space = \verb![blank "\n\x0B\x0C\r"]!\\ 249 | \end{tabular} 250 | \end{table} 251 | 252 | \subsubsection{More predefined patterns} 253 | 254 | Some named regexps are predefined and available in 255 | every implementation of Mikmatch. These are the following: 256 | \begin{itemize} 257 | \item \texttt{int}: matches an integer (see 258 | table~\ref{predefined-regexps}). 259 | It accepts a superset of the integer literals that are 260 | produced with the OCaml standard function \texttt{string\_of\_int}. 261 | \item \texttt{float}: matches a floating-point number 262 | (see table~\ref{predefined-regexps}). 263 | It accepts a superset of the float literals that are 264 | produced with the OCaml standard function \texttt{string\_of\_float}. 265 | \end{itemize} 266 | 267 | \begin{table} 268 | \caption{\label{predefined-regexps} 269 | Predefined regexps in Mikmatch} 270 | \tt 271 | \begin{verbatim} 272 | RE int = ["-+"]? ( "0" ( ["xX"] xdigit+ 273 | | ["oO"] ['0'-'7']+ 274 | | ["bB"] ["01"]+ ) 275 | | digit+ ) 276 | 277 | RE float = 278 | ["-+"]? 279 | ( ( digit+ ("." digit* )? | "." digit+ ) (["eE"] ["+-"]? digit+ )? 280 | | "nan"~ 281 | | "inf"~ ) 282 | \end{verbatim} 283 | \end{table} 284 | 285 | 286 | \subsection{General pattern matching} 287 | 288 | \subsubsection{Regexps and match/function/try constructs} 289 | 290 | In Mikmatch, regular expressions can be used to match strings instead 291 | of the regular patterns. In this case, the regular expression must 292 | be preceded by the \textbf{RE} keyword, or placed between slashes 293 | (\textbf{/}\dots\textbf{/}). Both notations are equivalent. 294 | 295 | Only the following constructs support patterns that contain regular 296 | expressions: 297 | \begin{itemize} 298 | \item 299 | \textbf{match} \dots\ \textbf{with} \textit{pattern} \textbf{->} \dots 300 | \item \textbf{function} \textit{pattern} \textbf{->} \dots 301 | \item \textbf{try} \dots\ \textbf{with} \textit{pattern} \textbf{->} \dots 302 | \end{itemize} 303 | 304 | Examples: 305 | \begin{verbatim} 306 | let is_num = function RE ['0'-'9']+ -> true | _ -> false 307 | 308 | let get_option () = 309 | match Sys.argv with 310 | [| _ |] -> None 311 | | [| _; RE (['a'-'z']+ as key) "=" (_* as data) |] -> Some (key, data) 312 | | _ -> failwith "Usage: myprog [key=value]" 313 | 314 | let option = 315 | try get_option () 316 | with Failure RE "usage"~ -> None 317 | \end{verbatim} 318 | 319 | If alternatives are used in a pattern, then both alternatives must 320 | define the same set of identifiers. 321 | In the following example, the string \texttt{code} can either come 322 | from the normal pattern matching or be a fresh substring which was 323 | extracted using the regular expression: 324 | \begin{verbatim} 325 | match option, s with 326 | Some code, _ 327 | | None, RE _* "=" (['A'-'Z']['0'-'9'] as code) -> print_endline code 328 | 329 | | _ -> () 330 | \end{verbatim} 331 | 332 | In the general case, it is not possible to check in advance if the 333 | pattern-matching cases are complete if at least one of the patterns 334 | is a regular expression. In this case, no warnings against missing 335 | cases are displayed, thus it is safer to either add a catch-all case 336 | like in the previous examples or to catch the \texttt{Match\_failure} 337 | exception that can be raised unexpectedly. 338 | 339 | \subsubsection{Views (experimental feature)} 340 | 341 | Views are a general form of symbolic patterns other than those 342 | authorized by the concrete structure of data. For example, \texttt{Positive} 343 | could be a view for positive ints. View patterns can also bind 344 | variables and a useful example in OCaml is pattern-matching over lazy 345 | values. 346 | 347 | Here we propose simple views, as suggested by Simon Peyton Jones 348 | for Haskell:\\ 349 | \url{http://hackage.haskell.org/trac/ghc/wiki/ViewPatterns}. 350 | We propose a different syntax, but note that the syntax that we 351 | have chosen here is 352 | experimental and may change slightly in future releases. 353 | 354 | \paragraph{View patterns} 355 | 356 | A view pattern has one of these two forms: 357 | \begin{enumerate} 358 | \item \underline{\textbf{\%} \textit{view-name}}: a view without an 359 | argument. It is a simple check over the subject data. 360 | \item \underline{\textbf{\%} \textit{view-name} 361 | \textit{pattern}}: a view with an argument, the pattern. 362 | If the view function matches successfully, its 363 | result is matched against the given pattern. 364 | \end{enumerate} 365 | where a \textit{view-name} is a capitalized alphanumeric identifier, 366 | possibly preceded by a module path specification, 367 | e.g. \texttt{Name} or \texttt{Module.Name}. 368 | 369 | \paragraph{Definition of a view} 370 | 371 | Views without arguments are defined as functions of type 372 | \texttt{'a -> bool}, while views with arguments are defined as 373 | functions of type \texttt{'a -> 'b option}. 374 | 375 | The syntax for defining a view is: 376 | \begin{itemize} 377 | \item \underline{\textbf{let} \textbf{view} \textit{uppercase-identifier} 378 | \textbf{=} \textit{expression}} 379 | \item \underline{\textbf{let} \textbf{view} \textit{uppercase-identifier} 380 | \textbf{=} \textit{expression} \textbf{in} \textit{expression}} 381 | \end{itemize} 382 | 383 | Using the syntax above is however not strictly needed, since it just 384 | defines a function named after the name of the view, and prefixed by 385 | \texttt{view\_}. For instance \texttt{let view X = f} can be written 386 | as \texttt{let view\_X = f} in regular OCaml. Therefore, some library 387 | modules can export view definitions without using any syntax 388 | extension themselves. 389 | 390 | 391 | \paragraph{Example} 392 | 393 | \begin{verbatim} 394 | (* The type of lazy lists *) 395 | type 'a lazy_list = Nil | Cons of ('a * 'a lazy_list lazy_t) 396 | 397 | (* Definition of a view without argument for the empty list *) 398 | let view Nil = 399 | fun l -> 400 | try Lazy.force l = Nil 401 | with _ -> false 402 | 403 | (* Independent definition of a view with an argument, 404 | the head and tail of the list *) 405 | let view Cons = 406 | fun l -> 407 | try 408 | match Lazy.force l with 409 | Cons x -> Some x 410 | | Nil -> None 411 | with _ -> None 412 | 413 | 414 | (* Test *) 415 | let _ = 416 | let l = lazy (Cons (1, lazy (Cons (2, lazy Nil)))) in 417 | match l with 418 | %Nil 419 | | %Cons (_, %Nil) -> assert false 420 | | %Cons (x1, %Cons (x2, %Nil)) -> 421 | assert (x1 = 1); 422 | assert (x2 = 2); 423 | Printf.printf "Passed view test\n%!" 424 | | _ -> assert false 425 | \end{verbatim} 426 | 427 | \paragraph{Limitations} 428 | 429 | Each time a value is tested against a view pattern, the corresponding 430 | function is called. There is no optimization that would avoid calling 431 | the view function twice on the same argument. 432 | 433 | Redundant or missing cases cannot be checked, just like when there is 434 | a regexp in a pattern. This is due both to our definition of views and 435 | to the implementation that we get using Camlp4. 436 | 437 | 438 | \subsection{Shortcut for one-case regexp matching} 439 | 440 | A shortcut notation can be used to extract substrings from a string 441 | that match a pattern which is known in advance:\\ 442 | \textbf{let /}\textit{regexp}\textbf{/} \textbf{=} \textit{expr} 443 | \textbf{in} \textit{expr} 444 | 445 | Global declarations also support this shortcut:\\ 446 | \textbf{let /}\textit{regexp}\textbf{/} \textbf{=} \textit{expr}\\ 447 | Example \toplevelwarning: 448 | \begin{verbatim} 449 | # Sys.ocaml_version;; 450 | - : string = "3.08.3" 451 | # RE int = digit+;; 452 | # let /(int as major : int) "." (int as minor : int) 453 | ("." (int as patchlevel) | ("" as patchlevel)) 454 | ("+" (_* as additional_info) | ("" as additional_info))/ = 455 | Sys.ocaml_version 456 | ;; 457 | val additional_info : string = "" 458 | val major : int = 3 459 | val minor : int = 8 460 | val patchlevel : string = "3" 461 | \end{verbatim} 462 | 463 | The notation does not allow simultaneous definitions using the 464 | \textbf{and} keyword nor recursive definitions using \textbf{rec}. 465 | 466 | As usual, the \texttt{Match\_failure} exception is raised if the 467 | string fails to match the pattern. 468 | The let-try-in-with construct described in the next section also 469 | supports regexp patterns, with the same restrictions. 470 | 471 | 472 | \subsection{The let-try-in-with construct} 473 | 474 | A general notation for catching exceptions that are raised during 475 | the definition of bindings is provided:\\ 476 | \textbf{let} \textbf{try} [\textbf{rec}] \textit{let-binding} 477 | \{\textbf{and} \textit{let-binding}\} \textbf{in}\\ 478 | \verb! !\textit{expr}\\ 479 | \textbf{with} \textit{pattern-matching} 480 | 481 | It has the same meaning as:\\ 482 | \textbf{try} \textbf{let} [\textbf{rec}] \textit{let-binding} 483 | \{\textbf{and} \textit{let-binding}\} \textbf{in}\\ 484 | \verb! !\textit{expr}\\ 485 | \textbf{with} \textit{pattern-matching}\\ 486 | except that in the former case only the exceptions raised by the 487 | \textit{let-binding}s are handled by the exception handler introduced 488 | by \textbf{with}. 489 | 490 | 491 | \subsection{Implementation-dependent features} 492 | 493 | These features depend on which library is actually used internally for 494 | manipulating regular expressions. 495 | Currently two libraries are supported: the Str library from the 496 | official OCaml distribution and the PCRE-OCaml library. 497 | Support for other libraries might be added in the future. 498 | 499 | \subsubsection{Backreferences} 500 | \label{backref} 501 | Previously matched substrings can be matched again using 502 | backreferences. 503 | \underline{\textbf{!}\textit{ident}} 504 | is a backreference to the named group \textit{ident} that is defined 505 | previously in the sequence. 506 | During the matching process, it is not possible that a backreference 507 | refers to a named group which is not matched. 508 | In the following example, we 509 | extract the repeated pattern \texttt{abc} from \texttt{abcabc} 510 | \toplevelwarning: 511 | \begin{verbatim} 512 | # match "abcabc" with RE _* as x !x -> x;; 513 | - : string = "abc" 514 | \end{verbatim} 515 | 516 | 517 | \subsubsection{Specificities of Mikmatch\_str} 518 | 519 | Backreferences as described previously (section~\ref{backref}) are 520 | supported. 521 | 522 | In addition to the POSIX character classes, 523 | a set of predefined patterns is available: 524 | \begin{itemize} 525 | \item \underline{bol} matches at beginning of line (either at the 526 | beginning of the matched string, or just after a newline character). 527 | \item \underline{eol} matches at end of line (either at the end of the matched 528 | string, or just before a newline character). 529 | \item \underline{any} matches any character except newline. 530 | \item \underline{bnd} matches word boundaries. 531 | \end{itemize} 532 | 533 | \subsubsection{Specificities of Mikmatch\_pcre} 534 | 535 | This is currently the version which is used by the 536 | \verb$mikmatch$ command. 537 | 538 | \paragraph{Matching order} 539 | 540 | Alternatives (\textit{regexp1}|\textit{regexp2}) 541 | are tried from left to right. 542 | 543 | The quantifiers (\verb$*$, \verb$+$, \verb$?$ and 544 | \verb${$\dots\verb$}$) 545 | are greedy except if specified otherwise (see next paragraph). 546 | The regular expressions are matched from left to right, and the 547 | repeated patterns are matched as many times as possible before trying 548 | to match the rest of the regular expression and either succeed or give 549 | up one repetition before retrying (backtracking). 550 | 551 | \paragraph{Greediness and laziness} 552 | Normally, quantifiers (\verb$*$, \verb$+$, \verb$?$ and 553 | \verb${$\dots\verb$}$) are greedy, i.e. they perform the longest match 554 | in terms of number of repetitions 555 | before matching the rest of the regular expression or 556 | backtracking. The opposite behavior is laziness: in that case, 557 | the number of repetitions is made minimal before trying to match the 558 | rest of the regular expression and either succeed or continue with one 559 | more repetition. 560 | 561 | The lazy behavior is turned on by placing the keyword \verb$Lazy$ 562 | after the quantifier. This is the equivalent of Perl's quantifiers 563 | \verb$*?$, \verb$+?$, \verb$??$ and \verb${$\dots\verb$}?$. 564 | For instance, compare the following behaviors \toplevelwarning: 565 | \begin{verbatim} 566 | # match "" with RE "<" (_* as contents) ">" -> contents;; 567 | - : string = "hello>" with RE "<" (_* Lazy as contents) ">" -> contents;; 569 | - : string = "hello" 570 | \end{verbatim} 571 | 572 | \paragraph{Possessiveness or atomic grouping} 573 | Sometimes it can be useful to prevent backtracking. 574 | This is achieved by placing the \verb$Possessive$ keyword after a given group. 575 | For instance, compare the following \toplevelwarning: 576 | \begin{verbatim} 577 | # match "abc" with RE _* _ -> true | _ -> false;; 578 | - : bool = true 579 | # match "abc" with RE _* Possessive _ -> true | _ -> false;; 580 | - : bool = false 581 | \end{verbatim} 582 | This operator has the strongest associativity priority (0), just like 583 | the quantifiers. 584 | 585 | \paragraph{Backreferences} 586 | Backreferences are supported as described in section~\ref{backref}. 587 | 588 | \paragraph{Predefined patterns} 589 | The following predefined patterns are available in addition to the 590 | POSIX character classes: 591 | \begin{itemize} 592 | \item \underline{bos} matches at beginning of the matched string. 593 | \item \underline{eos} matches at the end of the matched string. 594 | \item \underline{bol} matches at beginning of line (either at the 595 | beginning of the matched string, or just after a newline character). 596 | \item \underline{eol} matches at end of line (either at the end of the matched 597 | string, or just before a newline character). 598 | \item \underline{any} matches any character except newline. 599 | \end{itemize} 600 | 601 | 602 | \paragraph{Lookaround assertions} 603 | 604 | A lookaround assertion is a pattern that 605 | has to be matched but doesn't consume characters in the string being 606 | matched. 607 | 608 | Lookahead assertions are checked after the current position in the 609 | string, and lookbehind assertions are matched before the current 610 | point. The general syntax for an assertion is the following: 611 | \\ 612 | \fbox{\texttt{<} \textit{lookbehind} 613 | \texttt{.} \textit{lookahead} \texttt{>}}\\ 614 | \fbox{\texttt{<} \textit{lookahead} \texttt{>}}\\ 615 | The central dot symbolizes the current position. The 616 | \textit{lookbehind} assertion is a test over the characters at the 617 | left of the current point, while the \textit{lookahead} is a test over 618 | the characters at the right of the current point in the string. 619 | 620 | \textit{lookbehind} or \textit{lookahead} are either empty or a 621 | regular expression, optionally preceded by \texttt{Not}. 622 | An assertion starting with \texttt{Not} is called negative and means 623 | that the given regular expression can not match here. 624 | 625 | There are no restrictions on the contents of lookahead regular 626 | expressions. Lookbehind regular expressions are restricted 627 | to those that match substrings of length that can be predetermined. 628 | Besides this, backreferences are not supported in lookbehind expressions. 629 | 630 | 631 | 632 | \paragraph{Macros} 633 | This implementation provides a set of macros that follow this 634 | syntax:\\ 635 | \fbox{\textit{MACRO-NAME} \textit{regexp} \texttt{->} \textit{expr}}\\ 636 | where \textit{expr} is the expression that will be computed every time 637 | the pattern given by \textit{regexp} is matched. 638 | 639 | Only the \verb$SPLIT$ and \verb$FILTER$ macros follows a simplified syntax:\\ 640 | \fbox{\textit{MACRO-NAME} \textit{regexp}} 641 | 642 | These constructs build a function which accepts some optional 643 | arguments and the string to match. 644 | For instance, \\ 645 | \verb$(REPLACE "," -> ";") "a,b,c"$\\ 646 | returns \verb$"a;b;c"$ 647 | whereas\\ 648 | \verb$(REPLACE "," -> ";") ~pos:2 "a,b,c"$\\ 649 | returns \verb$"a,b;c"$ 650 | 651 | The possible options are the following: 652 | \begin{itemize} 653 | \item \verb$pos$ has type \verb$int$ and indicates that matching or 654 | searching must start from this position in the string. 655 | Its default value is always 0 (beginning of the string). 656 | \item \verb$full$ is a boolean that defines whether split operations 657 | must ignore empty fragments before the first matched pattern or the 658 | last matched pattern in the string. The default value is \verb$true$ 659 | for \verb$MAP$ and \verb$false$ for \verb$SPLIT$. 660 | \item \verb$share$ is a potentially unsafe option which allows the 661 | reuse of some mutable data which are associated to a given regular 662 | expression. This may make the program slightly faster, but should 663 | generally not be used in multi-threaded programs or in libraries. 664 | \end{itemize} 665 | 666 | \fbox{\texttt{MATCH} \textit{regexp} \texttt{->} \textit{expr}}\\ 667 | tries to match the pattern \textit{regexp} at the beginning of the 668 | string or at the given position \verb$pos$ and returns \textit{expr} 669 | or raise \verb$Not_found$. Options: \verb$pos$ (0), \verb$share$ (false). 670 | When \verb$pos$ and \verb$share$ are not specified, it is equivalent to: 671 | \begin{alltt} 672 | function 673 | RE \textit{regexp} -> \textit{expr} 674 | | _ -> raise Not_found 675 | \end{alltt} 676 | 677 | \fbox{\texttt{REPLACE} \textit{regexp} \texttt{->} \textit{expr}}\\ 678 | returns a string in which every occurrence of the pattern is 679 | replaced by \textit{expr}. Options: \verb$pos$ (0). 680 | 681 | \fbox{\texttt{REPLACE\_FIRST} \textit{regexp} \texttt{->} \textit{expr}}\\ 682 | returns a string in which the first occurrence of the pattern is 683 | replaced by \textit{expr}. A copy of the input string is returned if 684 | the pattern is not found. 685 | Options: \verb$pos$ (0). 686 | 687 | \fbox{\texttt{SEARCH} \textit{regexp} \texttt{->} \textit{expr}}\\ 688 | simply evaluates \textit{expr} every time the pattern is 689 | matched. Options: \verb$pos$ (0). 690 | 691 | \fbox{\texttt{SEARCH\_FIRST} \textit{regexp} \texttt{->} \textit{expr}}\\ 692 | simply evaluates \textit{expr} the first time the pattern is 693 | matched and returns the result. Exception \texttt{Not\_Found} is 694 | raised if the pattern is not matched. 695 | Options: \verb$pos$ (0), \verb$share$ (false). 696 | 697 | \fbox{\texttt{COLLECT} \textit{regexp} \texttt{->} \textit{expr}}\\ 698 | evaluates \textit{expr} every time the pattern is 699 | matched and puts the result into a list. Options: \verb$pos$ (0). 700 | 701 | \fbox{\texttt{COLLECTOBJ} \textit{regexp}}\\ 702 | like \texttt{COLLECT}, but the elements of the returned list are automatically objects 703 | with methods that correspond to the subgroups captured with 704 | \texttt{as}. 705 | Options: \verb$pos$ (0). 706 | 707 | \fbox{\texttt{SPLIT} \textit{regexp}}\\ 708 | splits the given string using \textit{regexp} as a delimiter. 709 | Options: \verb$pos$ (0), \verb$full$ (false). 710 | 711 | \fbox{\texttt{FILTER} \textit{regexp}}\\ 712 | creates a predicate that returns true is the given string matches 713 | \textit{regexp} or false otherwise. 714 | Options: \verb$pos$ (0), \verb$share$ (false). 715 | 716 | \fbox{\texttt{CAPTURE} \textit{regexp}}\\ 717 | returns \texttt{Some o} where \texttt{o} is an object with methods 718 | that correspond to the captured subgroups, or 719 | \texttt{None} if the subject string doesn't match \textit{regexp}. 720 | Options: \verb$pos$ (0), \verb$share$ (false). 721 | 722 | \fbox{\texttt{MAP} \textit{regexp} \texttt{->} \textit{expr}}\\ 723 | splits the given string into fragments: the fragments that do not match the 724 | pattern are returned as \texttt{`Text s} where s is a 725 | string. Fragments that match the pattern are replaced by 726 | the result of \textit{expr}, which has to be a polymorphic variant. 727 | Options: \verb$pos$ (0), \verb$full$ (true). 728 | For instance, \\ 729 | \verb$(MAP ',' -> `Sep) "a,b,c,"$\\ 730 | returns the list\\ 731 | \verb$[`Text "a"; `Sep; `Text "b"; `Sep; `Text "c"; `Sep; `Text ""]$\\ 732 | whereas \\ 733 | \verb$(MAP ',' -> `Sep) ~full:false "a,b,c,"$\\ 734 | returns only\\ 735 | \verb$[`Text "a"; `Sep; `Text "b"; `Sep; `Text "c"; `Sep]$ 736 | 737 | 738 | \section{Tools} 739 | 740 | \subsection{Micmatch, Mikmatch, old Camlp4, new Camlp4, Camlp5} 741 | 742 | Camlp4/Camlp5 is the set of tools that allows to build and use 743 | syntax extensions of OCaml. We distinguish 3 major variants of Camlp4: 744 | \begin{itemize} 745 | \item The ``old Camlp4'' is Camlp4 as distributed with OCaml until 746 | version 3.09.3. 747 | \item Camlp5 is an independent branch of the old Camlp4, compatible 748 | with at least the 3.09 and 3.10 release lines of OCaml. It is close 749 | to 100\% compatible with the old Camlp4. 750 | \item The new Camlp4 or just Camlp4 shares the same goals as the 751 | old Camlp4 and Camlp5, but is largely incompatible with them. 752 | It is included in the core OCaml distribution starting from OCaml 3.10 753 | and replaces the old Camlp4. 754 | \end{itemize} 755 | 756 | Micmatch is the name of the original implementation of Mikmatch for the 757 | old Camlp4: 758 | \begin{itemize} 759 | \item Micmatch < 1.0 requires the old Camlp4. 760 | \item Micmatch $\geq$ 1.0 requires Camlp5. 761 | \item Mikmatch requires the new Camlp4. 762 | \end{itemize} 763 | 764 | 765 | 766 | \subsection{The toplevel} 767 | 768 | \toplevelwarning 769 | 770 | 771 | \subsection{The libraries for the preprocessor} 772 | 773 | \subsubsection{Mikmatch\_str} 774 | The preprocessing library \texttt{pa\_mikmatch\_str.cma} must be loaded by 775 | the preprocessor (\texttt{camlp4o} or \texttt{camlp4r}). 776 | 777 | It is safe to use Mikmatch\_str in 778 | multithreaded programs without locks only if the patterns do not contain 779 | the @ keyword because it uses a shared cache of compiled regexps. 780 | 781 | \subsubsection{Mikmatch\_pcre} 782 | The preprocessing library \texttt{pa\_mikmatch\_pcre.cma} must be loaded by 783 | the preprocessor (\texttt{camlp4o} or \texttt{camlp4r}). 784 | 785 | It is safe to use Mikmatch\_str in 786 | multithreaded programs without locks only if the patterns do not contain 787 | the @ keyword because it uses a shared cache of compiled regexps. 788 | 789 | 790 | \subsection{The runtime libraries} 791 | 792 | Both variants depend on portable features of the Unix library. 793 | The executables must therefore be linked against \texttt{unix.cma} 794 | (bytecode) or \texttt{unix.cmxa} (native code) in addition to 795 | the specific libraries mentioned below. 796 | 797 | \subsubsection{Mikmatch\_str} 798 | 799 | In addition to the backend for the regular expressions engine 800 | (\texttt{str.cma} for bytecode or \texttt{str.cmxa} for native code), 801 | the OCaml code which is produced by the preprocessor needs to be 802 | linked against either \texttt{run\_mikmatch\_str.cma} (bytecode), 803 | \texttt{run\_mikmatch\_str.cmxa} (native code), 804 | \texttt{run\_mikmatch\_str\_mt.cma} (bytecode, threads) 805 | or \texttt{run\_mikmatch\_str\_mt.cmxa} (native code, threads). 806 | 807 | 808 | \subsubsection{Mikmatch\_pcre} 809 | 810 | In addition to the backend for the regular expressions engine 811 | (\texttt{pcre.cma} for bytecode or \texttt{pcre.cmxa} for native code), 812 | the OCaml code which is produced by the preprocessor needs to be 813 | linked against either \texttt{run\_mikmatch\_pcre.cma} (bytecode), 814 | \texttt{run\_mikmatch\_pcre.cmxa} (native code). 815 | Multithreaded programs are supported as well 816 | and do not require a specific library. 817 | 818 | \begin{latexonly} 819 | \input{mikmatch-ocamldoc.tex} 820 | \end{latexonly} 821 | 822 | \begin{htmlonly} 823 | \section{A small text-oriented library} 824 | \begin{rawhtml} 825 | Module Mikmatch 826 | \end{rawhtml} 827 | \end{htmlonly} 828 | 829 | 830 | \end{document} 831 | -------------------------------------------------------------------------------- /OCamlMakefile: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # OCamlMakefile 3 | # Copyright (C) 1999-2004 Markus Mottl 4 | # 5 | # For updates see: 6 | # http://www.oefai.at/~markus/ocaml_sources 7 | # 8 | # $Id$ 9 | # 10 | ########################################################################### 11 | 12 | # Modified by damien for .glade.ml compilation 13 | 14 | # Set these variables to the names of the sources to be processed and 15 | # the result variable. Order matters during linkage! 16 | 17 | ifndef SOURCES 18 | SOURCES := foo.ml 19 | endif 20 | export SOURCES 21 | 22 | ifndef RES_CLIB_SUF 23 | RES_CLIB_SUF := _stubs 24 | endif 25 | export RES_CLIB_SUF 26 | 27 | ifndef RESULT 28 | RESULT := foo 29 | endif 30 | export RESULT 31 | 32 | export LIB_PACK_NAME 33 | 34 | ifndef DOC_FILES 35 | DOC_FILES := $(filter %.mli, $(SOURCES)) 36 | endif 37 | export DOC_FILES 38 | 39 | export BCSUFFIX 40 | export NCSUFFIX 41 | 42 | ifndef TOPSUFFIX 43 | TOPSUFFIX := .top 44 | endif 45 | export TOPSUFFIX 46 | 47 | # Eventually set include- and library-paths, libraries to link, 48 | # additional compilation-, link- and ocamlyacc-flags 49 | # Path- and library information needs not be written with "-I" and such... 50 | # Define THREADS if you need it, otherwise leave it unset (same for 51 | # USE_CAMLP4)! 52 | 53 | export THREADS 54 | export VMTHREADS 55 | export ANNOTATE 56 | export USE_CAMLP4 57 | 58 | export INCDIRS 59 | export LIBDIRS 60 | export EXTLIBDIRS 61 | export RESULTDEPS 62 | export OCAML_DEFAULT_DIRS 63 | 64 | export LIBS 65 | export CLIBS 66 | 67 | export OCAMLFLAGS 68 | export OCAMLNCFLAGS 69 | export OCAMLBCFLAGS 70 | 71 | export OCAMLLDFLAGS 72 | export OCAMLNLDFLAGS 73 | export OCAMLBLDFLAGS 74 | 75 | ifndef OCAMLCPFLAGS 76 | OCAMLCPFLAGS := a 77 | endif 78 | 79 | export OCAMLCPFLAGS 80 | 81 | export PPFLAGS 82 | 83 | export YFLAGS 84 | export IDLFLAGS 85 | 86 | export OCAMLDOCFLAGS 87 | 88 | export OCAMLFIND_INSTFLAGS 89 | 90 | export DVIPSFLAGS 91 | 92 | export STATIC 93 | 94 | # Add a list of optional trash files that should be deleted by "make clean" 95 | export TRASH 96 | 97 | #################### variables depending on your OCaml-installation 98 | 99 | ifdef MINGW 100 | export MINGW 101 | WIN32 := 1 102 | CFLAGS_WIN32 := -mno-cygwin 103 | endif 104 | ifdef MSVC 105 | export MSVC 106 | WIN32 := 1 107 | ifndef STATIC 108 | CFLAGS_WIN32 := -DCAML_DLL 109 | endif 110 | CFLAGS_WIN32 += -nologo 111 | EXT_OBJ := obj 112 | EXT_LIB := lib 113 | ifeq ($(CC),gcc) 114 | # work around GNU Make default value 115 | ifdef THREADS 116 | CC := cl -MT 117 | else 118 | CC := cl 119 | endif 120 | endif 121 | ifeq ($(CXX),g++) 122 | # work around GNU Make default value 123 | CXX := $(CC) 124 | endif 125 | CFLAG_O := -Fo 126 | endif 127 | ifdef WIN32 128 | EXT_CXX := cpp 129 | EXE := .exe 130 | endif 131 | 132 | ifndef EXT_OBJ 133 | EXT_OBJ := o 134 | endif 135 | ifndef EXT_LIB 136 | EXT_LIB := a 137 | endif 138 | ifndef EXT_CXX 139 | EXT_CXX := cc 140 | endif 141 | ifndef EXE 142 | EXE := # empty 143 | endif 144 | ifndef CFLAG_O 145 | CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)! 146 | endif 147 | 148 | export CC 149 | export CXX 150 | export CFLAGS 151 | export CXXFLAGS 152 | export LDFLAGS 153 | 154 | ifndef RPATH_FLAG 155 | RPATH_FLAG := -R 156 | endif 157 | export RPATH_FLAG 158 | 159 | ifndef MSVC 160 | ifndef PIC_FLAGS 161 | PIC_FLAGS := -fPIC -DPIC 162 | endif 163 | endif 164 | 165 | export PIC_FLAGS 166 | 167 | BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT)) 168 | NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT)) 169 | TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT)) 170 | 171 | ifndef OCAMLFIND 172 | OCAMLFIND := ocamlfind 173 | endif 174 | export OCAMLFIND 175 | 176 | ifndef OCAMLC 177 | OCAMLC := ocamlc 178 | endif 179 | export OCAMLC 180 | 181 | ifndef OCAMLOPT 182 | OCAMLOPT := ocamlopt 183 | endif 184 | export OCAMLOPT 185 | 186 | ifndef OCAMLMKTOP 187 | OCAMLMKTOP := ocamlmktop 188 | endif 189 | export OCAMLMKTOP 190 | 191 | ifndef OCAMLCP 192 | OCAMLCP := ocamlcp 193 | endif 194 | export OCAMLCP 195 | 196 | ifndef OCAMLDEP 197 | OCAMLDEP := ocamldep 198 | endif 199 | export OCAMLDEP 200 | 201 | ifndef OCAMLLEX 202 | OCAMLLEX := ocamllex 203 | endif 204 | export OCAMLLEX 205 | 206 | ifndef OCAMLYACC 207 | OCAMLYACC := ocamlyacc 208 | endif 209 | export OCAMLYACC 210 | 211 | ifndef OCAMLMKLIB 212 | OCAMLMKLIB := ocamlmklib 213 | endif 214 | export OCAMLMKLIB 215 | 216 | ifndef OCAML_GLADECC 217 | OCAML_GLADECC := lablgladecc2 218 | endif 219 | export OCAML_GLADECC 220 | 221 | ifndef OCAML_GLADECC_FLAGS 222 | OCAML_GLADECC_FLAGS := 223 | endif 224 | export OCAML_GLADECC_FLAGS 225 | 226 | ifndef CAMELEON_REPORT 227 | CAMELEON_REPORT := report 228 | endif 229 | export CAMELEON_REPORT 230 | 231 | ifndef CAMELEON_REPORT_FLAGS 232 | CAMELEON_REPORT_FLAGS := 233 | endif 234 | export CAMELEON_REPORT_FLAGS 235 | 236 | ifndef CAMELEON_ZOGGY 237 | CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo 238 | endif 239 | export CAMELEON_ZOGGY 240 | 241 | ifndef CAMELEON_ZOGGY_FLAGS 242 | CAMELEON_ZOGGY_FLAGS := 243 | endif 244 | export CAMELEON_ZOGGY_FLAGS 245 | 246 | ifndef OXRIDL 247 | OXRIDL := oxridl 248 | endif 249 | export OXRIDL 250 | 251 | ifndef CAMLIDL 252 | CAMLIDL := camlidl 253 | endif 254 | export CAMLIDL 255 | 256 | ifndef CAMLIDLDLL 257 | CAMLIDLDLL := camlidldll 258 | endif 259 | export CAMLIDLDLL 260 | 261 | ifndef NOIDLHEADER 262 | MAYBE_IDL_HEADER := -header 263 | endif 264 | export NOIDLHEADER 265 | 266 | export NO_CUSTOM 267 | 268 | ifndef CAMLP4 269 | CAMLP4 := camlp4 270 | endif 271 | export CAMLP4 272 | 273 | ifdef PACKS 274 | ifndef CREATE_LIB 275 | PACKS += threads 276 | endif 277 | empty := 278 | space := $(empty) $(empty) 279 | comma := , 280 | ifdef PREDS 281 | PRE_OCAML_FIND_PREDICATES := $(subst $(space),$(comma),$(PREDS)) 282 | PRE_OCAML_FIND_PACKAGES := $(subst $(space),$(comma),$(PACKS)) 283 | OCAML_FIND_PREDICATES := -predicates $(PRE_OCAML_FIND_PREDICATES) 284 | # OCAML_DEP_PREDICATES := -syntax $(PRE_OCAML_FIND_PREDICATES) 285 | OCAML_FIND_PACKAGES := $(OCAML_FIND_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) 286 | OCAML_DEP_PACKAGES := $(OCAML_DEP_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES) 287 | else 288 | OCAML_FIND_PACKAGES := -package $(subst $(space),$(comma),$(PACKS)) 289 | OCAML_DEP_PACKAGES := 290 | endif 291 | OCAML_FIND_LINKPKG := -linkpkg 292 | REAL_OCAMLFIND := $(OCAMLFIND) 293 | endif 294 | 295 | export OCAML_FIND_PACKAGES 296 | export OCAML_DEP_PACKAGES 297 | export OCAML_FIND_LINKPKG 298 | export REAL_OCAMLFIND 299 | 300 | ifndef OCAMLDOC 301 | OCAMLDOC := ocamldoc 302 | endif 303 | export OCAMLDOC 304 | 305 | ifndef LATEX 306 | LATEX := latex 307 | endif 308 | export LATEX 309 | 310 | ifndef DVIPS 311 | DVIPS := dvips 312 | endif 313 | export DVIPS 314 | 315 | ifndef PS2PDF 316 | PS2PDF := ps2pdf 317 | endif 318 | export PS2PDF 319 | 320 | ifndef OCAMLMAKEFILE 321 | OCAMLMAKEFILE := OCamlMakefile 322 | endif 323 | export OCAMLMAKEFILE 324 | 325 | ifndef OCAMLLIBPATH 326 | OCAMLLIBPATH := \ 327 | $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml) 328 | endif 329 | export OCAMLLIBPATH 330 | 331 | ifndef OCAML_LIB_INSTALL 332 | OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib 333 | endif 334 | export OCAML_LIB_INSTALL 335 | 336 | ########################################################################### 337 | 338 | #################### change following sections only if 339 | #################### you know what you are doing! 340 | 341 | # delete target files when a build command fails 342 | .PHONY: .DELETE_ON_ERROR 343 | .DELETE_ON_ERROR: 344 | 345 | # for pedants using "--warn-undefined-variables" 346 | export MAYBE_IDL 347 | export REAL_RESULT 348 | export CAMLIDLFLAGS 349 | export THREAD_FLAG 350 | export RES_CLIB 351 | export MAKEDLL 352 | export ANNOT_FLAG 353 | export C_OXRIDL 354 | export SUBPROJS 355 | export CFLAGS_WIN32 356 | INCFLAGS := 357 | 358 | SHELL := /bin/sh 359 | 360 | MLDEPDIR := ._d 361 | BCDIDIR := ._bcdi 362 | NCDIDIR := ._ncdi 363 | 364 | FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.oxridl %.c %.$(EXT_CXX) %.rep %.zog %.glade 365 | 366 | FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES)) 367 | SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED)))) 368 | 369 | FILTERED_REP := $(filter %.rep, $(FILTERED)) 370 | DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d) 371 | AUTO_REP := $(FILTERED_REP:.rep=.ml) 372 | 373 | FILTERED_ZOG := $(filter %.zog, $(FILTERED)) 374 | DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d) 375 | AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml) 376 | 377 | FILTERED_GLADE := $(filter %.glade, $(FILTERED)) 378 | DEP_GLADE := $(FILTERED_GLADE:%.glade=$(MLDEPDIR)/%.d) 379 | AUTO_GLADE := $(FILTERED_GLADE:.glade=.ml) 380 | 381 | FILTERED_ML := $(filter %.ml, $(FILTERED)) 382 | DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d) 383 | 384 | FILTERED_MLI := $(filter %.mli, $(FILTERED)) 385 | DEP_MLI := $(FILTERED_MLI:.mli=.di) 386 | 387 | FILTERED_MLL := $(filter %.mll, $(FILTERED)) 388 | DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d) 389 | AUTO_MLL := $(FILTERED_MLL:.mll=.ml) 390 | 391 | FILTERED_MLY := $(filter %.mly, $(FILTERED)) 392 | DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di) 393 | AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml) 394 | 395 | FILTERED_IDL := $(filter %.idl, $(FILTERED)) 396 | DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di) 397 | C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c) 398 | ifndef NOIDLHEADER 399 | C_IDL += $(FILTERED_IDL:.idl=.h) 400 | endif 401 | OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ)) 402 | AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL) 403 | 404 | FILTERED_OXRIDL := $(filter %.oxridl, $(FILTERED)) 405 | DEP_OXRIDL := $(FILTERED_OXRIDL:%.oxridl=$(MLDEPDIR)/%.d) $(FILTERED_OXRIDL:.oxridl=.di) 406 | AUTO_OXRIDL := $(FILTERED_OXRIDL:.oxridl=.mli) $(FILTERED_OXRIDL:.oxridl=.ml) $(C_OXRIDL) 407 | 408 | FILTERED_C_CXX := $(filter %.c %.$(EXT_CXX), $(FILTERED)) 409 | OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ)) 410 | OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ)) 411 | 412 | PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_OXRIDL) $(AUTO_ZOG) $(AUTO_REP) $(AUTO_GLADE) 413 | 414 | ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_OXRIDL) $(DEP_ZOG) $(DEP_REP) $(DEP_GLADE) 415 | 416 | MLDEPS := $(filter %.d, $(ALL_DEPS)) 417 | MLIDEPS := $(filter %.di, $(ALL_DEPS)) 418 | BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di) 419 | NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di) 420 | 421 | ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.oxridl %.rep %.zog %.glade, $(FILTERED)) 422 | 423 | IMPLO_INTF := $(ALLML:%.mli=%.mli.__) 424 | IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \ 425 | $(basename $(file)).cmi $(basename $(file)).cmo) 426 | IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF)) 427 | IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi) 428 | 429 | IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx) 430 | 431 | INTF := $(filter %.cmi, $(IMPLO_INTF)) 432 | IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF)) 433 | IMPL_CMX := $(IMPL_CMO:.cmo=.cmx) 434 | IMPL_ASM := $(IMPL_CMO:.cmo=.asm) 435 | IMPL_S := $(IMPL_CMO:.cmo=.s) 436 | 437 | OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX) 438 | OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK) 439 | 440 | EXECS := $(addsuffix $(EXE), \ 441 | $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT))) 442 | ifdef WIN32 443 | EXECS += $(BCRESULT).dll $(NCRESULT).dll 444 | endif 445 | 446 | CLIB_BASE := $(RESULT)$(RES_CLIB_SUF) 447 | ifneq ($(strip $(OBJ_LINK)),) 448 | RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB) 449 | endif 450 | 451 | ifdef WIN32 452 | DLLSONAME := $(CLIB_BASE).dll 453 | else 454 | DLLSONAME := dll$(CLIB_BASE).so 455 | endif 456 | 457 | NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(IMPL_ASM) $(IMPL_S) \ 458 | $(OBJ_FILES) $(PRE_TARGETS) $(BCRESULT).cma $(NCRESULT).cmxa \ 459 | $(NCRESULT).$(EXT_LIB) $(BCRESULT).cmi $(BCRESULT).cmo \ 460 | $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).o \ 461 | $(RES_CLIB) $(IMPL_CMO:.cmo=.annot) \ 462 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(LIB_PACK_NAME).cmx $(LIB_PACK_NAME).o 463 | 464 | ifndef STATIC 465 | NONEXECS += $(DLLSONAME) 466 | endif 467 | 468 | ifndef LIBINSTALL_FILES 469 | LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \ 470 | $(RESULT).cmxa $(RESULT).$(EXT_LIB) $(RES_CLIB) 471 | ifndef STATIC 472 | ifneq ($(strip $(OBJ_LINK)),) 473 | LIBINSTALL_FILES += $(DLLSONAME) 474 | endif 475 | endif 476 | endif 477 | 478 | export LIBINSTALL_FILES 479 | 480 | ifdef WIN32 481 | # some extra stuff is created while linking DLLs 482 | NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp $(CLIB_BASE).exp $(CLIB_BASE).lib 483 | endif 484 | 485 | TARGETS := $(EXECS) $(NONEXECS) 486 | 487 | # If there are IDL-files 488 | ifneq ($(strip $(FILTERED_IDL)),) 489 | MAYBE_IDL := -cclib -lcamlidl 490 | endif 491 | 492 | ifdef USE_CAMLP4 493 | CAMLP4PATH := \ 494 | $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4) 495 | INCFLAGS := -I $(CAMLP4PATH) 496 | CINCFLAGS := -I$(CAMLP4PATH) 497 | endif 498 | 499 | DINCFLAGS := $(INCFLAGS) $(SOURCE_DIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %) 500 | INCFLAGS := $(DINCFLAGS) $(INCDIRS:%=-I %) 501 | CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%) 502 | 503 | ifndef MSVC 504 | CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \ 505 | $(EXTLIBDIRS:%=-L%) $(EXTLIBDIRS:%=-Wl,$(RPATH_FLAG)%) \ 506 | $(OCAML_DEFAULT_DIRS:%=-L%) 507 | endif 508 | 509 | ifndef PROFILING 510 | INTF_OCAMLC := $(OCAMLC) 511 | else 512 | ifndef THREADS 513 | INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS) 514 | else 515 | # OCaml does not support profiling byte code 516 | # with threads (yet), therefore we force an error. 517 | ifndef REAL_OCAMLC 518 | $(error Profiling of multithreaded byte code not yet supported by OCaml) 519 | endif 520 | INTF_OCAMLC := $(OCAMLC) 521 | endif 522 | endif 523 | 524 | ifndef MSVC 525 | COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \ 526 | $(LIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -L%) \ 527 | $(EXTLIBDIRS:%=-ccopt -Wl,$(RPATH_FLAG)%) \ 528 | $(OCAML_DEFAULT_DIRS:%=-ccopt -L%) 529 | else 530 | COMMON_LDFLAGS := -ccopt "/link -NODEFAULTLIB:LIBC $(LDFLAGS:%=%) $(SOURCE_DIRS:%=-LIBPATH:%) \ 531 | $(LIBDIRS:%=-LIBPATH:%) $(EXTLIBDIRS:%=-LIBPATH:%) \ 532 | $(OCAML_DEFAULT_DIRS:%=-LIBPATH:%) " 533 | endif 534 | 535 | CLIBS_OPTS := $(CLIBS:%=-cclib -l%) 536 | ifdef MSVC 537 | ifndef STATIC 538 | # MSVC libraries do not have 'lib' prefix 539 | CLIBS_OPTS := $(CLIBS:%=-cclib %.lib) 540 | endif 541 | endif 542 | 543 | ifneq ($(strip $(OBJ_LINK)),) 544 | ifdef CREATE_LIB 545 | OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL) 546 | else 547 | OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL) 548 | endif 549 | else 550 | OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL) 551 | endif 552 | 553 | # If we have to make byte-code 554 | ifndef REAL_OCAMLC 555 | BYTE_OCAML := y 556 | 557 | # EXTRADEPS is added dependencies we have to insert for all 558 | # executable files we generate. Ideally it should be all of the 559 | # libraries we use, but it's hard to find the ones that get searched on 560 | # the path since I don't know the paths built into the compiler, so 561 | # just include the ones with slashes in their names. 562 | EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) 563 | SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS) 564 | 565 | REAL_OCAMLC := $(INTF_OCAMLC) 566 | 567 | REAL_IMPL := $(IMPL_CMO) 568 | REAL_IMPL_INTF := $(IMPLO_INTF) 569 | IMPL_SUF := .cmo 570 | 571 | DEPFLAGS := 572 | MAKE_DEPS := $(MLDEPS) $(BCDEPIS) 573 | 574 | ifdef CREATE_LIB 575 | CFLAGS := $(PIC_FLAGS) $(CFLAGS) 576 | ifndef STATIC 577 | ifneq ($(strip $(OBJ_LINK)),) 578 | MAKEDLL := $(DLLSONAME) 579 | ALL_LDFLAGS := -dllib $(DLLSONAME) 580 | endif 581 | endif 582 | endif 583 | 584 | ifndef NO_CUSTOM 585 | ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS))" "" 586 | ALL_LDFLAGS += -custom 587 | endif 588 | endif 589 | 590 | ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \ 591 | $(COMMON_LDFLAGS) $(LIBS:%=%.cma) 592 | CAMLIDLDLLFLAGS := 593 | 594 | ifdef THREADS 595 | ifdef VMTHREADS 596 | THREAD_FLAG := -vmthread 597 | else 598 | THREAD_FLAG := -thread 599 | endif 600 | ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) 601 | ifndef CREATE_LIB 602 | ifndef REAL_OCAMLFIND 603 | ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS) 604 | endif 605 | endif 606 | endif 607 | 608 | # we have to make native-code 609 | else 610 | EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i)))) 611 | ifndef PROFILING 612 | SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS) 613 | PLDFLAGS := 614 | else 615 | SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS) 616 | PLDFLAGS := -p 617 | endif 618 | 619 | REAL_IMPL := $(IMPL_CMX) 620 | REAL_IMPL_INTF := $(IMPLX_INTF) 621 | IMPL_SUF := .cmx 622 | 623 | CFLAGS := -DNATIVE_CODE $(CFLAGS) 624 | 625 | DEPFLAGS := -native 626 | MAKE_DEPS := $(MLDEPS) $(NCDEPIS) 627 | 628 | ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \ 629 | $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS) 630 | CAMLIDLDLLFLAGS := -opt 631 | 632 | ifndef CREATE_LIB 633 | ALL_LDFLAGS += $(LIBS:%=%.cmxa) 634 | else 635 | CFLAGS := $(PIC_FLAGS) $(CFLAGS) 636 | endif 637 | 638 | ifdef THREADS 639 | THREAD_FLAG := -thread 640 | ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS) 641 | ifndef CREATE_LIB 642 | ifndef REAL_OCAMLFIND 643 | ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS) 644 | endif 645 | endif 646 | endif 647 | endif 648 | 649 | export MAKE_DEPS 650 | 651 | ifdef ANNOTATE 652 | ANNOT_FLAG := -dtypes 653 | else 654 | endif 655 | 656 | ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(ANNOT_FLAG) $(OCAMLFLAGS) \ 657 | $(INCFLAGS) $(SPECIAL_OCAMLFLAGS) 658 | 659 | ifdef make_deps 660 | -include $(MAKE_DEPS) 661 | PRE_TARGETS := 662 | endif 663 | 664 | ########################################################################### 665 | # USER RULES 666 | 667 | # Call "OCamlMakefile QUIET=" to get rid of all of the @'s. 668 | QUIET=@ 669 | 670 | # generates byte-code (default) 671 | byte-code: $(PRE_TARGETS) 672 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ 673 | REAL_RESULT="$(BCRESULT)" make_deps=yes 674 | bc: byte-code 675 | 676 | byte-code-nolink: $(PRE_TARGETS) 677 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ 678 | REAL_RESULT="$(BCRESULT)" make_deps=yes 679 | bcnl: byte-code-nolink 680 | 681 | top: $(PRE_TARGETS) 682 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \ 683 | REAL_RESULT="$(BCRESULT)" make_deps=yes 684 | 685 | # generates native-code 686 | 687 | native-code: $(PRE_TARGETS) 688 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ 689 | REAL_RESULT="$(NCRESULT)" \ 690 | REAL_OCAMLC="$(OCAMLOPT)" \ 691 | make_deps=yes 692 | nc: native-code 693 | 694 | native-code-nolink: $(PRE_TARGETS) 695 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ 696 | REAL_RESULT="$(NCRESULT)" \ 697 | REAL_OCAMLC="$(OCAMLOPT)" \ 698 | make_deps=yes 699 | ncnl: native-code-nolink 700 | 701 | # generates byte-code libraries 702 | byte-code-library: $(PRE_TARGETS) 703 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 704 | $(RES_CLIB) $(BCRESULT).cma \ 705 | REAL_RESULT="$(BCRESULT)" \ 706 | CREATE_LIB=yes \ 707 | make_deps=yes 708 | bcl: byte-code-library 709 | 710 | # generates native-code libraries 711 | native-code-library: $(PRE_TARGETS) 712 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 713 | $(RES_CLIB) $(NCRESULT).cmxa \ 714 | REAL_RESULT="$(NCRESULT)" \ 715 | REAL_OCAMLC="$(OCAMLOPT)" \ 716 | CREATE_LIB=yes \ 717 | make_deps=yes 718 | ncl: native-code-library 719 | 720 | ifdef WIN32 721 | # generates byte-code dll 722 | byte-code-dll: $(PRE_TARGETS) 723 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 724 | $(RES_CLIB) $(BCRESULT).dll \ 725 | REAL_RESULT="$(BCRESULT)" \ 726 | make_deps=yes 727 | bcd: byte-code-dll 728 | 729 | # generates native-code dll 730 | native-code-dll: $(PRE_TARGETS) 731 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 732 | $(RES_CLIB) $(NCRESULT).dll \ 733 | REAL_RESULT="$(NCRESULT)" \ 734 | REAL_OCAMLC="$(OCAMLOPT)" \ 735 | make_deps=yes 736 | ncd: native-code-dll 737 | endif 738 | 739 | # generates byte-code with debugging information 740 | debug-code: $(PRE_TARGETS) 741 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ 742 | REAL_RESULT="$(BCRESULT)" make_deps=yes \ 743 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \ 744 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" 745 | dc: debug-code 746 | 747 | debug-code-nolink: $(PRE_TARGETS) 748 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \ 749 | REAL_RESULT="$(BCRESULT)" make_deps=yes \ 750 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \ 751 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" 752 | dcnl: debug-code-nolink 753 | 754 | # generates byte-code libraries with debugging information 755 | debug-code-library: $(PRE_TARGETS) 756 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 757 | $(RES_CLIB) $(BCRESULT).cma \ 758 | REAL_RESULT="$(BCRESULT)" make_deps=yes \ 759 | CREATE_LIB=yes \ 760 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \ 761 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)" 762 | dcl: debug-code-library 763 | 764 | # generates byte-code for profiling 765 | profiling-byte-code: $(PRE_TARGETS) 766 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \ 767 | REAL_RESULT="$(BCRESULT)" PROFILING="y" \ 768 | make_deps=yes 769 | pbc: profiling-byte-code 770 | 771 | # generates native-code 772 | 773 | profiling-native-code: $(PRE_TARGETS) 774 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \ 775 | REAL_RESULT="$(NCRESULT)" \ 776 | REAL_OCAMLC="$(OCAMLOPT)" \ 777 | PROFILING="y" \ 778 | make_deps=yes 779 | pnc: profiling-native-code 780 | 781 | # generates byte-code libraries 782 | profiling-byte-code-library: $(PRE_TARGETS) 783 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 784 | $(RES_CLIB) $(BCRESULT).cma \ 785 | REAL_RESULT="$(BCRESULT)" PROFILING="y" \ 786 | CREATE_LIB=yes \ 787 | make_deps=yes 788 | pbcl: profiling-byte-code-library 789 | 790 | # generates native-code libraries 791 | profiling-native-code-library: $(PRE_TARGETS) 792 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 793 | $(RES_CLIB) $(NCRESULT).cmxa \ 794 | REAL_RESULT="$(NCRESULT)" PROFILING="y" \ 795 | REAL_OCAMLC="$(OCAMLOPT)" \ 796 | CREATE_LIB=yes \ 797 | make_deps=yes 798 | pncl: profiling-native-code-library 799 | 800 | # packs byte-code objects 801 | pack-byte-code: $(PRE_TARGETS) 802 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \ 803 | REAL_RESULT="$(BCRESULT)" \ 804 | PACK_LIB=yes make_deps=yes 805 | pabc: pack-byte-code 806 | 807 | # packs native-code objects 808 | pack-native-code: $(PRE_TARGETS) 809 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \ 810 | $(NCRESULT).cmx $(NCRESULT).o \ 811 | REAL_RESULT="$(NCRESULT)" \ 812 | REAL_OCAMLC="$(OCAMLOPT)" \ 813 | PACK_LIB=yes make_deps=yes 814 | panc: pack-native-code 815 | 816 | # generates HTML-documentation 817 | htdoc: doc/$(RESULT)/html 818 | 819 | # generates Latex-documentation 820 | ladoc: doc/$(RESULT)/latex 821 | 822 | # generates PostScript-documentation 823 | psdoc: doc/$(RESULT)/latex/doc.ps 824 | 825 | # generates PDF-documentation 826 | pdfdoc: doc/$(RESULT)/latex/doc.pdf 827 | 828 | # generates all supported forms of documentation 829 | doc: htdoc ladoc psdoc pdfdoc 830 | 831 | ########################################################################### 832 | # LOW LEVEL RULES 833 | 834 | $(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) $(RESULTDEPS) 835 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) \ 836 | $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ 837 | $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \ 838 | $(REAL_IMPL) 839 | 840 | nolink: $(REAL_IMPL_INTF) $(OBJ_LINK) 841 | 842 | ifdef WIN32 843 | $(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK) 844 | $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \ 845 | -o $@ $(REAL_IMPL) 846 | endif 847 | 848 | %$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) 849 | $(REAL_OCAMLFIND) $(OCAMLMKTOP) \ 850 | $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \ 851 | $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \ 852 | $(REAL_IMPL) 853 | 854 | .SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \ 855 | .mly .di .d .$(EXT_LIB) .idl %.oxridl .c .$(EXT_CXX) .h .so \ 856 | .rep .zog .glade 857 | 858 | ifndef STATIC 859 | ifdef MINGW 860 | $(DLLSONAME): $(OBJ_LINK) 861 | $(CC) $(CFLAGS) $(CFLAGS_WIN32) $(OBJ_LINK) -shared -o $@ \ 862 | -Wl,--whole-archive $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/lib%.a))) \ 863 | $(OCAMLLIBPATH)/ocamlrun.a \ 864 | -Wl,--export-all-symbols \ 865 | -Wl,--no-whole-archive 866 | else 867 | ifdef MSVC 868 | $(DLLSONAME): $(OBJ_LINK) 869 | link /NOLOGO /DLL /OUT:$@ $(OBJ_LINK) \ 870 | $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/%.lib))) \ 871 | $(OCAMLLIBPATH)/ocamlrun.lib 872 | 873 | else 874 | $(DLLSONAME): $(OBJ_LINK) 875 | $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \ 876 | -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) \ 877 | $(OCAMLMKLIB_FLAGS) 878 | endif 879 | endif 880 | endif 881 | 882 | ifndef LIB_PACK_NAME 883 | $(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) 884 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \ 885 | $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(REAL_IMPL) 886 | 887 | $(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) $(RESULTDEPS) 888 | $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \ 889 | $(OCAMLNLDFLAGS) -o $@ $(REAL_IMPL) 890 | else 891 | ifdef BYTE_OCAML 892 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo: $(REAL_IMPL_INTF) 893 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmo $(REAL_IMPL) 894 | else 895 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx: $(REAL_IMPL_INTF) 896 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmx $(REAL_IMPL) 897 | endif 898 | 899 | $(RESULT).cma: $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS) 900 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \ 901 | $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(LIB_PACK_NAME).cmo 902 | 903 | $(RESULT).cmxa $(RESULT).$(EXT_LIB): $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx $(EXTRADEPS) $(RESULTDEPS) 904 | $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \ 905 | $(OCAMLNLDFLAGS) -o $@ $(LIB_PACK_NAME).cmx 906 | endif 907 | 908 | $(RES_CLIB): $(OBJ_LINK) 909 | ifndef MSVC 910 | ifneq ($(strip $(OBJ_LINK)),) 911 | $(AR) rcs $@ $(OBJ_LINK) 912 | endif 913 | else 914 | ifneq ($(strip $(OBJ_LINK)),) 915 | lib -nologo -debugtype:cv -out:$(RES_CLIB) $(OBJ_LINK) 916 | endif 917 | endif 918 | 919 | .mli.cmi: $(EXTRADEPS) 920 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 921 | if [ -z "$$pp" ]; then \ 922 | echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ 923 | -c $(THREAD_FLAG) $(ANNOT_FLAG) \ 924 | $(OCAMLFLAGS) $(INCFLAGS) $<; \ 925 | $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ 926 | -c $(THREAD_FLAG) $(ANNOT_FLAG) \ 927 | $(OCAMLFLAGS) $(INCFLAGS) $<; \ 928 | else \ 929 | echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ 930 | -c -pp \"$$pp $(PPFLAGS)\" $(THREAD_FLAG) $(ANNOT_FLAG) \ 931 | $(OCAMLFLAGS) $(INCFLAGS) $<; \ 932 | $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \ 933 | -c -pp "$$pp $(PPFLAGS)" $(THREAD_FLAG) $(ANNOT_FLAG) \ 934 | $(OCAMLFLAGS) $(INCFLAGS) $<; \ 935 | fi 936 | 937 | .ml.cmi .ml.$(EXT_OBJ) .ml.cmx .ml.cmo: $(EXTRADEPS) 938 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 939 | if [ -z "$$pp" ]; then \ 940 | echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ 941 | -c $(ALL_OCAMLCFLAGS) $<; \ 942 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ 943 | -c $(ALL_OCAMLCFLAGS) $<; \ 944 | else \ 945 | echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ 946 | -c -pp \"$$pp $(PPFLAGS)\" $(ALL_OCAMLCFLAGS) $<; \ 947 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \ 948 | -c -pp "$$pp $(PPFLAGS)" $(ALL_OCAMLCFLAGS) $<; \ 949 | fi 950 | 951 | ifdef PACK_LIB 952 | $(REAL_RESULT).cmo $(REAL_RESULT).cmx $(REAL_RESULT).o: $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) 953 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack $(ALL_LDFLAGS) \ 954 | $(OBJS_LIBS) -o $@ $(REAL_IMPL) 955 | endif 956 | 957 | .PRECIOUS: %.ml 958 | %.ml: %.mll 959 | $(OCAMLLEX) $< 960 | 961 | .PRECIOUS: %.ml %.mli 962 | %.ml %.mli: %.mly 963 | $(OCAMLYACC) $(YFLAGS) $< 964 | 965 | .PRECIOUS: %.ml 966 | %.ml: %.rep 967 | $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $< 968 | 969 | .PRECIOUS: %.ml 970 | %.ml: %.zog 971 | $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@ 972 | 973 | .PRECIOUS: %.ml 974 | %.ml: %.glade 975 | $(OCAML_GLADECC) $(OCAML_GLADECC_FLAGS) $< > $@ 976 | 977 | .PRECIOUS: %.ml %.mli 978 | %.ml %.mli: %.oxridl 979 | $(OXRIDL) $< 980 | 981 | .PRECIOUS: %.ml %.mli %_stubs.c %.h 982 | %.ml %.mli %_stubs.c %.h: %.idl 983 | $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \ 984 | $(CAMLIDLFLAGS) $< 985 | $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi 986 | 987 | .c.$(EXT_OBJ): 988 | $(OCAMLC) -c -cc "$(CC)" -ccopt "$(CFLAGS) \ 989 | $(CFLAGS_WIN32) $(CINCFLAGS) $(CFLAG_O)$@ " $< 990 | 991 | .$(EXT_CXX).$(EXT_OBJ): 992 | $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) -I'$(OCAMLLIBPATH)' \ 993 | $< $(CFLAG_O)$@ 994 | 995 | $(MLDEPDIR)/%.d: %.ml 996 | $(QUIET)echo making $@ from $< 997 | $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi 998 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 999 | if [ -z "$$pp" ]; then \ 1000 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ 1001 | $(DINCFLAGS) $< > $@; \ 1002 | else \ 1003 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \ 1004 | -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \ 1005 | fi 1006 | 1007 | $(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli 1008 | $(QUIET)echo making $@ from $< 1009 | $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi 1010 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 1011 | if [ -z "$$pp" ]; then \ 1012 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(DINCFLAGS) $< > $@; \ 1013 | else \ 1014 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \ 1015 | -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \ 1016 | fi 1017 | 1018 | doc/$(RESULT)/html: $(DOC_FILES) 1019 | rm -rf $@ 1020 | mkdir -p $@ 1021 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 1022 | if [ -z "$$pp" ]; then \ 1023 | echo $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \ 1024 | $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \ 1025 | else \ 1026 | echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -html -d $@ $(OCAMLDOCFLAGS) \ 1027 | $(INCFLAGS) $(DOC_FILES); \ 1028 | $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -html -d $@ $(OCAMLDOCFLAGS) \ 1029 | $(INCFLAGS) $(DOC_FILES); \ 1030 | fi 1031 | 1032 | doc/$(RESULT)/latex: $(DOC_FILES) 1033 | rm -rf $@ 1034 | mkdir -p $@ 1035 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp $[^*]*$ \*)/\1/p;q' $<`; \ 1036 | if [ -z "$$pp" ]; then \ 1037 | echo $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) \ 1038 | $(DOC_FILES) -o $@/doc.tex; \ 1039 | $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES) \ 1040 | -o $@/doc.tex; \ 1041 | else \ 1042 | echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -latex $(OCAMLDOCFLAGS) \ 1043 | $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \ 1044 | $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -latex $(OCAMLDOCFLAGS) \ 1045 | $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \ 1046 | fi 1047 | 1048 | doc/$(RESULT)/latex/doc.ps: doc/$(RESULT)/latex 1049 | cd doc/$(RESULT)/latex && \ 1050 | $(LATEX) doc.tex && \ 1051 | $(LATEX) doc.tex && \ 1052 | $(DVIPS) $(DVIPSFLAGS) doc.dvi -o $(@F) 1053 | 1054 | doc/$(RESULT)/latex/doc.pdf: doc/$(RESULT)/latex/doc.ps 1055 | cd doc/$(RESULT)/latex && $(PS2PDF) $(