├── VERSION
├── pcre
├── test2.ml
├── META
├── Makefile
├── run_mikmatch_pcre.ml
├── syntax_pcre.ml
├── test.ml
└── pcre_lib.ml
├── common
├── Makefile
├── top_declaration.ml
├── global_def.mli
├── global_def.ml
├── select_lib.ml
├── constants.ml
├── charset.ml
├── messages.ml
├── mm_util.ml
├── syntax_common.ml
├── mikmatch.ml
├── regexp_ast.ml
└── mikmatch.mli
├── doc
├── macros.hva
├── Makefile
├── ocamldoc.sty
└── mikmatch-manual.tex.mlx
├── README.md
├── .gitignore
├── str
├── META
├── syntax_str.ml
├── run_mikmatch_str.ml
├── Makefile
├── test1.ml
└── str_lib.ml
├── TODO
├── LICENSE
├── INSTALL
├── Makefile
├── Changes
└── OCamlMakefile
/VERSION:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | mikmatch_version="1.0.9"
3 | echo $mikmatch_version
4 |
--------------------------------------------------------------------------------
/pcre/test2.ml:
--------------------------------------------------------------------------------
1 | open Printf
2 |
3 | ignore (FILTER "a")
4 |
5 | open Set
6 |
7 | module M =
8 | struct
9 | open Map
10 |
11 | ignore (FILTER "b")
12 |
13 | open Hashtbl
14 | end
15 |
--------------------------------------------------------------------------------
/common/Makefile:
--------------------------------------------------------------------------------
1 | SOURCES = \
2 | global_def.ml \
3 | messages.ml charset.ml \
4 | constants.ml \
5 | regexp_ast.ml \
6 | select_lib.ml
7 |
8 | OCAMLFLAGS = -dtypes
9 | USE_CAMLP4 = yes
10 |
11 | .PHONY: default
12 | default: bcnl
13 |
14 | TRASH = *~
15 |
16 | OCAMLMAKEFILE = ../OCamlMakefile
17 | include $(OCAMLMAKEFILE)
18 |
--------------------------------------------------------------------------------
/doc/macros.hva:
--------------------------------------------------------------------------------
1 | \let\oldmeta=\@meta
2 | \renewcommand{\@meta}{%
3 | \oldmeta
4 | \begin{rawhtml}
5 |
20 | \end{rawhtml}}
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Mikmatch
2 | ========
3 |
4 | See `INSTALL` for the installation instructions.
5 | See https://mjambon.github.io/mjambon2016/micmatch.html for the tutorial.
6 | See `pcre/test1.ml` for a complete set of examples.
7 |
8 | For any question or problem, please create a Github "issue".
9 |
10 | Our contribution guidelines are here:
11 | https://github.com/mjambon/documents/blob/master/how-to-contribute.md
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.cm[ioxa]
3 | *.cmxa
4 | *.[oa]
5 | *.annot
6 | *.top
7 | ._d
8 | ._bcdi
9 | *.ppo
10 |
11 | bytecode
12 | nativecode
13 |
14 | pcre/global_def.ml
15 | pcre/global_def.mli
16 | pcre/match.ml
17 | pcre/mikmatch.ml
18 | pcre/mikmatch.mli
19 | pcre/mm_util.ml
20 | pcre/syntax_common.ml
21 |
22 | str/global_def.ml
23 | str/global_def.mli
24 | str/match.ml
25 | str/mikmatch.ml
26 | str/mikmatch.mli
27 | str/mm_util.ml
28 | str/syntax_common.ml
29 |
30 | /_opam
31 |
--------------------------------------------------------------------------------
/str/META:
--------------------------------------------------------------------------------
1 | name = "mikmatch_str"
2 | description = "Pattern matching extended with regexps in Ocamllex syntax"
3 |
4 | requires = "camlp4 str unix"
5 | requires(toploop) += "tophide"
6 |
7 | archive(syntax,toploop) = "pa_mikmatch_str.cma run_mikmatch_str.cma"
8 | archive(syntax,create_toploop) = "pa_mikmatch_str.cma run_mikmatch_str.cma"
9 | archive(syntax,preprocessor) = "pa_mikmatch_str.cma"
10 | archive(byte) = "run_mikmatch_str.cma"
11 | archive(native) = "run_mikmatch_str.cmxa"
12 |
--------------------------------------------------------------------------------
/common/top_declaration.ml:
--------------------------------------------------------------------------------
1 | let add, flush =
2 | let accu = ref [] in
3 | let add str_item =
4 | accu := str_item :: !accu
5 | in
6 | let flush () =
7 | let result = <:str_item< $ List.rev !accu $ >> in
8 | accu := [];
9 | result
10 | in
11 | add, flush
12 |
13 |
14 | let install_syntax () =
15 |
16 | EXTEND Camlp4.PreCast.Gram
17 |
18 | GLOBAL: str_item;
19 |
20 | str_item: FIRST [
21 | [ si = NEXT -> add si; flush () ]
22 | ];
23 |
24 | END
25 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | Documentation:
2 | - mention all the rules which are overridden, since it may
3 | cause incompatibilities with other extensions which do the same
4 |
5 | Compatibility:
6 | - (maybe) add an option for using another keyword than "match", and
7 | avoid to have to delete existing rules
8 |
9 | Features:
10 | - port to Str the macros which already exist for PCRE (if possible)
11 |
12 | Possible optimizations:
13 | - computation of only the substrings that are effectively used (or at
14 | least which name appears in some expression)
15 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | all: mikmatch-ocamldoc/index.html
2 | camlmix -o mikmatch-manual.tex -clean mikmatch-manual.tex.mlx
3 | pdflatex -interaction=nonstopmode mikmatch-manual
4 | pdflatex -interaction=nonstopmode mikmatch-manual
5 | hevea -fix macros.hva mikmatch-manual
6 |
7 | mikmatch-ocamldoc/index.html: ../common/mikmatch.mli
8 | ocamldoc -d mikmatch-ocamldoc -html $<
9 | ocamldoc $< -o mikmatch-ocamldoc.tex -latex -noheader -notoc -notrailer
10 |
11 | clean:
12 | rm -f *~ *.haux *.log *.aux *.toc *.pdf *.html *.out *.htoc \
13 | mikmatch-ocamldoc/*
14 |
--------------------------------------------------------------------------------
/pcre/META:
--------------------------------------------------------------------------------
1 | name = "mikmatch_pcre"
2 | description = "Pattern matching extended with regexps in Ocamllex syntax"
3 |
4 | requires = "camlp4 pcre unix"
5 | requires(toploop) += "tophide"
6 |
7 | archive(syntax,toploop) = "pa_mikmatch_pcre.cma run_mikmatch_pcre.cma"
8 | archive(syntax,create_toploop) = "pa_mikmatch_pcre.cma run_mikmatch_pcre.cma"
9 | archive(syntax,preprocessor) = "pa_mikmatch_pcre.cma"
10 | archive(byte) = "run_mikmatch_pcre.cma"
11 | archive(native) = "run_mikmatch_pcre.cmxa"
12 |
13 | package "run" (
14 | description = "Subpackage providing only runtime support for mikmatch_pcre"
15 | requires = "pcre unix"
16 | archive(byte) = "run_mikmatch_pcre.cma"
17 | archive(native) = "run_mikmatch_pcre.cmxa"
18 | )
19 |
20 | package "syntax" (
21 | description = "Subpackage providing only syntax support for mikmatch_pcre"
22 | requires = "pcre camlp4"
23 | archive(syntax,toploop) = "pa_mikmatch_pcre.cma"
24 | archive(syntax,preprocessor) = "pa_mikmatch_pcre.cma"
25 | )
26 |
--------------------------------------------------------------------------------
/common/global_def.mli:
--------------------------------------------------------------------------------
1 | (* $Id$ *)
2 |
3 | open Camlp4.PreCast
4 |
5 | val init : (Ast.loc -> string -> Ast.str_item option) -> unit
6 | (*
7 | [init get] registers a new filter that will operate after the parsing phase.
8 | The [get] function will be called to look up all expression lowercase
9 | identifiers. It returns the structure items that are required by this
10 | expression. They will be inserted just before the current structure item,
11 | only once.
12 |
13 | Example:
14 |
15 | In your files that will extend the camlp4 preprocessor, define the
16 | following:
17 |
18 |
19 | let get _loc id =
20 | match id with
21 | "pi" -> Some <:str_item< value pi = acos (-1.) >>
22 | | _ -> None
23 | ;;
24 |
25 | let () = init get;;
26 |
27 |
28 | The preprocessed file:
29 |
30 | let x = 0.5 *. pi
31 |
32 |
33 | will be expanded into:
34 |
35 | let pi = acos (-1.)
36 | let x = 0.5 *. pi
37 |
38 | *)
39 |
40 | val init_from_table : (string, Ast.str_item) Hashtbl.t -> unit
41 | (*
42 | Same as [init], but uses the given hash table for its lookups.
43 | *)
44 |
--------------------------------------------------------------------------------
/str/syntax_str.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 | (* $Id$ *)
3 |
4 | open Printf
5 |
6 | open Camlp4.PreCast
7 | open Syntax
8 |
9 | open Syntax_common
10 | open Select_lib
11 | open Match
12 |
13 | let extend_common () =
14 | EXTEND Gram
15 | expr: [
16 | [ "RE_STR"; re = regexp ->
17 | Regexp_ast.warnings re;
18 | let (re_args, re_source, named_groups, postbindings) =
19 | Str_lib.lib.process_regexp _loc ~sharing:true re "" in
20 |
21 | let re_fragments = Match.get_re_fragments _loc re_source in
22 | <:expr< ( $re_fragments$,
23 | $pp_named_groups _loc named_groups$ ) >> ]
24 | ];
25 |
26 | Syntax_common.regexp: LEVEL "simple" [
27 | [ "_" -> Regexp_ast.Characters (_loc, Charset.full) ]
28 | ];
29 |
30 | END;;
31 |
32 | let extend_regular () = extend_common ()
33 | (*
34 | let extend_revised () = extend_common ()
35 | *)
36 |
37 | let _ =
38 | select_lib Str_lib.lib;
39 |
40 | (* Keeping it for backwards compatibility *)
41 | Camlp4.Options.add "-thread"
42 | (Arg.Unit (
43 | fun () ->
44 | select_lib Str_lib.lib_mt;
45 | eprintf "Warning: -thread is deprecated.\n/%!"
46 | ))
47 | " Deprecated option that protects access to shared data with a mutex. \
48 | Currently only patterns containing @ are concerned.";
49 |
50 | (* How to test if the current syntax is the regular or revised one? *)
51 | extend_regular ()
52 |
--------------------------------------------------------------------------------
/common/global_def.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 | (* $Id$ *)
3 |
4 | open Camlp4.PreCast
5 |
6 | let init get =
7 | let rec rewrite si0 =
8 | let _loc = Ast.loc_of_str_item si0 in
9 | let tbl = Hashtbl.create 10 in
10 | let accu = ref <:str_item< >> in
11 | let map =
12 | (object
13 | inherit Ast.map as super
14 |
15 | method expr e =
16 | let _loc = Ast.loc_of_expr e in
17 | (match super#expr e with
18 | | <:expr< $lid:id$ >> ->
19 | (match get _loc id with
20 | Some si ->
21 | if not (Hashtbl.mem tbl id) then
22 | (Hashtbl.add tbl id ();
23 | accu := <:str_item< $!accu$ ; $si$ >>)
24 |
25 | | None -> ())
26 |
27 | | _ -> ());
28 | e
29 |
30 | method str_item si0 =
31 | let si = super#str_item si0 in
32 | let pending = !accu in
33 | accu := <:str_item< >>;
34 | match pending with
35 | <:str_item< >> ->
36 | (* Special workaround.
37 | Otherwise directives are not recognized and
38 | are skipped. *)
39 | si
40 |
41 | | _ -> <:str_item< $pending$ ; $si$ >>
42 | end)
43 | in
44 | map # str_item si0
45 | in
46 |
47 | AstFilters.register_str_item_filter rewrite;
48 | AstFilters.register_topphrase_filter rewrite
49 |
50 |
51 | let init_from_table tbl =
52 | let get _loc id =
53 | try Some (Hashtbl.find tbl id)
54 | with Not_found -> None
55 | in
56 | init get
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2004-2008 Martin Jambon
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 | 1. Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright
10 | notice, this list of conditions and the following disclaimer in the
11 | documentation and/or other materials provided with the distribution.
12 | 3. The name of the author may not be used to endorse or promote products
13 | derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | Installation of Mikmatch version 1.0.0 or higher
2 | ================================================
3 |
4 |
5 | The recommended way of installing mikmatch and a bunch of other packages
6 | is from GODI (godi_console) available from http://godi.camlcity.org/
7 | or from your favorite packaging system.
8 |
9 |
10 | Otherwise, there is the manual install:
11 |
12 | Prerequisites:
13 | -------------
14 |
15 | You must have installed:
16 |
17 | - OCaml including Camlp4 (versions 3.10.2 and 3.11.0 should be OK;
18 | versions up to 3.09.3 will not work).
19 |
20 | - PCRE-OCaml (http://www.ocaml.info/home/ocaml_sources.html)
21 |
22 |
23 | Everything will run smoothly if you have:
24 |
25 | - Findlib, i.e. ocamlfind
26 | (http://projects.camlcity.org/projects/findlib.html)
27 | - Gnu make
28 | - an Sh-compatible shell
29 |
30 |
31 | Compilation:
32 | -----------
33 |
34 | make
35 |
36 | Installation:
37 | ------------
38 |
39 | make install
40 |
41 |
42 | Uninstallation:
43 | --------------
44 |
45 | make uninstall
46 |
47 | Options:
48 | -------
49 |
50 | By default, only mikmatch_pcre is built and installed.
51 | Mikmatch_str can be built and installed with the following commands:
52 | make str # compilation
53 | make install-str # installation
54 | make uninstall-str # uninstallation
55 |
56 |
57 | Problems?
58 | --------
59 |
60 | Questions, comments and bug reports should be sent to:
61 |
62 | http://groups.google.com/group/micmatch (no subscription required)
63 |
--------------------------------------------------------------------------------
/common/select_lib.ml:
--------------------------------------------------------------------------------
1 | (* $Id$ *)
2 |
3 | open Camlp4.PreCast
4 | open Ast
5 |
6 | let not_implemented _loc =
7 | Messages.failure _loc "not implemented"
8 |
9 | let fail _ = failwith "not implemented"
10 |
11 | type regexp_args = (string * Ast.expr) list
12 | type regexp_source = [ `String of string | `Expr of Ast.expr ] list
13 |
14 | type regexp_lib =
15 | { predefined_regexps : (string * Regexp_ast.ast) list;
16 | unfold_range : bool;
17 | process_regexp :
18 | loc -> sharing:bool -> Regexp_ast.ast -> string ->
19 | regexp_args *
20 | regexp_source *
21 | (Regexp_ast.named_groups * Regexp_ast.named_groups) *
22 | (string * expr) list;
23 | compile_regexp_match : loc -> regexp_args -> regexp_source -> expr;
24 | compile_regexp_search : loc -> regexp_args -> regexp_source -> expr;
25 | match_and_bind :
26 | loc -> string -> expr -> expr ->
27 | (Regexp_ast.named_groups * Regexp_ast.named_groups) ->
28 | expr -> expr -> expr;
29 | wrap_match : expr -> expr;
30 | wrap_user_case : expr -> expr;
31 | really_wrap_match : bool;
32 | really_wrap_user_case : bool }
33 |
34 | let dummy =
35 | { predefined_regexps = [];
36 | unfold_range = false;
37 | process_regexp = not_implemented;
38 | compile_regexp_match = not_implemented;
39 | compile_regexp_search = not_implemented;
40 | match_and_bind = not_implemented;
41 | wrap_match = fail;
42 | wrap_user_case = fail;
43 | really_wrap_match = false;
44 | really_wrap_user_case = false }
45 |
--------------------------------------------------------------------------------
/common/constants.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 |
3 | open Camlp4.PreCast
4 |
5 | let dummy_loc = Loc.ghost
6 |
7 | let debug_mode = ref false
8 | let reserved_prefix = if !debug_mode then "_" else "__mikmatch_"
9 | let uppercase_prefix = "C" ^ reserved_prefix
10 | let typevar_prefix = "a" ^ reserved_prefix
11 |
12 | let mod_runtime = ref ""
13 | let mod_runtime_mt = ref ""
14 |
15 | let exn_exit = "Mikmatch_exit"
16 |
17 | let any_exn = reserved_prefix ^ "any_exn"
18 | let any_target = reserved_prefix ^ "any_target"
19 | let any_result = reserved_prefix ^ "any_result"
20 |
21 | let expr_exit _loc =
22 | <:expr< $uid: !mod_runtime$.$uid:exn_exit$ >>
23 |
24 | let raise_exit _loc =
25 | <:expr< raise $expr_exit _loc$ >>
26 |
27 | let patt_exit _loc =
28 | <:patt< $uid: !mod_runtime$.$uid:exn_exit$ >>
29 |
30 | let shared re_name = re_name ^ "shared"
31 | let shared_ovector re_name = re_name ^ "shared_ovector"
32 |
33 | let regexp_prefix = reserved_prefix ^ "regexp_"
34 | let view_prefix = reserved_prefix ^ "view_"
35 |
36 | let new_regexp =
37 | let r = ref 0 in
38 | fun () -> incr r; (!r, regexp_prefix ^ string_of_int !r)
39 |
40 | let new_view =
41 | let r = ref 0 in
42 | fun () -> incr r; (!r, view_prefix ^ string_of_int !r)
43 |
44 | let new_target =
45 | let r = ref 0 in
46 | fun () -> incr r; reserved_prefix ^ "match_target_" ^ string_of_int !r
47 |
48 | let new_subpatt =
49 | let r = ref 0 in
50 | fun () -> incr r; reserved_prefix ^ "subpatt_" ^ string_of_int !r
51 |
52 | let new_var =
53 | let r = ref 0 in
54 | fun () -> incr r; reserved_prefix ^ "var_" ^ string_of_int !r
55 |
56 | let new_type_var =
57 | let r = ref 0 in
58 | fun () -> incr r; typevar_prefix ^ string_of_int !r
59 |
--------------------------------------------------------------------------------
/common/charset.ml:
--------------------------------------------------------------------------------
1 | module C = Set.Make (Char)
2 |
3 | type t = C.t
4 |
5 | let empty = C.empty
6 |
7 | let add = C.add
8 | let singleton = C.singleton
9 | let union = C.union
10 | let diff = C.diff
11 |
12 | let add_range first last set =
13 | let r = ref set in
14 | for i = Char.code first to Char.code last do
15 | r := add (Char.chr i) !r
16 | done;
17 | !r
18 |
19 | let range c1 c2 = add_range c1 c2 empty
20 | let irange i j = range (Char.chr i) (Char.chr j)
21 |
22 | let full = range '\000' '\255'
23 | let full_for_C = C.remove '\000' full
24 |
25 | let of_string s =
26 | let accu = ref C.empty in
27 | String.iter (fun c -> accu := C.add c !accu) s;
28 | !accu
29 |
30 | let complement set = C.diff full set
31 |
32 | let list = C.elements
33 |
34 | let nocase set =
35 | C.fold
36 | (fun c set ->
37 | let c1 = Char.lowercase_ascii c
38 | and c2 = Char.uppercase_ascii c in
39 | let set1 = C.add c1 set in
40 | if c1 <> c2 then C.add c2 set1
41 | else set1)
42 | set
43 | C.empty
44 |
45 | module Posix =
46 | struct
47 | let lower = range 'a' 'z'
48 | let upper = range 'A' 'Z'
49 | let ascii = range '\x00' '\x7F'
50 | let alpha = union lower upper
51 | let digit = range '0' '9'
52 | let alnum = union alpha digit
53 | let punct = of_string "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
54 | let graph = union alnum punct
55 | let print = union (singleton ' ') graph
56 | let blank = of_string " \t"
57 | let cntrl = union (range '\x00' '\x1F') (singleton '\x7F')
58 | let xdigit = of_string "0123456789abcdefABCDEF"
59 | let space = of_string " \t\n\x0B\x0C\r"
60 |
61 | let all = [ "lower", lower;
62 | "upper", upper;
63 | "ascii", ascii;
64 | "alpha", alpha;
65 | "digit", digit;
66 | "alnum", alnum;
67 | "punct", punct;
68 | "graph", graph;
69 | "print", print;
70 | "blank", blank;
71 | "cntrl", cntrl;
72 | "xdigit", xdigit;
73 | "space", space; ]
74 | end
75 |
--------------------------------------------------------------------------------
/common/messages.ml:
--------------------------------------------------------------------------------
1 | (* $Id$ *)
2 |
3 | open Printf
4 |
5 | open Camlp4.PreCast
6 |
7 | let warning _loc s =
8 | (* Format.err_formatter _loc;*)
9 | let label = if !Sys.interactive then "" else "Warning: " in
10 | Format.eprintf "%a:@.%s%s@." Loc.print _loc label s
11 |
12 | let failure _loc s =
13 | (* does it print the error like Stdpp.raise_with_loc? *)
14 | Loc.raise _loc (Failure s)
15 |
16 |
17 | let list = function
18 | [] -> ""
19 | | [s] -> s
20 | | l ->
21 | let l' = List.rev l in
22 | String.concat ", " (List.rev (List.tl l')) ^ " and " ^ List.hd l'
23 |
24 | let invalid_backref _loc name =
25 | failure _loc
26 | (sprintf "Invalid backreference %s" name)
27 |
28 | let unbalanced_bindings _loc l =
29 | failure _loc
30 | (sprintf "Variable%s %s must occur on both sides of this | pattern"
31 | (if List.length l > 1 then "s" else "")
32 | (list l))
33 |
34 | let multiple_binding _loc l =
35 | let s, are =
36 | if List.length l > 1 then "s", "are"
37 | else "", "is" in
38 | failure _loc
39 | (sprintf "Variable%s %s %s bound several times in this matching"
40 | s (list l) are)
41 |
42 | let invalid_range _loc =
43 | failure _loc "Invalid range"
44 |
45 | let invalid_pattern _loc =
46 | failure _loc "Invalid pattern"
47 |
48 | let invalid_lookbehind _loc kind adjective =
49 | failure _loc
50 | (sprintf "%s are disabled in %slookbehind assertions" kind adjective)
51 |
52 | let not_visible _loc who where =
53 | let s, are =
54 | if List.length who > 1 then "s", "are"
55 | else "", "is" in
56 | warning _loc
57 | (sprintf "identifier%s %s %s not visible \
58 | out of this %s"
59 | s (list who) are where)
60 |
61 | let invalid_converter _loc name =
62 | failure _loc
63 | (sprintf "%s is not a valid converter" name)
64 |
65 | let reserved_identifier _loc prefix name =
66 | failure _loc
67 | (sprintf "%s is a reserved identifier: use another prefix than %s"
68 | name prefix)
69 |
70 | let misplaced_pattern p =
71 | failure (Ast.loc_of_patt p)
72 | ("patterns of this kind cannot appear in this context. \
73 | Use match ... with if you are unsure.")
74 |
75 | let cannot_delete_rule name =
76 | eprintf "Warning: Cannot delete rule %s\n%!" name
77 |
--------------------------------------------------------------------------------
/doc/ocamldoc.sty:
--------------------------------------------------------------------------------
1 |
2 | %% Support macros for LaTeX documentation generated by ocamldoc.
3 | %% This file is in the public domain; do what you want with it.
4 |
5 | \NeedsTeXFormat{LaTeX2e}
6 | \ProvidesPackage{ocamldoc}
7 | [2001/12/04 v1.0 ocamldoc support]
8 |
9 | \newenvironment{ocamldoccode}{%
10 | \bgroup
11 | \leftskip\@totalleftmargin
12 | \rightskip\z@skip
13 | \parindent\z@
14 | \parfillskip\@flushglue
15 | \parskip\z@skip
16 | %\noindent
17 | \@@par\smallskip
18 | \@tempswafalse
19 | \def\par{%
20 | \if@tempswa
21 | \leavevmode\null\@@par\penalty\interlinepenalty
22 | \else
23 | \@tempswatrue
24 | \ifhmode\@@par\penalty\interlinepenalty\fi
25 | \fi}
26 | \obeylines
27 | \verbatim@font
28 | \let\org@prime~%
29 | \@noligs
30 | \let\org@dospecials\dospecials
31 | \g@remfrom@specials{\\}
32 | \g@remfrom@specials{\{}
33 | \g@remfrom@specials{\}}
34 | \let\do\@makeother
35 | \dospecials
36 | \let\dospecials\org@dospecials
37 | \frenchspacing\@vobeyspaces
38 | \everypar \expandafter{\the\everypar \unpenalty}}
39 | {\egroup\par}
40 |
41 | \def\g@remfrom@specials#1{%
42 | \def\@new@specials{}
43 | \def\@remove##1{%
44 | \ifx##1#1\else
45 | \g@addto@macro\@new@specials{\do ##1}\fi}
46 | \let\do\@remove\dospecials
47 | \let\dospecials\@new@specials
48 | }
49 |
50 | \newenvironment{ocamldocdescription}
51 | {\list{}{\rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax\ignorespaces}
52 | {\endlist\medskip}
53 |
54 | \newenvironment{ocamldoccomment}
55 | {\list{}{\leftmargin 2\leftmargini \rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax}
56 | {\endlist}
57 |
58 | \let \ocamldocparagraph \paragraph
59 | \def \paragraph #1{\ocamldocparagraph {#1}\noindent}
60 | \let \ocamldocsubparagraph \subparagraph
61 | \def \subparagraph #1{\ocamldocsubparagraph {#1}\noindent}
62 |
63 | \let\ocamldocvspace\vspace
64 |
65 | \newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist}
66 | \newenvironment{ocamldocsigend}
67 | {\noindent\quad\texttt{sig}\ocamldocindent}
68 | {\endocamldocindent\vskip -\lastskip
69 | \noindent\quad\texttt{end}\medskip}
70 | \newenvironment{ocamldocobjectend}
71 | {\noindent\quad\texttt{object}\ocamldocindent}
72 | {\endocamldocindent\vskip -\lastskip
73 | \noindent\quad\texttt{end}\medskip}
74 |
75 | \endinput
76 |
--------------------------------------------------------------------------------
/str/run_mikmatch_str.ml:
--------------------------------------------------------------------------------
1 | exception Mikmatch_exit
2 |
3 | let nocase s =
4 | let len = String.length s in
5 | let buf = Buffer.create (4 * len) in
6 | for i = 0 to len - 1 do
7 | let c = s.[i] in
8 | let cl = Char.lowercase_ascii c
9 | and cu = Char.uppercase_ascii c in
10 | if cl <> cu then (* in this case, cl and cu are letters *)
11 | Printf.bprintf buf "[%c%c]" cl cu
12 | else
13 | Buffer.add_string buf (Str.quote (String.make 1 c))
14 | done;
15 | Buffer.contents buf
16 |
17 |
18 | module Mem =
19 | struct
20 | (* memoization table with periodic removal of old items *)
21 |
22 | type ('a, 'b) t =
23 | { mutable date : float;
24 | mutable last_cleanup : float;
25 | opt_size : int;
26 | max_size : int;
27 | tbl : ('a, ('b * float ref)) Hashtbl.t }
28 |
29 | let create n =
30 | if n < 1 then invalid_arg "Memo.create"
31 | else
32 | { date = 0.;
33 | last_cleanup = 0.;
34 | opt_size = n;
35 | max_size = n + n;
36 | tbl = Hashtbl.create (n + n) }
37 |
38 | (* removal of anything which is too old *)
39 | let cleanup t =
40 | let t0 = t.last_cleanup in
41 | t.last_cleanup <- t.date;
42 | let tbl = t.tbl in
43 | let trash =
44 | Hashtbl.fold
45 | (fun key (data, last_access) trash ->
46 | if !last_access < t0 then
47 | key :: trash
48 | else trash)
49 | tbl
50 | [] in
51 | List.iter (Hashtbl.remove tbl) trash
52 |
53 | (* unsafe addition of data (key should not be in the table) *)
54 | let unsafe_add t key data =
55 | let date = t.date +. 1. in
56 | t.date <- date;
57 | Hashtbl.add t.tbl key (data, ref date);
58 | let size = Hashtbl.length t.tbl in
59 | if size > t.max_size then
60 | cleanup t
61 | else
62 | if size = t.opt_size + 1 then
63 | t.last_cleanup <- t.date
64 |
65 | let add t key data =
66 | if Hashtbl.mem t.tbl key then
67 | invalid_arg "Memo.add"
68 | else
69 | unsafe_add t key data
70 |
71 | let find t key =
72 | let (data, last_access) = Hashtbl.find t.tbl key in
73 | let date = t.date in
74 | last_access := date;
75 | t.date <- date +. 1.;
76 | data
77 |
78 | let get t key lazy_data =
79 | try find t key
80 | with Not_found ->
81 | let data = Lazy.force lazy_data in
82 | unsafe_add t key data;
83 | data
84 |
85 | let clear t =
86 | Hashtbl.clear t.tbl;
87 | t.date <- 0.;
88 | t.last_cleanup <- 0.
89 |
90 | end
91 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: default install uninstall reinstall \
2 | all opt mikmatch-pcre mikmatch-str \
3 | common install-str install-pcre uninstall-str uninstall-pcre \
4 | backup clean archive pcre str
5 |
6 | ifndef PREFIX
7 | BINDIR = $(shell dirname `which ocaml`)
8 | PREFIX = $(shell dirname $(BINDIR))
9 | else
10 | BINDIR = $(PREFIX)/bin
11 | endif
12 | export PREFIX
13 | export BINDIR
14 |
15 |
16 | default: mikmatch-pcre
17 | install: install-pcre
18 | uninstall: uninstall-pcre
19 | reinstall:
20 | $(MAKE) uninstall
21 | $(MAKE) install
22 |
23 | ## GODIVA/GODI targets
24 | all: common
25 | cd pcre && $(MAKE) all-bc
26 | opt: common
27 | cd pcre && $(MAKE) all-nc
28 |
29 | ## end of GODIVA targets
30 |
31 | mikmatch-pcre: common pcre
32 | mikmatch-str: common str
33 |
34 | common:
35 | cd common && $(MAKE)
36 |
37 | str: common
38 | cd str && $(MAKE)
39 | pcre: common
40 | cd pcre && $(MAKE)
41 |
42 | install-str:
43 | cd str && $(MAKE) install
44 | install-pcre:
45 | cd pcre && $(MAKE) install
46 |
47 | uninstall-str:
48 | cd str && $(MAKE) uninstall
49 | uninstall-pcre:
50 | cd pcre && $(MAKE) uninstall
51 |
52 |
53 | backup:
54 | scp -r . $$BACKUP_DIR/mikmatch/
55 |
56 | clean::
57 | cd doc && $(MAKE) clean
58 | cd common && $(MAKE) clean
59 | cd str && $(MAKE) clean
60 | cd pcre && $(MAKE) clean
61 |
62 |
63 |
64 | VERSION = $(shell ./VERSION)
65 | export VERSION
66 |
67 | install: OCAMLFIND_INSTFLAGS = -patch-version $(VERSION)
68 | export OCAMLFIND_INSTFLAGS
69 |
70 | # Only for developers; requires camlmix, hevea, pdflatex
71 | # and maybe other things.
72 | archive:
73 | @echo "Making archive for version $(VERSION)"
74 | cd str && $(MAKE) version
75 | cd pcre && $(MAKE) version
76 | cd doc && $(MAKE)
77 | rm -rf /tmp/mikmatch /tmp/mikmatch-$(VERSION) && \
78 | cp -r . /tmp/mikmatch && \
79 | cd /tmp/mikmatch && \
80 | $(MAKE) clean && \
81 | rm -rf *~ mikmatch*.tar* `find . -name .svn` && \
82 | cd /tmp && cp -r mikmatch mikmatch-$(VERSION) && \
83 | tar czf mikmatch.tar.gz mikmatch && \
84 | tar cjf mikmatch.tar.bz2 mikmatch && \
85 | tar czf mikmatch-$(VERSION).tar.gz mikmatch-$(VERSION) && \
86 | tar cjf mikmatch-$(VERSION).tar.bz2 mikmatch-$(VERSION)
87 | mv /tmp/mikmatch.tar.gz /tmp/mikmatch.tar.bz2 .
88 | mv /tmp/mikmatch-$(VERSION).tar.gz /tmp/mikmatch-$(VERSION).tar.bz2 .
89 | cp mikmatch.tar.gz mikmatch.tar.bz2 $$WWW/
90 | cp mikmatch-$(VERSION).tar.gz mikmatch-$(VERSION).tar.bz2 $$WWW/
91 | cp LICENSE $$WWW/mikmatch-license.txt
92 | cp VERSION $$WWW/mikmatch-version
93 | cp Changes $$WWW/mikmatch-changes.txt
94 | $(MAKE) install-www-doc
95 |
96 | install-www-doc:
97 | cp doc/mikmatch-manual.pdf $$WWW
98 | cp doc/mikmatch-manual.html $$WWW/mikmatch-manual-nocounter.html
99 | cp doc/mikmatch-ocamldoc/* $$WWW/mikmatch-ocamldoc
100 | touch -c $$WWW/mikmatch.html.mlx
101 |
--------------------------------------------------------------------------------
/str/Makefile:
--------------------------------------------------------------------------------
1 | ifndef BINDIR
2 | BINDIR = $(shell dirname `which ocaml`)
3 | endif
4 |
5 | SOURCES = \
6 | mm_util.ml \
7 | global_def.mli global_def.ml \
8 | match.ml \
9 | str_lib.ml \
10 | syntax_common.ml \
11 | syntax_str.ml
12 |
13 | RESULT = mikmatch_str
14 |
15 | OCAMLFLAGS = -dtypes
16 |
17 | OCAMLLDFLAGS = \
18 | messages.cmo charset.cmo \
19 | constants.cmo \
20 | regexp_ast.cmo \
21 | select_lib.cmo
22 |
23 | INCDIRS = ../common
24 |
25 | USE_CAMLP4 = yes
26 |
27 | LIBINSTALL_FILES := \
28 | pa_mikmatch_str.cma pa_mikmatch_str.cmo pa_mikmatch_str.cmi \
29 | run_mikmatch_str.cma run_mikmatch_str.cmo run_mikmatch_str.cmi \
30 | mikmatch.cmi mikmatch.cmo mikmatch.cmx mikmatch.mli \
31 | run_mikmatch_str.cmxa run_mikmatch_str.cmx \
32 | run_mikmatch_str.a run_mikmatch_str.o
33 |
34 | .PHONY: default force all links
35 |
36 | default: links pa_lib misc
37 | force:
38 | touch $(SOURCES)
39 | $(MAKE)
40 |
41 | all:
42 |
43 | links: mikmatch.mli mikmatch.ml match.ml syntax_common.ml mm_util.ml \
44 | global_def.mli global_def.ml
45 |
46 | mikmatch.mli: ../common/mikmatch.mli
47 | ln -s $< $@
48 | mikmatch.ml: ../common/mikmatch.ml
49 | ln -s $< $@
50 | syntax_common.ml: ../common/syntax_common.ml
51 | ln -s $< $@
52 | match.ml: ../common/match.ml
53 | ln -s $< $@
54 | mm_util.ml: ../common/mm_util.ml
55 | ln -s $< $@
56 | global_def.mli: ../common/global_def.mli
57 | ln -s $< $@
58 | global_def.ml: ../common/global_def.ml
59 | ln -s $< $@
60 |
61 | .PHONY: pa_lib install uninstall topinstall misc
62 |
63 | pa_lib:
64 | $(MAKE) RESULT=pa_mikmatch_str pabc bcl
65 |
66 | #install: libinstall topinstall
67 | install: libinstall
68 | uninstall: libuninstall
69 | rm -f $(BINDIR)/mikmatch_str.top $(BINDIR)/mikmatch_str
70 |
71 | topinstall:
72 | install -m 0755 mikmatch_str.top mikmatch_str $(BINDIR)
73 |
74 | misc:
75 | ocamlc -c mikmatch.mli
76 | ocamlc -a -o run_mikmatch_str.cma \
77 | mikmatch.ml run_mikmatch_str.ml
78 | ocamlopt -a -o run_mikmatch_str.cmxa \
79 | mikmatch.ml run_mikmatch_str.ml
80 |
81 |
82 | .PHONY: test1 test-install
83 | test: test1
84 |
85 | # (preinstall test)
86 | test1:
87 | camlp4o ./pa_mikmatch_str.cma -printer o test1.ml > test1.ppo
88 | ocamlopt -pp 'camlp4o ./pa_mikmatch_str.cma' \
89 | str.cmxa unix.cmxa \
90 | run_mikmatch_str.cmxa \
91 | test1.ml -o test1
92 | ./test1
93 | # ocamlmktop -o mikmatch_str.test -I +camlp4 -I . camlp4o.cma \
94 | # pa_mikmatch_str.cma str.cma unix.cma run_mikmatch_str.cma
95 | # ./mikmatch_str.test test1.ml
96 |
97 |
98 | # Compilation with ocamlfind (postinstall test)
99 | test-install:
100 | ocamlfind ocamlopt \
101 | -syntax camlp4o \
102 | -package mikmatch_str\
103 | -linkpkg \
104 | test1.ml -o test1_inst
105 | ./test1_inst
106 |
107 |
108 | TRASH = \
109 | *~ *.ppo *.cm[ioxa] *.cmxa *.o *.a *.top \
110 | *.test test1 test1.more mikmatch_str mikmatch_str.ml
111 |
112 |
113 | OCAMLMAKEFILE = ../OCamlMakefile
114 | include $(OCAMLMAKEFILE)
115 |
--------------------------------------------------------------------------------
/common/mm_util.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 | (* $Id$ *)
3 |
4 | open Camlp4.PreCast
5 |
6 | open Messages
7 |
8 | (* General Camlp4 utilities *)
9 |
10 |
11 | let debug s =
12 | if !Constants.debug_mode then
13 | Printf.eprintf "[debug] %s\n%!" s
14 |
15 |
16 | let list_of_comma_expr e =
17 | let rec aux e l =
18 | match e with
19 | <:expr< $e1$ , $e2$ >> -> aux e1 (aux e2 l)
20 | | <:expr< >> -> l
21 | | e -> e :: l
22 | in
23 | aux e []
24 |
25 | let list_of_comma_patt p =
26 | let rec aux p l =
27 | match p with
28 | <:patt< $p1$ , $p2$ >> -> aux p1 (aux p2 l)
29 | | <:patt< >> -> l
30 | | p -> p :: l
31 | in
32 | aux p []
33 |
34 | let list_of_semicolon_patt p =
35 | let rec aux p l =
36 | match p with
37 | <:patt< $p1$ ; $p2$ >> -> aux p1 (aux p2 l)
38 | | <:patt< >> -> l
39 | | p -> p :: l
40 | in
41 | aux p []
42 |
43 | let list_of_record p =
44 | List.map (
45 | fun field ->
46 | match field with
47 | <:patt< $p1$ = $p2$ >> -> `Normal (p1, p2)
48 | | p -> `Other p
49 | | _ -> assert false
50 | ) (list_of_semicolon_patt p)
51 |
52 |
53 | let comma_expr_of_list _loc = function
54 | hd :: tl ->
55 | debug "comma_expr_of_list";
56 | List.fold_left (
57 | fun accu e -> <:expr< $accu$ , $e$ >>
58 | ) hd tl
59 | | [] -> assert false
60 |
61 |
62 | let comma_patt_of_list _loc = function
63 | hd :: tl ->
64 | debug "comma_patt_of_list";
65 | List.fold_left (
66 | fun accu p -> <:patt< $accu$ , $p$ >>
67 | ) hd tl
68 | | [] -> assert false
69 |
70 |
71 | let semicolon_patt_of_list _loc = function
72 | hd :: tl ->
73 | debug "semicolon_patt_of_list";
74 | List.fold_left (
75 | fun accu p -> <:patt< $accu$ ; $p$ >>
76 | ) hd tl
77 | | [] -> assert false
78 |
79 | let record_of_list _loc l =
80 | debug "record_of_list";
81 | semicolon_patt_of_list _loc
82 | (List.map (
83 | function
84 | | `Normal (p1, p2) -> <:patt< $p1$ = $p2$ >>
85 | | `Other p -> p
86 | ) l)
87 |
88 | let meta_bool = function
89 | true -> Ast.BTrue
90 | | false -> Ast.BFalse
91 |
92 | let binding_of_pair _loc (p, e) =
93 | debug "binding_of_pair";
94 | <:binding< $p$ = $e$ >>
95 |
96 | let pair_of_binding = function
97 | <:binding< $p$ = $e$ >> -> (p, e)
98 | | b ->
99 | let _loc = Ast.loc_of_binding b in
100 | failure _loc "Failed assertion in Mm_util.pair_of_binding"
101 |
102 |
103 | let list_of_binding b =
104 | let rec aux b l =
105 | match b with
106 | <:binding< $b1$ and $b2$ >> -> aux b1 (aux b2 l)
107 | | <:binding< >> -> l
108 | | <:binding< $p$ = $e$ >> -> (p, e) :: l
109 | | <:binding< $anti: _ $ >> ->
110 | failure (Ast.loc_of_binding b)
111 | "Antiquotations for let bindings are not supported by mikmatch"
112 | in
113 | aux b []
114 |
115 |
116 | let match_case_of_tuple _loc (p, w, e) =
117 | debug "match_case_of_tuple";
118 | match w with
119 | None -> <:match_case< $p$ -> $e$ >>
120 | | Some cond -> <:match_case< $p$ when $cond$ -> $e$ >>
121 |
122 |
123 | let eval_string s = Camlp4.Struct.Token.Eval.string ~strict:() s
124 |
125 | let eval_char s = Camlp4.Struct.Token.Eval.char s
126 |
127 | let rec_flag = function
128 | true -> Ast.ReRecursive
129 | | false -> Ast.ReNil
130 |
--------------------------------------------------------------------------------
/pcre/Makefile:
--------------------------------------------------------------------------------
1 | PCRE_DIR = $(shell ocamlfind query pcre)
2 | # PCRE_DIR = something else
3 |
4 | ifndef BINDIR
5 | BINDIR = $(shell dirname `which ocaml`)
6 | endif
7 |
8 | SOURCES = \
9 | mm_util.ml \
10 | global_def.mli global_def.ml \
11 | match.ml \
12 | pcre_lib.ml \
13 | syntax_common.ml \
14 | syntax_pcre.ml
15 |
16 | RESULT = mikmatch_pcre
17 |
18 | OCAMLFLAGS = -dtypes
19 |
20 | OCAMLLDFLAGS = \
21 | messages.cmo charset.cmo \
22 | constants.cmo \
23 | regexp_ast.cmo \
24 | select_lib.cmo
25 |
26 | INCDIRS = ../common
27 |
28 | USE_CAMLP4 = yes
29 |
30 | COMMON_LIBINSTALL_FILES = \
31 | pa_mikmatch_pcre.cma pa_mikmatch_pcre.cmo pa_mikmatch_pcre.cmi \
32 | run_mikmatch_pcre.cmi mikmatch.cmi mikmatch.mli
33 | BC_LIBINSTALL_FILES = \
34 | run_mikmatch_pcre.cma run_mikmatch_pcre.cmo
35 | NC_LIBINSTALL_FILES = \
36 | run_mikmatch_pcre.cmxa run_mikmatch_pcre.cmx \
37 | run_mikmatch_pcre.a run_mikmatch_pcre.o mikmatch.cmx
38 | ALL_LIBINSTALL_FILES = \
39 | $(COMMON_LIBINSTALL_FILES) $(BC_LIBINSTALL_FILES) $(NC_LIBINSTALL_FILES)
40 |
41 | .PHONY: default all-bc all-nc all force links pa_lib install uninstall \
42 | topinstall misc-bc misc-nc
43 |
44 | default: links pa_lib misc-bc misc-nc
45 | touch bytecode
46 | touch nativecode
47 | all-bc: links pa_lib misc-bc
48 | touch bytecode
49 | all-nc: links pa_lib misc-nc
50 | touch nativecode
51 |
52 | all: # needed by libinstall
53 |
54 | force:
55 | touch $(SOURCES)
56 | $(MAKE)
57 |
58 | links: mikmatch.mli mikmatch.ml match.ml syntax_common.ml mm_util.ml \
59 | global_def.mli global_def.ml
60 |
61 | mikmatch.mli: ../common/mikmatch.mli
62 | ln -s $< $@
63 | mikmatch.ml: ../common/mikmatch.ml
64 | ln -s $< $@
65 | syntax_common.ml: ../common/syntax_common.ml
66 | ln -s $< $@
67 | match.ml: ../common/match.ml
68 | ln -s $< $@
69 | mm_util.ml: ../common/mm_util.ml
70 | ln -s $< $@
71 | global_def.mli: ../common/global_def.mli
72 | ln -s $< $@
73 | global_def.ml: ../common/global_def.ml
74 | ln -s $< $@
75 |
76 | pa_lib:
77 | $(MAKE) RESULT=pa_mikmatch_pcre pabc bcl
78 |
79 | install: topinstall
80 | if test -f nativecode; \
81 | then \
82 | $(MAKE) "LIBINSTALL_FILES=$(ALL_LIBINSTALL_FILES)" libinstall;\
83 | else \
84 | $(MAKE) "LIBINSTALL_FILES=$(COMMON_LIBINSTALL_FILES) $(BC_LIBINSTALL_FILES)" libinstall; \
85 | fi
86 |
87 | uninstall:
88 | $(MAKE) libuninstall
89 |
90 | misc-bc:
91 | ocamlc -c mikmatch.mli
92 | ocamlc -a -o run_mikmatch_pcre.cma -I $(PCRE_DIR) \
93 | mikmatch.ml run_mikmatch_pcre.ml
94 | ocamlfind ocamlmktop -o mikmatch_pcre.top \
95 | -linkpkg -package camlp-streams,camlp4.lib,pcre,unix \
96 | pa_mikmatch_pcre.cma run_mikmatch_pcre.cma
97 |
98 | misc-nc:
99 | ocamlc -c mikmatch.mli
100 | ocamlopt -a -o run_mikmatch_pcre.cmxa -I $(PCRE_DIR) \
101 | mikmatch.ml run_mikmatch_pcre.ml
102 |
103 |
104 | .PHONY: test simple-test more-tests test-install
105 | test: simple-test more-tests
106 |
107 | # Toplevel (preinstall test)
108 | simple-test:
109 | camlp4o ./pa_mikmatch_pcre.cma -printer o test.ml > test.ppo
110 | ocamlmktop -o mikmatch_pcre.test -I +camlp4 -I . -I $(PCRE_DIR) \
111 | dynlink.cma \
112 | camlp4o.cma \
113 | pa_mikmatch_pcre.cma pcre.cma unix.cma run_mikmatch_pcre.cma
114 |
115 | # Preinstall test
116 | more-tests:
117 | camlp4o ./pa_mikmatch_pcre.cma -printer o -direct test.ml > test.ppo
118 | ocamlopt \
119 | -pp 'camlp4o ./pa_mikmatch_pcre.cma' \
120 | -I $(PCRE_DIR) pcre.cmxa run_mikmatch_pcre.cmxa \
121 | test.ml -o test
122 | ./test
123 |
124 | # Postinstall test
125 | test-install:
126 | ocamlfind ocamlopt \
127 | -syntax camlp4o \
128 | -package pcre,mikmatch_pcre\
129 | -linkpkg \
130 | test.ml -o test
131 | ./test
132 |
133 | # Debugging
134 | .PHONY: test2
135 | test2:
136 | camlp4o ./pa_mikmatch_pcre.cma -printer o test2.ml > test2.ppo
137 |
138 | TRASH = \
139 | *~ *.ppo *.cm[ioxa] *.cmxa *.o *.a *.top \
140 | *.test test test.more mikmatch bytecode nativecode mikmatch_pcre.ml
141 |
142 |
143 | OCAMLMAKEFILE = ../OCamlMakefile
144 | include $(OCAMLMAKEFILE)
145 |
--------------------------------------------------------------------------------
/pcre/run_mikmatch_pcre.ml:
--------------------------------------------------------------------------------
1 | (* $Id$ *)
2 |
3 | exception Mikmatch_exit
4 |
5 | open Pcre
6 |
7 | let irflags = rflags []
8 |
9 | external make_substrings : string * int array -> substrings = "%identity"
10 |
11 | let search rex f ?(pos = 0) subj =
12 | let _, offset_vector = make_ovector rex in
13 | let substrings = make_substrings (subj, offset_vector) in
14 | let subj_len = String.length subj in
15 | let rec loop cur_pos =
16 | if
17 | try
18 | unsafe_pcre_exec
19 | irflags rex ~pos:cur_pos ~subj_start:0 ~subj offset_vector None; true
20 | with Not_found -> false
21 | then
22 | (f substrings;
23 | let first = offset_vector.(0) in
24 | let last = offset_vector.(1) in
25 | if first < subj_len then
26 | loop (max (first + 1) last)) in
27 | loop pos
28 |
29 | let scan ~full rex pos ~ftext ~fmatch subj =
30 | let _, offset_vector = make_ovector rex in
31 | let substrings = make_substrings (subj, offset_vector) in
32 | let subj_len = String.length subj in
33 | let rec loop previous_last cur_pos =
34 | if
35 | try
36 | unsafe_pcre_exec
37 | irflags rex ~pos:cur_pos ~subj_start:0 ~subj offset_vector None; true
38 | with Not_found ->
39 | let last = String.length subj in
40 | if full || last > previous_last then
41 | ftext (String.sub subj previous_last (last - previous_last));
42 | false
43 | then
44 | (let first = offset_vector.(0) in
45 | let last = offset_vector.(1) in
46 | if full || first > pos then
47 | ftext (String.sub subj previous_last (first - previous_last));
48 | fmatch substrings;
49 | if first < subj_len then
50 | loop last (max (first + 1) last)
51 | else if full then
52 | ftext "") in
53 | loop pos pos
54 |
55 | let map rex f ?(pos = 0) ?(full = true) subj =
56 | let l = ref [] in
57 | let ftext s = l := `Text s :: !l
58 | and fmatch substrings = l := f substrings :: !l in
59 | scan ~full rex pos ~ftext ~fmatch subj;
60 | List.rev !l
61 |
62 | let collect rex f ?(pos = 0) subj =
63 | let l = ref [] in
64 | let f substrings = l := f substrings :: !l in
65 | search rex f ~pos subj;
66 | List.rev !l
67 |
68 | let split rex ?(full = false) ?(pos = 0) subj =
69 | let l = ref [] in
70 | let ftext s = l := s :: !l
71 | and fmatch substrings = () in
72 | scan ~full rex pos ~ftext ~fmatch subj;
73 | List.rev !l
74 |
75 | let bquote_char buf c =
76 | match c with
77 | '\\' | '^' | '$' | '.' | '[' | ']' | '|'
78 | | '(' | ')' | '?' | '*' | '+' | '{' | '}' -> Printf.bprintf buf "\\%c" c
79 | | '\000' -> Buffer.add_string buf "\\000"
80 | | c -> Buffer.add_char buf c
81 |
82 | (* Pcre.quote does not escape null characters (which terminate C strings) *)
83 | let quote_string s =
84 | let len = String.length s in
85 | let buf = Buffer.create (2 * len) in
86 | for i = 0 to len - 1 do
87 | bquote_char buf (String.unsafe_get s i)
88 | done;
89 | Buffer.contents buf
90 |
91 | let nocase s =
92 | let len = String.length s in
93 | let buf = Buffer.create (2 * len) in
94 | for i = 0 to len - 1 do
95 | let c = s.[i] in
96 | let cl = Char.lowercase_ascii c
97 | and cu = Char.uppercase_ascii c in
98 | if cl <> cu then (* in this case, cl and cu are letters *)
99 | Printf.bprintf buf "[%c%c]" cl cu
100 | else
101 | bquote_char buf c
102 | done;
103 | Buffer.contents buf
104 |
105 | module Mem =
106 | struct
107 | (* memoization table with periodic removal of old items *)
108 |
109 | type ('a, 'b) t =
110 | { mutable date : float;
111 | mutable last_cleanup : float;
112 | opt_size : int;
113 | max_size : int;
114 | tbl : ('a, ('b * float ref)) Hashtbl.t }
115 |
116 | let create n =
117 | if n < 1 then invalid_arg "Memo.create"
118 | else
119 | { date = 0.;
120 | last_cleanup = 0.;
121 | opt_size = n;
122 | max_size = n + n;
123 | tbl = Hashtbl.create (n + n) }
124 |
125 | (* removal of anything which is too old *)
126 | let cleanup t =
127 | let t0 = t.last_cleanup in
128 | t.last_cleanup <- t.date;
129 | let tbl = t.tbl in
130 | let trash =
131 | Hashtbl.fold
132 | (fun key (data, last_access) trash ->
133 | if !last_access < t0 then
134 | key :: trash
135 | else trash)
136 | tbl
137 | [] in
138 | List.iter (Hashtbl.remove tbl) trash
139 |
140 | (* unsafe addition of data (key should not be in the table) *)
141 | let unsafe_add t key data =
142 | let date = t.date +. 1. in
143 | t.date <- date;
144 | Hashtbl.add t.tbl key (data, ref date);
145 | let size = Hashtbl.length t.tbl in
146 | if size > t.max_size then
147 | cleanup t
148 | else
149 | if size = t.opt_size + 1 then
150 | t.last_cleanup <- t.date
151 |
152 | let add t key data =
153 | if Hashtbl.mem t.tbl key then
154 | invalid_arg "Memo.add"
155 | else
156 | unsafe_add t key data
157 |
158 | let find t key =
159 | let (data, last_access) = Hashtbl.find t.tbl key in
160 | let date = t.date in
161 | last_access := date;
162 | t.date <- date +. 1.;
163 | data
164 |
165 | let get t key lazy_data =
166 | try find t key
167 | with Not_found ->
168 | let data = Lazy.force lazy_data in
169 | unsafe_add t key data;
170 | data
171 |
172 | let clear t =
173 | Hashtbl.clear t.tbl;
174 | t.date <- 0.;
175 | t.last_cleanup <- 0.
176 |
177 | end
178 |
--------------------------------------------------------------------------------
/str/test1.ml:
--------------------------------------------------------------------------------
1 | open Printf
2 |
3 | let _ = function RE "" -> ()
4 |
5 | let x = 1 in ();;
6 | let / alpha / = "a" in ();;
7 |
8 | (* Definition of regular expressions for further use *)
9 | RE space = [' ' '\t' '\n' '\r']
10 | RE not_space = _ # space
11 | RE digit = ['0'-'9']
12 | RE letter = ['A'-'Z' '_' 'a'-'z']
13 | RE word = letter+
14 |
15 | (*
16 | (* Inclusion of file in the same syntax, e.g. a library of user-defined
17 | regular expressions. (known problem with error location) *)
18 | USE "my_regexps.ml" (* defines `word' and `digit' *)
19 | *)
20 |
21 | (* Extended pattern-matching in the following constructs:
22 | match ... with ...
23 | try ... with ...
24 | function ...
25 | *)
26 |
27 | let test expected a b c =
28 | printf "[case %i] " expected; flush stdout;
29 | (match a, b, c with
30 | None, (None | Some (RE space* )), None -> printf "case 1\n"
31 | | Some ({ contents = [| RE (word as x); _; y |]}),
32 | (Some ("test" as z | RE word space (word as z))),
33 | None -> printf "case 2: %S %S %S\n" x y z
34 | | _, _, Some (RE space* (['0'-'9']+ as n)) -> printf "case 3: %s\n" n
35 | | _ -> printf "case 4\n");
36 | flush stdout
37 |
38 | let _ =
39 | printf "Tests (match ... with):\n"; flush stdout;
40 | test 1 None (Some " ") None;
41 |
42 | test 2
43 | (Some (ref [| "alpha"; "beta"; "2 gamma" |]))
44 | (Some "Hello World!")
45 | None;
46 |
47 | test 3 None None (Some " 123 ");
48 |
49 | test 4 (Some (ref [| |])) (Some "") (Some "abc")
50 |
51 | let _ =
52 | match "" with
53 | (RE (("a" as a) | ("b" as a))) | a -> a
54 |
55 | let hello_who s =
56 | match s with
57 | RE _* ['h''H']"ello" ","? space*
58 | ((word | space)* word as someone) -> String.capitalize someone
59 |
60 | | _ -> "nobody"
61 |
62 | let _ =
63 | printf "Extraction of the recipient's name\n";
64 | List.iter (fun s ->
65 | printf "Hello who: %S\n" s;
66 | printf " -> %S\n" (hello_who s);
67 | flush stdout)
68 | [ "Hello World!";
69 | "First of all, hello everybody.";
70 | "*** hello world ***";
71 | "Hello, Caml riders!" ]
72 |
73 | let _ =
74 | printf "Test (local and global bindings):\n"; flush stdout;
75 | match "" with
76 | (RE (word as x | space+ (word as x))* ) | _ ->
77 | printf "Passed.\n"
78 |
79 | let _ =
80 | printf "Test (repetition range + end of line):\n"; flush stdout;
81 | let f s =
82 | match s with
83 | RE '-'? digit{1-4} eol -> printf "%S has less than 5 digits.\n" s
84 | | RE '-'? digit{5-} eol -> printf "%S has at least 5 digits.\n" s
85 | | _ -> printf "%S is not a number.\n" s in
86 | List.iter f ["123"; "1234"; "12345"; "12345678"; "-1234"; "*1"; "1*";
87 | "9\n*" ]
88 |
89 | let test f (expected, s) =
90 | let (success, result) = f s in
91 | let passed = expected = success in
92 | if passed then (printf "[OK] %s%s\n" s
93 | (match result with None -> ""
94 | | Some x -> sprintf " -> %s"x);
95 | flush stdout)
96 | else
97 | (print_endline (s ^ "Failed"); flush stdout; failwith s)
98 |
99 | let () =
100 | printf "Test (no case: the ~ operator):\n"; flush stdout;
101 | List.iter
102 | (test (function
103 | RE "hello"~ " World!" -> true, None
104 | | _ -> false, None))
105 | [ true, "Hello World!";
106 | true, "hElLO World!";
107 | false, "hello WORLD!" ]
108 |
109 | let () =
110 | printf "Test (try ... with):\n"; flush stdout;
111 | try failwith "Hello World!"
112 | with
113 | Failure RE "Hello" space* (word as w) -> printf "OK: %s\n" w
114 | | Failure s -> printf "Failure: %s\n" s
115 |
116 | let () =
117 | printf "Test (function ... -> ...):\n"; flush stdout;
118 | let f =
119 | function
120 | RE "Hello" space* (word as w) -> printf "OK: %s\n" w
121 | | _ -> printf "Error\n" in
122 | f "Hello Everybody";
123 | f "Hello Caml!"
124 |
125 | let () =
126 | printf "Test (backreferences):\n"; flush stdout;
127 | let f s =
128 | match s with
129 | RE
130 | (digit+ as x | (word as x)) (* x = global id *)
131 | (" " as sp !sp)* (* sp = local id *)
132 | !x -> true, Some x
133 | | _ -> false, None in
134 | List.iter (test f)
135 | [ true, "123123";
136 | false, "123 123";
137 | true, "123 123";
138 | true, "aaaa";
139 | false, "abc";
140 | false, "ab1ab1" ]
141 |
142 |
143 | let print_named_groups l =
144 | List.iter
145 | (fun (name, positions) ->
146 | printf "%s:" name;
147 | List.iter (fun i -> printf " %i" i) positions;
148 | printf "\n")
149 | l
150 |
151 | (* Lower level feature: RE_STR returns the source of the regexp,
152 | to be used with specific compilation or search options. *)
153 |
154 | let _ =
155 | let (src, named_groups) =
156 | RE_STR
157 | (("a"|"b"+)? digit{2}) as start
158 | (space* word)+ ( digit{1} (word as last_word)
159 | | digit{3} (word as last_word)) in
160 |
161 | printf "Str regexp source: %S\n" src;
162 | printf "Named groups and their locations:\n";
163 | print_named_groups named_groups;
164 | flush stdout
165 |
166 | let charset_union = RE_STR digit | space | "a" | ['A'-'Z']
167 |
168 |
169 | let _ =
170 | printf "Debugging test 1:\n"; flush stdout;
171 | match ["Coucou Martin"] with
172 | [ (RE (word as x) space (word as y)) | ("zobi" as x as y) ]
173 | | ("abc" as x :: y :: _) -> printf "Trop cool x=%S y=%S\n" x y
174 | | _ -> printf "Bof bof...\n"
175 |
176 | let _ =
177 | printf "Debugging test 2:\n"; flush stdout;
178 | match ["Hello"; "!"], 123 with
179 | "***" :: _, _ -> printf "Hop!\n"
180 |
181 | | RE word as w :: RE _ as c :: _, (122|123) when w <> "Bye" ->
182 | printf "Cool: %S %S\n" w c
183 |
184 | | [RE ""], _ -> printf "Glouglou\n"
185 | | _ -> printf "Sorry\n"
186 |
187 | let _ =
188 | printf "Debugging test 3:\n"; flush stdout;
189 | (match "hello" with
190 | (RE ' '{10})
191 | | RE _* ' '{10} eol -> ()
192 | | _ -> ());
193 | printf "Passed.\n"
194 |
195 | let _ =
196 | match Some "x" with
197 | Some ((RE "a") | ("b"|"c")) -> true
198 | | _ -> false
199 |
200 | let _ =
201 | match "axxxxyz", 333 with
202 | RE "a" as s ("bc" %pos | "x"+ %pos) (_* as s'), _
203 | | (s as s', pos) ->
204 | printf "%s, %i, %s\n%!" s pos s'
205 |
206 | let _ =
207 | match "123" with
208 | RE digit+ as n := fun _ -> 1 -> n
209 | | _ -> 2
210 |
211 |
212 |
213 | (* Parametrized regexps *)
214 | let _ =
215 | let find s =
216 | match "abbcdefgghijkl" with
217 | RE _* @s @s -> assert true
218 | | _ -> assert false in
219 | find "b";
220 | find "g"
221 |
222 |
--------------------------------------------------------------------------------
/pcre/syntax_pcre.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 |
3 | open Printf
4 |
5 | open Camlp4.PreCast
6 | open Syntax
7 |
8 | open Regexp_ast
9 | open Syntax_common
10 | open Select_lib
11 | open Match
12 |
13 | let expand_macro ?(sharing = false) ?(anchored = false) _loc re e f =
14 | warnings re;
15 | let (num, re_name) = Constants.new_regexp () in
16 | let var_name = var_of_regexp re_name in
17 | let (re_args, re_source, named_groups, postbindings) =
18 | (!lib).process_regexp ~sharing _loc re re_name in
19 | let get_re = Match.make_get_re _loc re_name re_args in
20 | add_compiled_regexp ~anchored postbindings
21 | _loc re_name num re_args re_source named_groups;
22 |
23 | !(lib).wrap_match
24 | (f _loc re_name get_re var_name named_groups
25 | ((!lib).wrap_user_case e))
26 |
27 | let check_assertion ~lookahead positive re =
28 | let rec check ~branched = function
29 | Bind (_loc, e, name, conv) ->
30 | if not positive then
31 | Messages.not_visible _loc [name] "negative assertion";
32 | check ~branched e
33 | | Bind_pos (_loc, name) ->
34 | if not positive then
35 | Messages.not_visible _loc [name] "negative assertion";
36 | 0
37 | | Epsilon _ -> 0
38 | | Characters _ -> 1
39 | | Special (_loc, s, (name, Some len)) -> len
40 | | Special (_loc, s, (name, None)) ->
41 | if not lookahead then
42 | Messages.invalid_lookbehind _loc
43 | (sprintf "These patterns (%s)" name) ""
44 | else 0
45 | | Backref (_loc, _) ->
46 | if not lookahead then
47 | Messages.invalid_lookbehind _loc "Backreferences" ""
48 | else 0
49 | | Variable (_loc, _) | Nocase_variable (_loc, _) ->
50 | if not lookahead && branched then
51 | Messages.invalid_lookbehind _loc "Variables in optional branches" ""
52 | else 0
53 | | Sequence (_loc, e1, e2) -> check ~branched e1 + check ~branched e2
54 | | Alternative (_loc, e1, e2, _, _) ->
55 | let len1 = check ~branched:true e1 in
56 | let len2 = check ~branched:true e2 in
57 | if not lookahead && len1 <> len2 then
58 | Messages.invalid_lookbehind _loc
59 | "Alternatives of different length" ""
60 | else max len1 len2
61 | | Repetition (_loc, (kind, greediness), e) ->
62 | (match kind with
63 | Range (x, None) -> x * check ~branched e
64 | | _ ->
65 | if not lookahead then
66 | Messages.invalid_lookbehind _loc
67 | "Repetitions of variable length" ""
68 | else check ~branched e)
69 | | Lookahead (_loc, _, e)
70 | | Lookbehind (_loc, _, e) -> check ~branched e
71 | | Possessive (_, e)
72 | | Closed e -> check ~branched e in
73 |
74 | ignore (check ~branched:false re)
75 |
76 |
77 |
78 |
79 | let lookahead _loc bopt re =
80 | let positive = bopt = None in
81 | check_assertion ~lookahead:true positive re;
82 | Lookahead (_loc, positive, if positive then re else Closed re)
83 |
84 | let lookbehind _loc bopt re =
85 | let positive = bopt = None in
86 | check_assertion ~lookahead:false positive re;
87 | Lookbehind (_loc, positive, if positive then re else Closed re)
88 |
89 | let seq _loc e = <:expr< do { $e$ } >>
90 |
91 | let extend_common () =
92 | let expr_level = "top" in
93 | EXTEND Gram
94 | expr: LEVEL $expr_level$ [
95 | [ "RE_PCRE"; re = regexp ->
96 | warnings re;
97 | let (re_args, re_source, named_groups, postbindings) =
98 | Pcre_lib.lib.process_regexp _loc ~sharing:false re "" in
99 |
100 | let re_fragments = Match.get_re_fragments _loc re_source in
101 | <:expr< ( $re_fragments$,
102 | $pp_named_groups _loc named_groups$ ) >>
103 |
104 |
105 | | "REPLACE"; re = regexp; "->"; e = sequence ->
106 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_replace
107 |
108 | | "SEARCH"; re = regexp; "->"; e = sequence ->
109 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_search
110 |
111 | | "MAP"; re = regexp; "->"; e = sequence ->
112 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_map
113 |
114 | | "COLLECT"; re = regexp; "->"; e = sequence ->
115 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_collect
116 |
117 | | "COLLECTOBJ"; re = regexp ->
118 | expand_macro _loc re <:expr< assert false >> Pcre_lib.macro_collectobj
119 |
120 | | "SPLIT"; re = regexp ->
121 | expand_macro _loc re <:expr< assert false >> Pcre_lib.macro_split
122 |
123 | | "REPLACE_FIRST"; re = regexp; "->"; e = sequence ->
124 | expand_macro _loc re (seq _loc e) Pcre_lib.macro_replace_first
125 |
126 | | "SEARCH_FIRST"; re = regexp; "->"; e = sequence ->
127 | expand_macro ~sharing:true _loc re (seq _loc e)
128 | Pcre_lib.macro_search_first
129 |
130 | | "MATCH"; re = regexp; "->"; e = sequence ->
131 | expand_macro ~sharing:true ~anchored:true _loc re (seq _loc e)
132 | Pcre_lib.macro_match
133 |
134 | | "FILTER"; re = regexp ->
135 | expand_macro ~sharing:true ~anchored:true _loc re
136 | <:expr< assert false >> Pcre_lib.macro_filter
137 |
138 | | "CAPTURE"; re = regexp ->
139 | expand_macro ~sharing:true ~anchored:true _loc re
140 | <:expr< assert false >> Pcre_lib.macro_capture
141 | ]
142 | ];
143 |
144 | regexp: LEVEL "postop" [
145 | [ re = regexp; "*"; UIDENT "Lazy" ->
146 | Repetition (_loc, (Star, false), Closed re)
147 | | re = regexp; "+"; UIDENT "Lazy" ->
148 | Repetition (_loc, (Plus, false), Closed re)
149 | | re = regexp; "?"; UIDENT "Lazy" ->
150 | Repetition (_loc, (Option, false), Closed re)
151 | | r = regexp; "{"; (rng, rng_loc) = range; "}"; UIDENT "Lazy" ->
152 | Repetition (_loc, (Range rng, false), Closed r)
153 | | re = regexp; UIDENT "Possessive" ->
154 | Possessive (_loc, re) ]
155 | ];
156 |
157 | regexp: LEVEL "simple" [
158 | [ "_" -> Characters (_loc, Charset.full)
159 | | "<";
160 | x = OPT [ b1 = OPT [ x = UIDENT "Not" -> x ];
161 | re1 = regexp -> (b1, re1) ];
162 | y = OPT [ ".";
163 | r2 = OPT [ b2 = OPT [ x = UIDENT "Not" -> x ];
164 | re2 = regexp -> (b2, re2) ] -> r2 ];
165 | ">" ->
166 | match x, y with
167 | None, None
168 | | None, Some None -> Epsilon _loc
169 | | None, Some (Some (b2, re2)) -> lookahead _loc b2 re2
170 | | Some (b1, re1), None -> lookahead _loc b1 re1
171 | | Some (b1, re1), Some None -> lookbehind _loc b1 re1
172 | | Some (b1, re1), Some (Some (b2, re2)) ->
173 | Sequence (_loc, lookbehind _loc b1 re1, lookahead _loc b2 re2) ]
174 | ];
175 |
176 | END;;
177 |
178 | let extend_regular () = extend_common ()
179 | (*
180 | let extend_revised () = extend_common ()
181 | *)
182 |
183 | let _ =
184 | select_lib Pcre_lib.lib;
185 |
186 | Camlp4.Options.add "-thread"
187 | (Arg.Unit (
188 | fun () ->
189 | select_lib Pcre_lib.lib_mt;
190 | eprintf "Warning: -thread is deprecated.\n/%!"
191 | )
192 | )
193 | " Deprecated option that protects access to shared data with a mutex. \
194 | Currently only patterns containing @ are concerned.";
195 |
196 | (* How to test if the current syntax is the regular or revised one? *)
197 | extend_regular ()
198 |
--------------------------------------------------------------------------------
/common/syntax_common.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 |
3 |
4 | open Printf
5 |
6 | open Camlp4.PreCast
7 | open Syntax
8 |
9 | open Mm_util
10 | open Regexp_ast
11 | open Select_lib
12 | open Match
13 |
14 | let regexp = Gram.Entry.mk "regexp";;
15 | let regexp_match_case = Gram.Entry.mk "regexp_match_case";;
16 | let range = Gram.Entry.mk "range";;
17 |
18 | let seq _loc e =
19 | match e with
20 | <:expr< $_$ ; $_$ >> -> <:expr< do { $e$ } >>
21 | | _ -> e
22 |
23 |
24 | let extend_common () =
25 | (try DELETE_RULE Gram patt: LIDENT END
26 | with _rule_not_found -> ());
27 |
28 | (try
29 | DELETE_RULE Gram
30 | expr: "let"; opt_rec; binding; "in"; expr LEVEL ";"
31 | END
32 | with _rule_not_found ->
33 | Messages.cannot_delete_rule "(1)");
34 |
35 | (try
36 | DELETE_RULE Gram
37 | str_item: "let"; opt_rec; binding; "in"; expr
38 | END
39 | with _rule_not_found ->
40 | Messages.cannot_delete_rule "(2)");
41 |
42 | (try
43 | DELETE_RULE Gram
44 | str_item: "let"; opt_rec; binding
45 | END
46 | with _rule_not_found ->
47 | Messages.cannot_delete_rule "(3)");
48 |
49 |
50 | EXTEND Gram
51 | GLOBAL:
52 | str_item patt expr
53 | regexp regexp_match_case range;
54 |
55 | str_item: [
56 | [ "RE"; name = LIDENT; "="; re = regexp ->
57 | warnings re;
58 | Hashtbl.add named_regexps name re;
59 | <:str_item< >> ]
60 | ];
61 |
62 | special_patt: [
63 | [ "RE"; re = regexp -> `Regexp re
64 | | "/"; re = regexp; "/" -> `Regexp re
65 | | "%"; name = uid_path;
66 | arg = OPT patt LEVEL "simple" -> `View (name, arg) ]
67 | ];
68 |
69 | uid_path: [
70 | [ l = LIST1 [ x = UIDENT -> x ] SEP "." ->
71 | match List.rev l with
72 | basename :: modname -> (_loc, basename, List.rev modname)
73 | | _ -> assert false ]
74 | ];
75 |
76 | patt: LEVEL "simple" [
77 | [ x = special_patt -> handle_special_patt _loc x ]
78 | ];
79 |
80 | expr: LEVEL "top" [
81 | [ "let"; o = OPT "rec"; b = binding; "in"; e2 = sequence ->
82 | handle_let_bindings _loc (o <> None) (list_of_binding b)
83 | (seq _loc e2) ]
84 | ];
85 |
86 | expr: LEVEL "top" [
87 | [ "let"; LIDENT "view";
88 | name = UIDENT; "="; e1 = expr; "in"; e2 = sequence ->
89 | <:expr< let $lid:"view_" ^ name$ = $e1$ in $seq _loc e2$ >> ]
90 | ];
91 |
92 | expr: LEVEL "top" [
93 | [ "let"; "try"; o = OPT "rec"; l = LIST1 let_binding SEP "and";
94 | "in"; e2 = sequence;
95 | "with"; pwel = LIST1 lettry_case SEP "|" ->
96 | let_try_in _loc o (List.map pair_of_binding l) (seq _loc e2) pwel ]
97 | ];
98 |
99 | str_item: LEVEL "top" [
100 | [
101 | "let"; o = OPT "rec"; b = binding ->
102 | handle_value_bindings _loc (o <> None) (list_of_binding b)
103 |
104 | | "let"; o = OPT "rec"; b = binding; "in"; e2 = sequence ->
105 | let e =
106 | handle_let_bindings _loc (o <> None)
107 | (list_of_binding b)
108 | (seq _loc e2) in
109 | <:str_item< $exp:e$ >>
110 |
111 | | "let"; LIDENT "view"; name = UIDENT; "="; e1 = expr ->
112 | <:str_item< value $lid:"view_" ^ name$ = $e1$ >>
113 |
114 | | "let"; "try"; o = OPT "rec"; b = binding;
115 | "in"; e2 = sequence;
116 | "with"; pwel = LIST1 lettry_case SEP "|" ->
117 | let e =
118 | let_try_in _loc o (list_of_binding b) (seq _loc e2) pwel in
119 | <:str_item< $exp:e$ >>
120 | ]
121 | ];
122 |
123 | lettry_case: [
124 | [ p = patt;
125 | w = OPT [ "when"; e = expr -> e ]; "->";
126 | e = sequence ->
127 | match_case_of_tuple _loc (p, w, <:expr< fun () -> $seq _loc e$ >>) ]
128 | ];
129 |
130 | regexp_match_case: [
131 | [ x1 = patt;
132 | w = OPT [ "when"; e = expr -> e ]; "->";
133 | x2 = sequence ->
134 | (_loc, x1, w, seq _loc x2) ]
135 | ];
136 |
137 | regexp: [
138 | [ r = regexp; "as"; i = LIDENT;
139 | conv =
140 | OPT [ ":"; s = LIDENT ->
141 | (match s with
142 | "int" -> `Int
143 | | "float" -> `Float
144 | | "option" -> `Option
145 | | s -> Messages.invalid_converter _loc s)
146 | | ":="; e = expr -> `Custom e
147 | | "="; e = expr -> `Value e ] ->
148 | Bind (_loc, r, i, conv) ]
149 | | [ r1 = regexp; "|"; r2 = regexp -> alternative _loc r1 r2 ]
150 | | [ r1 = regexp; r2 = regexp -> Sequence (_loc, r1, r2) ]
151 |
152 | | "postop" NONA
153 | [ r = regexp; "*" -> Repetition (_loc, (Star, true), Closed r)
154 | | r = regexp; "+" -> Repetition (_loc, (Plus, true), Closed r)
155 | | r = regexp; "?" -> Repetition (_loc, (Option, true), Closed r)
156 | | r = regexp; "~" -> nocase r
157 | | r = regexp; "{"; (rng, rng_loc) = range; "}" ->
158 | if !(lib).unfold_range then repeat rng_loc (Closed r) rng
159 | else Repetition (_loc, (Range rng, true), Closed r) ]
160 |
161 | | "binop" LEFTA
162 | [ r1 = regexp; "#"; r2 = regexp ->
163 | let msg = " term is not a set of characters" in
164 | let set1 = Regexp_ast.as_charset _loc ("left" ^ msg) r1 in
165 | let set2 = Regexp_ast.as_charset _loc ("right" ^ msg) r2 in
166 | Characters (_loc, Charset.diff set1 set2) ]
167 |
168 | | "preop" NONA
169 | [ "!"; ident = LIDENT -> Backref (_loc, ident)
170 | | "@"; e = expr LEVEL "." -> Variable (_loc, e) ]
171 |
172 | | "simple" NONA
173 | [ "["; set = charset; "]" -> Characters (_loc, set)
174 | | s = string -> s
175 | | name = LIDENT -> find_named_regexp _loc name
176 | | "%"; name = LIDENT -> Bind_pos (_loc, name)
177 | | "("; r = regexp; ")" -> r
178 | ]
179 | ];
180 |
181 | string: [
182 | [ s = STRING -> Regexp_ast.of_string _loc (eval_string s) ]
183 | | [ c = CHAR -> Characters (_loc, Charset.singleton (eval_char c)) ]
184 | ];
185 |
186 | charset: [
187 | [ "^"; x = charset -> Charset.complement x ]
188 | | [ c1 = CHAR; "-"; c2 = CHAR ->
189 | Charset.range (eval_char c1) (eval_char c2)
190 |
191 | | c = CHAR -> Charset.singleton (eval_char c)
192 | | s = STRING -> Charset.of_string (eval_string s)
193 | | name = LIDENT ->
194 | Regexp_ast.as_charset _loc "not a set of characters"
195 | (find_named_regexp _loc name)
196 | | set1 = charset; set2 = charset -> Charset.union set1 set2
197 | ]
198 | ];
199 |
200 | range: [
201 | [ mini = INT;
202 | maxi =
203 | OPT ["-"; maxi =
204 | OPT [ maxi = INT -> int_of_string maxi ] -> maxi] ->
205 | let mini = int_of_string mini in
206 | (mini, maxi), _loc
207 | | mini = INT; "+" -> (int_of_string mini, Some None), _loc ]
208 | ];
209 |
210 | (* Reserved identifiers in patterns *)
211 | patt: LEVEL "simple" [
212 | [ s = LIDENT ->
213 | if Match.is_reserved s then
214 | Messages.reserved_identifier _loc Constants.reserved_prefix s
215 | else <:patt< $lid:s$ >> ]
216 | ];
217 |
218 | END;;
219 |
220 | let extend_regular () =
221 | extend_common ();
222 | (try
223 | DELETE_RULE Gram
224 | expr: "function"; match_case
225 | END
226 | with _rule_not_found ->
227 | Messages.cannot_delete_rule "(reg 1)");
228 |
229 | (try
230 | DELETE_RULE Gram
231 | expr: "match"; sequence; "with"; match_case
232 | END
233 | with _rule_not_found ->
234 | Messages.cannot_delete_rule "(reg 2)");
235 |
236 | EXTEND Gram
237 | expr: LEVEL "top" [
238 | [ "match"; target = sequence; "with"; OPT "|";
239 | cases = LIST1 regexp_match_case SEP "|" ->
240 | output_match _loc (seq _loc target) cases
241 | | "try"; e = expr; "with"; OPT "|";
242 | cases = LIST1 regexp_match_case SEP "|" ->
243 | output_try _loc e cases
244 | | "function"; OPT "|"; cases = LIST1 regexp_match_case SEP "|" ->
245 | output_function _loc cases ]
246 | ];
247 | END
248 |
249 |
250 | let () =
251 | init_named_regexps ();
252 |
253 | Camlp4.Options.add "-tailrec"
254 | (Arg.Set tailrec)
255 | " produce code that preserves tail-recursivity (default)";
256 |
257 | Camlp4.Options.add "-direct"
258 | (Arg.Clear tailrec)
259 | " produce code that does not try to preserve tail-recursivity";
260 |
261 | (* How to test if the current syntax is the regular or revised one? *)
262 | extend_regular ()
263 |
--------------------------------------------------------------------------------
/common/mikmatch.ml:
--------------------------------------------------------------------------------
1 | module Text =
2 | struct
3 |
4 | exception Internal_exit
5 |
6 | let iter_lines_of_channel f ic =
7 | try
8 | while true do
9 | let line =
10 | try input_line ic
11 | with End_of_file -> raise Internal_exit in
12 | f line
13 | done
14 | with Internal_exit -> ()
15 |
16 |
17 | let iter_lines_of_file f file =
18 | let ic = open_in file in
19 | try
20 | iter_lines_of_channel f ic;
21 | close_in ic
22 | with exn ->
23 | close_in_noerr ic;
24 | raise exn
25 |
26 |
27 | let lines_of_channel ic =
28 | let l = ref [] in
29 | iter_lines_of_channel (fun line -> l := line :: !l) ic;
30 | List.rev !l
31 |
32 | let lines_of_file file =
33 | let l = ref [] in
34 | iter_lines_of_file (fun line -> l := line :: !l) file;
35 | List.rev !l
36 |
37 | let channel_contents ic =
38 | let len = 2048 in
39 | let buf = Bytes.create len in
40 | let rec loop size accu =
41 | match input ic buf 0 len with
42 | 0 -> (accu, size)
43 | | n when n = len -> loop (size + n) (Bytes.copy buf :: accu)
44 | | n -> loop (size + n) (Bytes.sub buf 0 n :: accu) in
45 |
46 | let accu, size = loop 0 [] in
47 | let result = Bytes.create size in
48 | let rec loop2 last_pos = function
49 | [] -> assert (last_pos = 0)
50 | | s :: rest ->
51 | let len = Bytes.length s in
52 | let pos = last_pos - len in
53 | Bytes.blit s 0 result pos len;
54 | loop2 pos rest in
55 | loop2 size accu;
56 | Bytes.unsafe_to_string result
57 |
58 |
59 | let file_contents ?(bin = false) file =
60 | let ic = open_in file in
61 | let s =
62 | try channel_contents ic
63 | with exn ->
64 | close_in_noerr ic;
65 | raise exn in
66 | close_in ic;
67 | s
68 |
69 | let save file data =
70 | let oc = open_out_bin file in
71 | (try
72 | output_string oc data;
73 | with exn ->
74 | close_out_noerr oc;
75 | raise exn);
76 | close_out oc
77 |
78 | let save_lines file lines =
79 | let oc = open_out_bin file in
80 | (try
81 | List.iter (fun s -> output_string oc s; output_char oc '\n') lines;
82 | with exn ->
83 | close_out_noerr oc;
84 | raise exn);
85 | close_out oc
86 |
87 |
88 | exception Skip
89 |
90 | let rev_map f l =
91 | let rec loop f accu = function
92 | [] -> accu
93 | | hd :: tl ->
94 | let accu' =
95 | try f hd :: accu
96 | with Skip -> accu in
97 | loop f accu' tl in
98 | loop f [] l
99 |
100 | let map f l =
101 | List.rev (rev_map f l)
102 |
103 | let rec fold_left f accu l =
104 | match l with
105 | [] -> accu
106 | | hd :: tl ->
107 | let accu' =
108 | try f accu hd
109 | with Skip -> accu in
110 | fold_left f accu' tl
111 |
112 | let rec rev_fold_right f l accu =
113 | match l with
114 | [] -> accu
115 | | hd :: tl ->
116 | let accu' =
117 | try f hd accu
118 | with Skip -> accu in
119 | rev_fold_right f tl accu'
120 |
121 | let rec fold_right f l accu =
122 | rev_fold_right f (List.rev l) accu
123 |
124 | let map_lines_of_channel f ic =
125 | let l = ref [] in
126 | iter_lines_of_channel (fun line ->
127 | try l := f line :: !l
128 | with Skip -> ()) ic;
129 | List.rev !l
130 |
131 | let map_lines_of_file f file =
132 | let l = ref [] in
133 | iter_lines_of_file (fun line ->
134 | try l := f line :: !l
135 | with Skip -> ()) file;
136 | List.rev !l
137 |
138 | end
139 |
140 | module Fixed =
141 | struct
142 |
143 | let chop_spaces str =
144 | let len = String.length str in
145 | let rec getfirst n =
146 | if n = len then len
147 | else
148 | if String.unsafe_get str n = ' '
149 | then getfirst (n+1)
150 | else n
151 | and getlast n =
152 | if String.unsafe_get str n = ' '
153 | then getlast (n-1)
154 | else n in
155 | let first = getfirst 0 in
156 | if first = len then ""
157 | else
158 | let last = getlast (len - 1) in
159 | String.sub str first (last-first+1)
160 |
161 | let int s = int_of_string (chop_spaces s)
162 | let float s = float_of_string (chop_spaces s)
163 |
164 | end
165 |
166 | module Directory =
167 | struct
168 |
169 | let list ?(absolute = false) ?path dir =
170 | let names = Sys.readdir dir in
171 | Array.sort String.compare names;
172 |
173 | let make_path, path_maker =
174 | match absolute, path with
175 | false, None
176 | | false, Some false -> false, (fun s -> s)
177 | | false, Some true -> true, Filename.concat dir
178 | | true, Some true
179 | | true, None ->
180 | let f =
181 | if Filename.is_relative dir then
182 | let cwd = Sys.getcwd () in
183 | Filename.concat (Filename.concat cwd dir)
184 | else Filename.concat dir in
185 | true, f
186 | | true, Some false -> invalid_arg "Directory.list" in
187 |
188 | let paths =
189 | if make_path then Array.map path_maker names
190 | else names in
191 | Array.to_list paths
192 |
193 | let is_dir ?(nofollow = false) x =
194 | let stat = if nofollow then Unix.lstat else Unix.stat in
195 | try (stat x).Unix.st_kind = Unix.S_DIR
196 | with Unix.Unix_error (Unix.ENOENT, _, _) ->
197 | false (* may be a bad symbolic link if nofollow is false *)
198 |
199 | end
200 |
201 | module Glob =
202 | struct
203 | (* Filename globbing utility *)
204 |
205 | (*
206 | Examples of use with mikmatch:
207 |
208 | let ml_files = list [FILTER _* ".ml" "i"? eos]
209 | let trash_files =
210 | list [ FILTER "";
211 | FILTER _* (".cm" ("i"|"o"|"x"|"a"|"xa") | ".o" | ".a") eos ]
212 | *)
213 |
214 | let filter_array f a =
215 | Array.fold_right (fun x l -> if f x then x :: l else l) a []
216 |
217 | let rec scan_gen ~cons ~real_dir ~dir ?nofollow action path_filter =
218 | let real_real_dir =
219 | if real_dir = "" then Filename.current_dir_name else real_dir in
220 | match path_filter with
221 | [] -> ()
222 | | [f] ->
223 | List.iter (fun name -> action (cons dir name))
224 | (filter_array f (Sys.readdir real_real_dir))
225 | | f :: subpath_filter ->
226 | let filtered_files =
227 | filter_array f (Sys.readdir real_real_dir) in
228 | List.iter
229 | (fun name ->
230 | let subdir = cons dir name in
231 | let real_subdir = Filename.concat real_dir name in
232 | if Directory.is_dir ?nofollow real_subdir then
233 | scan_gen
234 | ~cons
235 | ~real_dir:real_subdir
236 | ~dir:subdir ?nofollow action subpath_filter)
237 | filtered_files
238 |
239 | let get_dir ~getcwd ~concat ~is_relative ~fun_name ~absolute ~path
240 | ~relative_root root =
241 | match absolute, path with
242 | false, None
243 | | false, Some false -> relative_root
244 | | false, Some true -> root
245 | | true, Some true
246 | | true, None ->
247 | if is_relative root then
248 | let cwd = getcwd () in
249 | concat cwd root
250 | else root
251 | | true, Some false -> invalid_arg fun_name
252 |
253 | let scan ?(absolute = false) ?path ?(root = "") ?nofollow
254 | action path_filter =
255 | let getcwd = Sys.getcwd in
256 | let cons = Filename.concat in
257 | let concat = Filename.concat in
258 | let is_relative = Filename.is_relative in
259 | let fun_name = "Glob.scan" in
260 | let relative_root = "" in
261 | let dir =
262 | get_dir ~getcwd ~concat ~is_relative ~fun_name
263 | ~absolute ~path ~relative_root root in
264 | scan_gen ~cons ~real_dir:root ~dir ?nofollow action path_filter
265 |
266 |
267 | let lscan ?(rev = false) ?(absolute = false) ?path ?(root = []) ?nofollow
268 | action path_filter =
269 | let getcwd () = [Sys.getcwd ()] in
270 | let cons = (fun l s -> s :: l) in
271 | let concat = (fun root rel_path -> rel_path @ root) in
272 | let rec is_relative = function
273 | [] -> true
274 | | [x] -> Filename.is_relative x
275 | | x :: l -> is_relative l in
276 | let fun_name = "Glob.lscan" in
277 | let relative_root = [] in
278 | let rev_root = if rev then root else List.rev root in
279 | let rev_dir =
280 | get_dir ~getcwd ~concat ~is_relative ~fun_name
281 | ~absolute ~path ~relative_root rev_root in
282 | let real_dir = List.fold_left Filename.concat "" (List.rev rev_dir) in
283 | let new_action =
284 | if rev then action
285 | else (fun l -> action (List.rev l)) in
286 | scan_gen ~cons ~real_dir ~dir:rev_dir ?nofollow new_action path_filter
287 |
288 |
289 | let list_gen scan ?absolute ?path ?root ?nofollow ?(sort = true) path_filter =
290 | let l = ref [] in
291 | scan ?absolute ?path ?root ?nofollow (fun x -> l := x :: !l) path_filter;
292 | if sort then List.sort compare !l
293 | else !l
294 |
295 | let list = list_gen scan
296 | let llist ?rev = list_gen (lscan ?rev)
297 | end
298 |
--------------------------------------------------------------------------------
/common/regexp_ast.ml:
--------------------------------------------------------------------------------
1 | (* $Id$ *)
2 | (* Abstract syntax tree for regular expressions *)
3 |
4 | open Camlp4.PreCast
5 |
6 | type converter = [ `Int
7 | | `Float
8 | | `Option
9 | | `Custom of Ast.expr
10 | | `Value of Ast.expr ]
11 |
12 | module S = Set.Make (String)
13 |
14 | let list_named_groups set = List.sort String.compare (S.elements set)
15 |
16 | module Named_groups =
17 | struct
18 | module M = Map.Make (String)
19 | include M
20 | let list m =
21 | List.sort
22 | (fun (a, _) (b, _) -> String.compare a b)
23 | (fold
24 | (fun key data accu ->
25 | let positions =
26 | List.sort
27 | (fun (loc, i, conv1) (loc, j, conv2) ->
28 | Stdlib.compare i j)
29 | data in
30 | (key, positions) :: accu)
31 | m [])
32 |
33 | let list_keys m =
34 | List.sort String.compare (fold (fun key data accu -> key :: accu) m [])
35 |
36 | let keys m = fold (fun key data accu -> S.add key accu) m S.empty
37 | let equal m1 m2 = S.equal (keys m1) (keys m2)
38 | let inter m1 m2 = S.inter (keys m1) (keys m2)
39 | let union m1 m2 = S.union (keys m1) (keys m2)
40 | let diff m1 m2 = S.diff (keys m1) (keys m2)
41 | end
42 |
43 | type named_groups = (Ast.loc * int * converter option) list Named_groups.t
44 |
45 |
46 | let add_new loc name conv group_num set =
47 | if Named_groups.mem name set then
48 | Messages.multiple_binding loc [name];
49 | Named_groups.add name [loc, group_num, conv] set
50 |
51 | let add_new_group loc name conv group_num (groups, positions) =
52 | (add_new loc name conv group_num groups, positions)
53 |
54 | let add_new_pos loc name group_num (groups, positions) =
55 | (groups, add_new loc name None group_num positions)
56 |
57 | let merge_lists l1 l2 =
58 | let tbl = Hashtbl.create (List.length l1 + List.length l2) in
59 | let add l =
60 | List.iter (fun ((_, n, conv) as x) -> Hashtbl.replace tbl n x) l in
61 | add l1;
62 | add l2;
63 | let l = Hashtbl.fold (fun _ x l -> x :: l) tbl [] in
64 | let cmp (_, x, _) (_, y, _) = compare x y in
65 | List.sort cmp l
66 |
67 | let really_add name l2 set =
68 | try
69 | let l1 = Named_groups.find name set in
70 | Named_groups.add
71 | name (merge_lists l1 l2)
72 | (Named_groups.remove name set)
73 | with Not_found ->
74 | Named_groups.add name l2 set
75 |
76 | let merge set1 set2 =
77 | Named_groups.fold really_add set1 set2
78 |
79 | type repetition_kind =
80 | Star
81 | | Option
82 | | Plus
83 | | Range of (int * int option option)
84 |
85 | type greediness = bool
86 |
87 | type ast =
88 | Epsilon of Ast.loc
89 | | Characters of Ast.loc * Charset.t
90 | | Sequence of Ast.loc * ast * ast
91 | | Alternative of Ast.loc
92 | * ast (* choice 1 *)
93 | * ast (* choice 2 *)
94 | * S.t (* group names *)
95 | * S.t (* position names *)
96 | | Repetition of Ast.loc * (repetition_kind * greediness) * ast
97 | | Possessive of Ast.loc * ast
98 | | Bind of Ast.loc * ast * string * converter option
99 | | Bind_pos of Ast.loc * string
100 | | Backref of Ast.loc * string
101 | | Variable of Ast.loc * Ast.expr
102 | | Nocase_variable of Ast.loc * Ast.expr
103 | | Special of Ast.loc * string * (string * int option)
104 | | Lookahead of Ast.loc * bool * ast
105 | | Lookbehind of Ast.loc * bool * ast
106 | | Closed of ast
107 |
108 | let rec loc_of_regexp = function
109 | Epsilon loc
110 | | Characters (loc, _)
111 | | Sequence (loc, _, _)
112 | | Alternative (loc, _, _, _, _)
113 | | Repetition (loc, _, _)
114 | | Possessive (loc, _)
115 | | Bind (loc, _, _, _)
116 | | Bind_pos (loc, _)
117 | | Backref (loc, _)
118 | | Variable (loc, _)
119 | | Nocase_variable (loc, _)
120 | | Special (loc, _, _)
121 | | Lookahead (loc, _, _)
122 | | Lookbehind (loc, _, _) -> loc
123 | | Closed ast -> loc_of_regexp ast
124 |
125 | let rec bindings : ast -> S.t = function
126 | Bind (loc, e, s, conv) -> S.add s (bindings e)
127 | | Bind_pos _
128 | | Epsilon _
129 | | Characters _
130 | | Backref _
131 | | Variable _
132 | | Nocase_variable _
133 | | Special _ -> S.empty
134 | | Sequence (loc, e1, e2) -> S.union (bindings e1) (bindings e2)
135 | | Alternative (loc, e1, e2, set, pos_set) -> set
136 | | Repetition (loc, kind, e) -> bindings e
137 | | Possessive (loc, e)
138 | | Lookahead (loc, _, e)
139 | | Lookbehind (loc, _, e) -> bindings e
140 | | Closed e -> S.empty
141 |
142 | let rec pos_bindings : ast -> S.t = function
143 | Bind_pos (loc, s) -> S.singleton s
144 | | Bind _
145 | | Epsilon _
146 | | Characters _
147 | | Backref _
148 | | Variable _
149 | | Nocase_variable _
150 | | Special _ -> S.empty
151 | | Sequence (loc, e1, e2) -> S.union (pos_bindings e1) (pos_bindings e2)
152 | | Alternative (loc, e1, e2, set, pos_set) -> pos_set
153 | | Repetition (loc, kind, e) -> pos_bindings e
154 | | Possessive (loc, e)
155 | | Lookahead (loc, _, e)
156 | | Lookbehind (loc, _, e) -> pos_bindings e
157 | | Closed _ -> S.empty
158 |
159 |
160 |
161 | let alternative loc e1 e2 =
162 | match e1, e2 with
163 | Characters (loc1, s1), Characters (loc2, s2) ->
164 | Characters (loc, Charset.union s1 s2)
165 | | _ ->
166 | let b1 = bindings e1
167 | and b2 = bindings e2 in
168 | let pb1 = pos_bindings e1
169 | and pb2 = pos_bindings e2 in
170 | Alternative (loc, e1, e2, S.union b1 b2, S.union pb1 pb2)
171 |
172 | let rec repeat loc e (mini, maxoptopt) =
173 | if mini < 0 then
174 | Messages.invalid_range loc
175 | else
176 | match maxoptopt with
177 | None ->
178 | (match mini with
179 | 0 -> Epsilon loc
180 | | n ->
181 | let rec loop i =
182 | if i > 1 then
183 | Sequence (loc, e, loop (i-1))
184 | else e in
185 | loop n)
186 | | Some (Some maxi) ->
187 | let diff = maxi - mini in
188 | if diff < 0 then Messages.invalid_range loc
189 | else if diff = 0 then e
190 | else
191 | let rec loop i =
192 | alternative loc (Epsilon loc)
193 | (if i > 1 then
194 | (Sequence (loc, e, loop (i-1)))
195 | else e) in
196 | Sequence (loc, (repeat loc e (mini, None)), loop diff)
197 | | Some None ->
198 | Sequence (loc, repeat loc e (mini, None),
199 | Repetition (loc, (Star, true), e))
200 |
201 |
202 | let rec nocase = function
203 | Bind (loc, e, s, conv) -> Bind (loc, nocase e, s, conv)
204 | | Bind_pos _
205 | | Epsilon _
206 | | Backref _
207 | | Nocase_variable _
208 | | Special _ as e -> e
209 | | Characters (loc, charset) -> Characters (loc, Charset.nocase charset)
210 | | Sequence (loc, e1, e2) -> Sequence (loc, nocase e1, nocase e2)
211 | | Alternative (loc, e1, e2, ids, pos_ids) ->
212 | Alternative (loc, nocase e1, nocase e2, ids, pos_ids)
213 | | Repetition (loc, kind, e) -> Repetition (loc, kind, nocase e)
214 | | Possessive (loc, e) -> Possessive (loc, nocase e)
215 | | Lookahead (loc, b, e) -> Lookahead (loc, b, nocase e)
216 | | Lookbehind (loc, b, e) -> Lookbehind (loc, b, nocase e)
217 | | Variable (loc, e) -> Nocase_variable (loc, e)
218 | | Closed ast -> Closed (nocase ast)
219 |
220 |
221 | (* Miscellaneous functions *)
222 |
223 | let explode s =
224 | let l = ref [] in
225 | for i = String.length s - 1 downto 0 do
226 | l := s.[i] :: !l
227 | done;
228 | !l
229 |
230 | let of_string loc s =
231 | let l = explode s in
232 | match l with
233 | [c] -> Characters (loc, Charset.singleton c)
234 | | _ ->
235 | List.fold_right
236 | (fun c re ->
237 | Sequence (loc, (Characters (loc, Charset.singleton c)), re))
238 | l (Epsilon loc)
239 |
240 | let as_charset _loc msg = function
241 | Characters (_loc, set) -> set
242 | | _ -> Messages.failure _loc msg
243 |
244 | let rec warn_bindings w = function
245 | Bind (loc, e, s, conv) ->
246 | if w then Messages.not_visible loc [s] "context";
247 | warn_bindings w e
248 | | Bind_pos (loc, s) -> if w then Messages.not_visible loc [s] "context"
249 | | Epsilon _
250 | | Characters _
251 | | Backref _
252 | | Variable _
253 | | Nocase_variable _
254 | | Special _ -> ()
255 | | Sequence (loc, e1, e2) -> warn_bindings w e1; warn_bindings w e2
256 | | Alternative (loc, e1, e2, set, pos_set) ->
257 | if w then
258 | (match list_named_groups (S.union set pos_set) with
259 | [] -> ()
260 | | ignored -> Messages.not_visible loc ignored "context")
261 | | Repetition (loc, kind, e) -> warn_bindings w e
262 | | Possessive (loc, e)
263 | | Lookahead (loc, _, e)
264 | | Lookbehind (loc, _, e) -> warn_bindings w e
265 | | Closed e -> warn_bindings true e
266 |
267 | let warnings re =
268 | warn_bindings false re
269 |
270 |
--------------------------------------------------------------------------------
/common/mikmatch.mli:
--------------------------------------------------------------------------------
1 | (** A small text-oriented library *)
2 |
3 | (** The [Mikmatch] module provides a submodule named [Text].
4 | A normal usage is to place [open Mikmatch] at the beginning of
5 | user code that uses it.
6 |
7 | This module is part of the runtime environment of Mikmatch
8 | (the library run_mikmatch_pcre.cma or equivalent).
9 | *)
10 |
11 | module Text :
12 | sig
13 | (** This module provides some general functions which are especially
14 | useful for manipulating text and text files.
15 | *)
16 |
17 | val iter_lines_of_channel : (string -> unit) -> in_channel -> unit
18 | (** [iter_lines_of_channel f ic] reads input channel [ic]
19 | and applies successively the given function [f] to
20 | each line until the end of file is reached. *)
21 |
22 | val iter_lines_of_file : (string -> unit) -> string -> unit
23 | (** [iter_lines_of_file f file] reads file [file]
24 | and applies successively the given function [f] to
25 | each line until the end of file is reached. *)
26 |
27 | val lines_of_channel : in_channel -> string list
28 | (** [lines_of_channel ic] returns the list of the lines that can be
29 | read from input channel [ic]. *)
30 |
31 | val lines_of_file : string -> string list
32 | (** [lines_of_file file] returns the list of the lines that can be
33 | read from file [file]. *)
34 |
35 | val channel_contents : in_channel -> string
36 | (** [channel_contents ic] returns the string containing the bytes
37 | that can be read from the given input channel [ic]. *)
38 |
39 | val file_contents : ?bin:bool -> string -> string
40 | (** [file_contents file] returns the string containing the bytes
41 | that can be read from the given file.
42 | Option [bin] specifies if [Pervasives.open_in_bin] should be
43 | used instead of [Pervasives.open_in] to open the file. Default is
44 | [false]. *)
45 |
46 | val save : string -> string -> unit
47 | (** [save file data] stores the string [data] in [file].
48 | If the file already exists, its contents is discarded silently. *)
49 |
50 | val save_lines : string -> string list -> unit
51 | (** [save_lines file l] saves the given list [l] of strings in [file]
52 | and adds a newline characters (['\n']) after each of them.
53 | If the file already exists, its contents is discarded silently. *)
54 |
55 |
56 | exception Skip
57 | (** This exception can be used to skip an element of a list being
58 | processed with [rev_map], [map], [fold_left], and [fold_right]. *)
59 |
60 | val map : ('a -> 'b) -> 'a list -> 'b list
61 | (** Like [List.map] but it is guaranteed that
62 | the elements of the input list are processed from left to right.
63 | Moreover the [Skip] exception can be used to skip an element
64 | of the list.
65 | This function runs in constant stack space. *)
66 |
67 | val rev_map : ('a -> 'b) -> 'a list -> 'b list
68 | (** Like [List.rev_map], but it is guaranteed that
69 | the elements of the input list are processed from left to right.
70 | Moreover the [Skip] exception can be used to skip an element
71 | of the list.
72 | This function runs in constant stack space and is slightly faster
73 | then [map]. *)
74 |
75 | val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b list -> 'a
76 | (** Like [List.fold_left]
77 | but the [Skip] exception can be used to skip an element
78 | of the list.
79 | This function runs in constant stack space. *)
80 |
81 | val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b
82 | (** Like [List.fold_right]
83 | but the [Skip] exception can be used to skip an element
84 | of the list.
85 | This function runs in constant stack space. *)
86 |
87 | val map_lines_of_channel : (string -> 'a) -> in_channel -> 'a list
88 | (** [map_lines_of_channel f ic] is equivalent to
89 | [map f (lines_of_channel ic)] but faster. *)
90 |
91 | val map_lines_of_file : (string -> 'a) -> string -> 'a list
92 | (** [map_lines_of_file f file] is equivalent to
93 | [map f (lines_of_file file)] but faster. *)
94 |
95 | end
96 |
97 |
98 | module Fixed :
99 | sig
100 | (** This module provides some functions which are useful
101 | for manipulating files with fields of fixed width.
102 | *)
103 |
104 | val chop_spaces : string -> string
105 | (** [chop_spaces s] returns a string where the leading and trailing
106 | spaces are removed. *)
107 |
108 | val int : string -> int
109 | (** [int s] reads an int from a string where leading and
110 | trailing spaces are allowed.
111 | Equivalent to [Pervasives.int_of_string (chop_spaces s)]. *)
112 |
113 | val float : string -> float
114 | (** [float s] reads an float from a string where leading and
115 | trailing spaces are allowed.
116 | Equivalent to [Pervasives.float_of_string (chop_spaces s)]. *)
117 | end
118 |
119 | module Directory :
120 | sig
121 | (** Basic operations on directories *)
122 |
123 | val list : ?absolute:bool -> ?path:bool -> string -> string list
124 | (** [list dir] returns the alphabetically sorted list
125 | of the names of the files contained in directory [dir].
126 | The special names that refer to the parent directory (e.g. [..])
127 | and the directory itself (e.g. [.]) are ignored.
128 |
129 | If the option [absolute] is set to [true], the result is a list
130 | of absolute file paths, i.e. that do not depend on the current directory
131 | which is associated to the process
132 | (default is false; implies [path = true]).
133 |
134 | If the option [path] is set to [true], the result is a list of paths
135 | instead of just the file names
136 | (default is [false] except if [absolute] is explicitely set to [true]).
137 |
138 | Exception [Invalid_argument "Directory.list"] is raised
139 | if there is an incompatibility between the options.
140 | Unspecified exceptions will be raised if the given directory does not
141 | exist or is not readable.
142 | *)
143 |
144 | val is_dir : ?nofollow:bool -> string -> bool
145 | (** [is_dir dir] returns true if [dir] is a directory, false otherwise.
146 | The [nofollow] option is false by default, but if true,
147 | a symbolic link will not be followed. In that case false is returned
148 | even if the link points to a valid directory. *)
149 | end
150 |
151 | module Glob :
152 | sig
153 | (** A generic file path matching utility *)
154 |
155 | val scan :
156 | ?absolute:bool ->
157 | ?path:bool ->
158 | ?root:string ->
159 | ?nofollow:bool ->
160 | (string -> unit) -> (string -> bool) list -> unit
161 | (** [scan action path_filter] returns all the file paths having a name
162 | that matches [path_filter]. [path_filter] is a list of filters that
163 | test whether a directory name or a file name should be selected.
164 |
165 | The path search starts from the current directory by default, or
166 | from the directory specified by the [root] option. The file names
167 | are examined in an undefined order. When a file path matches,
168 | [action] is applied to the string representing the path.
169 | Options [absolute] and [path] have the same meaning and the same
170 | default values as in {!Mikmatch.Directory.list}.
171 |
172 | [nofollow] can be used to prevent from considering symbolic links
173 | as directories. It is false by default.
174 | See also {!Mikmatch.Directory.is_dir}.
175 | *)
176 |
177 | val lscan :
178 | ?rev:bool ->
179 | ?absolute:bool ->
180 | ?path:bool ->
181 | ?root:string list ->
182 | ?nofollow:bool ->
183 | (string list -> unit) -> (string -> bool) list -> unit
184 | (** Same as {!Mikmatch.Glob.scan} but file paths are kept as a list
185 | of strings that form a valid path when concatenated using
186 | [Filename.concat]. Option [rev] can be set if the lists representing
187 | paths are in reversed order, i.e. the root comes last.
188 |
189 | In [lscan action path_filter], options [rev], [absolute], and [path]
190 | take their default values which are all false.
191 | In this situation, it is guaranteed that the paths that are passed
192 | to [action] have the same length as [path_filter].
193 | *)
194 |
195 | val list :
196 | ?absolute:bool ->
197 | ?path:bool ->
198 | ?root:string ->
199 | ?nofollow:bool ->
200 | ?sort:bool ->
201 | (string -> bool) list -> string list
202 | (** [list path_filter] works like {!Mikmatch.Glob.scan} but returns a list
203 | of all file paths that match [path_filter].
204 |
205 | An example in Mikmatch syntax is [list [FILTER _* ".ml" eos]].
206 | It returns the list of ".ml" files in the current directory.
207 | It could have been written as
208 | [list [ fun s -> Filename.check_suffix s ".ml"]] and is equivalent
209 | to [*.ml] in shell syntax.
210 | *)
211 |
212 | val llist :
213 | ?rev:bool ->
214 | ?absolute:bool ->
215 | ?path:bool ->
216 | ?root:string list ->
217 | ?nofollow:bool ->
218 | ?sort:bool -> (string -> bool) list -> string list list
219 | (** [llist path_filter] works like {!Mikmatch.Glob.lscan}
220 | but returns a list
221 | of all file paths that match [path_filter]. *)
222 | end
223 |
--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
1 | Changes in the successive versions of Mikmatch
2 | ==============================================
3 |
4 | Mikmatch is the port of micmatch 0.700 to the "new" Camlp4 (3.10).
5 | Mikmatch is not compatible with Camlp4 <= 3.09 or Camlp5.
6 |
7 | Please send bug reports, comments or feature requests to
8 | Martin Jambon or to the public forum at
9 | http://groups.google.com/group/micmatch
10 |
11 | !!! = some incompatibilities
12 | opt = optimizations
13 | +ui = additions in the user interface
14 | -ui = restrictions in the user interface
15 | bug = bug or security fix
16 | doc = major changes in the documentation
17 | pkg = changes in the structure of the package or in the installation procedure
18 |
19 | 2012-11-04
20 | 1.0.6: [bug] Fix for camlp4 4.00.0
21 |
22 | 2012-02-03
23 | 1.0.5: [pkg] Subpackage "mikmatch_pcre.top" for toplevel usage
24 |
25 | 2011-10-21
26 | 1.0.4: [bug] Fixed bug consisting in Not_found exception being raised
27 | in an alternative capturing an empty string.
28 | (losing compatibility with pcre-ocaml versions from 2004
29 | or earlier)
30 |
31 | 1.0.3: [!!!] Builds with camlp4 3.12.0 but no longer with 3.11.
32 | Thanks to Jake Donham for the patch.
33 |
34 | 1.0.2: [bug] Fixed important name scoping bug
35 | (definitions were inserted too early).
36 | [bug] Fixed bug that was causing directives to be ignored.
37 |
38 | 1.0.1: [!!!] Support for Camlp4 3.11.0, including toplevel support which
39 | wasn't possible with Camlp4 3.10.
40 | Dropped support for Camlp4 3.10 (please use mikmatch 1.0.0
41 | instead).
42 | Will restore compatibility with Camlp4 3.10 only in case
43 | of significant new mikmatch features.
44 | [pkg] Added dependency towards tophide. It is not a strict requirement
45 | but improves the toplevel experience.
46 |
47 | 1.0.0: [*] First release of mikmatch, translation of micmatch 0.700 for
48 | the "new camlp4" 3.10.
49 | [pkg] License: all source code is now distributed under the BSD license.
50 | [pkg] All occurrences of "micmatch" have been replaced by "mikmatch".
51 | [-ui] Temporarily no toplevel support for the syntax features, waiting
52 | for the availability of camlp4 filters for toplevel phrases.
53 | [pkg] Custom toplevels are no longer built and installed.
54 | [+ui] No more polymorphic value restriction. Functions that
55 | use mikmatch patterns can now be polymorphic.
56 | Many identifiers with the "__mikmatch" prefix are now visible
57 | as module structure items.
58 | [-ui] Dropped support for Camlp4's revised syntax.
59 | [-ui] No more -thread option. Only matching patterns that contain @
60 | use a table that is shared among threads. User code is responsible
61 | for using locks if needed.
62 |
63 |
64 | ------------------------- Micmatch (for camlp4 <= 3.09) ------------------
65 |
66 | This is the history for micmatch before the port to Camlp4 3.10, renamed
67 | mikmatch.
68 |
69 | 0.700: [bug] It was not possible to use "Not" outside of micmatch_pcre
70 | regexps. This is now fixed.
71 |
72 | 0.699: [+ui] added CAPTURE and COLLECTOBJ
73 |
74 | 0.698: [bug] fixed dynamic linking problems on MacOS/NetBSD by removing
75 | version_filter. As a consequence, only recent versions of camlp4
76 | are now supported (starting from 3.08.4, maybe earlier).
77 | [bug] fixed missing dependency on "common" for "pcre" and "str"
78 | targets in main Makefile
79 |
80 | 0.697: [bug] installation of executables now correctly follows $BINDIR
81 | or $PREFIX/bin
82 | [+ui] new FILTER macro which returns true or false
83 | [+ui] changed grammar entry level of macros
84 | (now "expr1" instead of "top"). Allows for less parentheses.
85 | [pkg] added dependency to the Unix library
86 | [+ui] added filename globbing in the Micmatch library
87 | [+ui] added experimental support for views
88 |
89 | 0.696: [pkg] removed micmatch_pcre.godiva which is a big source of
90 | trouble
91 |
92 | 0.695: [pkg] minor changes for GODI
93 | [bug] added .PHONY targets in Makefile
94 |
95 | 0.694: [+ui] int and float predefined regexps are not experimental anymore
96 | [bug] fixed null-character related bugs
97 | (Pcre.quote does not escape them)
98 | [bug] fixed bug with ocaml 3.08.1 (and probably earlier): Not_found
99 | was raised during initialization of the preprocessing library
100 | (deletion of grammar rules which didn't exist)
101 |
102 | 0.693: [+ui] added support for regexps with arguments (gaps of the
103 | form @some_expr) which are evaluated at runtime.
104 | It uses a cache which stores the most recently used
105 | compiled regexps for different sets of arguments.
106 | [opt] optionally shared data structures are now created only where
107 | necessary (just reduces the code size a little)
108 |
109 | 0.692: [bug, +ui] added "nan" and "inf" (caseless, optional sign)
110 | to the set of strings recognized by the "float" pattern.
111 |
112 | 0.691: [+ui] experimental addition of predefined regexps "int" and "float"
113 |
114 | 0.690: [+ui] added support for global shortcut bindings:
115 | let / ... / = ... ;;
116 |
117 | 0.689: [+ui]
118 | - added support for shortcuts:
119 | let RE ... = ... in ...
120 | let / ... / = ... in ...
121 | - official support for / ... / as an equivalent of RE ... (patterns)
122 | - general support for:
123 | let try ... = ... in ... with ...
124 | [bug] code with misplaced RE patterns cannot compile anymore
125 |
126 | 0.688: [+ui] added support for automatic type conversions:
127 | (... as x : int)
128 | (... as x := int_of_string)
129 | (... as x = Zero)
130 | [bug] fixed bug which caused some alternative patterns to be
131 | ignored. Simplest example that caused the bug:
132 | match "c" with (RE "a") | (RE "b") | (RE "c") -> ();;
133 |
134 | 0.687: [doc] added warning against inaccessible named subgroups or positional
135 | markers such as in ("abc" as local !local)*
136 | [+ui] added option -do for command-line programs (like sh -c or perl -e)
137 |
138 | 0.686: [bug] fixed bugs related to patterns matching empty substrings
139 | (avoiding infinite loops in such cases;
140 | for instance (SPLIT "") and (SPLIT "") ~full:true both work).
141 |
142 | 0.685: [bug] Fixed the following bugs:
143 | - was counting special "any" as 0 characters instead of 1
144 | (lookbehind assertions)
145 | - "Not_found" problems with recent versions of Pcre due
146 | to the 2004-04-29 change in Pcre.get_substring
147 | - silent removal of bindings which don't make sense in assertions
148 | (BTW I don't know how to print a precise warning instead)
149 |
150 | 0.684: [+ui] added support for lookaround assertions (PCRE only):
151 | < lookbehind . lookahead > or < lookahead >
152 |
153 | 0.683: [+ui] added support for positional markers in regexps (e.g. %pos)
154 |
155 | 0.682: [bug] added compatibility with OCaml 3.09+dev14
156 |
157 | 0.681: [bug] fixed bug in REPLACE_FIRST
158 |
159 | 0.680: [bug] replaced the regexp-pp package with the newest version
160 | for compatibility with OCaml 3.09+dev6
161 |
162 | 0.679: [+ui] added ~share option to MATCH and SEARCH_FIRST
163 | [doc] updated and commented example/shootout.ml
164 |
165 | 0.678: [+ui] added experimental / ... / syntax in patterns
166 |
167 | 0.677: [pkg] fixed bugs in META files
168 | [+ui] added "save" and "save_lines" functions to the library
169 | [bug] now bos, eos, bol and eol assertions work (micmatch_pcre)
170 |
171 | 0.676: [pkg] added version ID in the name of the archives
172 | [bug]
173 | - fixed bug which prevented the use of the Camlp4
174 | syntax extension for stream parsers (was due to a wrong LEVEL)
175 | - fixed fatal bug in micmatch_str which was accidentally
176 | introduced in the last version (was due to an inexisting LEVEL)
177 | [+ui] added a short Micmatch.Fixed module for handling text
178 | with fixed-width columns.
179 |
180 | 0.675: [bug] several bugfixes in the installation procedure
181 | (correct clean, any name for gmake OK, camlmix not required)
182 | [pkg] separate installation of micmatch_pcre (default)
183 | and micmatch_str (now optional)
184 |
185 | 0.674: [+ui]
186 | - added full support for PCRE-OCaml, with
187 | many additional macros and specifications
188 | - POSIX characters classes are now predefined
189 | for both micmatch_str and micmatch_pcre
190 | - micmatch and micmatch_str are now binary executables
191 | so that micmatch scripts can be made self-executable
192 | on Unix-like systems
193 | [doc] updated the reference manual and the web page
194 | [-ui] deprecated use of {123-}. Use {123+} instead.
195 |
196 | 0.673: [bug] added support for OCaml 3.08.1 (replaced regexp-pp package)
197 |
198 | 0.672: [+ui,bug]
199 | - added checks for unbalanced or redundant local bindings
200 | - added support for local backreferences
201 |
202 | 0.671: [+ui]
203 | - added support for backreferences (!ident)
204 | - tries alternatives from left to right, and is greedy by
205 | default for optional matches (? operator). Not official,
206 | since these properties are not specified in the Str library.
207 | - function keyword replaced by fun in the revised syntax
208 | (still not tested though)
209 |
210 | 0.670: [+ui] addition of the ~ operator for ignoring case (uses the OCaml
211 | definition of case, i.e. the latin1 charset).
212 | [doc] created the reference manual
213 |
214 | 0.669: [opt] tail-recursivity is now preserved (options -direct/-tailrec)
215 | [-ui] regexpr{-12} is not valid anymore because it looks strange
216 | and ambiguous. regexpr{0-12} should be used instead.
217 | regexpr{12-} is still valid.
218 | 0.668: [bug]
219 | - fixed abusive simplification (missing try ... with)
220 | - added missing binding in multithreaded mode
221 | - fixed the buggy Makefile on the web page
222 | 0.667: [opt] alternatives between charsets handled like unions of charsets
223 | 0.666: [*] initial public release (2004-08-02)
224 |
--------------------------------------------------------------------------------
/str/str_lib.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 | (* $Id$ *)
3 |
4 | open Camlp4.PreCast
5 |
6 | open Mm_util
7 | open Constants
8 |
9 | let _ =
10 | mod_runtime := "Run_mikmatch_str";
11 | mod_runtime_mt := "Run_mikmatch_str_mt"
12 |
13 | let str_mutex = "str_mutex"
14 |
15 |
16 | (* Emacs/Str syntax for regular expressions *)
17 |
18 | open Printf
19 |
20 | open Regexp_ast
21 |
22 | let special_regexps =
23 | let _loc = Constants.dummy_loc in
24 | [ "bol", Special (_loc, "^", ("bol", Some 0)); (* beginning of line *)
25 | "eol", Special (_loc, "$", ("eol", Some 0)); (* end of line *)
26 | "bnd", Special (_loc, "\\b", ("bnd", Some 0)); (* word boundary *)
27 | "any", Special (_loc, ".", ("any", Some 1)); (* any character
28 | except newline *) ]
29 |
30 | (*
31 | Note that the usual regexp special characters are not special inside
32 | a character set. A completely different set of special characters exists
33 | inside character sets: `]', `-' and `^'.
34 |
35 | To include a `]' in a character set, you must make it the first
36 | character. For example, `[]a]' matches `]' or `a'. To include a `-',
37 | write `-' as the first or last character of the set, or put it after
38 | a range. Thus, `[]-]' matches both `]' and `-'.
39 |
40 | To include `^', make it other than the first character in the set.
41 | *)
42 |
43 | let string c = String.make 1 c
44 |
45 | let quote_char = function
46 | '[' | ']' | '*' | '.' | '\\' | '?' | '+' | '^' | '$' as c ->
47 | let s = Bytes.create 2 in
48 | Bytes.set s 0 '\\';
49 | Bytes.set s 1 c;
50 | Bytes.unsafe_to_string s
51 | | c -> string c
52 |
53 |
54 | let reorder_charset l =
55 | if l = [] then
56 | invalid_arg "reorder_charset";
57 | List.sort
58 | (fun c1 c2 ->
59 | if c1 = c2 then invalid_arg "reorder_charset: repeated char"
60 | else if c1 = ']' then -1
61 | else if c2 = ']' then 1
62 | else if c1 = '-' then 1
63 | else if c2 = '-' then -1
64 | else if c1 = '^' then 1
65 | else if c2 = '^' then -1
66 | else Char.compare c1 c2)
67 | l
68 |
69 | let compact l =
70 | let finish first last =
71 | match Char.code last - Char.code first with
72 | 0 -> string first
73 | | 1 -> string first ^ string last
74 | | _ -> string first ^ "-" ^ string last in
75 |
76 | let rec extend first last =
77 | function
78 | [] -> [finish first last]
79 | | c :: rest ->
80 | if Char.code c = Char.code last + 1 then
81 | extend first c rest
82 | else
83 | finish first last :: extend c c rest in
84 |
85 | match l with
86 | [] -> []
87 | | hd :: tl -> extend hd hd tl
88 |
89 |
90 | let compact_charset l =
91 | let rbracket = ref false
92 | and dash = ref false
93 | and caret = ref false in
94 | let normal =
95 | List.filter (function
96 | ']' -> rbracket := true; false
97 | | '-' -> dash := true; false
98 | | '^' -> caret := true; false
99 | | _ -> true) l in
100 | let sorted = List.sort Char.compare normal in
101 | let special_tail =
102 | let tail = if !dash then ["-"] else [] in
103 | if !caret then "^" :: tail else tail in
104 | let tail = compact sorted @ special_tail in
105 | if !rbracket then "]" :: tail else tail
106 |
107 |
108 | let add_const accu s =
109 | accu := `String s :: !accu
110 | let add_var accu e =
111 | accu := `Var (e, false) :: !accu
112 | let add_var_nocase accu e =
113 | accu := `Var (e, true) :: !accu
114 |
115 | let rec rm_closed = function Closed ast -> rm_closed ast | ast -> ast
116 |
117 | let rec to_string ?(top = false) ((last_group, named_groups) as groups) accu =
118 | function
119 | Epsilon _loc -> groups
120 | | Special (_loc, s, _) -> add_const accu s; groups
121 | | Characters (_loc, set) ->
122 | let l = Charset.list set in
123 | (match l with
124 | [] -> groups
125 | | [c] ->
126 | add_const accu (quote_char c);
127 | groups
128 | | _ ->
129 | add_const accu "[";
130 | List.iter (add_const accu) (compact_charset l);
131 | add_const accu "]";
132 | groups)
133 |
134 | | Sequence (_loc, re1, re2) ->
135 | let groups = to_string groups accu re1 in
136 | to_string groups accu re2
137 |
138 | | Alternative (_loc, re, Epsilon _, _, _) ->
139 |
140 | let must_group =
141 | not top &&
142 | match rm_closed re with
143 | Characters _ | Special _ | Bind _ | Alternative _ -> false
144 | | _ -> true in
145 | let last_group = if must_group then succ last_group else last_group in
146 | if must_group then add_const accu "\\(";
147 | let (last_group, named_groups) as groups =
148 | to_string (last_group, named_groups) accu re in
149 | if must_group then add_const accu "\\)";
150 | add_const accu "?";
151 | groups
152 |
153 | | Alternative (_loc, re1, re2, _, _) ->
154 |
155 | let must_group = not top in
156 | let last_group = if must_group then succ last_group else last_group in
157 | if must_group then add_const accu "\\(";
158 | let (last_group, named_groups1) =
159 | to_string (last_group, named_groups) accu re1 in
160 | add_const accu "\\|";
161 |
162 | let (last_group, named_groups2) =
163 | to_string (last_group, named_groups) accu re2 in
164 | if must_group then add_const accu "\\)";
165 |
166 | let check_balance set1 set2 =
167 | if not (Named_groups.equal set1 set2) then
168 | (let missing =
169 | S.diff
170 | (Named_groups.union set1 set2)
171 | (Named_groups.inter set1 set2) in
172 | Messages.unbalanced_bindings _loc (list_named_groups missing)) in
173 |
174 | let (groups1, positions1) = named_groups1
175 | and (groups2, positions2) = named_groups2 in
176 | check_balance groups1 groups2;
177 | check_balance positions1 positions2;
178 |
179 | (last_group, (merge groups1 groups2, merge positions1 positions2))
180 |
181 | | Repetition (_loc, (Star, true),
182 | (Repetition (_, (Star, true), _) as re)) ->
183 | to_string ~top groups accu re
184 |
185 | | Repetition (_loc, kind, re) ->
186 | let must_group =
187 | not top &&
188 | match rm_closed re with
189 | Characters _ | Special _ | Bind _ | Alternative _ -> false
190 | | _ -> true in
191 | let last_group =
192 | if must_group then
193 | (add_const accu "\\(";
194 | succ last_group)
195 | else last_group in
196 | let groups = to_string (last_group, named_groups) accu re in
197 | if must_group then
198 | add_const accu "\\)";
199 | let op =
200 | match kind with
201 | (Star, true) -> "*"
202 | | (Plus, true) -> "+"
203 | | (Option, true) -> "?"
204 | | _ -> assert false in
205 | add_const accu op;
206 | groups
207 |
208 | | Bind (_loc, re, name, conv) ->
209 | let last_group = succ last_group in
210 | let named_groups =
211 | add_new_group _loc name conv last_group named_groups in
212 | add_const accu "\\(";
213 | let groups = to_string (last_group, named_groups) accu re in
214 | add_const accu "\\)";
215 | groups
216 |
217 | | Bind_pos (_loc, name) ->
218 | let last_group = succ last_group in
219 | let named_groups = add_new_pos _loc name last_group named_groups in
220 | add_const accu "\\(\\)";
221 | (last_group, named_groups)
222 |
223 | | Backref (_loc, name) ->
224 | (try
225 | match Named_groups.find name (fst named_groups) with
226 | [] -> Messages.invalid_backref _loc name
227 | | [(_, n, conv)] -> add_const accu (sprintf "\\%i" n); groups
228 | | l ->
229 | add_const accu (sprintf "\\(%s\\)"
230 | (String.concat "\\|"
231 | (List.map (fun (_, n, conv) -> sprintf "\\%i" n) l)));
232 | (succ last_group, named_groups)
233 | with Not_found -> Messages.invalid_backref _loc name)
234 |
235 | | Variable (_loc, e) -> add_var accu e; groups
236 | | Nocase_variable (_loc, e) -> add_var_nocase accu e; groups
237 |
238 | | Closed ast ->
239 | let saved_named_groups = named_groups in
240 | let (last_group, named_groups) = to_string groups accu ast in
241 | (last_group, saved_named_groups)
242 |
243 | | Possessive _ -> assert false
244 | | Lookahead _ -> assert false
245 | | Lookbehind _ -> assert false
246 |
247 | let nocasify e =
248 | let _loc = Ast.loc_of_expr e in
249 | <:expr< $uid: !mod_runtime$.nocase $e$ >>
250 |
251 | let process_regexp _loc ~sharing re re_name =
252 | let accu = ref [] in
253 | let (last_group, named_groups) =
254 | to_string ~top:true (0, (Named_groups.empty, Named_groups.empty))
255 | accu re in
256 | let re_args, re_source =
257 | Match.get_re_source ~quote_expr: <:expr< Str.quote >>
258 | ~nocasify accu in
259 | (re_args, re_source, named_groups, [])
260 |
261 |
262 | (* Syntax expanders *)
263 |
264 | open Constants
265 |
266 | let expr_mutex _loc = <:expr< $uid: !mod_runtime_mt$.$lid:str_mutex$ >>
267 |
268 | let unlock _loc =
269 | <:expr< Mutex.unlock $expr_mutex _loc$ >>
270 |
271 | let lock _loc =
272 | <:expr< Mutex.lock $expr_mutex _loc$ >>
273 |
274 | let lock_unlock e =
275 | let _loc = Ast.loc_of_expr e in
276 | <:expr< do { $lock _loc$;
277 | try let x = $e$ in do { $unlock _loc$; x }
278 | with [ exn -> do { $unlock _loc$; raise exn } ] } >>
279 |
280 | let unlock_lock e =
281 | let _loc = Ast.loc_of_expr e in
282 | <:expr< do { $unlock _loc$;
283 | try let x = $e$ in do { $lock _loc$; x }
284 | with [ exn -> do { $lock _loc$; raise exn } ] } >>
285 |
286 | let string_match _loc re_name get_re target pos =
287 | <:expr< Str.string_match $get_re$ $target$ $int:string_of_int pos$ >>
288 |
289 | let matched_group _loc n target =
290 | <:expr< Str.matched_group $int:string_of_int n$ $target$ >>
291 |
292 | let matched_position _loc n target =
293 | <:expr< Str.group_beginning $int:string_of_int n$ >>
294 |
295 | let compile_regexp ~mt _loc re_args re_source =
296 | let compile_string e =
297 | <:expr< Str.regexp $e$ >> in
298 | match re_args with
299 | [] ->
300 | let re_string = Match.compute_re_string _loc re_source in
301 | compile_string re_string
302 | | _ ->
303 | let key =
304 | match re_args with
305 | [name, _] -> <:expr< $lid:name$ >>
306 | | _ ->
307 | let expr_list =
308 | List.map (fun (name, _) -> <:expr< $lid:name$ >>) re_args in
309 | let tup = comma_expr_of_list _loc expr_list in
310 | <:expr< ( $tup: tup$ ) >> in
311 | let compile =
312 | let re_string = Match.compute_re_string _loc re_source in
313 | compile_string re_string in
314 |
315 | let find =
316 | Match.protect mt
317 | <:expr< $uid: !mod_runtime$.Mem.find tbl key >> in
318 | let add =
319 | Match.protect mt
320 | <:expr< $uid: !mod_runtime$.Mem.unsafe_add tbl key data >> in
321 |
322 | let check_cache =
323 | <:expr<
324 | let key = $key$ in
325 | try $find$
326 | with [ Not_found ->
327 | let data = $compile$ in
328 | do { $add$;
329 | data } ] >> in
330 |
331 | let get_regexp =
332 | List.fold_right
333 | (fun (argname, _) e -> <:expr< fun $lid:argname$ -> $e$ >>)
334 | re_args
335 | check_cache in
336 |
337 | let result =
338 | <:expr<
339 | let tbl = $uid: !mod_runtime$.Mem.create 100 in
340 | $get_regexp$ >> in
341 |
342 | if mt then <:expr< let mutex = Mutex.create () in $result$ >>
343 | else result
344 |
345 |
346 |
347 | let convert _loc conv e =
348 | match conv with
349 | None -> e
350 | | Some f ->
351 | match f with
352 | `Int -> <:expr< Pervasives.int_of_string $e$ >>
353 | | `Float -> <:expr< Pervasives.float_of_string $e$ >>
354 | | `Option -> <:expr< let s = $e$ in
355 | if s = "" then None
356 | else Some s >>
357 | | `Custom f -> <:expr< $f$ $e$ >>
358 | | `Value e' -> <:expr< do { ignore $e$; $e'$ } >>
359 |
360 | let insert_bindings_poly make_expr _loc target set e =
361 | Named_groups.fold
362 | (fun name l e ->
363 | match l with
364 | [] -> assert false
365 | | (_loc, _, _) :: _ ->
366 | let find_it =
367 | List.fold_right
368 | (fun (_loc, n, conv) accu ->
369 | let expr =
370 | convert _loc conv (make_expr _loc n target) in
371 | match accu with
372 | None -> Some expr
373 | | Some e ->
374 | Some <:expr<
375 | try $expr$ with [ Not_found -> $e$ ] >>)
376 | l
377 | None in
378 | let result =
379 | match find_it with
380 | None -> assert false
381 | | Some e -> e in
382 | <:expr< let $lid:name$ = $result$ in $e$ >>)
383 | set
384 | e
385 |
386 | let insert_group_bindings = insert_bindings_poly matched_group
387 | let insert_position_bindings = insert_bindings_poly matched_position
388 |
389 | let insert_bindings _loc target (group_bindings, position_bindings) e =
390 | insert_group_bindings _loc target group_bindings
391 | (insert_position_bindings _loc target position_bindings e)
392 |
393 |
394 | let match_and_bind _loc re_name get_re target named_groups success failure =
395 | <:expr<
396 | if $string_match _loc re_name get_re target 0$
397 | then $insert_bindings _loc target named_groups success$
398 | else $failure$ >>
399 |
400 |
401 | let macro_replace _loc re_name target_name named_groups expr =
402 | let target = <:expr< $lid:target_name$ >> in
403 | <:expr<
404 | fun $lid:target_name$ ->
405 | Str.global_substitute $lid:re_name$
406 | (fun _ -> $insert_bindings _loc target named_groups expr$)
407 | $target$ >>
408 |
409 |
410 | open Select_lib
411 |
412 | let lib = { predefined_regexps = special_regexps;
413 | unfold_range = true;
414 | process_regexp = process_regexp;
415 | compile_regexp_match = compile_regexp ~mt:false;
416 | compile_regexp_search = compile_regexp ~mt:false;
417 | match_and_bind = match_and_bind;
418 | wrap_match = (fun e -> e);
419 | wrap_user_case = (fun e -> e);
420 | really_wrap_match = false;
421 | really_wrap_user_case = false }
422 |
423 | let lib_mt = { predefined_regexps = special_regexps;
424 | unfold_range = true;
425 | process_regexp = process_regexp;
426 | compile_regexp_match = compile_regexp ~mt:true;
427 | compile_regexp_search = compile_regexp ~mt:true;
428 | match_and_bind = match_and_bind;
429 | wrap_match = lock_unlock;
430 | wrap_user_case = unlock_lock;
431 | really_wrap_match = true;
432 | really_wrap_user_case = true }
433 |
--------------------------------------------------------------------------------
/pcre/test.ml:
--------------------------------------------------------------------------------
1 | open Printf
2 |
3 | (* Definition of regular expressions for further use *)
4 | RE space = [' ' '\t' '\n' '\r']
5 | RE not_space = _ # space
6 | RE digit = ['0'-'9']
7 | RE letter = ['A'-'Z' '_' 'a'-'z']
8 | RE word = letter+
9 |
10 | (*
11 | (* Inclusion of file in the same syntax, e.g. a library of user-defined
12 | regular expressions. (known problem with error location) *)
13 | USE "my_regexps.ml" (* defines `word' and `digit' *)
14 | *)
15 |
16 | (* Extended pattern-matching in the following constructs:
17 | match ... with ...
18 | try ... with ...
19 | function ...
20 | *)
21 |
22 |
23 | (* Doesn't work. Don't know how to make it work. *)
24 | (* Testing the Camlp4 support for stream parsers *)
25 |
26 | let _ = match Stream.of_list [] with parser [< >] -> ()
27 |
28 |
29 | let test expected a b c =
30 | printf "[case %i] " expected; flush stdout;
31 | (match a, b, c with
32 | None, (None | Some (RE space* )), None -> printf "case 1\n"
33 | | Some ({ contents = [| RE (word as x); _; y |]}),
34 | (Some ("test" as z | RE word space (word as z))),
35 | None -> printf "case 2: %S %S %S\n" x y z
36 | | _, _, Some (RE space* (['0'-'9']+ as n)) -> printf "case 3: %s\n" n
37 | | _ -> printf "case 4\n");
38 | flush stdout
39 |
40 | let _ =
41 | printf "Tests (match ... with):\n"; flush stdout;
42 | test 1 None (Some " ") None;
43 |
44 | test 2
45 | (Some (ref [| "alpha"; "beta"; "2 gamma" |]))
46 | (Some "Hello World!")
47 | None;
48 |
49 | test 3 None None (Some " 123 ");
50 |
51 | test 4 (Some (ref [| |])) (Some "") (Some "abc")
52 |
53 | let _ =
54 | match "" with
55 | (RE (("a" as a) | ("b" as a))) | a -> ()
56 |
57 | let hello_who s =
58 | match s with
59 | RE _* ['h''H']"ello" ","? space*
60 | ((word | space)* word as someone) -> String.capitalize someone
61 |
62 | | _ -> "nobody"
63 |
64 | let _ =
65 | printf "Extraction of the recipient's name\n"; flush stdout;
66 | List.iter (fun s ->
67 | printf "Hello who: %S\n" s;
68 | printf " -> %S\n" (hello_who s);
69 | flush stdout)
70 | [ "Hello World!";
71 | "First of all, hello everybody.";
72 | "*** hello world ***";
73 | "Hello, Caml riders!" ]
74 |
75 | let _ =
76 | printf "Test (local and global bindings):\n"; flush stdout;
77 | match "" with
78 | (RE (word as x | space+ (word as x))* ) | _ ->
79 | printf "Passed.\n"
80 |
81 | let _ =
82 | printf "Test (repetition range + end of line):\n"; flush stdout;
83 | let f s =
84 | match s with
85 | RE '-'? digit{1-4} eol -> printf "%S has less than 5 digits.\n" s
86 | | RE '-'? digit{5+} eol -> printf "%S has at least 5 digits.\n" s
87 | | _ -> printf "%S is not a number.\n" s in
88 | List.iter f ["123"; "1234"; "12345"; "12345678"; "-1234"; "*1"; "1*";
89 | "9\n*" ]
90 |
91 | let test f (expected, s) =
92 | let (success, result) = f s in
93 | let passed = expected = success in
94 | if passed then (printf "[OK] %s%s\n" s
95 | (match result with None -> ""
96 | | Some x -> sprintf " -> %s"x);
97 | flush stdout)
98 | else
99 | (print_endline (s ^ "Failed"); flush stdout; failwith s)
100 |
101 | let () =
102 | printf "Test (no case: the ~ operator):\n"; flush stdout;
103 | List.iter
104 | (test (function
105 | RE "hello"~ " World!" -> true, None
106 | | _ -> false, None))
107 | [ true, "Hello World!";
108 | true, "hElLO World!";
109 | false, "hello WORLD!" ]
110 |
111 | let () =
112 | printf "Test (try ... with):\n"; flush stdout;
113 | try failwith "Hello World!"
114 | with
115 | Failure RE "Hello" space* (word as w) -> printf "OK: %s\n" w
116 | | Failure s -> printf "Failure: %s\n" s
117 |
118 | let () =
119 | printf "Test (function ... -> ...):\n"; flush stdout;
120 | let f =
121 | function
122 | RE "Hello" space* (word as w) -> printf "OK: %s\n" w
123 | | _ -> printf "Error\n" in
124 | f "Hello Everybody";
125 | f "Hello Caml!"
126 |
127 | let () =
128 | printf "Test (backreferences):\n"; flush stdout;
129 | let f s =
130 | match s with
131 | RE
132 | (digit+ as x | (word as x)) (* x = global id *)
133 | (" " as sp !sp)* (* sp = local id *)
134 | !x -> true, Some x
135 | | _ -> false, None in
136 | List.iter (test f)
137 | [ true, "123123";
138 | false, "123 123";
139 | true, "123 123";
140 | true, "aaaa";
141 | false, "abc";
142 | false, "ab1ab1" ]
143 |
144 |
145 | let print_named_groups l =
146 | List.iter
147 | (fun (name, positions) ->
148 | printf "%s:" name;
149 | List.iter (fun i -> printf " %i" i) positions;
150 | printf "\n")
151 | l
152 |
153 | (* Lower level feature: RE_PCRE returns the source of the regexp,
154 | to be used with specific compilation or search options. *)
155 |
156 | let _ =
157 | let (src, named_groups) =
158 | RE_PCRE
159 | (("a"|"b"+)? digit{2}) as start
160 | (space* word)+ ( digit{1} (word as last_word)
161 | | digit{1} Lazy (word as last_word)
162 | | digit{3} (word as last_word)) in
163 |
164 | printf "Regexp source: %S\n" src;
165 | printf "Named groups and their locations:\n";
166 | print_named_groups named_groups;
167 | flush stdout
168 |
169 | let charset_union = RE_PCRE digit | space | "a" | ['A'-'Z']
170 |
171 |
172 | (* Laziness *)
173 |
174 | let _ =
175 | printf "Laziness and backreferences:\n"; flush stdout;
176 | let f = function
177 | RE _* Lazy
178 | "<" (_* Lazy as tag) ">" (_* Lazy as contents) "" !tag ">" ->
179 | sprintf " -> (%S, %S)" tag contents
180 | | s -> "" in
181 | List.iter (fun s ->
182 | printf "%S%s\n" s (f s);
183 | flush stdout)
184 | [ "hello";
185 | "";
186 | "text" ]
187 |
188 | let _ =
189 | printf "Possessiveness + backreferences + laziness
190 | Take the first word, find its next occurence and return the text
191 | in the middle:\n"; flush stdout;
192 | let f = function
193 | RE letter* Possessive as x (_* Lazy as y) !x -> sprintf " -> %S" y
194 | | _ -> "" in
195 | List.iter (fun s ->
196 | printf "%S%s\n" s (f s);
197 | flush stdout)
198 | [ "abc,ab,abc,abc" ]
199 |
200 |
201 |
202 | (* Macros *)
203 |
204 |
205 | let swap =
206 | REPLACE
207 | "(" space* (word as x) space* ","
208 | space* (word as y) space* ")" -> "(" ^ y ^ "," ^ x ^ ")"
209 |
210 | let swap =
211 | REPLACE_FIRST
212 | "(" space* (word as x) space* ","
213 | space* (word as y) space* ")" -> "(" ^ y ^ "," ^ x ^ ")"
214 |
215 | let _ =
216 | let test s =
217 | let s1 = swap s in
218 | let s2 = swap s1 in
219 | printf "swap 0: %s\nswap 1: %s\nswap 2: %s\n" s s1 s2 in
220 |
221 | test "tuples: (x,y)/(1, 2)/(martin, jambon)"
222 |
223 |
224 | RE host = (['.' '-'] | letter | digit)+
225 |
226 | let hide_email =
227 | flush stdout;
228 | REPLACE "@" (host as host) -> "@" ^ (String.make (String.length host) '.')
229 |
230 | let _ =
231 | let test s =
232 | printf "\
233 | before: %s
234 | after : %s
235 | " s (hide_email s) in
236 | test "this is a list of email addresses: joe@sixpack.com, martin@home"
237 |
238 |
239 | let _ =
240 | let i = ref 0 in
241 | let f = SEARCH "a" -> incr i in
242 | f "aaa";
243 | printf "This should be 3: %i\n" !i;
244 | flush stdout
245 |
246 | let _ =
247 | let f = MAP ['a'-'z']+ as w -> `Word (String.capitalize w) in
248 | f "hello world!"
249 |
250 | let _ =
251 | List.iter print_endline ((SPLIT ",") "a,b,c")
252 |
253 | let _ =
254 | let l =
255 | List.filter
256 | (fun s -> (FILTER _* ".ml" eos) s)
257 | (Array.to_list (Sys.readdir ".")) in
258 | printf "*.ml: %s\n%!" (String.concat " " l)
259 |
260 |
261 | (* Sharing the subgroups array *)
262 |
263 | let _ =
264 | let f ?share s =
265 | try (MATCH "+" (print* as x) -> print_endline (" Found " ^ x)) ?share s
266 | with Not_found -> print_endline " Not found" in
267 | let g ?share () =
268 | print_endline "2 found:";
269 | flush stdout;
270 | List.iter
271 | (f ?share)
272 | [ "+a"; "b"; "+blop" ] in
273 | g ~share:true ();
274 | g ~share:false ()
275 |
276 |
277 | (* Positions *)
278 |
279 | let _ =
280 | match "a1234yz", 333 with
281 | RE "a" as s ("bc" %pos | digit+ %pos), _
282 | | (s, pos) ->
283 | printf "a = %s, 5 = %i\n%!" s pos;
284 | assert (s = "a" && pos = 5)
285 |
286 | let _ =
287 | let find =
288 | SEARCH_FIRST "(" %pos1 (_* Lazy as x) %pos2 ")" ->
289 | printf "%s %i-%i\n%!" x pos1 pos2;
290 | assert (pos1 = 11 && pos2 = 17 && x = "result") in
291 | find "0123456789(result)..."
292 |
293 |
294 | (* No real tuple (maybe not fixed yet) *)
295 | let _ =
296 | match "abc", "def" with
297 | (RE _, RE _)
298 | | "abc", _ -> ignore `Case1
299 | | _ -> ignore `Case2
300 |
301 |
302 | (* Assertions *)
303 | let _ =
304 | let search =
305 | SEARCH
306 | alpha{2}.> digit+ Possessive as m
307 | < %pos _ as x . Not (alpha | "_"+ !m)+ > as n ->
308 | printf "num = %s; pos = %i; x = %s\n%!" n pos x in
309 | search "abc1 23 456xyz 7. x8 33_33Y 34_35Y 36__36Y"
310 |
311 |
312 | let print_triplets_of_letters = SEARCH -> print_endline x
313 | let _ = print_triplets_of_letters "Hello World!"
314 |
315 | let _ = List.iter print_endline ((SPLIT "") "abc");;
316 |
317 | RE test_warnings =
318 | ("a" as plus_warning)+
319 |
320 | let _ = match "" with RE ("a" as star_warning)* test_warnings -> () | _ -> ()
321 |
322 |
323 | (* Converters *)
324 | let _ =
325 | let f s =
326 | printf "got it!\n";
327 | int_of_string s in
328 | let n =
329 | match "(123)", 456 with
330 | (RE "(" (digit+ as n := f) ")"), _
331 | | (RE _{1-3} as n = -1), _
332 | | (RE digit+ as n : int), _
333 | | _, n -> n in
334 | assert (n = 123);
335 | printf "123=%i\n%!" n
336 |
337 |
338 | (* debugging *)
339 | let _ =
340 | match "a" with
341 | (RE "b") | (RE "c") | (RE "a") -> ()
342 | | _ -> failwith "test failed"
343 |
344 | let () = () in ();;
345 |
346 |
347 |
348 | (* General syntax for local exception handling (let try ... in ... with ...) *)
349 | let _ =
350 | try
351 | (let try x = ()
352 | and z = () in
353 | raise Exit
354 | with Exit -> assert false)
355 | with Exit ->
356 | print_endline "OK for local exception handling (let-try-in-with)"
357 | ;;
358 |
359 | let RE (_* as x) = "hello" in assert ("" <> "hello");;
360 |
361 |
362 | (* Shortcut syntax *)
363 | let _ =
364 | let RE (alpha+ as x) = "abc def" in
365 | assert (x = "abc");
366 | assert (x = "abc");
367 | print_endline "shortcut is OK"
368 |
369 | (* Shortcut syntax with local exception handling *)
370 | let _ =
371 | try
372 | (let try /"xy" as x/ = "xyz" in
373 | ignore x;
374 | raise Exit
375 | with Exit -> assert false)
376 | with Exit ->
377 | print_endline "OK for local exception handling (RE)"
378 |
379 | let /alpha+ space+ (alpha+ as x)/ = "xyz abc " in
380 | assert (x = "abc");;
381 |
382 |
383 | (* Similar tests for str_item's let in *)
384 | let try /alpha+ space+ (alpha+ as x)/ = "" in
385 | assert false
386 | with Match_failure _ -> print_endline "OK for str_item let-try-in-with";;
387 |
388 |
389 | (* Global value bindings *)
390 | let /[digit "."]* as version/ = Sys.ocaml_version
391 |
392 | RE int = digit+
393 | let /(int as major : int) "." (int as minor : int)
394 | ("." (int as patchlevel : int) | ("" as patchlevel = 0))
395 | ("+" (_* as additional_info) | ("" as additional_info))/ =
396 | Sys.ocaml_version
397 |
398 | let _ =
399 | printf "OCaml version: major=%i minor=%i patchlevel=%i additional-info=%S\n%!"
400 | major minor patchlevel additional_info
401 |
402 |
403 | (* Parametrized regexps *)
404 | let _ =
405 | let find s =
406 | match "abcabcdefggghijkl" with
407 | RE _* Lazy ( @s+ as x) -> x
408 | | _ -> assert false in
409 | assert (find "abc" = "abcabc");
410 | assert (find "g" = "ggg")
411 |
412 | let _ =
413 | let find_not_after x y =
414 | COLLECT < Not ( @x ) . > ":" @y "=" (alnum* as result) -> result in
415 |
416 | let text = "a:b=, xy:z=1, x:z=25, _:z=99" in
417 | assert (find_not_after "x" "z" text = ["1"; "99"]);
418 | assert (find_not_after "" "z" text = []);
419 | assert (find_not_after "a" "b" text = []);
420 | assert (find_not_after "a" "" text = []);
421 | assert (find_not_after "?" "b" text = [""])
422 |
423 | let _ =
424 | let find_not_between ~before ~after ~label =
425 | COLLECT
426 | < Not < ( @before ) . > @label "=" alnum* @after >
427 | @label "=" (alnum* as result) -> result in
428 |
429 | let text = "(field=12) (field=OK, field=) (field=yes" in
430 | assert (find_not_between ~before:"(" ~after:")" ~label:"field" text =
431 | ["OK"; ""; "yes"])
432 |
433 |
434 | let _ =
435 | let field key =
436 | printf "Case-insensitive search for field %S:\n%!" key;
437 | SEARCH @key~ "=" (alnum* as data) -> printf " %s=%s\n%!" key data in
438 |
439 | let text = "hello name=Martin, AGE=27, Name=Jambon" in
440 | printf "Text: %S\n" text;
441 | field "name" text;
442 | field "age" text
443 |
444 | (* Null character *)
445 | let _ =
446 | match "" with
447 | / "\000abc" / -> assert false
448 | | _ -> ()
449 |
450 | let _ =
451 | let zero_abc = "\000abc" in
452 | match "" with
453 | / @zero_abc / -> assert false
454 | | _ -> ()
455 |
456 | let ( % ) = (+)
457 |
458 | let view T = fun x -> true
459 | let view Lazy = fun x -> try Some (Lazy.force x) with _ -> None
460 |
461 | let _ =
462 | match "a", lazy (1+1), lazy (3, lazy 4) with
463 | %T, %Lazy (2 as x), %Lazy (y, %Lazy z) ->
464 | assert (x = 2);
465 | assert (y = 3);
466 | assert (z = 4);
467 | printf "Passed view test 1\n%!"
468 | | _ -> assert false
469 |
470 |
471 |
472 | type 'a lazy_list = Empty | Cons of ('a * 'a lazy_list lazy_t)
473 |
474 | let view Empty =
475 | fun l ->
476 | try Lazy.force l = Empty
477 | with _ -> false
478 |
479 | let view Cons =
480 | fun l ->
481 | try
482 | match Lazy.force l with
483 | Cons x -> Some x
484 | | Empty -> None
485 | with _ -> None
486 |
487 | let _ =
488 | let l = lazy (Cons (1, lazy (Cons (2, lazy Empty)))) in
489 | match l with
490 | %Empty
491 | | %Cons (_, %Empty) -> assert false
492 | | %Cons (x1, %Cons (x2, %Empty)) ->
493 | assert (x1 = 1);
494 | assert (x2 = 2);
495 | printf "Passed view test 2\n%!"
496 | | _ -> assert false
497 |
498 | let _ =
499 | let view XY = fun o -> Some (o#x, o#y) in
500 | let view S = fun o -> Some o#s in
501 | let o =
502 | (object
503 | method x = 0
504 | method y = 1
505 | method s = "abc"
506 | end) in
507 | match o with
508 | %XY (1, _) -> assert false
509 | | %S / "A" / -> assert false
510 | | %S ( / upper as c / | / lower as c / as s) ->
511 | assert (c = "a");
512 | assert (s = "abc");
513 | printf "Passed view test 3\n%!"
514 |
515 | module Test =
516 | struct
517 | let view Even = fun x -> x land 1 = 0
518 | end
519 |
520 | let _ =
521 | match 0 with
522 | %Test.Even -> printf "Passed view test 4\n%!"
523 | | _ -> assert false
524 |
525 |
526 | let _ =
527 | let f = COLLECTOBJ (letter+ as x) (digit+ as y : int) in
528 | match List.map (fun x -> (x#x, x#y)) (f "ab12ER5") with
529 | [ ("ab", 12); ("ER", 5) ] -> printf "Passed COLLECTOBJ test\n%!"
530 | | _ -> assert false
531 |
532 | let _ =
533 | match (CAPTURE (letter+ as x) (digit+ as y : int)) "ab12ER5" with
534 | Some o ->
535 | assert (o#x = "ab" && o#y = 12);
536 | printf "Passed CAPTURE test\n%!"
537 | | None -> assert false
538 |
539 | let _ =
540 | match (SPLIT "x" ) "axbxc" with
541 | [ "a"; "b"; "c" ] ->
542 | printf "Passed basic SPLIT test\n%!"
543 | | _ -> assert false
544 |
545 | let _ =
546 | match (SPLIT < "x" > ) "axbxc" with
547 | [ "a"; "xb"; "xc" ] ->
548 | printf "Passed zero-length SPLIT test (bug in versions <= 1.0.1)\n%!"
549 | | _ -> assert false
550 |
551 | let () =
552 | try
553 | match "a" with
554 | / "a" ("" as x) | ("b" as x) / ->
555 | ignore x;
556 | printf "Passed zero-length capture in alternative \
557 | (bug in versions <= 1.0.3)\n%!"
558 | | _ -> assert false
559 | with Not_found ->
560 | assert false
561 |
562 | let () =
563 | match "1" with
564 | / ("0" as x = 0) | ("1" as x := int_of_string) / ->
565 | if x = 1 then
566 | printf "Passed alt/= test (bug in versions <= 1.0.4)\n%!"
567 | else
568 | assert false
569 | | _ -> assert false
570 |
571 | type test = { x : int; y : int }
572 |
573 | let f x =
574 | match x with
575 | | {x = 1; _} -> ()
576 | | _ -> ()
577 |
578 | let g x =
579 | match x with
580 | | {x} -> ()
581 | | _ -> ()
582 |
--------------------------------------------------------------------------------
/pcre/pcre_lib.ml:
--------------------------------------------------------------------------------
1 | (*pp camlp4orf *)
2 |
3 |
4 | open Printf
5 |
6 | open Camlp4.PreCast
7 |
8 | open Mm_util
9 | open Regexp_ast
10 |
11 | let _ =
12 | Constants.mod_runtime := "Run_mikmatch_pcre";
13 | Constants.mod_runtime_mt := "Run_mikmatch_pcre"
14 |
15 | (* output for PCRE: Perl Compatible Regular Expressions *)
16 |
17 | let special_regexps =
18 | let _loc = Constants.dummy_loc in
19 | [ "any", (* any character except newline *)
20 | Special (_loc, ".", ("any", Some 1));
21 |
22 | "bol", (* beginning of line *)
23 | Special (_loc, "(?:^|(?<=\n))", ("bol", Some 0));
24 |
25 | "eol", (* end of line *)
26 | Special (_loc, "(?:$|(?=\n))", ("eol", Some 0));
27 |
28 | "bos", (* beginning of string *)
29 | Special (_loc, "^", ("bos", Some 0));
30 |
31 | "eos", (* end of string *)
32 | Special (_loc, "$", ("eos", Some 0)); ]
33 |
34 |
35 | let string c = String.make 1 c
36 |
37 | let quote_char c =
38 | match c with
39 | '\\' | '^' | '$' | '.' | '[' | ']' | '|'
40 | | '(' | ')' | '?' | '*' | '+' | '{' | '}' ->
41 | let s = Bytes.create 2 in
42 | Bytes.set s 0 '\\';
43 | Bytes.set s 1 c;
44 | Bytes.unsafe_to_string s
45 | | '\000' -> "\\000"
46 | | c -> string c
47 |
48 | let quote_char_in_class c =
49 | match c with
50 | '\\' -> "\\\\"
51 | | ']' -> "\\]"
52 | | '-' -> "\\-"
53 | | '^' -> "\\^"
54 | | '\000' -> "\\000"
55 | | c -> string c
56 |
57 |
58 | let reorder_charset l =
59 | if l = [] then
60 | invalid_arg "reorder_charset";
61 | List.sort Char.compare l
62 |
63 | let compact l =
64 | let finish first last =
65 | match Char.code last - Char.code first with
66 | 0 -> quote_char_in_class first
67 | | 1 -> (quote_char_in_class first) ^ (quote_char_in_class last)
68 | | _ -> (quote_char_in_class first) ^ "-" ^ (quote_char_in_class last) in
69 |
70 | let rec extend first last =
71 | function
72 | [] -> [finish first last]
73 | | c :: rest ->
74 | if Char.code c = Char.code last + 1 then
75 | extend first c rest
76 | else
77 | finish first last :: extend c c rest in
78 |
79 | match l with
80 | [] -> []
81 | | hd :: tl -> extend hd hd tl
82 |
83 |
84 | let compact_charset _loc l =
85 | let sorted = List.sort Char.compare l in
86 | let (zero, nozero) =
87 | match sorted with
88 | '\000' :: rest -> ("\\000", rest)
89 | | l -> ("", l) in
90 | String.concat "" (zero :: compact nozero)
91 |
92 |
93 | let rec rm_closed = function Closed ast -> rm_closed ast | ast -> ast
94 |
95 |
96 | let add_const accu s =
97 | accu := `String s :: !accu
98 | let add_var accu e =
99 | accu := `Var (e, false) :: !accu
100 | let add_var_nocase accu e =
101 | accu := `Var (e, true) :: !accu
102 |
103 | let rec to_string ?(top = false) ((last_group, named_groups) as groups) accu =
104 | function
105 | Epsilon _loc -> groups
106 | | Special (_loc, s, _) -> add_const accu s; groups
107 | | Characters (_loc, set) ->
108 | let l = Charset.list set in
109 | (match l with
110 | [] -> groups
111 | | [c] ->
112 | add_const accu (quote_char c);
113 | groups
114 | | _ ->
115 | add_const accu (sprintf "[%s]" (compact_charset _loc l));
116 | groups)
117 |
118 | | Sequence (_loc, re1, re2) ->
119 | let groups = to_string groups accu re1 in
120 | to_string groups accu re2
121 |
122 | | Alternative (_loc, re, Epsilon _, _, _) ->
123 |
124 | let must_group =
125 | not top &&
126 | match rm_closed re with
127 | Characters _ | Special _ | Bind _ | Alternative _ -> false
128 | | _ -> true in
129 | if must_group then add_const accu "(?:";
130 | let (last_group, named_groups) as groups =
131 | to_string (last_group, named_groups) accu re in
132 | if must_group then add_const accu ")";
133 | add_const accu "?";
134 | groups
135 |
136 | | Alternative (_loc, re1, re2, _, _) ->
137 |
138 | let must_group = not top in
139 | if must_group then add_const accu "(?:";
140 | let (last_group, named_groups1) =
141 | to_string (last_group, named_groups) accu re1 in
142 | add_const accu "|";
143 |
144 | let (last_group, named_groups2) =
145 | to_string (last_group, named_groups) accu re2 in
146 | if must_group then add_const accu ")";
147 |
148 | let check_balance set1 set2 =
149 | if not (Named_groups.equal set1 set2) then
150 | (let missing =
151 | S.diff
152 | (Named_groups.union set1 set2)
153 | (Named_groups.inter set1 set2) in
154 | Messages.unbalanced_bindings _loc (list_named_groups missing)) in
155 |
156 | let (groups1, positions1) = named_groups1
157 | and (groups2, positions2) = named_groups2 in
158 | check_balance groups1 groups2;
159 | check_balance positions1 positions2;
160 |
161 | (last_group, (merge groups1 groups2, merge positions1 positions2))
162 |
163 | | Repetition (_loc, (kind, greedy), re) ->
164 | let must_group =
165 | not top &&
166 | match rm_closed re with
167 | Characters _ | Special _ | Bind _
168 | | Alternative _ | Possessive _ -> false
169 | | _ -> true in
170 | if must_group then
171 | add_const accu "(?:";
172 | let groups = to_string (last_group, named_groups) accu re in
173 | if must_group then
174 | add_const accu ")";
175 | let rec convert = function
176 | Star -> "*"
177 | | Plus -> "+"
178 | | Option -> "?"
179 | | Range (m, None) ->
180 | (match m with
181 | 1 -> ""
182 | | _ -> sprintf "{%i}" m)
183 | | Range (m, Some None) ->
184 | (match m with
185 | 0 -> "*"
186 | | 1 -> "+"
187 | | _ -> sprintf "{%i,}" m)
188 | | Range (m, Some (Some n)) ->
189 | if m = n then
190 | convert (Range (m, None))
191 | else if m = 0 && n = 1 then "?"
192 | else sprintf "{%i,%i}" m n in
193 | let base_op = convert kind in
194 | add_const accu base_op;
195 | if not greedy && base_op <> "" then
196 | add_const accu "?";
197 | groups
198 |
199 | | Possessive (_loc, re) ->
200 | add_const accu "(?>";
201 | let groups = to_string groups accu re in
202 | add_const accu ")";
203 | groups
204 |
205 | | Lookahead (_loc, positive, re) ->
206 | let start = if positive then "(?=" else "(?!" in
207 | add_const accu start;
208 | let groups = to_string groups accu re in
209 | add_const accu ")";
210 | groups
211 |
212 | | Lookbehind (_loc, positive, re) ->
213 | let start = if positive then "(?<=" else "(?
220 | let last_group = succ last_group in
221 | let named_groups =
222 | add_new_group _loc name conv last_group named_groups in
223 | add_const accu "(";
224 | let groups = to_string (last_group, named_groups) accu re in
225 | add_const accu ")";
226 | groups
227 |
228 | | Bind_pos (_loc, name) ->
229 | let last_group = succ last_group in
230 | let named_groups = add_new_pos _loc name last_group named_groups in
231 | add_const accu "()";
232 | (last_group, named_groups)
233 |
234 | | Backref (_loc, name) ->
235 | (try
236 | match Named_groups.find name (fst named_groups) with
237 | [] -> Messages.invalid_backref _loc name
238 | | [(_, n, conv)] -> add_const accu (sprintf "\\%i" n); groups
239 | | l ->
240 | add_const accu (sprintf "(?:%s)"
241 | (String.concat "|"
242 | (List.map (fun (_, n, conv) -> sprintf "\\%d" n) l)));
243 | groups
244 | with Not_found -> Messages.invalid_backref _loc name)
245 |
246 | | Variable (_loc, e) -> add_var accu e; groups
247 | | Nocase_variable (_loc, e) -> add_var_nocase accu e; groups
248 |
249 | | Closed ast ->
250 | let saved_named_groups = named_groups in
251 | let (last_group, named_groups) = to_string groups accu ast in
252 | (last_group, saved_named_groups)
253 |
254 |
255 | (* Syntax expanders *)
256 |
257 | open Constants
258 |
259 | let nocasify e =
260 | let _loc = Ast.loc_of_expr e in
261 | <:expr< $uid: !mod_runtime$.nocase $e$ >>
262 |
263 |
264 | let make_get_re_noargs _loc re_name re_args =
265 | let empty = <:expr< "" >> in
266 | let empty_args = List.map (fun (name, arg) -> (name, empty)) re_args in
267 | Match.make_get_re _loc re_name empty_args
268 |
269 |
270 | let process_regexp _loc ~sharing re re_name =
271 | let accu = ref [] in
272 | let (last_group, named_groups) =
273 | to_string ~top:true (0, (Named_groups.empty, Named_groups.empty))
274 | accu re in
275 | let re_args, re_source =
276 | Match.get_re_source
277 | ~quote_expr: <:expr< $uid: !mod_runtime$.quote_string >>
278 | ~nocasify accu in
279 | let shared_id = shared re_name in
280 | let get_re_noargs = make_get_re_noargs _loc re_name re_args in
281 | let postbindings =
282 | if sharing then
283 | [ shared_id, <:expr< Pcre.make_ovector $get_re_noargs$ >>;
284 | shared_ovector re_name, <:expr< snd $lid:shared_id$ >> ]
285 | else [] in
286 | (re_args, re_source, named_groups, postbindings)
287 |
288 |
289 | let raises_exn = function
290 | <:expr< raise $exn$ >> -> true
291 | | _ -> false
292 |
293 | let string_match _loc re_name get_re target substrings pos success failure =
294 | let match_it =
295 | <:expr< Pcre.exec ~rex:$get_re$ ~pos:$int:string_of_int pos$ $target$ >> in
296 |
297 | if raises_exn failure then (* shortcut *)
298 | <:expr<
299 | let $lid:substrings$ =
300 | try $match_it$
301 | with [ Not_found -> $failure$ ] in
302 | $success$
303 | >>
304 |
305 | else
306 | <:expr<
307 | try
308 | let $lid:substrings$ =
309 | try $match_it$
310 | with [ Not_found -> $raise_exit _loc$ ] in
311 | $success$
312 | with
313 | [ $patt_exit _loc$ -> $failure$ ] >>
314 |
315 |
316 | let matched_group _loc substrings n =
317 | <:expr< Pcre.get_substring $lid:substrings$ $int:string_of_int n$ >>
318 |
319 | let matched_position _loc substrings n =
320 | <:expr< Pcre.get_substring_ofs $lid:substrings$ $int:string_of_int n$ >>
321 |
322 |
323 |
324 | let compile_regexp_general ~anchored ~mt _loc re_args re_source =
325 | let default_flags = <:expr< [`DOLLAR_ENDONLY] >> in
326 | let anchored_flags = <:expr< [`ANCHORED; `DOLLAR_ENDONLY] >> in
327 | let flags = if anchored then anchored_flags else default_flags in
328 | let compile_string e =
329 | <:expr< Pcre.regexp ~flags:$flags$ $e$ >> in
330 | match re_args with
331 | [] ->
332 | let re_string = Match.compute_re_string _loc re_source in
333 | compile_string re_string
334 | | _ ->
335 | let key =
336 | match re_args with
337 | [name, _] -> <:expr< $id: <:ident< $lid:name$ >> $ >>
338 | | _ ->
339 | let expr_list =
340 | List.map (
341 | fun (name, _) ->
342 | <:expr< $id: <:ident< $lid:name$ >> $ >>
343 | ) re_args in
344 | let tup = comma_expr_of_list _loc expr_list in
345 | <:expr< ( $tup: tup$ ) >> in
346 | let compile =
347 | let re_string = Match.compute_re_string _loc re_source in
348 | compile_string re_string in
349 |
350 | let find =
351 | Match.protect mt
352 | <:expr< $uid: !mod_runtime$.Mem.find tbl key >> in
353 | let add =
354 | Match.protect mt
355 | <:expr< $uid: !mod_runtime$.Mem.unsafe_add tbl key data >> in
356 |
357 | let check_cache =
358 | <:expr<
359 | let key = $key$ in
360 | try $find$
361 | with [ Not_found ->
362 | let data = $compile$ in
363 | do { $add$;
364 | data } ] >> in
365 |
366 | let get_regexp =
367 | List.fold_right
368 | (fun (argname, _) e -> <:expr< fun $lid:argname$ -> $e$ >>)
369 | re_args
370 | check_cache in
371 |
372 | let result =
373 | <:expr<
374 | let tbl = $uid: !mod_runtime$.Mem.create 100 in
375 | $get_regexp$ >> in
376 |
377 | if mt then <:expr< let mutex = Mutex.create () in $result$ >>
378 | else result
379 |
380 |
381 | let compile_regexp_match = compile_regexp_general ~anchored:true
382 | let compile_regexp_search = compile_regexp_general ~anchored:false
383 |
384 | let convert _loc conv e =
385 | match conv with
386 | None -> e
387 | | Some f ->
388 | match f with
389 | `Int -> <:expr< Stdlib.int_of_string $e$ >>
390 | | `Float -> <:expr< Stdlib.float_of_string $e$ >>
391 | | `Option -> <:expr< let s = $e$ in
392 | if s = "" then None
393 | else Some s >>
394 | | `Custom f -> <:expr< $f$ $e$ >>
395 | | `Value e' -> <:expr< do { ignore $e$; $e'$ } >>
396 |
397 | let insert_bindings_poly
398 | ?(skip_empty_captures = false) (* for compatibility with
399 | old versions of Pcre (before 2004-04-29) *)
400 | ?(get_fst = false)
401 | make_expr _loc substrings set e =
402 | Named_groups.fold
403 | (fun name l e ->
404 | match l with
405 | [] -> assert false
406 | | (_loc, _, _) :: _ ->
407 | let find_it =
408 | List.fold_right
409 | (fun (_loc, n, conv) accu ->
410 | let expr = make_expr _loc substrings n in
411 | match accu with
412 | None -> Some (convert _loc conv expr)
413 | | Some e ->
414 | let result =
415 | if skip_empty_captures then
416 | <:expr<
417 | try
418 | match $expr$ with
419 | [ "" -> raise Not_found
420 | | s -> $convert _loc conv <:expr< s >>$ ]
421 | with [ Not_found -> $e$ ] >>
422 | else
423 | <:expr<
424 | try $convert _loc conv expr$
425 | with [ Not_found -> $e$ ] >> in
426 | Some result)
427 | l
428 | None in
429 | let result =
430 | match find_it with
431 | None -> assert false
432 | | Some e -> e in
433 | let patt =
434 | if get_fst then <:patt< ( $lid:name$, _ ) >>
435 | else <:patt< $lid:name$ >> in
436 | <:expr< let $patt$ = $result$ in $e$ >>)
437 | set
438 | e
439 |
440 | let insert_group_bindings =
441 | insert_bindings_poly matched_group
442 | let insert_position_bindings =
443 | insert_bindings_poly ~get_fst:true matched_position
444 |
445 | let insert_bindings _loc substrings (group_bindings, position_bindings) e =
446 | insert_group_bindings _loc substrings group_bindings
447 | (insert_position_bindings _loc substrings position_bindings e)
448 |
449 |
450 | let substrings_of_target target =
451 | match target with
452 | <:expr< $lid:s$ >> -> s ^ "_result"
453 | | _ -> assert false
454 |
455 | let match_and_bind _loc re_name get_re target named_groups success failure =
456 | let substrings = substrings_of_target target in
457 | string_match _loc re_name get_re target substrings 0
458 | (insert_bindings _loc substrings named_groups success)
459 | failure
460 |
461 | let macro_replace_generic
462 | f _loc (re_name : string) get_re target_name named_groups expr =
463 | let target = <:expr< $lid:target_name$ >> in
464 | let substrings = substrings_of_target target in
465 | <:expr<
466 | fun ?pos $lid:target_name$ ->
467 | Pcre.$lid: f$
468 | ~rex:$get_re$
469 | ?pos
470 | ~subst:(fun $lid:substrings$ ->
471 | $insert_bindings _loc substrings named_groups expr$)
472 | $target$ >>
473 |
474 | let macro_replace = macro_replace_generic "substitute_substrings"
475 | let macro_replace_first = macro_replace_generic "substitute_substrings_first"
476 |
477 | let macro_match ?(ignore_bindings = false)
478 | _loc re_name get_re target_name named_groups expr =
479 | let target = <:expr< $lid:target_name$ >> in
480 | let substrings = substrings_of_target target in
481 | let sv = shared_ovector re_name in
482 | let result =
483 | if ignore_bindings then expr
484 | else insert_bindings _loc substrings named_groups expr in
485 | <:expr<
486 | fun ?(share = False) ?pos $lid:target_name$ ->
487 | let $lid:substrings$ =
488 | if not share then
489 | Pcre.exec ~rex:$get_re$ ?pos $target$
490 | else
491 | (Obj.magic
492 | ($target$,
493 | do { Pcre.unsafe_pcre_exec
494 | (Obj.magic 0 : Pcre.irflag)
495 | $get_re$ (match pos with [ None -> 0 | Some n -> n])
496 | 0 $target$
497 | $lid:sv$ None;
498 | $lid:sv$ }) : Pcre.substrings) in
499 | $result$ >>
500 |
501 | let macro_search_first = macro_match
502 |
503 | let macro_filter _loc re_name get_re target_name named_groups _ =
504 | let expr = <:expr< True >> in
505 | let e =
506 | macro_match ~ignore_bindings:true
507 | _loc re_name get_re target_name named_groups expr in
508 | <:expr< fun ?share ?pos x -> try $e$ ?share ?pos x
509 | with [ Not_found -> False ] >>
510 |
511 | let make_object _loc (group_bindings, position_bindings) =
512 | let all_ids =
513 | Named_groups.list_keys group_bindings @
514 | Named_groups.list_keys position_bindings in
515 | let methods =
516 | List.fold_right
517 | (fun id accu -> <:class_str_item< method $id$ = $lid:id$ ; $accu$ >>)
518 | all_ids <:class_str_item< >>in
519 | <:expr< object $methods$ end >>
520 |
521 | let macro_capture _loc re_name get_re target_name named_groups _ =
522 | let expr = make_object _loc named_groups in
523 | let e = macro_match _loc re_name get_re target_name named_groups expr in
524 | <:expr< fun ?share ?pos x -> try Some ($e$ ?share ?pos x)
525 | with [ Not_found -> None ] >>
526 |
527 | let macro_function
528 | fun_name _loc (re_name : string) get_re target_name named_groups expr =
529 | let target = <:expr< $lid:target_name$ >> in
530 | let substrings = substrings_of_target target in
531 | <:expr<
532 | $uid: !mod_runtime$.$lid:fun_name$
533 | $get_re$
534 | (fun $lid:substrings$ ->
535 | $insert_bindings _loc substrings named_groups expr$)
536 | >>
537 |
538 |
539 | let macro_search = macro_function "search"
540 | let macro_map = macro_function "map"
541 | let macro_collect = macro_function "collect"
542 |
543 | let macro_collectobj _loc re_name get_re target_name named_groups _ =
544 | let e = make_object _loc named_groups in
545 | macro_function "collect" _loc re_name get_re target_name named_groups e
546 |
547 | let macro_split _loc re_name get_re target_name named_groups _ =
548 | <:expr< $uid: !mod_runtime$.split $get_re$ >>
549 |
550 | open Select_lib
551 |
552 | let lib = { predefined_regexps = special_regexps;
553 | unfold_range = false;
554 | process_regexp = process_regexp;
555 | compile_regexp_match = compile_regexp_match ~mt:false;
556 | compile_regexp_search = compile_regexp_search ~mt:false;
557 | match_and_bind = match_and_bind;
558 | wrap_match = (fun e -> e);
559 | wrap_user_case = (fun e -> e);
560 | really_wrap_match = false;
561 | really_wrap_user_case = false }
562 |
563 | let lib_mt = { lib with
564 | compile_regexp_match = compile_regexp_match ~mt:true;
565 | compile_regexp_search = compile_regexp_search ~mt:true }
566 |
--------------------------------------------------------------------------------
/doc/mikmatch-manual.tex.mlx:
--------------------------------------------------------------------------------
1 | % -*- mode: latex -*-
2 | \documentclass[a4paper,12pt]{article}
3 |
4 | \usepackage{ae}
5 | \usepackage{hyperref}
6 | \usepackage{hevea}
7 |
8 | \usepackage{alltt}
9 |
10 | \usepackage[latin1]{inputenc}
11 | \usepackage[T1]{fontenc}
12 | \usepackage{fullpage}
13 | \usepackage{url}
14 | \usepackage{ocamldoc}
15 |
16 | \title{Mikmatch Version~## Sys.command "../VERSION" ##\\
17 | Reference Manual}
18 | \author{Martin Jambon}
19 | \date{October 21, 2011}
20 |
21 | \setcounter{secnumdepth}{5}
22 | \setcounter{tocdepth}{4}
23 |
24 | \newcommand{\toplevelwarning}[0]{%
25 | {\footnotesize [toplevel support not available in Camlp4 3.10]}%
26 | }
27 |
28 |
29 | \begin{document}
30 |
31 | \maketitle
32 |
33 | This manual is available online as a single HTML file at\\
34 | \url{http://mjambon.com/mikmatch-manual.html}\\
35 | and as a PDF document at\\
36 | \url{http://mjambon.com/mikmatch-manual.pdf}.\\
37 | The home page of Mikmatch is:\\
38 | \url{http://mjambon.com/micmatch.html}
39 |
40 | \tableofcontents
41 |
42 | \section{Introduction}
43 |
44 | Mikmatch is an extension of the syntax of the Objective Caml
45 | programming language (OCaml).
46 | Its purpose it to make the use of regular expressions easier and more
47 | generally to provide a set of tools for using OCaml as a
48 | powerful scripting language.
49 | Mikmatch believes that regular expressions are just like any other
50 | program and deserve better than a cryptic sequence of symbols placed
51 | in a string of a master program.
52 |
53 | Mikmatch currently supports two different libraries that implement
54 | regular expressions: Str which comes with the original distribution of
55 | OCaml and PCRE-OCaml which is an interface to PCRE (Perl Compatible
56 | Regular Expressions) for OCaml.
57 | These two flavors will be referred as Mikmatch\_str and
58 | Mikmatch\_pcre.
59 | They share a large number of syntaxic features,
60 | but Mikmatch\_pcre provides several macros that cannot be implemented
61 | safely in Mikmatch\_str. Therefore, it is recommended to use
62 | Mikmatch\_pcre.
63 |
64 |
65 | \section{Language}
66 |
67 |
68 | \subsection{Regular expressions}
69 |
70 | \subsubsection{Grammar of the regular expressions}
71 |
72 | Regular expressions support the syntax of Ocamllex regular
73 | expressions as of version 3.08.1 of the Objective Caml system
74 | (\url{http://caml.inria.fr/pub/docs/manual-ocaml/}), and
75 | several additional features.
76 | A regular expression (\textit{regexp}) is defined by the grammar that
77 | follows. The associativity rules are given by priority levels. 0 is the
78 | strongest priority.
79 | \begin{itemize}
80 | \item \underline{\textit{char-literal}}
81 | Match the given character (priority 0).
82 |
83 | \item \underline{\textbf{\_}} (underscore)
84 | Match any character (priority 0).
85 |
86 | \item \underline{\textit{string-literal}}
87 | Match the given sequence of characters (priority 0).
88 |
89 | \item \underline{\textbf{$[$}\textit{set-of-characters}\textbf{$]$}}
90 | Match one of the characters given by \textit{set-of-characters}
91 | (priority 0).
92 | The grammar for \textit{set-of-characters} is the following:
93 | \begin{itemize}
94 | \item \underline{\textit{char-literal}\textbf{$-$}\textit{char-literal}}
95 | defines a range of characters according to the iso-8859-1 encoding
96 | (includes ASCII).
97 | \item \underline{\textit{char-literal}}
98 | defines a singleton (a set containing just this character).
99 | \item \underline{\textit{string-literal}}
100 | defines a set that contains all the characters present in the given string.
101 | \item \underline{\textit{lowercase-identifier}}
102 | is replaced by the corresponding predefined regular expression; this
103 | regular expression must be exactly of length 1 and therefore
104 | represents a set of characters.
105 | \item \underline{\textit{set-of-characters} \textit{set-of-characters}}
106 | defines the union of two sets of characters.
107 | \end{itemize}
108 |
109 | \item \underline{\textit{regexp} \textbf{\#} \textit{regexp}}
110 | Match any of the characters given by the first regular expression
111 | except those which are given by the second one. Both regular
112 | expressions must be of length 1 and thus stand for a set of characters
113 | (priority 0).
114 |
115 | \item \underline{\textbf{$[$\^{}}\textit{set-of-characters}\textbf{$]$}}
116 | Same as \textbf{\_ \#}
117 | \textbf{$[$}\textit{set-of-characters}\textbf{$]$}
118 | (priority 0).
119 |
120 |
121 | \item \underline{\textit{regexp} \textbf{*}}
122 | Match the pattern given by \textit{regexp} 0 time or more (priority 0).
123 |
124 | \item \underline{\textit{regexp} \textbf{+}}
125 | Match the pattern given by \textit{regexp} 1 time or more (priority 0).
126 |
127 | \item \underline{\textit{regexp} \textbf{?}}
128 | Match the pattern given by \textit{regexp} at most once (priority 0).
129 |
130 | \item \underline{\textit{regexp}\textbf{\{}\textit{m\textbf{$-$}n}\textbf{\}}}
131 | Match \textit{regexp} at least \textit{m}~times and up to
132 | \textit{n}~times. \textit{m} and~\textit{n} must be integer
133 | literals (priority 0).
134 |
135 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{\}}}
136 | Same as \textit{regexp}\textbf{\{}\textit{n\textbf{$-$}n}\textbf{\}}
137 | (priority 0).
138 |
139 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{$+$\}}}
140 | Same as
141 | \textit{regexp}\textbf{\{}\textit{n}\textbf{\}}\textit{regexp}\textbf{$*$}
142 | (priority 0).
143 |
144 | \item \underline{\textit{regexp}\textbf{\{}\textit{n}\textbf{$-$\}}}
145 | Deprecated. Same as \textit{regexp}\textbf{\{}\textit{n}\textbf{$+$\}}
146 | (priority 0).
147 |
148 | \item \underline{\textbf{(} \textit{regexp} \textbf{)}}
149 | Match \textit{regexp} (priority 0).
150 |
151 | \item \underline{\textit{regexp} \textbf{\~{}}}
152 | Case insensitive match of the given
153 | regular expression \textit{regexp} according to the conventions of
154 | Objective Caml, i.e. according to the representation of characters
155 | in the iso-8859-1 standard (latin1) (priority 0).
156 |
157 | \item \underline{\textit{regexp} \textit{regexp}}
158 | Match the first regular expressions and then the second one (priority 1).
159 |
160 | \item \underline{\textit{regexp} \textbf{|} \textit{regexp}}
161 | Match one of these two regular expressions (priority 2).
162 |
163 | \item \underline{\textit{regexp} \textbf{as} \textit{lowercase-identifier}}
164 | Give a name to the substring that will be matched by the given pattern.
165 | This string becomes available under this name (priority 3).
166 | In-place conversions of the matched substring can be performed using
167 | one these three mechanisms:
168 | \begin{itemize}
169 | \item \underline{\textit{regexp} \textbf{as}
170 | \textit{lowercase-identifier} \textbf{:} \textit{built-in-converter}}
171 | where \textit{built-in-converter} is one of \texttt{int},
172 | \texttt{float} or \texttt{option}. \texttt{int} behaves as
173 | \texttt{int\_of\_string}, \texttt{float} behaves as
174 | \texttt{float\_of\_string}, and \texttt{option} encapsulate the
175 | substring in an object of type \texttt{string option} using
176 | an equivalent of \texttt{function "" -> None | s -> Some s}
177 |
178 | \item \underline{\textit{regexp} \textbf{as}
179 | \textit{lowercase-identifier} \textbf{:=} \textit{converter}}
180 | where \textit{converter} is any function which converts a string
181 | into something else.
182 |
183 | \item \underline{\textit{regexp} \textbf{as}
184 | \textit{lowercase-identifier} \textbf{=} \textit{expr}}
185 | where \textit{expr} is any OCaml expression, usually a constant, which
186 | assigns a value to \textit{lowercase-identifier} without knowing
187 | which substring it matches.
188 | \end{itemize}
189 |
190 | \item \underline{\textbf{\%} \textit{lowercase-identifier}}
191 | Give a name to the position in the string
192 | that is being matched.
193 | This position becomes available as an int under this name.
194 |
195 | \item \underline{\textbf{@} \textit{expr}}
196 | Match the string given by \textit{expr}. \textit{expr} can be any
197 | OCaml expression of type string. Parentheses will be needed around
198 | \textit{expr} if it is a function application, or any construct of
199 | equivalent or lower precedence (see the Objective Caml manual, chapter
200 | ``The Objective Caml language'', section ``Expressions'').
201 | Matching such patterns is not thread-safe in any of the current
202 | implementations.
203 | Expressions that contain
204 | @ patterns should be protected against concurrent accesses.
205 |
206 |
207 | \end{itemize}
208 |
209 |
210 | \subsubsection{Named regular expressions}
211 |
212 | Naming regular expressions is possible using the following toplevel
213 | construct:\\
214 | \underline{\textbf{RE} \textit{ident} \textbf{=} \textit{regexp}}\\
215 | where \textit{ident} is a lowercase identifier.
216 | Regular expressions share their own namespace.
217 |
218 | For instance, we can define a phone number as a sequence of 3~digits
219 | followed by a dash and followed by 4~digits:
220 | \begin{verbatim}
221 | RE digit = ['0'-'9']
222 | RE phone = digit{3} '-' digit{4}
223 | \end{verbatim}
224 |
225 |
226 | \subsubsection{Predefined sets of characters}
227 |
228 | The POSIX character classes (sets of characters) are available as
229 | predefined regular expressions of length 1.
230 | Their definition is given in table~\ref{posix-classes}.
231 | \begin{table}
232 | \caption{\label{posix-classes}
233 | POSIX character classes and their
234 | definition in the Mikmatch syntax}
235 | \tt
236 | \begin{tabular}{l}
237 | RE lower = \verb!['a'-'z']!\\
238 | RE upper = \verb!['A'-'Z']!\\
239 | RE alpha = lower | upper\\
240 | RE digit = \verb!['0'-'9']!\\
241 | RE alnum = alpha | digit\\
242 | RE punct = \verb=["!\"#$%&'()*+,-./:;<=\verb!=>?@[\\]^_`{|}~"]! \\ %$
243 | RE graph = alnum | punct\\
244 | RE print = graph | ' '\\
245 | RE blank = \verb!' ' | '\t'!\\
246 | RE cntrl = \verb!['\x00'-'\x1F' '\x7F']!\\
247 | RE xdigit = \verb![digit 'a'-'f' 'A'-'F']!\\
248 | RE space = \verb![blank "\n\x0B\x0C\r"]!\\
249 | \end{tabular}
250 | \end{table}
251 |
252 | \subsubsection{More predefined patterns}
253 |
254 | Some named regexps are predefined and available in
255 | every implementation of Mikmatch. These are the following:
256 | \begin{itemize}
257 | \item \texttt{int}: matches an integer (see
258 | table~\ref{predefined-regexps}).
259 | It accepts a superset of the integer literals that are
260 | produced with the OCaml standard function \texttt{string\_of\_int}.
261 | \item \texttt{float}: matches a floating-point number
262 | (see table~\ref{predefined-regexps}).
263 | It accepts a superset of the float literals that are
264 | produced with the OCaml standard function \texttt{string\_of\_float}.
265 | \end{itemize}
266 |
267 | \begin{table}
268 | \caption{\label{predefined-regexps}
269 | Predefined regexps in Mikmatch}
270 | \tt
271 | \begin{verbatim}
272 | RE int = ["-+"]? ( "0" ( ["xX"] xdigit+
273 | | ["oO"] ['0'-'7']+
274 | | ["bB"] ["01"]+ )
275 | | digit+ )
276 |
277 | RE float =
278 | ["-+"]?
279 | ( ( digit+ ("." digit* )? | "." digit+ ) (["eE"] ["+-"]? digit+ )?
280 | | "nan"~
281 | | "inf"~ )
282 | \end{verbatim}
283 | \end{table}
284 |
285 |
286 | \subsection{General pattern matching}
287 |
288 | \subsubsection{Regexps and match/function/try constructs}
289 |
290 | In Mikmatch, regular expressions can be used to match strings instead
291 | of the regular patterns. In this case, the regular expression must
292 | be preceded by the \textbf{RE} keyword, or placed between slashes
293 | (\textbf{/}\dots\textbf{/}). Both notations are equivalent.
294 |
295 | Only the following constructs support patterns that contain regular
296 | expressions:
297 | \begin{itemize}
298 | \item
299 | \textbf{match} \dots\ \textbf{with} \textit{pattern} \textbf{->} \dots
300 | \item \textbf{function} \textit{pattern} \textbf{->} \dots
301 | \item \textbf{try} \dots\ \textbf{with} \textit{pattern} \textbf{->} \dots
302 | \end{itemize}
303 |
304 | Examples:
305 | \begin{verbatim}
306 | let is_num = function RE ['0'-'9']+ -> true | _ -> false
307 |
308 | let get_option () =
309 | match Sys.argv with
310 | [| _ |] -> None
311 | | [| _; RE (['a'-'z']+ as key) "=" (_* as data) |] -> Some (key, data)
312 | | _ -> failwith "Usage: myprog [key=value]"
313 |
314 | let option =
315 | try get_option ()
316 | with Failure RE "usage"~ -> None
317 | \end{verbatim}
318 |
319 | If alternatives are used in a pattern, then both alternatives must
320 | define the same set of identifiers.
321 | In the following example, the string \texttt{code} can either come
322 | from the normal pattern matching or be a fresh substring which was
323 | extracted using the regular expression:
324 | \begin{verbatim}
325 | match option, s with
326 | Some code, _
327 | | None, RE _* "=" (['A'-'Z']['0'-'9'] as code) -> print_endline code
328 |
329 | | _ -> ()
330 | \end{verbatim}
331 |
332 | In the general case, it is not possible to check in advance if the
333 | pattern-matching cases are complete if at least one of the patterns
334 | is a regular expression. In this case, no warnings against missing
335 | cases are displayed, thus it is safer to either add a catch-all case
336 | like in the previous examples or to catch the \texttt{Match\_failure}
337 | exception that can be raised unexpectedly.
338 |
339 | \subsubsection{Views (experimental feature)}
340 |
341 | Views are a general form of symbolic patterns other than those
342 | authorized by the concrete structure of data. For example, \texttt{Positive}
343 | could be a view for positive ints. View patterns can also bind
344 | variables and a useful example in OCaml is pattern-matching over lazy
345 | values.
346 |
347 | Here we propose simple views, as suggested by Simon Peyton Jones
348 | for Haskell:\\
349 | \url{http://hackage.haskell.org/trac/ghc/wiki/ViewPatterns}.
350 | We propose a different syntax, but note that the syntax that we
351 | have chosen here is
352 | experimental and may change slightly in future releases.
353 |
354 | \paragraph{View patterns}
355 |
356 | A view pattern has one of these two forms:
357 | \begin{enumerate}
358 | \item \underline{\textbf{\%} \textit{view-name}}: a view without an
359 | argument. It is a simple check over the subject data.
360 | \item \underline{\textbf{\%} \textit{view-name}
361 | \textit{pattern}}: a view with an argument, the pattern.
362 | If the view function matches successfully, its
363 | result is matched against the given pattern.
364 | \end{enumerate}
365 | where a \textit{view-name} is a capitalized alphanumeric identifier,
366 | possibly preceded by a module path specification,
367 | e.g. \texttt{Name} or \texttt{Module.Name}.
368 |
369 | \paragraph{Definition of a view}
370 |
371 | Views without arguments are defined as functions of type
372 | \texttt{'a -> bool}, while views with arguments are defined as
373 | functions of type \texttt{'a -> 'b option}.
374 |
375 | The syntax for defining a view is:
376 | \begin{itemize}
377 | \item \underline{\textbf{let} \textbf{view} \textit{uppercase-identifier}
378 | \textbf{=} \textit{expression}}
379 | \item \underline{\textbf{let} \textbf{view} \textit{uppercase-identifier}
380 | \textbf{=} \textit{expression} \textbf{in} \textit{expression}}
381 | \end{itemize}
382 |
383 | Using the syntax above is however not strictly needed, since it just
384 | defines a function named after the name of the view, and prefixed by
385 | \texttt{view\_}. For instance \texttt{let view X = f} can be written
386 | as \texttt{let view\_X = f} in regular OCaml. Therefore, some library
387 | modules can export view definitions without using any syntax
388 | extension themselves.
389 |
390 |
391 | \paragraph{Example}
392 |
393 | \begin{verbatim}
394 | (* The type of lazy lists *)
395 | type 'a lazy_list = Nil | Cons of ('a * 'a lazy_list lazy_t)
396 |
397 | (* Definition of a view without argument for the empty list *)
398 | let view Nil =
399 | fun l ->
400 | try Lazy.force l = Nil
401 | with _ -> false
402 |
403 | (* Independent definition of a view with an argument,
404 | the head and tail of the list *)
405 | let view Cons =
406 | fun l ->
407 | try
408 | match Lazy.force l with
409 | Cons x -> Some x
410 | | Nil -> None
411 | with _ -> None
412 |
413 |
414 | (* Test *)
415 | let _ =
416 | let l = lazy (Cons (1, lazy (Cons (2, lazy Nil)))) in
417 | match l with
418 | %Nil
419 | | %Cons (_, %Nil) -> assert false
420 | | %Cons (x1, %Cons (x2, %Nil)) ->
421 | assert (x1 = 1);
422 | assert (x2 = 2);
423 | Printf.printf "Passed view test\n%!"
424 | | _ -> assert false
425 | \end{verbatim}
426 |
427 | \paragraph{Limitations}
428 |
429 | Each time a value is tested against a view pattern, the corresponding
430 | function is called. There is no optimization that would avoid calling
431 | the view function twice on the same argument.
432 |
433 | Redundant or missing cases cannot be checked, just like when there is
434 | a regexp in a pattern. This is due both to our definition of views and
435 | to the implementation that we get using Camlp4.
436 |
437 |
438 | \subsection{Shortcut for one-case regexp matching}
439 |
440 | A shortcut notation can be used to extract substrings from a string
441 | that match a pattern which is known in advance:\\
442 | \textbf{let /}\textit{regexp}\textbf{/} \textbf{=} \textit{expr}
443 | \textbf{in} \textit{expr}
444 |
445 | Global declarations also support this shortcut:\\
446 | \textbf{let /}\textit{regexp}\textbf{/} \textbf{=} \textit{expr}\\
447 | Example \toplevelwarning:
448 | \begin{verbatim}
449 | # Sys.ocaml_version;;
450 | - : string = "3.08.3"
451 | # RE int = digit+;;
452 | # let /(int as major : int) "." (int as minor : int)
453 | ("." (int as patchlevel) | ("" as patchlevel))
454 | ("+" (_* as additional_info) | ("" as additional_info))/ =
455 | Sys.ocaml_version
456 | ;;
457 | val additional_info : string = ""
458 | val major : int = 3
459 | val minor : int = 8
460 | val patchlevel : string = "3"
461 | \end{verbatim}
462 |
463 | The notation does not allow simultaneous definitions using the
464 | \textbf{and} keyword nor recursive definitions using \textbf{rec}.
465 |
466 | As usual, the \texttt{Match\_failure} exception is raised if the
467 | string fails to match the pattern.
468 | The let-try-in-with construct described in the next section also
469 | supports regexp patterns, with the same restrictions.
470 |
471 |
472 | \subsection{The let-try-in-with construct}
473 |
474 | A general notation for catching exceptions that are raised during
475 | the definition of bindings is provided:\\
476 | \textbf{let} \textbf{try} [\textbf{rec}] \textit{let-binding}
477 | \{\textbf{and} \textit{let-binding}\} \textbf{in}\\
478 | \verb! !\textit{expr}\\
479 | \textbf{with} \textit{pattern-matching}
480 |
481 | It has the same meaning as:\\
482 | \textbf{try} \textbf{let} [\textbf{rec}] \textit{let-binding}
483 | \{\textbf{and} \textit{let-binding}\} \textbf{in}\\
484 | \verb! !\textit{expr}\\
485 | \textbf{with} \textit{pattern-matching}\\
486 | except that in the former case only the exceptions raised by the
487 | \textit{let-binding}s are handled by the exception handler introduced
488 | by \textbf{with}.
489 |
490 |
491 | \subsection{Implementation-dependent features}
492 |
493 | These features depend on which library is actually used internally for
494 | manipulating regular expressions.
495 | Currently two libraries are supported: the Str library from the
496 | official OCaml distribution and the PCRE-OCaml library.
497 | Support for other libraries might be added in the future.
498 |
499 | \subsubsection{Backreferences}
500 | \label{backref}
501 | Previously matched substrings can be matched again using
502 | backreferences.
503 | \underline{\textbf{!}\textit{ident}}
504 | is a backreference to the named group \textit{ident} that is defined
505 | previously in the sequence.
506 | During the matching process, it is not possible that a backreference
507 | refers to a named group which is not matched.
508 | In the following example, we
509 | extract the repeated pattern \texttt{abc} from \texttt{abcabc}
510 | \toplevelwarning:
511 | \begin{verbatim}
512 | # match "abcabc" with RE _* as x !x -> x;;
513 | - : string = "abc"
514 | \end{verbatim}
515 |
516 |
517 | \subsubsection{Specificities of Mikmatch\_str}
518 |
519 | Backreferences as described previously (section~\ref{backref}) are
520 | supported.
521 |
522 | In addition to the POSIX character classes,
523 | a set of predefined patterns is available:
524 | \begin{itemize}
525 | \item \underline{bol} matches at beginning of line (either at the
526 | beginning of the matched string, or just after a newline character).
527 | \item \underline{eol} matches at end of line (either at the end of the matched
528 | string, or just before a newline character).
529 | \item \underline{any} matches any character except newline.
530 | \item \underline{bnd} matches word boundaries.
531 | \end{itemize}
532 |
533 | \subsubsection{Specificities of Mikmatch\_pcre}
534 |
535 | This is currently the version which is used by the
536 | \verb$mikmatch$ command.
537 |
538 | \paragraph{Matching order}
539 |
540 | Alternatives (\textit{regexp1}|\textit{regexp2})
541 | are tried from left to right.
542 |
543 | The quantifiers (\verb$*$, \verb$+$, \verb$?$ and
544 | \verb${$\dots\verb$}$)
545 | are greedy except if specified otherwise (see next paragraph).
546 | The regular expressions are matched from left to right, and the
547 | repeated patterns are matched as many times as possible before trying
548 | to match the rest of the regular expression and either succeed or give
549 | up one repetition before retrying (backtracking).
550 |
551 | \paragraph{Greediness and laziness}
552 | Normally, quantifiers (\verb$*$, \verb$+$, \verb$?$ and
553 | \verb${$\dots\verb$}$) are greedy, i.e. they perform the longest match
554 | in terms of number of repetitions
555 | before matching the rest of the regular expression or
556 | backtracking. The opposite behavior is laziness: in that case,
557 | the number of repetitions is made minimal before trying to match the
558 | rest of the regular expression and either succeed or continue with one
559 | more repetition.
560 |
561 | The lazy behavior is turned on by placing the keyword \verb$Lazy$
562 | after the quantifier. This is the equivalent of Perl's quantifiers
563 | \verb$*?$, \verb$+?$, \verb$??$ and \verb${$\dots\verb$}?$.
564 | For instance, compare the following behaviors \toplevelwarning:
565 | \begin{verbatim}
566 | # match "" with RE "<" (_* as contents) ">" -> contents;;
567 | - : string = "hello>" with RE "<" (_* Lazy as contents) ">" -> contents;;
569 | - : string = "hello"
570 | \end{verbatim}
571 |
572 | \paragraph{Possessiveness or atomic grouping}
573 | Sometimes it can be useful to prevent backtracking.
574 | This is achieved by placing the \verb$Possessive$ keyword after a given group.
575 | For instance, compare the following \toplevelwarning:
576 | \begin{verbatim}
577 | # match "abc" with RE _* _ -> true | _ -> false;;
578 | - : bool = true
579 | # match "abc" with RE _* Possessive _ -> true | _ -> false;;
580 | - : bool = false
581 | \end{verbatim}
582 | This operator has the strongest associativity priority (0), just like
583 | the quantifiers.
584 |
585 | \paragraph{Backreferences}
586 | Backreferences are supported as described in section~\ref{backref}.
587 |
588 | \paragraph{Predefined patterns}
589 | The following predefined patterns are available in addition to the
590 | POSIX character classes:
591 | \begin{itemize}
592 | \item \underline{bos} matches at beginning of the matched string.
593 | \item \underline{eos} matches at the end of the matched string.
594 | \item \underline{bol} matches at beginning of line (either at the
595 | beginning of the matched string, or just after a newline character).
596 | \item \underline{eol} matches at end of line (either at the end of the matched
597 | string, or just before a newline character).
598 | \item \underline{any} matches any character except newline.
599 | \end{itemize}
600 |
601 |
602 | \paragraph{Lookaround assertions}
603 |
604 | A lookaround assertion is a pattern that
605 | has to be matched but doesn't consume characters in the string being
606 | matched.
607 |
608 | Lookahead assertions are checked after the current position in the
609 | string, and lookbehind assertions are matched before the current
610 | point. The general syntax for an assertion is the following:
611 | \\
612 | \fbox{\texttt{<} \textit{lookbehind}
613 | \texttt{.} \textit{lookahead} \texttt{>}}\\
614 | \fbox{\texttt{<} \textit{lookahead} \texttt{>}}\\
615 | The central dot symbolizes the current position. The
616 | \textit{lookbehind} assertion is a test over the characters at the
617 | left of the current point, while the \textit{lookahead} is a test over
618 | the characters at the right of the current point in the string.
619 |
620 | \textit{lookbehind} or \textit{lookahead} are either empty or a
621 | regular expression, optionally preceded by \texttt{Not}.
622 | An assertion starting with \texttt{Not} is called negative and means
623 | that the given regular expression can not match here.
624 |
625 | There are no restrictions on the contents of lookahead regular
626 | expressions. Lookbehind regular expressions are restricted
627 | to those that match substrings of length that can be predetermined.
628 | Besides this, backreferences are not supported in lookbehind expressions.
629 |
630 |
631 |
632 | \paragraph{Macros}
633 | This implementation provides a set of macros that follow this
634 | syntax:\\
635 | \fbox{\textit{MACRO-NAME} \textit{regexp} \texttt{->} \textit{expr}}\\
636 | where \textit{expr} is the expression that will be computed every time
637 | the pattern given by \textit{regexp} is matched.
638 |
639 | Only the \verb$SPLIT$ and \verb$FILTER$ macros follows a simplified syntax:\\
640 | \fbox{\textit{MACRO-NAME} \textit{regexp}}
641 |
642 | These constructs build a function which accepts some optional
643 | arguments and the string to match.
644 | For instance, \\
645 | \verb$(REPLACE "," -> ";") "a,b,c"$\\
646 | returns \verb$"a;b;c"$
647 | whereas\\
648 | \verb$(REPLACE "," -> ";") ~pos:2 "a,b,c"$\\
649 | returns \verb$"a,b;c"$
650 |
651 | The possible options are the following:
652 | \begin{itemize}
653 | \item \verb$pos$ has type \verb$int$ and indicates that matching or
654 | searching must start from this position in the string.
655 | Its default value is always 0 (beginning of the string).
656 | \item \verb$full$ is a boolean that defines whether split operations
657 | must ignore empty fragments before the first matched pattern or the
658 | last matched pattern in the string. The default value is \verb$true$
659 | for \verb$MAP$ and \verb$false$ for \verb$SPLIT$.
660 | \item \verb$share$ is a potentially unsafe option which allows the
661 | reuse of some mutable data which are associated to a given regular
662 | expression. This may make the program slightly faster, but should
663 | generally not be used in multi-threaded programs or in libraries.
664 | \end{itemize}
665 |
666 | \fbox{\texttt{MATCH} \textit{regexp} \texttt{->} \textit{expr}}\\
667 | tries to match the pattern \textit{regexp} at the beginning of the
668 | string or at the given position \verb$pos$ and returns \textit{expr}
669 | or raise \verb$Not_found$. Options: \verb$pos$ (0), \verb$share$ (false).
670 | When \verb$pos$ and \verb$share$ are not specified, it is equivalent to:
671 | \begin{alltt}
672 | function
673 | RE \textit{regexp} -> \textit{expr}
674 | | _ -> raise Not_found
675 | \end{alltt}
676 |
677 | \fbox{\texttt{REPLACE} \textit{regexp} \texttt{->} \textit{expr}}\\
678 | returns a string in which every occurrence of the pattern is
679 | replaced by \textit{expr}. Options: \verb$pos$ (0).
680 |
681 | \fbox{\texttt{REPLACE\_FIRST} \textit{regexp} \texttt{->} \textit{expr}}\\
682 | returns a string in which the first occurrence of the pattern is
683 | replaced by \textit{expr}. A copy of the input string is returned if
684 | the pattern is not found.
685 | Options: \verb$pos$ (0).
686 |
687 | \fbox{\texttt{SEARCH} \textit{regexp} \texttt{->} \textit{expr}}\\
688 | simply evaluates \textit{expr} every time the pattern is
689 | matched. Options: \verb$pos$ (0).
690 |
691 | \fbox{\texttt{SEARCH\_FIRST} \textit{regexp} \texttt{->} \textit{expr}}\\
692 | simply evaluates \textit{expr} the first time the pattern is
693 | matched and returns the result. Exception \texttt{Not\_Found} is
694 | raised if the pattern is not matched.
695 | Options: \verb$pos$ (0), \verb$share$ (false).
696 |
697 | \fbox{\texttt{COLLECT} \textit{regexp} \texttt{->} \textit{expr}}\\
698 | evaluates \textit{expr} every time the pattern is
699 | matched and puts the result into a list. Options: \verb$pos$ (0).
700 |
701 | \fbox{\texttt{COLLECTOBJ} \textit{regexp}}\\
702 | like \texttt{COLLECT}, but the elements of the returned list are automatically objects
703 | with methods that correspond to the subgroups captured with
704 | \texttt{as}.
705 | Options: \verb$pos$ (0).
706 |
707 | \fbox{\texttt{SPLIT} \textit{regexp}}\\
708 | splits the given string using \textit{regexp} as a delimiter.
709 | Options: \verb$pos$ (0), \verb$full$ (false).
710 |
711 | \fbox{\texttt{FILTER} \textit{regexp}}\\
712 | creates a predicate that returns true is the given string matches
713 | \textit{regexp} or false otherwise.
714 | Options: \verb$pos$ (0), \verb$share$ (false).
715 |
716 | \fbox{\texttt{CAPTURE} \textit{regexp}}\\
717 | returns \texttt{Some o} where \texttt{o} is an object with methods
718 | that correspond to the captured subgroups, or
719 | \texttt{None} if the subject string doesn't match \textit{regexp}.
720 | Options: \verb$pos$ (0), \verb$share$ (false).
721 |
722 | \fbox{\texttt{MAP} \textit{regexp} \texttt{->} \textit{expr}}\\
723 | splits the given string into fragments: the fragments that do not match the
724 | pattern are returned as \texttt{`Text s} where s is a
725 | string. Fragments that match the pattern are replaced by
726 | the result of \textit{expr}, which has to be a polymorphic variant.
727 | Options: \verb$pos$ (0), \verb$full$ (true).
728 | For instance, \\
729 | \verb$(MAP ',' -> `Sep) "a,b,c,"$\\
730 | returns the list\\
731 | \verb$[`Text "a"; `Sep; `Text "b"; `Sep; `Text "c"; `Sep; `Text ""]$\\
732 | whereas \\
733 | \verb$(MAP ',' -> `Sep) ~full:false "a,b,c,"$\\
734 | returns only\\
735 | \verb$[`Text "a"; `Sep; `Text "b"; `Sep; `Text "c"; `Sep]$
736 |
737 |
738 | \section{Tools}
739 |
740 | \subsection{Micmatch, Mikmatch, old Camlp4, new Camlp4, Camlp5}
741 |
742 | Camlp4/Camlp5 is the set of tools that allows to build and use
743 | syntax extensions of OCaml. We distinguish 3 major variants of Camlp4:
744 | \begin{itemize}
745 | \item The ``old Camlp4'' is Camlp4 as distributed with OCaml until
746 | version 3.09.3.
747 | \item Camlp5 is an independent branch of the old Camlp4, compatible
748 | with at least the 3.09 and 3.10 release lines of OCaml. It is close
749 | to 100\% compatible with the old Camlp4.
750 | \item The new Camlp4 or just Camlp4 shares the same goals as the
751 | old Camlp4 and Camlp5, but is largely incompatible with them.
752 | It is included in the core OCaml distribution starting from OCaml 3.10
753 | and replaces the old Camlp4.
754 | \end{itemize}
755 |
756 | Micmatch is the name of the original implementation of Mikmatch for the
757 | old Camlp4:
758 | \begin{itemize}
759 | \item Micmatch < 1.0 requires the old Camlp4.
760 | \item Micmatch $\geq$ 1.0 requires Camlp5.
761 | \item Mikmatch requires the new Camlp4.
762 | \end{itemize}
763 |
764 |
765 |
766 | \subsection{The toplevel}
767 |
768 | \toplevelwarning
769 |
770 |
771 | \subsection{The libraries for the preprocessor}
772 |
773 | \subsubsection{Mikmatch\_str}
774 | The preprocessing library \texttt{pa\_mikmatch\_str.cma} must be loaded by
775 | the preprocessor (\texttt{camlp4o} or \texttt{camlp4r}).
776 |
777 | It is safe to use Mikmatch\_str in
778 | multithreaded programs without locks only if the patterns do not contain
779 | the @ keyword because it uses a shared cache of compiled regexps.
780 |
781 | \subsubsection{Mikmatch\_pcre}
782 | The preprocessing library \texttt{pa\_mikmatch\_pcre.cma} must be loaded by
783 | the preprocessor (\texttt{camlp4o} or \texttt{camlp4r}).
784 |
785 | It is safe to use Mikmatch\_str in
786 | multithreaded programs without locks only if the patterns do not contain
787 | the @ keyword because it uses a shared cache of compiled regexps.
788 |
789 |
790 | \subsection{The runtime libraries}
791 |
792 | Both variants depend on portable features of the Unix library.
793 | The executables must therefore be linked against \texttt{unix.cma}
794 | (bytecode) or \texttt{unix.cmxa} (native code) in addition to
795 | the specific libraries mentioned below.
796 |
797 | \subsubsection{Mikmatch\_str}
798 |
799 | In addition to the backend for the regular expressions engine
800 | (\texttt{str.cma} for bytecode or \texttt{str.cmxa} for native code),
801 | the OCaml code which is produced by the preprocessor needs to be
802 | linked against either \texttt{run\_mikmatch\_str.cma} (bytecode),
803 | \texttt{run\_mikmatch\_str.cmxa} (native code),
804 | \texttt{run\_mikmatch\_str\_mt.cma} (bytecode, threads)
805 | or \texttt{run\_mikmatch\_str\_mt.cmxa} (native code, threads).
806 |
807 |
808 | \subsubsection{Mikmatch\_pcre}
809 |
810 | In addition to the backend for the regular expressions engine
811 | (\texttt{pcre.cma} for bytecode or \texttt{pcre.cmxa} for native code),
812 | the OCaml code which is produced by the preprocessor needs to be
813 | linked against either \texttt{run\_mikmatch\_pcre.cma} (bytecode),
814 | \texttt{run\_mikmatch\_pcre.cmxa} (native code).
815 | Multithreaded programs are supported as well
816 | and do not require a specific library.
817 |
818 | \begin{latexonly}
819 | \input{mikmatch-ocamldoc.tex}
820 | \end{latexonly}
821 |
822 | \begin{htmlonly}
823 | \section{A small text-oriented library}
824 | \begin{rawhtml}
825 | Module Mikmatch
826 | \end{rawhtml}
827 | \end{htmlonly}
828 |
829 |
830 | \end{document}
831 |
--------------------------------------------------------------------------------
/OCamlMakefile:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # OCamlMakefile
3 | # Copyright (C) 1999-2004 Markus Mottl
4 | #
5 | # For updates see:
6 | # http://www.oefai.at/~markus/ocaml_sources
7 | #
8 | # $Id$
9 | #
10 | ###########################################################################
11 |
12 | # Modified by damien for .glade.ml compilation
13 |
14 | # Set these variables to the names of the sources to be processed and
15 | # the result variable. Order matters during linkage!
16 |
17 | ifndef SOURCES
18 | SOURCES := foo.ml
19 | endif
20 | export SOURCES
21 |
22 | ifndef RES_CLIB_SUF
23 | RES_CLIB_SUF := _stubs
24 | endif
25 | export RES_CLIB_SUF
26 |
27 | ifndef RESULT
28 | RESULT := foo
29 | endif
30 | export RESULT
31 |
32 | export LIB_PACK_NAME
33 |
34 | ifndef DOC_FILES
35 | DOC_FILES := $(filter %.mli, $(SOURCES))
36 | endif
37 | export DOC_FILES
38 |
39 | export BCSUFFIX
40 | export NCSUFFIX
41 |
42 | ifndef TOPSUFFIX
43 | TOPSUFFIX := .top
44 | endif
45 | export TOPSUFFIX
46 |
47 | # Eventually set include- and library-paths, libraries to link,
48 | # additional compilation-, link- and ocamlyacc-flags
49 | # Path- and library information needs not be written with "-I" and such...
50 | # Define THREADS if you need it, otherwise leave it unset (same for
51 | # USE_CAMLP4)!
52 |
53 | export THREADS
54 | export VMTHREADS
55 | export ANNOTATE
56 | export USE_CAMLP4
57 |
58 | export INCDIRS
59 | export LIBDIRS
60 | export EXTLIBDIRS
61 | export RESULTDEPS
62 | export OCAML_DEFAULT_DIRS
63 |
64 | export LIBS
65 | export CLIBS
66 |
67 | export OCAMLFLAGS
68 | export OCAMLNCFLAGS
69 | export OCAMLBCFLAGS
70 |
71 | export OCAMLLDFLAGS
72 | export OCAMLNLDFLAGS
73 | export OCAMLBLDFLAGS
74 |
75 | ifndef OCAMLCPFLAGS
76 | OCAMLCPFLAGS := a
77 | endif
78 |
79 | export OCAMLCPFLAGS
80 |
81 | export PPFLAGS
82 |
83 | export YFLAGS
84 | export IDLFLAGS
85 |
86 | export OCAMLDOCFLAGS
87 |
88 | export OCAMLFIND_INSTFLAGS
89 |
90 | export DVIPSFLAGS
91 |
92 | export STATIC
93 |
94 | # Add a list of optional trash files that should be deleted by "make clean"
95 | export TRASH
96 |
97 | #################### variables depending on your OCaml-installation
98 |
99 | ifdef MINGW
100 | export MINGW
101 | WIN32 := 1
102 | CFLAGS_WIN32 := -mno-cygwin
103 | endif
104 | ifdef MSVC
105 | export MSVC
106 | WIN32 := 1
107 | ifndef STATIC
108 | CFLAGS_WIN32 := -DCAML_DLL
109 | endif
110 | CFLAGS_WIN32 += -nologo
111 | EXT_OBJ := obj
112 | EXT_LIB := lib
113 | ifeq ($(CC),gcc)
114 | # work around GNU Make default value
115 | ifdef THREADS
116 | CC := cl -MT
117 | else
118 | CC := cl
119 | endif
120 | endif
121 | ifeq ($(CXX),g++)
122 | # work around GNU Make default value
123 | CXX := $(CC)
124 | endif
125 | CFLAG_O := -Fo
126 | endif
127 | ifdef WIN32
128 | EXT_CXX := cpp
129 | EXE := .exe
130 | endif
131 |
132 | ifndef EXT_OBJ
133 | EXT_OBJ := o
134 | endif
135 | ifndef EXT_LIB
136 | EXT_LIB := a
137 | endif
138 | ifndef EXT_CXX
139 | EXT_CXX := cc
140 | endif
141 | ifndef EXE
142 | EXE := # empty
143 | endif
144 | ifndef CFLAG_O
145 | CFLAG_O := -o # do not delete this comment (preserves trailing whitespace)!
146 | endif
147 |
148 | export CC
149 | export CXX
150 | export CFLAGS
151 | export CXXFLAGS
152 | export LDFLAGS
153 |
154 | ifndef RPATH_FLAG
155 | RPATH_FLAG := -R
156 | endif
157 | export RPATH_FLAG
158 |
159 | ifndef MSVC
160 | ifndef PIC_FLAGS
161 | PIC_FLAGS := -fPIC -DPIC
162 | endif
163 | endif
164 |
165 | export PIC_FLAGS
166 |
167 | BCRESULT := $(addsuffix $(BCSUFFIX), $(RESULT))
168 | NCRESULT := $(addsuffix $(NCSUFFIX), $(RESULT))
169 | TOPRESULT := $(addsuffix $(TOPSUFFIX), $(RESULT))
170 |
171 | ifndef OCAMLFIND
172 | OCAMLFIND := ocamlfind
173 | endif
174 | export OCAMLFIND
175 |
176 | ifndef OCAMLC
177 | OCAMLC := ocamlc
178 | endif
179 | export OCAMLC
180 |
181 | ifndef OCAMLOPT
182 | OCAMLOPT := ocamlopt
183 | endif
184 | export OCAMLOPT
185 |
186 | ifndef OCAMLMKTOP
187 | OCAMLMKTOP := ocamlmktop
188 | endif
189 | export OCAMLMKTOP
190 |
191 | ifndef OCAMLCP
192 | OCAMLCP := ocamlcp
193 | endif
194 | export OCAMLCP
195 |
196 | ifndef OCAMLDEP
197 | OCAMLDEP := ocamldep
198 | endif
199 | export OCAMLDEP
200 |
201 | ifndef OCAMLLEX
202 | OCAMLLEX := ocamllex
203 | endif
204 | export OCAMLLEX
205 |
206 | ifndef OCAMLYACC
207 | OCAMLYACC := ocamlyacc
208 | endif
209 | export OCAMLYACC
210 |
211 | ifndef OCAMLMKLIB
212 | OCAMLMKLIB := ocamlmklib
213 | endif
214 | export OCAMLMKLIB
215 |
216 | ifndef OCAML_GLADECC
217 | OCAML_GLADECC := lablgladecc2
218 | endif
219 | export OCAML_GLADECC
220 |
221 | ifndef OCAML_GLADECC_FLAGS
222 | OCAML_GLADECC_FLAGS :=
223 | endif
224 | export OCAML_GLADECC_FLAGS
225 |
226 | ifndef CAMELEON_REPORT
227 | CAMELEON_REPORT := report
228 | endif
229 | export CAMELEON_REPORT
230 |
231 | ifndef CAMELEON_REPORT_FLAGS
232 | CAMELEON_REPORT_FLAGS :=
233 | endif
234 | export CAMELEON_REPORT_FLAGS
235 |
236 | ifndef CAMELEON_ZOGGY
237 | CAMELEON_ZOGGY := camlp4o pa_zog.cma pr_o.cmo
238 | endif
239 | export CAMELEON_ZOGGY
240 |
241 | ifndef CAMELEON_ZOGGY_FLAGS
242 | CAMELEON_ZOGGY_FLAGS :=
243 | endif
244 | export CAMELEON_ZOGGY_FLAGS
245 |
246 | ifndef OXRIDL
247 | OXRIDL := oxridl
248 | endif
249 | export OXRIDL
250 |
251 | ifndef CAMLIDL
252 | CAMLIDL := camlidl
253 | endif
254 | export CAMLIDL
255 |
256 | ifndef CAMLIDLDLL
257 | CAMLIDLDLL := camlidldll
258 | endif
259 | export CAMLIDLDLL
260 |
261 | ifndef NOIDLHEADER
262 | MAYBE_IDL_HEADER := -header
263 | endif
264 | export NOIDLHEADER
265 |
266 | export NO_CUSTOM
267 |
268 | ifndef CAMLP4
269 | CAMLP4 := camlp4
270 | endif
271 | export CAMLP4
272 |
273 | ifdef PACKS
274 | ifndef CREATE_LIB
275 | PACKS += threads
276 | endif
277 | empty :=
278 | space := $(empty) $(empty)
279 | comma := ,
280 | ifdef PREDS
281 | PRE_OCAML_FIND_PREDICATES := $(subst $(space),$(comma),$(PREDS))
282 | PRE_OCAML_FIND_PACKAGES := $(subst $(space),$(comma),$(PACKS))
283 | OCAML_FIND_PREDICATES := -predicates $(PRE_OCAML_FIND_PREDICATES)
284 | # OCAML_DEP_PREDICATES := -syntax $(PRE_OCAML_FIND_PREDICATES)
285 | OCAML_FIND_PACKAGES := $(OCAML_FIND_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES)
286 | OCAML_DEP_PACKAGES := $(OCAML_DEP_PREDICATES) -package $(PRE_OCAML_FIND_PACKAGES)
287 | else
288 | OCAML_FIND_PACKAGES := -package $(subst $(space),$(comma),$(PACKS))
289 | OCAML_DEP_PACKAGES :=
290 | endif
291 | OCAML_FIND_LINKPKG := -linkpkg
292 | REAL_OCAMLFIND := $(OCAMLFIND)
293 | endif
294 |
295 | export OCAML_FIND_PACKAGES
296 | export OCAML_DEP_PACKAGES
297 | export OCAML_FIND_LINKPKG
298 | export REAL_OCAMLFIND
299 |
300 | ifndef OCAMLDOC
301 | OCAMLDOC := ocamldoc
302 | endif
303 | export OCAMLDOC
304 |
305 | ifndef LATEX
306 | LATEX := latex
307 | endif
308 | export LATEX
309 |
310 | ifndef DVIPS
311 | DVIPS := dvips
312 | endif
313 | export DVIPS
314 |
315 | ifndef PS2PDF
316 | PS2PDF := ps2pdf
317 | endif
318 | export PS2PDF
319 |
320 | ifndef OCAMLMAKEFILE
321 | OCAMLMAKEFILE := OCamlMakefile
322 | endif
323 | export OCAMLMAKEFILE
324 |
325 | ifndef OCAMLLIBPATH
326 | OCAMLLIBPATH := \
327 | $(shell $(OCAMLC) 2>/dev/null -where || echo /usr/local/lib/ocaml)
328 | endif
329 | export OCAMLLIBPATH
330 |
331 | ifndef OCAML_LIB_INSTALL
332 | OCAML_LIB_INSTALL := $(OCAMLLIBPATH)/contrib
333 | endif
334 | export OCAML_LIB_INSTALL
335 |
336 | ###########################################################################
337 |
338 | #################### change following sections only if
339 | #################### you know what you are doing!
340 |
341 | # delete target files when a build command fails
342 | .PHONY: .DELETE_ON_ERROR
343 | .DELETE_ON_ERROR:
344 |
345 | # for pedants using "--warn-undefined-variables"
346 | export MAYBE_IDL
347 | export REAL_RESULT
348 | export CAMLIDLFLAGS
349 | export THREAD_FLAG
350 | export RES_CLIB
351 | export MAKEDLL
352 | export ANNOT_FLAG
353 | export C_OXRIDL
354 | export SUBPROJS
355 | export CFLAGS_WIN32
356 | INCFLAGS :=
357 |
358 | SHELL := /bin/sh
359 |
360 | MLDEPDIR := ._d
361 | BCDIDIR := ._bcdi
362 | NCDIDIR := ._ncdi
363 |
364 | FILTER_EXTNS := %.mli %.ml %.mll %.mly %.idl %.oxridl %.c %.$(EXT_CXX) %.rep %.zog %.glade
365 |
366 | FILTERED := $(filter $(FILTER_EXTNS), $(SOURCES))
367 | SOURCE_DIRS := $(filter-out ./, $(sort $(dir $(FILTERED))))
368 |
369 | FILTERED_REP := $(filter %.rep, $(FILTERED))
370 | DEP_REP := $(FILTERED_REP:%.rep=$(MLDEPDIR)/%.d)
371 | AUTO_REP := $(FILTERED_REP:.rep=.ml)
372 |
373 | FILTERED_ZOG := $(filter %.zog, $(FILTERED))
374 | DEP_ZOG := $(FILTERED_ZOG:%.zog=$(MLDEPDIR)/%.d)
375 | AUTO_ZOG := $(FILTERED_ZOG:.zog=.ml)
376 |
377 | FILTERED_GLADE := $(filter %.glade, $(FILTERED))
378 | DEP_GLADE := $(FILTERED_GLADE:%.glade=$(MLDEPDIR)/%.d)
379 | AUTO_GLADE := $(FILTERED_GLADE:.glade=.ml)
380 |
381 | FILTERED_ML := $(filter %.ml, $(FILTERED))
382 | DEP_ML := $(FILTERED_ML:%.ml=$(MLDEPDIR)/%.d)
383 |
384 | FILTERED_MLI := $(filter %.mli, $(FILTERED))
385 | DEP_MLI := $(FILTERED_MLI:.mli=.di)
386 |
387 | FILTERED_MLL := $(filter %.mll, $(FILTERED))
388 | DEP_MLL := $(FILTERED_MLL:%.mll=$(MLDEPDIR)/%.d)
389 | AUTO_MLL := $(FILTERED_MLL:.mll=.ml)
390 |
391 | FILTERED_MLY := $(filter %.mly, $(FILTERED))
392 | DEP_MLY := $(FILTERED_MLY:%.mly=$(MLDEPDIR)/%.d) $(FILTERED_MLY:.mly=.di)
393 | AUTO_MLY := $(FILTERED_MLY:.mly=.mli) $(FILTERED_MLY:.mly=.ml)
394 |
395 | FILTERED_IDL := $(filter %.idl, $(FILTERED))
396 | DEP_IDL := $(FILTERED_IDL:%.idl=$(MLDEPDIR)/%.d) $(FILTERED_IDL:.idl=.di)
397 | C_IDL := $(FILTERED_IDL:%.idl=%_stubs.c)
398 | ifndef NOIDLHEADER
399 | C_IDL += $(FILTERED_IDL:.idl=.h)
400 | endif
401 | OBJ_C_IDL := $(FILTERED_IDL:%.idl=%_stubs.$(EXT_OBJ))
402 | AUTO_IDL := $(FILTERED_IDL:.idl=.mli) $(FILTERED_IDL:.idl=.ml) $(C_IDL)
403 |
404 | FILTERED_OXRIDL := $(filter %.oxridl, $(FILTERED))
405 | DEP_OXRIDL := $(FILTERED_OXRIDL:%.oxridl=$(MLDEPDIR)/%.d) $(FILTERED_OXRIDL:.oxridl=.di)
406 | AUTO_OXRIDL := $(FILTERED_OXRIDL:.oxridl=.mli) $(FILTERED_OXRIDL:.oxridl=.ml) $(C_OXRIDL)
407 |
408 | FILTERED_C_CXX := $(filter %.c %.$(EXT_CXX), $(FILTERED))
409 | OBJ_C_CXX := $(FILTERED_C_CXX:.c=.$(EXT_OBJ))
410 | OBJ_C_CXX := $(OBJ_C_CXX:.$(EXT_CXX)=.$(EXT_OBJ))
411 |
412 | PRE_TARGETS += $(AUTO_MLL) $(AUTO_MLY) $(AUTO_IDL) $(AUTO_OXRIDL) $(AUTO_ZOG) $(AUTO_REP) $(AUTO_GLADE)
413 |
414 | ALL_DEPS := $(DEP_ML) $(DEP_MLI) $(DEP_MLL) $(DEP_MLY) $(DEP_IDL) $(DEP_OXRIDL) $(DEP_ZOG) $(DEP_REP) $(DEP_GLADE)
415 |
416 | MLDEPS := $(filter %.d, $(ALL_DEPS))
417 | MLIDEPS := $(filter %.di, $(ALL_DEPS))
418 | BCDEPIS := $(MLIDEPS:%.di=$(BCDIDIR)/%.di)
419 | NCDEPIS := $(MLIDEPS:%.di=$(NCDIDIR)/%.di)
420 |
421 | ALLML := $(filter %.mli %.ml %.mll %.mly %.idl %.oxridl %.rep %.zog %.glade, $(FILTERED))
422 |
423 | IMPLO_INTF := $(ALLML:%.mli=%.mli.__)
424 | IMPLO_INTF := $(foreach file, $(IMPLO_INTF), \
425 | $(basename $(file)).cmi $(basename $(file)).cmo)
426 | IMPLO_INTF := $(filter-out %.mli.cmo, $(IMPLO_INTF))
427 | IMPLO_INTF := $(IMPLO_INTF:%.mli.cmi=%.cmi)
428 |
429 | IMPLX_INTF := $(IMPLO_INTF:.cmo=.cmx)
430 |
431 | INTF := $(filter %.cmi, $(IMPLO_INTF))
432 | IMPL_CMO := $(filter %.cmo, $(IMPLO_INTF))
433 | IMPL_CMX := $(IMPL_CMO:.cmo=.cmx)
434 | IMPL_ASM := $(IMPL_CMO:.cmo=.asm)
435 | IMPL_S := $(IMPL_CMO:.cmo=.s)
436 |
437 | OBJ_LINK := $(OBJ_C_IDL) $(OBJ_C_CXX)
438 | OBJ_FILES := $(IMPL_CMO:.cmo=.$(EXT_OBJ)) $(OBJ_LINK)
439 |
440 | EXECS := $(addsuffix $(EXE), \
441 | $(sort $(TOPRESULT) $(BCRESULT) $(NCRESULT)))
442 | ifdef WIN32
443 | EXECS += $(BCRESULT).dll $(NCRESULT).dll
444 | endif
445 |
446 | CLIB_BASE := $(RESULT)$(RES_CLIB_SUF)
447 | ifneq ($(strip $(OBJ_LINK)),)
448 | RES_CLIB := lib$(CLIB_BASE).$(EXT_LIB)
449 | endif
450 |
451 | ifdef WIN32
452 | DLLSONAME := $(CLIB_BASE).dll
453 | else
454 | DLLSONAME := dll$(CLIB_BASE).so
455 | endif
456 |
457 | NONEXECS := $(INTF) $(IMPL_CMO) $(IMPL_CMX) $(IMPL_ASM) $(IMPL_S) \
458 | $(OBJ_FILES) $(PRE_TARGETS) $(BCRESULT).cma $(NCRESULT).cmxa \
459 | $(NCRESULT).$(EXT_LIB) $(BCRESULT).cmi $(BCRESULT).cmo \
460 | $(NCRESULT).cmi $(NCRESULT).cmx $(NCRESULT).o \
461 | $(RES_CLIB) $(IMPL_CMO:.cmo=.annot) \
462 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(LIB_PACK_NAME).cmx $(LIB_PACK_NAME).o
463 |
464 | ifndef STATIC
465 | NONEXECS += $(DLLSONAME)
466 | endif
467 |
468 | ifndef LIBINSTALL_FILES
469 | LIBINSTALL_FILES := $(RESULT).mli $(RESULT).cmi $(RESULT).cma \
470 | $(RESULT).cmxa $(RESULT).$(EXT_LIB) $(RES_CLIB)
471 | ifndef STATIC
472 | ifneq ($(strip $(OBJ_LINK)),)
473 | LIBINSTALL_FILES += $(DLLSONAME)
474 | endif
475 | endif
476 | endif
477 |
478 | export LIBINSTALL_FILES
479 |
480 | ifdef WIN32
481 | # some extra stuff is created while linking DLLs
482 | NONEXECS += $(BCRESULT).$(EXT_LIB) $(BCRESULT).exp $(NCRESULT).exp $(CLIB_BASE).exp $(CLIB_BASE).lib
483 | endif
484 |
485 | TARGETS := $(EXECS) $(NONEXECS)
486 |
487 | # If there are IDL-files
488 | ifneq ($(strip $(FILTERED_IDL)),)
489 | MAYBE_IDL := -cclib -lcamlidl
490 | endif
491 |
492 | ifdef USE_CAMLP4
493 | CAMLP4PATH := \
494 | $(shell $(CAMLP4) -where 2>/dev/null || echo /usr/local/lib/camlp4)
495 | INCFLAGS := -I $(CAMLP4PATH)
496 | CINCFLAGS := -I$(CAMLP4PATH)
497 | endif
498 |
499 | DINCFLAGS := $(INCFLAGS) $(SOURCE_DIRS:%=-I %) $(OCAML_DEFAULT_DIRS:%=-I %)
500 | INCFLAGS := $(DINCFLAGS) $(INCDIRS:%=-I %)
501 | CINCFLAGS += $(SOURCE_DIRS:%=-I%) $(INCDIRS:%=-I%) $(OCAML_DEFAULT_DIRS:%=-I%)
502 |
503 | ifndef MSVC
504 | CLIBFLAGS += $(SOURCE_DIRS:%=-L%) $(LIBDIRS:%=-L%) \
505 | $(EXTLIBDIRS:%=-L%) $(EXTLIBDIRS:%=-Wl,$(RPATH_FLAG)%) \
506 | $(OCAML_DEFAULT_DIRS:%=-L%)
507 | endif
508 |
509 | ifndef PROFILING
510 | INTF_OCAMLC := $(OCAMLC)
511 | else
512 | ifndef THREADS
513 | INTF_OCAMLC := $(OCAMLCP) -p $(OCAMLCPFLAGS)
514 | else
515 | # OCaml does not support profiling byte code
516 | # with threads (yet), therefore we force an error.
517 | ifndef REAL_OCAMLC
518 | $(error Profiling of multithreaded byte code not yet supported by OCaml)
519 | endif
520 | INTF_OCAMLC := $(OCAMLC)
521 | endif
522 | endif
523 |
524 | ifndef MSVC
525 | COMMON_LDFLAGS := $(LDFLAGS:%=-ccopt %) $(SOURCE_DIRS:%=-ccopt -L%) \
526 | $(LIBDIRS:%=-ccopt -L%) $(EXTLIBDIRS:%=-ccopt -L%) \
527 | $(EXTLIBDIRS:%=-ccopt -Wl,$(RPATH_FLAG)%) \
528 | $(OCAML_DEFAULT_DIRS:%=-ccopt -L%)
529 | else
530 | COMMON_LDFLAGS := -ccopt "/link -NODEFAULTLIB:LIBC $(LDFLAGS:%=%) $(SOURCE_DIRS:%=-LIBPATH:%) \
531 | $(LIBDIRS:%=-LIBPATH:%) $(EXTLIBDIRS:%=-LIBPATH:%) \
532 | $(OCAML_DEFAULT_DIRS:%=-LIBPATH:%) "
533 | endif
534 |
535 | CLIBS_OPTS := $(CLIBS:%=-cclib -l%)
536 | ifdef MSVC
537 | ifndef STATIC
538 | # MSVC libraries do not have 'lib' prefix
539 | CLIBS_OPTS := $(CLIBS:%=-cclib %.lib)
540 | endif
541 | endif
542 |
543 | ifneq ($(strip $(OBJ_LINK)),)
544 | ifdef CREATE_LIB
545 | OBJS_LIBS := -cclib -l$(CLIB_BASE) $(CLIBS_OPTS) $(MAYBE_IDL)
546 | else
547 | OBJS_LIBS := $(OBJ_LINK) $(CLIBS_OPTS) $(MAYBE_IDL)
548 | endif
549 | else
550 | OBJS_LIBS := $(CLIBS_OPTS) $(MAYBE_IDL)
551 | endif
552 |
553 | # If we have to make byte-code
554 | ifndef REAL_OCAMLC
555 | BYTE_OCAML := y
556 |
557 | # EXTRADEPS is added dependencies we have to insert for all
558 | # executable files we generate. Ideally it should be all of the
559 | # libraries we use, but it's hard to find the ones that get searched on
560 | # the path since I don't know the paths built into the compiler, so
561 | # just include the ones with slashes in their names.
562 | EXTRADEPS := $(addsuffix .cma,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i))))
563 | SPECIAL_OCAMLFLAGS := $(OCAMLBCFLAGS)
564 |
565 | REAL_OCAMLC := $(INTF_OCAMLC)
566 |
567 | REAL_IMPL := $(IMPL_CMO)
568 | REAL_IMPL_INTF := $(IMPLO_INTF)
569 | IMPL_SUF := .cmo
570 |
571 | DEPFLAGS :=
572 | MAKE_DEPS := $(MLDEPS) $(BCDEPIS)
573 |
574 | ifdef CREATE_LIB
575 | CFLAGS := $(PIC_FLAGS) $(CFLAGS)
576 | ifndef STATIC
577 | ifneq ($(strip $(OBJ_LINK)),)
578 | MAKEDLL := $(DLLSONAME)
579 | ALL_LDFLAGS := -dllib $(DLLSONAME)
580 | endif
581 | endif
582 | endif
583 |
584 | ifndef NO_CUSTOM
585 | ifneq "$(strip $(OBJ_LINK) $(THREADS) $(MAYBE_IDL) $(CLIBS))" ""
586 | ALL_LDFLAGS += -custom
587 | endif
588 | endif
589 |
590 | ALL_LDFLAGS += $(INCFLAGS) $(OCAMLLDFLAGS) $(OCAMLBLDFLAGS) \
591 | $(COMMON_LDFLAGS) $(LIBS:%=%.cma)
592 | CAMLIDLDLLFLAGS :=
593 |
594 | ifdef THREADS
595 | ifdef VMTHREADS
596 | THREAD_FLAG := -vmthread
597 | else
598 | THREAD_FLAG := -thread
599 | endif
600 | ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS)
601 | ifndef CREATE_LIB
602 | ifndef REAL_OCAMLFIND
603 | ALL_LDFLAGS := unix.cma threads.cma $(ALL_LDFLAGS)
604 | endif
605 | endif
606 | endif
607 |
608 | # we have to make native-code
609 | else
610 | EXTRADEPS := $(addsuffix .cmxa,$(foreach i,$(LIBS),$(if $(findstring /,$(i)),$(i))))
611 | ifndef PROFILING
612 | SPECIAL_OCAMLFLAGS := $(OCAMLNCFLAGS)
613 | PLDFLAGS :=
614 | else
615 | SPECIAL_OCAMLFLAGS := -p $(OCAMLNCFLAGS)
616 | PLDFLAGS := -p
617 | endif
618 |
619 | REAL_IMPL := $(IMPL_CMX)
620 | REAL_IMPL_INTF := $(IMPLX_INTF)
621 | IMPL_SUF := .cmx
622 |
623 | CFLAGS := -DNATIVE_CODE $(CFLAGS)
624 |
625 | DEPFLAGS := -native
626 | MAKE_DEPS := $(MLDEPS) $(NCDEPIS)
627 |
628 | ALL_LDFLAGS := $(PLDFLAGS) $(INCFLAGS) $(OCAMLLDFLAGS) \
629 | $(OCAMLNLDFLAGS) $(COMMON_LDFLAGS)
630 | CAMLIDLDLLFLAGS := -opt
631 |
632 | ifndef CREATE_LIB
633 | ALL_LDFLAGS += $(LIBS:%=%.cmxa)
634 | else
635 | CFLAGS := $(PIC_FLAGS) $(CFLAGS)
636 | endif
637 |
638 | ifdef THREADS
639 | THREAD_FLAG := -thread
640 | ALL_LDFLAGS := $(THREAD_FLAG) $(ALL_LDFLAGS)
641 | ifndef CREATE_LIB
642 | ifndef REAL_OCAMLFIND
643 | ALL_LDFLAGS := unix.cmxa threads.cmxa $(ALL_LDFLAGS)
644 | endif
645 | endif
646 | endif
647 | endif
648 |
649 | export MAKE_DEPS
650 |
651 | ifdef ANNOTATE
652 | ANNOT_FLAG := -dtypes
653 | else
654 | endif
655 |
656 | ALL_OCAMLCFLAGS := $(THREAD_FLAG) $(ANNOT_FLAG) $(OCAMLFLAGS) \
657 | $(INCFLAGS) $(SPECIAL_OCAMLFLAGS)
658 |
659 | ifdef make_deps
660 | -include $(MAKE_DEPS)
661 | PRE_TARGETS :=
662 | endif
663 |
664 | ###########################################################################
665 | # USER RULES
666 |
667 | # Call "OCamlMakefile QUIET=" to get rid of all of the @'s.
668 | QUIET=@
669 |
670 | # generates byte-code (default)
671 | byte-code: $(PRE_TARGETS)
672 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
673 | REAL_RESULT="$(BCRESULT)" make_deps=yes
674 | bc: byte-code
675 |
676 | byte-code-nolink: $(PRE_TARGETS)
677 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
678 | REAL_RESULT="$(BCRESULT)" make_deps=yes
679 | bcnl: byte-code-nolink
680 |
681 | top: $(PRE_TARGETS)
682 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(TOPRESULT) \
683 | REAL_RESULT="$(BCRESULT)" make_deps=yes
684 |
685 | # generates native-code
686 |
687 | native-code: $(PRE_TARGETS)
688 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \
689 | REAL_RESULT="$(NCRESULT)" \
690 | REAL_OCAMLC="$(OCAMLOPT)" \
691 | make_deps=yes
692 | nc: native-code
693 |
694 | native-code-nolink: $(PRE_TARGETS)
695 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
696 | REAL_RESULT="$(NCRESULT)" \
697 | REAL_OCAMLC="$(OCAMLOPT)" \
698 | make_deps=yes
699 | ncnl: native-code-nolink
700 |
701 | # generates byte-code libraries
702 | byte-code-library: $(PRE_TARGETS)
703 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
704 | $(RES_CLIB) $(BCRESULT).cma \
705 | REAL_RESULT="$(BCRESULT)" \
706 | CREATE_LIB=yes \
707 | make_deps=yes
708 | bcl: byte-code-library
709 |
710 | # generates native-code libraries
711 | native-code-library: $(PRE_TARGETS)
712 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
713 | $(RES_CLIB) $(NCRESULT).cmxa \
714 | REAL_RESULT="$(NCRESULT)" \
715 | REAL_OCAMLC="$(OCAMLOPT)" \
716 | CREATE_LIB=yes \
717 | make_deps=yes
718 | ncl: native-code-library
719 |
720 | ifdef WIN32
721 | # generates byte-code dll
722 | byte-code-dll: $(PRE_TARGETS)
723 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
724 | $(RES_CLIB) $(BCRESULT).dll \
725 | REAL_RESULT="$(BCRESULT)" \
726 | make_deps=yes
727 | bcd: byte-code-dll
728 |
729 | # generates native-code dll
730 | native-code-dll: $(PRE_TARGETS)
731 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
732 | $(RES_CLIB) $(NCRESULT).dll \
733 | REAL_RESULT="$(NCRESULT)" \
734 | REAL_OCAMLC="$(OCAMLOPT)" \
735 | make_deps=yes
736 | ncd: native-code-dll
737 | endif
738 |
739 | # generates byte-code with debugging information
740 | debug-code: $(PRE_TARGETS)
741 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
742 | REAL_RESULT="$(BCRESULT)" make_deps=yes \
743 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \
744 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
745 | dc: debug-code
746 |
747 | debug-code-nolink: $(PRE_TARGETS)
748 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) nolink \
749 | REAL_RESULT="$(BCRESULT)" make_deps=yes \
750 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \
751 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
752 | dcnl: debug-code-nolink
753 |
754 | # generates byte-code libraries with debugging information
755 | debug-code-library: $(PRE_TARGETS)
756 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
757 | $(RES_CLIB) $(BCRESULT).cma \
758 | REAL_RESULT="$(BCRESULT)" make_deps=yes \
759 | CREATE_LIB=yes \
760 | OCAMLFLAGS="-g $(OCAMLFLAGS)" \
761 | OCAMLLDFLAGS="-g $(OCAMLLDFLAGS)"
762 | dcl: debug-code-library
763 |
764 | # generates byte-code for profiling
765 | profiling-byte-code: $(PRE_TARGETS)
766 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT) \
767 | REAL_RESULT="$(BCRESULT)" PROFILING="y" \
768 | make_deps=yes
769 | pbc: profiling-byte-code
770 |
771 | # generates native-code
772 |
773 | profiling-native-code: $(PRE_TARGETS)
774 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(NCRESULT) \
775 | REAL_RESULT="$(NCRESULT)" \
776 | REAL_OCAMLC="$(OCAMLOPT)" \
777 | PROFILING="y" \
778 | make_deps=yes
779 | pnc: profiling-native-code
780 |
781 | # generates byte-code libraries
782 | profiling-byte-code-library: $(PRE_TARGETS)
783 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
784 | $(RES_CLIB) $(BCRESULT).cma \
785 | REAL_RESULT="$(BCRESULT)" PROFILING="y" \
786 | CREATE_LIB=yes \
787 | make_deps=yes
788 | pbcl: profiling-byte-code-library
789 |
790 | # generates native-code libraries
791 | profiling-native-code-library: $(PRE_TARGETS)
792 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
793 | $(RES_CLIB) $(NCRESULT).cmxa \
794 | REAL_RESULT="$(NCRESULT)" PROFILING="y" \
795 | REAL_OCAMLC="$(OCAMLOPT)" \
796 | CREATE_LIB=yes \
797 | make_deps=yes
798 | pncl: profiling-native-code-library
799 |
800 | # packs byte-code objects
801 | pack-byte-code: $(PRE_TARGETS)
802 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) $(BCRESULT).cmo \
803 | REAL_RESULT="$(BCRESULT)" \
804 | PACK_LIB=yes make_deps=yes
805 | pabc: pack-byte-code
806 |
807 | # packs native-code objects
808 | pack-native-code: $(PRE_TARGETS)
809 | $(QUIET)$(MAKE) -r -f $(OCAMLMAKEFILE) \
810 | $(NCRESULT).cmx $(NCRESULT).o \
811 | REAL_RESULT="$(NCRESULT)" \
812 | REAL_OCAMLC="$(OCAMLOPT)" \
813 | PACK_LIB=yes make_deps=yes
814 | panc: pack-native-code
815 |
816 | # generates HTML-documentation
817 | htdoc: doc/$(RESULT)/html
818 |
819 | # generates Latex-documentation
820 | ladoc: doc/$(RESULT)/latex
821 |
822 | # generates PostScript-documentation
823 | psdoc: doc/$(RESULT)/latex/doc.ps
824 |
825 | # generates PDF-documentation
826 | pdfdoc: doc/$(RESULT)/latex/doc.pdf
827 |
828 | # generates all supported forms of documentation
829 | doc: htdoc ladoc psdoc pdfdoc
830 |
831 | ###########################################################################
832 | # LOW LEVEL RULES
833 |
834 | $(REAL_RESULT): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS) $(RESULTDEPS)
835 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) \
836 | $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \
837 | $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \
838 | $(REAL_IMPL)
839 |
840 | nolink: $(REAL_IMPL_INTF) $(OBJ_LINK)
841 |
842 | ifdef WIN32
843 | $(REAL_RESULT).dll: $(REAL_IMPL_INTF) $(OBJ_LINK)
844 | $(CAMLIDLDLL) $(CAMLIDLDLLFLAGS) $(OBJ_LINK) $(CLIBS) \
845 | -o $@ $(REAL_IMPL)
846 | endif
847 |
848 | %$(TOPSUFFIX): $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS)
849 | $(REAL_OCAMLFIND) $(OCAMLMKTOP) \
850 | $(OCAML_FIND_PACKAGES) $(OCAML_FIND_LINKPKG) \
851 | $(ALL_LDFLAGS) $(OBJS_LIBS) -o $@$(EXE) \
852 | $(REAL_IMPL)
853 |
854 | .SUFFIXES: .mli .ml .cmi .cmo .cmx .cma .cmxa .$(EXT_OBJ) \
855 | .mly .di .d .$(EXT_LIB) .idl %.oxridl .c .$(EXT_CXX) .h .so \
856 | .rep .zog .glade
857 |
858 | ifndef STATIC
859 | ifdef MINGW
860 | $(DLLSONAME): $(OBJ_LINK)
861 | $(CC) $(CFLAGS) $(CFLAGS_WIN32) $(OBJ_LINK) -shared -o $@ \
862 | -Wl,--whole-archive $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/lib%.a))) \
863 | $(OCAMLLIBPATH)/ocamlrun.a \
864 | -Wl,--export-all-symbols \
865 | -Wl,--no-whole-archive
866 | else
867 | ifdef MSVC
868 | $(DLLSONAME): $(OBJ_LINK)
869 | link /NOLOGO /DLL /OUT:$@ $(OBJ_LINK) \
870 | $(wildcard $(foreach dir,$(LIBDIRS),$(CLIBS:%=$(dir)/%.lib))) \
871 | $(OCAMLLIBPATH)/ocamlrun.lib
872 |
873 | else
874 | $(DLLSONAME): $(OBJ_LINK)
875 | $(OCAMLMKLIB) $(INCFLAGS) $(CLIBFLAGS) \
876 | -o $(CLIB_BASE) $(OBJ_LINK) $(CLIBS:%=-l%) \
877 | $(OCAMLMKLIB_FLAGS)
878 | endif
879 | endif
880 | endif
881 |
882 | ifndef LIB_PACK_NAME
883 | $(RESULT).cma: $(REAL_IMPL_INTF) $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS)
884 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \
885 | $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(REAL_IMPL)
886 |
887 | $(RESULT).cmxa $(RESULT).$(EXT_LIB): $(REAL_IMPL_INTF) $(EXTRADEPS) $(RESULTDEPS)
888 | $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \
889 | $(OCAMLNLDFLAGS) -o $@ $(REAL_IMPL)
890 | else
891 | ifdef BYTE_OCAML
892 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo: $(REAL_IMPL_INTF)
893 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmo $(REAL_IMPL)
894 | else
895 | $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx: $(REAL_IMPL_INTF)
896 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack -o $(LIB_PACK_NAME).cmx $(REAL_IMPL)
897 | endif
898 |
899 | $(RESULT).cma: $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmo $(MAKEDLL) $(EXTRADEPS) $(RESULTDEPS)
900 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -a $(ALL_LDFLAGS) \
901 | $(OBJS_LIBS) -o $@ $(OCAMLBLDFLAGS) $(LIB_PACK_NAME).cmo
902 |
903 | $(RESULT).cmxa $(RESULT).$(EXT_LIB): $(LIB_PACK_NAME).cmi $(LIB_PACK_NAME).cmx $(EXTRADEPS) $(RESULTDEPS)
904 | $(REAL_OCAMLFIND) $(OCAMLOPT) -a $(ALL_LDFLAGS) $(OBJS_LIBS) \
905 | $(OCAMLNLDFLAGS) -o $@ $(LIB_PACK_NAME).cmx
906 | endif
907 |
908 | $(RES_CLIB): $(OBJ_LINK)
909 | ifndef MSVC
910 | ifneq ($(strip $(OBJ_LINK)),)
911 | $(AR) rcs $@ $(OBJ_LINK)
912 | endif
913 | else
914 | ifneq ($(strip $(OBJ_LINK)),)
915 | lib -nologo -debugtype:cv -out:$(RES_CLIB) $(OBJ_LINK)
916 | endif
917 | endif
918 |
919 | .mli.cmi: $(EXTRADEPS)
920 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
921 | if [ -z "$$pp" ]; then \
922 | echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
923 | -c $(THREAD_FLAG) $(ANNOT_FLAG) \
924 | $(OCAMLFLAGS) $(INCFLAGS) $<; \
925 | $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
926 | -c $(THREAD_FLAG) $(ANNOT_FLAG) \
927 | $(OCAMLFLAGS) $(INCFLAGS) $<; \
928 | else \
929 | echo $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
930 | -c -pp \"$$pp $(PPFLAGS)\" $(THREAD_FLAG) $(ANNOT_FLAG) \
931 | $(OCAMLFLAGS) $(INCFLAGS) $<; \
932 | $(REAL_OCAMLFIND) $(INTF_OCAMLC) $(OCAML_FIND_PACKAGES) \
933 | -c -pp "$$pp $(PPFLAGS)" $(THREAD_FLAG) $(ANNOT_FLAG) \
934 | $(OCAMLFLAGS) $(INCFLAGS) $<; \
935 | fi
936 |
937 | .ml.cmi .ml.$(EXT_OBJ) .ml.cmx .ml.cmo: $(EXTRADEPS)
938 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
939 | if [ -z "$$pp" ]; then \
940 | echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
941 | -c $(ALL_OCAMLCFLAGS) $<; \
942 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
943 | -c $(ALL_OCAMLCFLAGS) $<; \
944 | else \
945 | echo $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
946 | -c -pp \"$$pp $(PPFLAGS)\" $(ALL_OCAMLCFLAGS) $<; \
947 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) $(OCAML_FIND_PACKAGES) \
948 | -c -pp "$$pp $(PPFLAGS)" $(ALL_OCAMLCFLAGS) $<; \
949 | fi
950 |
951 | ifdef PACK_LIB
952 | $(REAL_RESULT).cmo $(REAL_RESULT).cmx $(REAL_RESULT).o: $(REAL_IMPL_INTF) $(OBJ_LINK) $(EXTRADEPS)
953 | $(REAL_OCAMLFIND) $(REAL_OCAMLC) -pack $(ALL_LDFLAGS) \
954 | $(OBJS_LIBS) -o $@ $(REAL_IMPL)
955 | endif
956 |
957 | .PRECIOUS: %.ml
958 | %.ml: %.mll
959 | $(OCAMLLEX) $<
960 |
961 | .PRECIOUS: %.ml %.mli
962 | %.ml %.mli: %.mly
963 | $(OCAMLYACC) $(YFLAGS) $<
964 |
965 | .PRECIOUS: %.ml
966 | %.ml: %.rep
967 | $(CAMELEON_REPORT) $(CAMELEON_REPORT_FLAGS) -gen $<
968 |
969 | .PRECIOUS: %.ml
970 | %.ml: %.zog
971 | $(CAMELEON_ZOGGY) $(CAMELEON_ZOGGY_FLAGS) -impl $< > $@
972 |
973 | .PRECIOUS: %.ml
974 | %.ml: %.glade
975 | $(OCAML_GLADECC) $(OCAML_GLADECC_FLAGS) $< > $@
976 |
977 | .PRECIOUS: %.ml %.mli
978 | %.ml %.mli: %.oxridl
979 | $(OXRIDL) $<
980 |
981 | .PRECIOUS: %.ml %.mli %_stubs.c %.h
982 | %.ml %.mli %_stubs.c %.h: %.idl
983 | $(CAMLIDL) $(MAYBE_IDL_HEADER) $(IDLFLAGS) \
984 | $(CAMLIDLFLAGS) $<
985 | $(QUIET)if [ $(NOIDLHEADER) ]; then touch $*.h; fi
986 |
987 | .c.$(EXT_OBJ):
988 | $(OCAMLC) -c -cc "$(CC)" -ccopt "$(CFLAGS) \
989 | $(CFLAGS_WIN32) $(CINCFLAGS) $(CFLAG_O)$@ " $<
990 |
991 | .$(EXT_CXX).$(EXT_OBJ):
992 | $(CXX) -c $(CXXFLAGS) $(CINCFLAGS) -I'$(OCAMLLIBPATH)' \
993 | $< $(CFLAG_O)$@
994 |
995 | $(MLDEPDIR)/%.d: %.ml
996 | $(QUIET)echo making $@ from $<
997 | $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi
998 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
999 | if [ -z "$$pp" ]; then \
1000 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \
1001 | $(DINCFLAGS) $< > $@; \
1002 | else \
1003 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(OCAML_DEP_PACKAGES) \
1004 | -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \
1005 | fi
1006 |
1007 | $(BCDIDIR)/%.di $(NCDIDIR)/%.di: %.mli
1008 | $(QUIET)echo making $@ from $<
1009 | $(QUIET)if [ ! -d $(@D) ]; then mkdir -p $(@D); fi
1010 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
1011 | if [ -z "$$pp" ]; then \
1012 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) $(DINCFLAGS) $< > $@; \
1013 | else \
1014 | $(REAL_OCAMLFIND) $(OCAMLDEP) $(DEPFLAGS) \
1015 | -pp "$$pp $(PPFLAGS)" $(DINCFLAGS) $< > $@; \
1016 | fi
1017 |
1018 | doc/$(RESULT)/html: $(DOC_FILES)
1019 | rm -rf $@
1020 | mkdir -p $@
1021 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
1022 | if [ -z "$$pp" ]; then \
1023 | echo $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \
1024 | $(OCAMLDOC) -html -d $@ $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES); \
1025 | else \
1026 | echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -html -d $@ $(OCAMLDOCFLAGS) \
1027 | $(INCFLAGS) $(DOC_FILES); \
1028 | $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -html -d $@ $(OCAMLDOCFLAGS) \
1029 | $(INCFLAGS) $(DOC_FILES); \
1030 | fi
1031 |
1032 | doc/$(RESULT)/latex: $(DOC_FILES)
1033 | rm -rf $@
1034 | mkdir -p $@
1035 | $(QUIET)pp=`sed -n -e '/^#/d' -e 's/(\*pp \([^*]*\) \*)/\1/p;q' $<`; \
1036 | if [ -z "$$pp" ]; then \
1037 | echo $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) \
1038 | $(DOC_FILES) -o $@/doc.tex; \
1039 | $(OCAMLDOC) -latex $(OCAMLDOCFLAGS) $(INCFLAGS) $(DOC_FILES) \
1040 | -o $@/doc.tex; \
1041 | else \
1042 | echo $(OCAMLDOC) -pp \"$$pp $(PPFLAGS)\" -latex $(OCAMLDOCFLAGS) \
1043 | $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \
1044 | $(OCAMLDOC) -pp "$$pp $(PPFLAGS)" -latex $(OCAMLDOCFLAGS) \
1045 | $(INCFLAGS) $(DOC_FILES) -o $@/doc.tex; \
1046 | fi
1047 |
1048 | doc/$(RESULT)/latex/doc.ps: doc/$(RESULT)/latex
1049 | cd doc/$(RESULT)/latex && \
1050 | $(LATEX) doc.tex && \
1051 | $(LATEX) doc.tex && \
1052 | $(DVIPS) $(DVIPSFLAGS) doc.dvi -o $(@F)
1053 |
1054 | doc/$(RESULT)/latex/doc.pdf: doc/$(RESULT)/latex/doc.ps
1055 | cd doc/$(RESULT)/latex && $(PS2PDF) $(