├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── benchmarks ├── bench.sh ├── calc │ ├── calc.ml │ ├── dune │ ├── ext.ml │ ├── ext2.ml │ ├── lexer.mll │ ├── parser.mly │ ├── prio.ml │ └── simple.ml ├── dune ├── lexer.mll ├── parser.mly ├── seq.ml └── sexp │ ├── ast.ml │ ├── dune │ ├── err.ml │ ├── lexer.mll │ ├── lr.ml │ ├── parser.mly │ ├── rr.ml │ └── sexp.ml ├── dune ├── dune-project ├── examples ├── README.md ├── calc.ml ├── calc_ext.ml ├── calc_ext2.ml ├── calc_prio.ml ├── catalan.ml ├── dune ├── paragraphs.ml ├── sexp.ml ├── test.txt └── test_ext2.txt ├── lib ├── assoc.ml ├── assoc.mli ├── bench.ml ├── blank.ml ├── blank.mli ├── charset.ml ├── charset.mli ├── comb.ml ├── comb.mli ├── container.ml ├── container.mli ├── dune ├── grammar.ml ├── grammar.mli ├── hashtbl_eq.ml ├── hashtbl_eq.mli ├── heap.ml ├── heap.mli ├── index.mld ├── input.ml ├── input.mli ├── interpolate.ml ├── interpolate.mli ├── keywords.ml ├── keywords.mli ├── lex.ml ├── lex.mli ├── pos.ml ├── pos.mli ├── regexp.ml ├── regexp.mli ├── unionFind.ml ├── unionFind.mli ├── utf8.ml ├── utf8.mli ├── word_list.ml └── word_list.mli ├── pacomb.opam ├── ppx ├── dune └── ppx_pacomb.ml ├── tests ├── Break.ml ├── GraphemeBreakTest.txt ├── big_expr.ml ├── calc_factor.ml ├── calc_utf8.ml ├── dseq_test.ml ├── dune ├── hard.ml ├── ppx_test.ml ├── scan.ml └── test.ml └── tools └── sanity_check.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.install 3 | _build 4 | .merlin 5 | attic 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | install: test -e .travis-opam.sh || wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-opam.sh 3 | script: bash -ex .travis-opam.sh 4 | env: 5 | - OCAML_VERSION=4.02 6 | - OCAML_VERSION=4.03 7 | - OCAML_VERSION=4.04 8 | - OCAML_VERSION=4.05 9 | - OCAML_VERSION=4.06 10 | - OCAML_VERSION=4.07 11 | - OCAML_VERSION=4.08 12 | - OCAML_VERSION=4.09 13 | - OCAML_VERSION=4.10 14 | - OCAML_VERSION=4.11 15 | - OCAML_VERSION=4.12 16 | - OCAML_VERSION=4.13 17 | os: 18 | - freebsd 19 | - linux 20 | - osx 21 | arch: 22 | - amd64 23 | - arm64 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Christophe Raffalli & Rodolphe Lepigre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .PHONY: all 4 | all: 5 | dune build 6 | 7 | .PHONY: check 8 | check: 9 | tools/sanity_check.sh 10 | 11 | .PHONY: tests 12 | tests: all check 13 | dune runtest 14 | 15 | .PHONY: bench 16 | bench: all check 17 | ./benchmarks/bench.sh 18 | 19 | .PHONY: clean 20 | clean: 21 | dune clean 22 | rm -rf doc 23 | find -name \*.csv -exec rm {} \; 24 | 25 | .PHONY: install 26 | install: all 27 | dune install 28 | 29 | .PHONY: doc 30 | doc: 31 | dune build @doc 32 | 33 | .PHONY: install_doc 34 | install_doc: doc 35 | rsync -r --delete _build/default/_doc/_html/ ${HOME}/WWW2/Raffalli/pacomb/ 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PaComb: an efficient parsing library for OCaml 2 | ============================================== 3 | 4 | PaComb implements a representation of grammars with semantic actions (values 5 | returned as a result of parsing). Parsing is performed by compiling grammars 6 | defined with the `Grammar` module (or indirectly through a PPX extension) to 7 | the combinators of the `Combinator` module. The library offers _scannerless_ 8 | parsing, but the `Lex` module provides a notion of _terminals_ and _blanks_ 9 | that gives a simple way to write grammars in two phases, as usual. 10 | 11 | The main advantage of PaComb and similar solutions, compared to ocamlyacc, is 12 | that grammars (compiled or not) are __first-class values__. This allows using 13 | the full power of OCaml for manipulating grammars. For example, this is very 14 | useful when working with syntax extension mechanisms. 15 | 16 | Importantly, the __performance__ of PaComb is very good: it is only two to 17 | five times slower than parsers generated by ocamlyacc, which is a compiler. 18 | 19 | Defining languages using the `Grammar` module directly is cumbersome. For that 20 | reason, PaComb provides a BNF-like PPX syntax extension (enabled using the 21 | `-ppx pacomb.ppx` compilation flag). 22 | 23 | Complete documentation is available via ocamldoc (`make doc`). 24 | 25 | PaComb also supports self-extensible grammars, ambiguous grammars (with merge), 26 | late rejection of rules by raising exceptions from action code, priorities, and more.
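A grammar defined with the PPX is an ordinary OCaml value that is run through the `Grammar` module. The following minimal sketch reuses the `expr` grammar of the calculator example shown below (the input string is arbitrary), in the style of the programs in the `examples/` directory:

```
open Pacomb

(* treat spaces as the only blank characters *)
let blank = Blank.from_charset (Charset.singleton ' ')

let _ =
  (* Pos.handle_exception reports syntax errors with their position *)
  let v = Pos.handle_exception (Grammar.parse_string (expr Sum) blank) "1 + 2 * (3 - 4)" in
  Printf.printf "%g\n" v
```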
27 | 28 | A complete [documentation is available](https://raffalli.eu/pacomb/pacomb) 29 | 30 | As a teaser, here is the usual calculator example: 31 | 32 | ``` 33 | (* The three levels of priorities *) 34 | type p = Atom | Prod | Sum 35 | 36 | let%parser rec 37 | (* This includes each priority level in the next one *) 38 | expr p = Atom < Prod < Sum 39 | (* all other rules are selected by their priority level *) 40 | ; (p=Atom) (x::FLOAT) => x 41 | ; (p=Atom) '(' (e::expr Sum) ')' => e 42 | ; (p=Prod) (x::expr Prod) '*' (y::expr Atom) => x*.y 43 | ; (p=Prod) (x::expr Prod) '/' (y::expr Atom) => x/.y 44 | ; (p=Sum ) (x::expr Sum ) '+' (y::expr Prod) => x+.y 45 | ; (p=Sum ) (x::expr Sum ) '-' (y::expr Prod) => x-.y 46 | ``` 47 | -------------------------------------------------------------------------------- /benchmarks/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | make 4 | 5 | opts=--no-print-directory 6 | cd benchmarks 7 | 8 | echo "testing seq" 9 | dune exec $opts -- ./seq.exe 10 | 11 | echo "testing sexp" 12 | dune exec $opts -- ./sexp/sexp.exe 13 | 14 | echo "testing calc" 15 | dune exec $opts -- ./calc/calc.exe 16 | 17 | cd ../examples 18 | 19 | echo "testing catalan" 20 | dune exec $opts -- ./catalan.exe 80 2 21 | dune exec $opts -- ./catalan.exe 60 3 22 | -------------------------------------------------------------------------------- /benchmarks/calc/calc.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | let test = Array.length Sys.argv > 1 && Sys.argv.(1) = "--test" 4 | 5 | let rec gen_expr n p s ch = 6 | let print_string s = 7 | output_string ch s; String.length s 8 | in 9 | let rec seq op n atom = 10 | if n <= 0 then atom () 11 | else (atom () + print_string op + seq op (n-1) atom) 12 | in 13 | let paren atom = 14 | print_string ")" + atom () + print_string "(" 15 | in 16 | let int () = 17 | print_string (string_of_int (Random.int 0x20000000)) 18 | in 19 | if n <= 0 then int () 20 | else seq (if Random.bool () then " + " else " - ") 21 | s (fun () -> seq 22 | (if Random.bool () then "*" else "/") 23 | p (fun () -> 24 | if n = 1 then int () else 25 | paren (fun () -> gen_expr (n - 1) p s ch))) 26 | 27 | (* blanks *) 28 | let blank = Blank.from_charset (Charset.singleton ' ') 29 | 30 | let _ = 31 | let bench_simple = Bench.create () in 32 | let bench_prio = Bench.create () in 33 | let bench_ext = Bench.create () in 34 | let bench_ext2 = Bench.create () in 35 | let bench_yacc = Bench.create () in 36 | for n = (if test then 1 else 3) to (if test then 2 else 5) do 37 | for p = 2 to 4 do 38 | for s = 2 to (if n = 6 && p = 4 then 3 else 4) do 39 | let producer ch = let r = gen_expr n p s ch in Printf.fprintf ch "\n%!"; r in 40 | let producer2 ch = 41 | Printf.fprintf ch "rule 1.0 : Exp 1.0 Str \"*\" Exp 0.9 => Op2 \"*\" \n\ 42 | rule 1.0 : Exp 1.0 Str \"/\" Exp 0.9 => Op2 \"/\" \n\ 43 | rule 2.0 : Exp 2.0 Str \"+\" Exp 1.9 => Op2 \"+\" \n\ 44 | rule 2.0 : Exp 2.0 Str \"-\" Exp 1.9 => Op2 \"-\" \n\ 45 | "; 46 | producer ch 47 | in 48 | let size = Bench.size producer in 49 | let ((),ts,w) = Bench.parse_pipe bench_simple Simple.top blank size producer in 50 | Printf.printf "simple %d %d %d %.2f Mb in %.2fms %.2f Mb \n%!" n p s 51 | (float size /. 1024. /. 1024.) 52 | (1000. *. ts) (float w /. 1024. /. 1024. *. float Sys.word_size); 53 | let (_,tp,w) = Bench.parse_pipe bench_prio Prio.top blank size producer in 54 | Printf.printf "prio %d %d %d %.2f Mb in %.2fms %.2f Mb \n%!"
n p s 55 | (float size /. 1024. /. 1024.) 56 | (1000. *. tp) (float w /. 1024. /. 1024. *. float Sys.word_size); 57 | let (_,te,w) = Bench.parse_pipe bench_ext Ext.top blank size producer in 58 | Printf.printf "ext %d %d %d %.2f Mb in %.2fms %.2f Mb \n%!" n p s 59 | (float size /. 1024. /. 1024.) 60 | (1000. *. te) (float w /. 1024. /. 1024. *. float Sys.word_size); 61 | let ((),t2,w) = Bench.parse_pipe bench_ext2 Ext2.top blank size producer2 in 62 | Printf.printf "ext2 %d %d %d %.2f Mb in %.2fms %.2f Mb \n%!" n p s 63 | (float size /. 1024. /. 1024.) 64 | (1000. *. t2) (float w /. 1024. /. 1024. *. float Sys.word_size); 65 | let (_,ty,w) = Bench.yacc_pipe bench_yacc Parser.main Lexer.token size producer in 66 | Printf.printf "yacc %d %d %d %.2f Mb in %.2fms %.2f Mb \n%!" n p s 67 | (float size /. 1024. /. 1024.) 68 | (1000. *. ty) (float w /. 1024. /. 1024. *. float Sys.word_size); 69 | Printf.printf "simple/yacc : %f " (ts /. ty); 70 | Printf.printf "prio/yacc : %f " (tp /. ty); 71 | Printf.printf "ext/yacc : %f " (te /. ty); 72 | Printf.printf "ext2/yacc : %f " (t2 /. ty); 73 | Printf.printf "prio/simple: %f " (tp /. ts); 74 | Printf.printf "ext/simple: %f " (te /. ts); 75 | Printf.printf "ext2/simple: %f\n%!" (t2 /. ts); 76 | done 77 | done 78 | done; 79 | Bench.stats "simple" bench_simple; 80 | Bench.stats "prio " bench_prio; 81 | Bench.stats "ext " bench_ext; 82 | Bench.stats "ext2 " bench_ext2; 83 | Bench.stats "yacc " bench_yacc; 84 | if not test then 85 | begin 86 | Bench.csv bench_simple "simple.csv"; 87 | Bench.csv bench_prio "prio.csv"; 88 | Bench.csv bench_ext "ext.csv"; 89 | Bench.csv bench_ext2 "ext2.csv"; 90 | Bench.csv bench_yacc "yacc.csv" 91 | end 92 | -------------------------------------------------------------------------------- /benchmarks/calc/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name calc) 3 | (modules :standard) 4 | (preprocess (pps pacomb.ppx)) 5 | (libraries unix pacomb) 6 | (action (run ./calc.exe --test))) 7 | 8 | (rule 9 | (targets lexer.ml) 10 | (deps lexer.mll) 11 | (action (run ocamllex lexer.mll))) 12 | 13 | (rule 14 | (targets parser.ml parser.mli) 15 | (deps parser.mly) 16 | (action (run ocamlyacc parser.mly))) 17 | -------------------------------------------------------------------------------- /benchmarks/calc/ext.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | let eps = 1e-10 4 | 5 | type assoc = RightAssoc | LeftAssoc | NonAssoc 6 | 7 | let cs = Charset.(complement (from_string "0-9()")) 8 | 9 | let bins = Word_list.create ~cs () 10 | 11 | let _ = 12 | Word_list.add_ascii bins "*" (( *. ), 4.0, LeftAssoc); 13 | Word_list.add_ascii bins "/" (( /. ),4.0, LeftAssoc); 14 | Word_list.add_ascii bins "+" (( +. ),6.0, LeftAssoc); 15 | Word_list.add_ascii bins "-" (( -. ),6.0, LeftAssoc) 16 | 17 | let%parser op pmin pmax = 18 | ((f,p,a)::Word_list.word bins) => ( 19 | let good = match a with 20 | | NonAssoc -> pmin < p && p < pmax 21 | | LeftAssoc -> pmin <= p && p < pmax 22 | | RightAssoc -> pmin < p && p <= pmax 23 | in 24 | if not good then Lex.give_up (); 25 | let p = match a with 26 | | RightAssoc -> p 27 | | _ -> p -. 
1e-10 28 | in 29 | (p,f)) 30 | 31 | let%parser rec 32 | expr pmax = ((pe,e1)>:expr pmax) ((pop,b)>:op pe pmax) ((__,e2)::expr pop) 33 | => (pop, b e1 e2) 34 | ; (x::FLOAT) => (0.0,x) 35 | ; '(' (e::expr_top) ')' => (0.0,e) 36 | 37 | and expr_top = ((__,e)::expr 1000.0) => e 38 | 39 | let%parser rec top = () => () ; top (__::expr_top) '\n' => () 40 | -------------------------------------------------------------------------------- /benchmarks/calc/ext2.ml: -------------------------------------------------------------------------------- 1 | (* This code is to benchmark extensible grammar ... 2 | it is not the best code to do a calculator with an extensible grammar *) 3 | 4 | open Pacomb 5 | 6 | let get_op2 = function 7 | | "*" -> ( *. ) 8 | | "+" -> ( +. ) 9 | | "/" -> ( /. ) 10 | | "-" -> ( -. ) 11 | | "^" -> ( ** ) 12 | | _ -> failwith "invalid binary op" 13 | 14 | let get_op1 = function 15 | | "cos" -> cos 16 | | "sin" -> sin 17 | | "tan" -> tan 18 | | "ln" -> log 19 | | "exp" -> exp 20 | | _ -> failwith "invalid unary op" 21 | 22 | (** Bigger float = lower priority, 0.0 is for atomic expresion, 23 | so all priorities must be positive *) 24 | type prio = float 25 | 26 | (** list of all priorities, in decreasing order (lowest priority first). *) 27 | type prios = prio list 28 | 29 | (** a parsing rule: a grammar from an environment *) 30 | type 'a rule = env -> 'a Grammar.t 31 | 32 | and rules = (float * float rule list) list 33 | 34 | (** parsing environment: all rules and all prios sorted in 35 | decreasing order (lowest priority first). *) 36 | and env = { rules : rules; prios : prios } 37 | 38 | let empty_env = { rules = []; prios = [] } 39 | 40 | let add_prio p env = 41 | { env with prios = List.rev (List.sort_uniq compare (p::env.prios)) } 42 | 43 | (** get the next priority *) 44 | let next_prio p env = 45 | let rec fn = function 46 | | x::(y::_) when x = p -> y 47 | | x::l -> assert (x >= p); fn l 48 | | _ -> 0.0 49 | in 50 | fn env.prios 51 | 52 | (** get the priority nearest to p . For associativity, 53 | we will use [p - epsilon] to get the priority below p, 54 | and get_prio ill fetch the priority below p *) 55 | let get_prio p env = 56 | let rec fn = function 57 | | x::_ when x <= p -> x 58 | | _::l -> fn l 59 | | _ -> 0.0 60 | in 61 | fn env.prios 62 | 63 | (** the maximum priority *) 64 | let max_prio env = match env.prios with 65 | | x::_ -> x | [] -> 0.0 66 | 67 | (** add a rule with the given priority *) 68 | let add_rule prio r env = 69 | let old = try List.assoc prio env.rules with Not_found -> [] in 70 | let rules = (List.filter (fun (p,_) -> prio <> p) env.rules) in 71 | { env with rules = (prio, r::old) :: rules } 72 | 73 | (** get all the rule of a given priority *) 74 | let get_rule : prio -> env -> float Grammar.t = fun p env -> 75 | let rules = List.assoc p env.rules in 76 | let rules = List.map (fun r -> r env) rules in 77 | Grammar.alt rules 78 | 79 | let pr (_, p) = Printf.sprintf "%g" p 80 | 81 | (** the parsing for expression *) 82 | let%parser [@print_param pr] rec expr env (prio:prio) = 83 | (* constant *) 84 | (prio = 0.) (x::FLOAT) => x 85 | (* parenthesis, using max_prio *) 86 | ; (prio = 0.) '(' (x::expr env (max_prio env)) ')' => x 87 | (* incluse next priority level *) 88 | ; (prio > 0.) (x::expr env (next_prio prio env)) => x 89 | (* get all the rule for the level *) 90 | ; (prio > 0.) 
(x::get_rule prio env) => x 91 | 92 | (** a type of type *) 93 | type _ ty = 94 | Flt : float ty 95 | | Arr : 'a ty * 'b ty -> ('a -> 'b) ty 96 | 97 | (** the magic parsing : parse a BNF rule and return the parser 98 | for that BNF, parametrized by the current environment *) 99 | (** Remark: we need fake, dependant sequence (<:) because other 100 | with the construction of the grammar loops producing bigger 101 | and bigger types. Dependant sequence builds the grammar lazily *) 102 | let%parser rec rule : type a. a ty -> (env -> a Grammar.t) Grammar.t 103 | = fun t -> 104 | "Exp" (prio<:FLOAT) (r::rule (Arr(Flt,t))) => 105 | (fun env -> (x::expr env (get_prio prio env)) (f::r env) => f x) 106 | ; "Str" (s<:STRING_LIT) (r::rule t) => 107 | (fun env -> (STR s) (x::r env) => x) 108 | ; "=>" (a::action t) => (fun _ -> () => a) 109 | 110 | (** action, syntaxe style calculette HP *) 111 | and action : type a. a ty -> a Grammar.t 112 | = fun t -> 113 | "Cst" (x<:FLOAT) (f::action (Arr(Flt,t))) => f x 114 | ; (t =| Arr(Flt,t1)) "Op1" (s<:STRING_LIT) (f::action (Arr(Flt,t1))) => 115 | (let g = get_op1 s in (fun x -> f (g x) : a)) 116 | ; (t =| Arr(Flt,Arr(Flt,t1))) "Op2" (s<:STRING_LIT) (f::action (Arr(Flt,t1))) => 117 | (let g = get_op2 s in (fun x y -> f (g y x) : a)) 118 | ; (t =| Arr(Flt,Flt)) () => (fun x -> x) 119 | 120 | (** The command parsing a new rule *) 121 | let%parser new_rule env = 122 | "rule" (p::FLOAT) ":" (r::rule Flt) '\n' => 123 | let env = add_rule p r env in 124 | let env = add_prio p env in 125 | (env, ()) 126 | 127 | let%parser top_expr env = 128 | (x::expr env (max_prio env)) '\n' => x 129 | 130 | (** main parsing, right recursion with no action is ok now *) 131 | let%parser rec cmds env = 132 | () => () 133 | ; (top_expr env) (cmds env) => () 134 | ; ((env,()) >: new_rule env) (cmds env) => () 135 | 136 | let top = cmds empty_env 137 | -------------------------------------------------------------------------------- /benchmarks/calc/lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Parser (* The type token is defined in parser.mli *) 3 | exception Eof 4 | } 5 | rule token = parse 6 | [' ''\t''\r'] { token lexbuf } (* skip blanks *) 7 | | '\n' { EOL } 8 | | eof { EOF } 9 | | ['0'-'9']+(['.']['0'-'9']+)?(['e''E']['-']?['0'-'9']+)? as lxm 10 | { FLOAT(float_of_string lxm) } 11 | | '+' { PLUS } 12 | | '-' { MINUS } 13 | | '*' { TIMES } 14 | | '*' '*' { POW } 15 | | '/' { DIV } 16 | | '(' { LPAREN } 17 | | ')' { RPAREN } 18 | -------------------------------------------------------------------------------- /benchmarks/calc/parser.mly: -------------------------------------------------------------------------------- 1 | /* File parser.mly */ 2 | %token FLOAT 3 | %token PLUS MINUS TIMES DIV POW 4 | %token LPAREN RPAREN 5 | %token EOL EOF 6 | %left PLUS MINUS /* lowest precedence */ 7 | %left TIMES DIV /* medium precedence */ 8 | %right POW 9 | %nonassoc UMINUS /* highest precedence */ 10 | %start main /* the entry point */ 11 | %type main 12 | %% 13 | main: 14 | { () } 15 | | main top { () } 16 | ; 17 | top: expr EOL { $1 } 18 | ; 19 | expr: 20 | FLOAT { $1 } 21 | | LPAREN expr RPAREN { $2 } 22 | | expr PLUS expr { $1 +. $3 } 23 | | expr MINUS expr { $1 -. $3 } 24 | | expr TIMES expr { $1 *. $3 } 25 | | expr DIV expr { $1 /. $3 } 26 | | expr POW expr { $1 ** $3 } 27 | | MINUS expr %prec UMINUS { -. 
$2 } 28 | | PLUS expr %prec UMINUS { $2 } 29 | ; 30 | -------------------------------------------------------------------------------- /benchmarks/calc/prio.ml: -------------------------------------------------------------------------------- 1 | 2 | (* classical calculator example, with parameters priority level *) 3 | (* The three levels of priorities *) 4 | type p = Atom | Prod | Sum 5 | 6 | (* for printing, we provide a function to convert priorities to string *) 7 | let%parser rec 8 | (* This includes each priority level in the next one *) 9 | expr p = Atom < Prod < Sum 10 | (* all other rule are selected by their priority level *) 11 | ; (p=Atom) (x::FLOAT) => x 12 | ; (p=Atom) '(' (e::expr Sum) ')' => e 13 | ; (p=Prod) (x::expr Prod) '*' (y::expr Atom) => x*.y 14 | ; (p=Prod) (x::expr Prod) '/' (y::expr Atom) => x/.y 15 | ; (p=Sum ) (x::expr Sum ) '+' (y::expr Prod) => x+.y 16 | ; (p=Sum ) (x::expr Sum ) '-' (y::expr Prod) => x-.y 17 | 18 | let%parser rec top = () => () ; top (__::expr Sum) '\n' => () 19 | -------------------------------------------------------------------------------- /benchmarks/calc/simple.ml: -------------------------------------------------------------------------------- 1 | 2 | (* classical calculator example, with a non terminal for each priority level *) 3 | let%parser rec 4 | atom = (x::FLOAT) => x (* constant *) 5 | ; '(' (e::expr) ')' => e (* rule for parenthesis *) 6 | 7 | (* Here is the grammar for products *) 8 | and prod = (a::atom) => a 9 | ; (x::prod) '*' (y::atom) => x*.y 10 | ; (x::prod) '/' (y::atom) => x/.y 11 | 12 | (* and finally all remaining expressions *) 13 | and expr = (a::prod) => a 14 | ; (x::expr) '+' (y::prod) => x+.y 15 | ; (x::expr) '-' (y::prod) => x-.y 16 | 17 | let%parser rec top = () => () ; top (__::expr) '\n' => () 18 | -------------------------------------------------------------------------------- /benchmarks/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name seq) 3 | (modules :standard) 4 | (preprocess (pps pacomb.ppx)) 5 | (libraries unix pacomb) 6 | (action (run ./seq.exe --test))) 7 | 8 | (rule 9 | (targets lexer.ml) 10 | (deps lexer.mll) 11 | (action (run ocamllex lexer.mll))) 12 | 13 | (rule 14 | (targets parser.ml parser.mli) 15 | (deps parser.mly) 16 | (action (run ocamlyacc parser.mly))) 17 | -------------------------------------------------------------------------------- /benchmarks/lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Parser (* The type token is defined in parser.mli *) 3 | exception Eof 4 | } 5 | rule token = parse 6 | | 'a' { A } 7 | | eof { EOF } 8 | -------------------------------------------------------------------------------- /benchmarks/parser.mly: -------------------------------------------------------------------------------- 1 | /* File parser.mly */ 2 | %token A 3 | %token EOF 4 | %start lr_top /* the entry point */ 5 | %start rr_top /* the entry point */ 6 | %type lr_top 7 | %type rr_top 8 | %% 9 | lr: 10 | { () } 11 | | lr A { () } 12 | ; 13 | rr: 14 | { () } 15 | | A rr { () } 16 | ; 17 | lr_top: lr EOF { $1 } 18 | ; 19 | rr_top: rr EOF { $1 } 20 | ; 21 | -------------------------------------------------------------------------------- /benchmarks/seq.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | let test = Array.length Sys.argv > 1 && Sys.argv.(1) = "--test" 4 | 5 | let expr n ch = 6 | for _ = 1 to n do 7 | 
output_char ch 'a' 8 | done; 9 | n 10 | 11 | module Lr = struct 12 | let%parser rec lr = () => () 13 | ; lr 'a' => () 14 | 15 | let%parser top = lr EOF => () 16 | end 17 | 18 | module Ld = struct 19 | let%parser rec lr = () => () 20 | ; (__<:lr) 'a' => () 21 | 22 | let%parser top = lr EOF => () 23 | end 24 | 25 | module Rr = struct 26 | let%parser rec rr = () => () 27 | ; 'a' rr => () 28 | 29 | let%parser top = (x::rr) EOF => x 30 | end 31 | 32 | module Rrl = struct 33 | let%parser rec rr = () => lazy 0 34 | ; 'a' (lazy x::rr) => lazy (x+1) 35 | 36 | let%parser top = (x::rr) EOF => Lazy.force x 37 | end 38 | 39 | module Rd = struct 40 | let%parser rec rr = () => () 41 | ; (__<:'a') (__::rr) => () 42 | 43 | let%parser top = (x::rr) EOF => x 44 | end 45 | 46 | module Rdl = struct 47 | let%parser rec rr = () => lazy 0 48 | ; (__<:'a') (lazy x::rr) => lazy (x+1) 49 | 50 | let%parser top = (x::rr) EOF => Lazy.force x 51 | end 52 | 53 | module Lp = struct 54 | let%parser rec lr = () => () 55 | ; lr 'a' => let _ = _pos in () 56 | 57 | let%parser top = lr EOF => () 58 | end 59 | 60 | module Rp = struct 61 | let%parser rec rr = () => lazy () 62 | ; 'a' (lazy __::rr) => lazy (let _ = _pos in ()) 63 | 64 | let%parser top = (x::rr) EOF => Lazy.force x 65 | end 66 | 67 | 68 | let blank = Blank.none 69 | 70 | let _ = 71 | let bench_lr = Bench.create () in 72 | let bench_rr = Bench.create () in 73 | let bench_rrl = Bench.create () in 74 | let bench_ld = Bench.create () in 75 | let bench_rd = Bench.create () in 76 | let bench_rdl = Bench.create () in 77 | let bench_lp = Bench.create () in 78 | let bench_rp = Bench.create () in 79 | let bench_ly = Bench.create () in 80 | let bench_ry = Bench.create () in 81 | let bench_dum = Bench.create () in 82 | let producer0 ch = let r = expr 0 ch in Printf.fprintf ch "%!"; r in 83 | for n = 1 to (if test then 2 else 20) do 84 | let n = n * 200_000 in 85 | let producer ch = let r = expr n ch in Printf.fprintf ch "%!"; r in 86 | let size = Bench.size producer in 87 | let (_,tl,w) = Bench.parse_pipe bench_lr Lr.top blank size producer in 88 | Printf.printf "lr %d %.2f Mb in %.2fms %.2f Mb \n%!" n 89 | (float size /. 1024. /. 1024.) 90 | (1000. *. tl) (float w /. 1024. /. 1024. *. float Sys.word_size); 91 | let (_,tr,w) = Bench.parse_pipe bench_rr Rr.top blank size producer in 92 | Printf.printf "rr %d %.2f Mb in %.2fms %.2f Mb \n%!" n 93 | (float size /. 1024. /. 1024.) 94 | (1000. *. tr) (float w /. 1024. /. 1024. *. float Sys.word_size); 95 | let (_,trl,w) = Bench.parse_pipe bench_rrl Rrl.top blank size producer in 96 | Printf.printf "rrl %d %.2f Mb in %.2fms %.2f Mb \n%!" n 97 | (float size /. 1024. /. 1024.) 98 | (1000. *. trl) (float w /. 1024. /. 1024. *. float Sys.word_size); 99 | let (_,tld,w) = Bench.parse_pipe bench_ld Ld.top blank size producer in 100 | Printf.printf "ld %d %.2f Mb in %.2fms %.2f Mb \n%!" n 101 | (float size /. 1024. /. 1024.) 102 | (1000. *. tld) (float w /. 1024. /. 1024. *. float Sys.word_size); 103 | let (_,trd,w) = Bench.parse_pipe bench_rd Rd.top blank size producer in 104 | Printf.printf "rd %d %.2f Mb in %.2fms %.2f Mb \n%!" n 105 | (float size /. 1024. /. 1024.) 106 | (1000. *. trd) (float w /. 1024. /. 1024. *. float Sys.word_size); 107 | let (_,trdl,w) = Bench.parse_pipe bench_rdl Rdl.top blank size producer in 108 | Printf.printf "rdl %d %.2f Mb in %.2fms %.2f Mb \n%!" n 109 | (float size /. 1024. /. 1024.) 110 | (1000. *. trdl) (float w /. 1024. /. 1024. *. 
float Sys.word_size); 111 | let (_,tlp,w) = Bench.parse_pipe bench_lp Lp.top blank size producer in 112 | Printf.printf "lp %d %.2f Mb in %.2fms %.2f Mb \n%!" n 113 | (float size /. 1024. /. 1024.) 114 | (1000. *. tlp) (float w /. 1024. /. 1024. *. float Sys.word_size); 115 | let (_,trp,w) = Bench.parse_pipe bench_rp Rp.top blank size producer in 116 | Printf.printf "rp %d %.2f Mb in %.2fms %.2f Mb \n%!" n 117 | (float size /. 1024. /. 1024.) 118 | (1000. *. trp) (float w /. 1024. /. 1024. *. float Sys.word_size); 119 | let (_,tly,w) = Bench.yacc_pipe bench_ly Parser.lr_top Lexer.token size producer in 120 | Printf.printf "ly %d %.2f Mb in %.2fms %.2f Mb \n%!" n 121 | (float size /. 1024. /. 1024.) 122 | (1000. *. tly) (float w /. 1024. /. 1024. *. float Sys.word_size); 123 | let _ = Bench.yacc_pipe bench_dum Parser.lr_top Lexer.token 0 producer0 in 124 | let (_,try_,w) = Bench.yacc_pipe bench_ry Parser.rr_top Lexer.token size producer in 125 | Printf.printf "ry %d %.2f Mb in %.2fms %.2f Mb \n%!" n 126 | (float size /. 1024. /. 1024.) 127 | (1000. *. try_) (float w /. 1024. /. 1024. *. float Sys.word_size); 128 | let _ = Bench.yacc_pipe bench_dum Parser.rr_top Lexer.token 0 producer0 in 129 | Printf.printf "lr/ly: %f " (tl /. tly); 130 | Printf.printf "ry/ly: %f " (try_ /. tly); 131 | Printf.printf "rr/ly: %f " (tr /. tly); 132 | Printf.printf "ld/ly: %f " (tld /. tly); 133 | Printf.printf "rd/ly: %f " (trd /. tly); 134 | Printf.printf "lp/ly: %f " (tlp /. tly); 135 | Printf.printf "rp/ly: %f\n%!" (trp /. tly); 136 | Printf.printf "rr/lr: %f " (tr /. tl); 137 | Printf.printf "ld/lr: %f " (tld /. tl); 138 | Printf.printf "rd/lr: %f " (trd /. tl); 139 | Printf.printf "lp/lr: %f " (tlp /. tl); 140 | Printf.printf "rp/lr: %f " (trp /. tl); 141 | Printf.printf "rd/rr: %f " (trd /. tr); 142 | Printf.printf "rp/rr: %f\n%!" (trp /. 
tr); 143 | done; 144 | Bench.stats "lr " bench_lr; 145 | Bench.stats "ld " bench_ld; 146 | Bench.stats "rr " bench_rr; 147 | Bench.stats "rd " bench_rd; 148 | Bench.stats "lp " bench_lp; 149 | Bench.stats "rp " bench_rp; 150 | if not test then 151 | begin 152 | Bench.csv bench_lr "lr.csv"; 153 | Bench.csv bench_rr "rr.csv"; 154 | Bench.csv bench_ld "ld.csv"; 155 | Bench.csv bench_rd "rd.csv"; 156 | Bench.csv bench_lp "lp.csv"; 157 | Bench.csv bench_rp "rp.csv" 158 | end 159 | -------------------------------------------------------------------------------- /benchmarks/sexp/ast.ml: -------------------------------------------------------------------------------- 1 | type 'a sexp = { p: 'a; e : 'a sexp' } 2 | and 'a sexp' = 3 | | Idt of string 4 | | Lst of 'a sexp list 5 | -------------------------------------------------------------------------------- /benchmarks/sexp/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name sexp) 3 | (modules :standard) 4 | (preprocess (pps pacomb.ppx)) 5 | (libraries unix pacomb) 6 | (action (run ./sexp.exe --test))) 7 | 8 | (rule 9 | (targets lexer.ml) 10 | (deps lexer.mll) 11 | (action (run ocamllex lexer.mll))) 12 | 13 | (rule 14 | (targets parser.ml parser.mli) 15 | (deps parser.mly) 16 | (action (run ocamlyacc parser.mly))) 17 | -------------------------------------------------------------------------------- /benchmarks/sexp/err.ml: -------------------------------------------------------------------------------- 1 | open Ast 2 | 3 | let id = "[a-zA-Z_][a-zA-Z_0-9]*[']*" 4 | 5 | let%parser rec sexp 6 | = ERROR(["id";"("]) 7 | ; (x::RE id) => { p = _pos; e = Idt x } 8 | ; '(' (l::sexps) => (ERROR(")") ; ')' 9 | => { p = _pos; e = Lst l }) 10 | and sexps = () => [] 11 | ; (l::sexps) (e::sexp) => e::l 12 | 13 | let%parser top = (s::sexp) => s 14 | -------------------------------------------------------------------------------- /benchmarks/sexp/lexer.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Parser (* The type token is defined in parser.mli *) 3 | exception Eof 4 | } 5 | rule token = parse 6 | [' ' '\t' '\n' '\r'] { token lexbuf } (* skip blanks *) 7 | | eof { EOL } 8 | | ['a'-'z''A'-'Z''_']['a'-'z''A'-'Z''_''0'-'9']* as lxm { ID lxm } 9 | | '(' { LPAREN } 10 | | ')' { RPAREN } 11 | -------------------------------------------------------------------------------- /benchmarks/sexp/lr.ml: -------------------------------------------------------------------------------- 1 | open Ast 2 | 3 | let id = "[a-zA-Z_][a-zA-Z_0-9]*[']*" 4 | let%parser rec sexp 5 | = (x::RE id) => { p = _pos; e = Idt x } 6 | ; '(' (l::sexps) ')' => { p = _pos; e = Lst (List.rev l) } 7 | and sexps = () => [] 8 | ; (l::sexps) (e::sexp) => e::l 9 | 10 | let%parser top = (s::sexp) => s 11 | -------------------------------------------------------------------------------- /benchmarks/sexp/parser.mly: -------------------------------------------------------------------------------- 1 | /* File parser.mly */ 2 | %{ 3 | open Ast 4 | let mkloc e = { p= (Parsing.symbol_start_pos (), Parsing.symbol_end_pos ()) 5 | ; e} 6 | %} 7 | %token ID 8 | %token LPAREN RPAREN 9 | %token EOL 10 | %start main /* the entry point */ 11 | %type <(Lexing.position*Lexing.position) Ast.sexp> main 12 | %% 13 | 14 | main: 15 | sexp EOL { $1 } 16 | ; 17 | sexp: 18 | ID { mkloc (Ast.Idt $1) } 19 | | LPAREN sexps RPAREN { mkloc (Ast.Lst (List.rev $2)) } 20 | sexps: 21 | { [] } 22 | | sexps sexp { $2 :: $1 } 23 | 
-------------------------------------------------------------------------------- /benchmarks/sexp/rr.ml: -------------------------------------------------------------------------------- 1 | open Ast 2 | 3 | let id = "[a-zA-Z_][a-zA-Z_0-9]*[']*" 4 | 5 | let%parser rec sexp = (x::RE id) => { p = _pos; e = Idt x } 6 | ; '(' (lazy l::sexps) ')' => { p = _pos; e = Lst l } 7 | and sexps = () => lazy [] 8 | ; (e::sexp) (lazy l::sexps) => lazy (e::l) 9 | 10 | let%parser top = (s::sexp) => s 11 | -------------------------------------------------------------------------------- /benchmarks/sexp/sexp.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | let test = Array.length Sys.argv > 1 && Sys.argv.(1) = "--test" 4 | 5 | let random_id () = 6 | let c = Char.chr (Char.code 'a' + Random.int 26) in 7 | let n = string_of_int (Random.int 1000) in 8 | String.make 1 c ^ n 9 | 10 | let print_spaces ch lvl = 11 | output_string ch "\n"; 12 | for _ = 1 to lvl do 13 | output_string ch "\t" 14 | done; 15 | 1+lvl 16 | 17 | let sqrt n = Random.int (truncate ((float n) ** 0.7) + 1) 18 | 19 | let rec expr lvl n ch = 20 | let print_string s = 21 | output_string ch s; String.length s 22 | in 23 | if n <= 0 then 0 24 | else if n = 1 then print_string (random_id ()) 25 | else 26 | begin 27 | let x = print_spaces ch lvl in 28 | let lvl = lvl + 1 in 29 | let y = print_string "(" in 30 | let s = sqrt n in 31 | let n = n - 1 in 32 | let rec fn s n = 33 | if s <= 1 then expr lvl n ch 34 | else 35 | begin 36 | let q = n/2 in 37 | let n0 = n/s - q/2 + Random.int(min 1 q) in 38 | let n0 = max 1 (min n n0) in 39 | let x = expr lvl n0 ch in 40 | let y = print_string " " in 41 | let z = fn (s-1) (n - n0) in 42 | x + y + z 43 | end 44 | in 45 | let z = fn s n in 46 | let t = print_string ")" in 47 | x + y + z + t 48 | end 49 | 50 | (* blanks *) 51 | let blank = Blank.from_charset (Charset.from_string " \t\n") 52 | 53 | let _ = 54 | let bench_lr = Bench.create () in 55 | let bench_rr = Bench.create () in 56 | let bench_err = Bench.create () in 57 | let bench_yacc = Bench.create () in 58 | for n = 1 to (if test then 2 else 20) do 59 | let n = n * 50_000 in 60 | let producer ch = let r = expr 0 n ch in Printf.fprintf ch "\n%!"; r in 61 | let size = Bench.size producer in 62 | let (_,ts,w) = Bench.parse_pipe bench_lr Lr.top blank size producer in 63 | Printf.printf "lr %d %.2f Mb in %.2fms %.2f Mb \n%!" n 64 | (float size /. 1024. /. 1024.) 65 | (1000. *. ts) (float w /. 1024. /. 1024. *. float Sys.word_size); 66 | let (_,tr,w) = Bench.parse_pipe bench_rr Rr.top blank size producer in 67 | Printf.printf "rr %d %.2f Mb in %.2fms %.2f Mb \n%!" n 68 | (float size /. 1024. /. 1024.) 69 | (1000. *. tr) (float w /. 1024. /. 1024. *. float Sys.word_size); 70 | let (_,te,w) = Bench.parse_pipe bench_err Err.top blank size producer in 71 | Printf.printf "err %d %.2f Mb in %.2fms %.2f Mb \n%!" n 72 | (float size /. 1024. /. 1024.) 73 | (1000. *. te) (float w /. 1024. /. 1024. *. float Sys.word_size); 74 | let (_,ty,w) = Bench.yacc_pipe bench_yacc Parser.main Lexer.token size producer in 75 | Printf.printf "yacc %d %.2f Mb in %.2fms %.2f Mb \n%!" n 76 | (float size /. 1024. /. 1024.) 77 | (1000. *. ty) (float w /. 1024. /. 1024. *. float Sys.word_size); 78 | Printf.printf "lr/yacc : %f " (ts /. ty); 79 | Printf.printf "rr/yacc : %f " (tr /. ty); 80 | Printf.printf "err/yacc : %f " (te /. ty); 81 | Printf.printf "rr/lr: %f " (tr /. ts); 82 | Printf.printf "err/lr: %f\n%!" (te /. 
ts); 83 | done; 84 | Bench.stats "lr " bench_lr; 85 | Bench.stats "rr " bench_rr; 86 | Bench.stats "err " bench_err; 87 | Bench.stats "yacc " bench_yacc; 88 | if not test then 89 | begin 90 | Bench.csv bench_lr "lr.csv"; 91 | Bench.csv bench_rr "rr.csv"; 92 | Bench.csv bench_err "err.csv"; 93 | Bench.csv bench_yacc "yacc.csv" 94 | end 95 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (env 2 | (dev (flags :standard -w -9 -I +unix)) 3 | (release (flags :standard -w -9 -I +unix))) 4 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.9) 2 | (name pacomb) 3 | (allow_approximate_merlin) 4 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | This folder contains some useful examples: 2 | 3 | - calc_ext.ml : an extensible calculator where new operators can be defined with 4 | their priorities. 5 | -------------------------------------------------------------------------------- /examples/calc.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | (* classical calculator example, with a non-terminal for each priority level *) 4 | 5 | (* Here is the definition of the parser with the ppx syntax extension described 6 | in the documentation. 7 | 8 | Here, we deal with priorities by manually defining three different grammars. 9 | Starting with the grammar for atomic expressions. *) 10 | let%parser rec 11 | atom = (x::FLOAT) => x (* constant *) 12 | ; '(' (e::expr) ')' => e (* rule for parentheses *) 13 | 14 | (* Here is the grammar for products *) 15 | and prod = (a::atom) => a 16 | ; (x::prod) '*' (y::atom) => x*.y 17 | ; (x::prod) '/' (y::atom) => x/.y 18 | 19 | (* and finally all remaining expressions *) 20 | and expr = (a::prod) => a 21 | ; (x::expr) '+' (y::prod) => x+.y 22 | ; (x::expr) '-' (y::prod) => x-.y 23 | 24 | (* A subtlety: we want to parse expressions one by one and print the 25 | result. Pacomb needs to do things that require buffer examination after 26 | each token. So printing after parsing the newline does not work. 27 | A trick that works is to test for the newline without parsing it, 28 | using Grammar.test_after. Another solution would be to read each 29 | line with input_line and use Grammar.parse_string on the result. 30 | *) 31 | let nl _ b i _ _ = 32 | let (c,_,_) = Input.read b i in c = '\n' 33 | let%parser rec top = (t::Grammar.test_after nl expr) => Printf.printf "%g\n=> %!" t 34 | let%parser rec exprs = () => () ; exprs top '\n' => () 35 | 36 | (* parsing command line arguments, illustrating grammar printing *) 37 | let usage_msg = Printf.sprintf "%s [options]" Sys.argv.(0) 38 | 39 | let rec help () = 40 | Arg.usage spec usage_msg; 41 | Printf.eprintf "\nParsing with:\n\n%a\n%!"
42 | (fun ch -> Grammar.print_grammar ch) exprs 43 | 44 | and spec = [( "-help", Arg.Unit help, "print help message") 45 | ;("--help", Arg.Unit help, "print help message")] 46 | 47 | let _ = Arg.parse spec (fun s -> raise (Arg.Bad s)) usage_msg 48 | 49 | (* blanks *) 50 | let blank = Blank.from_charset (Charset.singleton ' ') 51 | 52 | let _ = 53 | try 54 | while true do 55 | let f () = 56 | Printf.printf "=> %!"; (* initial prompt *) 57 | (* no need to stack the buffer of in_channel and those of Pacomb. So 58 | file desciptor are preferred. *) 59 | Grammar.parse_fd exprs blank Unix.stdin; 60 | raise End_of_file 61 | in 62 | (* [Pos] module provides a function to handle exception with 63 | an optional argument to call for error (default is to exit with 64 | code 1 *) 65 | Pos.handle_exception ~error:(fun _ -> ()) f () 66 | done 67 | with 68 | End_of_file -> () 69 | -------------------------------------------------------------------------------- /examples/calc_ext.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Lex 3 | open Pos 4 | open Grammar 5 | (* this example illustrates various features: 6 | - extensible grammars 7 | - dependant sequences to deal with priorities (necessary for extensibility 8 | - error reporting 9 | 10 | 11 | Here is an example of accepted input: 12 | 13 | x ++ y priority 6 left associative = (x+y)/2 14 | -- x priority 1 = x - 1 15 | ++ x priority 1 = x + 1 16 | a = 2 17 | b(x) = sin(x) + cos(x) 18 | c = a ++ b(a) 19 | c + 5 20 | *) 21 | 22 | (* type of expressions *) 23 | type expr = 24 | | Cst of float 25 | | Idt of string * expr array 26 | 27 | (* and the values inthe environment *) 28 | type func = 29 | | Def of (expr*string array) 30 | | Op0 of float 31 | | Op1 of (float -> float) 32 | | Op2 of (float -> float -> float) 33 | | Op3 of (float -> float -> float -> float) 34 | 35 | (* The initial environment *) 36 | let init_env =[(("^",2) , (Op2 ( ** ))) 37 | ;(("*",2) , (Op2 ( *. ))) 38 | ;(("/",2) , (Op2 ( /. ))) 39 | ;(("+",2) , (Op2 ( +. ))) 40 | ;(("-",2) , (Op2 ( -. ))) 41 | ;(("-",1) , (Op1 (fun x -> -. 
x))) 42 | ;(("+",1) , (Op1 (fun x -> x))) 43 | ;(("e",0) , (Op0 (exp 1.0))) 44 | ;(("exp",1), (Op1 (exp ))) 45 | ;(("log",1), (Op1 (log ))) 46 | ;(("pi",0) , (Op0 (acos(-1.0)))) 47 | ;(("cos",1), (Op1 (cos ))) 48 | ;(("sin",1), (Op1 (sin ))) 49 | ;(("tan",1), (Op1 (tan ))) 50 | ] 51 | 52 | let env = 53 | let e = Hashtbl.create 32 in 54 | List.iter (fun (k,d) -> Hashtbl.add e k d) init_env; 55 | e 56 | 57 | exception Unbound of string * int 58 | 59 | (* and the evaluation function *) 60 | let rec eval env = function 61 | | Cst x -> x 62 | | Idt(id,args) -> 63 | try 64 | let args = Array.map (eval env) args in 65 | let f = Hashtbl.find env (id,Array.length args) in 66 | match f with 67 | | Def(f,params) -> 68 | let add i id = Hashtbl.add env (id,0) (Op0 args.(i)) in 69 | let remove _ id = Hashtbl.remove env (id,0) in 70 | Array.iteri add params; 71 | (try 72 | let r = eval env f in 73 | Array.iteri remove params; r 74 | with 75 | e -> Array.iteri remove params; raise e) 76 | | Op0(x) -> x 77 | | Op1(f) -> f args.(0) 78 | | Op2(f) -> f args.(0) args.(1) 79 | | Op3(f) -> f args.(0) args.(1) args.(2) 80 | 81 | with Not_found -> raise (Unbound (id, Array.length args)) 82 | 83 | (* parser for identifier, notice the error construct to report error messages, 84 | using regular expressions *) 85 | let%parser ident = (id::RE("[a-zA-Z][a-zA-Z0-9_]*")) => id 86 | ; ERROR("ident") 87 | 88 | (* tables giving the syntax class of each symbol *) 89 | type assoc = RightAssoc | LeftAssoc | NonAssoc 90 | 91 | let cs = Charset.(from_string "-&~^+=*/\\$!:") 92 | 93 | let infix_tbl = 94 | let t = Word_list.create ~cs () in 95 | Word_list.add_ascii t "^" ("^", 2.0, RightAssoc); 96 | Word_list.add_ascii t "*" ("*", 4.0, LeftAssoc); 97 | Word_list.add_ascii t "/" ("/", 4.0, LeftAssoc); 98 | Word_list.add_ascii t "+" ("+", 6.0, LeftAssoc); 99 | Word_list.add_ascii t "-" ("-", 6.0, LeftAssoc); 100 | t 101 | 102 | let prefix_tbl = 103 | let t = Word_list.create ~cs () in 104 | Word_list.add_ascii t "+" ("+", 5.0); 105 | Word_list.add_ascii t "-" ("-", 5.0); 106 | t 107 | 108 | (* parser for symbols, it uses Lex.give_up to reject some rule from the action 109 | code. *) 110 | let%parser op = 111 | (c::RE("[-&~^+=*/\\$!:]+\\(_[a-zA-Z0-9_]+\\)?")) 112 | => (if c = "=" then give_up ~msg:"= not valid as op bin" (); c) 113 | ; (ERROR "symbol") 114 | 115 | (* parser for infix symbol parametrized with the maximum and minimum 116 | priority. It returns the actual priority, minum eps for left and non 117 | associative symbols *) 118 | 119 | let eps = 1e-10 120 | 121 | let%parser infix pmin pmax = 122 | ((c,p,a)::Word_list.word infix_tbl) => 123 | let good = match a with 124 | | NonAssoc -> pmin < p && p < pmax 125 | | LeftAssoc -> pmin <= p && p < pmax 126 | | RightAssoc -> pmin < p && p <= pmax 127 | in 128 | if not good then give_up (); 129 | let p = match a with 130 | | RightAssoc -> p 131 | | _ -> p -. 1e-10 132 | in 133 | (p,c) 134 | 135 | (* parser for prefix symbol *) 136 | let%parser prefix pmax = 137 | ((c,p)::Word_list.word prefix_tbl) => 138 | let good = p <= pmax in 139 | if not good then give_up (); 140 | (p,c) 141 | 142 | (* some keywords *) 143 | let%parser opening = '(' => (); ERROR "closing parenthesis" 144 | let%parser closing = ')' => (); ERROR "closing parenthesis" 145 | let%parser comma = ',' => (); ERROR "comma" 146 | 147 | (* parser for expressions, using dependant sequence. 
when writing 148 | (p,x)>:grammar in a rule, p can be used in the rest of the rule while x can 149 | only be used in the rest of the grammar. 150 | 151 | Like infix and prefix, expression are parametrized with a priority (here a 152 | maximum and we use the priorities returned by the parsing of expressions, 153 | infix and prefix to deal with priority of whats coming next. 154 | *) 155 | let%parser rec 156 | expr pmax = ((pe,e1)>:expr pmax) ((pop,b)>:infix pe pmax) 157 | ((__,e2)::expr pop) => (pop, Idt(b,[|e1;e2|])) 158 | ; ((pop,b)>:prefix pmax) 159 | ((__,e1)::expr pop) => (pop, Idt(b,[|e1|])) 160 | ; (x::FLOAT) => (0.0,Cst x) 161 | ; opening (e::expr_top) closing => (0.0,e) 162 | ; (id::ident) (args::args) => (0.0, Idt(id,args)) 163 | 164 | (* the ppx extension has syntactic sugar for option and sequences. 165 | ~+ [comma] grammar denotes non empty lists with separator. *) 166 | and args = 167 | () => [||] 168 | ; opening (l:: ~+ [comma] expr_top) closing => Array.of_list l 169 | 170 | and expr_top = ((__,e)::expr 1000.0) => e 171 | 172 | (* here we define the keywords for parsing symbol definitions *) 173 | let%parser assoc = "none" => NonAssoc 174 | ; "left" => LeftAssoc 175 | ; "right" => RightAssoc 176 | ; ERROR("assoc: none | left | right") 177 | 178 | let%parser priority = (x::FLOAT) => x 179 | ; ERROR("float") 180 | 181 | let%parser eq = '=' => () ; ERROR("=") 182 | let%parser priority_kwd = "priority" => (); ERROR("priority keyword") 183 | let%parser assoc_kwd = "associative" => (); ERROR("associative keyword") 184 | 185 | (* list of parameters for definition of functions *) 186 | let%parser params = 187 | () => [||] 188 | ; opening (l:: ~+ [comma] ident) closing => Array.of_list l 189 | 190 | (* toplevel commands *) 191 | let%parser cmd = 192 | (e::expr_top) EOF 193 | => (Printf.printf "%f\n%!" (eval env e)) 194 | ; (id::ident) (params::params) eq (e::expr_top) EOF 195 | => Hashtbl.add env (id,Array.length params) (Def(e,params)) 196 | ; (id::op) (a1::ident) 197 | priority_kwd (p::priority) 198 | eq (e::expr_top) EOF 199 | => (let params = [|a1|] in 200 | Hashtbl.add env (id,Array.length params) (Def(e,params)); 201 | Word_list.add_ascii prefix_tbl id (id,p)) 202 | ; (a1::ident) (id::op) (a2::ident) 203 | priority_kwd (p::priority) 204 | (a::assoc) assoc_kwd 205 | eq (e::expr_top) EOF 206 | => (let params = [|a1;a2|] in 207 | Hashtbl.add env (id,Array.length params) (Def(e,params)); 208 | Word_list.add_ascii infix_tbl id (id,p,a)) 209 | 210 | (* blanks *) 211 | let blank = Blank.from_charset (Charset.singleton ' ') 212 | 213 | (* main loop *) 214 | let _ = 215 | try 216 | while true do 217 | let f () = 218 | try 219 | Printf.printf "=> %!"; 220 | let line = input_line stdin in 221 | parse_string cmd blank line 222 | with Unbound(s,n) -> 223 | Printf.eprintf "unbound %s with arity %d\n%!" s n 224 | in handle_exception ~error:(fun _ -> ()) f () 225 | done 226 | with 227 | End_of_file -> () 228 | -------------------------------------------------------------------------------- /examples/calc_ext2.ml: -------------------------------------------------------------------------------- 1 | (* This code is to benchmark extensible grammar ... 2 | it is not the best code to do a calculator with an extensible grammar *) 3 | 4 | open Pacomb 5 | 6 | (** Access to float functions for the action of rules *) 7 | let get_op2 = function 8 | | "*" -> ( *. ) 9 | | "+" -> ( +. ) 10 | | "/" -> ( /. ) 11 | | "-" -> ( -. 
) 12 | | "^" -> ( ** ) 13 | | "atan2" -> atan2 14 | | _ -> failwith "invalid binary op" 15 | 16 | let get_op1 = function 17 | | "sqrt" -> sqrt 18 | | "cos" -> cos 19 | | "acos" -> acos 20 | | "sin" -> sin 21 | | "asin" -> asin 22 | | "tan" -> tan 23 | | "atan" -> atan 24 | | "ln" -> log 25 | | "log" -> log10 26 | | "exp" -> exp 27 | | _ -> failwith "invalid unary op" 28 | 29 | (** Bigger float = lower priority, 0.0 is for atomic expresion, 30 | so all priorities must be positive *) 31 | type prio = float 32 | 33 | (** list of all priorities, in decreasing order (lowest priority first). *) 34 | type prios = prio list 35 | 36 | (** a parsing rule: a grammar from an environment *) 37 | type 'a rule = env -> 'a Grammar.t 38 | 39 | (** list of rule, with a name for each rule *) 40 | and rules = (float * (string * float rule) list) list 41 | 42 | (** parsing environment: all rules and all prios sorted in 43 | decreasing order (lowest priority first). *) 44 | and env = { rules : rules; prios : prios } 45 | 46 | let empty_env = { rules = []; prios = [] } 47 | 48 | let add_prio p env = 49 | { env with prios = List.rev (List.sort_uniq compare (p::env.prios)) } 50 | 51 | (** get the next priority *) 52 | let next_prio p env = 53 | let rec fn = function 54 | | x::(y::_) when x = p -> y 55 | | x::l -> assert (x >= p); fn l 56 | | _ -> 0.0 57 | in 58 | fn env.prios 59 | 60 | (** get the priority nearest to p . For associativity, we will use [p] of [ x | [] -> 0.0 67 | 68 | (** add a rule with the given priority *) 69 | let add_rule name prio r env = 70 | let old = try List.assoc prio env.rules with Not_found -> [] in 71 | let rules = (List.filter (fun (p,_) -> prio <> p) env.rules) in 72 | let env = add_prio prio env in 73 | { env with rules = (prio, (name,r)::old) :: rules } 74 | 75 | let rm_rule name env = 76 | let count = ref 0 in 77 | let rm l = List.filter (fun (n,_) -> if n = name then incr count; 78 | n <> name) l 79 | in 80 | let rules = List.map (fun (p,l) -> (p, rm l)) env.rules in 81 | let rules = List.filter (fun (_,l) -> l <> []) rules in 82 | let prios = List.rev (List.sort_uniq compare (List.map fst rules)) in 83 | { rules; prios }, !count 84 | 85 | (** get all the rule of a given priority *) 86 | let get_rule : prio -> env -> float Grammar.t = fun p env -> 87 | let rules = try List.assoc p env.rules with Not_found -> [] in 88 | let rules = List.map (fun (_,r) -> r env) rules in 89 | Grammar.alt rules 90 | 91 | let pr (_, p) = Printf.sprintf "%g" p 92 | 93 | (** the parsing for expression *) 94 | let%parser [@print_param pr] rec expr env (prio:prio) = 95 | (* constant *) 96 | (prio = 0.) (x::FLOAT) => x 97 | (* parenthesis, using max_prio *) 98 | ; (prio = 0.) '(' (x::expr env (max_prio env)) ')' => x 99 | (* incluse next priority level *) 100 | ; (prio > 0.) (x::expr env (next_prio prio env)) => x 101 | (* get all the rule for the level (including 0.0) *) 102 | ; (x::get_rule prio env) => x 103 | 104 | (** a type of type *) 105 | type _ ty = 106 | Flt : float ty 107 | | Arr : 'a ty * 'b ty -> ('a -> 'b) ty 108 | 109 | (** action of a given type , syntaxe, HP style *) 110 | (** Remark: we need fake, dependant sequence (<:) because otherwise 111 | the construction of the grammar loops producing bigger 112 | and bigger types. Dependant sequence builds the grammar lazily *) 113 | let%parser rec action : type a. 
a ty -> a Grammar.t 114 | = fun t -> 115 | "Cst" (x<:FLOAT) (f::action (Arr(Flt,t))) => f x 116 | ; (t =| Arr(Flt,t1)) "Op1" (s<:STRING_LIT) (f::action (Arr(Flt,t1))) => 117 | (let g = get_op1 s in (fun x -> f (g x) : a)) 118 | ; (t =| Arr(Flt,Arr(Flt,t1))) "Op2" (s<:STRING_LIT) (f::action (Arr(Flt,t1))) => 119 | (let g = get_op2 s in (fun x y -> f (g y x) : a)) 120 | ; (t =| Arr(Flt,Flt)) () => (fun x -> x) 121 | 122 | (** the magic parsing : parse a BNF rule and return the parser 123 | for that BNF, parametrized by the current environment *) 124 | let%parser rec rule : type a. a ty -> (env -> a Grammar.t) Grammar.t 125 | = fun t -> 126 | "Exp" (leq:: ~? [true] ("<" => false)) (prio<:FLOAT) (r::rule (Arr(Flt,t))) => 127 | (fun env -> (x::expr env (get_prio leq prio env)) (f::r env) => f x) 128 | ; "Str" (s<:STRING_LIT) (r::rule t) => 129 | (fun env -> (STR s) (x::r env) => x) 130 | ; "=>" (a::action t) => (fun _ -> () => a) 131 | 132 | (* A subtlety : we want to parse expression, one by one and print the 133 | result. Pacomb needs to do things that require buffer examination after 134 | each token. So printing after parsing the newline does not work. 135 | A trick that works is to test for the newline, not parsing it, 136 | using Grammar.test_after. Another solution would be to read each 137 | line with input_line and use Grammar.parse_string on the result. 138 | *) 139 | let nl _ b i _ _ = 140 | let (c,_,_) = Input.read b i in c = '\n' 141 | 142 | (** the command parsing an expression and printing it *) 143 | let%parser top_expr env = 144 | (x::Grammar.test_after nl (expr env (max_prio env))) => 145 | Printf.printf "%g\n=> %!" x 146 | 147 | (** reference only uses to keep the env in case of parse error *) 148 | let env_ref = ref empty_env 149 | 150 | (** The command parsing a new rule *) 151 | let%parser new_rule env = 152 | "rule" (n::STRING_LIT) (p::FLOAT) ":" (r::Grammar.test_after nl (rule Flt)) => 153 | let env = add_rule n p r env in 154 | env_ref := env; 155 | Printf.printf "new rule accepted\n=> %!"; 156 | (env, ()) 157 | 158 | (** The command parsing removing all rules with a given name *) 159 | let%parser rem_rule env = 160 | "remove" "rule" (n::STRING_LIT) => 161 | let (env, nb) = rm_rule n env in 162 | env_ref := env; 163 | Printf.printf "%d rule(s) removed\n=> %!" nb; 164 | (env, ()) 165 | 166 | (** main parsing, right recursion with no action is ok now *) 167 | let%parser rec cmds env = 168 | () => () 169 | ; (top_expr env) '\n' (cmds env) => () 170 | ; ((env,()) >: new_rule env) '\n' (cmds env) => () 171 | ; ((env,()) >: rem_rule env) '\n' (cmds env) => () 172 | 173 | let top = cmds empty_env 174 | 175 | (* blanks *) 176 | let blank = Blank.from_charset (Charset.singleton ' ') 177 | 178 | let _ = 179 | try 180 | while true do 181 | let f () = 182 | Printf.printf "=> %!"; (* initial prompt *) 183 | (* no need to stack the buffer of in_channel and those of Pacomb. So 184 | file desciptor are preferred. 
*) 185 | (* as we parse stdin, we need to keed the whole buffer in memory 186 | to have line and column number, ~rescan:false only give byte 187 | position *) 188 | Grammar.parse_fd (cmds !env_ref) blank Unix.stdin; 189 | raise End_of_file 190 | in 191 | (* [Pos] module provides a function to handle exception with 192 | an optional argument to call for error (default is to exit with 193 | code 1 *) 194 | Pos.handle_exception ~error:(fun _ -> ()) f () 195 | done 196 | with 197 | End_of_file -> () 198 | -------------------------------------------------------------------------------- /examples/calc_prio.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | (* This example (read calc.ml first) illustrates another way to handle 4 | priorities with parametric grammars. *) 5 | 6 | (* The three levels of priorities *) 7 | type p = Atom | Prod | Sum 8 | 9 | let string_of_prio = function 10 | | Atom -> "A" 11 | | Prod -> "P" 12 | | Sum -> "S" 13 | 14 | (* for printing, we provide a function to convert priorities to string *) 15 | let%parser [@print_param string_of_prio] rec 16 | (* This includes each priority level in the next one *) 17 | expr p = Atom < Prod < Sum 18 | (* all other rule are selected by their priority level *) 19 | ; (p=Atom) (x::FLOAT) => x 20 | ; (p=Atom) '(' (e::expr Sum) ')' => e 21 | ; (p=Prod) (x::expr Prod) '*' (y::expr Atom) => x*.y 22 | ; (p=Prod) (x::expr Prod) '/' (y::expr Atom) => x/.y 23 | ; (p=Sum ) (x::expr Sum ) '+' (y::expr Prod) => x+.y 24 | ; (p=Sum ) (x::expr Sum ) '-' (y::expr Prod) => x-.y 25 | 26 | (* A subtlety : we want to parse expression, one by one and print the 27 | result. Pacomb needs to do things that require buffer examination after 28 | each token. So printing after parsing the newline does not work. 29 | A trick that works is to test for the newline, not parsing it, 30 | using Grammar.test_after. Another solution would be to read each 31 | line with input_line and use Grammar.parse_string on the result. 32 | *) 33 | let nl _ b i _ _ = 34 | let (c,_,_) = Input.read b i in c = '\n' 35 | let%parser rec top = 36 | (t::Grammar.test_after nl (expr Sum)) => Printf.printf "%g\n=> %!" t 37 | let%parser rec exprs = 38 | () => () ; exprs top '\n' => () 39 | 40 | (* parsing command line arguments, illustrating grammar printing *) 41 | let usage_msg = Printf.sprintf "%s [options]" Sys.argv.(0) 42 | 43 | let rec help () = 44 | Arg.usage spec usage_msg; 45 | Printf.eprintf "\nParsing with:\n\n%a\n%!" 46 | (fun ch -> Grammar.print_grammar ch) exprs 47 | 48 | and spec = [( "-help", Arg.Unit help, "print help message") 49 | ;("--help", Arg.Unit help, "print help message")] 50 | 51 | let _ = Arg.parse spec (fun s -> raise (Arg.Bad s)) usage_msg 52 | 53 | (* blanks *) 54 | let blank = Blank.from_charset (Charset.singleton ' ') 55 | 56 | let _ = 57 | try 58 | while true do 59 | let f () = 60 | Printf.printf "=> %!"; (* initial prompt *) 61 | (* no need to stack the buffer of in_channel and those of Pacomb. 
So 62 | file desciptor are preferred *) 63 | Grammar.parse_fd exprs blank Unix.stdin; 64 | raise End_of_file 65 | in 66 | (* [Pos] module provides a function to handle exception with 67 | an optional argument to call for error (default is to exit with 68 | code 1 *) 69 | Pos.handle_exception ~error:(fun _ -> ()) f () 70 | done 71 | with 72 | End_of_file -> () 73 | -------------------------------------------------------------------------------- /examples/catalan.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | (* This (useless) example illustrate the use of merge on ambiguous grammars. We 4 | define two ambiguous grammars parsing sequences of characters to generate all 5 | binary and ternary trees. 6 | 7 | "catalan 10 2" will compute number of binary trees of size 0 to 10 8 | "catalan 10 3" will compute number of ternary trees of size 0 to 10 9 | 10 | *) 11 | 12 | (* A parser parsing arbitrary sequence of charaters 'a' as a set of binary 13 | trees. We use merge in case of ambiguity and, as [@merge ...] implies 14 | [@cache] pacomb parses only once each part of the input. We just return the 15 | number of trees. *) 16 | let%parser [@merge (+.)] rec bin_seq = 17 | () => 1.0 18 | ; (t1::bin_seq) 'a' (t2::bin_seq) => t1 *. t2 19 | 20 | let _ = Grammar.print_grammar stdout ~def:false bin_seq; print_newline () 21 | 22 | (* Idem for ternary tree, we need an internal cache to achieve polynomial 23 | complexity. *) 24 | let%parser [@merge (+.)] rec ter_seq = 25 | () => 1.0 26 | ; (t1::ter_seq) 'a' (t2t3::ter_seq_aux) => t1 *. t2t3 27 | 28 | and [@merge (+.)] ter_seq_aux = 29 | (t2::ter_seq) (t3::ter_seq) => t2 *. t3 30 | 31 | let _ = Grammar.print_grammar stdout ~def:false ter_seq; print_newline () 32 | 33 | (* To test, here is Catalan number, i.e. the number of binary trees of a given size *) 34 | let catalan = 35 | let memo = Hashtbl.create 128 in 36 | let rec fn n = 37 | if n <= 1 then 1.0 else 38 | try Hashtbl.find memo n 39 | with Not_found -> 40 | let r = ref 0.0 in 41 | for i = 0 to n-1 do 42 | r := fn i *. fn (n - i - 1) +. !r 43 | done; 44 | Hashtbl.add memo n !r; 45 | !r 46 | in 47 | fn 48 | 49 | (* idem for ternary trees *) 50 | let catalan3 = 51 | let memo = Hashtbl.create 128 in 52 | let rec fn n = 53 | if n <= 1 then 1.0 else 54 | try Hashtbl.find memo n 55 | with Not_found -> 56 | let r = ref 0.0 in 57 | for i = 0 to n-1 do 58 | for j = 0 to n-i-1 do 59 | r := fn i *. fn j *. fn (n - i - j - 1) +. !r 60 | done 61 | done; 62 | Hashtbl.add memo n !r; 63 | !r 64 | in 65 | fn 66 | 67 | (* parsing command line *) 68 | let catalan_max, branches = 69 | try 70 | if Array.length Sys.argv <> 3 then raise Not_found; 71 | int_of_string Sys.argv.(1), int_of_string Sys.argv.(2) 72 | with _ -> 73 | Printf.eprintf "usage: %s max_len [2|3]\n%!" Sys.argv.(0); 74 | exit 1 75 | 76 | let _ = 77 | Printf.printf "checking the number of parsetrees on an ambiguous example,\ 78 | using merge and cache\n%!"; 79 | let (p,f) = if branches = 2 then bin_seq, catalan 80 | else ter_seq, catalan3 81 | in 82 | let bench = Bench.create () in 83 | for i = 0 to catalan_max do 84 | let s = String.make i 'a' in 85 | let (t,dt,w) = Bench.parse_string bench p Blank.none s in 86 | let k = f i in 87 | Printf.printf "catalan(%d): %d => %e=%e in %.2fms %.2f Mb \n%!" 88 | branches i t k (1000. *. dt) (float w /. 1024. /. 1024. *. 
float Sys.word_size); 89 | done; 90 | Bench.stats "catalan " bench; 91 | Bench.csv bench (Printf.sprintf "catalan%d.csv" branches) 92 | -------------------------------------------------------------------------------- /examples/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name calc) 3 | (modules calc) 4 | (preprocess (pps pacomb.ppx)) 5 | (libraries unix pacomb) 6 | (deps ../tests/big_expr.exe calc.exe) 7 | (action 8 | (system "../tests/big_expr.exe 4 4 4 | ./calc.exe"))) 9 | 10 | (test 11 | (name calc_prio) 12 | (modules calc_prio) 13 | (preprocess (pps pacomb.ppx)) 14 | (libraries unix pacomb) 15 | (deps ../tests/big_expr.exe calc_prio.exe) 16 | (action 17 | (system "../tests/big_expr.exe 4 4 4 | ./calc_prio.exe"))) 18 | 19 | (test 20 | (name calc_ext) 21 | (modules calc_ext) 22 | (preprocess (pps pacomb.ppx)) 23 | (libraries unix pacomb) 24 | (deps ../tests/big_expr.exe calc_ext.exe) 25 | (action 26 | (system "../tests/big_expr.exe 3 4 4 | ./calc_ext.exe"))) 27 | 28 | (test 29 | (name calc_ext2) 30 | (modules calc_ext2) 31 | (preprocess (pps pacomb.ppx)) 32 | (libraries unix pacomb) 33 | (deps ../tests/big_expr.exe calc_ext2.exe test_ext2.txt) 34 | (action 35 | (system "(cat test_ext2.txt && ../tests/big_expr.exe 3 4 4) | ./calc_ext2.exe"))) 36 | 37 | (test 38 | (name sexp) 39 | (modules sexp) 40 | (preprocess (pps pacomb.ppx)) 41 | (libraries unix pacomb) 42 | (deps sexp.exe) 43 | (action 44 | (system "echo 'a b (c (d e) f) (g h)' | ./sexp.exe"))) 45 | 46 | (test 47 | (name paragraphs) 48 | (modules paragraphs) 49 | (preprocess (pps pacomb.ppx)) 50 | (libraries unix pacomb) 51 | (deps paragraphs.exe test.txt) 52 | (action 53 | (system "./paragraphs.exe < test.txt"))) 54 | 55 | (test 56 | (name catalan) 57 | (modules catalan) 58 | (preprocess (pps pacomb.ppx)) 59 | (libraries unix pacomb) 60 | (deps catalan.exe) 61 | (action 62 | (progn 63 | (system "./catalan.exe 30 2") 64 | (system "./catalan.exe 30 3")))) 65 | -------------------------------------------------------------------------------- /examples/paragraphs.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | let%parser word = 4 | (w::RE"[-a-zA-Z0-9']+") => w 5 | ; (p::RE"[.;,:!?]") => p 6 | 7 | (* blank with at most one newline for paragraphs *) 8 | let blank1 = Regexp.blank_regexp "[ \t\r]*\n?[ \t\r]*" 9 | 10 | (* general blank, no newline, we parse them as separator. *) 11 | let blank2 = Regexp.blank_regexp "[ \t\r]*" 12 | 13 | (* for paragraph, we use [Grammar.layout] to change the blank from [blank2] to 14 | [blank1] and we parse with blank1 after the paragraph to read the last 15 | newline at the end of each paragraph, the default would be to parse with 16 | [blank2] only after the paragraph. This way, the minimum number of newline left to 17 | parse as paragraph separation is 1. *) 18 | let%parser 19 | [@layout blank1 ~config:Blank.{ default_layout_config with 20 | new_blanks_after = true 21 | ; old_blanks_after = false }] 22 | paragraph = ((p:: ~+ word) => (p, p_pos)) 23 | 24 | (* paragraph separator, at least one newline *) 25 | let%parser sep = (~+ (CHAR('\n'))) => () 26 | 27 | (* text are list separated by sep and may have initial and final newlines *) 28 | let%parser text = (~? sep) (t:: ~* [sep] paragraph) (~? sep) => t 29 | 30 | (* we call the parser *) 31 | let _ = 32 | let t = Pos.handle_exception (Grammar.parse_channel text blank2) stdin in 33 | Format.printf "%d paragraphs\n%!" 
(List.length t); 34 | List.iteri (fun i (p,pos) -> Format.printf " paragraph %d at %a: %d word(s)\n%!" 35 | i 36 | (Pos.print_pos ~style:Short ()) 37 | pos 38 | (List.length p)) t 39 | -------------------------------------------------------------------------------- /examples/sexp.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | 3 | type sexp = { p: Pos.pos; e : sexp' } 4 | and sexp' = 5 | | Idt of string 6 | | Lst of sexp list 7 | 8 | let id = "[a-zA-Z_][a-zA-Z_0-9]*[']*" 9 | 10 | let%parser rec sexp 11 | = ERROR(["id";"("]) 12 | ; (x::RE id) => { p = _pos; e = Idt x } 13 | ; '(' (l::sexps) => (ERROR(")") ; ')' 14 | => { p = _pos; e = Lst (List.rev l) }) 15 | and sexps = () => [] 16 | ; (l::sexps) (e::sexp) => e::l 17 | 18 | (* blank c++ like end of line comments *) 19 | let blank = Blank.line_comments "\\\\" 20 | 21 | let _ = 22 | let f () = Grammar.parse_channel sexps blank stdin in 23 | Pos.handle_exception f () 24 | -------------------------------------------------------------------------------- /examples/test.txt: -------------------------------------------------------------------------------- 1 | Un permier paragraph; 2 | coucou. 3 | 4 | Puis un second. 5 | Avec deux phrases. 6 | 7 | Et 8 | un 9 | dernier. 10 | -------------------------------------------------------------------------------- /examples/test_ext2.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | (4) 4 | 5 5 | rule "suc" 1 : Str "S" Exp 1 => Cst 1 Op2 "+" 6 | rule "pre" 1 : Str "P" Exp 1 => Cst 1 Op2 "-" 7 | SPPS6 8 | rule "pow" 2 : Exp <2 Str "^" Exp 2 => Op2 "^" 9 | rule "mul" 3 : Exp 3 Str "*" Exp <3 => Op2 "*" 10 | rule "div" 3 : Exp 3 Str "/" Exp <3 => Op2 "/" 11 | rule "add" 4 : Exp 4 Str "+" Exp <4 => Op2 "+" 12 | rule "sub" 4 : Exp 4 Str "-" Exp <4 => Op2 "-" 13 | 3*3 14 | 2+2 15 | 2+3*3 16 | 2+3*5/2-1 17 | (5+5) 18 | SPS6 * 3 + S1 19 | remove rule "suc" 20 | remove rule "pre" 21 | 2^2^2 22 | 2^3^2 23 | (2^3)^2 24 | 2^(3^2) 25 | -------------------------------------------------------------------------------- /lib/assoc.ml: -------------------------------------------------------------------------------- 1 | type _ token = .. 2 | 3 | type ('a, 'b) eq = 4 | | Eq : ('a, 'a) eq 5 | | NEq : ('a, 'b) eq 6 | 7 | type 'a key = { tok : 'a token ; uid : int; eq : 'b. 'b token -> ('a, 'b) eq } 8 | 9 | (** To store keys in lists *) 10 | type any_key = K : 'a key -> any_key [@@unboxed] 11 | 12 | let key_count = ref 0 13 | let new_key : type a. unit -> a key = fun () -> 14 | let module M = struct type _ token += T : a token end in let open M in 15 | let eq : type b. b token -> (a, b) eq = function T -> Eq | _ -> NEq in 16 | let uid = !key_count in incr key_count; 17 | { tok = T ; uid; eq } 18 | 19 | let compare k1 k2 = compare k1.uid k2.uid 20 | 21 | module Make(T:sig type 'a data end) = struct 22 | open T 23 | 24 | type t = 25 | | Nil : t 26 | | Cns : 'a key * 'a data * t -> t 27 | 28 | let empty : t = Nil 29 | 30 | let length : t -> int = 31 | let rec length acc l = 32 | match l with 33 | | Nil -> acc 34 | | Cns(_,_,l) -> length (acc+1) l 35 | in 36 | length 0 37 | 38 | let add : type a. a key -> a data -> t -> t = fun k x l -> 39 | Cns(k,x,l) 40 | 41 | type iter = { f : 'a. 'a key -> 'a data -> unit } 42 | let rec iter : iter -> t -> unit = fun f l -> 43 | match l with 44 | | Nil -> () 45 | | Cns(j,x,l) -> f.f j x; iter f l 46 | 47 | let add_key : type a. 
a data -> t -> a key * t = fun x l -> 48 | let k = new_key () in (k, Cns(k,x,l)) 49 | 50 | let rec find : type a. a key -> t -> a data = fun k l -> 51 | match l with 52 | | Nil -> raise Not_found 53 | | Cns(j,x,l) -> match j.eq k.tok with Eq -> x | NEq -> find k l 54 | 55 | let rec mem : type a. a key -> t -> bool = fun k l -> 56 | match l with 57 | | Nil -> false 58 | | Cns(j,_,l) -> match k.eq j.tok with Eq -> true | NEq -> mem k l 59 | 60 | let rec remove : type a. a key -> t -> t = fun k l -> 61 | match l with 62 | | Nil -> raise Not_found 63 | | Cns(j,v,l) -> match k.eq j.tok with Eq -> l | NEq -> Cns(j,v,remove k l) 64 | 65 | let rec replace : type a. a key -> a data -> t -> t = fun k x l -> 66 | match l with 67 | | Nil -> Cns(k,x,Nil) 68 | | Cns(j,v,l) -> match k.eq j.tok with Eq -> Cns(j,x,l) 69 | | NEq -> Cns(j,v,replace k x l) 70 | 71 | let rec append : t -> t -> t = fun l1 l2 -> 72 | match l1 with 73 | | Nil -> l2 74 | | Cns(j,v,l) -> Cns(j,v,append l l2) 75 | end 76 | 77 | include Make(struct type 'a data = 'a end) 78 | -------------------------------------------------------------------------------- /lib/assoc.mli: -------------------------------------------------------------------------------- 1 | (** Dependant association lists. *) 2 | 3 | (** Standard equality type using a GADT. *) 4 | type ('a, 'b) eq = 5 | | Eq : ('a, 'a) eq 6 | | NEq : ('a, 'b) eq 7 | 8 | (** Type of tokens used to make keys unique, and carrying a type. This type is 9 | not intended to be extended by the user, hence it is private... but not 10 | declared priveta as it fails if 4.04 *) 11 | type _ token = .. 12 | 13 | (** Type of a key for a value of type ['a]. It contains a unique token and the 14 | corresponding (very efficient) equality test. *) 15 | type 'a key = { tok : 'a token ; uid:int; eq : 'b. 'b token -> ('a, 'b) eq } 16 | 17 | (** To store keys in lists *) 18 | type any_key = K : 'a key -> any_key [@@unboxed] 19 | 20 | (** [new_key ()] generates a new unique key for a value of type ['a]. *) 21 | val new_key : unit -> 'a key 22 | 23 | (** Type of an association list, where items may have different types *) 24 | type t 25 | 26 | (** [empty] is the empty association list. *) 27 | val empty : t 28 | 29 | (** compare keys by uid *) 30 | val compare : 'a key -> 'b key -> int 31 | 32 | (** [add k v l] inserts a new binding of [k] to [v] at the head of [l]. A 33 | previous binding of [k] will not be removed. Hence removing [k] will uncover 34 | a previous binding. *) 35 | val add : 'a key -> 'a -> t -> t 36 | 37 | (** [length l] returns the size of the association list [l]. *) 38 | val length : t -> int 39 | 40 | (** [add_key v l] is equivalent to [let k = new_key () in (k, add k v l)]. *) 41 | val add_key : 'a -> t -> 'a key * t 42 | 43 | (** [find k l] returns the latest inserted value with key [k] in list [l]. The 44 | exception [Not_found] is raised if there is none. *) 45 | val find : 'a key -> t -> 'a 46 | 47 | (** [mem k l] tells whether an element is mapped to [k] in the list [l]. *) 48 | val mem : 'a key -> t -> bool 49 | 50 | (** [remove k l] removes the latest inserted binding of the key [k] in [l]. If 51 | there is no such binding, then [Not_found] is raised. *) 52 | val remove : 'a key -> t -> t 53 | 54 | (** [replace k l] replaces a previous binding if it exists. If two bindings 55 | existed, only the first is removed to be replaced. *) 56 | val replace : 'a key -> 'a -> t -> t 57 | 58 | (** [append l1 l2] concatenate the two association lists. Duplicated are not 59 | removed. 
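    For a quick picture of how the whole interface fits together, here is a
    minimal sketch (the keys and the stored values are purely illustrative):
    {[
      let ki : int Assoc.key = Assoc.new_key () in
      let ks : string Assoc.key = Assoc.new_key () in
      let l = Assoc.add ki 42 (Assoc.add ks "foo" Assoc.empty) in
      assert (Assoc.find ki l = 42);
      assert (Assoc.find ks l = "foo");
      assert (not (Assoc.mem ki (Assoc.remove ki l)))
    ]}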
*) 60 | val append : t -> t -> t 61 | 62 | (** Iterator *) 63 | type iter = { f : 'a. 'a key -> 'a -> unit } 64 | val iter : iter -> t -> unit 65 | 66 | (** Variation on the above to associate value of type ['a data] to key of type 67 | ['a key]. The abobe function are obatained with [Make(struct type 'a data = 68 | 'a end)] *) 69 | module Make(T:sig type 'a data end) : 70 | sig 71 | type t 72 | val empty : t 73 | 74 | val add : 'a key -> 'a T.data -> t -> t 75 | val length : t -> int 76 | val add_key : 'a T.data -> t -> 'a key * t 77 | val find : 'a key -> t -> 'a T.data 78 | val mem : 'a key -> t -> bool 79 | val remove : 'a key -> t -> t 80 | val replace : 'a key -> 'a T.data -> t -> t 81 | val append : t -> t -> t 82 | 83 | type iter = { f : 'a. 'a key -> 'a T.data -> unit } 84 | val iter : iter -> t -> unit 85 | end 86 | -------------------------------------------------------------------------------- /lib/bench.ml: -------------------------------------------------------------------------------- 1 | 2 | let create () = 3 | (* when bench are created, no more compaction to keep it manual *) 4 | Gc.(set { (get ()) with max_overhead = 1_000_000 }); 5 | ref [] 6 | 7 | let parse_string bench gram blank s = 8 | let _ = Grammar.compile gram in 9 | let _ = Gc.compact () in 10 | let size = String.length s in 11 | let t0 = Unix.gettimeofday () in 12 | let r = Grammar.parse_string gram blank s in 13 | let t1 = Unix.gettimeofday () in 14 | let dw = (Gc.quick_stat ()).heap_words in 15 | let dt = t1 -. t0 in 16 | bench := (size, dt, float dw) :: !bench; 17 | (r, dt, dw) 18 | 19 | let parse_ch bench gram blank size ch = 20 | let _ = Grammar.compile gram in 21 | let _ = Gc.compact () in 22 | let t0 = Unix.gettimeofday () in 23 | let r = Grammar.parse_channel gram blank ch in 24 | let t1 = Unix.gettimeofday () in 25 | let dw = (Gc.quick_stat ()).heap_words in 26 | let dt = t1 -. t0 in 27 | bench := (size, dt, float dw) :: !bench; 28 | (r, dt, dw) 29 | 30 | let parse_fd bench gram blank size fd = 31 | let _ = Grammar.compile gram in 32 | let _ = Gc.compact () in 33 | let t0 = Unix.gettimeofday () in 34 | let r = Grammar.parse_fd gram blank fd in 35 | let t1 = Unix.gettimeofday () in 36 | let dw = (Gc.quick_stat ()).heap_words in 37 | let dt = t1 -. t0 in 38 | bench := (size, dt, float dw) :: !bench; 39 | (r, dt, dw) 40 | 41 | let parse_yacc main lex ch = 42 | let _ = Gc.compact () in 43 | let t0 = Unix.gettimeofday () in 44 | let lexbuf = Lexing.from_channel ch in 45 | let r = main lex lexbuf in 46 | let t1 = Unix.gettimeofday () in 47 | let dw = (Gc.quick_stat ()).heap_words in 48 | let dt = t1 -. t0 in 49 | (r, dt, dw) 50 | 51 | let parse_pipe bench gram blank size producer = 52 | let (fd_in, fd_out) = Unix.pipe () in 53 | let pid = Unix.fork () in 54 | if pid = 0 then 55 | (Unix.close fd_in; 56 | let ch_out = Unix.out_channel_of_descr fd_out in 57 | let _ = producer ch_out in 58 | close_out ch_out; 59 | exit 0) 60 | else 61 | (Unix.close fd_out; 62 | (parse_fd bench gram blank size fd_in)) 63 | 64 | let yacc_pipe bench entry lex size producer = 65 | let (fd_in, fd_out) = Unix.pipe () in 66 | let pid = Unix.fork () in 67 | if pid = 0 then 68 | (Unix.close fd_in; 69 | let ch_out = Unix.out_channel_of_descr fd_out in 70 | let _ = producer ch_out in 71 | close_out ch_out; 72 | exit 0) 73 | else 74 | (Unix.close fd_out; 75 | let ch_in = Unix.in_channel_of_descr fd_in in 76 | let (res_in, res_out) = Unix.pipe () in 77 | (* Parsing.clear_parser () does not seem to work! we fork again! 
*) 78 | let pid = Unix.fork () in 79 | if pid = 0 then 80 | (Unix.close res_in; 81 | let ch_out = Unix.out_channel_of_descr res_out in 82 | let res = parse_yacc entry lex ch_in in 83 | output_value ch_out res; 84 | flush ch_out; 85 | close_out ch_out; exit 0) 86 | else 87 | (Unix.close res_out; 88 | let ch_in = Unix.in_channel_of_descr res_in in 89 | let ((_,dt,dw) as r) = input_value ch_in in 90 | bench := (size, dt, float dw) :: !bench; 91 | close_in ch_in; 92 | r)) 93 | 94 | let size producer = 95 | let null = open_out "/dev/null" in 96 | let size = producer null in 97 | close_out null; 98 | size 99 | 100 | 101 | (* ln(C N^e) = c + e ln(N) + f ln(ln(N)) *) 102 | module Base = struct 103 | type input = int 104 | let base = [| (fun _ -> 1.0) 105 | ; (fun n -> log (float (n + 1))) 106 | |] 107 | end 108 | 109 | module Interpolate = Interpolate.Make(Base) 110 | 111 | let corr samples = 112 | let nb = float (Array.length samples) in 113 | let sx, sy = 114 | Array.fold_left (fun (sx,sy) (n,y) -> 115 | let x = log (float (1+n)) in 116 | (sx +. x, sy +. y)) 117 | (0.,0.) samples 118 | in 119 | let ax = sx /. nb in 120 | let ay = sy /. nb in 121 | let dxy, dx2, dy2 = 122 | Array.fold_left (fun (dxy,dx2,dy2) (n,y) -> 123 | let x = log (float (1+n)) in 124 | ( dxy +. (x -. ax) *. (y -. ay) 125 | , dx2 +. (x -. ax) *. (x -. ax) 126 | , dy2 +. (y -. ay) *. (y -. ay))) 127 | (0.,0.,0.) samples 128 | in 129 | dxy /. sqrt (dx2 *. dy2) 130 | 131 | 132 | let stats msg bench = 133 | let open Interpolate in 134 | let time = Array.of_list (List.map (fun (n,t,_) -> (n, log(t))) !bench) in 135 | let coefs = compute_coefs time in 136 | let c = corr time in 137 | let coefs = get coefs in 138 | Printf.printf "%s Time ~ %g N^%g (corr: %.2f)\n%!" 139 | msg (exp coefs.(0)) coefs.(1) c; 140 | let space = Array.of_list (List.map (fun (n,_,s) -> (n, log(s))) !bench) in 141 | let coefs = compute_coefs space in 142 | let c = corr space in 143 | let coefs = get coefs in 144 | Printf.printf "%s Space ~ %g N^%g (corr: %.2f)\n%!" 145 | msg (exp coefs.(0)) coefs.(1) c 146 | 147 | let csv bench file = 148 | let replace = String.map (function '.' -> ',' | c -> c) in 149 | let ch = open_out file in 150 | let prf ch x = 151 | output_string ch (replace (Printf.sprintf "%f" x)) 152 | in 153 | let m = List.length !bench + 3 in 154 | Printf.fprintf ch "pente(t) corr(t) pente(s) corr(s)\n%!"; 155 | Printf.fprintf ch "N ln(1+N) T ln(T) S ln(S)\n%!"; 156 | Printf.fprintf ch "=pente($D$4:$D$%d;$B$4:$B$%d) " 157 | m m; 158 | Printf.fprintf ch "=coefficient.correlation($D$4:$D$%d;$B$4:$B$%d) " 159 | m m; 160 | Printf.fprintf ch "=pente($F$4:$F$%d;$B$4:$B$%d) " 161 | m m; 162 | Printf.fprintf ch "=coefficient.correlation($F$4:$F$%d;$B$4:$B$%d)\n%!" 163 | m m; 164 | List.iter (fun (n, t, s) -> 165 | Printf.fprintf ch "%d %a %a %a %a %a\n%!" 
n 166 | prf (log (float (1 + n))) 167 | prf t 168 | prf (log t) 169 | prf s 170 | prf (log s)) (List.rev !bench); 171 | close_out ch 172 | -------------------------------------------------------------------------------- /lib/blank.ml: -------------------------------------------------------------------------------- 1 | (** Functions managing blanks *) 2 | 3 | type buf = Input.buffer 4 | type idx = Input.idx 5 | 6 | (** A blank function is just a function progressing in a buffer *) 7 | type blank = buf -> idx -> buf * idx 8 | type t = blank 9 | 10 | (** Use when you have no blank chars *) 11 | let none : blank = fun s n -> (s,n) 12 | 13 | (** Blank from a charset *) 14 | let from_charset : Charset.t -> blank = 15 | fun cs s n -> 16 | let rec fn s n = 17 | let (c,s',n') = Input.read s n in 18 | if Charset.mem cs c then fn s' n' else (s,n) 19 | in 20 | fn s n 21 | 22 | (** Blank from a terminal *) 23 | let from_terminal : 'a Lex.t -> blank = 24 | fun t s n -> 25 | try 26 | let (_,s,n) = t.f s n in 27 | (s,n) 28 | with Lex.NoParse -> (s,n) 29 | 30 | (** blank with c++/lisp/shell like comments *) 31 | let line_comments : ?cs:Charset.t -> string -> blank = 32 | fun ?(cs=Charset.from_string " \t\n\r") start_comment -> 33 | let start_comment = (Lex.string start_comment).f in 34 | fun s n -> 35 | let rec fn s n = 36 | let (c,s',n') = Input.read s n in 37 | if Charset.mem cs c then fn s' n' else 38 | try 39 | let (_,s,n) = start_comment s n in 40 | let rec gn s n = 41 | let (c,s',n') = Input.read s n in 42 | if c <> '\n' && c <> '\r' then gn s' n' 43 | else fn s n 44 | in 45 | gn s n 46 | with Lex.NoParse -> (s,n) 47 | in 48 | fn s n 49 | 50 | (** Layout records *) 51 | 52 | type layout_config = 53 | { old_blanks_before : bool 54 | (** Ignoring blanks with the old blank function before parsing? *) 55 | ; new_blanks_before : bool 56 | (** Then ignore blanks with the new blank function (before parsing)? *) 57 | ; new_blanks_after : bool 58 | (** Use the new blank function one last time before resuming old layout? *) 59 | ; old_blanks_after : bool 60 | (** Use then the old blank function one last time as well? *) } 61 | 62 | let default_layout_config : layout_config = 63 | { old_blanks_before = true 64 | ; new_blanks_before = false 65 | ; new_blanks_after = false 66 | ; old_blanks_after = true } 67 | -------------------------------------------------------------------------------- /lib/blank.mli: -------------------------------------------------------------------------------- 1 | (* {1 Blank: ignoring insignificant characters} 2 | 3 | It provides functions to eliminate "blank" characteres. *) 4 | 5 | 6 | (** Position in a buffer is a [Input.buffer] together with an index 7 | [Input.pos]. *) 8 | type buf = Input.buffer 9 | type idx = Input.idx 10 | 11 | (** A blank function is just a function progressing in a buffer *) 12 | type blank = buf -> idx -> buf * idx 13 | type t = blank 14 | 15 | (** {2 Functions managing blanks} *) 16 | 17 | (** Use when you have no blank chars *) 18 | val none : blank 19 | 20 | (** Blank from a charset *) 21 | val from_charset : Charset.t -> blank 22 | 23 | (** Blank from a terminal *) 24 | val from_terminal : 'a Lex.t -> blank 25 | 26 | (** [line_comments s] Blank with standard spaces and line starting with [s]. 27 | [cs] defaults to [Charset.from_string " \t\n\r"] *) 28 | val line_comments : ?cs:Charset.t -> string -> blank 29 | 30 | (* {2 records for layout (i.e. 
blank function) change} *) 31 | 32 | type layout_config = 33 | { old_blanks_before : bool 34 | (** Ignoring blanks with the old blank function before parsing? *) 35 | ; new_blanks_before : bool 36 | (** Then ignore blanks with the new blank function (before parsing)? *) 37 | ; new_blanks_after : bool 38 | (** Use the new blank function one last time before resuming old layout? *) 39 | ; old_blanks_after : bool 40 | (** Use then the old blank function one last time as well? *) } 41 | 42 | (** Default configuration, parsing with the old blanks before (i.e., the field 43 | [old_blanks_before] is [true]), and the new blanks after (i.e., the field 44 | [old_blanks_after] is also [true]). The other two fields are [false]. *) 45 | val default_layout_config : layout_config 46 | -------------------------------------------------------------------------------- /lib/charset.ml: -------------------------------------------------------------------------------- 1 | type charset = int array 2 | type t = charset 3 | 4 | let mask, shift, size = 5 | match Sys.word_size with 6 | | 32 -> 15, 4, 256 / 16 7 | | 64 -> 31, 5, 256 / 32 8 | | _ -> assert false (* Cannot happen... *) 9 | 10 | let compare = Stdlib.compare 11 | let equal = (=) 12 | 13 | let empty = Array.make size 0 14 | let full = Array.make size (-1) 15 | 16 | let complement = Array.map ((lxor) (-1)) 17 | 18 | let mem cs c = 19 | let i = Char.code c in 20 | cs.(i lsr shift) land (1 lsl (i land mask)) <> 0 21 | 22 | let addq cs c = 23 | let i = Char.code c in 24 | cs.(i lsr shift) <- cs.(i lsr shift) lor (1 lsl (i land mask)) 25 | 26 | let add cs c = 27 | let i = Char.code c in 28 | let cs = Array.copy cs in 29 | cs.(i lsr shift) <- cs.(i lsr shift) lor (1 lsl (i land mask)); 30 | cs 31 | 32 | let range cmin cmax = 33 | let res = ref empty in 34 | for i = Char.code cmin to Char.code cmax do 35 | res := add !res (Char.chr i) 36 | done; !res 37 | 38 | let delq cs c = 39 | let i = Char.code c in 40 | cs.(i lsr shift) <- cs.(i lsr shift) land (lnot (1 lsl (i land mask))) 41 | 42 | let del cs c = 43 | let i = Char.code c in 44 | let cs = Array.copy cs in 45 | cs.(i lsr shift) <- cs.(i lsr shift) land (lnot (1 lsl (i land mask))); 46 | cs 47 | 48 | let union cs1 cs2 = 49 | Array.mapi (fun i x -> x lor cs2.(i)) cs1 50 | 51 | let singleton = 52 | let tbl = Array.init 256 (fun i -> add empty (Char.chr i)) in 53 | fun c -> tbl.(Char.code c) 54 | 55 | let copy = Array.copy 56 | 57 | let from_string s = 58 | let rec build cs l = 59 | match l with 60 | | [] -> cs 61 | | '-' :: '-' :: _ -> invalid_arg "bad charset description" 62 | | '-' :: l -> build (add cs '-') l 63 | | _ :: '-' :: '-' :: _ -> invalid_arg "bad charset description" 64 | | c1 :: '-' :: c2 :: l -> build (union cs (range c1 c2)) l 65 | | c :: l -> build (add cs c) l 66 | in 67 | let string_to_list s = 68 | let l = ref [] in 69 | String.iter (fun c -> l := c :: !l) s; 70 | List.rev !l 71 | in 72 | build empty (string_to_list s) 73 | 74 | let show cs = 75 | let has_range min max = 76 | let has_all = ref true in 77 | for i = (Char.code min) to (Char.code max) do 78 | if not (mem cs (Char.chr i)) then has_all := false 79 | done; !has_all 80 | in 81 | if cs = full then "" 82 | else if cs = empty then "" 83 | else 84 | let res = ref "" in 85 | let add_all min max = 86 | for i = min to max do 87 | if mem cs (Char.chr i) then 88 | res := !res ^ (Char.escaped (Char.chr i)) 89 | done 90 | in 91 | let has_all_nums = has_range '0' '9' in 92 | let has_all_upper = has_range 'A' 'Z' in 93 | let has_all_lower = 
has_range 'a' 'z' in 94 | (* Before character '0' *) 95 | add_all 0 (Char.code '0' - 1); 96 | (* Numbers. *) 97 | if has_all_nums then res := !res ^ "0-9" 98 | else add_all (Char.code '0') (Char.code '9'); 99 | (* Before character 'A' *) 100 | add_all (Char.code '9' + 1) (Char.code 'A' - 1); 101 | (* Uppercase letters. *) 102 | if has_all_upper then res := !res ^ "A-Z" 103 | else add_all (Char.code 'A') (Char.code 'Z'); 104 | (* Before character 'a' *) 105 | add_all (Char.code 'Z' + 1) (Char.code 'a' - 1); 106 | (* Lowercase letters. *) 107 | if has_all_lower then res := !res ^ "a-z" 108 | else add_all (Char.code 'a') (Char.code 'z'); 109 | (* After character 'z'. *) 110 | add_all (Char.code 'z' + 1) 255; 111 | !res 112 | 113 | let pp ff cs = 114 | Format.pp_print_string ff (show cs) 115 | 116 | let show_full cs = 117 | let res = ref "" in 118 | for i = 0 to 255 do 119 | if mem cs (Char.chr i) then 120 | res := !res ^ (Char.escaped (Char.chr i)) 121 | done; !res 122 | 123 | let pp_full ff cs = 124 | Format.pp_print_string ff (show_full cs) 125 | -------------------------------------------------------------------------------- /lib/charset.mli: -------------------------------------------------------------------------------- 1 | (** A module providing efficient character sets. *) 2 | 3 | (** {2 Type} *) 4 | 5 | (** The abstract type for a character set. *) 6 | type charset 7 | 8 | (** Synonym of [charset]. *) 9 | type t = charset 10 | 11 | (** {2 Charset construction} *) 12 | 13 | (** The empty character set. *) 14 | val empty : charset 15 | 16 | (** The full character set. *) 17 | val full : charset 18 | 19 | (** [singleton c] returns a charset containing only [c]. *) 20 | val singleton : char -> charset 21 | 22 | (** [range cmin cmax] returns the charset containing all the characters 23 | between [cmin] and [cmax]. *) 24 | val range : char -> char -> charset 25 | 26 | (** [from_string s] returns the charset corresponding to the description 27 | string [s], which may contain standalone characters (different from 28 | ['-'], which is only allowed as first character) or ranges. They are 29 | build of start and end characters, separated by ['-']. An example of 30 | a valid description is ["-_a-zA-Z0-9"]. Note that [Invalid_argument] 31 | is raised in case of ill-formed description. *) 32 | val from_string : string -> charset 33 | 34 | (** [union cs1 cs2] builds a new charset that contins the union of the 35 | characters of [cs1] and [cs2]. *) 36 | val union : charset -> charset -> charset 37 | 38 | (** [complement cs] returns a new charset containing exactly characters 39 | that are not in [cs]. *) 40 | val complement : charset -> charset 41 | 42 | (** [add cs c] returns a new charset containing the characters of [cs] 43 | and the character [c]. *) 44 | val add : charset -> char -> charset 45 | 46 | (** [del cs c] returns a new charset containing the characters of [cs] 47 | but not the character [c]. *) 48 | val del : charset -> char -> charset 49 | 50 | (** {2 Membership test} *) 51 | 52 | (** [mem cs c] tests whether the charset [cs] contains [c]. *) 53 | val mem : charset -> char -> bool 54 | 55 | (** {2 Printing and string representation} *) 56 | 57 | (** [pp ff cs] prints the charset [cs] to output formatter [ff]. Compact 58 | format is used for printing: ranges, full and empty charsets are not 59 | given in full, but abbreviated. 
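    For instance, a small sketch of how construction, membership and the
    compact representation interact (the output of [show] given below is an
    expectation based on this compact format, shown only for illustration):
    {[
      let cs = Charset.from_string "a-zA-Z_" in
      assert (Charset.mem cs 'q');
      assert (not (Charset.mem cs '-'))
      (* [Charset.show cs] should then read as "A-Z_a-z" *)
    ]}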
*) 60 | val pp : Format.formatter -> charset -> unit 61 | 62 | (** [pp_full ff cs] is similar to [pp ff cs], but it does not abbreviate 63 | ranges, full and empty charsets. *) 64 | val pp_full : Format.formatter -> charset -> unit 65 | 66 | (** [show oc cs] builds a string representing the charset [cs] using the 67 | same compact format as [print]. *) 68 | val show : charset -> string 69 | 70 | (** [show_full oc cs] is the same as [show oc cs] but it does not use 71 | abreviations (i.e. all characters appear). *) 72 | val show_full : charset -> string 73 | 74 | (** {2 Manipulating charsets imperatively} *) 75 | 76 | (** [copy cs] make a copy of the charset [cs]. *) 77 | val copy : charset -> charset 78 | 79 | (** [addq cs c] adds the character [c] to the charset [cs]. Users must 80 | be particularly careful when using this function. In particular, it 81 | should not be used directly on [empty], [full] or the result of the 82 | [singleton] function as it would change their value permanently. It 83 | is advisable to prefer the use of [add] or to work on a [copy]. *) 84 | val addq : charset -> char -> unit 85 | 86 | (** [delq cs c] deletes the character [c] from the charset [cs]. Similar 87 | recomendatiosn as for [addq] apply. *) 88 | val delq : charset -> char -> unit 89 | 90 | (** {2 Comparison and equality test} *) 91 | 92 | (** [compare cs1 cs2] compares the charsets [cs1] and [cs2] according to 93 | some (unspecified) total order. *) 94 | val compare : charset -> charset -> int 95 | 96 | (** [equal cs1 cs2] tests the equality of charsets [cs1] and [cs2]. *) 97 | val equal : charset -> charset -> bool 98 | -------------------------------------------------------------------------------- /lib/comb.mli: -------------------------------------------------------------------------------- 1 | (** {1 Combinator library, using continuation} 2 | 3 | As usual left recursion is not supported, but the library is intended to be 4 | used through the [Grammar] module that provides elimination of left 5 | recursion. However, a cache combinatr is supported to overcome the cost of 6 | backtracking. *) 7 | 8 | (** {2 function and type usefull to the end-user} *) 9 | 10 | (** The type of combinator *) 11 | type 'a t 12 | 13 | (** Partial parsing. Beware, the returned position is not the maximum position 14 | that can be reached by the grammar. *) 15 | val partial_parse_buffer : 'a t -> Blank.t -> ?blank_after:bool -> 16 | Lex.buf -> Lex.idx -> 'a * Lex.buf * Lex.idx 17 | 18 | (** Returns all possible parse trees. Usefull for natural languages but also to 19 | debug ambiguity in a supposed non ambiguous grammar. If end of input is not 20 | parsed in some ways, some value may correspond to only the beginning of the 21 | input. Except when debugging or testing, you should rather use cache/merge 22 | anyway. *) 23 | val parse_all_buffer : 'a t -> Blank.t -> Lex.buf -> Lex.idx -> 'a list 24 | 25 | (** {2 combinator constructors, normally not needed by the casual user } *) 26 | 27 | (** [fail] is a parser rejecting every input (it always fails). *) 28 | val fail : 'a t 29 | 30 | (** Fails and report an error *) 31 | val error : string list -> 'a t 32 | 33 | val assert_false : 'a t 34 | 35 | (** [empty v] is a parser that only accepts the empty input and returns [v] as 36 | its semantic value. *) 37 | val empty : 'a -> 'a t 38 | 39 | (** [lexeme l] is a parser accepting the lexeme (or terminal) [l], and returns 40 | the corresponding semantic value. 
*) 41 | val lexeme : 'a Lex.lexeme -> 'a t 42 | 43 | (** [seq g1 g2] sequences the parsers [g1] and [g2]. The resulting parser 44 | starts by parsing using [g1], and then parses the rest of the input using 45 | [g2]. The result of parsing with [g2] is then apply to the result of [g1]. 46 | *) 47 | val seq : 'a t -> Charset.t -> ('a -> 'b) t -> 'b t 48 | 49 | (** optimisation of the above with no argument, allows costless right recursion. 50 | with no semantics *) 51 | val iseq : 'a t -> Charset.t -> 'b t -> 'b t 52 | 53 | (** [dseq c1 c2] is a dependant sequence, contrary to [seq c1 c2], the 54 | combinator used to parse after [c1] depends upon the first value returned by 55 | [c1]. It is a good idea to memoize the function c2. The separation of ['a] 56 | and ['b] in the smeantics of [g1] allows to depend on the smallest set of 57 | possible vaue which is important in case of memoisation. *) 58 | val dseq: ('a * 'b) t -> ('a -> ('b -> 'c) t) -> 'c t 59 | 60 | (** optimisation of the above with no argument, allows costless right recursion. 61 | with no semantics *) 62 | val diseq: 'a t -> ('a -> 'b t) -> 'b t 63 | 64 | (** Combinator parsing with the first combinator and in case of failure with the 65 | second from the same position. The optionnal charset corresponds to the 66 | charaters accepted at the beginning of the input for each combinators. The 67 | charset must be Charset.full if the corresponding combinator accept the 68 | empty input *) 69 | val alt : (Charset.t * 'a t) list -> 'a t 70 | 71 | (** [option a ~cs c] is an optimisation for [alt (empty a) ~cs c]. In fact it 72 | is better to use [alt] with grammar not accepting empty and use [option] to 73 | deal with an empty case *) 74 | val option: 'a -> Charset.t -> 'a t -> 'a t 75 | 76 | (** Parses with the given combinator and transforms the semantics with the given 77 | function *) 78 | val appl : 'a t -> ('a -> 'b) -> 'b t 79 | 80 | (** Replace the semantics of the given combinator *) 81 | val repl : 'a t -> 'b -> 'b t 82 | 83 | val lazy_ : 'a t -> 'a lazy_t t 84 | val force : 'a lazy_t t -> 'a t 85 | 86 | (** unmerge a merged ambiguous grammar, typically if the rest of the parsing 87 | uses dependant sequences. Allows for ambiguous terminals by return a list and 88 | the using unmerge. *) 89 | val unmerge : 'a list t -> 'a t 90 | 91 | (** Parses as the given combinator and give the position to the left of the 92 | parsing input as argument to the action *) 93 | val left_pos : (Pos.spos -> 'a) t -> 'a t 94 | 95 | (** Same as above with the position to the right *) 96 | val right_pos : (Pos.spos -> 'a) t -> 'a t 97 | 98 | (** To eliminate left recursion, lpos has to be left factored. if lpos is one 99 | single combinator, this adds a lot of closures in action code. To solve this 100 | problem, lpos is splitted in two combinators, one that pushes the position 101 | to a stack and pops after parsing and another that reads the position. *) 102 | val read_pos : (Pos.spos -> 'a) t -> 'a t 103 | 104 | (** key used by lr below *) 105 | type 'a key = 'a Assoc.key 106 | 107 | (** [lr c1 v c2] is an optimized version of [let rec r = seq c1 (seq r c2)] 108 | which is illegal as it is left recursive and loops. The optional charset 109 | indicates the characteres accepted by [c2] at the beginning of input. 
[v] is 110 | like variable bound in [c2], see [read_tbl] below *) 111 | val lr : 'a t -> 'a key -> Charset.t -> 'a t -> 'a t 112 | 113 | (** Same as above, but also store the position *) 114 | val lr_pos : 'a t -> 'a key -> Charset.t -> 'a t -> 'a t 115 | 116 | (** type to represent the left prefix of a mutually recursive grammar. 117 | the key represents the produced grammar for each left prefix. *) 118 | type mlr_left = 119 | LNil : mlr_left 120 | | LCns : 'a key * Charset.t * 'a t * mlr_left -> mlr_left 121 | 122 | (** type of the suffix to be repeted in a mutually recursive grammar. 123 | the first key represents the grammar that parsed the input before 124 | the second key represents the produced grammar. 125 | 126 | Somehow, mlr_right is a matrix R, the two keys being the index of 127 | the coefficient and mlr_left is a vector L. Parsing, will somehow use 128 | L R^n for n large enough; 129 | *) 130 | type mlr_right = 131 | RNil : mlr_right 132 | | RCns : 'a key * 'b key * Charset.t * 'b t * mlr_right -> mlr_right 133 | 134 | (** The combinator itself. The optionnal argument indicated that we need 135 | the position before parsing *) 136 | val mlr : ?lpos:bool -> mlr_left -> mlr_right -> 'a key -> 'a t 137 | 138 | (** combinator to access the value stored by lr. It must be uses as prefix of 139 | [c2] in [lr c1 c2]. For instance, the coding of [let rec r = seq c1 (seq r 140 | c2)] is [let k = Assoc.new_key () in lr c1 k (seq (read_tbl k) c2)]. Here we 141 | ommited the actions. This way of coding left recursion avoids to transform 142 | the action and produce closure. The code for elimination of left recursion 143 | is also much simpler *) 144 | val read_tbl : 'a key -> 'a t 145 | 146 | (** Allow to test the blank characteres before a grammar and more *) 147 | val test_before : (Lex.buf -> Lex.idx -> Lex.buf -> Lex.idx -> bool) 148 | -> 'a t -> 'a t 149 | 150 | (** Allow to test the blank characteres after a grammar and more *) 151 | val test_after : ('a -> Lex.buf -> Lex.idx -> Lex.buf -> Lex.idx -> bool) 152 | -> 'a t -> 'a t 153 | 154 | (** Access to a reference to a combinator, used by Grammar.compile for recursive 155 | grammars (not for left recursion *) 156 | val deref : 'a t ref -> 'a t 157 | 158 | (** Change the blank function used to parse with the given combinator. we can 159 | choose which blank to use at the boundary with the optional parameters. *) 160 | val change_layout : ?config:Blank.layout_config -> Blank.t -> 'a t -> 'a t 161 | 162 | (** Combinator that caches a grammar to avoid exponential behavior. parsing 163 | with the grammar from each position is memoized to avoid parsing twice the 164 | same sequence with the same grammar. *) 165 | type 'a merge = infos:Input.infos -> start:Input.byte_pos -> end_:Input.byte_pos 166 | -> 'a -> 'a -> 'a 167 | 168 | val cache : ?merge:'a merge -> 'a t -> 'a t 169 | -------------------------------------------------------------------------------- /lib/container.ml: -------------------------------------------------------------------------------- 1 | (** Standard eq-type. *) 2 | type ('a,'b) eq = 3 | | Y : ('a,'a) eq 4 | | N : ('a,'b) eq 5 | 6 | (** GADT to represent types in the syntax (extended when needed). *) 7 | type _ tag = .. 8 | 9 | module Make(V : sig type ('a,'b) elt end) = struct 10 | include V 11 | 12 | (** Non-uniform list (containing elements of possibly different types). 
*) 13 | type ptag = T : 'a tag -> ptag [@@unboxed] 14 | 15 | type 'b etag = E : 'a tag * ('a,'b) elt -> 'b etag 16 | 17 | type 'b nu_list = (ptag, 'b etag) Hashtbl.t 18 | 19 | 20 | (** Actual container. *) 21 | type 'a container = 22 | { mutable data : 'a nu_list (** Contents for each table. *) 23 | ; uid : 'a tag (** Unique identifier. *) 24 | ; eq : 'b. 'b tag -> ('a,'b) eq} 25 | 26 | (** Creation function for containers. *) 27 | let create : type a. unit -> a container = 28 | fun () -> 29 | let module M = struct type _ tag += T : a tag end in 30 | let eq : type b. b tag -> (a, b) eq = function M.T -> Y | _ -> N in 31 | {data = Hashtbl.create 16; uid = M.T; eq } 32 | 33 | (** Obtain the UID of a container. *) 34 | let address : 'b container -> 'b tag = fun c -> c.uid 35 | 36 | (** Equality on containers *) 37 | let eq : 'a container -> 'b container -> ('a, 'b) eq = 38 | fun c1 c2 -> c1.eq c2.uid 39 | 40 | (** unboxed mandatory for weak hashtbl to work, from 4.04.0 *) 41 | type any = C : 'b container -> any [@@unboxed] 42 | 43 | (** Container table. *) 44 | type 'a table = 45 | { tag : 'a tag (** Unique tag for this table. *) 46 | ; eq : 'b. 'b tag -> ('a,'b) eq (** Equality to the table's tag. *) 47 | ; mutable elts : any list 48 | } 49 | 50 | (* Find the value associated to the given table and container. *) 51 | let find : type a b. a table -> b container -> (a, b) elt = 52 | fun tab c -> 53 | match Hashtbl.find c.data (T tab.tag) with E(t,v) -> 54 | match tab.eq t with 55 | | Y -> v 56 | | N -> assert false 57 | 58 | (** Insert a new value associated to the given table and container. If a 59 | value is already present, it is overwriten. *) 60 | let add : type a b. a table -> b container -> (a, b) elt -> unit = 61 | fun tab c v -> 62 | let mem = Hashtbl.mem c.data (T tab.tag) in 63 | Hashtbl.replace c.data (T tab.tag) (E(tab.tag, v)); 64 | if not mem then tab.elts <- C c :: tab.elts 65 | 66 | let clear : type a. a table -> unit = fun tab -> 67 | List.iter (fun (C c) -> Hashtbl.remove c.data (T tab.tag)) tab.elts; 68 | tab.elts <- [] 69 | 70 | let create_table : type a. unit -> a table = fun () -> 71 | let module M = struct type _ tag += T : a tag end in 72 | let eq : type b. b tag -> (a, b) eq = function M.T -> Y | _ -> N in 73 | let res = { tag = M.T ; eq ; elts = [] } in 74 | Gc.finalise clear res; 75 | res 76 | 77 | let length : type a. a table -> int = fun tab -> 78 | let n = ref 0 in 79 | let fn : type b. b nu_list -> unit = fun data -> 80 | Hashtbl.iter (fun (T t) _ -> 81 | match tab.eq t with 82 | | Y -> incr n; 83 | | N -> assert false) data 84 | in 85 | List.iter (fun (C c) -> fn c.data) tab.elts; 86 | !n 87 | 88 | type 'a iter = { f : 'b.('a, 'b) elt -> unit } 89 | 90 | let iter : type a. a iter -> a table -> unit = fun f tab -> 91 | let fn : type b. b nu_list -> unit = fun data -> 92 | Hashtbl.iter (fun _ (E (t,v)) -> 93 | match tab.eq t with 94 | | Y -> f.f v; 95 | | N -> assert false) data 96 | in 97 | List.iter (fun (C c) -> fn c.data) tab.elts 98 | 99 | type ('a,'c) fold = { f : 'b.('a, 'b) elt -> 'c -> 'c } 100 | 101 | let fold : type a c. (a, c) fold -> a table -> c -> c = fun f tab acc -> 102 | let fn : type b. 
b nu_list -> c -> c = fun data acc -> 103 | Hashtbl.fold (fun _ (E (t,v)) acc -> 104 | match tab.eq t with 105 | | Y -> f.f v acc 106 | | N -> assert false) data acc 107 | in 108 | List.fold_left (fun acc (C c) -> fn c.data acc) acc tab.elts 109 | 110 | end 111 | 112 | module type Param = sig 113 | type 'a table 114 | type 'b container 115 | type ('a, 'b) elt 116 | val create : unit -> 'b container 117 | val create_table : unit -> 'a table 118 | val address : 'b container -> 'b tag 119 | val eq : 'a container -> 'b container -> ('a, 'b) eq 120 | val add : 'a table -> 'b container -> ('a, 'b) elt -> unit 121 | val find : 'a table -> 'b container -> ('a, 'b) elt 122 | val clear : 'a table -> unit 123 | val length : 'a table -> int 124 | type 'a iter = { f : 'b.('a, 'b) elt -> unit } 125 | val iter : 'a iter -> 'a table -> unit 126 | type ('a,'c) fold = { f : 'b.('a, 'b) elt -> 'c -> 'c } 127 | val fold : ('a, 'c) fold -> 'a table -> 'c -> 'c 128 | end 129 | 130 | type ('a, 'b) el = 'a 131 | include Make(struct type ('a, 'b) elt = ('a, 'b) el end) 132 | 133 | (* redefine iter and fold, to avoid the useless record but also to avoid 134 | https://caml.inria.fr/mantis/view.php?id=7636 135 | *) 136 | let iter : type a. (a -> unit) -> a table -> unit = fun f tab -> 137 | let fn : type b. b nu_list -> unit = fun data -> 138 | Hashtbl.iter (fun _ (E (t,v)) -> 139 | match tab.eq t with 140 | | Y -> f v; 141 | | N -> assert false) data 142 | in 143 | List.iter (fun (C c) -> fn c.data) tab.elts 144 | 145 | let fold : type a c. (a -> c -> c) -> a table -> c -> c = fun f tab acc -> 146 | let fn : type b. b nu_list -> c -> c = fun data acc -> 147 | Hashtbl.fold (fun _ (E (t,v)) acc -> 148 | match tab.eq t with 149 | | Y -> f v acc 150 | | N -> assert false) data acc 151 | in 152 | List.fold_left (fun acc (C c) -> fn c.data acc) acc tab.elts 153 | 154 | type ('a, 'b) le = 'b 155 | module Ref = Make(struct type ('a, 'b) elt = ('a, 'b) le end) 156 | 157 | (** Exported name for [container]. *) 158 | type t = unit container 159 | 160 | let eq c1 c2 = match eq c1 c2 with Y -> true | N -> false 161 | 162 | (* This does not work ! 163 | let iter : type a.(a -> unit) -> a table -> unit = 164 | fun f tabl -> 165 | iter { f = (let f : type b.(a, b) el -> unit = f in f) } tab 166 | *) 167 | -------------------------------------------------------------------------------- /lib/container.mli: -------------------------------------------------------------------------------- 1 | (** This library provide a type [Container.t], which can be used as a 2 | list of polymorphic references. 3 | 4 | Another way to see it, is map with access time in O(N) where 5 | N is the number of tables! Note: O(1) is possible, but usually 6 | there are very few tables at the same time. 7 | 8 | The typical use case is to have a record with a field of type 9 | [Container.t]. Then when you want to store some information in 10 | that field of type [a], you create with [Container.create_table], 11 | a value of type [a Container.table]. Then with [Container.add] 12 | and [Container.find], you can store value in your field of 13 | type [Container.t]. 
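    As a minimal sketch of that pattern (the stored value is purely
    illustrative):
    {[
      let cell : Container.t = Container.create () in
      let tbl : int Container.table = Container.create_table () in
      Container.add tbl cell 42;
      assert (Container.find tbl cell = 42)
    ]}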
14 | 15 | More precisely, consider the following type for oriented graphs: 16 | {[ 17 | type node = { name: string; 18 | mutable next: node list; 19 | ptrs : Container.t } 20 | type graph = node list (* at least on node per component *) 21 | ]} 22 | If you want to traverse the graphe, you create a table 23 | to associate a boolean to each node: 24 | {[ 25 | let iter graph f = 26 | let visited : bool Container.table = Container.create_table 101 in 27 | .... 28 | (* the table is automatically freed when visited is collected *) 29 | ]} 30 | If you want to compute the distance between two nodes: 31 | {[ 32 | let distance a b = 33 | let distance_to_a : int Container.table = Container.create_table 101 in 34 | .... 35 | ]} 36 | 37 | The functorial interface is useful when you have a parametric 38 | type. Considier a record type [ 'b t ] to which you want to 39 | associate values of type [ ('a,'b) v ]. It is enough for this to 40 | call the functor with 41 | 42 | [ module M = Constainer.Make(struct type ('a,'b) v end) ] 43 | 44 | and have a field of type [ 'b M.container ] inside your record. 45 | Then, you use the type [ 'a M.table ] when you want to start to 46 | associate values of type [ ('a,'b) v ] to the record. The same 47 | module [ M ] can be used for many types [ 'a ]. 48 | 49 | Remark: the non funtorial version is just defined by: 50 | [ Container.Make(struct type ('a, 'b) elt = 'a end ] 51 | *) 52 | 53 | (** Type of a container cell *) 54 | type t 55 | 56 | (** equality on container *) 57 | val eq : t -> t -> bool 58 | 59 | (** Type of a container table. You must create a container table of 60 | type [ a table ] to store value of type [ a ] in a container 61 | cell. Many table can have the same type, the value are associated 62 | to the pair (cell, table), not just to the cell. *) 63 | type 'a table 64 | 65 | (** [ create () ] creates a new container cell *) 66 | val create : unit -> t 67 | 68 | (** [ add tab cell v ] associates the value v to the pair (tab, cell). 69 | complexity if O(N) where N is the number of tables with a value 70 | associated to this cell. *) 71 | val add : 'a table -> t -> 'a -> unit 72 | 73 | (** [ find tab cell ] return the value associated to (tab, cell). 74 | raises Not_found if the are no such value *) 75 | val find : 'a table -> t -> 'a 76 | 77 | (** [ clear tab ] removed all value associated to a table. *) 78 | val clear : 'a table -> unit 79 | 80 | (** [ create_table () ] creates a new table *) 81 | val create_table : unit -> 'a table 82 | 83 | (** GADT to represent types in the syntax (extended when needed). *) 84 | type _ tag = .. 85 | 86 | (** [ address n ] return a unique id of each cell *) 87 | val address : t -> unit tag 88 | 89 | (** iterator as in List module *) 90 | val iter : ('a -> unit) -> 'a table -> unit 91 | 92 | (** fold as in List module *) 93 | val fold : ('a -> 'c -> 'c) -> 'a table -> 'c -> 'c 94 | 95 | (** definition of the [elt] type *) 96 | type ('a, 'b) elt = 'a 97 | 98 | (** Standard eq-type. 
*) 99 | type ('a,'b) eq = 100 | | Y : ('a,'a) eq 101 | | N : ('a,'b) eq 102 | 103 | (** functorial verstion storing element of type ('a, 'b) elt for container of 104 | the 'b and table of type 'a *) 105 | module type Param = sig 106 | type 'a table 107 | type 'b container 108 | type ('a, 'b) elt 109 | val create : unit -> 'b container 110 | val create_table : unit -> 'a table 111 | val address : 'b container -> 'b tag 112 | val eq : 'a container -> 'b container -> ('a, 'b) eq 113 | val add : 'a table -> 'b container -> ('a, 'b) elt -> unit 114 | val find : 'a table -> 'b container -> ('a, 'b) elt 115 | val clear : 'a table -> unit 116 | val length : 'a table -> int 117 | type 'a iter = { f : 'b.('a, 'b) elt -> unit } 118 | val iter : 'a iter -> 'a table -> unit 119 | type ('a,'c) fold = { f : 'b.('a, 'b) elt -> 'c -> 'c } 120 | val fold : ('a, 'c) fold -> 'a table -> 'c -> 'c 121 | end 122 | 123 | module Make(T : sig type ('a,'b) elt end) : Param 124 | with type ('a, 'b) elt = ('a,'b) T.elt 125 | 126 | (** useful particular case, when container are used for graphs or union 127 | find and you associate containers to containers *) 128 | module Ref : Param with type ('a,'b) elt = 'b 129 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name pacomb) 3 | (public_name pacomb) 4 | (modules :standard) 5 | (preprocess (pps pacomb.ppx)) 6 | (libraries stdlib-shims unix) 7 | (private_modules container hashtbl_eq assoc comb unionFind heap interpolate)) 8 | 9 | (documentation 10 | (package pacomb)) 11 | -------------------------------------------------------------------------------- /lib/grammar.mli: -------------------------------------------------------------------------------- 1 | (** {1 Main module of Pacomb} *) 2 | 3 | (** {2 Type} *) 4 | 5 | (** type of a grammar with semantical action of type ['a ].*) 6 | type 'a grammar 7 | 8 | (** An abbreviation *) 9 | type 'a t = 'a grammar 10 | 11 | type name_kind = Created | Inherited | Given 12 | 13 | type name = string * name_kind 14 | 15 | (** {2 Grammar contructors} *) 16 | 17 | (** All construœqctors can be given an optional [name] argument that is used 18 | when printing the grammar. *) 19 | 20 | (** [print_grammar ch g] prints the grammar [g] of the given output channel. if 21 | [def=false] (the default is [true]) it will print the transformed grammar 22 | prior to compilation. *) 23 | val print_grammar : ?no_other:bool -> ?def:bool -> out_channel -> 24 | 'a grammar -> unit 25 | 26 | (** [fail ()] is a grammar that parses nothing (always fails) *) 27 | val fail : ?name:string -> unit -> 'a grammar 28 | 29 | (** fails reporting an error *) 30 | val error : ?name:string -> string list -> 'a grammar 31 | 32 | (** [empty a] accepts the empty input and returns [a] *) 33 | val empty : ?name:string -> 'a -> 'a grammar 34 | 35 | (** [cond b] is [if b then empty () else fail ()]. Very usefull in grammar 36 | family at the beginning of a rule. The test is done at grammar construction, 37 | not at parsing time (except if it is used in a dependant grammar). *) 38 | val cond : ?name:string -> bool -> unit grammar 39 | 40 | (** [term t] accepts the terminal [t] and returns its semantics. 
See module 41 | [Lex] *) 42 | val term : ?name:string -> 'a Lex.terminal -> 'a grammar 43 | 44 | (** [appl g f] parses with [g] and apply [f] to the resulting semantics *) 45 | val appl : ?name:string -> 'a grammar -> ('a -> 'b) -> 'b grammar 46 | 47 | (** [unmerge g] introduce multiple parse branch from a list of semantics. 48 | Allows to create ambiguous terminals and allows fr unmerge to continue 49 | a dependent parsing *) 50 | val unmerge : ?name:string -> 'a list grammar -> 'a grammar 51 | 52 | val lazy_ : ?name:string -> 'a grammar -> 'a lazy_t grammar 53 | val force : ?name:string -> 'a lazy_t grammar -> 'a grammar 54 | 55 | (** [alt [g1;g2;...;gn]] parses with [g1] and if it fails then [g2] and so on *) 56 | val alt : ?name:string -> 'a grammar list -> 'a grammar 57 | 58 | (** [seq g1 g2] parses with [g1] and then with [g2] for the rest of the input, 59 | combine both semantics by apply the semantics of [g2] to [g1] *) 60 | val seq : ?name:string -> 'a grammar -> ('a -> 'b) grammar -> 'b grammar 61 | 62 | (** variation of the abover when we do not use all semantics, it allows cost 63 | less right recursion with no semantics *) 64 | val iseq : ?name:string -> 'a grammar -> 'b grammar -> 'b grammar 65 | 66 | (** [dseq g1 g2)] is a dependant sequence, the grammar [g2] used after [g1] 67 | may depend upon the semantics of [g1]. This is not very efficient as the 68 | grammar [g2] must be compiled at parsing time. [g2] is memoized by default 69 | to partially overcome this fact. *) 70 | val dseq : ?name:string -> ('a * 'b) grammar 71 | -> ('a -> ('b -> 'c) grammar) -> 'c grammar 72 | 73 | (** variation of the abover when we do not use all semantics, it allows cost 74 | less right recursion with no semantics *) 75 | val diseq : ?name:string -> 'a grammar -> ('a -> 'b grammar) -> 'b grammar 76 | 77 | (** [lpos g] is identical to [g] but passes the position just before parsing 78 | with [g] to the semantical action of [g] *) 79 | val lpos : ?name:string -> (Pos.spos -> 'a) grammar -> 'a grammar 80 | 81 | (** [rpos g] is identical to [g] but passes the position just after parsing with 82 | [g] to the semantical action of [g] *) 83 | val rpos : ?name:string -> (Pos.spos -> 'a) grammar -> 'a grammar 84 | 85 | val mk_pos : ?name:string -> (Pos.pos -> 'a) grammar -> 'a grammar 86 | 87 | (** variants of seq with the position of the first iterm *) 88 | val seq_pos : ?name:string -> 'a grammar -> (Pos.pos * 'a -> 'b) grammar 89 | -> 'b grammar 90 | 91 | (** variants of dseq with the position of the first iterm *) 92 | val dseq_pos : ?name:string -> ('a * 'b) grammar 93 | -> ('a -> (Pos.pos * 'b -> 'c) grammar) -> 'c grammar 94 | 95 | (** type for a merge function *) 96 | type 'a merge = infos:Input.infos -> start:Input.byte_pos -> end_:Input.byte_pos 97 | -> 'a -> 'a -> 'a 98 | 99 | (** [cache g] avoids to parse twice the same input with [g] by memoizing the 100 | result of the first parsing. The optional [merge] parameter is applied to 101 | group semantics corresponding to the same part of the input. Using [cache] 102 | with [merge] allows to recover a polynomial time complexity (cubic at worst) 103 | and a quadratic space (in the size of the input) *) 104 | val cache : ?name:string -> ?merge:'a merge -> 'a grammar -> 'a grammar 105 | 106 | (** [set_debug_merge fmt] Will produce a debugging message for all ambiguities 107 | including the possition. 
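    As an illustration of the kind of ambiguity being reported, here is a
    sketch of [cache] used with a [merge] function of the type above (the
    grammar [g] is left abstract; counting parse trees by addition is the
    same idea as [@merge (+.)] in examples/catalan.ml):
    {[
      let count_trees (g : float Grammar.t) : float Grammar.t =
        Grammar.cache ~merge:(fun ~infos:_ ~start:_ ~end_:_ x y -> x +. y) g
    ]}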
*) 108 | val set_debug_merge : Format.formatter -> unit 109 | val unset_debug_merge : unit -> unit 110 | 111 | (** allows to perform a test, the test function receive the position before 112 | and after the blanks *) 113 | val test_before : ?name:string 114 | -> (Lex.buf -> Lex.idx -> Lex.buf -> Lex.idx -> bool) 115 | -> 'a grammar -> 'a grammar 116 | 117 | val test_after : ?name:string 118 | -> ('a -> Lex.buf -> Lex.idx -> Lex.buf -> Lex.idx -> bool) 119 | -> 'a grammar -> 'a grammar 120 | 121 | (** particular cases of the above testing the absence of blanks. *) 122 | val no_blank_before : ?name:string -> 'a grammar -> 'a grammar 123 | val no_blank_after : ?name:string -> 'a grammar -> 'a grammar 124 | 125 | (** [layout b g] changes the blank function to parse the input with the grammar 126 | [g]. The optional parameters allow to control which blanks are used at the 127 | boundary. Both can be used in which case the new blanks are used second 128 | before parsing with [g] and first after. *) 129 | val layout : ?name:string -> ?config:Blank.layout_config 130 | -> Blank.t -> 'a grammar -> 'a grammar 131 | 132 | (** usual option/star/plus combinator *) 133 | val option : ?name:string -> 'a grammar -> 'a option grammar 134 | val default_option : ?name:string -> 'a -> 'a grammar -> 'a grammar 135 | val star : ?name:string -> 'a grammar -> 'a list grammar 136 | val plus : ?name:string -> 'a grammar -> 'a list grammar 137 | val star_sep : ?name:string -> 'b grammar -> 'a grammar -> 'a list grammar 138 | val plus_sep : ?name:string -> 'b grammar -> 'a grammar -> 'a list grammar 139 | 140 | (** {2 Definition of recursive grammars } *) 141 | 142 | (** to define recursive grammars, one may declare the grammar first and then 143 | gives its value. [declare_grammar name] creates an undefined grammar with 144 | the given name *) 145 | val declare_grammar : string -> 'a grammar 146 | 147 | (** [set_grammar g1 g2] set the value of [g1] declared with [declare_grammar]. 148 | will raise [Invalid_argument] if [g1] was not defined using 149 | [declare_grammar] or if it was already set.*) 150 | val set_grammar : 'a grammar -> 'a grammar -> unit 151 | 152 | (** [fixpoint g] compute the fixpoint of [g], that is a grammar [g0] such that 153 | [g0 = g g0] *) 154 | val fixpoint : ?name:string -> ('a grammar -> 'a grammar) -> 'a grammar 155 | 156 | (** [grammar_family to_str name] returns a pair [(gs, set_gs)], where [gs] is a 157 | finite family of grammars parametrized by a value of type ['a]. A name 158 | [name] is to be provided for the family, and an optional function [to_str] 159 | can be provided to print the parameter and display better error messages. *) 160 | val grammar_family : ?param_to_string:('a -> string) -> string 161 | -> ('a -> 'b grammar) * (('a -> 'b grammar) -> unit) 162 | 163 | (** 164 | {[ 165 | (* Declare the grammar family *) 166 | let (gr, set_gr) = grammar_family to_str name in 167 | 168 | ... code using grammars of gr to define mutually recursive grammars ... 169 | 170 | (* Define the grammar family *) 171 | let _ = set_gr the_grammars 172 | 173 | ... now the new family can be used ... 
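     (* A hypothetical concrete instance (illustrative only): a family of
        integer grammars indexed by a boolean parameter. *)
     let (atom, set_atom) =
       grammar_family ~param_to_string:string_of_bool "atom" in
     let _ = set_atom (fun allow_empty ->
         if allow_empty then alt [term (Lex.nat ()); empty 0]
         else term (Lex.nat ()))
     in
     ... atom true is now an ordinary int grammar ...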
174 | ]} 175 | *) 176 | 177 | (** {2 Compilation of a grammar and various} *) 178 | 179 | (** [compile g] produces a combinator that can be used to actually do the 180 | parsing see the [Comb] module *) 181 | val compile : 'a grammar -> 'a Comb.t 182 | 183 | (** gives the grammar name *) 184 | val grammar_name : 'a grammar -> string 185 | 186 | (** allows to rename a grammar *) 187 | val give_name : string -> 'a grammar -> 'a grammar 188 | 189 | (** Parse a whole input buffer. the eof combinator is added at 190 | the end of the given combinator *) 191 | val parse_buffer : 'a grammar -> Blank.t -> ?offset:Lex.idx -> Lex.buf -> 'a 192 | 193 | (** Partial parsing. Beware, the returned position is not the maximum position 194 | that can be reached by the grammar if the grammar is ambiguous. In this 195 | case, a message is printed on stderr. The charset is the character accepted 196 | at the end of input. Mainly useful with 'eof' when [blank_after] is 197 | [true]. *) 198 | val partial_parse_buffer : 'a grammar -> Blank.t -> ?blank_after:bool -> 199 | ?offset:Lex.idx -> Lex.buf -> 'a * Lex.buf * Lex.idx 200 | 201 | (** Returns all possible parse trees. Usefull for natural languages but also to 202 | debug ambiguity in a supposed non ambiguous grammar. *) 203 | val parse_all_buffer : 'a grammar -> Blank.t -> 204 | ?offset:Lex.idx -> Lex.buf -> 'a list 205 | 206 | (** Parse a whole string, reporting position according to utf8 if optional 207 | argument [utf8] is given and [Utf8.UTF8 or Utf8.CJK_UTF8] *) 208 | val parse_string : ?utf8:Utf8.context -> 'a grammar -> Blank.t -> string -> 'a 209 | 210 | (** Parse a whole input channel, reporting postiion according to utf8. 211 | After closing the file position reporting by parsing cannot be transformed 212 | bash to line/column number. *) 213 | val parse_channel : ?utf8:Utf8.context -> ?filename:string -> 214 | 'a grammar -> Blank.t -> in_channel -> 'a 215 | 216 | (** Parse a whole Unix.file_desc, reporting postiion according to utf8. 217 | After closing the file position reporting by parsing cannot be transformed 218 | bash to line/column number. *) 219 | val parse_fd : ?utf8:Utf8.context -> ?filename:string -> 220 | 'a grammar -> Blank.t -> Unix.file_descr -> 'a 221 | 222 | (** Parse a whole file, reporting postiion according to utf8. 223 | File is reopen to read position. So the file should not change on disk *) 224 | val parse_file : ?utf8:Utf8.context -> 225 | 'a grammar -> Blank.t -> string -> 'a 226 | -------------------------------------------------------------------------------- /lib/hashtbl_eq.ml: -------------------------------------------------------------------------------- 1 | (** [eq_closure] is an alternative to the polymorphic equality function [(=)], 2 | that compares closures using [(==)] instead of failing. Note that equality 3 | testing is consequently not perfect. *) 4 | let eq_closure : type a. a -> a -> bool = fun v1 v2 -> 5 | (* We remember encountered values in [eq_done] to handle cyclicity. *) 6 | let eq_done : (Obj.t * Obj.t) list ref = ref [] in 7 | let rec eq : Obj.t -> Obj.t -> bool = fun v1 v2 -> 8 | (* Physical equality is tested first. *) 9 | v1 == v2 || 10 | (* We then look at tags, and unfold potential forward blocks. *) 11 | let t1 = Obj.tag v1 in 12 | if t1 = Obj.forward_tag then eq (Obj.field v1 0) v2 else 13 | let t2 = Obj.tag v2 in 14 | if t2 = Obj.forward_tag then eq v1 (Obj.field v2 0) else 15 | (* Tags must otherwise be the same to have equality. 
*) 16 | t1 == t2 && 17 | (* Strings, doubles and arrays of doubles are compared using [=]. *) 18 | if t1 = Obj.string_tag then v1 = v2 else 19 | if t1 = Obj.double_tag then v1 = v2 else 20 | if t1 = Obj.double_array_tag then v1 = v2 else 21 | (* For everything that is not a non-constant constructors, equality failed 22 | at this point (e.g., for closures or sealed values). Such values are only 23 | considered equal if physical equality succeeds (it was tested already). *) 24 | Obj.first_non_constant_constructor_tag <= t1 && 25 | t1 <= Obj.last_non_constant_constructor_tag && 26 | Obj.size v1 == Obj.size v2 && (* Number of fields should be equal. *) 27 | (* We recursively explore the fields. *) 28 | let rec fn = function 29 | | (v1', v2')::l -> 30 | begin 31 | match (v1 == v1', v2 == v2') with 32 | | (true , true ) -> true 33 | | (true , false) -> false 34 | | (false, true ) -> false 35 | | (_ , _ ) -> fn l 36 | end 37 | | [] -> 38 | let rec gn i = 39 | i < 0 || (eq (Obj.field v1 i) (Obj.field v2 i) && gn (i-1)) 40 | in 41 | eq_done := (v1, v2) :: !eq_done; gn (Obj.size v1 - 1) 42 | in 43 | fn !eq_done 44 | in 45 | eq (Obj.repr v1) (Obj.repr v2) 46 | 47 | type ('a, 'b) t = 48 | { eq_key : 'a -> 'a -> bool 49 | (** Equality function for keys. *) 50 | ; mutable nb_buckets : int 51 | (** Number of buckets. *) 52 | ; mutable buckets : ('a * 'b) list array 53 | (** Array of buckets. *) 54 | ; mutable max_size : int 55 | (** Current maximum bucket size. *) 56 | ; mutable size_limit : int 57 | (** Maximum size allowed for a bucket. *) } 58 | 59 | (** Create an empty hash table. *) 60 | let create : ?eq_key:('a -> 'a -> bool) -> int -> ('a, 'b) t = 61 | fun ?(eq_key=eq_closure) nb_buckets -> 62 | let rec log2 n = if n <= 0 then 0 else 1 + log2 (n lsr 1) in 63 | let nb_buckets = max nb_buckets 8 in 64 | let buckets = Array.make nb_buckets [] in 65 | let size_limit = log2 nb_buckets + 7 in 66 | { eq_key ; nb_buckets ; buckets ; max_size = 0 ; size_limit } 67 | 68 | (** Iterates a function over the bindings of the given hash table. *) 69 | let iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit = fun f htbl -> 70 | Array.iter (List.iter (fun (k,v) -> f k v)) htbl.buckets 71 | 72 | (** Finds the bucket corresponding to the given key in the hash table. *) 73 | let find_bucket : ('a, 'b) t -> 'a -> int = fun htbl k -> 74 | Hashtbl.hash k mod htbl.nb_buckets 75 | 76 | (** Lookup function. *) 77 | let find : ('a, 'b) t -> 'a -> 'b = fun h k -> 78 | let i = find_bucket h k in 79 | let rec find = function 80 | | [] -> raise Not_found 81 | | (kv, v) :: xs -> if h.eq_key k kv then v else find xs 82 | in 83 | find h.buckets.(i) 84 | 85 | (** Insertion function (replacing existing binding). *) 86 | let rec add : ('a, 'b) t -> 'a -> 'b -> unit = fun h k v -> 87 | (* Doubles the size of the hash table. *) 88 | let grow : ('a, 'b) t -> unit = fun h -> 89 | let old_tbl = h.buckets in 90 | h.nb_buckets <- h.nb_buckets * 2; 91 | h.buckets <- Array.make h.nb_buckets []; 92 | h.size_limit <- h.size_limit + 1; 93 | h.max_size <- 0; 94 | Array.iter (List.iter (fun (k,v) -> add h k v)) old_tbl 95 | in 96 | (* Removes existing binding, or returns size of bucket with exception. *) 97 | let exception Size_is of int in 98 | let rec remove sz l = 99 | match l with 100 | | [] -> raise (Size_is sz) 101 | | b :: l -> if h.eq_key k (fst b) then l else b :: remove (sz+1) l 102 | in 103 | (* Find the right bucket and replace the binding (if any). 
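     If the key is not bound, [remove] raises [Size_is] with the length of the
     bucket; the handler below then inserts the new binding, records the bucket
     size in [max_size], and grows the table when it gets too large.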
*) 104 | let i = find_bucket h k in 105 | try h.buckets.(i) <- (k,v) :: remove 0 h.buckets.(i) with Size_is(sz) -> 106 | (* Otherwise insert the new binding. *) 107 | h.buckets.(i) <- (k,v) :: h.buckets.(i); 108 | h.max_size <- max h.max_size sz; 109 | (* Grow the table if the bucket is too large. *) 110 | if h.max_size > h.size_limit then grow h 111 | -------------------------------------------------------------------------------- /lib/hashtbl_eq.mli: -------------------------------------------------------------------------------- 1 | (** Custom hash-table module. *) 2 | 3 | (** The [Hashtbl] module (of the standard library) does not work when keys can 4 | contain closures, since they are compared with polymorphic equality [(=)]. 5 | For memoization though, using a perfect equality test is not important. In 6 | other words, it does not really matter if the equality test produces false 7 | negatives when comparing closures. We thus use an alternative, polymorphic 8 | equality function that behaves as [(=)], but compares closures with using 9 | [Marshall.to_string] instead of failing. *) 10 | 11 | (** Representation of a hash table with keys of type ['a] and elements of type 12 | ['b], with a custom equality test. *) 13 | type ('a, 'b) t 14 | 15 | (** [create ~eq_key n] creates an empty hash table with initial size n (if [n] 16 | is not reasonable then a default value is used), and using [eq_key] as an 17 | equality test for keys. If no [eq_key] is given, the function described in 18 | the preamble of this module is used. *) 19 | val create : ?eq_key:('a -> 'a -> bool) -> int -> ('a, 'b) t 20 | 21 | (** [add htbl k v] extends the hash table [htbl] with a new binding of key [k] 22 | to value [v]. Any previous binding for key [k] is removed. *) 23 | val add : ('a, 'b) t -> 'a -> 'b -> unit 24 | 25 | (** [find htbl k] returns the value maped to [k] in the hash table [htbl]. The 26 | exception [Not_found] is raised if there is no such binding. *) 27 | val find : ('a, 'b) t -> 'a -> 'b 28 | 29 | (** [iter f htbl] calls [f k v] for every binding of a key [k] to a value [v], 30 | in the hash table [htbl]. The order of iteration is unspecified. *) 31 | val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit 32 | -------------------------------------------------------------------------------- /lib/heap.ml: -------------------------------------------------------------------------------- 1 | 2 | let empty = [] 3 | 4 | type 'a t = 'a list list 5 | 6 | let add cmp x h = 7 | let rec fn acc h = match h with 8 | | [] -> List.rev_append acc [[x]] 9 | | []::_ -> assert false 10 | | (y::_ as l)::h' -> 11 | match cmp x y with 12 | | 0 -> List.rev_append acc ((x::l) :: h') 13 | | n when n < 0 -> List.rev_append acc ([x]:: h) 14 | | _ -> fn (l::acc) h' 15 | in 16 | fn [] h 17 | 18 | let remove l = match l with 19 | | [] -> raise Not_found 20 | | [x]::h -> x, h 21 | | (x::l)::h -> (x, (l::h)) 22 | | []::_ -> assert false 23 | -------------------------------------------------------------------------------- /lib/heap.mli: -------------------------------------------------------------------------------- 1 | 2 | (** type of a priority heap, holding elements of type 'a *) 3 | type 'a t 4 | 5 | (** empty heap *) 6 | val empty : 'a t 7 | 8 | (** [add cmp x h] add [x] to the heap [h] with the priority [p] *) 9 | val add : ('a -> 'a -> int) -> 'a -> 'a t -> 'a t 10 | 11 | (** [remove h] returns an element with the least priority from the heap and the 12 | resulting heap. 
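    A sketch of the intended use (the element returned first is minimal for the
    comparison function given to [add]):
    {[
      let h = add compare 3 (add compare 1 (add compare 2 empty)) in
      let (x, h) = remove h in   (* x = 1 *)
      let (y, _) = remove h in   (* y = 2 *)
      ...
    ]}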
Semantics is unspecified for equal priorities *) 13 | val remove : 'a t -> ('a * 'a t) 14 | -------------------------------------------------------------------------------- /lib/input.ml: -------------------------------------------------------------------------------- 1 | type context = Utf8.context 2 | 3 | type stream_infos = 4 | | File of { name : string 5 | ; length : int 6 | ; date : float 7 | ; close : unit -> unit 8 | } 9 | | String of string 10 | | Stream 11 | 12 | let stream_infos_of_fd file_name fd close_file = 13 | let open Unix in 14 | let s = fstat fd in 15 | File { name = file_name 16 | ; length = s.st_size 17 | ; date = s.st_mtime 18 | ; close = close_file } 19 | 20 | let stream_infos_of_ch file_name ch = 21 | stream_infos_of_fd file_name (Unix.descr_of_in_channel ch) 22 | (fun () -> close_in ch) 23 | 24 | let stream_infos_of_str str = 25 | String str 26 | 27 | type infos = 28 | { utf8 : Utf8.context (** Uses utf8 for positions *) 29 | ; stream_infos : stream_infos (** The name of the buffer (e.g. file name) *) 30 | ; uid : int (** Unique identifier *) 31 | } 32 | 33 | let stream_infos infos = infos.stream_infos 34 | 35 | let filename infos = match infos.stream_infos with 36 | | File { name } -> name 37 | | _ -> "" 38 | 39 | let utf8 infos = infos.utf8 40 | 41 | type buffer = 42 | { boff : int (* Offset to the line ( bytes ) *) 43 | ; data : string (* Contents of the buffer *) 44 | ; mutable next : buffer Lazy.t (* Following line *) 45 | ; mutable ctnr : Container.t option array 46 | (* for map table, initialized if used *) 47 | ; infos : infos (* infos common to the whole file *) 48 | } 49 | 50 | let close buffer = 51 | match buffer.infos.stream_infos with 52 | | File f -> f.close () 53 | | _ -> () 54 | 55 | 56 | (* Generate a unique identifier. *) 57 | let new_uid = 58 | let c = ref 0 in 59 | fun () -> let uid = !c in incr c; uid 60 | 61 | (** infos function *) 62 | let infos b = b.infos 63 | 64 | let phantom_infos = 65 | { utf8 = Utf8.ASCII 66 | ; stream_infos = Stream 67 | ; uid = new_uid () 68 | } 69 | 70 | 71 | (** idx type and constant *) 72 | type idx = int 73 | 74 | let init_idx = 0 75 | 76 | (** byte_pos type and constant *) 77 | type byte_pos = int 78 | 79 | let int_of_byte_pos x = x 80 | 81 | let init_byte_pos = 0 82 | 83 | let phantom_byte_pos = -1 84 | 85 | (** spos type and constant *) 86 | type spos = infos * byte_pos 87 | 88 | let phantom_spos = (phantom_infos, phantom_byte_pos) 89 | 90 | (* Emtpy buffer. *) 91 | let empty_buffer infos boff = 92 | let rec line = lazy 93 | { boff; data = "" ; next = line ; infos ; ctnr = [||] } 94 | in Lazy.force line 95 | 96 | let is_eof b = b.data = "" 97 | 98 | let llen b = String.length b.data 99 | 100 | (* Test if a buffer is empty. *) 101 | let rec is_empty l idx = 102 | if idx < llen l then false 103 | else if idx = 0 then is_eof l 104 | else is_empty (Lazy.force l.next) (idx - llen l) 105 | 106 | (* Read the character at the given position in the given buffer. *) 107 | let [@inline] rec read l i = 108 | if i < llen l then (l.data.[i], l , i+1) 109 | else if is_eof l then ('\255', l, 0) 110 | else read (Lazy.force l.next) (i - llen l) 111 | 112 | (* Get the character at the given position in the given buffer. 
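   Unlike [read], only the character is returned, not the advanced buffer and
   index.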
*) 113 | let [@nline] rec get l i = 114 | if i < llen l then l.data.[i] else 115 | if is_eof l then '\255' else 116 | get (Lazy.force l.next) (i - llen l) 117 | 118 | (* substring of a buffer *) 119 | let sub b i len = 120 | let s = Bytes.create len in 121 | let rec fn b i j = 122 | if j = len then Bytes.unsafe_to_string s 123 | else 124 | let (c,b,i) = read b i in 125 | Bytes.set s j c; 126 | fn b i (j+1) 127 | in 128 | fn b i 0 129 | 130 | (* byte position *) 131 | let [@inline] byte_pos b p = b.boff + p 132 | 133 | (* short position *) 134 | let [@inline] spos b p = (b.infos, b.boff + p) 135 | 136 | (* Equality of buffers. *) 137 | let buffer_equal b1 b2 = 138 | b1.infos.uid = b2.infos.uid && b1.boff = b2.boff 139 | 140 | (* Comparison of buffers. *) 141 | let buffer_compare b1 b2 = 142 | match b1.boff - b2.boff with 143 | | 0 -> b1.infos.uid - b2.infos.uid 144 | | c -> c 145 | 146 | (* Get the unique identifier of the buffer. *) 147 | let buffer_uid b = b.infos.uid 148 | 149 | 150 | exception NoLineNorColumnNumber 151 | 152 | 153 | let buf_size = 0x10000 154 | 155 | (* returns [(s,nl)] with [nl = true] iff there is a newline at the end of [s] *) 156 | let input_buffer ch = 157 | let res = Bytes.create buf_size in 158 | let n = input ch res 0 buf_size in 159 | if n = 0 then (* n = 0: we are at EOF *) 160 | raise End_of_file 161 | else if n = buf_size then 162 | Bytes.unsafe_to_string res 163 | else 164 | Bytes.sub_string res 0 n 165 | 166 | let fd_buffer fd = 167 | let res = Bytes.create buf_size in 168 | let n = Unix.read fd res 0 buf_size in 169 | if n = 0 then (* n = 0: we are at EOF *) 170 | raise End_of_file 171 | else if n = buf_size then 172 | Bytes.unsafe_to_string res 173 | else 174 | Bytes.sub_string res 0 n 175 | 176 | let from_fun utf8 stream_infos get_line file = 177 | let infos = { utf8; stream_infos; uid = new_uid () } in 178 | let cont boff = 179 | empty_buffer infos boff 180 | in 181 | let rec fn boff = 182 | begin 183 | (* Tail rec exception trick to avoid stack overflow. 
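       The [try ... with] block returns a thunk that is applied outside of the
       handler (the [end ()] below), so the buffer record is only built once the
       exception handler has been popped.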
*) 184 | try 185 | let data = get_line file in 186 | let llen = String.length data in 187 | fun () -> 188 | { boff; data ; infos 189 | ; next = lazy (fn (boff + llen)) 190 | ; ctnr = [||] } 191 | with End_of_file -> 192 | fun () -> cont boff 193 | end () 194 | in 195 | fn 0 196 | 197 | let from_channel 198 | : ?utf8:context -> ?filename:string -> in_channel -> buffer = 199 | fun ?(utf8=Utf8.ASCII) ?(filename="") ch -> 200 | let filename = stream_infos_of_ch filename ch in 201 | from_fun utf8 filename input_buffer ch 202 | 203 | let from_fd 204 | : ?utf8:context -> ?filename:string -> Unix.file_descr -> buffer = 205 | fun ?(utf8=Utf8.ASCII) ?(filename="") fd -> 206 | let filename = stream_infos_of_fd filename fd (fun () -> Unix.close fd) in 207 | from_fun utf8 filename fd_buffer fd 208 | 209 | let from_file : ?utf8:context -> string -> buffer = 210 | fun ?(utf8=Utf8.ASCII) filename -> 211 | let fd = Unix.(openfile filename [O_RDONLY] 0) in 212 | let filename = stream_infos_of_fd filename fd (fun () -> Unix.close fd) in 213 | from_fun utf8 filename fd_buffer fd 214 | 215 | let from_string : ?utf8:context -> string -> buffer = 216 | fun ?(utf8=Utf8.ASCII) str -> 217 | let stream_infos = stream_infos_of_str str in 218 | let b = ref true in 219 | let string_buffer () = 220 | if !b then (b := false; str) else raise End_of_file 221 | in 222 | from_fun utf8 stream_infos string_buffer () 223 | 224 | let leq_buf {boff = b1} i1 {boff = b2} i2 = 225 | b1 < b2 || (b1 = b2 && (i1 <= i2)) 226 | 227 | let buffer_before b1 i1 b2 i2 = leq_buf b1 i1 b2 i2 228 | 229 | (** Table to associate value to positions in input buffers *) 230 | module Tbl = struct 231 | type 'a t = 'a Container.table 232 | 233 | let create = Container.create_table 234 | 235 | let ctnr buf idx = 236 | if buf.ctnr = [||] then 237 | buf.ctnr <- Array.make (llen buf + 1) None; 238 | let a = buf.ctnr.(idx) in 239 | match a with 240 | | None -> let c = Container.create () in buf.ctnr.(idx) <- Some c; c 241 | | Some c -> c 242 | 243 | let add tbl buf idx x = 244 | Container.add tbl (ctnr buf idx) x 245 | 246 | let find tbl buf idx = 247 | Container.find tbl (ctnr buf idx) 248 | 249 | let clear = Container.clear 250 | 251 | let iter : type a. a t -> (a -> unit) -> unit = fun tbl f -> 252 | Container.iter f tbl 253 | 254 | end 255 | -------------------------------------------------------------------------------- /lib/input.mli: -------------------------------------------------------------------------------- 1 | (** A module providing efficient input buffers with preprocessing. *) 2 | 3 | (** {2 Type} *) 4 | 5 | (** Information for a file for reopening *) 6 | type stream_infos = 7 | (* regular file, that can be reopened *) 8 | | File of { name : string 9 | ; length : int 10 | ; date : float 11 | ; close : unit -> unit 12 | } 13 | (* string *) 14 | | String of string 15 | (* stream: no detailed position available *) 16 | | Stream 17 | 18 | (** The abstract type for an input buffer. 
*) 19 | type buffer 20 | 21 | (** Type of fixed data attached to the buffer (like file name) *) 22 | type infos 23 | 24 | (** return the [infos] associated to a buffer *) 25 | val infos : buffer -> infos 26 | 27 | (** dummy infos *) 28 | val phantom_infos : infos 29 | 30 | (** returns the [stream_infos] stored in [infos] *) 31 | val stream_infos : infos -> stream_infos 32 | 33 | (** returns the filename if it exists, the empty string otherwise *) 34 | val filename : infos -> string 35 | 36 | (** [utf8 infos] return the unicode context in use for this file *) 37 | val utf8 : infos -> Utf8.context 38 | 39 | (** The abstract type position relative to the current buffer *) 40 | type idx 41 | 42 | (** position at the beginning of a buffer *) 43 | val init_idx : idx 44 | 45 | (** The abstract position relative to the beginning of buffer *) 46 | type byte_pos 47 | 48 | (** convert byte_pos to natural number *) 49 | val int_of_byte_pos : byte_pos -> int 50 | 51 | (** zero *) 52 | val init_byte_pos : byte_pos 53 | 54 | (** dummy value, to initiaize references for instance *) 55 | val phantom_byte_pos : byte_pos 56 | 57 | (** Short (and quick) type for positions *) 58 | type spos = infos * byte_pos 59 | 60 | (** dummy value, to initiaize references for instance *) 61 | val phantom_spos : spos 62 | 63 | (** {2 Reading from a buffer} *) 64 | 65 | (** [read buf idx] returns the character at position [idx] in the buffer [buf], 66 | together with the new buffer and position. Read infinitely many '\255' at end 67 | of buffer *) 68 | val read : buffer -> idx -> char * buffer * idx 69 | 70 | (** [sub b i len] returns [len] characters from position [idx]. If the end of 71 | buffer is reached, the string is filed with eof '\255' *) 72 | val sub : buffer -> idx -> int -> string 73 | 74 | (** [get buf idx] returns the character at position [idx] in the buffer 75 | [buf]. *) 76 | val get : buffer -> idx -> char 77 | 78 | (** {2 Creating a buffer} *) 79 | 80 | type context = Utf8.context 81 | 82 | (** [from_file fn] returns a buffer constructed using the file [fn]. 83 | 84 | If [utf8] is [Utf8.UTF8] or [Utf8.CJK_UTF8] ([Utf8.ASCII] is the default), 85 | positions are reported according to [utf8]. [read] is still reading bytes. 86 | 87 | Getting line number and column number requires rescanning the file and if 88 | the file is not a regular file, it is kept in memory. Setting [rescan] to 89 | false avoid this, but only byte position and file name will be available. *) 90 | val from_file : ?utf8:context -> string -> buffer 91 | 92 | (** [from_channel ~filename ch] returns a buffer constructed using the channel 93 | [ch]. The optional [filename] is only used as a reference to the channel in 94 | error messages. 95 | 96 | [uft8] and [rescan] as in [from_file]. *) 97 | val from_channel : ?utf8:context -> ?filename:string 98 | -> in_channel -> buffer 99 | 100 | (** Same as above for file descriptor *) 101 | val from_fd : ?utf8:context -> ?filename:string 102 | -> Unix.file_descr -> buffer 103 | 104 | (** For buffer represented by file, this will close the file *) 105 | val close: buffer -> unit 106 | 107 | (** [from_string ~filename str] returns a buffer constructed using the string 108 | [str]. The optional [filename] is only used as a reference to the channel in 109 | error messages. *) 110 | val from_string : ?utf8:context -> string -> buffer 111 | 112 | (** {2 Buffer manipulation functions} *) 113 | 114 | (** [is_empty buf] test whether the buffer [buf] is empty. 
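    Characters are pulled one at a time with [read] above; for example (a
    sketch, past the end of input ['\255'] is returned forever):
    {[
      let buf = from_string "ab" in
      let (c1, buf, idx) = read buf init_idx in   (* c1 = 'a' *)
      let (c2, _, _) = read buf idx in            (* c2 = 'b' *)
      ...
    ]}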
*) 115 | val is_empty : buffer -> int -> bool 116 | 117 | exception NoLineNorColumnNumber 118 | 119 | (** position in bytes, regardless to utf8 *) 120 | val byte_pos : buffer -> idx -> byte_pos 121 | 122 | (** get spos from buffer and idx, to get line_num and col_num if needed 123 | later. *) 124 | val spos : buffer -> idx -> spos 125 | 126 | (** [buffer_uid buf] returns a unique identifier. [Input.read] does not change 127 | the uid. The uid is created when creating the initial buffer. *) 128 | val buffer_uid : buffer -> int 129 | 130 | (** [buffer_eq b1 b2] tests the equality of [b1] and [b2]. *) 131 | val buffer_equal : buffer -> buffer -> bool 132 | 133 | (** [buffer_compare b1 b2] compares [b1] and [b2]. *) 134 | val buffer_compare : buffer -> buffer -> int 135 | 136 | (** [buffer_before b1 i1 b2 i2] returns true if the position [b1, i1] is before 137 | [b2, i2]. Gives meaningless result if [b1] and [b2] do not refer to the same 138 | file, i.e. do not have the same uid. *) 139 | val buffer_before : buffer -> int -> buffer -> int -> bool 140 | 141 | (** Table to associate value to positions in input buffers. The complexity of 142 | access in the table is O(ln(N)) where N is the number of tables. *) 143 | module Tbl : sig 144 | type 'a t 145 | 146 | val create : unit -> 'a t 147 | 148 | val add : 'a t -> buffer -> idx -> 'a -> unit 149 | 150 | val find : 'a t -> buffer -> idx -> 'a 151 | 152 | val clear : 'a t -> unit 153 | 154 | val iter : 'a t -> ('a -> unit) -> unit 155 | end 156 | -------------------------------------------------------------------------------- /lib/interpolate.ml: -------------------------------------------------------------------------------- 1 | 2 | let solve mat vector = 3 | let dim = Array.length vector in 4 | 5 | for i = 0 to dim - 2 do 6 | 7 | let pivot, pivot_val = 8 | let r = ref (-1, 0.0) in 9 | for j = i to dim - 1 do 10 | let v = abs_float mat.(j).(i) in 11 | if v > snd !r then r := (j, v) 12 | done; 13 | !r 14 | in 15 | if pivot = -1 then failwith "non invertible"; 16 | for j = i to dim-1 do 17 | let v = mat.(pivot).(j) in 18 | mat.(pivot).(j) <- mat.(i).(j); 19 | mat.(i).(j) <- v 20 | done; 21 | let v = vector.(pivot) in 22 | vector.(pivot) <- vector.(i); 23 | vector.(i) <- v; 24 | 25 | for j = i+1 to dim-1 do 26 | let v = mat.(j).(i) in 27 | mat.(j).(i) <- 0.0; 28 | for k = i+1 to dim-1 do 29 | mat.(j).(k) <- mat.(j).(k) -. v *. mat.(i).(k) /. pivot_val 30 | done; 31 | vector.(j) <- vector.(j) -. v *. vector.(i) /. pivot_val 32 | done; 33 | 34 | done; 35 | 36 | let r = Array.copy vector in 37 | 38 | for i = dim - 1 downto 0 do 39 | for j = i + 1 to dim - 1 do 40 | r.(i) <- r.(i) -. r.(j) *. mat.(i).(j) 41 | done; 42 | r.(i) <- r.(i) /. 
mat.(i).(i) 43 | done; 44 | 45 | r 46 | 47 | type 'a base = ('a -> float) array 48 | 49 | module type Base = sig 50 | type input 51 | val base : input base 52 | end 53 | 54 | module type Interpolation = sig 55 | type input 56 | type interpolation 57 | 58 | val get : interpolation -> float array 59 | 60 | val zero : interpolation 61 | 62 | val compute : interpolation -> input -> float 63 | 64 | val compute_coefs : (input * float) array -> interpolation 65 | 66 | val error : (input * float) array -> interpolation -> float 67 | 68 | val print : out_channel -> interpolation -> unit 69 | end 70 | 71 | module Make(B:Base) = struct 72 | 73 | let funs = B.base 74 | let dim = Array.length funs 75 | 76 | let get x = x 77 | 78 | let zero = Array.make dim 0.0 79 | 80 | type input = B.input 81 | type interpolation = float array 82 | 83 | let compute (coefs:float array) (x:input) = 84 | let r = ref 0.0 in 85 | for i = 0 to dim - 1 do 86 | r := !r +. coefs.(i) *. funs.(i)(x) 87 | done; 88 | !r 89 | 90 | let compute_coefs (samples:(input * float) array) = 91 | let ns = Array.length samples in 92 | let m i j = funs.(i)(fst samples.(j)) in 93 | let v j = snd samples.(j) in 94 | let a = 95 | Array.init dim (fun i -> 96 | Array.init dim (fun i' -> 97 | let r = ref 0.0 in 98 | for j = 0 to ns - 1 do 99 | r := !r +. m i j *. m i' j 100 | done; 101 | !r)) 102 | in 103 | let b = 104 | Array.init dim (fun i -> 105 | let r = ref 0.0 in 106 | for j = 0 to ns - 1 do 107 | r := !r +. m i j *. v j 108 | done; 109 | !r) 110 | in 111 | solve a b 112 | 113 | let print ch a = 114 | Printf.fprintf ch "("; 115 | for i = 0 to dim - 1 do 116 | Printf.fprintf ch "%s%e" (if i > 0 then ", " else "") a.(i) 117 | done; 118 | Printf.fprintf ch ")" 119 | 120 | let error (samples:(input * float) array) coefs = 121 | let nb = float (Array.length samples) in 122 | let error = Array.fold_left (fun e (n, x) -> 123 | let dx = (x -. compute coefs n) /. x in 124 | e +. dx *. dx) 0.0 samples 125 | in 126 | sqrt (error /. 
nb) 127 | 128 | end 129 | -------------------------------------------------------------------------------- /lib/interpolate.mli: -------------------------------------------------------------------------------- 1 | 2 | type 'a base = ('a -> float) array 3 | 4 | module type Base = sig 5 | type input 6 | val base : input base 7 | end 8 | 9 | module type Interpolation = sig 10 | type input 11 | type interpolation 12 | 13 | val get : interpolation -> float array 14 | 15 | val zero : interpolation 16 | 17 | val compute : interpolation -> input -> float 18 | 19 | val compute_coefs : (input * float) array -> interpolation 20 | 21 | val error : (input * float) array -> interpolation -> float 22 | 23 | val print : out_channel -> interpolation -> unit 24 | end 25 | 26 | module Make(B:Base) : Interpolation with type input = B.input 27 | -------------------------------------------------------------------------------- /lib/keywords.ml: -------------------------------------------------------------------------------- 1 | module type Spec = 2 | sig 3 | val id_charset : Charset.t 4 | val reserved : string list 5 | end 6 | 7 | let keyword_uid = ref 0 8 | 9 | module Make(S : Spec) = 10 | struct 11 | let reserved = Word_list.create () 12 | 13 | let uid = incr keyword_uid; !keyword_uid 14 | 15 | let mem : string -> bool = fun s -> 16 | Word_list.mem_ascii reserved s 17 | 18 | let reserve : string -> unit = fun s -> 19 | try Word_list.add_ascii reserved s () 20 | with Word_list.Already_bound -> invalid_arg "already reserved" 21 | 22 | let _ = List.iter reserve S.reserved 23 | 24 | let check : string -> unit = fun s -> 25 | if mem s then Lex.give_up () 26 | 27 | let special : string -> unit Grammar.t = fun s -> 28 | if s = "" then invalid_arg "empty word"; 29 | let fn str pos = 30 | let str = ref str in 31 | let pos = ref pos in 32 | for i = 0 to String.length s - 1 do 33 | let (c, str', pos') = Input.read !str !pos in 34 | if c <> s.[i] then Lex.give_up (); 35 | str := str'; pos := pos' 36 | done; 37 | let c = Input.get !str !pos in 38 | if Charset.mem S.id_charset c then Lex.give_up (); 39 | ((), !str, !pos) 40 | in 41 | let n = Printf.sprintf "%S" s in 42 | Grammar.term { n; f = fn ; a = Keyword(s,uid); 43 | c = Charset.singleton s.[0] } 44 | 45 | let create : string -> unit Grammar.t = fun s -> 46 | if mem s then invalid_arg "keyword already defined"; 47 | reserve s; special s 48 | end 49 | -------------------------------------------------------------------------------- /lib/keywords.mli: -------------------------------------------------------------------------------- 1 | (** Signature of a module for keyword properties. *) 2 | module type Spec = 3 | sig 4 | (** [id_charset] contains the characters that are not allowed to directly 5 | follow a keyword (because it would make it an identifier for example). 6 | In general, it should correspond to characters that are can be used in 7 | identifiers. *) 8 | val id_charset : Charset.t 9 | 10 | (** [reserved] is a liste of words that must be rejected as identifiers as 11 | if they were keywords. Note that if a word is in the list, it will not 12 | be possible to define it as a keyword. *) 13 | val reserved : string list 14 | end 15 | 16 | module Make(S : Spec) : 17 | sig 18 | (** [reserve s] reserves the word [s] as if it was a keyword, although it 19 | will not be defined as one. The exception [Invalid_argument] is raised 20 | if the word [s] is already reserved. *) 21 | val reserve : string -> unit 22 | 23 | (** [mem s] tests whether [s] is a reserved word or not. 
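    A typical instantiation looks like this (a sketch; [id_chars] stands for a
    [Charset.t] describing the characters allowed in identifiers):
    {[
      module K = Keywords.Make(struct
          let id_charset = id_chars
          let reserved = []   (* extra words to reject as identifiers, if any *)
        end)

      let if_kw : unit Grammar.t = K.create "if"
      let then_kw : unit Grammar.t = K.create "then"
    ]}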
*) 24 | val mem : string -> bool 25 | 26 | (** [check s] calls [Earley.give_up ()] if [s] is a reserved word. It can 27 | be used to reject keywords while parsing identifiers for example. *) 28 | val check : string -> unit 29 | 30 | (** [create s] reserves the keyword [s] and returns a parser accepting the 31 | string [s], not followed by a character of [S.id_charset]. In the case 32 | where [s] is already reserved, [Invalid_argument] is raised. *) 33 | val create : string -> unit Grammar.t 34 | 35 | (** [special s] accpets the same input as [create s], but the word [s] is 36 | not reserved. *) 37 | val special : string -> unit Grammar.t 38 | end 39 | -------------------------------------------------------------------------------- /lib/lex.mli: -------------------------------------------------------------------------------- 1 | (** {1 Lexing: grouping characters before parsing} 2 | 3 | It is traditionnal to do parsing in two phases (scanning/parsing). This is 4 | not necessary with combinators in general (scannerless). This is still true 5 | with Pacomb. However, this makes the grammar more readable to use a lexing 6 | phase. 7 | 8 | Moreover, lexing is often done with a longuest match rule that is not 9 | semantically equivalent to the semantics of context free grammar. 10 | 11 | This modules provide combinator to create terminals that the parser will 12 | call. 13 | 14 | *) 15 | 16 | (** {2 Types and exception} *) 17 | 18 | (** Position in a buffer is a [Input.buffer] together with an index 19 | [Input.pos]. *) 20 | type buf = Input.buffer 21 | type idx = Input.idx 22 | 23 | (** Type of terminal function, similar to blank, but with a returned value *) 24 | type 'a lexeme = buf -> idx -> 'a * buf * idx 25 | 26 | (** ast for terminals, needed for equality *) 27 | type _ ast = 28 | | Any : char ast 29 | | Any_utf8 : Uchar.t ast 30 | | Any_grapheme : string ast 31 | | Eof : unit ast 32 | | Char : char -> unit ast 33 | | Grapheme : string -> unit ast 34 | | String : string -> unit ast 35 | | Nat : int ast 36 | | Int : int ast 37 | | Float : float ast 38 | | CharLit : char ast 39 | | StringLit : string ast 40 | | Test : (char -> bool) -> char ast 41 | | NotTest : (char -> bool) -> unit ast 42 | | Seq : 'a t * 'b t * ('a -> 'b -> 'c) * 'c Assoc.key -> 'c ast 43 | | Alt : 'a t * 'a t -> 'a ast 44 | | Save : 'a t * (string -> 'a -> 'b) * 'b Assoc.key -> 'b ast 45 | | Option : 'a * 'a t -> 'a ast 46 | | Appl : 'a t * ('a -> 'b) * 'b Assoc.key -> 'b ast 47 | | Star : 'a t * (unit -> 'b) * ('b -> 'a -> 'b) * 'b Assoc.key -> 'b ast 48 | | Plus : 'a t * (unit -> 'b) * ('b -> 'a -> 'b) * 'b Assoc.key -> 'b ast 49 | | Sub : 'a t * ('a -> bool) * 'a Assoc.key -> 'a ast 50 | | Keyword : string * int -> unit ast 51 | | Custom : 'a lexeme * 'a Assoc.key -> 'a ast 52 | 53 | (** The previous types encapsulated in a record *) 54 | and 'a terminal = { n : string (** name *) 55 | ; f : 'a lexeme (** the terminal itself *) 56 | ; a : 'a ast 57 | ; c : Charset.t (** the set of characters accepted 58 | at the beginning of input *) } 59 | 60 | (** Abbreviation *) 61 | and 'a t = 'a terminal 62 | 63 | (** exception when failing, 64 | - can be raised (but not captured) by terminals 65 | - can be raised (but not captured) by action code in the grammar, see 66 | [Combinator.give_up] 67 | - will be raised and captured by [Combinator.parse_buffer] that will give 68 | the most advanced position *) 69 | exception NoParse 70 | 71 | (** from action ony may give an error message when rejecting a rule *) 72 | exception Give_up 
of string 73 | 74 | (** [give_up ()] rejects parsing from a corresponding semantic action. An error 75 | message can be provided. Can be used both in the semantics of terminals and 76 | parsing rules. *) 77 | val give_up : ?msg:string -> unit -> 'a 78 | 79 | (** {2 Combinators to create terminals} *) 80 | 81 | (** accept any character, except eof*) 82 | val any : ?name:string -> unit -> char t 83 | 84 | (** Terminal accepting the end of a buffer only. remark: [eof] is automatically 85 | added at the end of a grammar by [Combinator.parse_buffer]. 86 | [name] default is ["EOF"] *) 87 | val eof : ?name:string -> unit -> unit t 88 | 89 | (** Terminal accepting a given char, remark: [char '\255'] is equivalent to 90 | [eof]. 91 | [name] default is the given charater. *) 92 | val char : ?name:string -> char -> unit t 93 | 94 | (** Accept any character for which the test returns [true]. 95 | [name] default to the result of [Charset.show]. *) 96 | val test : ?name:string -> (char -> bool) -> char t 97 | 98 | (** Accept a character in the given charset. [name] default as in [test] *) 99 | val charset : ?name:string -> Charset.t -> char t 100 | 101 | (** Reject the input (raises [Noparse]) if the first character of the input 102 | passed the test. Does not read the character if the test fails. 103 | [name] default to ["^"] prepended to the result of [Charset.show]. *) 104 | val not_test : ?name:string -> (char -> bool) -> unit t 105 | 106 | (** Reject the input (raises [Noparse]) if the first character of the input is 107 | in the charset. Does not read the character if not in the charset. 108 | [name] default as in [not_test] *) 109 | val not_charset : ?name:string -> Charset.t -> unit t 110 | 111 | (** Does a test on the result of a given lexer and reject if it returns 112 | false. You may provide a restricted charset for the set of charaters 113 | accepted in the initial position. *) 114 | val sub : ?name:string -> ?charset:Charset.t -> 'a t -> ('a -> bool) -> 'a t 115 | 116 | (** Compose two terminals in sequence. [name] default is the concatenation of 117 | the two names. *) 118 | val seq : ?name:string -> 'a t -> 'b t -> ('a -> 'b -> 'c) -> 'c t 119 | 120 | (** variation on the above *) 121 | val seq1 : ?name:string -> 'a t -> 'b t -> 'a t 122 | val seq2 : ?name:string -> 'a t -> 'b t -> 'b t 123 | val seqs : 'a t list -> ('a -> 'a -> 'a) -> 'a t 124 | 125 | (** [save t f] save the part of the input parsed by the terminal [t] and combine 126 | it with its semantics using [f] *) 127 | val save : ?name:string -> 'a t -> (string -> 'a -> 'b) -> 'b t 128 | 129 | (** [alt t1 t2] parses the input with [t1] or [t2]. Contrary to grammars, 130 | terminals does not use continuations, if [t1] succeds, no backtrack will be 131 | performed to try [t2]. For instance, 132 | {[seq1 (alt (char 'a' ()) 133 | (seq1 (char 'a' ()) (char 'b' ()))) 134 | (char 'b' ())]} 135 | will reject "ab". 136 | If both [t1] and [t2] accept the input, longuest match is selected. 137 | [name] default to [sprintf "(%s)|(%s)" t1.n t2.n]. *) 138 | val alt : ?name:string -> 'a t -> 'a t -> 'a t 139 | val alts : 'a t list -> 'a t 140 | 141 | (** [option x t] parses the given terminal 0 or 1 time. [x] is returned if 0. 142 | [name] defaults to [sprintf "(%s)?" t.n]. *) 143 | val option : ?name:string -> 'a -> 'a t -> 'a t 144 | 145 | (** Applies a function to the result of the given terminal. 146 | [name] defaults to the terminal name. 
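    These combinators compose: for instance, a terminal for identifiers that
    returns the matched text can be sketched as follows ([first_chars] and
    [rest_chars] stand for suitable [Charset.t] values):
    {[
      let ident : string t =
        save
          (seq (charset first_chars)
             (star (charset rest_chars) (fun () -> ()) (fun () _ -> ()))
             (fun _ _ -> ()))
          (fun text () -> text)
    ]}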
*) 147 | val appl : ?name:string -> ('a -> 'b) -> 'a t -> 'b t 148 | 149 | (** [star t a f] Repetition of a given terminal 0,1 or more times. The type of 150 | function to compose the action allows for ['b = Buffer.t] for 151 | efficiency. The returned value is [f ( ... (f(f (a ()) x_1) x_2) ...) x_n] 152 | if [t] returns [x_1] ... [x_n]. 153 | [name] defaults to [sprintf "(%s)*" t.n] *) 154 | val star : ?name:string -> 'a t -> (unit -> 'b) -> ('b -> 'a -> 'b) -> 'b t 155 | 156 | (** Same as above but parses at least once.*) 157 | val plus : ?name:string -> 'a t -> (unit -> 'b) -> ('b -> 'a -> 'b) -> 'b t 158 | 159 | (** [string s] Accepts only the given string. 160 | Raises [Invalid_argument] if [s = ""]. 161 | [name] defaults to [sprintf "%S" s]. *) 162 | val string : ?name:string -> string -> unit t 163 | 164 | (** Parses an natural in base 10. ["-42"] and ["-42"] are not accepted. 165 | [name] defaults to ["NAT"] *) 166 | val nat : ?name:string -> unit -> int t 167 | 168 | (** Parses an integer in base 10. ["+42"] is accepted. 169 | [name] defaults to ["INT"] *) 170 | val int : ?name:string -> unit -> int t 171 | 172 | (** Parses a float in base 10. [".1"] is accepted as ["0.1"] 173 | [name] defaults to ["FLOAT"] *) 174 | val float : ?name:string -> unit -> float t 175 | 176 | (** Parses a char litteral 'c' using ocaml escaping convention 177 | [name] defaults to ["CHARLIT"] *) 178 | val char_lit : ?name:string -> unit -> char t 179 | 180 | (** Parses a string litteral "cccc" using ocaml escaping convention 181 | [name] defaults to ["STRINGLIT"] *) 182 | val string_lit : ?name:string -> unit -> string t 183 | 184 | (** Parses a unicode UTF8 char 185 | [name] defaults to ["UTF8"] *) 186 | val any_utf8 : ?name:string -> unit -> Uchar.t t 187 | 188 | (** [utf8 c] parses a specific unicode char and returns [()], 189 | [name] defaults to the string representing the char *) 190 | val utf8 : ?name:string -> Uchar.t -> unit t 191 | 192 | (** Parses any utf8 grapheme. 193 | [name] defaults to ["GRAPHEME"] *) 194 | val any_grapheme : ?name:string -> unit -> string t 195 | 196 | (** [grapheme s] parses the given utf8 grapheme and return [()]. 197 | The difference with [string s x] is that if the input starts 198 | with a grapheme [s'] such that [s] is a strict prefix of [s'], 199 | parsing will fail. 200 | [name] defaults to ["GRAPHEME("^s^")"] *) 201 | val grapheme : ?name:string -> string -> unit t 202 | 203 | (** Test wether a terminal accept the empty string. Such a terminal are illegal 204 | in a grammar, but may be used in combinator below to create terminals *) 205 | val accept_empty : 'a t -> bool 206 | 207 | (** Test constructor for the test constructor in [Grammar] *) 208 | val test_from_lex : bool t -> buf -> idx -> buf -> idx -> bool 209 | val blank_test_from_lex : bool t -> buf -> idx -> buf -> idx -> bool 210 | 211 | (** equality, incomplete in particular for "alt" *) 212 | val eq : 'a t -> 'b t -> ('a,'b) Assoc.eq 213 | 214 | (** If you build custom lexeme, you need to use this to fill the 215 | [a] field of the record *) 216 | val custom : 'a lexeme -> 'a ast 217 | 218 | (** where to put it ... 
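    As an illustration of [custom] above, a hand-written terminal accepting a
    single ['a'] character could be built directly as a record (a sketch):
    {[
      let a_term : unit terminal =
        let f buf idx =
          let (c, buf, idx) = Input.read buf idx in
          if c <> 'a' then give_up ();
          ((), buf, idx)
        in
        { n = "'a'"; f; a = custom f; c = Charset.singleton 'a' }
    ]}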
*) 219 | val default : 'a -> 'a option -> 'a 220 | -------------------------------------------------------------------------------- /lib/pos.ml: -------------------------------------------------------------------------------- 1 | (** Functions managing positions *) 2 | 3 | (** Position in AST resulting from parsetree may be a non negligible part of 4 | parsing complexity, both in speed and space. For instance: - computing column 5 | number in Unicode is costly - all information about a position takes a lot of 6 | machine words (up to 10) 7 | 8 | Moreover, in most cases, position are only used in error messages! 9 | 10 | Pacomb proposes to use three levels of positions to minimise the costs: 11 | 12 | 1°) the type Pos.t = Input.spos is the type return to action. It is a 13 | position in bytes together with some information about the file. 14 | 15 | 2°) type type 'a located (or pos = unit located) is some data decorated with 16 | a start and end position. This is the type recommended to be used in AST. it 17 | can be constructed by the in_pos, no_pos and mk_pos functions below. Each 18 | AST node located with a position costs 5 words. 19 | 20 | 3°) the type pos_info contain all the information about a position. It is 21 | produced by rescanning the file (or the string). If one parses a stream which 22 | is not a regular file, or if the regular file is no more available, the 23 | position is not really useful and pacomb will still print the byte position. 24 | *) 25 | 26 | (** byte position from input, type type returned by parsing combinator *) 27 | type spos = Input.spos 28 | 29 | (** Type to represent data with a position *) 30 | type pos = 31 | { offset_start : int 32 | ; offset_end : int 33 | ; infos : Input.infos } 34 | 35 | let mk_pos s e infos = 36 | { offset_start = Input.int_of_byte_pos s 37 | ; offset_end = Input.int_of_byte_pos e 38 | ; infos } 39 | 40 | (** merging of two positions, create the smallest position pair containing both 41 | *) 42 | let merge : pos -> pos -> pos = fun p1 p2 -> 43 | { p1 with 44 | offset_start = min p1.offset_start p2.offset_end 45 | ; offset_end = max p1.offset_end p2.offset_end } 46 | 47 | 48 | let phantom_pos = 49 | { offset_start = -1 50 | ; offset_end = -1 51 | ; infos = Input.phantom_infos } 52 | let no_pos = phantom_pos 53 | 54 | let has_pos p = 55 | p.offset_start <> -1 && 56 | p.offset_end <> -1 57 | 58 | let file_cache = Hashtbl.create 32 59 | let str_cache = Hashtbl.create 32 60 | 61 | (* The original file is not available *) 62 | exception No_detailed_position 63 | 64 | (* The file is available but has changed *) 65 | exception File_changed 66 | 67 | let init_file_cache file_name length date = 68 | try 69 | let ch = open_in file_name in 70 | let open Unix in 71 | let s = fstat (descr_of_in_channel ch) in 72 | if s.st_size <> length || s.st_mtime <> date then raise File_changed; 73 | let r =(ch, ref []) in 74 | Hashtbl.add file_cache file_name r; 75 | r 76 | with 77 | | Sys_error _ -> raise No_detailed_position 78 | 79 | let init_str_cache str = 80 | let r =ref [] in 81 | Hashtbl.add str_cache str r; 82 | r 83 | 84 | type pos_info = 85 | { start_line : int 86 | ; start_col : int 87 | ; start_line_offset : int 88 | ; start_byte : int 89 | ; end_line : int 90 | ; end_col : int 91 | ; end_line_offset : int 92 | ; end_byte : int 93 | ; file_name : string 94 | ; text : string } 95 | 96 | let cache_interval = 1024 97 | 98 | let ch_sub_string ch n p = 99 | seek_in ch n; 100 | let s = p-n in 101 | let buf = Bytes.create s in 102 | really_input ch 
buf 0 s; 103 | Bytes.unsafe_to_string buf 104 | 105 | let ch_col_num ch utf8 n p = 106 | if utf8 = Utf8.ASCII then p - n 107 | else 108 | begin 109 | let str = ch_sub_string ch n p in 110 | (*Printf.printf "text: %s\n%!" str;*) 111 | Utf8.length utf8 str 112 | end 113 | 114 | let str_col_num str utf8 n p = 115 | if utf8 = Utf8.ASCII then p - n 116 | else 117 | begin 118 | let str = String.sub str n (p-n) in 119 | Utf8.length utf8 str 120 | end 121 | 122 | let pos_info 123 | : ?relocate:(string -> string) -> ?text:bool -> pos -> pos_info 124 | = fun ?(relocate=fun x -> x) ?(text=false) pos -> 125 | let file_name, cache, seek_in, input_char, col_num, sub_string = 126 | match Input.stream_infos pos.infos with 127 | | File { name; length; date } -> 128 | let name0 = relocate name in 129 | let (ch, cache) = try Hashtbl.find file_cache name0 130 | with Not_found -> init_file_cache name0 length date 131 | in 132 | let seek_in = seek_in ch in 133 | let input_char () = input_char ch in 134 | (name, cache, seek_in, input_char, ch_col_num ch, ch_sub_string ch) 135 | | String str -> 136 | let cache = try Hashtbl.find str_cache str 137 | with Not_found -> init_str_cache str 138 | in 139 | let pos = ref 0 in 140 | let seek_in n = pos := n in 141 | let input_char () = 142 | if !pos >= String.length str then raise End_of_file; 143 | let x = !pos in incr pos; 144 | str.[x] 145 | in 146 | ("", cache, seek_in, input_char, str_col_num str, String.sub str) 147 | | Stream -> 148 | raise No_detailed_position 149 | in 150 | let n0 = pos.offset_start in 151 | let n1 = pos.offset_end in 152 | if n0 = -1 || n1 = -1 then raise No_detailed_position; 153 | let utf8 = Input.utf8 pos.infos in 154 | let rec fn n = function 155 | | (p, _ , _)::ls when p > n -> fn n ls 156 | | c ::_ -> c 157 | | [] -> (0, 0, 1) 158 | in 159 | let rec gn n p lo ln = 160 | if p mod cache_interval = 0 then cache := (p, lo, ln) :: !cache; 161 | if p = n then (lo, ln) else 162 | begin 163 | assert(p < n); 164 | let c = try input_char () with End_of_file -> assert false in 165 | (*Printf.printf "%d %d %d %d %C\n" n p lo ln c;*) 166 | let p = p + 1 in 167 | if c = '\n' then gn n p p (ln + 1) 168 | else gn n p lo ln 169 | end 170 | in 171 | let to_eol n = 172 | let rec fn n = 173 | try 174 | let c = input_char () in 175 | if c = '\n' then n else fn (n+1) 176 | with 177 | End_of_file -> n 178 | in 179 | seek_in n; 180 | fn n 181 | in 182 | let (start_line_offset, start_line) = 183 | let (p, lo, ln) = fn n0 !cache in 184 | seek_in p; 185 | gn n0 p lo ln 186 | in 187 | let (end_line_offset , end_line) = 188 | let (p, lo, ln) = fn n1 !cache in 189 | seek_in p; 190 | gn n1 p lo ln 191 | in 192 | let start_col = col_num utf8 start_line_offset n0 in 193 | let end_col = col_num utf8 end_line_offset n1 in 194 | let text = if text then sub_string start_line_offset (to_eol n1) 195 | else "" 196 | in 197 | { start_line; start_col; start_line_offset 198 | ; end_line ; end_col ; end_line_offset 199 | ; start_byte = n0; end_byte = n1 200 | ; file_name ; text } 201 | 202 | (* quote functions *) 203 | type quote = 204 | { numbers : bool 205 | ; prefix : string 206 | ; header : string 207 | ; footer : string 208 | ; enlight : string -> string } 209 | 210 | type style = 211 | | OCaml 212 | | Short 213 | 214 | let decorate : int -> string -> string = fun width s -> 215 | if s = "" then 216 | String.make width '=' 217 | else 218 | let n = width - String.length s in 219 | let n1 = n / 2 in 220 | let n2 = n - n1 - 2 in 221 | String.make n1 '=' ^ " " ^ s ^ " " ^ 
String.make n2 '=' 222 | 223 | let _red : string -> string = 224 | fun s -> "\027[0m\027[31m" ^ s ^ "\027[0m" 225 | 226 | let ulined : string -> string = 227 | fun s -> "\027[0m\027[4m" ^ s ^ "\027[0m" 228 | 229 | let default_quote = 230 | { numbers = true 231 | ; prefix = "" 232 | ; header = "" 233 | ; footer = "" 234 | ; enlight = ulined } 235 | 236 | let quote_text : quote -> Format.formatter -> pos_info -> unit = 237 | fun quote ch pos -> 238 | let open Format in 239 | if pos.text = "" then () else 240 | let lines = String.split_on_char '\n' pos.text in 241 | let start = pos.start_line in 242 | (*Printf.printf "start %d offset %d text: %s\n%!" start offset pos.text;*) 243 | let max_num = 244 | String.length (string_of_int (List.length lines + start - 1)) 245 | in 246 | let last = List.length lines - 1 in 247 | let print i line = 248 | let line = 249 | if i = 0 && i = last then 250 | begin 251 | let byte_offset1 = pos.start_byte - pos.start_line_offset in 252 | let byte_offset2 = pos.end_byte - pos.end_line_offset in 253 | let s1 = byte_offset2 - byte_offset1 in 254 | let s2 = String.length line - byte_offset2 in 255 | String.sub line 0 byte_offset1 ^ 256 | quote.enlight (String.sub line byte_offset1 s1) ^ 257 | String.sub line byte_offset2 s2 258 | 259 | end 260 | else if i = 0 then 261 | begin 262 | let byte_offset = pos.start_byte - pos.start_line_offset in 263 | let s = String.length line - byte_offset in 264 | String.sub line 0 byte_offset ^ 265 | quote.enlight (String.sub line byte_offset s) 266 | end 267 | else if i = last then 268 | begin 269 | let byte_offset = pos.end_byte - pos.end_line_offset in 270 | let s = String.length line - byte_offset in 271 | quote.enlight (String.sub line 0 byte_offset) ^ 272 | String.sub line byte_offset s 273 | end 274 | else quote.enlight line 275 | in 276 | let number = 277 | if quote.numbers then 278 | let num = string_of_int (i + start) in 279 | let pad = String.make (max_num - String.length num) ' ' in 280 | pad ^ num ^ "|" 281 | else "" 282 | in 283 | fprintf ch "%s%s%s\n" quote.prefix number line 284 | in 285 | if quote.header <> "" then fprintf ch "%s\n" quote.header; 286 | List.iteri print lines; 287 | if quote.footer <> "" then 288 | fprintf ch "%s%s" quote.prefix quote.footer 289 | 290 | let print_spos ?(style=OCaml) () ch ((infos,n):spos) = 291 | let open Format in 292 | if n = Input.phantom_byte_pos then 293 | fprintf ch "NO POSITION" 294 | else 295 | let n = Input.int_of_byte_pos n in 296 | let name = Input.filename infos in 297 | if name = "" then 298 | let format : (_,_,_) format = match style with 299 | | OCaml -> "character %d" 300 | | Short -> "%d" 301 | in 302 | fprintf ch format n 303 | else 304 | let format : (_,_,_) format = match style with 305 | | OCaml -> "File %S, character %d" 306 | | Short -> "%S:%d" 307 | in 308 | fprintf ch format name n 309 | 310 | let print_pos ?(style=OCaml) () ch pos = 311 | let open Format in 312 | let n1 = pos.offset_start in 313 | let n2 = pos.offset_end in 314 | if n1 = -1 || n2 = -1 then 315 | fprintf ch "NO POSITION" 316 | else 317 | let name = Input.filename pos.infos in 318 | if name = "" then 319 | let format : (_,_,_) format = match style with 320 | | OCaml -> "character %d-%d" 321 | | Short -> "%d-%d" 322 | in 323 | fprintf ch format n1 n2 324 | else 325 | let format : (_,_,_) format = match style with 326 | | OCaml -> "File %S, character %d to %d" 327 | | Short -> "%S:%d:%d" 328 | in 329 | fprintf ch format name n1 n2 330 | 331 | let print_pos_info ?(style=OCaml) ?quote () ch 
(pos:pos_info) = 332 | let open Format in 333 | let str_pos = 334 | if pos.file_name = "" then 335 | if pos.start_line = pos.end_line then 336 | if pos.start_col = pos.end_col then 337 | let format : (_,_,_) format = match style with 338 | | OCaml -> "line %d, characters %d" 339 | | Short -> "%d:%d" 340 | in 341 | sprintf format pos.start_line pos.start_col 342 | else 343 | let format : (_,_,_) format = match style with 344 | | OCaml -> "line %d, characters %d-%d" 345 | | Short -> "%d:%d-%d" 346 | in 347 | sprintf format pos.start_line pos.start_col pos.end_col 348 | else 349 | let format : (_,_,_) format = match style with 350 | | OCaml -> "line %d, character %d - line %d, character %d" 351 | | Short -> "%d:%d-%d:%d" 352 | in 353 | sprintf format pos.start_line 354 | pos.start_col pos.end_line pos.end_col 355 | else 356 | if pos.start_line = pos.end_line then 357 | if pos.start_col = pos.end_col then 358 | let format : (_,_,_) format = match style with 359 | | OCaml -> "File %S, line %d, characters %d" 360 | | Short -> "%S:%d:%d" 361 | in 362 | sprintf format pos.file_name pos.start_line pos.start_col 363 | else 364 | let format : (_,_,_) format = match style with 365 | | OCaml -> "File %S, line %d, characters %d-%d" 366 | | Short -> "%S:%d:%d-%d" 367 | in 368 | sprintf format pos.file_name pos.start_line 369 | pos.start_col pos.end_col 370 | else 371 | let format : (_,_,_) format = match style with 372 | | OCaml -> "File %S, line %d, character %d - line %d, character %d" 373 | | Short -> "%S:%d:%d-%d:%d" 374 | in 375 | sprintf format pos.file_name pos.start_line 376 | pos.start_col pos.end_line pos.end_col 377 | in 378 | match quote with 379 | | None -> fprintf ch "%s" str_pos 380 | | Some q -> quote_text { q with header = decorate 79 str_pos; 381 | footer = decorate 70 "" } 382 | ch pos 383 | 384 | let print_spos ?(style=OCaml) ?quote () ch (infos,offset as p) = 385 | try 386 | let p2 = mk_pos offset offset infos in 387 | print_pos_info ~style ?quote () ch (pos_info ~text:(quote<>None) p2) 388 | with No_detailed_position -> 389 | print_spos ~style () ch p 390 | 391 | let print_pos ?(style=OCaml) ?quote () ch p = 392 | try 393 | print_pos_info ~style ?quote () ch (pos_info ~text:(quote<>None) p) 394 | with No_detailed_position -> 395 | print_pos ~style () ch p 396 | 397 | let print_buf_pos ?(style=OCaml) ?quote () ch (buf,idx) = 398 | print_spos ~style ?quote () ch (Input.spos buf idx) 399 | 400 | (** exception returned by the parser *) 401 | exception Parse_error of Input.buffer * Input.idx * string list 402 | 403 | let fail_no_parse (_:exn) = exit 1 404 | 405 | (** A helper to handle exceptions *) 406 | let handle_exception ?(error=fail_no_parse) ?(style=OCaml) f a = 407 | try f a with Parse_error(buf, pos, msgs) as e -> 408 | let red fmt = "\027[31m" ^^ fmt ^^ "\027[0m%!" in 409 | Format.eprintf (red "Parse error: %a.\n%!") 410 | (print_buf_pos ~style ()) (buf, pos); 411 | if msgs <> [] then 412 | begin 413 | let open Format in 414 | let prl ch l = List.iter (fprintf ch "%s@ ") l in 415 | eprintf "@[expecting:@ %a@]@." 
prl msgs 416 | end; 417 | error e 418 | -------------------------------------------------------------------------------- /lib/pos.mli: -------------------------------------------------------------------------------- 1 | (** {1 Functions managing positions} *) 2 | 3 | (** short position *) 4 | type spos = Input.spos 5 | 6 | (** Type to represent an interval between two positions *) 7 | type pos = 8 | { offset_start : int 9 | ; offset_end : int 10 | ; infos : Input.infos } 11 | 12 | val mk_pos : Input.byte_pos -> Input.byte_pos -> Input.infos -> pos 13 | 14 | (** a phantom position, used for grammars accepting the empty input, and other 15 | reference initialisation *) 16 | val phantom_pos : pos 17 | val no_pos : pos 18 | 19 | (** returns false on [phantom_pos/no_pos] *) 20 | val has_pos : pos -> bool 21 | 22 | (** build the smallest position containing both positions *) 23 | val merge : pos -> pos -> pos 24 | 25 | (** Fully informed positions *) 26 | type pos_info = 27 | { start_line : int 28 | ; start_col : int 29 | ; start_line_offset : int 30 | ; start_byte : int 31 | ; end_line : int 32 | ; end_col : int 33 | ; end_line_offset : int 34 | ; end_byte : int 35 | ; file_name : string 36 | ; text : string } 37 | 38 | (** Function to recover the full position from the information stored in located 39 | data. 40 | 41 | The optional parameter [~relocate], which is the identity by default, allows to 42 | deal with situations like a file moved since the position was produced, or a file 43 | name stored as a relative path. The file_name in the pos_info is not 44 | affected by [relocate] which is only used to reopen the file. 45 | 46 | The optional parameter [~text] (false by default) will fill the [text] 47 | field within the position if it is true. Otherwise the [text] field is set to 48 | the empty string. 49 | 50 | This function may raise [No_detailed_position] if the file cannot be 51 | reopened or [File_changed] if the file is present but its last 52 | modification time or size changed. *) 53 | exception No_detailed_position 54 | exception File_changed 55 | 56 | val pos_info 57 | : ?relocate:(string -> string) -> ?text:bool -> pos -> pos_info 58 | 59 | (** configuration for quoting files *) 60 | type quote = 61 | { numbers : bool (** include line numbers *) 62 | ; prefix : string (** prefix added after each newline but not added for the 63 | first printed line *) 64 | ; header : string (** header, added as first line if non empty *) 65 | ; footer : string (** footer, added as last line if non empty *) 66 | ; enlight : string -> string 67 | (** used to transform the quoted part of the printed 68 | lines *) 69 | } 70 | 71 | (** default quote: 72 | {[ 73 | let ulined : string -> string = 74 | fun s -> "\027[0m\027[4m" ^ s ^ "\027[0m" 75 | 76 | let default_quote = 77 | { numbers = true 78 | ; prefix = "" 79 | ; header = "" 80 | ; footer = "" 81 | ; enlight = ulined } 82 | ]} 83 | *) 84 | val default_quote : quote 85 | 86 | (** Style for printing positions: *) 87 | type style = OCaml (** like OCaml *) 88 | | Short (** like gcc *) 89 | 90 | (** printing for the three kinds of positions, and the current position 91 | of a buffer. *) 92 | val print_pos_info : ?style:style -> ?quote:quote -> unit 93 | -> Format.formatter -> pos_info -> unit 94 | (** The three functions below will print only the byte_pos if the file 95 | cannot be reopened. The exception File_changed is raised if the file 96 | changed since last opening. 97 | 98 | If quote is given, the file is quoted.
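    As a minimal sketch (assuming a value [pos : pos] obtained from a previous
    parse, printed to [Format.std_formatter]):
    {[
      (* OCaml-style location, with the source text quoted and underlined *)
      print_pos ~style:OCaml ~quote:default_quote () Format.std_formatter pos
    ]}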
99 | *) 100 | val print_pos : ?style:style -> ?quote:quote -> unit 101 | -> Format.formatter -> pos -> unit 102 | val print_spos : ?style:style -> ?quote:quote -> unit 103 | -> Format.formatter -> spos -> unit 104 | val print_buf_pos : ?style:style -> ?quote:quote -> unit 105 | -> Format.formatter -> (Input.buffer * Input.idx) -> unit 106 | 107 | (** Exception raised when parsing fails *) 108 | exception Parse_error of Input.buffer * Input.idx * string list 109 | 110 | (** [handle_exception fn v] applies the function [fn] to [v] and handles the 111 | [Parse_error] exception. In particular, a parse error message is presented 112 | to the user in case of a failure, then [error e] is called where e is the 113 | raised exception. The default [error] is [fun _ -> exit 1]. [raise] is 114 | another possibility. *) 115 | val handle_exception : ?error:(exn -> 'b) -> ?style:style 116 | -> ('a -> 'b) -> 'a -> 'b 117 | -------------------------------------------------------------------------------- /lib/regexp.ml: -------------------------------------------------------------------------------- 1 | (** Type of a regular expression. *) 2 | type regexp = 3 | | Chr of char (** Single character. *) 4 | | Set of Charset.t (** Any character in a charset. *) 5 | | Seq of regexp list (** Sequence of regexps. *) 6 | | Alt of regexp list (** Alternative between regexps. *) 7 | | Opt of regexp (** Optional regexp. *) 8 | | Str of regexp (** Zero or more times the regexp. *) 9 | | Pls of regexp (** One or more times the regexp. *) 10 | | Sav of regexp (** Save the matching string. *) 11 | 12 | (** Short synonym of {!type:regexp}. *) 13 | type t = regexp 14 | 15 | (** [pp ff re] outputs the regexp [re] to the formatter [ff]. *) 16 | let rec pp : Format.formatter -> t -> unit = fun ff re -> 17 | let pp_sep ff _ = Format.pp_print_string ff ";" in 18 | let pp_list ff = Format.pp_print_list ~pp_sep pp ff in 19 | match re with 20 | | Chr(c) ->Format.fprintf ff "Chr(%C)" c 21 | | Set(s) ->Format.fprintf ff "Set(%a)" Charset.pp s 22 | | Seq(l) ->Format.fprintf ff "Seq([%a])" pp_list l 23 | | Alt(l) ->Format.fprintf ff "Alt([%a])" pp_list l 24 | | Opt(r) ->Format.fprintf ff "Opt(%a)" pp r 25 | | Str(r) ->Format.fprintf ff "Str(%a)" pp r 26 | | Pls(r) ->Format.fprintf ff "Pls(%a)" pp r 27 | | Sav(r) ->Format.fprintf ff "Sav(%a)" pp r 28 | 29 | (** [accepts_empty re] tells whether the empty input is valid for [re]. *) 30 | let rec accepts_empty : regexp -> bool = fun re -> 31 | match re with 32 | | Chr(_) -> false 33 | | Set(s) -> Charset.equal s Charset.empty 34 | | Seq(l) -> List.for_all accepts_empty l 35 | | Alt(l) -> List.exists accepts_empty l 36 | | Opt(_) -> true 37 | | Str(_) -> true 38 | | Pls(r) -> accepts_empty r 39 | | Sav(r) -> accepts_empty r 40 | 41 | (** [accepted_first_chars re] returns the set of characters that are possible, 42 | valid first characters for matching [re]. 
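    For instance, [accepted_first_chars (Seq [Opt (Chr 'a'); Chr 'b'])] contains
    both ['a'] and ['b'], since the optional ['a'] may match the empty input.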
*) 43 | let rec accepted_first_chars : regexp -> Charset.t = fun re -> 44 | let rec aux_seq l = 45 | match l with 46 | | [] -> Charset.empty 47 | | r::l -> let cs = accepted_first_chars r in 48 | if accepts_empty r then Charset.union cs (aux_seq l) else cs 49 | in 50 | match re with 51 | | Chr(c) -> Charset.singleton c 52 | | Set(s) -> s 53 | | Seq(l) -> aux_seq l 54 | | Alt(l) -> let fn cs r = Charset.union cs (accepted_first_chars r) in 55 | List.fold_left fn Charset.empty l 56 | | Opt(r) -> accepted_first_chars r 57 | | Str(r) -> accepted_first_chars r 58 | | Pls(r) -> accepted_first_chars r 59 | | Sav(r) -> accepted_first_chars r 60 | 61 | module Pacomb = struct 62 | module Lex = Lex 63 | module Grammar = Grammar 64 | end 65 | 66 | let%parser atom_charset first = 67 | (c1::CHAR) '-' (c2::CHAR) => (if c1 = '-' || (not first && c1 = ']') || 68 | (first && c1 = '^') || 69 | c2 = '-' then Lex.give_up (); 70 | Charset.range c1 c2) 71 | ; (c1::CHAR) => (if (not first && (c1 = '-' || c1 = ']')) || 72 | (first && c1 = '^') then Lex.give_up (); 73 | Charset.singleton c1) 74 | 75 | let%parser p_charset = 76 | (cs1::atom_charset true) (cs2:: ~* (atom_charset false)) => 77 | List.fold_left Charset.union cs1 cs2 78 | 79 | let is_spe c = List.mem c ['\\';'.';'*';'+';'?';'[';']'] 80 | 81 | let%parser rec atom_regexp = 82 | '[' (cpl:: ~? '^') (cs::p_charset) ']' => 83 | begin 84 | let cs = if cpl <> None then Charset.complement (Charset.add cs '\255') 85 | else cs 86 | in 87 | Set cs 88 | end 89 | ; (c::CHAR) => 90 | begin 91 | if is_spe c then Lex.give_up () else Chr c 92 | end 93 | ; '\\' (c::CHAR) => 94 | begin 95 | if is_spe c then Chr c else Lex.give_up () 96 | end 97 | ; '.' => Set (Charset.del Charset.full '\255') 98 | ; "\\(" (r::regexp) "\\)" => Sav r 99 | ; (r::atom_regexp) '?' => Opt r 100 | ; (r::atom_regexp) '*' => Str r 101 | ; (r::atom_regexp) '+' => Pls r 102 | 103 | and seq_regexp = 104 | (rs :: ~+ atom_regexp) => Seq rs 105 | 106 | and regexp = 107 | (rs :: ~+ ["\\|"] seq_regexp) => Alt rs 108 | 109 | (* Exception raised when a regexp cannot be parsed. *) 110 | exception Regexp_error of Input.buffer * Input.idx 111 | 112 | let from_string : string -> regexp = fun s -> 113 | try Grammar.parse_string regexp Blank.none s 114 | with Pos.Parse_error(b,s,_) -> raise (Regexp_error(b,s)) 115 | 116 | open Lex 117 | 118 | let from_regexp_grps : ?grps:int list -> regexp -> string list Lex.t = 119 | fun ?grps r -> 120 | let n = ref 0 in 121 | let do_save fn r = 122 | let n0 = !n in 123 | incr n; 124 | let r = fn r in 125 | match grps with 126 | | None -> save r (fun s l -> s :: l) 127 | | Some l -> 128 | if List.mem n0 l then save r (fun s l -> s :: l) 129 | else r 130 | in 131 | let rec fn = function 132 | | Chr c -> appl (fun _ -> []) (char c) 133 | | Set s -> appl (fun _ -> []) (charset s) 134 | | Alt l -> alts (List.map fn l) 135 | | Seq l -> seqs (List.map fn l) (@) 136 | | Opt r -> option [] (fn r) 137 | | Str r -> star (fn r) (fun () -> []) (@) 138 | | Pls r -> plus (fn r) (fun () -> []) (@) 139 | | Sav r -> do_save fn r 140 | in 141 | let r = do_save fn r in 142 | begin 143 | match grps with 144 | | None -> () 145 | | Some l -> if List.exists (fun g -> g < 0 || g >= !n) l 146 | then invalid_arg "from_regexp_grps" 147 | end; 148 | r 149 | 150 | let from_regexp : regexp -> string Lex.t = fun r -> 151 | Lex.appl 152 | (function [s] -> s | _ -> assert false) 153 | (from_regexp_grps ~grps:[0] r) 154 | 155 | (** create a terminal from a regexp. 
Returns the groups list, last to finish 156 | to be parsed is first in the result *) 157 | let regexp_grps : ?name:string -> ?grps:int list -> regexp -> string list t = 158 | fun ?name ?grps r -> 159 | let r = from_regexp_grps ?grps r in 160 | { r with n = default r.n name } 161 | 162 | let regexp : ?name:string -> regexp -> string t = fun ?name r -> 163 | let r = from_regexp r in 164 | { r with n = default r.n name } 165 | 166 | let blank_regexp s = Blank.from_terminal (regexp (from_string s)) 167 | -------------------------------------------------------------------------------- /lib/regexp.mli: -------------------------------------------------------------------------------- 1 | (** A small module for efficient regular expressions. *) 2 | 3 | (** Type of a regular expression. *) 4 | type regexp = 5 | | Chr of char (* Single character. *) 6 | | Set of Charset.t (* Any character in a charset. *) 7 | | Seq of regexp list (* Sequence of regular expressions. *) 8 | | Alt of regexp list (* Alternative between regexps. *) 9 | | Opt of regexp (* Optional regexp. *) 10 | | Str of regexp (* Zero or more times the regexp. *) 11 | | Pls of regexp (* One or more times the regexp. *) 12 | | Sav of regexp (* Save the matching string. *) 13 | 14 | (** Short synonym of {!type:regexp}. *) 15 | type t = regexp 16 | 17 | (** [pp ff re] outputs the regexp [re] to the formatter [ff]. *) 18 | val pp : Format.formatter -> regexp -> unit 19 | 20 | (** [accepts_empty re] tells whether the empty input is valid for [re]. *) 21 | val accepts_empty : regexp -> bool 22 | 23 | (** [accepted_first_chars re] returns the set of characters that are possible, 24 | valid first characters for matching [re]. *) 25 | val accepted_first_chars : regexp -> Charset.t 26 | 27 | (** Exception raised when a regexp does not match. Note that the given buffer 28 | and position correspond to the first character that cannot be matched. *) 29 | exception Regexp_error of Input.buffer * Input.idx 30 | 31 | (** [from_string s] convert a string into a regexp following [Str] syntax. *) 32 | val from_string : string -> regexp 33 | 34 | (** create a terminal from a regexp. Returns the whole matched string *) 35 | val regexp : ?name:string -> regexp -> string Lex.t 36 | 37 | (** create a terminal from a regexp. Returns the groups list, last to finish to 38 | be parsed is first in the result. The optional argument grps allows 39 | selection of the produced groups. As usual, 0 means the whole regexp and n > 40 | 0 the sub string corresponding to the nth opening parenthesis. 
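    A possible use, as a sketch (the regexp follows the [Str]-like syntax accepted
    by [from_string]):
    {[
      (* a terminal matching "HH:MM" and returning the two digit groups *)
      let time : string list Lex.t =
        regexp_grps ~name:"time" ~grps:[1;2] (from_string "\\([0-9]+\\):\\([0-9]+\\)")
    ]}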
*) 41 | val regexp_grps : ?name:string -> ?grps:int list -> regexp -> string list Lex.t 42 | 43 | (** create a blank function from a string representing a regexp *) 44 | val blank_regexp : string -> Blank.t 45 | -------------------------------------------------------------------------------- /lib/unionFind.ml: -------------------------------------------------------------------------------- 1 | 2 | type 'a t = 'a cell ref 3 | 4 | and 'a cell = Link of 'a t | Root of 'a 5 | 6 | let root x = ref (Root x) 7 | 8 | let rec find x = 9 | match !x with 10 | | Root r -> (r, x) 11 | | Link y -> let (_, y' as c) = find y in 12 | if y != y' then x := Link y'; 13 | c 14 | 15 | let union fn x y = 16 | let (rx,x) = find x in 17 | let (ry,y) = find y in 18 | if x != y then 19 | begin 20 | let r = fn rx ry in 21 | x := Link y; 22 | y := Root r 23 | end 24 | 25 | let set_root x r = 26 | match !x with 27 | | Root _ -> x := Root r 28 | | Link _ -> invalid_arg "set_root on non root" 29 | -------------------------------------------------------------------------------- /lib/unionFind.mli: -------------------------------------------------------------------------------- 1 | (* type of union-find equivalence classes with data of type 'a attached *) 2 | type 'a t 3 | 4 | (* creation of a new equivalence class *) 5 | val root : 'a -> 'a t 6 | 7 | (* get the value and root of an equivalence class *) 8 | val find : 'a t -> ('a * 'a t) 9 | 10 | (* merge two equivalence classes *) 11 | val union : ('a -> 'a -> 'a) -> 'a t -> 'a t -> unit 12 | 13 | (* set the value of a root element of an equivalence class. 14 | raise Invalid_argument "set_root on non root" if the 15 | first argument is not a root (i.e. the result of find) *) 16 | val set_root : 'a t -> 'a -> unit 17 | -------------------------------------------------------------------------------- /lib/utf8.mli: -------------------------------------------------------------------------------- 1 | 2 | (* possible contexts for column numbering in a file *) 3 | type context = 4 | ASCII | UTF8 | CJK_UTF8 5 | 6 | (* [width c] returns the column width of the unicode character [c] *) 7 | val width : ?context:context -> Uchar.t -> int 8 | 9 | type grapheme_break_property = 10 | | Other 11 | | CR 12 | | LF 13 | | Prepend 14 | | Control 15 | | Extend 16 | | SpacingMark 17 | | L 18 | | V 19 | | T 20 | | LV 21 | | LVT 22 | | ZWJ 23 | | RegionalIndicator 24 | | ExtPict 25 | 26 | (* Give the grapheme break property of a character *) 27 | val gbp : Uchar.t -> grapheme_break_property 28 | 29 | type previous_chars = 30 | EvenRegionalIndicator | ExtPictExtendStar | NoPrevious 31 | 32 | val encode : Uchar.t -> string 33 | 34 | val decode : string -> int -> Uchar.t * int 35 | 36 | val look : string -> int -> Uchar.t 37 | 38 | val next : string -> int -> int 39 | 40 | val prev : string -> int -> int 41 | 42 | val of_list : Uchar.t list -> string 43 | 44 | val to_list : string -> Uchar.t list 45 | 46 | val fold : ('a -> Uchar.t -> 'a) -> 'a -> string -> 'a 47 | 48 | val length : context -> string -> int 49 | 50 | val sub : string -> int -> int -> string 51 | 52 | val grapheme_break : string -> int -> bool 53 | 54 | val grapheme_break_after : Uchar.t list -> Uchar.t -> bool 55 | 56 | val next_grapheme : string -> int -> int 57 | 58 | val prev_grapheme : string -> int -> int 59 | 60 | val fold_grapheme : ('a -> string -> 'a) -> 'a -> string -> 'a 61 | -------------------------------------------------------------------------------- /lib/word_list.ml:
-------------------------------------------------------------------------------- 1 | module Pacomb = struct 2 | module Lex = Lex 3 | module Grammar = Grammar 4 | end 5 | 6 | type 'a data = 7 | { mutable leafs : 'a list 8 | ; mutable next : 'a data option array } 9 | 10 | type ('a,'b) t = 11 | { data : 'b data 12 | ; uniq : bool 13 | ; map : 'a -> 'a 14 | ; cs : Charset.t 15 | ; finl : Input.buffer -> Input.idx -> bool } 16 | 17 | let create_data () = { leafs = []; next = Array.make 256 None } 18 | 19 | let idt x = x 20 | 21 | let create ?(unique=true) ?(map=idt) 22 | ?(cs=Charset.full) ?(final_test=fun _ _ -> true) () = 23 | { data = create_data () ; uniq = unique; map; cs; finl = final_test } 24 | 25 | let reset t = t.data.leafs <- []; t.data.next <- Array.make 256 None 26 | 27 | let save t = { leafs = t.data.leafs; next = t.data.next } 28 | 29 | let save_and_reset t = let s = save t in reset t; s 30 | 31 | let restore t s = t.data.leafs <- s.leafs; t.data.next <- s.next 32 | 33 | let size { data = {leafs; next}; _} = 34 | let res = ref 0 in 35 | let rec fn {leafs; next} = 36 | res := !res + List.length leafs; 37 | Array.iter gn next 38 | and gn = function 39 | | None -> () 40 | | Some d -> fn d 41 | in 42 | res := !res + List.length leafs; 43 | Array.iter gn next; 44 | !res 45 | 46 | exception Already_bound 47 | 48 | let next tbl c = tbl.next.(Char.code c) 49 | 50 | let advance : bool -> (char -> char) -> 'b data -> string -> 'b data = 51 | fun add map tbl s -> 52 | let r = ref tbl in 53 | for i = 0 to String.length s - 1 do 54 | let c = map s.[i] in 55 | match !r.next.(Char.code c) with 56 | | Some tbl -> r := tbl 57 | | None -> 58 | if add then 59 | let tbl = create_data () in 60 | !r.next.(Char.code c) <- Some tbl; 61 | r := tbl 62 | else raise Not_found 63 | done; 64 | !r 65 | 66 | let add_ascii : (char,'b) t -> string -> 'b -> unit = 67 | fun { data; uniq; map; cs } s v -> 68 | if s = "" then invalid_arg "Word_list.add_ascii: empty word"; 69 | if not (Charset.mem cs s.[0]) then 70 | invalid_arg "Word_list.add: charset mismatch"; 71 | let data = advance true map data s in 72 | if uniq && data.leafs <> [] then raise Already_bound; 73 | data.leafs <- v :: data.leafs 74 | 75 | let mem_ascii : (char,'b) t -> string -> bool = 76 | fun { data; map; _ } s -> 77 | try 78 | let data = advance false map data s in 79 | data.leafs <> [] 80 | with 81 | Not_found -> false 82 | 83 | let add_utf8 : (string, 'b) t -> string -> 'b -> unit = 84 | fun { data; map; uniq; cs } s v -> 85 | if s = "" then invalid_arg "Word_list.add_utf8: empty word"; 86 | if not (Charset.mem cs s.[0]) then 87 | invalid_arg "Word_list.add: charset mismatch"; 88 | let fn data s = advance true (fun c -> c) data (map s) in 89 | let data = Utf8.fold_grapheme fn data s in 90 | if uniq && data.leafs <> [] then raise Already_bound; 91 | data.leafs <- v :: data.leafs 92 | 93 | let mem_utf8 : (string, 'b) t -> string -> bool = 94 | fun { data; map; _ } s -> 95 | try 96 | let fn data s = advance false (fun c -> c) data (map s) in 97 | let data = Utf8.fold_grapheme fn data s in 98 | data.leafs <> [] 99 | with 100 | Not_found -> false 101 | 102 | let word : ?name:string -> (char, 'a) t -> 'a Grammar.t = 103 | fun ?name { data = tbl; map; cs; finl; uniq } -> 104 | let n = Lex.default "WORD" name in 105 | if uniq then 106 | let rec f tbl s0 n0 = 107 | let (c,s,n) = Input.read s0 n0 in 108 | let c = map c in 109 | match next tbl c with 110 | | Some t -> f t s n 111 | | None -> 112 | if finl s0 n0 && tbl.leafs <> [] then (List.hd tbl.leafs, 
s0, n0) 113 | else (raise Lex.NoParse) 114 | in 115 | let f = f tbl in 116 | let lex = Lex.{ n; f; a = Custom(f,Assoc.new_key ()); c = cs } 117 | in 118 | Grammar.term ?name lex 119 | else 120 | let rec f tbl s0 n0 = 121 | let (c,s,n) = Input.read s0 n0 in 122 | let c = map c in 123 | match next tbl c with 124 | | Some t -> f t s n 125 | | None -> 126 | if finl s0 n0 && tbl.leafs <> [] then (tbl.leafs, s0, n0) 127 | else raise Lex.NoParse 128 | in 129 | let f = f tbl in 130 | let lex = Lex.{ n; f; a = Custom(f,Assoc.new_key ()); c = cs } 131 | in 132 | Grammar.unmerge ?name (Grammar.term lex) 133 | 134 | let utf8_word : ?name:string -> (string, 'a) t -> 'a Grammar.t = 135 | fun ?name { data = tbl; map; finl; cs; uniq } -> 136 | let n = Lex.default "UTF8_WORD" name in 137 | if uniq then 138 | let rec f tbl s n = 139 | try 140 | let (g,s,n) = Lex.((any_grapheme ()).f s n) in 141 | let g = map g in 142 | f (advance false (fun c -> c) tbl g) s n 143 | with 144 | Not_found -> 145 | if finl s n && tbl.leafs <> [] then(List.hd tbl.leafs, s, n) 146 | else raise Lex.NoParse 147 | in 148 | let f = f tbl in 149 | let lex = Lex.{ n; f; a = Custom(f,Assoc.new_key ()); c = cs } 150 | in 151 | Grammar.term ?name lex 152 | else 153 | let rec f tbl s n = 154 | try 155 | let (g,s,n) = Lex.((any_grapheme ()).f s n) in 156 | let g = map g in 157 | f (advance false (fun c -> c) tbl g) s n 158 | with 159 | Not_found -> 160 | if finl s n && tbl.leafs <> [] then (tbl.leafs, s, n) 161 | else raise Lex.NoParse 162 | in 163 | let f = f tbl in 164 | let lex = Lex.{ n; f; a = Custom(f,Assoc.new_key ()); c = cs } 165 | in 166 | Grammar.unmerge ?name (Grammar.term lex) 167 | -------------------------------------------------------------------------------- /lib/word_list.mli: -------------------------------------------------------------------------------- 1 | (** {1 Module to build and parse list of words} *) 2 | 3 | (** Type of a word list with 4 | 'a : the type of characters (typically, char for ascii or string for utf8) 5 | 'b : a value associated to each word *) 6 | type ('a,'b) t 7 | 8 | (** exception raise when multiple binding are added and not allowed *) 9 | exception Already_bound 10 | 11 | (** Create a new empty table. The optional parameter [ unique ] defaults to 12 | true. Setting it to false with allow multiple identical bindings, creating 13 | ambiguous grammars. If [ unique ] is true, then adding multiple bindings will 14 | raise the exception [ Already_bound ]. 15 | 16 | [map] is a function transforming character before addition (typically a case 17 | transformer or a unicode normalisation). (defaults to identity). 18 | 19 | [final_test] will be called after parsing. It may be used typically to 20 | ensure that the next character is not alphanumeric. Defaults to an always 21 | passing test. 22 | 23 | [cs] can be given as an optimisation. All words added should start with 24 | characters in this set. *) 25 | val create : ?unique:bool -> 26 | ?map:('a -> 'a) -> 27 | ?cs:Charset.t -> 28 | ?final_test:(Input.buffer -> Input.idx -> bool) 29 | -> unit -> ('a,'b) t 30 | 31 | (** Returns the number of bindings in the table *) 32 | val size : ('a,'b) t -> int 33 | 34 | (** empty a table *) 35 | val reset : ('a,'b) t -> unit 36 | 37 | (** [add_ascii tbl s v] adds a binding from [s] to [v] in [tbl], keep all 38 | previous bindings. *) 39 | 40 | val add_ascii : (char,'b) t -> string -> 'b -> unit 41 | 42 | (** [mem_ascii tbl s] tells if [s] if present in [tbl]. 
Typically used to reject 43 | identifiers that are keywords *) 44 | val mem_ascii : (char,'b) t -> string -> bool 45 | 46 | (** Same as above for unicode strings, which are split into graphemes *) 47 | val add_utf8 : (string, 'b) t -> string -> 'b -> unit 48 | 49 | val mem_utf8 : (string,'b) t -> string -> bool 50 | 51 | (** Parses a word from the dictionary, returning as action all the associated 52 | values (it is an ambiguous grammar if there is more than one value). *) 53 | val word : ?name:string -> (char, 'a) t -> 'a Grammar.t 54 | 55 | val utf8_word : ?name:string -> (string, 'a) t -> 'a Grammar.t 56 | 57 | type 'a data 58 | val save : ('a,'b) t -> 'b data 59 | val save_and_reset : ('a,'b) t -> 'b data 60 | val restore : ('a,'b) t -> 'b data -> unit 61 | -------------------------------------------------------------------------------- /pacomb.opam: -------------------------------------------------------------------------------- 1 | synopsis: "Parsing library based on combinators and ppx extension to write languages" 2 | description: 3 | """ 4 | Pacomb is a parsing library that compiles grammars to combinators prior to 5 | parsing together with a PPX extension to write parsers inside OCaml files. 6 | 7 | The advantages of Pacomb are 8 | 9 | - Grammars as first class values defined in your OCaml files. This is 10 | an example from the distribution: 11 | 12 | (* The three levels of priorities *) 13 | type p = Atom | Prod | Sum 14 | let%parser rec 15 | (* This includes each priority level in the next one *) 16 | expr p = Atom < Prod < Sum 17 | (* all other rules are selected by their priority level *) 18 | ; (p=Atom) (x::FLOAT) => x 19 | ; (p=Atom) '(' (e::expr Sum) ')' => e 20 | ; (p=Prod) (x::expr Prod) '*' (y::expr Atom) => x*.y 21 | ; (p=Prod) (x::expr Prod) '/' (y::expr Atom) => x/.y 22 | ; (p=Sum ) (x::expr Sum ) '+' (y::expr Prod) => x+.y 23 | ; (p=Sum ) (x::expr Sum ) '-' (y::expr Prod) => x-.y 24 | 25 | - Good performances: 26 | - on non-ambiguous grammars, 2 to 3 times slower than ocamlyacc 27 | - on ambiguous grammars O(N^3 ln(N)) can be achieved. 28 | 29 | - Parsing from left to right (despite the use of combinators), which avoids 30 | keeping the whole input in memory and allows parsing streams. 31 | 32 | - Dependent sequences allowing for self-extensible grammars (like adding a new 33 | infix operator with a given priority, as in one of the examples). 34 | 35 | - Management of blanks that, for instance, allows for nested languages using 36 | different kinds of comments or blanks. 37 | 38 | - Support for cache and merge for ambiguous grammars (to get O(N^3 ln(N))) 39 | 40 | - Enough support for utf8 to write parsers for languages using utf8. 41 | 42 | - Comes with documentation and various examples illustrating most possibilities. 43 | 44 | All this makes Pacomb a promising solution for writing languages in OCaml.
45 | """ 46 | 47 | opam-version: "2.0" 48 | maintainer: "Christophe Raffalli " 49 | bug-reports: "https://github.com/craff/pacomb/issues" 50 | homepage: "https://github.com/craff/pacomb" 51 | dev-repo: "git+https://github.com/craff/pacomb.git" 52 | authors: [ 53 | "Christophe Raffalli " 54 | "Rodolphe Lepigre " ] 55 | license: "MIT" 56 | 57 | depends: [ 58 | "ocaml" { >= "4.04.1" } 59 | "dune" { >= "1.9.0" } 60 | "ppxlib" { >= "0.10.0" } 61 | "stdlib-shims" 62 | ] 63 | 64 | build: [ [ "dune" "build" "-p" name "-j" jobs ] ] 65 | run-test: [ [ "dune" "runtest" "-p" name "-j" jobs ] ] 66 | -------------------------------------------------------------------------------- /ppx/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name pacomb_ppx) 3 | (public_name pacomb.ppx) 4 | (preprocess (pps ppxlib.metaquot)) 5 | (modules :standard) 6 | (kind ppx_rewriter) 7 | (libraries ppxlib stdlib-shims)) 8 | -------------------------------------------------------------------------------- /tests/Break.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Grammar 3 | 4 | (* Blank function *) 5 | let blank = Regexp.blank_regexp "\\(\\([#][^\n]*\\)\\|[ \r\t\026]+\\)*" 6 | (* bug: "\\([ \r\t\026]\\|\\(\\(#[^\n]*\\)\\)*" *) 7 | 8 | (* Parser for hexadecimal integers *) 9 | let%parser char = 10 | (i::RE"[0-9A-Fa-F]+") => Uchar.of_int (int_of_string ("0x" ^ i)) 11 | 12 | let%parser sep = "÷" => true ; "×" => false 13 | 14 | let%parser rec sample_aux = (l::sample_aux) (c::char) (s::sep) => (c,s) :: l 15 | ; (c::char) (s::sep) => [(c,s)] 16 | 17 | let%parser sample = sep (l::sample_aux) => List.rev l 18 | 19 | let%parser rec break = () => [] 20 | ; (g::GRAPHEME) (l::break) => g::l 21 | 22 | let good = ref true 23 | 24 | let test pos l0 = 25 | try 26 | let chars = List.map fst l0 in 27 | let s = Utf8.of_list chars in 28 | let rec fn = function 29 | | [] -> [] 30 | | []::_ -> Format.eprintf "unexpected empty at %a\n%!" 31 | (Pos.print_pos ()) pos; 32 | good := false; 33 | raise Exit 34 | | [x]::l -> (x,true)::fn l 35 | | (x::l1)::l -> (x,false)::fn (l1::l) 36 | in 37 | let l = parse_string ~utf8:Utf8.UTF8 break Blank.none s in 38 | let l = List.map (fun s -> Utf8.to_list s) l in 39 | let l = fn l in 40 | if l <> l0 then 41 | begin 42 | Format.eprintf "break fail at %a\n%!" 
(Pos.print_pos ()) pos; 43 | List.iter (fun (l,b) -> Printf.eprintf "%x %b " (Uchar.to_int l) b) l; 44 | Format.eprintf " <> "; 45 | List.iter (fun (l,b) -> Printf.eprintf "%x %b " (Uchar.to_int l) b) l0; 46 | Format.eprintf "\n%!"; 47 | good := false; 48 | raise Exit 49 | end 50 | with Exit -> () 51 | 52 | (* Single mapping parser *) 53 | let%parser test = (l::sample) (~+ '\n' => ()) => test l_pos l 54 | 55 | let%parser tests = 56 | (star ('\n' => ())) (star test) => () 57 | 58 | let parse = parse_channel ~utf8:Utf8.UTF8 tests blank 59 | 60 | let _ = 61 | (* Command line args *) 62 | if Array.length Sys.argv != 2 then 63 | begin 64 | let pn = Sys.argv.(0) in 65 | Printf.eprintf "Usage: %s " pn; 66 | exit 1 67 | end; 68 | let infile = Sys.argv.(1) in 69 | 70 | (* Parsing and preparing the data *) 71 | let infile = open_in infile in 72 | let _ = Pos.handle_exception parse infile in 73 | 74 | close_in infile; 75 | if not !good then exit 1 76 | -------------------------------------------------------------------------------- /tests/big_expr.ml: -------------------------------------------------------------------------------- 1 | 2 | let rec seq op n atom = 3 | if n <= 0 then atom () 4 | else (atom (); print_string op; seq op (n-1) atom) 5 | 6 | let paren atom = 7 | print_string "("; atom (); print_string ")" 8 | 9 | let int () = 10 | print_string (string_of_int (Random.int 0x20000000)) 11 | 12 | let rec expr n p s = 13 | if n <= 0 then int () 14 | else seq (if Random.bool () then " + " else " - ") 15 | s (fun () -> seq 16 | (if Random.bool () then "*" else "/") 17 | p (fun () -> 18 | if n = 1 then int () else 19 | paren (fun () -> expr (n - 1) p s))) 20 | 21 | let n = int_of_string (Sys.argv.(1)) 22 | let p = int_of_string (Sys.argv.(2)) 23 | let s = int_of_string (Sys.argv.(3)) 24 | 25 | let _ = expr n p s; print_newline () 26 | -------------------------------------------------------------------------------- /tests/calc_factor.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Pos 3 | open Grammar 4 | 5 | (* factorisation ... just a test for the ppx, 6 | here left factorisation is done by the elimination of left recursion, 7 | to the result is the same as with calc_prio.ml *) 8 | type p = Atom | Prod | Sum 9 | let%parser rec 10 | expr p = Atom < Prod < Sum 11 | ; (p=Atom) (x::FLOAT) => x 12 | ; (p=Atom) '(' (e::expr Sum) ')' => e 13 | ; (p=Prod) (x::expr Prod) => ( '*' (y::expr Atom) => x*.y 14 | ; '/' (y::expr Atom) => x/.y) 15 | ; (p=Sum ) (x::expr Sum ) => ('+' (y::expr Prod) => x+.y 16 | ; '-' (y::expr Prod) => x-.y) 17 | 18 | let top = expr Sum 19 | 20 | let blank = Blank.from_charset (Charset.singleton ' ') 21 | 22 | let _ = 23 | try 24 | while true do 25 | let f () = 26 | Printf.printf "=> %!"; 27 | let line = input_line stdin in 28 | let n = parse_string top blank line in 29 | Printf.printf "%f\n%!" n 30 | in 31 | handle_exception ~error:(fun _ -> ()) f () 32 | done 33 | with 34 | End_of_file -> () 35 | -------------------------------------------------------------------------------- /tests/calc_utf8.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Pos 3 | open Grammar 4 | 5 | (* If -v option is given, the value of expression between parenthesis is 6 | printed *) 7 | let show_sub = Array.length Sys.argv > 1 && Sys.argv.(1) = "-v" 8 | 9 | (* Here is the definition of the parser with the ppx syntax extension described 10 | in the documentation. 
11 | 12 | Here, we deal with priorities by manually defining three different grammars. 13 | Starting with the grammar for atomic expressions. *) 14 | let%parser rec 15 | atom = (x::FLOAT) => x (* constant *) 16 | ; (show_sub=false) '(' (e::expr) ')' => e (* rule for parenthesis 17 | when show_sub is false *) 18 | ; (show_sub=true) '(' (e::expr) ')' =>(* idem with show_sub true *) 19 | 20 | (Format.printf "%a: %f\n" 21 | (Pos.print_pos ()) _pos e; 22 | (* ^^^^^^ to access position of l *) 23 | e) 24 | 25 | (* Here is the grammar for products *) 26 | and prod = (a::atom) => a 27 | ; (x::prod) '*' (y::atom) => x*.y 28 | ; (x::prod) '/' (y::atom) => x/.y 29 | 30 | (* and finally all remaining expressions *) 31 | and expr = (a::prod) => a 32 | ; (x::expr) '+' (y::prod) => x+.y 33 | ; (x::expr) '-' (y::prod) => x-.y 34 | 35 | (* A subtlety : we want to parse expression, on by one and print the 36 | result. Pacomb evaluates action after the next token to avoid some useless 37 | evaluation but still make give_up usable. So we evaluate after newline... 38 | But if we where parsing blank after newline, we still would have to wait 39 | the input after the newline. 40 | 41 | To solve this, we parse line with no blank, use [Grammar.layout] to accept 42 | blank inside expression. 43 | 44 | A simpler solution (used in calc_ext.ml) is to read the input line 45 | by line and parse each line using [Grammar.parse_string]. 46 | *) 47 | 48 | let config = 49 | Blank.{ default_layout_config with 50 | new_blanks_before = true 51 | ; new_blanks_after = true} 52 | 53 | (* The parsing calling expression and changing the blank, 54 | printing the result and the next prompt. *) 55 | let%parser rec exprs = 56 | () => () 57 | ; exprs (e::expr) '\n' => Format.printf "%f\n=> %!" e 58 | 59 | (* we define the characters to be ignored, here space only *) 60 | let blank = Blank.from_charset (Charset.singleton ' ') 61 | 62 | let _ = 63 | try 64 | while true do 65 | let f () = 66 | Format.printf "=> %!"; (* initial prompt *) 67 | parse_channel ~utf8:Utf8.UTF8 exprs blank stdin; 68 | print_grammar ~def:false stdout exprs; 69 | raise End_of_file 70 | in 71 | (* [Pos] module provides a function to handle exception with 72 | an optional argument to call for error (default is to exit with 73 | code 1 *) 74 | handle_exception ~error:(fun _ -> ()) f (); 75 | done 76 | with 77 | End_of_file -> () 78 | -------------------------------------------------------------------------------- /tests/dseq_test.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Pos 3 | open Grammar 4 | 5 | let bspace = Blank.from_charset (Charset.singleton ' ') 6 | 7 | let test ?(blank=bspace) g s r = 8 | assert (parse_string g blank s = r) 9 | 10 | let tests ?(blank=bspace) g l = 11 | List.iter (fun (s,r) -> test ~blank g s r) l 12 | 13 | let test_fail ?(blank=bspace) g s = 14 | try 15 | let _ = parse_string g blank s in assert false 16 | with Parse_error _ -> () 17 | 18 | let tests_fail ?(blank=bspace) g l = 19 | List.iter (test_fail ~blank g) l 20 | 21 | let%parser [@cache] rec seq (n:int) = 22 | (n = 0) () => [] 23 | ; (n > 0) 'a' (l::seq (n-1)) => ('a' :: l) 24 | 25 | let%parser [@cache] seqf = 26 | ((n,__)>:(n::INT => (n,()))) (l::seq n) => l 27 | 28 | let _ = test seqf "0" [] 29 | let _ = test seqf "1 a" ['a'] 30 | let _ = test seqf "2 a a" ['a';'a'] 31 | let _ = test seqf "3 a a a" ['a';'a';'a'] 32 | 33 | let _ = test_fail seqf "1" 34 | let _ = test_fail seqf "1 a a" 35 | 36 | let%parser infix = '+' 
=> (2,'+') 37 | ; '*' => (1,'*') 38 | 39 | type t = L of int | N of t * char * t 40 | 41 | let%parser [@cache] rec term = 42 | (n::INT) => (0, L n) 43 | ; '(' ((__,t) :: term) ')' => (0,t) 44 | ; (t::term_infix) => t 45 | 46 | and [@cache] term_infix = 47 | ((pl,tl)>:term) 48 | ((ps,s)::(((ps,__) = c) :: infix => (if pl > ps then Lex.give_up (); c))) 49 | ((pr,tr)::term) 50 | => (if pr >= ps then Lex.give_up (); (ps,N(tl,s,tr))) 51 | 52 | let _ = test term "1" (0, L 1) 53 | let _ = test term "1 + 1" (2, N(L 1,'+', L 1)) 54 | let _ = test term "1 + 1 + 1" (2, N(N(L 1,'+',L 1),'+', L 1)) 55 | let _ = test term "1 * 1" (1, N(L 1,'*', L 1)) 56 | let _ = test term "1 + 1 * 1 + 1" (2, N(N(L 1,'+', N(L 1,'*',L 1)),'+',L 1)) 57 | -------------------------------------------------------------------------------- /tests/dune: -------------------------------------------------------------------------------- 1 | (test 2 | (name test) 3 | (modules test) 4 | (libraries unix pacomb) 5 | (action (run ./test.exe))) 6 | 7 | (test 8 | (name hard) 9 | (modules hard) 10 | (preprocess (pps pacomb.ppx)) 11 | (libraries unix pacomb) 12 | (action (run ./hard.exe 1000))) 13 | 14 | (test 15 | (name ppx_test) 16 | (modules ppx_test) 17 | (preprocess (pps pacomb.ppx)) 18 | (libraries unix pacomb) 19 | (action (run ./ppx_test.exe))) 20 | 21 | (test 22 | (name dseq_test) 23 | (modules dseq_test) 24 | (preprocess (pps pacomb.ppx)) 25 | (libraries unix pacomb) 26 | (action (run ./dseq_test.exe))) 27 | 28 | (executable 29 | (name big_expr) 30 | (modules big_expr) 31 | (libraries)) 32 | 33 | (executable 34 | (name scan) 35 | (modules scan) 36 | (libraries unix pacomb)) 37 | 38 | (test 39 | (name Break) 40 | (modules Break) 41 | (preprocess (pps pacomb.ppx)) 42 | (libraries unix pacomb) 43 | (deps Break.exe GraphemeBreakTest.txt) 44 | (action 45 | (run ./Break.exe ./GraphemeBreakTest.txt))) 46 | 47 | (test 48 | (name calc_factor) 49 | (modules calc_factor) 50 | (preprocess (pps pacomb.ppx)) 51 | (libraries unix pacomb) 52 | (deps big_expr.exe calc_factor.exe) 53 | (action 54 | (system "./big_expr.exe 4 4 4 | ./calc_factor.exe"))) 55 | 56 | (test 57 | (name calc_utf8) 58 | (modules calc_utf8) 59 | (preprocess (pps pacomb.ppx)) 60 | (libraries unix pacomb) 61 | (deps big_expr.exe calc_utf8.exe) 62 | (action 63 | (system "./big_expr.exe 4 4 4 | ./calc_utf8.exe"))) 64 | -------------------------------------------------------------------------------- /tests/hard.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Grammar 3 | 4 | 5 | let gamma_gen n = 6 | let b = Buffer.create (2*n) in 7 | let rec gen n = 8 | if n <= 0 then () else 9 | match Random.int 3 with 10 | | 0 -> 11 | Buffer.add_string b "a"; 12 | gen (n-1); 13 | Buffer.add_string b "b" 14 | | 1 -> 15 | Buffer.add_string b "a"; 16 | Buffer.add_string b "b"; 17 | gen (n-1); 18 | Buffer.add_string b "c" 19 | | 2 -> 20 | Buffer.add_string b "a"; 21 | Buffer.add_string b "b"; 22 | Buffer.add_string b "c"; 23 | gen (n-1); 24 | Buffer.add_string b "d" 25 | | _ -> assert false 26 | in 27 | gen n; 28 | Buffer.contents b 29 | 30 | let parse_string c = parse_string c (Blank.from_charset (Charset.singleton ' ')) 31 | 32 | let%parser [@cache] rec g = () => () 33 | ; 'a' g 'b' => () 34 | ; 'a' g 'b' g 'c' => () 35 | ; 'a' g 'b' g 'c' g 'd' => () 36 | 37 | let n = int_of_string Sys.argv.(1) 38 | 39 | let chrono_parse g s = 40 | let n = String.length s in 41 | Printf.printf "parsing %d chars in %!" 
n; 42 | let t0 = Unix.gettimeofday () in 43 | let r = parse_string g s in 44 | let t1 = Unix.gettimeofday () in 45 | Printf.printf "%f seconds\n%!" (t1 -. t0); 46 | r 47 | 48 | let _ = 49 | for i = 1 to 4 do 50 | let str = gamma_gen (n*i) in 51 | chrono_parse g str 52 | done 53 | -------------------------------------------------------------------------------- /tests/ppx_test.ml: -------------------------------------------------------------------------------- 1 | open Pacomb 2 | open Pos 3 | open Grammar 4 | 5 | let ps pos = (pos.offset_start, pos.offset_end) 6 | 7 | let bspace = Blank.from_charset (Charset.singleton ' ') 8 | 9 | let test ?(blank=bspace) g s r = 10 | assert (parse_string g blank s = r) 11 | 12 | let tests ?(blank=bspace) g l = 13 | List.iter (fun (s,r) -> test ~blank g s r) l 14 | 15 | let test_fail ?(blank=bspace) g s = 16 | try 17 | let _ = parse_string g blank s in assert false 18 | with Parse_error _ -> () 19 | 20 | let tests_fail ?(blank=bspace) g l = 21 | List.iter (test_fail ~blank g) l 22 | 23 | (* test syntax for terminal *) 24 | let%parser g : unit grammar = () => () 25 | let _ = test g "" () 26 | let%parser g : unit grammar = 'a' => () 27 | let _ = test g "a" () 28 | let%parser g : unit grammar = "a" => () 29 | let _ = test g "a" () 30 | let%parser g : int grammar = (x::INT) => x 31 | let _ = test g "42" 42 32 | let%parser g : float grammar = (x::FLOAT) => x 33 | let _ = test g "42.42E-42" 42.42E-42 34 | 35 | let%parser g : string grammar = (x::RE "\\([a-zA-Z_][a-zA-Z_0-9]*\\)") => x 36 | let _ = test g "toto_x3" "toto_x3" 37 | let%parser g : 'a -> 'a -> float grammar = 38 | fun x y -> (x=y) (z::INT) => float_of_int z 39 | ; (x<>y) (z::FLOAT) => z 40 | let _ = test (g 0 0) "42" 42.0 41 | let _ = test (g 0 1) "42.0" 42.0 42 | 43 | 44 | (* test patterns in terminals *) 45 | let%parser g0 : (int * int) grammar = 46 | (x::INT) => ps x_pos 47 | let _ = test g0 " 123 " (1,4) 48 | 49 | let%parser g : (int * int) grammar = ((x,y)::g0) => (y,x) 50 | let _ = test g " 123 " (4,1) 51 | let%parser g : (int * int * int) grammar = 52 | (((x,y)=z)::g0) => (y,x,snd (ps z_pos)) 53 | let _ = test g " 123 " (4,1,4) 54 | let%parser g : (int * int * int) grammar = 55 | ((((x:int),(y:int))=z)::g0) => (y,x,snd (ps z_pos)) 56 | let _ = test g " 123 " (4,1,4) 57 | 58 | (* test rules and sequences *) 59 | type op = Add | Sub | Mul | Div 60 | let%parser bin = (x::INT) 61 | (op::('+'=>Add ; '-'=>Sub; '*'=>Mul; '/'=>Div)) 62 | (y::INT) => (x,op,y) 63 | let _ = test bin "42 + 73" (42,Add,73) 64 | let%parser g = (x::INT) 'a' 'b' => x 65 | ; 'a' (x::INT) 'b' => x 66 | ; 'a' 'b' (x::INT) => x 67 | let _ = tests g [("42 a b",42); ("a 42 b",42); ("a b 42",42)] 68 | 69 | (* test condiiton *) 70 | let%parser g = (true && true) 'a' => () 71 | let _ = test g "a" () 72 | let%parser g = (=) true true 'a' => () 73 | let _ = test g "a" () 74 | let%parser g = (true = true) 'a' => () 75 | let _ = test g "a" () 76 | let%parser g = (true <= true) 'a' => () 77 | let _ = test g "a" () 78 | let%parser g = (not false) 'a' => () 79 | let _ = test g "a" () 80 | 81 | (* test positions *) 82 | let%parser g = 83 | (x::INT) 'a' 'b' => (fst (ps x_pos),x,snd (ps x_pos)) 84 | ; 'a' (x::INT) (b::'b') => (fst (ps x_pos),x,snd (ps b_pos)) 85 | ; (a::'a') 'b' (x::INT) => (fst (ps a_pos),x,snd (ps x_pos)) 86 | let _ = tests g [("42 a b ",(0,42,2)) 87 | ; ("a 42 b ",(2,42,6)) 88 | ; ("a b 42 ",(0,42,6))] 89 | let%parser g = 90 | (x::bin) 'a' 'b' => (fst (ps x_pos),x,snd (ps x_pos)) 91 | ; 'a' (x::bin) (b::'b') => (fst (ps 
x_pos),x,snd (ps b_pos)) 92 | ; (a::'a') 'b' (x::bin) => (fst (ps a_pos),x,snd (ps x_pos)) 93 | let _ = tests g [("42+13 a b ",(0,(42,Add,13),5)) 94 | ; ("a 42 * 4 b ",(2,(42,Mul,4),10)) 95 | ; ("a b 42 / 2 ",(0,(42,Div,2),10))] 96 | let%parser g = 'a' 'b' => Pos.(_pos.offset_start,_pos.offset_end) 97 | let _ = tests g [("a b ",(0,3))] 98 | 99 | (* test recursion *) 100 | let%parser rec g = (y::g) (x::INT) => x+y 101 | ; (x::INT) => x 102 | let _ = tests g [("42", 42); ("1 2 3",6)] 103 | let%parser rec g = (x::INT) (y::g) => x+y 104 | ; (x::INT) => x 105 | let _ = tests g [("42", 42); ("1 2 3",6)] 106 | let%parser rec g = (x::INT) '+' (y::g) => x+y 107 | ; (x::g) '-' (y::INT) => x-y 108 | ; (x::INT) => x 109 | (* This is ambiguous !!! *) 110 | let _ = tests g [("42", 42); ("1 + 2 - 3",0); ("1 - 2 - 3",-4)] 111 | let%parser rec g1 = (x::g3) 'a' 'b' => x+1 112 | ; 'c' (x::g1) 'd' => x-1 113 | ; 'e' 'f' (x::g2) => x 114 | ; () => 0 115 | and g2 = (x::g1) 'b' 'a' => x+1 116 | ; 'c' (x::g2) 'd' => x-1 117 | ; 'f' 'e' (x::g3) => x 118 | ; () => 0 119 | and g3 = (x::g2) 'a' 'b' => x+1 120 | ; 'd' (x::g3) 'c' => x-1 121 | ; 'e' 'f' (x::g1) => x 122 | ; () => 0 123 | let _ = tests g1 [("", 0); ("ab",1); ("cd",-1); ("ef", 0)] 124 | let _ = tests g2 [("", 0); ("ba",1); ("cd",-1); ("fe", 0)] 125 | let _ = tests g3 [("", 0); ("ab",1); ("dc",-1); ("ef", 0)] 126 | let _ = tests g1 [("cdefefcfedcabd",-2)] 127 | 128 | (* test right recursion and lazy *) 129 | let n = ref 0 130 | let%parser rec g = (c::CHAR) (l::force g) => lazy (incr n; String.make 1 c :: l) 131 | ; () => lazy [] 132 | let%parser h = (lazy l::g) EOF => String.concat "" l 133 | 134 | let _ = 135 | let nb = 1_000_000 in 136 | let s = (String.make nb 'a') in test h s s; assert (!n = nb) 137 | 138 | (* test parameters *) 139 | let%parser rec g g0 n = (n=0) () => 0 140 | ; (n>0) (x::g g0 (n-1)) g0 => x+1 141 | let%parser _ = 142 | for i = 0 to 10 do 143 | test (g ('a' => ()) i) (String.make i 'a') i; 144 | test_fail (g ('a' => ()) i) (String.make (i+1) 'a') 145 | done 146 | 147 | let%parser rec g g0 n = (n=0) () => 0 148 | ; (n>0) g0 (x::g g0 (n-1)) => x+1 149 | let%parser _ = 150 | for i = 0 to 10 do 151 | test (g ('a' => ()) i) (String.make i 'a') i; 152 | test_fail (g ('a' => ()) i) (String.make (i+1) 'a') 153 | done 154 | 155 | let%parser rec g ~g:g0 ?(n=0) () = (n=0) () => 0 156 | ; (n>0) g0 (x::g ~g:g0 ~n:(n-1) ()) => x+1 157 | let%parser _ = 158 | test (g ~g:('a' => ()) ()) (String.make 0 'a') 0; 159 | test_fail (g ~g:('a' => ()) ()) (String.make 1 'a'); 160 | for i = 0 to 10 do 161 | test (g ~g:('a' => ()) ~n:i ()) (String.make i 'a') i; 162 | test_fail (g ~g:('a' => ()) ~n:i ()) (String.make (i+1) 'a') 163 | done 164 | 165 | 166 | (* test grammar under sub expressions or sub modules *) 167 | let noblank = layout Blank.none 168 | let%parser f = (x::(noblank ('a' 'a' => 2))) => x 169 | 170 | module%parser H = 171 | struct 172 | let f = 'b' (x::(noblank ('a' 'a' => 2))) => x 173 | end 174 | let _ = test f "aa" 2 175 | let _ = test_fail f "a a" 176 | 177 | let _ = test H.f "b aa " 2 178 | let _ = test_fail H.f "b a a " 179 | 180 | type tree = Nil of int | Bin of tree * tree | Alt of tree * tree 181 | let rec nb_tree = function 182 | | Nil _ -> 1 183 | | Bin(t1,t2) -> nb_tree t1 * nb_tree t2 184 | | Alt(t1,t2) -> nb_tree t1 + nb_tree t2 185 | 186 | let size t = 187 | let adone = ref [] in 188 | let rec fn t = 189 | if List.memq t !adone then 0 else 190 | begin 191 | adone := t :: !adone; 192 | match t with 193 | | Nil _ -> 0 194 | | 
Bin(t1,t2) -> fn t1 + fn t2 + 1 195 | | Alt(t1,t2) -> fn t1 + fn t2 + 1 196 | end 197 | in fn t 198 | 199 | let catalan_fn = 200 | let memo = Hashtbl.create 128 in 201 | let rec fn n = 202 | if n = 0 then 1 else if n = 1 then 1 else 203 | try Hashtbl.find memo n 204 | with Not_found -> 205 | let r = ref 0 in 206 | for i = 0 to n-1 do 207 | r := fn i * fn (n - i - 1) + !r 208 | done; 209 | Hashtbl.add memo n !r; 210 | !r 211 | in 212 | fn 213 | (* test cache and merge attribute *) 214 | let%parser [@merge (fun x y -> Alt(x,y))] rec catalan = 215 | (x::INT) => Nil x 216 | ; (t1::catalan) ',' (t2::catalan) => Bin(t1,t2) 217 | 218 | let _ = nb_tree (parse_string catalan bspace "0,0,0,0") = catalan_fn 4 219 | let _ = nb_tree (parse_string catalan bspace "0,0,0,0,0,0") = catalan_fn 6 220 | -------------------------------------------------------------------------------- /tests/scan.ml: -------------------------------------------------------------------------------- 1 | 2 | (* This test is for measuring the performance of Input *) 3 | open Pacomb 4 | 5 | let utf8 = ref Utf8.ASCII 6 | 7 | let files = ref [] 8 | 9 | let spec = Arg.[("--utf8", Unit(fun () -> utf8 := Utf8.UTF8), "use utf8 mode")] 10 | 11 | let _ = Arg.parse spec 12 | (fun x -> files := x :: ! files) 13 | "scan [--utf8] files ('-' to read stdin)" 14 | 15 | let utf8 = !utf8 16 | let files = if !files = [] then ["-"] else !files 17 | 18 | let do_file file = 19 | let buf = 20 | if file = "-" then 21 | Input.from_channel ~utf8 stdin 22 | else Input.from_file ~utf8 file 23 | in 24 | let t0 = Unix.gettimeofday () in 25 | let rec fn n buf pos = 26 | let (c,buf,pos) = Input.read buf pos in 27 | if c = '\255' then Input.line_num buf, n else fn (n+1) buf pos 28 | in 29 | let l, n = fn 0 buf Input.init_pos in 30 | let t1 = Unix.gettimeofday () in 31 | let d = t1 -. t0 in 32 | let s = (float) n /. d /. (1024. *. 1024.) in 33 | let m = (float n) /. (1024. *. 1024.) in 34 | Printf.printf 35 | "%s: %.2f Mo/%d lines read in %f.3s (%.2f Mo/s) with utf8 mode %b\n" 36 | file m l d s (utf8 = Utf8.UTF8) 37 | 38 | let _ = List.iter do_file files 39 | -------------------------------------------------------------------------------- /tools/sanity_check.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | 4 | # Sanity checks for source files in directory [src]. 5 | 6 | ML_FILES=`find lib -name "*.ml" -or -name "*.mli"` 7 | TESTS_FILES=`find tests -name "*.ml" -or -name "*.mli" -or -name "*.mll" -or -name "*.mly"` 8 | FILES="ppx/ppx_pacomb.ml $ML_FILES $TESTS_FILES" 9 | 10 | awk 'length>80 {print FILENAME ", line " FNR ": more than 80 characters..."}' $FILES 11 | awk '/.*\s$/ {print FILENAME ", line " FNR ": trailing spaces..."} ' $FILES 12 | awk '/.*\t.*/ {print FILENAME ", line " FNR ": contains tabs..."} ' $FILES 13 | 14 | # Check for issue number of FIXMEs and TODOs in source files and PML files. 15 | 16 | awk '/FIXME:? [^#]/ {print FILENAME ", line " FNR ": FIXME without issue number"}' $FILES 17 | awk '/TODO:? [^#]/ {print FILENAME ", line " FNR ": TODO without issue number"}' $FILES 18 | --------------------------------------------------------------------------------