├── README.md
├── src
│   ├── jbuild
│   ├── Stream.ml
│   ├── Pratt.mli
│   └── Pratt.ml
├── Init.ml
├── examples
│   ├── jbuild
│   ├── .merlin
│   ├── Javascript_lexer.ml
│   └── Javascript.ml
├── .gitignore
├── tests
│   ├── .merlin
│   ├── Local.ml
│   ├── jbuild
│   ├── Lispy_lexer.ml
│   ├── Test_pratt_calc.ml
│   ├── Test_parser.ml
│   ├── Test_lispy.ml
│   └── Nanotest.ml
├── pratt.opam
├── Makefile
├── LICENSE
└── Algorithm.dot
/README.md: -------------------------------------------------------------------------------- 1 | # Pratt Parser 2 | 3 | Top-down operator-precedence parser (also known as a Pratt parser) implementation for OCaml. 4 | 5 | -------------------------------------------------------------------------------- /src/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (library 4 | ((name pratt) 5 | (public_name pratt) 6 | (modules (Pratt Stream)) 7 | (libraries (fmt astring yojson)))) 8 | 9 | -------------------------------------------------------------------------------- /Init.ml: -------------------------------------------------------------------------------- 1 | #require "iter";; 2 | #require "fmt.top";; 3 | #directory "_build/default/src/";; 4 | #load_rec "_build/default/src/Pratt.cmo";; 5 | 6 | open Pratt;; 7 | -------------------------------------------------------------------------------- /examples/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (executables 4 | ((names (Javascript)) 5 | (libraries (pratt astring fmt unix sedlex)) 6 | (preprocess (pps (sedlex.ppx))))) 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.annot 2 | *.cmo 3 | *.cma 4 | *.cmi 5 | *.a 6 | *.o 7 | *.cmx 8 | *.cmxs 9 | *.cmxa 10 | 11 | # ocamlbuild working directory 12 | _build/ 13 | 14 | # ocamlbuild targets 15 | *.byte 16 | *.native 17 | 18 | # oasis generated files 19 | setup.data 20 | setup.log 21 | -------------------------------------------------------------------------------- /examples/.merlin: -------------------------------------------------------------------------------- 1 | B ../_build/default/examples 2 | B ../_build/default/src 3 | FLG -ppx '/Users/rizo/Developer/OCaml/Code/pratt/_build/default/.ppx/sedlex.ppx/ppx.exe --as-ppx' 4 | FLG -w -40 5 | PKG astring 6 | PKG bytes 7 | PKG fmt 8 | PKG gen 9 | PKG proto 10 | PKG proto.kernel 11 | PKG proto.shadow-stdlib 12 | PKG result 13 | PKG sedlex 14 | PKG uchar 15 | PKG unix 16 | S ../src 17 | -------------------------------------------------------------------------------- /tests/.merlin: -------------------------------------------------------------------------------- 1 | B ../_build/default/src 2 | B ../_build/default/tests 3 | FLG -ppx '/Users/rizo/Developer/OCaml/Code/pratt/_build/default/.ppx/sedlex.ppx/ppx.exe --as-ppx' 4 | FLG -w -40 5 | PKG astring 6 | PKG bytes 7 | PKG fmt 8 | PKG gen 9 | PKG proto 10 | PKG proto.kernel 11 | PKG proto.shadow-stdlib 12 | PKG result 13 | PKG sedlex 14 | PKG uchar 15 | PKG unix 16 | S .
17 | S ../src 18 | -------------------------------------------------------------------------------- /tests/Local.ml: -------------------------------------------------------------------------------- 1 | 2 | module Char = Astring.Char 3 | 4 | let identity x = x 5 | 6 | module String = struct 7 | include String 8 | 9 | let implode l = 10 | let arr = Array.of_list l in 11 | String.init (Array.length arr) (Array.get arr) 12 | 13 | let join ?(sep = "") self = 14 | concat sep self 15 | end 16 | 17 | 18 | module Int = struct 19 | let unsafe_of_string = 20 | int_of_string 21 | end 22 | 23 | module Result = struct 24 | let map f = function Ok x -> Ok (f x) | Error e -> Error e 25 | end 26 | 27 | -------------------------------------------------------------------------------- /pratt.opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | version: "0.2.0" 3 | maintainer: "Rizo Isrof " 4 | authors: "Rizo Isrof " 5 | license: "ISC" 6 | homepage: "https://github.com/rizo/pratt" 7 | bug-reports: "https://github.com/rizo/pratt/issues" 8 | dev-repo: "https://github.com/rizo/pratt.git" 9 | 10 | build: ["jbuilder" "build" "-p" name "-j" jobs] 11 | build-test: ["jbuilder" "runtest" "-p" name] 12 | 13 | depends: [ 14 | "pure" 15 | "fmt" 16 | "jbuilder" {build} 17 | ] 18 | available: [ocaml-version >= "4.02.0"] 19 | -------------------------------------------------------------------------------- /tests/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | (executables 3 | ((names (Test_pratt_calc Test_parser Test_lispy)) 4 | (libraries (pratt astring fmt unix sedlex)) 5 | (preprocess (pps (sedlex.ppx))))) 6 | 7 | (alias 8 | ((name runtest) 9 | (package pratt) 10 | (deps (Test_parser.exe)) 11 | (action (run ${<})))) 12 | 13 | 14 | (alias 15 | ((name runtest) 16 | (package pratt) 17 | (deps (Test_pratt_calc.exe)) 18 | (action (run ${<})))) 19 | 20 | (alias 21 | ((name runtest) 22 | (package pratt) 23 | (deps (Test_lispy.exe)) 24 | (action (run ${<})))) 25 | 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | jbuilder build -j4 @install 4 | 5 | install: build 6 | jbuilder install 7 | 8 | uninstall: 9 | jbuilder uninstall 10 | 11 | reinstall: uninstall install 12 | 13 | test: 14 | jbuilder runtest 15 | 16 | clean: 17 | jbuilder clean 18 | 19 | shell: build 20 | /usr/bin/env bash -c 'utop -init <(cat ~/.ocamlinit Init.ml)' 21 | 22 | watch: 23 | ls src/*.ml* tests/*.ml* | entr -cr make test 24 | 25 | watch-js: 26 | ls src/*.ml* examples/*.ml* | entr -cr sh -c 'jbuilder build examples/Javascript.exe; ./_build/default/examples/Javascript.exe' 27 | 28 | .PHONY: test all clean 29 | 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Rizo Isrof 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /Algorithm.dot: -------------------------------------------------------------------------------- 1 | 2 | digraph pratt { 3 | node [shape=record]; 4 | nud [shape=hexagon, color=Blue]; 5 | led [shape=hexagon, color=Blue]; 6 | nud_invalid [color=Red,label="nud.invalid_nud(token)"]; 7 | led_return [color=Green,label="led.return(left)"]; 8 | 9 | // NUD 10 | nud -> "nud.has_nud(token)" [style=dashed]; 11 | "nud.has_nud(token)" -> "nud.parse_nud(token)" [label="yes"]; 12 | "nud.has_nud(token)" -> "nud.has_led(token)" [label="no"]; 13 | "nud.has_led(token)" -> "nud.parse_term(token)" [label="no"]; 14 | "nud.has_led(token)" -> nud_invalid [label="yes"]; 15 | "nud.parse_nud(token)" -> led [style=dashed]; 16 | "nud.parse_term(token)" -> led [style=dashed]; 17 | 18 | // LED 19 | led -> "led.is_eof" [style=dashed]; 20 | "led.is_eof" -> led_return [label="yes"]; 21 | "led.is_eof" -> "led.has_led(token)" [label="no"]; 22 | "led.has_led(token)" -> "led.(lbp > rbp)" [label="yes"]; 23 | "led.has_led(token)" -> "invalid_led(token)" [label="no", color=Red]; 24 | "led.(lbp > rbp)" -> "led(token)" [label="yes"]; 25 | "led.(lbp > rbp)" -> led_return [label="no"]; 26 | "led(token)" -> "advance()" [style=dashed]; 27 | "advance()" -> "nud(token)" [style=dashed]; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/Stream.ml: -------------------------------------------------------------------------------- 1 | 2 | type 'a t = Yield of 'a * (unit -> 'a t) | Empty 3 | 4 | let rec count n : int t = Yield (n, fun () -> count (n + 1)) 5 | 6 | let rec make n f : 'a t = 7 | let rec loop i = 8 | if i = n then Empty 9 | else Yield (f i, fun () -> loop (i + 1)) in 10 | loop 0 11 | 12 | let rec fold f acc (stream : 'a t) = 13 | match stream with 14 | | Empty -> acc 15 | | Yield (x, k) -> fold f (f acc x) (k ()) 16 | 17 | let rec map f (stream : 'a t) = 18 | match stream with 19 | | Empty -> Empty 20 | | Yield (x, k) -> Yield (f x, fun () -> map f (k ())) 21 | 22 | let rec head stream = 23 | match stream with 24 | | Empty -> None 25 | | Yield (x, _) -> Some x 26 | 27 | let is_empty (stream : 'a t) = 28 | match stream with 29 | | Empty -> true 30 | | _ -> false 31 | 32 | let rec of_list list : 'a t = 33 | match list with 34 | | [] -> Empty 35 | | x :: xs -> Yield (x, fun () -> of_list xs) 36 | 37 | let to_list (stream : 'a t) = 38 | let rec loop acc stream' = 39 | match stream' with 40 | | Empty -> acc 41 | | Yield (x, k) -> loop (List.cons x acc) (k ()) in 42 | List.rev (loop [] stream) 43 | 44 | let of_string str : char t = 45 | let rec loop i = 46 | try 47 | Yield (String.get str i, fun () -> loop (i + 1)) 48 | with Invalid_argument _ -> 49 | Empty in 50 | loop 0 51 | 52 | let rec filter p stream = 53 | match stream with 54 | | Empty -> Empty 55 | | Yield (x, k) -> 56 | if p x then 57 | Yield (x, fun () -> filter p (k ())) 58 | else 59 | filter p (k ()) 60 | 61 | let reject p stream = 62 | filter (fun x -> not (p x)) stream 63 | 64 | let next stream = 65 | match stream with 66 | | Empty -> None 67 | | Yield (x, stream') -> Some (x, stream' ()) 68 | 69 | 
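The Stream module listed above is the parser's only input representation: a thunk-based lazy stream consumed one token at a time. A minimal usage sketch (an illustrative addition, not a file in the repository), using only functions defined in Stream.ml and the Pratt.Stream path that the tests below use to reach this module:

    let () =
      Pratt.Stream.of_string "a b c"                (* build a char Stream.t *)
      |> Pratt.Stream.reject (fun c -> c = ' ')     (* drop the blanks *)
      |> Pratt.Stream.to_list                       (* force the lazy stream *)
      |> List.iter print_char                       (* prints: abc *)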
-------------------------------------------------------------------------------- /tests/Lispy_lexer.ml: -------------------------------------------------------------------------------- 1 | 2 | module String = Astring.String 3 | 4 | let identifier = 5 | [%sedlex.regexp? Plus (alphabetic | Chars "_'")] 6 | 7 | let white_space = 8 | [%sedlex.regexp? Plus (' ' | '\t')] 9 | 10 | module Token = struct 11 | type t = string 12 | 13 | let pp = Fmt.string 14 | let compare = Pervasives.compare 15 | end 16 | 17 | type t = 18 | { mutable lexbuf : Sedlexing.lexbuf; 19 | mutable line_start : int; 20 | mutable line_count : int; 21 | mutable group_count : int } 22 | 23 | 24 | let increment_line self = 25 | self.line_start <- Sedlexing.lexeme_end self.lexbuf; 26 | self.line_count <- self.line_count + 1 27 | 28 | 29 | let current_lexeme self = 30 | Sedlexing.Utf8.lexeme self.lexbuf 31 | 32 | 33 | let rec read self = 34 | let lexbuf = self.lexbuf in 35 | match%sedlex lexbuf with 36 | 37 | (* Whitespace *) 38 | | Plus white_space -> read self 39 | 40 | (* Group start *) 41 | | '(' -> 42 | self.group_count <- (self.group_count + 1); 43 | current_lexeme self 44 | 45 | (* Operators *) 46 | | '+' | '-' -> 47 | current_lexeme self 48 | 49 | (* Group end *) 50 | | ')' -> 51 | self.group_count <- (self.group_count - 1); 52 | 53 | if self.group_count < 0 then 54 | failwith "unbalanced parenthesis" 55 | else 56 | current_lexeme self 57 | 58 | (* Identifiers *) 59 | | identifier -> current_lexeme self 60 | 61 | (* Newline symbol *) 62 | | '\n' -> 63 | increment_line self; 64 | read self 65 | 66 | (* EOF symbol *) 67 | | eof -> raise End_of_file 68 | 69 | (* Everything else is illegal *) 70 | | any -> 71 | failwith ("illegal character: " ^ current_lexeme self) 72 | 73 | (* Sedlex: the last branch must be a catch-all error case *) 74 | | _ -> failwith "impossible" 75 | 76 | 77 | let from_lexbuf lexbuf = 78 | { lexbuf; 79 | line_start = 0; 80 | line_count = 0; 81 | group_count = 0 } 82 | 83 | let of_string s = 84 | from_lexbuf (Sedlexing.Utf8.from_string s) 85 | 86 | let of_channel c = 87 | from_lexbuf (Sedlexing.Utf8.from_channel c) 88 | 89 | let rec to_stream lexer = 90 | try 91 | let token = read lexer in 92 | Pratt.Stream.Yield (token, fun () -> to_stream lexer) 93 | with End_of_file -> 94 | Pratt.Stream.Empty 95 | 96 | -------------------------------------------------------------------------------- /tests/Test_pratt_calc.ml: -------------------------------------------------------------------------------- 1 | open Local 2 | 3 | let _1 = fst 4 | module Stream = Pratt.Stream 5 | module P = Pratt.Make(Char) 6 | open P 7 | 8 | (* Integer parser for char tokens. *) 9 | let int g = 10 | some (range '0' '9') >>= fun (x, xs) -> 11 | return (Int.unsafe_of_string (String.implode (x :: xs))) 12 | 13 | let rec fac = function 14 | | 0 | 1 -> 1 15 | | n -> n * fac (n - 1) 16 | 17 | (* Basic calculator grammar. *) 18 | let calc = 19 | grammar [ 20 | term int; 21 | null '+' (unary identity); 22 | left 30 '+' (binary ( + )); 23 | null '-' (unary ( ~- )); 24 | left 30 '-' (binary ( - )); 25 | left 40 '*' (binary ( * )); 26 | left 40 '/' (binary ( / )); 27 | postfix 70 '!' (fun a -> fac a); 28 | between '(' ')' (fun a -> a); 29 | delimiter ')'; 30 | ] 31 | 32 | 33 | (* Basic string lexer (ignores blank characters). *) 34 | let lexer str = 35 | Stream.of_string str |> Stream.reject Char.Ascii.is_blank 36 | 37 | module T = Nanotest 38 | 39 | (* Helper testing function that parses the input and checks the result. 
*) 40 | let (==>) str expected = 41 | let actual = Result.map _1 (run (parse calc) (lexer str)) in 42 | let testable = T.(result int (testable (Fmt.of_to_string error_to_string))) in 43 | T.test testable str ~actual ~expected 44 | 45 | 46 | let stream = 47 | T.testable ~equal:(fun a b -> Stream.(to_list a == to_list b)) 48 | (Fmt.of_to_string (fun _ -> "")) 49 | 50 | let (==>!) str expected = 51 | let actual = run (parse calc) (lexer str) in 52 | let testable = T.(result (pair int stream) (testable (Fmt.of_to_string error_to_string))) in 53 | T.test testable str ~actual ~expected 54 | 55 | 56 | (* Tests *) 57 | let () = 58 | T.group "Test basic" [ 59 | "1" ==> Ok 1; 60 | "+1" ==> Ok 1; 61 | "+-+-1" ==> Ok 1; 62 | "1 + 1" ==> Ok 2; 63 | "100 + 300" ==> Ok 400; 64 | "1 + -1" ==> Ok 0; 65 | "1 + --1" ==> Ok 2; 66 | "(((0)))" ==> Ok 0; 67 | "2 + 2 * 2" ==> Ok 6; 68 | "(2 + 2) * 2" ==> Ok 8; 69 | ]; 70 | 71 | T.group "Factorial" [ 72 | "5!" ==> Ok 120; 73 | "0!" ==> Ok 1; 74 | ]; 75 | 76 | T.group "Check errors" [ 77 | "x" ==> Error (unexpected_token 'x'); 78 | "" ==> Error (unexpected_end ()); 79 | "/" ==> Error (invalid_prefix '/'); 80 | "2 /" ==> Error (unexpected_end ()); 81 | "2 / -" ==> Error (unexpected_end ()); 82 | "2 (" ==>! Ok (2, Stream.of_list ['(']); 83 | (* XXX: Alternatively fail with unexpected ( *) 84 | ] 85 | 86 | -------------------------------------------------------------------------------- /tests/Test_parser.ml: -------------------------------------------------------------------------------- 1 | open Astring 2 | 3 | module Stream = Pratt.Stream 4 | module P = Pratt.Make(Char) 5 | module T = Nanotest 6 | open Local 7 | 8 | let _1 = fst 9 | 10 | (* Helper test function, tests a particular [parser] with a given [input]. *) 11 | let test parser input expected = 12 | let printer = T.(result char (testable P.pp_error)) in 13 | let actual = Result.map _1 @@ P.run parser (Stream.of_string input) in 14 | let message = "input: \"" ^ input ^ "\"" in 15 | T.test ~verbose:true printer message ~expected ~actual 16 | 17 | (* Helper test function, tests a particular [parser] with a given [input]. 
*) 18 | let test' parser input expected = 19 | let printer = T.result T.(list char) (T.testable P.pp_error) in 20 | let actual = Result.map _1 @@ P.run parser (Pratt.Stream.of_string input) in 21 | let message = "input: \"" ^ input ^ "\"" in 22 | T.test ~verbose:true printer message ~expected ~actual 23 | 24 | 25 | let test_error () = 26 | let (==>) = test P.(error Zero) in 27 | T.group "Parser.error" [ 28 | "" ==> Error P.Zero; 29 | "a" ==> Error P.Zero; 30 | "abc" ==> Error P.Zero; 31 | ] 32 | 33 | let test_expect () = 34 | let (==>) = test (P.expect 'a') in 35 | T.group "Parser.expect" [ 36 | "" ==> Error P.(unexpected_end ~expected:'a' ()); 37 | "x" ==> Error P.(unexpected_token ~expected:'a' 'x'); 38 | "a" ==> Ok 'a'; 39 | "abc" ==> Ok 'a'; 40 | ] 41 | 42 | let test_exactly () = 43 | let (==>) (x, input) = test (P.exactly x) input in 44 | T.group "Parser.exactly" [ 45 | ('x', "x") ==> Ok 'x'; 46 | ('x', "y") ==> Error P.(unexpected_token ~expected:'x' 'y'); 47 | ('x', "") ==> Error P.(unexpected_end ~expected:'x' ()); 48 | ] 49 | 50 | let test_satisfy () = 51 | let (==>) = test (P.satisfy Char.Ascii.is_upper) in 52 | T.group "Parser.satisfy is_upper" [ 53 | "A" ==> Ok 'A'; 54 | "" ==> Error P.(unexpected_end ()); 55 | "0" ==> Error P.(unexpected_token '0'); 56 | "a" ==> Error P.(unexpected_token 'a'); 57 | ] 58 | 59 | let test_any () = 60 | let (==>) = test P.any in 61 | T.group "Parser.any" [ 62 | "x" ==> Ok 'x'; 63 | "0" ==> Ok '0'; 64 | "?" ==> Ok '?'; 65 | "" ==> Error P.(unexpected_end ()); 66 | ] 67 | 68 | let test_from () = 69 | let (==>) (options, input) = test (P.from options) input in 70 | T.group "Parser.from" [ 71 | ([], "x") ==> Error P.(unexpected_token 'x'); 72 | (['x'], "x") ==> Ok 'x'; 73 | (['x'; 'y'], "y") ==> Ok 'y'; 74 | (['x'; 'y'], "z") ==> Error P.(unexpected_token 'z'); 75 | ([], "") ==> Error P.(unexpected_end ()); 76 | ] 77 | 78 | let test_while () = 79 | let (==>) input = test' P.(many_while ((!=) 'x') any) input in 80 | T.group "Parser.many_while" [ 81 | "" ==> Ok []; 82 | "x" ==> Ok []; 83 | "ax" ==> Ok ['a']; 84 | "abcx" ==> Ok ['a'; 'b'; 'c']; 85 | "abcxxx" ==> Ok ['a'; 'b'; 'c']; 86 | "abc" ==> Ok ['a'; 'b'; 'c']; 87 | ] 88 | 89 | let test_many () = 90 | let (==>) input = test' P.(many any) input in 91 | T.group "Parser.many_while" [ 92 | "" ==> Ok []; 93 | "ax" ==> Ok ['a']; 94 | "abc" ==> Ok ['a'; 'b'; 'c']; 95 | ] 96 | 97 | let test_many_while () = 98 | let (==>) input = test' P.(many_while ((!=) 'x') any) input in 99 | T.group "Parser.many_while" [ 100 | "" ==> Ok []; 101 | "ax" ==> Ok ['a']; 102 | "abcx" ==> Ok ['a'; 'b'; 'c']; 103 | "abc" ==> Ok ['a'; 'b'; 'c']; 104 | ] 105 | 106 | let () = begin 107 | test_error (); 108 | test_expect (); 109 | test_exactly (); 110 | test_satisfy (); 111 | test_any (); 112 | test_from (); 113 | test_many (); 114 | test_many_while (); 115 | end 116 | 117 | -------------------------------------------------------------------------------- /tests/Test_lispy.ml: -------------------------------------------------------------------------------- 1 | open Local 2 | 3 | 4 | let inspect pp = 5 | Format.fprintf Fmt.stderr "@[%a@.@]" pp 6 | 7 | let log fmt = 8 | Format.kfprintf (fun f -> Format.pp_print_newline f ()) Fmt.stderr fmt 9 | 10 | 11 | let _1 = fst 12 | module Stream = Pratt.Stream 13 | module P = Pratt.Make(Lispy_lexer.Token) 14 | module G = P.Grammar 15 | let (>>=) = P.(>>=) 16 | 17 | module E = struct 18 | type t = [ `sym of string | `add of t * t | `sub of t * t | `neg of t | `seq of t list ] 19 | let rec pp 
formatter e = 20 | match e with 21 | | `sym a -> Format.fprintf formatter "%s" a 22 | | `add (a, b) -> Format.fprintf formatter "(%a + %a)" pp a pp b 23 | | `sub (a, b) -> Format.fprintf formatter "(%a - %a)" pp a pp b 24 | | `neg a -> Format.fprintf formatter "(- %a)" pp a 25 | | `seq xs -> Format.fprintf formatter "(%a)" (Fmt.list ~sep:(Fmt.unit " ") pp) xs 26 | let equal = (=) 27 | end 28 | 29 | let parse g = 30 | let left = 31 | P.some (P.nud 0 g) >>= fun (x, xs) -> 32 | if List.length xs = 0 then 33 | P.return x 34 | else 35 | P.return (`seq (x :: xs)) in 36 | left >>= P.led 0 g 37 | 38 | let parse_term g = 39 | P.any >>= fun x -> 40 | P.return (`sym x) 41 | 42 | let parse_add g a = 43 | P.advance >>= fun () -> 44 | parse g >>= fun b -> 45 | P.return (`add (a, b)) 46 | 47 | let parse_sub g a = 48 | P.advance >>= fun () -> 49 | parse g >>= fun b -> 50 | P.return (`sub (a, b)) 51 | 52 | let parse_neg g = 53 | P.advance >>= fun () -> 54 | parse g >>= fun a -> 55 | P.return (`neg a) 56 | 57 | let parse_group g = 58 | P.advance >>= fun () -> 59 | parse g >>= fun a -> 60 | P.consume ")" >>= fun () -> 61 | P.return a 62 | 63 | (* Basic calculator grammar. *) 64 | let lispy = 65 | P.grammar P.[ 66 | term parse_term; 67 | left 30 "+" parse_add; 68 | left 30 "-" parse_sub; 69 | null "-" parse_neg; 70 | null "(" parse_group; 71 | delimiter ")" 72 | ] 73 | 74 | let parser = parse lispy 75 | 76 | module T = Nanotest 77 | 78 | (* Helper testing function that parses the input and checks the result. *) 79 | let (==>) str expected = 80 | let actual = Result.map _1 (P.run parser (Lispy_lexer.(of_string str |> to_stream))) in 81 | let testable = T.(result (module E) (testable (Fmt.of_to_string P.error_to_string))) in 82 | T.test testable str ~actual ~expected 83 | 84 | 85 | (* Tests *) 86 | let () = 87 | let (a, b, c, d, e) = 88 | `sym "a", `sym "b", `sym "c", `sym "d", `sym "e" in 89 | let (+) a b = `add (a, b) in 90 | let (~-) a = `neg a in 91 | let seq l = `seq l in 92 | 93 | T.group "Test basic" [ 94 | "a" ==> Ok a; 95 | "a + b" ==> Ok (a + b); 96 | "-a" ==> Ok ~-a; 97 | "a b" ==> Ok (seq [a; b]); 98 | "a b c" ==> Ok (seq [a; b; c]); 99 | "a b + c" ==> Ok (seq [a; b] + c); 100 | "a + b c" ==> Ok (a + seq [b; c]); 101 | "(a + b) (c + d)" ==> Ok (seq [a + b; c + d]); 102 | "(a b) (c d)" ==> Ok (seq [seq [a; b]; seq [c; d]]); 103 | ]; 104 | 105 | let def f args body = 106 | seq [`sym "def"; f; seq args; body] in 107 | let let' bindings body = 108 | seq [`sym "let"; seq bindings; body] in 109 | let lambda args body = 110 | seq [`sym "lambda"; seq args; body] in 111 | 112 | let ex1_str = {| 113 | (def a (a b c) 114 | (let 115 | ((d (a + b)) 116 | (e (-c))) 117 | ((lambda (a b) (a + e)) d))) 118 | |} in 119 | let ex1_expr = 120 | def a [a; b; c] 121 | (let' [seq [d; a + b]; 122 | seq [e; ~-c]] 123 | (seq [lambda [a; b] (a + e); d])) in 124 | 125 | T.group "Compound" [ 126 | ex1_str ==> Ok ex1_expr; 127 | ex1_str ^ ex1_str ==> Ok (seq [ex1_expr; ex1_expr]) 128 | ] 129 | 130 | 131 | -------------------------------------------------------------------------------- /tests/Nanotest.ml: -------------------------------------------------------------------------------- 1 | let fmt = Format.asprintf 2 | 3 | module C = struct 4 | let color_pp color = 5 | fmt "\027[%dm%s\027[0m" 6 | (match color with 7 | | `Black -> 30 8 | | `Red -> 31 9 | | `Green -> 32 10 | | `Yellow -> 33 11 | | `Blue -> 34 12 | | `Magenta -> 35 13 | | `Cyan -> 36 14 | | `White -> 37) 15 | 16 | let blue = color_pp `Blue 17 | let red = color_pp `Red 
18 | let yellow = color_pp `Yellow 19 | let magenta = color_pp `Magenta 20 | let cyan = color_pp `Cyan 21 | let white = color_pp `White 22 | let green = color_pp `Green 23 | let bright_white x = fmt "\027[1;37m%s\027[0m" x 24 | let bright_blue x = fmt "\027[1;34m%s\027[0m" x 25 | let bright_magenta x = fmt "\027[1;35m%s\027[0m" x 26 | let violet x = fmt "\027[0;34m%s\027[0m" x 27 | let bright_red x = fmt "\027[1;31m%s\027[0m" x 28 | let bright_green x = fmt "\027[1;32m%s\027[0m" x 29 | let start_bright_white = fmt "\027[1;37m" 30 | let start_white = fmt "\027[37m" 31 | let end_color = "\027[0m" 32 | let italic x = "\027[3m" ^ x ^ "\027[0m" 33 | let underline x = "\027[4m" ^ x ^ "\027[0m" 34 | let blink x = "\027[5m" ^ x ^ "\027[0m" 35 | end 36 | 37 | 38 | module type Testable = sig 39 | type t 40 | val equal : t -> t -> bool 41 | val pp : Format.formatter -> t -> unit 42 | end 43 | 44 | type 'a testable = (module Testable with type t = 'a) 45 | 46 | module Testable = struct 47 | let pp (type a) (t: a testable) = let (module T) = t in T.pp 48 | let equal (type a) (t: a testable) = let (module T) = t in T.equal 49 | end 50 | 51 | 52 | let time ?fmt f x = 53 | let t0 = Unix.gettimeofday () in 54 | let fx = f x in 55 | let t1 = Unix.gettimeofday () -. t0 in 56 | let () = match fmt with 57 | | Some fmt -> Printf.eprintf "%s\n" (fmt fx t1) 58 | | None -> Printf.eprintf "Elapsed time: %f sec\n" t1 in 59 | fx 60 | 61 | let test ?(verbose = true) ty msg ~actual ~expected () = 62 | let ok = Testable.equal ty actual expected in 63 | begin if not ok then begin 64 | Fmt.pr " %s %s@." (C.bright_red "✗") (C.bright_white msg); 65 | Fmt.pr " - %a@." (Testable.pp ty) expected; 66 | Fmt.pr " + %a@." (Testable.pp ty) actual 67 | end else if verbose then 68 | Fmt.pr " %s %s@." (C.bright_green "✓") (C.bright_white msg) 69 | end; 70 | ok 71 | 72 | let testable (type a) (pp: a Fmt.t) (equal: a -> a -> bool) : a testable = 73 | let module M = struct 74 | type t = a 75 | let pp = pp 76 | let equal = equal 77 | end in 78 | (module M) 79 | 80 | let testable (type a) ?(equal: a -> a -> bool = Pervasives.(=)) (pp: a Fmt.t) : a testable = 81 | let module M = struct 82 | type t = a 83 | let pp = pp 84 | let equal = equal 85 | end in 86 | (module M) 87 | 88 | 89 | let group name tests = 90 | Fmt.pr "━━━ %s ━━━@." (C.bright_blue name); 91 | let t0 = Unix.gettimeofday () in 92 | let s, f, t = 93 | List.fold_left begin fun (s, f, t) test -> 94 | if test () then (s + 1, f, t + 1) else (s, f + 1, t + 1) 95 | end 96 | (0, 0, 0) tests in 97 | let t = Unix.gettimeofday () -. t0 in 98 | let msg = 99 | match s, f with 100 | | 1, 0 -> "Test passed" 101 | | s, 0 -> fmt "All %d tests passed" s 102 | | 0, 1 -> "Test failed" 103 | | 0, f -> fmt "All %d tests failed" f 104 | | s, f -> fmt "%d tests passed, %d tests failed" s f in 105 | Fmt.pr " %s %s in %0.2fms@." (C.bright_magenta "•") msg (t *. 
1000.0) 106 | 107 | let int : 'a testable = testable Fmt.int 108 | let float : 'a testable = testable Fmt.float 109 | let char : 'a testable = testable Fmt.char 110 | let bool : 'a testable = testable Fmt.bool 111 | let unit : 'a testable = testable (Fmt.unit "()") 112 | let int32 : 'a testable = testable ~equal:Int32.equal Fmt.int32 113 | let int64 : 'a testable = testable ~equal:Int64.equal Fmt.int64 114 | let string : 'a testable = testable ~equal:String.equal Fmt.string 115 | 116 | 117 | let list e = 118 | let rec equal l1 l2 = 119 | match (l1, l2) with 120 | | (x::xs, y::ys) -> Testable.equal e x y && equal xs ys 121 | | ([], []) -> true 122 | | _ -> false in 123 | testable (Fmt.Dump.list (Testable.pp e)) ~equal 124 | 125 | let sorted_list (type a) (a : a testable) compare = 126 | let l = list a in 127 | let equal l1 l2 = Testable.equal l (List.sort compare l1) (List.sort compare l2) in 128 | testable (Testable.pp l) ~equal 129 | 130 | let array e = 131 | let equal a1 a2 = 132 | let (m, n) = Array.(length a1, length a2) in 133 | let rec go i = i = m || (Testable.equal e a1.(i) a2.(i) && go (i + 1)) in 134 | m = n && go 0 in 135 | testable (Fmt.Dump.array (Testable.pp e)) ~equal 136 | 137 | let pair a b = 138 | let equal (a1, b1) (a2, b2) = 139 | Testable.equal a a1 a2 && Testable.equal b b1 b2 in 140 | testable (Fmt.Dump.pair (Testable.pp a) (Testable.pp b)) ~equal 141 | 142 | let option e = 143 | let equal x y = 144 | match (x, y) with 145 | | (Some a, Some b) -> Testable.equal e a b 146 | | (None, None) -> true 147 | | _ -> false in 148 | testable (Fmt.Dump.option (Testable.pp e)) ~equal 149 | 150 | let result a e = 151 | let equal x y = 152 | match (x, y) with 153 | | (Ok x, Ok y) -> Testable.equal a x y 154 | | (Error x, Error y) -> Testable.equal e x y 155 | | _ -> false in 156 | testable (Fmt.Dump.result ~ok:(Testable.pp a) ~error:(Testable.pp e)) ~equal 157 | 158 | -------------------------------------------------------------------------------- /examples/Javascript_lexer.ml: -------------------------------------------------------------------------------- 1 | module String = Astring.String 2 | 3 | let format, undefined = Kernel.(format, undefined) 4 | 5 | let decimal_int = 6 | [%sedlex.regexp? '0'..'9', Star ('0'..'9' | '_')] 7 | 8 | let hex_int = 9 | [%sedlex.regexp? 10 | '0', Chars "xX", ('0'..'9' | 'A'..'F' | 'a'..'f'), 11 | Star ('0'..'9' | 'A'..'F' | 'a'..'f' | '_')] 12 | 13 | let oct_int = 14 | [%sedlex.regexp? '0', Chars "oO", '0'..'7', Star ('0'..'7' | '_') ] 15 | 16 | let bin_int = 17 | [%sedlex.regexp? '0', Chars "bB", '0'..'1', Star ('0'..'1' | '_') ] 18 | 19 | let int = 20 | [%sedlex.regexp? decimal_int | hex_int | oct_int | bin_int ] 21 | 22 | let float = 23 | [%sedlex.regexp? 24 | '0'..'9', Star ('0'..'9' | '_'), 25 | Opt ('.', Star ('0'..'9' | '_')), 26 | Opt (Chars "eE", Opt (Chars "+-"), '0'..'9', Star ('0'..'9' | '_'))] 27 | 28 | let identifier = 29 | [%sedlex.regexp? Plus (alphabetic | Chars "_'")] 30 | 31 | let delimeter = 32 | [%sedlex.regexp? Chars "{}[].,;:=?"] 33 | 34 | let comment = 35 | [%sedlex.regexp? "//", Star (Compl '\n')] 36 | 37 | let white_space = 38 | [%sedlex.regexp? 
Plus (' ' | '\t')] 39 | 40 | type token = [ 41 | | `Bool of bool 42 | | `Char of char 43 | | `Float of float 44 | | `Int of int 45 | | `String of string 46 | | `Operator of string 47 | | `Delimiter of string 48 | | `Identifier of string 49 | | `Keyword of string 50 | ] 51 | 52 | let is_keyword = function `Keyword _ -> true | _ -> false 53 | let is_delimiter = function `Delimiter _ -> true | _ -> false 54 | let is_identifier = function `Identifier _ -> true | _ -> false 55 | 56 | let rec pp_token ppf token = 57 | let open Fmt in 58 | match token with 59 | | `Bool v -> pf ppf "%b" v 60 | | `Char v -> pf ppf "%c" v 61 | | `Float v -> pf ppf "%f" v 62 | | `Int v -> pf ppf "%d" v 63 | | `String v -> pf ppf "\"%s\"" v 64 | | `Operator v -> pf ppf "[operator %s]" v 65 | | `Delimiter v -> pf ppf "[delimeter %s]" v 66 | | `Identifier v -> pf ppf "[identifier %s]" v 67 | | `Keyword v -> pf ppf "[keyword %s]" v 68 | 69 | module Token = struct 70 | type t = token 71 | let pp = pp_token 72 | let compare (self : token) (other : token) = 73 | Pervasives.compare self other 74 | end 75 | 76 | module Location = struct 77 | type t = 78 | { line : int; 79 | column : int; 80 | length : int } 81 | 82 | let empty = 83 | { line = 0; 84 | column = 0; 85 | length = 0 } 86 | 87 | let to_string self = 88 | format "%d,%d/%d" self.line self.column self.length 89 | end 90 | 91 | type t = 92 | { mutable lexbuf : Sedlexing.lexbuf; 93 | mutable line_start : int; 94 | mutable line_count : int; 95 | mutable group_count : int } 96 | 97 | 98 | let increment_line self = 99 | self.line_start <- Sedlexing.lexeme_end self.lexbuf; 100 | self.line_count <- self.line_count + 1 101 | 102 | 103 | let current_location { lexbuf; line_count; line_start } = 104 | let open Sedlexing in 105 | let open Location in 106 | { line = line_count; 107 | column = lexeme_end lexbuf - line_start - lexeme_length lexbuf + 1; 108 | length = lexeme_length lexbuf } 109 | 110 | 111 | let current_lexeme self = 112 | Sedlexing.Utf8.lexeme self.lexbuf 113 | 114 | 115 | exception Error of 116 | { lexeme : string; 117 | location : Location.t; 118 | message : string } 119 | 120 | 121 | let error self message = 122 | let lexeme = current_lexeme self in 123 | let location = current_location self in 124 | raise (Error { message; lexeme; location }) 125 | 126 | 127 | let rec read self = 128 | let lexbuf = self.lexbuf in 129 | match%sedlex lexbuf with 130 | 131 | (* Operators *) 132 | | "!=" -> `Operator (current_lexeme self) 133 | | "!==" -> `Operator (current_lexeme self) 134 | | "%" -> `Operator (current_lexeme self) 135 | | "%=" -> `Operator (current_lexeme self) 136 | | "&" -> `Operator (current_lexeme self) 137 | | "&&" -> `Operator (current_lexeme self) 138 | | "&=" -> `Operator (current_lexeme self) 139 | | "*" -> `Operator (current_lexeme self) 140 | | "*=" -> `Operator (current_lexeme self) 141 | | "+" -> `Operator (current_lexeme self) 142 | | "++" -> `Operator (current_lexeme self) 143 | | "+=" -> `Operator (current_lexeme self) 144 | | "-" -> `Operator (current_lexeme self) 145 | | "--" -> `Operator (current_lexeme self) 146 | | "-=" -> `Operator (current_lexeme self) 147 | | "/" -> `Operator (current_lexeme self) 148 | | "/=" -> `Operator (current_lexeme self) 149 | | "<" -> `Operator (current_lexeme self) 150 | | "<<" -> `Operator (current_lexeme self) 151 | | "<<=" -> `Operator (current_lexeme self) 152 | | "<=" -> `Operator (current_lexeme self) 153 | | "==" -> `Operator (current_lexeme self) 154 | | "===" -> `Operator (current_lexeme self) 155 | | ">" 
-> `Operator (current_lexeme self) 156 | | ">=" -> `Operator (current_lexeme self) 157 | | ">>" -> `Operator (current_lexeme self) 158 | | ">>=" -> `Operator (current_lexeme self) 159 | | ">>>" -> `Operator (current_lexeme self) 160 | | ">>>=" -> `Operator (current_lexeme self) 161 | | "^=" -> `Operator (current_lexeme self) 162 | | "|" -> `Operator (current_lexeme self) 163 | | "|=" -> `Operator (current_lexeme self) 164 | | "||" -> `Operator (current_lexeme self) 165 | 166 | (* Whitespace and comment *) 167 | | Plus (white_space | comment) -> 168 | read self 169 | 170 | (* Int literal *) 171 | | int -> 172 | `Int (int_of_string (current_lexeme self)) 173 | 174 | (* Float literal *) 175 | | float -> 176 | `Float (float_of_string (current_lexeme self)) 177 | 178 | (* Group start *) 179 | | '(' -> 180 | self.group_count <- (self.group_count + 1); 181 | `Delimiter (current_lexeme self) 182 | 183 | (* Group end *) 184 | | ')' -> 185 | self.group_count <- (self.group_count - 1); 186 | 187 | if self.group_count < 0 then 188 | error self "unbalanced parenthesis" 189 | else 190 | `Delimiter (current_lexeme self) 191 | 192 | (* Delimiters *) 193 | | "=>" -> `Delimiter (current_lexeme self) 194 | | delimeter -> 195 | `Delimiter (current_lexeme self) 196 | 197 | (* Strings *) 198 | | '"', Star (Compl '"'), '"' -> 199 | let lexeme = current_lexeme self in 200 | let s = String.(Sub.to_string (sub lexeme ~start:1 ~stop:(length lexeme - 1))) in 201 | `String s 202 | 203 | (* Chars *) 204 | | '\'', Compl '\'', '\'' -> 205 | `Char (String.get (current_lexeme self) 1) 206 | 207 | (* Booleans *) 208 | | "true" -> `Bool true 209 | | "false" -> `Bool false 210 | 211 | (* Keywords *) 212 | | "function" -> `Keyword (current_lexeme self) 213 | | "var" -> `Keyword (current_lexeme self) 214 | | "return" -> `Keyword (current_lexeme self) 215 | | "for" -> `Keyword (current_lexeme self) 216 | | "while" -> `Keyword (current_lexeme self) 217 | 218 | (* Identifiers *) 219 | | identifier -> 220 | `Identifier (current_lexeme self) 221 | 222 | (* Newline symbol *) 223 | | '\n' -> 224 | increment_line self; 225 | read self 226 | 227 | (* EOF symbol *) 228 | | eof -> raise End_of_file 229 | 230 | (* Everything else is illegal *) 231 | | any -> 232 | error self "illegal character" 233 | 234 | (* Sedlex: the last branch must be a catch-all error case *) 235 | | _ -> undefined () 236 | 237 | 238 | let from_lexbuf lexbuf = 239 | { lexbuf; 240 | line_start = 0; 241 | line_count = 0; 242 | group_count = 0 } 243 | 244 | 245 | let of_string s = 246 | from_lexbuf (Sedlexing.Utf8.from_string s) 247 | 248 | let of_channel c = 249 | from_lexbuf (Sedlexing.Utf8.from_channel c) 250 | 251 | let rec to_stream lexer = 252 | try 253 | let token = read lexer in 254 | Pratt.Stream.Yield (token, fun () -> to_stream lexer) 255 | with End_of_file -> 256 | Pratt.Stream.Empty 257 | 258 | -------------------------------------------------------------------------------- /examples/Javascript.ml: -------------------------------------------------------------------------------- 1 | 2 | open Proto 3 | open Astring 4 | 5 | module Stream = Pratt.Stream 6 | module L = Javascript_lexer 7 | module P = Pratt.Make(L.Token) 8 | 9 | let (>>=) = P.(>>=) 10 | let return = P.return 11 | 12 | let separated_by sep p = 13 | p >>= fun x -> 14 | P.many (sep >>= fun () -> p) >>= fun xs -> 15 | return (x :: xs) 16 | 17 | let pair ~sep p1 p2 = 18 | p1 >>= fun x -> 19 | sep >>= fun () -> 20 | p2 >>= fun y -> 21 | return (x, y) 22 | 23 | module AST = struct 24 | type t = [ 25 | 
| `Binary of string * t * t 26 | | `Bool of bool 27 | | `Call of t * t list 28 | | `Dot of t * t 29 | | `Function of string * string list * t list 30 | | `Identifier of string 31 | | `Int of int 32 | | `Lambda of string list * t list 33 | | `Object of (string * t) list 34 | | `Return of t 35 | | `String of string 36 | | `Symbol of string 37 | | `Ternary of t * t * t 38 | | `Unary of string * t 39 | | `While of t * t list 40 | | `Var of string * t 41 | ] 42 | 43 | let rec pp formatter self = 44 | let open Fmt in 45 | let _br ?(indent = 0) n = fun formatter () -> 46 | Format.pp_print_break formatter n indent in 47 | let pp_args = hvbox (list ~sep:(always ", ") string) in 48 | let pp_body = vbox (list pp) in 49 | let pp_item = hbox (pair ~sep:(always ":@ ") String.dump pp) in 50 | match self with 51 | | `Symbol x | `Identifier x -> String.pp formatter x 52 | | `String x -> String.dump formatter x 53 | | `Bool x -> Bool.pp formatter x 54 | | `Int x -> Int.pp formatter x 55 | 56 | | `Return (`Lambda (args, body)) -> 57 | pf formatter "@[return function(@[%a@]) {@,%a@]@,}" 58 | pp_args args pp_body body 59 | 60 | | `Return x -> 61 | pf formatter "return @[%a@]" pp x 62 | 63 | | `Lambda (args, body) -> 64 | pf formatter "@[@[function(%a) {@;%a@]@;}@]" 65 | pp_args args pp_body body 66 | 67 | | `Function (name, args, body) -> 68 | (* pr "while(%a)" (hvbox (list ~sep:(always ",@;") int)) *) 69 | pf formatter "@[function %s(@[%a@]) {@,%a@]@,}" 70 | name pp_args args pp_body body 71 | 72 | | `Object o -> 73 | pf formatter "@[{@,%a@]@,}" (list ~sep:(always ",@,") pp_item) o 74 | 75 | | `Call (f, xs) -> 76 | pf formatter "%a(@[%a@])" pp f Fmt.(list ~sep:(always ",@ ") pp) xs 77 | 78 | | `Ternary (t, a, b) -> 79 | pf formatter "@[<4>%a@ @,?@ %a@, :@ %a@]" pp t pp a pp b 80 | 81 | | `Binary (op, a, b) -> 82 | pf formatter "@[<2>%a@;%s@;%a@]" pp a op pp b 83 | 84 | | `Unary (op, a) -> 85 | pf formatter "%s%a" op pp a 86 | 87 | | `Var (name, `Lambda (args, body)) -> 88 | pf formatter "@[var %s = function(@[%a@]) {@,%a@]@,}" 89 | name pp_args args pp_body body 90 | 91 | | `Var (name, value) -> 92 | pf formatter "@[<4>var %s =@ %a@]" name pp value 93 | 94 | | `Dot (x, y) -> 95 | pf formatter "%a.%a" pp x pp y 96 | 97 | | `While (condition, body) -> 98 | pf formatter "@[while (%a) {@,%a@]@,}" (hvbox pp) condition (list pp) body 99 | end 100 | 101 | let literal g : AST.t P.parser = 102 | P.current >>= function 103 | | `Identifier x -> P.advance >>= fun () -> return (`Identifier x) 104 | | `String x -> P.advance >>= fun () -> return (`String x) 105 | | `Int x -> P.advance >>= fun () -> return (`Int x) 106 | | `Bool x -> P.advance >>= fun () -> return (`Bool x) 107 | | t -> P.error (P.unexpected_token t) 108 | 109 | 110 | module Parser = struct 111 | let delimiter str = P.consume (`Delimiter str) 112 | let keyword str = P.consume (`Keyword str) 113 | 114 | let filter_map f = 115 | P.current >>= fun token -> 116 | match f token with 117 | | Some x -> P.advance >>= fun () -> return x 118 | | None -> P.error (P.unexpected_token token) 119 | 120 | let identifier = 121 | filter_map (function `Identifier x -> Some x | _ -> None) 122 | 123 | let string = 124 | filter_map (function `String x -> Some x | _ -> None) 125 | 126 | let return' grammar = 127 | keyword "return" >>= fun () -> 128 | P.parse grammar >>= fun x -> 129 | return (`Return x) 130 | 131 | let var grammar = 132 | keyword "var" >>= fun () -> 133 | identifier >>= fun name -> 134 | delimiter "=" >>= fun () -> 135 | P.parse grammar >>= fun value -> 136 | return (`Var
(name, value)) 137 | 138 | let function' grammar = 139 | let local = P.Grammar.add (P.null (`Keyword "return") return') grammar in 140 | keyword "function" >>= fun () -> 141 | P.default "" identifier >>= fun name -> 142 | delimiter "(" >>= fun () -> 143 | P.default [] (identifier |> separated_by (delimiter ",")) >>= fun args -> 144 | delimiter ")" >>= fun () -> 145 | delimiter "{" >>= fun () -> 146 | P.many (P.parse local) >>= fun body -> 147 | delimiter "}" >>= fun () -> 148 | return (if name = "" 149 | then `Lambda (args, body) 150 | else `Function (name, args, body)) 151 | 152 | let object' grammar = 153 | delimiter "{" >>= fun () -> 154 | let item = pair ~sep:(delimiter ":") string (P.parse grammar) in 155 | P.default [] (item |> separated_by (delimiter ",")) >>= fun args -> 156 | delimiter "}" >>= fun () -> 157 | return (`Object args) 158 | 159 | let call grammar f = 160 | delimiter "(" >>= fun () -> 161 | P.default [] (P.parse grammar |> separated_by (delimiter ",")) >>= fun args -> 162 | delimiter ")" >>= fun () -> 163 | return (`Call (f, args)) 164 | 165 | let ternary grammar condition = 166 | delimiter "?" >>= fun () -> 167 | P.parse grammar >>= fun consequence -> 168 | delimiter ":" >>= fun () -> 169 | P.parse grammar >>= fun alternative -> 170 | return (`Ternary (condition, consequence, alternative)) 171 | 172 | let while' grammar = 173 | keyword "while" >>= fun () -> 174 | delimiter "(" >>= fun () -> 175 | P.parse grammar >>= fun condition -> 176 | delimiter ")" >>= fun () -> 177 | delimiter "{" >>= fun () -> 178 | P.many (P.parse grammar) >>= fun body -> 179 | delimiter "}" >>= fun () -> 180 | return (`While (condition, body)) 181 | 182 | let parse = P.parse @@ P.grammar [ 183 | P.term literal; 184 | (* XXX: WTF? *) 185 | (* TODO: Add context: "while parsing `var` expected x but got y". *) 186 | (* P.null (`Keyword "var") (fun g -> P.consume (`Keyword "varx") >>= fun () -> return (`Int 42)); *) 187 | P.null (`Keyword "var") var; 188 | P.null (`Keyword "function") function'; 189 | P.null (`Keyword "while") while'; 190 | P.null (`Delimiter "{") object'; 191 | P.null (`Operator "+") (P.unary (fun a -> (`Unary ("+", a)))); 192 | P.left 30 (`Operator "+") (P.binary (fun a b -> (`Binary ("+", a, b)))); 193 | P.left 85 (`Delimiter ".") (P.binary (fun a b -> (`Dot (a, b)))); 194 | P.left 30 (`Operator "===") (P.binary (fun a b -> (`Binary ("===", a, b)))); 195 | P.left 80 (`Delimiter "(") call; 196 | P.left 20 (`Delimiter "?") ternary; 197 | ] 198 | end 199 | 200 | (* It is more important to understand what is *NOT* valid than what is valid. *) 201 | let input = {| 202 | 203 | var x = name ? name : "Hello, world!" 204 | 205 | function hello(name, a) { 206 | var x = name ? name : "Hello, world!" 207 | return x 208 | } 209 | var y = 42 + 0 210 | var z = +4 211 | 212 | function f(x) { 213 | console.log("hello") 214 | console.log(" ") 215 | console.log("wolrd") 216 | var y = x + 1 217 | function sum(a, b) { return a + b } 218 | return sum(x, y) 219 | } 220 | 221 | var sum = function(x, y) { return x + y } 222 | 223 | var partialSum = function(x) { return function (y) { return x + y } } 224 | 225 | var x = cond(a, f(1, 2, true), g()) ? 
"yes" : "no" 226 | 227 | { 228 | "x": 42, 229 | "y": 11, 230 | "f": function(x) { return x } 231 | } 232 | 233 | assert(point.x === 42) 234 | 235 | while(x === 1 + x === 1 + x === 1) { 236 | console.log("yes") 237 | console.log("yes") 238 | } 239 | 240 | |} 241 | 242 | let main = 243 | let rec loop input = 244 | if not (Stream.is_empty input) then 245 | match P.run Parser.parse input with 246 | | Ok (result, input') -> 247 | Fmt.pr "%a@.@." AST.pp result; 248 | loop input' 249 | | Error Zero -> () 250 | | Error e -> Fmt.pr "main: %s@." (P.error_to_string e) in 251 | loop (L.(to_stream (of_string input))) 252 | 253 | -------------------------------------------------------------------------------- /src/Pratt.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Pratt 0.1 3 | * Copyright (c) 2017 Rizo Isrof. All rights reserved. 4 | * 5 | * Distributed under the ISC license, see LICENSE file. 6 | *) 7 | 8 | type 'a fmt = Format.formatter -> 'a -> unit 9 | type 'a comparator = 'a -> 'a -> int 10 | 11 | module Stream : module type of Stream 12 | 13 | (** Pratt is a simple top-down precedence parser. 14 | 15 | The grammar is defined as a set of rules that are matched on tokens. Input 16 | is processed linearly with an iterator. The parser searches for rules that 17 | match the current token based on its position (prefix or infix). Rules store 18 | the matching token, the precedence (for infix rules) and the parser that 19 | builds the final Abstract Syntax Tree (AST) object. *) 20 | 21 | 22 | module type Token = sig 23 | type t 24 | 25 | val pp : t fmt 26 | val compare : t comparator 27 | end 28 | 29 | 30 | module Make (Token : Token) : sig 31 | type token = Token.t 32 | (** The type of tokens to be parsed. *) 33 | 34 | type error = 35 | | Unexpected of {expected : token option; actual : token option} 36 | | Invalid_infix of token 37 | | Invalid_prefix of token 38 | | Zero 39 | (** The type of errors for tokens of type ['a]. *) 40 | 41 | val unexpected_token : ?expected : token -> token -> error 42 | (** [unexpected_token ?expected t] is [Unexpected {actual = Some t; 43 | expected}]. *) 44 | 45 | val unexpected_end : ?expected : token -> unit -> error 46 | (** [unexpected_end ?expected ()] is [Unexpected {actual = None; expected}]. *) 47 | 48 | val invalid_prefix : token -> error 49 | (** [invalid_prefix t] is [Invalid_prefix t]. *) 50 | 51 | val invalid_infix : token -> error 52 | (** [invalid_infix t] is [Invalid_infix t]. *) 53 | 54 | val error_to_string : error -> string 55 | (** [error_to_string token_pp e] is a human-readable representation of [e]. *) 56 | 57 | val pp_error : error fmt 58 | (** [pp_error token_pp] is a pretty printer for values of type [e] and 59 | contained tokens. *) 60 | 61 | 62 | (** {1:parser Parser} *) 63 | 64 | type 'a parser 65 | (** A parser producing values of type ['a]. *) 66 | 67 | 68 | (** {2:parser-monad Monad Instance} *) 69 | 70 | val return : 'a -> 'a parser 71 | (** [return x] is a parser producing [x] as a value regardless of the input. *) 72 | 73 | val (>>=) : 'a parser -> ('a -> 'b parser) -> 'b parser 74 | (** [p >>= f] is a parser returned by [f] after applying [f] to the result of 75 | [p]. *) 76 | 77 | 78 | val error : error -> 'a parser 79 | (** [error e] is a parser that fails with the error [e] without consuming any 80 | input *) 81 | 82 | val zero : 'a parser 83 | (** [zero] is a parser that fails without consuming any input. 
*) 84 | 85 | val (<|>) : 'a parser -> 'a parser -> 'a parser 86 | (** [p <|> q] is a choice combinator. Parser [p] is applied first; if it 87 | succeeds, its value is returned. If [p] fails {e without consuming any 88 | input}, [q] is tried. *) 89 | 90 | 91 | (** {1:combinators Parsing Combinators} *) 92 | 93 | val default : 'a -> 'a parser -> 'a parser 94 | (** [default x p] runs the parser [p] producing the default [x] value if 95 | it fails. *) 96 | 97 | val combine : 'a parser -> 'b parser -> ('a * 'b) parser 98 | (** [combine p q] first parses [p] and then [q] returning a pair with 99 | corresponding results. *) 100 | 101 | val many : 'a parser -> 'a list parser 102 | (** [many p] applies the parser [p] zero or more times. Returns a list of the 103 | returned values of [p]. *) 104 | 105 | val some : 'a parser -> ('a * 'a list) parser 106 | (** [some p] applies the parser [p] one or more times. Returns the guaranteed 107 | first value and a potentially empty list of values parsed by [p]. *) 108 | 109 | val optional : 'a parser -> unit parser 110 | (** [optional p] tries to optionally parse the input with parser [p] without 111 | returning its output. *) 112 | 113 | val current : token parser 114 | (** [current] is the parser that produces the current token as the result. *) 115 | 116 | val next : token parser 117 | (** [next] returns the current token and advances to the next one. *) 118 | 119 | val expect : token -> token parser 120 | (** [expect token] checks if the current token in the input is equal to [token], 121 | failing if it is not. *) 122 | 123 | val advance : unit parser 124 | (** [advance] advances the parser to the next token. *) 125 | 126 | val consume : token -> unit parser 127 | (** [consume token] checks if the current token is equal to [token] and 128 | advances the parser to the next token, or fails if they are different. *) 129 | 130 | val satisfy : (token -> bool) -> token parser 131 | (** [satisfy test] is a parser that returns the current input token if it 132 | satisfies the [test] predicate or fails otherwise. *) 133 | 134 | val exactly : token -> token parser 135 | (** [exactly token] parses *exactly* the given [token]. *) 136 | 137 | val any : token parser 138 | (** [any] is a parser that accepts any input token. *) 139 | 140 | val from : token list -> token parser 141 | (** [from tokens] parses any token from the [tokens] list. *) 142 | 143 | val none : token list -> token parser 144 | (** [none tokens] parses any token *not* present in the [tokens] list. *) 145 | 146 | val range : ?compare: token comparator -> token -> token -> token parser 147 | (** [range ?compare s e] parses any token in the range defined by [s] and [e]. 148 | Optionally a custom [compare] function can be supplied. *) 149 | 150 | val choice : 'a parser list -> 'a parser 151 | (** [choice ps] is a parser that tries all the parsers in [ps] until one of 152 | them succeeds. *) 153 | 154 | val guard : bool -> unit parser 155 | 156 | val when' : bool -> unit parser -> unit parser 157 | val unless : bool -> unit parser -> unit parser 158 | 159 | val many_while : (token -> bool) -> 'a parser -> 'a list parser 160 | (** [many_while test p] repeatedly runs the parser [p] while the input token 161 | satisfies [test]. Stops when the token fails the [test] or if the input is 162 | empty. *) 163 | 164 | val some_while : (token -> bool) -> 'a parser -> ('a * 'a list) parser 165 | (** [some_while test p] is just like [many_while] except it parses at least one ['a].
*) 166 | 167 | 168 | type 'a rule 169 | (** The type for parsing rules for tokens of type ['t] producing results of 170 | type ['a]. *) 171 | 172 | (** {1:grammar Grammar} *) 173 | 174 | type 'a grammar 175 | (** Grammar type holding parsing rules for tokens of type ['t] and parsed 176 | values of type ['a]. *) 177 | 178 | module Grammar : sig 179 | type 'a t = 'a grammar 180 | 181 | val has_null : token -> 'a grammar -> bool 182 | val has_left : token -> 'a grammar -> bool 183 | 184 | val dump : token fmt -> 'a t -> unit 185 | 186 | val new_scope : 'a t -> 'a t 187 | val pop_scope : 'a t -> 'a t 188 | 189 | val add : 'a rule -> 'a t -> 'a t 190 | end 191 | 192 | val nud : int -> 'a grammar -> 'a parser 193 | val led : int -> 'a grammar -> 'a -> 'a parser 194 | 195 | 196 | (** {1:rules Rules} *) 197 | 198 | val rule : token -> ('a grammar -> 'a parser) -> 'a rule 199 | (** [rule t p] is a rule with parser [p] for prefix token [t]. *) 200 | 201 | val term : ('a grammar -> 'a parser) -> 'a rule 202 | (** [term p] is a parser for literals or variables. *) 203 | 204 | val infix : int -> token -> ('a -> 'a -> 'a) -> 'a rule 205 | (** [infix precedence token f] is a rule that parses infix occurrences of 206 | [token] with given [precedence] applying [f] to the {e lhs} and {e rhs} 207 | expressions. This rule is left-associative. *) 208 | 209 | val infixr : int -> token -> ('a -> 'a -> 'a) -> 'a rule 210 | (** [infixr precedence token f] is like {!infix} except it is 211 | right-associative. *) 212 | 213 | val prefix : token -> ('a -> 'a) -> 'a rule 214 | (** [prefix token f] is a rule that parses prefix occurrences of [token] 215 | applying [f] to the {e rhs} expression. *) 216 | 217 | val postfix : int -> token -> ('a -> 'a) -> 'a rule 218 | (** [postfix precedence token f] is a rule that parses postfix occurrences of 219 | [token] with given [precedence] applying [f] to the {e lhs} expression. *) 220 | 221 | val between : token -> token -> ('a -> 'a) -> 'a rule 222 | 223 | val delimiter : token -> 'a rule 224 | (** [delimiter token] is a rule that parses a delimiter [token]. *) 225 | 226 | val binary : ('a -> 'a -> 'a) -> 'a grammar -> 'a -> 'a parser 227 | val unary : ('a -> 'a) -> 'a grammar -> 'a parser 228 | 229 | val null : token -> ('a grammar -> 'a parser) -> 'a rule 230 | val left : int -> token -> ('a grammar -> 'a -> 'a parser) -> 'a rule 231 | 232 | 233 | (** {1:parsing Parsing} *) 234 | 235 | val grammar : 'a rule list -> 'a grammar 236 | (** [grammar rules] is a grammar for a language constructed with [rules]. *) 237 | 238 | val parse : ?precedence: int -> 'a grammar -> 'a parser 239 | (** [parse ?precedence g] is the parser for the grammar [g] starting with 240 | binding power [precedence]. *) 241 | 242 | val parse_many : 'a grammar -> 'a list parser 243 | val parse_some : 'a grammar -> ('a * 'a list) parser 244 | 245 | val run : 'a parser -> token Stream.t -> ('a * token Stream.t, error) result 246 | (** [run p input] is the result of running the parser [p] with the given 247 | [input]. The parsed value is produced with the remaining input. 
*) 248 | end 249 | 250 | -------------------------------------------------------------------------------- /src/Pratt.ml: -------------------------------------------------------------------------------- 1 | 2 | let inspect pp = 3 | Format.fprintf Fmt.stderr "@[%a@.@]" pp 4 | 5 | let log fmt = 6 | Format.kfprintf (fun f -> Format.pp_print_newline f ()) Fmt.stderr fmt 7 | 8 | let constantly x _ = x 9 | let (<<) f g = fun x -> f (g x) 10 | let is_some = function Some _ -> true | None -> false 11 | let flip f x y = f y x 12 | 13 | (* TODO: When failing show the so-far parsed result. *) 14 | 15 | type 'a fmt = Format.formatter -> 'a -> unit 16 | type 'a comparator = 'a -> 'a -> int 17 | 18 | 19 | module Stream = Stream 20 | 21 | 22 | module type Token = sig 23 | type t 24 | 25 | val pp : t fmt 26 | val compare : t comparator 27 | end 28 | 29 | 30 | module Make (Token : Token) = struct 31 | module Table = struct 32 | include Map.Make(Token) 33 | 34 | let get tbl x = 35 | try Some (find tbl x) 36 | with Not_found -> None 37 | end 38 | 39 | type token = Token.t 40 | 41 | let pp_token = Token.pp 42 | 43 | type error = 44 | | Unexpected of { expected : token option; actual : token option } 45 | | Invalid_infix of token 46 | | Invalid_prefix of token 47 | | Zero 48 | 49 | let unexpected_token ?expected actual = 50 | Unexpected {expected; actual = Some actual} 51 | 52 | let unexpected_end ?expected () = 53 | Unexpected {expected; actual = None} 54 | 55 | let invalid_prefix t = 56 | Invalid_prefix t 57 | 58 | let invalid_infix t = 59 | Invalid_infix t 60 | 61 | let error_to_string = function 62 | | Unexpected {expected = Some t1; actual = Some t2} -> 63 | Fmt.strf "Syntax error: expected '%a' but got '%a'" pp_token t1 pp_token t2 64 | | Unexpected { expected = Some t; actual = None } -> 65 | Fmt.strf "Syntax error: unexpected end of file while parsing '%a'" pp_token t 66 | | Unexpected { expected = None; actual = None } -> 67 | Fmt.strf "Syntax error: unexpected end of file" 68 | | Unexpected { expected = None; actual = Some t } -> 69 | Fmt.strf "Syntax error: unexpected token '%a'" pp_token t 70 | | Invalid_infix token -> 71 | Fmt.strf "Syntax error: '%a' cannot be used in infix position" pp_token token 72 | | Invalid_prefix token -> 73 | Fmt.strf "Syntax error: '%a' cannot be used in prefix position" pp_token token 74 | | Zero -> 75 | Fmt.strf "Syntax error: empty parser result" 76 | 77 | let pp_error ppf = function 78 | | Unexpected { expected; actual } -> 79 | Fmt.pf ppf "@[<2>Unexpected@ {@ expected =@ @[%a@];@ actual =@ @[%a@] }@]" 80 | (Fmt.Dump.option pp_token) expected (Fmt.Dump.option pp_token) actual 81 | | Invalid_infix token -> 82 | Fmt.pf ppf "@[<2>Invalid_infix@ @[%a@] @]" pp_token token 83 | | Invalid_prefix token -> 84 | Fmt.pf ppf "@[<2>Invalid_prefix@ @[%a@] @]" pp_token token 85 | | Zero -> Fmt.pf ppf "Empty" 86 | 87 | type 'a parser = token Stream.t -> ('a * token Stream.t, error) result 88 | 89 | let return x = 90 | fun input -> Ok (x, input) 91 | 92 | let (>>=) p f = 93 | fun input -> 94 | match p input with 95 | | Ok (x, input') -> 96 | let p' = f x in p' input' 97 | | Error e -> Error e 98 | 99 | 100 | let put s = fun _ -> Ok ((), s) 101 | let get = fun s -> Ok (s, s) 102 | 103 | 104 | let zero = fun _input -> Error Zero 105 | 106 | let (<|>) p q = fun input -> 107 | match p input with 108 | | Ok value -> Ok value 109 | | Error _ -> q input 110 | (* XXX: What if p consumes input?
*) 111 | (* | Error Empty -> q input *) 112 | (* | Error e -> Error e *) 113 | 114 | 115 | let default x p = 116 | p <|> return x 117 | 118 | let rec many p = 119 | (p >>= fun x -> many p >>= fun xs -> return (x :: xs)) 120 | |> default [] 121 | 122 | let combine p1 p2 = 123 | p1 >>= fun x -> 124 | p2 >>= fun y -> return (x, y) 125 | 126 | let rec some p = 127 | combine p (many p) 128 | 129 | let optional p = 130 | default () (p >>= fun _ -> return ()) 131 | 132 | let error e = 133 | fun _state -> Error e 134 | 135 | let advance s = 136 | let p = 137 | get >>= fun stream -> 138 | match Stream.next stream with 139 | | Some (token, stream') -> put stream' 140 | | None -> return () in 141 | p s 142 | 143 | let current = fun s -> 144 | let p = get >>= fun state -> 145 | match Stream.head state with 146 | | Some token -> return token 147 | | None -> error (unexpected_end ()) in 148 | p s 149 | 150 | let next s = 151 | let p = 152 | current >>= fun x -> 153 | advance >>= fun () -> return x in 154 | p s 155 | 156 | let expect expected : 'a parser = 157 | get >>= fun stream -> 158 | match Stream.head stream with 159 | | Some actual when actual = expected -> return actual 160 | | Some actual -> error (unexpected_token ~expected actual) 161 | | None -> error (unexpected_end ~expected ()) 162 | 163 | let consume tok = 164 | expect tok >>= fun _ -> advance 165 | 166 | let exactly x = 167 | expect x >>= fun x -> advance >>= fun () -> return x 168 | 169 | let satisfy test = 170 | next >>= function 171 | | actual when test actual -> return actual 172 | | actual -> error (unexpected_token actual) 173 | 174 | let any s = (satisfy (constantly true)) s 175 | 176 | let from list = 177 | satisfy (fun x -> List.mem x list) 178 | 179 | let none list = 180 | satisfy (fun x -> not (List.mem x list)) 181 | 182 | let range ?(compare = Pervasives.compare) s e = 183 | let (<=) a b = not (compare a b > 0) in 184 | satisfy (fun x -> s <= x && x <= e) 185 | 186 | let rec choice ps = 187 | match ps with 188 | | [] -> zero 189 | | p :: ps' -> p <|> choice ps' 190 | 191 | let guard = function 192 | | true -> return () 193 | | false -> zero 194 | 195 | let when' test m = 196 | if test then m 197 | else return () 198 | 199 | let unless test m = 200 | if test then return () 201 | else m 202 | 203 | let many_while test p = 204 | many (current >>= (guard << test) >>= fun () -> p) 205 | 206 | let some_while test p = 207 | some (current >>= (guard << test) >>= fun () -> p) 208 | 209 | type 'a grammar = { 210 | data : 'a scope list; 211 | term : 'a null 212 | } 213 | 214 | and 'a scope = { 215 | null : 'a null Table.t; 216 | left : 'a left Table.t; 217 | } 218 | 219 | and 'a null = ('a grammar -> 'a parser) 220 | and 'a left = ('a grammar -> 'a -> 'a parser) * int 221 | 222 | type 'a rule = 223 | | Term of 'a null 224 | | Null of token * 'a null 225 | | Left of token * 'a left 226 | 227 | module Grammar = struct 228 | type 'a t = 'a grammar 229 | 230 | let make_scope () = { 231 | null = Table.empty; 232 | left = Table.empty 233 | } 234 | 235 | let empty = { 236 | term = (fun g -> current >>= fun t -> error (Invalid_prefix t)); 237 | data = []; 238 | } 239 | 240 | let add rule grammar = 241 | let scope, data = 242 | match grammar.data with 243 | | [] -> make_scope (), [] 244 | | scope :: grammar' -> scope, grammar' in 245 | match rule with 246 | | Term term -> { grammar with term } 247 | | Null (t, rule) -> 248 | let scope = { scope with null = Table.add t rule scope.null } in 249 | { grammar with data = scope :: data } 250 | | Left 
209 | type 'a grammar = {
210 |   data : 'a scope list;
211 |   term : 'a null
212 | }
213 | 
214 | and 'a scope = {
215 |   null : 'a null Table.t;
216 |   left : 'a left Table.t;
217 | }
218 | 
219 | and 'a null = ('a grammar -> 'a parser)
220 | and 'a left = ('a grammar -> 'a -> 'a parser) * int
221 | 
222 | type 'a rule =
223 |   | Term of 'a null
224 |   | Null of token * 'a null
225 |   | Left of token * 'a left
226 | 
227 | module Grammar = struct
228 |   type 'a t = 'a grammar
229 | 
230 |   let make_scope () = {
231 |     null = Table.empty;
232 |     left = Table.empty
233 |   }
234 | 
235 |   let empty = {
236 |     term = (fun g -> current >>= fun t -> error (Invalid_prefix t));
237 |     data = [];
238 |   }
239 | 
240 |   let add rule grammar =
241 |     let scope, data =
242 |       match grammar.data with
243 |       | [] -> make_scope (), []
244 |       | scope :: grammar' -> scope, grammar' in
245 |     match rule with
246 |     | Term term -> { grammar with term }
247 |     | Null (t, rule) ->
248 |       let scope = { scope with null = Table.add t rule scope.null } in
249 |       { grammar with data = scope :: data }
250 |     | Left (t, rule) ->
251 |       let scope = { scope with left = Table.add t rule scope.left } in
252 |       { grammar with data = scope :: data }
253 | 
254 |   let dump pp_token grammar =
255 |     let dump_scope scope =
256 |       Fmt.pr "grammar.null:\n";
257 |       Table.iter (fun t _ -> Fmt.pr "- %a\n" pp_token t) scope.null;
258 |       Fmt.pr "grammar.left:\n";
259 |       Table.iter (fun t _ -> Fmt.pr "- %a\n" pp_token t) scope.left in
260 |     let rec loop (data : 'a scope list) =
261 |       match data with
262 |       | [] -> ()
263 |       | scope :: data' ->
264 |         dump_scope scope;
265 |         Fmt.pr "***@.";
266 |         loop data' in
267 |     loop grammar.data
268 | 
269 |   let get_left token grammar =
270 |     let rec find data =
271 |       match data with
272 |       | [] -> None
273 |       | scope :: data' ->
274 |         begin match Table.get token scope.left with
275 |         | Some rule -> Some rule
276 |         | None -> find data'
277 |         end in
278 |     find grammar.data
279 | 
280 |   let get_null token grammar =
281 |     let rec find data =
282 |       match data with
283 |       | [] -> None
284 |       | scope :: data' ->
285 |         begin match Table.get token scope.null with
286 |         | Some rule -> Some rule
287 |         | None -> find data'
288 |         end in
289 |     find grammar.data
290 | 
291 |   let has_null token grammar =
292 |     is_some (get_null token grammar)
293 | 
294 |   let has_left token grammar =
295 |     is_some (get_left token grammar)
296 | 
297 |   let new_scope grammar =
298 |     { grammar with data = make_scope () :: grammar.data }
299 | 
300 |   let pop_scope grammar =
301 |     let data =
302 |       match grammar.data with
303 |       | [] -> []
304 |       | _ :: data' -> data' in
305 |     { grammar with data }
306 | 
307 |   let get_term grammar =
308 |     grammar.term
309 | end
310 | 
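(* Scoping sketch (illustration only; [g] and [semi_rule] are assumed values):
   [get_null] and [get_left] search the scope list front to back, so a scope
   pushed with [new_scope] shadows outer definitions until it is dropped again
   by [pop_scope]:

     let scoped   = Grammar.add semi_rule (Grammar.new_scope g) in
     let restored = Grammar.pop_scope scoped in
     (* [scoped] finds [semi_rule] first; [restored] looks up rules
        exactly like the original [g] again *)
     restored
*)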
311 | let nud rbp grammar =
312 |   current >>= fun token ->
313 |   match Grammar.get_null token grammar with
314 |   | Some parse -> parse grammar
315 |   | None ->
316 |     (* Infix tokens can only be a valid prefix if they are directly defined
317 |        as such. If the token has a led definition it is not consumed,
318 |        otherwise the term parser is called. *)
319 |     if Grammar.has_left token grammar then
320 |       error (invalid_prefix token)
321 |     else
322 |       let parse = Grammar.get_term grammar in
323 |       parse grammar
324 | 
325 | let rec led rbp grammar x =
326 |   get >>= fun stream ->
327 |   match Stream.head stream with
328 |   | Some token ->
329 |     begin match Grammar.get_left token grammar with
330 |     | Some (parse, lbp) ->
331 |       if lbp > rbp then
332 |         parse grammar x >>= led rbp grammar
333 |       else
334 |         return x
335 |     | None ->
336 |       return x
337 |     end
338 |   | None ->
339 |     return x
340 | 
341 | let parse ?precedence:(rbp = 0) grammar =
342 |   nud rbp grammar >>= led rbp grammar
343 | 
344 | let parse_many grammar =
345 |   many begin
346 |     current >>= fun token ->
347 |     guard (not (Grammar.has_left token grammar)) >>= fun () ->
348 |     nud 0 grammar
349 |   end
350 | 
351 | let parse_some grammar =
352 |   some begin
353 |     current >>= fun token ->
354 |     guard (not (Grammar.has_left token grammar)) >>= fun () ->
355 |     nud 0 grammar
356 |   end
357 | 
358 | 
359 | let grammar rules =
360 |   List.fold_left (flip Grammar.add) Grammar.empty rules
361 | 
362 | let run p stream =
363 |   match p stream with
364 |   | Ok (x, stream') -> Ok (x, stream')
365 |   | Error e -> Error e
366 | 
367 | let rule token parse =
368 |   Null (token, parse)
369 | 
370 | let term parse =
371 |   Term parse
372 | 
373 | let infix precedence token f =
374 |   let parse grammar x =
375 |     advance >>= fun () ->
376 |     parse ~precedence grammar >>= fun y ->
377 |     return (f x y) in
378 |   Left (token, (parse, precedence))
379 | 
380 | let infixr precedence token f =
381 |   let parse grammar x =
382 |     advance >>= fun () ->
383 |     parse ~precedence:(precedence - 1) grammar >>= fun y ->
384 |     return (f x y) in
385 |   Left (token, (parse, precedence))
386 | 
387 | let prefix token f =
388 |   let parse grammar =
389 |     advance >>= fun () ->
390 |     parse grammar >>= fun x ->
391 |     return (f x) in
392 |   Null (token, parse)
393 | 
394 | let postfix precedence token f =
395 |   let parse grammar x =
396 |     advance >>= fun () ->
397 |     return (f x) in
398 |   Left (token, (parse, precedence))
399 | 
400 | let between token1 token2 f =
401 |   let parse grammar =
402 |     advance >>= fun () ->
403 |     parse grammar >>= fun x ->
404 |     consume token2 >>= fun () ->
405 |     return (f x) in
406 |   Null (token1, parse)
407 | 
408 | let delimiter token =
409 |   let parse g x = error (Invalid_infix token) in
410 |   Left (token, (parse, 0))
411 | 
412 | let null token parse =
413 |   Null (token, parse)
414 | 
415 | let left precedence token parse =
416 |   Left (token, (parse, precedence))
417 | 
418 | let binary f = fun g a ->
419 |   advance >>= fun () ->
420 |   parse g >>= fun b ->
421 |   return (f a b)
422 | 
423 | let unary f = fun g ->
424 |   advance >>= fun () ->
425 |   parse g >>= fun a ->
426 |   return (f a)
427 | end
428 | 
--------------------------------------------------------------------------------
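Usage sketch for the rule constructors in src/Pratt.ml above (illustration only,
not a file from this repository): `parse` drives the standard Pratt loop, calling
a token's null rule (or the `term` fallback) in prefix position and then folding
left rules for as long as their binding power exceeds the current precedence.
The string token type, the `Stream.of_list` constructor and the exact interface
exposed by `Pratt.mli` are assumptions made for this example; see
`tests/Test_pratt_calc.ml` and `examples/Javascript.ml` for the repository's
real usage.

    (* A hypothetical calculator over string tokens; any name not defined in
       Pratt.ml above ([Stream.of_list] in particular) is assumed. *)
    let calc =
      grammar [
        (* fallback: any token made of digits is an int literal *)
        term (fun _g ->
          satisfy (fun t -> t <> "" && t.[0] >= '0' && t.[0] <= '9') >>= fun t ->
          return (int_of_string t));
        prefix "-" (fun x -> -x);        (* unary minus *)
        infix 30 "*" ( * );              (* binds tighter than "+" *)
        infix 30 "/" ( / );
        infix 20 "+" ( + );
        infix 20 "-" ( - );
        between "(" ")" (fun x -> x);    (* parenthesised sub-expression *)
        delimiter ")";                   (* ")" never starts or extends one *)
      ]

    (* "2 + 3 * 4" is grouped as 2 + (3 * 4) because "*" has the higher
       left-binding power (30 > 20). *)
    let () =
      match run (parse calc) (Stream.of_list ["2"; "+"; "3"; "*"; "4"]) with
      | Ok (result, _rest) -> Fmt.pr "%d@." result
      | Error _ -> Fmt.pr "parse error@."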