├── README.md
├── src
│   ├── jbuild
│   ├── Stream.ml
│   ├── Pratt.mli
│   └── Pratt.ml
├── Init.ml
├── examples
│   ├── jbuild
│   ├── .merlin
│   ├── Javascript_lexer.ml
│   └── Javascript.ml
├── .gitignore
├── tests
│   ├── .merlin
│   ├── Local.ml
│   ├── jbuild
│   ├── Lispy_lexer.ml
│   ├── Test_pratt_calc.ml
│   ├── Test_parser.ml
│   ├── Test_lispy.ml
│   └── Nanotest.ml
├── pratt.opam
├── Makefile
├── LICENSE
└── Algorithm.dot
/README.md: -------------------------------------------------------------------------------- 1 | # Pratt Parser 2 | 3 | Top-down operator-precedence parser (also known as a Pratt parser) implementation for OCaml. 4 | 5 | -------------------------------------------------------------------------------- /src/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (library 4 | ((name pratt) 5 | (public_name pratt) 6 | (modules (Pratt Stream)) 7 | (libraries (fmt astring yojson)))) 8 | 9 | -------------------------------------------------------------------------------- /Init.ml: -------------------------------------------------------------------------------- 1 | #require "iter";; 2 | #require "fmt.top";; 3 | #directory "_build/default/src/";; 4 | #load_rec "_build/default/src/Pratt.cmo";; 5 | 6 | open Pratt;; 7 | -------------------------------------------------------------------------------- /examples/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | 3 | (executables 4 | ((names (Javascript)) 5 | (libraries (pratt astring fmt unix sedlex)) 6 | (preprocess (pps (sedlex.ppx))))) 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.annot 2 | *.cmo 3 | *.cma 4 | *.cmi 5 | *.a 6 | *.o 7 | *.cmx 8 | *.cmxs 9 | *.cmxa 10 | 11 | # ocamlbuild working directory 12 | _build/ 13 | 14 | # ocamlbuild targets 15 | *.byte 16 | *.native 17 | 18 | # oasis generated files 19 | setup.data 20 | setup.log 21 | -------------------------------------------------------------------------------- /examples/.merlin: -------------------------------------------------------------------------------- 1 | B ../_build/default/examples 2 | B ../_build/default/src 3 | FLG -ppx '/Users/rizo/Developer/OCaml/Code/pratt/_build/default/.ppx/sedlex.ppx/ppx.exe --as-ppx' 4 | FLG -w -40 5 | PKG astring 6 | PKG bytes 7 | PKG fmt 8 | PKG gen 9 | PKG proto 10 | PKG proto.kernel 11 | PKG proto.shadow-stdlib 12 | PKG result 13 | PKG sedlex 14 | PKG uchar 15 | PKG unix 16 | S ../src 17 | -------------------------------------------------------------------------------- /tests/.merlin: -------------------------------------------------------------------------------- 1 | B ../_build/default/src 2 | B ../_build/default/tests 3 | FLG -ppx '/Users/rizo/Developer/OCaml/Code/pratt/_build/default/.ppx/sedlex.ppx/ppx.exe --as-ppx' 4 | FLG -w -40 5 | PKG astring 6 | PKG bytes 7 | PKG fmt 8 | PKG gen 9 | PKG proto 10 | PKG proto.kernel 11 | PKG proto.shadow-stdlib 12 | PKG result 13 | PKG sedlex 14 | PKG uchar 15 | PKG unix 16 | S .
17 | S ../src 18 | -------------------------------------------------------------------------------- /tests/Local.ml: -------------------------------------------------------------------------------- 1 | 2 | module Char = Astring.Char 3 | 4 | let identity x = x 5 | 6 | module String = struct 7 | include String 8 | 9 | let implode l = 10 | let arr = Array.of_list l in 11 | String.init (Array.length arr) (Array.get arr) 12 | 13 | let join ?(sep = "") self = 14 | concat sep self 15 | end 16 | 17 | 18 | module Int = struct 19 | let unsafe_of_string = 20 | int_of_string 21 | end 22 | 23 | module Result = struct 24 | let map f = function Ok x -> Ok (f x) | Error e -> Error e 25 | end 26 | 27 | -------------------------------------------------------------------------------- /pratt.opam: -------------------------------------------------------------------------------- 1 | opam-version: "1.2" 2 | version: "0.2.0" 3 | maintainer: "Rizo Isrof " 4 | authors: "Rizo Isrof " 5 | license: "ISC" 6 | homepage: "https://github.com/rizo/pratt" 7 | bug-reports: "https://github.com/rizo/pratt/issues" 8 | dev-repo: "https://github.com/rizo/pratt.git" 9 | 10 | build: ["jbuilder" "build" "-p" name "-j" jobs] 11 | build-test: ["jbuilder" "runtest" "-p" name] 12 | 13 | depends: [ 14 | "pure" 15 | "fmt" 16 | "jbuilder" {build} 17 | ] 18 | available: [ocaml-version >= "4.02.0"] 19 | -------------------------------------------------------------------------------- /tests/jbuild: -------------------------------------------------------------------------------- 1 | (jbuild_version 1) 2 | (executables 3 | ((names (Test_pratt_calc Test_parser Test_lispy)) 4 | (libraries (pratt astring fmt unix sedlex)) 5 | (preprocess (pps (sedlex.ppx))))) 6 | 7 | (alias 8 | ((name runtest) 9 | (package pratt) 10 | (deps (Test_parser.exe)) 11 | (action (run ${<})))) 12 | 13 | 14 | (alias 15 | ((name runtest) 16 | (package pratt) 17 | (deps (Test_pratt_calc.exe)) 18 | (action (run ${<})))) 19 | 20 | (alias 21 | ((name runtest) 22 | (package pratt) 23 | (deps (Test_lispy.exe)) 24 | (action (run ${<})))) 25 | 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | build: 3 | jbuilder build -j4 @install 4 | 5 | install: build 6 | jbuilder install 7 | 8 | uninstall: 9 | jbuilder uninstall 10 | 11 | reinstall: uninstall install 12 | 13 | test: 14 | jbuilder runtest 15 | 16 | clean: 17 | jbuilder clean 18 | 19 | shell: build 20 | /usr/bin/env bash -c 'utop -init <(cat ~/.ocamlinit Init.ml)' 21 | 22 | watch: 23 | ls src/*.ml* tests/*.ml* | entr -cr make test 24 | 25 | watch-js: 26 | ls src/*.ml* examples/*.ml* | entr -cr sh -c 'jbuilder build examples/Javascript.exe; ./_build/default/examples/Javascript.exe' 27 | 28 | .PHONY: test all clean 29 | 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Rizo Isrof 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /Algorithm.dot: -------------------------------------------------------------------------------- 1 | 2 | digraph pratt { 3 | node [shape=record]; 4 | nud [shape=hexagon, color=Blue]; 5 | led [shape=hexagon, color=Blue]; 6 | nud_invalid [color=Red,label="nud.invalid_nud(token)"]; 7 | led_return [color=Green,label="led.return(left)"]; 8 | 9 | // NUD 10 | nud -> "nud.has_nud(token)" [style=dashed]; 11 | "nud.has_nud(token)" -> "nud.parse_nud(token)" [label="yes"]; 12 | "nud.has_nud(token)" -> "nud.has_led(token)" [label="no"]; 13 | "nud.has_led(token)" -> "nud.parse_term(token)" [label="no"]; 14 | "nud.has_led(token)" -> nud_invalid [label="yes"]; 15 | "nud.parse_nud(token)" -> led [style=dashed]; 16 | "nud.parse_term(token)" -> led [style=dashed]; 17 | 18 | // LED 19 | led -> "led.is_eof" [style=dashed]; 20 | "led.is_eof" -> led_return [label="yes"]; 21 | "led.is_eof" -> "led.has_led(token)" [label="no"]; 22 | "led.has_led(token)" -> "led.(lbp > rbp)" [label="yes"]; 23 | "led.has_led(token)" -> "invalid_led(token)" [label="no", color=Red]; 24 | "led.(lbp > rbp)" -> "led(token)" [label="yes"]; 25 | "led.(lbp > rbp)" -> led_return [label="no"]; 26 | "led(token)" -> "advance()" [style=dashed]; 27 | "advance()" -> "nud(token)" [style=dashed]; 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/Stream.ml: -------------------------------------------------------------------------------- 1 | 2 | type 'a t = Yield of 'a * (unit -> 'a t) | Empty 3 | 4 | let rec count n : int t = Yield (n, fun () -> count (n + 1)) 5 | 6 | let rec make n f : 'a t = 7 | let rec loop i = 8 | if i = n then Empty 9 | else Yield (f i, fun () -> loop (i + 1)) in 10 | loop 0 11 | 12 | let rec fold f acc (stream : 'a t) = 13 | match stream with 14 | | Empty -> acc 15 | | Yield (x, k) -> fold f (f acc x) (k ()) 16 | 17 | let rec map f (stream : 'a t) = 18 | match stream with 19 | | Empty -> Empty 20 | | Yield (x, k) -> Yield (f x, fun () -> map f (k ())) 21 | 22 | let rec head stream = 23 | match stream with 24 | | Empty -> None 25 | | Yield (x, _) -> Some x 26 | 27 | let is_empty (stream : 'a t) = 28 | match stream with 29 | | Empty -> true 30 | | _ -> false 31 | 32 | let rec of_list list : 'a t = 33 | match list with 34 | | [] -> Empty 35 | | x :: xs -> Yield (x, fun () -> of_list xs) 36 | 37 | let to_list (stream : 'a t) = 38 | let rec loop acc stream' = 39 | match stream' with 40 | | Empty -> acc 41 | | Yield (x, k) -> loop (List.cons x acc) (k ()) in 42 | List.rev (loop [] stream) 43 | 44 | let of_string str : char t = 45 | let rec loop i = 46 | try 47 | Yield (String.get str i, fun () -> loop (i + 1)) 48 | with Invalid_argument _ -> 49 | Empty in 50 | loop 0 51 | 52 | let rec filter p stream = 53 | match stream with 54 | | Empty -> Empty 55 | | Yield (x, k) -> 56 | if p x then 57 | Yield (x, fun () -> filter p (k ())) 58 | else 59 | filter p (k ()) 60 | 61 | let reject p stream = 62 | filter (fun x -> not (p x)) stream 63 | 64 | let next stream = 65 | match stream with 66 | | Empty -> None 67 | | Yield (x, stream') -> Some (x, stream' ()) 68 | 69 | 
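The Stream module listed above is the parser's only input representation: a thunk-based lazy stream consumed one token at a time. A minimal usage sketch (an illustrative addition, not a file in the repository), using only functions defined in Stream.ml and the Pratt.Stream path that the tests below use to reach this module:

    let () =
      Pratt.Stream.of_string "a b c"                (* build a char Stream.t *)
      |> Pratt.Stream.reject (fun c -> c = ' ')     (* drop the blanks *)
      |> Pratt.Stream.to_list                       (* force the lazy stream *)
      |> List.iter print_char                       (* prints: abc *)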
-------------------------------------------------------------------------------- /tests/Lispy_lexer.ml: -------------------------------------------------------------------------------- 1 | 2 | module String = Astring.String 3 | 4 | let identifier = 5 | [%sedlex.regexp? Plus (alphabetic | Chars "_'")] 6 | 7 | let white_space = 8 | [%sedlex.regexp? Plus (' ' | '\t')] 9 | 10 | module Token = struct 11 | type t = string 12 | 13 | let pp = Fmt.string 14 | let compare = Pervasives.compare 15 | end 16 | 17 | type t = 18 | { mutable lexbuf : Sedlexing.lexbuf; 19 | mutable line_start : int; 20 | mutable line_count : int; 21 | mutable group_count : int } 22 | 23 | 24 | let increment_line self = 25 | self.line_start <- Sedlexing.lexeme_end self.lexbuf; 26 | self.line_count <- self.line_count + 1 27 | 28 | 29 | let current_lexeme self = 30 | Sedlexing.Utf8.lexeme self.lexbuf 31 | 32 | 33 | let rec read self = 34 | let lexbuf = self.lexbuf in 35 | match%sedlex lexbuf with 36 | 37 | (* Whitespace *) 38 | | Plus white_space -> read self 39 | 40 | (* Group start *) 41 | | '(' -> 42 | self.group_count <- (self.group_count + 1); 43 | current_lexeme self 44 | 45 | (* Operators *) 46 | | '+' | '-' -> 47 | current_lexeme self 48 | 49 | (* Group end *) 50 | | ')' -> 51 | self.group_count <- (self.group_count - 1); 52 | 53 | if self.group_count < 0 then 54 | failwith "unbalanced parenthesis" 55 | else 56 | current_lexeme self 57 | 58 | (* Identifiers *) 59 | | identifier -> current_lexeme self 60 | 61 | (* Newline symbol *) 62 | | '\n' -> 63 | increment_line self; 64 | read self 65 | 66 | (* EOF symbol *) 67 | | eof -> raise End_of_file 68 | 69 | (* Everything else is illegal *) 70 | | any -> 71 | failwith ("illegal character: " ^ current_lexeme self) 72 | 73 | (* Sedlex: the last branch must be a catch-all error case *) 74 | | _ -> failwith "impossible" 75 | 76 | 77 | let from_lexbuf lexbuf = 78 | { lexbuf; 79 | line_start = 0; 80 | line_count = 0; 81 | group_count = 0 } 82 | 83 | let of_string s = 84 | from_lexbuf (Sedlexing.Utf8.from_string s) 85 | 86 | let of_channel c = 87 | from_lexbuf (Sedlexing.Utf8.from_channel c) 88 | 89 | let rec to_stream lexer = 90 | try 91 | let token = read lexer in 92 | Pratt.Stream.Yield (token, fun () -> to_stream lexer) 93 | with End_of_file -> 94 | Pratt.Stream.Empty 95 | 96 | -------------------------------------------------------------------------------- /tests/Test_pratt_calc.ml: -------------------------------------------------------------------------------- 1 | open Local 2 | 3 | let _1 = fst 4 | module Stream = Pratt.Stream 5 | module P = Pratt.Make(Char) 6 | open P 7 | 8 | (* Integer parser for char tokens. *) 9 | let int g = 10 | some (range '0' '9') >>= fun (x, xs) -> 11 | return (Int.unsafe_of_string (String.implode (x :: xs))) 12 | 13 | let rec fac = function 14 | | 0 | 1 -> 1 15 | | n -> n * fac (n - 1) 16 | 17 | (* Basic calculator grammar. *) 18 | let calc = 19 | grammar [ 20 | term int; 21 | null '+' (unary identity); 22 | left 30 '+' (binary ( + )); 23 | null '-' (unary ( ~- )); 24 | left 30 '-' (binary ( - )); 25 | left 40 '*' (binary ( * )); 26 | left 40 '/' (binary ( / )); 27 | postfix 70 '!' (fun a -> fac a); 28 | between '(' ')' (fun a -> a); 29 | delimiter ')'; 30 | ] 31 | 32 | 33 | (* Basic string lexer (ignores blank characters). *) 34 | let lexer str = 35 | Stream.of_string str |> Stream.reject Char.Ascii.is_blank 36 | 37 | module T = Nanotest 38 | 39 | (* Helper testing function that parses the input and checks the result. 
*) 40 | let (==>) str expected = 41 | let actual = Result.map _1 (run (parse calc) (lexer str)) in 42 | let testable = T.(result int (testable (Fmt.of_to_string error_to_string))) in 43 | T.test testable str ~actual ~expected 44 | 45 | 46 | let stream = 47 | T.testable ~equal:(fun a b -> Stream.(to_list a == to_list b)) 48 | (Fmt.of_to_string (fun _ -> "")) 49 | 50 | let (==>!) str expected = 51 | let actual = run (parse calc) (lexer str) in 52 | let testable = T.(result (pair int stream) (testable (Fmt.of_to_string error_to_string))) in 53 | T.test testable str ~actual ~expected 54 | 55 | 56 | (* Tests *) 57 | let () = 58 | T.group "Test basic" [ 59 | "1" ==> Ok 1; 60 | "+1" ==> Ok 1; 61 | "+-+-1" ==> Ok 1; 62 | "1 + 1" ==> Ok 2; 63 | "100 + 300" ==> Ok 400; 64 | "1 + -1" ==> Ok 0; 65 | "1 + --1" ==> Ok 2; 66 | "(((0)))" ==> Ok 0; 67 | "2 + 2 * 2" ==> Ok 6; 68 | "(2 + 2) * 2" ==> Ok 8; 69 | ]; 70 | 71 | T.group "Factorial" [ 72 | "5!" ==> Ok 120; 73 | "0!" ==> Ok 1; 74 | ]; 75 | 76 | T.group "Check errors" [ 77 | "x" ==> Error (unexpected_token 'x'); 78 | "" ==> Error (unexpected_end ()); 79 | "/" ==> Error (invalid_prefix '/'); 80 | "2 /" ==> Error (unexpected_end ()); 81 | "2 / -" ==> Error (unexpected_end ()); 82 | "2 (" ==>! Ok (2, Stream.of_list ['(']); 83 | (* XXX: Alternatively fail with unexpected ( *) 84 | ] 85 | 86 | -------------------------------------------------------------------------------- /tests/Test_parser.ml: -------------------------------------------------------------------------------- 1 | open Astring 2 | 3 | module Stream = Pratt.Stream 4 | module P = Pratt.Make(Char) 5 | module T = Nanotest 6 | open Local 7 | 8 | let _1 = fst 9 | 10 | (* Helper test function, tests a particular [parser] with a given [input]. *) 11 | let test parser input expected = 12 | let printer = T.(result char (testable P.pp_error)) in 13 | let actual = Result.map _1 @@ P.run parser (Stream.of_string input) in 14 | let message = "input: \"" ^ input ^ "\"" in 15 | T.test ~verbose:true printer message ~expected ~actual 16 | 17 | (* Helper test function, tests a particular [parser] with a given [input]. 
*) 18 | let test' parser input expected = 19 | let printer = T.result T.(list char) (T.testable P.pp_error) in 20 | let actual = Result.map _1 @@ P.run parser (Pratt.Stream.of_string input) in 21 | let message = "input: \"" ^ input ^ "\"" in 22 | T.test ~verbose:true printer message ~expected ~actual 23 | 24 | 25 | let test_error () = 26 | let (==>) = test P.(error Zero) in 27 | T.group "Parser.error" [ 28 | "" ==> Error P.Zero; 29 | "a" ==> Error P.Zero; 30 | "abc" ==> Error P.Zero; 31 | ] 32 | 33 | let test_expect () = 34 | let (==>) = test (P.expect 'a') in 35 | T.group "Parser.expect" [ 36 | "" ==> Error P.(unexpected_end ~expected:'a' ()); 37 | "x" ==> Error P.(unexpected_token ~expected:'a' 'x'); 38 | "a" ==> Ok 'a'; 39 | "abc" ==> Ok 'a'; 40 | ] 41 | 42 | let test_exactly () = 43 | let (==>) (x, input) = test (P.exactly x) input in 44 | T.group "Parser.exactly" [ 45 | ('x', "x") ==> Ok 'x'; 46 | ('x', "y") ==> Error P.(unexpected_token ~expected:'x' 'y'); 47 | ('x', "") ==> Error P.(unexpected_end ~expected:'x' ()); 48 | ] 49 | 50 | let test_satisfy () = 51 | let (==>) = test (P.satisfy Char.Ascii.is_upper) in 52 | T.group "Parser.satisfy is_upper" [ 53 | "A" ==> Ok 'A'; 54 | "" ==> Error P.(unexpected_end ()); 55 | "0" ==> Error P.(unexpected_token '0'); 56 | "a" ==> Error P.(unexpected_token 'a'); 57 | ] 58 | 59 | let test_any () = 60 | let (==>) = test P.any in 61 | T.group "Parser.any" [ 62 | "x" ==> Ok 'x'; 63 | "0" ==> Ok '0'; 64 | "?" ==> Ok '?'; 65 | "" ==> Error P.(unexpected_end ()); 66 | ] 67 | 68 | let test_from () = 69 | let (==>) (options, input) = test (P.from options) input in 70 | T.group "Parser.from" [ 71 | ([], "x") ==> Error P.(unexpected_token 'x'); 72 | (['x'], "x") ==> Ok 'x'; 73 | (['x'; 'y'], "y") ==> Ok 'y'; 74 | (['x'; 'y'], "z") ==> Error P.(unexpected_token 'z'); 75 | ([], "") ==> Error P.(unexpected_end ()); 76 | ] 77 | 78 | let test_while () = 79 | let (==>) input = test' P.(many_while ((!=) 'x') any) input in 80 | T.group "Parser.many_while" [ 81 | "" ==> Ok []; 82 | "x" ==> Ok []; 83 | "ax" ==> Ok ['a']; 84 | "abcx" ==> Ok ['a'; 'b'; 'c']; 85 | "abcxxx" ==> Ok ['a'; 'b'; 'c']; 86 | "abc" ==> Ok ['a'; 'b'; 'c']; 87 | ] 88 | 89 | let test_many () = 90 | let (==>) input = test' P.(many any) input in 91 | T.group "Parser.many_while" [ 92 | "" ==> Ok []; 93 | "ax" ==> Ok ['a']; 94 | "abc" ==> Ok ['a'; 'b'; 'c']; 95 | ] 96 | 97 | let test_many_while () = 98 | let (==>) input = test' P.(many_while ((!=) 'x') any) input in 99 | T.group "Parser.many_while" [ 100 | "" ==> Ok []; 101 | "ax" ==> Ok ['a']; 102 | "abcx" ==> Ok ['a'; 'b'; 'c']; 103 | "abc" ==> Ok ['a'; 'b'; 'c']; 104 | ] 105 | 106 | let () = begin 107 | test_error (); 108 | test_expect (); 109 | test_exactly (); 110 | test_satisfy (); 111 | test_any (); 112 | test_from (); 113 | test_many (); 114 | test_many_while (); 115 | end 116 | 117 | -------------------------------------------------------------------------------- /tests/Test_lispy.ml: -------------------------------------------------------------------------------- 1 | open Local 2 | 3 | 4 | let inspect pp = 5 | Format.fprintf Fmt.stderr "@[%a@.@]" pp 6 | 7 | let log fmt = 8 | Format.kfprintf (fun f -> Format.pp_print_newline f ()) Fmt.stderr fmt 9 | 10 | 11 | let _1 = fst 12 | module Stream = Pratt.Stream 13 | module P = Pratt.Make(Lispy_lexer.Token) 14 | module G = P.Grammar 15 | let (>>=) = P.(>>=) 16 | 17 | module E = struct 18 | type t = [ `sym of string | `add of t * t | `sub of t * t | `neg of t | `seq of t list ] 19 | let rec pp 
formatter e = 20 | match e with 21 | | `sym a -> Format.fprintf formatter "%s" a 22 | | `add (a, b) -> Format.fprintf formatter "(%a + %a)" pp a pp b 23 | | `sub (a, b) -> Format.fprintf formatter "(%a - %a)" pp a pp b 24 | | `neg a -> Format.fprintf formatter "(- %a)" pp a 25 | | `seq xs -> Format.fprintf formatter "(%a)" (Fmt.list ~sep:(Fmt.unit " ") pp) xs 26 | let equal = (=) 27 | end 28 | 29 | let parse g = 30 | let left = 31 | P.some (P.nud 0 g) >>= fun (x, xs) -> 32 | if List.length xs = 0 then 33 | P.return x 34 | else 35 | P.return (`seq (x :: xs)) in 36 | left >>= P.led 0 g 37 | 38 | let parse_term g = 39 | P.any >>= fun x -> 40 | P.return (`sym x) 41 | 42 | let parse_add g a = 43 | P.advance >>= fun () -> 44 | parse g >>= fun b -> 45 | P.return (`add (a, b)) 46 | 47 | let parse_sub g a = 48 | P.advance >>= fun () -> 49 | parse g >>= fun b -> 50 | P.return (`sub (a, b)) 51 | 52 | let parse_neg g = 53 | P.advance >>= fun () -> 54 | parse g >>= fun a -> 55 | P.return (`neg a) 56 | 57 | let parse_group g = 58 | P.advance >>= fun () -> 59 | parse g >>= fun a -> 60 | P.consume ")" >>= fun () -> 61 | P.return a 62 | 63 | (* Basic calculator grammar. *) 64 | let lispy = 65 | P.grammar P.[ 66 | term parse_term; 67 | left 30 "+" parse_add; 68 | left 30 "-" parse_sub; 69 | null "-" parse_neg; 70 | null "(" parse_group; 71 | delimiter ")" 72 | ] 73 | 74 | let parser = parse lispy 75 | 76 | module T = Nanotest 77 | 78 | (* Helper testing function that parses the input and checks the result. *) 79 | let (==>) str expected = 80 | let actual = Result.map _1 (P.run parser (Lispy_lexer.(of_string str |> to_stream))) in 81 | let testable = T.(result (module E) (testable (Fmt.of_to_string P.error_to_string))) in 82 | T.test testable str ~actual ~expected 83 | 84 | 85 | (* Tests *) 86 | let () = 87 | let (a, b, c, d, e) = 88 | `sym "a", `sym "b", `sym "c", `sym "d", `sym "e" in 89 | let (+) a b = `add (a, b) in 90 | let (~-) a = `neg a in 91 | let seq l = `seq l in 92 | 93 | T.group "Test basic" [ 94 | "a" ==> Ok a; 95 | "a + b" ==> Ok (a + b); 96 | "-a" ==> Ok ~-a; 97 | "a b" ==> Ok (seq [a; b]); 98 | "a b c" ==> Ok (seq [a; b; c]); 99 | "a b + c" ==> Ok (seq [a; b] + c); 100 | "a + b c" ==> Ok (a + seq [b; c]); 101 | "(a + b) (c + d)" ==> Ok (seq [a + b; c + d]); 102 | "(a b) (c d)" ==> Ok (seq [seq [a; b]; seq [c; d]]); 103 | ]; 104 | 105 | let def f args body = 106 | seq [`sym "def"; f; seq args; body] in 107 | let let' bindings body = 108 | seq [`sym "let"; seq bindings; body] in 109 | let lambda args body = 110 | seq [`sym "lambda"; seq args; body] in 111 | 112 | let ex1_str = {| 113 | (def a (a b c) 114 | (let 115 | ((d (a + b)) 116 | (e (-c))) 117 | ((lambda (a b) (a + e)) d))) 118 | |} in 119 | let ex1_expr = 120 | def a [a; b; c] 121 | (let' [seq [d; a + b]; 122 | seq [e; ~-c]] 123 | (seq [lambda [a; b] (a + e); d])) in 124 | 125 | T.group "Compound" [ 126 | ex1_str ==> Ok ex1_expr; 127 | ex1_str ^ ex1_str ==> Ok (seq [ex1_expr; ex1_expr]) 128 | ] 129 | 130 | 131 | -------------------------------------------------------------------------------- /tests/Nanotest.ml: -------------------------------------------------------------------------------- 1 | let fmt = Format.asprintf 2 | 3 | module C = struct 4 | let color_pp color = 5 | fmt "\027[%dm%s\027[0m" 6 | (match color with 7 | | `Black -> 30 8 | | `Red -> 31 9 | | `Green -> 32 10 | | `Yellow -> 33 11 | | `Blue -> 34 12 | | `Magenta -> 35 13 | | `Cyan -> 36 14 | | `White -> 37) 15 | 16 | let blue = color_pp `Blue 17 | let red = color_pp `Red 
18 | let yellow = color_pp `Yellow 19 | let magenta = color_pp `Magenta 20 | let cyan = color_pp `Cyan 21 | let white = color_pp `White 22 | let green = color_pp `Green 23 | let bright_white x = fmt "\027[1;37m%s\027[0m" x 24 | let bright_blue x = fmt "\027[1;34m%s\027[0m" x 25 | let bright_magenta x = fmt "\027[1;35m%s\027[0m" x 26 | let violet x = fmt "\027[0;34m%s\027[0m" x 27 | let bright_red x = fmt "\027[1;31m%s\027[0m" x 28 | let bright_green x = fmt "\027[1;32m%s\027[0m" x 29 | let start_bright_white = fmt "\027[1;37m" 30 | let start_white = fmt "\027[37m" 31 | let end_color = "\027[0m" 32 | let italic x = "\027[3m" ^ x ^ "\027[0m" 33 | let underline x = "\027[4m" ^ x ^ "\027[0m" 34 | let blink x = "\027[5m" ^ x ^ "\027[0m" 35 | end 36 | 37 | 38 | module type Testable = sig 39 | type t 40 | val equal : t -> t -> bool 41 | val pp : Format.formatter -> t -> unit 42 | end 43 | 44 | type 'a testable = (module Testable with type t = 'a) 45 | 46 | module Testable = struct 47 | let pp (type a) (t: a testable) = let (module T) = t in T.pp 48 | let equal (type a) (t: a testable) = let (module T) = t in T.equal 49 | end 50 | 51 | 52 | let time ?fmt f x = 53 | let t0 = Unix.gettimeofday () in 54 | let fx = f x in 55 | let t1 = Unix.gettimeofday () -. t0 in 56 | let () = match fmt with 57 | | Some fmt -> Printf.eprintf "%s\n" (fmt fx t1) 58 | | None -> Printf.eprintf "Elapsed time: %f sec\n" t1 in 59 | fx 60 | 61 | let test ?(verbose = true) ty msg ~actual ~expected () = 62 | let ok = Testable.equal ty actual expected in 63 | begin if not ok then begin 64 | Fmt.pr " %s %s@." (C.bright_red "✗") (C.bright_white msg); 65 | Fmt.pr " - %a@." (Testable.pp ty) expected; 66 | Fmt.pr " + %a@." (Testable.pp ty) actual 67 | end else if verbose then 68 | Fmt.pr " %s %s@." (C.bright_green "✓") (C.bright_white msg) 69 | end; 70 | ok 71 | 72 | let testable (type a) (pp: a Fmt.t) (equal: a -> a -> bool) : a testable = 73 | let module M = struct 74 | type t = a 75 | let pp = pp 76 | let equal = equal 77 | end in 78 | (module M) 79 | 80 | let testable (type a) ?(equal: a -> a -> bool = Pervasives.(=)) (pp: a Fmt.t) : a testable = 81 | let module M = struct 82 | type t = a 83 | let pp = pp 84 | let equal = equal 85 | end in 86 | (module M) 87 | 88 | 89 | let group name tests = 90 | Fmt.pr "━━━ %s ━━━@." (C.bright_blue name); 91 | let t0 = Unix.gettimeofday () in 92 | let s, f, t = 93 | List.fold_left begin fun (s, f, t) test -> 94 | if test () then (s + 1, f, t + 1) else (s, f + 1, t + 1) 95 | end 96 | (0, 0, 0) tests in 97 | let t = Unix.gettimeofday () -. t0 in 98 | let msg = 99 | match s, f with 100 | | 1, 0 -> "Test passed" 101 | | s, 0 -> fmt "All %d tests passed" s 102 | | 0, 1 -> "Test failed" 103 | | 0, f -> fmt "All %d tests failed" f 104 | | s, f -> fmt "%d tests passed, %d tests failed" s f in 105 | Fmt.pr " %s %s in %0.2fms@." (C.bright_magenta "•") msg (t *. 
1000.0) 106 | 107 | let int : 'a testable = testable Fmt.int 108 | let float : 'a testable = testable Fmt.float 109 | let char : 'a testable = testable Fmt.char 110 | let bool : 'a testable = testable Fmt.bool 111 | let unit : 'a testable = testable (Fmt.unit "()") 112 | let int32 : 'a testable = testable ~equal:Int32.equal Fmt.int32 113 | let int64 : 'a testable = testable ~equal:Int64.equal Fmt.int64 114 | let string : 'a testable = testable ~equal:String.equal Fmt.string 115 | 116 | 117 | let list e = 118 | let rec equal l1 l2 = 119 | match (l1, l2) with 120 | | (x::xs, y::ys) -> Testable.equal e x y && equal xs ys 121 | | ([], []) -> true 122 | | _ -> false in 123 | testable (Fmt.Dump.list (Testable.pp e)) ~equal 124 | 125 | let sorted_list (type a) (a : a testable) compare = 126 | let l = list a in 127 | let equal l1 l2 = Testable.equal l (List.sort compare l1) (List.sort compare l2) in 128 | testable (Testable.pp l) ~equal 129 | 130 | let array e = 131 | let equal a1 a2 = 132 | let (m, n) = Array.(length a1, length a2) in 133 | let rec go i = i = m || (Testable.equal e a1.(i) a2.(i) && go (i + 1)) in 134 | m = n && go 0 in 135 | testable (Fmt.Dump.array (Testable.pp e)) ~equal 136 | 137 | let pair a b = 138 | let equal (a1, b1) (a2, b2) = 139 | Testable.equal a a1 a2 && Testable.equal b b1 b2 in 140 | testable (Fmt.Dump.pair (Testable.pp a) (Testable.pp b)) ~equal 141 | 142 | let option e = 143 | let equal x y = 144 | match (x, y) with 145 | | (Some a, Some b) -> Testable.equal e a b 146 | | (None, None) -> true 147 | | _ -> false in 148 | testable (Fmt.Dump.option (Testable.pp e)) ~equal 149 | 150 | let result a e = 151 | let equal x y = 152 | match (x, y) with 153 | | (Ok x, Ok y) -> Testable.equal a x y 154 | | (Error x, Error y) -> Testable.equal e x y 155 | | _ -> false in 156 | testable (Fmt.Dump.result ~ok:(Testable.pp a) ~error:(Testable.pp e)) ~equal 157 | 158 | -------------------------------------------------------------------------------- /examples/Javascript_lexer.ml: -------------------------------------------------------------------------------- 1 | module String = Astring.String 2 | 3 | let format, undefined = Kernel.(format, undefined) 4 | 5 | let decimal_int = 6 | [%sedlex.regexp? '0'..'9', Star ('0'..'9' | '_')] 7 | 8 | let hex_int = 9 | [%sedlex.regexp? 10 | '0', Chars "xX", ('0'..'9' | 'A'..'F' | 'a'..'f'), 11 | Star ('0'..'9' | 'A'..'F' | 'a'..'f' | '_')] 12 | 13 | let oct_int = 14 | [%sedlex.regexp? '0', Chars "oO", '0'..'7', Star ('0'..'7' | '_') ] 15 | 16 | let bin_int = 17 | [%sedlex.regexp? '0', Chars "bB", '0'..'1', Star ('0'..'1' | '_') ] 18 | 19 | let int = 20 | [%sedlex.regexp? decimal_int | hex_int | oct_int | bin_int ] 21 | 22 | let float = 23 | [%sedlex.regexp? 24 | '0'..'9', Star ('0'..'9' | '_'), 25 | Opt ('.', Star ('0'..'9' | '_')), 26 | Opt (Chars "eE", Opt (Chars "+-"), '0'..'9', Star ('0'..'9' | '_'))] 27 | 28 | let identifier = 29 | [%sedlex.regexp? Plus (alphabetic | Chars "_'")] 30 | 31 | let delimeter = 32 | [%sedlex.regexp? Chars "{}[].,;:=?"] 33 | 34 | let comment = 35 | [%sedlex.regexp? "//", Star (Compl '\n')] 36 | 37 | let white_space = 38 | [%sedlex.regexp? 
Plus (' ' | '\t')] 39 | 40 | type token = [ 41 | | `Bool of bool 42 | | `Char of char 43 | | `Float of float 44 | | `Int of int 45 | | `String of string 46 | | `Operator of string 47 | | `Delimiter of string 48 | | `Identifier of string 49 | | `Keyword of string 50 | ] 51 | 52 | let is_keyword = function `Keyword _ -> true | _ -> false 53 | let is_delimiter = function `Delimiter _ -> true | _ -> false 54 | let is_identifier = function `Identifier _ -> true | _ -> false 55 | 56 | let rec pp_token ppf token = 57 | let open Fmt in 58 | match token with 59 | | `Bool v -> pf ppf "%b" v 60 | | `Char v -> pf ppf "%c" v 61 | | `Float v -> pf ppf "%f" v 62 | | `Int v -> pf ppf "%d" v 63 | | `String v -> pf ppf "\"%s\"" v 64 | | `Operator v -> pf ppf "[operator %s]" v 65 | | `Delimiter v -> pf ppf "[delimeter %s]" v 66 | | `Identifier v -> pf ppf "[identifier %s]" v 67 | | `Keyword v -> pf ppf "[keyword %s]" v 68 | 69 | module Token = struct 70 | type t = token 71 | let pp = pp_token 72 | let compare (self : token) (other : token) = 73 | Pervasives.compare self other 74 | end 75 | 76 | module Location = struct 77 | type t = 78 | { line : int; 79 | column : int; 80 | length : int } 81 | 82 | let empty = 83 | { line = 0; 84 | column = 0; 85 | length = 0 } 86 | 87 | let to_string self = 88 | format "%d,%d/%d" self.line self.column self.length 89 | end 90 | 91 | type t = 92 | { mutable lexbuf : Sedlexing.lexbuf; 93 | mutable line_start : int; 94 | mutable line_count : int; 95 | mutable group_count : int } 96 | 97 | 98 | let increment_line self = 99 | self.line_start <- Sedlexing.lexeme_end self.lexbuf; 100 | self.line_count <- self.line_count + 1 101 | 102 | 103 | let current_location { lexbuf; line_count; line_start } = 104 | let open Sedlexing in 105 | let open Location in 106 | { line = line_count; 107 | column = lexeme_end lexbuf - line_start - lexeme_length lexbuf + 1; 108 | length = lexeme_length lexbuf } 109 | 110 | 111 | let current_lexeme self = 112 | Sedlexing.Utf8.lexeme self.lexbuf 113 | 114 | 115 | exception Error of 116 | { lexeme : string; 117 | location : Location.t; 118 | message : string } 119 | 120 | 121 | let error self message = 122 | let lexeme = current_lexeme self in 123 | let location = current_location self in 124 | raise (Error { message; lexeme; location }) 125 | 126 | 127 | let rec read self = 128 | let lexbuf = self.lexbuf in 129 | match%sedlex lexbuf with 130 | 131 | (* Operators *) 132 | | "!=" -> `Operator (current_lexeme self) 133 | | "!==" -> `Operator (current_lexeme self) 134 | | "%" -> `Operator (current_lexeme self) 135 | | "%=" -> `Operator (current_lexeme self) 136 | | "&" -> `Operator (current_lexeme self) 137 | | "&&" -> `Operator (current_lexeme self) 138 | | "&=" -> `Operator (current_lexeme self) 139 | | "*" -> `Operator (current_lexeme self) 140 | | "*=" -> `Operator (current_lexeme self) 141 | | "+" -> `Operator (current_lexeme self) 142 | | "++" -> `Operator (current_lexeme self) 143 | | "+=" -> `Operator (current_lexeme self) 144 | | "-" -> `Operator (current_lexeme self) 145 | | "--" -> `Operator (current_lexeme self) 146 | | "-=" -> `Operator (current_lexeme self) 147 | | "/" -> `Operator (current_lexeme self) 148 | | "/=" -> `Operator (current_lexeme self) 149 | | "<" -> `Operator (current_lexeme self) 150 | | "<<" -> `Operator (current_lexeme self) 151 | | "<<=" -> `Operator (current_lexeme self) 152 | | "<=" -> `Operator (current_lexeme self) 153 | | "==" -> `Operator (current_lexeme self) 154 | | "===" -> `Operator (current_lexeme self) 155 | | ">" 
-> `Operator (current_lexeme self) 156 | | ">=" -> `Operator (current_lexeme self) 157 | | ">>" -> `Operator (current_lexeme self) 158 | | ">>=" -> `Operator (current_lexeme self) 159 | | ">>>" -> `Operator (current_lexeme self) 160 | | ">>>=" -> `Operator (current_lexeme self) 161 | | "^=" -> `Operator (current_lexeme self) 162 | | "|" -> `Operator (current_lexeme self) 163 | | "|=" -> `Operator (current_lexeme self) 164 | | "||" -> `Operator (current_lexeme self) 165 | 166 | (* Whitespace and comment *) 167 | | Plus (white_space | comment) -> 168 | read self 169 | 170 | (* Int literal *) 171 | | int -> 172 | `Int (int_of_string (current_lexeme self)) 173 | 174 | (* Float literal *) 175 | | float -> 176 | `Float (float_of_string (current_lexeme self)) 177 | 178 | (* Group start *) 179 | | '(' -> 180 | self.group_count <- (self.group_count + 1); 181 | `Delimiter (current_lexeme self) 182 | 183 | (* Group end *) 184 | | ')' -> 185 | self.group_count <- (self.group_count - 1); 186 | 187 | if self.group_count < 0 then 188 | error self "unbalanced parenthesis" 189 | else 190 | `Delimiter (current_lexeme self) 191 | 192 | (* Delimiters *) 193 | | "=>" -> `Delimiter (current_lexeme self) 194 | | delimeter -> 195 | `Delimiter (current_lexeme self) 196 | 197 | (* Strings *) 198 | | '"', Star (Compl '"'), '"' -> 199 | let lexeme = current_lexeme self in 200 | let s = String.(Sub.to_string (sub lexeme ~start:1 ~stop:(length lexeme - 1))) in 201 | `String s 202 | 203 | (* Chars *) 204 | | '\'', Compl '\'', '\'' -> 205 | `Char (String.get (current_lexeme self) 1) 206 | 207 | (* Booleans *) 208 | | "true" -> `Bool true 209 | | "false" -> `Bool false 210 | 211 | (* Keywords *) 212 | | "function" -> `Keyword (current_lexeme self) 213 | | "var" -> `Keyword (current_lexeme self) 214 | | "return" -> `Keyword (current_lexeme self) 215 | | "for" -> `Keyword (current_lexeme self) 216 | | "while" -> `Keyword (current_lexeme self) 217 | 218 | (* Identifiers *) 219 | | identifier -> 220 | `Identifier (current_lexeme self) 221 | 222 | (* Newline symbol *) 223 | | '\n' -> 224 | increment_line self; 225 | read self 226 | 227 | (* EOF symbol *) 228 | | eof -> raise End_of_file 229 | 230 | (* Everything else is illegal *) 231 | | any -> 232 | error self "illegal character" 233 | 234 | (* Sedlex: the last branch must be a catch-all error case *) 235 | | _ -> undefined () 236 | 237 | 238 | let from_lexbuf lexbuf = 239 | { lexbuf; 240 | line_start = 0; 241 | line_count = 0; 242 | group_count = 0 } 243 | 244 | 245 | let of_string s = 246 | from_lexbuf (Sedlexing.Utf8.from_string s) 247 | 248 | let of_channel c = 249 | from_lexbuf (Sedlexing.Utf8.from_channel c) 250 | 251 | let rec to_stream lexer = 252 | try 253 | let token = read lexer in 254 | Pratt.Stream.Yield (token, fun () -> to_stream lexer) 255 | with End_of_file -> 256 | Pratt.Stream.Empty 257 | 258 | -------------------------------------------------------------------------------- /examples/Javascript.ml: -------------------------------------------------------------------------------- 1 | 2 | open Proto 3 | open Astring 4 | 5 | module Stream = Pratt.Stream 6 | module L = Javascript_lexer 7 | module P = Pratt.Make(L.Token) 8 | 9 | let (>>=) = P.(>>=) 10 | let return = P.return 11 | 12 | let separated_by sep p = 13 | p >>= fun x -> 14 | P.many (sep >>= fun () -> p) >>= fun xs -> 15 | return (x :: xs) 16 | 17 | let pair ~sep p1 p2 = 18 | p1 >>= fun x -> 19 | sep >>= fun () -> 20 | p2 >>= fun y -> 21 | return (x, y) 22 | 23 | module AST = struct 24 | type t = [ 25 | 
| `Binary of string * t * t 26 | | `Bool of bool 27 | | `Call of t * t list 28 | | `Dot of t * t 29 | | `Function of string * string list * t list 30 | | `Identifier of string 31 | | `Int of int 32 | | `Lambda of string list * t list 33 | | `Object of (string * t) list 34 | | `Return of t 35 | | `String of string 36 | | `Symbol of string 37 | | `Ternary of t * t * t 38 | | `Unary of string * t 39 | | `While of t * t list 40 | | `Var of string * t 41 | ] 42 | 43 | let rec pp formatter self = 44 | let open Fmt in 45 | let _br ?(indent = 0) n = fun formatter () -> 46 | Format.pp_print_break formatter n indent in 47 | let pp_args = hvbox (list ~sep:(always ", ") string) in 48 | let pp_body = vbox (list pp) in 49 | let pp_item = hbox (pair ~sep:(always ":@ ") String.dump pp) in 50 | match self with 51 | | `Symbol x | `Identifier x -> String.pp formatter x 52 | | `String x -> String.dump formatter x 53 | | `Bool x -> Bool.pp formatter x 54 | | `Int x -> Int.pp formatter x 55 | 56 | | `Return (`Lambda (args, body)) -> 57 | pf formatter "@[return function(@[%a@]) {@,%a@]@,}" 58 | pp_args args pp_body body 59 | 60 | | `Return x -> 61 | pf formatter "return @[%a@]" pp x 62 | 63 | | `Lambda (args, body) -> 64 | pf formatter "@[@[function(%a) {@;%a@]@;}@]" 65 | pp_args args pp_body body 66 | 67 | | `Function (name, args, body) -> 68 | (* pr "while(%a)" (hvbox (list ~sep:(always ",@;") int)) *) 69 | pf formatter "@[function %s(@[%a@]) {@,%a@]@,}" 70 | name pp_args args pp_body body 71 | 72 | | `Object o -> 73 | pf formatter "@[{@,%a@]@,}" (list ~sep:(always ",@,") pp_item) o 74 | 75 | | `Call (f, xs) -> 76 | pf formatter "%a(@[%a@])" pp f Fmt.(list ~sep:(always ",@ ") pp) xs 77 | 78 | | `Ternary (t, a, b) -> 79 | pf formatter "@[<4>%a@ @,?@ %a@, :@ %a@]" pp t pp a pp b 80 | 81 | | `Binary (op, a, b) -> 82 | pf formatter "@[<2>%a@;%s@;%a@]" pp a op pp b 83 | 84 | | `Unary (op, a) -> 85 | pf formatter "%s%a" op pp a 86 | 87 | | `Var (name, `Lambda (args, body)) -> 88 | pf formatter "@[var %s = function(@[%a@]) {@,%a@]@,}" 89 | name pp_args args pp_body body 90 | 91 | | `Var (name, value) -> 92 | pf formatter "@[<4>var %s =@ %a@]" name pp value 93 | 94 | | `Dot (x, y) -> 95 | pf formatter "%a.%a" pp x pp y 96 | 97 | | `While (condition, body) -> 98 | pf formatter "@[while (%a) {@,%a@]@,}" (hvbox pp) condition (list pp) body 99 | end 100 | 101 | let literal g : AST.t P.parser = 102 | P.current >>= function 103 | | `Identifier x -> P.advance >>= fun () -> return (`Identifier x) 104 | | `String x -> P.advance >>= fun () -> return (`String x) 105 | | `Int x -> P.advance >>= fun () -> return (`Int x) 106 | | `Bool x -> P.advance >>= fun () -> return (`Bool x) 107 | | t -> P.error (P.unexpected_token t) 108 | 109 | 110 | module Parser = struct 111 | let delimiter str = P.consume (`Delimiter str) 112 | let keyword str = P.consume (`Keyword str) 113 | 114 | let filter_map f = 115 | P.current >>= fun token -> 116 | match f token with 117 | | Some x -> P.advance >>= fun () -> return x 118 | | None -> P.error (P.unexpected_token token) 119 | 120 | let identifier = 121 | filter_map (function `Identifier x -> Some x | _ -> None) 122 | 123 | let string = 124 | filter_map (function `String x -> Some x | _ -> None) 125 | 126 | let return' grammar = 127 | keyword "return" >>= fun () -> 128 | P.parse grammar >>= fun x -> 129 | return (`Return x) 130 | 131 | let var grammar = 132 | keyword "var" >>= fun () -> 133 | identifier >>= fun name -> 134 | delimiter "=" >>= fun () -> 135 | P.parse grammar >>= fun value -> 136 | return (`Var
(name, value)) 137 | 138 | let function' grammar = 139 | let local = P.Grammar.add (P.null (`Keyword "return") return') grammar in 140 | keyword "function" >>= fun () -> 141 | P.default "" identifier >>= fun name -> 142 | delimiter "(" >>= fun () -> 143 | P.default [] (identifier |> separated_by (delimiter ",")) >>= fun args -> 144 | delimiter ")" >>= fun () -> 145 | delimiter "{" >>= fun () -> 146 | P.many (P.parse local) >>= fun body -> 147 | delimiter "}" >>= fun () -> 148 | return (if name = "" 149 | then `Lambda (args, body) 150 | else `Function (name, args, body)) 151 | 152 | let object' grammar = 153 | delimiter "{" >>= fun () -> 154 | let item = pair ~sep:(delimiter ":") string (P.parse grammar) in 155 | P.default [] (item |> separated_by (delimiter ",")) >>= fun args -> 156 | delimiter "}" >>= fun () -> 157 | return (`Object args) 158 | 159 | let call grammar f = 160 | delimiter "(" >>= fun () -> 161 | P.default [] (P.parse grammar |> separated_by (delimiter ",")) >>= fun args -> 162 | delimiter ")" >>= fun () -> 163 | return (`Call (f, args)) 164 | 165 | let ternary grammar condition = 166 | delimiter "?" >>= fun () -> 167 | P.parse grammar >>= fun consequence -> 168 | delimiter ":" >>= fun () -> 169 | P.parse grammar >>= fun alternative -> 170 | return (`Ternary (condition, consequence, alternative)) 171 | 172 | let while' grammar = 173 | keyword "while" >>= fun () -> 174 | delimiter "(" >>= fun () -> 175 | P.parse grammar >>= fun condition -> 176 | delimiter ")" >>= fun () -> 177 | delimiter "{" >>= fun () -> 178 | P.many (P.parse grammar) >>= fun body -> 179 | delimiter "}" >>= fun () -> 180 | return (`While (condition, body)) 181 | 182 | let parse = P.parse @@ P.grammar [ 183 | P.term literal; 184 | (* XXX: WTF? *) 185 | (* TODO: Add context: "while parsing `var` expected x but got y". *) 186 | (* P.null (`Keyword "var") (fun g -> P.consume (`Keyword "varx") >>= fun () -> return (`Int 42)); *) 187 | P.null (`Keyword "var") var; 188 | P.null (`Keyword "function") function'; 189 | P.null (`Keyword "while") while'; 190 | P.null (`Delimiter "{") object'; 191 | P.null (`Operator "+") (P.unary (fun a -> (`Unary ("+", a)))); 192 | P.left 30 (`Operator "+") (P.binary (fun a b -> (`Binary ("+", a, b)))); 193 | P.left 85 (`Delimiter ".") (P.binary (fun a b -> (`Dot (a, b)))); 194 | P.left 30 (`Operator "===") (P.binary (fun a b -> (`Binary ("===", a, b)))); 195 | P.left 80 (`Delimiter "(") call; 196 | P.left 20 (`Delimiter "?") ternary; 197 | ] 198 | end 199 | 200 | (* It is more important to understand what is *NOT* valid than what is valid. *) 201 | let input = {| 202 | 203 | var x = name ? name : "Hello, world!" 204 | 205 | function hello(name, a) { 206 | var x = name ? name : "Hello, world!" 207 | return x 208 | } 209 | var y = 42 + 0 210 | var z = +4 211 | 212 | function f(x) { 213 | console.log("hello") 214 | console.log(" ") 215 | console.log("wolrd") 216 | var y = x + 1 217 | function sum(a, b) { return a + b } 218 | return sum(x, y) 219 | } 220 | 221 | var sum = function(x, y) { return x + y } 222 | 223 | var partialSum = function(x) { return function (y) { return x + y } } 224 | 225 | var x = cond(a, f(1, 2, true), g()) ? 
"yes" : "no" 226 | 227 | { 228 | "x": 42, 229 | "y": 11, 230 | "f": function(x) { return x } 231 | } 232 | 233 | assert(point.x === 42) 234 | 235 | while(x === 1 + x === 1 + x === 1) { 236 | console.log("yes") 237 | console.log("yes") 238 | } 239 | 240 | |} 241 | 242 | let main = 243 | let rec loop input = 244 | if not (Stream.is_empty input) then 245 | match P.run Parser.parse input with 246 | | Ok (result, input') -> 247 | Fmt.pr "%a@.@." AST.pp result; 248 | loop input' 249 | | Error Zero -> () 250 | | Error e -> Fmt.pr "main: %s@." (P.error_to_string e) in 251 | loop (L.(to_stream (of_string input))) 252 | 253 | -------------------------------------------------------------------------------- /src/Pratt.mli: -------------------------------------------------------------------------------- 1 | (* 2 | * Pratt 0.1 3 | * Copyright (c) 2017 Rizo Isrof. All rights reserved. 4 | * 5 | * Distributed under the ISC license, see LICENSE file. 6 | *) 7 | 8 | type 'a fmt = Format.formatter -> 'a -> unit 9 | type 'a comparator = 'a -> 'a -> int 10 | 11 | module Stream : module type of Stream 12 | 13 | (** Pratt is a simple top-down precedence parser. 14 | 15 | The grammar is defined as a set of rules that are matched on tokens. Input 16 | is processed linearly with an iterator. The parser searches for rules that 17 | match the current token based on its position (prefix or infix). Rules store 18 | the matching token, the precedence (for infix rules) and the parser that 19 | builds the final Abstract Syntax Tree (AST) object. *) 20 | 21 | 22 | module type Token = sig 23 | type t 24 | 25 | val pp : t fmt 26 | val compare : t comparator 27 | end 28 | 29 | 30 | module Make (Token : Token) : sig 31 | type token = Token.t 32 | (** The type of tokens to be parsed. *) 33 | 34 | type error = 35 | | Unexpected of {expected : token option; actual : token option} 36 | | Invalid_infix of token 37 | | Invalid_prefix of token 38 | | Zero 39 | (** The type of errors for tokens of type ['a]. *) 40 | 41 | val unexpected_token : ?expected : token -> token -> error 42 | (** [unexpected_token ?expected t] is [Unexpected {actual = Some t; 43 | expected}]. *) 44 | 45 | val unexpected_end : ?expected : token -> unit -> error 46 | (** [unexpected_end ?expected ()] is [Unexpected {actual = None; expected}]. *) 47 | 48 | val invalid_prefix : token -> error 49 | (** [invalid_prefix t] is [Invalid_prefix t]. *) 50 | 51 | val invalid_infix : token -> error 52 | (** [invalid_infix t] is [Invalid_infix t]. *) 53 | 54 | val error_to_string : error -> string 55 | (** [error_to_string token_pp e] is a human-readable representation of [e]. *) 56 | 57 | val pp_error : error fmt 58 | (** [pp_error token_pp] is a pretty printer for values of type [e] and 59 | contained tokens. *) 60 | 61 | 62 | (** {1:parser Parser} *) 63 | 64 | type 'a parser 65 | (** A parser producing values of type ['a]. *) 66 | 67 | 68 | (** {2:parser-monad Monad Instance} *) 69 | 70 | val return : 'a -> 'a parser 71 | (** [return x] is a parser producing [x] as a value regardless of the input. *) 72 | 73 | val (>>=) : 'a parser -> ('a -> 'b parser) -> 'b parser 74 | (** [p >>= f] is a parser returned by [f] after applying [f] to the result of 75 | [p]. *) 76 | 77 | 78 | val error : error -> 'a parser 79 | (** [error e] is a parser that fails with the error [e] without consuming any 80 | input *) 81 | 82 | val zero : 'a parser 83 | (** [zero] is a parser that fails without consuming any input. 
*) 84 | 85 | val (<|>) : 'a parser -> 'a parser -> 'a parser 86 | (** [p <|> q] is a choice combinator. Parser [p] is applied first; if it 87 | succeeds, its value is returned. If [p] fails {e without consuming any 88 | input}, [q] is tried. *) 89 | 90 | 91 | (** {1:combinators Parsing Combinators} *) 92 | 93 | val default : 'a -> 'a parser -> 'a parser 94 | (** [default x p] runs the parser [p] producing the default [x] value if 95 | it fails. *) 96 | 97 | val combine : 'a parser -> 'b parser -> ('a * 'b) parser 98 | (** [combine p q] first parses [p] and then [q] returning a pair with 99 | corresponding results. *) 100 | 101 | val many : 'a parser -> 'a list parser 102 | (** [many p] applies the parser [p] zero or more times. Returns a list of the 103 | returned values of [p]. *) 104 | 105 | val some : 'a parser -> ('a * 'a list) parser 106 | (** [some p] applies the parser [p] one or more times. Returns the guaranteed 107 | first value and a potentially empty list of values parsed by [p]. *) 108 | 109 | val optional : 'a parser -> unit parser 110 | (** [optional p] tries to optionally parse the input with parser [p] without 111 | returning its output. *) 112 | 113 | val current : token parser 114 | (** [current] is the parser that produces the current token as the result. *) 115 | 116 | val next : token parser 117 | (** [next] returns the current token and advances to the next one. *) 118 | 119 | val expect : token -> token parser 120 | (** [expect token] checks if the current token in the input is equal to [token], 121 | failing if it is not. *) 122 | 123 | val advance : unit parser 124 | (** [advance] advances the parser to the next token. *) 125 | 126 | val consume : token -> unit parser 127 | (** [consume token] checks if the current token is equal to [token] and 128 | advances the parser to the next token, or fails if they are different. *) 129 | 130 | val satisfy : (token -> bool) -> token parser 131 | (** [satisfy test] is a parser that returns the current input token if it 132 | satisfies the [test] predicate or fails otherwise. *) 133 | 134 | val exactly : token -> token parser 135 | (** [exactly token] parses *exactly* the given [token]. *) 136 | 137 | val any : token parser 138 | (** [any] is a parser that accepts any input token. *) 139 | 140 | val from : token list -> token parser 141 | (** [from tokens] parses any token from the [tokens] list. *) 142 | 143 | val none : token list -> token parser 144 | (** [none tokens] parses any token *not* present in the [tokens] list. *) 145 | 146 | val range : ?compare: token comparator -> token -> token -> token parser 147 | (** [range ?compare s e] parses any token in the range defined by [s] and [e]. 148 | Optionally a custom [compare] function can be supplied. *) 149 | 150 | val choice : 'a parser list -> 'a parser 151 | (** [choice ps] is a parser that tries all the parsers in [ps] until one of 152 | them succeeds. *) 153 | 154 | val guard : bool -> unit parser 155 | 156 | val when' : bool -> unit parser -> unit parser 157 | val unless : bool -> unit parser -> unit parser 158 | 159 | val many_while : (token -> bool) -> 'a parser -> 'a list parser 160 | (** [many_while test p] repeatedly runs the parser [p] while the input token 161 | satisfies [test]. Stops when the token fails the [test] or if the input is 162 | empty. *) 163 | 164 | val some_while : (token -> bool) -> 'a parser -> ('a * 'a list) parser 165 | (** [some_while test p] is just like [many_while] except it parses at least one ['a].
*) 166 | 167 | 168 | type 'a rule 169 | (** The type for parsing rules for tokens of type ['t] producing results of 170 | type ['a]. *) 171 | 172 | (** {1:grammar Grammar} *) 173 | 174 | type 'a grammar 175 | (** Grammar type holding parsing rules for tokens of type ['t] and parsed 176 | values of type ['a]. *) 177 | 178 | module Grammar : sig 179 | type 'a t = 'a grammar 180 | 181 | val has_null : token -> 'a grammar -> bool 182 | val has_left : token -> 'a grammar -> bool 183 | 184 | val dump : token fmt -> 'a t -> unit 185 | 186 | val new_scope : 'a t -> 'a t 187 | val pop_scope : 'a t -> 'a t 188 | 189 | val add : 'a rule -> 'a t -> 'a t 190 | end 191 | 192 | val nud : int -> 'a grammar -> 'a parser 193 | val led : int -> 'a grammar -> 'a -> 'a parser 194 | 195 | 196 | (** {1:rules Rules} *) 197 | 198 | val rule : token -> ('a grammar -> 'a parser) -> 'a rule 199 | (** [rule t p] is a rule with parser [p] for prefix token [t]. *) 200 | 201 | val term : ('a grammar -> 'a parser) -> 'a rule 202 | (** [term p] is a parser for literals or variables. *) 203 | 204 | val infix : int -> token -> ('a -> 'a -> 'a) -> 'a rule 205 | (** [infix precedence token f] is a rule that parses infix occurrences of 206 | [token] with given [precedence] applying [f] to the {e lhs} and {e rhs} 207 | expressions. This rule is left-associative. *) 208 | 209 | val infixr : int -> token -> ('a -> 'a -> 'a) -> 'a rule 210 | (** [infixr precedence token f] is like {!infix} except it is 211 | right-associative. *) 212 | 213 | val prefix : token -> ('a -> 'a) -> 'a rule 214 | (** [prefix token f] is a rule that parses prefix occurrences of [token] 215 | applying [f] to the {e rhs} expression. *) 216 | 217 | val postfix : int -> token -> ('a -> 'a) -> 'a rule 218 | (** [postfix precedence token f] is a rule that parses postfix occurrences of 219 | [token] with given [precedence] applying [f] to the {e lhs} expression. *) 220 | 221 | val between : token -> token -> ('a -> 'a) -> 'a rule 222 | 223 | val delimiter : token -> 'a rule 224 | (** [delimiter token] is a rule that parses a delimiter [token]. *) 225 | 226 | val binary : ('a -> 'a -> 'a) -> 'a grammar -> 'a -> 'a parser 227 | val unary : ('a -> 'a) -> 'a grammar -> 'a parser 228 | 229 | val null : token -> ('a grammar -> 'a parser) -> 'a rule 230 | val left : int -> token -> ('a grammar -> 'a -> 'a parser) -> 'a rule 231 | 232 | 233 | (** {1:parsing Parsing} *) 234 | 235 | val grammar : 'a rule list -> 'a grammar 236 | (** [grammar rules] is a grammar for a language constructed with [rules]. *) 237 | 238 | val parse : ?precedence: int -> 'a grammar -> 'a parser 239 | (** [parse ?precedence g] is the parser for the grammar [g] starting with 240 | binding power [precedence]. *) 241 | 242 | val parse_many : 'a grammar -> 'a list parser 243 | val parse_some : 'a grammar -> ('a * 'a list) parser 244 | 245 | val run : 'a parser -> token Stream.t -> ('a * token Stream.t, error) result 246 | (** [run p input] is the result of running the parser [p] with the given 247 | [input]. The parsed value is produced with the remaining input. 
*) 248 | end 249 | 250 | -------------------------------------------------------------------------------- /src/Pratt.ml: -------------------------------------------------------------------------------- 1 | 2 | let inspect pp = 3 | Format.fprintf Fmt.stderr "@[%a@.@]" pp 4 | 5 | let log fmt = 6 | Format.kfprintf (fun f -> Format.pp_print_newline f ()) Fmt.stderr fmt 7 | 8 | let constantly x _ = x 9 | let (<<) f g = fun x -> f (g x) 10 | let is_some = function Some _ -> true | None -> false 11 | let flip f x y = f y x 12 | 13 | (* TODO: When failing show the so-far parsed result. *) 14 | 15 | type 'a fmt = Format.formatter -> 'a -> unit 16 | type 'a comparator = 'a -> 'a -> int 17 | 18 | 19 | module Stream = Stream 20 | 21 | 22 | module type Token = sig 23 | type t 24 | 25 | val pp : t fmt 26 | val compare : t comparator 27 | end 28 | 29 | 30 | module Make (Token : Token) = struct 31 | module Table = struct 32 | include Map.Make(Token) 33 | 34 | let get tbl x = 35 | try Some (find tbl x) 36 | with Not_found -> None 37 | end 38 | 39 | type token = Token.t 40 | 41 | let pp_token = Token.pp 42 | 43 | type error = 44 | | Unexpected of { expected : token option; actual : token option } 45 | | Invalid_infix of token 46 | | Invalid_prefix of token 47 | | Zero 48 | 49 | let unexpected_token ?expected actual = 50 | Unexpected {expected; actual = Some actual} 51 | 52 | let unexpected_end ?expected () = 53 | Unexpected {expected; actual = None} 54 | 55 | let invalid_prefix t = 56 | Invalid_prefix t 57 | 58 | let invalid_infix t = 59 | Invalid_infix t 60 | 61 | let error_to_string = function 62 | | Unexpected {expected = Some t1; actual = Some t2} -> 63 | Fmt.strf "Syntax error: expected '%a' but got '%a'" pp_token t1 pp_token t2 64 | | Unexpected { expected = Some t; actual = None } -> 65 | Fmt.strf "Syntax error: unexpected end of file while parsing '%a'" pp_token t 66 | | Unexpected { expected = None; actual = None } -> 67 | Fmt.strf "Syntax error: unexpected end of file" 68 | | Unexpected { expected = None; actual = Some t } -> 69 | Fmt.strf "Syntax error: unexpected token '%a'" pp_token t 70 | | Invalid_infix token -> 71 | Fmt.strf "Syntax error: '%a' cannot be used in infix position" pp_token token 72 | | Invalid_prefix token -> 73 | Fmt.strf "Syntax error: '%a' cannot be used in prefix position" pp_token token 74 | | Zero -> 75 | Fmt.strf "Syntax error: empty parser result" 76 | 77 | let pp_error ppf = function 78 | | Unexpected { expected; actual } -> 79 | Fmt.pf ppf "@[<2>Unexpected@ {@ expected =@ @[%a@];@ actual =@ @[%a@] }@]" 80 | (Fmt.Dump.option pp_token) expected (Fmt.Dump.option pp_token) actual 81 | | Invalid_infix token -> 82 | Fmt.pf ppf "@[<2>Invalid_infix@ @[%a@] @]" pp_token token 83 | | Invalid_prefix token -> 84 | Fmt.pf ppf "@[<2>Invalid_prefix@ @[%a@] @]" pp_token token 85 | | Zero -> Fmt.pf ppf "Empty" 86 | 87 | type 'a parser = token Stream.t -> ('a * token Stream.t, error) result 88 | 89 | let return x = 90 | fun input -> Ok (x, input) 91 | 92 | let (>>=) p f = 93 | fun input -> 94 | match p input with 95 | | Ok (x, input') -> 96 | let p' = f x in p' input' 97 | | Error e -> Error e 98 | 99 | 100 | let put s = fun _ -> Ok ((), s) 101 | let get = fun s -> Ok (s, s) 102 | 103 | 104 | let zero = fun _input -> Error Zero 105 | 106 | let (<|>) p q = fun input -> 107 | match p input with 108 | | Ok value -> Ok value 109 | | Error _ -> q input 110 | (* XXX: What if p consumes input?
*) 111 | (* | Error Empty -> q input *) 112 | (* | Error e -> Error e *) 113 | 114 | 115 | let default x p = 116 | p <|> return x 117 | 118 | let rec many p = 119 | (p >>= fun x -> many p >>= fun xs -> return (x :: xs)) 120 | |> default [] 121 | 122 | let combine p1 p2 = 123 | p1 >>= fun x -> 124 | p2 >>= fun y -> return (x, y) 125 | 126 | let rec some p = 127 | combine p (many p) 128 | 129 | let optional p = 130 | default () (p >>= fun _ -> return ()) 131 | 132 | let error e = 133 | fun _state -> Error e 134 | 135 | let advance s = 136 | let p = 137 | get >>= fun stream -> 138 | match Stream.next stream with 139 | | Some (token, stream') -> put stream' 140 | | None -> return () in 141 | p s 142 | 143 | let current = fun s -> 144 | let p = get >>= fun state -> 145 | match Stream.head state with 146 | | Some token -> return token 147 | | None -> error (unexpected_end ()) in 148 | p s 149 | 150 | let next s = 151 | let p = 152 | current >>= fun x -> 153 | advance >>= fun () -> return x in 154 | p s 155 | 156 | let expect expected : 'a parser = 157 | get >>= fun stream -> 158 | match Stream.head stream with 159 | | Some actual when actual = expected -> return actual 160 | | Some actual -> error (unexpected_token ~expected actual) 161 | | None -> error (unexpected_end ~expected ()) 162 | 163 | let consume tok = 164 | expect tok >>= fun _ -> advance 165 | 166 | let exactly x = 167 | expect x >>= fun x -> advance >>= fun () -> return x 168 | 169 | let satisfy test = 170 | next >>= function 171 | | actual when test actual -> return actual 172 | | actual -> error (unexpected_token actual) 173 | 174 | let any s = (satisfy (constantly true)) s 175 | 176 | let from list = 177 | satisfy (fun x -> List.mem x list) 178 | 179 | let none list = 180 | satisfy (fun x -> not (List.mem x list)) 181 | 182 | let range ?(compare = Pervasives.compare) s e = 183 | let (<=) a b = not (compare a b > 0) in 184 | satisfy (fun x -> s <= x && x <= e) 185 | 186 | let rec choice ps = 187 | match ps with 188 | | [] -> zero 189 | | p :: ps' -> p <|> choice ps' 190 | 191 | let guard = function 192 | | true -> return () 193 | | false -> zero 194 | 195 | let when' test m = 196 | if test then m 197 | else return () 198 | 199 | let unless test m = 200 | if test then return () 201 | else m 202 | 203 | let many_while test p = 204 | many (current >>= (guard << test) >>= fun () -> p) 205 | 206 | let some_while test p = 207 | some (current >>= (guard << test) >>= fun () -> p) 208 | 209 | type 'a grammar = { 210 | data : 'a scope list; 211 | term : 'a null 212 | } 213 | 214 | and 'a scope = { 215 | null : 'a null Table.t; 216 | left : 'a left Table.t; 217 | } 218 | 219 | and 'a null = ('a grammar -> 'a parser) 220 | and 'a left = ('a grammar -> 'a -> 'a parser) * int 221 | 222 | type 'a rule = 223 | | Term of 'a null 224 | | Null of token * 'a null 225 | | Left of token * 'a left 226 | 227 | module Grammar = struct 228 | type 'a t = 'a grammar 229 | 230 | let make_scope () = { 231 | null = Table.empty; 232 | left = Table.empty 233 | } 234 | 235 | let empty = { 236 | term = (fun g -> current >>= fun t -> error (Invalid_prefix t)); 237 | data = []; 238 | } 239 | 240 | let add rule grammar = 241 | let scope, data = 242 | match grammar.data with 243 | | [] -> make_scope (), [] 244 | | scope :: grammar' -> scope, grammar' in 245 | match rule with 246 | | Term term -> { grammar with term } 247 | | Null (t, rule) -> 248 | let scope = { scope with null = Table.add t rule scope.null } in 249 | { grammar with data = scope :: data } 250 | | Left 
209 | type 'a grammar = {
210 |   data : 'a scope list;
211 |   term : 'a null
212 | }
213 | 
214 | and 'a scope = {
215 |   null : 'a null Table.t;
216 |   left : 'a left Table.t;
217 | }
218 | 
219 | and 'a null = ('a grammar -> 'a parser)
220 | and 'a left = ('a grammar -> 'a -> 'a parser) * int
221 | 
222 | type 'a rule =
223 |   | Term of 'a null
224 |   | Null of token * 'a null
225 |   | Left of token * 'a left
226 | 
227 | module Grammar = struct
228 |   type 'a t = 'a grammar
229 | 
230 |   let make_scope () = {
231 |     null = Table.empty;
232 |     left = Table.empty
233 |   }
234 | 
235 |   let empty = {
236 |     term = (fun g -> current >>= fun t -> error (Invalid_prefix t));
237 |     data = [];
238 |   }
239 | 
240 |   let add rule grammar =
241 |     let scope, data =
242 |       match grammar.data with
243 |       | [] -> make_scope (), []
244 |       | scope :: grammar' -> scope, grammar' in
245 |     match rule with
246 |     | Term term -> { grammar with term }
247 |     | Null (t, rule) ->
248 |       let scope = { scope with null = Table.add t rule scope.null } in
249 |       { grammar with data = scope :: data }
250 |     | Left (t, rule) ->
251 |       let scope = { scope with left = Table.add t rule scope.left } in
252 |       { grammar with data = scope :: data }
253 | 
254 |   let dump pp_token grammar =
255 |     let dump_scope scope =
256 |       Fmt.pr "grammar.null:\n";
257 |       Table.iter (fun t _ -> Fmt.pr "- %a\n" pp_token t) scope.null;
258 |       Fmt.pr "grammar.left:\n";
259 |       Table.iter (fun t _ -> Fmt.pr "- %a\n" pp_token t) scope.left in
260 |     let rec loop (data : 'a scope list) =
261 |       match data with
262 |       | [] -> ()
263 |       | scope :: data' ->
264 |         dump_scope scope;
265 |         Fmt.pr "***@.";
266 |         loop data' in
267 |     loop grammar.data
268 | 
269 |   let get_left token grammar =
270 |     let rec find data =
271 |       match data with
272 |       | [] -> None
273 |       | scope :: data' ->
274 |         begin match Table.get token scope.left with
275 |         | Some rule -> Some rule
276 |         | None -> find data'
277 |         end in
278 |     find grammar.data
279 | 
280 |   let get_null token grammar =
281 |     let rec find data =
282 |       match data with
283 |       | [] -> None
284 |       | scope :: data' ->
285 |         begin match Table.get token scope.null with
286 |         | Some rule -> Some rule
287 |         | None -> find data'
288 |         end in
289 |     find grammar.data
290 | 
291 |   let has_null token grammar =
292 |     is_some (get_null token grammar)
293 | 
294 |   let has_left token grammar =
295 |     is_some (get_left token grammar)
296 | 
297 |   let new_scope grammar =
298 |     { grammar with data = make_scope () :: grammar.data }
299 | 
300 |   let pop_scope grammar =
301 |     let data =
302 |       match grammar.data with
303 |       | [] -> []
304 |       | _ :: data' -> data' in
305 |     { grammar with data }
306 | 
307 |   let get_term grammar =
308 |     grammar.term
309 | end
310 | 
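(* Scoping sketch (illustration only; [g] and [semi_rule] are assumed values):
   [get_null] and [get_left] search the scope list front to back, so a scope
   pushed with [new_scope] shadows outer definitions until it is dropped again
   by [pop_scope]:

     let scoped   = Grammar.add semi_rule (Grammar.new_scope g) in
     let restored = Grammar.pop_scope scoped in
     (* [scoped] finds [semi_rule] first; [restored] looks up rules
        exactly like the original [g] again *)
     restored
*)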
311 | let nud rbp grammar =
312 |   current >>= fun token ->
313 |   match Grammar.get_null token grammar with
314 |   | Some parse -> parse grammar
315 |   | None ->
316 |     (* Infix tokens can only be a valid prefix if they are directly defined
317 |        as such. If the token has a led definition it is not consumed,
318 |        otherwise the term parser is called. *)
319 |     if Grammar.has_left token grammar then
320 |       error (invalid_prefix token)
321 |     else
322 |       let parse = Grammar.get_term grammar in
323 |       parse grammar
324 | 
325 | let rec led rbp grammar x =
326 |   get >>= fun stream ->
327 |   match Stream.head stream with
328 |   | Some token ->
329 |     begin match Grammar.get_left token grammar with
330 |     | Some (parse, lbp) ->
331 |       if lbp > rbp then
332 |         parse grammar x >>= led rbp grammar
333 |       else
334 |         return x
335 |     | None ->
336 |       return x
337 |     end
338 |   | None ->
339 |     return x
340 | 
341 | let parse ?precedence:(rbp = 0) grammar =
342 |   nud rbp grammar >>= led rbp grammar
343 | 
344 | let parse_many grammar =
345 |   many begin
346 |     current >>= fun token ->
347 |     guard (not (Grammar.has_left token grammar)) >>= fun () ->
348 |     nud 0 grammar
349 |   end
350 | 
351 | let parse_some grammar =
352 |   some begin
353 |     current >>= fun token ->
354 |     guard (not (Grammar.has_left token grammar)) >>= fun () ->
355 |     nud 0 grammar
356 |   end
357 | 
358 | 
359 | let grammar rules =
360 |   List.fold_left (flip Grammar.add) Grammar.empty rules
361 | 
362 | let run p stream =
363 |   match p stream with
364 |   | Ok (x, stream') -> Ok (x, stream')
365 |   | Error e -> Error e
366 | 
367 | let rule token parse =
368 |   Null (token, parse)
369 | 
370 | let term parse =
371 |   Term parse
372 | 
373 | let infix precedence token f =
374 |   let parse grammar x =
375 |     advance >>= fun () ->
376 |     parse ~precedence grammar >>= fun y ->
377 |     return (f x y) in
378 |   Left (token, (parse, precedence))
379 | 
380 | let infixr precedence token f =
381 |   let parse grammar x =
382 |     advance >>= fun () ->
383 |     parse ~precedence:(precedence - 1) grammar >>= fun y ->
384 |     return (f x y) in
385 |   Left (token, (parse, precedence))
386 | 
387 | let prefix token f =
388 |   let parse grammar =
389 |     advance >>= fun () ->
390 |     parse grammar >>= fun x ->
391 |     return (f x) in
392 |   Null (token, parse)
393 | 
394 | let postfix precedence token f =
395 |   let parse grammar x =
396 |     advance >>= fun () ->
397 |     return (f x) in
398 |   Left (token, (parse, precedence))
399 | 
400 | let between token1 token2 f =
401 |   let parse grammar =
402 |     advance >>= fun () ->
403 |     parse grammar >>= fun x ->
404 |     consume token2 >>= fun () ->
405 |     return (f x) in
406 |   Null (token1, parse)
407 | 
408 | let delimiter token =
409 |   let parse g x = error (Invalid_infix token) in
410 |   Left (token, (parse, 0))
411 | 
412 | let null token parse =
413 |   Null (token, parse)
414 | 
415 | let left precedence token parse =
416 |   Left (token, (parse, precedence))
417 | 
418 | let binary f = fun g a ->
419 |   advance >>= fun () ->
420 |   parse g >>= fun b ->
421 |   return (f a b)
422 | 
423 | let unary f = fun g ->
424 |   advance >>= fun () ->
425 |   parse g >>= fun a ->
426 |   return (f a)
427 | end
428 | 
--------------------------------------------------------------------------------
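Usage sketch for the rule constructors in src/Pratt.ml above (illustration only,
not a file from this repository): `parse` drives the standard Pratt loop, calling
a token's null rule (or the `term` fallback) in prefix position and then folding
left rules for as long as their binding power exceeds the current precedence.
The string token type, the `Stream.of_list` constructor and the exact interface
exposed by `Pratt.mli` are assumptions made for this example; see
`tests/Test_pratt_calc.ml` and `examples/Javascript.ml` for the repository's
real usage.

    (* A hypothetical calculator over string tokens; any name not defined in
       Pratt.ml above ([Stream.of_list] in particular) is assumed. *)
    let calc =
      grammar [
        (* fallback: any token made of digits is an int literal *)
        term (fun _g ->
          satisfy (fun t -> t <> "" && t.[0] >= '0' && t.[0] <= '9') >>= fun t ->
          return (int_of_string t));
        prefix "-" (fun x -> -x);        (* unary minus *)
        infix 30 "*" ( * );              (* binds tighter than "+" *)
        infix 30 "/" ( / );
        infix 20 "+" ( + );
        infix 20 "-" ( - );
        between "(" ")" (fun x -> x);    (* parenthesised sub-expression *)
        delimiter ")";                   (* ")" never starts or extends one *)
      ]

    (* "2 + 3 * 4" is grouped as 2 + (3 * 4) because "*" has the higher
       left-binding power (30 > 20). *)
    let () =
      match run (parse calc) (Stream.of_list ["2"; "+"; "3"; "*"; "4"]) with
      | Ok (result, _rest) -> Fmt.pr "%d@." result
      | Error _ -> Fmt.pr "parse error@."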