├── .gitignore ├── Makefile ├── ast.ml ├── dune ├── dune-project ├── example.ml ├── json_lexer.ml ├── json_parser.messages ├── json_parser.mly └── json_parser_errors.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | .merlin 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | json_parser.messages.raw: json_parser.mly 2 | menhir --canonical --list-errors json_parser.mly > json_parser.messages 3 | 4 | update_messages: json_parser.messages 5 | menhir --canonical --update-errors json_parser.messages json_parser.mly > json_parser.messages.new 6 | 7 | .PHONY: json_parser_errors.ml 8 | 9 | json_parser_errors.ml: json_parser.mly 10 | menhir --canonical json_parser.mly --compile-errors json_parser.messages > json_parser_errors.ml 11 | -------------------------------------------------------------------------------- /ast.ml: -------------------------------------------------------------------------------- 1 | 2 | let pp_pos out { Ppxlib.pos_lnum; pos_cnum; pos_bol; _} = 3 | Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) 4 | 5 | type loc = Ppxlib.position * Ppxlib.position 6 | 7 | let pp_loc out loc = Format.fprintf out "%a-%a" pp_pos (fst loc) pp_pos (snd loc) 8 | 9 | type json = 10 | | String of string 11 | | Int of int 12 | | Bool of bool 13 | | List of ast list 14 | | Assoc of (string * ast) list 15 | [@@deriving show] 16 | 17 | and ast = { 18 | loc : loc; 19 | json : json 20 | } 21 | [@@deriving show] 22 | 23 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name example) 3 | (libraries astring containers menhirLib sedlex ppxlib ppx_deriving) 4 | (preprocess (pps sedlex.ppx ppx_deriving.std)) 5 | ) 6 | 7 | (menhir (flags --table --canonical) (modules json_parser)) 8 | 9 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.8) 2 | (using menhir 2.0) 3 | -------------------------------------------------------------------------------- /example.ml: -------------------------------------------------------------------------------- 1 | module I = Json_parser.MenhirInterpreter 2 | module S = MenhirLib.General 3 | 4 | let state checkpoint : int = 5 | match Lazy.force (I.stack checkpoint) with 6 | | S.Nil -> 7 | (* Hmm... The parser is in its initial state. Its number is 8 | usually 0. This is a BIG HACK. TEMPORARY *) 9 | 0 10 | | S.Cons (Element (s, _, _, _), _) -> 11 | I.number s 12 | 13 | let handle_syntax_error lexbuf checkpoint = 14 | let message = 15 | match Json_parser_errors.message (state checkpoint) with 16 | | exception Not_found -> 17 | "Syntax error" 18 | | msg -> 19 | msg 20 | in 21 | Format.fprintf Format.err_formatter "%s %a\n%!" message Ast.pp_pos 22 | (fst @@ Sedlexing.lexing_positions lexbuf) 23 | 24 | let rec loop next_token lexbuf (checkpoint : Ast.ast I.checkpoint) = 25 | match checkpoint with 26 | | I.InputNeeded _env -> 27 | let token = next_token () in 28 | let checkpoint = I.offer checkpoint token in 29 | loop next_token lexbuf checkpoint 30 | | I.Shifting _ | I.AboutToReduce _ -> 31 | let checkpoint = I.resume checkpoint in 32 | loop next_token lexbuf checkpoint 33 | | I.HandlingError env -> 34 | handle_syntax_error lexbuf env 35 | | I.Accepted ast -> 36 | Format.fprintf Format.std_formatter "%a\n%!" Ast.pp_ast ast 37 | | I.Rejected -> 38 | (* Cannot happen as we stop at syntax error immediatly *) 39 | assert false 40 | 41 | let process lexbuf = 42 | let lexer = Json_lexer.lexer lexbuf in 43 | try 44 | loop lexer lexbuf 45 | (Json_parser.Incremental.json (fst @@ Sedlexing.lexing_positions lexbuf)) 46 | with Json_lexer.LexError (pos, msg) -> 47 | Format.fprintf Format.err_formatter "lexing error : %s at %a%!" msg 48 | Ast.pp_pos pos 49 | 50 | let _ = 51 | let lexbuf = Sedlexing.Utf8.from_channel stdin in 52 | process lexbuf 53 | -------------------------------------------------------------------------------- /json_lexer.ml: -------------------------------------------------------------------------------- 1 | open Sedlexing 2 | open Astring 3 | 4 | type token = Json_parser.token 5 | 6 | open Json_parser 7 | 8 | exception LexError of Lexing.position * string 9 | 10 | let digit = [%sedlex.regexp? '0' .. '9'] 11 | 12 | let number = [%sedlex.regexp? Plus digit] 13 | 14 | let blank = [%sedlex.regexp? ' ' | '\t'] 15 | 16 | let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n"] 17 | 18 | let any_blank = [%sedlex.regexp? blank | newline] 19 | 20 | let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] 21 | 22 | let decimal_ascii = [%sedlex.regexp? Plus ('0' .. '9')] 23 | 24 | let octal_ascii = [%sedlex.regexp? "0o", Plus ('0' .. '7')] 25 | 26 | let hex_ascii = [%sedlex.regexp? "0x", Plus (('0' .. '9' | 'a' .. 'f' | 'A' .. 'F'))] 27 | 28 | let rec nom buf = 29 | match%sedlex buf with 30 | | Plus any_blank -> nom buf 31 | | _ -> () 32 | 33 | let string buf = 34 | let buffer = Buffer.create 10 in 35 | let rec read_string buf = 36 | match%sedlex buf with 37 | | {|\"|} -> 38 | Buffer.add_char buffer '"'; 39 | read_string buf 40 | | '"' -> STRING (Buffer.contents buffer) 41 | | Star (Compl '"') -> 42 | Buffer.add_string buffer (Utf8.lexeme buf); 43 | read_string buf 44 | | _ -> assert false 45 | in 46 | read_string buf 47 | 48 | let digit_value c = 49 | let open Stdlib in 50 | match c with 51 | | 'a' .. 'f' -> 10 + Char.code c - Char.code 'a' 52 | | 'A' .. 'F' -> 10 + Char.code c - Char.code 'A' 53 | | '0' .. '9' -> Char.code c - Char.code '0' 54 | | _ -> assert false 55 | 56 | let num_value buffer ~base ~first = 57 | let buf = Utf8.lexeme buffer in 58 | let c = ref 0 in 59 | for i = first to String.length buf - 1 do 60 | let v = digit_value buf.[i] in 61 | assert (v < base); 62 | c := (base * !c) + v 63 | done; 64 | !c 65 | 66 | let token buf = 67 | nom buf; 68 | match%sedlex buf with 69 | | eof -> EOF 70 | | "" -> EOF 71 | | '-' -> MINUS 72 | | '+' -> PLUS 73 | | '"' -> string buf 74 | | ':' -> COLON 75 | | '[' -> LSQUARE 76 | | ']' -> RSQUARE 77 | | '{' -> LBRACKET 78 | | '}' -> RBRACKET 79 | | ',' -> COMMA 80 | | "true" -> BOOL true 81 | | "false" -> BOOL false 82 | | hex_ascii -> 83 | let number = num_value ~base:16 ~first:2 buf in 84 | INT number 85 | | octal_ascii -> 86 | let number = num_value ~base:8 ~first:2 buf in 87 | INT number 88 | | decimal_ascii -> 89 | let number = num_value ~base:10 ~first:0 buf in 90 | INT number 91 | | _ -> 92 | let position = fst @@ lexing_positions buf in 93 | let tok = Utf8.lexeme buf in 94 | raise @@ LexError (position, Printf.sprintf "unexpected character %S" tok) 95 | 96 | let lexer buf = 97 | Sedlexing.with_tokenizer token buf 98 | -------------------------------------------------------------------------------- /json_parser.messages: -------------------------------------------------------------------------------- 1 | json: LBRACKET RBRACKET STRING 2 | ## 3 | ## Ends in an error in state: 38. 4 | ## 5 | ## json -> LBRACKET loption(separated_nonempty_list(COMMA,field)) RBRACKET . EOF [ # ] 6 | ## 7 | ## The known suffix of the stack is as follows: 8 | ## LBRACKET loption(separated_nonempty_list(COMMA,field)) RBRACKET 9 | ## 10 | 11 | Unexpected value after end of object 12 | 13 | json: LBRACKET RSQUARE 14 | ## 15 | ## Ends in an error in state: 1. 16 | ## 17 | ## json -> LBRACKET . loption(separated_nonempty_list(COMMA,field)) RBRACKET EOF [ # ] 18 | ## 19 | ## The known suffix of the stack is as follows: 20 | ## LBRACKET 21 | ## 22 | 23 | Unexpected list in object key 24 | 25 | json: LBRACKET STRING COLON LBRACKET RSQUARE 26 | ## 27 | ## Ends in an error in state: 28. 28 | ## 29 | ## json_value -> LBRACKET . loption(separated_nonempty_list(COMMA,field)) RBRACKET [ RBRACKET COMMA ] 30 | ## 31 | ## The known suffix of the stack is as follows: 32 | ## LBRACKET 33 | ## 34 | 35 | Unexpected list termination in object key position 36 | 37 | json: LBRACKET STRING COLON LSQUARE LBRACKET RSQUARE 38 | ## 39 | ## Ends in an error in state: 13. 40 | ## 41 | ## json_value -> LBRACKET . loption(separated_nonempty_list(COMMA,field)) RBRACKET [ RSQUARE COMMA ] 42 | ## 43 | ## The known suffix of the stack is as follows: 44 | ## LBRACKET 45 | ## 46 | 47 | Unclosed object inside list 48 | 49 | json: LBRACKET STRING COLON LSQUARE LSQUARE RBRACKET 50 | ## 51 | ## Ends in an error in state: 12. 52 | ## 53 | ## json_value -> LSQUARE . loption(separated_nonempty_list(COMMA,json_value)) RSQUARE [ RSQUARE COMMA ] 54 | ## 55 | ## The known suffix of the stack is as follows: 56 | ## LSQUARE 57 | ## 58 | 59 | Unclosed list inside object 60 | 61 | json: LBRACKET STRING COLON LSQUARE MINUS STRING 62 | ## 63 | ## Ends in an error in state: 10. 64 | ## 65 | ## json_value -> MINUS . INT [ RSQUARE COMMA ] 66 | ## 67 | ## The known suffix of the stack is as follows: 68 | ## MINUS 69 | ## 70 | 71 | Unexpected arithmetic operation applied to non number value inside list 72 | 73 | json: LBRACKET STRING COLON LSQUARE PLUS EOF 74 | ## 75 | ## Ends in an error in state: 19. 76 | ## 77 | ## json_value -> option(PLUS) . INT [ RSQUARE COMMA ] 78 | ## 79 | ## The known suffix of the stack is as follows: 80 | ## option(PLUS) 81 | ## 82 | 83 | Unexpected arithmetic operation applied to non number value 84 | 85 | json: LBRACKET STRING COLON LSQUARE RBRACKET 86 | ## 87 | ## Ends in an error in state: 8. 88 | ## 89 | ## json_value -> LSQUARE . loption(separated_nonempty_list(COMMA,json_value)) RSQUARE [ RBRACKET COMMA ] 90 | ## 91 | ## The known suffix of the stack is as follows: 92 | ## LSQUARE 93 | ## 94 | 95 | Unclosed list in object field 96 | 97 | json: LBRACKET STRING COLON LSQUARE STRING COMMA RSQUARE 98 | ## 99 | ## Ends in an error in state: 24. 100 | ## 101 | ## separated_nonempty_list(COMMA,json_value) -> json_value COMMA . separated_nonempty_list(COMMA,json_value) [ RSQUARE ] 102 | ## 103 | ## The known suffix of the stack is as follows: 104 | ## json_value COMMA 105 | ## 106 | 107 | Trailing separator in list 108 | 109 | json: LBRACKET STRING COLON LSQUARE STRING STRING 110 | ## 111 | ## Ends in an error in state: 23. 112 | ## 113 | ## separated_nonempty_list(COMMA,json_value) -> json_value . [ RSQUARE ] 114 | ## separated_nonempty_list(COMMA,json_value) -> json_value . COMMA separated_nonempty_list(COMMA,json_value) [ RSQUARE ] 115 | ## 116 | ## The known suffix of the stack is as follows: 117 | ## json_value 118 | ## 119 | 120 | Missing list separator between values 121 | 122 | json: LBRACKET STRING COLON MINUS STRING 123 | ## 124 | ## Ends in an error in state: 6. 125 | ## 126 | ## json_value -> MINUS . INT [ RBRACKET COMMA ] 127 | ## 128 | ## The known suffix of the stack is as follows: 129 | ## MINUS 130 | ## 131 | 132 | Unexpected arithmetic operation on non number value 133 | 134 | json: LBRACKET STRING COLON PLUS EOF 135 | ## 136 | ## Ends in an error in state: 32. 137 | ## 138 | ## json_value -> option(PLUS) . INT [ RBRACKET COMMA ] 139 | ## 140 | ## The known suffix of the stack is as follows: 141 | ## option(PLUS) 142 | ## 143 | 144 | Expecting number before end of input 145 | 146 | json: LBRACKET STRING COLON RSQUARE 147 | ## 148 | ## Ends in an error in state: 3. 149 | ## 150 | ## separated_nonempty_list(COMMA,field) -> STRING COLON . json_value [ RBRACKET ] 151 | ## separated_nonempty_list(COMMA,field) -> STRING COLON . json_value COMMA separated_nonempty_list(COMMA,field) [ RBRACKET ] 152 | ## 153 | ## The known suffix of the stack is as follows: 154 | ## STRING COLON 155 | ## 156 | 157 | Invalid list 158 | 159 | json: LBRACKET STRING COLON STRING COMMA RSQUARE 160 | ## 161 | ## Ends in an error in state: 35. 162 | ## 163 | ## separated_nonempty_list(COMMA,field) -> STRING COLON json_value COMMA . separated_nonempty_list(COMMA,field) [ RBRACKET ] 164 | ## 165 | ## The known suffix of the stack is as follows: 166 | ## STRING COLON json_value COMMA 167 | ## 168 | 169 | Missing field key or value 170 | 171 | json: LBRACKET STRING COLON STRING STRING 172 | ## 173 | ## Ends in an error in state: 34. 174 | ## 175 | ## separated_nonempty_list(COMMA,field) -> STRING COLON json_value . [ RBRACKET ] 176 | ## separated_nonempty_list(COMMA,field) -> STRING COLON json_value . COMMA separated_nonempty_list(COMMA,field) [ RBRACKET ] 177 | ## 178 | ## The known suffix of the stack is as follows: 179 | ## STRING COLON json_value 180 | ## 181 | 182 | Missing field separartor 183 | 184 | json: LBRACKET STRING STRING 185 | ## 186 | ## Ends in an error in state: 2. 187 | ## 188 | ## separated_nonempty_list(COMMA,field) -> STRING . COLON json_value [ RBRACKET ] 189 | ## separated_nonempty_list(COMMA,field) -> STRING . COLON json_value COMMA separated_nonempty_list(COMMA,field) [ RBRACKET ] 190 | ## 191 | ## The known suffix of the stack is as follows: 192 | ## STRING 193 | ## 194 | 195 | Missing field value 196 | 197 | json: STRING 198 | ## 199 | ## Ends in an error in state: 0. 200 | ## 201 | ## json' -> . json [ # ] 202 | ## 203 | ## The known suffix of the stack is as follows: 204 | ## 205 | ## 206 | 207 | Json top level value must be an oject 208 | -------------------------------------------------------------------------------- /json_parser.mly: -------------------------------------------------------------------------------- 1 | %token EOF 2 | %token LBRACKET RBRACKET COLON COMMA 3 | %token MINUS PLUS LSQUARE RSQUARE 4 | %token STRING 5 | %token BOOL 6 | %token INT 7 | 8 | %{ 9 | (* exception ParseError of token * Lexing.position * Lexing.position *) 10 | 11 | let make_loc pos = pos 12 | 13 | let make pos json = 14 | let location = make_loc pos in 15 | { Ast.loc = location; json } 16 | 17 | open Ast 18 | %} 19 | 20 | %start json 21 | 22 | %% 23 | 24 | let field == 25 | | ~ = STRING; COLON; ~ = json_value; <> 26 | 27 | let assoc == 28 | | LBRACKET; fields = separated_list(COMMA, field); RBRACKET; { make $loc (Assoc fields) } 29 | 30 | let array == 31 | | LSQUARE; elems = separated_list(COMMA, json_value); RSQUARE; { make $loc (List elems) } 32 | 33 | let number == 34 | | PLUS?; ~ = INT; <> 35 | | MINUS; i = INT; { - i } 36 | 37 | let json_value := 38 | | s = STRING; { make $loc (String s) } 39 | | i = number; { make $loc (Int i) } 40 | | b = BOOL; { make $loc (Bool b) } 41 | | ~ = assoc; <> 42 | | ~ = array; <> 43 | 44 | let json := 45 | ~ = assoc; EOF; <> 46 | -------------------------------------------------------------------------------- /json_parser_errors.ml: -------------------------------------------------------------------------------- 1 | 2 | (* This file was auto-generated based on "json_parser.messages". *) 3 | 4 | (* Please note that the function [message] can raise [Not_found]. *) 5 | 6 | let message = 7 | fun s -> 8 | match s with 9 | | 0 -> 10 | "Json top level value must be an oject\n" 11 | | 2 -> 12 | "Missing field value\n" 13 | | 34 -> 14 | "Missing field separartor\n" 15 | | 35 -> 16 | "Missing field key or value\n" 17 | | 3 -> 18 | "Invalid list\n" 19 | | 32 -> 20 | "Expecting number before end of input\n" 21 | | 6 -> 22 | "Unexpected arithmetic operation on non number value\n" 23 | | 23 -> 24 | "Missing list separator between values\n" 25 | | 24 -> 26 | "Trailing separator in list\n" 27 | | 8 -> 28 | "Unclosed list in object field\n" 29 | | 19 -> 30 | "Unexpected arithmetic operation applied to non number value\n" 31 | | 10 -> 32 | "Unexpected arithmetic operation applied to non number value inside list\n" 33 | | 12 -> 34 | "Unclosed list inside object\n" 35 | | 13 -> 36 | "Unclosed object inside list\n" 37 | | 28 -> 38 | "Unexpected list termination in object key position\n" 39 | | 1 -> 40 | "Unexpected list in object key\n" 41 | | 38 -> 42 | "Unexpected value after end of object\n" 43 | | _ -> 44 | raise Not_found 45 | --------------------------------------------------------------------------------