├── README.md └── recordparser /README.md: -------------------------------------------------------------------------------- 1 | JANE 2 | ==== 3 | JS's Acknowledgement of Native Erlang types 4 | ------------------------------------------- 5 | 6 | Rationale 7 | --------- 8 | Nowadays, Erlang is getting a lot of attention. This attention reveals a growing need for interaction with JavaScript and other languages. Despite Erlang's concise syntax and neat type annotations (which are used by every conscientious Erlang programmer), JavaScript is the only language for the Web (for now, at least). This document represents an attempt to change the way JS and Erlang interact by specifying how Erlang's type-annotated records can be represented using JSON. The translated type information can be used to draw edit forms, check the types of incoming and outgoing messages and control how data is represented. 9 | 10 | Format 11 | ------ 12 | The reference implementation takes a .hrl file containing typed record definitions and translates it into a .json file containing JSON-encoded record definitions. Each record is encoded as an entry in a first-level dictionary. Its fields are then specified, again as a dictionary, where each key is a field name and each value is a field specification. 13 | 14 | A record field specification consists of a "type" field and an optional "default" field. The "default" field contains the field's default value and the "type" field is encoded as follows: 15 | 16 | * the type name is set as a key and its argument list is set as the value; 17 | * each type argument can be a value or a dictionary that contains a type name as a key and its argument list as the value; 18 | * these list+dictionary constructs can be nested to any depth. 19 | 20 | By default, record fields are represented as a union of their actual type and the atom "undefined". The reference implementation strips this union and keeps only the actual type if the "ignore_undefined" flag is set. 
21 | 22 | Implementation 23 | -------------- 24 | The current implementation of JANE is an escript file that can be used as a rebar post-hook: 25 | 26 | ```erlang 27 | {post_hooks, [{'compile', './priv/recordparser ignore_undefined include/test.hrl'}]}. 28 | ``` 29 | 30 | Each resulting .json file is written to priv/records/test.json, where test is the base name of the .hrl file. 31 | 32 | Example 33 | ------- 34 | Here is an example .hrl file with typed records: 35 | 36 | ```erlang 37 | -record(params_ping, {host :: nonempty_string()}). 38 | -record(params_tcp, {host :: list(atom()), 39 | port = 80 :: pos_integer(), 40 | timeout :: pos_integer()}). 41 | ``` 42 | 43 | Here is how it is translated to JSON with ignore_undefined: 44 | 45 | ```javascript 46 | { 47 | "params_ping": { 48 | "host": { 49 | "type": { 50 | "nonempty_string": [] 51 | } 52 | } 53 | }, 54 | "params_tcp": { 55 | "host": { 56 | "type": { 57 | "list": [ 58 | { 59 | "atom": [] 60 | } 61 | ] 62 | } 63 | }, 64 | "port": { 65 | "type": { 66 | "pos_integer": [] 67 | }, 68 | "default": 80 69 | }, 70 | "timeout": { 71 | "type": { 72 | "pos_integer": [] 73 | } 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | And here is the result without ignore_undefined: 80 | 81 | ```javascript 82 | { 83 | "params_ping": { 84 | "host": { 85 | "type": { 86 | "union": [ 87 | { 88 | "atom": [ 89 | "undefined" 90 | ] 91 | }, 92 | { 93 | "nonempty_string": [] 94 | } 95 | ] 96 | } 97 | } 98 | }, 99 | "params_tcp": { 100 | "host": { 101 | "type": { 102 | "union": [ 103 | { 104 | "atom": [ 105 | "undefined" 106 | ] 107 | }, 108 | { 109 | "list": [{"atom": []}] 110 | } 111 | ] 112 | } 113 | }, 114 | "port": { 115 | "type": { 116 | "pos_integer": [] 117 | }, 118 | "default": 80 119 | }, 120 | "timeout": { 121 | "type": { 122 | "union": [ 123 | { 124 | "atom": [ 125 | "undefined" 126 | ] 127 | }, 128 | { 129 | "pos_integer": [] 130 | } 131 | ] 132 | } 133 | } 134 | } 135 | } 136 | ``` 137 | 138 | License 139 | ------- 140 | Copyright 2011 
Selectel. All rights reserved. 141 | 142 | Redistribution and use in source and binary forms, with or without modification, are 143 | permitted provided that the following conditions are met: 144 | 145 | 1. Redistributions of source code must retain the above copyright notice, this list of 146 | conditions and the following disclaimer. 147 | 148 | 2. Redistributions in binary form must reproduce the above copyright notice, this list 149 | of conditions and the following disclaimer in the documentation and/or other materials 150 | provided with the distribution. 151 | 152 | THIS SOFTWARE IS PROVIDED BY SELECTEL ``AS IS'' AND ANY EXPRESS OR IMPLIED 153 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 154 | FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SELECTEL OR 155 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 156 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 157 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 158 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 159 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 160 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 161 | 162 | The views and conclusions contained in the software and documentation are those of the 163 | authors and should not be interpreted as representing official policies, either expressed 164 | or implied, of Selectel. 165 | -------------------------------------------------------------------------------- /recordparser: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env escript 2 | %%! -noshell -noinput 3 | %% -*- mode: erlang -*- 4 | %% jsonerl is included from https://github.com/essiene/jsonerl 5 | -mode(compile). 6 | -define(ME, filename:basename(escript:script_name())). 
%% Prints "<script name>: <formatted message>" followed by a newline.
-define(PRINT(STR, PARAMS), io:format("~s: " ++ STR ++ "~n", [?ME | PARAMS])).
-define(PRINT(STR), ?PRINT(STR, [])).

%% this one goes from jsonerl
-export([encoder/1, encode/1]).
-export([decoder/1, decode/1]).
-export([test/0]).
-export([to_ex_a/1]).

%% Translation options. ignore_undefined is switched on by passing
%% "ignore_undefined" as the first command-line argument (see main/1).
-record(parse_param, {ignore_undefined = false :: boolean()}).

%% @doc escript entry point. An optional leading "ignore_undefined"
%% argument enables that option; every remaining argument is treated as
%% the path of a .hrl file to translate.
main(["ignore_undefined" | Files]) ->
    main(Files, #parse_param{ignore_undefined = true});
main(Files) ->
    main(Files, #parse_param{}).

%% @doc Validates the extension of every argument first, so that nothing
%% is translated when one of the arguments is not a .hrl file, and then
%% translates the files one by one.
main(Files, #parse_param{}=PP) ->
    lists:foreach(fun check_extension/1, Files),
    lists:foreach(fun(File) -> handle_file(File, PP) end, Files).

%% @doc Returns ok when File ends in ".hrl"; otherwise prints a message
%% and terminates the script with exit status 1.
check_extension(File) ->
    case lists:suffix(".hrl", File) of
        true ->
            ok;
        false ->
            ?PRINT("Arguments should be .hrl files"),
            halt(1)
    end.

%% @doc Translates the record definitions found in File into JSON and
%% writes them to priv/records/NAME.json (relative to the current
%% directory), where NAME is the part of File's base name that precedes
%% the first dot.
%%
%% Forms that cannot be parsed are skipped, as before. Forms that parse
%% but are not record definitions are skipped as well, instead of being
%% handed to handle_record/2, which only accepts record attributes.
%% Read and write failures are reported and end the script with status 1.
handle_file(File, #parse_param{}=PP) ->
    ?PRINT("Processing ~s", [File]),
    FileNameHrl = filename:basename(File),
    FileName = lists:takewhile(fun(X) -> X /= $. end, FileNameHrl),
    FileBin = case file:read_file(File) of
                  {ok, Bin} ->
                      Bin;
                  {error, ReadError} ->
                      ?PRINT("Cannot read ~s: ~s",
                             [File, file:format_error(ReadError)]),
                      halt(1)
              end,
    %% The trailing atom marks end of input for the scanner driven by
    %% get_forms/3; without it the scanner would keep asking for more
    %% characters after the last form.
    %% NOTE(review): this relies on erl_scan accepting an `eof' list
    %% tail - confirm when upgrading OTP.
    FileStr = binary_to_list(FileBin) ++ eof,
    Forms = get_forms(FileStr),
    Parsed = [erl_parse:parse_form(Form) || Form <- Forms],
    Records = [AST || {ok, {attribute, _, record, _} = AST} <- Parsed],
    PreJSONs = [handle_record(Record, PP) || Record <- Records],
    JSON = encode(list_to_tuple(PreJSONs)),
    Path = filename:absname("priv/records/" ++ FileName ++ ".json"),
    case write_json(Path, JSON) of
        ok ->
            %% Report success only once the file really exists.
            ?PRINT("Saved to ~s", [Path]);
        {error, WriteError} ->
            ?PRINT("Cannot write ~s: ~s",
                   [Path, file:format_error(WriteError)]),
            halt(1)
    end.

%% Creates the missing parent directories of Path and writes JSON to it.
write_json(Path, JSON) ->
    case filelib:ensure_dir(Path) of
        ok ->
            file:write_file(Path, JSON);
        {error, _} = Error ->
            Error
    end.

%% @doc Splits Str into forms and returns the token list of each form,
%% in source order.
get_forms(Str) ->
    lists:reverse(get_forms(Str, [], [])).
%% @doc Scans Str one form at a time and pushes the token list of every
%% form onto Forms (newest first). Cont is the erl_scan continuation, or
%% [] when a new form starts. A scan error is reported and terminates
%% the script with exit status 1.
get_forms(Str, Forms, Cont) ->
    case erl_scan:tokens(Cont, Str, 0) of
        {done, {ok, Tokens, _}, LeftOverChars} ->
            get_forms(LeftOverChars, [Tokens | Forms], []);
        {done, {eof, _}, _} ->
            Forms;
        {done, {error, ErrInfo, ErrLocation}, _} ->
            ?PRINT("Error while scanning at ~p: ~p",
                   [ErrLocation, ErrInfo]),
            halt(1);
        {more, NewCont} ->
            %% All characters are already inside the continuation, so the
            %% only thing left to tell the scanner is that the input has
            %% ended. Feeding Str again would append the same text to the
            %% pending form over and over.
            get_forms(eof, Forms, NewCont)
    end.

%% @doc Converts the abstract form of a record definition into a
%% {RecordName, FieldsObject} pair for the JSON encoder.
handle_record({attribute, _, record, {RawRecName, RawFields}},
              #parse_param{}=PP) ->
    Fields = [handle_field(RawField, PP) || RawField <- RawFields],
    {atom_to_binary(RawRecName), list_to_tuple(Fields)}.

%% @doc Converts one record field into a {FieldName, Specification} pair.
%% The specification always holds a "type" entry and, when the field
%% declares a default value, a "default" entry. A field without a type
%% annotation is reported with the type any().
handle_field({typed_record_field, RawField, RawType}, #parse_param{}=PP) ->
    field_spec(RawField, handle_type(RawType, PP));
handle_field({record_field, _, _}=RawField, #parse_param{}) ->
    field_spec(RawField, {{<<"any">>, []}});
handle_field({record_field, _, _, _}=RawField, #parse_param{}) ->
    field_spec(RawField, {{<<"any">>, []}}).

%% Builds the field entry for a field with or without a default value.
field_spec({record_field, _, {atom, _, RawName}}, Type) ->
    {atom_to_binary(RawName), {{<<"type">>, Type}}};
field_spec({record_field, _, {atom, _, RawName}, RawDefault}, Type) ->
    {atom_to_binary(RawName), {{<<"type">>, Type},
                               {<<"default">>, default_value(RawDefault)}}}.

%% Extracts the value of a literal default.
%% NOTE(review): only literals are understood; compound defaults such as
%% tuples or non-empty lists are passed on as their abstract form, as
%% before - confirm whether they need dedicated support.
default_value({string, _, Chars}) ->
    %% A character list would be encoded as a JSON array of integers.
    unicode:characters_to_binary(Chars);
default_value({nil, _}) ->
    [];
default_value({op, _, '-', {Tag, _, Number}})
  when Tag =:= integer; Tag =:= float ->
    -Number;
default_value({_, _, Literal}) ->
    Literal.

%% @doc Converts the abstract form of a type into its JSON
%% representation: an object with the type name as the key and the list
%% of converted arguments as the value.
%%
%% With ignore_undefined set, a union that starts with the atom
%% undefined loses that member; when a single member is left, the union
%% is replaced by that member.
handle_type({type, _, union, [{atom, _, undefined}, Defined]},
            #parse_param{ignore_undefined=true}=PP) ->
    handle_type(Defined, PP);
handle_type({type, _, union, [{atom, _, undefined} | [_, _ | _] = Others]},
            #parse_param{ignore_undefined=true}=PP) ->
    named_type(<<"union">>, Others, PP);
handle_type({atom, _, RawAtom}, #parse_param{}) ->
    {{<<"atom">>, [atom_to_binary(RawAtom)]}};
handle_type({integer, _, Value}, #parse_param{}) ->
    %% Integer literal, e.g. a bound of a range type; encoded like an
    %% atom literal.
    {{<<"integer">>, [Value]}};
handle_type({user_type, _, RawName, RawArgs}, #parse_param{}=PP) ->
    %% Reference to a type defined with -type; encoded like a built-in type.
    named_type(atom_to_binary(RawName), RawArgs, PP);
handle_type({remote_type, _, [{atom, _, RawMod}, {atom, _, RawName}, RawArgs]},
            #parse_param{}=PP) ->
    Name = iolist_to_binary([atom_to_binary(RawMod), $:,
                             atom_to_binary(RawName)]),
    named_type(Name, RawArgs, PP);
handle_type({type, _, RawName, RawArgs}, #parse_param{}=PP) ->
    named_type(atom_to_binary(RawName), RawArgs, PP).

%% Encodes a named type. The argument specification `any' (as in
%% tuple()) is encoded as the string "any".
named_type(Name, any, _PP) ->
    {{Name, <<"any">>}};
named_type(Name, RawArgs, PP) ->
    {{Name, [handle_type(RawArg, PP) || RawArg <- RawArgs]}}.
%% @spec atom_to_binary(atom()) -> binary()
%% @doc Returns the name of Atom as a UTF-8 encoded binary.
%% Going through atom_to_list/1 and list_to_binary/1 fails for atoms that
%% contain code points above 255 and yields Latin-1 rather than UTF-8
%% bytes for the others. The BIF is called with its module prefix
%% because this module defines a function with the same name.
atom_to_binary(Atom) ->
    erlang:atom_to_binary(Atom, utf8).

%%% ---------------------jsonerl goes here-----------------------------

% This is a macro to placate syntax highlighters..
-define(Q, $\").
%% Advance the decoder position by N characters on the current line.
-define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
                                 column=N+S#decoder.column}).
%% Advance the decoder position by one character on the current line.
-define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
                              column=1+S#decoder.column}).
%% Advance the decoder position over a line break.
-define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
                               column=1,
                               line=1+S#decoder.line}).
%% Advance the decoder position over character C, whatever it is.
-define(INC_CHAR(S, C),
        case C of
            $\n ->
                S#decoder{column=1,
                          line=1+S#decoder.line,
                          offset=1+S#decoder.offset};
            _ ->
                S#decoder{column=1+S#decoder.column,
                          offset=1+S#decoder.offset}
        end).
-define(IS_WHITESPACE(C),
        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).

%% @type iolist() = [char() | binary() | iolist()]
%% @type iodata() = iolist() | binary()
%% @type json_string() = atom() | binary()
%% @type json_number() = integer() | float()
%% @type json_array() = [json_term()]
%% @type json_object() = {{json_string(), json_term()}, ...}
%% @type json_term() = json_string() | json_number() | json_array() |
%%                     json_object()

%% Encoder state: handler is null or a fun that is applied to terms the
%% encoder does not know how to encode.
-record(encoder, {handler=null}).

%% Decoder state: the optional object hook, the position in the input
%% (byte offset, line, column) and the state of the token state machine.
-record(decoder, {object_hook=null,
                  offset=0,
                  line=1,
                  column=1,
                  state=null}).

%% @spec encoder([encoder_option()]) -> function()
%% @doc Create an encoder/1 with the given options.
encoder(Options) ->
    State = parse_encoder_options(Options, #encoder{}),
    fun (O) -> json_encode(O, State) end.

%% @spec encode(json_term()) -> iolist()
%% @doc Encode the given as JSON to an iolist.
encode(Any) ->
    json_encode(Any, #encoder{}).
%% @spec decoder([decoder_option()]) -> function()
%% @doc Create a decoder/1 with the given options.
decoder(Options) ->
    State = parse_decoder_options(Options, #decoder{}),
    fun (O) -> json_decode(O, State) end.

%% @spec decode(iolist()) -> json_term()
%% @doc Decode the given iolist to Erlang terms.
decode(S) ->
    json_decode(S, #decoder{}).

%% @doc Runs the built-in self-test; see test_all/0.
test() ->
    test_all().

%% Internal API

%% Folds the option list into the encoder state. The only option is
%% {handler, Fun}.
parse_encoder_options([], State) ->
    State;
parse_encoder_options([{handler, Handler} | Rest], State) ->
    parse_encoder_options(Rest, State#encoder{handler=Handler}).

%% Folds the option list into the decoder state. The only option is
%% {object_hook, Fun}.
parse_decoder_options([], State) ->
    State;
parse_decoder_options([{object_hook, Hook} | Rest], State) ->
    parse_decoder_options(Rest, State#decoder{object_hook=Hook}).

%% @doc Encodes a term as JSON text (an iolist or a binary).
%% Atoms other than true, false and null are encoded as strings, lists
%% as arrays and tuples of {Key, Value} pairs as objects. Any other term
%% is passed to the handler of the encoder state; without a handler the
%% process exits with {json_encode, {bad_term, Term}}.
json_encode(true, _State) ->
    <<"true">>;
json_encode(false, _State) ->
    <<"false">>;
json_encode(null, _State) ->
    <<"null">>;
json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 ->
    %% Anything outside of 32-bit integers should be encoded as a float
    integer_to_list(I);
json_encode(I, _State) when is_integer(I) ->
    float_digits(float(I));
json_encode(F, _State) when is_float(F) ->
    float_digits(F);
json_encode(S, State) when is_binary(S); is_atom(S) ->
    json_encode_string(S, State);
json_encode(Array, State) when is_list(Array) ->
    json_encode_array(Array, State);
json_encode(Tuples, State) when is_tuple(Tuples) ->
    json_encode_proplist(Tuples, State);
json_encode(Bad, #encoder{handler=null}) ->
    exit({json_encode, {bad_term, Bad}});
json_encode(Bad, State=#encoder{handler=Handler}) ->
    json_encode(Handler(Bad), State).

%% Textual form of a float. The standard library is used so that the
%% script does not depend on the mochinum module, which is neither part
%% of this file nor of Erlang/OTP.
float_digits(Float) ->
    lists:flatten(io_lib:format("~p", [Float])).
%% @doc Encodes a list as a JSON array.
json_encode_array([], _State) ->
    <<"[]">>;
json_encode_array(L, State) ->
    %% The output is built in reverse: every element is followed by a
    %% comma, and the comma after the last element is then replaced by
    %% the closing bracket.
    F = fun (O, Acc) ->
                [$,, json_encode(O, State) | Acc]
        end,
    [$, | Acc1] = lists:foldl(F, "[", L),
    lists:reverse([$\] | Acc1]).

%% @doc Encodes a tuple of {Key, Value} pairs as a JSON object.
json_encode_proplist({}, _State) ->
    <<"{}">>;
json_encode_proplist(Tuples, State) when is_tuple(Tuples) ->
    %% Built in reverse, like json_encode_array/2.
    F = fun ({K, V}, Acc) ->
                KS = json_encode_string(K, State),
                VS = json_encode(V, State),
                [$,, VS, $:, KS | Acc]
        end,
    [$, | Acc1] = lists:foldl(F, "{", tuple_to_list(Tuples)),
    lists:reverse([$\} | Acc1]).

%% @doc Encodes an atom, a binary, an integer or a list of code points
%% as a JSON string. Binaries are expected to hold UTF-8 encoded text.
json_encode_string(A, State) when is_atom(A) ->
    %% atom_to_list/1 yields code points, not UTF-8 bytes, so the name
    %% is handled by the code point clause below instead of being
    %% decoded as UTF-8.
    json_encode_string(atom_to_list(A), State);
json_encode_string(B, _State) when is_binary(B) ->
    case json_bin_is_safe(B) of
        true ->
            [?Q, B, ?Q];
        false ->
            json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q])
    end;
json_encode_string(I, _State) when is_integer(I) ->
    [?Q, integer_to_list(I), ?Q];
json_encode_string(L, _State) when is_list(L) ->
    case json_string_is_safe(L) of
        true ->
            [?Q, L, ?Q];
        false ->
            json_encode_string_unicode(L, [?Q])
    end.

%% @doc Tells whether a list of code points can be copied into a JSON
%% string as is, i.e. it holds nothing that needs escaping.
json_string_is_safe([]) ->
    true;
json_string_is_safe([C | Rest]) ->
    case C of
        ?Q ->
            false;
        $\\ ->
            false;
        $\b ->
            false;
        $\f ->
            false;
        $\n ->
            false;
        $\r ->
            false;
        $\t ->
            false;
        C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
            false;
        C when C < 16#7f ->
            json_string_is_safe(Rest);
        _ ->
            false
    end.
%% @doc Tells whether a binary can be copied into a JSON string as is,
%% i.e. every byte is a printable ASCII character other than the double
%% quote and the backslash.
json_bin_is_safe(<<>>) ->
    true;
json_bin_is_safe(<<C, Rest/binary>>) ->
    case C of
        ?Q ->
            false;
        $\\ ->
            false;
        $\b ->
            false;
        $\f ->
            false;
        $\n ->
            false;
        $\r ->
            false;
        $\t ->
            false;
        C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
            false;
        C when C < 16#7f ->
            json_bin_is_safe(Rest);
        _ ->
            false
    end.

%% @doc Appends the escaped form of a list of code points, followed by
%% the closing quote, to the reversed output Acc and returns the result
%% in output order. Exits with {json_encode, {bad_char, C}} for an
%% element that is not a valid code point.
json_encode_string_unicode([], Acc) ->
    lists:reverse([$\" | Acc]);
json_encode_string_unicode([C | Cs], Acc) ->
    Acc1 = case C of
               ?Q ->
                   [?Q, $\\ | Acc];
               %% Escaping solidus is only useful when trying to protect
               %% against "</script>" injection attacks which are only
               %% possible when JSON is inserted into a HTML document
               %% in-line. json does not protect you from this, so
               %% if you do insert directly into HTML then you need to
               %% uncomment the following case or escape the output of encode.
               %%
               %% $/ ->
               %%    [$/, $\\ | Acc];
               %%
               $\\ ->
                   [$\\, $\\ | Acc];
               $\b ->
                   [$b, $\\ | Acc];
               $\f ->
                   [$f, $\\ | Acc];
               $\n ->
                   [$n, $\\ | Acc];
               $\r ->
                   [$r, $\\ | Acc];
               $\t ->
                   [$t, $\\ | Acc];
               C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
                   %% Pushed as one nested element so that the final
                   %% reverse keeps the digits in order.
                   [unihex(C) | Acc];
               C when C < 16#7f ->
                   [C | Acc];
               _ ->
                   exit({json_encode, {bad_char, C}})
           end,
    json_encode_string_unicode(Cs, Acc1).

%% Lower-case hexadecimal digit for a value in 0..15.
hexdigit(C) when C >= 0, C =< 9 ->
    C + $0;
hexdigit(C) when C =< 15 ->
    C + $a - 10.

%% @doc Returns the \uXXXX escape of a code point. Code points above
%% 16#FFFF are written as a UTF-16 surrogate pair, i.e. two escapes.
unihex(C) when C < 16#10000 ->
    <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
    Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
    [$\\, $u | Digits];
unihex(C) when C =< 16#10FFFF ->
    N = C - 16#10000,
    S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
    S2 = 16#dc00 bor (N band 16#3ff),
    [unihex(S1), unihex(S2)].
%% @doc Decodes one JSON value from an iolist or a binary. Only
%% whitespace may follow the value.
json_decode(Input, State) when is_list(Input) ->
    json_decode(iolist_to_binary(Input), State);
json_decode(Bin, State0) ->
    {Value, State1} = decode1(Bin, State0),
    %% In the trim state the tokenizer accepts the end of the input.
    {eof, _} = tokenize(Bin, State1#decoder{state=trim}),
    Value.

%% Decodes the value that starts at the current offset and returns it
%% together with the decoder state that follows it.
decode1(Bin, State=#decoder{state=null}) ->
    case tokenize(Bin, State#decoder{state=any}) of
        {{const, Const}, Next} ->
            {Const, Next};
        {start_array, Next} ->
            decode_array(Bin, Next);
        {start_object, Next} ->
            decode_object(Bin, Next)
    end.

%% Applies the object hook of the decoder, if there is one, to a freshly
%% decoded object.
make_object(Object, #decoder{object_hook=null}) ->
    Object;
make_object(Object, #decoder{object_hook=Hook}) ->
    Hook(Object).

%% Decodes the members of an object whose opening brace has been consumed.
decode_object(Bin, State) ->
    decode_object(Bin, State#decoder{state=key}, []).

%% Pairs holds the members read so far, newest first. The state says
%% what is expected next: a key (or the end of the object) or a comma
%% (or the end of the object).
decode_object(Bin, State=#decoder{state=key}, Pairs) ->
    case tokenize(Bin, State) of
        {end_object, Next} ->
            close_object(Pairs, Next);
        {{const, Key}, AfterKey} ->
            {colon, AfterColon} = tokenize(Bin, AfterKey),
            {Value, AfterValue} = decode1(Bin, AfterColon#decoder{state=null}),
            decode_object(Bin, AfterValue#decoder{state=comma},
                          [{Key, Value} | Pairs])
    end;
decode_object(Bin, State=#decoder{state=comma}, Pairs) ->
    case tokenize(Bin, State) of
        {end_object, Next} ->
            close_object(Pairs, Next);
        {comma, Next} ->
            decode_object(Bin, Next#decoder{state=key}, Pairs)
    end.

%% Turns the collected members into an object and resets the state for
%% the value that follows.
close_object(Pairs, State) ->
    Object = make_object(list_to_tuple(lists:reverse(Pairs)), State),
    {Object, State#decoder{state=null}}.

%% Decodes the elements of an array whose opening bracket has been consumed.
decode_array(Bin, State) ->
    decode_array(Bin, State#decoder{state=any}, []).
%% Items holds the elements read so far, newest first. The state says
%% what is expected next: an element (or the end of the array) or a
%% comma (or the end of the array).
decode_array(Bin, State=#decoder{state=any}, Items) ->
    case tokenize(Bin, State) of
        {end_array, Next} ->
            {lists:reverse(Items), Next#decoder{state=null}};
        {start_array, Next} ->
            {Nested, AfterNested} = decode_array(Bin, Next),
            decode_array(Bin, AfterNested#decoder{state=comma},
                         [Nested | Items]);
        {start_object, Next} ->
            {Object, AfterObject} = decode_object(Bin, Next),
            decode_array(Bin, AfterObject#decoder{state=comma},
                         [Object | Items]);
        {{const, Value}, Next} ->
            decode_array(Bin, Next#decoder{state=comma}, [Value | Items])
    end;
decode_array(Bin, State=#decoder{state=comma}, Items) ->
    case tokenize(Bin, State) of
        {end_array, Next} ->
            {lists:reverse(Items), Next#decoder{state=null}};
        {comma, Next} ->
            decode_array(Bin, Next#decoder{state=any}, Items)
    end.

%% @doc Reads a string whose opening quote has been consumed. A string
%% without a backslash is returned as a slice of the input; otherwise
%% the part before the first backslash is handed, reversed, to
%% tokenize_string/3, which resolves the escapes.
tokenize_string(Bin, State=#decoder{offset=Start}) ->
    case tokenize_string_fast(Bin, Start) of
        {escape, Stop} ->
            PlainLen = Stop - Start,
            <<_:Start/binary, Plain:PlainLen/binary, _/binary>> = Bin,
            AtEscape = ?ADV_COL(State, PlainLen),
            tokenize_string(Bin, AtEscape,
                            lists:reverse(binary_to_list(Plain)));
        Stop ->
            TextLen = Stop - Start,
            <<_:Start/binary, Text:TextLen/binary, ?Q, _/binary>> = Bin,
            %% The extra character is the closing quote.
            %% (The original kept a disabled variant here that returned
            %% an existing atom instead of a binary where possible.)
            {{const, Text}, ?ADV_COL(State, TextLen + 1)}
    end.

%% Returns the offset of the closing quote, or {escape, Offset} for the
%% first position that holds something else than an ordinary character,
%% which includes the end of the input.
tokenize_string_fast(Bin, Pos) ->
    case Bin of
        <<_:Pos/binary, ?Q, _/binary>> ->
            Pos;
        <<_:Pos/binary, Char, _/binary>> when Char =/= $\\ ->
            tokenize_string_fast(Bin, Pos + 1);
        _ ->
            {escape, Pos}
    end.
%% @doc Reads the rest of a string that contains escape sequences. Acc
%% holds the bytes decoded so far in reverse order. Returns the string
%% as a UTF-8 binary together with the decoder state that follows the
%% closing quote.
tokenize_string(B, S=#decoder{offset=O}, Acc) ->
    case B of
        <<_:O/binary, ?Q, _/binary>> ->
            {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
        <<_:O/binary, "\\\"", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
        <<_:O/binary, "\\\\", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
        <<_:O/binary, "\\/", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
        <<_:O/binary, "\\b", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
        <<_:O/binary, "\\f", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
        <<_:O/binary, "\\n", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
        <<_:O/binary, "\\r", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
        <<_:O/binary, "\\t", _/binary>> ->
            tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
        <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
            Unit = erlang:list_to_integer([C3, C2, C1, C0], 16),
            %% The encoder writes characters above 16#FFFF as two
            %% escapes (see unihex/1), so a surrogate pair has to be
            %% joined again before it is converted to UTF-8.
            {CodePoint, Length} = join_surrogates(Unit, Rest),
            Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
            tokenize_string(B, ?ADV_COL(S, Length), Acc1);
        <<_:O/binary, C, _/binary>> ->
            tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
    end.

%% Returns {CodePoint, Length}, Length being the number of input
%% characters the code point occupies. When Unit is a high surrogate
%% that is directly followed by the escape of a low surrogate, the two
%% are combined (12 characters). Any other unit stands for itself (6
%% characters), which is how every escape was treated before.
join_surrogates(High, <<"\\u", D3, D2, D1, D0, _/binary>>)
  when High >= 16#D800, High =< 16#DBFF ->
    try erlang:list_to_integer([D3, D2, D1, D0], 16) of
        Low when Low >= 16#DC00, Low =< 16#DFFF ->
            {16#10000 + ((High - 16#D800) bsl 10) + (Low - 16#DC00), 12};
        _ ->
            {High, 6}
    catch
        error:badarg ->
            {High, 6}
    end;
join_surrogates(Unit, _Rest) ->
    {Unit, 6}.

%% @doc Reads a number at the current offset and returns it as an
%% integer or a float constant.
tokenize_number(B, S) ->
    case tokenize_number(B, sign, S, []) of
        {{int, Int}, S1} ->
            {{const, list_to_integer(Int)}, S1};
        {{float, Float}, S1} ->
            {{const, list_to_float(Float)}, S1}
    end.
%% @doc State machine that collects the characters of a number. The
%% second argument names the part that is expected next: sign, int,
%% int1 (further integer digits), frac, frac1 (further fraction digits),
%% esign, eint or eint1 (further exponent digits). Text holds the
%% characters accepted so far in reverse order. Returns
%% {{int, Chars}, State} or {{float, Chars}, State}; the characters of a
%% float always contain a fraction part, so that list_to_float/1
%% accepts them.
tokenize_number(Bin, sign, State=#decoder{offset=Pos}, []) ->
    case Bin of
        <<_:Pos/binary, $-, _/binary>> ->
            tokenize_number(Bin, int, ?INC_COL(State), [$-]);
        _ ->
            tokenize_number(Bin, int, State, [])
    end;
tokenize_number(Bin, int, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, $0, _/binary>> ->
            %% No further integer digits are read after a leading zero.
            tokenize_number(Bin, frac, ?INC_COL(State), [$0 | Text]);
        <<_:Pos/binary, Digit, _/binary>> when Digit >= $1, Digit =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(State), [Digit | Text])
    end;
tokenize_number(Bin, int1, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
            tokenize_number(Bin, int1, ?INC_COL(State), [Digit | Text]);
        _ ->
            tokenize_number(Bin, frac, State, Text)
    end;
tokenize_number(Bin, frac, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, $., Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
            tokenize_number(Bin, frac1, ?ADV_COL(State, 2), [Digit, $. | Text]);
        <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
            %% An exponent without a fraction: ".0" is inserted in front
            %% of the exponent.
            tokenize_number(Bin, esign, ?INC_COL(State), [$e, $0, $. | Text]);
        _ ->
            {{int, lists:reverse(Text)}, State}
    end;
tokenize_number(Bin, frac1, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
            tokenize_number(Bin, frac1, ?INC_COL(State), [Digit | Text]);
        <<_:Pos/binary, Exp, _/binary>> when Exp =:= $e; Exp =:= $E ->
            tokenize_number(Bin, esign, ?INC_COL(State), [$e | Text]);
        _ ->
            {{float, lists:reverse(Text)}, State}
    end;
tokenize_number(Bin, esign, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, Sign, _/binary>> when Sign =:= $-; Sign =:= $+ ->
            tokenize_number(Bin, eint, ?INC_COL(State), [Sign | Text]);
        _ ->
            tokenize_number(Bin, eint, State, Text)
    end;
tokenize_number(Bin, eint, State=#decoder{offset=Pos}, Text) ->
    %% At least one exponent digit is required.
    case Bin of
        <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(State), [Digit | Text])
    end;
tokenize_number(Bin, eint1, State=#decoder{offset=Pos}, Text) ->
    case Bin of
        <<_:Pos/binary, Digit, _/binary>> when Digit >= $0, Digit =< $9 ->
            tokenize_number(Bin, eint1, ?INC_COL(State), [Digit | Text]);
        _ ->
            {{float, lists:reverse(Text)}, State}
    end.
%% @doc Skips whitespace and returns the next token together with the
%% decoder state that follows it. Tokens are start_object, end_object,
%% start_array, end_array, comma, colon, {const, Value} and eof; the end
%% of the input is accepted only in the trim state.
tokenize(Bin, State=#decoder{offset=Pos}) ->
    case Bin of
        <<_:Pos/binary, Space, _/binary>> when ?IS_WHITESPACE(Space) ->
            tokenize(Bin, ?INC_CHAR(State, Space));
        <<_:Pos/binary, ${, _/binary>> ->
            {start_object, ?INC_COL(State)};
        <<_:Pos/binary, $}, _/binary>> ->
            {end_object, ?INC_COL(State)};
        <<_:Pos/binary, $[, _/binary>> ->
            {start_array, ?INC_COL(State)};
        <<_:Pos/binary, $], _/binary>> ->
            {end_array, ?INC_COL(State)};
        <<_:Pos/binary, $,, _/binary>> ->
            {comma, ?INC_COL(State)};
        <<_:Pos/binary, $:, _/binary>> ->
            {colon, ?INC_COL(State)};
        <<_:Pos/binary, "null", _/binary>> ->
            {{const, null}, ?ADV_COL(State, 4)};
        <<_:Pos/binary, "true", _/binary>> ->
            {{const, true}, ?ADV_COL(State, 4)};
        <<_:Pos/binary, "false", _/binary>> ->
            {{const, false}, ?ADV_COL(State, 5)};
        <<_:Pos/binary, ?Q, _/binary>> ->
            tokenize_string(Bin, ?INC_COL(State));
        <<_:Pos/binary, First, _/binary>>
          when First =:= $-; First >= $0, First =< $9 ->
            tokenize_number(Bin, State);
        <<_:Pos/binary>> ->
            trim = State#decoder.state,
            {eof, State}
    end.

%% testing constructs borrowed from the Yaws JSON implementation.

%% Create an object from a list of Key/Value pairs.

%% The empty object.
obj_new() ->
    {}.

%% True when every element is a pair whose key is a binary or an atom.
is_obj(Object) when is_tuple(Object) ->
    is_obj(tuple_to_list(Object));
is_obj(Pairs) when is_list(Pairs) ->
    IsMember = fun ({Key, _}) -> is_binary(Key) orelse is_atom(Key);
                   (_) -> false
               end,
    lists:all(IsMember, Pairs).

%% Builds an object from a list of pairs; exits with
%% {json_bad_object, Pairs} when the list does not describe an object.
obj_from_list(Pairs) when is_list(Pairs) ->
    Valid = is_obj(Pairs),
    if
        Valid -> list_to_tuple(Pairs);
        true -> exit({json_bad_object, Pairs})
    end.

%% Test for equivalence of Erlang terms.
%% Due to arbitrary order of construction, equivalent objects might
%% compare unequal as erlang terms, so we need to carefully recurse
%% through aggregates (tuples and objects).

%% @doc Tells whether two JSON terms are equivalent. Objects are
%% compared regardless of the order of their members, numbers with ==,
%% and an atom is equivalent to the binary that spells its name. Terms
%% that cannot be equivalent yield false.
equiv(X, X) -> true;
equiv(T1, T2) when is_tuple(T1), is_tuple(T2) ->
    equiv_object(T1, T2);
equiv(L1, L2) when is_list(L1), is_list(L2) ->
    equiv_list(L1, L2);
equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
equiv(A1, B2) when is_atom(A1) -> equiv(list_to_binary(atom_to_list(A1)), B2);
equiv(B1, A2) when is_atom(A2) -> equiv(B1, list_to_binary(atom_to_list(A2)));
%% Equal binaries and the atoms true, false and null are covered by the
%% first clause; whatever reaches this point differs.
equiv(_, _) -> false.

%% Object representation and traversal order is unknown.
%% Use the sledgehammer and sort property lists.

%% @doc Tells whether two objects have equivalent members. Atom keys are
%% converted to binaries before the members are sorted by key.
equiv_object(T1, T2) ->
    case {object_pairs(T1), object_pairs(T2)} of
        {{ok, Pairs1}, {ok, Pairs2}} when length(Pairs1) =:= length(Pairs2) ->
            lists:all(fun({{K1, V1}, {K2, V2}}) ->
                              equiv(K1, K2) andalso equiv(V1, V2)
                      end, lists:zip(Pairs1, Pairs2));
        _ ->
            false
    end.

%% Returns {ok, Members} with the members sorted by normalised key, or
%% error when an element of the tuple is not a {Key, Value} pair.
object_pairs(Object) ->
    Elements = tuple_to_list(Object),
    case lists:all(fun({_, _}) -> true; (_) -> false end, Elements) of
        true ->
            {ok, lists:keysort(1, [{key_to_binary(K), V} || {K, V} <- Elements])};
        false ->
            error
    end.

%% Atom keys are compared by their name.
key_to_binary(Key) when is_atom(Key) ->
    list_to_binary(atom_to_list(Key));
key_to_binary(Key) ->
    Key.

%% Recursively compare tuple elements for equivalence.

%% @doc Tells whether two lists have the same length and pairwise
%% equivalent elements.
equiv_list([], []) ->
    true;
equiv_list([V1 | L1], [V2 | L2]) ->
    equiv(V1, V2) andalso equiv_list(L1, L2);
equiv_list(_, _) ->
    false.


% ------------ TESTS ---------------------

%% @doc Decodes one sample document and then checks every pair of the
%% test vector. Returns ok; a failed check raises a badmatch error.
test_all() ->
    [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
    test_one(e2j_test_vec(utf8), 1).
%% @doc Checks every {Term, Json} pair of the list: decoding Json must
%% give a term equivalent to Term, and so must encoding Term and
%% decoding the result. N counts the pairs and is only used by the
%% disabled trace output.
test_one([], _N) ->
    %% io:format("~p tests passed~n", [N-1]),
    ok;
test_one([{Term, Json} | Remaining], N) ->
    %% io:format("[~p] ~p ~p~n", [N, E, J]),
    Decoded = decode(Json),
    true = equiv(Term, Decoded),
    RoundTrip = decode(encode(Term)),
    true = equiv(Term, RoundTrip),
    test_one(Remaining, N + 1).

%% @doc Test vector: pairs of an Erlang term and a JSON text that is
%% expected to decode to it.
e2j_test_vec(utf8) ->
    Numbers =
        [{1, "1"},
         {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
         {-1, "-1"},
         {-3.1416, "-3.14160"},
         {12.0e10, "1.20000e+11"},
         {1.234E+10, "1.23400e+10"},
         {-1.234E-10, "-1.23400e-10"},
         {10.0, "1.0e+01"},
         {123.456, "1.23456E+2"},
         {10.0, "1e1"}],
    Strings =
        [{<<"foo">>, "\"foo\""},
         {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
         {<<"">>, "\"\""},
         {<<"\n\n\n">>, "\"\\n\\n\\n\""},
         {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}],
    Containers =
        [{obj_new(), "{}"},
         {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
         {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
          "{\"foo\":\"bar\",\"baz\":123}"},
         {[], "[]"},
         {[[]], "[[]]"},
         {[1, <<"foo">>], "[1,\"foo\"]"},

         %% json array in a json object
         {obj_from_list([{<<"foo">>, [123]}]),
          "{\"foo\":[123]}"},

         %% json object in a json object
         {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
          "{\"foo\":{\"bar\":true}}"},

         %% fold evaluation order
         {obj_from_list([{<<"foo">>, []},
                         {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
                         {<<"alice">>, <<"bob">>}]),
          "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},

         %% json object in a json array
         {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
          "[-123,\"foo\",{\"bar\":[]},null]"}],
    Numbers ++ Strings ++ Containers.
% ------ utils

%% @spec to_ex_a(term()) -> atom()
%% @doc Converts a term to an atom that already exists. Atoms are
%% returned as they are, binaries and strings are looked up by their
%% text, and any other term is looked up by its printed form. Fails
%% with badarg when no such atom exists.
to_ex_a(A) when is_atom(A) -> A;
to_ex_a(B) when is_binary(B) ->
    to_ex_a(binary_to_list(B));
to_ex_a(S) when is_list(S) ->
    list_to_existing_atom(S);
to_ex_a(T) ->
    %% io_lib:print/1 may return a deep character list, which
    %% list_to_existing_atom/1 does not accept.
    to_ex_a(lists:flatten(io_lib:print(T))).