├── rebar.config ├── .gitignore ├── src ├── cbor.app.src └── cbor.erl ├── LICENSE └── README.md /rebar.config: -------------------------------------------------------------------------------- 1 | {cover_enabled, true}. 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.beam 2 | .eunit 3 | .rebar 4 | ebin 5 | -------------------------------------------------------------------------------- /src/cbor.app.src: -------------------------------------------------------------------------------- 1 | {application, cbor, 2 | [ 3 | {description, "An Erlang cbor library"}, 4 | {vsn, "1"}, 5 | {modules, [ 6 | cbor 7 | ]}, 8 | {registered, []}, 9 | {applications, [ 10 | kernel, 11 | stdlib 12 | ]}, 13 | {env, []} 14 | ]}. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Jihyun Yu 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of cbor-erlang nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Erlang CBOR encoder/decoder 2 | 3 | ### Requirements 4 | 5 | - Erlang 17.0 or higher: cbor-erlang uses Erlang maps to represent CBOR maps, which 6 | were introduced in 17.0. 7 | 8 | ### Highlights 9 | 10 | - Optimized: no sub-binary is created while decoding; the hot code path is built 11 | with a single jump table built by pattern matching. 12 | - Tested: contains all test cases from the standard document (RFC 7049). 13 | - Correct floating-point implementation: can decode floating-point values that are 14 | not supported by the language: half-precision floating points, NaN, 15 | (+-)Infinity. 16 | 17 | ### Decoder mappings 18 | 19 | #### General 20 | 21 | - Both byte strings and UTF-8 strings are mapped to binaries. 22 | - All numbers (integers, bignums, floating points) are mapped to numbers. 23 | - Lists are mapped to plain lists, maps are mapped to maps. 24 | 25 | #### Tags 26 | 27 | - Bignums are decoded to numbers. 
28 | - Date/time string (tag 0) is mapped to {timetext, Binary} 29 | - Epoch-based date/time (tag 1) is mapped to {timeepoch, Number} 30 | - Other tags are mapped to {tag, TagId, Value} 31 | 32 | 33 | ### Encoder mappings 34 | 35 | The encoder encodes Erlang terms to a CBOR-encoded iolist. `iolist_to_binary` can 36 | be used if you need a single binary as output. 37 | 38 | - Numbers (fixed-size numbers, big numbers, floating-point numbers) are supported. 39 | - All binaries are encoded to byte strings. 40 | - Atoms `true`, `false`, `null`, `undefined` are supported. 41 | - All lists/maps are encoded in the break-terminated (indefinite-length) format. 42 | 43 | 44 | ### TODO 45 | 46 | - cbor-erlang does not distinguish byte strings from UTF-8 strings while 47 | decoding. Some invalid CBOR tagged values can be decoded without any error, 48 | for example a date/time string (tag 0) with a byte string data item. The bug 49 | should be fixed later. 50 | - No error reporting on encoding/decoding: no detailed error information is 51 | provided to the caller, for example why encoding/decoding failed or which 52 | byte the decoder failed to decode. 53 | - Decimal fraction (tag 4) and bigfloat (tag 5) are not supported. 54 | - Add a faster encoder which generates non-canonical CBOR data. 55 | -------------------------------------------------------------------------------- /src/cbor.erl: -------------------------------------------------------------------------------- 1 | -module(cbor). 2 | 3 | -export([decode/1, encode/1]). 4 | 5 | % encoder 6 | -define(MAX_8BYTE, 16#ffffffffffffffff). 7 | -define(MAX_4BYTE, 16#ffffffff). 8 | -define(MAX_2BYTE, 16#ffff). 9 | -define(MAX_1BYTE, 16#ff). 
% encode(Term) -> iolist()
%
% Encodes an Erlang term as a CBOR iolist (use iolist_to_binary/1 for a single
% binary). Supported terms:
%   false | true | null | undefined  - CBOR simple values 20..23
%   inf | neginf | nan               - non-finite floats, emitted as half floats
%   integer()                        - shortest integer form, bignum tag if needed
%   float()                          - always emitted as a 64-bit float
%   binary()                         - byte string
%   list() | map()                   - indefinite-length array / map
%   {tag, N, Data}, {timetext, Data}, {timeepoch, Data}, {simple, N}
% Any other term throws {invalid, Term}.
encode(false) -> <<16#f4>>;
encode(true) -> <<16#f5>>;
encode(null) -> <<16#f6>>;
encode(undefined) -> <<16#f7>>;
% floating-point value atoms are translated to half-precision bit patterns
encode(inf) -> <<16#f9, 16#7c, 0>>;
encode(neginf) -> <<16#f9, 16#fc, 0>>;
% 16#7e00 is the canonical (quiet, positive) NaN pattern of RFC 7049
encode(nan) -> <<16#f9, 16#7e, 0>>;
encode(Num) when is_integer(Num) -> encode_num(Num);
encode(Float) when is_float(Float) -> encode_float(Float);
encode(Bin) when is_binary(Bin) -> encode_bin(Bin);
encode(List) when is_list(List) ->
    % 16#9f opens an indefinite-length array, 16#ff is the break byte
    [<<16#9f>>, [encode(Item) || Item <- List], <<16#ff>>];
encode(Map) when is_map(Map) ->
    % 16#bf opens an indefinite-length map, 16#ff is the break byte
    [<<16#bf>>, [[encode(K), encode(V)] || {K, V} <- maps:to_list(Map)], <<16#ff>>];
encode({tag, N, Data}) -> encode_tag(N, Data);
encode({timetext, Data}) -> encode({tag, 0, Data});
encode({timeepoch, Data}) -> encode({tag, 1, Data});
encode({simple, N}) -> encode_simple(N);
encode(Term) -> throw({invalid, Term}).

% Floats are always written as 64-bit IEEE 754 values.
encode_float(Float) when is_float(Float) -> <<16#fb, Float:64/float>>. % TODO: add canonical impl

% encode_num(Integer) -> iodata()
% Picks the shortest CBOR integer form that can hold the value.

% positive bignums (tag 2): big-endian magnitude wrapped in a byte string
encode_num(N) when N > ?MAX_8BYTE ->
    Len = bignum_bytes(N) * 8,
    [<<16#c2>>, encode_bin(<<N:Len>>)];

% positive integers
encode_num(N) when N > ?MAX_4BYTE -> <<16#1b, N:64>>;
encode_num(N) when N > ?MAX_2BYTE -> <<16#1a, N:32>>;
encode_num(N) when N > ?MAX_1BYTE -> <<16#19, N:16>>;
encode_num(N) when N > 23 -> <<16#18, N:8>>;
encode_num(N) when N >= 0 -> <<N>>;

% negative integers: the argument on the wire is -1-N, so a K-byte argument
% reaches down to -1 - MAX_KBYTE (e.g. -256 still fits in one byte)
encode_num(N) when N > -25 -> <<(16#1f-N):8>>;
encode_num(N) when N >= -1 - ?MAX_1BYTE -> <<16#38, (-1-N):8>>;
encode_num(N) when N >= -1 - ?MAX_2BYTE -> <<16#39, (-1-N):16>>;
encode_num(N) when N >= -1 - ?MAX_4BYTE -> <<16#3a, (-1-N):32>>;
encode_num(N) when N >= -1 - ?MAX_8BYTE -> <<16#3b, (-1-N):64>>;

% negative bignums (tag 3): byte string holding -1-N
encode_num(N) when is_integer(N) ->
    Inv = -1-N,
    Len = bignum_bytes(Inv) * 8,
    [<<16#c3>>, encode_bin(<<Inv:Len>>)].
% Number of bytes needed to hold the non-negative integer N (at least 1).
bignum_bytes(N) when N > 255 -> 1 + bignum_bytes(N div 256);
bignum_bytes(_) -> 1.

% encode_bin(Binary) -> iolist()
% Emits a definite-length byte string header (major type 2) with the shortest
% length form, followed by the untouched payload.
encode_bin(Bin) when byte_size(Bin) =< 16#17 -> [<<(byte_size(Bin) + 16#40)>>, Bin];
encode_bin(Bin) when byte_size(Bin) =< ?MAX_1BYTE -> [<<16#58, (byte_size(Bin)):8>>, Bin];
encode_bin(Bin) when byte_size(Bin) =< ?MAX_2BYTE -> [<<16#59, (byte_size(Bin)):16>>, Bin];
encode_bin(Bin) when byte_size(Bin) =< ?MAX_4BYTE -> [<<16#5a, (byte_size(Bin)):32>>, Bin];
encode_bin(Bin) when byte_size(Bin) =< ?MAX_8BYTE -> [<<16#5b, (byte_size(Bin)):64>>, Bin];
encode_bin(_) -> throw(not_implemented).

% encode_tag(TagNumber, Data) -> iolist()
% Emits the tag header (major type 6) followed by the encoded Data.
% TagNumber must be a non-negative integer: a negative value would otherwise
% yield an initial byte belonging to a different major type.
encode_tag(N, Data) when is_integer(N), N >= 0 ->
    [encode_tag_head(N), encode(Data)].

% Shortest header for tag number N; fails for N above 64 bits.
encode_tag_head(N) when N =< 16#17 -> <<(N + 16#c0)>>;
encode_tag_head(N) when N =< ?MAX_1BYTE -> <<16#d8, N:8>>;
encode_tag_head(N) when N =< ?MAX_2BYTE -> <<16#d9, N:16>>;
encode_tag_head(N) when N =< ?MAX_4BYTE -> <<16#da, N:32>>;
encode_tag_head(N) when N =< ?MAX_8BYTE -> <<16#db, N:64>>.

% encode_simple(N) -> binary()
% Simple values 0..19 live in the initial byte; larger ones use 16#f8 plus one
% byte. N must be an integer in 0..255.
encode_simple(N) when is_integer(N), N >= 0, N =< 19 -> <<(N + 16#e0)>>;
encode_simple(N) when is_integer(N), N > 19, N =< ?MAX_1BYTE -> <<16#f8, N>>.

% decoder

% decode(Input) -> term()
% Input is either CBOR data as a binary, or a hex string (list of hex digit
% characters) that is converted to a binary first.
decode(List) when is_list(List) ->
    decode(hexstr_to_bin(List));
decode(Data) ->
    build(tokenize(Data, [])).

% Clause generators for tokenize/2. Each expands to one clause keyed on the
% initial byte N; T is the remaining input and Acc the token stack.
-define(TK(N, EXPR), tokenize(<<N, T/binary>>, Acc) -> EXPR).
-define(TK_ITEM(N, ITEM), ?TK(N, tokenize(T, [(ITEM)|Acc]))).
-define(TK_SM(N), ?TK_ITEM(N, N)).
-define(TK_NM(N), ?TK_ITEM(N, 16#1f-N)).
-define(TK_STR(N), ?TK(N, tokenize_str((N-16#40), T, Acc))).
-define(TK_UTF8(N), ?TK(N, tokenize_str((N-16#60), T, Acc))).
-define(TK_ARR(N), ?TK_ITEM(N, {list, N-16#80})).
-define(TK_MAP(N), ?TK_ITEM(N, {map, N-16#a0})).
-define(TK_TAG(N), ?TK_ITEM(N, {tag, N - 16#c0})).
-define(TK_SIMPLE(N), ?TK_ITEM(N, {simple, N - 16#e0})).
% initial bytes that are recognised but not supported
-define(TK_NI(N), tokenize(<<N, _/binary>>, _) -> throw(not_implemented)).
% Expands NAME once for each of the 24 consecutive initial bytes
% BASE .. BASE+23, yielding 24 tokenize/2 clauses separated by ';'.
-define(TK_FIXED(BASE, NAME),
    ?NAME((BASE+0));  ?NAME((BASE+1));  ?NAME((BASE+2));
    ?NAME((BASE+3));  ?NAME((BASE+4));  ?NAME((BASE+5));
    ?NAME((BASE+6));  ?NAME((BASE+7));  ?NAME((BASE+8));
    ?NAME((BASE+9));  ?NAME((BASE+10)); ?NAME((BASE+11));
    ?NAME((BASE+12)); ?NAME((BASE+13)); ?NAME((BASE+14));
    ?NAME((BASE+15)); ?NAME((BASE+16)); ?NAME((BASE+17));
    ?NAME((BASE+18)); ?NAME((BASE+19)); ?NAME((BASE+20));
    ?NAME((BASE+21)); ?NAME((BASE+22)); ?NAME((BASE+23))
).

% Four clauses for the initial bytes BASE .. BASE+3, whose argument follows in
% 1, 2, 4 or 8 bytes. EXPR may refer to Arg (the argument), T and Acc.
-define(TK_LENGTHED(BASE, EXPR),
    tokenize(<<(BASE), Arg:8, T/binary>>, Acc) -> EXPR;
    tokenize(<<(BASE+1), Arg:16, T/binary>>, Acc) -> EXPR;
    tokenize(<<(BASE+2), Arg:32, T/binary>>, Acc) -> EXPR;
    tokenize(<<(BASE+3), Arg:64, T/binary>>, Acc) -> EXPR).

% Same as TK_LENGTHED, for clauses that just push ITEM and continue.
-define(TK_LENGTHED_ITEM(BASE, ITEM), ?TK_LENGTHED(BASE, tokenize(T, [(ITEM) | Acc]))).

% tokenize(Data, Acc) -> Tokens
% One clause per initial byte. Tokens are pushed onto Acc, so the result is in
% reverse input order. Unknown or truncated input throws invalid.

% unsigned integers: in the initial byte, then 1/2/4/8-byte argument
?TK_FIXED(0, TK_SM);
?TK_LENGTHED_ITEM(16#18, Arg);

% negative integers: in the initial byte, then 1/2/4/8-byte argument
?TK_FIXED(16#20, TK_NM);
?TK_LENGTHED_ITEM(16#38, -1-Arg);

% byte strings: short, explicit length, indefinite-length start marker
?TK_FIXED(16#40, TK_STR);
?TK_LENGTHED(16#58, tokenize_str(Arg, T, Acc));
?TK_ITEM(16#5f, strb);

% UTF-8 strings: short, explicit length, indefinite-length start marker
?TK_FIXED(16#60, TK_UTF8);
?TK_LENGTHED(16#78, tokenize_str(Arg, T, Acc));
?TK_ITEM(16#7f, strb);

% arrays: short, explicit length, indefinite-length start marker
?TK_FIXED(16#80, TK_ARR);
?TK_LENGTHED_ITEM(16#98, {list, Arg});
?TK_ITEM(16#9f, listb);

% maps: short, explicit length, indefinite-length start marker
?TK_FIXED(16#a0, TK_MAP);
?TK_LENGTHED_ITEM(16#b8, {map, Arg});
?TK_ITEM(16#bf, mapb);

% tags with dedicated tokens
?TK_ITEM(16#c0, timetext);  % text-based date/time
?TK_ITEM(16#c1, timeepoch); % epoch-based date/time
?TK_ITEM(16#c2, posbignum); % positive bignum
?TK_ITEM(16#c3, negbignum); % negative bignum

?TK_NI(16#c4); % decimal fraction
?TK_NI(16#c5); % bigfloat

% tagged item: 6~20:unassigned, 21:base64, 22:base16, 23:encoded CBOR
?TK_TAG(16#c6); ?TK_TAG(16#c7); ?TK_TAG(16#c8);
?TK_TAG(16#c9); ?TK_TAG(16#ca); ?TK_TAG(16#cb);
?TK_TAG(16#cc); ?TK_TAG(16#cd); ?TK_TAG(16#ce);
?TK_TAG(16#cf); ?TK_TAG(16#d0); ?TK_TAG(16#d1);
?TK_TAG(16#d2); ?TK_TAG(16#d3); ?TK_TAG(16#d4);
?TK_TAG(16#d5); ?TK_TAG(16#d6); ?TK_TAG(16#d7);
% N byte-tagged item, 32:URL, 33:base64url, 34:base64, 35:regex, 36:mime, 55799: selfdesc
?TK_LENGTHED_ITEM(16#d8, {tag, Arg});

% simple values 0..19, carried in the initial byte
?TK_SIMPLE(16#e0); ?TK_SIMPLE(16#e1); ?TK_SIMPLE(16#e2); ?TK_SIMPLE(16#e3); ?TK_SIMPLE(16#e4);
?TK_SIMPLE(16#e5); ?TK_SIMPLE(16#e6); ?TK_SIMPLE(16#e7); ?TK_SIMPLE(16#e8); ?TK_SIMPLE(16#e9);
?TK_SIMPLE(16#ea); ?TK_SIMPLE(16#eb); ?TK_SIMPLE(16#ec); ?TK_SIMPLE(16#ed); ?TK_SIMPLE(16#ee);
?TK_SIMPLE(16#ef); ?TK_SIMPLE(16#f0); ?TK_SIMPLE(16#f1); ?TK_SIMPLE(16#f2); ?TK_SIMPLE(16#f3);

% atoms
?TK(16#f4, tokenize(T, [false | Acc]));
?TK(16#f5, tokenize(T, [true | Acc]));
?TK(16#f6, tokenize(T, [null | Acc]));
?TK(16#f7, tokenize(T, [undefined | Acc]));

% simple value, one byte follows
tokenize(<<16#f8, Simple, T/binary>>, Acc) ->
    tokenize(T, [{simple, Simple} | Acc]);

% half, single and double precision floats
tokenize(<<16#f9, Half:2/binary, T/binary>>, Acc) ->
    tokenize(T, [decode_hf(Half) | Acc]);
tokenize(<<16#fa, Single:4/binary, T/binary>>, Acc) ->
    tokenize(T, [decode_sf(Single) | Acc]);
tokenize(<<16#fb, Double:8/binary, T/binary>>, Acc) ->
    tokenize(T, [decode_df(Double) | Acc]);

% break byte closing an indefinite-length item
?TK(16#ff, tokenize(T, [break | Acc]));

% end of input: hand back the (reversed) token list
tokenize(<<>>, Acc) -> Acc;
% anything else is an unknown initial byte or a truncated header
tokenize(_Data, _Acc) -> throw(invalid).
% tokenize_str(Len, Data, Acc) -> Tokens
% Splits a Len-byte string off the front of Data, pushes it as a token and
% resumes tokenizing the rest. Throws invalid when fewer than Len bytes remain,
% matching how tokenize/2 reports other malformed input.
tokenize_str(Len, Data, Acc) ->
    case Data of
        <<Str:Len/binary, T/binary>> -> tokenize(T, [Str | Acc]);
        _ -> throw(invalid)
    end.

% build(Tokens) -> term()
% Turns the reversed token list produced by tokenize/2 into a single Erlang
% term. Throws {invalid, Tokens, Stack} when the tokens do not form exactly one
% complete data item (for example an indefinite-length item without its break).
build(Tokens) ->
    case build(Tokens, []) of
        {done, Item} -> Item;
        % an indefinite-length opener was reached without a matching break
        {closed, _Item, _Rest} -> throw({invalid, Tokens, []})
    end.

% build(Tokens, Acc) -> {done, Item} | {closed, Item, RestTokens}
% Tokens arrive last-to-first, so pushing them onto Acc restores input order.
%   {done, Item}         - all tokens consumed, exactly one item was built
%   {closed, Item, Rest} - an indefinite-length opener was reached; Item is the
%                          container built from Acc, Rest the unconsumed tokens

% handle indefinite-length items: a break starts a nested build that runs
% until the matching opener (listb / strb / mapb)
build([break | Tail], Acc) ->
    case build(Tail, []) of
        {closed, Item, Tail2} -> build(Tail2, [Item | Acc]);
        % the nested build ran out of tokens: break without an opener
        {done, _Item} -> throw({invalid, [break | Tail], Acc})
    end;

build([listb | Tail], Acc) -> {closed, Acc, Tail};
build([strb | Tail], Acc) -> {closed, iolist_to_binary(Acc), Tail};
build([mapb | Tail], Acc) -> {closed, build_map(Acc), Tail};

% handle fixed-length items: take the N most recently built values
build([{list, N} | Tail], Acc) ->
    {NList, Tail2} = reverse_n(N, Acc, []),
    build(Tail, [lists:reverse(NList) | Tail2]);
build([{map, N} | Tail], Acc) ->
    {NList, Tail2} = reverse_n(N*2, Acc, []),
    build(Tail, [build_map(lists:reverse(NList)) | Tail2]);

% bignums: the tagged byte string is a big-endian unsigned integer
build([posbignum | Tail], [Bin | AccTail]) ->
    Len = byte_size(Bin) * 8,
    <<Num:Len>> = Bin,
    build(Tail, [Num | AccTail]);
build([negbignum | Tail], [Bin | AccTail]) ->
    Len = byte_size(Bin) * 8,
    <<Num:Len>> = Bin,
    build(Tail, [-1-Num | AccTail]);

% times and generic tags wrap the value that follows them
build([timetext | Tail], [Value | AccTail]) -> build(Tail, [{timetext, Value} | AccTail]);
build([timeepoch | Tail], [Value | AccTail]) -> build(Tail, [{timeepoch, Value} | AccTail]);
build([{tag, N} | Tail], [Value | AccTail]) -> build(Tail, [{tag, N, Value} | AccTail]);

% every other token is already a finished value
build([Token | Tail], Acc) -> build(Tail, [Token | Acc]);

build([], [Item]) -> {done, Item};

build(Tokens, Stacks) -> throw({invalid, Tokens, Stacks}).

% Builds a map from a flat [K1, V1, K2, V2, ...] list.
build_map(List) -> build_map(List, []).
build_map([K, V | Tail], Acc) -> build_map(Tail, [{K, V} | Acc]);
build_map([], Acc) -> maps:from_list(Acc).

% reverse_n(N, List, Acc) -> {ReversedFirstN, Rest}
% Moves the first N elements of List onto Acc (reversing them). Throws invalid
% when List holds fewer than N elements, i.e. a container declared more items
% than the input provides.
reverse_n(0, List, Acc) -> {Acc, List};
reverse_n(N, [Hd | Tl], Acc) -> reverse_n(N-1, Tl, [Hd | Acc]);
reverse_n(_, [], _) -> throw(invalid).
% Erlang does not support half precision floating point, so emulate it by
% widening the 16-bit pattern (1 sign, 5 exponent, 10 fraction bits) into a
% 32-bit float. Infinities and NaN are returned as the atoms inf, neginf, nan.
decode_hf(<<0:1, 0:5, 0:10>>) -> 0.0;
decode_hf(<<1:1, 0:5, 0:10>>) -> -0.0;
% subnormal: value is Frac * 2^-24. hf_norm shifts Frac left Count times until
% bit 10 (the implicit leading one) is set, so the value is
% (Frac2 / 1024) * 2^(-14 - Count). Writing Frac2 into a 10-bit field drops
% that leading one, as the normalised 32-bit format requires.
decode_hf(<<Sign:1, 0:5, Frac:10>>) ->
    {Frac2, Count} = hf_norm(Frac, 0),
    <<Value:32/float>> = <<Sign:1, (127 - 14 - Count):8, Frac2:10, 0:13>>,
    Value;
decode_hf(<<0:1, 31:5, 0:10>>) -> inf;
decode_hf(<<1:1, 31:5, 0:10>>) -> neginf;
decode_hf(<<_:1, 31:5, _:10>>) -> nan;
% normal: re-bias the exponent from 15 to 127 and zero-extend the fraction
decode_hf(<<Sign:1, Exp:5, Frac:10>>) ->
    Exp32 = Exp - 15 + 127,
    <<Value:32/float>> = <<Sign:1, Exp32:8, Frac:10, 0:13>>,
    Value.

% Erlang does not support binary matching for nan/inf, so emulate it: the
% float match fails for those bit patterns and the later clauses pick them up.
decode_sf(<<Value:32/float>>) -> Value;
decode_sf(<<0:1, 255:8, 0:23>>) -> inf;
decode_sf(<<1:1, 255:8, 0:23>>) -> neginf;
decode_sf(<<_:1, 255:8, _:23>>) -> nan.

decode_df(<<Value:64/float>>) -> Value;
decode_df(<<0:1, 2047:11, 0:52>>) -> inf;
decode_df(<<1:1, 2047:11, 0:52>>) -> neginf;
decode_df(<<_:1, 2047:11, _:52>>) -> nan.

% Doubles Frac until it reaches 1024 (bit 10 set), counting the doublings.
% Only called with a non-zero Frac; zero is handled by the first two
% decode_hf clauses.
hf_norm(Frac, Count) when Frac < 1024 -> hf_norm(Frac * 2, Count+1);
hf_norm(Frac, Count) -> {Frac, Count}.

%% helper functions

% Value of a single hexadecimal digit character (either case).
int(C) when $0 =< C, C =< $9 ->
    C - $0;
int(C) when $A =< C, C =< $F ->
    C - $A + 10;
int(C) when $a =< C, C =< $f ->
    C - $a + 10.

% Converts a hex string such as "01ff" to the binary <<1, 255>>.
hexstr_to_bin(S) ->
    list_to_binary(hexstr_to_list(S)).

% Converts a hex string to a list of byte values, two digits per byte.
% Fails with function_clause on an odd number of digits.
hexstr_to_list([X,Y|T]) ->
    [int(X)*16 + int(Y) | hexstr_to_list(T)];
hexstr_to_list([]) ->
    [].

% unit tests
-include_lib("eunit/include/eunit.hrl").
% Single-value test cases from RFC7049. Every case is decoded from its hex
% form and also round-tripped through encode/decode.
rfc_value_test_() ->
    lists:flatmap(
        fun({Hex, Result}) ->
            [
                ?_assertEqual(Result, decode(hexstr_to_bin(Hex))),
                ?_assertEqual(Result, decode(iolist_to_binary(encode(Result))))
            ]
        end,
        rfc_value_cases()).

% {HexEncoding, ExpectedTerm} pairs for rfc_value_test_/0.
rfc_value_cases() ->
    [
        {"00", 0},
        {"01", 1},
        {"0a", 10},
        {"17", 23},
        {"1818", 24},
        {"1819", 25},
        {"1864", 100},
        {"1864", 100},
        {"1903e8", 1000},
        {"1903e8", 1000},
        {"1a000f4240", 1000000},
        {"1b000000e8d4a51000", 1000000000000},
        {"1bffffffffffffffff", 18446744073709551615},
        {"c249010000000000000000", 18446744073709551616},
        {"3bffffffffffffffff", -18446744073709551616},
        {"c349010000000000000000", -18446744073709551617},
        {"20", -1},
        {"29", -10},
        {"3863", -100},
        {"3903e7", -1000},
        {"f90000", 0.0},
        {"f98000", -0.0},
        {"f93c00", 1.0},
        {"fb3ff199999999999a", 1.1},
        {"f93e00", 1.5},
        {"f97bff", 65504.0},
        {"fa47c35000", 100000.0},
        {"fa7f7fffff", 3.4028234663852886e+38},
        {"fb7e37e43c8800759c", 1.0e+300},
        {"f90001", 5.960464477539063e-8},
        {"f90400", 0.00006103515625},
        {"f9c400", -4.0},
        {"fbc010666666666666", -4.1},
        {"f97c00", inf},
        {"f97e00", nan},
        {"f9fc00", neginf},
        {"fa7f800000", inf},
        {"fa7fc00000", nan},
        {"faff800000", neginf},
        {"fb7ff0000000000000", inf},
        {"fb7ff8000000000000", nan},
        {"fbfff0000000000000", neginf},
        {"f4", false},
        {"f5", true},
        {"f6", null},
        {"f7", undefined},
        {"f0", {simple, 16}},
        {"f818", {simple, 24}},
        {"f8ff", {simple, 255}},
        {"c074323031332d30332d32315432303a30343a30305a", {timetext, <<"2013-03-21T20:04:00Z">>}},
        {"c11a514b67b0", {timeepoch, 1363896240}},
        {"c1fb41d452d9ec200000", {timeepoch, 1363896240.5}},
        {"d74401020304", {tag, 23, <<1, 2, 3, 4>>}},
        {"d818456449455446", {tag, 24, <<"dIETF">>}},
        {"d82076687474703a2f2f7777772e6578616d706c652e636f6d", {tag, 32, <<"http://www.example.com">>}},
        {"40", <<"">>},
        {"4401020304", <<1, 2, 3, 4>>},
        {"60", <<"">>},
        {"6161", <<"a">>},
        {"6449455446", <<"IETF">>},
        {"62225c", <<"\"\\">>},
        % All unicodes are decoded to binaries, to check binary values
        {"62c3bc", <<16#c3, 16#bc>>},
        {"63e6b0b4", <<16#e6, 16#b0, 16#b4>>},
        {"64f0908591", <<16#f0, 16#90, 16#85, 16#91>>}
    ].

% Data-structure test cases from RFC7049. Each case checks the token stream,
% the term built from it, and an encode/decode round trip.
rfc_data_test_() ->
    lists:flatmap(
        fun({Hex, Tokens, Result}) ->
            [
                ?_assertEqual(Tokens, tokenize(hexstr_to_bin(Hex), [])),
                ?_assertEqual(Result, build(Tokens)),
                ?_assertEqual(Result, decode(iolist_to_binary(encode(Result))))
            ]
        end,
        rfc_data_cases()).

% {HexEncoding, ExpectedTokens, ExpectedTerm} triples for rfc_data_test_/0.
% Tokens are listed in the reversed order that tokenize/2 returns.
rfc_data_cases() ->
    [
        {"80", [{list, 0}], []},
        {"83010203", [3, 2, 1, {list, 3}], [1, 2, 3]},
        {"8301820203820405", [5, 4, {list, 2}, 3, 2, {list, 2}, 1, {list, 3}], [1, [2, 3], [4, 5]]},
        {"98190102030405060708090a0b0c0d0e0f101112131415161718181819",
            [25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,{list,25}],
            [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]},
        {"a0", [{map, 0}], #{}},
        {"a201020304", [4, 3, 2, 1, {map, 2}], #{1=>2, 3=>4}},
        {"a26161016162820203",
            [3, 2, {list, 2}, <<"b">>, 1, <<"a">>, {map, 2}],
            #{<<"a">> => 1, <<"b">> => [2, 3]}},
        {"826161a161626163",
            [<<"c">>, <<"b">>, {map, 1}, <<"a">>, {list, 2}],
            [<<"a">>, #{<<"b">> => <<"c">>}]},
        {"a56161614161626142616361436164614461656145",
            [<<"E">>, <<"e">>, <<"D">>, <<"d">>, <<"C">>, <<"c">>,
             <<"B">>, <<"b">>, <<"A">>, <<"a">>, {map, 5}],
            #{<<"a">> => <<"A">>, <<"b">> => <<"B">>, <<"c">> => <<"C">>,
              <<"d">> => <<"D">>, <<"e">> => <<"E">>}},
        {"5f42010243030405ff", [break, <<3, 4, 5>>, <<1, 2>>, strb], <<1, 2, 3, 4, 5>>},
        {"7f657374726561646d696e67ff",
            [break, <<"ming">>, <<"strea">>, strb],
            <<"streaming">>},
        {"9fff", [break, listb], []},
        {"9f018202039f0405ffff",
            [break, break, 5, 4, listb, 3, 2, {list, 2}, 1, listb],
            [1, [2, 3], [4, 5]]},
        {"9f01820203820405ff",
            [break, 5, 4, {list, 2}, 3, 2, {list, 2}, 1, listb],
            [1, [2, 3], [4, 5]]},
        {"83018202039f0405ff",
            [break, 5, 4, listb, 3, 2, {list, 2}, 1, {list, 3}],
            [1, [2, 3], [4, 5]]},
        {"83019f0203ff820405",
            [5, 4, {list, 2}, break, 3, 2, listb, 1, {list, 3}],
            [1, [2, 3], [4, 5]]},
        {"9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff",
            [break,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,listb],
            [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25]},
        {"bf61610161629f0203ffff",
            [break, break, 3, 2, listb, <<"b">>, 1, <<"a">>, mapb],
            #{<<"a">> => 1, <<"b">> => [2, 3]}},
        {"826161bf61626163ff",
            [break, <<"c">>, <<"b">>, mapb, <<"a">>, {list, 2}],
            [<<"a">>, #{<<"b">> => <<"c">>}]},
        {"bf6346756ef563416d7421ff",
            [break, -2, <<"Amt">>, true, <<"Fun">>, mapb],
            #{<<"Fun">> => true, <<"Amt">> => -2}}
    ].

% Rough decoding benchmark: decodes each sample 1000 times and prints
% throughput figures. It asserts nothing beyond the decodes succeeding.
bench_test() ->
    Rounds = 1000,
    Samples = [
        {"nd list", "9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff"},
        {"fixed list", "98190102030405060708090a0b0c0d0e0f101112131415161718181819"},
        {"fixed map", "a56161614161626142616361436164614461656145"},
        {"nested", "bf61610161629f0203ffff"}
    ],
    lists:foreach(
        fun({Name, Hex}) -> bench_sample(Name, hexstr_to_bin(Hex), Rounds) end,
        Samples).

% Times Rounds decodes of Bin and prints decodes/s, us/decode and MB/s.
bench_sample(Name, Bin, Rounds) ->
    {Usec, ok} = timer:tc(fun() -> repeat_decode_n(Rounds, Bin) end),
    PerSecond = Rounds * 1000000 div Usec,
    UsecPerDecode = Usec / Rounds,
    MBytesPerSecond = byte_size(Bin) * Rounds / Usec,
    io:format("~s: ~p #/s ~.4f us/# ~.2f MB/s~n",
        [Name, PerSecond, UsecPerDecode, MBytesPerSecond]).

% Decodes Bin N times, discarding the results; returns ok.
repeat_decode_n(N, Bin) when N =/= 0 ->
    _ = decode(Bin),
    repeat_decode_n(N-1, Bin);
repeat_decode_n(0, _) ->
    ok.
414 | --------------------------------------------------------------------------------