├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGES.md ├── LICENSE ├── README.md ├── rebar.config ├── rebar.lock └── src ├── jsx.app.src ├── jsx.erl ├── jsx_config.erl ├── jsx_config.hrl ├── jsx_consult.erl ├── jsx_decoder.erl ├── jsx_encoder.erl ├── jsx_parser.erl ├── jsx_to_json.erl ├── jsx_to_term.erl └── jsx_verify.erl /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: EUnit 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - 'main' 7 | push: 8 | branches: 9 | - 'main' 10 | 11 | jobs: 12 | build: 13 | name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }} 14 | runs-on: ${{ matrix.os }} 15 | 16 | strategy: 17 | matrix: 18 | otp_version: ['19.3.6', '20.3.8.26', '21.3.8.16', '22.3.4.2', '23.0.2', '24.0'] 19 | os: [ubuntu-18.04] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: gleam-lang/setup-erlang@v1.1.2 24 | with: 25 | otp-version: ${{ matrix.otp_version }} 26 | 27 | - name: compile 28 | run: rebar3 compile 29 | 30 | - name: test 31 | run: rebar3 eunit 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .rebar3 2 | .eunit 3 | deps 4 | ebin 5 | *.o 6 | *.beam 7 | *.plt 8 | erl_crash.dump 9 | .DS_Store 10 | doc 11 | .rebar 12 | *.sublime-* 13 | rebar3 14 | _build/ 15 | 16 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | v3.1.0 2 | 3 | * Dialyzer and api spec improvements 4 | 5 | v3.0 6 | 7 | * drop support for OTP versions before 17.0 8 | * remove definition options for disabling maps globally, `{return_maps, false}` is still an accepted option to `decode/2` 9 | 10 | v2.8.2 11 | 12 | * enable `debug_info` for rebar3 13 | 14 | v2.8.1 15 | 16 | * enable `debug_info` when used via mix 17 | * accept 
`erlang:timestamp` as input to the parser 18 | 19 | 20 | v2.8.0 21 | 22 | * add `JSX_FORCE_MAPS` env var for forcing decoding to maps rather than 23 | attempting to autodetect 24 | 25 | v2.7.2 26 | 27 | * fix an issue where tuples were assumed to be jsx ast and not checked 28 | * mask a `function_clause` error in encoder with a `badarg` error for api unity 29 | 30 | v2.7.1 31 | 32 | * support for milliseconds in datetimes 33 | 34 | v2.7.0 35 | 36 | * `return_tail` option 37 | * fixes for edoc generation 38 | 39 | v2.6.2 40 | 41 | * ensure maps are always enabled when compiling via mix 42 | 43 | v2.6.1 44 | 45 | * hex.pm maintenance release 46 | 47 | v2.6.0 48 | 49 | * equivalent to v2.5.3 but created for semver reasons 50 | 51 | v2.5.3 52 | 53 | * add a `mix.exs` to be buildable by both mix and rebar 54 | * minor README updates 55 | 56 | v2.5.2 57 | 58 | * fix regression parsing <<"-0e...">> (thanks @c-bik) 59 | 60 | v2.5.1 61 | 62 | * assume all datetimes are UTC time and add `Z` designator to indicate 63 | * fix parsing issue with datetimes in arrays 64 | 65 | v2.5.0 66 | 67 | * `consult/2` function for reading a file directly to a json term 68 | * `maps_always` build flag for always returning maps on platforms 69 | that support them 70 | * dialyzer fixes 71 | 72 | v2.4.0 73 | 74 | * enough performance improvements to justify a new version. 
2-3x 75 | speedup depending on mode of operation 76 | 77 | v2.3.1 78 | 79 | * fixes an issue where astral plane json escape sequences were 80 | inadvertently being converted to the unicode replacement 81 | character 82 | 83 | v2.3 84 | 85 | * switched to a faster implementation of string parsing in both 86 | the decoder and encoder 87 | * expand `uescape` option to the decoder 88 | * allow control codes in json passed to decoder (contrary to the spec, 89 | yes) 90 | 91 | v2.2 92 | 93 | * `return_maps` option 94 | * `uescape` option for 7-bit clean output 95 | * add `Makefile` for slightly better `erlang.mk` compatibility 96 | * add `maps_support/0` call to determine whether `jsx` was compiled 97 | with support for maps or not 98 | 99 | v2.1.1 100 | 101 | * faster generation of json via iolists 102 | * `repeat_keys` option 103 | 104 | v2.1 105 | 106 | * force the end of streams with `end_json` in addition to `end_stream` 107 | * support for encoding erlang datetime tuples to iso8601 format 108 | * allow a single trailing comma in objects and arrays 109 | 110 | v2.0.4 111 | 112 | * more typespec adjustments 113 | 114 | v2.0.3 115 | 116 | * update some typespecs to make them more comprehensive 117 | 118 | v2.0.2 119 | 120 | * fixes travis-ci spec 121 | 122 | v2.0.1 123 | 124 | * fix regression in output of empty objects/arrays 125 | 126 | v2.0 127 | 128 | * jsx is much more pragmatic by default; common json errors are silently 129 | ignored (and fixed). 
stricter parsing must be enabled with options 130 | * add support for encoding otp 17.0's new maps data type 131 | * removed `pre_encode` and `post_decode` options in favour of making jsx 132 | functions easier to wrap and customize 133 | * streaming behavior is now disabled by default and must be requested explicitly 134 | * removed deprecated function names (`to_json`, `to_term`, `term_to_json`, etc) 135 | * expanded test coverage 136 | 137 | 138 | v1.4.5 139 | 140 | * various fixes to typespecs uncovered by dialyzer 141 | * allow integer keys during encoding 142 | * convert atoms (other than `true`, `false` and `null`) to strings during encoding 143 | 144 | v1.4.4 145 | 146 | * typespec for `json_term/0` fixed 147 | * incorrect boolean shortcircuiting fixed in multibyte escape processing 148 | 149 | v1.4.3 150 | 151 | * add empty rebar.config for mix build tool 152 | * add `attempt_atom` option for decoding json objects 153 | * fix a bug related to multibyte codepoints and streaming input 154 | * add a missing error state in the encoder 155 | 156 | v1.4.2 157 | 158 | * build apparatus cleaned up and streamlined 159 | * new `{raw, <<"json goes here">>}` intermediate form to support direct generation of json 160 | * bugfixes involving inappropriate exceptions from jsx functions 161 | 162 | v1.4.1 163 | 164 | * fixes a bug with interaction between `dirty_strings` and even numbers of escape characters 165 | * performance enhancements 166 | 167 | v1.4 168 | 169 | * radically refactored decoder 170 | * `dirty_strings` now behaves intuitively in decoding. 
bad codepoints, bad utf8, illegal characters and escapes (except `"` and `'` if `single_quoted_strings` is enabled) are ignored completely 171 | * `incomplete_handler` & `error_handler` are now available for use, see documentation in README 172 | 173 | v1.3.3 174 | 175 | * `pre_encode` now orders input in the order you'd expect 176 | 177 | v1.3.2 178 | 179 | * `pre_encode` is now able to handle tuples *correctly* 180 | 181 | v1.3.1 182 | 183 | * `pre_encode` is now able to handle tuples 184 | 185 | v1.3 186 | 187 | * introduces `prettify/1` and `minify/1`, shortcuts for `format/2` 188 | * introduce `encode/1,2` and `decode/1,2` as primary interface to built in tokenizers. `to_json/1,2` and `to_term/1,2` remain accessible but not advertised 189 | * new `parser/3` function exposes syntactic analysis stage for use with user defined tokenizers 190 | * improved documentation 191 | 192 | v1.2.1 193 | 194 | * fixes incorrect handling of escaped forward slashes, thanks bob ippolito 195 | 196 | v1.2 197 | 198 | * rewritten handling of string escaping to improve performance 199 | * `pre_encode` and `post_decode` hooks, see README 200 | * `relax` option 201 | 202 | v1.1.2 203 | 204 | * add `dirty_strings` option 205 | * more fixes for invalid unicode in strings 206 | 207 | v1.1.1 208 | 209 | * fixes bug regarding handling of invalid unicode in R14Bxx 210 | 211 | v1.1 212 | 213 | * improvements to string escaping and json generation performance 214 | 215 | v1.0.2 216 | 217 | * fixes to function specs 218 | * rewritten README 219 | * `comments` option 220 | 221 | v1.0.1 222 | 223 | * rebar fix 224 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2010-2013 alisdair sullivan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files 
(the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsx (v3.0.0) # 2 | 3 | 4 | an erlang application for consuming, producing and manipulating [json][json]. 
5 | inspired by [yajl][yajl] 6 | 7 | **jsx** is built via [rebar3][rebar3] 8 | 9 | current status: ![](https://github.com/talentdeficit/jsx/workflows/EUnit/badge.svg) 10 | 11 | **jsx** is released under the terms of the [MIT][MIT] license 12 | 13 | copyright 2010-2016 alisdair sullivan 14 | 15 | ## index ## 16 | 17 | * [quickstart](#quickstart) 18 | * [description](#description) 19 | - [migrating from 1.x](#migrating) 20 | - [json <-> erlang mapping](#json---erlang-mapping) 21 | - [incomplete input](#incomplete-input) 22 | * [data types](#data-types) 23 | - [`json_term()`](#json_term) 24 | - [`json_text()`](#json_text) 25 | - [`event()`](#event) 26 | - [`option()`](#option) 27 | * [exports](#exports) 28 | - [`encoder/3`, `decoder/3` & `parser/3`](#encoder3-decoder3--parser3) 29 | - [`decode/1,2`](#decode12) 30 | - [`encode/1,2`](#encode12) 31 | - [`format/1,2`](#format12) 32 | - [`minify/1`](#minify1) 33 | - [`prettify/1`](#prettify1) 34 | - [`is_json/1,2`](#is_json12) 35 | - [`is_term/1,2`](#is_term12) 36 | * [callback exports](#callback-exports) 37 | - [`Module:init/1`](#moduleinit1) 38 | - [`Module:handle_event/2`](#modulehandle_event2) 39 | * [acknowledgements](#acknowledgements) 40 | 41 | 42 | ## quickstart ## 43 | 44 | #### to add to a rebar3 project #### 45 | Add to `rebar.config` 46 | ```erlang 47 | ... 48 | {erl_opts, [debug_info]}. 49 | {deps, [ 50 | ... 51 | {jsx, "~> 3.0"} 52 | ]}. 53 | ... 54 | ``` 55 | 56 | #### to build the library and run tests #### 57 | 58 | ```bash 59 | $ rebar3 compile 60 | $ rebar3 eunit 61 | ``` 62 | 63 | #### to convert a utf8 binary containing a json string into an erlang term #### 64 | 65 | ```erlang 66 | 1> jsx:decode(<<"{\"library\": \"jsx\", \"awesome\": true}">>, []). 67 | #{<<"awesome">> => true,<<"library">> => <<"jsx">>} 68 | 2> jsx:decode(<<"{\"library\": \"jsx\", \"awesome\": true}">>, [{return_maps, false}]). 
69 | [{<<"library">>,<<"jsx">>},{<<"awesome">>,true}] 70 | 3> jsx:decode(<<"[\"a\",\"list\",\"of\",\"words\"]">>). 71 | [<<"a">>, <<"list">>, <<"of">>, <<"words">>] 72 | ``` 73 | 74 | #### to convert an erlang term into a utf8 binary containing a json string #### 75 | 76 | ```erlang 77 | 1> jsx:encode(#{<<"library">> => <<"jsx">>, <<"awesome">> => true}). 78 | <<"{\"awesome\":true,\"library\":\"jsx\"}">> 79 | 2> jsx:encode([{<<"library">>,<<"jsx">>},{<<"awesome">>,true}]). 80 | <<"{\"library\":\"jsx\",\"awesome\":true}">> 81 | 3> jsx:encode([<<"a">>, <<"list">>, <<"of">>, <<"words">>]). 82 | <<"[\"a\",\"list\",\"of\",\"words\"]">> 83 | ``` 84 | 85 | #### to check if a binary or a term is valid json #### 86 | 87 | ```erlang 88 | 1> jsx:is_json(<<"[\"this is json\"]">>). 89 | true 90 | 2> jsx:is_json("[\"this is not\"]"). 91 | false 92 | 3> jsx:is_term([<<"this is a term">>]). 93 | true 94 | 4> jsx:is_term([this, is, not]). 95 | false 96 | ``` 97 | 98 | #### to minify some json #### 99 | 100 | ```erlang 101 | 1> jsx:minify(<<"{ 102 | \"a list\": [ 103 | 1, 104 | 2, 105 | 3 106 | ] 107 | }">>). 108 | <<"{\"a list\":[1,2,3]}">> 109 | ``` 110 | 111 | #### to prettify some json #### 112 | 113 | ```erlang 114 | 1> jsx:prettify(<<"{\"a list\":[1,2,3]}">>). 115 | <<"{ 116 | \"a list\": [ 117 | 1, 118 | 2, 119 | 3 120 | ] 121 | }">> 122 | ``` 123 | 124 | ## description ## 125 | 126 | 127 | **jsx** is an erlang application for consuming, producing and manipulating 128 | [json][json] 129 | 130 | **jsx** follows the json [spec][rfc4627] as closely as possible with allowances for 131 | real world usage 132 | 133 | **jsx** is pragmatic. the json spec allows extensions so **jsx** extends the spec in a 134 | number of ways. see the section on `strict` in [options](#option) below though 135 | 136 | json has no official comments but this parser allows c/c++ style comments. 137 | anywhere whitespace is allowed you can insert comments (both `// ...` and `/* ... 
*/`) 138 | 139 | some particularly irresponsible json emitters leave trailing commas at the end of 140 | objects or arrays. **jsx** allows a single trailing comma in input. multiple commas 141 | in any position or a preceding comma are still errors 142 | 143 | all **jsx** decoder input should be `utf8` encoded binaries. sometimes you get binaries 144 | that are almost but not quite valid utf8 whether due to improper escaping or poor 145 | encoding. **jsx** replaces invalid codepoints and poorly formed sequences with the 146 | unicode replacement character (`u+FFFD`) but does its best to return something 147 | comprehensible 148 | 149 | json only allows keys and strings to be delimited by double quotes (`u+0022`) but 150 | javascript allows them to be delimited by single quotes (`u+0027`) as well. **jsx** 151 | follows javascript in this. strings that start with single quotes can contain double 152 | quotes but must end with single quotes and must escape any single quotes they contain 153 | 154 | json and **jsx** only recognize escape sequences as outlined in the json spec. it just 155 | ignores bad escape sequences leaving them in strings unaltered 156 | 157 | ### json <-> erlang mapping ### 158 | 159 | **json** | **erlang** 160 | --------------------------------|-------------------------------- 161 | `number` | `integer()` and `float()` 162 | `string` | `binary()` and `atom()` 163 | `true`, `false` and `null` | `true`, `false` and `null` 164 | `array` | `[]` and `[JSON]` 165 | `object` | `#{}`, `[{}]` and `[{binary() OR atom() OR integer(), JSON}]` 166 | see below | `datetime()` 167 | 168 | * numbers 169 | 170 | javascript and thus json represent all numeric values with floats. there's no 171 | reason for erlang -- a language that supports arbitrarily large integers -- to 172 | restrict all numbers to the ieee754 range 173 | 174 | whenever possible, **jsx** will interpret json numbers that look like integers as 175 | integers. 
other numbers will be converted to erlang's floating point type, which 176 | is nearly but not quite ieee754. negative zero is not representable in erlang (zero 177 | is unsigned in erlang and `0` is equivalent to `-0`) and will be interpreted as 178 | regular zero. numbers not representable are beyond the concern of this implementation, 179 | and will result in parsing errors 180 | 181 | when converting from erlang to json, floats are represented with their 182 | shortest representation that will round trip without loss of precision. this 183 | means that some floats may be superficially dissimilar (although 184 | functionally equivalent). for example, `1.0000000000000001` will be 185 | represented by `1.0` 186 | 187 | * strings 188 | 189 | json strings must be unicode encoded binaries or erlang atoms. in practice, 190 | because **jsx** only accepts `utf8` binaries all binary strings must be `utf8`. 191 | in addition to being unicode json strings restrict a number of codepoints and 192 | define a number of escape sequences 193 | 194 | json string escapes of the form `\uXXXX` will be converted to their 195 | equivalent codepoints during parsing. this means control characters and 196 | other codepoints disallowed by the json spec may be encountered in resulting 197 | strings. the utf8 restriction means the surrogates are explicitly disallowed. 198 | if a string contains escaped surrogates (`u+d800` to `u+dfff`) they are 199 | interpreted but only when they form valid surrogate pairs. surrogates 200 | encountered otherwise are replaced with the replacement codepoint (`u+fffd`) 201 | 202 | all erlang strings are represented by **valid** `utf8` encoded binaries. the 203 | encoder will check strings for conformance. badly formed `utf8` sequences may 204 | be replaced with the replacement codepoint (`u+fffd`) according to the unicode 205 | spec 206 | 207 | this implementation performs no normalization on strings beyond that 208 | detailed here. 
be careful when comparing strings as equivalent strings 209 | may have different `utf8` encodings 210 | 211 | * true, false and null 212 | 213 | the json primitives `true`, `false` and `null` are represented by the 214 | erlang atoms `true`, `false` and `null`. surprise 215 | 216 | * arrays 217 | 218 | json arrays are represented with erlang lists of json values as described 219 | in this section 220 | 221 | * objects 222 | 223 | json objects are represented by erlang maps. 224 | 225 | * datetime 226 | 227 | erlang datetime tuples (`{{Year, Month, Day}, {Hour, Min, Sec}}`) as returned 228 | from `erlang:localtime/0` are automatically encoded as [iso8601][iso8601] 229 | strings and are assumed to be UTC time. no conversion is attempted of json [iso8601][iso8601] strings in decoded json 230 | 231 | 232 | ### incomplete input ### 233 | 234 | **jsx** can handle incomplete json texts. if the option `stream` is passed to the decoder 235 | or parser and if a partial json text is parsed, rather than returning a term from 236 | your callback handler, **jsx** returns `{incomplete, F}` where `F` is a function with 237 | an identical API to the anonymous fun returned from `decoder/3`, `encoder/3` or 238 | `parser/3`. it retains the internal state of the parser at the point where input 239 | was exhausted. this allows you to parse as you stream json over a socket or file 240 | descriptor, or to parse large json texts without needing to keep them entirely in 241 | memory 242 | 243 | however, it is important to recognize that **jsx** is conservative by default. **jsx** will 244 | not consider the parsing complete even when input is exhausted and the json text is 245 | unambiguously complete. to end parsing call the `incomplete` function with the 246 | argument `end_stream` (or `end_json`) like: 247 | 248 | ```erlang 249 | 1> {incomplete, F} = jsx:decode(<<"[">>, [stream]). 250 | {incomplete,#Fun} 251 | 2> F(end_stream). 
% can also be `F(end_json)` 252 | ** exception error: bad argument 253 | 3> {incomplete, G} = F(<<"]">>). 254 | {incomplete,#Fun} 255 | 4> G(end_stream). % can also be `G(end_json)` 256 | [] 257 | ``` 258 | 259 | 260 | ## data types ## 261 | 262 | #### `json_term()` #### 263 | 264 | ```erlang 265 | json_term() = [json_term()] 266 | | [{binary() | atom() | integer(), json_term()}] 267 | | #{} % map of any size, not just the empty map 268 | | true 269 | | false 270 | | null 271 | | integer() 272 | | float() 273 | | binary() 274 | | atom() 275 | | datetime() 276 | ``` 277 | 278 | the erlang representation of json. binaries should be `utf8` encoded, or close 279 | at least 280 | 281 | #### `json_text()` #### 282 | 283 | ```erlang 284 | json_text() = binary() 285 | ``` 286 | 287 | a utf8 encoded binary containing a json string 288 | 289 | #### `event()` #### 290 | 291 | ```erlang 292 | event() = start_object 293 | | end_object 294 | | start_array 295 | | end_array 296 | | {key, binary()} 297 | | {string, binary()} 298 | | {integer, integer()} 299 | | {float, float()} 300 | | {literal, true} 301 | | {literal, false} 302 | | {literal, null} 303 | | end_json 304 | ``` 305 | 306 | the subset of [`token()`](#token) emitted by the decoder and encoder to handlers 307 | 308 | #### `option()` #### 309 | 310 | ```erlang 311 | option() = dirty_strings 312 | | escaped_forward_slashes 313 | | escaped_strings 314 | | repeat_keys 315 | | stream 316 | | strict 317 | | {strict, [strict_option()]} 318 | | return_tail 319 | | uescape 320 | | unescaped_jsonp 321 | 322 | strict_option() = comments 323 | | trailing_commas 324 | | utf8 325 | | single_quotes 326 | | escapes 327 | ``` 328 | 329 | **jsx** functions all take a common set of options. not all flags have meaning 330 | in all contexts, but they are always valid options. functions may have 331 | additional options beyond these. 
see 332 | [individual function documentation](#exports) for details 333 | 334 | - `dirty_strings` 335 | 336 | json escaping is lossy; it mutates the json string and repeated application 337 | can result in unwanted behaviour. if your strings are already escaped (or 338 | you'd like to force invalid strings into "json" you monster) use this flag 339 | to bypass escaping. this can also be used to read in **really** invalid json 340 | strings. everything between unescaped quotes is passed as is to the resulting 341 | string term. note that this takes precedence over any other options 342 | 343 | - `escaped_forward_slashes` 344 | 345 | json strings are escaped according to the json spec. this means forward 346 | slashes (solidus) are only escaped when this flag is present. otherwise they 347 | are left unescaped. you may want to use this if you are embedding json 348 | directly into a html or xml document 349 | 350 | - `escaped_strings` 351 | 352 | by default both the encoder and decoder return strings as utf8 binaries 353 | appropriate for use in erlang. escape sequences that were present in decoded 354 | terms are converted into the appropriate codepoint while encoded terms are 355 | unaltered. this flag escapes strings as if for output in json, removing 356 | control codes and problematic codepoints and replacing them with the 357 | appropriate escapes 358 | 359 | - `stream` 360 | 361 | see [incomplete input](#incomplete-input) 362 | 363 | - `strict` 364 | 365 | as mentioned [earlier](#description), **jsx** is pragmatic. if you're more of a 366 | json purist or you're really into bdsm stricter adherence to the spec is 367 | possible. 
the following restrictions are available 368 | 369 | * `comments` 370 | 371 | comments are disabled and result in a `badarg` error 372 | 373 | * `trailing_commas` 374 | 375 | trailing commas in an object or list result in `badarg` errors 376 | 377 | * `utf8` 378 | 379 | invalid codepoints and malformed unicode result in `badarg` errors 380 | 381 | * `single_quotes` 382 | 383 | only keys and strings delimited by double quotes (`u+0022`) are allowed. the 384 | single quote (`u+0027`) results in a `badarg` error 385 | 386 | * `escapes` 387 | 388 | escape sequences not adhering to the json spec result in a `badarg` error 389 | 390 | * `control_codes` 391 | 392 | control codes in strings result in `badarg` errors 393 | 394 | any combination of these can be passed to **jsx** by using `{strict, [strict_option()]}`. 395 | `strict` is equivalent to `{strict, [comments, trailing_commas, utf8, single_quotes, escapes, control_codes]}` 396 | 397 | - `return_tail` 398 | 399 | upon reaching the end of a valid json term in an input stream return the term and any 400 | remaining bytes in the input stream as `{with_tail, term(), binary()}` where the second 401 | member of the tuple is the json term and the third is any remaining bytes. note that 402 | leading whitespace will be stripped from the tail 403 | 404 | - `uescape` 405 | 406 | escape all codepoints outside the ascii range for 7 bit clean output. note 407 | this escaping takes place even if no other string escaping is requested (via 408 | `escaped_strings`) 409 | 410 | - `unescaped_jsonp` 411 | 412 | javascript interpreters treat the codepoints `u+2028` and `u+2029` as 413 | significant whitespace. json strings that contain either of these codepoints 414 | will be parsed incorrectly by some javascript interpreters. by default, 415 | these codepoints are escaped (to `\u2028` and `\u2029`, respectively) to 416 | retain compatibility. 
this option simply removes that escaping 417 | 418 | 419 | ## exports ## 420 | 421 | 422 | #### `encoder/3`, `decoder/3` & `parser/3` #### 423 | 424 | ```erlang 425 | decoder(Module, Args, Opts) -> Fun((JSONText) -> any()) 426 | encoder(Module, Args, Opts) -> Fun((JSONTerm) -> any()) 427 | parser(Module, Args, Opts) -> Fun((Tokens) -> any()) 428 | 429 | Module = atom() 430 | Args = any() 431 | Opts = [option()] 432 | JSONText = json_text() 433 | JSONTerm = json_term() 434 | Tokens = event() | [event()] 435 | ``` 436 | 437 | **jsx** is a json compiler with interleaved tokenizing, syntactic analysis and 438 | semantic analysis stages. included are two tokenizers; one that handles json 439 | texts (`decoder/3`) and one that handles erlang terms (`encoder/3`). there is 440 | also an entry point to the syntactic analysis stage for use with user-defined 441 | tokenizers (`parser/3`) 442 | 443 | all three functions return an anonymous function that takes the appropriate type 444 | of input and returns the result of performing semantic analysis, the tuple 445 | `{incomplete, F}` where `F` is a new anonymous function (see 446 | [incomplete input](#incomplete-input)) or a `badarg` error exception if 447 | syntactic analysis fails 448 | 449 | `Module` is the name of the callback module 450 | 451 | `Args` is any term that will be passed to `Module:init/1` prior to syntactic 452 | analysis to produce an initial state 453 | 454 | `Opts` are detailed [here](#option) 455 | 456 | check out [callback module documentation](#callback-exports) for details of 457 | the callback module interface 458 | 459 | #### `decode/1,2` #### 460 | 461 | ```erlang 462 | decode(JSON) -> Term 463 | decode(JSON, Opts) -> Term 464 | 465 | JSON = json_text() 466 | Term = json_term() 467 | Opts = [option() | labels | {labels, Label} | return_maps] 468 | Label = binary | atom | existing_atom | attempt_atom 469 | F = fun((any()) -> any()) 470 | ``` 471 | 472 | `decode` parses a json text (a `utf8` encoded 
binary) and produces an erlang 473 | term 474 | 475 | the option `labels` controls how keys are converted from json to 476 | erlang terms. `binary` (the default behavior) does no conversion 477 | beyond normal escaping. `atom` converts keys to erlang atoms and 478 | results in a `badarg` error if the keys fall outside the range of erlang 479 | atoms. `existing_atom` is identical to `atom` except it will not add 480 | new atoms to the atom table and will result in a `badarg` error if the atom 481 | does not exist. `attempt_atom` will convert keys to atoms when they exist, 482 | and leave them as binary otherwise 483 | 484 | the option `{return_maps, false}` will return objects as proplists instead 485 | of maps. 486 | 487 | raises a `badarg` error exception if input is not valid json 488 | 489 | 490 | #### `encode/1,2` #### 491 | 492 | ```erlang 493 | encode(Term) -> JSON 494 | encode(Term, Opts) -> JSON 495 | 496 | Term = json_term() 497 | JSON = json_text() 498 | Opts = [option() | space | {space, N} | indent | {indent, N}] 499 | N = pos_integer() 500 | ``` 501 | 502 | `encode` converts an erlang term into json text (a `utf8` encoded binary) 503 | 504 | the option `{space, N}` inserts `N` spaces after every comma and colon in your 505 | json output. `space` is an alias for `{space, 1}`. the default is `{space, 0}` 506 | 507 | the option `{indent, N}` inserts a newline and `N` spaces for each level of 508 | indentation in your json output. note that this overrides spaces inserted after 509 | a comma. `indent` is an alias for `{indent, 1}`. 
the default is `{indent, 0}` 510 | 511 | raises a `badarg` error exception if input is not a valid 512 | [erlang representation of json](#json---erlang-mapping) 513 | 514 | 515 | #### `format/1,2` #### 516 | 517 | ```erlang 518 | format(JSON) -> JSON 519 | format(JSON, Opts) -> JSON 520 | 521 | JSON = json_text() 522 | Opts = [option() | space | {space, N} | indent | {indent, N} | {newline, LF}] 523 | N = pos_integer() 524 | LF = binary() 525 | ``` 526 | 527 | `format` parses a json text (a `utf8` encoded binary) and produces a new json 528 | text according to the format rules specified by `Opts` 529 | 530 | the option `{space, N}` inserts `N` spaces after every comma and colon in your 531 | json output. `space` is an alias for `{space, 1}`. the default is `{space, 0}` 532 | 533 | the option `{indent, N}` inserts a newline and `N` spaces for each level of 534 | indentation in your json output. note that this overrides spaces inserted after 535 | a comma. `indent` is an alias for `{indent, 1}`. the default is `{indent, 0}` 536 | 537 | the option `{newline, LF}` defines a custom newline symbol(s). 
538 | the default is `{newline, <<$\n>>}` 539 | 540 | raises a `badarg` error exception if input is not valid json 541 | 542 | 543 | #### `minify/1` #### 544 | 545 | ```erlang 546 | minify(JSON) -> JSON 547 | 548 | JSON = json_text() 549 | ``` 550 | 551 | `minify` parses a json text (a `utf8` encoded binary) and produces a new json 552 | text stripped of whitespace 553 | 554 | raises a `badarg` error exception if input is not valid json 555 | 556 | 557 | #### `prettify/1` #### 558 | 559 | ```erlang 560 | prettify(JSON) -> JSON 561 | 562 | JSON = json_text() 563 | ``` 564 | 565 | `prettify` parses a json text (a `utf8` encoded binary) and produces a new json 566 | text equivalent to `format(JSON, [{space, 1}, {indent, 2}])` 567 | 568 | raises a `badarg` error exception if input is not valid json 569 | 570 | 571 | #### `is_json/1,2` #### 572 | 573 | ```erlang 574 | is_json(MaybeJSON) -> true | false 575 | is_json(MaybeJSON, Opts) -> true | false 576 | 577 | MaybeJSON = any() 578 | Opts = options() 579 | ``` 580 | 581 | returns true if input is a valid json text, false if not 582 | 583 | what exactly constitutes valid json may be [altered](#option) 584 | 585 | 586 | #### `is_term/1,2` #### 587 | 588 | ```erlang 589 | is_term(MaybeJSON) -> true | false 590 | is_term(MaybeJSON, Opts) -> true | false 591 | 592 | MaybeJSON = any() 593 | Opts = options() 594 | ``` 595 | 596 | returns true if input is a valid erlang representation of json, false if not 597 | 598 | what exactly constitutes valid json may be altered via [options](#option) 599 | 600 | ## callback exports ## 601 | 602 | the following functions should be exported from a **jsx** callback module 603 | 604 | #### `Module:init/1` #### 605 | 606 | ```erlang 607 | Module:init(Args) -> InitialState 608 | 609 | Args = any() 610 | InitialState = any() 611 | ``` 612 | 613 | whenever any of `encoder/3`, `decoder/3` or `parser/3` are called, this function 614 | is called with the `Args` argument provided in the calling 
function to obtain 615 | `InitialState` 616 | 617 | #### `Module:handle_event/2` #### 618 | 619 | ```erlang 620 | Module:handle_event(Event, State) -> NewState 621 | 622 | Event = [event()] 623 | State = any() 624 | NewState = any() 625 | ``` 626 | 627 | semantic analysis is performed by repeatedly calling `handle_event/2` with a 628 | stream of events emitted by the tokenizer and the current state. the new state 629 | returned is used as the input to the next call to `handle_event/2`. the 630 | following events must be handled: 631 | 632 | - `start_object` 633 | 634 | the start of a json object 635 | 636 | - `{key, binary()}` 637 | 638 | the key of an entry in a json object 639 | 640 | - `end_object` 641 | 642 | the end of a json object 643 | 644 | - `start_array` 645 | 646 | the start of a json array 647 | 648 | - `end_array` 649 | 650 | the end of a json array 651 | 652 | - `{string, binary()}` 653 | 654 | a json string. it will usually be a `utf8` encoded binary. see the 655 | [options](#option) for possible exceptions. note that keys are also 656 | json strings 657 | 658 | - `{integer, integer()}` 659 | 660 | an erlang integer (bignum) 661 | 662 | - `{float, float()}` 663 | 664 | an erlang float 665 | 666 | - `{literal, true}` 667 | 668 | the atom `true` 669 | 670 | - `{literal, false}` 671 | 672 | the atom `false` 673 | 674 | - `{literal, null}` 675 | 676 | the atom `null` 677 | 678 | - `end_json` 679 | 680 | this event is emitted when syntactic analysis is completed. you should 681 | do any cleanup and return the result of your semantic analysis 682 | 683 | 684 | ## acknowledgements ## 685 | 686 | jsx wouldn't be what it is without the contributions of [Paul J. 
Davis](https://github.com/davisp), [Lloyd Hilaiel](https://github.com/lloyd), [John Engelhart](https://github.com/johnezang), [Bob Ippolito](https://github.com/etrepum), [Brujo Benavides](https://github.com/elbrujohalcon), [Alex Kropivny](https://github.com/amtal), [Steve Strong](https://github.com/srstrong), [Michael Truog](https://github.com/okeuday), [Devin Torres](https://github.com/devinus), [fogfish](https://github.com/fogfish), [emptytea](https://github.com/emptytea), [John Daily](https://github.com/macintux), [Ola Bäckström](https://github.com/olabackstrom), [Joseph Crowe](https://github.com/JosephCrowe), [Patrick Gombert](https://github.com/patrickgombert), [Eshengazin S. Kuat](https://github.com/eskuat), [Max Lapshin](https://github.com/maxlapshin), [Bikram Chatterjee](https://github.com/c-bik), [Michael Uvarov](https://github.com/arcusfelis), [Led](https://github.com/Ledest) and [tvv](https://github.com/tvv) 687 | 688 | [json]: http://json.org 689 | [yajl]: http://lloyd.github.com/yajl 690 | [MIT]: http://www.opensource.org/licenses/mit-license.html 691 | [rebar3]: https://rebar3.org 692 | [meck]: https://github.com/eproxus/meck 693 | [rfc4627]: http://tools.ietf.org/html/rfc4627 694 | [travis]: https://travis-ci.org/ 695 | [jsxn]: https://github.com/talentdeficit/jsxn 696 | [iso8601]: http://www.iso.org/iso/iso8601 697 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {edoc_opts, [{preprocess, true}]}. 2 | {erl_opts, [debug_info]}. 3 | {dialyzer, [ 4 | {warnings, [ 5 | unknown, 6 | unmatched_returns, 7 | error_handling, 8 | underspecs 9 | ]} 10 | ]}. 11 | {profiles, [ 12 | {test, [ 13 | {dialyzer, [ 14 | {plt_extra_apps, [eunit]} 15 | ]} 16 | ]} 17 | ]}. 
18 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /src/jsx.app.src: -------------------------------------------------------------------------------- 1 | {application, jsx, 2 | [ 3 | {description, "a streaming, evented json parsing toolkit"}, 4 | {vsn, "3.1.0"}, 5 | {modules, [ 6 | jsx, 7 | jsx_encoder, 8 | jsx_decoder, 9 | jsx_parser, 10 | jsx_to_json, 11 | jsx_to_term, 12 | jsx_config, 13 | jsx_verify 14 | ]}, 15 | {registered, []}, 16 | {applications, [ 17 | kernel, 18 | stdlib 19 | ]}, 20 | {env, []}, 21 | 22 | {licenses, ["MIT"]}, 23 | {links, [{"GitHub", "https://github.com/talentdeficit/jsx"}]} 24 | ]}. 25 | -------------------------------------------------------------------------------- /src/jsx.erl: -------------------------------------------------------------------------------- 1 | %% The MIT License 2 | 3 | %% Copyright (c) 2010-2013 alisdair sullivan 4 | 5 | %% Permission is hereby granted, free of charge, to any person obtaining a copy 6 | %% of this software and associated documentation files (the "Software"), to deal 7 | %% in the Software without restriction, including without limitation the rights 8 | %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | %% copies of the Software, and to permit persons to whom the Software is 10 | %% furnished to do so, subject to the following conditions: 11 | 12 | %% The above copyright notice and this permission notice shall be included in 13 | %% all copies or substantial portions of the Software. 14 | 15 | %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
%% IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


%% public entry point of the library. the functions exported here do no work
%% themselves; each one forwards its arguments to an implementation module
-module(jsx).

%% conversion between json text and erlang terms
-export([encode/1, encode/2, decode/1, decode/2]).
%% validation predicates
-export([is_json/1, is_json/2, is_term/1, is_term/2]).
%% json text to json text reformatting
-export([format/1, format/2, minify/1, prettify/1]).
%% reading json from a file
-export([consult/1, consult/2]).
%% constructors for the low level streaming funs
-export([encoder/3, decoder/3, parser/3]).
-export([resume/3]).

-export_type([json_term/0, json_text/0, token/0]).
-export_type([encoder/0, decoder/0, parser/0, internal_state/0]).
-export_type([config/0]).


-ifdef(TEST).
%% data and helper functions for tests
-export([test_cases/0, special_test_cases/0]).
-export([init/1, handle_event/2]).
-endif.

%% the erlang side of the json <-> erlang mapping: proplists (with `[{}]`
%% standing for the empty object) or maps for objects, lists for arrays,
%% `true`/`false`/`null`, numbers, and binaries/atoms for strings
%% NOTE(review): `{with_tail, Term, Rest}` looks like the shape produced when
%% the `return_tail` option is set -- confirm against `jsx_to_term`
-type json_term() :: [{binary() | atom(), json_term()}] | [{},...]
    | [json_term()] | []
    | {with_tail, json_term(), binary()}
    | #{ binary() | atom() => json_term() }
    | true | false | null
    | integer() | float()
    | binary() | atom()
    | calendar:datetime().

%% json text is always handled as a binary
-type json_text() :: binary().

%% re-export of the parsed option record type owned by `jsx_config`
-type config() :: jsx_config:config().

%% encode/1 is encode/2 with an empty option list
-spec encode(Source::json_term()) -> json_text() | {incomplete, encoder()}.

encode(Source) -> encode(Source, []).

%% converts an erlang term to json text by delegating to
%% `jsx_to_json:to_json/2`; `Config` is passed through untouched
-spec encode(Source::json_term(), Config::jsx_config:options()) -> json_text() | {incomplete, encoder()}.

encode(Source, Config) -> jsx_to_json:to_json(Source, Config).


%% decode/1 is decode/2 with an empty option list
-spec decode(Source::json_text()) -> json_term() | {incomplete, decoder()}.

decode(Source) -> decode(Source, []).

%% converts json text to an erlang term by delegating to
%% `jsx_to_term:to_term/2`; `Config` is passed through untouched
-spec decode(Source::json_text(), Config::jsx_config:options()) -> json_term() | {incomplete, decoder()}.

decode(Source, Config) -> jsx_to_term:to_term(Source, Config).


%% format/1 is format/2 with an empty option list
-spec format(Source::json_text()) -> json_text().

format(Source) -> format(Source, []).

%% re-renders a json text according to `Config` by delegating to
%% `jsx_to_json:format/2`
-spec format(Source::json_text(), Config::jsx_config:options()) -> json_text().

format(Source, Config) -> jsx_to_json:format(Source, Config).


%% formats with no options at all, i.e. with the formatter's defaults
-spec minify(Source::json_text()) -> json_text().

minify(Source) -> format(Source, []).


%% formats with the `space` flag and an indent of two
-spec prettify(Source::json_text()) -> json_text().

prettify(Source) -> format(Source, [space, {indent, 2}]).


%% is_json/1 is is_json/2 with an empty option list
-spec is_json(Source::binary()) -> boolean() | {incomplete, decoder()}.

is_json(Source) -> is_json(Source, []).

%% checks whether `Source` is json text by delegating to
%% `jsx_verify:is_json/2`
-spec is_json(Source::binary(), Config::jsx_config:options()) -> boolean() | {incomplete, decoder()}.

is_json(Source, Config) -> jsx_verify:is_json(Source, Config).


%% is_term/1 is is_term/2 with an empty option list
-spec is_term(Source::json_term() | end_stream | end_json) -> boolean() | {incomplete, encoder()}.

is_term(Source) -> is_term(Source, []).

%% checks whether `Source` is an erlang representation of json by delegating
%% to `jsx_verify:is_term/2`
-spec is_term(Source::json_term() | end_stream | end_json,
    Config::jsx_config:options()) -> boolean() | {incomplete, encoder()}.

is_term(Source, Config) -> jsx_verify:is_term(Source, Config).


%% consult/1 is consult/2 with an empty option list
-spec consult(File::file:name_all()) -> list(jsx_consult:json_value()).

consult(File) -> consult(File, []).

%% reads json terms from `File` by delegating to `jsx_consult:consult/2`
-spec consult(File::file:name_all(), Config::jsx_consult:config()) -> list(jsx_consult:json_value()).

consult(File, Config) -> jsx_consult:consult(File, Config).


%% a decoder is a fun fed json text, or `end_stream`/`end_json` to finish
-type decoder() :: fun((json_text() | end_stream | end_json) -> any()).

%% builds a decoder fun for callback module `Handler` by delegating to
%% `jsx_decoder:decoder/3`
-spec decoder(Handler::module(), State::any(), Config::jsx_config:options()) -> decoder().

decoder(Handler, State, Config) -> jsx_decoder:decoder(Handler, State, Config).


%% an encoder is a fun fed an erlang term, or `end_stream`/`end_json`
-type encoder() :: fun((json_term() | end_stream | end_json) -> any()).

%% builds an encoder fun for callback module `Handler`; the work is done by
%% `jsx_encoder:encoder/3`
-spec encoder(Handler::module(), State::any(), Config::jsx_config:options()) -> encoder().

encoder(Handler, State, Config) ->
    jsx_encoder:encoder(Handler, State, Config).


%% everything a parser fun accepts: a list of tokens or a single token, in
%% either the tagged form (`{integer, 1}`) or the bare form (`1`)
-type token() :: [token()]
    | start_object
    | end_object
    | start_array
    | end_array
    | {key, binary()}
    | {string, binary()}
    | binary()
    | {number, integer() | float()}
    | {integer, integer()}
    | {float, float()}
    | integer()
    | float()
    | {literal, true}
    | {literal, false}
    | {literal, null}
    | true
    | false
    | null
    | end_json.


%% a parser is a fun fed tokens, or `end_stream` to finish
-type parser() :: fun((token() | end_stream) -> any()).

%% builds a parser fun for callback module `Handler`; the work is done by
%% `jsx_parser:parser/3`
-spec parser(Handler::module(), State::any(), Config::jsx_config:options()) -> parser().

parser(Handler, State, Config) ->
    jsx_parser:parser(Handler, State, Config).

-opaque internal_state() :: tuple().

%% continues an interrupted decode or parse. the tag of the internal state
%% tuple selects the module to resume in, and the option list is parsed into
%% a config record before the implementation module is called
-spec resume(Term::json_text() | token(), InternalState::internal_state(),
    Config::jsx_config:options()) -> jsx:decoder() | {incomplete, jsx:decoder()}.

resume(Term, {decoder, State, Handler, Acc, Stack}, Config) ->
    Parsed = jsx_config:parse_config(Config),
    jsx_decoder:resume(Term, State, Handler, Acc, Stack, Parsed);
resume(Term, {parser, State, Handler, Stack}, Config) ->
    Parsed = jsx_config:parse_config(Config),
    jsx_parser:resume(Term, State, Handler, Stack, Parsed).

-ifdef(TEST).

-include_lib("eunit/include/eunit.hrl").


%% test handler: collects events newest-first and returns them in emission
%% order once `end_json` arrives
init([]) -> [].

handle_event(end_json, Seen) -> lists:reverse([end_json | Seen]);
handle_event(Event, Seen) -> [Event | Seen].


%% every ordinary test case, as one flat list
test_cases() ->
    lists:append([
        empty_array(),
        nested_array(),
        empty_object(),
        nested_object(),
        strings(),
        literals(),
        integers(),
        floats(),
        compound_object()
    ]).


%% kept apart from test_cases/0 so we can skip them in `jsx_to_term`
special_test_cases() ->
    lists:append([special_objects(), special_array()]).


%% every test case is a `{Title, JSON, Term, Events}` tuple
empty_array() ->
    [{"[]", <<"[]">>, [], [start_array, end_array]}].


nested_array() ->
    Events = [
        start_array, start_array, start_array,
        end_array, end_array, end_array
    ],
    [{"[[[]]]", <<"[[[]]]">>, [[[]]], Events}].


empty_object() ->
    [{"{}", <<"{}">>, [{}], [start_object, end_object]}].


nested_object() ->
    Events = [
        start_object,
        {key, <<"key">>},
        start_object,
        {key, <<"key">>},
        start_object,
        end_object,
        end_object,
        end_object
    ],
    [{
        "{\"key\":{\"key\":{}}}",
        <<"{\"key\":{\"key\":{}}}">>,
        [{<<"key">>, [{<<"key">>, [{}]}]}],
        Events
    }].


%% top level strings, not wrapped in an array or object
naked_strings() ->
    lists:map(
        fun(Chars) ->
            Bin = list_to_binary(Chars),
            {Chars, <<"\"", Bin/binary, "\"">>, Bin, [{string, Bin}]}
        end,
        ["", "hello world"]
    ).


%% the naked cases, then each of them inside an array, then inside an object
strings() ->
    Naked = naked_strings(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).


%% top level integers: values around byte/word boundaries, two bignums,
%% the negation of each, and zero
naked_integers() ->
    Positive = [
        1, 2, 3,
        127, 128, 129,
        255, 256, 257,
        65534, 65535, 65536,
        18446744073709551616,
        18446744073709551617
    ],
    Negative = [ -1 * N || N <- Positive ],
    lists:map(
        fun(N) ->
            Digits = integer_to_list(N),
            {Digits, list_to_binary(Digits), N, [{integer, N}]}
        end,
        Positive ++ Negative ++ [0]
    ).


integers() ->
    Naked = naked_integers(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).


%% top level floats and the negation of each; titles and json text come from
%% `sane_float_to_list/1`
naked_floats() ->
    Positive = [
        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9,
        1234567890.0987654321,
        0.0e0,
        1234567890.0987654321e16,
        0.1e0, 0.1e1, 0.1e2, 0.1e4, 0.1e8, 0.1e16, 0.1e308,
        1.0e0, 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e308,
        2.2250738585072014e-308,    %% min normalized float
        1.7976931348623157e308,     %% max normalized float
        5.0e-324,                   %% min denormalized float
        2.225073858507201e-308      %% max denormalized float
    ],
    Negative = [ -1 * F || F <- Positive ],
    lists:map(
        fun(F) ->
            Text = sane_float_to_list(F),
            {Text, list_to_binary(Text), F, [{float, F}]}
        end,
        Positive ++ Negative
    ).


%% the naked cases, then each of them inside an array, then inside an object
floats() ->
    Naked = naked_floats(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).


%% the three json literals at top level
naked_literals() ->
    lists:map(
        fun(Atom) ->
            {
                atom_to_list(Atom),
                atom_to_binary(Atom, unicode),
                Atom,
                [{literal, Atom}]
            }
        end,
        [true, false, null]
    ).


literals() ->
    Naked = naked_literals(),
    Naked
        ++ lists:map(fun wrap_with_array/1, Naked)
        ++ lists:map(fun wrap_with_object/1, Naked).


%% one test case mixing arrays, objects, integers, floats and literals.
%% like every other case it is a `{Title, JSON, Term, Events}` tuple
compound_object() ->
    [{
        "[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]",
        <<"[{\"alpha\":[1,2,3],\"beta\":{\"alpha\":[1.0,2.0,3.0],\"beta\":[true,false]}},[{}]]">>,
        [[{<<"alpha">>, [1, 2, 3]}, {<<"beta">>, [{<<"alpha">>, [1.0, 2.0, 3.0]}, {<<"beta">>, [true, false]}]}], [[{}]]],
        [
            start_array,
            start_object,
            {key, <<"alpha">>},
            start_array,
            {integer, 1},
            {integer, 2},
            {integer, 3},
            end_array,
            {key, <<"beta">>},
            start_object,
            {key, <<"alpha">>},
            start_array,
            {float, 1.0},
            {float, 2.0},
            {float, 3.0},
            end_array,
            {key, <<"beta">>},
            start_array,
            {literal, true},
            {literal, false},
            end_array,
            end_object,
            end_object,
            start_array,
            start_object,
            end_object,
            end_array,
            end_array
        ]
    }].


%% objects whose erlang term uses an atom or an integer where the json text
%% and the events carry a string
special_objects() ->
    [
        {
            "[{key, atom}]",
            <<"{\"key\":\"atom\"}">>,
            [{key, atom}],
            [start_object, {key, <<"key">>}, {string, <<"atom">>}, end_object]
        },
        {
            "[{1, true}]",
            <<"{\"1\":true}">>,
            [{1, true}],
            [start_object, {key, <<"1">>}, {literal, true}, end_object]
        }
    ].


%% an array whose erlang term holds atoms where the json text and the events
%% carry strings
special_array() ->
    [
        {
            "[foo, bar]",
            <<"[\"foo\",\"bar\"]">>,
            [foo, bar],
            [start_array, {string, <<"foo">>}, {string, <<"bar">>}, end_array]
        }
    ].


%% turns a test case into the same case nested inside a one element array:
%% all four elements of the tuple are wrapped consistently
wrap_with_array({Title, JSON, Term, Events}) ->
    {
        "[" ++ Title ++ "]",
        <<"[", JSON/binary, "]">>,
        [Term],
        [start_array] ++ Events ++ [end_array]
    }.


%% turns a test case into the same case stored under the key `key` of a
%% single entry object
wrap_with_object({Title, JSON, Term, Events}) ->
    {
        "{\"key\":" ++ Title ++ "}",
        <<"{\"key\":", JSON/binary, "}">>,
        [{<<"key">>, Term}],
        [start_object, {key, <<"key">>}] ++ Events ++ [end_object]
    }.


%% renders a float the way `~p` prints it; used for test titles and for the
%% json text of the float test cases
sane_float_to_list(X) ->
    [Output] = io_lib:format("~p", [X]),
    Output.


%% feeds `JSON` to a streaming decoder one chunk at a time. every chunk must
%% leave the decoder incomplete; `end_stream` then yields the collected events
incremental_decode(JSON) ->
    Final = lists:foldl(
        fun(Byte, Decoder) -> {incomplete, F} = Decoder(Byte), F end,
        decoder(jsx, [], [stream]),
        json_to_bytes(JSON)
    ),
    Final(end_stream).


%% feeds `Events` to a streaming parser one single-event list at a time, then
%% finishes it with `end_stream`
incremental_parse(Events) ->
    Final = lists:foldl(
        fun(Event, Parser) -> {incomplete, F} = Parser(Event), F end,
        parser(?MODULE, [], [stream]),
        lists:map(fun(X) -> [X] end, Events)
    ),
    Final(end_stream).


%% used to convert a json text into a list of single byte binaries to be
%% incrementally decoded. the list starts with an empty binary so that an
%% empty chunk is exercised as well
json_to_bytes(JSON) -> json_to_bytes(JSON, []).

json_to_bytes(<<>>, Acc) -> [<<>>] ++ lists:reverse(Acc);
json_to_bytes(<<X, Rest/binary>>, Acc) -> json_to_bytes(Rest, [<<X>>] ++ Acc).


%% actual tests!

%% every test case decoded in one go and again byte by byte; both must emit
%% the expected events followed by `end_json`
decode_test_() ->
    Data = test_cases(),
    [{Title, ?_assertEqual(Events ++ [end_json], (decoder(?MODULE, [], []))(JSON))}
        || {Title, JSON, _, Events} <- Data
    ] ++
    [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_decode(JSON))}
        || {Title, JSON, _, Events} <- Data
    ].


%% every test case's events pushed through the parser in one go and again one
%% event at a time; the parser must hand the same events to the handler
parse_test_() ->
    Data = test_cases(),
    [{Title, ?_assertEqual(Events ++ [end_json], (parser(?MODULE, [], []))(Events ++ [end_json]))}
        || {Title, _, _, Events} <- Data
    ] ++
    [{Title ++ " (incremental)", ?_assertEqual(Events ++ [end_json], incremental_parse(Events))}
        || {Title, _, _, Events} <- Data
    ].


%% every test case's erlang term pushed through the encoder; it must emit the
%% expected events followed by `end_json`
encode_test_() ->
    Data = test_cases(),
    [
        {
            Title, ?_assertEqual(
                Events ++ [end_json],
                (jsx:encoder(jsx, [], []))(Term)
            )
        } || {Title, _, Term, Events} <- Data
    ].


%% both `end_stream` and `end_json` must finish a streaming parser and a
%% streaming decoder that have already seen a complete empty object
end_stream_test_() ->
    Tokens = [start_object, end_object, end_json],
    [
        {"parser end_stream", ?_assertEqual(
            Tokens,
            begin
                {incomplete, F} = (jsx:parser(jsx, [], [stream]))([start_object, end_object]),
                F(end_stream)
            end
        )},
        {"parser end_json", ?_assertEqual(
            Tokens,
            begin
                {incomplete, F} = (jsx:parser(jsx, [], [stream]))([start_object, end_object]),
                F(end_json)
            end
        )},
        {"decoder end_stream", ?_assertEqual(
            Tokens,
            begin {incomplete, F} = (jsx:decoder(jsx, [], [stream]))(<<"{}">>), F(end_stream) end
        )},
        {"decoder end_json", ?_assertEqual(
            Tokens,
            begin {incomplete, F} = (jsx:decoder(jsx, [], [stream]))(<<"{}">>), F(end_json) end
        )}
    ].


-endif.
--------------------------------------------------------------------------------
/src/jsx_config.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2013 alisdair sullivan

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
%% IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


%% conversion between user supplied option lists and the `#config{}` record
-module(jsx_config).

-export([parse_config/1]).
-export([config_to_list/1]).
-export([extract_config/1, valid_flags/0]).

-ifdef(TEST).
-export([fake_error_handler/3]).
-endif.

-include("jsx_config.hrl").

%% shape of an error/incomplete handler: a fun of three arguments taking the
%% input, a tagged internal state tuple and an option list that may itself
%% carry handlers of the same type
-type handler_type(Handler) ::
    fun((jsx:json_text() | end_stream |
         jsx:json_term(),
         {decoder, any(), module(), null | list(), list()} |
         {parser, any(), module(), list()} |
         {encoder, any(), module()},
         list({pre_encode, fun((any()) -> any())} |
              {error_handler, Handler} |
              {incomplete_handler, Handler} |
              atom())) -> any()).
%% ties the recursive knot: a handler's option list may contain handlers
-type handler() :: handler_type(handler()).
-export_type([handler/0]).

-type config() :: #config{}.
-export_type([config/0]).

%% one entry of an option list as accepted by the public api
%% NOTE(review): `parse_config/1` in this module has clauses only for the
%% bare flags, `{strict, List}` and the two handler tuples; the other forms
%% listed here are presumably consumed or filtered out elsewhere -- confirm
-type option() :: valid_flag()
    | {valid_flag(), boolean()}
    | {strict, [strict_option()]}
    | {error_handler, fun((any(), any(), any()) -> ok)}
    | {incomplete_handler, fun((any(), any(), any()) -> ok)}
    | {return_maps, boolean()}
    | {labels, label_option()}
    | {space, non_neg_integer()}
    | {indent, non_neg_integer()}
    | {depth, non_neg_integer()}
    | {newline, binary()}
    | legacy_option()
    | {legacy_option(), boolean()}.
%% these atoms are also the names of the strict fields of `#config{}`
-type legacy_option() :: strict_comments
    | strict_commas
    | strict_utf8
    | strict_single_quotes
    | strict_escapes
    | strict_control_codes.

-type options() :: [option()].
-export_type([options/0]).

%% members of the list in `{strict, List}`; each switches on one strict field
-type strict_option() :: comments
    | trailing_commas
    | utf8
    | single_quotes
    | escapes
    | control_codes.
-type label_option() :: binary
    | atom
    | existing_atom
    | attempt_atom.

-type valid_flag() :: escaped_forward_slashes
    | escaped_strings
    | unescaped_jsonp
    | dirty_strings
    | multi_term
    | return_tail
    | repeat_keys
    | strict
    | stream
    | uescape
    | error_handler
    | incomplete_handler.

%% parsing of jsx config

%% folds an option list into a `#config{}` record, starting from the record
%% defaults. any option without a clause below raises `badarg`, as does a
%% second error or incomplete handler
-spec parse_config(Config::options()) -> config().

parse_config(Config) -> parse_config(Config, #config{}).

parse_config([], Cfg) ->
    Cfg;
parse_config([escaped_forward_slashes|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{escaped_forward_slashes=true});
parse_config([escaped_strings|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{escaped_strings=true});
parse_config([unescaped_jsonp|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{unescaped_jsonp=true});
parse_config([dirty_strings|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{dirty_strings=true});
parse_config([multi_term|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{multi_term=true});
parse_config([return_tail|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{return_tail=true});
%% retained for backwards compat, now does nothing however
parse_config([repeat_keys|Tail], Cfg) ->
    parse_config(Tail, Cfg);
parse_config([uescape|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{uescape=true});
%% bare `strict` is the same as listing every strict option
parse_config([strict|Tail], Cfg) ->
    parse_strict(
        [comments, trailing_commas, utf8, single_quotes, escapes, control_codes],
        Tail,
        Cfg
    );
parse_config([{strict, Wanted}|Tail], Cfg) ->
    parse_strict(Wanted, Tail, Cfg);
parse_config([stream|Tail], Cfg) ->
    parse_config(Tail, Cfg#config{stream=true});
%% a handler may only be installed once per option list
parse_config([{error_handler, Fun}|Tail] = Opts, Cfg) when is_function(Fun, 3) ->
    case Cfg#config.error_handler of
        false -> parse_config(Tail, Cfg#config{error_handler=Fun});
        _ -> erlang:error(badarg, [Opts, Cfg])
    end;
parse_config([{incomplete_handler, Fun}|Tail] = Opts, Cfg) when is_function(Fun, 3) ->
    case Cfg#config.incomplete_handler of
        false -> parse_config(Tail, Cfg#config{incomplete_handler=Fun});
        _ -> erlang:error(badarg, [Opts, Cfg])
    end;
parse_config(_Opts, _Cfg) ->
    erlang:error(badarg).


%% consumes the list given to `{strict, List}`, then carries on with the
%% options that followed it. an unknown strict option raises `badarg`
parse_strict([], Tail, Cfg) ->
    parse_config(Tail, Cfg);
parse_strict([comments|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_comments=true});
parse_strict([trailing_commas|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_commas=true});
parse_strict([utf8|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_utf8=true});
parse_strict([single_quotes|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_single_quotes=true});
parse_strict([escapes|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_escapes=true});
parse_strict([control_codes|Wanted], Tail, Cfg) ->
    parse_strict(Wanted, Tail, Cfg#config{strict_control_codes=true});
parse_strict(_Wanted, _Tail, _Cfg) ->
    erlang:error(badarg).



-spec config_to_list(Config::config()) -> options().

%% inverse of `parse_config/1`: lists every field of the record that is not
%% `false`. flags are listed by name, handlers as `{Name, Fun}` tuples, and
%% the strict fields are folded into `strict` or `{strict, List}` by
%% `reduce_config/1`, so the result is accepted by `parse_config/1` again
config_to_list(Config) ->
    reduce_config(lists:map(
        fun ({error_handler, F}) -> {error_handler, F};
            ({incomplete_handler, F}) -> {incomplete_handler, F};
            ({Key, true}) -> Key
        end,
        lists:filter(
            fun({_, false}) -> false; (_) -> true end,
            lists:zip(record_info(fields, config), tl(tuple_to_list(Config)))
        )
    )).


%% replaces the `strict_*` field names in `Input` with the option form that
%% `parse_config/1` understands; every other entry is kept, in order, and the
%% strict option (if any) is appended last
reduce_config(Input) -> reduce_config(Input, [], []).

reduce_config([], Output, Strict) ->
    case length(Strict) of
        0 -> lists:reverse(Output);
        %% all six strict fields set: use the shorthand
        6 -> lists:reverse(Output) ++ [strict];
        _ -> lists:reverse(Output) ++ [{strict, lists:reverse(Strict)}]
    end;
reduce_config([strict_comments|Input], Output, Strict) ->
    reduce_config(Input, Output, [comments] ++ Strict);
%% the field is `strict_commas` but the option is spelled `trailing_commas`
reduce_config([strict_commas|Input], Output, Strict) ->
    reduce_config(Input, Output, [trailing_commas] ++ Strict);
reduce_config([strict_utf8|Input], Output, Strict) ->
    reduce_config(Input, Output, [utf8] ++ Strict);
reduce_config([strict_single_quotes|Input], Output, Strict) ->
    reduce_config(Input, Output, [single_quotes] ++ Strict);
reduce_config([strict_escapes|Input], Output, Strict) ->
    reduce_config(Input, Output, [escapes] ++ Strict);
reduce_config([strict_control_codes|Input], Output, Strict) ->
    reduce_config(Input, Output, [control_codes] ++ Strict);
reduce_config([Else|Input], Output, Strict) ->
    reduce_config(Input, [Else] ++ Output, Strict).


%% the option names that `extract_config/1` lets through
-spec valid_flags() -> [valid_flag(), ...].

valid_flags() ->
    [
        escaped_forward_slashes,
        escaped_strings,
        unescaped_jsonp,
        dirty_strings,
        multi_term,
        return_tail,
        repeat_keys,
        strict,
        stream,
        uescape,
        error_handler,
        incomplete_handler
    ].


%% keeps only the options whose name (the atom itself, or the first element
%% of a two-tuple) is one of `valid_flags/0`. the kept options come back in
%% reverse order
-spec extract_config(Config::options()) -> options().

extract_config(Config) ->
    extract_parser_config(Config, []).

extract_parser_config([], Acc) -> Acc;
extract_parser_config([{K,V}|Rest], Acc) ->
    case lists:member(K, valid_flags()) of
        true -> extract_parser_config(Rest, [{K,V}] ++ Acc)
        ; false -> extract_parser_config(Rest, Acc)
    end;
extract_parser_config([K|Rest], Acc) ->
    case lists:member(K, valid_flags()) of
        true -> extract_parser_config(Rest, [K] ++ Acc)
        ; false -> extract_parser_config(Rest, Acc)
    end.


%% eunit tests
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


config_test_() ->
    [
        {"all flags",
            ?_assertEqual(
                #config{escaped_forward_slashes = true,
                    escaped_strings = true,
                    unescaped_jsonp = true,
                    dirty_strings = true,
                    multi_term = true,
                    return_tail = true,
                    strict_comments = true,
                    strict_commas = true,
                    strict_utf8 = true,
                    strict_single_quotes = true,
                    strict_escapes = true,
                    strict_control_codes = true,
                    stream = true,
                    uescape = true
                },
                parse_config([dirty_strings,
                    escaped_forward_slashes,
                    escaped_strings,
                    unescaped_jsonp,
                    multi_term,
                    return_tail,
                    repeat_keys,
                    strict,
                    stream,
                    uescape
                ])
            )
        },
        {"strict flag",
            ?_assertEqual(
                #config{strict_comments = true,
                    strict_commas = true,
                    strict_utf8 = true,
                    strict_single_quotes = true,
                    strict_escapes = true,
                    strict_control_codes = true
                },
                parse_config([strict])
            )
        },
        {"strict selective",
            ?_assertEqual(
                #config{strict_comments = true},
                parse_config([{strict, [comments]}])
            )
        },
        {"strict expanded",
            ?_assertEqual(
                #config{strict_comments = true,
                    strict_utf8 = true,
                    strict_single_quotes = true,
                    strict_escapes = true
                },
                parse_config([{strict, [comments, utf8, single_quotes, escapes]}])
            )
        },
        {"error_handler flag", ?_assertEqual(
            #config{error_handler=fun ?MODULE:fake_error_handler/3},
            parse_config([{error_handler, fun ?MODULE:fake_error_handler/3}])
        )},
        {"two error_handlers defined", ?_assertError(
            badarg,
            parse_config([
                {error_handler, fun(_, _, _) -> true end},
                {error_handler, fun(_, _, _) -> false end}
            ])
        )},
        {"incomplete_handler flag", ?_assertEqual(
            #config{incomplete_handler=fun ?MODULE:fake_error_handler/3},
            parse_config([{incomplete_handler, fun ?MODULE:fake_error_handler/3}])
        )},
        {"two incomplete_handlers defined", ?_assertError(
            badarg,
            parse_config([
                {incomplete_handler, fun(_, _, _) -> true end},
                {incomplete_handler, fun(_, _, _) -> false end}
            ])
        )},
        {"bad option flag", ?_assertError(badarg, parse_config([this_flag_does_not_exist]))}
    ].


config_to_list_test_() ->
    [
        {"empty config", ?_assertEqual(
            [],
            config_to_list(#config{})
        )},
        {"all flags", ?_assertEqual(
            [dirty_strings,
                escaped_forward_slashes,
                escaped_strings,
                multi_term,
                stream,
                uescape,
                unescaped_jsonp,
                strict
            ],
            config_to_list(
                #config{escaped_forward_slashes = true,
                    escaped_strings = true,
                    unescaped_jsonp = true,
                    dirty_strings = true,
                    multi_term = true,
                    strict_comments = true,
                    strict_commas = true,
                    strict_utf8 = true,
                    strict_single_quotes = true,
                    strict_escapes = true,
                    strict_control_codes = true,
                    stream = true,
                    uescape = true
                }
            )
        )},
        {"single strict", ?_assertEqual(
            [{strict, [comments]}],
            config_to_list(#config{strict_comments = true})
        )},
        {"strict commas", ?_assertEqual(
            [{strict, [trailing_commas]}],
            config_to_list(#config{strict_commas = true})
        )},
        {"multiple strict", ?_assertEqual(
            [{strict, [utf8, single_quotes, escapes]}],
            config_to_list(#config{strict_utf8 = true, strict_single_quotes = true, strict_escapes = true})
        )},
        {"all strict but commas", ?_assertEqual(
            [{strict, [comments, utf8, single_quotes, escapes, control_codes]}],
            config_to_list(#config{strict_comments = true,
                strict_utf8 = true,
                strict_single_quotes = true,
                strict_escapes = true,
                strict_control_codes = true})
        )},
        {"all strict", ?_assertEqual(
            [strict],
            config_to_list(#config{strict_comments = true,
                strict_commas = true,
                strict_utf8 = true,
                strict_single_quotes = true,
                strict_escapes = true,
                strict_control_codes = true})
        )},
        {"strict round trip", ?_assertEqual(
            parse_config([strict]),
            parse_config(config_to_list(parse_config([strict])))
        )},
        {"error handler", ?_assertEqual(
            [{error_handler, fun ?MODULE:fake_error_handler/3}],
            config_to_list(#config{error_handler=fun ?MODULE:fake_error_handler/3})
        )},
        {"incomplete handler", ?_assertEqual(
            [{incomplete_handler, fun ?MODULE:fake_error_handler/3}],
            config_to_list(#config{incomplete_handler=fun ?MODULE:fake_error_handler/3})
        )}
    ].


%% stand-in handler of the right arity for the handler tests above
fake_error_handler(_, _, _) -> ok.


-endif.
--------------------------------------------------------------------------------
/src/jsx_config.hrl:
--------------------------------------------------------------------------------
-record(config, {
    dirty_strings = false :: boolean(),
    escaped_forward_slashes = false :: boolean(),
    escaped_strings = false :: boolean(),
    multi_term = false :: boolean(),
    strict_comments = false :: boolean(),
    strict_commas = false :: boolean(),
    strict_utf8 = false :: boolean(),
    strict_single_quotes = false :: boolean(),
    strict_escapes = false :: boolean(),
    strict_control_codes = false :: boolean(),
    stream = false :: boolean(),
    return_tail = false :: boolean(),
    uescape = false :: boolean(),
    unescaped_jsonp = false :: boolean(),
    error_handler = false :: false | jsx_config:handler(),
    incomplete_handler = false :: false | jsx_config:handler()
}).
--------------------------------------------------------------------------------
/src/jsx_consult.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2015 Alisdair Sullivan

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_consult).

-export([consult/2]).
-export([init/1, reset/1, handle_event/2]).


-record(config, {
    labels = binary,
    return_maps = false
}).

-type config() :: proplists:proplist().
-export_type([config/0]).

-type json_value() :: list(json_value())
    | map()
    | true
    | false
    | null
    | integer()
    | float()
    | binary().
-export_type([json_value/0]).

%% put the `return_maps' and `multi_term' flags in front of the caller-supplied
%% option list
opts(Opts) -> [return_maps, multi_term | Opts].
%% read `File' and decode every JSON term found in it, returning the decoded
%% terms in the order they appear in the file. the options from `opts/1' are
%% always applied on top of `Config'. raises `badarg' when the file cannot be
%% read
-spec consult(File::file:name_all(), Config::config()) -> [json_value()].

consult(File, Config) when is_list(Config) ->
    case file:read_file(File) of
        {ok, Bin} ->
            %% the same option list is used both as the handler's initial
            %% state and (filtered by extract_config/1) as the decoder config
            Opts = opts(Config),
            Decoder = jsx:decoder(?MODULE, Opts, jsx_config:extract_config(Opts)),
            {Final, _, _} = Decoder(Bin),
            lists:reverse(Final);
        {error, _} -> erlang:error(badarg)
    end.


%% handler state: the terms decoded so far (most recent first), the options
%% given to `init/1' and the state of the `jsx_to_term' handler for the term
%% currently being built
-type state() :: {[json_value()], config(), {list(), #config{}}}.
-spec init(Config::config()) -> state().

init(Config) -> {[], Config, jsx_to_term:start_term(Config)}.


%% keep the accumulated terms but start a fresh `jsx_to_term' state
-spec reset(State::state()) -> state().

reset({Acc, Config, _}) -> {Acc, Config, jsx_to_term:start_term(Config)}.


%% `end_json' finishes the current term and pushes it onto the accumulator;
%% every other event is forwarded to `jsx_to_term'
-spec handle_event(Event::any(), State::state()) -> state().

handle_event(end_json, {Acc, Config, State}) ->
    {[jsx_to_term:get_value(State) | Acc], Config, State};
handle_event(Event, {Acc, Config, State}) ->
    {Acc, Config, jsx_to_term:handle_event(Event, State)}.

--------------------------------------------------------------------------------
/src/jsx_encoder.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2013 Alisdair Sullivan

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_encoder).

-export([encoder/3, encode/1, encode/2]).

%% build a fun that turns an erlang term into a token list (terminated by
%% `end_json') and feeds it to a parser created for `Handler'
-spec encoder(Handler::module(), State::any(), Config::jsx_config:options()) -> jsx:encoder().

encoder(Handler, State, Config) ->
    Parser = jsx:parser(Handler, State, Config),
    fun(Term) ->
        Tokens = encode(Term) ++ [end_json],
        Parser(Tokens)
    end.


%% encode with this module as the entry point for nested values
-spec encode(Term::any()) -> [any(), ...].

encode(Term) -> encode(Term, ?MODULE).


%% `EntryPoint:encode/2' is what gets called for every nested value
-spec encode(Term::any(), EntryPoint::module()) -> [any(), ...].

encode(Map, _EntryPoint) when is_map(Map), map_size(Map) =:= 0 ->
    [start_object, end_object];
encode(Map, EntryPoint) when is_map(Map) ->
    [start_object | unpack(Map, EntryPoint)];
encode(Other, EntryPoint) ->
    encode_(Other, EntryPoint).

encode_([], _EntryPoint) ->
    [start_array, end_array];
encode_([{}], _EntryPoint) ->
    [start_object, end_object];
%% datetime special case: a list headed by a datetime tuple is an array, not
%% a proplist
encode_([{{_,_,_},{_,_,_}} = DateTime|Rest], EntryPoint) ->
    [start_array, DateTime | unhitch(Rest, EntryPoint)];
encode_([{_, _}|_] = Proplist, EntryPoint) ->
    [start_object | unzip(Proplist, EntryPoint)];
encode_(List, EntryPoint) when is_list(List) ->
    [start_array | unhitch(List, EntryPoint)];
%% anything else is passed through as a single token
encode_(Else, _EntryPoint) ->
    [Else].


%% proplist -> key, encoded value, ..., end_object. keys must be integers,
%% binaries or atoms; anything else (including a non-pair element) is badarg
unzip([{K, V}|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) ->
    [K] ++ EntryPoint:encode(V, EntryPoint) ++ unzip(Rest, EntryPoint);
unzip([], _) -> [end_object];
unzip(_, _) -> erlang:error(badarg).


%% list -> encoded elements ..., end_array
unhitch([V|Rest], EntryPoint) ->
    EntryPoint:encode(V, EntryPoint) ++ unhitch(Rest, EntryPoint);
unhitch([], _) -> [end_array];
%% a non-list tail (improper list) is reported as badarg, like unzip/2 does
%% for malformed proplists, instead of crashing with function_clause
unhitch(_, _) -> erlang:error(badarg).

%% map -> key, encoded value, ..., end_object, walking the keys returned by
%% maps:keys/1
unpack(Map, EntryPoint) -> unpack(Map, maps:keys(Map), EntryPoint).

unpack(Map, [K|Rest], EntryPoint) when is_integer(K); is_binary(K); is_atom(K) ->
    [K] ++ EntryPoint:encode(maps:get(K, Map), EntryPoint) ++ unpack(Map, Rest, EntryPoint);
unpack(_, [], _) -> [end_object];
%% a key that is not an integer, binary or atom is reported as badarg, like
%% unzip/2 does for proplists, instead of crashing with function_clause
unpack(_, _, _) -> erlang:error(badarg).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


parser(Term, Opts) -> (jsx:parser(jsx, [], Opts))(Term).


error_test_() ->
    [
        {"value error", ?_assertError(badarg, parser(self(), []))},
        {"string error", ?_assertError(badarg, parser(<<239, 191, 191>>, [strict]))}
    ].

custom_error_handler_test_() ->
    Error = fun(Term, {_, State, _, _}, _) -> {State, Term} end,
    [
        {"value error", ?_assertEqual(
            {value, [self()]},
            parser(self(), [{error_handler, Error}])
        )},
        {"string error", ?_assertEqual(
            {value, [{string, <<237, 160, 128>>}]},
            parser(<<237, 160, 128>>, [{error_handler, Error}, strict])
        )}
    ].

improper_lists_test_() ->
    [
        {"improper proplist", ?_assertError(
            badarg,
            encode([{<<"key">>, <<"value">>}, false])
        )},
        {"improper list", ?_assertError(
            badarg,
            encode([{literal, true}, false, null])
        )}
    ].

invalid_term_test_() ->
    [
        {"map with invalid key", ?_assertError(
            badarg,
            encode(#{1.5 => true})
        )},
        {"list with non-list tail", ?_assertError(
            badarg,
            encode([true|false])
        )}
    ].

-endif.

--------------------------------------------------------------------------------
/src/jsx_parser.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2013 Alisdair Sullivan

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_parser).

-export([parser/3, resume/5]).
-export([init/1, handle_event/2]).


%% return a fun that parses a token list. the handler is initialised and the
%% config parsed each time the returned fun is called, not when it is created
-spec parser(Handler::module(), State::any(), Config::jsx_config:options()) -> jsx:parser().

parser(Handler, State, Config) ->
    fun(Tokens) ->
        Initial = {Handler, Handler:init(State)},
        value(Tokens, Initial, [], jsx_config:parse_config(Config))
    end.


%% resume allows continuation from interrupted decoding without having to explicitly export
%% all states
-spec resume(
        Rest::jsx:token(),
        State::atom(),
        Handler::module(),
        Stack::list(atom()),
        Config::jsx:config()
    ) -> jsx:parser() | {incomplete, jsx:parser()}.

resume(Rest, State, Handler, Stack, Config) ->
    %% pick the state function first, then apply it; an unknown state name
    %% fails the case
    Step = case State of
        value -> fun value/4;
        object -> fun object/4;
        array -> fun array/4;
        maybe_done -> fun maybe_done/4;
        done -> fun done/4
    end,
    Step(Rest, Handler, Stack, Config).


-include("jsx_config.hrl").


%% error, incomplete and event macros
-ifndef(error).
-define(error(State, Terms, Handler, Stack, Config),
    case Config#config.error_handler of
        false -> erlang:error(badarg);
        F -> F(Terms, {parser, State, Handler, Stack}, jsx_config:config_to_list(Config))
    end

).
-endif.


%% called when the tokens run out before the term is complete:
%%  * not streaming: report an error
%%  * streaming, no incomplete_handler: return a continuation
%%  * streaming, incomplete_handler set: delegate to it
incomplete(State, Handler, Stack, Config=#config{stream=false}) ->
    ?error(State, [], Handler, Stack, Config);
incomplete(State, Handler, Stack, Config=#config{incomplete_handler=false}) ->
    Continue = fun
        %% the caller signals there is no more input; if the parse is still
        %% incomplete after feeding end_json it is an error
        (End) when End == end_stream; End == end_json ->
            case resume([end_json], State, Handler, Stack, Config) of
                {incomplete, _} -> ?error(State, [], Handler, Stack, Config);
                Else -> Else
            end;
        (Tokens) ->
            resume(Tokens, State, Handler, Stack, Config)
    end,
    {incomplete, Continue};
incomplete(State, Handler, Stack, Config=#config{incomplete_handler=F}) ->
    F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)).


%% forward an event to the handler module and return the updated
%% {Handler, State} pair
handle_event(Event, {Handler, State}, _Config) ->
    NextState = Handler:handle_event(Event, State),
    {Handler, NextState}.


%% value/4: expects a single json value at the head of the token list. bare
%% terms (binaries, numbers, atoms, datetimes) and tagged tokens
%% ({string, _}, {integer, _}, ...) are both accepted; tagged tokens are
%% unwrapped and re-dispatched
value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
    try clean_string(String, Config) of Clean ->
        maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config)
    catch error:badarg ->
        ?error(value, [{string, String}|Tokens], Handler, Stack, Config)
    end;
value([true|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
value([false|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
value([null|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
value([start_object|Tokens], Handler, Stack, Config) ->
    object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) ->
    array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
    maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
    maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config);
%% raw json: decode it to tokens with this module as the handler and splice
%% the result in front of the remaining tokens
value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
    value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
%% any three-tuple is treated as an erlang timestamp
value([{_,_,_}=Timestamp|Tokens], Handler, Stack, Config) ->
    {{Year, Month, Day}, {Hour, Min, Sec}} = calendar:now_to_datetime(
        Timestamp),
    value([{string, unicode:characters_to_binary(io_lib:format(
            "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0BZ",
            [Year, Month, Day, Hour, Min, Sec]
        ))}|Tokens],
        Handler,
        Stack,
        Config
    );
%% datetime with integer seconds
value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config)
when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_integer(Min), is_integer(Sec) ->
    value([{string, unicode:characters_to_binary(io_lib:format(
            "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0BZ",
            [Year, Month, Day, Hour, Min, Sec]
        ))}|Tokens],
        Handler,
        Stack,
        Config
    );
%% datetime with fractional seconds
value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config)
when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_integer(Min), is_float(Sec) ->
    value([{string, unicode:characters_to_binary(io_lib:format(
            "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~9.6.0fZ",
            [Year, Month, Day, Hour, Min, Sec]
        ))}|Tokens],
        Handler,
        Stack,
        Config
    );
value([{literal, Value}|Tokens], Handler, Stack, Config)
when Value == true; Value == false; Value == null ->
    value([Value] ++ Tokens, Handler, Stack, Config);
value([{integer, Value}|Tokens], Handler, Stack, Config)
when is_integer(Value) ->
    value([Value] ++ Tokens, Handler, Stack, Config);
value([{float, Value}|Tokens], Handler, Stack, Config)
when is_float(Value) ->
    value([Value] ++ Tokens, Handler, Stack, Config);
value([{string, Value}|Tokens], Handler, Stack, Config)
when is_binary(Value); is_atom(Value) ->
    value([Value] ++ Tokens, Handler, Stack, Config);
value([{number, Value}|Tokens], Handler, Stack, Config)
when is_float(Value); is_integer(Value) ->
    value([Value] ++ Tokens, Handler, Stack, Config);
%% any remaining atom becomes a string
value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
    value([{string, atom_to_binary(String, utf8)}] ++ Tokens, Handler, Stack, Config);
value([], Handler, Stack, Config) ->
    incomplete(value, Handler, Stack, Config);
value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(value, BadTokens, Handler, Stack, Config);
%% a single token not wrapped in a list
value(Token, Handler, Stack, Config) ->
    value([Token], Handler, Stack, Config).


%% object/4: inside an object, expects either end_object or a key
object([end_object|Tokens], Handler, [object|Stack], Config) ->
    maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
object([{key, Key}|Tokens], Handler, Stack, Config)
when is_atom(Key); is_binary(Key); is_integer(Key) ->
    object([Key|Tokens], Handler, Stack, Config);
object([Key|Tokens], Handler, [object|Stack], Config)
when is_atom(Key); is_binary(Key); is_integer(Key) ->
    try clean_string(fix_key(Key), Config)
    of K ->
        value(
            Tokens,
            handle_event({key, K}, Handler, Config),
            [object|Stack],
            Config
        )
    catch error:badarg ->
        %% NOTE(review): Stack is the tail bound in the clause head, so the
        %% error handler sees the stack without the current `object' frame --
        %% confirm this is intended
        ?error(object, [{string, Key}|Tokens], Handler, Stack, Config)
    end;
object([], Handler, Stack, Config) ->
    incomplete(object, Handler, Stack, Config);
%% a token list that matched none of the clauses above is an error. without
%% this clause the catch-all below would wrap the list in another list and
%% recurse forever
object(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(object, BadTokens, Handler, Stack, Config);
%% a single token not wrapped in a list
object(Token, Handler, Stack, Config) ->
    object([Token], Handler, Stack, Config).


%% array/4: inside an array, expects either end_array or a value
array([end_array|Tokens], Handler, [array|Stack], Config) ->
    maybe_done(Tokens, handle_event(end_array, Handler, Config), Stack, Config);
array([], Handler, Stack, Config) ->
    incomplete(array, Handler, Stack, Config);
array(Tokens, Handler, Stack, Config) when is_list(Tokens) ->
    value(Tokens, Handler, Stack, Config);
%% a single token not wrapped in a list
array(Token, Handler, Stack, Config) ->
    array([Token], Handler, Stack, Config).


%% maybe_done/4: a value has just been completed. with an empty stack only
%% end_json may follow; otherwise continue with the enclosing object or array
maybe_done([end_json], Handler, [], Config) ->
    done([end_json], Handler, [], Config);
maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) ->
    object(Tokens, Handler, Stack, Config);
maybe_done(Tokens, Handler, [array|_] = Stack, Config) when is_list(Tokens) ->
    array(Tokens, Handler, Stack, Config);
maybe_done([], Handler, Stack, Config) ->
    incomplete(maybe_done, Handler, Stack, Config);
maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(maybe_done, BadTokens, Handler, Stack, Config);
%% a single token not wrapped in a list
maybe_done(Token, Handler, Stack, Config) ->
    maybe_done([Token], Handler, Stack, Config).


%% done/4: emit end_json and return the handler's final state. when streaming,
%% running out of tokens yields a continuation instead
done([], Handler, [], Config=#config{stream=true}) ->
    incomplete(done, Handler, [], Config);
done(Tokens, Handler, [], Config) when Tokens =:= [end_json]; Tokens =:= [] ->
    {_Handler, FinalState} = handle_event(end_json, Handler, Config),
    FinalState;
done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(done, BadTokens, Handler, Stack, Config);
%% a single token not wrapped in a list
done(Token, Handler, Stack, Config) ->
    done([Token], Handler, Stack, Config).


%% object keys are always emitted as binaries
fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8);
fix_key(Key) when is_integer(Key) -> integer_to_binary(Key);
fix_key(Key) when is_binary(Key) -> Key.


%% with dirty_strings set the binary is passed through untouched
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
clean_string(Bin, Config) -> clean(Bin, [], Config).


%% clean/3 walks the binary and builds an iolist in Acc. bytes that may need
%% escaping or replacing are handed to maybe_replace/2 one at a time; runs of
%% characters that need no treatment are copied in one piece by start_count/3

%% unroll the control characters
clean(<<0, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(0, Config)], Config);
clean(<<1, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(1, Config)], Config);
clean(<<2, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(2, Config)], Config);
clean(<<3, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(3, Config)], Config);
clean(<<4, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(4, Config)], Config);
clean(<<5, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(5, Config)], Config);
clean(<<6, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(6, Config)], Config);
clean(<<7, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(7, Config)], Config);
clean(<<8, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(8, Config)], Config);
clean(<<9, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(9, Config)], Config);
clean(<<10, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(10, Config)], Config);
clean(<<11, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(11, Config)], Config);
clean(<<12, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(12, Config)], Config);
clean(<<13, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(13, Config)], Config);
clean(<<14, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(14, Config)], Config);
clean(<<15, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(15, Config)], Config);
clean(<<16, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16, Config)], Config);
clean(<<17, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(17, Config)], Config);
clean(<<18, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(18, Config)], Config);
clean(<<19, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(19, Config)], Config);
clean(<<20, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(20, Config)], Config);
clean(<<21, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(21, Config)], Config);
clean(<<22, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(22, Config)], Config);
clean(<<23, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(23, Config)], Config);
clean(<<24, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(24, Config)], Config);
clean(<<25, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(25, Config)], Config);
clean(<<26, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(26, Config)], Config);
clean(<<27, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(27, Config)], Config);
clean(<<28, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(28, Config)], Config);
clean(<<29, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(29, Config)], Config);
clean(<<30, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(30, Config)], Config);
clean(<<31, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(31, Config)], Config);
%% double quote, forward slash and backslash
clean(<<34, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(34, Config)], Config);
clean(<<47, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(47, Config)], Config);
clean(<<92, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(92, Config)], Config);
%% with uescape set every non-ascii codepoint is written as a \uXXXX escape;
%% ascii still goes through the bulk copy
clean(<<X/utf8, Rest/binary>> = Bin, Acc, Config=#config{uescape=true}) ->
    case X of
        X when X < 16#80 -> start_count(Bin, Acc, Config);
        _ -> clean(Rest, [Acc, json_escape_sequence(X)], Config)
    end;
%% u+2028
clean(<<226, 128, 168, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16#2028, Config)], Config);
%% u+2029
clean(<<226, 128, 169, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16#2029, Config)], Config);
%% any other valid utf8 codepoint starts a run that can be copied verbatim
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> start_count(Bin, Acc, Config);
%% surrogates
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
    clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
    clean(strip_continuations(Rest, 1), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
    clean(strip_continuations(Rest, 2), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
    clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config);
%% any other byte is invalid on its own
clean(<<_, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(badutf, Config)], Config);
clean(<<>>, Acc, _) -> iolist_to_binary(Acc).


%% copy the longest prefix of Bin that needs no escaping (its byte length is
%% computed by count/3) into the accumulator in one piece, then resume
%% cleaning at the first byte after it
start_count(Bin, Acc, Config) ->
    Size = count(Bin, 0, Config),
    <<Clean:Size/binary, Rest/binary>> = Bin,
    clean(Rest, [Acc, Clean], Config).


%% again, unrolling ascii makes a huge difference.
sadly 343 | count(<<0, _/binary>>, N, _) -> N; 344 | count(<<1, _/binary>>, N, _) -> N; 345 | count(<<2, _/binary>>, N, _) -> N; 346 | count(<<3, _/binary>>, N, _) -> N; 347 | count(<<4, _/binary>>, N, _) -> N; 348 | count(<<5, _/binary>>, N, _) -> N; 349 | count(<<6, _/binary>>, N, _) -> N; 350 | count(<<7, _/binary>>, N, _) -> N; 351 | count(<<8, _/binary>>, N, _) -> N; 352 | count(<<9, _/binary>>, N, _) -> N; 353 | count(<<10, _/binary>>, N, _) -> N; 354 | count(<<11, _/binary>>, N, _) -> N; 355 | count(<<12, _/binary>>, N, _) -> N; 356 | count(<<13, _/binary>>, N, _) -> N; 357 | count(<<14, _/binary>>, N, _) -> N; 358 | count(<<15, _/binary>>, N, _) -> N; 359 | count(<<16, _/binary>>, N, _) -> N; 360 | count(<<17, _/binary>>, N, _) -> N; 361 | count(<<18, _/binary>>, N, _) -> N; 362 | count(<<19, _/binary>>, N, _) -> N; 363 | count(<<20, _/binary>>, N, _) -> N; 364 | count(<<21, _/binary>>, N, _) -> N; 365 | count(<<22, _/binary>>, N, _) -> N; 366 | count(<<23, _/binary>>, N, _) -> N; 367 | count(<<24, _/binary>>, N, _) -> N; 368 | count(<<25, _/binary>>, N, _) -> N; 369 | count(<<26, _/binary>>, N, _) -> N; 370 | count(<<27, _/binary>>, N, _) -> N; 371 | count(<<28, _/binary>>, N, _) -> N; 372 | count(<<29, _/binary>>, N, _) -> N; 373 | count(<<30, _/binary>>, N, _) -> N; 374 | count(<<31, _/binary>>, N, _) -> N; 375 | count(<<32, Rest/binary>>, N, Config) -> 376 | count(Rest, N + 1, Config); 377 | count(<<33, Rest/binary>>, N, Config) -> 378 | count(Rest, N + 1, Config); 379 | count(<<34, _/binary>>, N, _) -> N; 380 | count(<<35, Rest/binary>>, N, Config) -> 381 | count(Rest, N + 1, Config); 382 | count(<<36, Rest/binary>>, N, Config) -> 383 | count(Rest, N + 1, Config); 384 | count(<<37, Rest/binary>>, N, Config) -> 385 | count(Rest, N + 1, Config); 386 | count(<<38, Rest/binary>>, N, Config) -> 387 | count(Rest, N + 1, Config); 388 | count(<<39, Rest/binary>>, N, Config) -> 389 | count(Rest, N + 1, Config); 390 | count(<<40, Rest/binary>>, N, Config) -> 391 
| count(Rest, N + 1, Config); 392 | count(<<41, Rest/binary>>, N, Config) -> 393 | count(Rest, N + 1, Config); 394 | count(<<42, Rest/binary>>, N, Config) -> 395 | count(Rest, N + 1, Config); 396 | count(<<43, Rest/binary>>, N, Config) -> 397 | count(Rest, N + 1, Config); 398 | count(<<44, Rest/binary>>, N, Config) -> 399 | count(Rest, N + 1, Config); 400 | count(<<45, Rest/binary>>, N, Config) -> 401 | count(Rest, N + 1, Config); 402 | count(<<46, Rest/binary>>, N, Config) -> 403 | count(Rest, N + 1, Config); 404 | count(<<47, _/binary>>, N, _) -> N; 405 | count(<<48, Rest/binary>>, N, Config) -> 406 | count(Rest, N + 1, Config); 407 | count(<<49, Rest/binary>>, N, Config) -> 408 | count(Rest, N + 1, Config); 409 | count(<<50, Rest/binary>>, N, Config) -> 410 | count(Rest, N + 1, Config); 411 | count(<<51, Rest/binary>>, N, Config) -> 412 | count(Rest, N + 1, Config); 413 | count(<<52, Rest/binary>>, N, Config) -> 414 | count(Rest, N + 1, Config); 415 | count(<<53, Rest/binary>>, N, Config) -> 416 | count(Rest, N + 1, Config); 417 | count(<<54, Rest/binary>>, N, Config) -> 418 | count(Rest, N + 1, Config); 419 | count(<<55, Rest/binary>>, N, Config) -> 420 | count(Rest, N + 1, Config); 421 | count(<<56, Rest/binary>>, N, Config) -> 422 | count(Rest, N + 1, Config); 423 | count(<<57, Rest/binary>>, N, Config) -> 424 | count(Rest, N + 1, Config); 425 | count(<<58, Rest/binary>>, N, Config) -> 426 | count(Rest, N + 1, Config); 427 | count(<<59, Rest/binary>>, N, Config) -> 428 | count(Rest, N + 1, Config); 429 | count(<<60, Rest/binary>>, N, Config) -> 430 | count(Rest, N + 1, Config); 431 | count(<<61, Rest/binary>>, N, Config) -> 432 | count(Rest, N + 1, Config); 433 | count(<<62, Rest/binary>>, N, Config) -> 434 | count(Rest, N + 1, Config); 435 | count(<<63, Rest/binary>>, N, Config) -> 436 | count(Rest, N + 1, Config); 437 | count(<<64, Rest/binary>>, N, Config) -> 438 | count(Rest, N + 1, Config); 439 | count(<<65, Rest/binary>>, N, Config) -> 440 | count(Rest, 
N + 1, Config); 441 | count(<<66, Rest/binary>>, N, Config) -> 442 | count(Rest, N + 1, Config); 443 | count(<<67, Rest/binary>>, N, Config) -> 444 | count(Rest, N + 1, Config); 445 | count(<<68, Rest/binary>>, N, Config) -> 446 | count(Rest, N + 1, Config); 447 | count(<<69, Rest/binary>>, N, Config) -> 448 | count(Rest, N + 1, Config); 449 | count(<<70, Rest/binary>>, N, Config) -> 450 | count(Rest, N + 1, Config); 451 | count(<<71, Rest/binary>>, N, Config) -> 452 | count(Rest, N + 1, Config); 453 | count(<<72, Rest/binary>>, N, Config) -> 454 | count(Rest, N + 1, Config); 455 | count(<<73, Rest/binary>>, N, Config) -> 456 | count(Rest, N + 1, Config); 457 | count(<<74, Rest/binary>>, N, Config) -> 458 | count(Rest, N + 1, Config); 459 | count(<<75, Rest/binary>>, N, Config) -> 460 | count(Rest, N + 1, Config); 461 | count(<<76, Rest/binary>>, N, Config) -> 462 | count(Rest, N + 1, Config); 463 | count(<<77, Rest/binary>>, N, Config) -> 464 | count(Rest, N + 1, Config); 465 | count(<<78, Rest/binary>>, N, Config) -> 466 | count(Rest, N + 1, Config); 467 | count(<<79, Rest/binary>>, N, Config) -> 468 | count(Rest, N + 1, Config); 469 | count(<<80, Rest/binary>>, N, Config) -> 470 | count(Rest, N + 1, Config); 471 | count(<<81, Rest/binary>>, N, Config) -> 472 | count(Rest, N + 1, Config); 473 | count(<<82, Rest/binary>>, N, Config) -> 474 | count(Rest, N + 1, Config); 475 | count(<<83, Rest/binary>>, N, Config) -> 476 | count(Rest, N + 1, Config); 477 | count(<<84, Rest/binary>>, N, Config) -> 478 | count(Rest, N + 1, Config); 479 | count(<<85, Rest/binary>>, N, Config) -> 480 | count(Rest, N + 1, Config); 481 | count(<<86, Rest/binary>>, N, Config) -> 482 | count(Rest, N + 1, Config); 483 | count(<<87, Rest/binary>>, N, Config) -> 484 | count(Rest, N + 1, Config); 485 | count(<<88, Rest/binary>>, N, Config) -> 486 | count(Rest, N + 1, Config); 487 | count(<<89, Rest/binary>>, N, Config) -> 488 | count(Rest, N + 1, Config); 489 | count(<<90, Rest/binary>>, N, 
Config) -> 490 | count(Rest, N + 1, Config); 491 | count(<<91, Rest/binary>>, N, Config) -> 492 | count(Rest, N + 1, Config); 493 | count(<<92, _/binary>>, N, _) -> N; 494 | count(<<93, Rest/binary>>, N, Config) -> 495 | count(Rest, N + 1, Config); 496 | count(<<94, Rest/binary>>, N, Config) -> 497 | count(Rest, N + 1, Config); 498 | count(<<95, Rest/binary>>, N, Config) -> 499 | count(Rest, N + 1, Config); 500 | count(<<96, Rest/binary>>, N, Config) -> 501 | count(Rest, N + 1, Config); 502 | count(<<97, Rest/binary>>, N, Config) -> 503 | count(Rest, N + 1, Config); 504 | count(<<98, Rest/binary>>, N, Config) -> 505 | count(Rest, N + 1, Config); 506 | count(<<99, Rest/binary>>, N, Config) -> 507 | count(Rest, N + 1, Config); 508 | count(<<100, Rest/binary>>, N, Config) -> 509 | count(Rest, N + 1, Config); 510 | count(<<101, Rest/binary>>, N, Config) -> 511 | count(Rest, N + 1, Config); 512 | count(<<102, Rest/binary>>, N, Config) -> 513 | count(Rest, N + 1, Config); 514 | count(<<103, Rest/binary>>, N, Config) -> 515 | count(Rest, N + 1, Config); 516 | count(<<104, Rest/binary>>, N, Config) -> 517 | count(Rest, N + 1, Config); 518 | count(<<105, Rest/binary>>, N, Config) -> 519 | count(Rest, N + 1, Config); 520 | count(<<106, Rest/binary>>, N, Config) -> 521 | count(Rest, N + 1, Config); 522 | count(<<107, Rest/binary>>, N, Config) -> 523 | count(Rest, N + 1, Config); 524 | count(<<108, Rest/binary>>, N, Config) -> 525 | count(Rest, N + 1, Config); 526 | count(<<109, Rest/binary>>, N, Config) -> 527 | count(Rest, N + 1, Config); 528 | count(<<110, Rest/binary>>, N, Config) -> 529 | count(Rest, N + 1, Config); 530 | count(<<111, Rest/binary>>, N, Config) -> 531 | count(Rest, N + 1, Config); 532 | count(<<112, Rest/binary>>, N, Config) -> 533 | count(Rest, N + 1, Config); 534 | count(<<113, Rest/binary>>, N, Config) -> 535 | count(Rest, N + 1, Config); 536 | count(<<114, Rest/binary>>, N, Config) -> 537 | count(Rest, N + 1, Config); 538 | count(<<115, Rest/binary>>, 
N, Config) -> 539 | count(Rest, N + 1, Config); 540 | count(<<116, Rest/binary>>, N, Config) -> 541 | count(Rest, N + 1, Config); 542 | count(<<117, Rest/binary>>, N, Config) -> 543 | count(Rest, N + 1, Config); 544 | count(<<118, Rest/binary>>, N, Config) -> 545 | count(Rest, N + 1, Config); 546 | count(<<119, Rest/binary>>, N, Config) -> 547 | count(Rest, N + 1, Config); 548 | count(<<120, Rest/binary>>, N, Config) -> 549 | count(Rest, N + 1, Config); 550 | count(<<121, Rest/binary>>, N, Config) -> 551 | count(Rest, N + 1, Config); 552 | count(<<122, Rest/binary>>, N, Config) -> 553 | count(Rest, N + 1, Config); 554 | count(<<123, Rest/binary>>, N, Config) -> 555 | count(Rest, N + 1, Config); 556 | count(<<124, Rest/binary>>, N, Config) -> 557 | count(Rest, N + 1, Config); 558 | count(<<125, Rest/binary>>, N, Config) -> 559 | count(Rest, N + 1, Config); 560 | count(<<126, Rest/binary>>, N, Config) -> 561 | count(Rest, N + 1, Config); 562 | count(<<127, Rest/binary>>, N, Config) -> 563 | count(Rest, N + 1, Config); 564 | count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N; 565 | count(<>, N, Config) -> 566 | case X of 567 | X when X < 16#800 -> count(Rest, N + 2, Config); 568 | 16#2028 -> N; 569 | 16#2029 -> N; 570 | X when X < 16#10000 -> count(Rest, N + 3, Config); 571 | _ -> count(Rest, N + 4, Config) 572 | end; 573 | count(<<_, _/binary>>, N, _) -> N; 574 | count(<<>>, N, _) -> N. 575 | 576 | 577 | strip_continuations(Bin, 0) -> Bin; 578 | strip_continuations(<>, N) when X >= 128, X =< 191 -> 579 | strip_continuations(Rest, N - 1); 580 | %% not a continuation byte 581 | strip_continuations(Bin, _) -> Bin. 
%% Translate one codepoint (or one of the marker atoms `surrogate' / `badutf')
%% into the binary that is emitted for it.
%%
%% While `escaped_strings' is set, the characters matched below are replaced by
%% their escape sequences. Clauses are tried top to bottom, so the escaping
%% clauses must stay ahead of the generic ones at the end.
maybe_replace($\b, #config{escaped_strings=true}) -> <<$\\, $b>>;
maybe_replace($\t, #config{escaped_strings=true}) -> <<$\\, $t>>;
maybe_replace($\n, #config{escaped_strings=true}) -> <<$\\, $n>>;
maybe_replace($\f, #config{escaped_strings=true}) -> <<$\\, $f>>;
maybe_replace($\r, #config{escaped_strings=true}) -> <<$\\, $r>>;
maybe_replace($\", #config{escaped_strings=true}) -> <<$\\, $\">>;
%% a forward slash is only escaped when `escaped_forward_slashes' is also set
maybe_replace($/, Config=#config{escaped_strings=true}) ->
    case Config#config.escaped_forward_slashes of
        true -> <<$\\, $/>>;
        false -> <<$/>>
    end;
maybe_replace($\\, #config{escaped_strings=true}) -> <<$\\, $\\>>;
%% remaining codepoints below 32 have no short form and become \u00XX
maybe_replace(X, #config{escaped_strings=true}) when X < 32 ->
    json_escape_sequence(X);
%% u+2028 and u+2029 are escaped unless `unescaped_jsonp' is set
maybe_replace(X, Config=#config{escaped_strings=true}) when X == 16#2028; X == 16#2029 ->
    case Config#config.unescaped_jsonp of
        true -> <<X/utf8>>;
        false -> json_escape_sequence(X)
    end;
%% marker atoms are an error under `strict_utf8' ...
maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) ->
    erlang:error(badarg);
%% ... and are otherwise replaced by u+fffd
maybe_replace(surrogate, _Config) ->
    <<16#fffd/utf8>>;
maybe_replace(badutf, _Config) ->
    <<16#fffd/utf8>>;
%% everything else is emitted as its utf8 encoding
maybe_replace(X, _Config) ->
    <<X/utf8>>.


%% convert a codepoint to its \uXXXX equiv.
%% codepoints below 16#10000 produce a single six byte binary; anything larger
%% is split into two 10 bit halves and returned as a list of two escapes, one
%% offset by 16#d800 and one by 16#dc00
json_escape_sequence(X) when X < 65536 ->
    <<A:4, B:4, C:4, D:4>> = <<X:16>>,
    <<$\\, $u, (to_hex(A)), (to_hex(B)), (to_hex(C)), (to_hex(D))>>;
json_escape_sequence(X) ->
    Adjusted = X - 16#10000,
    <<A:10, B:10>> = <<Adjusted:20>>,
    [json_escape_sequence(A + 16#d800), json_escape_sequence(B + 16#dc00)].


%% map a four bit value to its lowercase hexadecimal ascii character
to_hex(10) -> $a;
to_hex(11) -> $b;
to_hex(12) -> $c;
to_hex(13) -> $d;
to_hex(14) -> $e;
to_hex(15) -> $f;
to_hex(X) -> X + 48.    %% ascii "1" is [49], "2" is [50], etc...


%% for raw input
-spec init([]) -> [].

init([]) -> [].
%% Handler callback: events are pushed onto the front of the accumulator and
%% the accumulator is reversed once `end_json' arrives.
-spec handle_event(Event::any(), Acc::list()) -> list().

handle_event(end_json, Acc) ->
    lists:reverse(Acc);
handle_event(Event, Acc) ->
    [Event | Acc].



-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


%% test helper: run `Events' through value/4 with the `jsx' handler and an
%% empty stack, using the parsed form of `Config'
parse(Events, Config) ->
    ParsedConfig = jsx_config:parse_config(Config),
    value(Events, {jsx, []}, [], ParsedConfig).


%% every case must raise `badarg'
error_test_() ->
    Failing = [
        {"maybe_done error", [start_array, end_array, start_array, end_json], []},
        {"done error", [{string, <<"">>}, {literal, true}, end_json], []},
        {"string error", [{string, <<237, 160, 128>>}, end_json], [strict]}
    ],
    [{"value error", ?_assertError(badarg, parse([self()], []))}
        | [{Label, ?_assertError(badarg, parse(Events, Opts))}
            || {Label, Events, Opts} <- Failing]].


%% with an `error_handler' installed the handler's return value is the result;
%% the handler used here returns `{State, Rest}'
custom_error_handler_test_() ->
    Handler = fun(Remaining, {_, StateName, _, _}, _) -> {StateName, Remaining} end,
    Opts = [{error_handler, Handler}],
    Bad = <<237, 160, 128>>,
    Cases = [
        {"maybe_done error",
            {maybe_done, [start_array, end_json]},
            [start_array, end_array, start_array, end_json],
            Opts},
        {"done error",
            {maybe_done, [{literal, true}, end_json]},
            [{string, <<"">>}, {literal, true}, end_json],
            Opts},
        {"string error",
            {value, [{string, Bad}, end_json]},
            [{string, Bad}, end_json],
            Opts ++ [strict]}
    ],
    [{"value error", ?_assertEqual({value, [self()]}, parse([self()], Opts))}
        | [{Label, ?_assertEqual(Expected, parse(Events, CaseOpts))}
            || {Label, Expected, Events, CaseOpts} <- Cases]].


%% event streams that stop before `end_json' must raise `badarg'
incomplete_test_() ->
    lists:map(
        fun({Label, Events}) ->
            {Label, ?_assertError(badarg, parse(Events, []))}
        end,
        [
            {"incomplete value", []},
            {"incomplete object", [start_object]},
            {"incomplete array", [start_array]},
            {"incomplete maybe_done", [start_array, end_array]}
        ]
    ).
%% an `incomplete_handler' that raises must surface its error to the caller
custom_incomplete_handler_test_() ->
    [
        {"custom incomplete handler", ?_assertError(
            badarg,
            parse([], [{incomplete_handler, fun(_, _, _) -> erlang:error(badarg) end}])
        )}
    ].


%% `{raw, Binary}' events: `Parse' builds a parser with this module as the
%% handler and feeds it the events followed by `end_json'
raw_test_() ->
    Parse = fun(Events, Config) -> (parser(?MODULE, [], Config))(Events ++ [end_json]) end,
    [
        {"raw empty list", ?_assertEqual(
            [start_array, end_array],
            Parse([{raw, <<"[]">>}], [])
        )},
        {"raw empty object", ?_assertEqual(
            [start_object, end_object],
            Parse([{raw, <<"{}">>}], [])
        )},
        {"raw chunk inside stream", ?_assertEqual(
            [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object],
            Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
        )}
    ].


%% erlang refuses to encode certain codepoints, so fake them
%% the bits of `N' are packed by hand into the 1, 2, 3 or 4 byte utf8 layout,
%% without any validity check on the codepoint itself
to_fake_utf8(N) when N < 16#0080 -> <<N:8>>;
to_fake_utf8(N) when N < 16#0800 ->
    <<0:5, Y:5, X:6>> = <<N:16>>,
    <<2#110:3, Y:5, 2#10:2, X:6>>;
to_fake_utf8(N) when N < 16#10000 ->
    <<Z:4, Y:6, X:6>> = <<N:16>>,
    <<2#1110:4, Z:4, 2#10:2, Y:6, 2#10:2, X:6>>;
to_fake_utf8(N) ->
    <<0:3, W:3, Z:6, Y:6, X:6>> = <<N:24>>,
    <<2#11110:5, W:3, 2#10:2, Z:6, 2#10:2, Y:6, 2#10:2, X:6>>.


%% utf8 binary of the codepoints from 32 up to 16#ffff, leaving out 34, 47, 92,
%% 16#2028, 16#2029 and the range 16#d800..16#dfff
codepoints() ->
    unicode:characters_to_binary(
        [32, 33]
        ++ lists:seq(35, 46)
        ++ lists:seq(48, 91)
        ++ lists:seq(93, 16#2027)
        ++ lists:seq(16#202a, 16#d7ff)
        ++ lists:seq(16#e000, 16#ffff)
    ).


%% utf8 binary of every codepoint in 16#10000..16#1ffff plus the first
%% codepoint of each following 16#10000 sized range up to 16#100000
extended_codepoints() ->
    unicode:characters_to_binary(
        lists:seq(16#10000, 16#1ffff) ++ [
            16#20000, 16#30000, 16#40000, 16#50000, 16#60000,
            16#70000, 16#80000, 16#90000, 16#a0000, 16#b0000,
            16#c0000, 16#d0000, 16#e0000, 16#f0000, 16#100000
        ]
    ).


%% one hand-encoded binary per codepoint in 16#d800..16#dfff
surrogates() -> [ to_fake_utf8(N) || N <- lists:seq(16#d800, 16#dfff) ].
%% run clean_string/2 under `strict_utf8', reporting a `badarg' error as the
%% term `{error, badarg}' so results can be compared directly
clean_string_helper(String) ->
    try
        clean_string(String, #config{strict_utf8=true})
    catch
        error:badarg -> {error, badarg}
    end.


clean_string_test_() ->
    %% inputs that must come back from clean_string/2 unchanged
    Unchanged = [
        {"clean codepoints", fun codepoints/0, #config{}},
        {"clean extended codepoints", fun extended_codepoints/0, #config{}},
        {"escape path codepoints", fun codepoints/0, #config{escaped_strings=true}},
        {"escape path extended codepoints", fun extended_codepoints/0, #config{escaped_strings=true}}
    ],
    [{Title, ?_assertEqual(Input(), clean_string(Input(), Config))}
        || {Title, Input, Config} <- Unchanged]
    ++ [
        %% strict mode rejects every surrogate ...
        {"error surrogates", ?_assertEqual(
            lists:duplicate(length(surrogates()), {error, badarg}),
            [clean_string_helper(Surrogate) || Surrogate <- surrogates()]
        )},
        %% ... the default config replaces each one with u+fffd
        {"clean surrogates", ?_assertEqual(
            lists:duplicate(length(surrogates()), <<16#fffd/utf8>>),
            [clean_string(Surrogate, #config{}) || Surrogate <- surrogates()]
        )}
    ].
%% each row is `{Title, Expected, Codepoint, Config}': the utf8 encoding of
%% `Codepoint' is passed to clean_string/2 with `Config' and the result is
%% compared against `Expected'
escape_test_() ->
    Esc = #config{escaped_strings=true},
    Rows = [
        {"maybe_escape backspace", <<"\\b">>, 16#0008, Esc},
        {"don't escape backspace", <<"\b">>, 16#0008, #config{}},
        {"maybe_escape tab", <<"\\t">>, 16#0009, Esc},
        {"maybe_escape newline", <<"\\n">>, 16#000a, Esc},
        {"maybe_escape formfeed", <<"\\f">>, 16#000c, Esc},
        {"maybe_escape carriage return", <<"\\r">>, 16#000d, Esc},
        {"maybe_escape quote", <<"\\\"">>, 16#0022, Esc},
        {"maybe_escape forward slash", <<"\\/">>, 16#002f,
            Esc#config{escaped_forward_slashes=true}},
        {"do not maybe_escape forward slash", <<"/">>, 16#002f, Esc},
        {"maybe_escape backslash", <<"\\\\">>, 16#005c, Esc},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, 16#2028, Esc},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, 16#2028,
            Esc#config{unescaped_jsonp=true}},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, 16#2029, Esc},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, 16#2029,
            Esc#config{unescaped_jsonp=true}},
        {"maybe_escape u0000", <<"\\u0000">>, 16#0000, Esc},
        {"maybe_escape u0001", <<"\\u0001">>, 16#0001, Esc},
        {"maybe_escape u0002", <<"\\u0002">>, 16#0002, Esc},
        {"maybe_escape u0003", <<"\\u0003">>, 16#0003, Esc},
        {"maybe_escape u0004", <<"\\u0004">>, 16#0004, Esc},
        {"maybe_escape u0005", <<"\\u0005">>, 16#0005, Esc},
        {"maybe_escape u0006", <<"\\u0006">>, 16#0006, Esc},
        {"maybe_escape u0007", <<"\\u0007">>, 16#0007, Esc},
        {"maybe_escape u000b", <<"\\u000b">>, 16#000b, Esc},
        {"maybe_escape u000e", <<"\\u000e">>, 16#000e, Esc},
        {"maybe_escape u000f", <<"\\u000f">>, 16#000f, Esc},
        {"maybe_escape u0010", <<"\\u0010">>, 16#0010, Esc},
        {"maybe_escape u0011", <<"\\u0011">>, 16#0011, Esc},
        {"maybe_escape u0012", <<"\\u0012">>, 16#0012, Esc},
        {"maybe_escape u0013", <<"\\u0013">>, 16#0013, Esc},
        {"maybe_escape u0014", <<"\\u0014">>, 16#0014, Esc},
        {"maybe_escape u0015", <<"\\u0015">>, 16#0015, Esc},
        {"maybe_escape u0016", <<"\\u0016">>, 16#0016, Esc},
        {"maybe_escape u0017", <<"\\u0017">>, 16#0017, Esc},
        {"maybe_escape u0018", <<"\\u0018">>, 16#0018, Esc},
        {"maybe_escape u0019", <<"\\u0019">>, 16#0019, Esc},
        {"maybe_escape u001a", <<"\\u001a">>, 16#001a, Esc},
        {"maybe_escape u001b", <<"\\u001b">>, 16#001b, Esc},
        {"maybe_escape u001c", <<"\\u001c">>, 16#001c, Esc},
        {"maybe_escape u001d", <<"\\u001d">>, 16#001d, Esc},
        {"maybe_escape u001e", <<"\\u001e">>, 16#001e, Esc},
        {"maybe_escape u001f", <<"\\u001f">>, 16#001f, Esc}
    ],
    [{Title, ?_assertEqual(Expected, clean_string(<<Codepoint/utf8>>, Config))}
        || {Title, Expected, Codepoint, Config} <- Rows].


%% each row is `{Title, Input, Replaced}' and yields two tests, in this order:
%%   Title               - `strict_utf8' must raise `badarg' for Input
%%   Title ++ " replaced" - the default config must return Replaced
bad_utf8_test_() ->
    Fffd = <<16#fffd/utf8>>,
    FffdSpace = <<16#fffd/utf8, 32>>,
    FffdPair = <<16#fffd/utf8, 32, 16#fffd/utf8>>,
    Continuations = fun(N) -> binary:copy(<<16#0080>>, N) end,
    Replacements = fun(N) -> binary:copy(<<16#fffd/utf8>>, N) end,
    AllContinuations = lists:seq(16#0080, 16#00bf),
    Rows = [
        {"orphan continuation byte u+0080", <<16#0080>>, Fffd},
        {"orphan continuation byte u+00bf", <<16#00bf>>, Fffd},
        {"2 continuation bytes", Continuations(2), Replacements(2)},
        {"3 continuation bytes", Continuations(3), Replacements(3)},
        {"4 continuation bytes", Continuations(4), Replacements(4)},
        {"5 continuation bytes", Continuations(5), Replacements(5)},
        {"6 continuation bytes", Continuations(6), Replacements(6)},
        {"all continuation bytes",
            list_to_binary(AllContinuations),
            Replacements(length(AllContinuations))},
        {"lonely start byte", <<16#00c0>>, Fffd},
        {"lonely start bytes (2 byte)", <<16#00c0, 32, 16#00df>>, FffdPair},
        {"lonely start bytes (3 byte)", <<16#00e0, 32, 16#00ef>>, FffdPair},
        {"lonely start bytes (4 byte)", <<16#00f0, 32, 16#00f7>>, FffdPair},
        {"missing continuation byte (3 byte)", <<224, 160, 32>>, FffdSpace},
        {"missing continuation byte (4 byte missing one)", <<240, 144, 128, 32>>, FffdSpace},
        {"missing continuation byte (4 byte missing two)", <<240, 144, 32>>, FffdSpace},
        {"overlong encoding of u+002f (2 byte)", <<16#c0, 16#af, 32>>, FffdSpace},
        {"overlong encoding of u+002f (3 byte)", <<16#e0, 16#80, 16#af, 32>>, FffdSpace},
        {"overlong encoding of u+002f (4 byte)", <<16#f0, 16#80, 16#80, 16#af, 32>>, FffdSpace},
        {"highest overlong 2 byte sequence", <<16#c1, 16#bf, 32>>, FffdSpace},
        {"highest overlong 3 byte sequence", <<16#e0, 16#9f, 16#bf, 32>>, FffdSpace},
        {"highest overlong 4 byte sequence", <<16#f0, 16#8f, 16#bf, 16#bf, 32>>, FffdSpace}
    ],
    [Test
        || {Title, Input, Replaced} <- Rows,
           Test <- [
               {Title, ?_assertError(
                   badarg,
                   clean_string(Input, #config{strict_utf8=true})
               )},
               {Title ++ " replaced", ?_assertEqual(
                   Replaced,
                   clean_string(Input, #config{})
               )}
           ]].


%% codepoints below 16#10000 become a single `\uXXXX' binary
json_escape_sequence_test_() ->
    Rows = [
        {"json escape sequence test - 16#0000", <<"\\u0000"/utf8>>, 16#0000},
        {"json escape sequence test - 16#abc", <<"\\u0abc"/utf8>>, 16#abc},
        {"json escape sequence test - 16#def", <<"\\u0def"/utf8>>, 16#def}
    ],
    [{Title, ?_assertEqual(Expected, json_escape_sequence(Codepoint))}
        || {Title, Expected, Codepoint} <- Rows].
%% with `uescape' set, codepoints from 128 upwards come back as `\uXXXX'
%% escapes; the last two cases expect two escapes for one four byte input
uescape_test_() ->
    [
        {"\"\\u0080\"", ?_assertEqual(
            <<"\\u0080">>,
            clean_string(<<128/utf8>>, #config{uescape=true})
        )},
        {"\"\\u8ca8\\u5481\\u3002\\u0091\\u0091\"", ?_assertEqual(
            <<"\\u8ca8\\u5481\\u3002\\u0091\\u0091">>,
            clean_string(
                <<232,178,168,229,146,129,227,128,130,194,145,194,145>>,
                #config{uescape=true}
            )
        )},
        {"\"\\ud834\\udd1e\"", ?_assertEqual(
            <<"\\ud834\\udd1e">>,
            clean_string(<<240, 157, 132, 158>>, #config{uescape=true})
        )},
        {"\"\\ud83d\\ude0a\"", ?_assertEqual(
            <<"\\ud83d\\ude0a">>,
            clean_string(<<240, 159, 152, 138>>, #config{uescape=true})
        )}
    ].


%% fix_key/1 must return a binary for binary, atom and integer keys
%% (expected value first, so eunit failure reports label the two sides correctly)
fix_key_test_() ->
    [
        {"binary key", ?_assertEqual(<<"foo">>, fix_key(<<"foo">>))},
        {"atom key", ?_assertEqual(<<"foo">>, fix_key(foo))},
        {"integer key", ?_assertEqual(<<"123">>, fix_key(123))}
    ].


%% `{Date, Time}' tuples in the event stream come back as `{string, _}' events
%% holding a timestamp with a trailing `Z'
datetime_test_() ->
    [
        {"datetime", ?_assertEqual(
            [start_array, {string, <<"2014-08-13T23:12:34Z">>}, end_array, end_json],
            parse([start_array, {{2014,08,13},{23,12,34}}, end_array, end_json], [])
        )},
        %% distinct title so a failure can be told apart from the case above
        {"datetime with fractional seconds", ?_assertEqual(
            [start_array, {string, <<"2014-08-13T23:12:34.363369Z">>}, end_array, end_json],
            parse([start_array, {{2014,08,13},{23,12,34.363369}}, end_array, end_json], [])
        )}
    ].


%% a three element integer tuple is rendered the same way as a datetime
timestamp_test_() ->
    [
        {"timestamp", ?_assertEqual(
            [start_array, {string, <<"2016-01-15T18:19:28Z">>}, end_array, end_json],
            parse([start_array, {1452,881968,111772}, end_array, end_json], [])
        )}
    ].
%% a bare `{Key, Value}' tuple is not a valid event and must raise `badarg'
%% wherever a value is expected
rogue_tuple_test_() ->
    Rogue = {<<"key">>, <<"value">>},
    InObject = [start_object, <<"key">>, Rogue, end_object, end_json],
    InArray = [start_array, Rogue, end_array, end_json],
    [
        {"kv in value position of object", ?_assertError(badarg, parse(InObject, []))},
        {"kv in value position of list", ?_assertError(badarg, parse(InArray, []))}
    ].


-endif.
--------------------------------------------------------------------------------
/src/jsx_to_json.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2013 alisdair sullivan <alisdairsullivan@yahoo.ca>

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_to_json).

-export([to_json/2, format/2]).
-export([init/1, handle_event/2]).
-export([start_json/0, start_json/1]).
-export([start_object/1, start_array/1, finish/1, insert/2, get_key/1, get_value/1]).


%% formatting options plus the current nesting depth
-record(config, {
    space = 0,
    indent = 0,
    depth = 0,
    newline = <<$\n>>
}).

-type config() :: proplists:proplist().


%% Encode `Source' with this module as the handler. `escaped_strings' is
%% appended to the options before they are handed to jsx_config:extract_config/1.
-spec to_json(Source::jsx:json_term(), Config::jsx_config:options()) -> binary().

to_json(Source, Config) when is_list(Config) ->
    EncoderOpts = jsx_config:extract_config(Config ++ [escaped_strings]),
    Encode = jsx:encoder(?MODULE, Config, EncoderOpts),
    Encode(Source).


%% Decode the binary `Source' with this module as the handler, again with
%% `escaped_strings' appended. Any other argument combination is a `badarg'.
-spec format(Source::binary(), Config::jsx_config:options()) -> jsx:json_text().

format(Source, Config) when is_binary(Source), is_list(Config) ->
    DecoderOpts = jsx_config:extract_config(Config ++ [escaped_strings]),
    Decode = jsx:decoder(?MODULE, Config, DecoderOpts),
    Decode(Source);
format(_, _) -> erlang:error(badarg).


%% Fold an option list into a #config{} record. `space', `indent' and
%% `newline' are consumed here; any other option is skipped when
%% jsx_config:valid_flags/0 lists it and raises `badarg' when it does not.
parse_config(Options) -> parse_config(Options, #config{}).

parse_config([{space, Width}|Tail], Acc) when is_integer(Width), Width > 0 ->
    parse_config(Tail, Acc#config{space = Width});
parse_config([space|Tail], Acc) ->
    parse_config(Tail, Acc#config{space = 1});
parse_config([{indent, Width}|Tail], Acc) when is_integer(Width), Width > 0 ->
    parse_config(Tail, Acc#config{indent = Width});
parse_config([indent|Tail], Acc) ->
    parse_config(Tail, Acc#config{indent = 1});
parse_config([{newline, Newline}|Tail], Acc) when is_binary(Newline) ->
    parse_config(Tail, Acc#config{newline = Newline});
parse_config([{Flag, _}|Tail] = Options, Acc) ->
    case lists:member(Flag, jsx_config:valid_flags()) of
        true -> parse_config(Tail, Acc);
        false -> erlang:error(badarg, [Options, Acc])
    end;
parse_config([Flag|Tail] = Options, Acc) ->
    case lists:member(Flag, jsx_config:valid_flags()) of
        true -> parse_config(Tail, Acc);
        false -> erlang:error(badarg, [Options, Acc])
    end;
parse_config([], Acc) ->
    Acc.


-define(start_object, <<"{">>).
-define(start_array, <<"[">>).
-define(end_object, <<"}">>).
-define(end_array, <<"]">>).
-define(colon, <<":">>).
-define(comma, <<",">>).
-define(quote, <<"\"">>).
-define(space, <<" ">>).
-define(newline, <<"\n">>).


-type state() :: {unicode:charlist(), #config{}}.
-spec init(Config::config()) -> state().

%% handler state starts as an empty stack paired with the parsed options
init(Config) -> {[], parse_config(Config)}.


%% Structural events are forwarded to the stack functions; every other event
%% is a `{Type, Value}' pair that is rendered by encode/3 and then inserted.
-spec handle_event(Event::any(), State::state()) -> state().

handle_event(end_json, State) -> get_value(State);

handle_event(start_object, State) -> start_object(State);
handle_event(end_object, State) -> finish(State);

handle_event(start_array, State) -> start_array(State);
handle_event(end_array, State) -> finish(State);

handle_event({Type, Payload}, {_, Config} = State) ->
    Rendered = encode(Type, Payload, Config),
    insert(Rendered, State).


%% render one scalar event as chardata
encode(Type, Text, _Config) when Type =:= string; Type =:= key ->
    [?quote, Text, ?quote];
encode(literal, Atom, _Config) ->
    erlang:atom_to_list(Atom);
encode(integer, Int, _Config) ->
    erlang:integer_to_list(Int);
encode(float, Number, _Config) ->
    io_lib:format("~p", [Number]).


%% the configured number of space characters; empty when `space' is 0
space(Config) ->
    Width = Config#config.space,
    case Width of
        0 -> <<>>;
        _ when Width > 0 -> binary:copy(?space, Width)
    end.


%% the configured newline followed by `indent * depth' spaces; empty when
%% `indent' is 0
indent(Config) ->
    Width = Config#config.indent,
    case Width of
        0 -> <<>>;
        _ when Width > 0 ->
            Padding = binary:copy(?space, Width * Config#config.depth),
            <<(Config#config.newline)/binary, Padding/binary>>
    end.


%% indentation when `indent' is greater than 0, plain spacing otherwise
indent_or_space(Config) ->
    Indenting = Config#config.indent > 0,
    case Indenting of
        true -> indent(Config);
        false -> space(Config)
    end.
%% the state threaded through these functions is the pair `{Stack, Config}`
%%
%% `Stack` lists the objects/arrays still being built, innermost first:
%%   `[Current, Parent, Grandparent,...OriginalAncestor]`
%% stack entries take one of three shapes:
%%   `{object, Object}`       an object waiting for its next key
%%   `{object, Key, Object}`  an object holding a key that still needs a value
%%   `{array, Array}`         an array
%% `Object` and `Array` start out as the opening bracket binary and grow into
%% nested lists as members are inserted

%% fresh state with default formatting
start_json() -> {[], #config{}}.

%% fresh state with formatting taken from an option list
start_json(Options) when is_list(Options) -> {[], parse_config(Options)}.

%% push a new, empty object and go one level deeper
start_object({Stack, Config = #config{depth = Depth}}) ->
    {[{object, ?start_object} | Stack], Config#config{depth = Depth + 1}}.

%% push a new, empty array and go one level deeper
start_array({Stack, Config = #config{depth = Depth}}) ->
    {[{array, ?start_array} | Stack], Config#config{depth = Depth + 1}}.

%% close the innermost object or array: the depth is decremented first, then
%% finish_/1 closes it and inserts it into its parent when there is one
finish({Stack, Config = #config{depth = Depth}}) ->
    finish_({Stack, Config#config{depth = Depth - 1}}).
%% close the container on top of the stack and hand the result to insert/2,
%% which returns it as the final value when no parent remains
finish_({[{Kind, Body} | Parents], Config}) when Kind =:= object; Kind =:= array ->
    insert(close_container(Kind, Body, Config), {Parents, Config});
%% anything else, including an object with a key still awaiting its value
finish_(_) -> erlang:error(badarg).

%% empty containers are emitted without any indentation between the brackets
close_container(object, <<"{">>, _Config) -> <<"{}">>;
close_container(array, <<"[">>, _Config) -> <<"[]">>;
close_container(object, Body, Config) -> [Body, indent(Config), ?end_object];
close_container(array, Body, Config) -> [Body, indent(Config), ?end_array].

%% with an empty stack the inserted term becomes the result
insert(Value, {[], Config}) ->
    {Value, Config};
%% an object with no pending key treats the inserted term as the next key
insert(Key, {[{object, Members} | Parents], Config}) ->
    {[{object, Key, Members} | Parents], Config};
%% first member of an object: no comma in front of it
insert(Value, {[{object, Key, ?start_object} | Parents], Config}) ->
    Members = [?start_object, indent(Config), Key, ?colon, space(Config), Value],
    {[{object, Members} | Parents], Config};
%% later members are separated from the previous one by a comma
insert(Value, {[{object, Key, Earlier} | Parents], Config}) ->
    Members = [Earlier, ?comma, indent_or_space(Config), Key, ?colon, space(Config), Value],
    {[{object, Members} | Parents], Config};
%% first element of an array
insert(Value, {[{array, ?start_array} | Parents], Config}) ->
    Items = [?start_array, indent(Config), Value],
    {[{array, Items} | Parents], Config};
%% later elements of an array
insert(Value, {[{array, Earlier} | Parents], Config}) ->
    Items = [Earlier, ?comma, indent_or_space(Config), Value],
    {[{array, Items} | Parents], Config};
insert(_, _) -> erlang:error(badarg).
%% the key waiting for a value in the innermost object; `badarg' when the top
%% of the stack is anything else
get_key({[{object, Key, _}|_], _}) -> Key;
get_key(_) -> erlang:error(badarg).


%% convert the accumulated chardata with unicode:characters_to_binary/1; an
%% error raised during conversion, or a state that is not a pair, is reported
%% as `badarg'
get_value({Value, _Config}) ->
    try unicode:characters_to_binary(Value)
    catch error:_ -> erlang:error(badarg)
    end;
get_value(_) -> erlang:error(badarg).



%% eunit tests

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


config_test_() ->
    [
        {"empty config", ?_assertEqual(#config{}, parse_config([]))},
        {"unspecified indent/space", ?_assertEqual(
            #config{space=1, indent=1},
            parse_config([space, indent])
        )},
        {"specific indent", ?_assertEqual(
            #config{indent=4},
            parse_config([{indent, 4}])
        )},
        {"specific space", ?_assertEqual(
            #config{space=2},
            parse_config([{space, 2}])
        )},
        {"specific space and indent", ?_assertEqual(
            #config{space=2, indent=2},
            parse_config([{space, 2}, {indent, 2}])
        )},
        {"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
        {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
    ].


%% space/1 returns as many space characters as the `space' field says
space_test_() ->
    [
        {"no space", ?_assertEqual(<<>>, space(#config{space=0}))},
        {"one space", ?_assertEqual(<<" ">>, space(#config{space=1}))},
        {"four spaces", ?_assertEqual(<<"    ">>, space(#config{space=4}))}
    ].


%% NOTE(review): the expected binaries in the two indent tests were damaged in
%% extraction. They are rebuilt here as "newline followed by indent * depth
%% blanks", which is what the test titles and configs describe; `?newline' is
%% assumed to be this module's newline macro -- confirm against the module head.
indent_test_() ->
    [
        {"no indent", ?_assertEqual(<<>>, indent(#config{indent=0, depth=1}))},
        {"indent 1 depth 1", ?_assertEqual(
            <<?newline/binary, <<" ">>/binary>>,
            indent(#config{indent=1, depth=1})
        )},
        {"indent 1 depth 2", ?_assertEqual(
            <<?newline/binary, <<"  ">>/binary>>,
            indent(#config{indent=1, depth=2})
        )},
        {"indent 4 depth 1", ?_assertEqual(
            <<?newline/binary, <<"    ">>/binary>>,
            indent(#config{indent=4, depth=1})
        )},
        {"indent 4 depth 2", ?_assertEqual(
            <<?newline/binary, <<"    ">>/binary, <<"    ">>/binary>>,
            indent(#config{indent=4, depth=2})
        )}
    ].


indent_or_space_test_() ->
    [
        {"no indent so space", ?_assertEqual(
            <<" ">>,
            indent_or_space(#config{space=1, indent=0, depth=1})
        )},
        {"indent so no space", ?_assertEqual(
            <<?newline/binary, <<" ">>/binary>>,
            indent_or_space(#config{space=1, indent=1, depth=1})
        )}
    ].


%% scalar encoding; floats are compared against their shortest round-trip text
encode_test_() ->
    [
        {"0.0", ?_assert(encode(float, 0.0, #config{}) =:= ["0.0"])},
        {"1.0", ?_assert(encode(float, 1.0, #config{}) =:= ["1.0"])},
        {"-1.0", ?_assert(encode(float, -1.0, #config{}) =:= ["-1.0"])},
        {"3.1234567890987654321",
            ?_assert(
                encode(float, 3.1234567890987654321, #config{}) =:= ["3.1234567890987655"])
        },
        {"1.0e23", ?_assert(encode(float, 1.0e23, #config{}) =:= ["1.0e23"])},
        {"0.3", ?_assert(encode(float, 3.0/10.0, #config{}) =:= ["0.3"])},
        %% two accepted spellings: the output differs between OTP releases
        {"0.0001", ?_assert(encode(float, 0.0001, #config{}) =:= ["0.0001"]
                        orelse encode(float, 0.0001, #config{}) =:= ["1.0e-4"])}, % OTP-24
        {"0.00001", ?_assert(encode(float, 0.00001, #config{}) =:= ["1.0e-5"])},
        {"0.00000001", ?_assert(encode(float, 0.00000001, #config{}) =:= ["1.0e-8"])},
        {"1.0e-323", ?_assert(encode(float, 1.0e-323, #config{}) =:= ["1.0e-323"])},
        {"1.0e308", ?_assert(encode(float, 1.0e308, #config{}) =:= ["1.0e308"])},
        {"min normalized float",
            ?_assert(
                encode(float, math:pow(2, -1022), #config{}) =:= ["2.2250738585072014e-308"]
            )
        },
        {"max normalized float",
            ?_assert(
                encode(float, (2 - math:pow(2, -52)) * math:pow(2, 1023), #config{})
                    =:= ["1.7976931348623157e308"]
            )
        },
        {"min denormalized float",
            ?_assert(encode(float, math:pow(2, -1074), #config{}) =:= ["5.0e-324"])
        },
        {"max denormalized float",
            ?_assert(
                encode(float, (1 - math:pow(2, -52)) * math:pow(2, -1022), #config{})
                    =:= ["2.225073858507201e-308"]
            )
        },
        {"hello world", ?_assert(encode(string, <<"hello world">>, #config{})
            =:= [<<"\"">>, <<"hello world">>, <<"\"">>]
        )},
        {"key", ?_assert(encode(key, <<"key">>, #config{}) =:= [<<"\"">>, <<"key">>, <<"\"">>])},
        {"1", ?_assert(encode(integer, 1, #config{}) =:= "1")},
        {"-1", ?_assert(encode(integer, -1, #config{}) =:= "-1")},
        {"true", ?_assert(encode(literal, true, #config{}) =:= "true")},
        {"false", ?_assert(encode(literal, false, #config{}) =:= "false")},
        {"null", ?_assert(encode(literal, null, #config{}) =:= "null")}
    ].


%% every case is checked in both directions: pretty -> minified and back.
%% NOTE(review): runs of blanks inside the pretty strings were collapsed in
%% extraction (nested levels showed the same single blank as the outer level).
%% They are restored assuming `jsx:prettify/1' indents two blanks per level --
%% confirm against the jsx module.
format_test_() ->
    % {minified version, pretty version}
    Cases = [
        {"empty object", <<"{}">>, <<"{}">>},
        {"empty array", <<"[]">>, <<"[]">>},
        {"single key object", <<"{\"k\":\"v\"}">>, <<"{\n  \"k\": \"v\"\n}">>},
        {"single member array", <<"[true]">>, <<"[\n  true\n]">>},
        {"multiple key object",
            <<"{\"k\":\"v\",\"x\":\"y\"}">>,
            <<"{\n  \"k\": \"v\",\n  \"x\": \"y\"\n}">>
        },
        {"multiple member array",
            <<"[1.0,2.0,3.0]">>,
            <<"[\n  1.0,\n  2.0,\n  3.0\n]">>
        },
        {"nested structure",
            <<"[[{},[],true],{\"k\":\"v\",\"x\":\"y\"}]">>,
            <<"[\n  [\n    {},\n    [],\n    true\n  ],\n  {\n    \"k\": \"v\",\n    \"x\": \"y\"\n  }\n]">>
        }
    ],
    [{Title, ?_assertEqual(Min, jsx:minify(Pretty))} || {Title, Min, Pretty} <- Cases] ++
        [{Title, ?_assertEqual(Pretty, jsx:prettify(Min))} || {Title, Min, Pretty} <- Cases].

%% a caller-supplied newline replaces the default line break; the member is
%% indented by the two blanks requested through `{indent, 2}'
custom_newline_test_() ->
    [
        {"single key object", ?_assert(
            jsx:format(<<"{\"k\":\"v\"}">>, [space, {indent, 2}, {newline, <<$\r>>}])
                =:= <<"{\r  \"k\": \"v\"\r}">>)
        }
    ].

%% replay the shared event streams through this handler and compare the result
%% with the expected JSON text of each case
handle_event_test_() ->
    Data = jsx:test_cases() ++ jsx:special_test_cases(),
    [
        {
            Title, ?_assertEqual(
                JSON,
                lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
            )
        } || {Title, JSON, _, Events} <- Data
    ].


-endif.
410 | -------------------------------------------------------------------------------- /src/jsx_to_term.erl: -------------------------------------------------------------------------------- 1 | %% The MIT License 2 | 3 | %% Copyright (c) 2010-2013 Alisdair Sullivan 4 | 5 | %% Permission is hereby granted, free of charge, to any person obtaining a copy 6 | %% of this software and associated documentation files (the "Software"), to deal 7 | %% in the Software without restriction, including without limitation the rights 8 | %% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | %% copies of the Software, and to permit persons to whom the Software is 10 | %% furnished to do so, subject to the following conditions: 11 | 12 | %% The above copyright notice and this permission notice shall be included in 13 | %% all copies or substantial portions of the Software. 14 | 15 | %% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | %% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | %% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | %% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | %% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | %% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | %% THE SOFTWARE. 22 | 23 | 24 | -module(jsx_to_term). 25 | 26 | -export([to_term/2]). 27 | -export([init/1, handle_event/2]). 28 | -export([ 29 | start_term/1, 30 | start_object/1, 31 | start_array/1, 32 | finish/1, 33 | insert/2, 34 | get_key/1, 35 | get_value/1 36 | ]). 37 | 38 | 39 | -record(config, { 40 | labels = binary, 41 | return_maps = false 42 | }). 43 | 44 | -type config() :: proplists:proplist(). 45 | 46 | -spec to_term(Source::binary(), Config::jsx_config:options()) -> jsx:json_term() | {incomplete, jsx:decoder()}. 

%% decode `Source' with this module as the event handler. `return_maps' is put
%% in front of the caller's options, so maps are the default and a later
%% `{return_maps, false}' in `Config' still takes effect (parse_config/2 lets
%% the last occurrence win).
to_term(Source, Config) when is_list(Config) ->
    Decode = jsx:decoder(
        ?MODULE,
        [return_maps | Config],
        jsx_config:extract_config(Config)
    ),
    Decode(Source).

%% fold an option list into a #config{} record, starting from the defaults
parse_config(Options) -> parse_config(Options, #config{}).

parse_config([{labels, Kind} | More], Acc)
        when Kind =:= binary; Kind =:= atom; Kind =:= existing_atom; Kind =:= attempt_atom ->
    parse_config(More, Acc#config{labels = Kind});
parse_config([labels | More], Acc) ->
    parse_config(More, Acc#config{labels = binary});
parse_config([{return_maps, Flag} | More], Acc) when is_boolean(Flag) ->
    parse_config(More, Acc#config{return_maps = Flag});
parse_config([return_maps | More], Acc) ->
    parse_config(More, Acc#config{return_maps = true});
%% options owned by the decoder itself are skipped; anything else is rejected
parse_config([{Flag, _} | More] = Options, Acc) ->
    lists:member(Flag, jsx_config:valid_flags())
        orelse erlang:error(badarg, [Options, Acc]),
    parse_config(More, Acc);
parse_config([Flag | More] = Options, Acc) ->
    lists:member(Flag, jsx_config:valid_flags())
        orelse erlang:error(badarg, [Options, Acc]),
    parse_config(More, Acc);
parse_config([], Acc) ->
    Acc.


-type state() :: {list(), #config{}}.
-spec init(Config::config()) -> state().

init(Config) -> start_term(Config).

%% NOTE(review): for `end_json' the clause below returns the decoded term
%% (via get_value/1) rather than a `state()' tuple, so the spec is narrower
%% than the code.
-spec handle_event(Event::any(), State::state()) -> state().

handle_event(end_json, State) ->
    get_value(State);
handle_event(start_object, State) ->
    start_object(State);
handle_event(end_object, State) ->
    finish(State);
handle_event(start_array, State) ->
    start_array(State);
handle_event(end_array, State) ->
    finish(State);
handle_event({key, RawKey}, {_Stack, Config} = State) ->
    insert(format_key(RawKey, Config), State);
%% every other event is a `{Type, Value}' pair; only the value is stored
handle_event({_Type, Value}, State) ->
    insert(Value, State).


%% convert an object key according to the `labels' option.
%% NOTE(review): `atom' creates atoms from decoded input; atoms are never
%% garbage collected, so that mode should only be used on trusted documents.
format_key(Key, Config) ->
    Labels = Config#config.labels,
    case Labels of
        binary ->
            Key;
        atom ->
            binary_to_atom(Key, utf8);
        existing_atom ->
            binary_to_existing_atom(Key, utf8);
        attempt_atom ->
            %% fall back to the binary when no such atom exists yet
            try binary_to_existing_atom(Key, utf8)
            catch error:badarg -> Key
            end
    end.


%% the internal state is a pair `{Stack, Config}'
%% the stack lists the objects/arrays still under construction, innermost first
%%  `[Current, Parent, Grandparent,...OriginalAncestor]'
%% an object on the stack is represented as
%%  `{object, [
%%      {NthKey, NthValue},
%%      {NMinus1Key, NthMinus1Value},
%%      ...,
%%      {FirstKey, FirstValue}
%%  ]}'
%% or, when maps are returned, as
%%  `{object, #{
%%      FirstKey => FirstValue,
%%      SecondKey => SecondValue,
%%      ...,
%%      NthKey => NthValue
%%  }}'
%% or, while a key is still waiting for its value, as
%%  `{object, Key, ...}'
%% an array is represented as
%%  `{array, [NthValue, NthMinus1Value,...FirstValue]}'

%% build the initial state: an empty stack plus the parsed options
start_term(Config) when is_list(Config) ->
    {[], parse_config(Config)}.

%% push a new, empty object onto the stack (a map or a pair list, per config)
start_object({Stack, #config{return_maps=true} = Config}) ->
    {[{object, #{}} | Stack], Config};
start_object({Stack, Config}) ->
    {[{object, []} | Stack], Config}.


%% push a new, empty array onto the stack
start_array({Stack, Config}) ->
    {[{array, []} | Stack], Config}.


%% close the object or array on top of the stack. the finished term goes to
%% `insert/2', which attaches it to the parent container or, for an empty
%% remaining stack, returns it as the root value `{Term, Config}'
finish({[{object, Map} | Parents], #config{return_maps=true} = Config}) ->
    insert(Map, {Parents, Config});
%% an empty object in proplist form is written `[{}]'
finish({[{object, []} | Parents], Config}) ->
    insert([{}], {Parents, Config});
%% pairs and values were accumulated newest-first, so restore source order
finish({[{object, Pairs} | Parents], Config}) ->
    insert(lists:reverse(Pairs), {Parents, Config});
finish({[{array, Values} | Parents], Config}) ->
    insert(lists:reverse(Values), {Parents, Config});
finish(_) ->
    erlang:error(badarg).


%% insert a value when there's no parent object or array
insert(Value, {[], Config}) ->
    {Value, Config};
%% a term arriving at an object with no pending key becomes that pending key
%% (same representation for maps and pair lists)
insert(Key, {[{object, Members} | Parents], Config}) ->
    {[{object, Key, Members} | Parents], Config};
%% a term arriving at an object with a pending key is that key's value
insert(Value, {[{object, Key, Map} | Parents], #config{return_maps=true} = Config}) ->
    {[{object, Map#{Key => Value}} | Parents], Config};
insert(Value, {[{object, Key, Pairs} | Parents], Config}) ->
    {[{object, [{Key, Value} | Pairs]} | Parents], Config};
%% arrays simply collect values, newest first
insert(Value, {[{array, Values} | Parents], Config}) ->
    {[{array, [Value | Values]} | Parents], Config};
insert(_, _) ->
    erlang:error(badarg).

%% the key currently waiting for a value in the object on top of the stack
get_key({[{object, Key, _} | _], _}) ->
    Key;
get_key(_) ->
    erlang:error(badarg).


%% the first element of the state pair, returned as-is
get_value({Value, _Config}) ->
    Value;
get_value(_) ->
    erlang:error(badarg).



%% eunit tests

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


%% option parsing for the term builder
config_test_() ->
    [
        {"empty config", ?_assertEqual(#config{}, parse_config([]))},
        {"implicit binary labels", ?_assertEqual(#config{}, parse_config([labels]))},
        {"binary labels", ?_assertEqual(#config{}, parse_config([{labels, binary}]))},
        {"atom labels", ?_assertEqual(#config{labels=atom}, parse_config([{labels, atom}]))},
        {"existing atom labels", ?_assertEqual(
            #config{labels=existing_atom},
            parse_config([{labels, existing_atom}])
        )},
        {"return_maps true", ?_assertEqual(
            #config{return_maps=true},
            parse_config([return_maps])
        )},
        {"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
        {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
    ].


%% key conversion for every `labels' mode. titles are unique so a failure
%% report identifies one case: "sloppy" marks the `attempt_atom' mode, which
%% falls back to the binary instead of raising
format_key_test_() ->
    [
        {"binary key", ?_assertEqual(<<"key">>, format_key(<<"key">>, #config{labels=binary}))},
        {"atom key", ?_assertEqual(key, format_key(<<"key">>, #config{labels=atom}))},
        {"existing atom key", ?_assertEqual(
            key,
            format_key(<<"key">>, #config{labels=existing_atom})
        )},
        {"nonexisting atom key", ?_assertError(
            badarg,
            format_key(<<"nonexistentatom">>, #config{labels=existing_atom})
        )},
        {"sloppy existing atom key", ?_assertEqual(
            key,
            format_key(<<"key">>, #config{labels=attempt_atom})
        )},
        {"sloppy nonexisting atom key", ?_assertEqual(
            <<"nonexistentatom">>,
            format_key(<<"nonexistentatom">>, #config{labels=attempt_atom})
        )}
    ].


%% stack manipulation with the proplist representation (return_maps = false)
rep_manipulation_test_() ->
    [
        {"allocate a new context with option", ?_assertEqual(
            {[], #config{labels=atom}},
            start_term([{labels, atom}])
        )},
        {"allocate a new object on an empty stack", ?_assertEqual(
            {[{object, []}], #config{}},
            start_object({[], #config{}})
        )},
        {"allocate a new object on a stack", ?_assertEqual(
            {[{object, []}, {object, []}], #config{}},
            start_object({[{object, []}], #config{}})
        )},
        {"allocate a new array on an empty stack", ?_assertEqual(
            {[{array, []}], #config{}},
            start_array({[], #config{}})
        )},
        {"allocate a new array on a stack", ?_assertEqual(
            {[{array, []}, {object, []}], #config{}},
            start_array({[{object, []}], #config{}})
        )},
        {"insert a key into an object", ?_assertEqual(
            {[{object, key, []}, junk], #config{}},
            insert(key, {[{object, []}, junk], #config{}})
        )},
        {"get current key", ?_assertEqual(
            key,
            get_key({[{object, key, []}], #config{}})
        )},
        {"try to get non-key from object", ?_assertError(
            badarg,
            get_key({[{object, []}], #config{}})
        )},
        {"try to get key from array", ?_assertError(
            badarg,
            get_key({[{array, []}], #config{}})
        )},
        {"insert a value into an object", ?_assertEqual(
            {[{object, [{key, value}]}, junk], #config{}},
            insert(value, {[{object, key, []}, junk], #config{}})
        )},
        {"insert a value into an array", ?_assertEqual(
            {[{array, [value]}, junk], #config{}},
            insert(value, {[{array, []}, junk], #config{}})
        )},
        {"finish an object with no ancestor", ?_assertEqual(
            {[{a, b}, {x, y}], #config{}},
            finish({[{object, [{x, y}, {a, b}]}], #config{}})
        )},
        {"finish an empty object", ?_assertEqual(
            {[{}], #config{}},
            finish({[{object, []}], #config{}})
        )},
        {"finish an object with an ancestor", ?_assertEqual(
            {[{object, [{key, [{a, b}, {x, y}]}, {foo, bar}]}], #config{}},
            finish({[{object, [{x, y}, {a, b}]}, {object, key, [{foo, bar}]}], #config{}})
        )},
        {"finish an array with no ancestor", ?_assertEqual(
            {[a, b, c], #config{}},
            finish({[{array, [c, b, a]}], #config{}})
        )},
        {"finish an array with an ancestor", ?_assertEqual(
            {[{array, [[a, b, c], d, e, f]}], #config{}},
            finish({[{array, [c, b, a]}, {array, [d, e, f]}], #config{}})
        )}
    ].


%% the same stack manipulation with the map representation (return_maps = true)
rep_manipulation_with_maps_test_() ->
    [
        {"allocate a new object on an empty stack", ?_assertEqual(
            {[{object, #{}}], #config{return_maps=true}},
            start_object({[], #config{return_maps=true}})
        )},
        {"allocate a new object on a stack", ?_assertEqual(
            {[{object, #{}}, {object, #{}}], #config{return_maps=true}},
            start_object({[{object, #{}}], #config{return_maps=true}})
        )},
        {"insert a key into an object", ?_assertEqual(
            {[{object, key, #{}}, junk], #config{return_maps=true}},
            insert(key, {[{object, #{}}, junk], #config{return_maps=true}})
        )},
        {"get current key", ?_assertEqual(
            key,
            get_key({[{object, key, #{}}], #config{return_maps=true}})
        )},
        {"try to get non-key from object", ?_assertError(
            badarg,
            get_key({[{object, #{}}], #config{return_maps=true}})
        )},
        {"insert a value into an object", ?_assertEqual(
            {[{object, #{key => value}}, junk], #config{return_maps=true}},
            insert(value, {[{object, key, #{}}, junk], #config{return_maps=true}})
        )},
        {"finish an object with no ancestor", ?_assertEqual(
            {#{a => b, x => y}, #config{return_maps=true}},
            finish({[{object, #{x => y, a => b}}], #config{return_maps=true}})
        )},
        {"finish an empty object", ?_assertEqual(
            {#{}, #config{return_maps=true}},
            finish({[{object, #{}}], #config{return_maps=true}})
        )},
        {"finish an object with an ancestor", ?_assertEqual(
            {
                [{object, #{key => #{a => b, x => y}, foo => bar}}],
                #config{return_maps=true}
            },
            finish({
                [{object, #{x => y, a => b}}, {object, key, #{foo => bar}}],
                #config{return_maps=true}
            })
        )}
    ].


%% end-to-end decoding through jsx:decode/2. each title is unique so a failure
%% report identifies one case. the second case used to be a byte-for-byte copy
%% of the first; it now spells the option out, which parse_config/2 in this
%% module accepts as `{return_maps, true}'.
return_maps_test_() ->
    [
        {"an empty map by default", ?_assertEqual(
            #{},
            jsx:decode(<<"{}">>, [])
        )},
        {"an empty map with explicit return_maps", ?_assertEqual(
            #{},
            jsx:decode(<<"{}">>, [{return_maps, true}])
        )},
        {"an empty object with maps disabled", ?_assertEqual(
            [{}],
            jsx:decode(<<"{}">>, [{return_maps, false}])
        )},
        {"a small map", ?_assertEqual(
            #{<<"awesome">> => true, <<"library">> => <<"jsx">>},
            jsx:decode(<<"{\"library\": \"jsx\", \"awesome\": true}">>, [])
        )},
        {"a recursive map", ?_assertEqual(
            #{<<"key">> => #{<<"key">> => true}},
            jsx:decode(<<"{\"key\": {\"key\": true}}">>, [])
        )},
        {"a map inside a list", ?_assertEqual(
            [#{}],
            jsx:decode(<<"[{}]">>, [])
        )}
    ].


%% replay the shared event streams through this handler and compare the result
%% with the expected term of each case
handle_event_test_() ->
    Data = jsx:test_cases(),
    [
        {
            Title, ?_assertEqual(
                Term,
                lists:foldl(fun handle_event/2, init([]), Events ++ [end_json])
            )
        } || {Title, _, Term, Events} <- Data
    ].


-endif.

--------------------------------------------------------------------------------
/src/jsx_verify.erl:
--------------------------------------------------------------------------------
%% The MIT License

%% Copyright (c) 2010-2013 alisdair sullivan

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_verify).

-export([is_json/2, is_term/2]).
-export([init/1, handle_event/2]).

-type config() :: proplists:proplist().

-spec is_json(Source::binary(), Config::jsx_config:options()) -> true | false | {incomplete, jsx:decoder()}.

%% run the decoder over `Source' with this module as the event handler.
%% a `badarg' raised while building or running the decoder means "not JSON"
%% and is reported as `false'; every other exception propagates.
is_json(Source, Config) when is_list(Config) ->
    try
        Decode = jsx:decoder(?MODULE, Config, jsx_config:extract_config(Config)),
        Decode(Source)
    catch
        error:badarg -> false
    end.


-spec is_term(Source::jsx:json_term() | end_stream | end_json,
    Config::jsx_config:options()) -> true | false | {incomplete, jsx:encoder()}.

%% run the encoder over `Source' with this module as the event handler.
%% a `badarg' raised while building or running the encoder is reported as
%% `false'; every other exception propagates.
is_term(Source, Config) when is_list(Config) ->
    try
        Encode = jsx:encoder(?MODULE, Config, jsx_config:extract_config(Config)),
        Encode(Source)
    catch
        error:badarg -> false
    end.


%% validate an option list; the accumulator is returned untouched because no
%% option changes this handler's behaviour
parse_config(Options) -> parse_config(Options, []).

%% ignore deprecated flags
parse_config([no_repeated_keys | More], Acc) ->
    parse_config(More, Acc);
parse_config([{repeated_keys, Flag} | More], Acc) when is_boolean(Flag) ->
    parse_config(More, Acc);
parse_config([repeated_keys | More], Acc) ->
    parse_config(More, Acc);
%% options owned by the decoder/encoder are skipped; anything else is rejected
parse_config([{Flag, _} | More] = Options, Acc) ->
    lists:member(Flag, jsx_config:valid_flags())
        orelse erlang:error(badarg, [Options, Acc]),
    parse_config(More, Acc);
parse_config([Flag | More] = Options, Acc) ->
    lists:member(Flag, jsx_config:valid_flags())
        orelse erlang:error(badarg, [Options, Acc]),
    parse_config(More, Acc);
parse_config([], Acc) ->
    Acc.


%% we don't actually need any state for this
-type state() :: [].
-spec init(Config::config()) -> state().

init(Config) ->
    parse_config(Config).


-spec handle_event(Event::any(), State::state()) -> state().

%% reaching `end_json' means the whole input was accepted
handle_event(end_json, _State) ->
    true;
%% every other event leaves the state as it is
handle_event(_Event, State) ->
    State.



%% eunit tests
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


%% accepted options all parse to the empty state; unknown options are rejected
config_test_() ->
    Accepted = [
        {"empty config", []},
        {"no repeat keys", [no_repeated_keys]},
        {"bare repeated keys", [repeated_keys]},
        {"repeated keys true", [{repeated_keys, true}]},
        {"repeated keys false", [{repeated_keys, false}]}
    ],
    [{Title, ?_assertEqual([], parse_config(Options))} || {Title, Options} <- Accepted] ++
        [
            {"invalid opt flag", ?_assertError(badarg, parse_config([error]))},
            {"invalid opt tuple", ?_assertError(badarg, parse_config([{error, true}]))}
        ].


%% every shared event stream, terminated by `end_json', must verify as `true'
handle_event_test_() ->
    Verify = fun(Events) ->
        lists:foldl(fun handle_event/2, [], Events ++ [end_json])
    end,
    Cases = jsx:test_cases() ++ jsx:special_test_cases(),
    [{Title, ?_assertEqual(true, Verify(Events))} || {Title, _, _, Events} <- Cases].


-endif.

--------------------------------------------------------------------------------