├── .gitattributes ├── README.md ├── update.js ├── broken.js ├── draft-rundgren-comparable-json.xml ├── ietf-104-report.html ├── LICENSE ├── draft-rundgren-json-canonicalization-scheme.xml └── xmlv3 └── draft-rundgren-json-canonicalization-scheme.xml /.gitattributes: -------------------------------------------------------------------------------- 1 | # Disable LF normalization for all files 2 | * -text -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ietf-json-canon 2 | Internet-Draft for Canonical JSON 3 | 4 | Completed: https://tools.ietf.org/html/rfc8785 5 | -------------------------------------------------------------------------------- /update.js: -------------------------------------------------------------------------------- 1 | // Date serializer fixes the problem 2 | Date.prototype.toJSON = function () { 3 | let date = this.toISOString(); 4 | // In this particular case we selected a UTC notation 5 | // yyyy-mm-ddThh:mm:ssZ 6 | return date.substring(0, date.indexOf('.')) + 'Z'; 7 | }; 8 | -------------------------------------------------------------------------------- /broken.js: -------------------------------------------------------------------------------- 1 | // Sample showing how a naive use of JCS will fail 2 | 'use strict'; 3 | var canonicalize = function(object) { 4 | 5 | var buffer = ''; 6 | serialize(object); 7 | return buffer; 8 | 9 | function serialize(object) { 10 | if (object === null || typeof object !== 'object' || 11 | object.toJSON != null) { 12 | ///////////////////////////////////////////////// 13 | // Primitive type or toJSON - Use ES6/JSON // 14 | ///////////////////////////////////////////////// 15 | buffer += JSON.stringify(object); 16 | 17 | } else if (Array.isArray(object)) { 18 | ///////////////////////////////////////////////// 19 | // Array - Maintain element order // 20 | 
///////////////////////////////////////////////// 21 | buffer += '['; 22 | let next = false; 23 | object.forEach((element) => { 24 | if (next) { 25 | buffer += ','; 26 | } 27 | next = true; 28 | ///////////////////////////////////////// 29 | // Array element - Recursive expansion // 30 | ///////////////////////////////////////// 31 | serialize(element); 32 | }); 33 | buffer += ']'; 34 | 35 | } else { 36 | ///////////////////////////////////////////////// 37 | // Object - Sort properties before serializing // 38 | ///////////////////////////////////////////////// 39 | buffer += '{'; 40 | let next = false; 41 | Object.keys(object).sort().forEach((property) => { 42 | if (next) { 43 | buffer += ','; 44 | } 45 | next = true; 46 | /////////////////////////////////////////////// 47 | // Property names are strings - Use ES6/JSON // 48 | /////////////////////////////////////////////// 49 | buffer += JSON.stringify(property); 50 | buffer += ':'; 51 | ////////////////////////////////////////// 52 | // Property value - Recursive expansion // 53 | ////////////////////////////////////////// 54 | serialize(object[property]); 55 | }); 56 | buffer += '}'; 57 | } 58 | } 59 | }; 60 | 61 | const jstring = 62 | '{"time": "2019-01-28T07:45:10Z", "big": "055", "val": 3.5}'; 63 | 64 | BigInt.prototype.toJSON = function () { 65 | return this.toString(); 66 | }; 67 | 68 | var object = JSON.parse(jstring, 69 | (k,v) => k == 'time' ? new Date(v) : k == 'big' ? BigInt(v) : v 70 | ); 71 | 72 | console.log(canonicalize(object)); -------------------------------------------------------------------------------- /draft-rundgren-comparable-json.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | "Comparable" JSON (JSONCOMP) 17 | 18 | 19 | 20 | Independent 21 |
22 | 23 | 24 | 25 | Montpellier 26 | France 27 | 28 | anders.rundgren.net@gmail.com 29 | https://www.linkedin.com/in/andersrundgren/ 30 |
31 |
32 | 33 | 34 | 35 | Security 36 | 37 | 38 | 39 | 40 | JSON, ECMAScript, Canonicalization, Normalization 41 | 42 | 43 | 44 | 45 | This application note describes how JCS 46 | can be utilized to support applications needing canonicalization 47 | beyond the core JSON level, 48 | with comparisons as the primary target. 49 | 50 | 51 |
52 | 53 | 54 |
55 | 56 | The purpose of JCS is to create "Hashable" representations 57 | of JSON data intended for cryptographic solutions. 58 | JCS accomplishes this by combining normalization of the native JSON 59 | String and Number primitives with a deterministic property sorting scheme. 60 | That is, JCS provides canonicalization at the core JSON level. 61 | For interoperability reasons JCS also constrains data to the I-JSON subset. 62 | 63 | 64 | However, if you would rather compare JSON data from 65 | different sources or runs, JCS would in many cases be 66 | inadequate since the JSON String type is commonly used 67 | for holding subtypes like "DateTime" or "BigInteger" objects. 68 | 69 | 70 | This application note outlines how JCS, in spite of having a limited 71 | canonicalization scope, may still be utilized by applications like the above. 72 | 73 |
74 | 75 |
76 | 77 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL 78 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", 79 | "MAY", and "OPTIONAL" in this document are to be interpreted as 80 | described in BCP 14 81 | when, and only when, they appear in all capitals, as shown here. 82 | 83 |
84 | 85 |
86 | 87 | Assume you want to compare productions of JSON data where the schema 88 | designer assigned the property "big" for holding a "BigInteger" subtype and 89 | "time" for holding a "DateTime" subtype, while "val" is supposed to be a JSON Number 90 | compliant with JCS. The following example shows such an object: 91 | 92 | 93 |
98 |
99 | 100 | A problem here is that "055" clearly is not a canonical form for a "BigInteger" 101 | while a "DateTime" object like "2019-01-28T07:45:10Z" 102 | might as well be expressed as "2019-01-28T08:45:10.000+01:00" making 103 | comparisons based on JCS canonicalization fail. 104 | 105 | 106 | To resolve this issue using JCS the following measures MUST be taken: 107 | 108 | 109 | The community or standard utilizing a specific JSON schema 110 | defines a strict normalized form for each of the used subtypes. 111 | 112 | 113 | Compatible serializers are created for each subtype. 114 | 115 | 116 | A positive side effect of this arrangement is that it enforces strict definitions 117 | of subtypes which improves interoperability in general as well. 118 | 119 | 120 | Defining specific subtypes and their normalized form is out of scope for 121 | this application note. Although the JSON example illustrated a "BigInteger" in 122 | decimal notation, applications transferring huge integers 123 | (like raw RSA keys) typically rather use Base64 124 | encoding to conserve space. 125 | 126 | 127 | Below is an example of a strict serializer expressed in ECMAScript 128 | for a "DateTime" subtype: 129 | 130 | 131 |
137 |
138 |
139 | 140 |
141 | 142 | This document has no IANA actions. 143 | 144 |
145 | 146 |
147 | 148 | Systems implementing this application note are subject 149 | to the same security considerations as JCS. 150 | 151 |
152 | 153 |
154 | 155 | This document was created based on feedback (on JCS) from many people 156 | including Mark Nottingham and Jim Schaad. 157 | 158 |
159 |
160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | JSON Canonicalization Scheme - Work in progress 170 | 171 | A. Rundgren, B. Jordan, S. Erdtman 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | ECMAScript 2015 Language Specification 182 | 183 | Ecma International 184 | 185 | 186 | 187 | 188 | 189 | 190 |
191 | -------------------------------------------------------------------------------- /ietf-104-report.html: -------------------------------------------------------------------------------- 1 | JCS - IETF-104 Report 10 | 11 |
JCS - IETF-104 Report
12 | There were in total 100 minutes of meeting time (including a 1 hour 13 | side meeting with 10+ participants) devoted to JCS at IETF-104. 14 | Here is a list of issues raised during these meetings. 15 | I have taken the liberty of commenting on them here. 16 |

17 | For those who are not familiar with JCS 18 | (https://tools.ietf.org/html/draft-rundgren-json-canonicalization-scheme-06), 20 | the core rationale is simply "keeping JSON as JSON even when signed". 21 |

22 | 23 |
24 | 1. The need for clear text messages is a weak argument 25 |
26 |
27 | The recommended use of the current IETF JSON signature solution 28 | (JWS) is that you: 30 | 39 | None of the Open Banking systems out there have to date chosen 40 | this route; they all build on variants using detached signatures and clear text JSON data. 41 | That none of them utilize JCS is quite logical since 42 | JCS (correctly) is not perceived as a standard. 43 |
44 | 45 |
46 | 2. Canonicalization introduces security vulnerabilities 47 |
48 |
49 |

50 | If a canonicalization scheme is incorrectly implemented 51 | (irrespective of at which end), the likely result is that signatures will not validate. 52 | Broken signatures, like any other input error (including missing or 53 | incorrectly formatted data), should in a properly designed application lead to a 54 | rejected message/application failure. The core of a JCS implementation is 55 | typically only a couple of kilobytes of executable code, 56 | making it reasonably easy to verify for correctness. 57 |

58 |

59 | It has been mentioned that clear text data will tempt developers into trusting 60 | (=acting upon) received data without verifying signatures. 61 | JCS obviously does not 62 | come with a cure for naïve developers. 63 | See JCS Security Considerations. 65 |

66 |

67 | In fact, the absence of clear text signatures also creates security issues as shown 68 | by the following example from IETF's Trusted Execution Protocol WG: 69 |

70 |
71 | https://tools.ietf.org/html/draft-ietf-teep-opentrustprotocol-02 73 |

74 | 75 | The top element "[Signed][Request|Response]" cannot be fully 76 | trusted to match the content because it doesn't participate in the 77 | signature generation. However, a recipient can always match it with 78 | the value associated with the property "payload". It purely serves 79 | to provide a quick reference for reading and method invocation. 80 | 81 |

82 |
83 | By using JWS with JCS 85 | the need for artificial holder objects and associated matching requirements 86 | disappears, while message content is provided in the clear. 87 |
88 | 89 |
90 | 3. Number serialization is a huge problem 91 |
92 |
93 | I clearly underestimated this part when I started with JCS back in 2015, but 94 | recently fast, open sourced and quite simple 95 | algorithms have been developed 97 | making number serialization according to JCS/ES6 in scope for any platform. 98 | Extensive test data is 99 | publicly available. 101 |
102 | 103 |
104 | 4. You should have stayed with the ES6 predictive parsing/serialization scheme 105 |
106 |
107 | That would have been cool, but the sentiment among other JSON tool vendors was 108 | that "ECMA got it all wrong" so 109 | I was forced to select another and more conventional route. 110 | Fortunately, the revised scheme turned out to be very simple to get running 111 | on other platforms including Go, Python, C# and Java/Android, while leaving 112 | parsers and serializers unchanged. 113 | The original concept would OTOH 115 | require a total rewrite of the entire JSON landscape. 116 | Sometimes "pushback" is just good 😀 117 |
118 | 119 |
120 | 5. You need a data model 121 |
122 |
123 | JCS builds on the same bare-bones data model for primitives as JSON 124 | (null, true, false, Number, String), 125 | albeit with a couple of constraints: 126 | 135 | This is all that is needed with respect to data models for creating reliable and interoperable "hashable" JSON. 136 | Existing JSON-based systems use external mappings to emulate 137 | missing data types like int32, DateTime, Money, Binary and similar. 138 | That not all JSON applications use the same conventions 139 | does not seem to have hampered the popularity and ubiquity of JSON. 140 | Standardizing external mappings is another [possible] IETF activity, not related to JCS. 141 |
142 | 143 |
144 | 6. I-JSON (JCS builds on that) only says SHOULD for IEEE-754 while JCS says MUST 145 |
146 |
147 | That is correct but if you for example send 64-bit integers expressed as 148 | JSON Numbers to JavaScript based systems, applications will typically break 149 | every now and then since the inherent precision is only 53 bits. 150 | JCS was designed to also be fully JavaScript compatible. 151 |
152 |
153 | 7. XML canonicalization was a disaster 154 |
155 |
156 |

157 | JCS is not a full-blown canonicalization scheme like XML's C14N; it is 158 | a (fairly rudimentary) serialization method. 159 |

160 |

161 | A proper and fair evaluation should be based on the actual draft rather than 162 | bad experiences from the XML era which BTW 163 | also were due to other factors 164 | such as Namespaces, Default values, 165 | SOAP and an elaborate WS* stack which indeed took years to get 166 | fully interoperable between vendors. 167 |

168 |
169 |
Version 1.06, Anders Rundgren 2019-05-12
170 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /draft-rundgren-json-canonicalization-scheme.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | JSON Canonicalization Scheme (JCS) 17 | 18 | 19 | 20 | Independent 21 |
22 | 23 | 24 | 25 | Montpellier 26 | France 27 | 28 | anders.rundgren.net@gmail.com 29 | https://www.linkedin.com/in/andersrundgren/ 30 |
31 |
32 | 33 | 34 | Symantec Corporation 35 |
36 | 37 | 350 Ellis Street 38 | CA 94043 39 | Mountain View 40 | USA 41 | 42 | bret_jordan@symantec.com 43 |
44 |
45 | 46 | 47 | Spotify AB 48 |
49 | 50 | Birger Jarlsgatan 61, 4tr 51 | 113 56 52 | Stockholm 53 | Sweden 54 | 55 | erdtman@spotify.com 56 |
57 |
58 | 59 | 60 | 61 | Security 62 | 63 | 64 | 65 | 66 | JSON, ECMAScript, Signatures, Cryptography, Canonicalization 67 | 68 | 69 | 70 | 71 | Cryptographic operations like hashing and signing require that the 72 | original data does not change during serialization or parsing. One 73 | way of addressing this issue is creating a canonical form of the data. 74 | Canonicalization also permits data to be exchanged in its original 75 | form on the "wire" while still being subject to secure cryptographic 76 | operations. The JSON Canonicalization Scheme (JCS) provides 77 | canonicalization support for data in the JSON format by building on 78 | the strict serialization methods for JSON primitives defined by 79 | ECMAScript, constraining JSON data to the I-JSON subset, and through 80 | a deterministic property sorting scheme. 81 | 82 | 83 |
84 | 85 | 86 |
87 | 88 | Cryptographic operations like hashing and signing require that the 89 | original data does not change during serialization or parsing. 90 | One way of accomplishing this is converting the data into 91 | a format that has a simple and fixed representation like Base64Url , 92 | which is how JWS addressed this issue. 93 | 94 | 95 | Another solution is to create a canonical version of the data, 96 | similar to what was done for the XML Signature standard. 97 | The primary advantage with a canonicalizing scheme is that data 98 | can be kept in its original form. This is the core rationale behind JCS. 99 | Put another way: by using canonicalization a JSON Object may remain a JSON Object 100 | even after being signed which simplifies system design, documentation and logging. 101 | 102 | 103 | To avoid "reinventing the wheel", JCS relies on serialization of JSON primitives compatible with 104 | ECMAScript (aka JavaScript) beginning with version 6 , 105 | hereafter referred to as "ES6". 106 | 107 | 108 | Seasoned XML developers recalling difficulties getting signatures 109 | to validate (usually due to different interpretations of the quite intricate 110 | XML canonicalization rules as well as of the equally extensive 111 | Web Services security standards), may rightfully wonder why JCS 112 | would not suffer from similar issues. The reasons are twofold: 113 | 114 | 115 | The absence of a namespace concept and default values, as well 116 | as constraining data to the I&nbhy;JSON subset eliminate the need for specific 117 | parsers for dealing with canonicalization. 118 | 119 | 120 | JCS compatible serialization of JSON primitives is supported by most 121 | current Web browsers as well as by Node.js , 122 | while the full JCS specification is supported by multiple 123 | Open Source implementations (see ). 124 | See also . 
125 | 126 | 127 | 128 | 129 | In summary the JCS specification describes how serialization of JSON primitives compliant 130 | with ES6 combined with a deterministic property sorting scheme can be used for 131 | creating "Hashable" representations of JSON data intended for consumption by cryptographic methods. 132 | 133 | 134 | JCS is compatible with some existing systems relying on JSON canonicalization 135 | such as JWK Thumbprint and Keybase . 136 | 137 | 138 | For potential uses outside of cryptography see . 139 | 140 | 141 | The intended audiences of this document are JSON tool vendors, as 142 | well as designers of JSON based cryptographic solutions. 143 | 144 |
145 | 146 |
147 | 148 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL 149 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", 150 | "MAY", and "OPTIONAL" in this document are to be interpreted as 151 | described in BCP 14 152 | when, and only when, they appear in all capitals, as shown here. 153 | 154 |
155 | 156 |
157 | 158 | This section describes the different issues related to creating 159 | a canonical JSON representation, and how they are addressed by JCS. 160 | 161 |
162 | 163 | In order to serialize JSON data, one needs data 164 | that is adapted for JSON serialization. This is usually achieved by: 165 | 166 | 167 | 168 | Parsing previously generated JSON data. 169 | 170 | 171 | Programmatically creating data. 172 | 173 | 174 | 175 | 176 | Irrespective of the method used, the data to be serialized MUST be compatible 177 | with I&nbhy;JSON , which implies the following: 178 | 179 | 180 | 181 | 182 | JSON Objects MUST NOT exhibit duplicate property names. 183 | 184 | 185 | JSON String data MUST be expressible 186 | as Unicode . 187 | 188 | 189 | JSON Number data MUST be expressible 190 | as IEEE-754 double precision values. 191 | For applications needing higher precision or longer integers than 192 | offered by IEEE-754 double precision, 193 | outlines how 194 | such requirements can be supported in an interoperable and extensible way. 195 | 196 | 197 | 198 | 199 | An additional constraint is that parsed JSON String data MUST NOT be altered during subsequent serializations. 200 | For more information see . 201 | 202 | 203 | Note: although the Unicode standard offers the possibility of combining 204 | certain characters into one, referred to as "Unicode Normalization" 205 | (https://www.unicode.org/reports/tr15/), 206 | such functionality MUST be delegated to the application layer. 207 | 208 |
209 |
210 | 211 | The following subsections describe the steps required for creating a canonical 212 | JSON representation of the data elaborated on in the previous section. 213 | 214 | 215 | shows sample code for an ES6 based canonicalizer, 216 | matching the JCS specification. 217 | 218 |
219 | 220 | Whitespace between JSON elements MUST NOT be emitted. 221 | 222 |
223 |
224 | 225 | Assume that you parse a JSON object like the following: 226 | 227 | 228 |
234 |
235 | 236 | If you subsequently serialize the parsed data 237 | using a serializer compliant with ES6's JSON.stringify(), 238 | the result would (with a line wrap added for display purposes only), 239 | be rather divergent with respect to representation of data: 240 | 241 | 242 |
244 |
245 | 246 | 247 | 248 | Note: EURO denotes a single Euro character (Unicode: U+20AC), 249 | 250 | which, not being ASCII, is currently not displayable in RFCs. 251 | 252 | 253 | 254 | 255 | The difference between the parsed data and its 256 | serialized counterpart is due to a wide tolerance on input data (as defined 257 | by JSON ), while output data (as defined by ES6) 258 | has a fixed representation. As can be seen by the example, 259 | numbers are subject to rounding as well. 260 | 261 | 262 | The following subsections describe serialization of primitive JSON data types 263 | according to JCS. This part is identical to that of ES6. 264 | 265 |
266 | 267 | The JSON literals null, true, 268 | and false present no challenge since they already have a 269 | fixed definition in JSON . 270 | 271 |
272 |
273 | 274 | For JSON String data (which includes 275 | JSON Object property names as well), each Unicode code point MUST be serialized as 276 | described below (also matching Section 24.3.2.2 of ): 277 | 278 | 279 | 280 | 281 | If the Unicode value falls within the traditional ASCII control 282 | character range (U+0000 through U+001F), it MUST 283 | be serialized using lowercase hexadecimal Unicode notation (\uhhhh) unless it is in the 284 | set of predefined JSON control characters U+0008, U+0009, U+000A, U+000C or U+000D 285 | which MUST be serialized as \b, \t, \n, \f and \r respectively. 286 | 287 | 288 | If the Unicode value is outside of the ASCII control character range, it MUST 289 | be serialized "as is" unless it is equivalent to 290 | U+005C (\) or U+0022 (") which MUST be serialized as \\ and \" respectively. 291 | 292 | 293 | 294 | 295 | Finally, the resulting sequence of Unicode code points MUST be enclosed in double quotes ("). 296 | 297 | 298 | Note: some JSON systems permit the use of invalid Unicode data 299 | including "lone surrogates" (e.g. U+DEAD). 300 | Since this leads to interoperability issues including broken signatures, 301 | occurrences of such data MUST cause the JCS algorithm to terminate 302 | with an error indication. 303 | 304 |
305 | 306 |
307 | 308 | JSON Number data MUST be serialized according to 309 | Section 7.1.12.1 of including the "Note 2" enhancement. 310 | 311 | 312 | Due to the relative complexity of this part, the algorithm itself is not included in this document. 313 | However, the specification is fully implemented by for example Google's V8 . 314 | The open source Java implementation mentioned in 315 | uses a recently developed number serialization algorithm called Ryu . 316 | 317 | 318 | ES6 builds on the IEEE-754 double precision 319 | standard for representing JSON Number data. 320 | holds a set of IEEE-754 sample values and their 321 | corresponding JSON serialization. 322 | 323 | 324 | Note: since NaN (Not a Number) and Infinity are not permitted in JSON, 325 | occurrences of such values MUST cause the JCS algorithm to terminate 326 | with an error indication. 327 | 328 |
329 |
330 |
331 | 332 | Although the previous step indeed normalized the representation of primitive 333 | JSON data types, the result would not qualify as "canonical" since JSON 334 | Object properties are not in lexicographic (alphabetical) order. 335 | 336 | 337 | Applied to the sample in , 338 | a properly canonicalized version should (with a 339 | line wrap added for display purposes only), read as: 340 | 341 | 342 |
344 |
345 | 346 | 347 | 348 | Note: EURO denotes a single Euro character (Unicode: U+20AC), 349 | 350 | which not being ASCII, is currently not displayable in RFCs. 351 | 352 | 353 | 354 | 355 | The rules for lexicographic sorting of JSON Object 356 | properties according to JCS are as follows: 357 | 358 | 359 | JSON Object properties MUST be sorted in a recursive manner 360 | which means that possible JSON child Objects 361 | MUST have their properties sorted as well. 362 | 363 | 364 | JSON Array data MUST also be scanned for 365 | presence of JSON Objects (and applying associated property sorting), 366 | but array element order MUST NOT be changed. 367 | 368 | 369 | 370 | 371 | When a JSON Object is about to have its properties 372 | sorted, the following measures MUST be adhered to: 373 | 374 | 375 | The sorting process is applied to property name strings in their "raw" (unescaped) form. 376 | That is, a newline character is treated as U+000A. 377 | 378 | 379 | Property name strings to be sorted are formatted 380 | as arrays of UTF-16 code units. 381 | The sorting is based on pure value comparisons, where code units are treated as 382 | unsigned integers, independent of locale settings. 383 | 384 | 385 | Property name strings either have different values at some index that is 386 | a valid index for both strings, or their lengths are different, or both. 387 | If they have different values at one or more index 388 | positions, let k be the smallest such index; then the string whose 389 | value at position k has the smaller value, as determined by using 390 | the < operator, lexicographically precedes the other string. 391 | If there is no index position at which they differ, 392 | then the shorter string lexicographically precedes the longer string. 393 | 394 | In plain English this means that property names are sorted in ascending order like the following: 395 | 396 | 397 | 398 | 399 |
403 |
404 |
405 | 406 | The rationale for basing the sorting algorithm on UTF-16 code units is that 407 | it maps directly to the string type in ECMAScript (featured in Web browsers 408 | and Node.js), Java and .NET. 409 | Systems using another internal representation of string data will need to convert 410 | JSON property name strings into arrays of UTF-16 code units before sorting. 411 | The conversion from UTF-8 or UTF-32 to UTF-16 is defined by the 412 | Unicode standard. 413 | 414 | 415 | Note: for the purpose of obtaining a deterministic property order, sorting on 416 | UTF-8 or UTF-32 encoded data would also work, but the result would differ 417 | and thus be incompatible with this specification. 418 | However, in practice property names rarely go outside of 419 | 7-bit ASCII making it possible to sort on the UTF-8 byte level and 420 | still be compatible with JCS. Whether this is a viable option or not 421 | depends on the environment JCS is supposed to be used in. 422 | 423 |
424 |
425 | 426 | Finally, in order to create a platform independent representation, 427 | the result of the preceding step MUST be encoded in UTF-8. 428 | 429 | 430 | Applied to the sample in this 431 | should yield the following bytes here shown in hexadecimal notation: 432 | 433 | 434 |
440 |
441 | 442 | This data is intended to be usable as input to cryptographic methods. 443 | 444 |
445 |
446 |
447 | 448 |
449 | 450 | This document has no IANA actions. 451 | 452 |
453 | 454 |
455 | 456 | It is vital to perform "sanity" checks 457 | on input data to avoid overflowing buffers and similar things that 458 | could affect the integrity of the system. 459 | 460 | 461 | When JCS is applied to signature schemes like the one in , 462 | applications MUST perform the following operations before acting 463 | upon received data: 464 | 465 | 466 | Parse the JSON data 467 | 468 | 469 | Verify the data for correctness 470 | 471 | 472 | Verify the signature 473 | 474 | 475 | 476 |
477 | 478 |
479 | 480 | Building on ES6 Number serialization was 481 | originally proposed by James Manger. This ultimately led to the 482 | adoption of the entire ES6 serialization scheme for JSON primitives. 483 | 484 | 485 | Other people who have contributed with valuable input to this specification include 486 | Scott Ananian, 487 | Ben Campbell, 488 | Richard Gibson, 489 | Bron Gondwana, 490 | John-Mark Gurney, 491 | Mike Jones, 492 | Mike Miller, 493 | Mark Nottingham, 494 | Mike Samuel, 495 | Jim Schaad, 496 | Robert Tupelo-Schneck 497 | and Michal Wadas. 498 | 499 | 500 | For carrying out real world concept verification, the software and 501 | support for number serialization provided by 502 | Ulf Adams, 503 | Tanner Gooding 504 | and Remy Oudompheng 505 | was very helpful. 506 | 507 |
508 |
509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | ECMAScript 2015 Language Specification 520 | 521 | Ecma International 522 | 523 | 524 | 525 | 526 | 527 | 528 | IEEE Standard for Floating-Point Arithmetic 529 | 530 | IEEE 531 | 532 | 533 | 534 | 535 | 536 | 537 | The Unicode Standard, Version 10.0.0 538 | 539 | The Unicode Consortium 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | "Comparable" JSON - Work in progress 552 | 553 | A. Rundgren 554 | 555 | 556 | 557 | 558 | 559 | 560 | Chrome V8 Open Source JavaScript Engine 561 | 562 | Google LLC 563 | 564 | 565 | 566 | 567 | 568 | 569 | Ryu floating point number serializing algorithm 570 | 571 | Ulf Adams 572 | 573 | 574 | 575 | 576 | 577 | 578 | Node.js 579 | 580 | 581 | 582 | 583 | 584 | 585 | Keybase 586 | 587 | 588 | 589 | 590 | 591 | 592 | The OpenAPI Initiative 593 | 594 | 595 | 596 | 597 | 598 | 599 | XML Signature Syntax and Processing Version 1.1 600 | 601 | W3C 602 | 603 | 604 | 605 | 606 | 607 | 608 |
609 | 610 | Below is an example of a JCS canonicalizer for usage with ES6 based systems: 611 | 612 | 613 |
{ 639 | if (next) { 640 | buffer += ','; 641 | } 642 | next = true; 643 | ///////////////////////////////////////// 644 | // Array element - Recursive expansion // 645 | ///////////////////////////////////////// 646 | serialize(element); 647 | }); 648 | buffer += ']'; 649 | 650 | } else { 651 | ///////////////////////////////////////////////// 652 | // Object - Sort properties before serializing // 653 | ///////////////////////////////////////////////// 654 | buffer += '{'; 655 | let next = false; 656 | Object.keys(object).sort().forEach((property) => { 657 | if (next) { 658 | buffer += ','; 659 | } 660 | next = true; 661 | /////////////////////////////////////////////// 662 | // Property names are strings - Use ES6/JSON // 663 | /////////////////////////////////////////////// 664 | buffer += JSON.stringify(property); 665 | buffer += ':'; 666 | ////////////////////////////////////////// 667 | // Property value - Recursive expansion // 668 | ////////////////////////////////////////// 669 | serialize(object[property]); 670 | }); 671 | buffer += '}'; 672 | } 673 | } 674 | };]]>
675 |
676 |
677 | 678 |
679 | 680 | The following table holds a set of ES6 compatible Number serialization samples, 681 | including some edge cases. The column 682 | "IEEE&nbhy;754" refers to the internal 683 | ES6 representation of the Number data type which is based on the 684 | IEEE-754 standard using 64-bit (double precision) values, 685 | here expressed in hexadecimal. 686 | 687 | 688 |
741 |
742 | 743 | Notes: 744 | 745 | 746 | For maximum compliance with the ES6 JSON object 747 | values that are to be interpreted as true integers 748 | SHOULD be in the range -9007199254740991 to 9007199254740991. 749 | However, how numbers are used in applications does not affect the JCS algorithm. 750 | 751 | 752 | Although a set of specific integers like 2**68 could be regarded as having 753 | extended precision, the JCS/ES6 number serialization 754 | algorithm does not take this into consideration. 755 | 756 | 757 | Invalid. See . 758 | 759 | 760 | 761 |
762 | 763 |
764 | 765 | Since the result from the canonicalization process (see ), 766 | is fully valid JSON, it can also be used as "Wire Format". 767 | However, this is just an option since cryptographic schemes 768 | based on JCS, in most cases would not depend on that externally 769 | supplied JSON data already is canonicalized. 770 | 771 | 772 | In fact, the ES6 standard way of serializing objects using 773 | JSON.stringify() produces a 774 | more "logical" format, where properties are 775 | kept in the order they were created or received. The 776 | example below shows an address record which could benefit from 777 | ES6 standard serialization: 778 | 779 | 780 |
787 |
788 | 789 | Using canonicalization the properties above would be output in the order 790 | "address", "city", "name", "state" and "zip", which adds fuzziness 791 | to the data from a human (developer or technical support), perspective. 792 | Canonicalization also converts JSON data into a single line of text, which may 793 | be less than ideal for debugging and logging. 794 | 795 |
796 | 797 |
798 | 799 | There are several issues associated with the 800 | JSON Number type, here illustrated by the following 801 | sample object: 802 | 803 | 804 |
809 |
810 | 811 | Although the sample above conforms to JSON , 812 | applications would normally use different native data types for storing 813 | "giantNumber" and "int64Max". In addition, monetary data like "payMeThis" would 814 | presumably not rely on floating point data types due to rounding issues with respect 815 | to decimal arithmetic. 816 | 817 | 818 | The established way of handling this kind of "overloading" of the 819 | JSON Number type (at least in an extensible manner), is through 820 | mapping mechanisms, instructing parsers what to do with different properties 821 | based on their name. However, this greatly limits the value of using the 822 | JSON Number type outside of its original, somewhat constrained, JavaScript context. 823 | The ES6 JSON object does not support mappings to JSON Number either. 824 | 825 | 826 | Due to the above, numbers that do not have a natural place in the current 827 | JSON ecosystem MUST be wrapped using the JSON String type. This is close to 828 | a de-facto standard for open systems. This is also applicable for 829 | other data types that do not have direct support in JSON, like "DateTime" 830 | objects as described in . 831 | 832 | 833 | Aided by a system using the JSON String type; be it programmatic like 834 | 835 | 836 |
837 |
839 |
840 | 841 | or declarative schemes like OpenAPI , 842 | JCS imposes no limits on applications, including when using ES6. 843 | 844 |
845 | 846 |
847 | 848 | Due to the limited set of data types featured in JSON, 849 | the JSON String type is commonly used for holding subtypes. 850 | This can, depending on the JSON parsing method, lead to 851 | interoperability problems which MUST be dealt with by 852 | JCS compliant applications targeting a wider audience. 853 | 854 | 855 | Assume you want to parse a JSON object where the schema 856 | designer assigned the property "big" for holding a "BigInteger" subtype and 857 | "time" for holding a "DateTime" subtype, while "val" is supposed to be a JSON Number 858 | compliant with JCS. The following example shows such an object: 859 | 860 | 861 |
866 |
867 | Parsing of this object can be accomplished by the following ES6 statement: 868 | 869 |
870 |
871 | After parsing, the actual data can be extracted, which for subtypes also involves a conversion 872 | step using the result of the parsing process (an ECMAScript object) as input: 873 | 874 |
877 |
878 | Canonicalization of "object" using the sample code in would return the 879 | following string: 880 | 881 |
882 |
883 | 884 | Although this is (with respect to JCS) technically correct, there is another way of parsing JSON data 885 | which can also be used with ES6 as shown below: 886 | 887 | 888 |
k == 'time' ? new Date(v) : k == 'big' ? BigInt(v) : v 896 | );]]>
897 |
898 | 899 | If you now apply the canonicalizer in to "object", the 900 | following string would be generated: 901 | 902 | 903 |
904 |
905 | 906 | In this case the string arguments for "big" and "time" have changed with respect to the original, 907 | presumably making an application depending on JCS fail. 908 | 909 | 910 | The reason for the deviation is that in stream and schema based JSON parsers, 911 | the original "string" argument is typically replaced on-the-fly 912 | by the native subtype which when serialized, may exhibit a different 913 | and platform dependent pattern. 914 | 915 | 916 | That is, stream and schema based parsing MUST treat subtypes as "pure" (immutable) JSON String types, 917 | and perform the actual conversion to the designated native type in a subsequent step. 918 | In modern programming platforms like Go, Java and C# this can be achieved with 919 | moderate effort by combining annotations, getters and setters. 920 | Below is an example in C#/Json.NET showing a part of a class that is serializable 921 | as a JSON Object: 922 | 923 | 924 |
935 |
936 | 937 | In an application "Amount" can be accessed as any other property 938 | while it is actually represented by a quoted string in JSON contexts. 939 | 940 | 941 | Note: the example above also addresses the constraints on numeric data 942 | implied by I-JSON (the C# "decimal" data type has quite different 943 | characteristics compared to IEEE-754 double precision). 944 | 945 |
946 | 947 | Since the JSON Array construct permits mixing arbitrary JSON elements, 948 | custom parsing and serialization code must normally 949 | be used to cope with subtypes anyway. 950 | 951 |
952 |
953 | 954 |
955 | 956 | The optimal solution is integrating support for JCS directly 957 | in JSON serializers (parsers need no changes). 958 | That is, canonicalization would just be an additional "mode" 959 | for a JSON serializer. However, this is currently not the case. 960 | Fortunately JCS support can be performed through externally supplied 961 | canonicalizer software, enabling signature creation schemes like the following: 962 | 963 | 964 | Create the data to be signed. 965 | 966 | 967 | Serialize the data using existing JSON tools. 968 | 969 | 970 | Let the external canonicalizer process the serialized data and return canonicalized result data. 971 | 972 | 973 | Sign the canonicalized data. 974 | 975 | 976 | Add the resulting signature value to the original JSON data through a designated signature property. 977 | 978 | 979 | Serialize the completed (now signed) JSON object using existing JSON tools. 980 | 981 | 982 | A compatible signature verification scheme would then be as follows: 983 | 984 | 985 | Parse the signed JSON data using existing JSON tools. 986 | 987 | 988 | Read and save the signature value from the designated signature property. 989 | 990 | 991 | Remove the signature property from the parsed JSON object. 992 | 993 | 994 | Serialize the remaining JSON data using existing JSON tools. 995 | 996 | 997 | Let the external canonicalizer process the serialized data and return canonicalized result data. 998 | 999 | 1000 | Verify that the canonicalized data matches the saved signature value 1001 | using the algorithm and key used for creating the signature. 1002 | 1003 | 1004 | 1005 | 1006 | A canonicalizer like above is effectively only a "filter", potentially usable with 1007 | a multitude of quite different cryptographic schemes. 1008 | 1009 | 1010 | Using a JSON serializer with integrated JCS support, the serialization performed 1011 | before the canonicalization step could be eliminated for both processes. 1012 | 1013 |
1014 | 1015 |
1016 | 1017 | The following Open Source implementations have been verified to be 1018 | compatible with JCS: 1019 | 1020 | 1021 | JavaScript: https://www.npmjs.com/package/canonicalize 1022 | 1023 | 1024 | Java: https://github.com/erdtman/java-json-canonicalization 1025 | 1026 | 1027 | Go: https://github.com/cyberphone/json-canonicalization/tree/master/go 1028 | 1029 | 1030 | .NET/C#: https://github.com/cyberphone/json-canonicalization/tree/master/dotnet 1031 | 1032 | 1033 | Python: https://github.com/cyberphone/json-canonicalization/tree/master/python3 1034 | 1035 | 1036 | 1037 |
1038 | 1039 |
1040 | 1041 | There are (and have been) other efforts creating "Canonical JSON". 1042 | Below is a list of URLs to some of them: 1043 | 1044 | 1045 | https://tools.ietf.org/html/draft-staykov-hu-json-canonical-form-00 1046 | 1047 | 1048 | https://gibson042.github.io/canonicaljson-spec/ 1049 | 1050 | 1051 | http://wiki.laptop.org/go/Canonical_JSON 1052 | 1053 | 1054 | In contrast to JCS which is a serialization scheme, the listed efforts build on text level JSON to JSON 1055 | transformations. 1056 | 1057 |
1058 | 1059 |
1060 | 1061 | The JCS specification is currently developed at: 1062 | https://github.com/cyberphone/ietf-json-canon. 1063 | 1064 | 1065 | The most recent "editors' copy" can be found at: 1066 | https://cyberphone.github.io/ietf-json-canon. 1067 | 1068 | 1069 | JCS source code and test data is available at: 1070 | https://github.com/cyberphone/json-canonicalization 1071 | 1072 |
1073 |
1074 |
1075 | -------------------------------------------------------------------------------- /xmlv3/draft-rundgren-json-canonicalization-scheme.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 21 | 22 | 23 | JSON Canonicalization Scheme (JCS) 24 | 25 | 26 | 27 | Independent 28 |
29 | 30 | Montpellier 31 | France 32 | 33 | anders.rundgren.net@gmail.com 34 | https://www.linkedin.com/in/andersrundgren/ 35 |
36 |
37 | 38 | Broadcom 39 |
40 | 41 | 1320 Ridder Park Drive 42 | CA 43 | 95131 44 | San Jose 45 | USA 46 | 47 | bret.jordan@broadcom.com 48 |
49 |
50 | 51 | Spotify AB 52 |
53 | 54 | Birger Jarlsgatan 61, 4tr 55 | 113 56 56 | Stockholm 57 | Sweden 58 | 59 | erdtman@spotify.com 60 |
61 |
62 | 63 | Security 64 | 65 | 66 | JSON, ECMAScript, Signatures, Cryptography, Canonicalization 67 | 68 | 69 | 70 | Cryptographic operations like hashing and signing need the data to be 71 | expressed in an invariant format so that the operations are reliably 72 | repeatable. 73 | 74 | One way to address this is to create a canonical representation of 75 | the data. Canonicalization also permits data to be exchanged in its 76 | original form on the "wire" while cryptographic operations 77 | performed on the canonicalized counterpart of the data in the 78 | producer and consumer end points, generate consistent results. 79 | 80 | 81 | This document describes the JSON Canonicalization Scheme (JCS). 82 | The JCS specification defines how to create a canonical representation 83 | of JSON data by building on the strict serialization methods for 84 | JSON primitives defined by ECMAScript, constraining JSON data to 85 | the I-JSON subset, and by using deterministic property sorting. 86 | 87 | 88 |
89 | 90 |
91 | Introduction 92 | 93 | This document describes the JSON Canonicalization Scheme (JCS). 94 | The JCS specification defines how to create a canonical representation 95 | of JSON data by building 96 | on the strict serialization methods for 97 | JSON primitives defined by ECMAScript , 98 | constraining JSON data to the I-JSON 99 | subset, and by using deterministic property sorting. The output from JCS is a 100 | "Hashable" representation of JSON data that can be used by cryptographic methods. 101 | The subsequent paragraphs outline the primary design considerations. 102 | 103 | 104 | Cryptographic operations like hashing and signing need the data to be 105 | expressed in an invariant format so that the operations are reliably 106 | repeatable. 107 | One way to accomplish this is to convert the data into 108 | a format that has a simple and fixed representation, 109 | like Base64Url . 110 | This is how JWS addressed this issue. 111 | Another solution is to create a canonical version of the data, 112 | similar to what was done for the XML Signature standard. 113 | 114 | 115 | The primary advantage with a canonicalizing scheme is that data 116 | can be kept in its original form. This is the core rationale behind JCS. 117 | Put another way, using canonicalization enables a JSON Object to remain a JSON Object 118 | even after being signed. This can simplify system design, documentation, and logging. 119 | 120 | 121 | To avoid "reinventing the wheel", JCS relies on the serialization of JSON primitives 122 | (strings, numbers and literals), as defined by 123 | ECMAScript (aka JavaScript) beginning with version 6 , 124 | hereafter referred to as "ES6". 125 | 126 | 127 | Seasoned XML developers may recall difficulties getting XML signatures 128 | to validate. This was usually due to different interpretations of the quite intricate 129 | XML canonicalization rules as well as of the equally complex 130 | Web Services security standards. 
131 | The reasons why JCS should not suffer from similar issues are: 132 | 133 |
134 |
o
135 |
136 | The absence of a namespace concept and default values. 137 |
138 |
o
139 |
140 | Constraining data to the I‑JSON subset. 141 | This eliminates the need for specific parsers for dealing with canonicalization. 142 |
143 |
o
144 |
145 | JCS compatible serialization of JSON primitives is currently supported 146 | by most Web browsers as well as by Node.js . 147 |
148 |
o
149 |
150 | The full JCS specification is currently supported by multiple 151 | Open Source implementations (see ). 152 | See also for implementation 153 | guidelines. 154 |
155 |
156 | 157 | JCS is compatible with some existing systems relying on JSON canonicalization 158 | such as JWK Thumbprint and Keybase . 159 | 160 | 161 | For potential uses outside of cryptography see . 162 | 163 | 164 | The intended audiences of this document are JSON tool vendors, as 165 | well as designers of JSON based cryptographic solutions. 166 | The reader is assumed to be knowledgeable in ECMAScript including the JSON object. 167 | 168 |
169 |
170 | Terminology 171 | 172 | Note that this document is not on the IETF standards track. However, a conformant 173 | implementation is supposed to adhere to the specified behavior for 174 | security and interoperability reasons. This text uses BCP 14 to 175 | describe that necessary behavior. 176 | 177 | 178 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL 179 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", 180 | "MAY", and "OPTIONAL" in this document are to be interpreted as 181 | described in BCP 14 182 | when, and only when, they appear in all capitals, as shown here. 183 | 184 |
185 |
186 | Detailed Operation 187 | 188 | This section describes the details related to creating 189 | a canonical JSON representation, and how they are addressed by JCS. 190 | 191 | 192 | describes 193 | the RECOMMENDED way of adding JCS support to existing JSON tools. 194 | 195 |
196 | Creation of Input Data 197 | 198 | Data to be canonically serialized is usually created by: 199 | 200 |
201 |
o
202 |
203 | Parsing previously generated JSON data. 204 |
205 |
o
206 |
207 | Programmatically creating data. 208 |
209 |
210 | 211 | Irrespective of the method used, the data to be serialized MUST be adapted 212 | for I‑JSON formatting, which implies the following: 213 | 214 |
215 |
o
216 |
217 | JSON Objects MUST NOT exhibit duplicate property names. 218 |
219 |
o
220 |
221 | JSON String data MUST be expressible 222 | as Unicode . 223 |
224 |
o
225 |
226 | JSON Number data MUST be expressible 227 | as IEEE-754 double precision values. 228 | For applications needing higher precision or longer integers than 229 | offered by IEEE-754 double precision, it is RECOMMENDED to represent such 230 | numbers as JSON Strings, see for 231 | details on how this can be performed in an interoperable and extensible way. 232 |
233 |
234 | 235 | An additional constraint is that parsed JSON String data MUST NOT be altered during subsequent serializations. 236 | For more information see . 237 | 238 | 239 | Note: although the Unicode standard offers the possibility of rearranging 240 | certain character sequences, referred to as "Unicode Normalization" 241 | (), 242 | JCS compliant string processing does not take this into consideration. 243 | That is, all components involved in a scheme depending on JCS, 244 | MUST preserve Unicode string data "as is". 245 | 246 |
247 |
248 | Generation of Canonical JSON Data 249 | 250 | The following subsections describe the steps required to create a canonical 251 | JSON representation of the data elaborated on in the previous section. 252 | 253 | 254 | shows sample code for an ES6 based canonicalizer, 255 | matching the JCS specification. 256 | 257 |
258 | Whitespace 259 | 260 | Whitespace between JSON tokens MUST NOT be emitted. 261 | 262 |
263 |
264 | Serialization of Primitive Data Types 265 | 266 | Assume a JSON object as follows is parsed: 267 | 268 | 274 | 275 | If the parsed data is subsequently serialized 276 | using a serializer compliant with ES6's JSON.stringify(), 277 | the result would (with a line wrap added for display purposes only), 278 | be rather divergent with respect to the original data: 279 | 280 | 282 | 283 | The reason for the difference between the parsed data and its 284 | serialized counterpart, is due to a wide tolerance on input data (as defined 285 | by JSON ), while output data (as defined by ES6), 286 | has a fixed representation. As can be seen in the example, 287 | numbers are subject to rounding as well. 288 | 289 | 290 | The following subsections describe the serialization of primitive JSON data types 291 | according to JCS. This part is identical to that of ES6. 292 | In the (unlikely) event that a future version of ECMAScript would 293 | invalidate any of the following serialization methods, it will be 294 | up to the developer community to 295 | either stick to this specification or create a new specification. 296 | 297 |
298 | Serialization of Literals 299 | 300 | In accordance with JSON , 301 | the literals "null", "true", and 302 | "false" MUST be serialized as null, true, and false respectively. 303 | 304 |
305 |
306 | Serialization of Strings 307 | 308 | For JSON String data (which includes 309 | JSON Object property names as well), each Unicode code point MUST be serialized as 310 | described below (see section 24.3.2.2 of ): 311 | 312 |
313 |
o
314 |
315 | If the Unicode value falls within the traditional ASCII control 316 | character range (U+0000 through U+001F), it MUST 317 | be serialized using lowercase hexadecimal Unicode notation (\uhhhh) unless it is in the 318 | set of predefined JSON control characters U+0008, U+0009, U+000A, U+000C or U+000D 319 | which MUST be serialized as \b, \t, \n, \f and \r respectively. 320 |
321 |
o
322 | If the Unicode value is outside of the ASCII control character range, it MUST 323 | be serialized "as is" unless it is equivalent to 324 | U+005C (\) or U+0022 (") which MUST be serialized as \\ and \" respectively. 325 |
326 |
327 | 328 | Finally, the resulting sequence of Unicode code points MUST be enclosed in double quotes ("). 329 | 330 | 331 | Note: since invalid Unicode data like "lone surrogates" (e.g. U+DEAD) 332 | may lead to interoperability issues including broken signatures, 333 | occurrences of such data MUST cause a compliant JCS implementation to terminate 334 | with an appropriate error. 335 | 336 |
337 |
338 | Serialization of Numbers 339 | 340 | ES6 builds on the IEEE-754 double precision 341 | standard for representing JSON Number data. 342 | Such data MUST be serialized according to section 7.1.12.1 of 343 | including the "Note 2" enhancement. 344 | 345 | 346 | Due to the relative complexity of this part, the algorithm 347 | itself is not included in this document. 348 | For implementers of JCS compliant number serialization, 349 | Google's implementation in V8 may serve as a reference. 350 | Another compatible number serialization reference implementation 351 | is Ryu , 352 | that is used by the JCS open source Java implementation 353 | mentioned in . 354 | holds a set of IEEE-754 sample values and their 355 | corresponding JSON serialization. 356 | 357 | 358 | Note: since NaN (Not a Number) and Infinity are not permitted in JSON, 359 | occurrences of NaN or Infinity MUST cause a compliant JCS implementation to terminate 360 | with an appropriate error. 361 | 362 |
363 |
364 |
365 | Sorting of Object Properties 366 | 367 | Although the previous step normalized the representation of primitive 368 | JSON data types, the result would not yet qualify as "canonical" since JSON 369 | Object properties are not in lexicographic (alphabetical) order. 370 | 371 | 372 | Applied to the sample in , 373 | a properly canonicalized version should (with a 374 | line wrap added for display purposes only), read as: 375 | 376 | 378 | 379 | The rules for lexicographic sorting of JSON Object 380 | properties according to JCS are as follows: 381 | 382 |
383 |
o
384 |
385 | JSON Object properties MUST be sorted recursively, 386 | which means that JSON child Objects 387 | MUST have their properties sorted as well. 388 |
389 |
o
390 |
391 | JSON Array data MUST also be scanned for the 392 | presence of JSON Objects (if an object is found then its properties MUST be sorted), 393 | but array element order MUST NOT be changed. 394 |
395 |
396 | 397 | When a JSON Object is about to have its properties 398 | sorted, the following measures MUST be adhered to: 399 | 400 |
401 |
o
402 |
403 | The sorting process is applied to property name strings in their "raw" (unescaped) form. 404 | That is, a newline character is treated as U+000A. 405 |
406 |
o
407 |
408 | Property name strings to be sorted are formatted 409 | as arrays of UTF-16 code units. 410 | The sorting is based on pure value comparisons, where code units are treated as 411 | unsigned integers, independent of locale settings. 412 |
413 |
o
414 |
415 | 416 | Property name strings either have different values at some index that is 417 | a valid index for both strings, or their lengths are different, or both. 418 | If they have different values at one or more index 419 | positions, let k be the smallest such index; then the string whose 420 | value at position k has the smaller value, as determined by using 421 | the < operator, lexicographically precedes the other string. 422 | If there is no index position at which they differ, 423 | then the shorter string lexicographically precedes the longer string. 424 | 425 | 426 | In plain English this means that property names are sorted in ascending order like the following: 427 | 428 |
429 |
430 | 434 | 435 | The rationale for basing the sorting algorithm on UTF-16 code units is that 436 | it maps directly to the string type in ECMAScript (featured in Web browsers 437 | and Node.js), Java and .NET. In addition, JSON only supports escape sequences 438 | expressed as UTF-16 code units making knowledge and handling of such data 439 | a necessity anyway. 440 | Systems using another internal representation of string data will need to convert 441 | JSON property name strings into arrays of UTF-16 code units before sorting. 442 | The conversion from UTF-8 or UTF-32 to UTF-16 is defined by the 443 | Unicode standard. 444 | 445 | 446 | The following test data can be used for verifying the correctness of the sorting 447 | scheme in a JCS implementation. JSON test data: 448 | 449 | 458 | 459 | Expected argument order after sorting property strings: 460 | 461 | 468 | 469 | Note: for the purpose of obtaining a deterministic property order, sorting on 470 | UTF-8 or UTF-32 encoded data would also work, but the outcome for JSON data 471 | like above would differ and thus be incompatible with this specification. 472 | However, in practice, property names are rarely defined outside of 7-bit ASCII making 473 | it possible to sort on string data in UTF-8 or UTF-32 format without conversions 474 | to UTF-16 and still be compatible with JCS. If this is a viable option or not 475 | depends on the environment JCS is used in. 476 | 477 |
478 |
479 | UTF-8 Generation 480 | 481 | Finally, in order to create a platform independent representation, 482 | the result of the preceding step MUST be encoded in UTF-8. 483 | 484 | 485 | Applied to the sample in this 486 | should yield the following bytes, here shown in hexadecimal notation: 487 | 488 | 494 | 495 | This data is intended to be usable as input to cryptographic methods. 496 | 497 |
498 |
499 |
500 |
501 | IANA Considerations 502 | 503 | This document has no IANA actions. 504 | 505 |
506 |
507 | Security Considerations 508 | 509 | It is crucial to perform sanity checks on input data to avoid 510 | overflowing buffers and similar things that could affect the 511 | integrity of the system. 512 | 513 | 514 | When JCS is applied to signature schemes like the one described 515 | in , 516 | applications MUST perform the following operations before acting 517 | upon received data: 518 | 519 |
    520 |
  1. 521 | Parse the JSON data and verify that it adheres to I-JSON. 522 |
  2. 523 |
  3. 524 | Verify the data for correctness according to the conventions defined by the 525 | ecosystem where it is to be used. This also includes locating the 526 | property holding the signature data. 527 |
  4. 528 |
  5. 529 | Verify the signature. 530 |
  6. 531 |
532 | 533 | If any of these steps fail, the operation in progress MUST be aborted. 534 | 535 |
536 |
537 | Acknowledgements 538 | 539 | Building on ES6 Number serialization was 540 | originally proposed by James Manger. This ultimately led to the 541 | adoption of the entire ES6 serialization scheme for JSON primitives. 542 | 543 | 544 | Other people who have contributed with valuable input to this specification include 545 | Scott Ananian, 546 | Tim Bray, 547 | Ben Campbell, 548 | Adrian Farell, 549 | Richard Gibson, 550 | Bron Gondwana, 551 | John-Mark Gurney, 552 | John Levine, 553 | Mark Miller, 554 | Matthew Miller, 555 | Mike Jones, 556 | Mark Nottingham, 557 | Mike Samuel, 558 | Jim Schaad, 559 | Robert Tupelo-Schneck 560 | and Michal Wadas. 561 | 562 | 563 | For carrying out real world concept verification, the software and 564 | support for number serialization provided by 565 | Ulf Adams, 566 | Tanner Gooding 567 | and Remy Oudompheng 568 | was very helpful. 569 | 570 |
571 |
572 | 573 | 574 | References 575 | 576 | Normative References 577 | 578 | 579 | Key words for use in RFCs to Indicate Requirement Levels 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | In many standards track documents several words are used to signify 590 | the requirements in the specification. These words are often capitalized. 591 | This document defines these words as they should be interpreted in IETF 592 | documents. This document specifies an Internet Best Current Practices for 593 | the Internet Community, and requests discussion and suggestions for improvements. 594 | 595 | 596 | 597 | 598 | 599 | 600 | The JavaScript Object Notation (JSON) Data Interchange Format 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | JavaScript Object Notation (JSON) is a lightweight, text-based, 611 | language-independent data interchange format. It was derived from the 612 | ECMAScript Programming Language Standard. JSON defines a small set of 613 | formatting rules for the portable representation of structured data. 614 | 615 | 616 | This document removes inconsistencies with other specifications of 617 | JSON, repairs specification errors, and offers experience-based 618 | interoperability guidance. 619 | 620 | 621 | 622 | 623 | 624 | 625 | Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | RFC 2119 specifies common key words that may be used in protocol 636 | specifications. This document aims to reduce the ambiguity by 637 | clarifying that only UPPERCASE usage of the key words have the 638 | defined special meanings. 639 | 640 | 641 | 642 | 643 | 644 | 645 | The I-JSON Message Format 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | I-JSON (short for "Internet JSON") is a restricted profile of 655 | JSON designed to maximize interoperability and increase confidence 656 | that software can process it successfully with predictable results. 
657 | 658 | 659 | 660 | 661 | 662 | 663 | ECMAScript 2015 Language Specification 664 | 665 | Ecma International 666 | 667 | 668 | 669 | 670 | 671 | 672 | IEEE Standard for Floating-Point Arithmetic 673 | 674 | IEEE 675 | 676 | 677 | 678 | 679 | 680 | 681 | The Unicode Standard, Version 12.1.0 682 | 683 | The Unicode Consortium 684 | 685 | 686 | 687 | 688 | 689 | 690 | Informative References 691 | 692 | 693 | JSON Web Key (JWK) Thumbprint 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | This specification defines a method for computing a hash value 706 | over a JSON Web Key (JWK). It defines which fields in a JWK are used 707 | in the hash computation, the method of creating a canonical form for 708 | those fields, and how to convert the resulting Unicode string into a 709 | byte sequence to be hashed. The resulting hash value can be used for 710 | identifying or selecting the key represented by the JWK that is the 711 | subject of the thumbprint. 712 | 713 | 714 | 715 | 716 | 717 | 718 | The Base16, Base32, and Base64 Data Encodings 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | This document describes the commonly used base 64, base 32, and base 16 728 | encoding schemes. It also discusses the use of line-feeds in encoded data, 729 | use of padding in encoded data, use of non-alphabet characters in encoded data, 730 | use of different encoding alphabets, and canonical encodings. [STANDARDS-TRACK] 731 | 732 | 733 | 734 | 735 | 736 | 737 | JSON Web Signature (JWS) 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | JSON Web Signature (JWS) represents content secured with digital 753 | signatures or Message Authentication Codes (MACs) using JSON-based 754 | data structures. Cryptographic algorithms and identifiers for use 755 | with this specification are described in the separate 756 | JSON Web Algorithms (JWA) specification and an IANA registry defined 757 | by that specification. 
Related encryption capabilities are described 758 | in the separate JSON Web Encryption (JWE) specification. 759 | 760 | 761 | 762 | 763 | 764 | 765 | "Comparable" JSON - Work in progress 766 | 767 | A. Rundgren 768 | 769 | 770 | 771 | 772 | 773 | Chrome V8 Open Source JavaScript Engine 774 | 775 | Google LLC 776 | 777 | 778 | 779 | 780 | 781 | Ryu floating point number serializing algorithm 782 | 783 | Ulf Adams 784 | 785 | 786 | 787 | 788 | 789 | Node.js 790 | 791 | 792 | 793 | 794 | 795 | Keybase 796 | 797 | 798 | 799 | 800 | 801 | The OpenAPI Initiative 802 | 803 | 804 | 805 | 806 | 807 | XML Signature Syntax and Processing Version 1.1 808 | 809 | W3C 810 | 811 | 812 | 813 | 814 | 815 |
816 | ES6 Sample Canonicalizer 817 | 818 | Below is an example of a JCS canonicalizer for usage with ES6 based systems: 819 | 820 | { 846 | if (next) { 847 | buffer += ','; 848 | } 849 | next = true; 850 | ///////////////////////////////////////// 851 | // Array element - Recursive expansion // 852 | ///////////////////////////////////////// 853 | serialize(element); 854 | }); 855 | buffer += ']'; 856 | 857 | } else { 858 | ///////////////////////////////////////////////// 859 | // Object - Sort properties before serializing // 860 | ///////////////////////////////////////////////// 861 | buffer += '{'; 862 | let next = false; 863 | Object.keys(object).sort().forEach((property) => { 864 | if (next) { 865 | buffer += ','; 866 | } 867 | next = true; 868 | /////////////////////////////////////////////// 869 | // Property names are strings - Use ES6/JSON // 870 | /////////////////////////////////////////////// 871 | buffer += JSON.stringify(property); 872 | buffer += ':'; 873 | ////////////////////////////////////////// 874 | // Property value - Recursive expansion // 875 | ////////////////////////////////////////// 876 | serialize(object[property]); 877 | }); 878 | buffer += '}'; 879 | } 880 | } 881 | };]]> 882 |
883 |
884 | Number Serialization Samples 885 | 886 | The following table holds a set of ES6 compatible Number serialization samples, 887 | including some edge cases. The column 888 | "IEEE‑754" refers to the internal 889 | ES6 representation of the Number data type which is based on the 890 | IEEE-754 standard using 64-bit (double precision) values, 891 | here expressed in hexadecimal. 892 | 893 | 948 | 949 | 950 | Notes: 951 | 952 |
    953 |
  1. 954 | For maximum compliance with the ES6 JSON object, 955 | values that are to be interpreted as true integers 956 | SHOULD be in the range -9007199254740991 to 9007199254740991. 957 | However, how numbers are used in applications does not affect the JCS algorithm. 958 |
  2. 959 |
  3. 960 | Although a set of specific integers like 2**68 could be regarded as having 961 | extended precision, the JCS/ES6 number serialization 962 | algorithm does not take this into consideration. 963 |
  4. 964 |
  5. 965 | Value out of range, not permitted in JSON. 966 | See . 967 |
  6. 968 |
  7. 969 | This number is exactly 1424953923781206.25 but will after the "Note 2" rule 970 | mentioned in be truncated and 971 | rounded to the closest even value. 972 |
  8. 973 |
974 | 975 | For a more exhaustive validation of a JCS number serializer, you may test 976 | against a file (currently) available in the development portal 977 | (see ), 978 | containing a large set of sample values. Another option 979 | is running V8 980 | as a live reference together with a program generating a 981 | substantial amount of random IEEE-754 values. 982 | 983 |
984 |
985 | Canonicalized JSON as "Wire Format" 986 | 987 | Since the result from the canonicalization process (see ), 988 | is fully valid JSON, it can also be used as "Wire Format". 989 | However, this is just an option since cryptographic schemes 990 | based on JCS, in most cases would not depend on that externally 991 | supplied JSON data already is canonicalized. 992 | 993 | 994 | In fact, the ES6 standard way of serializing objects using 995 | JSON.stringify() produces a 996 | more "logical" format, where properties are 997 | kept in the order they were created or received. The 998 | example below shows an address record which could benefit from 999 | ES6 standard serialization: 1000 | 1001 | 1008 | 1009 | Using canonicalization the properties above would be output in the order 1010 | "address", "city", "name", "state" and "zip", which adds fuzziness 1011 | to the data from a human (developer or technical support), perspective. 1012 | Canonicalization also converts JSON data into a single line of text, which may 1013 | be less than ideal for debugging and logging. 1014 | 1015 |
1016 |
1017 | Dealing with Big Numbers 1018 | 1019 | There are several issues associated with the 1020 | JSON Number type, here illustrated by the following 1021 | sample object: 1022 | 1023 | 1028 | 1029 | Although the sample above conforms to JSON , 1030 | applications would normally use different native data types for storing 1031 | "giantNumber" and "int64Max". In addition, monetary data like "payMeThis" would 1032 | presumably not rely on floating point data types due to rounding issues with respect 1033 | to decimal arithmetic. 1034 | 1035 | 1036 | The established way of handling this kind of "overloading" of the 1037 | JSON Number type (at least in an extensible manner), is through 1038 | mapping mechanisms, instructing parsers what to do with different properties 1039 | based on their name. However, this greatly limits the value of using the 1040 | JSON Number type outside of its original somewhat constrained, JavaScript context. 1041 | The ES6 JSON object does not support mappings to JSON Number either. 1042 | 1043 | 1044 | Due to the above, numbers that do not have a natural place in the current 1045 | JSON ecosystem MUST be wrapped using the JSON String type. This is close to 1046 | a de-facto standard for open systems. This is also applicable for 1047 | other data types that do not have direct support in JSON, like DateTime 1048 | objects as described in . 1049 | 1050 | 1051 | Aided by a system using the JSON String type; be it programmatic like 1052 | 1053 | 1055 | 1056 | or declarative schemes like OpenAPI , 1057 | JCS imposes no limits on applications, including when using ES6. 1058 | 1059 |
1060 |
1061 | String Subtype Handling 1062 | 1063 | Due to the limited set of data types featured in JSON, 1064 | the JSON String type is commonly used for holding subtypes. 1065 | This can, depending on the JSON parsing method, lead to 1066 | interoperability problems which MUST be dealt with by 1067 | JCS compliant applications targeting a wider audience. 1068 | 1069 | 1070 | Assume you want to parse a JSON object where the schema 1071 | designer assigned the property "big" for holding a BigInt subtype and 1072 | "time" for holding a DateTime subtype, while "val" is supposed to be a JSON Number 1073 | compliant with JCS. The following example shows such an object: 1074 | 1075 | 1080 | Parsing of this object can be accomplished by the following ES6 statement: 1081 | 1082 | After parsing the actual data can be extracted which for subtypes also involves a conversion 1083 | step using the result of the parsing process (an ECMAScript object) as input: 1084 | 1087 | 1088 | Note that the BigInt data type is currently only natively supported by V8 . 1089 | 1090 | 1091 | Canonicalization of "object" using the sample code in would return the 1092 | following string: 1093 | 1094 | 1095 | 1096 | Although this is (with respect to JCS) technically correct, there is another way of parsing JSON data 1097 | which also can be used with ECMAScript as shown below: 1098 | 1099 | k == 'time' ? new Date(v) : k == 'big' ? BigInt(v) : v 1107 | );]]> 1108 | 1109 | If you now apply the canonicalizer in to "object", the 1110 | following string would be generated: 1111 | 1112 | 1113 | 1114 | In this case the string arguments for "big" and "time" have changed with respect to the original, 1115 | presumably making an application depending on JCS fail. 
1116 | 1117 | 1118 | The reason for the deviation is that in stream and schema based JSON parsers, 1119 | the original "string" argument is typically replaced on-the-fly 1120 | by the native subtype which when serialized, may exhibit a different 1121 | and platform dependent pattern. 1122 | 1123 | 1124 | That is, stream and schema based parsing MUST treat subtypes as "pure" (immutable) JSON String types, 1125 | and perform the actual conversion to the designated native type in a subsequent step. 1126 | In modern programming platforms like Go, Java and C# this can be achieved with 1127 | moderate efforts by combining annotations, getters and setters. 1128 | Below is an example in C#/Json.NET showing a part of a class that is serializable 1129 | as a JSON Object: 1130 | 1131 | 1142 | 1143 | In an application "Amount" can be accessed as any other property 1144 | while it is actually represented by a quoted string in JSON contexts. 1145 | 1146 | 1147 | Note: the example above also addresses the constraints on numeric data 1148 | implied by I-JSON (the C# "decimal" data type has quite different 1149 | characteristics compared to IEEE-754 double precision). 1150 | 1151 |
1152 | Subtypes in Arrays 1153 | 1154 | Since the JSON Array construct permits mixing arbitrary JSON data types, 1155 | custom parsing and serialization code may be required 1156 | to cope with subtypes anyway. 1157 | 1158 |
1159 |
1160 |
1161 | Implementation Guidelines 1162 | 1163 | The optimal solution is integrating support for JCS directly 1164 | in JSON serializers (parsers need no changes). 1165 | That is, canonicalization would just be an additional "mode" 1166 | for a JSON serializer. However, this is currently not the case. 1167 | Fortunately, JCS support can be introduced through externally supplied 1168 | canonicalizer software acting as a post processor to existing 1169 | JSON serializers. This arrangement also relieves the JCS implementer from 1170 | having to deal with how underlying data is to be represented in JSON. 1171 | 1172 | 1173 | The post processor concept enables signature creation schemes like the following: 1174 | 1175 |
    1176 |
  1. 1177 | Create the data to be signed. 1178 |
  2. 1179 |
  3. 1180 | Serialize the data using existing JSON tools. 1181 |
  4. 1182 |
  5. 1183 | Let the external canonicalizer process the serialized data and return canonicalized result data. 1184 |
  6. 1185 |
  7. 1186 | Sign the canonicalized data. 1187 |
  8. 1188 |
  9. 1189 | Add the resulting signature value to the original JSON data through a designated signature property. 1190 |
  10. 1191 |
  11. 1192 | Serialize the completed (now signed) JSON object using existing JSON tools. 1193 |
  12. 1194 |
1195 | 1196 | A compatible signature verification scheme would then be as follows: 1197 | 1198 |
    1199 |
  1. 1200 | Parse the signed JSON data using existing JSON tools. 1201 |
  2. 1202 |
  3. 1203 | Read and save the signature value from the designated signature property. 1204 |
  4. 1205 |
  5. 1206 | Remove the signature property from the parsed JSON object. 1207 |
  6. 1208 |
  7. 1209 | Serialize the remaining JSON data using existing JSON tools. 1210 |
  8. 1211 |
  9. 1212 | Let the external canonicalizer process the serialized data and return canonicalized result data. 1213 |
  10. 1214 |
  11. 1215 | Verify that the canonicalized data matches the saved signature value 1216 | using the algorithm and key used for creating the signature. 1217 |
  12. 1218 |
1219 | 1220 | A canonicalizer like above is effectively only a "filter", potentially usable with 1221 | a multitude of quite different cryptographic schemes. 1222 | 1223 | 1224 | Using a JSON serializer with integrated JCS support, the serialization performed 1225 | before the canonicalization step could be eliminated for both processes. 1226 | 1227 |
1228 |
1229 | Open Source Implementations 1230 | 1231 | The following Open Source implementations have been verified to be 1232 | compatible with JCS: 1233 | 1234 |
    1235 |
  • 1236 | JavaScript: 1237 |
  • 1238 |
  • 1239 | Java: 1240 |
  • 1241 |
  • 1242 | Go: 1243 |
  • 1244 |
  • 1245 | .NET/C#: 1246 |
  • 1247 |
  • 1248 | Python: 1249 |
  • 1250 |
1251 |
1252 |
1253 | Other JSON Canonicalization Efforts 1254 | 1255 | There are (and have been) other efforts creating "Canonical JSON". 1256 | Below is a list of URLs to some of them: 1257 | 1258 |
    1259 |
  • 1260 | 1261 |
  • 1262 |
  • 1263 | 1264 |
  • 1265 |
  • 1266 | 1267 |
  • 1268 |
1269 | 1270 | The listed efforts all build 1271 | on text level JSON to JSON transformations. The primary feature 1272 | of text level canonicalization is that it can be made neutral to 1273 | the flavor of JSON used. However, such schemes also 1274 | imply major changes to the JSON parsing process which is a likely 1275 | hurdle for adoption. Albeit at the expense of certain JSON and 1276 | application constraints, 1277 | JCS was designed to be compatible with existing JSON tools. 1278 | 1279 |
1280 |
1281 | Development Portal 1282 | 1283 | The JCS specification is currently developed at: 1284 | . 1285 | 1286 | 1287 | JCS source code and extensive test data is available at: 1288 | 1289 | 1290 |
1291 |
1292 | Document History 1293 | 1294 | [[ This section to be removed by the RFC Editor before publication as 1295 | an RFC ]] 1296 | 1297 | Version 00-06: 1298 |
    1299 |
  • 1300 | See IETF diff listings. 1301 |
  • 1302 |
1303 | Version 07: 1304 |
    1305 |
  • 1306 | Initial conversion to XML RFC version 3. 1307 |
  • 1308 |
  • 1309 | Changed intended status to "Informational". 1310 |
  • 1311 |
  • 1312 | Added UTF-16 test data and explanations. 1313 |
  • 1314 |
1315 | Version 08: 1316 |
    1317 |
  • 1318 | Updated Abstract. 1319 |
  • 1320 |
  • 1321 | Added a "Note 2" number serialization sample. 1322 |
  • 1323 |
  • 1324 | Updated Security Considerations. 1325 |
  • 1326 |
  • 1327 | Tried to clear up the JSON input data section. 1328 |
  • 1329 |
  • 1330 | Added a line about Unicode normalization. 1331 |
  • 1332 |
  • 1333 | Added a line about serialization of structured data. 1334 |
  • 1335 |
  • 1336 | Added a missing fact about "BigInt" (V8 not ES6). 1337 |
  • 1338 |
1339 | Version 09: 1340 |
    1341 |
  • 1342 | Updated initial line of Abstract and Introduction. 1343 |
  • 1344 |
  • 1345 | Added note about breaking ECMAScript changes. 1346 |
  • 1347 |
  • 1348 | Minor language nit fixes. 1349 |
  • 1350 |
1351 | Version 10-12: 1352 |
    1353 |
  • 1354 | Language tweaks. 1355 |
  • 1356 |
1357 | Version 13: 1358 |
    1359 |
  • 1360 | Reorganized . 1361 |
  • 1362 |
1363 | Version 14: 1364 |
    1365 |
  • 1366 | Improved introduction + some minor changes in security considerations, acknowledgements, and 1367 | unicode normalization. 1368 |
  • 1369 |
  • 1370 | Generalized data representation issues by updating . 1371 |
  • 1372 |
1373 | Version 15: 1374 |
    1375 |
  • 1376 | Minor nits, reverted the IEEE-754 table to ASCII. 1377 |
  • 1378 |
  • 1379 | Added a bit more meat to the IEEE-754 table. 1380 |
  • 1381 |
  • 1382 | Changed all <artwork> to: type="ascii-art" and removed name="". 1383 |
  • 1384 |
1385 | Version 16: 1386 |
    1387 |
  • 1388 | Updated section 2 according to AD's wish. 1389 |
  • 1390 |
1391 | Version 17: 1392 |
    1393 |
  • 1394 | Updated section 2 after IESG input. 1395 |
  • 1396 |
  • 1397 | Author affiliation update. 1398 |
  • 1399 |
1400 |
1401 |
1402 |
1403 | --------------------------------------------------------------------------------