├── .gitattributes
├── README.md
├── update.js
├── broken.js
├── draft-rundgren-comparable-json.xml
├── ietf-104-report.html
├── LICENSE
├── draft-rundgren-json-canonicalization-scheme.xml
└── xmlv3
└── draft-rundgren-json-canonicalization-scheme.xml
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Disable LF normalization for all files
2 | * -text
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ietf-json-canon
2 | Internet-Draft for Canonical JSON
3 |
4 | Completed: https://tools.ietf.org/html/rfc8785
5 |
--------------------------------------------------------------------------------
/update.js:
--------------------------------------------------------------------------------
// Date serializer fixes the problem
/**
 * Strict "DateTime" serializer used by JSON.stringify().
 *
 * Replaces the built-in Date.prototype.toJSON so that every valid Date is
 * emitted in one fixed UTC notation without fractional seconds.
 *
 * @returns {string|null} The date as yyyy-mm-ddThh:mm:ssZ, or null when the
 *     Date is invalid (same result as the built-in toJSON).
 */
Date.prototype.toJSON = function () {
    // toISOString() throws a RangeError for an invalid Date; return null
    // instead so that JSON.stringify() keeps working, as it does with the
    // built-in implementation.
    if (Number.isNaN(this.getTime())) {
        return null;
    }
    const date = this.toISOString();
    // In this particular case we selected a UTC notation
    // yyyy-mm-ddThh:mm:ssZ
    // toISOString() always carries a ".sss" fraction; cut it off.
    return date.substring(0, date.indexOf('.')) + 'Z';
};
8 |
--------------------------------------------------------------------------------
/broken.js:
--------------------------------------------------------------------------------
1 | // Sample showing how a naive use of JCS will fail
2 | 'use strict';
/**
 * Serializes a value as JSON text with object properties sorted by name
 * and no whitespace between elements.
 *
 * Primitives, and any object exposing a toJSON member, are handed to
 * JSON.stringify() unchanged. For objects with toJSON this means the
 * result depends entirely on that method (this is the "naive" part the
 * sample demonstrates). Arrays keep their element order; plain objects
 * have their own enumerable keys sorted with the default sort().
 *
 * Values that JSON.stringify() cannot represent (undefined, functions,
 * symbols) are treated as JSON.stringify() treats them: omitted when they
 * are object members, emitted as null when they are array elements
 * (array holes included).
 *
 * @param {*} object The value to serialize.
 * @returns {string|undefined} The serialized text, or undefined when the
 *     top-level value itself has no JSON representation.
 */
var canonicalize = function(object) {

    return serialize(object);

    // Returns the JSON text for `value`, or undefined when JSON.stringify()
    // yields no text for it.
    function serialize(value) {
        if (value === null || typeof value !== 'object' ||
            value.toJSON != null) {
            /////////////////////////////////////////////////
            // Primitive type or toJSON - Use ES6/JSON     //
            /////////////////////////////////////////////////
            return JSON.stringify(value);
        }

        if (Array.isArray(value)) {
            /////////////////////////////////////////////////
            // Array - Maintain element order              //
            /////////////////////////////////////////////////
            // Array.from visits holes too (as undefined), so sparse arrays
            // keep their length; unrepresentable slots become null.
            const elements = Array.from(
                value,
                (element) => serialize(element) ?? 'null');
            return '[' + elements.join(',') + ']';
        }

        /////////////////////////////////////////////////
        // Object - Sort properties before serializing //
        /////////////////////////////////////////////////
        const members = [];
        for (const property of Object.keys(value).sort()) {
            const serialized = serialize(value[property]);
            // Skip members without a JSON representation instead of
            // writing the text "undefined" into the output.
            if (serialized !== undefined) {
                // Property names are strings - Use ES6/JSON
                members.push(JSON.stringify(property) + ':' + serialized);
            }
        }
        return '{' + members.join(',') + '}';
    }
};
60 |
// Input document: "time" and "big" hold string-encoded subtypes,
// "val" is an ordinary JSON Number.
const jstring =
    '{"time": "2019-01-28T07:45:10Z", "big": "055", "val": 3.5}';

// Give BigInt a toJSON so JSON.stringify() emits it as a decimal string.
BigInt.prototype.toJSON = function () {
    return this.toString();
};

// Parse with a reviver that turns "time" into a Date and "big" into a
// BigInt; every other member is kept as parsed.
var object = JSON.parse(jstring, (key, value) => {
    switch (key) {
        case 'time':
            return new Date(value);
        case 'big':
            return BigInt(value);
        default:
            return value;
    }
});

// Print the re-serialized object.
console.log(canonicalize(object));
--------------------------------------------------------------------------------
/draft-rundgren-comparable-json.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | "Comparable" JSON (JSONCOMP)
17 |
18 |
19 |
20 | Independent
21 |
22 |
23 |
24 |
25 | Montpellier
26 | France
27 |
28 | anders.rundgren.net@gmail.com
29 | https://www.linkedin.com/in/andersrundgren/
30 |
31 |
32 |
33 |
34 |
35 | Security
36 |
37 |
38 |
39 |
40 | JSON, ECMAScript, Canonicalization, Normalization
41 |
42 |
43 |
44 |
45 | This application note describes how JCS
46 | can be utilized to support applications needing canonicalization
47 | beyond the core JSON level,
48 | with comparisons as the primary target.
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | The purpose of JCS is creating "Hashable" representations
57 | of JSON data intended for cryptographic solutions.
58 | JCS accomplishes this by combining normalization of the native JSON
59 | String and Number primitives with a deterministic property sorting scheme.
60 | That is, JCS provides canonicalization at the core JSON level.
61 | For interoperability reasons JCS also constrains data to the I-JSON subset.
62 |
63 |
64 | However, if you would rather compare JSON data from
65 | different sources or runs, JCS would in many cases be
66 | inadequate since the JSON String type is commonly used
67 | for holding subtypes like "DateTime" or "BigInteger" objects.
68 |
69 |
70 | This application note outlines how JCS, in spite of having a limited
71 | canonicalization scope, may still be utilized by applications like those above.
72 |
73 |
74 |
75 |
76 |
77 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
78 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
79 | "MAY", and "OPTIONAL" in this document are to be interpreted as
80 | described in BCP 14
81 | when, and only when, they appear in all capitals, as shown here.
82 |
83 |
84 |
85 |
86 |
87 | Assume you want to compare productions of JSON data where the schema
88 | designer assigned the property "big" for holding a "BigInteger" subtype and
89 | "time" for holding a "DateTime" subtype, while "val" is supposed to be a JSON Number
90 | compliant with JCS. The following example shows such an object:
91 |
92 |
93 |
98 |
99 |
100 | A problem here is that "055" clearly is not a canonical form for a "BigInteger"
101 | while a "DateTime" object like "2019-01-28T07:45:10Z"
102 | might as well be expressed as "2019-01-28T08:45:10.000+01:00" making
103 | comparisons based on JCS canonicalization fail.
104 |
105 |
106 | To resolve this issue using JCS the following measures MUST be taken:
107 |
108 |
109 | The community or standard utilizing a specific JSON schema
110 | defines a strict normalized form for each of the used subtypes.
111 |
112 |
113 | Compatible serializers are created for each subtype.
114 |
115 |
116 | A positive side effect of this arrangement is that it enforces strict definitions
117 | of subtypes which improves interoperability in general as well.
118 |
119 |
120 | Defining specific subtypes and their normalized form is out of scope for
121 | this application note. Although the JSON example illustrated a "BigInteger" in
122 | decimal notation, applications transferring huge integers
123 | (like raw RSA keys) typically use Base64
124 | encoding instead to conserve space.
125 |
126 |
127 | Below is an example of a strict serializer expressed in ECMAScript
128 | for a "DateTime" subtype:
129 |
130 |
131 |
137 |
138 |
139 |
140 |
141 |
142 | This document has no IANA actions.
143 |
144 |
145 |
146 |
147 |
148 | Systems implementing this application note are subject
149 | to the same security considerations as JCS.
150 |
151 |
152 |
153 |
154 |
155 | This document was created based on feedback (on JCS) from many people
156 | including Mark Nottingham and Jim Schaad.
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 | JSON Canonicalization Scheme - Work in progress
170 |
171 | A. Rundgren, B. Jordan, S. Erdtman
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 | ECMAScript 2015 Language Specification
182 |
183 | Ecma International
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
--------------------------------------------------------------------------------
/ietf-104-report.html:
--------------------------------------------------------------------------------
1 |
JCS - IETF-104 Report
10 |
11 |
JCS - IETF-104 Report
12 | There were in total 100 minutes of meeting time (including a 1 hour
13 | side meeting with 10+ participants) devoted to JCS at IETF-104.
14 | Here is a list of issues raised during these meetings.
15 | I have taken the liberty of commenting on them here.
16 |
24 | 1. The need for clear text messages is a weak argument
25 |
26 |
27 | The recommended use of the current IETF JSON signature solution
28 | (JWS) is that you:
30 |
31 |
32 | Encode JSON data to be signed in Base64Url
33 |
34 |
35 | Disrupt the natural structure of JSON messages by embedding
36 | signed message data in specific signature containers
37 |
38 |
39 | None of the Open Banking systems out there have to date chosen
40 | this route; they all build on variants using detached signatures and clear text JSON data.
41 | That none of them utilize JCS is quite logical since
42 | JCS (correctly) is not perceived as a standard.
43 |
50 | If a canonicalization scheme is incorrectly implemented
51 | (irrespective of at which end), the likely result is that signatures will not validate.
52 | Broken signatures, like any other input error (including missing or
53 | incorrectly formatted data), should in a properly designed application lead to a
54 | rejected message/application failure. The core of a JCS implementation is
55 | typically only a couple of kilobytes of executable code
56 | making it reasonably easy to verify for correctness.
57 |
58 |
59 | It has been mentioned that clear text data will tempt developers into trusting
60 | (=acting upon) received data without verifying signatures.
61 | JCS obviously does not
62 | come with a cure for naïve developers.
63 | See JCS Security Considerations.
65 |
66 |
67 | In fact, the absence of clear text signatures also creates security issues as shown
68 | by the following example from IETF's Trusted Execution Protocol WG:
69 |
74 |
75 | The top element "[Signed][Request|Response]" cannot be fully
76 | trusted to match the content because it doesn't participate in the
77 | signature generation. However, a recipient can always match it with
78 | the value associated with the property "payload". It purely serves
79 | to provide a quick reference for reading and method invocation.
80 |
81 |
82 |
83 | By using JWS with JCS
85 | the need for artificial holder objects and associated matching requirements
86 | disappears, while message content is provided in the clear.
87 |
88 |
89 |
90 | 3. Number serialization is a huge problem
91 |
92 |
93 | I clearly underestimated this part when I started with JCS back in 2015, but
94 | recently fast, open sourced and quite simple
95 | algorithms have been developed
97 | making number serialization according to JCS/ES6 in scope for any platform.
98 | Extensive test data is
99 | publicly available.
101 |
102 |
103 |
104 | 4. You should have stayed with the ES6 predictive parsing/serialization scheme
105 |
106 |
107 | That would have been cool, but the sentiment among other JSON tool vendors was
108 | that "ECMA got it all wrong" so
109 | I was forced to select another and more conventional route.
110 | Fortunately, the revised scheme turned out to be very simple to get running
111 | in other platforms including Go, Python, C# and Java/Android, while leaving
112 | parsers and serializers unchanged.
113 | The original concept would OTOH
115 | require a total rewrite of the entire JSON landscape.
116 | Sometimes "pushback" is just good 😀
117 |
118 |
119 |
120 | 5. You need a data model
121 |
122 |
123 | JCS builds on the same bare-bones data model for primitives as JSON
124 | (null, true, false, Number, String),
125 | albeit with a couple of constraints:
126 |
127 |
128 | JSON Numbers MUST conceptually be treated as IEEE-754 double precision data during parsing/serialization
129 | (which also is a generic requirement for being JavaScript compatible)
130 |
131 |
132 | JSON Strings MUST (modulo escaping) be treated as immutable during parsing/serialization
133 |
134 |
135 | This is all that is needed with respect to data models for creating reliable and interoperable "hashable" JSON.
136 | Existing JSON-based systems use external mappings to emulate
137 | missing data types like int32, DateTime, Money, Binary and similar.
138 | That not all JSON applications use the same conventions
139 | does not seem to have hampered the popularity and ubiquity of JSON.
140 | Standardizing external mappings is another [possible] IETF activity, not related to JCS.
141 |
142 |
143 |
144 | 6. I-JSON (JCS builds on that) only says SHOULD for IEEE-754 while JCS says MUST
145 |
146 |
147 | That is correct but if you for example send 64-bit integers expressed as
148 | JSON Numbers to JavaScript based systems, applications will typically break
149 | every now and then since the inherent precision is only 53 bits.
150 | JCS was designed to also be fully JavaScript compatible.
151 |
152 |
153 | 7. XML canonicalization was a disaster
154 |
155 |
156 |
157 | JCS is not a full-blown canonicalization scheme like XML's C14N; it is
158 | a (fairly rudimentary) serialization method.
159 |
160 |
161 | A proper and fair evaluation should be based on the actual draft rather than
162 | bad experiences from the XML era which BTW
163 | also were due to other factors
164 | such as Namespaces, Default values,
165 | SOAP and an elaborate WS* stack which indeed took years to get
166 | fully interoperable between vendors.
167 |
168 |
169 |
Version 1.06, Anders Rundgren 2019-05-12
170 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/draft-rundgren-json-canonicalization-scheme.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | JSON Canonicalization Scheme (JCS)
17 |
18 |
19 |
20 | Independent
21 |
22 |
23 |
24 |
25 | Montpellier
26 | France
27 |
28 | anders.rundgren.net@gmail.com
29 | https://www.linkedin.com/in/andersrundgren/
30 |
31 |
32 |
33 |
34 | Symantec Corporation
35 |
36 |
37 | 350 Ellis Street
38 | CA 94043
39 | Mountain View
40 | USA
41 |
42 | bret_jordan@symantec.com
43 |
44 |
45 |
46 |
47 | Spotify AB
48 |
49 |
50 | Birger Jarlsgatan 61, 4tr
51 | 113 56
52 | Stockholm
53 | Sweden
54 |
55 | erdtman@spotify.com
56 |
57 |
58 |
59 |
60 |
61 | Security
62 |
63 |
64 |
65 |
66 | JSON, ECMAScript, Signatures, Cryptography, Canonicalization
67 |
68 |
69 |
70 |
71 | Cryptographic operations like hashing and signing require that the
72 | original data does not change during serialization or parsing. One
73 | way of addressing this issue is to create a canonical form of the data.
74 | Canonicalization also permits data to be exchanged in its original
75 | form on the "wire" while still being subject to secure cryptographic
76 | operations. The JSON Canonicalization Scheme (JCS) provides
77 | canonicalization support for data in the JSON format by building on
78 | the strict serialization methods for JSON primitives defined by
79 | ECMAScript, constraining JSON data to the I-JSON subset, and through
80 | a deterministic property sorting scheme.
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | Cryptographic operations like hashing and signing require that the
89 | original data does not change during serialization or parsing.
90 | One way of accomplishing this is converting the data into
91 | a format that has a simple and fixed representation like Base64Url ,
92 | which is how JWS addressed this issue.
93 |
94 |
95 | Another solution is to create a canonical version of the data,
96 | similar to what was done for the XML Signature standard.
97 | The primary advantage with a canonicalizing scheme is that data
98 | can be kept in its original form. This is the core rationale behind JCS.
99 | Put another way: by using canonicalization a JSON Object may remain a JSON Object
100 | even after being signed which simplifies system design, documentation and logging.
101 |
102 |
103 | To avoid "reinventing the wheel", JCS relies on serialization of JSON primitives compatible with
104 | ECMAScript (aka JavaScript) beginning with version 6 ,
105 | hereafter referred to as "ES6".
106 |
107 |
108 | Seasoned XML developers recalling difficulties getting signatures
109 | to validate (usually due to different interpretations of the quite intricate
110 | XML canonicalization rules as well as of the equally extensive
111 | Web Services security standards), may rightfully wonder why JCS
112 | would not suffer from similar issues. The reasons are twofold:
113 |
114 |
115 | The absence of a namespace concept and default values, as well
116 | as constraining data to the I&nbhy;JSON subset eliminate the need for specific
117 | parsers for dealing with canonicalization.
118 |
119 |
120 | JCS compatible serialization of JSON primitives is supported by most
121 | current Web browsers as well as by Node.js ,
122 | while the full JCS specification is supported by multiple
123 | Open Source implementations (see ).
124 | See also .
125 |
126 |
127 |
128 |
129 | In summary the JCS specification describes how serialization of JSON primitives compliant
130 | with ES6 combined with a deterministic property sorting scheme can be used for
131 | creating "Hashable" representations of JSON data intended for consumption by cryptographic methods.
132 |
133 |
134 | JCS is compatible with some existing systems relying on JSON canonicalization
135 | such as JWK Thumbprint and Keybase .
136 |
137 |
138 | For potential uses outside of cryptography see .
139 |
140 |
141 | The intended audiences of this document are JSON tool vendors, as
142 | well as designers of JSON based cryptographic solutions.
143 |
144 |
145 |
146 |
147 |
148 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
149 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
150 | "MAY", and "OPTIONAL" in this document are to be interpreted as
151 | described in BCP 14
152 | when, and only when, they appear in all capitals, as shown here.
153 |
154 |
155 |
156 |
157 |
158 | This section describes the different issues related to creating
159 | a canonical JSON representation, and how they are addressed by JCS.
160 |
161 |
162 |
163 | In order to serialize JSON data, one needs data
164 | that is adapted for JSON serialization. This is usually achieved by:
165 |
166 |
167 |
168 | Parsing previously generated JSON data.
169 |
170 |
171 | Programmatically creating data.
172 |
173 |
174 |
175 |
176 | Irrespective of the method used, the data to be serialized MUST be compatible
177 | with I&nbhy;JSON , which implies the following:
178 |
179 |
180 |
181 |
182 | JSON Objects MUST NOT exhibit duplicate property names.
183 |
184 |
185 | JSON String data MUST be expressible
186 | as Unicode .
187 |
188 |
189 | JSON Number data MUST be expressible
190 | as IEEE-754 double precision values.
191 | For applications needing higher precision or longer integers than
192 | offered by IEEE-754 double precision,
193 | outlines how
194 | such requirements can be supported in an interoperable and extensible way.
195 |
196 |
197 |
198 |
199 | An additional constraint is that parsed JSON String data MUST NOT be altered during subsequent serializations.
200 | For more information see .
201 |
202 |
203 | Note: although the Unicode standard offers the possibility of combining
204 | certain characters into one, referred to as "Unicode Normalization"
205 | (https://www.unicode.org/reports/tr15/),
206 | such functionality MUST be delegated to the application layer.
207 |
208 |
209 |
210 |
211 | The following subsections describe the steps required for creating a canonical
212 | JSON representation of the data elaborated on in the previous section.
213 |
214 |
215 | shows sample code for an ES6 based canonicalizer,
216 | matching the JCS specification.
217 |
218 |
219 |
220 | Whitespace between JSON elements MUST NOT be emitted.
221 |
222 |
223 |
224 |
225 | Assume that you parse a JSON object like the following:
226 |
227 |
228 |
234 |
235 |
236 | If you subsequently serialize the parsed data
237 | using a serializer compliant with ES6's JSON.stringify(),
238 | the result would (with a line wrap added for display purposes only),
239 | be rather divergent with respect to representation of data:
240 |
241 |
242 |
244 |
245 |
246 |
247 |
248 | Note: EURO denotes a single Euro character (Unicode: U+20AC),
249 |
250 | which not being ASCII, is currently not displayable in RFCs.
251 |
252 |
253 |
254 |
255 | The difference between the parsed data and its
256 | serialized counterpart is due to a wide tolerance on input data (as defined
257 | by JSON ), while output data (as defined by ES6),
258 | has a fixed representation. As can be seen by the example,
259 | numbers are subject to rounding as well.
260 |
261 |
262 | The following subsections describe serialization of primitive JSON data types
263 | according to JCS. This part is identical to that of ES6.
264 |
265 |
266 |
267 | The JSON literals null, true,
268 | and false present no challenge since they already have a
269 | fixed definition in JSON .
270 |
271 |
272 |
273 |
274 | For JSON String data (which includes
275 | JSON Object property names as well), each Unicode code point MUST be serialized as
276 | described below (also matching Section 24.3.2.2 of ):
277 |
278 |
279 |
280 |
281 | If the Unicode value falls within the traditional ASCII control
282 | character range (U+0000 through U+001F), it MUST
283 | be serialized using lowercase hexadecimal Unicode notation (\uhhhh) unless it is in the
284 | set of predefined JSON control characters U+0008, U+0009, U+000A, U+000C or U+000D
285 | which MUST be serialized as \b, \t, \n, \f and \r respectively.
286 |
287 |
288 | If the Unicode value is outside of the ASCII control character range, it MUST
289 | be serialized "as is" unless it is equivalent to
290 | U+005C (\) or U+0022 (") which MUST be serialized as \\ and \" respectively.
291 |
292 |
293 |
294 |
295 | Finally, the resulting sequence of Unicode code points MUST be enclosed in double quotes (").
296 |
297 |
298 | Note: some JSON systems permit the use of invalid Unicode data
299 | including "lone surrogates" (e.g. U+DEAD).
300 | Since this leads to interoperability issues including broken signatures,
301 | occurrences of such data MUST cause the JCS algorithm to terminate
302 | with an error indication.
303 |
304 |
305 |
306 |
307 |
308 | JSON Number data MUST be serialized according to
309 | Section 7.1.12.1 of including the "Note 2" enhancement.
310 |
311 |
312 | Due to the relative complexity of this part, the algorithm itself is not included in this document.
313 | However, the specification is fully implemented by for example Google's V8 .
314 | The open source Java implementation mentioned in
315 | uses a recently developed number serialization algorithm called Ryu .
316 |
317 |
318 | ES6 builds on the IEEE-754 double precision
319 | standard for representing JSON Number data.
320 | holds a set of IEEE-754 sample values and their
321 | corresponding JSON serialization.
322 |
323 |
324 | Note: since NaN (Not a Number) and Infinity are not permitted in JSON,
325 | occurrences of such values MUST cause the JCS algorithm to terminate
326 | with an error indication.
327 |
328 |
329 |
330 |
331 |
332 | Although the previous step indeed normalized the representation of primitive
333 | JSON data types, the result would not qualify as "canonical" since JSON
334 | Object properties are not in lexicographic (alphabetical) order.
335 |
336 |
337 | Applied to the sample in ,
338 | a properly canonicalized version should (with a
339 | line wrap added for display purposes only), read as:
340 |
341 |
342 |
344 |
345 |
346 |
347 |
348 | Note: EURO denotes a single Euro character (Unicode: U+20AC),
349 |
350 | which not being ASCII, is currently not displayable in RFCs.
351 |
352 |
353 |
354 |
355 | The rules for lexicographic sorting of JSON Object
356 | properties according to JCS are as follows:
357 |
358 |
359 | JSON Object properties MUST be sorted in a recursive manner
360 | which means that possible JSON child Objects
361 | MUST have their properties sorted as well.
362 |
363 |
364 | JSON Array data MUST also be scanned for
365 | presence of JSON Objects (and applying associated property sorting),
366 | but array element order MUST NOT be changed.
367 |
368 |
369 |
370 |
371 | When a JSON Object is about to have its properties
372 | sorted, the following measures MUST be adhered to:
373 |
374 |
375 | The sorting process is applied to property name strings in their "raw" (unescaped) form.
376 | That is, a newline character is treated as U+000A.
377 |
378 |
379 | Property name strings to be sorted are formatted
380 | as arrays of UTF-16 code units.
381 | The sorting is based on pure value comparisons, where code units are treated as
382 | unsigned integers, independent of locale settings.
383 |
384 |
385 | Property name strings either have different values at some index that is
386 | a valid index for both strings, or their lengths are different, or both.
387 | If they have different values at one or more index
388 | positions, let k be the smallest such index; then the string whose
389 | value at position k has the smaller value, as determined by using
390 | the < operator, lexicographically precedes the other string.
391 | If there is no index position at which they differ,
392 | then the shorter string lexicographically precedes the longer string.
393 |
394 | In plain English this means that property names are sorted in ascending order like the following:
395 |
396 |
397 |
398 |
399 |
403 |
404 |
405 |
406 | The rationale for basing the sorting algorithm on UTF-16 code units is that
407 | it maps directly to the string type in ECMAScript (featured in Web browsers
408 | and Node.js), Java and .NET.
409 | Systems using another internal representation of string data will need to convert
410 | JSON property name strings into arrays of UTF-16 code units before sorting.
411 | The conversion from UTF-8 or UTF-32 to UTF-16 is defined by the
412 | Unicode standard.
413 |
414 |
415 | Note: for the purpose of obtaining a deterministic property order, sorting on
416 | UTF-8 or UTF-32 encoded data would also work, but the result would differ
417 | and thus be incompatible with this specification.
418 | However, in practice property names rarely go outside of
419 | 7-bit ASCII, making it possible to sort on the UTF-8 byte level and
420 | still be compatible with JCS. Whether this is a viable option
421 | depends on the environment JCS is supposed to be used in.
422 |
423 |
424 |
425 |
426 | Finally, in order to create a platform independent representation,
427 | the result of the preceding step MUST be encoded in UTF-8.
428 |
429 |
430 | Applied to the sample in this
431 | should yield the following bytes here shown in hexadecimal notation:
432 |
433 |
434 |
440 |
441 |
442 | This data is intended to be usable as input to cryptographic methods.
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 | This document has no IANA actions.
451 |
452 |
453 |
454 |
455 |
456 | It is vital to perform "sanity" checks
457 | on input data to avoid overflowing buffers and similar things that
458 | could affect the integrity of the system.
459 |
460 |
461 | When JCS is applied to signature schemes like the one in ,
462 | applications MUST perform the following operations before acting
463 | upon received data:
464 |
465 |
466 | Parse the JSON data
467 |
468 |
469 | Verify the data for correctness
470 |
471 |
472 | Verify the signature
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 | Building on ES6 Number serialization was
481 | originally proposed by James Manger. This ultimately led to the
482 | adoption of the entire ES6 serialization scheme for JSON primitives.
483 |
484 |
485 | Other people who have contributed with valuable input to this specification include
486 | Scott Ananian,
487 | Ben Campbell,
488 | Richard Gibson,
489 | Bron Gondwana,
490 | John-Mark Gurney,
491 | Mike Jones,
492 | Mike Miller,
493 | Mark Nottingham,
494 | Mike Samuel,
495 | Jim Schaad,
496 | Robert Tupelo-Schneck
497 | and Michal Wadas.
498 |
499 |
500 | For carrying out real world concept verification, the software and
501 | support for number serialization provided by
502 | Ulf Adams,
503 | Tanner Gooding
504 | and Remy Oudompheng
505 | was very helpful.
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 | ECMAScript 2015 Language Specification
520 |
521 | Ecma International
522 |
523 |
524 |
525 |
526 |
527 |
528 | IEEE Standard for Floating-Point Arithmetic
529 |
530 | IEEE
531 |
532 |
533 |
534 |
535 |
536 |
537 | The Unicode Standard, Version 10.0.0
538 |
539 | The Unicode Consortium
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 | "Comparable" JSON - Work in progress
552 |
553 | A. Rundgren
554 |
555 |
556 |
557 |
558 |
559 |
560 | Chrome V8 Open Source JavaScript Engine
561 |
562 | Google LLC
563 |
564 |
565 |
566 |
567 |
568 |
569 | Ryu floating point number serializing algorithm
570 |
571 | Ulf Adams
572 |
573 |
574 |
575 |
576 |
577 |
578 | Node.js
579 |
580 |
581 |
582 |
583 |
584 |
585 | Keybase
586 |
587 |
588 |
589 |
590 |
591 |
592 | The OpenAPI Initiative
593 |
594 |
595 |
596 |
597 |
598 |
599 | XML Signature Syntax and Processing Version 1.1
600 |
601 | W3C
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 | Below is an example of a JCS canonicalizer for usage with ES6 based systems:
611 |
612 |
613 | {
639 | if (next) {
640 | buffer += ',';
641 | }
642 | next = true;
643 | /////////////////////////////////////////
644 | // Array element - Recursive expansion //
645 | /////////////////////////////////////////
646 | serialize(element);
647 | });
648 | buffer += ']';
649 |
650 | } else {
651 | /////////////////////////////////////////////////
652 | // Object - Sort properties before serializing //
653 | /////////////////////////////////////////////////
654 | buffer += '{';
655 | let next = false;
656 | Object.keys(object).sort().forEach((property) => {
657 | if (next) {
658 | buffer += ',';
659 | }
660 | next = true;
661 | ///////////////////////////////////////////////
662 | // Property names are strings - Use ES6/JSON //
663 | ///////////////////////////////////////////////
664 | buffer += JSON.stringify(property);
665 | buffer += ':';
666 | //////////////////////////////////////////
667 | // Property value - Recursive expansion //
668 | //////////////////////////////////////////
669 | serialize(object[property]);
670 | });
671 | buffer += '}';
672 | }
673 | }
674 | };]]>
675 |
676 |
677 |
678 |
679 |
680 | The following table holds a set of ES6 compatible Number serialization samples,
681 | including some edge cases. The column
682 | "IEEE&nbhy;754" refers to the internal
683 | ES6 representation of the Number data type which is based on the
684 | IEEE-754 standard using 64-bit (double precision) values,
685 | here expressed in hexadecimal.
686 |
687 |
688 |
741 |
742 |
743 | Notes:
744 |
745 |
746 | For maximum compliance with the ES6 JSON object
747 | values that are to be interpreted as true integers
748 | SHOULD be in the range -9007199254740991 to 9007199254740991.
749 | However, how numbers are used in applications does not affect the JCS algorithm.
750 |
751 |
752 | Although a set of specific integers like 2**68 could be regarded as having
753 | extended precision, the JCS/ES6 number serialization
754 | algorithm does not take this into consideration.
755 |
756 |
757 | Invalid. See .
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 | Since the result from the canonicalization process (see ),
766 | is fully valid JSON, it can also be used as "Wire Format".
767 | However, this is just an option since cryptographic schemes
768 | based on JCS, in most cases would not depend on externally
769 | supplied JSON data already being canonicalized.
770 |
771 |
772 | In fact, the ES6 standard way of serializing objects using
773 | JSON.stringify() produces a
774 | more "logical" format, where properties are
775 | kept in the order they were created or received. The
776 | example below shows an address record which could benefit from
777 | ES6 standard serialization:
778 |
779 |
780 |
787 |
788 |
789 | Using canonicalization the properties above would be output in the order
790 | "address", "city", "name", "state" and "zip", which adds fuzziness
791 | to the data from a human (developer or technical support), perspective.
792 | Canonicalization also converts JSON data into a single line of text, which may
793 | be less than ideal for debugging and logging.
794 |
795 |
796 |
797 |
798 |
799 | There are several issues associated with the
800 | JSON Number type, here illustrated by the following
801 | sample object:
802 |
803 |
804 |
809 |
810 |
811 | Although the sample above conforms to JSON ,
812 | applications would normally use different native data types for storing
813 | "giantNumber" and "int64Max". In addition, monetary data like "payMeThis" would
814 | presumably not rely on floating point data types due to rounding issues with respect
815 | to decimal arithmetic.
816 |
817 |
818 | The established way of handling this kind of "overloading" of the
819 | JSON Number type (at least in an extensible manner), is through
820 | mapping mechanisms, instructing parsers what to do with different properties
821 | based on their name. However, this greatly limits the value of using the
822 | JSON Number type outside of its original somewhat constrained, JavaScript context.
823 | The ES6 JSON object does not support mappings to JSON Number either.
824 |
825 |
826 | Due to the above, numbers that do not have a natural place in the current
827 | JSON ecosystem MUST be wrapped using the JSON String type. This is close to
828 | a de-facto standard for open systems. This is also applicable for
829 | other data types that do not have direct support in JSON, like "DateTime"
830 | objects as described in .
831 |
832 |
833 | Aided by a system using the JSON String type; be it programmatic like
834 |
835 |
836 |
837 |
839 |
840 |
841 | or declarative schemes like OpenAPI ,
842 | JCS imposes no limits on applications, including when using ES6.
843 |
844 |
845 |
846 |
847 |
848 | Due to the limited set of data types featured in JSON,
849 | the JSON String type is commonly used for holding subtypes.
850 | This can, depending on the JSON parsing method, lead to
851 | interoperability problems which MUST be dealt with by
852 | JCS compliant applications targeting a wider audience.
853 |
854 |
855 | Assume you want to parse a JSON object where the schema
856 | designer assigned the property "big" for holding a "BigInteger" subtype and
857 | "time" for holding a "DateTime" subtype, while "val" is supposed to be a JSON Number
858 | compliant with JCS. The following example shows such an object:
859 |
860 |
861 |
866 |
867 | Parsing of this object can be accomplished by the following ES6 statement:
868 |
869 |
870 |
871 | After parsing, the actual data can be extracted, which for subtypes also involves a conversion
872 | step using the result of the parsing process (an ECMAScript object) as input:
873 |
874 |
877 |
878 | Canonicalization of "object" using the sample code in would return the
879 | following string:
880 |
881 |
882 |
883 |
884 | Although this is (with respect to JCS) technically correct, there is another way of parsing JSON data
885 | which also can be used with ES6 as shown below:
886 |
887 |
888 | k == 'time' ? new Date(v) : k == 'big' ? BigInt(v) : v
896 | );]]>
897 |
898 |
899 | If you now apply the canonicalizer in to "object", the
900 | following string would be generated:
901 |
902 |
903 |
904 |
905 |
906 | In this case the string arguments for "big" and "time" have changed with respect to the original,
907 | presumably making an application depending on JCS fail.
908 |
909 |
910 | The reason for the deviation is that in stream and schema based JSON parsers,
911 | the original "string" argument is typically replaced on-the-fly
912 | by the native subtype which when serialized, may exhibit a different
913 | and platform dependent pattern.
914 |
915 |
916 | That is, stream and schema based parsing MUST treat subtypes as "pure" (immutable) JSON String types,
917 | and perform the actual conversion to the designated native type in a subsequent step.
918 | In modern programming platforms like Go, Java and C# this can be achieved with
919 | moderate efforts by combining annotations, getters and setters.
920 | Below is an example in C#/Json.NET showing a part of a class that is serializable
921 | as a JSON Object:
922 |
923 |
924 |
935 |
936 |
937 | In an application "Amount" can be accessed as any other property
938 | while it is actually represented by a quoted string in JSON contexts.
939 |
940 |
941 | Note: the example above also addresses the constraints on numeric data
942 | implied by I-JSON (the C# "decimal" data type has quite different
943 | characteristics compared to IEEE-754 double precision).
944 |
945 |
946 |
947 | Since the JSON Array construct permits mixing arbitrary JSON elements,
948 | custom parsing and serialization code must normally
949 | be used to cope with subtypes anyway.
950 |
951 |
952 |
953 |
954 |
955 |
956 | The optimal solution is integrating support for JCS directly
957 | in JSON serializers (parsers need no changes).
958 | That is, canonicalization would just be an additional "mode"
959 | for a JSON serializer. However, this is currently not the case.
960 | Fortunately JCS support can be performed through externally supplied
961 | canonicalizer software, enabling signature creation schemes like the following:
962 |
963 |
964 | Create the data to be signed.
965 |
966 |
967 | Serialize the data using existing JSON tools.
968 |
969 |
970 | Let the external canonicalizer process the serialized data and return canonicalized result data.
971 |
972 |
973 | Sign the canonicalized data.
974 |
975 |
976 | Add the resulting signature value to the original JSON data through a designated signature property.
977 |
978 |
979 | Serialize the completed (now signed) JSON object using existing JSON tools.
980 |
981 |
982 | A compatible signature verification scheme would then be as follows:
983 |
984 |
985 | Parse the signed JSON data using existing JSON tools.
986 |
987 |
988 | Read and save the signature value from the designated signature property.
989 |
990 |
991 | Remove the signature property from the parsed JSON object.
992 |
993 |
994 | Serialize the remaining JSON data using existing JSON tools.
995 |
996 |
997 | Let the external canonicalizer process the serialized data and return canonicalized result data.
998 |
999 |
1000 | Verify that the canonicalized data matches the saved signature value
1001 | using the algorithm and key used for creating the signature.
1002 |
1003 |
1004 |
1005 |
1006 | A canonicalizer like the one above is effectively only a "filter", potentially usable with
1007 | a multitude of quite different cryptographic schemes.
1008 |
1009 |
1010 | Using a JSON serializer with integrated JCS support, the serialization performed
1011 | before the canonicalization step could be eliminated for both processes.
1012 |
1013 |
1014 |
1015 |
1016 |
1017 | The following Open Source implementations have been verified to be
1018 | compatible with JCS:
1019 |
1020 |
1021 | JavaScript: https://www.npmjs.com/package/canonicalize
1022 |
1023 |
1024 | Java: https://github.com/erdtman/java-json-canonicalization
1025 |
1026 |
1027 | Go: https://github.com/cyberphone/json-canonicalization/tree/master/go
1028 |
1029 |
1030 | .NET/C#: https://github.com/cyberphone/json-canonicalization/tree/master/dotnet
1031 |
1032 |
1033 | Python: https://github.com/cyberphone/json-canonicalization/tree/master/python3
1034 |
1035 |
1036 |
1037 |
1038 |
1039 |
1040 |
1041 | There are (and have been) other efforts to create "Canonical JSON".
1042 | Below is a list of URLs to some of them:
1043 |
1044 |
1045 | https://tools.ietf.org/html/draft-staykov-hu-json-canonical-form-00
1046 |
1047 |
1048 | https://gibson042.github.io/canonicaljson-spec/
1049 |
1050 |
1051 | http://wiki.laptop.org/go/Canonical_JSON
1052 |
1053 |
1054 | In contrast to JCS which is a serialization scheme, the listed efforts build on text level JSON to JSON
1055 | transformations.
1056 |
1057 |
1058 |
1059 |
1060 |
1061 | The JCS specification is currently developed at:
1062 | https://github.com/cyberphone/ietf-json-canon.
1063 |
1064 |
1065 | The most recent "editors' copy" can be found at:
1066 | https://cyberphone.github.io/ietf-json-canon.
1067 |
1068 |
1069 | JCS source code and test data are available at:
1070 | https://github.com/cyberphone/json-canonicalization
1071 |
1072 |
1073 |
1074 |
1075 |
--------------------------------------------------------------------------------
/xmlv3/draft-rundgren-json-canonicalization-scheme.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
21 |
22 |
23 | JSON Canonicalization Scheme (JCS)
24 |
25 |
26 |
27 | Independent
28 |
29 |
30 | Montpellier
31 | France
32 |
33 | anders.rundgren.net@gmail.com
34 | https://www.linkedin.com/in/andersrundgren/
35 |
36 |
37 |
38 | Broadcom
39 |
40 |
41 | 1320 Ridder Park Drive
42 | CA
43 | 95131
44 | San Jose
45 | USA
46 |
47 | bret.jordan@broadcom.com
48 |
49 |
50 |
51 | Spotify AB
52 |
53 |
54 | Birger Jarlsgatan 61, 4tr
55 | 113 56
56 | Stockholm
57 | Sweden
58 |
59 | erdtman@spotify.com
60 |
61 |
62 |
63 | Security
64 |
65 |
66 | JSON, ECMAScript, Signatures, Cryptography, Canonicalization
67 |
68 |
69 |
70 | Cryptographic operations like hashing and signing need the data to be
71 | expressed in an invariant format so that the operations are reliably
72 | repeatable.
73 |
74 | One way to address this is to create a canonical representation of
75 | the data. Canonicalization also permits data to be exchanged in its
76 | original form on the "wire" while cryptographic operations
77 | performed on the canonicalized counterpart of the data in the
78 | producer and consumer end points, generate consistent results.
79 |
80 |
81 | This document describes the JSON Canonicalization Scheme (JCS).
82 | The JCS specification defines how to create a canonical representation
83 | of JSON data by building on the strict serialization methods for
84 | JSON primitives defined by ECMAScript, constraining JSON data to
85 | the I-JSON subset, and by using deterministic property sorting.
86 |
87 |
88 |
89 |
90 |
91 | Introduction
92 |
93 | This document describes the JSON Canonicalization Scheme (JCS).
94 | The JCS specification defines how to create a canonical representation
95 | of JSON data by building
96 | on the strict serialization methods for
97 | JSON primitives defined by ECMAScript ,
98 | constraining JSON data to the I-JSON
99 | subset, and by using deterministic property sorting. The output from JCS is a
100 | "Hashable" representation of JSON data that can be used by cryptographic methods.
101 | The subsequent paragraphs outline the primary design considerations.
102 |
103 |
104 | Cryptographic operations like hashing and signing need the data to be
105 | expressed in an invariant format so that the operations are reliably
106 | repeatable.
107 | One way to accomplish this is to convert the data into
108 | a format that has a simple and fixed representation,
109 | like Base64Url .
110 | This is how JWS addressed this issue.
111 | Another solution is to create a canonical version of the data,
112 | similar to what was done for the XML Signature standard.
113 |
114 |
115 | The primary advantage with a canonicalizing scheme is that data
116 | can be kept in its original form. This is the core rationale behind JCS.
117 | Put another way, using canonicalization enables a JSON Object to remain a JSON Object
118 | even after being signed. This can simplify system design, documentation, and logging.
119 |
120 |
121 | To avoid "reinventing the wheel", JCS relies on the serialization of JSON primitives
122 | (strings, numbers and literals), as defined by
123 | ECMAScript (aka JavaScript) beginning with version 6 ,
124 | hereafter referred to as "ES6".
125 |
126 |
127 | Seasoned XML developers may recall difficulties getting XML signatures
128 | to validate. This was usually due to different interpretations of the quite intricate
129 | XML canonicalization rules as well as of the equally complex
130 | Web Services security standards.
131 | The reasons why JCS should not suffer from similar issues are:
132 |
133 |
134 |
o
135 |
136 | The absence of a namespace concept and default values.
137 |
138 |
o
139 |
140 | Constraining data to the I‑JSON subset.
141 | This eliminates the need for specific parsers for dealing with canonicalization.
142 |
143 |
o
144 |
145 | JCS compatible serialization of JSON primitives is currently supported
146 | by most Web browsers as well as by Node.js .
147 |
148 |
o
149 |
150 | The full JCS specification is currently supported by multiple
151 | Open Source implementations (see ).
152 | See also for implementation
153 | guidelines.
154 |
155 |
156 |
157 | JCS is compatible with some existing systems relying on JSON canonicalization
158 | such as JWK Thumbprint and Keybase .
159 |
160 |
161 | For potential uses outside of cryptography see .
162 |
163 |
164 | The intended audiences of this document are JSON tool vendors, as
165 | well as designers of JSON based cryptographic solutions.
166 | The reader is assumed to be knowledgeable in ECMAScript including the JSON object.
167 |
168 |
169 |
170 | Terminology
171 |
172 | Note that this document is not on the IETF standards track. However, a conformant
173 | implementation is supposed to adhere to the specified behavior for
174 | security and interoperability reasons. This text uses BCP 14 to
175 | describe that necessary behavior.
176 |
177 |
178 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL
179 | NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
180 | "MAY", and "OPTIONAL" in this document are to be interpreted as
181 | described in BCP 14
182 | when, and only when, they appear in all capitals, as shown here.
183 |
184 |
185 |
186 | Detailed Operation
187 |
188 | This section describes the details related to creating
189 | a canonical JSON representation, and how they are addressed by JCS.
190 |
191 |
192 | describes
193 | the RECOMMENDED way of adding JCS support to existing JSON tools.
194 |
195 |
196 | Creation of Input Data
197 |
198 | Data to be canonically serialized is usually created by:
199 |
200 |
210 |
211 | Irrespective of the method used, the data to be serialized MUST be adapted
212 | for I‑JSON formatting, which implies the following:
213 |
214 |
215 |
o
216 |
217 | JSON Objects MUST NOT exhibit duplicate property names.
218 |
219 |
o
220 |
221 | JSON String data MUST be expressible
222 | as Unicode .
223 |
224 |
o
225 |
226 | JSON Number data MUST be expressible
227 | as IEEE-754 double precision values.
228 | For applications needing higher precision or longer integers than
229 | offered by IEEE-754 double precision, it is RECOMMENDED to represent such
230 | numbers as JSON Strings, see for
231 | details on how this can be performed in an interoperable and extensible way.
232 |
233 |
234 |
235 | An additional constraint is that parsed JSON String data MUST NOT be altered during subsequent serializations.
236 | For more information see .
237 |
238 |
239 | Note: although the Unicode standard offers the possibility of rearranging
240 | certain character sequences, referred to as "Unicode Normalization"
241 | (),
242 | JCS compliant string processing does not take this into consideration.
243 | That is, all components involved in a scheme depending on JCS,
244 | MUST preserve Unicode string data "as is".
245 |
246 |
247 |
248 | Generation of Canonical JSON Data
249 |
250 | The following subsections describe the steps required to create a canonical
251 | JSON representation of the data elaborated on in the previous section.
252 |
253 |
254 | shows sample code for an ES6 based canonicalizer,
255 | matching the JCS specification.
256 |
257 |
258 | Whitespace
259 |
260 | Whitespace between JSON tokens MUST NOT be emitted.
261 |
262 |
263 |
264 | Serialization of Primitive Data Types
265 |
266 | Assume a JSON object as follows is parsed:
267 |
268 |
274 |
275 | If the parsed data is subsequently serialized
276 | using a serializer compliant with ES6's JSON.stringify(),
277 | the result would (with a line wrap added for display purposes only),
278 | be rather divergent with respect to the original data:
279 |
280 |
282 |
283 | The reason for the difference between the parsed data and its
284 | serialized counterpart, is due to a wide tolerance on input data (as defined
285 | by JSON ), while output data (as defined by ES6),
286 | has a fixed representation. As can be seen in the example,
287 | numbers are subject to rounding as well.
288 |
289 |
290 | The following subsections describe the serialization of primitive JSON data types
291 | according to JCS. This part is identical to that of ES6.
292 | In the (unlikely) event that a future version of ECMAScript would
293 | invalidate any of the following serialization methods, it will be
294 | up to the developer community to
295 | either stick to this specification or create a new specification.
296 |
297 |
298 | Serialization of Literals
299 |
300 | In accordance with JSON ,
301 | the literals "null", "true", and
302 | "false" MUST be serialized as null, true, and false respectively.
303 |
304 |
305 |
306 | Serialization of Strings
307 |
308 | For JSON String data (which includes
309 | JSON Object property names as well), each Unicode code point MUST be serialized as
310 | described below (see section 24.3.2.2 of ):
311 |
312 |
313 |
o
314 |
315 | If the Unicode value falls within the traditional ASCII control
316 | character range (U+0000 through U+001F), it MUST
317 | be serialized using lowercase hexadecimal Unicode notation (\uhhhh) unless it is in the
318 | set of predefined JSON control characters U+0008, U+0009, U+000A, U+000C or U+000D
319 | which MUST be serialized as \b, \t, \n, \f and \r respectively.
320 |
321 |
o
322 | If the Unicode value is outside of the ASCII control character range, it MUST
323 | be serialized "as is" unless it is equivalent to
324 | U+005C (\) or U+0022 (") which MUST be serialized as \\ and \" respectively.
325 |
326 |
327 |
328 | Finally, the resulting sequence of Unicode code points MUST be enclosed in double quotes (").
329 |
330 |
331 | Note: since invalid Unicode data like "lone surrogates" (e.g. U+DEAD)
332 | may lead to interoperability issues including broken signatures,
333 | occurrences of such data MUST cause a compliant JCS implementation to terminate
334 | with an appropriate error.
335 |
336 |
337 |
338 | Serialization of Numbers
339 |
340 | ES6 builds on the IEEE-754 double precision
341 | standard for representing JSON Number data.
342 | Such data MUST be serialized according to section 7.1.12.1 of
343 | including the "Note 2" enhancement.
344 |
345 |
346 | Due to the relative complexity of this part, the algorithm
347 | itself is not included in this document.
348 | For implementers of JCS compliant number serialization,
349 | Google's implementation in V8 may serve as a reference.
350 | Another compatible number serialization reference implementation
351 | is Ryu ,
352 | that is used by the JCS open source Java implementation
353 | mentioned in .
354 | holds a set of IEEE-754 sample values and their
355 | corresponding JSON serialization.
356 |
357 |
358 | Note: since NaN (Not a Number) and Infinity are not permitted in JSON,
359 | occurrences of NaN or Infinity MUST cause a compliant JCS implementation to terminate
360 | with an appropriate error.
361 |
362 |
363 |
364 |
365 | Sorting of Object Properties
366 |
367 | Although the previous step normalized the representation of primitive
368 | JSON data types, the result would not yet qualify as "canonical" since JSON
369 | Object properties are not in lexicographic (alphabetical) order.
370 |
371 |
372 | Applied to the sample in ,
373 | a properly canonicalized version should (with a
374 | line wrap added for display purposes only), read as:
375 |
376 |
378 |
379 | The rules for lexicographic sorting of JSON Object
380 | properties according to JCS are as follows:
381 |
382 |
383 |
o
384 |
385 | JSON Object properties MUST be sorted recursively,
386 | which means that JSON child Objects
387 | MUST have their properties sorted as well.
388 |
389 |
o
390 |
391 | JSON Array data MUST also be scanned for the
392 | presence of JSON Objects (if an object is found then its properties MUST be sorted),
393 | but array element order MUST NOT be changed.
394 |
395 |
396 |
397 | When a JSON Object is about to have its properties
398 | sorted, the following measures MUST be adhered to:
399 |
400 |
401 |
o
402 |
403 | The sorting process is applied to property name strings in their "raw" (unescaped) form.
404 | That is, a newline character is treated as U+000A.
405 |
406 |
o
407 |
408 | Property name strings to be sorted are formatted
409 | as arrays of UTF-16 code units.
410 | The sorting is based on pure value comparisons, where code units are treated as
411 | unsigned integers, independent of locale settings.
412 |
413 |
o
414 |
415 |
416 | Property name strings either have different values at some index that is
417 | a valid index for both strings, or their lengths are different, or both.
418 | If they have different values at one or more index
419 | positions, let k be the smallest such index; then the string whose
420 | value at position k has the smaller value, as determined by using
421 | the < operator, lexicographically precedes the other string.
422 | If there is no index position at which they differ,
423 | then the shorter string lexicographically precedes the longer string.
424 |
425 |
426 | In plain English this means that property names are sorted in ascending order like the following:
427 |
428 |
429 |
430 |
434 |
435 | The rationale for basing the sorting algorithm on UTF-16 code units is that
436 | it maps directly to the string type in ECMAScript (featured in Web browsers
437 | and Node.js), Java and .NET. In addition, JSON only supports escape sequences
438 | expressed as UTF-16 code units making knowledge and handling of such data
439 | a necessity anyway.
440 | Systems using another internal representation of string data will need to convert
441 | JSON property name strings into arrays of UTF-16 code units before sorting.
442 | The conversion from UTF-8 or UTF-32 to UTF-16 is defined by the
443 | Unicode standard.
444 |
445 |
446 | The following test data can be used for verifying the correctness of the sorting
447 | scheme in a JCS implementation. JSON test data:
448 |
449 |
458 |
459 | Expected argument order after sorting property strings:
460 |
461 |
468 |
469 | Note: for the purpose of obtaining a deterministic property order, sorting on
470 | UTF-8 or UTF-32 encoded data would also work, but the outcome for JSON data
471 | like above would differ and thus be incompatible with this specification.
472 | However, in practice, property names are rarely defined outside of 7-bit ASCII making
473 | it possible to sort on string data in UTF-8 or UTF-32 format without conversions
474 | to UTF-16 and still be compatible with JCS. Whether this is a viable option or not
475 | depends on the environment JCS is used in.
476 |
477 |
478 |
479 | UTF-8 Generation
480 |
481 | Finally, in order to create a platform independent representation,
482 | the result of the preceding step MUST be encoded in UTF-8.
483 |
484 |
485 | Applied to the sample in this
486 | should yield the following bytes, here shown in hexadecimal notation:
487 |
488 |
494 |
495 | This data is intended to be usable as input to cryptographic methods.
496 |
497 |
498 |
499 |
500 |
501 | IANA Considerations
502 |
503 | This document has no IANA actions.
504 |
505 |
506 |
507 | Security Considerations
508 |
509 | It is crucial to perform sanity checks on input data to avoid
510 | overflowing buffers and similar things that could affect the
511 | integrity of the system.
512 |
513 |
514 | When JCS is applied to signature schemes like the one described
515 | in ,
516 | applications MUST perform the following operations before acting
517 | upon received data:
518 |
519 |
520 |
521 | Parse the JSON data and verify that it adheres to I-JSON.
522 |
523 |
524 | Verify the data for correctness according to the conventions defined by the
525 | ecosystem where it is to be used. This also includes locating the
526 | property holding the signature data.
527 |
528 |
529 | Verify the signature.
530 |
531 |
532 |
533 | If any of these steps fail, the operation in progress MUST be aborted.
534 |
535 |
536 |
537 | Acknowledgements
538 |
539 | Building on ES6 Number serialization was
540 | originally proposed by James Manger. This ultimately led to the
541 | adoption of the entire ES6 serialization scheme for JSON primitives.
542 |
543 |
544 | Other people who have contributed with valuable input to this specification include
545 | Scott Ananian,
546 | Tim Bray,
547 | Ben Campbell,
548 | Adrian Farrel,
549 | Richard Gibson,
550 | Bron Gondwana,
551 | John-Mark Gurney,
552 | John Levine,
553 | Mark Miller,
554 | Matthew Miller,
555 | Mike Jones,
556 | Mark Nottingham,
557 | Mike Samuel,
558 | Jim Schaad,
559 | Robert Tupelo-Schneck
560 | and Michal Wadas.
561 |
562 |
563 | For carrying out real world concept verification, the software and
564 | support for number serialization provided by
565 | Ulf Adams,
566 | Tanner Gooding
567 | and Remy Oudompheng
568 | was very helpful.
569 |
570 |
571 |
572 |
573 |
574 | References
575 |
576 | Normative References
577 |
578 |
579 | Key words for use in RFCs to Indicate Requirement Levels
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 | In many standards track documents several words are used to signify
590 | the requirements in the specification. These words are often capitalized.
591 | This document defines these words as they should be interpreted in IETF
592 | documents. This document specifies an Internet Best Current Practices for
593 | the Internet Community, and requests discussion and suggestions for improvements.
594 |
595 |
596 |
597 |
598 |
599 |
600 | The JavaScript Object Notation (JSON) Data Interchange Format
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 | JavaScript Object Notation (JSON) is a lightweight, text-based,
611 | language-independent data interchange format. It was derived from the
612 | ECMAScript Programming Language Standard. JSON defines a small set of
613 | formatting rules for the portable representation of structured data.
614 |
615 |
616 | This document removes inconsistencies with other specifications of
617 | JSON, repairs specification errors, and offers experience-based
618 | interoperability guidance.
619 |
620 |
621 |
622 |
623 |
624 |
625 | Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 | RFC 2119 specifies common key words that may be used in protocol
636 | specifications. This document aims to reduce the ambiguity by
637 | clarifying that only UPPERCASE usage of the key words have the
638 | defined special meanings.
639 |
640 |
641 |
642 |
643 |
644 |
645 | The I-JSON Message Format
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 | I-JSON (short for "Internet JSON") is a restricted profile of
655 | JSON designed to maximize interoperability and increase confidence
656 | that software can process it successfully with predictable results.
657 |
658 |
659 |
660 |
661 |
662 |
663 | ECMAScript 2015 Language Specification
664 |
665 | Ecma International
666 |
667 |
668 |
669 |
670 |
671 |
672 | IEEE Standard for Floating-Point Arithmetic
673 |
674 | IEEE
675 |
676 |
677 |
678 |
679 |
680 |
681 | The Unicode Standard, Version 12.1.0
682 |
683 | The Unicode Consortium
684 |
685 |
686 |
687 |
688 |
689 |
690 | Informative References
691 |
692 |
693 | JSON Web Key (JWK) Thumbprint
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 | This specification defines a method for computing a hash value
706 | over a JSON Web Key (JWK). It defines which fields in a JWK are used
707 | in the hash computation, the method of creating a canonical form for
708 | those fields, and how to convert the resulting Unicode string into a
709 | byte sequence to be hashed. The resulting hash value can be used for
710 | identifying or selecting the key represented by the JWK that is the
711 | subject of the thumbprint.
712 |
713 |
714 |
715 |
716 |
717 |
718 | The Base16, Base32, and Base64 Data Encodings
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 | This document describes the commonly used base 64, base 32, and base 16
728 | encoding schemes. It also discusses the use of line-feeds in encoded data,
729 | use of padding in encoded data, use of non-alphabet characters in encoded data,
730 | use of different encoding alphabets, and canonical encodings. [STANDARDS-TRACK]
731 |
732 |
733 |
734 |
735 |
736 |
737 | JSON Web Signature (JWS)
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 | JSON Web Signature (JWS) represents content secured with digital
753 | signatures or Message Authentication Codes (MACs) using JSON-based
754 | data structures. Cryptographic algorithms and identifiers for use
755 | with this specification are described in the separate
756 | JSON Web Algorithms (JWA) specification and an IANA registry defined
757 | by that specification. Related encryption capabilities are described
758 | in the separate JSON Web Encryption (JWE) specification.
759 |
760 |
761 |
762 |
763 |
764 |
765 | "Comparable" JSON - Work in progress
766 |
767 | A. Rundgren
768 |
769 |
770 |
771 |
772 |
773 | Chrome V8 Open Source JavaScript Engine
774 |
775 | Google LLC
776 |
777 |
778 |
779 |
780 |
781 | Ryu floating point number serializing algorithm
782 |
783 | Ulf Adams
784 |
785 |
786 |
787 |
788 |
789 | Node.js
790 |
791 |
792 |
793 |
794 |
795 | Keybase
796 |
797 |
798 |
799 |
800 |
801 | The OpenAPI Initiative
802 |
803 |
804 |
805 |
806 |
807 | XML Signature Syntax and Processing Version 1.1
808 |
809 | W3C
810 |
811 |
812 |
813 |
814 |
815 |
816 | ES6 Sample Canonicalizer
817 |
818 | Below is an example of a JCS canonicalizer for usage with ES6 based systems:
819 |
820 | {
846 | if (next) {
847 | buffer += ',';
848 | }
849 | next = true;
850 | /////////////////////////////////////////
851 | // Array element - Recursive expansion //
852 | /////////////////////////////////////////
853 | serialize(element);
854 | });
855 | buffer += ']';
856 |
857 | } else {
858 | /////////////////////////////////////////////////
859 | // Object - Sort properties before serializing //
860 | /////////////////////////////////////////////////
861 | buffer += '{';
862 | let next = false;
863 | Object.keys(object).sort().forEach((property) => {
864 | if (next) {
865 | buffer += ',';
866 | }
867 | next = true;
868 | ///////////////////////////////////////////////
869 | // Property names are strings - Use ES6/JSON //
870 | ///////////////////////////////////////////////
871 | buffer += JSON.stringify(property);
872 | buffer += ':';
873 | //////////////////////////////////////////
874 | // Property value - Recursive expansion //
875 | //////////////////////////////////////////
876 | serialize(object[property]);
877 | });
878 | buffer += '}';
879 | }
880 | }
881 | };]]>
882 |
883 |
884 | Number Serialization Samples
885 |
886 | The following table holds a set of ES6 compatible Number serialization samples,
887 | including some edge cases. The column
888 | "IEEE‑754" refers to the internal
889 | ES6 representation of the Number data type which is based on the
890 | IEEE-754 standard using 64-bit (double precision) values,
891 | here expressed in hexadecimal.
892 |
893 |
948 |
949 |
950 | Notes:
951 |
952 |
953 |
954 | For maximum compliance with the ES6 JSON object,
955 | values that are to be interpreted as true integers
956 | SHOULD be in the range -9007199254740991 to 9007199254740991.
957 | However, how numbers are used in applications does not affect the JCS algorithm.
958 |
959 |
960 | Although a set of specific integers like 2**68 could be regarded as having
961 | extended precision, the JCS/ES6 number serialization
962 | algorithm does not take this into consideration.
963 |
964 |
965 | Value out of range, not permitted in JSON.
966 | See .
967 |
968 |
969 | This number is exactly 1424953923781206.25 but will after the "Note 2" rule
970 | mentioned in be truncated and
971 | rounded to the closest even value.
972 |
973 |
974 |
975 | For a more exhaustive validation of a JCS number serializer, you may test
976 | against a file (currently) available in the development portal
977 | (see ),
978 | containing a large set of sample values. Another option
979 | is running V8
980 | as a live reference together with a program generating a
981 | substantial amount of random IEEE-754 values.
982 |
983 |
984 |
985 | Canonicalized JSON as "Wire Format"
986 |
987 | Since the result from the canonicalization process (see ),
988 | is fully valid JSON, it can also be used as "Wire Format".
989 | However, this is just an option since cryptographic schemes
990 | based on JCS would, in most cases, not depend on externally
991 | supplied JSON data already being canonicalized.
992 |
993 |
994 | In fact, the ES6 standard way of serializing objects using
995 | JSON.stringify() produces a
996 | more "logical" format, where properties are
997 | kept in the order they were created or received. The
998 | example below shows an address record which could benefit from
999 | ES6 standard serialization:
1000 |
1001 |
1008 |
1009 | Using canonicalization the properties above would be output in the order
1010 | "address", "city", "name", "state" and "zip", which adds fuzziness
1011 | to the data from a human (developer or technical support) perspective.
1012 | Canonicalization also converts JSON data into a single line of text, which may
1013 | be less than ideal for debugging and logging.
1014 |
1015 |
1016 |
1017 | Dealing with Big Numbers
1018 |
1019 | There are several issues associated with the
1020 | JSON Number type, here illustrated by the following
1021 | sample object:
1022 |
1023 |
1028 |
1029 | Although the sample above conforms to JSON ,
1030 | applications would normally use different native data types for storing
1031 | "giantNumber" and "int64Max". In addition, monetary data like "payMeThis" would
1032 | presumably not rely on floating point data types due to rounding issues with respect
1033 | to decimal arithmetic.
1034 |
1035 |
1036 | The established way of handling this kind of "overloading" of the
1037 | JSON Number type (at least in an extensible manner), is through
1038 | mapping mechanisms, instructing parsers what to do with different properties
1039 | based on their name. However, this greatly limits the value of using the
1040 | JSON Number type outside of its original somewhat constrained, JavaScript context.
1041 | The ES6 JSON object does not support mappings to JSON Number either.
1042 |
1043 |
1044 | Due to the above, numbers that do not have a natural place in the current
1045 | JSON ecosystem MUST be wrapped using the JSON String type. This is close to
1046 | a de-facto standard for open systems. This is also applicable for
1047 | other data types that do not have direct support in JSON, like DateTime
1048 | objects as described in .
1049 |
1050 |
1051 | Aided by a system using the JSON String type; be it programmatic like
1052 |
1053 |
1055 |
1056 | or declarative schemes like OpenAPI ,
1057 | JCS imposes no limits on applications, including when using ES6.
1058 |
1059 |
1060 |
1061 | String Subtype Handling
1062 |
1063 | Due to the limited set of data types featured in JSON,
1064 | the JSON String type is commonly used for holding subtypes.
1065 | This can, depending on the JSON parsing method, lead to
1066 | interoperability problems which MUST be dealt with by
1067 | JCS compliant applications targeting a wider audience.
1068 |
1069 |
1070 | Assume you want to parse a JSON object where the schema
1071 | designer assigned the property "big" for holding a BigInt subtype and
1072 | "time" for holding a DateTime subtype, while "val" is supposed to be a JSON Number
1073 | compliant with JCS. The following example shows such an object:
1074 |
1075 |
1080 | Parsing of this object can be accomplished by the following ES6 statement:
1081 |
1082 | After parsing, the actual data can be extracted, which for subtypes also involves a conversion
1083 | step using the result of the parsing process (an ECMAScript object) as input:
1084 |
1087 |
1088 | Note that the BigInt data type is currently only natively supported by V8 .
1089 |
1090 |
1091 | Canonicalization of "object" using the sample code in would return the
1092 | following string:
1093 |
1094 |
1095 |
1096 | Although this is (with respect to JCS) technically correct, there is another way of parsing JSON data
1097 | which also can be used with ECMAScript as shown below:
1098 |
1099 | k == 'time' ? new Date(v) : k == 'big' ? BigInt(v) : v
1107 | );]]>
1108 |
1109 | If you now apply the canonicalizer in to "object", the
1110 | following string would be generated:
1111 |
1112 |
1113 |
1114 | In this case the string arguments for "big" and "time" have changed with respect to the original,
1115 | presumably making an application depending on JCS fail.
1116 |
1117 |
1118 | The reason for the deviation is that in stream and schema based JSON parsers,
1119 | the original "string" argument is typically replaced on-the-fly
1120 | by the native subtype which when serialized, may exhibit a different
1121 | and platform dependent pattern.
1122 |
1123 |
1124 | That is, stream and schema based parsing MUST treat subtypes as "pure" (immutable) JSON String types,
1125 | and perform the actual conversion to the designated native type in a subsequent step.
1126 | In modern programming platforms like Go, Java and C# this can be achieved with
1127 | moderate efforts by combining annotations, getters and setters.
1128 | Below is an example in C#/Json.NET showing a part of a class that is serializable
1129 | as a JSON Object:
1130 |
1131 |
1142 |
1143 | In an application "Amount" can be accessed as any other property
1144 | while it is actually represented by a quoted string in JSON contexts.
1145 |
1146 |
1147 | Note: the example above also addresses the constraints on numeric data
1148 | implied by I-JSON (the C# "decimal" data type has quite different
1149 | characteristics compared to IEEE-754 double precision).
1150 |
1151 |
1152 | Subtypes in Arrays
1153 |
1154 | Since the JSON Array construct permits mixing arbitrary JSON data types,
1155 | custom parsing and serialization code may be required
1156 | to cope with subtypes anyway.
1157 |
1158 |
1159 |
1160 |
1161 | Implementation Guidelines
1162 |
1163 | The optimal solution is integrating support for JCS directly
1164 | in JSON serializers (parsers need no changes).
1165 | That is, canonicalization would just be an additional "mode"
1166 | for a JSON serializer. However, this is currently not the case.
1167 | Fortunately, JCS support can be introduced through externally supplied
1168 | canonicalizer software acting as a post processor to existing
1169 | JSON serializers. This arrangement also relieves the JCS implementer from
1170 | having to deal with how underlying data is to be represented in JSON.
1171 |
1172 |
1173 | The post processor concept enables signature creation schemes like the following:
1174 |
1175 |
1176 |
1177 | Create the data to be signed.
1178 |
1179 |
1180 | Serialize the data using existing JSON tools.
1181 |
1182 |
1183 | Let the external canonicalizer process the serialized data and return canonicalized result data.
1184 |
1185 |
1186 | Sign the canonicalized data.
1187 |
1188 |
1189 | Add the resulting signature value to the original JSON data through a designated signature property.
1190 |
1191 |
1192 | Serialize the completed (now signed) JSON object using existing JSON tools.
1193 |
1194 |
1195 |
1196 | A compatible signature verification scheme would then be as follows:
1197 |
1198 |
1199 |
1200 | Parse the signed JSON data using existing JSON tools.
1201 |
1202 |
1203 | Read and save the signature value from the designated signature property.
1204 |
1205 |
1206 | Remove the signature property from the parsed JSON object.
1207 |
1208 |
1209 | Serialize the remaining JSON data using existing JSON tools.
1210 |
1211 |
1212 | Let the external canonicalizer process the serialized data and return canonicalized result data.
1213 |
1214 |
1215 | Verify that the canonicalized data matches the saved signature value
1216 | using the algorithm and key used for creating the signature.
1217 |
1218 |
1219 |
1220 | A canonicalizer like above is effectively only a "filter", potentially usable with
1221 | a multitude of quite different cryptographic schemes.
1222 |
1223 |
1224 | Using a JSON serializer with integrated JCS support, the serialization performed
1225 | before the canonicalization step could be eliminated for both processes.
1226 |
1227 |
1228 |
1229 | Open Source Implementations
1230 |
1231 | The following Open Source implementations have been verified to be
1232 | compatible with JCS:
1233 |
1234 |
1235 |
1236 | JavaScript:
1237 |
1238 |
1239 | Java:
1240 |
1241 |
1242 | Go:
1243 |
1244 |
1245 | .NET/C#:
1246 |
1247 |
1248 | Python:
1249 |
1250 |
1251 |
1252 |
1253 | Other JSON Canonicalization Efforts
1254 |
1255 | There are (and have been) other efforts creating "Canonical JSON".
1256 | Below is a list of URLs to some of them:
1257 |
1258 |
1259 |
1260 |
1261 |
1262 |
1263 |
1264 |
1265 |
1266 |
1267 |
1268 |
1269 |
1270 | The listed efforts all build
1271 | on text level JSON to JSON transformations. The primary feature
1272 | of text level canonicalization is that it can be made neutral to
1273 | the flavor of JSON used. However, such schemes also
1274 | imply major changes to the JSON parsing process which is a likely
1275 | hurdle for adoption. Albeit at the expense of certain JSON and
1276 | application constraints,
1277 | JCS was designed to be compatible with existing JSON tools.
1278 |
1279 |
1280 |
1281 | Development Portal
1282 |
1283 | The JCS specification is currently developed at:
1284 | .
1285 |
1286 |
1287 | JCS source code and extensive test data is available at:
1288 |
1289 |
1290 |
1291 |
1292 | Document History
1293 |
1294 | [[ This section to be removed by the RFC Editor before publication as
1295 | an RFC ]]
1296 |
1297 | Version 00-06:
1298 |
1299 |
1300 | See IETF diff listings.
1301 |
1302 |
1303 | Version 07:
1304 |
1305 |
1306 | Initial conversion to XML RFC version 3.
1307 |
1308 |
1309 | Changed intended status to "Informational".
1310 |
1311 |
1312 | Added UTF-16 test data and explanations.
1313 |
1314 |
1315 | Version 08:
1316 |
1317 |
1318 | Updated Abstract.
1319 |
1320 |
1321 | Added a "Note 2" number serialization sample.
1322 |
1323 |
1324 | Updated Security Considerations.
1325 |
1326 |
1327 | Tried to clear up the JSON input data section.
1328 |
1329 |
1330 | Added a line about Unicode normalization.
1331 |
1332 |
1333 | Added a line about serialization of structured data.
1334 |
1335 |
1336 | Added a missing fact about "BigInt" (V8 not ES6).
1337 |
1338 |
1339 | Version 09:
1340 |
1341 |
1342 | Updated initial line of Abstract and Introduction.
1343 |
1344 |
1345 | Added note about breaking ECMAScript changes.
1346 |
1347 |
1348 | Minor language nit fixes.
1349 |
1350 |
1351 | Version 10-12:
1352 |
1353 |
1354 | Language tweaks.
1355 |
1356 |
1357 | Version 13:
1358 |
1359 |
1360 | Reorganized .
1361 |
1362 |
1363 | Version 14:
1364 |
1365 |
1366 | Improved introduction + some minor changes in security considerations, acknowledgements, and
1367 | unicode normalization.
1368 |
1369 |
1370 | Generalized data representation issues by updating .
1371 |
1372 |
1373 | Version 15:
1374 |
1375 |
1376 | Minor nits, reverted the IEEE-754 table to ASCII.
1377 |
1378 |
1379 | Added a bit more meat to the IEEE-754 table.
1380 |
1381 |
1382 | Changed all <artwork> to: type="ascii-art" and removed name="".
1383 |
1384 |
1385 | Version 16:
1386 |
1387 |
1388 | Updated section 2 according to AD's wish.
1389 |