├── .circleci
└── config.yml
├── .github
└── in-solidarity.yml
├── .gitignore
├── .note.xml
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE.md
├── Makefile
├── README.md
├── draft-ietf-quic-load-balancers.md
├── draft-ietf-quic-retry-offload.md
├── package.json
└── quic_lb_protocol.md
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 | build:
4 | docker:
5 | - image: martinthomson/i-d-template:latest
6 | resource_class: small
7 | working_directory: ~/draft
8 | # Build steps; they run in the i-d-template image with ~/draft as the working directory.
9 | steps:
10 | - run:
11 | name: "Print Configuration"
12 | command: |
13 | xml2rfc --version
14 | gem list -q kramdown-rfc
15 | echo -n 'mmark '; mmark --version
16 | # Restore the cached .git directory; keys are listed from most to least specific (branch+revision, branch, any).
17 | - restore_cache:
18 | name: "Restoring cache - Git"
19 | keys:
20 | - v2-cache-git-{{ .Branch }}-{{ .Revision }}
21 | - v2-cache-git-{{ .Branch }}
22 | - v2-cache-git-
23 | # Restore the reference cache that a later step saves from ~/.cache/xml2rfc.
24 | - restore_cache:
25 | name: "Restoring cache - References"
26 | keys:
27 | - v1-cache-references-{{ epoch }}
28 | - v1-cache-references-
29 |
30 | # Workaround for https://discuss.circleci.com/t/22437: on tag builds with a cached .git, fetch the tag explicitly (rewriting an SSH-style GitHub remote to HTTPS); if the fetch fails, discard the cached .git.
31 | - run:
32 | name: Tag Checkout
33 | command: |
34 | if [ -n "$CIRCLE_TAG" ] && [ -d .git ]; then
35 | remote=$(echo "$CIRCLE_REPOSITORY_URL" | \
36 | sed -e 's,^git.github.com:,https://github.com/,')
37 | git fetch -f "$remote" "refs/tags/$CIRCLE_TAG:refs/tags/$CIRCLE_TAG" || \
38 | (echo 'Removing .git cache for tag build'; rm -rf .git)
39 | fi
40 |
41 | - checkout
42 |
43 | # Build txt and html versions of the drafts using the default make target
44 | - run:
45 | name: "Build Drafts"
46 | command: make
47 |
48 | # Update editor's copy on gh-pages; skipped when CIRCLE_TAG starts with "draft-"
49 | - run:
50 | name: "Update GitHub Pages"
51 | command: |
52 | if [ "${CIRCLE_TAG#draft-}" == "$CIRCLE_TAG" ]; then
53 | make gh-pages
54 | fi
55 |
56 | # For tagged builds, upload to the datatracker; runs only when CIRCLE_TAG starts with "draft-"
57 | - deploy:
58 | name: "Upload to Datatracker"
59 | command: |
60 | if [ "${CIRCLE_TAG#draft-}" != "$CIRCLE_TAG" ]; then
61 | make upload
62 | fi
63 |
64 | # Archive GitHub Issues: if the first `make archive` fails, retry with DISABLE_ARCHIVE_FETCH=true; once either succeeds, run `make gh-archive`
65 | - run:
66 | name: "Archive GitHub Issues"
67 | command: "make archive || make archive DISABLE_ARCHIVE_FETCH=true && make gh-archive"
68 |
69 | # Create artifacts in /tmp/artifacts and store them
70 | - run:
71 | name: "Create Artifacts"
72 | command: "make artifacts CI_ARTIFACTS=/tmp/artifacts"
73 |
74 | - store_artifacts:
75 | path: /tmp/artifacts
76 | # Expire all reflog entries and prune unreachable objects before .git is cached
77 | - run:
78 | name: "Prepare for Caching"
79 | command: "git reflog expire --expire=now --all && git gc --prune=now"
80 | # Cache ~/draft/.git under a key built from the branch and revision
81 | - save_cache:
82 | name: "Saving Cache - Git"
83 | key: v2-cache-git-{{ .Branch }}-{{ .Revision }}
84 | paths:
85 | - ~/draft/.git
86 | # Cache ~/.cache/xml2rfc under an epoch-based key. NOTE(review): the step name says "Drafts" but the key and path are the reference cache
87 | - save_cache:
88 | name: "Saving Cache - Drafts"
89 | key: v1-cache-references-{{ epoch }}
90 | paths:
91 | - ~/.cache/xml2rfc
92 |
93 | # Workflow: run the build job, with a tag filter (/.*?/) that matches any tag
94 | workflows:
95 | version: 2
96 | build:
97 | jobs:
98 | - build:
99 | filters:
100 | tags:
101 | only: /.*?/
102 |
--------------------------------------------------------------------------------
/.github/in-solidarity.yml:
--------------------------------------------------------------------------------
1 | _extends: ietf/terminology  # NOTE(review): presumably inherits the shared rule set from the ietf/terminology repository - confirm
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore editor backup files and files produced by the draft build and its tooling
2 | *~
3 | /*-[0-9][0-9].xml
4 | archive.json
5 | draft-ietf-quic-load-balancers.xml
6 | *.html
7 | issues.json
8 | *.js
9 | lib
10 | old-stream-ciphers.md
11 | *.pdf
12 | pulls.json
13 | *.redxml
14 | .refcache
15 | report.xml
16 | *.swp
17 | .tags
18 | .targets.mk
19 | *.txt
20 | *.upload
21 | node_modules/
22 | package-lock.json
23 | venv/
24 |
--------------------------------------------------------------------------------
/.note.xml:
--------------------------------------------------------------------------------
1 |
2 | Discussion of this document takes place on the
3 | QUIC Working Group mailing list (quic@ietf.org),
4 | which is archived at https://mailarchive.ietf.org/arch/browse/quic/.
5 | Source for this draft and an issue tracker can be found at
6 | https://github.com/quicwg/load-balancers.
7 |
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | dist: xenial
3 | # The build runs inside the i-d-template Docker image, so the docker service is required.
4 | services:
5 | - docker
6 | # DRAFT_DIR is the directory inside the container that is mounted as tmpfs and used as the make working directory.
7 | env:
8 | DRAFT_DIR: /home/idci/draft
9 |
10 | before_install:
11 | - docker --version
12 | - docker pull martinthomson/i-d-template
13 | # Start a container named idci (kept alive by `sleep 300`), copy the checkout into it, build, and run `make ghpages` unless TRAVIS_TAG starts with "draft-".
14 | script:
15 | - docker run -d -v "$PWD:/tmp/draft" --tmpfs "$DRAFT_DIR:rw,exec" --name idci
16 | martinthomson/i-d-template sleep 300
17 | - docker exec idci cp -rn /tmp/draft /home/idci
18 | - docker exec -w "$DRAFT_DIR" -e CI=true -e TRAVIS
19 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
20 | idci make CLONE_ARGS='--reference /home/idci/git-reference'
21 | - docker exec idci ls -l /home/idci/draft/lib
22 | - if [ "${TRAVIS_TAG#draft-}" == "${TRAVIS_TAG}" ]; then
23 | docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS
24 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
25 | idci make ghpages;
26 | fi
27 | # On tag builds, run `make upload` in the same idci container.
28 | deploy:
29 | provider: script
30 | script:
31 | - docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS
32 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
33 | idci make upload
34 | skip_cleanup: true
35 | on:
36 | tags: true
37 | # Force-remove the idci build container once the script phase is over.
38 | after_script:
39 | - docker container rm -f idci
40 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | This repository relates to activities in the Internet Engineering Task Force
4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered
5 | Contributions to the IETF Standards Process, as defined in the intellectual
6 | property policies of IETF currently designated as
7 | [BCP 78](https://www.rfc-editor.org/info/bcp78),
8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the
9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html).
10 |
11 | Any edit, commit, pull request, issue, comment or other change made to this
12 | repository constitutes Contributions to the IETF Standards Process
13 | (https://www.ietf.org/).
14 |
15 | You agree to comply with all applicable IETF policies and procedures, including,
16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being
17 | subject to a Simplified BSD License) in Contributions.
18 |
19 |
20 | ## Other Resources
21 |
22 | Discussion of this work occurs on the
23 | [quic working group mailing list](https://mailarchive.ietf.org/arch/browse/quic/)
24 | ([subscribe](https://www.ietf.org/mailman/listinfo/quic)). In addition to
25 | contributions in GitHub, you are encouraged to participate in discussions there.
26 |
27 | **Note**: Some working groups adopt a policy whereby substantive discussion of
28 | technical issues needs to occur on the mailing list.
29 |
30 | You might also like to familiarize yourself with other
31 | [working group documents](https://datatracker.ietf.org/wg/quic/documents/).
32 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 |
3 | See the
4 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md).
5 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | LIBDIR := lib
2 | include $(LIBDIR)/main.mk
3 | # Bootstrap rule for the included main.mk: update the submodule if .gitmodules declares one at $(LIBDIR), otherwise shallow-clone the main branch of i-d-template into $(LIBDIR).
4 | $(LIBDIR)/main.mk:
5 | ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
6 | git submodule sync
7 | git submodule update $(CLONE_ARGS) --init
8 | else
9 | git clone -q --depth 10 $(CLONE_ARGS) \
10 | -b main https://github.com/martinthomson/i-d-template $(LIBDIR)
11 | endif
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # QUIC-LB: Generating Routable QUIC Connection IDs
2 |
3 | This is the working area for the IETF [QUIC Working Group](https://datatracker.ietf.org/wg/quic/documents/) Internet-Draft, "QUIC-LB: Generating Routable QUIC Connection IDs".
4 |
5 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.html)
6 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-load-balancers)
7 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.diff)
8 |
9 | It is also the home for "QUIC Retry Offload".
10 |
11 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.html)
12 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-retry-offload)
13 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.diff)
14 |
15 | ## Building the Draft
16 |
17 | Formatted text and HTML versions of the draft can be built using `make`.
18 |
19 | ```sh
20 | $ make
21 | ```
22 |
23 | This requires that you have the necessary software installed. See
24 | [the instructions](https://github.com/martinthomson/i-d-template/blob/master/doc/SETUP.md).
25 |
26 |
27 | ## Contributing
28 |
29 | See the
30 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md).
31 |
--------------------------------------------------------------------------------
/draft-ietf-quic-load-balancers.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "QUIC-LB: Generating Routable QUIC Connection IDs"
3 | abbrev: QUIC-LB
4 | docname: draft-ietf-quic-load-balancers-latest
5 | date: {DATE}
6 | category: std
7 | ipr: trust200902
8 | area: Transport
9 | workgroup: QUIC
10 |
11 | stand_alone: yes
12 | pi: [toc, sortrefs, symrefs, docmapping]
13 |
14 | author:
15 | -
16 | ins: M. Duke
17 | name: Martin Duke
18 | org: Google
19 | email: martin.h.duke@gmail.com
20 |
21 | -
22 | ins: N. Banks
23 | name: Nick Banks
24 | org: Microsoft
25 | email: nibanks@microsoft.com
26 |
27 | -
28 | ins: C. Huitema
29 | name: Christian Huitema
30 | org: Private Octopus Inc.
31 | email: huitema@huitema.net
32 |
33 | normative:
34 | NIST-AES-ECB:
35 | title: "Recommendation for Block Cipher Modes of Operation: Methods and Techniques"
36 | author:
37 | - ins: M. Dworkin
38 | date: 2001
39 | refcontent:
40 | - "NIST Special Publication 800-38A"
41 | target: "https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf"
42 |
43 | informative:
44 | Patarin2008:
45 | target: https://eprint.iacr.org/2008/036.pdf
46 | title: Generic Attacks on Feistel Schemes - Extended Version
47 | author:
48 | ins: J. Patarin
49 | name: Jacques Patarin
50 | org: PRiSM, University of Versailles
51 | date: 2008
52 |
53 | --- abstract
54 |
55 | QUIC address migration allows clients to change their IP address while
56 | maintaining connection state. To reduce the ability of an observer to link two
57 | IP addresses, clients and servers use new connection IDs when they communicate
58 | via different client addresses. This poses a problem for traditional "layer-4"
59 | load balancers that route packets via the IP address and port 4-tuple. This
60 | specification provides a standardized means of securely encoding routing
61 | information in the server's connection IDs so that a properly configured load
62 | balancer can route packets with migrated addresses correctly. As it proposes a
63 | structured connection ID format, it also provides a means of connection IDs
64 | self-encoding their length to aid some hardware offloads.
65 |
66 | --- middle
67 |
68 | # Introduction
69 |
70 | QUIC packets {{!RFC9000}} usually contain a connection ID to allow endpoints to
71 | associate packets with different address/port 4-tuples to the same connection
72 | context. This feature makes connections robust in the event of NAT rebinding.
73 | QUIC endpoints usually designate the connection ID which peers use to address
74 | packets. Server-generated connection IDs create a potential need for out-of-band
75 | communication to support QUIC.
76 |
77 | QUIC allows servers (or load balancers) to encode useful routing information for
78 | load balancers in connection IDs. It also encourages servers, in packets
79 | protected by cryptography, to provide additional connection IDs to the client.
80 | This allows clients that know they are going to change IP address or port to use
81 | a separate connection ID on the new path, thus reducing linkability as clients
82 | move through the world.
83 |
84 | There is a tension between the requirements to provide routing information and
85 | mitigate linkability. Ultimately, because new connection IDs are in protected
86 | packets, they must be generated at the server if the load balancer does not have
87 | access to the connection keys. However, it is the load balancer that has the
88 | context necessary to generate a connection ID that encodes useful routing
89 | information. In the absence of any shared state between load balancer and
90 | server, the load balancer must maintain a relatively expensive table of
91 | server-generated connection IDs, and will not route packets correctly if they
92 | use a connection ID that was originally communicated in a protected
93 | NEW_CONNECTION_ID frame.
94 |
95 | This specification provides common algorithms for encoding the server mapping in
96 | a connection ID given some shared parameters. The mapping is generally only
97 | discoverable by observers that have the parameters, preserving unlinkability as
98 | much as possible.
99 |
100 | As this document proposes a structured QUIC Connection ID, it also proposes a
101 | system for self-encoding connection ID length in all packets, so that crypto
102 | offload can efficiently obtain key information.
103 |
104 | While this document describes a small set of configuration parameters to make
105 | the server mapping intelligible, the means of distributing these parameters
106 | between load balancers, servers, and other trusted intermediaries is out of its
107 | scope. There are numerous well-known infrastructures for distribution of
108 | configuration.
109 |
110 | ## Terminology
111 |
112 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
113 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
114 | interpreted as described in RFC 2119 {{?RFC2119}}.
115 |
116 | In this document, these words will appear with that interpretation only when in
117 | ALL CAPS. Lower case uses of these words are not to be interpreted as carrying
118 | significance described in RFC 2119.
119 |
120 | In this document, "client" and "server" refer to the endpoints of a QUIC
121 | connection unless otherwise indicated. A "load balancer" is an intermediary for
122 | that connection that does not possess QUIC connection keys, but it may rewrite
123 | IP addresses or conduct other IP or UDP processing. A "configuration agent" is
124 | the entity that determines the QUIC-LB configuration parameters for the network
125 | and leverages some system to distribute that configuration.
126 |
127 | Note that stateful load balancers that act as proxies, by terminating a QUIC
128 | connection with the client and then retrieving data from the server using QUIC
129 | or another protocol, are treated as a server with respect to this specification.
130 |
131 | For brevity, "Connection ID" will often be abbreviated as "CID".
132 |
133 | ## Notation
134 |
135 | All wire formats will be depicted using the notation defined in Section 1.3 of
136 | {{RFC9000}}.
137 |
138 | # Overview
139 |
140 | In QUIC-LB, load balancers do not generate individual connection IDs for
141 | servers. Instead, they communicate the parameters of an algorithm to generate
142 | routable connection IDs.
143 |
144 | The algorithms differ in the complexity of configuration at both load balancer
145 | and server. Increasing complexity improves obfuscation of the server mapping.
146 |
147 | This specification describes three participants: the configuration agent, the
148 | load balancer, and the server. For any given QUIC-LB configuration that enables
149 | connection-ID-aware load balancing, there must be a choice of (1) routing
150 | algorithm, (2) server ID allocation strategy, and (3) algorithm parameters.
151 |
152 | Fundamentally, servers generate connection IDs that encode their server ID.
153 | Load balancers decode the server ID from the CID in incoming packets to route
154 | to the correct server.
155 |
156 | {{!RFC8999}} specifies that endpoints generate their own connection IDs,
157 | implying that all QUIC versions will have a mechanism to communicate their
158 | connection IDs to the peer. In QUIC versions 1 and 2, the server does so using
159 | the Source Connection ID field of its long header packets for the first
160 | connection ID, and NEW_CONNECTION_ID frames for subsequent CIDs.
161 |
162 | There are situations where a server pool might be operating two or more routing
163 | algorithms or parameter sets simultaneously. The load balancer uses the first
164 | three bits of the connection ID to multiplex incoming Destination Connection IDs
165 | (DCIDs) over these schemes (see {{config-rotation}}).
166 |
167 | # First CID octet {#first-octet}
168 |
169 | The Connection ID construction schemes defined in this document reserve the
170 | first octet of a CID for two special purposes: one mandatory (config rotation)
171 | and one optional (length self-description).
172 |
173 | Subsequent sections of this document refer to the contents of this octet as the
174 | "first octet."
175 |
176 | ## Config Rotation {#config-rotation}
177 |
178 | The first three bits of any connection ID MUST encode an identifier for the
179 | configuration that the connection ID uses. This enables incremental deployment
180 | of new QUIC-LB settings (e.g., keys). A configuration MUST NOT use the
181 | reserved identifier 0b111 (see {{config-failover}} below).
182 |
183 | When new configuration is distributed to servers, there will be a transition
184 | period when connection IDs reflecting old and new configuration coexist in the
185 | network. The rotation bits allow load balancers to apply the correct routing
186 | algorithm and parameters to incoming packets.
187 |
188 | Configuration Agents SHOULD deliver new configurations to load balancers before
189 | doing so to servers, so that load balancers are ready to process CIDs using the
190 | new parameters when they arrive.
191 |
192 | A Configuration Agent SHOULD NOT use a codepoint to represent a new
193 | configuration until it takes precautions to make sure that all connections using
194 | CIDs with an old configuration at that codepoint have closed or transitioned.
195 |
196 | Servers MUST NOT generate new connection IDs using an old configuration after
197 | receiving a new one from the configuration agent. Servers MUST use that QUIC
198 | version's methods to update the client with CIDs (e.g., NEW_CONNECTION_ID
199 | frames) using the new configuration and retire CIDs using the old configuration.
200 |
201 | It is also possible to use these bits for more long-lived distinction of different
202 | configurations, but this has privacy implications (see {{multiple-configs}}).
203 |
204 | ## Configuration Failover {#config-failover}
205 |
206 | In some deployments, an infrastructure will not receive traffic unless all
207 | servers have received a configuration, and load balancers have a superset of all
208 | configurations that are active in the server pool, thus guaranteeing that any
209 | CID generated by a server is decodable by any load balancer. Servers and load
210 | balancers deployed under all of these assumptions can ignore the provisions in
211 | this subsection.
212 |
213 | Load balancers treat connection IDs for which they have no corresponding config
214 | ID as unroutable (see {{unroutable}}). If they have no configuration at all,
215 | then all connection IDs are unroutable.
216 |
217 | Servers with no active configuration MUST issue connection IDs with the reserved
218 | value of the three most significant bits set to 0b111 to signify the connection
219 | ID is unroutable. These connection IDs MUST self-encode their length (see
220 | {{length-self-description}}).
221 |
222 | Servers with no active configuration SHOULD provide the client exactly one CID
223 | over the life of the connection. In QUIC versions 1 and 2, therefore, servers
224 | SHOULD NOT send any NEW_CONNECTION_ID frames, instead delivering a single CID
225 | via the Source Connection ID of long headers it sends.
226 |
227 | Servers with no active configuration SHOULD send the "disable_active_migration"
228 | transport parameter, or a similar message in future QUIC versions.
229 |
230 | When using codepoint 0b111, all bytes but the first SHOULD have no greater
231 | chance of collision than random bytes. The connection ID SHOULD be of at least
232 | length 8 to provide 7 bytes of entropy after the first octet with a low chance
233 | of collision.
234 |
235 | ## Length Self-Description {#length-self-description}
236 |
237 | Local hardware cryptographic offload devices may accelerate QUIC servers by
238 | receiving keys from the QUIC implementation indexed to the connection ID.
239 | However, on physical devices operating multiple QUIC servers, it might be
240 | impractical to efficiently look up keys if the connection ID varies in length and
241 | does not self-encode its own length.
242 |
243 | Note that this is a function of particular server devices and is irrelevant to
244 | load balancers. As such, load balancers MAY omit this from their configuration.
245 | However, the remaining 5 bits in the first octet of the Connection ID are
246 | reserved to express the length of the following connection ID, not including
247 | the first octet.
248 |
249 | A server not using this functionality SHOULD choose the five bits so as to have
250 | no observable relationship to previous connection IDs issued for that
251 | connection.
252 |
253 | ## Format
254 |
255 | ~~~
256 | First Octet {
257 | Config Rotation (3),
258 | CID Len or Random Bits (5),
259 | }
260 | ~~~
261 | {: #first-octet-format title="First Octet Format"}
262 |
263 | The first octet has the following fields:
264 |
265 | Config Rotation: Indicates the configuration used to interpret the CID.
266 |
267 | CID Len or Random Bits: When Length Self-Description is in use, encodes the
268 | length of the Connection ID following the First Octet. Otherwise, random bits.
269 |
270 | # Unroutable Connection IDs {#unroutable}
271 |
272 | ## Definition
273 |
274 | QUIC-LB servers with a valid configuration will generate Connection IDs that are
275 | decodable to extract a server ID in accordance with a specified algorithm and
276 | parameters. However, QUIC often uses client-generated Connection IDs prior to
277 | receiving a packet from the server.
278 |
279 | Furthermore, servers without a valid configuration, or a configuration not
280 | present at the load balancer, will also generate connection IDs that are not
281 | decodable, and these CIDs are likely to persist for the duration of the
282 | connection.
283 |
284 | These CIDs might not conform to the expectations of the routing algorithm and
285 | therefore not be routable by the load balancer. Those that are not routable are
286 | "unroutable DCIDs" and receive similar treatment regardless of why they're
287 | unroutable. A DCID is unroutable if any of the following is true:
288 |
289 | * The config rotation bits ({{config-rotation}}) do not correspond to an active
290 | configuration. Note: a packet with a DCID with config ID codepoint 0b111 (see
291 | {{config-failover}}) is always unroutable.
292 | * If the packet header encodes the DCID length, the DCID is not long enough for
293 | the decoder to process.
294 | * The extracted server mapping does not correspond to an active server.
295 |
296 | If the load balancer has knowledge that all servers in the pool are encoding
297 | CID length in the first octet (see {{length-self-description}}), it MAY
298 | perform additional checks based on that self-encoded length:
299 |
300 | * In a long header, verify that the self-encoded length is consistent with the
301 | CID length field in the header (i.e., the self-encoded length is one less).
302 | * Verify that the self-encoded length is consistent with the QUIC version, if
303 | known.
304 | * Verify that the self-encoded length is large enough for the decoder to process
305 | using the indicated config ID.
306 |
307 | DCIDs that do not meet any of these criteria are routable.
308 |
309 | ## Load Balancer Forwarding {#load-balancer-forwarding}
310 |
311 | Load balancers execute the following steps in order until one results in a
312 | routing decision. The steps refer to state that some load balancers will
313 | maintain, depending on the deployment's underlying assumptions. See
314 | {{fallback-algorithm}} for further discussion of this state.
315 |
316 | 1. If the packet contains a routable CID, route the packet accordingly.
317 | 1. If the packet has a long header and matches an entry in a table of routing
318 | decisions indexed by a concatenation of 4-tuple and Source CID, route the packet
319 | accordingly.
320 | 1. If the packet matches an entry in a table of routing decisions by destination
321 | CID, route the packet accordingly.
322 | 1. If the packet matches an entry in a table of routing decisions by 4-tuple, route
323 | the packet accordingly.
324 | 1. Use the fallback algorithm to make a routing decision and, if applicable,
325 | record the results in the tables indexed by 4-tuple and/or CID. In some cases,
326 | described below, the load balancer might buffer the packet to defer a decision.
327 |
328 | ## Fallback Algorithms {#fallback-algorithm}
329 |
330 | There are conditions described above where a load balancer routes a packet using
331 | a "fallback algorithm." A standardized algorithm design is not necessary for
332 | interoperability, so load balancers can implement any algorithm that meets the
333 | relevant requirements below.
334 |
335 | There is a baseline case that has relatively simple requirements of the chosen
336 | fallback algorithm, and an advanced case with more capabilities and more complex
337 | requirements.
338 |
339 | ### Baseline Fallback Algorithm
340 |
341 | All load balancers MUST implement a baseline fallback algorithm that takes only
342 | the 4-tuple as an input and outputs a routing decision.
343 |
344 | If it is impossible for the server to generate CIDs that the load balancer
345 | cannot decode (see {{config-failover}}), there are no further requirements in
346 | this subsection.
347 |
348 | Otherwise, the load balancer SHOULD maintain a table of 4-tuples that carried
349 | unroutable DCIDs and the resulting routing decision. Provided the table does
350 | not overflow, and the load balancer does not lose state, this allows connections
351 | to survive when the server pool changes, which would sometimes change the output
352 | of the fallback algorithm.
353 |
354 | The load balancer MAY maintain a table of observed unroutable DCIDs and the
355 | resulting routing decision. Provided the table does not overflow, these
356 | connections will be robust to NAT rebinding.
357 |
358 | Load balancers SHOULD maintain per-flow timers to periodically purge state in
359 | the tables described above.
360 |
361 | ### Advanced Fallback Algorithm
362 |
363 | Some architectures might require a load balancer to choose a server pool based
364 | on deep packet inspection of a client packet. For example, it may use the TLS
365 | 1.3 Server Name Indication (SNI) ({{?RFC6066}}) field. The advanced fallback
366 | algorithm enables this capability but levies several additional requirements to
367 | make consistent routing decisions.
368 |
369 | For packets not known to belong to a QUIC version the load balancer can parse,
370 | load balancers MUST use the baseline fallback algorithm if the DCID is
371 | unroutable.
372 |
373 | For known QUIC versions, the fallback algorithm MAY parse packets and use that
374 | information to make a routing decision.
375 |
376 | If so, it MUST have the ability to buffer packets with unroutable DCIDs to await
377 | further packets that allow it to make a routing decision, as the fields of
378 | interest can be an arbitrary number of packets into the connection.
379 |
380 | 4-tuple routing is not sufficient for this use case, because a client can use
381 | the same 4-tuple for two connections that should be routed differently (e.g.
382 | because they target different SNIs), as long as the packet contains a source
383 | connection ID of nonzero length.
384 |
385 | Therefore, the load balancer SHOULD maintain two tables that map different
386 | values to a routing decision:
387 |
388 | - a table indexed by a concatenation of the 4-tuple and source CID, which might
389 | be zero-length, to route subsequent long header packets that do not contain the
390 | server-generated connection ID;
391 |
392 | - a table indexed by destination CID, if and only if it is possible for the
393 | server to generate unroutable CIDs. This table can be shared with the one in use
394 | for the baseline fallback algorithm.
395 |
396 | If either table overflows, or if the load balancer loses state, it is likely the
397 | load balancer will misroute packets.
398 |
399 | Load balancers SHOULD maintain per-flow timers to periodically purge state in
400 | the tables described above.
401 |
402 | # Server ID Encoding in Connection IDs
403 |
404 | ## Server ID Allocation {#sid-allocation}
405 |
406 | Load Balancer configurations include a mapping of server IDs to forwarding
407 | addresses. The corresponding server configurations contain one or
408 | more unique server IDs.
409 |
410 | The configuration agent chooses a server ID length for each configuration that
411 | MUST be at least one octet.
412 |
413 | A QUIC-LB configuration MAY significantly over-provision the server ID space
414 | (i.e., provide far more codepoints than there are servers) to increase the
415 | probability that a randomly generated Destination Connection ID is unroutable.
416 |
417 | The configuration agent SHOULD provide a means for each server to express the
418 | number of server IDs it can usefully employ, because a single routing address
419 | might actually correspond to multiple server entities (see {{lb-chains}}).
420 |
421 | Conceptually, each configuration has its own set of server ID allocations,
422 | though two static configurations with identical server ID lengths MAY use a
423 | common allocation between them.
424 |
425 | A server encodes one of its assigned server IDs in any CID it generates using
426 | the relevant configuration.
427 |
428 | ## CID format
429 |
430 | All connection IDs use the following format:
431 |
432 | ~~~
433 | QUIC-LB Connection ID {
434 | First Octet (8),
435 | Plaintext Block (40..152),
436 | }
437 | Plaintext Block {
438 | Server ID (8..),
439 | Nonce (32..),
440 | }
441 | ~~~
442 | {: #plaintext-cid-format title="CID Format"}
443 |
444 | The First Octet field serves one or two purposes, as defined in {{first-octet}}.
445 |
446 | The Server ID field encodes the information necessary for the load balancer to
447 | route a packet with that connection ID. It is often encrypted.
448 |
449 | The server uses the Nonce field to make sure that each connection ID it
450 | generates is unique, even though they all use the same Server ID.
451 |
452 | ## Configuration Agent Actions
453 |
454 | The configuration agent assigns a server ID to every server in its pool in
455 | accordance with {{sid-allocation}}, and determines a server ID length (in
456 | octets) sufficiently large to encode all server IDs, including potential future
457 | servers.
458 |
459 | Each configuration specifies the length of the Server ID and Nonce fields, with
460 | limits defined for each algorithm.
461 |
462 | Optionally, it also defines a 16-octet key. Note that failure to define a key
463 | means that observers can determine the assigned server of any connection,
464 | significantly increasing the linkability of QUIC address migration.
465 |
466 | The nonce length MUST be at least 4 octets. The server ID length MUST be at
467 | least 1 octet.
468 |
469 | As QUIC version 1 limits connection IDs to 20 octets, the server ID and nonce
470 | lengths MUST sum to 19 octets or less.
471 |
472 | ## Server Actions
473 |
474 | The server writes the first octet and its server ID into their respective
475 | fields.
476 |
477 | If there is no key in the configuration, the server MUST fill the Nonce field
478 | with bytes that have no observable relationship to the field in previously
479 | issued connection IDs. If there is a key, the server fills the nonce field with
480 | a nonce of its choosing. See {{cid-entropy}} for details.
481 |
482 | The server MAY append additional bytes to the connection ID, up to the limit
483 | specified in that version of QUIC, for its own use. These bytes MUST NOT
484 | provide observers with any information that could link two connection IDs to
485 | the same connection, client, or server. In particular, all servers using a
486 | configuration MUST consistently add the same length to each connection ID,
487 | to preserve the linkability objectives of QUIC-LB. Any additional bytes SHOULD
488 | NOT provide any observable correlation to previous connection IDs for that
489 | connection (e.g., the bytes can be chosen at random).
490 |
491 | If there is no key in the configuration, the Connection ID is complete.
492 | Otherwise, there are further steps, as described in the two following
493 | subsections.
494 |
495 | Encryption below uses the AES-128-ECB cipher {{NIST-AES-ECB}}. Future standards
496 | could add new algorithms that use other ciphers to provide cryptographic agility
497 | in accordance with {{?RFC7696}}. QUIC-LB implementations SHOULD be extensible to
498 | support new algorithms.
499 |
500 | ### Special Case: Single Pass Encryption
501 |
502 | When the nonce length and server ID length sum to exactly 16 octets, the server
503 | MUST use a single-pass encryption algorithm. All connection ID octets except the
504 | first form an AES-ECB block. This block is encrypted once, and the result forms
505 | the second through seventeenth most significant bytes of the connection ID.
506 |
507 | ### General Case: Four-Pass Encryption
508 |
509 | Any other field length requires four passes for encryption and at least three
510 | for decryption. To understand this algorithm, it is useful to define two
511 | functions that minimize the amount of bit-shifting necessary in the event that
512 | there are an odd number of octets.
513 |
514 | When configured with both a key, and a nonce length and server ID length that
515 | sum to any number other than 16, the server MUST follow the algorithm below to
516 | encrypt the connection ID.
517 |
518 | #### Overview
519 |
520 | The 4-pass algorithm is a four-round Feistel Network with the round function
521 | being AES-ECB. Most modern applications of Feistel Networks have more than four
522 | rounds. The implications of this choice, which is meant to limit the per-packet
523 | compute overhead at load balancers, are discussed in
524 | {{distinguishing-attacks}}.
525 |
526 | The server concatenates the server ID and nonce into a single field, which is
527 | then split into equal halves. In successive passes, one of these halves is
528 | expanded into a 16B plaintext, encrypted with AES-ECB, and the result XORed with
529 | the other half. The diagram below shows the conceptual processing of a plaintext
530 | server ID and nonce into a connection ID. 'FO' stands for 'First Octet'.
531 |
532 | ~~~ aasvg
533 | +-----+-----------+-----------------------+
534 | | FO | Server ID | Nonce |
535 | +--+--+-----------+-----+-----------------+
536 | | |
537 | | V
538 | | +-----------------+-----------------+
539 | | | left_0 | right_0 |
540 | | +--+--------------+--------------+--+
541 | | | |
542 | | | |
543 | | | .--------. V
544 | | +-------->| AES-ECB +-------->⊕
545 | | | '--------' |
546 | | V .--------. | right_1
547 | | ⊕<-----------+ AES-ECB |<-----+
548 | | | '--------' |
549 | | | left_1 .--------. V
550 | | +-------->| AES-ECB +-------->⊕
551 | | | '--------' |
552 | | V .--------. |
553 | | ⊕<-----------+ AES-ECB |<-----+
554 | | | '--------' |
555 | | | |
556 | | V V
557 | | +-----------------+-----------------+
558 | | | left_2 | right_2 |
559 | | +-------+---------+--------+--------+
560 | | | |
561 | V V V
562 | +-----+-----------------------------------+
563 | | FO | Ciphertext |
564 | +-----+-----------------------------------+
565 | ~~~
566 |
567 | #### Useful functions
568 |
569 | Two functions are useful to define:
570 |
571 | The expand(length, pass, input_bytes) function concatenates three arguments and
572 | outputs 16 zero-padded octets.
573 |
574 | The output of expand is as follows:
575 |
576 | ~~~pseudocode
577 | ExpandResult {
578 | input_bytes(...),
579 | ZeroPad(...),
580 | length(8),
581 | pass(8)
582 | }
583 | ~~~
584 |
585 | in which:
586 |
587 | * 'input_bytes' is drawn from one half of the plaintext. It forms the N most
588 | significant octets of the output, where N is half the 'length' argument, rounded
589 | up, and thus a number between 3 and 10, inclusive.
590 |
591 | * 'ZeroPad' is a set of 14-N octets set to zero.
592 |
593 | * 'length' is an 8-bit integer that reports the sum of the configured nonce
594 | length and server id length in octets, and forms the fifteenth octet of the
595 | output. The 'length' argument MUST NOT exceed 19 and MUST NOT be less than 5.
596 |
597 | * 'pass' is an 8-bit integer that reports the 'pass' argument of the algorithm,
598 | and forms the sixteenth (least significant) octet of the output. It guarantees
599 | that the cryptographic input of every pass of the algorithm is unique.
600 |
601 | For example,
602 |
603 | ~~~pseudocode
604 | expand(0x06, 0x02, 0xaaba3c) = 0xaaba3c00000000000000000000000602
605 | ~~~
606 |
607 | Similarly, truncate(input, n) returns the first n octets of 'input'.
608 |
609 | ~~~pseudocode
610 | truncate(0x2094842ca49256198c2deaa0ba53caa0, 4) = 0x2094842c
611 | ~~~
612 |
613 | Let 'half_len' be equal to 'plaintext_len' / 2, rounded up.
614 |
615 | #### Algorithm Description
616 |
617 | The example at the end of this section helps to clarify the steps described
618 | below.
619 |
620 | 1. The server concatenates the server ID and nonce to create plaintext_CID. The
621 | length of the result in octets is plaintext_len.
622 |
623 | 2. The server splits plaintext_CID into components left_0 and right_0 of equal
624 | length half_len. If plaintext_len is odd, right_0 clears its first four bits,
625 | and left_0 clears its last four bits. For example, 0x7040b81b55ccf3 would split
626 | into a left_0 of 0x7040b810 and right_0 of 0x0b55ccf3.
627 |
628 | 3. Encrypt the result of expand(plaintext_len, 1, left_0) using an AES-128-ECB
629 | cipher to obtain a ciphertext.
630 |
631 | 4. XOR the first half_len octets of the ciphertext with right_0 to form right_1.
632 | Steps 3 and 4 can be summarized as
633 |
634 | ~~~pseudocode
635 | result = AES_ECB(key, expand(plaintext_len, 1, left_0))
636 | right_1 = XOR(right_0, truncate(result, half_len))
637 | ~~~
638 |
639 | {:start="5"}
640 | 5. If the plaintext_len is odd, clear the first four bits of right_1.
641 |
642 | 6. Repeat steps 3 and 4, but use them to compute left_1 by expanding and
643 | encrypting right_1 with pass = 2, and XOR the results with left_0.
644 |
645 | ~~~pseudocode
646 | result = AES_ECB(key, expand(plaintext_len, 2, right_1))
647 | left_1 = XOR(left_0, truncate(result, half_len))
648 | ~~~
649 |
650 | {:start="7"}
651 | 7. If the plaintext_len is odd, clear the last four bits of left_1.
652 |
653 | 8. Repeat steps 3 and 4, but use them to compute right_2 by expanding and
654 | encrypting left_1 with pass = 3, and XOR the results with right_1.
655 |
656 | ~~~pseudocode
657 | result = AES_ECB(key, expand(plaintext_len, 3, left_1))
658 | right_2 = XOR(right_1, truncate(result, half_len))
659 | ~~~
660 |
661 | {:start="9"}
662 | 9. If the plaintext_len is odd, clear the first four bits of right_2.
663 |
664 | 10. Repeat steps 3 and 4, but use them to compute left_2 by expanding and
665 | encrypting right_2 with pass = 4, and XOR the results with left_1.
666 |
667 | ~~~pseudocode
668 | result = AES_ECB(key, expand(plaintext_len, 4, right_2))
669 | left_2 = XOR(left_1, truncate(result, half_len))
670 | ~~~
671 |
672 | {:start="11"}
673 | 11. If the plaintext_len is odd, clear the last four bits of left_2.
674 |
675 | 12. The server concatenates left_2 with right_2 to form the ciphertext CID,
676 | which it appends to the first octet. If plaintext_len is odd, the four
677 | least significant bits of left_2 and four most significant bits of right_2,
678 | which are all zero, are stripped off before concatenation to make the
679 | resulting ciphertext the same length as the original plaintext.
680 |
681 | #### Encryption Example
682 |
683 | The following example executes the steps for the provided inputs. Note that the
684 | plaintext is of odd octet length, so the middle octet will be split evenly
685 | between left_0 and right_0.
686 |
687 | ~~~pseudocode
688 | server_id = 0x31441a
689 | nonce = 0x9c69c275
690 | key = 0xfdf726a9893ec05c0632d3956680baf0
691 |
692 | // step 1
693 | plaintext_CID = 0x31441a9c69c275
694 | plaintext_len = 7
695 |
696 | // step 2
697 | half_len = 4
698 | left_0 = 0x31441a90
699 | right_0 = 0x0c69c275
700 |
701 | // step 3
702 | aes_input = 0x31441a90000000000000000000000701
703 | aes_output = 0xa255dd8cdacf01948d3a848c3c7fee23
704 |
705 | // step 4
706 | right_1 = 0x0c69c275 ^ 0xa255dd8c = 0xae3c1ff9
707 |
708 | // step 5 (clear bits)
709 | right_1 = 0x0e3c1ff9
710 |
711 | // step 6
712 | aes_input = 0x0e3c1ff9000000000000000000000702
713 | aes_output = 0xe5e452cb9e1bedb0b2bf830506bf4c4e
714 | left_1 = 0x31441a90 ^ 0xe5e452cb = 0xd4a0485b
715 |
716 | // step 7 (clear bits)
717 | left_1 = 0xd4a04850
718 |
719 | // step 8
720 | aes_input = 0xd4a04850000000000000000000000703
721 | aes_output = 0xb7821ab3024fed0913b6a04d18e3216f
722 | right_2 = 0x0e3c1ff9 ^ 0xb7821ab3 = 0xb9be054a
723 |
724 | // step 9 (clear bits)
725 | right_2 = 0x09be054a
726 |
727 | // step 10
728 | aes_input = 0x09be054a000000000000000000000704
729 | aes_output = 0xb334357cfdf81e3fafe180154eaf7378
730 | left_2 = 0xd4a04850 ^ 0xb334357c = 0x67947d2c
731 |
732 | // step 11 (clear bits)
733 | left_2 = 0x67947d20
734 |
735 | // step 12
736 | cid = first_octet || left_2 || right_2 = 0x0767947d29be054a
737 | ~~~
738 |
739 | ## Load Balancer Actions
740 |
741 | On each incoming packet, the load balancer extracts consecutive octets,
742 | beginning with the second octet. If there is no key, the first octets
743 | correspond to the server ID.
744 |
745 | If there is a key, the load balancer takes one of two actions:
746 |
747 | ### Special Case: Single Pass Encryption
748 |
749 | If server ID length and nonce length sum to exactly 16 octets, they form a
750 | ciphertext block. The load balancer decrypts the block using the AES-ECB key
751 | and extracts the server ID from the most significant bytes of the resulting
752 | plaintext.
753 |
754 | ### General Case: Four-Pass Encryption
755 |
756 | First, split the ciphertext CID (excluding the first octet) into its
757 | equal-length components left_2 and right_2. Then follow the process below:
758 |
759 | ~~~pseudocode
760 | result = AES_ECB(key, expand(plaintext_len, 4, right_2))
761 | left_1 = XOR(left_2, truncate(result, half_len))
762 | if (plaintext_len_is_odd()) clear_last_bits(left_1, 4)
763 |
764 | result = AES_ECB(key, expand(plaintext_len, 3, left_1))
765 | right_1 = XOR(right_2, truncate(result, half_len))
766 | if (plaintext_len_is_odd()) clear_first_bits(right_1, 4)
767 |
768 | result = AES_ECB(key, expand(plaintext_len, 2, right_1))
769 | left_0 = XOR(left_1, truncate(result, half_len))
770 | if (plaintext_len_is_odd()) clear_last_bits(left_0, 4)
771 | ~~~
772 |
773 | As the load balancer has no need for the nonce, it can conclude after 3 passes
774 | as long as the server ID is entirely contained in left_0 (i.e., the nonce is at
775 | least as large as the server ID). If the server ID is longer, a fourth pass
776 | is necessary:
777 |
778 | ~~~pseudocode
779 | result = AES_ECB(key, expand(plaintext_len, 1, left_0))
780 | right_0 = XOR(right_1, truncate(result, half_len))
781 | if (plaintext_len_is_odd()) clear_first_bits(right_0, 4)
782 | ~~~
783 |
784 | and the load balancer has to concatenate left_0 and right_0 to obtain the
785 | complete server ID.
786 |
787 | # Per-connection state {#per-connection-state}
788 |
789 | The CID allocation methods QUIC-LB defines require no per-connection state at
790 | the load balancer, with a few conditional exceptions described in
791 | {{unroutable}}. Otherwise, the load balancer can extract the server ID from
792 | the connection ID of each incoming packet and route that packet accordingly.
793 |
794 | However, once a routing decision has been made, the load balancer MAY
795 | associate the 4-tuple or connection ID with the decision. This has two
796 | advantages:
797 |
798 | * The load balancer only extracts the server ID once until the 4-tuple or
799 | connection ID changes. When the CID is encrypted, this might reduce
800 | computational load.
801 |
802 | * Incoming Stateless Reset packets and ICMP messages are easily routed to the
803 | correct origin server.
804 |
805 | In addition to the increased state requirements, however, load balancers cannot
806 | detect the packets that indicate the end of the connection, so they rely on a
807 | timeout to delete connection state. There are numerous considerations around
808 | setting such a timeout.
809 |
810 | In the event a connection ends, freeing an IP and port, and a different
811 | connection migrates to that IP and port before the timeout, the load balancer
812 | will misroute the different connection's packets to the original server. A short
813 | timeout limits the likelihood of such a misrouting.
814 |
815 | Furthermore, if a short timeout causes premature deletion of state, the routing
816 | is easily recoverable by decoding an incoming Connection ID. However, a short
817 | timeout also reduces the chance that an incoming Stateless Reset is correctly
818 | routed.
819 |
820 | Note that some heuristics to purge state early can introduce Denial of Service
821 | vulnerabilities. For example, one heuristic might delete flow state once the
822 | load balancer observes a routable CID on that flow. An attacker that can observe
823 | a target flow can store a routable CID from a previous connection and spoof the
824 | target flow's 4-tuple with the routable CID, causing premature deletion of that
825 | state.
826 |
827 | Servers MAY implement the technique described in {{Section 14.4.1 of RFC9000}}
828 | in case the load balancer is stateless, to increase the likelihood a Source
829 | Connection ID is included in ICMP responses to Path Maximum Transmission Unit
830 | (PMTU) probes. Load balancers MAY parse the echoed packet to extract the Source
831 | Connection ID, if it contains a QUIC long header, and extract the Server ID as
832 | if it were in a Destination CID.
833 |
834 | # Additional Use Cases
835 |
836 | This section discusses considerations for some deployment scenarios not implied
837 | by the specification above.
838 |
839 | ## Load balancer chains {#lb-chains}
840 |
841 | Some network architectures may have multiple tiers of low-state load balancers,
842 | where a first tier of devices makes a routing decision to the next tier, and so
843 | on, until packets reach the server. Although QUIC-LB is not explicitly designed
844 | for this use case, it is possible to support it.
845 |
846 | If each load balancer is assigned a range of server IDs that is a subset of the
847 | range of IDs assigned to devices that are closer to the client, then the first
848 | devices to process an incoming packet can extract the server ID and then map it
849 | to the correct forwarding address. Note that this solution is extensible to
850 | arbitrarily large numbers of load-balancing tiers, as the maximum server ID
851 | space is quite large.
852 |
853 | If the number of necessary server IDs per next hop is uniform, a simple
854 | implementation would use successively longer server IDs at each tier of load
855 | balancing, and the server configuration would match the last tier. Load
856 | balancers closer to the client can then treat any parts of the server ID they
857 | did not use as part of the nonce.
858 |
859 | ## Server Process Demultiplexing
860 |
861 | QUIC servers might have QUIC running on multiple processes or threads listening
862 | on the same address, and have a need to demultiplex between them. In principle,
863 | this demultiplexer is a Layer 4 load balancer, and the guidance in {{lb-chains}}
864 | applies. However, in many deployments the demultiplexer lacks the capability to
865 | perform decryption operations. Internal server coordination is out of scope of
866 | this specification, but this non-normative section proposes some approaches
867 | that could work given certain server capabilities:
868 |
869 | * Some bytes of the server ID are reserved to encode the process ID. The
870 | demultiplexer might operate based on the 4-tuple or other legacy indicator, but
871 | the receiving server process extracts the server ID, and if it does not match
872 | the one for that process, the process could "toss" the packet to the correct
873 | destination process.
874 |
875 | * Each process could register the connection IDs it generates with the
876 | demultiplexer, which routes those connection IDs accordingly.
877 |
878 | * In a combination of the two approaches above, the demultiplexer generally
879 | routes by 4-tuple. After a migration, the process tosses the first flight of
880 | packets and registers the new connection ID with the demultiplexer. This
881 | alternative limits the bandwidth consumption of tossing and the memory footprint
882 | of a full connection ID table.
883 |
884 | * When generating a connection ID, the server writes the process ID to the
885 | random field of the first octet, or if this is being used for length encoding,
886 | in an octet it appends after the ciphertext. It then applies a keyed hash (with
887 | a key locally generated for the sole use of that server). The hash result is
888 | used as a bitmask to XOR with the bits encoding the process ID. On packet
889 | receipt, the demultiplexer applies the same keyed hash to generate the same
890 | mask and recovers the process ID. (Note that this approach is conceptually
891 | similar to QUIC header protection). It is important that the server also appends
892 | the process ID to the server ID in the plaintext, so that different processes do
893 | not generate the same ciphertext. The load balancer will consider this data to
894 | be part of the nonce.
895 |
896 | ## Moving connections between servers
897 |
898 | Some deployments may transparently move a connection from one server to another.
899 | The means of transferring connection state between servers is out of scope of
900 | this document.
901 |
902 | To support a handover, a server involved in the transition could issue CIDs that
903 | map to the new server via a NEW_CONNECTION_ID frame, and retire CIDs associated
904 | with the old server using the "Retire Prior To" field in that frame.
905 |
906 | # Version Invariance of QUIC-LB {#version-invariance}
907 |
908 | The server ID encodings, and requirements for their handling, are designed to be
909 | QUIC version independent (see {{?RFC8999}}). A QUIC-LB load balancer will
910 | generally not require changes as servers deploy new versions of QUIC. However,
911 | there are several unlikely future design decisions that could impact the
912 | operation of QUIC-LB.
913 |
914 | A QUIC version might define limits on connection ID length that make some or all
915 | of the mechanisms in this document unusable. For example, a maximum connection
916 | ID length could be below the minimum necessary to use all or part of this
917 | specification; or, the minimum connection ID length could be larger than the
918 | largest value in this specification. Similarly, the length self-encoding
919 | specification cannot accommodate connection IDs longer than 32 bytes.
920 |
921 | The advanced fallback implementation supports a requirement to inspect
922 | version-specific elements of packets to make a routing decision, such as the
923 | Server Name Indication (SNI) extension in the TLS Client Hello. The format and
924 | cryptographic protection of this information may change in future versions or
925 | extensions of TLS or QUIC, and therefore this functionality is inherently
926 | version-dependent. Such a load balancer, when it receives packets from an
927 | unknown QUIC version, might misdirect initial packets to the wrong tenant. While
928 | this can be inefficient, the design in this document preserves the ability for
929 | tenants to deploy new versions provided they have an out-of-band means of
930 | providing a connection ID for the client to use.
931 |
932 | {{load-balancer-forwarding}} provides guidance about how load balancers should
933 | handle unroutable DCIDs. This guidance, and the implementation of an algorithm
934 | to handle these DCIDs, rests on some assumptions about packets that contain
935 | client-generated DCIDs that are not specified in RFC 8999:
936 |
937 | 1. they do not have short headers;
938 | 1. the 4-tuple remains constant;
939 | 1. if the load-balancer uses the Advanced Fallback Algorithm, the packets have
940 | a constant Source Connection ID.
941 |
942 | While this document does not update the commitments in {{RFC8999}}, the
943 | additional assumptions are minimal and narrowly scoped, and provide a likely
944 | set of constants that load balancers can use with minimal risk of
945 | version-dependence.
946 |
947 | If these assumptions are not valid, this specification is likely to lead to loss
948 | of packets that contain unroutable DCIDs, and in extreme cases connection
949 | failure. A QUIC version that violates the assumptions in this section therefore
950 | cannot be safely deployed with a load balancer that follows this specification.
951 | An updated or alternative version of this specification might address these
952 | shortcomings for such a QUIC version.
953 |
954 | # Security Considerations {#security-considerations}
955 |
956 | QUIC-LB is intended to prevent linkability. Attacks would therefore attempt to
957 | subvert this purpose.
958 |
959 | Note that without a key for the encoding, QUIC-LB makes no attempt to obscure
960 | the server mapping, and therefore does not address these concerns. Without a
961 | key, QUIC-LB merely allows consistent CID encoding for compatibility across a
962 | network infrastructure, which makes QUIC robust to NAT rebinding. Servers that
963 | are encoding their server ID without a key algorithm SHOULD only use it to
964 | generate new CIDs for the Server Initial Packet and SHOULD NOT send CIDs in QUIC
965 | NEW_CONNECTION_ID frames, except that it sends one new Connection ID in the
966 | event of config rotation {{config-rotation}}. Doing so might falsely suggest to
967 | the client that said CIDs were generated in a secure fashion.
968 |
969 | A linkability attack would find some means of determining that two connection
970 | IDs route to the same server. Due to the limitations of measures at the QUIC layer,
971 | there is no scheme that strictly prevents linkability for all traffic patterns.
972 |
973 | To see why, consider two limits. At one extreme, one client is connected to the
974 | server pool and migrates its address. An observer can easily link the two
975 | addresses, and there is no remedy at the QUIC layer.
976 |
977 | At the other extreme, a very large number of clients are connected to each
978 | server, and they all migrate address constantly. At this limit, even an
979 | unencrypted server ID encoding is unlikely to definitively link two addresses.
980 |
981 | Therefore, efforts to frustrate any analysis of server ID encoding have
982 | diminishing returns. Nevertheless, this specification seeks to minimize the
983 | probability two addresses can be linked.
984 |
985 | ## Attackers not between the load balancer and server
986 |
987 | Any attacker might open a connection to the server infrastructure and
988 | aggressively simulate migration to obtain a large sample of IDs that map to the
989 | same server. It could then apply analytical techniques to try to obtain the
990 | server encoding.
991 |
992 | An encrypted encoding provides robust protection against this. An unencrypted
993 | one provides none.
994 |
995 | Were this analysis to obtain the server encoding, then on-path observers might
996 | apply this analysis to correlating different client IP addresses.
997 |
998 | ## Attackers between the load balancer and server
999 |
1000 | Attackers in this privileged position are intrinsically able to map two
1001 | connection IDs to the same server. These algorithms ensure that two connection
1002 | IDs for the same connection cannot be identified as such as long as the server
1003 | chooses the first octet and any plaintext nonce correctly.
1004 |
1005 | ## Multiple Configuration IDs {#multiple-configs}
1006 |
1007 | During the period in which there are multiple deployed configuration IDs (see
1008 | {{config-rotation}}), there is a slight increase in linkability. The server
1009 | space is effectively divided into segments with CIDs that have different config
1010 | rotation bits. Entities that manage servers SHOULD strive to minimize these
1011 | periods by quickly deploying new configurations across the server pool.
1012 |
1013 | ## Limited configuration scope
1014 |
1015 | A simple deployment of QUIC-LB in a cloud provider might use the same global
1016 | QUIC-LB configuration across all its load balancers that route to customer
1017 | servers. An attacker could then simply become a customer, obtain the
1018 | configuration, and then extract server IDs of other customers' connections at
1019 | will.
1020 |
1021 | To avoid this, the configuration agent SHOULD issue QUIC-LB configurations to
1022 | mutually distrustful servers that have different keys for encryption
1023 | algorithms. In many cases, the load balancers can distinguish these
1024 | configurations by external IP address.
1025 |
1026 | However, assigning multiple entities to an IP address is complementary to
1027 | concealing DNS requests (e.g., DoH {{?RFC8484}}) and the TLS Server Name
1028 | Indicator (SNI) ({{?I-D.ietf-tls-esni}}) to obscure the ultimate destination
1029 | of traffic. While the load balancer's fallback algorithm
1030 | ({{fallback-algorithm}}) can use the SNI to make a routing decision on the
1031 | first packet, there are three ways to route subsequent packets:
1032 |
1033 | * all co-tenants can use the same QUIC-LB configuration, leaking the server
1034 | mapping to each other as described above;
1035 |
1036 | * co-tenants can be issued one of up to seven configurations distinguished by
1037 | the config rotation bits ({{config-rotation}}), exposing information about the
1038 | target domain to the entire network; or
1039 |
1040 | * tenants can use the 0b111 codepoint in their CIDs (in which case they SHOULD
1041 | disable migration in their connections), which neutralizes the value of
1042 | QUIC-LB but preserves privacy.
1043 |
1044 | When configuring QUIC-LB, administrators evaluate the privacy tradeoff by
1045 | considering the relative value of each of these properties, given the trust
1046 | model between tenants, the presence of methods to obscure the domain name, and
1047 | value of address migration in the tenant use cases.
1048 |
1049 | As the plaintext algorithm makes no attempt to conceal the server mapping,
1050 | these deployments MAY simply use a common configuration.
1051 |
1052 | ## Stateless Reset Oracle
1053 |
1054 | Section 21.9 of {{RFC9000}} discusses the Stateless Reset Oracle attack. For a
1055 | server deployment to be vulnerable, an attacking client must be able to cause
1056 | two packets with the same Destination CID to arrive at two different servers
1057 | that share the same cryptographic context for Stateless Reset tokens. As QUIC-LB
1058 | requires deterministic routing of DCIDs over the life of a connection, it is a
1059 | sufficient means of avoiding an Oracle without additional measures.
1060 |
1061 | Note also that when a server starts using a new QUIC-LB config rotation
1062 | codepoint, new CIDs might not be unique with respect to previous configurations
1063 | that occupied that codepoint, and therefore different clients may have observed
1064 | the same CID and stateless reset token. A straightforward method of managing
1065 | stateless reset keys is to maintain a separate key for each config rotation
1066 | codepoint, and replace each key when the configuration for that codepoint
1067 | changes. Thus, when a server transitions from one config to another, it will
1068 | be able to generate correct tokens for connections using either type of CID.
1069 |
1070 | ## Connection ID Entropy {#cid-entropy}
1071 |
1072 | If a server ever reuses a nonce in generating a CID for a given configuration,
1073 | it risks exposing sensitive information. Given the same server ID, the CID will
1074 | be identical (aside from a possible difference in the first octet). This can
1075 | risk exposure of the QUIC-LB key. If two clients receive the same connection ID,
1076 | they also have each other's stateless reset token unless that key has changed in
1077 | the interim.
1078 |
1079 | The encrypted mode needs to generate different cipher text for each generated
1080 | Connection ID instance to protect the Server ID. To do so, at least four octets
1081 | of the CID are reserved for a nonce that, if used only once, will result in
1082 | unique cipher text for each Connection ID.
1083 |
1084 | If servers simply increment the nonce by one with each generated connection ID,
1085 | then it is safe to use the existing keys until any server's nonce counter
1086 | exhausts the allocated space and rolls over. To maximize entropy, servers SHOULD
1087 | start with a random nonce value, in which case the configuration is usable until
1088 | the nonce value wraps around to zero and then reaches the initial value again.
1089 |
1090 | Whether or not it implements the counter method, the server MUST NOT reuse a
1091 | nonce until it switches to a configuration with new keys.
1092 |
1093 | Servers are forbidden from generating linkable plaintext nonces, because
1094 | observable correlations between plaintext nonces would provide trivial
1095 | linkability between individual connections, rather than just to a common server.
1096 |
1097 | For any algorithm, configuration agents SHOULD implement an out-of-band method
1098 | to discover when servers are in danger of exhausting their nonce space, and
1099 | SHOULD respond by issuing a new configuration. A server that has exhausted its
1100 | nonces MUST either switch to a different configuration, or if none exists, use
1101 | the 4-tuple routing config rotation codepoint.
1102 |
1103 | When sizing a nonce that is to be randomly generated, the configuration agent
1104 | SHOULD consider that a server generating an N-bit nonce will create a duplicate
1105 | about every 2^(N/2) attempts, and therefore compare the expected rate at which
1106 | servers will generate CIDs with the lifetime of a configuration.
1107 |
1108 | ## Distinguishing Attacks {#distinguishing-attacks}
1109 |
1110 | The Four Pass Encryption algorithm is structured as a 4-round Feistel network
1111 | with a non-bijective round function. As such, it does not offer a very high
1112 | security level against distinguishing attacks, as explained in [Patarin2008].
1113 | Attackers can mount these attacks if they are in possession of O(SQRT(len/2))
1114 | pairs of ciphertext and known corresponding plain text, where "len" is the
1115 | sum of the lengths of the Server ID and the Nonce.
1116 |
1117 | The authors considered increasing the number of passes from 4 to 12,
1118 | which would definitely block these attacks. However, this would require
1119 | 12 rounds of AES decryption by load balancers accessing the CID, a cost deemed
1120 | prohibitive in the planned deployments.
1121 |
1122 | The attacks described in [Patarin2008] rely on known plain text. In a normal
1123 | deployment, the plain text is only known by the server that generates the ID
1124 | and by the load balancer that decrypts the content of the CID. Attackers
1125 | would have to compensate by guesses about the allocation of server identifiers
1126 | or the generation of nonces. These attacks are thus mitigated by making nonces
1127 | hard to guess, as specified in {{cid-entropy}}, and by rules related to mixed
1128 | deployments that use both clear text CID and encrypted CID, for example when
1129 | transitioning from clear text to encryption. Such deployments MUST use different
1130 | server ID allocations for the clear text and the encrypted versions.
1131 |
1132 | These attacks cannot be mounted against the Single Pass Encryption algorithm.
1133 |
1134 | ## Early deletion of load balancer connection state
1135 |
1136 | Potential vulnerabilities related to heuristics that delete per-connection state
1137 | are described in {{per-connection-state}}. Under certain assumptions about
1138 | server configuration and fallback algorithm, this state might be critical to
1139 | maintaining connectivity. Under other assumptions, the state provides robustness
1140 | to improbable network events.
1141 |
1142 | # IANA Considerations
1143 |
1144 | There are no IANA requirements.
1145 |
1146 | --- back
1147 |
1148 | # QUIC-LB YANG Model {#yang-model}
1149 |
1150 | These YANG models conform to {{?RFC6020}} and express a complete QUIC-LB
1151 | configuration. There is one model for the server and one for the middlebox
1152 | (i.e., the load balancer and/or Retry Service).
1153 |
1154 | ~~~
1155 | module ietf-quic-lb-server {
1156 | yang-version "1.1";
1157 | namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb";
1158 | prefix "quic-lb";
1159 |
1160 | import ietf-yang-types {
1161 | prefix yang;
1162 | reference
1163 | "RFC 6991: Common YANG Data Types.";
1164 | }
1165 |
1166 | import ietf-inet-types {
1167 | prefix inet;
1168 | reference
1169 | "RFC 6991: Common YANG Data Types.";
1170 | }
1171 |
1172 | organization
1173 | "IETF QUIC Working Group";
1174 |
1175 | contact
1176 | "WG Web:
1177 | WG List:
1178 |
1179 | Authors: Martin Duke (martin.h.duke at gmail dot com)
1180 | Nick Banks (nibanks at microsoft dot com)
1181 | Christian Huitema (huitema at huitema.net)";
1182 |
1183 | description
1184 | "This module enables the explicit cooperation of QUIC servers
1185 | with trusted intermediaries without breaking important
1186 | protocol features.
1187 |
1188 | Copyright (c) 2022 IETF Trust and the persons identified as
1189 | authors of the code. All rights reserved.
1190 |
1191 | Redistribution and use in source and binary forms, with or
1192 | without modification, is permitted pursuant to, and subject to
1193 | the license terms contained in, the Simplified BSD License set
1194 | forth in Section 4.c of the IETF Trust's Legal Provisions
1195 | Relating to IETF Documents
1196 | (https://trustee.ietf.org/license-info).
1197 |
1198 | This version of this YANG module is part of RFC XXXX
1199 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
1200 | for full legal notices.
1201 |
1202 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
1203 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
1204 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as
1205 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
1206 | they appear in all capitals, as shown here.";
1207 |
1208 | revision "2023-07-14" {
1209 | description
1210 | "Updated to design in version 17 of the draft";
1211 | reference
1212 | "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs";
1213 | }
1214 |
1215 | container quic-lb {
1216 | presence "The container for QUIC-LB configuration.";
1217 |
1218 | description
1219 | "QUIC-LB container.";
1220 |
1221 | typedef quic-lb-key {
1222 | type yang:hex-string {
1223 | length 47;
1224 | }
1225 | description
1226 | "This is a 16-byte key, represented with 47 bytes";
1227 | }
1228 |
1229 | leaf config-id {
1230 | type uint8 {
1231 | range "0..6";
1232 | }
1233 | mandatory true;
1234 | description
1235 | "Identifier for this CID configuration.";
1236 | }
1237 |
1238 | leaf first-octet-encodes-cid-length {
1239 | type boolean;
1240 | default false;
1241 | description
1242 | "If true, the five least significant bits of the first
1243 | CID octet encode the CID length minus one.";
1244 | }
1245 |
1246 | leaf server-id-length {
1247 | type uint8 {
1248 | range "1..15";
1249 | }
1250 | must '. <= (19 - ../nonce-length)' {
1251 | error-message
1252 | "Server ID and nonce lengths must sum
1253 | to no more than 19.";
1254 | }
1255 | mandatory true;
1256 | description
1257 | "Length (in octets) of a server ID. Further range-limited
1258 | by nonce-length.";
1259 | }
1260 |
1261 | leaf nonce-length {
1262 | type uint8 {
1263 | range "4..18";
1264 | }
1265 | mandatory true;
1266 | description
1267 | "Length, in octets, of the nonce. Short nonces mean there
1268 | will be frequent configuration updates.";
1269 | }
1270 |
1271 | leaf cid-key {
1272 | type quic-lb-key;
1273 | description
1274 | "Key for encrypting the connection ID.";
1275 | }
1276 |
1277 | leaf server-id {
1278 | type yang:hex-string;
1279 | must "string-length(.) = 3 * ../server-id-length - 1";
1280 | mandatory true;
1281 | description
1282 | "An allocated server ID of server-id-length octets";
1283 | }
1284 | }
1285 | }
1286 | ~~~
1287 |
1288 | ~~~
1289 | module ietf-quic-lb-middlebox {
1290 | yang-version "1.1";
1291 | namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb";
1292 | prefix "quic-lb";
1293 |
1294 | import ietf-yang-types {
1295 | prefix yang;
1296 | reference
1297 | "RFC 6991: Common YANG Data Types.";
1298 | }
1299 |
1300 | import ietf-inet-types {
1301 | prefix inet;
1302 | reference
1303 | "RFC 6991: Common YANG Data Types.";
1304 | }
1305 |
1306 | organization
1307 | "IETF QUIC Working Group";
1308 |
1309 | contact
1310 | "WG Web:
1311 | WG List:
1312 |
1313 | Authors: Martin Duke (martin.h.duke at gmail dot com)
1314 | Nick Banks (nibanks at microsoft dot com)
1315 | Christian Huitema (huitema at huitema.net)";
1316 |
1317 | description
1318 | "This module enables the explicit cooperation of QUIC servers
1319 | with trusted intermediaries without breaking important
1320 | protocol features.
1321 |
1322 | Copyright (c) 2021 IETF Trust and the persons identified as
1323 | authors of the code. All rights reserved.
1324 |
1325 | Redistribution and use in source and binary forms, with or
1326 | without modification, is permitted pursuant to, and subject to
1327 | the license terms contained in, the Simplified BSD License set
1328 | forth in Section 4.c of the IETF Trust's Legal Provisions
1329 | Relating to IETF Documents
1330 | (https://trustee.ietf.org/license-info).
1331 |
1332 | This version of this YANG module is part of RFC XXXX
1333 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
1334 | for full legal notices.
1335 |
1336 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
1337 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
1338 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as
1339 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
1340 | they appear in all capitals, as shown here.";
1341 |
1342 | revision "2021-02-11" {
1343 | description
1344 | "Updated to design in version 13 of the draft";
1345 | reference
1346 | "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs";
1347 | }
1348 |
1349 | container quic-lb {
1350 | presence "The container for QUIC-LB configuration.";
1351 |
1352 | description
1353 | "QUIC-LB container.";
1354 |
1355 | typedef quic-lb-key {
1356 | type yang:hex-string {
1357 | length 47;
1358 | }
1359 | description
1360 | "This is a 16-byte key, represented with 47 bytes";
1361 | }
1362 |
1363 | list cid-configs {
1364 | key "config-rotation-bits";
1365 | description
1366 | "List up to seven load balancer configurations";
1367 |
1368 | leaf config-rotation-bits {
1369 | type uint8 {
1370 | range "0..6";
1371 | }
1372 | mandatory true;
1373 | description
1374 | "Identifier for this CID configuration.";
1375 | }
1376 |
1377 | leaf server-id-length {
1378 | type uint8 {
1379 | range "1..15";
1380 | }
1381 | must '. <= (19 - ../nonce-length)' {
1382 | error-message
1383 | "Server ID and nonce lengths must sum to
1384 | no more than 19.";
1385 | }
1386 | mandatory true;
1387 | description
1388 | "Length (in octets) of a server ID. Further range-limited
1389 | by nonce-length.";
1390 | }
1391 |
1392 | leaf cid-key {
1393 | type quic-lb-key;
1394 | description
1395 | "Key for encrypting the connection ID.";
1396 | }
1397 |
1398 | leaf nonce-length {
1399 | type uint8 {
1400 | range "4..18";
1401 | }
1402 | mandatory true;
1403 | description
1404 | "Length, in octets, of the nonce. Short nonces mean there
1405 | will be frequent configuration updates.";
1406 | }
1407 |
1408 | list server-id-mappings {
1409 | key "server-id";
1410 | description "Statically allocated Server IDs";
1411 |
1412 | leaf server-id {
1413 | type yang:hex-string;
1414 | must "string-length(.) = 3 * ../../server-id-length - 1";
1415 | mandatory true;
1416 | description
1417 | "An allocated server ID";
1418 |
1419 | }
1420 |
1421 | leaf server-address {
1422 | type inet:ip-address;
1423 | mandatory true;
1424 | description
1425 | "Destination address corresponding to the server ID";
1426 | }
1427 | }
1428 | }
1429 | }
1430 | }
1431 | ~~~
1432 |
1433 | ## Tree Diagram
1434 |
1435 | This summary of the YANG models uses the notation in {{?RFC8340}}.
1436 |
1437 | ~~~
1438 | module: ietf-quic-lb-server
1439 | +--rw quic-lb!
1440 | +--rw config-id uint8
1441 | +--rw first-octet-encodes-cid-length? boolean
1442 | +--rw server-id-length uint8
1443 | +--rw nonce-length uint8
1444 | +--rw cid-key? quic-lb-key
1445 | +--rw server-id yang:hex-string
1446 | ~~~
1447 |
1448 | ~~~
1449 | module: ietf-quic-lb-middlebox
1450 | +--rw quic-lb!
1451 | +--rw cid-configs* [config-rotation-bits]
1452 | | +--rw config-rotation-bits uint8
1453 | | +--rw server-id-length uint8
1454 | | +--rw cid-key? quic-lb-key
1455 | | +--rw nonce-length uint8
1456 | | +--rw server-id-mappings* [server-id]
1457 | | +--rw server-id yang:hex-string
1458 | | +--rw server-address inet:ip-address
1459 | ~~~
1460 |
1461 | # Load Balancer Test Vectors {#test-vectors}
1462 |
1463 | This section uses the following abbreviations:
1464 |
1465 | ~~~
1466 | cid Connection ID
1467 | cr_bits Config Rotation Bits
1468 | LB Load Balancer
1469 | sid Server ID
1470 | ~~~
1471 |
1472 | In all cases, the server is configured to encode the CID length.
1473 |
1474 | ## Unencrypted CIDs
1475 |
1476 | ~~~pseudocode
1477 | cr_bits sid nonce cid
1478 | 0 c4605e 4504cc4f 07c4605e4504cc4f
1479 | 1 350d28b420 3487d970b 20a350d28b4203487d970b
1480 | ~~~
1481 |
1482 | ## Encrypted CIDs
1483 |
1484 | The key for all of these examples is 8f95f09245765f80256934e50c66207f. The
1485 | test vectors include an example that uses the 16-octet single-pass special
1486 | case, as well as an instance where the server ID length exceeds the nonce
1487 | length, requiring a fourth decryption pass.
1488 |
1489 | ~~~pseudocode
1490 | cr_bits sid nonce cid
1491 | 0 ed793a ee080dbf 0720b1d07b359d3c
1492 | 1 ed793a51d49b8f5fab65 ee080dbf48
1493 | 2fcc381bc74cb4fbad2823a3d1f8fed2
1494 | 2 ed793a51d49b8f5f ee080dbf48c0d1e5
1495 | 504dd2d05a7b0de9b2b9907afb5ecf8cc3
1496 | 3 ed793a51d49b8f5fab ee080dbf48c0d1e55d
1497 | 125779c9cc86beb3a3a4a3ca96fce4bfe0cdbc
1498 | ~~~
1499 |
1500 | # Interoperability with DTLS over UDP
1501 |
1502 | Some environments may contain DTLS traffic as well as QUIC operating over UDP,
1503 | which may be hard to distinguish.
1504 |
1505 | In most cases, the packet parsing rules above will cause a QUIC-LB load
1506 | balancer to route DTLS traffic in an appropriate way. DTLS 1.3 implementations
1507 | that use the connection_id extension {{?RFC9146}} might use the techniques in
1508 | this document to generate connection IDs and achieve robust routability for DTLS
1509 | associations if they meet a few additional requirements. This non-normative
1510 | appendix describes this interaction.
1511 |
1512 | ## DTLS 1.0 and 1.2
1513 |
1514 | DTLS 1.0 {{?RFC4347}} and 1.2 {{?RFC6347}} use packet formats that a QUIC-LB
1515 | router will interpret as short header packets with CIDs that request 4-tuple
1516 | routing. As such, they will route such packets consistently as long as the
1517 | 4-tuple does not change. Note that DTLS 1.0 has been deprecated by the IETF.
1518 |
1519 | The first octet of every DTLS 1.0 or 1.2 datagram contains the content type.
1520 | A QUIC-LB load balancer will interpret any content type less than 128 as a short
1521 | header packet, meaning that the subsequent octets should contain a connection
1522 | ID.
1523 |
1524 | Existing TLS content types comfortably fit in the range below 128. Assignment of
1525 | codepoints greater than 64 would require coordination in accordance with
1526 | {{?RFC7983}}, and anyway would likely create problems demultiplexing DTLS and
1527 | version 1 of QUIC. Therefore, this document believes it is extremely unlikely
1528 | that TLS content types of 128 or greater will be assigned. Nevertheless, such
1529 | an assignment would cause a QUIC-LB load balancer to interpret the packet as a
1530 | QUIC long header with an essentially random connection ID, which is likely to be
1531 | routed irregularly.
1532 |
1533 | The second octet of every DTLS 1.0 or 1.2 datagram is the bitwise complement
1534 | of the DTLS Major version (i.e., version 1.x = 0xfe). A QUIC-LB load balancer
1535 | will interpret this as a connection ID that requires 4-tuple based load
1536 | balancing, meaning that the routing will be consistent as long as the 4-tuple
1537 | remains the same.
1538 |
1539 | {{?RFC9146}} defines an extension to add connection IDs to DTLS 1.2.
1540 | Unfortunately, a QUIC-LB load balancer will not correctly parse the connection
1541 | ID and will continue 4-tuple routing. A modified QUIC-LB load balancer that
1542 | correctly identifies DTLS and parses a DTLS 1.2 datagram for the connection ID
1543 | is outside the scope of this document.
1544 |
1545 | ## DTLS 1.3
1546 |
1547 | DTLS 1.3 {{?RFC9147}} changes the structure of datagram headers in relevant
1548 | ways.
1549 |
1550 | Handshake packets continue to have a TLS content type in the first octet and
1551 | 0xfe in the second octet, so they will be 4-tuple routed, which should not
1552 | present problems for likely NAT rebinding or address change events.
1553 |
1554 | Non-handshake packets always have zero in their most significant bit and will
1555 | therefore always be treated as QUIC short headers. If the connection ID is
1556 | present, it follows in the succeeding octets. Therefore, a DTLS 1.3 association
1557 | where the server utilizes Connection IDs and the encodings in this document
1558 | will be routed correctly in the presence of client address and port changes.
1559 |
1560 | However, if the client does not include the connection_id extension in its
1561 | ClientHello, the server is unable to use connection IDs. In this case,
1562 | non-handshake packets will appear to contain random connection IDs and be routed
1563 | randomly. Thus, unmodified QUIC-LB load balancers will not work with DTLS 1.3
1564 | if the client does not advertise support for connection IDs, or the server does
1565 | not request the use of a compliant connection ID.
1566 |
1567 | A QUIC-LB load balancer might be modified to identify DTLS 1.3 packets and
1568 | correctly parse the fields to identify when there is no connection ID and
1569 | revert to 4-tuple routing, removing the server requirement above. However, such
1570 | a modification is outside the scope of this document, and classifying some
1571 | packets as DTLS might be incompatible with future versions of QUIC.
1572 |
1573 | ## Future Versions of DTLS
1574 |
1575 | As DTLS does not have an IETF consensus document that defines what parts of
1576 | DTLS will be invariant in future versions, it is difficult to speculate about
1577 | the applicability of this section to future versions of DTLS.
1578 |
1579 | # Acknowledgments
1580 |
1581 | Manasi Deval, Erik Fuller, Toma Gavrichenkov, Greg Greenway, Jana Iyengar,
1582 | Subodh Iyengar, Stefan Kolbl, Ladislav Lhotka, Jan Lindblad, Ling Tao Nju,
1583 | Ilari Liusvaara, Kazuho Oku, Udip Pant, Zaheduzzaman Sarker, Ian Swett, Andy
1584 | Sykes, Martin Thomson, Dmitri Tikhonov, Victor Vasiliev, Xingcan Lan, Yu Zhu,
1585 | and William Zeng Ke all provided useful input to this document.
1586 |
1587 | # Change Log
1588 |
1589 | > **RFC Editor's Note:** Please remove this section prior to
1590 | > publication of a final version of this document.
1591 |
1592 | ## since draft-ietf-quic-load-balancers-20
1593 |
1594 | - Changed definition of Unroutable DCIDs, and rewrote sections on config
1595 | failover and fallback routing to avoid misrouted connections.
1596 | - Deleted text on dropping packets
1597 | - Rewrote version invariance section
1598 |
1599 | ## since draft-ietf-quic-load-balancers-19
1600 |
1601 | - Further guidance on multiple server processes/threads
1602 | - Fixed error in encryption example.
1603 | - Clarified fallback algorithms and known QUIC versions.
1604 |
1605 | ## since draft-ietf-quic-load-balancers-18
1606 |
1607 | - Rearranged the output of the expand function to reduce CPU load of decrypt
1608 |
1609 | ## since draft-ietf-quic-load-balancers-17
1610 |
1611 | - fixed regressions in draft-17 publication
1612 |
1613 | ## since draft-ietf-quic-load-balancers-16
1614 |
1615 | - added a config ID bit (now there are 3).
1616 |
1617 | ## since draft-ietf-quic-load-balancers-15
1618 |
1619 | - aasvg fixes.
1620 |
1621 | ## since draft-ietf-quic-load-balancers-14
1622 |
1623 | - Revised process demultiplexing text
1624 | - Restored lost text in Security Considerations
1625 | - Editorial comments from Martin Thomson.
1626 | - Tweaked 4-pass algorithm to avoid accidental plaintext similarities
1627 |
1628 | ## since draft-ietf-quic-load-balancers-13
1629 |
1630 | - Incorporated Connection ID length in argument of truncate function
1631 | - Added requirements for codepoint 0b11.
1632 | - Describe Distinguishing Attack in Security Considerations.
1633 | - Added non-normative language about server process demultiplexers
1634 |
1635 | ## since draft-ietf-quic-load-balancers-12
1636 |
1637 | - Separated Retry Service design into a separate draft
1638 |
1639 | ## since draft-ietf-quic-load-balancers-11
1640 |
1641 | - Fixed mistakes in test vectors
1642 |
1643 | ## since draft-ietf-quic-load-balancers-10
1644 |
1645 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to
1646 | implement
1647 | - Revised test vectors
1648 | - Split YANG model into a server and middlebox version
1649 |
1650 | ## since draft-ietf-quic-load-balancers-09
1651 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and
1652 | "Encrypted Long"
1653 | - Added section on per-connection state
1654 | - Changed "Encrypted Short" to a 4-pass algorithm.
1655 | - Recommended a random initial nonce when incrementing.
1656 | - Clarified what SNI LBs should do with unknown QUIC versions.
1657 |
1658 | ## since draft-ietf-quic-load-balancers-08
1659 | - Eliminate Dynamic SID allocation
1660 | - Eliminated server use bytes
1661 |
1662 | ## since draft-ietf-quic-load-balancers-07
1663 | - Shortened SSCID nonce minimum length to 4 bytes
1664 | - Removed RSCID from Retry token body
1665 | - Simplified CID formats
1666 | - Shrunk size of SID table
1667 |
1668 | ## since draft-ietf-quic-load-balancers-06
1669 | - Added interoperability with DTLS
1670 | - Changed "non-compliant" to "unroutable"
1671 | - Changed "arbitrary" algorithm to "fallback"
1672 | - Revised security considerations for mistrustful tenants
1673 | - Added retry service considerations for non-Initial packets
1674 |
1675 | ## since draft-ietf-quic-load-balancers-05
1676 | - Added low-config CID for further discussion
1677 | - Complete revision of shared-state Retry Token
1678 | - Added YANG model
1679 | - Updated configuration limits to ensure CID entropy
1680 | - Switched to notation from quic-transport
1681 |
1682 | ## since draft-ietf-quic-load-balancers-04
1683 | - Rearranged the shared-state retry token to simplify token processing
1684 | - More compact timestamp in shared-state retry token
1685 | - Revised server requirements for shared-state retries
1686 | - Eliminated zero padding from the test vectors
1687 | - Added server use bytes to the test vectors
1688 | - Additional compliant DCID criteria
1689 |
1690 | ## since-draft-ietf-quic-load-balancers-03
1691 | - Improved Config Rotation text
1692 | - Added stream cipher test vectors
1693 | - Deleted the Obfuscated CID algorithm
1694 |
1695 | ## since-draft-ietf-quic-load-balancers-02
1696 | - Replaced stream cipher algorithm with three-pass version
1697 | - Updated Retry format to encode info for required TPs
1698 | - Added discussion of version invariance
1699 | - Cleaned up text about config rotation
1700 | - Added Reset Oracle and limited configuration considerations
1701 | - Allow dropped long-header packets for known QUIC versions
1702 |
1703 | ## since-draft-ietf-quic-load-balancers-01
1704 | - Test vectors for load balancer decoding
1705 | - Deleted remnants of in-band protocol
1706 | - Light edit of Retry Services section
1707 | - Discussed load balancer chains
1708 |
1709 | ## since-draft-ietf-quic-load-balancers-00
1710 | - Removed in-band protocol from the document
1711 |
1712 | ## Since draft-duke-quic-load-balancers-06
1713 | - Switch to IETF WG draft.
1714 |
1715 | ## Since draft-duke-quic-load-balancers-05
1716 | - Editorial changes
1717 | - Made load balancer behavior independent of QUIC version
1718 | - Got rid of token in stream cipher encoding, because server might not have it
1719 | - Defined "non-compliant DCID" and specified rules for handling them.
1720 | - Added pseudocode for config schema
1721 |
1722 | ## Since draft-duke-quic-load-balancers-04
1723 | - Added standard for retry services
1724 |
1725 | ## Since draft-duke-quic-load-balancers-03
1726 | - Renamed Plaintext CID algorithm as Obfuscated CID
1727 | - Added new Plaintext CID algorithm
1728 | - Updated to allow 20B CIDs
1729 | - Added self-encoding of CID length
1730 |
1731 | ## Since draft-duke-quic-load-balancers-02
1732 | - Added Config Rotation
1733 | - Added failover mode
1734 | - Tweaks to existing CID algorithms
1735 | - Added Block Cipher CID algorithm
1736 | - Reformatted QUIC-LB packets
1737 |
1738 | ## Since draft-duke-quic-load-balancers-01
1739 | - Complete rewrite
1740 | - Supports multiple security levels
1741 | - Lightweight messages
1742 |
1743 | ## Since draft-duke-quic-load-balancers-00
1744 | - Converted to markdown
1745 | - Added variable length connection IDs
1746 |
--------------------------------------------------------------------------------
/draft-ietf-quic-retry-offload.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "QUIC Retry Offload"
3 | abbrev: QUIC Retry Offload
4 | docname: draft-ietf-quic-retry-offload-latest
5 | date: {DATE}
6 | category: std
7 | ipr: trust200902
8 | area: Transport
9 | workgroup: QUIC
10 |
11 | stand_alone: yes
12 | pi: [toc, sortrefs, symrefs, docmapping]
13 |
14 | author:
15 | -
16 | ins: M. Duke
17 | name: Martin Duke
18 | org: Google
19 | email: martin.h.duke@gmail.com
20 |
21 | -
22 | ins: N. Banks
23 | name: Nick Banks
24 | org: Microsoft
25 | email: nibanks@microsoft.com
26 |
27 | normative:
28 |
29 | TIME_T:
30 | title: "Open Group Standard: Vol. 1: Base Definitions, Issue 7"
31 | date: 2018
32 | seriesinfo: IEEE Std 1003.1
33 | target: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_16
34 |
35 | --- abstract
36 |
37 | QUIC uses Retry packets to reduce load on stressed servers, by forcing the
38 | client to prove ownership of its address before the server commits state.
39 | QUIC also has an anti-tampering mechanism to prevent the unauthorized injection
40 | of Retry packets into a connection. However, a server operator may want to
41 | offload production of Retry packets to an anti-Denial-of-Service agent or
42 | hardware accelerator. "Retry Offload" is a mechanism for coordination between
43 | a server and an external generator of Retry packets that can succeed despite
44 | the anti-tampering mechanism.
45 |
46 | --- middle
47 |
48 | # Introduction
49 |
50 | QUIC {{!RFC9000}} servers send Retry packets to avoid prematurely allocating
51 | resources when under stress, such as during a Denial of Service (DoS) attack.
52 | Because both Initial packets and Retry packets have weak authentication
53 | properties, the Retry packet contains an encrypted token that helps the client
54 | and server to validate, via transport parameters, that an attacker did not
55 | inject or modify a packet of either type for this connection attempt.
56 |
57 | However, a server under stress is less inclined to process incoming Initial
58 | packets and compute the Retry token in the first place. An analogous mechanism
59 | for TCP is syncookies {{?RFC4987}}. As TCP has weaker authentication properties
60 | than QUIC, syncookie generation can often be offloaded to a hardware device, or
61 | to an anti-Denial-of-Service provider that is topologically far from the
62 | protected server. As such an offload would behave exactly like an attacker,
63 | QUIC's authentication methods make such a capability impossible.
64 |
65 | This document seeks to enable offloading of Retry generation to QUIC via
66 | explicit coordination between servers and the hardware or provider offload,
67 | which this document refers to as a "Retry Offload." It has two different
68 | modes, to conform to two different use cases.
69 |
70 | The no-shared-state mode has minimal coordination and does not require key
71 | sharing. While operationally easier to configure and manage, it places severe
72 | constraints on the operational profile of the offload. In particular, the
73 | offload must control all ingress to the server and fail closed.
74 |
75 | The shared-state mode removes the operational constraints, but also requires
76 | more sophisticated key management.
77 |
78 | Both modes specify a common format for encoding information in the Retry token,
79 | so that the server can correctly populate the relevant transport parameter
80 | fields.
81 |
82 | ## Terminology
83 |
84 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
85 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
86 | interpreted as described in RFC 2119 {{?RFC2119}}.
87 |
88 | In this document, these words will appear with that interpretation only when in
89 | ALL CAPS. Lower case uses of these words are not to be interpreted as carrying
90 | significance described in RFC 2119.
91 |
92 | For brevity, "Connection ID" will often be abbreviated as "CID".
93 |
94 | A "Retry Offload" is a hardware or software device that is conceptually separate
95 | from a QUIC server that terminates QUIC connections. This document assumes that
96 | the Retry Offload and the server have an administrative relationship that allows
97 | them to accept common configuration.
98 |
99 | A "configuration agent" is some entity that determines the common configuration
100 | to be distributed to the servers and the Retry Offload.
101 |
102 | This document uses "QUIC" to refer to the protocol in QUIC version 1
103 | {{RFC9000}}. Retry offloads can be applied to other versions of QUIC that use
104 | Retry packets and have identical information requirements for Retry validation.
105 | However, note that source and destination connection IDs are the only relevant
106 | data fields that are invariant across QUIC versions {{?RFC8999}}.
107 |
108 | ## Notation
109 |
110 | All wire formats will be depicted using the notation defined in Section 1.3 of
111 | {{RFC9000}}.
112 |
113 | The example below illustrates the basic framework:
114 |
115 | ~~~
116 | Example Structure {
117 | One-bit Field (1),
118 | 7-bit Field with Fixed Value (7) = 61,
119 | Field with Variable-Length Integer (i),
120 | Arbitrary-Length Field (..),
121 | Variable-Length Field (8..24),
122 | Field With Minimum Length (16..),
123 | Field With Maximum Length (..128),
124 | [Optional Field (64)],
125 | Repeated Field (8) ...,
126 | }
127 | ~~~
128 | {: #fig-ex-format title="Example Format"}
129 |
130 | # Common Requirements {#common-requirements}
131 |
132 | Regardless of mechanism, a Retry Offload has an active mode, where it is
133 | generating Retry packets, and an inactive mode, where it is not, based on its
134 | assessment of server load and the likelihood an attack is underway. The choice
135 | of mode MAY be made on a per-packet or per-connection basis, through a
136 | stochastic process or based on client address.
137 |
138 | A configuration agent MUST distribute a list of QUIC versions the Retry Offload
139 | supports. It MAY also distribute either an "Allow-List" or a "Deny-List" of
140 | other QUIC versions. It MUST NOT distribute both an Allow-List and a Deny-List.
141 |
142 | The Allow-List or Deny-List MUST NOT include any versions included for Retry
143 | Offload support.
144 |
145 | The configuration agent MUST provide a means for the entity that controls the
146 | Retry Offload to report its supported version(s) to the configuration agent. If
147 | the entity has not reported this information, it MUST NOT activate the Retry
148 | Offload and the configuration agent MUST NOT distribute configuration that
149 | activates it.
150 |
151 | The configuration agent MAY delete versions from the final supported version
152 | list if policy does not require the Retry Offload to operate on those versions.
153 |
154 | The configuration agent MUST provide a means for the entities that control
155 | servers behind the Retry Offload to report either an Allow-List or a Deny-List.
156 |
157 | If all entities supply Allow-Lists, the consolidated list MUST be the union of
158 | these sets. If all entities supply Deny-Lists, the consolidated list MUST be
159 | the intersection of these sets.
160 |
161 | If entities provide a mixture of Allow-Lists and Deny-Lists, the consolidated
162 | list MUST be a Deny-List that is the intersection of all provided Deny-Lists and
163 | the inverses of all Allow-Lists.
164 |
165 | If no entities that control servers have reported Allow-Lists or Deny-Lists,
166 | the default is a Deny-List with the null set (i.e., all unsupported versions
167 | will be admitted). This preserves the future extensibility of QUIC.
168 |
169 | A Retry Offload MUST forward all packets for a QUIC version it does not
170 | support, unless that version is on the Deny-List or absent from the Allow-List
171 | (whichever is in use). Note that if servers support versions the Retry Offload
172 | does not, this may increase load on the servers.
173 |
174 | Note that future versions of QUIC might not have Retry packets, require
175 | different information in Retry, or use different packet type indicators.
176 |
177 | ## Consistent Treatment of Initials
178 |
179 | Retry Offloads SHOULD treat Initial packets from the same connection with a
180 | uniform policy. Initial packets of the first and second client flight can be
181 | difficult to distinguish without expensive decryption of the contents, which is
182 | unsuitable under the conditions of a DDoS attack. If the first packet of a
183 | connection is admitted without Retry, but the second triggers a Retry, that
184 | Retry packet will be ignored and the loss of an Initial coalesced with other
185 | packets can impair performance. In some situations, the client does not yet have
186 | handshake keys, and dropping further client Initial packets creates a deadlock
187 | where the connection cannot progress.
188 |
189 | The simplest means to ensure this is to require, when active, a Retry Token
190 | for all incoming Initial packets, and send a Retry packet otherwise. If the
191 | Retry Offload is to be more selective, one technique keeps state on which
192 | address/port 4-tuples have been admitted. Another would be to apply a secure
193 | hash to the source IP address, port, and connection ID to deterministically
194 | compute whether the Initial requires a Retry Token or not. These source
195 | values remain consistent over the handshake.
196 |
197 | However, even with these techniques there is a potential problem when a Retry
198 | Offload switches from inactive to active mode. The Retry Offload could admit
199 | the first packet while in inactive mode, and then drop subsequent Initials in
200 | active mode.
201 |
202 | If the Retry Offload is always on-path, it MAY keep state on incoming
203 | connections while in inactive mode to avoid this problem. If it cannot or will
204 | not keep such state, it SHOULD implement "transition mode" for an interval
205 | chosen to include the likely Initial packet exchange of most clients (200ms is a
206 | sensible default).
207 |
208 | In transition mode, Retry Offloads process Initial packets with Retry tokens
209 | as in active mode. When the Retry Offload receives an Initial packet with no
210 | token, it issues a Retry AND forwards the packet to the server. If the client
211 | has already received a packet from the server, it will ignore the Retry and the
212 | connection will progress normally. If not, the client will reconnect based on
213 | the Retry, the server's response to the first Initial will be discarded, and
214 | the connection will progress normally based on the client's second Initial.
215 | {{mid-handshake}} explores the various possible packet sequences in
216 | transition mode.
217 |
218 | Note that transition mode provides no actual DDoS relief to the server, so its
219 | duration should be as short as possible. The Retry Offload can choose not to
220 | implement transition mode and cause some client connections to fail.
221 |
222 | Servers operating behind a Retry Offload SHOULD implement a mechanism that
223 | operates whenever a client Initial arrives with a valid Retry token. If there
224 | is another connection with identical client Connection ID, IP, and Port, but
225 | with an unvalidated address, that connection is immediately and silently
226 | terminated. This mechanism eliminates incorrect connection state that is an
227 | artifact of transition mode, as explained in {{mid-handshake}}.
228 |
229 | ## Considerations for Non-Initial Packets
230 |
231 | Initial Packets are especially effective at consuming server resources
232 | because they cause the server to create connection state. Even when mitigating
233 | this load with Retry Packets, the act of validating an Initial Token and sending
234 | a Retry Packet is more expensive than the response to a non-Initial packet with
235 | an unknown Connection ID: simply dropping it and/or sending a Stateless Reset.
236 |
237 | Nevertheless, a Retry Offload in Active Mode might desire to shield servers
238 | from non-Initial packets that do not correspond to a previously admitted
239 | Initial Packet. This has a number of considerations.
240 |
241 | * If a Retry Offload maintains no per-flow state, it cannot distinguish between
242 | valid and invalid non-Initial packets and MUST forward all non-Initial Packets
243 | to the server.
244 |
245 | * For QUIC versions that the Retry Offload does not support and that are on the
246 | Allow-List (or absent from the Deny-List), the Retry Offload cannot distinguish
247 | Initial Packets from other long headers and therefore MUST admit all long
248 | headers.
249 |
250 | * If a Retry Offload keeps per-flow state, it can identify 4-tuples that have
251 | been previously approved, admit non-Initial packets from those flows, and
252 | drop all others. However, dropping short headers will effectively break Address
253 | Migration and NAT Rebinding when in Active Mode, as post-migration packets will
254 | arrive with a previously unknown 4-tuple. This policy will also break connection
255 | attempts using any new QUIC versions that begin connections with a short header.
256 |
257 | * If a Retry Offload is integrated with a QUIC-LB routable load balancer
258 | {{?I-D.ietf-quic-load-balancers}}, it can verify that the Destination Connection
259 | ID is routable, and only admit non-Initial packets with routable DCIDs. As the
260 | Connection ID encoding is invariant across QUIC versions, the Retry Offload can
261 | do this for all short headers.
262 |
263 | Nothing in this section prevents Retry Offloads from making basic syntax
264 | correctness checks on packets with QUIC versions that it understands (e.g.,
265 | enforcing the Initial Packet datagram size minimum in version 1).
266 |
267 | # No-Shared-State Retry Offload
268 |
269 | The no-shared-state Retry Offload requires no coordination, except that the
270 | server must be configured to accept this offload and know which QUIC versions
271 | the Retry Offload supports. The scheme uses the first bit of the token to
272 | distinguish between tokens from Retry packets (codepoint '0') and tokens from
273 | NEW_TOKEN frames (codepoint '1').
274 |
275 | ## Configuration Agent Actions
276 |
277 | See {{common-requirements}}.
278 |
279 | ## Offload Requirements {#nss-offload-requirements}
280 |
281 | A no-shared-state Retry Offload MUST be present on all paths from potential
282 | clients to the server. These paths MUST fail to pass QUIC traffic should the
283 | offload fail for any reason. That is, if the offload is not operational, the
284 | server MUST NOT be exposed to client traffic. Otherwise, servers that have
285 | already disabled their Retry capability would be vulnerable to attack.
286 |
287 | The path between offload and server MUST be free of any potential attackers.
288 | Note that this and other requirements above severely restrict the operational
289 | conditions in which a no-shared-state Retry Offload can safely operate.
290 |
291 | Retry tokens generated by the offload MUST have the format below.
292 |
293 | ~~~
294 | No-Shared-State Retry Offload Token {
295 | Token Type (1) = 0,
296 | ODCIL (7) = 8..20,
297 | Original Destination Connection ID (64..160),
298 | Opaque Data (..),
299 | }
300 | ~~~
301 | {: #nss-retry-offload-token-format title="Format of no-shared-state Retry Offload tokens"}
302 |
303 | The first bit of retry tokens generated by the offload MUST be zero. The token
304 | has the following additional fields:
305 |
306 | ODCIL: The length of the original destination connection ID from the triggering
307 | Initial packet. This is in cleartext to be readable for the server, but
308 | authenticated later in the token. The Retry Offload SHOULD reject any token
309 | in which the value is less than 8.
310 |
311 | Original Destination Connection ID: This is also in cleartext and authenticated
312 | later.
313 |
314 | Opaque Data: This data contains the information necessary to authenticate the
315 | Retry token in accordance with the QUIC specification. A straightforward
316 | implementation would encode the Retry Source Connection ID, client IP address,
317 | and a timestamp in the Opaque Data. A more space-efficient implementation would
318 | use the Retry Source Connection ID and Client IP as associated data in an
319 | encryption operation, and encode only the timestamp and the authentication tag
320 | in the Opaque Data. If the Initial packet alters the Connection ID or source IP
321 | address, authentication of the token will fail.
322 |
323 | Upon receipt of an Initial packet with a token that begins with '0', the Retry
324 | Offload MUST validate the token in accordance with the QUIC specification.
325 |
326 | In active mode, the offload MUST issue Retry packets for all client Initial
327 | packets that contain no token, or a token that has the first bit set to '1'. It
328 | MUST NOT forward the packet to the server. The offload MUST validate all tokens
329 | with the first bit set to '0'. If successful, the offload MUST forward the
330 | packet with the token intact. If unsuccessful, it MUST drop the packet. The
331 | Retry Offload MAY send an Initial Packet containing a CONNECTION_CLOSE frame
332 | with the INVALID_TOKEN error code when dropping the packet.
333 |
334 | Note that this scheme has a performance drawback. When the Retry Offload is in
335 | active mode, clients with a token from a NEW_TOKEN frame will suffer a 1-RTT
336 | penalty even though their token provides proof of address.
337 |
338 | In inactive mode, the offload MUST forward all packets that have no token or a
339 | token with the first bit set to '1'. It MUST validate all tokens with the first
340 | bit set to '0'. If successful, the offload MUST forward the packet with the
341 | token intact. If unsuccessful, it MUST drop the packet.
342 |
343 | ## Server Requirements
344 |
345 | A server behind a no-shared-state Retry Offload MUST NOT send Retry packets
346 | for a QUIC version the Retry Offload understands. It MAY send Retry for QUIC
347 | versions the Retry Offload does not understand.
348 |
349 | Tokens sent in NEW_TOKEN frames MUST have the first bit set to '1'.
350 |
351 | If a server receives an Initial Packet with the first bit in the token set to
352 | '1', it could be from a server-generated NEW_TOKEN frame and should be processed
353 | in accordance with the QUIC specification. If a server receives an Initial
354 | Packet with the first bit set to '0', it is a Retry token and the server MUST NOT
355 | attempt to validate it. Instead, it MUST assume the address is validated, MUST
356 | include the packet's Destination Connection ID in a Retry Source Connection ID
357 | transport parameter, and MUST extract the Original Destination Connection ID
358 | from the token cleartext for use in the transport parameter of the same name.
359 |
360 | # Shared-State Retry Offload {#shared-state-retry}
361 |
362 | A shared-state Retry Offload uses a shared key, so that the server can decode
363 | the offload's retry tokens. It does not require that all traffic pass through
364 | the Retry Offload, so servers MAY send Retry packets in response to Initial
365 | packets without a valid token.
366 |
367 | Both server and offload MUST have time synchronized within two seconds of each
368 | other to prevent tokens being incorrectly marked as expired.
369 |
370 | The tokens are protected using AES128-GCM AEAD, as explained in
371 | {{token-protection-with-aead}}. All tokens, generated by either the server or
372 | Retry Offload, MUST use the following format, which includes:
373 |
374 | - A 1 bit token type identifier.
375 | - A 7 bit token key identifier.
376 | - A 96 bit unique token number transmitted in clear text, but protected as part
377 | of the AEAD associated data.
378 | - A token body, encoding the Original Destination Connection ID and the
379 | Timestamp, optionally followed by server specific Opaque Data.
380 |
381 | The token protection uses a 128 bit representation of the source IP address
382 | from the triggering Initial packet. The client IP address is 16 octets. If an
383 | IPv4 address, the last 12 octets are zeroes. It also uses the Source Connection
384 | ID of the Retry packet, which will cause an authentication failure if it
385 | differs from the Destination Connection ID of the packet bearing the token.
386 |
387 | If there is a Network Address Translator (NAT) in the server infrastructure that
388 | changes the client IP, the Retry Offload MUST either be positioned behind the
389 | NAT, or the NAT must have the token key to rewrite the Retry token accordingly.
390 | Note also that a host that obtains a token through a NAT and then attempts to
391 | connect over a path that does not have an identically configured NAT will fail
392 | address validation.
393 |
394 | The 96 bit unique token number is set to a random value using a
395 | cryptography-grade random number generator.
396 |
397 | The token key identifier and the corresponding AEAD key and AEAD IV are
398 | provisioned by the configuration agent.
399 |
400 | The token body is encoded as follows:
401 |
402 | ~~~
403 | Shared-State Retry Offload Token Body {
404 | Timestamp (64),
405 | [ODCIL (8) = 8..20],
406 | [Original Destination Connection ID (64..160)],
407 | [Port (16)],
408 | Opaque Data (..),
409 | }
410 | ~~~
411 | {: #ss-retry-offload-token-body title="Body of shared-state Retry Offload tokens"}
412 | The token body has the following fields:
413 |
414 | Timestamp: The Timestamp is a 64-bit integer, in network order, that expresses
415 | the expiration time of the token as a number of seconds in POSIX time (see Sec.
416 | 4.16 of {{TIME_T}}).
417 |
418 | ODCIL: The original destination connection ID length. Tokens in NEW_TOKEN frames
419 | do not have this field.
420 |
421 | Original Destination Connection ID: The server or Retry Offload copies this
422 | from the field in the client Initial packet. Tokens in NEW_TOKEN frames do not
423 | have this field.
424 |
425 | Port: The Source Port of the UDP datagram that triggered the Retry packet.
426 | This field MUST be present if and only if the ODCIL is greater than zero. This
427 | field is therefore always absent in tokens in NEW_TOKEN frames.
428 |
429 | Opaque Data: The server may use this field to encode additional information,
430 | such as congestion window, RTT, or MTU. The Retry Offload MUST have zero-length
431 | opaque data.
432 |
433 | Some implementations of QUIC encode in the token the Initial Packet Number used
434 | by the client, in order to verify that the client sends the retried Initial
435 | with a PN larger than that of the triggering Initial. Such implementations will
436 | encode the Initial Packet Number as part of the opaque data. As tokens may be
437 | generated by the Retry Offload, servers MUST NOT reject tokens because they lack
438 | opaque data and therefore the packet number.
439 |
440 | Shared-state Retry Offloads use the AES-128-GCM AEAD. Future standards could
441 | add new algorithms that use other ciphers to provide cryptographic agility in
442 | accordance with {{?RFC7696}}. Retry Offload and server implementations SHOULD be
443 | extensible to support new algorithms.
444 |
445 | ### Token Protection with AEAD {#token-protection-with-aead}
446 |
447 | On the wire, the token is presented as:
448 |
449 | ~~~
450 | Shared-State Retry Offload Token {
451 | Token Type (1),
452 | Key Sequence (7),
453 | Unique Token Number (96),
454 | Encrypted Shared-State Retry Offload Token Body (64..),
455 | AEAD Integrity Check Value (128),
456 | }
457 | ~~~
458 | {: #ss-retry-offload-token-wire-image title="Wire image of shared-state Retry Offload tokens"}
459 |
460 | The tokens are protected using AES128-GCM as follows:
461 |
462 | * The Key Sequence is the 7 bit identifier to retrieve the token key and IV.
463 |
464 | * The AEAD IV is a 96 bit value generated by the configuration agent.
465 |
466 | * The AEAD nonce, N, is formed by XORing the AEAD IV with the 96 bit unique
467 | token number.
468 |
469 | * The associated data is formatted as a pseudoheader by combining the
470 | cleartext part of the token with the IP address of the client. The format of
471 | the pseudoheader depends on whether the Token Type bit is '1' (a NEW_TOKEN
472 | token) or '0' (a Retry token).
473 |
474 | ~~~
475 | Shared-State Retry Offload Token Pseudoheader {
476 | IP Address (128),
477 | Token Type (1),
478 | Key Sequence (7),
479 | Unique Token Number (96),
480 | [RSCIL (8)],
481 | [Retry Source Connection ID (0..20)],
482 | }
483 | ~~~
484 | {: #ss-retry-offload-token-pseudoheader title="Pseudoheader for shared-state Retry Offload tokens"}
485 |
486 | RSCIL: The Retry Source Connection ID Length in octets. This field is only
487 | present when the Token Type is '0'.
488 |
489 | Retry Source Connection ID: To create a Retry Token, populate this field with
490 | the Source Connection ID the Retry packet will use. To validate a Retry token,
491 | populate it with the Destination Connection ID of the Initial packet that
492 | carries the token. This field is only present when the Token Type is '0'.
493 |
494 | * The input plaintext for the AEAD is the token body. The output ciphertext of
495 | the AEAD is transmitted in place of the token body.
496 | * The AEAD Integrity Check Value (ICV), defined in Section 6 of {{?RFC4106}}, is
497 | computed as part of the AEAD encryption process, and is verified during
498 | decryption.
499 |
500 | ## Configuration Agent Actions
501 |
502 | The configuration agent generates and distributes a "token key", a "token IV",
503 | a key sequence, and the information described in {{common-requirements}}.
504 |
505 | ## Offload Requirements {#ss-offload}
506 |
507 | In inactive mode, the Retry Offload forwards all packets without further
508 | inspection or processing. The rest of this section only applies to an offload in
509 | active mode.
510 |
511 | Retry Offloads MUST NOT issue Retry packets except where explicitly allowed
512 | below, to avoid sending a Retry packet in response to a Retry token.
513 |
514 | The offload MUST generate Retry tokens with the format described above when it
515 | receives a client Initial packet with no token.
516 |
517 | If there is a token of either type, the offload MUST attempt to decrypt it.
518 |
519 | To decrypt a token, the offload checks the Token Type and constructs a
520 | pseudoheader with the appropriate format for that type, using the bearing
521 | packet's Destination Connection ID to populate the Retry Source Connection ID
522 | field, if any.
523 |
524 | A token is invalid if:
525 |
526 | * it uses an unknown key sequence,
527 |
528 | * the AEAD ICV does not match the expected value (By construction, it will only
529 | match if the client IP Address, and any Retry Source Connection ID, also
530 | matches),
531 |
532 | * the ODCIL, if present, is invalid for a client-generated CID (less than 8 or
533 | more than 20 in QUIC version 1),
534 |
535 | * the Timestamp of a token points to a time in the past (however, in order to
536 | allow for clock skew, it SHOULD NOT consider tokens to be expired if the
537 | Timestamp encodes less than two seconds in the past), or
538 |
539 | * the port number, if present, does not match the source port in the
540 | encapsulating UDP header.
541 |
542 | Packets with valid tokens MUST be forwarded to the server.
543 |
544 | The offload MUST drop packets with invalid tokens. If the token is of type '1'
545 | (NEW_TOKEN), it MUST respond with a Retry packet. If of type '0', it MUST NOT
546 | respond with a Retry packet.
547 |
548 | ## Server Requirements
549 |
550 | The server MAY issue Retry or NEW_TOKEN tokens in accordance with {{RFC9000}}.
551 | When doing so, it MUST follow the format above.
552 |
553 | The server MUST validate all tokens that arrive in Initial packets, as they may
554 | have bypassed the Retry Offload. It determines validity using the procedure
555 | in {{ss-offload}}.
556 |
557 | If a valid Retry token, the server populates the
558 | original_destination_connection_id transport parameter using the
559 | corresponding token field. It populates the retry_source_connection_id transport
560 | parameter with the Destination Connection ID of the packet bearing the token.
561 |
562 | In all other respects, the server processes both valid and invalid tokens in
563 | accordance with {{RFC9000}}.
564 |
565 | For QUIC versions the offload does not support, the server MAY use any token
566 | format.
567 |
568 | # Security Considerations {#security-considerations}
569 |
570 | ## Shared-State Retry Keys
571 |
572 | The Shared-State Retry Offload defined in {{shared-state-retry}} describes the
573 | format of retry tokens or new tokens protected and encrypted using AES128-GCM.
574 | Each token includes a 96 bit randomly generated unique token number, and a 7
575 | bit identifier used to get the AES-GCM encryption context. The AES-GCM
576 | encryption context contains a 128 bit key and an AEAD IV. There are three
577 | important security considerations for these tokens:
578 |
579 | * An attacker that obtains a copy of the encryption key will be able to decrypt
580 | and forge tokens.
581 |
582 | * Attackers may be able to retrieve the key if they capture a sufficiently large
583 | number of retry tokens encrypted with a given key.
584 |
585 | * Confidentiality of the token data will fail if separate tokens reuse the
586 | same 96 bit unique token number and the same key.
587 |
588 | To protect against disclosure of keys to attackers, offloads and servers MUST
589 | ensure that the keys are stored securely. To limit the consequences of potential
590 | exposures, the lifetime of any given key should be limited.
591 |
592 | Section 6.6 of {{?RFC9001}} states that "Endpoints MUST count the number of
593 | encrypted packets for each set of keys. If the total number of encrypted packets
594 | with the same key exceeds the confidentiality limit for the selected AEAD, the
595 | endpoint MUST stop using those keys." It goes on with the specific limit: "For
596 | AEAD_AES_128_GCM and AEAD_AES_256_GCM, the confidentiality limit is 2^23
597 | encrypted packets; see Appendix B.1." It is prudent to adopt the same limit
598 | here, and configure the offload in such a way that no more than 2^23 tokens are
599 | generated with the same key.
600 |
601 | In order to protect against collisions, the 96 bit unique token numbers should
602 | be generated using a cryptographically secure pseudorandom number generator
603 | (CSPRNG), as specified in Appendix C.1 of the TLS 1.3 specification
604 | {{!RFC8446}}. With proper random numbers, if fewer than 2^40 tokens are
605 | generated with a single key, the risk of collisions is lower than 0.001%.
606 |
607 | # IANA Considerations
608 |
609 | There are no IANA requirements.
610 |
611 | --- back
612 |
613 | # Retry Offload YANG Model {#yang-model}
614 |
615 | This YANG model conforms to {{?RFC7950}} and expresses a complete Retry Offload
616 | configuration.
617 |
618 | ~~~
619 | module ietf-retry-offload {
620 | yang-version "1.1";
621 | namespace "urn:ietf:params:xml:ns:yang:ietf-retry-offload";
622 | prefix "retry-offload";
623 |
624 | import ietf-yang-types {
625 | prefix yang;
626 | reference
627 | "RFC 6991: Common YANG Data Types.";
628 | }
629 |
630 | import ietf-inet-types {
631 | prefix inet;
632 | reference
633 | "RFC 6991: Common YANG Data Types.";
634 | }
635 |
636 | organization
637 | "IETF QUIC Working Group";
638 |
639 | contact
640 | "WG Web:
641 | WG List:
642 |
643 | Authors: Martin Duke (martin.h.duke at gmail dot com)
644 | Nick Banks (nibanks at microsoft dot com)
645 | Christian Huitema (huitema at huitema.net)";
646 |
647 | description
648 | "This module enables the explicit cooperation of QUIC servers
649 | with offloads that generate Retry packets on their behalf.
650 |
651 | Copyright (c) 2022 IETF Trust and the persons identified as
652 | authors of the code. All rights reserved.
653 |
654 | Redistribution and use in source and binary forms, with or
655 | without modification, is permitted pursuant to, and subject to
656 | the license terms contained in, the Simplified BSD License set
657 | forth in Section 4.c of the IETF Trust's Legal Provisions
658 | Relating to IETF Documents
659 | (https://trustee.ietf.org/license-info).
660 |
661 | This version of this YANG module is part of RFC XXXX
662 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
663 | for full legal notices.
664 |
665 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
666 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
667 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as
668 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
669 | they appear in all capitals, as shown here.";
670 |
671 | revision "2022-02-11" {
672 | description
673 | "Initial version";
674 | reference
675 | "RFC XXXX, QUIC Retry Offloads";
676 | }
677 |
678 | container retry-offload-config {
679 | description
680 | "Configuration of Retry Offload. If supported-versions is empty,
681 | there is no Retry Offload. If token-keys is empty, it uses the
682 | no-shared-state offload. If present, it uses shared-state
683 | tokens.";
684 |
685 | leaf-list supported-versions {
686 | type uint32;
687 | description
688 | "QUIC versions that the Retry Offload supports. If empty,
689 | there is no Retry Offload.";
690 | }
691 |
692 | leaf unsupported-version-default {
693 | type enumeration {
694 | enum allow {
695 | description "Unsupported versions admitted by default";
696 | }
697 | enum deny {
698 | description "Unsupported versions denied by default";
699 | }
700 | }
701 | default allow;
702 | description
703 | "Are unsupported versions not in version-exceptions allowed
704 | or denied?";
705 | }
706 |
707 | leaf-list version-exceptions {
708 | type uint32;
709 | description
710 | "Exceptions to the default-deny or default-allow rule.";
711 | }
712 |
713 | list token-keys {
714 | key "key-sequence-number";
715 | description
716 | "list of active keys, for key rotation purposes. Existence
717 | implies shared-state format";
718 |
719 | leaf key-sequence-number {
720 | type uint8 {
721 | range "0..127";
722 | }
723 | mandatory true;
724 | description
725 | "Identifies the key used to encrypt the token";
726 | }
727 |
728 | leaf token-key {
729 | type retry-offload-key;
730 | mandatory true;
731 | description
732 | "16-byte key to encrypt the token";
733 | }
734 |
735 | leaf token-iv {
736 | type yang:hex-string {
737 | length 35;
738 | }
739 | mandatory true;
740 | description
741 | "12-byte IV to encrypt the token, encoded in 35 bytes";
742 | }
743 | }
744 | }
745 | }
746 | ~~~
747 |
748 | ## Tree Diagram
749 |
750 | This summary of the YANG models uses the notation in {{?RFC8340}}.
751 |
752 | ~~~
753 | module: ietf-retry-offload
754 | +--rw retry-offload-config
755 | +--rw supported-versions* uint32
756 | +--rw unsupported-version-default? enumeration
757 | +--rw version-exceptions* uint32
758 | +--rw token-keys* [key-sequence-number]
759 | +--rw key-sequence-number uint8
760 | +--rw token-key retry-offload-key
761 | +--rw token-iv yang:hex-string
762 | ~~~
763 |
764 | ## Shared State Retry Token Test Vectors
765 |
766 | In this case, the shared-state retry token is issued by Retry Offload, so the
767 | opaque data of shared-state retry token body would be null
768 | ({{shared-state-retry}}).
769 |
770 | ~~~
771 | Configuration:
772 | key_seq 0x00
773 | encrypt_key 0x30313233343536373839303132333435
774 | AEAD_IV 0x313233343536373839303132
775 |
776 | Shared-State Retry Offload Token Body:
777 | ODCIL 0x12
778 | RSCIL 0x10
779 | port 0x1a0a
780 | original_destination_connection_id 0x0c3817b544ca1c94313bba41757547eec937
781 | retry_source_connection_id 0x0301e770d24b3b13070dd5c2a9264307
782 | timestamp 0x0000000060c7bf4d
783 |
784 | Shared-State Retry Offload Token:
785 | unique_token_number 0x59ef316b70575e793e1a8782
786 | key_sequence 0x00
787 | encrypted_shared_state_retry_offload_token_body
788 | 0x7d38b274aa4427c7a1557c3fa666945931defc65da387a83855196a7cb73caac1e28e5346fd76868de94f8b62294
789 | AEAD_ICV 0xf91174fdd711543a32d5e959867f9c22
790 |
791 | AEAD related parameters:
792 | client_ip_addr 127.0.0.1
793 | client_port 6666
794 | AEAD_nonce 0x68dd025f45616941072ab6b0
795 | AEAD_associated_data 0x7f00000100000000000000000000000059ef316b70575e793e1a878200
796 | ~~~
797 |
798 | # Transition Mode Scenarios {#mid-handshake}
799 |
800 | The logic motivating transition mode behavior involves detailed reasoning about
801 | endpoint behavior during the handshake. This non-normative appendix walks
802 | through the scenarios.
803 |
804 | Dropping Initial packets in the client's second flight can cause performance
805 | problems or deadlocks. In the case where the client and server first flights end
806 | with both sides having handshake keys, there will generally be no impact on
807 | performance. However, if an Initial ACK is critical to progress, as it can be in
808 | the case of multiple-packet TLS messages, Hello Retry Requests, and similar
809 | cases, dropping subsequent Initial ACKs results in deadlock.
810 |
811 | In transition mode, the Retry Offload forwards Initials with no token while also
812 | generating a Retry. This allows handshakes to progress without further incident.
813 |
814 | ## Handshakes in Progress
815 |
816 | If the client hello was admitted in inactive mode, then the client has already
817 | received a packet from the server. Although subsequent client Initial packets
818 | will trigger a Retry, the client will ignore these packets. Those Initials will
819 | also be processed by the server to continue the handshake.
820 |
821 | ## New Connections
822 |
823 | After sending a Client Hello in Initial Packet A, a client will rapidly receive
824 | a Retry Packet from the Offload and attempt to reconnect accordingly with
825 | Initial Packet B.
826 |
827 | The client will discard any server response to Initial A. If a Retry, it is a
828 | second Retry on the connection. If an Initial, it is encrypted with keys
829 | derived from Initial A, which have already been discarded, and will be a
830 | decryption failure.
831 |
832 | Initial B's destination connection ID will be new, so the server will process
833 | it as a new connection and proceed normally.
834 |
835 | Unfortunately, the server connection state initiated by Initial A will remain.
836 | For this reason, this document suggests that servers silently terminate the
837 | older connection. Requiring the address to be validated avoids cases where an
838 | attacker simply replays a client Initial with a new Destination Connection ID
839 | to terminate a valid connection.
840 |
841 | Note that there are corner cases involving further packet loss that result in
842 | connection timeout. For instance, if the Retry Offload's response to Initial A
843 | is lost, then the connection will proceed based on Initial A. If the Retry
844 | Offload then switches from transition mode to active mode before the client's
845 | second flight arrives, the Retry Offload will drop the Initial packet in that
846 | flight, and the connection might deadlock.
847 |
848 | # Acknowledgments
849 |
850 | Christian Huitema, Ling Tao Nju, and William Zeng Ke all provided useful input
851 | to this document.
852 |
853 | # Change Log
854 |
855 | > **RFC Editor's Note:** Please remove this section prior to
856 | > publication of a final version of this document.
857 |
858 | ## since draft-duke-quic-retry-offload-00
859 | - Converted to adopted IETF draft
860 | - Cleaner transition from inactive to active mode
861 |
862 | ## since draft-ietf-quic-load-balancers-12
863 | - Separated from the QUIC-LB draft
864 | - Renamed "Retry Service" to "Retry Offload"
865 |
866 | ## since draft-ietf-quic-load-balancers-11
867 |
868 | - Fixed mistakes in test vectors
869 |
870 | ## since draft-ietf-quic-load-balancers-10
871 |
872 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to
873 | implement
874 | - Revised test vectors
875 | - Split YANG model into a server and middlebox version
876 |
877 | ## since draft-ietf-quic-load-balancers-09
878 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and
879 | "Encrypted Long"
880 | - Added section on per-connection state
881 | - Changed "Encrypted Short" to a 4-pass algorithm.
882 | - Recommended a random initial nonce when incrementing.
883 | - Clarified what SNI LBs should do with unknown QUIC versions.
884 |
885 | ## since draft-ietf-quic-load-balancers-08
886 | - Eliminate Dynamic SID allocation
887 | - Eliminated server use bytes
888 |
889 | ## since draft-ietf-quic-load-balancers-07
890 | - Shortened SSCID nonce minimum length to 4 bytes
891 | - Removed RSCID from Retry token body
892 | - Simplified CID formats
893 | - Shrunk size of SID table
894 |
895 | ## since draft-ietf-quic-load-balancers-06
896 | - Added interoperability with DTLS
897 | - Changed "non-compliant" to "unroutable"
898 | - Changed "arbitrary" algorithm to "fallback"
899 | - Revised security considerations for mistrustful tenants
900 | - Added Retry Offload considerations for non-Initial packets
901 |
902 | ## since draft-ietf-quic-load-balancers-05
903 | - Added low-config CID for further discussion
904 | - Complete revision of shared-state Retry Token
905 | - Added YANG model
906 | - Updated configuration limits to ensure CID entropy
907 | - Switched to notation from quic-transport
908 |
909 | ## since draft-ietf-quic-load-balancers-04
910 | - Rearranged the shared-state retry token to simplify token processing
911 | - More compact timestamp in shared-state retry token
912 | - Revised server requirements for shared-state retries
913 | - Eliminated zero padding from the test vectors
914 | - Added server use bytes to the test vectors
915 | - Additional compliant DCID criteria
916 |
917 | ## since-draft-ietf-quic-load-balancers-03
918 | - Improved Config Rotation text
919 | - Added stream cipher test vectors
920 | - Deleted the Obfuscated CID algorithm
921 |
922 | ## since-draft-ietf-quic-load-balancers-02
923 | - Replaced stream cipher algorithm with three-pass version
924 | - Updated Retry format to encode info for required TPs
925 | - Added discussion of version invariance
926 | - Cleaned up text about config rotation
927 | - Added Reset Oracle and limited configuration considerations
928 | - Allow dropped long-header packets for known QUIC versions
929 |
930 | ## since-draft-ietf-quic-load-balancers-01
931 | - Test vectors for load balancer decoding
932 | - Deleted remnants of in-band protocol
933 | - Light edit of Retry Offloads section
934 | - Discussed load balancer chains
935 |
936 | ## since-draft-ietf-quic-load-balancers-00
937 | - Removed in-band protocol from the document
938 |
939 | ## Since draft-duke-quic-load-balancers-06
940 | - Switch to IETF WG draft.
941 |
942 | ## Since draft-duke-quic-load-balancers-05
943 | - Editorial changes
944 | - Made load balancer behavior independent of QUIC version
945 | - Got rid of token in stream cipher encoding, because server might not have it
946 | - Defined "non-compliant DCID" and specified rules for handling them.
947 | - Added pseudocode for config schema
948 |
949 | ## Since draft-duke-quic-load-balancers-04
950 | - Added standard for Retry Offloads
951 |
952 | ## Since draft-duke-quic-load-balancers-03
953 | - Renamed Plaintext CID algorithm as Obfuscated CID
954 | - Added new Plaintext CID algorithm
955 | - Updated to allow 20B CIDs
956 | - Added self-encoding of CID length
957 |
958 | ## Since draft-duke-quic-load-balancers-02
959 | - Added Config Rotation
960 | - Added failover mode
961 | - Tweaks to existing CID algorithms
962 | - Added Block Cipher CID algorithm
963 | - Reformatted QUIC-LB packets
964 |
965 | ## Since draft-duke-quic-load-balancers-01
966 | - Complete rewrite
967 | - Supports multiple security levels
968 | - Lightweight messages
969 |
970 | ## Since draft-duke-quic-load-balancers-00
971 | - Converted to markdown
972 | - Added variable length connection IDs
973 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "aasvg": "^0.3.3"
4 | }
5 | }
6 |
--------------------------------------------------------------------------------
/quic_lb_protocol.md:
--------------------------------------------------------------------------------
1 | NOTE: This file describes the deleted in-band QUIC-LB protocol, should we ever
2 | revive a form of it.
3 |
4 | # Protocol Description {#protocol-description}
5 |
6 | There are multiple means of configuration that correspond to differing
7 | deployment models and increasing levels of concern about the security of the
8 | load balancer-server path.
9 |
10 | ## Out of band sharing
11 |
12 | When there are concerns about the integrity of the path between load balancer
13 | and server, operators MAY share routing information using an out-of-band
14 | technique, which is out of the scope of this specification.
15 |
16 | To simplify configuration, the global parameters can be shared out-of-band,
17 | while the load balancer sends the unique server IDs via the truncated message
18 | formats presented below.
19 |
20 | ## QUIC-LB Message Exchange
21 |
22 | QUIC-LB load balancers and servers exchange messages via the QUIC-LBv1 protocol,
23 | which uses the QUIC invariants with version number 0xF1000000. The QUIC-LB
24 | load balancers send the encoding parameters to servers and periodically
25 | retransmit until that server responds with an acknowledgement. Specifics of this
26 | retransmission are implementation-dependent.
27 |
28 | ## QUIC-LB Packet {#quic-lb-packet}
29 |
30 | A QUIC-LB packet uses a long header. It carries configuration information from
31 | the load balancer and acknowledgements from the servers. Packets are sent when a
32 | load balancer boots up, detects a new server in the pool or needs to update the
33 | server configuration.
34 |
35 | ~~~~~
36 | 0 1 2 3
37 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
38 | +-+-+-+-+-+-+-+-+
39 | |1|C R| Reserved|
40 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
41 | | Version (32) |
42 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
43 | | 0x00 | 0x00 |
44 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
45 | | |
46 | + Authentication Token (64) +
47 | | |
48 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
49 | | Message Type |
50 | +-+-+-+-+-+-+-+-+
51 | ~~~~~
52 | {: #quic-lb-packet-format title="QUIC-LB Packet Format"}
53 |
54 | The Version field allows QUIC-LB to use the Version Negotiation mechanism. All
55 | messages in this specification are specific to QUIC-LBv1. It should be set to
56 | 0xF1000000.
57 |
58 | Load balancers MUST cease sending QUIC-LB packets of this version to a server
59 | when that server sends a Version Negotiation packet that does not advertise the
60 | version.
61 |
62 | The DCIL and SCIL fields are both set to 0x00.
63 |
64 | CR
65 |
66 | : The 2-bit CR field indicates the Config Rotation described in
67 | {{config-rotation}}.
68 |
69 | Authentication Token
70 |
71 | : The Authentication Token is an 8-byte field that both entities obtain at
72 | configuration time. It is used to verify that the sender is not an inside
73 | off-path attacker. Servers and load balancers SHOULD silently discard QUIC-LB
74 | packets with an incorrect token.
75 |
76 | Message Type
77 |
78 | : The Message Type indicates the type of message payload that follows the
79 | QUIC-LB header.
80 |
81 | ## Message Types and Formats
82 |
83 | As described in {{quic-lb-packet}}, QUIC-LB packets contain a single message.
84 | This section describes the format and semantics of the QUIC-LB message types.
85 |
86 | ### ACK_LB Message {#message-ack-lb}
87 |
88 | A server uses the ACK_LB message (type=0x00) to acknowledge a QUIC-LB packet
89 | received from the load balancer. The ACK_LB message has no additional payload
90 | beyond the QUIC-LB packet header.
91 |
92 | Load balancers SHOULD continue to retransmit a QUIC-LB packet until a valid
93 | ACK_LB message, FAIL message or Version Negotiation Packet is received from the
94 | server.
95 |
96 | ### FAIL Message {#message-fail}
97 |
98 | A server uses the FAIL message (type=0x01) to indicate the configuration
99 | received from the load balancer is unsupported.
100 |
101 | ~~~~~
102 | 0 1 2 3
103 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
104 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 | | Supp. Type | Supp. Type | ...
106 | +-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107 | ~~~~~
108 |
109 | Servers MUST send a FAIL message upon receipt of a message type which they do
110 | not support, or if they do not possess all of the implied out-of-band
111 | configuration to support a particular message type.
112 |
113 | The payload of the FAIL message consists of a list of all the message types
114 | supported by the server.
115 |
116 | Upon receipt of a FAIL message, Load Balancers MUST either send a QUIC-LB
117 | message the server supports or remove the server from the server pool.
118 |
119 | ### ROUTING_INFO Message {#message-routing-info}
120 |
121 | A load balancer uses the ROUTING_INFO message (type=0x02) to exchange all the
122 | parameters for the Obfuscated CID algorithm.
123 |
124 | ~~~~~
125 | 0 1 2 3
126 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
127 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128 | | |
129 | + +
130 | | |
131 | + Routing Bit Mask (152) +
132 | | |
133 | + +
134 | | |
135 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
136 | | | Modulus (16) |
137 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
138 | | Divisor (16) |
139 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
140 | ~~~~~
141 |
142 | Routing Bit Mask
143 |
144 | : The Routing Bit Mask encodes a '1' at every bit position in the server
145 | connection ID that will encode routing information.
146 |
147 | These bits, along with the Modulus and Divisor, are chosen by the load balancer
148 | as described in {{obfuscated-cid-algorithm}}.
149 |
150 | ### STREAM_CID Message {#message-stream-cid}
151 |
152 | A load balancer uses the STREAM_CID message (type=0x03) to exchange all the
153 | parameters for using Stream Cipher CIDs.
154 |
155 | ~~~~~
156 | 0 1 2 3
157 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
158 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
159 | | Nonce Len (8) | SIDL (8) |
160 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
161 | | Server ID (variable) |
162 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
163 | | |
164 | + Key (128) +
165 | | |
166 | + +
167 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
168 | ~~~~~
169 | {: #Stream-cid-format title="Stream CID Payload"}
170 |
171 | Nonce Len
172 |
173 | : The Nonce Len field is a one-octet unsigned integer that describes the
174 | nonce length necessary to use this routing algorithm, in octets.
175 |
176 | SIDL
177 |
178 | : The SIDL field is a one-octet unsigned integer that describes the server ID
179 | length necessary to use this routing algorithm, in octets.
180 |
181 | Server ID
182 |
183 | : The Server ID is the unique value assigned to the receiving server. Its
184 | length is determined by the SIDL field.
185 |
186 | Key
187 |
188 | : The Key is a 16-octet field that contains the key that the load balancer
189 | will use to decrypt server IDs on QUIC packets. See
190 | {{security-considerations}} to understand why sending keys in plaintext may
191 | be a safe strategy.
192 |
193 | ### BLOCK_CID Message {#message-block-cid}
194 |
195 | A load balancer uses the BLOCK_CID message (type=0x04) to exchange all the
196 | parameters for using Block Cipher CIDs.
197 |
198 | ~~~~~
199 | 0 1 2 3
200 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
201 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
202 | | ZP Len (8) | SIDL (8) |
203 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
204 | | Server ID (variable) |
205 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
206 | | |
207 | + Key (128) +
208 | | |
209 | + +
210 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
211 | ~~~~~
212 | {: #block-cid-format title="Block CID Payload"}
213 |
214 | ZP Len
215 |
216 | : The ZP Len field is a one-octet unsigned integer that describes the
217 | zero-padding length necessary to use this routing algorithm, in octets.
218 |
219 | SIDL
220 |
221 | : The SIDL field is a one-octet unsigned integer that describes the server ID
222 | length necessary to use this routing algorithm, in octets.
223 |
224 | Server ID
225 |
226 | : The Server ID is the unique value assigned to the receiving server. Its
227 | length is determined by the SIDL field.
228 |
229 | Key
230 |
231 | : The Key is a 16-octet field that contains the key that the load balancer
232 | will use to decrypt server IDs on QUIC packets. See
233 | {{security-considerations}} to understand why sending keys in plaintext may
234 | be a safe strategy.
235 |
236 | ### SERVER_ID Message {#message-server-id}
237 |
238 | A load balancer uses the SERVER_ID message (type=0x05) to exchange
239 | explicit server IDs.
240 |
241 | ~~~~~
242 | 0 1 2 3
243 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
244 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
245 | | SIDL (8) | Server ID (variable) |
246 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
247 | ~~~~~
248 |
249 | Load balancers send the SERVER_ID message when all global values for Stream or
250 | Block CIDs are sent out-of-band, so that only the server-unique values must be
251 | sent in-band. It also provides all necessary parameters for Plaintext CIDs. The
252 | fields are identical to their counterparts in the {{message-stream-cid}}
253 | payload.
254 |
255 | ### MODULUS Message {#message-modulus}
256 |
257 | A load balancer uses the MODULUS message (type=0x06) to exchange just the
258 | modulus used in the Obfuscated CID algorithm.
259 |
260 | ~~~~~
261 | 0 1 2 3
262 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
263 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
264 | | Modulus (16) |
265 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
266 | ~~~~~
267 |
268 | Load balancers send the MODULUS message when all global values for Obfuscated
269 | CIDs are sent out-of-band, so that only the server-unique values must be sent
270 | in-band. The Modulus field is identical to its counterpart in the
271 | ROUTING_INFO message.
272 |
273 | ### PLAINTEXT Message {#message-plaintext}
274 |
275 | A load balancer uses the PLAINTEXT message (type=0x07) to exchange all
276 | parameters needed for the Plaintext CID algorithm.
277 |
278 | ~~~~~
279 | 0 1 2 3
280 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
281 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
282 | | SIDL (8) |
283 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
284 | | |
285 | + Server ID (variable) +
286 | | |
287 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
288 | ~~~~~
289 |
290 | The SIDL field indicates the length of the server ID field. The
291 | Server ID field indicates the encoding that represents the
292 | destination server.
293 |
294 | ### RETRY_SERVICE_STATELESS message
295 |
296 | A no-shared-state retry service uses this message (type=0x08) to notify the
297 | server of the existence of this service. This message has no fields.
298 |
299 | ### RETRY_SERVICE_STATEFUL message
300 |
301 | A shared-state retry service uses this message (type=0x09) to tell the server
302 | about its existence, and share the key needed to decrypt server-generated retry
303 | tokens.
304 |
305 | ~~~~~
306 | 0 1 2 3
307 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
308 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
309 | | |
310 | + +
311 | | |
312 | + Key (128) +
313 | | |
314 | + +
315 | | |
316 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
317 | ~~~~~
318 |
--------------------------------------------------------------------------------