├── .circleci └── config.yml ├── .github └── in-solidarity.yml ├── .gitignore ├── .note.xml ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── draft-ietf-quic-load-balancers.md ├── draft-ietf-quic-retry-offload.md ├── package.json └── quic_lb_protocol.md /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: martinthomson/i-d-template:latest 6 | resource_class: small 7 | working_directory: ~/draft 8 | 9 | steps: 10 | - run: 11 | name: "Print Configuration" 12 | command: | 13 | xml2rfc --version 14 | gem list -q kramdown-rfc 15 | echo -n 'mmark '; mmark --version 16 | 17 | - restore_cache: 18 | name: "Restoring cache - Git" 19 | keys: 20 | - v2-cache-git-{{ .Branch }}-{{ .Revision }} 21 | - v2-cache-git-{{ .Branch }} 22 | - v2-cache-git- 23 | 24 | - restore_cache: 25 | name: "Restoring cache - References" 26 | keys: 27 | - v1-cache-references-{{ epoch }} 28 | - v1-cache-references- 29 | 30 | # Workaround for https://discuss.circleci.com/t/22437 31 | - run: 32 | name: Tag Checkout 33 | command: | 34 | if [ -n "$CIRCLE_TAG" ] && [ -d .git ]; then 35 | remote=$(echo "$CIRCLE_REPOSITORY_URL" | \ 36 | sed -e 's,/^git.github.com:,https://github.com/,') 37 | git fetch -f "$remote" "refs/tags/$CIRCLE_TAG:refs/tags/$CIRCLE_TAG" || \ 38 | (echo 'Removing .git cache for tag build'; rm -rf .git) 39 | fi 40 | 41 | - checkout 42 | 43 | # Build txt and html versions of drafts 44 | - run: 45 | name: "Build Drafts" 46 | command: make 47 | 48 | # Update editor's copy on gh-pages 49 | - run: 50 | name: "Update GitHub Pages" 51 | command: | 52 | if [ "${CIRCLE_TAG#draft-}" == "$CIRCLE_TAG" ]; then 53 | make gh-pages 54 | fi 55 | 56 | # For tagged builds, upload to the datatracker. 
57 | - deploy: 58 | name: "Upload to Datatracker" 59 | command: | 60 | if [ "${CIRCLE_TAG#draft-}" != "$CIRCLE_TAG" ]; then 61 | make upload 62 | fi 63 | 64 | # Archive GitHub Issues 65 | - run: 66 | name: "Archive GitHub Issues" 67 | command: "make archive || make archive DISABLE_ARCHIVE_FETCH=true && make gh-archive" 68 | 69 | # Create and store artifacts 70 | - run: 71 | name: "Create Artifacts" 72 | command: "make artifacts CI_ARTIFACTS=/tmp/artifacts" 73 | 74 | - store_artifacts: 75 | path: /tmp/artifacts 76 | 77 | - run: 78 | name: "Prepare for Caching" 79 | command: "git reflog expire --expire=now --all && git gc --prune=now" 80 | 81 | - save_cache: 82 | name: "Saving Cache - Git" 83 | key: v2-cache-git-{{ .Branch }}-{{ .Revision }} 84 | paths: 85 | - ~/draft/.git 86 | 87 | - save_cache: 88 | name: "Saving Cache - Drafts" 89 | key: v1-cache-references-{{ epoch }} 90 | paths: 91 | - ~/.cache/xml2rfc 92 | 93 | 94 | workflows: 95 | version: 2 96 | build: 97 | jobs: 98 | - build: 99 | filters: 100 | tags: 101 | only: /.*?/ 102 | -------------------------------------------------------------------------------- /.github/in-solidarity.yml: -------------------------------------------------------------------------------- 1 | _extends: ietf/terminology 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *~ 3 | /*-[0-9][0-9].xml 4 | archive.json 5 | draft-ietf-quic-load-balancers.xml 6 | *.html 7 | issues.json 8 | *.js 9 | lib 10 | old-stream-ciphers.md 11 | *.pdf 12 | pulls.json 13 | *.redxml 14 | .refcache 15 | report.xml 16 | *.swp 17 | .tags 18 | .targets.mk 19 | *.txt 20 | *.upload 21 | node_modules/ 22 | package-lock.json 23 | venv/ 24 | lib 25 | draft-ietf-quic-load-balancers.xml 26 | -------------------------------------------------------------------------------- /.note.xml: 
-------------------------------------------------------------------------------- 1 | 2 | Discussion of this document takes place on the 3 | QUIC Working Group mailing list (quic@ietf.org), 4 | which is archived at https://mailarchive.ietf.org/arch/browse/quic/. 5 | Source for this draft and an issue tracker can be found at 6 | https://github.com/quicwg/load-balancers. 7 | 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: xenial 3 | 4 | services: 5 | - docker 6 | 7 | env: 8 | DRAFT_DIR: /home/idci/draft 9 | 10 | before_install: 11 | - docker --version 12 | - docker pull martinthomson/i-d-template 13 | 14 | script: 15 | - docker run -d -v "$PWD:/tmp/draft" --tmpfs "$DRAFT_DIR:rw,exec" --name idci 16 | martinthomson/i-d-template sleep 300 17 | - docker exec idci cp -rn /tmp/draft /home/idci 18 | - docker exec -w "$DRAFT_DIR" -e CI=true -e TRAVIS 19 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST 20 | idci make CLONE_ARGS='--reference /home/idci/git-reference' 21 | - docker exec idci ls -l /home/idci/draft/lib 22 | - if [ "${TRAVIS_TAG#draft-}" == "${TRAVIS_TAG}" ]; then 23 | docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS 24 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST 25 | idci make ghpages; 26 | fi 27 | 28 | deploy: 29 | provider: script 30 | script: 31 | - docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS 32 | -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST 33 | idci make upload 34 | skip_cleanup: true 35 | on: 36 | tags: true 37 | 38 | after_script: 39 | - docker container rm -f idci 40 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This 
repository relates to activities in the Internet Engineering Task Force 4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered 5 | Contributions to the IETF Standards Process, as defined in the intellectual 6 | property policies of IETF currently designated as 7 | [BCP 78](https://www.rfc-editor.org/info/bcp78), 8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the 9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html). 10 | 11 | Any edit, commit, pull request, issue, comment or other change made to this 12 | repository constitutes Contributions to the IETF Standards Process 13 | (https://www.ietf.org/). 14 | 15 | You agree to comply with all applicable IETF policies and procedures, including, 16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being 17 | subject to a Simplified BSD License) in Contributions. 18 | 19 | 20 | ## Other Resources 21 | 22 | Discussion of this work occurs on the 23 | [quic working group mailing list](https://mailarchive.ietf.org/arch/browse/quic/) 24 | ([subscribe](https://www.ietf.org/mailman/listinfo/quic)). In addition to 25 | contributions in GitHub, you are encouraged to participate in discussions there. 26 | 27 | **Note**: Some working groups adopt a policy whereby substantive discussion of 28 | technical issues needs to occur on the mailing list. 29 | 30 | You might also like to familiarize yourself with other 31 | [working group documents](https://datatracker.ietf.org/wg/quic/documents/). 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | See the 4 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md). 
5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LIBDIR := lib 2 | include $(LIBDIR)/main.mk 3 | 4 | $(LIBDIR)/main.mk: 5 | ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null)) 6 | git submodule sync 7 | git submodule update $(CLONE_ARGS) --init 8 | else 9 | git clone -q --depth 10 $(CLONE_ARGS) \ 10 | -b main https://github.com/martinthomson/i-d-template $(LIBDIR) 11 | endif 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # QUIC-LB: Generating Routable QUIC Connection IDs 2 | 3 | This is the working area for the IETF [QUIC Working Group](https://datatracker.ietf.org/wg/quic/documents/) Internet-Draft, "QUIC-LB: Generating Routable QUIC Connection IDs". 4 | 5 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.html) 6 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-load-balancers) 7 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.diff) 8 | 9 | It is also the home for "QUIC Retry Offload". 10 | 11 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.html) 12 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-retry-offload) 13 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.diff) 14 | 15 | ## Building the Draft 16 | 17 | Formatted text and HTML versions of the draft can be built using `make`. 18 | 19 | ```sh 20 | $ make 21 | ``` 22 | 23 | This requires that you have the necessary software installed. See 24 | [the instructions](https://github.com/martinthomson/i-d-template/blob/master/doc/SETUP.md).
25 | 26 | 27 | ## Contributing 28 | 29 | See the 30 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md). 31 | -------------------------------------------------------------------------------- /draft-ietf-quic-load-balancers.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "QUIC-LB: Generating Routable QUIC Connection IDs" 3 | abbrev: QUIC-LB 4 | docname: draft-ietf-quic-load-balancers-latest 5 | date: {DATE} 6 | category: std 7 | ipr: trust200902 8 | area: Transport 9 | workgroup: QUIC 10 | 11 | stand_alone: yes 12 | pi: [toc, sortrefs, symrefs, docmapping] 13 | 14 | author: 15 | - 16 | ins: M. Duke 17 | name: Martin Duke 18 | org: Google 19 | email: martin.h.duke@gmail.com 20 | 21 | - 22 | ins: N. Banks 23 | name: Nick Banks 24 | org: Microsoft 25 | email: nibanks@microsoft.com 26 | 27 | - 28 | ins: C. Huitema 29 | name: Christian Huitema 30 | org: Private Octopus Inc. 31 | email: huitema@huitema.net 32 | 33 | normative: 34 | NIST-AES-ECB: 35 | title: "Recommendation for Block Cipher Modes of Operation: Methods and Techniques" 36 | author: 37 | - ins: M. Dworkin 38 | date: 2001 39 | refcontent: 40 | - "NIST Special Publication 800-38A" 41 | target: "https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf" 42 | 43 | informative: 44 | Patarin2008: 45 | target: https://eprint.iacr.org/2008/036.pdf 46 | title: Generic Attacks on Feistel Schemes - Extended Version 47 | author: 48 | ins: J. Patarin 49 | name: Jacques Patarin 50 | org: PRiSM, University of Versailles 51 | date: 2008 52 | 53 | --- abstract 54 | 55 | QUIC address migration allows clients to change their IP address while 56 | maintaining connection state. To reduce the ability of an observer to link two 57 | IP addresses, clients and servers use new connection IDs when they communicate 58 | via different client addresses.
This poses a problem for traditional "layer-4" 59 | load balancers that route packets via the IP address and port 4-tuple. This 60 | specification provides a standardized means of securely encoding routing 61 | information in the server's connection IDs so that a properly configured load 62 | balancer can route packets with migrated addresses correctly. As it proposes a 63 | structured connection ID format, it also provides a means of connection IDs 64 | self-encoding their length to aid some hardware offloads. 65 | 66 | --- middle 67 | 68 | # Introduction 69 | 70 | QUIC packets {{!RFC9000}} usually contain a connection ID to allow endpoints to 71 | associate packets with different address/port 4-tuples to the same connection 72 | context. This feature makes connections robust in the event of NAT rebinding. 73 | QUIC endpoints usually designate the connection ID which peers use to address 74 | packets. Server-generated connection IDs create a potential need for out-of-band 75 | communication to support QUIC. 76 | 77 | QUIC allows servers (or load balancers) to encode useful routing information for 78 | load balancers in connection IDs. It also encourages servers, in packets 79 | protected by cryptography, to provide additional connection IDs to the client. 80 | This allows clients that know they are going to change IP address or port to use 81 | a separate connection ID on the new path, thus reducing linkability as clients 82 | move through the world. 83 | 84 | There is a tension between the requirements to provide routing information and 85 | mitigate linkability. Ultimately, because new connection IDs are in protected 86 | packets, they must be generated at the server if the load balancer does not have 87 | access to the connection keys. However, it is the load balancer that has the 88 | context necessary to generate a connection ID that encodes useful routing 89 | information. 
In the absence of any shared state between load balancer and 90 | server, the load balancer must maintain a relatively expensive table of 91 | server-generated connection IDs, and will not route packets correctly if they 92 | use a connection ID that was originally communicated in a protected 93 | NEW_CONNECTION_ID frame. 94 | 95 | This specification provides common algorithms for encoding the server mapping in 96 | a connection ID given some shared parameters. The mapping is generally only 97 | discoverable by observers that have the parameters, preserving unlinkability as 98 | much as possible. 99 | 100 | As this document proposes a structured QUIC Connection ID, it also proposes a 101 | system for self-encoding connection ID length in all packets, so that crypto 102 | offload can efficiently obtain key information. 103 | 104 | While this document describes a small set of configuration parameters to make 105 | the server mapping intelligible, the means of distributing these parameters 106 | between load balancers, servers, and other trusted intermediaries is out of its 107 | scope. There are numerous well-known infrastructures for distribution of 108 | configuration. 109 | 110 | ## Terminology 111 | 112 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", 113 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be 114 | interpreted as described in RFC 2119 {{?RFC2119}}. 115 | 116 | In this document, these words will appear with that interpretation only when in 117 | ALL CAPS. Lower case uses of these words are not to be interpreted as carrying 118 | significance described in RFC 2119. 119 | 120 | In this document, "client" and "server" refer to the endpoints of a QUIC 121 | connection unless otherwise indicated. A "load balancer" is an intermediary for 122 | that connection that does not possess QUIC connection keys, but it may rewrite 123 | IP addresses or conduct other IP or UDP processing. 
A "configuration agent" is 124 | the entity that determines the QUIC-LB configuration parameters for the network 125 | and leverages some system to distribute that configuration. 126 | 127 | Note that stateful load balancers that act as proxies, by terminating a QUIC 128 | connection with the client and then retrieving data from the server using QUIC 129 | or another protocol, are treated as a server with respect to this specification. 130 | 131 | For brevity, "Connection ID" will often be abbreviated as "CID". 132 | 133 | ## Notation 134 | 135 | All wire formats will be depicted using the notation defined in Section 1.3 of 136 | {{RFC9000}}. 137 | 138 | # Overview 139 | 140 | In QUIC-LB, load balancers do not generate individual connection IDs for 141 | servers. Instead, they communicate the parameters of an algorithm to generate 142 | routable connection IDs. 143 | 144 | The algorithms differ in the complexity of configuration at both load balancer 145 | and server. Increasing complexity improves obfuscation of the server mapping. 146 | 147 | This specification describes three participants: the configuration agent, the 148 | load balancer, and the server. For any given QUIC-LB configuration that enables 149 | connection-ID-aware load balancing, there must be a choice of (1) routing 150 | algorithm, (2) server ID allocation strategy, and (3) algorithm parameters. 151 | 152 | Fundamentally, servers generate connection IDs that encode their server ID. 153 | Load balancers decode the server ID from the CID in incoming packets to route 154 | to the correct server. 155 | 156 | {{!RFC8999}} specifies that endpoints generate their own connection IDs, 157 | implying that all QUIC versions will have a mechanism to communicate their 158 | connection IDs to the peer. In QUIC versions 1 and 2, the server does so using 159 | the Source Connection ID field of its long header packets for the first 160 | connection ID, and NEW_CONNECTION_ID frames for subsequent CIDs.
161 | 162 | There are situations where a server pool might be operating two or more routing 163 | algorithms or parameter sets simultaneously. The load balancer uses the first 164 | three bits of the connection ID to multiplex incoming Destination Connection IDs 165 | (DCIDs) over these schemes (see {{config-rotation}}). 166 | 167 | # First CID octet {#first-octet} 168 | 169 | The Connection ID construction schemes defined in this document reserve the 170 | first octet of a CID for two special purposes: one mandatory (config rotation) 171 | and one optional (length self-description). 172 | 173 | Subsequent sections of this document refer to the contents of this octet as the 174 | "first octet." 175 | 176 | ## Config Rotation {#config-rotation} 177 | 178 | The first three bits of any connection ID MUST encode an identifier for the 179 | configuration that the connection ID uses. This enables incremental deployment 180 | of new QUIC-LB settings (e.g., keys). A configuration MUST NOT use the 181 | reserved identifier 0b111 (see {{config-failover}} below). 182 | 183 | When new configuration is distributed to servers, there will be a transition 184 | period when connection IDs reflecting old and new configuration coexist in the 185 | network. The rotation bits allow load balancers to apply the correct routing 186 | algorithm and parameters to incoming packets. 187 | 188 | Configuration Agents SHOULD deliver new configurations to load balancers before 189 | doing so to servers, so that load balancers are ready to process CIDs using the 190 | new parameters when they arrive. 191 | 192 | A Configuration Agent SHOULD NOT use a codepoint to represent a new 193 | configuration until it takes precautions to make sure that all connections using 194 | CIDs with an old configuration at that codepoint have closed or transitioned. 195 | 196 | Servers MUST NOT generate new connection IDs using an old configuration after 197 | receiving a new one from the configuration agent. 
Servers MUST use that QUIC 198 | version's methods to update the client with CIDs (e.g., NEW_CONNECTION_ID 199 | frames) using the new configuration and retire CIDs using the old configuration. 200 | 201 | It is also possible to use these bits for more long-lived distinction of different 202 | configurations, but this has privacy implications (see {{multiple-configs}}). 203 | 204 | ## Configuration Failover {#config-failover} 205 | 206 | In some deployments, an infrastructure will not receive traffic unless all 207 | servers have received a configuration, and load balancers have a superset of all 208 | configurations that are active in the server pool, thus guaranteeing that any 209 | CID generated by a server is decodable by any load balancer. Servers and load 210 | balancers deployed under all of these assumptions can ignore the provisions in 211 | this subsection. 212 | 213 | Load balancers treat connection IDs for which they have no corresponding config 214 | ID as unroutable (see {{unroutable}}). If they have no configuration at all, 215 | then all connection IDs are unroutable. 216 | 217 | Servers with no active configuration MUST issue connection IDs with the reserved 218 | value of the three most significant bits set to 0b111 to signify the connection 219 | ID is unroutable. These connection IDs MUST self-encode their length (see 220 | {{length-self-description}}). 221 | 222 | Servers with no active configuration SHOULD provide the client exactly one CID 223 | over the life of the connection. In QUIC versions 1 and 2, therefore, servers 224 | SHOULD NOT send any NEW_CONNECTION_ID frames, instead delivering a single CID 225 | via the Source Connection ID of long headers it sends. 226 | 227 | Servers with no active configuration SHOULD send the "disable_active_migration" 228 | transport parameter, or a similar message in future QUIC versions.
229 | 230 | When using codepoint 0b111, all bytes but the first SHOULD have no greater a 231 | chance of collision than random bytes. The connection ID SHOULD be of at least 232 | length 8 to provide 7 bytes of entropy after the first octet with a low chance 233 | of collision. 234 | 235 | ## Length Self-Description {#length-self-description} 236 | 237 | Local hardware cryptographic offload devices may accelerate QUIC servers by 238 | receiving keys from the QUIC implementation indexed to the connection ID. 239 | However, on physical devices operating multiple QUIC servers, it might be 240 | impractical to efficiently look up keys if the connection ID varies in length and 241 | does not self-encode its own length. 242 | 243 | Note that this is a function of particular server devices and is irrelevant to 244 | load balancers. As such, load balancers MAY omit this from their configuration. 245 | However, the remaining 5 bits in the first octet of the Connection ID are 246 | reserved to express the length of the following connection ID, not including 247 | the first octet. 248 | 249 | A server not using this functionality SHOULD choose the five bits so as to have 250 | no observable relationship to previous connection IDs issued for that 251 | connection. 252 | 253 | ## Format 254 | 255 | ~~~ 256 | First Octet { 257 | Config Rotation (3), 258 | CID Len or Random Bits (5), 259 | } 260 | ~~~ 261 | {: #first-octet-format title="First Octet Format"} 262 | 263 | The first octet has the following fields: 264 | 265 | Config Rotation: Indicates the configuration used to interpret the CID. 266 | 267 | CID Len or Random Bits: Length Self-Description (if applicable), or random bits 268 | otherwise. Encodes the length of the Connection ID following the First Octet.
269 | 270 | # Unroutable Connection IDs {#unroutable} 271 | 272 | ## Definition 273 | 274 | QUIC-LB servers with a valid configuration will generate Connection IDs that are 275 | decodable to extract a server ID in accordance with a specified algorithm and 276 | parameters. However, QUIC often uses client-generated Connection IDs prior to 277 | receiving a packet from the server. 278 | 279 | Furthermore, servers without a valid configuration, or a configuration not 280 | present at the load balancer, will also generate connection IDs that are not 281 | decodable, and these CIDs are likely to persist for the duration of the 282 | connection. 283 | 284 | These CIDs might not conform to the expectations of the routing algorithm and 285 | therefore not be routable by the load balancer. Those that are not routable are 286 | "unroutable DCIDs" and receive similar treatment regardless of why they're 287 | unroutable: 288 | 289 | * The config rotation bits ({{config-rotation}}) do not correspond to an active 290 | configuration. Note: a packet with a DCID with config ID codepoint 0b111 (see 291 | {{config-failover}}) is always unroutable. 292 | * If the packet header encodes the DCID length, the DCID is not long enough for 293 | the decoder to process. 294 | * The extracted server mapping does not correspond to an active server. 295 | 296 | If the load balancer has knowledge that all servers in the pool are encoding 297 | CID length in the first octet (see {{length-self-description}}), it MAY 298 | perform additional checks based on that self-encoded length: 299 | 300 | * In a long header, verify that the self-encoded length is consistent with the 301 | CID length field in the header (i.e. the self-encoded length is one less) 302 | * Verify that the self-encoded length is consistent with the QUIC version, if 303 | known. 304 | * Verify that the self-encoded length is large enough for the decoder to process 305 | using the indicated config ID. 
306 | 307 | DCIDs that do not meet any of these criteria are routable. 308 | 309 | ## Load Balancer Forwarding {#load-balancer-forwarding} 310 | 311 | Load balancers execute the following steps in order until one results in a 312 | routing decision. The steps refer to state that some load balancers will 313 | maintain, depending on the deployment's underlying assumptions. See 314 | {{fallback-algorithm}} for further discussion of this state. 315 | 316 | 1. If the packet contains a routable CID, route the packet accordingly. 317 | 1. If the packet has a long header and matches an entry in a table of routing 318 | decisions indexed by a concatenation of 4-tuple and Source CID, route the packet 319 | accordingly. 320 | 1. If the packet matches an entry in a table of routing decisions by destination 321 | CID, route the packet accordingly. 322 | 1. If the packet matches an entry in a table of routing decisions by 4-tuple, route 323 | the packet accordingly. 324 | 1. Use the fallback algorithm to make a routing decision and, if applicable, 325 | record the results in the tables indexed by 4-tuple and/or CID. In some cases, 326 | described below, the load balancer might buffer the packet to defer a decision. 327 | 328 | ## Fallback Algorithms {#fallback-algorithm} 329 | 330 | There are conditions described above where a load balancer routes a packet using 331 | a "fallback algorithm." A standardized algorithm design is not necessary for 332 | interoperability, so load balancers can implement any algorithm that meets the 333 | relevant requirements below. 334 | 335 | There is a baseline case that has relatively simple requirements of the chosen 336 | fallback algorithm, and an advanced case with more capabilities and more complex 337 | requirements. 338 | 339 | ### Baseline Fallback Algorithm 340 | 341 | All load balancers MUST implement a baseline fallback algorithm that takes only 342 | the 4-tuple as an input and outputs a routing decision.
343 | 344 | If it is impossible for the server to generate CIDs that the load balancer 345 | cannot decode (see {{config-failover}}), there are no further requirements in 346 | this subsection. 347 | 348 | Otherwise, the load balancer SHOULD maintain a table of 4-tuples that carried 349 | unroutable DCIDs and the resulting routing decision. Provided the table does 350 | not overflow, and the load balancer does not lose state, this allows connections 351 | to survive when the server pool changes, which would sometimes change the output 352 | of the fallback algorithm. 353 | 354 | The load balancer MAY maintain a table of observed unroutable DCIDs and the 355 | resulting routing decision. Provided the table does not overflow, these 356 | connections will be robust to NAT rebinding. 357 | 358 | Load balancers SHOULD maintain per-flow timers to periodically purge state in 359 | the tables described above. 360 | 361 | ### Advanced Fallback Algorithm 362 | 363 | Some architectures might require a load balancer to choose a server pool based 364 | on deep packet inspection of a client packet. For example, it may use the TLS 365 | 1.3 Server Name Indication (SNI) ({{?RFC6066}}) field. The advanced fallback 366 | algorithm enables this capability but levies several additional requirements to 367 | make consistent routing decisions. 368 | 369 | For packets not known to belong to a QUIC version the load balancer can parse, 370 | load balancers MUST use the baseline fallback algorithm if the DCID is 371 | unroutable. 372 | 373 | For known QUIC versions, the fallback algorithm MAY parse packets and use that 374 | information to make a routing decision. 375 | 376 | If so, it MUST have the ability to buffer packets with unroutable DCIDs to await 377 | further packets that allow it to make a routing decision, as the fields of 378 | interest can be an arbitrary number of packets into the connection.
379 | 380 | 4-tuple routing is not sufficient for this use case, because a client can use 381 | the same 4-tuple for two connections that should be routed differently (e.g. 382 | because they target different SNIs), as long as the packet contains a source 383 | connection ID of nonzero length. 384 | 385 | Therefore, the load balancer SHOULD maintain two tables that map different 386 | values to a routing decision: 387 | 388 | - a table indexed by a concatenation of the 4-tuple and source CID, which might 389 | be zero-length, to route subsequent long header packets that do not contain the 390 | server-generated connection ID; 391 | 392 | - a table indexed by destination CID, if and only if it is possible for the 393 | server to generate unroutable CIDs. This table can be shared with the one in use 394 | for the baseline fallback algorithm. 395 | 396 | If either table overflows, or if the load balancer loses state, it is likely the 397 | load balancer will misroute packets. 398 | 399 | Load balancers SHOULD maintain per-flow timers to periodically purge state in 400 | the tables described above. 401 | 402 | # Server ID Encoding in Connection IDs 403 | 404 | ## Server ID Allocation {#sid-allocation} 405 | 406 | Load Balancer configurations include a mapping of server IDs to forwarding 407 | addresses. The corresponding server configurations contain one or 408 | more unique server IDs. 409 | 410 | The configuration agent chooses a server ID length for each configuration that 411 | MUST be at least one octet. 412 | 413 | A QUIC-LB configuration MAY significantly over-provision the server ID space 414 | (i.e., provide far more codepoints than there are servers) to increase the 415 | probability that a randomly generated Destination Connection ID is unroutable. 
416 | 417 | The configuration agent SHOULD provide a means for servers to express the 418 | number of server IDs it can usefully employ, because a single routing address 419 | actually corresponds to multiple server entities (see {{lb-chains}}). 420 | 421 | Conceptually, each configuration has its own set of server ID allocations, 422 | though two static configurations with identical server ID lengths MAY use a 423 | common allocation between them. 424 | 425 | A server encodes one of its assigned server IDs in any CID it generates using 426 | the relevant configuration. 427 | 428 | ## CID format 429 | 430 | All connection IDs use the following format: 431 | 432 | ~~~ 433 | QUIC-LB Connection ID { 434 | First Octet (8), 435 | Plaintext Block (40..152), 436 | } 437 | Plaintext Block { 438 | Server ID (8..), 439 | Nonce (32..), 440 | } 441 | ~~~ 442 | {: #plaintext-cid-format title="CID Format"} 443 | 444 | The First Octet field serves one or two purposes, as defined in {{first-octet}}. 445 | 446 | The Server ID field encodes the information necessary for the load balancer to 447 | route a packet with that connection ID. It is often encrypted. 448 | 449 | The server uses the Nonce field to make sure that each connection ID it 450 | generates is unique, even though they all use the same Server ID. 451 | 452 | ## Configuration Agent Actions 453 | 454 | The configuration agent assigns a server ID to every server in its pool in 455 | accordance with {{sid-allocation}}, and determines a server ID length (in 456 | octets) sufficiently large to encode all server IDs, including potential future 457 | servers. 458 | 459 | Each configuration specifies the length of the Server ID and Nonce fields, with 460 | limits defined for each algorithm. 461 | 462 | Optionally, it also defines a 16-octet key. 
Note that failure to define a key 463 | means that observers can determine the assigned server of any connection, 464 | significantly increasing the linkability of QUIC address migration. 465 | 466 | The nonce length MUST be at least 4 octets. The server ID length MUST be at 467 | least 1 octet. 468 | 469 | As QUIC version 1 limits connection IDs to 20 octets, the server ID and nonce 470 | lengths MUST sum to 19 octets or less. 471 | 472 | ## Server Actions 473 | 474 | The server writes the first octet and its server ID into their respective 475 | fields. 476 | 477 | If there is no key in the configuration, the server MUST fill the Nonce field 478 | with bytes that have no observable relationship to the field in previously 479 | issued connection IDs. If there is a key, the server fills the nonce field with 480 | a nonce of its choosing. See {{cid-entropy}} for details. 481 | 482 | The server MAY append additional bytes to the connection ID, up to the limit 483 | specified in that version of QUIC, for its own use. These bytes MUST NOT 484 | provide observers with any information that could link two connection IDs to 485 | the same connection, client, or server. In particular, all servers using a 486 | configuration MUST consistently add the same length to each connection ID, 487 | to preserve the linkability objectives of QUIC-LB. Any additional bytes SHOULD 488 | NOT provide any observable correlation to previous connection IDs for that 489 | connection (e.g., the bytes can be chosen at random). 490 | 491 | If there is no key in the configuration, the Connection ID is complete. 492 | Otherwise, there are further steps, as described in the two following 493 | subsections. 494 | 495 | Encryption below uses the AES-128-ECB cipher {{NIST-AES-ECB}}. Future standards 496 | could add new algorithms that use other ciphers to provide cryptographic agility 497 | in accordance with {{?RFC7696}}. QUIC-LB implementations SHOULD be extensible to 498 | support new algorithms. 
499 | 500 | ### Special Case: Single Pass Encryption 501 | 502 | When the nonce length and server ID length sum to exactly 16 octets, the server 503 | MUST use a single-pass encryption algorithm. All connection ID octets except the 504 | first form an AES-ECB block. This block is encrypted once, and the result forms 505 | the second through seventeenth most significant bytes of the connection ID. 506 | 507 | ### General Case: Four-Pass Encryption 508 | 509 | Any other field length requires four passes for encryption and at least three 510 | for decryption. To understand this algorithm, it is useful to define four 511 | functions that minimize the amount of bit-shifting necessary in the event that 512 | there are an odd number of octets. 513 | 514 | When configured with both a key, and a nonce length and server ID length that 515 | sum to any number other than 16, the server MUST follow the algorithm below to 516 | encrypt the connection ID. 517 | 518 | #### Overview 519 | 520 | The 4-pass algorithm is a four-round Feistel Network with the round function 521 | being AES-ECB. Most modern applications of Feistel Networks have more than four 522 | rounds. The implications of this choice, which is meant to limit the per-packet 523 | compute overhead at load balancers, are discussed in 524 | {{distinguishing-attacks}}. 525 | 526 | The server concatenates the server ID and nonce into a single field, which is 527 | then split into equal halves. In successive passes, one of these halves is 528 | expanded into a 16B plaintext, encrypted with AES-ECB, and the result XORed with 529 | the other half. The diagram below shows the conceptual processing of a plaintext 530 | server ID and nonce into a connection ID. 'FO' stands for 'First Octet'.
531 | 532 | ~~~ aasvg 533 | +-----+-----------+-----------------------+ 534 | | FO | Server ID | Nonce | 535 | +--+--+-----------+-----+-----------------+ 536 | | | 537 | | V 538 | | +-----------------+-----------------+ 539 | | | left_0 | right_0 | 540 | | +--+--------------+--------------+--+ 541 | | | | 542 | | | | 543 | | | .--------. V 544 | | +-------->| AES-ECB +-------->⊕ 545 | | | '--------' | 546 | | V .--------. | right_1 547 | | ⊕<-----------+ AES-ECB |<-----+ 548 | | | '--------' | 549 | | | left_1 .--------. V 550 | | +-------->| AES-ECB +-------->⊕ 551 | | | '--------' | 552 | | V .--------. | 553 | | ⊕<-----------+ AES-ECB |<-----+ 554 | | | '--------' | 555 | | | | 556 | | V V 557 | | +-----------------+-----------------+ 558 | | | left_2 | right_2 | 559 | | +-------+---------+--------+--------+ 560 | | | | 561 | V V V 562 | +-----+-----------------------------------+ 563 | | FO | Ciphertext | 564 | +-----+-----------------------------------+ 565 | ~~~ 566 | 567 | #### Useful functions 568 | 569 | Two functions are useful to define: 570 | 571 | The expand(length, pass, input_bytes) function concatenates three arguments and 572 | outputs 16 zero-padded octets. 573 | 574 | The output of expand is as follows: 575 | 576 | ~~~pseudocode 577 | ExpandResult { 578 | input_bytes(...), 579 | ZeroPad(...), 580 | length(8), 581 | pass(8) 582 | } 583 | ~~~ 584 | 585 | in which: 586 | 587 | * 'input_bytes' is drawn from one half of the plaintext. It forms the N most 588 | significant octets of the output, where N is half the 'length' argument, rounded 589 | up, and thus a number between 3 and 10, inclusive. 590 | 591 | * 'Zeropad' is a set of 14-N octets set to zero. 592 | 593 | * 'length' is an 8-bit integer that reports the sum of the configured nonce 594 | length and server id length in octets, and forms the fifteenth octet of the 595 | output. The 'length' argument MUST NOT exceed 19 and MUST NOT be less than 5. 
596 | 597 | * 'pass' is an 8-bit integer that reports the 'pass' argument of the algorithm, 598 | and forms the sixteenth (least significant) octet of the output. It guarantees 599 | that the cryptographic input of every pass of the algorithm is unique. 600 | 601 | For example, 602 | 603 | ~~~pseudocode 604 | expand(0x06, 0x02, 0xaaba3c) = 0xaaba3c00000000000000000000000602 605 | ~~~ 606 | 607 | Similarly, truncate(input, n) returns the first n octets of 'input'. 608 | 609 | ~~~pseudocode 610 | truncate(0x2094842ca49256198c2deaa0ba53caa0, 4) = 0x2094842c 611 | ~~~ 612 | 613 | Let 'half_len' be equal to 'plaintext_len' / 2, rounded up. 614 | 615 | #### Algorithm Description 616 | 617 | The example at the end of this section helps to clarify the steps described 618 | below. 619 | 620 | 1. The server concatenates the server ID and nonce to create plaintext_CID. The 621 | length of the result in octets is plaintext_len. 622 | 623 | 2. The server splits plaintext_CID into components left_0 and right_0 of equal 624 | length half_len. If plaintext_len is odd, right_0 clears its first four bits, 625 | and left_0 clears its last four bits. For example, 0x7040b81b55ccf3 would split 626 | into a left_0 of 0x7040b810 and right_0 of 0x0b55ccf3. 627 | 628 | 3. Encrypt the result of expand(plaintext_len, 1, left_0) using an AES-128-ECB 629 | cipher to obtain a ciphertext. 630 | 631 | 4. XOR the first half_len octets of the ciphertext with right_0 to form right_1. 632 | Steps 3 and 4 can be summarized as 633 | 634 | ~~~pseudocode 635 | result = AES_ECB(key, expand(plaintext_len, 1, left_0)) 636 | right_1 = XOR(right_0, truncate(result, half_len)) 637 | ~~~ 638 | 639 | {:start="5"} 640 | 5. If the plaintext_len is odd, clear the first four bits of right_1. 641 | 642 | 6. Repeat steps 3 and 4, but use them to compute left_1 by expanding and 643 | encrypting right_1 with pass = 2, and XOR the results with left_0.
644 | 645 | ~~~pseudocode 646 | result = AES_ECB(key, expand(plaintext_len, 2, right_1)) 647 | left_1 = XOR(left_0, truncate(result, half_len)) 648 | ~~~ 649 | 650 | {:start="7"} 651 | 7. If the plaintext_len is odd, clear the last four bits of left_1. 652 | 653 | 8. Repeat steps 3 and 4, but use them to compute right_2 by expanding and 654 | encrypting left_1 with pass = 3, and XOR the results with right_1. 655 | 656 | ~~~pseudocode 657 | result = AES_ECB(key, expand(plaintext_len, 3, left_1)) 658 | right_2 = XOR(right_1, truncate(result, half_len)) 659 | ~~~ 660 | 661 | {:start="9"} 662 | 9. If the plaintext_len is odd, clear the first four bits of right_2. 663 | 664 | 10. Repeat steps 3 and 4, but use them to compute left_2 by expanding and 665 | encrypting right_2 with pass = 4, and XOR the results with left_1. 666 | 667 | ~~~pseudocode 668 | result = AES_ECB(key, expand(plaintext_len, 4, right_2)) 669 | left_2 = XOR(left_1, truncate(result, half_len)) 670 | ~~~ 671 | 672 | {:start="11"} 673 | 11. If the plaintext_len is odd, clear the last four bits of left_2. 674 | 675 | 12. The server concatenates left_2 with right_2 to form the ciphertext CID, 676 | which it appends to the first octet. If plaintext_len is odd, the four 677 | least significant bits of left_2 and four most significant bits of right_2, 678 | which are all zero, are stripped off before concatenation to make the 679 | resulting ciphertext the same length as the original plaintext. 680 | 681 | #### Encryption Example 682 | 683 | The following example executes the steps for the provided inputs. Note that the 684 | plaintext is of odd octet length, so the middle octet will be split evenly 685 | between left_0 and right_0.
686 | 687 | ~~~pseudocode 688 | server_id = 0x31441a 689 | nonce = 0x9c69c275 690 | key = 0xfdf726a9893ec05c0632d3956680baf0 691 | 692 | // step 1 693 | plaintext_CID = 0x31441a9c69c275 694 | plaintext_len = 7 695 | 696 | // step 2 697 | half_len = 4 698 | left_0 = 0x31441a90 699 | right_0 = 0x0c69c275 700 | 701 | // step 3 702 | aes_input = 0x31441a90000000000000000000000701 703 | aes_output = 0xa255dd8cdacf01948d3a848c3c7fee23 704 | 705 | // step 4 706 | right_1 = 0x0c69c275 ^ 0xa255dd8c = 0xae3c1ff9 707 | 708 | // step 5 (clear bits) 709 | right_1 = 0x0e3c1ff9 710 | 711 | // step 6 712 | aes_input = 0x0e3c1ff9000000000000000000000702 713 | aes_output = 0xe5e452cb9e1bedb0b2bf830506bf4c4e 714 | left_1 = 0x31441a90 ^ 0xe5e452cb = 0xd4a0485b 715 | 716 | // step 7 (clear bits) 717 | left_1 = 0xd4a04850 718 | 719 | // step 8 720 | aes_input = 0xd4a04850000000000000000000000703 721 | aes_output = 0xb7821ab3024fed0913b6a04d18e3216f 722 | right_2 = 0x0e3c1ff9 ^ 0xb7821ab3 = 0xb9be054a 723 | 724 | // step 9 (clear bits) 725 | right_2 = 0x09be054a 726 | 727 | // step 10 728 | aes_input = 0x09be054a000000000000000000000704 729 | aes_output = 0xb334357cfdf81e3fafe180154eaf7378 730 | left_2 = 0xd4a04850 ^ 0xb334357c = 0x67947d2c 731 | 732 | // step 11 (clear bits) 733 | left_2 = 0x67947d20 734 | 735 | // step 12 736 | cid = first_octet || left_2 || right_2 = 0x0767947d29be054a 737 | ~~~ 738 | 739 | ## Load Balancer Actions 740 | 741 | On each incoming packet, the load balancer extracts consecutive octets, 742 | beginning with the second octet. If there is no key, the first octets 743 | correspond to the server ID. 744 | 745 | If there is a key, the load balancer takes one of two actions: 746 | 747 | ### Special Case: Single Pass Encryption 748 | 749 | If server ID length and nonce length sum to exactly 16 octets, they form a 750 | ciphertext block.
The load balancer decrypts the block using the AES-ECB key 751 | and extracts the server ID from the most significant bytes of the resulting 752 | plaintext. 753 | 754 | ### General Case: Four-Pass Encryption 755 | 756 | First, split the ciphertext CID (excluding the first octet) into its equal- 757 | length components left_2 and right_2. Then follow the process below: 758 | 759 | ~~~pseudocode 760 | result = AES_ECB(key, expand(plaintext_len, 4, right_2)) 761 | left_1 = XOR(left_2, truncate(result, half_len)) 762 | if (plaintext_len_is_odd()) clear_last_bits(left_1, 4) 763 | 764 | result = AES_ECB(key, expand(plaintext_len, 3, left_1)) 765 | right_1 = XOR(right_2, truncate(result, half_len)) 766 | if (plaintext_len_is_odd()) clear_first_bits(right_1, 4) 767 | 768 | result = AES_ECB(key, expand(plaintext_len, 2, right_1)) 769 | left_0 = XOR(left_1, truncate(result, half_len)) 770 | if (plaintext_len_is_odd()) clear_last_bits(left_0, 4) 771 | ~~~ 772 | 773 | As the load balancer has no need for the nonce, it can conclude after 3 passes 774 | as long as the server ID is entirely contained in left_0 (i.e., the nonce is at 775 | least as large as the server ID). If the server ID is longer, a fourth pass 776 | is necessary: 777 | 778 | ~~~pseudocode 779 | result = AES_ECB(key, expand(plaintext_len, 1, left_0)) 780 | right_0 = XOR(right_1, truncate(result, half_len)) 781 | if (plaintext_len_is_odd()) clear_first_bits(right_0, 4) 782 | ~~~ 783 | 784 | and the load balancer has to concatenate left_0 and right_0 to obtain the 785 | complete server ID. 786 | 787 | # Per-connection state {#per-connection-state} 788 | 789 | The CID allocation methods QUIC-LB defines require no per-connection state at 790 | the load balancer, with a few conditional exceptions described in 791 | {{unroutable}}. Otherwise, the load balancer can extract the server ID from 792 | the connection ID of each incoming packet and route that packet accordingly.
793 | 794 | However, once a routing decision has been made, the load balancer MAY 795 | associate the 4-tuple or connection ID with the decision. This has two 796 | advantages: 797 | 798 | * The load balancer only extracts the server ID once until the 4-tuple or 799 | connection ID changes. When the CID is encrypted, this might reduce 800 | computational load. 801 | 802 | * Incoming Stateless Reset packets and ICMP messages are easily routed to the 803 | correct origin server. 804 | 805 | In addition to the increased state requirements, however, load balancers cannot 806 | detect the packets that indicate the end of the connection, so they rely on a 807 | timeout to delete connection state. There are numerous considerations around 808 | setting such a timeout. 809 | 810 | In the event a connection ends, freeing an IP and port, and a different 811 | connection migrates to that IP and port before the timeout, the load balancer 812 | will misroute the different connection's packets to the original server. A short 813 | timeout limits the likelihood of such a misrouting. 814 | 815 | Furthermore, if a short timeout causes premature deletion of state, the routing 816 | is easily recoverable by decoding an incoming Connection ID. However, a short 817 | timeout also reduces the chance that an incoming Stateless Reset is correctly 818 | routed. 819 | 820 | Note that some heuristics to purge state early can introduce Denial of Service 821 | vulnerabilities. For example, one heuristic might delete flow state once the 822 | load balancer observes a routable CID on that flow. An attacker that can observe 823 | a target flow can store a routable CID from a previous connection and spoof the 824 | target flow's 4-tuple with the routable CID, causing premature deletion of that 825 | state. 
826 | 827 | Servers MAY implement the technique described in {{Section 14.4.1 of RFC9000}} 828 | in case the load balancer is stateless, to increase the likelihood a Source 829 | Connection ID is included in ICMP responses to Path Maximum Transmission Unit 830 | (PMTU) probes. Load balancers MAY parse the echoed packet to extract the Source 831 | Connection ID, if it contains a QUIC long header, and extract the Server ID as 832 | if it were in a Destination CID. 833 | 834 | # Additional Use Cases 835 | 836 | This section discusses considerations for some deployment scenarios not implied 837 | by the specification above. 838 | 839 | ## Load balancer chains {#lb-chains} 840 | 841 | Some network architectures may have multiple tiers of low-state load balancers, 842 | where a first tier of devices makes a routing decision to the next tier, and so 843 | on, until packets reach the server. Although QUIC-LB is not explicitly designed 844 | for this use case, it is possible to support it. 845 | 846 | If each load balancer is assigned a range of server IDs that is a subset of the 847 | range of IDs assigned to devices that are closer to the client, then the first 848 | devices to process an incoming packet can extract the server ID and then map it 849 | to the correct forwarding address. Note that this solution is extensible to 850 | arbitrarily large numbers of load-balancing tiers, as the maximum server ID 851 | space is quite large. 852 | 853 | If the number of necessary server IDs per next hop is uniform, a simple 854 | implementation would use successively longer server IDs at each tier of load 855 | balancing, and the server configuration would match the last tier. Load 856 | balancers closer to the client can then treat any parts of the server ID they 857 | did not use as part of the nonce. 
858 | 859 | ## Server Process Demultiplexing 860 | 861 | QUIC servers might have QUIC running on multiple processes or threads listening 862 | on the same address, and have a need to demultiplex between them. In principle, 863 | this demultiplexer is a Layer 4 load balancer, and the guidance in {{lb-chains}} 864 | applies. However, in many deployments the demultiplexer lacks the capability to 865 | perform decryption operations. Internal server coordination is out of scope of 866 | this specification, but this non-normative section proposes some approaches 867 | that could work given certain server capabilities: 868 | 869 | * Some bytes of the server ID are reserved to encode the process ID. The 870 | demultiplexer might operate based on the 4-tuple or other legacy indicator, but 871 | the receiving server process extracts the server ID, and if it does not match 872 | the one for that process, the process could "toss" the packet to the correct 873 | destination process. 874 | 875 | * Each process could register the connection IDs it generates with the 876 | demultiplexer, which routes those connection IDs accordingly. 877 | 878 | * In a combination of the two approaches above, the demultiplexer generally 879 | routes by 4-tuple. After a migration, the process tosses the first flight of 880 | packets and registers the new connection ID with the demultiplexer. This 881 | alternative limits the bandwidth consumption of tossing and the memory footprint 882 | of a full connection ID table. 883 | 884 | * When generating a connection ID, the server writes the process ID to the 885 | random field of the first octet, or if this is being used for length encoding, 886 | in an octet it appends after the ciphertext. It then applies a keyed hash (with 887 | a key locally generated for the sole use of that server). The hash result is 888 | used as a bitmask to XOR with the bits encoding the process ID. 
On packet 889 | receipt, the demultiplexer applies the same keyed hash to generate the same 890 | mask and recovers the process ID. (Note that this approach is conceptually 891 | similar to QUIC header protection.) It is important that the server also appends 892 | the process ID to the server ID in the plaintext, so that different processes do 893 | not generate the same ciphertext. The load balancer will consider this data to 894 | be part of the nonce. 895 | 896 | ## Moving connections between servers 897 | 898 | Some deployments may transparently move a connection from one server to another. 899 | The means of transferring connection state between servers is out of scope of 900 | this document. 901 | 902 | To support a handover, a server involved in the transition could issue CIDs that 903 | map to the new server via a NEW_CONNECTION_ID frame, and retire CIDs associated 904 | with the old server using the "Retire Prior To" field in that frame. 905 | 906 | # Version Invariance of QUIC-LB {#version-invariance} 907 | 908 | The server ID encodings, and requirements for their handling, are designed to be 909 | QUIC version independent (see {{?RFC8999}}). A QUIC-LB load balancer will 910 | generally not require changes as servers deploy new versions of QUIC. However, 911 | there are several unlikely future design decisions that could impact the 912 | operation of QUIC-LB. 913 | 914 | A QUIC version might define limits on connection ID length that make some or all 915 | of the mechanisms in this document unusable. For example, a maximum connection 916 | ID length could be below the minimum necessary to use all or part of this 917 | specification; or, the minimum connection ID length could be larger than the 918 | largest value in this specification. Similarly, the length self-encoding 919 | specification cannot accommodate connection IDs longer than 32 bytes.
920 | 921 | The advanced fallback implementation supports a requirement to inspect version- 922 | specific elements of packets to make a routing decision, such as the Server Name 923 | Indication (SNI) extension in the TLS Client Hello. The format and 924 | cryptographic protection of this information may change in future versions or 925 | extensions of TLS or QUIC, and therefore this functionality is inherently 926 | version-dependent. Such a load balancer, when it receives packets from an 927 | unknown QUIC version, might misdirect initial packets to the wrong tenant. While 928 | this can be inefficient, the design in this document preserves the ability for 929 | tenants to deploy new versions provided they have an out-of-band means of 930 | providing a connection ID for the client to use. 931 | 932 | {{load-balancer-forwarding}} provides guidance about how load balancers should 933 | handle unroutable DCIDs. This guidance, and the implementation of an algorithm 934 | to handle these DCIDs, rests on some assumptions about packets that contain 935 | client-generated DCIDs that are not specified in RFC 8999: 936 | 937 | 1. they do not have short headers; 938 | 1. the 4-tuple remains constant; 939 | 1. if the load-balancer uses the Advanced Fallback Algorithm, the packets have 940 | a constant Source Connection ID. 941 | 942 | While this document does not update the commitments in {{RFC8999}}, the 943 | additional assumptions are minimal and narrowly scoped, and provide a likely 944 | set of constants that load balancers can use with minimal risk of version- 945 | dependence. 946 | 947 | If these assumptions are not valid, this specification is likely to lead to loss 948 | of packets that contain unroutable DCIDs, and in extreme cases connection 949 | failure. A QUIC version that violates the assumptions in this section therefore 950 | cannot be safely deployed with a load balancer that follows this specification. 
951 | An updated or alternative version of this specification might address these 952 | shortcomings for such a QUIC version. 953 | 954 | # Security Considerations {#security-considerations} 955 | 956 | QUIC-LB is intended to prevent linkability. Attacks would therefore attempt to 957 | subvert this purpose. 958 | 959 | Note that without a key for the encoding, QUIC-LB makes no attempt to obscure 960 | the server mapping, and therefore does not address these concerns. Without a 961 | key, QUIC-LB merely allows consistent CID encoding for compatibility across a 962 | network infrastructure, which makes QUIC robust to NAT rebinding. Servers that 963 | are encoding their server ID without a key algorithm SHOULD only use it to 964 | generate new CIDs for the Server Initial Packet and SHOULD NOT send CIDs in QUIC 965 | NEW_CONNECTION_ID frames, except that they send one new Connection ID in the 966 | event of config rotation {{config-rotation}}. Doing so might falsely suggest to 967 | the client that said CIDs were generated in a secure fashion. 968 | 969 | A linkability attack would find some means of determining that two connection 970 | IDs route to the same server. Due to the limitations of measures at the QUIC layer, 971 | there is no scheme that strictly prevents linkability for all traffic patterns. 972 | 973 | To see why, consider two limits. At one extreme, one client is connected to the 974 | server pool and migrates its address. An observer can easily link the two 975 | addresses, and there is no remedy at the QUIC layer. 976 | 977 | At the other extreme, a very large number of clients are connected to each 978 | server, and they all migrate address constantly. At this limit, even an 979 | unencrypted server ID encoding is unlikely to definitively link two addresses. 980 | 981 | Therefore, efforts to frustrate any analysis of server ID encoding have 982 | diminishing returns.
Nevertheless, this specification seeks to minimize the 983 | probability two addresses can be linked. 984 | 985 | ## Attackers not between the load balancer and server 986 | 987 | Any attacker might open a connection to the server infrastructure and 988 | aggressively simulate migration to obtain a large sample of IDs that map to the 989 | same server. It could then apply analytical techniques to try to obtain the 990 | server encoding. 991 | 992 | An encrypted encoding provides robust protection against this. An unencrypted 993 | one provides none. 994 | 995 | Were this analysis to obtain the server encoding, then on-path observers might 996 | apply this analysis to correlating different client IP addresses. 997 | 998 | ## Attackers between the load balancer and server 999 | 1000 | Attackers in this privileged position are intrinsically able to map two 1001 | connection IDs to the same server. These algorithms ensure that two connection 1002 | IDs for the same connection cannot be identified as such as long as the server 1003 | chooses the first octet and any plaintext nonce correctly. 1004 | 1005 | ## Multiple Configuration IDs {#multiple-configs} 1006 | 1007 | During the period in which there are multiple deployed configuration IDs (see 1008 | {{config-rotation}}), there is a slight increase in linkability. The server 1009 | space is effectively divided into segments with CIDs that have different config 1010 | rotation bits. Entities that manage servers SHOULD strive to minimize these 1011 | periods by quickly deploying new configurations across the server pool. 1012 | 1013 | ## Limited configuration scope 1014 | 1015 | A simple deployment of QUIC-LB in a cloud provider might use the same global 1016 | QUIC-LB configuration across all its load balancers that route to customer 1017 | servers. An attacker could then simply become a customer, obtain the 1018 | configuration, and then extract server IDs of other customers' connections at 1019 | will. 
1020 | 1021 | To avoid this, the configuration agent SHOULD issue QUIC-LB configurations to 1022 | mutually distrustful servers that have different keys for encryption 1023 | algorithms. In many cases, the load balancers can distinguish these 1024 | configurations by external IP address. 1025 | 1026 | However, assigning multiple entities to an IP address is complementary to 1027 | concealing DNS requests (e.g., DoH {{?RFC8484}}) and the TLS Server Name 1028 | Indication (SNI) ({{?I-D.ietf-tls-esni}}) to obscure the ultimate destination 1029 | of traffic. While the load balancer's fallback algorithm 1030 | ({{fallback-algorithm}}) can use the SNI to make a routing decision on the 1031 | first packet, there are three ways to route subsequent packets: 1032 | 1033 | * all co-tenants can use the same QUIC-LB configuration, leaking the server 1034 | mapping to each other as described above; 1035 | 1036 | * co-tenants can be issued one of up to seven configurations distinguished by 1037 | the config rotation bits ({{config-rotation}}), exposing information about the 1038 | target domain to the entire network; or 1039 | 1040 | * tenants can use the 0b111 codepoint in their CIDs (in which case they SHOULD 1041 | disable migration in their connections), which neutralizes the value of 1042 | QUIC-LB but preserves privacy. 1043 | 1044 | When configuring QUIC-LB, administrators evaluate the privacy tradeoff by 1045 | considering the relative value of each of these properties, given the trust 1046 | model between tenants, the presence of methods to obscure the domain name, and 1047 | value of address migration in the tenant use cases. 1048 | 1049 | As the plaintext algorithm makes no attempt to conceal the server mapping, 1050 | these deployments MAY simply use a common configuration. 1051 | 1052 | ## Stateless Reset Oracle 1053 | 1054 | Section 21.9 of {{RFC9000}} discusses the Stateless Reset Oracle attack.
For a 1055 | server deployment to be vulnerable, an attacking client must be able to cause 1056 | two packets with the same Destination CID to arrive at two different servers 1057 | that share the same cryptographic context for Stateless Reset tokens. As QUIC-LB 1058 | requires deterministic routing of DCIDs over the life of a connection, it is a 1059 | sufficient means of avoiding an Oracle without additional measures. 1060 | 1061 | Note also that when a server starts using a new QUIC-LB config rotation 1062 | codepoint, new CIDs might not be unique with respect to previous configurations 1063 | that occupied that codepoint, and therefore different clients may have observed 1064 | the same CID and stateless reset token. A straightforward method of managing 1065 | stateless reset keys is to maintain a separate key for each config rotation 1066 | codepoint, and replace each key when the configuration for that codepoint 1067 | changes. Thus, when a server transitions from one config to another, it will be able 1068 | to generate correct tokens for connections using either type of CID. 1069 | 1070 | ## Connection ID Entropy {#cid-entropy} 1071 | 1072 | If a server ever reuses a nonce in generating a CID for a given configuration, 1073 | it risks exposing sensitive information. Given the same server ID, the CID will 1074 | be identical (aside from a possible difference in the first octet). This can 1075 | risk exposure of the QUIC-LB key. If two clients receive the same connection ID, 1076 | they also have each other's stateless reset token unless that key has changed in 1077 | the interim. 1078 | 1079 | The encrypted mode needs to generate different cipher text for each generated 1080 | Connection ID instance to protect the Server ID. To do so, at least four octets 1081 | of the CID are reserved for a nonce that, if used only once, will result in 1082 | unique cipher text for each Connection ID.
1083 | 1084 | If servers simply increment the nonce by one with each generated connection ID, 1085 | then it is safe to use the existing keys until any server's nonce counter 1086 | exhausts the allocated space and rolls over. To maximize entropy, servers SHOULD 1087 | start with a random nonce value, in which case the configuration is usable until 1088 | the nonce value wraps around to zero and then reaches the initial value again. 1089 | 1090 | Whether or not it implements the counter method, the server MUST NOT reuse a 1091 | nonce until it switches to a configuration with new keys. 1092 | 1093 | Servers are forbidden from generating linkable plaintext nonces, because 1094 | observable correlations between plaintext nonces would provide trivial 1095 | linkability between individual connections, rather than just to a common server. 1096 | 1097 | For any algorithm, configuration agents SHOULD implement an out-of-band method 1098 | to discover when servers are in danger of exhausting their nonce space, and 1099 | SHOULD respond by issuing a new configuration. A server that has exhausted its 1100 | nonces MUST either switch to a different configuration, or if none exists, use 1101 | the 4-tuple routing config rotation codepoint. 1102 | 1103 | When sizing a nonce that is to be randomly generated, the configuration agent 1104 | SHOULD consider that a server generating an N-bit nonce will create a duplicate 1105 | about every 2^(N/2) attempts, and therefore compare the expected rate at which 1106 | servers will generate CIDs with the lifetime of a configuration. 1107 | 1108 | ## Distinguishing Attacks {#distinguishing-attacks} 1109 | 1110 | The Four Pass Encryption algorithm is structured as a 4-round Feistel network 1111 | with a non-bijective round function. As such, it does not offer a very high 1112 | security level against distinguishing attacks, as explained in [Patarin2008].
1113 | Attackers can mount these attacks if they are in possession of O(SQRT(len/2)) 1114 | pairs of ciphertext and known corresponding plain text, where "len" is the 1115 | sum of the lengths of the Server ID and the Nonce. 1116 | 1117 | The authors considered increasing the number of passes from 4 to 12, 1118 | which would definitely block these attacks. However, this would require 1119 | 12 rounds of AES decryption by load balancers accessing the CID, a cost deemed 1120 | prohibitive in the planned deployments. 1121 | 1122 | The attacks described in [Patarin2008] rely on known plain text. In a normal 1123 | deployment, the plain text is only known by the server that generates the ID 1124 | and by the load balancer that decrypts the content of the CID. Attackers 1125 | would have to compensate by guesses about the allocation of server identifiers 1126 | or the generation of nonces. These attacks are thus mitigated by making nonces 1127 | hard to guess, as specified in {{cid-entropy}}, and by rules related to mixed 1128 | deployments that use both clear text CID and encrypted CID, for example when 1129 | transitioning from clear text to encryption. Such deployments MUST use different 1130 | server ID allocations for the clear text and the encrypted versions. 1131 | 1132 | These attacks cannot be mounted against the Single Pass Encryption algorithm. 1133 | 1134 | ## Early deletion of load balancer connection state 1135 | 1136 | Potential vulnerabilities related to heuristics that delete per-connection state 1137 | are described in {{per-connection-state}}. Under certain assumptions about 1138 | server configuration and fallback algorithm, this state might be critical to 1139 | maintaining connectivity. Under other assumptions, the state provides robustness 1140 | to improbable network events. 1141 | 1142 | # IANA Considerations 1143 | 1144 | There are no IANA requirements.
1145 | 1146 | --- back 1147 | 1148 | # QUIC-LB YANG Model {#yang-model} 1149 | 1150 | These YANG models conform to {{?RFC6020}} and express a complete QUIC-LB 1151 | configuration. There is one model for the server and one for the middlebox 1152 | (i.e., the load balancer and/or Retry Service). 1153 | 1154 | ~~~ 1155 | module ietf-quic-lb-server { 1156 | yang-version "1.1"; 1157 | namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb"; 1158 | prefix "quic-lb"; 1159 | 1160 | import ietf-yang-types { 1161 | prefix yang; 1162 | reference 1163 | "RFC 6991: Common YANG Data Types."; 1164 | } 1165 | 1166 | import ietf-inet-types { 1167 | prefix inet; 1168 | reference 1169 | "RFC 6991: Common YANG Data Types."; 1170 | } 1171 | 1172 | organization 1173 | "IETF QUIC Working Group"; 1174 | 1175 | contact 1176 | "WG Web: 1177 | WG List: 1178 | 1179 | Authors: Martin Duke (martin.h.duke at gmail dot com) 1180 | Nick Banks (nibanks at microsoft dot com) 1181 | Christian Huitema (huitema at huitema.net)"; 1182 | 1183 | description 1184 | "This module enables the explicit cooperation of QUIC servers 1185 | with trusted intermediaries without breaking important 1186 | protocol features. 1187 | 1188 | Copyright (c) 2022 IETF Trust and the persons identified as 1189 | authors of the code. All rights reserved. 1190 | 1191 | Redistribution and use in source and binary forms, with or 1192 | without modification, is permitted pursuant to, and subject to 1193 | the license terms contained in, the Simplified BSD License set 1194 | forth in Section 4.c of the IETF Trust's Legal Provisions 1195 | Relating to IETF Documents 1196 | (https://trustee.ietf.org/license-info). 1197 | 1198 | This version of this YANG module is part of RFC XXXX 1199 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself 1200 | for full legal notices.
1201 | 1202 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL 1203 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED', 1204 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as 1205 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when, 1206 | they appear in all capitals, as shown here."; 1207 | 1208 | revision "2023-07-14" { 1209 | description 1210 | "Updated to design in version 17 of the draft"; 1211 | reference 1212 | "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs"; 1213 | } 1214 | 1215 | container quic-lb { 1216 | presence "The container for QUIC-LB configuration."; 1217 | 1218 | description 1219 | "QUIC-LB container."; 1220 | 1221 | typedef quic-lb-key { 1222 | type yang:hex-string { 1223 | length 47; 1224 | } 1225 | description 1226 | "This is a 16-byte key, represented with 47 bytes"; 1227 | } 1228 | 1229 | leaf config-id { 1230 | type uint8 { 1231 | range "0..6"; 1232 | } 1233 | mandatory true; 1234 | description 1235 | "Identifier for this CID configuration."; 1236 | } 1237 | 1238 | leaf first-octet-encodes-cid-length { 1239 | type boolean; 1240 | default false; 1241 | description 1242 | "If true, the six least significant bits of the first 1243 | CID octet encode the CID length minus one."; 1244 | } 1245 | 1246 | leaf server-id-length { 1247 | type uint8 { 1248 | range "1..15"; 1249 | } 1250 | must '. <= (19 - ../nonce-length)' { 1251 | error-message 1252 | "Server ID and nonce lengths must sum 1253 | to no more than 19."; 1254 | } 1255 | mandatory true; 1256 | description 1257 | "Length (in octets) of a server ID. Further range-limited 1258 | by nonce-length."; 1259 | } 1260 | 1261 | leaf nonce-length { 1262 | type uint8 { 1263 | range "4..18"; 1264 | } 1265 | mandatory true; 1266 | description 1267 | "Length, in octets, of the nonce. 
Short nonces mean there 1268 | will be frequent configuration updates."; 1269 | } 1270 | 1271 | leaf cid-key { 1272 | type quic-lb-key; 1273 | description 1274 | "Key for encrypting the connection ID."; 1275 | } 1276 | 1277 | leaf server-id { 1278 | type yang:hex-string; 1279 | must "string-length(.) = 3 * ../../server-id-length - 1"; 1280 | mandatory true; 1281 | description 1282 | "An allocated server ID"; 1283 | } 1284 | } 1285 | } 1286 | ~~~ 1287 | 1288 | ~~~ 1289 | module ietf-quic-lb-middlebox { 1290 | yang-version "1.1"; 1291 | namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb"; 1292 | prefix "quic-lb"; 1293 | 1294 | import ietf-yang-types { 1295 | prefix yang; 1296 | reference 1297 | "RFC 6991: Common YANG Data Types."; 1298 | } 1299 | 1300 | import ietf-inet-types { 1301 | prefix inet; 1302 | reference 1303 | "RFC 6991: Common YANG Data Types."; 1304 | } 1305 | 1306 | organization 1307 | "IETF QUIC Working Group"; 1308 | 1309 | contact 1310 | "WG Web: 1311 | WG List: 1312 | 1313 | Authors: Martin Duke (martin.h.duke at gmail dot com) 1314 | Nick Banks (nibanks at microsoft dot com) 1315 | Christian Huitema (huitema at huitema.net)"; 1316 | 1317 | description 1318 | "This module enables the explicit cooperation of QUIC servers 1319 | with trusted intermediaries without breaking important 1320 | protocol features. 1321 | 1322 | Copyright (c) 2021 IETF Trust and the persons identified as 1323 | authors of the code. All rights reserved. 1324 | 1325 | Redistribution and use in source and binary forms, with or 1326 | without modification, is permitted pursuant to, and subject to 1327 | the license terms contained in, the Simplified BSD License set 1328 | forth in Section 4.c of the IETF Trust's Legal Provisions 1329 | Relating to IETF Documents 1330 | (https://trustee.ietf.org/license-info). 1331 | 1332 | This version of this YANG module is part of RFC XXXX 1333 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself 1334 | for full legal notices. 
1335 | 1336 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL 1337 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED', 1338 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as 1339 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when, 1340 | they appear in all capitals, as shown here."; 1341 | 1342 | revision "2021-02-11" { 1343 | description 1344 | "Updated to design in version 13 of the draft"; 1345 | reference 1346 | "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs"; 1347 | } 1348 | 1349 | container quic-lb { 1350 | presence "The container for QUIC-LB configuration."; 1351 | 1352 | description 1353 | "QUIC-LB container."; 1354 | 1355 | typedef quic-lb-key { 1356 | type yang:hex-string { 1357 | length 47; 1358 | } 1359 | description 1360 | "This is a 16-byte key, represented with 47 bytes"; 1361 | } 1362 | 1363 | list cid-configs { 1364 | key "config-rotation-bits"; 1365 | description 1366 | "List up to three load balancer configurations"; 1367 | 1368 | leaf config-rotation-bits { 1369 | type uint8 { 1370 | range "0..2"; 1371 | } 1372 | mandatory true; 1373 | description 1374 | "Identifier for this CID configuration."; 1375 | } 1376 | 1377 | leaf server-id-length { 1378 | type uint8 { 1379 | range "1..15"; 1380 | } 1381 | must '. <= (19 - ../nonce-length)' { 1382 | error-message 1383 | "Server ID and nonce lengths must sum to 1384 | no more than 19."; 1385 | } 1386 | mandatory true; 1387 | description 1388 | "Length (in octets) of a server ID. Further range-limited 1389 | by nonce-length."; 1390 | } 1391 | 1392 | leaf cid-key { 1393 | type quic-lb-key; 1394 | description 1395 | "Key for encrypting the connection ID."; 1396 | } 1397 | 1398 | leaf nonce-length { 1399 | type uint8 { 1400 | range "4..18"; 1401 | } 1402 | mandatory true; 1403 | description 1404 | "Length, in octets, of the nonce. 
Short nonces mean there 1405 | will be frequent configuration updates."; 1406 | } 1407 | 1408 | list server-id-mappings { 1409 | key "server-id"; 1410 | description "Statically allocated Server IDs"; 1411 | 1412 | leaf server-id { 1413 | type yang:hex-string; 1414 | must "string-length(.) = 3 * ../../server-id-length - 1"; 1415 | mandatory true; 1416 | description 1417 | "An allocated server ID"; 1418 | 1419 | } 1420 | 1421 | leaf server-address { 1422 | type inet:ip-address; 1423 | mandatory true; 1424 | description 1425 | "Destination address corresponding to the server ID"; 1426 | } 1427 | } 1428 | } 1429 | } 1430 | } 1431 | ~~~ 1432 | 1433 | ## Tree Diagram 1434 | 1435 | This summary of the YANG models uses the notation in {{?RFC8340}}. 1436 | 1437 | ~~~ 1438 | module: ietf-quic-lb-server 1439 | +--rw quic-lb! 1440 | +--rw config-id uint8 1441 | +--rw first-octet-encodes-cid-length? boolean 1442 | +--rw server-id-length uint8 1443 | +--rw nonce-length uint8 1444 | +--rw cid-key? quic-lb-key 1445 | +--rw server-id yang:hex-string 1446 | ~~~ 1447 | 1448 | ~~~ 1449 | module: ietf-quic-lb-middlebox 1450 | +--rw quic-lb! 1451 | +--rw cid-configs* [config-rotation-bits] 1452 | | +--rw config-rotation-bits uint8 1453 | | +--rw server-id-length uint8 1454 | | +--rw cid-key? quic-lb-key 1455 | | +--rw nonce-length uint8 1456 | | +--rw server-id-mappings* [server-id] 1457 | | +--rw server-id yang:hex-string 1458 | | +--rw server-address inet:ip-address 1459 | ~~~ 1460 | 1461 | # Load Balancer Test Vectors {#test-vectors} 1462 | 1463 | This section uses the following abbreviations: 1464 | 1465 | ~~~ 1466 | cid Connection ID 1467 | cr_bits Config Rotation Bits 1468 | LB Load Balancer 1469 | sid Server ID 1470 | ~~~ 1471 | 1472 | In all cases, the server is configured to encode the CID length. 
1473 | 1474 | ## Unencrypted CIDs 1475 | 1476 | ~~~pseudocode 1477 | cr_bits sid nonce cid 1478 | 0 c4605e 4504cc4f 07c4605e4504cc4f 1479 | 1 350d28b420 3487d970b 20a350d28b4203487d970b 1480 | ~~~ 1481 | 1482 | ## Encrypted CIDs 1483 | 1484 | The key for all of these examples is 8f95f09245765f80256934e50c66207f. The 1485 | test vectors include an example that uses the 16-octet single-pass special 1486 | case, as well as an instance where the server ID length exceeds the nonce 1487 | length, requiring a fourth decryption pass. 1488 | 1489 | ~~~pseudocode 1490 | cr_bits sid nonce cid 1491 | 0 ed793a ee080dbf 0720b1d07b359d3c 1492 | 1 ed793a51d49b8f5fab65 ee080dbf48 1493 | 2fcc381bc74cb4fbad2823a3d1f8fed2 1494 | 2 ed793a51d49b8f5f ee080dbf48c0d1e5 1495 | 504dd2d05a7b0de9b2b9907afb5ecf8cc3 1496 | 3 ed793a51d49b8f5fab ee080dbf48c0d1e55d 1497 | 125779c9cc86beb3a3a4a3ca96fce4bfe0cdbc 1498 | ~~~ 1499 | 1500 | # Interoperability with DTLS over UDP 1501 | 1502 | Some environments may contain DTLS traffic as well as QUIC operating over UDP, 1503 | which may be hard to distinguish. 1504 | 1505 | In most cases, the packet parsing rules above will cause a QUIC-LB load 1506 | balancer to route DTLS traffic in an appropriate way. DTLS 1.3 implementations 1507 | that use the connection_id extension {{?RFC9146}} might use the techniques in 1508 | this document to generate connection IDs and achieve robust routability for DTLS 1509 | associations if they meet a few additional requirements. This non-normative 1510 | appendix describes this interaction. 1511 | 1512 | ## DTLS 1.0 and 1.2 1513 | 1514 | DTLS 1.0 {{?RFC4347}} and 1.2 {{?RFC6347}} use packet formats that a QUIC-LB 1515 | router will interpret as short header packets with CIDs that request 4-tuple 1516 | routing. As such, they will route such packets consistently as long as the 1517 | 4-tuple does not change. Note that DTLS 1.0 has been deprecated by the IETF. 
1518 | 1519 | The first octet of every DTLS 1.0 or 1.2 datagram contains the content type. 1520 | A QUIC-LB load balancer will interpret any content type less than 128 as a short 1521 | header packet, meaning that the subsequent octets should contain a connection 1522 | ID. 1523 | 1524 | Existing TLS content types comfortably fit in the range below 128. Assignment of 1525 | codepoints greater than 64 would require coordination in accordance with 1526 | {{?RFC7983}}, and anyway would likely create problems demultiplexing DTLS and 1527 | version 1 of QUIC. Therefore, this document believes it is extremely unlikely 1528 | that TLS content types of 128 or greater will be assigned. Nevertheless, such 1529 | an assignment would cause a QUIC-LB load balancer to interpret the packet as a 1530 | QUIC long header with an essentially random connection ID, which is likely to be 1531 | routed irregularly. 1532 | 1533 | The second octet of every DTLS 1.0 or 1.2 datagram is the bitwise complement 1534 | of the DTLS Major version (i.e., version 1.x = 0xfe). A QUIC-LB load balancer 1535 | will interpret this as a connection ID that requires 4-tuple based load 1536 | balancing, meaning that the routing will be consistent as long as the 4-tuple 1537 | remains the same. 1538 | 1539 | {{?RFC9146}} defines an extension to add connection IDs to DTLS 1.2. 1540 | Unfortunately, a QUIC-LB load balancer will not correctly parse the connection 1541 | ID and will continue 4-tuple routing. A modified QUIC-LB load balancer that 1542 | correctly identifies DTLS and parses a DTLS 1.2 datagram for the connection ID 1543 | is outside the scope of this document. 1544 | 1545 | ## DTLS 1.3 1546 | 1547 | DTLS 1.3 {{?RFC9147}} changes the structure of datagram headers in relevant 1548 | ways.
1549 | 1550 | Handshake packets continue to have a TLS content type in the first octet and 1551 | 0xfe in the second octet, so they will be 4-tuple routed, which should not 1552 | present problems for likely NAT rebinding or address change events. 1553 | 1554 | Non-handshake packets always have zero in their most significant bit and will 1555 | therefore always be treated as QUIC short headers. If the connection ID is 1556 | present, it follows in the succeeding octets. Therefore, a DTLS 1.3 association 1557 | where the server utilizes Connection IDs and the encodings in this document 1558 | will be routed correctly in the presence of client address and port changes. 1559 | 1560 | However, if the client does not include the connection_id extension in its 1561 | ClientHello, the server is unable to use connection IDs. In this case, 1562 | non-handshake packets will appear to contain random connection IDs and be routed 1563 | randomly. Thus, unmodified QUIC-LB load balancers will not work with DTLS 1.3 1564 | if the client does not advertise support for connection IDs, or the server does 1565 | not request the use of a compliant connection ID. 1566 | 1567 | A QUIC-LB load balancer might be modified to identify DTLS 1.3 packets and 1568 | correctly parse the fields to identify when there is no connection ID and 1569 | revert to 4-tuple routing, removing the server requirement above. However, such 1570 | a modification is outside the scope of this document, and classifying some 1571 | packets as DTLS might be incompatible with future versions of QUIC. 1572 | 1573 | ## Future Versions of DTLS 1574 | 1575 | As DTLS does not have an IETF consensus document that defines what parts of 1576 | DTLS will be invariant in future versions, it is difficult to speculate about 1577 | the applicability of this section to future versions of DTLS.
1578 | 1579 | # Acknowledgments 1580 | 1581 | Manasi Deval, Erik Fuller, Toma Gavrichenkov, Greg Greenway, Jana Iyengar, 1582 | Subodh Iyengar, Stefan Kolbl, Ladislav Lhotka, Jan Lindblad, Ling Tao Nju, 1583 | Ilari Liusvaara, Kazuho Oku, Udip Pant, Zaheduzzaman Sarker, Ian Swett, Andy 1584 | Sykes, Martin Thomson, Dmitri Tikhonov, Victor Vasiliev, Xingcan Lan, Yu Zhu, 1585 | and William Zeng Ke all provided useful input to this document. 1586 | 1587 | # Change Log 1588 | 1589 | > **RFC Editor's Note:** Please remove this section prior to 1590 | > publication of a final version of this document. 1591 | 1592 | ## since draft-ietf-quic-load-balancers-20 1593 | 1594 | - Changed definition of Unroutable DCIDs, and rewrote sections on config 1595 | failover and fallback routing to avoid misrouted connections. 1596 | - Deleted text on dropping packets 1597 | - Rewrote version invariance section 1598 | 1599 | ## since draft-ietf-quic-load-balancers-19 1600 | 1601 | - Further guidance on multiple server processes/threads 1602 | - Fixed error in encryption example. 1603 | - Clarified fallback algorithms and known QUIC versions. 1604 | 1605 | ## since draft-ietf-quic-load-balancers-18 1606 | 1607 | - Rearranged the output of the expand function to reduce CPU load of decrypt 1608 | 1609 | ## since draft-ietf-quic-load-balancers-17 1610 | 1611 | - fixed regressions in draft-17 publication 1612 | 1613 | ## since draft-ietf-quic-load-balancers-16 1614 | 1615 | - added a config ID bit (now there are 3). 1616 | 1617 | ## since draft-ietf-quic-load-balancers-15 1618 | 1619 | - aasvg fixes. 1620 | 1621 | ## since draft-ietf-quic-load-balancers-14 1622 | 1623 | - Revised process demultiplexing text 1624 | - Restored lost text in Security Considerations 1625 | - Editorial comments from Martin Thomson. 
1626 | - Tweaked 4-pass algorithm to avoid accidental plaintext similarities 1627 | 1628 | ## since draft-ietf-quic-load-balancers-13 1629 | 1630 | - Incorporated Connection ID length in argument of truncate function 1631 | - Added requirements for codepoint 0b11. 1632 | - Describe Distinguishing Attack in Security Considerations. 1633 | - Added non-normative language about server process demultiplexers 1634 | 1635 | ## since draft-ietf-quic-load-balancers-12 1636 | 1637 | - Separated Retry Service design into a separate draft 1638 | 1639 | ## since draft-ietf-quic-load-balancers-11 1640 | 1641 | - Fixed mistakes in test vectors 1642 | 1643 | ## since draft-ietf-quic-load-balancers-10 1644 | 1645 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to 1646 | implement 1647 | - Revised test vectors 1648 | - Split YANG model into a server and middlebox version 1649 | 1650 | ## since draft-ietf-quic-load-balancers-09 1651 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and 1652 | "Encrypted Long" 1653 | - Added section on per-connection state 1654 | - Changed "Encrypted Short" to a 4-pass algorithm. 1655 | - Recommended a random initial nonce when incrementing. 1656 | - Clarified what SNI LBs should do with unknown QUIC versions. 
1657 | 1658 | ## since draft-ietf-quic-load-balancers-08 1659 | - Eliminate Dynamic SID allocation 1660 | - Eliminated server use bytes 1661 | 1662 | ## since draft-ietf-quic-load-balancers-07 1663 | - Shortened SSCID nonce minimum length to 4 bytes 1664 | - Removed RSCID from Retry token body 1665 | - Simplified CID formats 1666 | - Shrunk size of SID table 1667 | 1668 | ## since draft-ietf-quic-load-balancers-06 1669 | - Added interoperability with DTLS 1670 | - Changed "non-compliant" to "unroutable" 1671 | - Changed "arbitrary" algorithm to "fallback" 1672 | - Revised security considerations for mistrustful tenants 1673 | - Added retry service considerations for non-Initial packets 1674 | 1675 | ## since draft-ietf-quic-load-balancers-05 1676 | - Added low-config CID for further discussion 1677 | - Complete revision of shared-state Retry Token 1678 | - Added YANG model 1679 | - Updated configuration limits to ensure CID entropy 1680 | - Switched to notation from quic-transport 1681 | 1682 | ## since draft-ietf-quic-load-balancers-04 1683 | - Rearranged the shared-state retry token to simplify token processing 1684 | - More compact timestamp in shared-state retry token 1685 | - Revised server requirements for shared-state retries 1686 | - Eliminated zero padding from the test vectors 1687 | - Added server use bytes to the test vectors 1688 | - Additional compliant DCID criteria 1689 | 1690 | ## since-draft-ietf-quic-load-balancers-03 1691 | - Improved Config Rotation text 1692 | - Added stream cipher test vectors 1693 | - Deleted the Obfuscated CID algorithm 1694 | 1695 | ## since-draft-ietf-quic-load-balancers-02 1696 | - Replaced stream cipher algorithm with three-pass version 1697 | - Updated Retry format to encode info for required TPs 1698 | - Added discussion of version invariance 1699 | - Cleaned up text about config rotation 1700 | - Added Reset Oracle and limited configuration considerations 1701 | - Allow dropped long-header packets for known QUIC 
versions 1702 | 1703 | ## since-draft-ietf-quic-load-balancers-01 1704 | - Test vectors for load balancer decoding 1705 | - Deleted remnants of in-band protocol 1706 | - Light edit of Retry Services section 1707 | - Discussed load balancer chains 1708 | 1709 | ## since-draft-ietf-quic-load-balancers-00 1710 | - Removed in-band protocol from the document 1711 | 1712 | ## Since draft-duke-quic-load-balancers-06 1713 | - Switch to IETF WG draft. 1714 | 1715 | ## Since draft-duke-quic-load-balancers-05 1716 | - Editorial changes 1717 | - Made load balancer behavior independent of QUIC version 1718 | - Got rid of token in stream cipher encoding, because server might not have it 1719 | - Defined "non-compliant DCID" and specified rules for handling them. 1720 | - Added pseudocode for config schema 1721 | 1722 | ## Since draft-duke-quic-load-balancers-04 1723 | - Added standard for retry services 1724 | 1725 | ## Since draft-duke-quic-load-balancers-03 1726 | - Renamed Plaintext CID algorithm as Obfuscated CID 1727 | - Added new Plaintext CID algorithm 1728 | - Updated to allow 20B CIDs 1729 | - Added self-encoding of CID length 1730 | 1731 | ## Since draft-duke-quic-load-balancers-02 1732 | - Added Config Rotation 1733 | - Added failover mode 1734 | - Tweaks to existing CID algorithms 1735 | - Added Block Cipher CID algorithm 1736 | - Reformatted QUIC-LB packets 1737 | 1738 | ## Since draft-duke-quic-load-balancers-01 1739 | - Complete rewrite 1740 | - Supports multiple security levels 1741 | - Lightweight messages 1742 | 1743 | ## Since draft-duke-quic-load-balancers-00 1744 | - Converted to markdown 1745 | - Added variable length connection IDs 1746 | -------------------------------------------------------------------------------- /draft-ietf-quic-retry-offload.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "QUIC Retry Offload" 3 | abbrev: QUIC Retry Offload 4 | docname: draft-ietf-quic-retry-offload-latest 5 |
date: {DATE} 6 | category: std 7 | ipr: trust200902 8 | area: Transport 9 | workgroup: QUIC 10 | 11 | stand_alone: yes 12 | pi: [toc, sortrefs, symrefs, docmapping] 13 | 14 | author: 15 | - 16 | ins: M. Duke 17 | name: Martin Duke 18 | org: Google 19 | email: martin.h.duke@gmail.com 20 | 21 | - 22 | ins: N. Banks 23 | name: Nick Banks 24 | org: Microsoft 25 | email: nibanks@microsoft.com 26 | 27 | normative: 28 | 29 | TIME_T: 30 | title: "Open Group Standard: Vol. 1: Base Definitions, Issue 7" 31 | date: 2018 32 | seriesinfo: IEEE Std 1003.1 33 | target: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_16 34 | 35 | --- abstract 36 | 37 | QUIC uses Retry packets to reduce load on stressed servers, by forcing the 38 | client to prove ownership of its address before the server commits state. 39 | QUIC also has an anti-tampering mechanism to prevent the unauthorized injection 40 | of Retry packets into a connection. However, a server operator may want to 41 | offload production of Retry packets to an anti-Denial-of-Service agent or 42 | hardware accelerator. "Retry Offload" is a mechanism for coordination between 43 | a server and an external generator of Retry packets that can succeed despite 44 | the anti-tampering mechanism. 45 | 46 | --- middle 47 | 48 | # Introduction 49 | 50 | QUIC {{!RFC9000}} servers send Retry packets to avoid prematurely allocating 51 | resources when under stress, such as during a Denial of Service (DoS) attack. 52 | Because both Initial packets and Retry packets have weak authentication 53 | properties, the Retry packet contains an encrypted token that helps the client 54 | and server to validate, via transport parameters, that an attacker did not 55 | inject or modify a packet of either type for this connection attempt. 56 | 57 | However, a server under stress is less inclined to process incoming Initial 58 | packets and compute the Retry token in the first place. 
An analogous mechanism 59 | for TCP is syncookies {{?RFC4987}}. As TCP has weaker authentication properties 60 | than QUIC, syncookie generation can often be offloaded to a hardware device, or 61 | to an anti-Denial-of-Service provider that is topologically far from the 62 | protected server. As such an offload would behave exactly like an attacker, 63 | QUIC's authentication methods make such a capability impossible. 64 | 65 | This document seeks to enable offloading of Retry generation to QUIC via 66 | explicit coordination between servers and the hardware or provider offload, 67 | which this document refers to as a "Retry Offload." It has two different 68 | modes, to conform to two different use cases. 69 | 70 | The no-shared-state mode has minimal coordination and does not require key 71 | sharing. While operationally easier to configure and manage, it places severe 72 | constraints on the operational profile of the offload. In particular, the 73 | offload must control all ingress to the server and fail closed. 74 | 75 | The shared-state mode removes the operational constraints, but also requires 76 | more sophisticated key management. 77 | 78 | Both modes specify a common format for encoding information in the Retry token, 79 | so that the server can correctly populate the relevant transport parameter 80 | fields. 81 | 82 | ## Terminology 83 | 84 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", 85 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be 86 | interpreted as described in RFC 2119 {{?RFC2119}}. 87 | 88 | In this document, these words will appear with that interpretation only when in 89 | ALL CAPS. Lower case uses of these words are not to be interpreted as carrying 90 | significance described in RFC 2119. 91 | 92 | For brevity, "Connection ID" will often be abbreviated as "CID".
93 | 94 | A "Retry Offload" is a hardware or software device that is conceptually separate 95 | from a QUIC server that terminates QUIC connections. This document assumes that 96 | the Retry Offload and the server have an administrative relationship that allows 97 | them to accept common configuration. 98 | 99 | A "configuration agent" is some entity that determines the common configuration 100 | to be distributed to the servers and the Retry Offload. 101 | 102 | This document uses "QUIC" to refer to the protocol in QUIC version 1 103 | {{RFC9000}}. Retry offloads can be applied to other versions of QUIC that use 104 | Retry packets and have identical information requirements for Retry validation. 105 | However, note that source and destination connection IDs are the only relevant 106 | data fields that are invariant across QUIC versions {{?RFC8999}}. 107 | 108 | ## Notation 109 | 110 | All wire formats will be depicted using the notation defined in Section 1.3 of 111 | {{RFC9000}}. 112 | 113 | The example below illustrates the basic framework: 114 | 115 | ~~~ 116 | Example Structure { 117 | One-bit Field (1), 118 | 7-bit Field with Fixed Value (7) = 61, 119 | Field with Variable-Length Integer (i), 120 | Arbitrary-Length Field (..), 121 | Variable-Length Field (8..24), 122 | Field With Minimum Length (16..), 123 | Field With Maximum Length (..128), 124 | [Optional Field (64)], 125 | Repeated Field (8) ..., 126 | } 127 | ~~~ 128 | {: #fig-ex-format title="Example Format"} 129 | 130 | # Common Requirements {#common-requirements} 131 | 132 | Regardless of mechanism, a Retry Offload has an active mode, where it is 133 | generating Retry packets, and an inactive mode, where it is not, based on its 134 | assessment of server load and the likelihood an attack is underway. The choice 135 | of mode MAY be made on a per-packet or per-connection basis, through a 136 | stochastic process or based on client address.
137 | 138 | A configuration agent MUST distribute a list of QUIC versions the Retry Offload 139 | supports. It MAY also distribute either an "Allow-List" or a "Deny-List" of 140 | other QUIC versions. It MUST NOT distribute both an Allow-List and a Deny-List. 141 | 142 | The Allow-List or Deny-List MUST NOT include any versions included for Retry 143 | Offload support. 144 | 145 | The configuration agent MUST provide a means for the entity that controls the 146 | Retry Offload to report its supported version(s) to the configuration agent. If 147 | the entity has not reported this information, it MUST NOT activate the Retry 148 | Offload and the configuration agent MUST NOT distribute configuration that 149 | activates it. 150 | 151 | The configuration agent MAY delete versions from the final supported version 152 | list if policy does not require the Retry Offload to operate on those versions. 153 | 154 | The configuration agent MUST provide a means for the entities that control 155 | servers behind the Retry Offload to report either an Allow-List or a Deny-List. 156 | 157 | If all entities supply Allow-Lists, the consolidated list MUST be the union of 158 | these sets. If all entities supply Deny-Lists, the consolidated list MUST be 159 | the intersection of these sets. 160 | 161 | If entities provide a mixture of Allow-Lists and Deny-Lists, the consolidated 162 | list MUST be a Deny-List that is the intersection of all provided Deny-Lists and 163 | the inverses of all Allow-Lists. 164 | 165 | If no entities that control servers have reported Allow-Lists or Deny-Lists, 166 | the default is a Deny-List with the null set (i.e., all unsupported versions 167 | will be admitted). This preserves the future extensibility of QUIC. 168 | 169 | A Retry Offload MUST forward all packets for a QUIC version it does not 170 | support that are not on a Deny-List or absent from an Allow-List.
Note that if 171 | servers support versions the Retry Offload does not, this may increase load on 172 | the servers. 173 | 174 | Note that future versions of QUIC might not have Retry packets, require 175 | different information in Retry, or use different packet type indicators. 176 | 177 | ## Consistent Treatment of Initials 178 | 179 | Retry Offloads SHOULD treat Initial packets from the same connection with a 180 | uniform policy. Initial packets of the first and second client flight can be 181 | difficult to distinguish without expensive decryption of the contents, which is 182 | unsuitable under the conditions of a DDoS attack. If the first packet of a 183 | connection is admitted without Retry, but the second triggers a Retry, that 184 | Retry packet will be ignored and the loss of an Initial coalesced with other 185 | packets can impair performance. In some situations, the client does not yet have 186 | handshake keys, and dropping further client Initial packets creates a deadlock 187 | where the connection cannot progress. 188 | 189 | The simplest means to ensure this is to require, when active, a Retry Token 190 | for all incoming Initial packets, and send a Retry packet otherwise. If the 191 | Retry Offload is to be more selective, one technique keeps state on which 192 | address/port 4-tuples have been admitted. Another would be to apply a secure 193 | hash to the source IP address, port, and connection ID to deterministically 194 | compute whether the Initial requires a Retry Token or not. These source 195 | values remain consistent over the handshake. 196 | 197 | However, even with these techniques there is a potential problem when a Retry 198 | Offload switches from inactive to active mode. The Retry Offload could admit 199 | the first packet while in inactive mode, and then drop subsequent Initials in 200 | active mode. 
201 | 202 | If the Retry Offload is always on-path, it MAY keep state on incoming 203 | connections while in inactive mode to avoid this problem. If it cannot or will 204 | not keep such state, it SHOULD implement "transition mode" for an interval 205 | chosen to include the likely Initial packet exchange of most clients (200ms is a 206 | sensible default). 207 | 208 | In transition mode, Retry Offloads process Initial packets with Retry tokens 209 | as in active mode. When the Retry Offload receives an Initial packet with no 210 | token, it issues a Retry AND forwards the packet to the server. If the client 211 | has already received a packet from the server, it will ignore the Retry and the 212 | connection will progress normally. If not, the client will reconnect based on 213 | the Retry, the server's response to the first Initial will be discarded, and 214 | the connection will progress normally based on the client's second Initial. 215 | {{mid-handshake}} explores the various possible packet sequences in 216 | transition mode. 217 | 218 | Note that transition mode provides no actual DDoS relief to the server, so its 219 | duration should be as short as possible. The Retry Offload can choose not to 220 | implement transition mode, at the cost of causing some client connections to fail. 221 | 222 | Servers operating behind a Retry Offload SHOULD implement a mechanism that 223 | operates whenever a client Initial arrives with a valid Retry token. If there 224 | is another connection with identical client Connection ID, IP, and Port, but 225 | with an unvalidated address, that connection is immediately and silently 226 | terminated. This mechanism eliminates incorrect connection state that is an 227 | artifact of transition mode, as explained in {{mid-handshake}}. 228 | 229 | ## Considerations for Non-Initial Packets 230 | 231 | Initial Packets are especially effective at consuming server resources 232 | because they cause the server to create connection state.
Even when mitigating 233 | this load with Retry Packets, the act of validating an Initial Token and sending 234 | a Retry Packet is more expensive than the response to a non-Initial packet with 235 | an unknown Connection ID: simply dropping it and/or sending a Stateless Reset. 236 | 237 | Nevertheless, a Retry Offload in Active Mode might desire to shield servers 238 | from non-Initial packets that do not correspond to a previously admitted 239 | Initial Packet. This has a number of considerations. 240 | 241 | * If a Retry Offload maintains no per-flow state, it cannot distinguish between 242 | valid and invalid non-Initial packets and MUST forward all non-Initial Packets 243 | to the server. 244 | 245 | * For QUIC versions the Retry Offload does not support and are present on the 246 | Allow-List (or absent from the Deny-List), the Retry Offload cannot distinguish 247 | Initial Packets from other long headers and therefore MUST admit all long 248 | headers. 249 | 250 | * If a Retry Offload keeps per-flow state, it can identify 4-tuples that have 251 | been previously approved, admit non-Initial packets from those flows, and 252 | drop all others. However, dropping short headers will effectively break Address 253 | Migration and NAT Rebinding when in Active Mode, as post-migration packets will 254 | arrive with a previously unknown 4-tuple. This policy will also break connection 255 | attempts using any new QUIC versions that begin connections with a short header. 256 | 257 | * If a Retry Offload is integrated with a QUIC-LB routable load balancer 258 | {{?I-D.ietf-quic-load-balancers}}, it can verify that the Destination Connection 259 | ID is routable, and only admit non-Initial packets with routable DCIDs. As the 260 | Connection ID encoding is invariant across QUIC versions, the Retry Offload can 261 | do this for all short headers. 
262 | 263 | Nothing in this section prevents Retry Offloads from making basic syntax 264 | correctness checks on packets with QUIC versions that they understand (e.g., 265 | enforcing the Initial Packet datagram size minimum in version 1). 266 | 267 | # No-Shared-State Retry Offload 268 | 269 | The no-shared-state Retry Offload requires no coordination, except that the 270 | server must be configured to accept this offload and know which QUIC versions 271 | the Retry Offload supports. The scheme uses the first bit of the token to 272 | distinguish between tokens from Retry packets (codepoint '0') and tokens from 273 | NEW_TOKEN frames (codepoint '1'). 274 | 275 | ## Configuration Agent Actions 276 | 277 | See {{common-requirements}}. 278 | 279 | ## Offload Requirements {#nss-offload-requirements} 280 | 281 | A no-shared-state Retry Offload MUST be present on all paths from potential 282 | clients to the server. These paths MUST fail to pass QUIC traffic should the 283 | offload fail for any reason. That is, if the offload is not operational, the 284 | server MUST NOT be exposed to client traffic. Otherwise, servers that have 285 | already disabled their Retry capability would be vulnerable to attack. 286 | 287 | The path between offload and server MUST be free of any potential attackers. 288 | Note that this and other requirements above severely restrict the operational 289 | conditions in which a no-shared-state Retry Offload can safely operate. 290 | 291 | Retry tokens generated by the offload MUST have the format below. 292 | 293 | ~~~ 294 | No-Shared-State Retry Offload Token { 295 | Token Type (1) = 0, 296 | ODCIL (7) = 8..20, 297 | Original Destination Connection ID (64..160), 298 | Opaque Data (..), 299 | } 300 | ~~~ 301 | {: #nss-retry-offload-token-format title="Format of no-shared-state Retry Offload tokens"} 302 | 303 | The first bit of retry tokens generated by the offload MUST be zero.
The token 304 | has the following additional fields: 305 | 306 | ODCIL: The length of the original destination connection ID from the triggering 307 | Initial packet. This is in cleartext to be readable by the server, but 308 | authenticated later in the token. The Retry Offload SHOULD reject any token 309 | in which the value is less than 8. 310 | 311 | Original Destination Connection ID: This is also in cleartext and authenticated 312 | later. 313 | 314 | Opaque Data: This data contains the information necessary to authenticate the 315 | Retry token in accordance with the QUIC specification. A straightforward 316 | implementation would encode the Retry Source Connection ID, client IP address, 317 | and a timestamp in the Opaque Data. A more space-efficient implementation would 318 | use the Retry Source Connection ID and Client IP as associated data in an 319 | encryption operation, and encode only the timestamp and the authentication tag 320 | in the Opaque Data. If the Initial packet alters the Connection ID or source IP 321 | address, authentication of the token will fail. 322 | 323 | Upon receipt of an Initial packet with a token that begins with '0', the Retry 324 | Offload MUST validate the token in accordance with the QUIC specification. 325 | 326 | In active mode, the offload MUST issue Retry packets for all client Initial 327 | packets that contain no token, or a token that has the first bit set to '1'. In 328 | that case, it MUST NOT forward the packet to the server. The offload MUST validate all tokens 329 | with the first bit set to '0'. If successful, the offload MUST forward the 330 | packet with the token intact. If unsuccessful, it MUST drop the packet. The 331 | Retry Offload MAY send an Initial Packet containing a CONNECTION_CLOSE frame 332 | with the INVALID_TOKEN error code when dropping the packet. 333 | 334 | Note that this scheme has a performance drawback.
When the Retry Offload is in 335 | active mode, a client with a token from a NEW_TOKEN frame will suffer a 1-RTT 336 | penalty even though its token provides proof of address. 337 | 338 | In inactive mode, the offload MUST forward all packets that have no token or a 339 | token with the first bit set to '1'. It MUST validate all tokens with the first 340 | bit set to '0'. If successful, the offload MUST forward the packet with the 341 | token intact. If unsuccessful, it MUST drop the packet. 342 | 343 | ## Server Requirements 344 | 345 | A server behind a no-shared-state Retry Offload MUST NOT send Retry packets 346 | for a QUIC version the Retry Offload understands. It MAY send Retry for QUIC 347 | versions the Retry Offload does not understand. 348 | 349 | Tokens sent in NEW_TOKEN frames MUST have the first bit set to '1'. 350 | 351 | If a server receives an Initial Packet with the first bit in the token set to 352 | '1', it could be from a server-generated NEW_TOKEN frame and should be processed 353 | in accordance with the QUIC specification. If a server receives an Initial 354 | Packet with the first bit in the token set to '0', it is a Retry token and the server MUST NOT 355 | attempt to validate it. Instead, it MUST assume the address is validated, MUST 356 | include the packet's Destination Connection ID in a Retry Source Connection ID 357 | transport parameter, and MUST extract the Original Destination Connection ID 358 | from the token cleartext for use in the transport parameter of the same name. 359 | 360 | # Shared-State Retry Offload {#shared-state-retry} 361 | 362 | A shared-state Retry Offload uses a shared key, so that the server can decode 363 | the offload's retry tokens. It does not require that all traffic pass through 364 | the Retry Offload, so servers MAY send Retry packets in response to Initial 365 | packets without a valid token.
366 | 367 | Both server and offload MUST have time synchronized within two seconds of each 368 | other to prevent tokens being incorrectly marked as expired. 369 | 370 | The tokens are protected using AES128-GCM AEAD, as explained in 371 | {{token-protection-with-aead}}. All tokens, generated by either the server or 372 | Retry Offload, MUST use the following format, which includes: 373 | 374 | - A 1 bit token type identifier. 375 | - A 7 bit token key identifier. 376 | - A 96 bit unique token number transmitted in clear text, but protected as part 377 | of the AEAD associated data. 378 | - A token body, encoding the Original Destination Connection ID and the 379 | Timestamp, optionally followed by server specific Opaque Data. 380 | 381 | The token protection uses a 128 bit representation of the source IP address 382 | from the triggering Initial packet. The client IP address is 16 octets. If it is an 383 | IPv4 address, the last 12 octets are zeroes. It also uses the Source Connection 384 | ID of the Retry packet, which will cause an authentication failure if it 385 | differs from the Destination Connection ID of the packet bearing the token. 386 | 387 | If there is a Network Address Translator (NAT) in the server infrastructure that 388 | changes the client IP, either the Retry Offload MUST be positioned behind the 389 | NAT, or the NAT must have the token key to rewrite the Retry token accordingly. 390 | Note also that a host that obtains a token through a NAT and then attempts to 391 | connect over a path that does not have an identically configured NAT will fail 392 | address validation. 393 | 394 | The 96 bit unique token number is set to a random value using a 395 | cryptography-grade random number generator. 396 | 397 | The token key identifier and the corresponding AEAD key and AEAD IV are 398 | provisioned by the configuration agent.
399 | 400 | The token body is encoded as follows: 401 | 402 | ~~~ 403 | Shared-State Retry Offload Token Body { 404 | Timestamp (64), 405 | [ODCIL (8) = 8..20], 406 | [Original Destination Connection ID (64..160)], 407 | [Port (16)], 408 | Opaque Data (..), 409 | } 410 | ~~~ 411 | {: #ss-retry-offload-token-body title="Body of shared-state Retry Offload tokens"} 412 | The token body has the following fields: 413 | 414 | Timestamp: The Timestamp is a 64-bit integer, in network order, that expresses 415 | the expiration time of the token as a number of seconds in POSIX time (see Sec. 416 | 4.16 of {{TIME_T}}). 417 | 418 | ODCIL: The original destination connection ID length. Tokens in NEW_TOKEN frames 419 | do not have this field. 420 | 421 | Original Destination Connection ID: The server or Retry Offload copies this 422 | from the field in the client Initial packet. Tokens in NEW_TOKEN frames do not 423 | have this field. 424 | 425 | Port: The Source Port of the UDP datagram that triggered the Retry packet. 426 | This field MUST be present if and only if the ODCIL is greater than zero. This 427 | field is therefore always absent in tokens in NEW_TOKEN frames. 428 | 429 | Opaque Data: The server may use this field to encode additional information, 430 | such as congestion window, RTT, or MTU. The Retry Offload MUST have zero-length 431 | opaque data. 432 | 433 | Some implementations of QUIC encode in the token the Initial Packet Number used 434 | by the client, in order to verify that the client sends the retried Initial 435 | with a PN larger than the triggering Initial. Such implementations will encode 436 | the Initial Packet Number as part of the opaque data. As tokens may be 437 | generated by the Retry Offload, servers MUST NOT reject tokens because they lack 438 | opaque data and therefore the packet number. 439 | 440 | Shared-state Retry Offloads use the AES-128-GCM AEAD.
Future standards could 441 | add new algorithms that use other ciphers to provide cryptographic agility in 442 | accordance with {{?RFC7696}}. Retry Offload and server implementations SHOULD be 443 | extensible to support new algorithms. 444 | 445 | ### Token Protection with AEAD {#token-protection-with-aead} 446 | 447 | On the wire, the token is presented as: 448 | 449 | ~~~ 450 | Shared-State Retry Offload Token { 451 | Token Type (1), 452 | Key Sequence (7), 453 | Unique Token Number (96), 454 | Encrypted Shared-State Retry Offload Token Body (64..), 455 | AEAD Integrity Check Value (128), 456 | } 457 | ~~~ 458 | {: #ss-retry-offload-token-wire-image title="Wire image of shared-state Retry Offload tokens"} 459 | 460 | The tokens are protected using AES128-GCM as follows: 461 | 462 | * The Key Sequence is the 7 bit identifier to retrieve the token key and IV. 463 | 464 | * The AEAD IV is a 96 bit value generated by the configuration agent. 465 | 466 | * The AEAD nonce, N, is formed by XORing the AEAD IV with the 96 bit unique 467 | token number. 468 | 469 | * The associated data is formatted as a pseudoheader by combining the 470 | cleartext part of the token with the IP address of the client. The format of 471 | the pseudoheader depends on whether the Token Type bit is '1' (a NEW_TOKEN 472 | token) or '0' (a Retry token). 473 | 474 | ~~~ 475 | Shared-State Retry Offload Token Pseudoheader { 476 | IP Address (128), 477 | Token Type (1), 478 | Key Sequence (7), 479 | Unique Token Number (96), 480 | [RSCIL (8)], 481 | [Retry Source Connection ID (0..160)], 482 | } 483 | ~~~ 484 | {: #ss-retry-offload-token-pseudoheader title="Pseudoheader for shared-state Retry Offload tokens"} 485 | 486 | RSCIL: The Retry Source Connection ID Length in octets. This field is only 487 | present when the Token Type is '0'. 488 | 489 | Retry Source Connection ID: To create a Retry Token, populate this field with 490 | the Source Connection ID the Retry packet will use.
To validate a Retry token, 491 | populate it with the Destination Connection ID of the Initial packet that 492 | carries the token. This field is only present when the Token Type is '0'. 493 | 494 | * The input plaintext for the AEAD is the token body. The output ciphertext of 495 | the AEAD is transmitted in place of the token body. 496 | * The AEAD Integrity Check Value (ICV), defined in Section 6 of {{?RFC4106}}, is 497 | computed as part of the AEAD encryption process, and is verified during 498 | decryption. 499 | 500 | ## Configuration Agent Actions 501 | 502 | The configuration agent generates and distributes a "token key", a "token IV", 503 | a key sequence, and the information described in {{common-requirements}}. 504 | 505 | ## Offload Requirements {#ss-offload} 506 | 507 | In inactive mode, the Retry Offload forwards all packets without further 508 | inspection or processing. The rest of this section only applies to an offload in 509 | active mode. 510 | 511 | Retry Offloads MUST NOT issue Retry packets except where explicitly allowed 512 | below, to avoid sending a Retry packet in response to a Retry token. 513 | 514 | The offload MUST generate Retry tokens with the format described above when it 515 | receives a client Initial packet with no token. 516 | 517 | If there is a token of either type, the offload MUST attempt to decrypt it. 518 | 519 | To decrypt a token, the offload checks the Token Type and constructs a 520 | pseudoheader with the appropriate format for that type, using the bearing 521 | packet's Destination Connection ID to populate the Retry Source Connection ID 522 | field, if any.
523 | 524 | A token is invalid if: 525 | 526 | * it uses an unknown key sequence, 527 | 528 | * the AEAD ICV does not match the expected value (by construction, it will only 529 | match if the client IP address, and any Retry Source Connection ID, also 530 | match), 531 | 532 | * the ODCIL, if present, is invalid for a client-generated CID (less than 8 or 533 | more than 20 in QUIC version 1), 534 | 535 | * the Timestamp of a token points to a time in the past (however, in order to 536 | allow for clock skew, the offload SHOULD NOT consider tokens to be expired if the 537 | Timestamp encodes a time less than two seconds in the past), or 538 | 539 | * the port number, if present, does not match the source port in the 540 | encapsulating UDP header. 541 | 542 | Packets with valid tokens MUST be forwarded to the server. 543 | 544 | The offload MUST drop packets with invalid tokens. If the token is of type '1' 545 | (NEW_TOKEN), it MUST respond with a Retry packet. If of type '0', it MUST NOT 546 | respond with a Retry packet. 547 | 548 | ## Server Requirements 549 | 550 | The server MAY issue Retry or NEW_TOKEN tokens in accordance with {{RFC9000}}. 551 | When doing so, it MUST follow the format above. 552 | 553 | The server MUST validate all tokens that arrive in Initial packets, as they may 554 | have bypassed the Retry Offload. It determines validity using the procedure 555 | in {{ss-offload}}. 556 | 557 | If the token is a valid Retry token, the server populates the 558 | original_destination_connection_id transport parameter using the 559 | corresponding token field. It populates the retry_source_connection_id transport 560 | parameter with the Destination Connection ID of the packet bearing the token. 561 | 562 | In all other respects, the server processes both valid and invalid tokens in 563 | accordance with {{RFC9000}}. 564 | 565 | For QUIC versions the offload does not support, the server MAY use any token 566 | format.
567 | 568 | # Security Considerations {#security-considerations} 569 | 570 | ## Shared-State Retry Keys 571 | 572 | The Shared-State Retry Offload defined in {{shared-state-retry}} describes the 573 | format of retry tokens or new tokens protected and encrypted using AES128-GCM. 574 | Each token includes a 96 bit randomly generated unique token number, and a 7 575 | bit identifier used to get the AES-GCM encryption context. The AES-GCM 576 | encryption context contains a 128 bit key and an AEAD IV. There are three 577 | important security considerations for these tokens: 578 | 579 | * An attacker that obtains a copy of the encryption key will be able to decrypt 580 | and forge tokens. 581 | 582 | * Attackers may be able to retrieve the key if they capture a sufficiently large 583 | number of retry tokens encrypted with a given key. 584 | 585 | * Confidentiality of the token data will fail if separate tokens reuse the 586 | same 96 bit unique token number and the same key. 587 | 588 | To protect against disclosure of keys to attackers, offloads and servers MUST 589 | ensure that the keys are stored securely. To limit the consequences of potential 590 | exposures, the lifetime of any given key should be limited. 591 | 592 | Section 6.6 of {{?RFC9001}} states that "Endpoints MUST count the number of 593 | encrypted packets for each set of keys. If the total number of encrypted packets 594 | with the same key exceeds the confidentiality limit for the selected AEAD, the 595 | endpoint MUST stop using those keys." It goes on with the specific limit: "For 596 | AEAD_AES_128_GCM and AEAD_AES_256_GCM, the confidentiality limit is 2^23 597 | encrypted packets; see Appendix B.1." It is prudent to adopt the same limit 598 | here, and configure the offload in such a way that no more than 2^23 tokens are 599 | generated with the same key.
600 | 601 | In order to protect against collisions, the 96 bit unique token numbers should 602 | be generated using a cryptographically secure pseudorandom number generator 603 | (CSPRNG), as specified in Appendix C.1 of the TLS 1.3 specification 604 | {{!RFC8446}}. With proper random numbers, if fewer than 2^40 tokens are 605 | generated with a single key, the risk of collisions is lower than 0.001%. 606 | 607 | # IANA Considerations 608 | 609 | There are no IANA requirements. 610 | 611 | --- back 612 | 613 | # Retry Offload YANG Model {#yang-model} 614 | 615 | These YANG models conform to {{?RFC6020}} and express a complete Retry Offload 616 | configuration. 617 | 618 | ~~~ 619 | module ietf-retry-offload { 620 | yang-version "1.1"; 621 | namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb"; 622 | prefix "quic-lb"; 623 | 624 | import ietf-yang-types { 625 | prefix yang; 626 | reference 627 | "RFC 6991: Common YANG Data Types."; 628 | } 629 | 630 | import ietf-inet-types { 631 | prefix inet; 632 | reference 633 | "RFC 6991: Common YANG Data Types."; 634 | } 635 | 636 | organization 637 | "IETF QUIC Working Group"; 638 | 639 | contact 640 | "WG Web: 641 | WG List: 642 | 643 | Authors: Martin Duke (martin.h.duke at gmail dot com) 644 | Nick Banks (nibanks at microsoft dot com) 645 | Christian Huitema (huitema at huitema.net)"; 646 | 647 | description 648 | "This module enables the explicit cooperation of QUIC servers 649 | with offloads that generate Retry packets on their behalf. 650 | 651 | Copyright (c) 2022 IETF Trust and the persons identified as 652 | authors of the code. All rights reserved. 653 | 654 | Redistribution and use in source and binary forms, with or 655 | without modification, is permitted pursuant to, and subject to 656 | the license terms contained in, the Simplified BSD License set 657 | forth in Section 4.c of the IETF Trust's Legal Provisions 658 | Relating to IETF Documents 659 | (https://trustee.ietf.org/license-info). 
660 | 661 | This version of this YANG module is part of RFC XXXX 662 | (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself 663 | for full legal notices. 664 | 665 | The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL 666 | NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED', 667 | 'MAY', and 'OPTIONAL' in this document are to be interpreted as 668 | described in BCP 14 (RFC 2119) (RFC 8174) when, and only when, 669 | they appear in all capitals, as shown here."; 670 | 671 | revision "2022-02-11" { 672 | description 673 | "Initial version"; 674 | reference 675 | "RFC XXXX, QUIC Retry Offloads"; 676 | } 677 | 678 | container retry-offload-config { 679 | description 680 | "Configuration of Retry Offload. If supported-versions is empty, 681 | there is no Retry Offload. If token-keys is empty, it uses the 682 | non-shared-state offload. If present, it uses shared-state 683 | tokens."; 684 | 685 | leaf-list supported-versions { 686 | type uint32; 687 | description 688 | "QUIC versions that the Retry Offload supports. If empty, 689 | there is no Retry Offload."; 690 | } 691 | 692 | leaf unsupported-version-default { 693 | type enumeration { 694 | enum allow { 695 | description "Unsupported versions admitted by default"; 696 | } 697 | enum deny { 698 | description "Unsupported versions denied by default"; 699 | } 700 | } 701 | default allow; 702 | description 703 | "Are unsupported versions not in version-exceptions allowed 704 | or denied?"; 705 | } 706 | 707 | leaf-list version-exceptions { 708 | type uint32; 709 | description 710 | "Exceptions to the default-deny or default-allow rule."; 711 | } 712 | 713 | list token-keys { 714 | key "key-sequence-number"; 715 | description 716 | "list of active keys, for key rotation purposes. 
Existence 717 | implies shared-state format"; 718 | 719 | leaf key-sequence-number { 720 | type uint8 { 721 | range "0..127"; 722 | } 723 | mandatory true; 724 | description 725 | "Identifies the key used to encrypt the token"; 726 | } 727 | 728 | leaf token-key { 729 | type retry-offload-key; 730 | mandatory true; 731 | description 732 | "16-byte key to encrypt the token"; 733 | } 734 | 735 | leaf token-iv { 736 | type yang:hex-string { 737 | length 35; 738 | } 739 | mandatory true; 740 | description 741 | "12-byte IV to encrypt the token, encoded in 35 bytes"; 742 | } 743 | } 744 | } 745 | } 746 | ~~~ 747 | 748 | ## Tree Diagram 749 | 750 | This summary of the YANG models uses the notation in {{?RFC8340}}. 751 | 752 | ~~~ 753 | module: ietf-retry-offload 754 | +--rw retry-offload-config 755 | +--rw supported-versions* uint32 756 | +--rw unsupported-version-default? enumeration 757 | +--rw version-exceptions* uint32 758 | +--rw token-keys* [key-sequence-number] 759 | +--rw key-sequence-number uint8 760 | +--rw token-key retry-offload-key 761 | +--rw token-iv yang:hex-string 762 | ~~~ 763 | 764 | ## Shared State Retry Token Test Vectors 765 | 766 | In this case, the shared-state retry token is issued by a Retry Offload, so the 767 | opaque data of the shared-state retry token body is empty 768 | ({{shared-state-retry}}).
769 | 770 | ~~~ 771 | Configuration: 772 | key_seq 0x00 773 | encrypt_key 0x30313233343536373839303132333435 774 | AEAD_IV 0x313233343536373839303132 775 | 776 | Shared-State Retry Offload Token Body: 777 | ODCIL 0x12 778 | RSCIL 0x10 779 | port 0x1a0a 780 | original_destination_connection_id 0x0c3817b544ca1c94313bba41757547eec937 781 | retry_source_connection_id 0x0301e770d24b3b13070dd5c2a9264307 782 | timestamp 0x0000000060c7bf4d 783 | 784 | Shared-State Retry Offload Token: 785 | unique_token_number 0x59ef316b70575e793e1a8782 786 | key_sequence 0x00 787 | encrypted_shared_state_retry_offload_token_body 788 | 0x7d38b274aa4427c7a1557c3fa666945931defc65da387a83855196a7cb73caac1e28e5346fd76868de94f8b62294 789 | AEAD_ICV 0xf91174fdd711543a32d5e959867f9c22 790 | 791 | AEAD related parameters: 792 | client_ip_addr 127.0.0.1 793 | client_port 6666 794 | AEAD_nonce 0x68dd025f45616941072ab6b0 795 | AEAD_associated_data 0x7f00000100000000000000000000000059ef316b70575e793e1a878200 796 | ~~~ 797 | 798 | # Transition Mode Scenarios {#mid-handshake} 799 | 800 | The logic motivating transition mode behavior involves detailed reasoning about 801 | endpoint behavior during the handshake. This non-normative appendix walks 802 | through the scenarios. 803 | 804 | Dropping Initial packets in the client's second flight can cause performance 805 | problems or deadlocks. In the case where the client and server first flight end 806 | with both sides having handshake keys, there will generally be no impact on 807 | performance. However, if an Initial ACK is critical to progress, as it can be in 808 | the case of multiple-packet TLS messages, Hello Retry Requests, and similar 809 | cases, dropping subsequent Initial ACKs results in deadlock. 810 | 811 | In transition mode, the Retry Offload forwards Initials with no token while also 812 | generating a Retry. This allows handshakes to progress without further incident. 
813 | 814 | ## Handshakes in Progress 815 | 816 | If the client hello was admitted in inactive mode, then the client has already 817 | received a packet from the server. Although subsequent client Initial packets 818 | will trigger a Retry, the client will ignore these Retry packets. Those Initials will 819 | also be processed by the server to continue the handshake. 820 | 821 | ## New Connections 822 | 823 | After sending a Client Hello in Initial Packet A, a client will rapidly receive 824 | a Retry Packet from the Offload and attempt to reconnect accordingly with 825 | Initial Packet B. 826 | 827 | The client will discard any server response to Initial A. If the response is a Retry, it is a 828 | second Retry on the connection. If it is an Initial, it is encrypted with keys 829 | derived from Initial A, which have already been discarded, and will fail 830 | decryption. 831 | 832 | Initial B's destination connection ID will be new, so the server will process 833 | it as a new connection and proceed normally. 834 | 835 | Unfortunately, the server connection state initiated by Initial A will remain. 836 | For this reason, this document suggests that servers silently terminate the 837 | older connection. Requiring the address to be validated avoids cases where an 838 | attacker simply replays a client Initial with a new Destination Connection ID 839 | to terminate a valid connection. 840 | 841 | Note that there are corner cases involving further packet loss that result in 842 | connection timeout. For instance, if the Retry Offload's response to Initial A 843 | is lost, then the connection will proceed based on Initial A. If the Retry 844 | Offload then switches from transition mode to active mode before the client's 845 | second flight arrives, the Retry Offload will drop the Initial packet in that 846 | flight, and the connection might deadlock.
847 | 848 | # Acknowledgments 849 | 850 | Christian Huitema, Ling Tao Nju, and William Zeng Ke all provided useful input 851 | to this document. 852 | 853 | # Change Log 854 | 855 | > **RFC Editor's Note:** Please remove this section prior to 856 | > publication of a final version of this document. 857 | 858 | ## since draft-duke-quic-retry-offload-00 859 | - Converted to adopted IETF draft 860 | - Cleaner transition from inactive to active mode 861 | 862 | ## since draft-ietf-quic-load-balancers-12 863 | - Separated from the QUIC-LB draft 864 | - Renamed "Retry Service" to "Retry Offload" 865 | 866 | ## since draft-ietf-quic-load-balancers-11 867 | 868 | - Fixed mistakes in test vectors 869 | 870 | ## since draft-ietf-quic-load-balancers-10 871 | 872 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to 873 | implement 874 | - Revised test vectors 875 | - Split YANG model into a server and middlebox version 876 | 877 | ## since draft-ietf-quic-load-balancers-09 878 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and 879 | "Encrypted Long" 880 | - Added section on per-connection state 881 | - Changed "Encrypted Short" to a 4-pass algorithm. 882 | - Recommended a random initial nonce when incrementing. 883 | - Clarified what SNI LBs should do with unknown QUIC versions. 
884 | 885 | ## since draft-ietf-quic-load-balancers-08 886 | - Eliminate Dynamic SID allocation 887 | - Eliminated server use bytes 888 | 889 | ## since draft-ietf-quic-load-balancers-07 890 | - Shortened SSCID nonce minimum length to 4 bytes 891 | - Removed RSCID from Retry token body 892 | - Simplified CID formats 893 | - Shrunk size of SID table 894 | 895 | ## since draft-ietf-quic-load-balancers-06 896 | - Added interoperability with DTLS 897 | - Changed "non-compliant" to "unroutable" 898 | - Changed "arbitrary" algorithm to "fallback" 899 | - Revised security considerations for mistrustful tenants 900 | - Added Retry Offload considerations for non-Initial packets 901 | 902 | ## since draft-ietf-quic-load-balancers-05 903 | - Added low-config CID for further discussion 904 | - Complete revision of shared-state Retry Token 905 | - Added YANG model 906 | - Updated configuration limits to ensure CID entropy 907 | - Switched to notation from quic-transport 908 | 909 | ## since draft-ietf-quic-load-balancers-04 910 | - Rearranged the shared-state retry token to simplify token processing 911 | - More compact timestamp in shared-state retry token 912 | - Revised server requirements for shared-state retries 913 | - Eliminated zero padding from the test vectors 914 | - Added server use bytes to the test vectors 915 | - Additional compliant DCID criteria 916 | 917 | ## since-draft-ietf-quic-load-balancers-03 918 | - Improved Config Rotation text 919 | - Added stream cipher test vectors 920 | - Deleted the Obfuscated CID algorithm 921 | 922 | ## since-draft-ietf-quic-load-balancers-02 923 | - Replaced stream cipher algorithm with three-pass version 924 | - Updated Retry format to encode info for required TPs 925 | - Added discussion of version invariance 926 | - Cleaned up text about config rotation 927 | - Added Reset Oracle and limited configuration considerations 928 | - Allow dropped long-header packets for known QUIC versions 929 | 930 | ## 
since-draft-ietf-quic-load-balancers-01 931 | - Test vectors for load balancer decoding 932 | - Deleted remnants of in-band protocol 933 | - Light edit of Retry Offloads section 934 | - Discussed load balancer chains 935 | 936 | ## since-draft-ietf-quic-load-balancers-00 937 | - Removed in-band protocol from the document 938 | 939 | ## Since draft-duke-quic-load-balancers-06 940 | - Switch to IETF WG draft. 941 | 942 | ## Since draft-duke-quic-load-balancers-05 943 | - Editorial changes 944 | - Made load balancer behavior independent of QUIC version 945 | - Got rid of token in stream cipher encoding, because server might not have it 946 | - Defined "non-compliant DCID" and specified rules for handling them. 947 | - Added pseudocode for config schema 948 | 949 | ## Since draft-duke-quic-load-balancers-04 950 | - Added standard for Retry Offloads 951 | 952 | ## Since draft-duke-quic-load-balancers-03 953 | - Renamed Plaintext CID algorithm as Obfuscated CID 954 | - Added new Plaintext CID algorithm 955 | - Updated to allow 20B CIDs 956 | - Added self-encoding of CID length 957 | 958 | ## Since draft-duke-quic-load-balancers-02 959 | - Added Config Rotation 960 | - Added failover mode 961 | - Tweaks to existing CID algorithms 962 | - Added Block Cipher CID algorithm 963 | - Reformatted QUIC-LB packets 964 | 965 | ## Since draft-duke-quic-load-balancers-01 966 | - Complete rewrite 967 | - Supports multiple security levels 968 | - Lightweight messages 969 | 970 | ## Since draft-duke-quic-load-balancers-00 971 | - Converted to markdown 972 | - Added variable length connection IDs 973 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "aasvg": "^0.3.3" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /quic_lb_protocol.md: 
-------------------------------------------------------------------------------- 1 | NOTE: This file describes the deleted in-band QUIC-LB protocol, in case we ever 2 | revive a form of it. 3 | 4 | # Protocol Description {#protocol-description} 5 | 6 | There are multiple means of configuration that correspond to differing 7 | deployment models and increasing levels of concern about the security of the 8 | load balancer-server path. 9 | 10 | ## Out of band sharing 11 | 12 | When there are concerns about the integrity of the path between load balancer 13 | and server, operators MAY share routing information using an out-of-band 14 | technique, which is out of the scope of this specification. 15 | 16 | To simplify configuration, the global parameters can be shared out-of-band, 17 | while the load balancer sends the unique server IDs via the truncated message 18 | formats presented below. 19 | 20 | ## QUIC-LB Message Exchange 21 | 22 | QUIC-LB load balancers and servers exchange messages via the QUIC-LBv1 protocol, 23 | which uses the QUIC invariants with version number 0xF1000000. The QUIC-LB 24 | load balancers send the encoding parameters to servers and periodically 25 | retransmit them until each server responds with an acknowledgement. Specifics of this 26 | retransmission are implementation-dependent. 27 | 28 | ## QUIC-LB Packet {#quic-lb-packet} 29 | 30 | A QUIC-LB packet uses a long header. It carries configuration information from 31 | the load balancer and acknowledgements from the servers. QUIC-LB packets are sent when a 32 | load balancer boots up, detects a new server in the pool or needs to update the 33 | server configuration. 
34 | 35 | ~~~~~ 36 | 0 1 2 3 37 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 38 | +-+-+-+-+-+-+-+-+ 39 | |1|C R| Reserved| 40 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 41 | | Version (32) | 42 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 43 | | 0x00 | 0x00 | 44 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 45 | | | 46 | + Authentication Token (64) + 47 | | | 48 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 49 | | Message Type | 50 | +-+-+-+-+-+-+-+-+ 51 | ~~~~~ 52 | {: #quic-lb-packet-format title="QUIC-LB Packet Format"} 53 | 54 | The Version field allows QUIC-LB to use the Version Negotiation mechanism. All 55 | messages in this specification are specific to QUIC-LBv1. The Version field should be set to 56 | 0xF1000000. 57 | 58 | Load balancers MUST cease sending QUIC-LB packets of this version to a server 59 | when that server sends a Version Negotiation packet that does not advertise the 60 | version. 61 | 62 | The DCIL and SCIL fields are both set to 0x00. 63 | 64 | CR 65 | 66 | : The 2-bit CR field indicates the Config Rotation described in 67 | {{config-rotation}}. 68 | 69 | Authentication Token 70 | 71 | : The Authentication Token is an 8-byte field that both entities obtain at 72 | configuration time. It is used to verify that the sender is not an inside 73 | off-path attacker. Servers and load balancers SHOULD silently discard QUIC-LB 74 | packets with an incorrect token. 75 | 76 | Message Type 77 | 78 | : The Message Type indicates the type of message payload that follows the 79 | QUIC-LB header. 80 | 81 | ## Message Types and Formats 82 | 83 | As described in {{quic-lb-packet}}, each QUIC-LB packet contains a single message. 84 | This section describes the format and semantics of the QUIC-LB message types. 
85 | 86 | ### ACK_LB Message {#message-ack-lb} 87 | 88 | A server uses the ACK_LB message (type=0x00) to acknowledge a QUIC-LB packet 89 | received from the load balancer. The ACK_LB message has no additional payload 90 | beyond the QUIC-LB packet header. 91 | 92 | Load balancers SHOULD continue to retransmit a QUIC-LB packet until a valid 93 | ACK_LB message, FAIL message or Version Negotiation packet is received from the 94 | server. 95 | 96 | ### FAIL Message {#message-fail} 97 | 98 | A server uses the FAIL message (type=0x01) to indicate the configuration 99 | received from the load balancer is unsupported. 100 | 101 | ~~~~~ 102 | 0 1 2 3 103 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 104 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 105 | | Supp. Type | Supp. Type | ... 106 | +-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 107 | ~~~~~ 108 | 109 | Servers MUST send a FAIL message upon receipt of a message type which they do 110 | not support, or if they do not possess all of the implied out-of-band 111 | configuration to support a particular message type. 112 | 113 | The payload of the FAIL message consists of a list of all the message types 114 | supported by the server. 115 | 116 | Upon receipt of a FAIL message, load balancers MUST either send a QUIC-LB 117 | message the server supports or remove the server from the server pool. 118 | 119 | ### ROUTING_INFO Message {#message-routing-info} 120 | 121 | A load balancer uses the ROUTING_INFO message (type=0x02) to exchange all the 122 | parameters for the Obfuscated CID algorithm. 
123 | 124 | ~~~~~ 125 | 0 1 2 3 126 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 127 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 128 | | | 129 | + + 130 | | | 131 | + Routing Bit Mask (152) + 132 | | | 133 | + + 134 | | | 135 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 136 | | | Modulus (16) | 137 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 138 | | Divisor (16) | 139 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 140 | ~~~~~ 141 | 142 | Routing Bit Mask 143 | 144 | : The Routing Bit Mask encodes a '1' at every bit position in the server 145 | connection ID that will encode routing information. 146 | 147 | These bits, along with the Modulus and Divisor, are chosen by the load balancer 148 | as described in {{obfuscated-cid-algorithm}}. 149 | 150 | ### STREAM_CID Message {#message-stream-cid} 151 | 152 | A load balancer uses the STREAM_CID message (type=0x03) to exchange all the 153 | parameters for using Stream Cipher CIDs. 154 | 155 | ~~~~~ 156 | 0 1 2 3 157 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 158 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 159 | | Nonce Len (8) | SIDL (8) | 160 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 161 | | Server ID (variable) | 162 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 163 | | | 164 | + Key (128) + 165 | | | 166 | + + 167 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 168 | ~~~~~ 169 | {: #Stream-cid-format title="Stream CID Payload"} 170 | 171 | Nonce Len 172 | 173 | : The Nonce Len field is a one-octet unsigned integer that describes the 174 | nonce length necessary to use this routing algorithm, in octets. 175 | 176 | SIDL 177 | 178 | : The SIDL field is a one-octet unsigned integer that describes the server ID 179 | length necessary to use this routing algorithm, in octets. 
180 | 181 | Server ID 182 | 183 | : The Server ID is the unique value assigned to the receiving server. Its 184 | length is determined by the SIDL field. 185 | 186 | Key 187 | 188 | : The Key is a 16-octet field that contains the key that the load balancer 189 | will use to decrypt server IDs on QUIC packets. See 190 | {{security-considerations}} to understand why sending keys in plaintext may 191 | be a safe strategy. 192 | 193 | ### BLOCK_CID Message {#message-block-cid} 194 | 195 | A load balancer uses the BLOCK_CID message (type=0x04) to exchange all the 196 | parameters for using Block Cipher CIDs. 197 | 198 | ~~~~~ 199 | 0 1 2 3 200 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 201 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 202 | | ZP Len (8) | SIDL (8) | 203 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 204 | | Server ID (variable) | 205 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 206 | | | 207 | + Key (128) + 208 | | | 209 | + + 210 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 211 | ~~~~~ 212 | {: #block-cid-format title="Block CID Payload"} 213 | 214 | ZP Len 215 | 216 | : The ZP Len field is a one-octet unsigned integer that describes the 217 | zero-padding length necessary to use this routing algorithm, in octets. 218 | 219 | SIDL 220 | 221 | : The SIDL field is a one-octet unsigned integer that describes the server ID 222 | length necessary to use this routing algorithm, in octets. 223 | 224 | Server ID 225 | 226 | : The Server ID is the unique value assigned to the receiving server. Its 227 | length is determined by the SIDL field. 228 | 229 | Key 230 | 231 | : The Key is a 16-octet field that contains the key that the load balancer 232 | will use to decrypt server IDs on QUIC packets. See 233 | {{security-considerations}} to understand why sending keys in plaintext may 234 | be a safe strategy. 
235 | 236 | ### SERVER_ID Message {#message-server-id} 237 | 238 | A load balancer uses the SERVER_ID message (type=0x05) to exchange 239 | explicit server IDs. 240 | 241 | ~~~~~ 242 | 0 1 2 3 243 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 244 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 245 | | SIDL (8) | Server ID (variable) | 246 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 247 | ~~~~~ 248 | 249 | Load balancers send the SERVER_ID message when all global values for Stream or 250 | Block CIDs are sent out-of-band, so that only the server-unique values must be 251 | sent in-band. It also provides all necessary parameters for Plaintext CIDs. The 252 | fields are identical to their counterparts in the {{message-stream-cid}} 253 | payload. 254 | 255 | ### MODULUS Message {#message-modulus} 256 | 257 | A load balancer uses the MODULUS message (type=0x06) to exchange just the 258 | modulus used in the Obfuscated CID algorithm. 259 | 260 | ~~~~~ 261 | 0 1 2 3 262 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 263 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 264 | | Modulus (16) | 265 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 266 | ~~~~~ 267 | 268 | Load balancers send the MODULUS message when all global values for Obfuscated CIDs 269 | are sent out-of-band, so that only the server-unique values must be sent 270 | in-band. The Modulus field is identical to its counterpart in the 271 | ROUTING_INFO message. 272 | 273 | ### PLAINTEXT Message {#message-plaintext} 274 | 275 | A load balancer uses the PLAINTEXT message (type=0x07) to exchange all 276 | parameters needed for the Plaintext CID algorithm. 
277 | 278 | ~~~~~ 279 | 0 1 2 3 280 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 281 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 282 | | SIDL (8) | 283 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 284 | | | 285 | + Server ID (variable) + 286 | | | 287 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 288 | ~~~~~ 289 | 290 | The SIDL field indicates the length of the server ID field. The 291 | Server ID field indicates the encoding that represents the 292 | destination server. 293 | 294 | ### RETRY_SERVICE_STATELESS message 295 | 296 | A no-shared-state retry service uses this message (type=0x08) to notify the 297 | server of the existence of this service. This message has no fields. 298 | 299 | ### RETRY_SERVICE_STATEFUL message 300 | 301 | A shared-state retry service uses this message (type=0x09) to tell the server 302 | about its existence, and share the key needed to decrypt server-generated retry 303 | tokens. 304 | 305 | ~~~~~ 306 | 0 1 2 3 307 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 308 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 309 | | | 310 | + + 311 | | | 312 | + Key (128) + 313 | | | 314 | + + 315 | | | 316 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 317 | ~~~~~ 318 | --------------------------------------------------------------------------------