├── .circleci
    └── config.yml
├── .github
    └── in-solidarity.yml
├── .gitignore
├── .note.xml
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE.md
├── Makefile
├── README.md
├── draft-ietf-quic-load-balancers.md
├── draft-ietf-quic-retry-offload.md
├── package.json
└── quic_lb_protocol.md


/.circleci/config.yml:
--------------------------------------------------------------------------------
  1 | version: 2
  2 | jobs:
  3 |   build:
  4 |     docker:
  5 |       - image: martinthomson/i-d-template:latest
  6 |     resource_class: small
  7 |     working_directory: ~/draft
  8 | 
  9 |     steps:
 10 |       - run:
 11 |           name: "Print Configuration"
 12 |           command: |
 13 |             xml2rfc --version
 14 |             gem list -q kramdown-rfc
 15 |             echo -n 'mmark '; mmark --version
 16 | 
 17 |       - restore_cache:
 18 |           name: "Restoring cache - Git"
 19 |           keys:
 20 |             - v2-cache-git-{{ .Branch }}-{{ .Revision }}
 21 |             - v2-cache-git-{{ .Branch }}
 22 |             - v2-cache-git-
 23 | 
 24 |       - restore_cache:
 25 |           name: "Restoring cache - References"
 26 |           keys:
 27 |             - v1-cache-references-{{ epoch }}
 28 |             - v1-cache-references-
 29 | 
 30 |       # Workaround for https://discuss.circleci.com/t/22437: on tag builds with a restored .git, fetch the tag via the HTTPS form of the repo URL; drop .git if that fails
 31 |       - run:
 32 |           name: Tag Checkout
 33 |           command: |
 34 |             if [ -n "$CIRCLE_TAG" ] && [ -d .git ]; then
 35 |               remote=$(echo "$CIRCLE_REPOSITORY_URL" | \
 36 |                        sed -e 's,^git@github\.com:,https://github.com/,')
 37 |               git fetch -f "$remote" "refs/tags/$CIRCLE_TAG:refs/tags/$CIRCLE_TAG" || \
 38 |                 (echo 'Removing .git cache for tag build'; rm -rf .git)
 39 |             fi
 40 | 
 41 |       - checkout
 42 | 
 43 |       # Build txt and html versions of drafts
 44 |       - run:
 45 |           name: "Build Drafts"
 46 |           command: make
 47 | 
 48 |       # Update editor's copy on gh-pages; runs only when CIRCLE_TAG does not start with "draft-" (this includes an empty CIRCLE_TAG)
 49 |       - run:
 50 |           name: "Update GitHub Pages"
 51 |           command: |
 52 |             if [ "${CIRCLE_TAG#draft-}" == "$CIRCLE_TAG" ]; then
 53 |               make gh-pages
 54 |             fi
 55 | 
 56 |       # For tagged builds whose tag starts with "draft-", upload to the datatracker.
 57 |       - deploy:
 58 |           name: "Upload to Datatracker"
 59 |           command: |
 60 |             if [ "${CIRCLE_TAG#draft-}" != "$CIRCLE_TAG" ]; then
 61 |               make upload
 62 |             fi
 63 | 
 64 |       # Archive GitHub Issues: run `make archive`, retry with DISABLE_ARCHIVE_FETCH=true if it fails, then run `make gh-archive` only if one of the two succeeded
 65 |       - run:
 66 |           name: "Archive GitHub Issues"
 67 |           command: "make archive || make archive DISABLE_ARCHIVE_FETCH=true && make gh-archive"
 68 | 
 69 |       # Create and store artifacts
 70 |       - run:
 71 |           name: "Create Artifacts"
 72 |           command: "make artifacts CI_ARTIFACTS=/tmp/artifacts"
 73 | 
 74 |       - store_artifacts:
 75 |           path: /tmp/artifacts
 76 | 
 77 |       - run:
 78 |           name: "Prepare for Caching"
 79 |           command: "git reflog expire --expire=now --all && git gc --prune=now"
 80 | 
 81 |       - save_cache:
 82 |           name: "Saving Cache - Git"
 83 |           key: v2-cache-git-{{ .Branch }}-{{ .Revision }}
 84 |           paths:
 85 |             - ~/draft/.git
 86 | 
 87 |       - save_cache:
 88 |           name: "Saving Cache - Drafts"
 89 |           key: v1-cache-references-{{ epoch }}
 90 |           paths:
 91 |             - ~/.cache/xml2rfc
 92 | 
 93 | 
 94 | workflows:
 95 |   version: 2
 96 |   build:
 97 |     jobs:
 98 |       - build:
 99 |           filters:
100 |             tags:
101 |               only: /.*?/
102 | 


--------------------------------------------------------------------------------
/.github/in-solidarity.yml:
--------------------------------------------------------------------------------
1 | _extends: ietf/terminology
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | *~
 3 | /*-[0-9][0-9].xml
 4 | archive.json
 5 | draft-ietf-quic-load-balancers.xml
 6 | *.html
 7 | issues.json
 8 | *.js
 9 | lib
10 | old-stream-ciphers.md
11 | *.pdf
12 | pulls.json
13 | *.redxml
14 | .refcache
15 | report.xml
16 | *.swp
17 | .tags
18 | .targets.mk
19 | *.txt
20 | *.upload
21 | node_modules/
22 | package-lock.json
23 | venv/
24 | lib
25 | draft-ietf-quic-load-balancers.xml
26 | 


--------------------------------------------------------------------------------
/.note.xml:
--------------------------------------------------------------------------------
1 | <note title="Note to Readers">
2 | <t>Discussion of this document takes place on the
3 |   QUIC Working Group mailing list (quic@ietf.org),
4 |   which is archived at <eref target="https://mailarchive.ietf.org/arch/browse/quic/">https://mailarchive.ietf.org/arch/browse/quic/</eref>.</t>
5 | <t>Source for this draft and an issue tracker can be found at
6 |   <eref target="https://github.com/quicwg/load-balancers">https://github.com/quicwg/load-balancers</eref>.</t>
7 | </note>
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | dist: xenial
 3 | 
 4 | services:
 5 |   - docker
 6 | 
 7 | env:
 8 |   DRAFT_DIR: /home/idci/draft
 9 | 
10 | before_install:
11 |   - docker --version
12 |   - docker pull martinthomson/i-d-template
13 | 
14 | script:
15 |   - docker run -d -v "$PWD:/tmp/draft" --tmpfs "$DRAFT_DIR:rw,exec" --name idci
16 |       martinthomson/i-d-template sleep 300
17 |   - docker exec idci cp -rn /tmp/draft /home/idci
18 |   - docker exec -w "$DRAFT_DIR" -e CI=true -e TRAVIS
19 |       -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
20 |       idci make CLONE_ARGS='--reference /home/idci/git-reference'
21 |   - docker exec idci ls -l /home/idci/draft/lib
22 |   - if [ "${TRAVIS_TAG#draft-}" == "${TRAVIS_TAG}" ]; then
23 |       docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS
24 |         -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
25 |         idci make ghpages;
26 |     fi
27 | 
28 | deploy:
29 |   provider: script
30 |   script:
31 |     - docker exec -w "$DRAFT_DIR" -e CI=true -e GH_TOKEN -e TRAVIS
32 |         -e TRAVIS_REPO_SLUG -e TRAVIS_BRANCH -e TRAVIS_TAG -e TRAVIS_PULL_REQUEST
33 |         idci make upload
34 |   skip_cleanup: true
35 |   on:
36 |     tags: true
37 | 
38 | after_script:
39 |   - docker container rm -f idci
40 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | This repository relates to activities in the Internet Engineering Task Force
 4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered
 5 | Contributions to the IETF Standards Process, as defined in the intellectual
 6 | property policies of IETF currently designated as
 7 | [BCP 78](https://www.rfc-editor.org/info/bcp78),
 8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the
 9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html).
10 | 
11 | Any edit, commit, pull request, issue, comment or other change made to this
12 | repository constitutes Contributions to the IETF Standards Process
13 | (https://www.ietf.org/).
14 | 
15 | You agree to comply with all applicable IETF policies and procedures, including,
16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being
17 | subject to a Simplified BSD License) in Contributions.
18 | 
19 | 
20 | ## Other Resources
21 | 
22 | Discussion of this work occurs on the
23 | [quic working group mailing list](https://mailarchive.ietf.org/arch/browse/quic/)
24 | ([subscribe](https://www.ietf.org/mailman/listinfo/quic)).  In addition to
25 | contributions in GitHub, you are encouraged to participate in discussions there.
26 | 
27 | **Note**: Some working groups adopt a policy whereby substantive discussion of
28 | technical issues needs to occur on the mailing list.
29 | 
30 | You might also like to familiarize yourself with other
31 | [working group documents](https://datatracker.ietf.org/wg/quic/documents/).
32 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | See the
4 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md).
5 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | LIBDIR := lib
 2 | include $(LIBDIR)/main.mk
 3 | 
 4 | $(LIBDIR)/main.mk:
 5 | ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
 6 | 	git submodule sync
 7 | 	git submodule update $(CLONE_ARGS) --init
 8 | else
 9 | 	git clone -q --depth 10 $(CLONE_ARGS) \
10 | 	    -b main https://github.com/martinthomson/i-d-template $(LIBDIR)
11 | endif
12 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # QUIC-LB: Generating Routable QUIC Connection IDs
 2 | 
 3 | This is the working area for the IETF [QUIC Working Group](https://datatracker.ietf.org/wg/quic/documents/) Internet-Draft, "QUIC-LB: Generating Routable QUIC Connection IDs".
 4 | 
 5 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.html)
 6 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-load-balancers)
 7 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-load-balancers.diff)
 8 | 
 9 | It is also the home for "QUIC Retry Offload".
10 | 
11 | * [Editor's Copy](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.html)
12 | * [Working Group Draft](https://tools.ietf.org/html/draft-ietf-quic-retry-offload)
13 | * [Compare Editor's Copy to Working Group Draft](https://quicwg.github.io/load-balancers/#go.draft-ietf-quic-retry-offload.diff)
14 | 
15 | ## Building the Draft
16 | 
17 | Formatted text and HTML versions of the draft can be built using `make`.
18 | 
19 | ```sh
20 | $ make
21 | ```
22 | 
23 | This requires that you have the necessary software installed.  See
24 | [the instructions](https://github.com/martinthomson/i-d-template/blob/master/doc/SETUP.md).
25 | 
26 | 
27 | ## Contributing
28 | 
29 | See the
30 | [guidelines for contributions](https://github.com/quicwg/load-balancers/blob/master/CONTRIBUTING.md).
31 | 


--------------------------------------------------------------------------------
/draft-ietf-quic-load-balancers.md:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: "QUIC-LB: Generating Routable QUIC Connection IDs"
   3 | abbrev: QUIC-LB
   4 | docname: draft-ietf-quic-load-balancers-latest
   5 | date: {DATE}
   6 | category: std
   7 | ipr: trust200902
   8 | area: Transport
   9 | workgroup: QUIC
  10 | 
  11 | stand_alone: yes
  12 | pi: [toc, sortrefs, symrefs, docmapping]
  13 | 
  14 | author:
  15 |   -
  16 |     ins: M. Duke
  17 |     name: Martin Duke
  18 |     org: Google
  19 |     email: martin.h.duke@gmail.com
  20 | 
  21 |   -
  22 |     ins: N. Banks
  23 |     name: Nick Banks
  24 |     org: Microsoft
  25 |     email: nibanks@microsoft.com
  26 | 
  27 |   -
  28 |     ins: C. Huitema
  29 |     name: Christian Huitema
  30 |     org: Private Octopus Inc.
  31 |     email: huitema@huitema.net
  32 | 
  33 | normative:
  34 |   NIST-AES-ECB:
  35 |     title: "Recommendation for Block Cipher Modes of Operation: Methods and Techniques"
  36 |     author:
  37 |       - ins: M. Dworkin
  38 |     date: 2001
  39 |     refcontent:
  40 |       - "NIST Special Publication 800-38A"
  41 |     target: "https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf"
  42 | 
  43 | informative:
  44 |     Patarin2008:
  45 |         target: https://eprint.iacr.org/2008/036.pdf
  46 |         title: Generic Attacks on Feistel Schemes - Extended Version
  47 |         author:
  48 |             ins: J. Patarin
  49 |             name: Jacques Patarin
  50 |             org: PRiSM, University of Versailles
  51 |         date: 2008
  52 | 
  53 | --- abstract
  54 | 
  55 | QUIC address migration allows clients to change their IP address while
  56 | maintaining connection state. To reduce the ability of an observer to link two
  57 | IP addresses, clients and servers use new connection IDs when they communicate
  58 | via different client addresses. This poses a problem for traditional "layer-4"
  59 | load balancers that route packets via the IP address and port 4-tuple. This
  60 | specification provides a standardized means of securely encoding routing
  61 | information in the server's connection IDs so that a properly configured load
  62 | balancer can route packets with migrated addresses correctly. As it proposes a
  63 | structured connection ID format, it also provides a means of connection IDs
  64 | self-encoding their length to aid some hardware offloads.
  65 | 
  66 | --- middle
  67 | 
  68 | # Introduction
  69 | 
  70 | QUIC packets {{!RFC9000}} usually contain a connection ID to allow endpoints to
  71 | associate packets with different address/port 4-tuples to the same connection
  72 | context. This feature makes connections robust in the event of NAT rebinding.
  73 | QUIC endpoints usually designate the connection ID which peers use to address
  74 | packets. Server-generated connection IDs create a potential need for out-of-band
  75 | communication to support QUIC.
  76 | 
  77 | QUIC allows servers (or load balancers) to encode useful routing information for
  78 | load balancers in connection IDs.  It also encourages servers, in packets
  79 | protected by cryptography, to provide additional connection IDs to the client.
  80 | This allows clients that know they are going to change IP address or port to use
  81 | a separate connection ID on the new path, thus reducing linkability as clients
  82 | move through the world.
  83 | 
  84 | There is a tension between the requirements to provide routing information and
  85 | mitigate linkability.  Ultimately, because new connection IDs are in protected
  86 | packets, they must be generated at the server if the load balancer does not have
  87 | access to the connection keys. However, it is the load balancer that has the
  88 | context necessary to generate a connection ID that encodes useful routing
  89 | information. In the absence of any shared state between load balancer and
  90 | server, the load balancer must maintain a relatively expensive table of
  91 | server-generated connection IDs, and will not route packets correctly if they
  92 | use a connection ID that was originally communicated in a protected
  93 | NEW_CONNECTION_ID frame.
  94 | 
  95 | This specification provides common algorithms for encoding the server mapping in
  96 | a connection ID given some shared parameters. The mapping is generally only
  97 | discoverable by observers that have the parameters, preserving unlinkability as
  98 | much as possible.
  99 | 
 100 | As this document proposes a structured QUIC Connection ID, it also proposes a
 101 | system for self-encoding connection ID length in all packets, so that crypto
 102 | offload can efficiently obtain key information.
 103 | 
 104 | While this document describes a small set of configuration parameters to make
 105 | the server mapping intelligible, the means of distributing these parameters
 106 | between load balancers, servers, and other trusted intermediaries is out of its
 107 | scope. There are numerous well-known infrastructures for distribution of
 108 | configuration.
 109 | 
 110 | ## Terminology
 111 | 
 112 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 113 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 114 | interpreted as described in RFC 2119 {{!RFC2119}}.
 115 | 
 116 | In this document, these words will appear with that interpretation only when in
 117 | ALL CAPS.  Lower case uses of these words are not to be interpreted as carrying
 118 | significance described in RFC 2119.
 119 | 
 120 | In this document, "client" and "server" refer to the endpoints of a QUIC
 121 | connection unless otherwise indicated.  A "load balancer" is an intermediary for
 122 | that connection that does not possess QUIC connection keys, but it may rewrite
 123 | IP addresses or conduct other IP or UDP processing. A "configuration agent" is
 124 | the entity that determines the QUIC-LB configuration parameters for the network
 125 | and leverages some system to distribute that configuration.
 126 | 
 127 | Note that stateful load balancers that act as proxies, by terminating a QUIC
 128 | connection with the client and then retrieving data from the server using QUIC
 129 | or another protocol, are treated as a server with respect to this specification.
 130 | 
 131 | For brevity, "Connection ID" will often be abbreviated as "CID".
 132 | 
 133 | ## Notation
 134 | 
 135 | All wire formats will be depicted using the notation defined in Section 1.3 of
 136 | {{RFC9000}}.
 137 | 
 138 | # Overview
 139 | 
 140 | In QUIC-LB, load balancers do not generate individual connection IDs for
 141 | servers.  Instead, they communicate the parameters of an algorithm to generate
 142 | routable connection IDs.
 143 | 
 144 | The algorithms differ in the complexity of configuration at both load balancer
 145 | and server. Increasing complexity improves obfuscation of the server mapping.
 146 | 
 147 | This specification describes three participants: the configuration agent, the
 148 | load balancer, and the server. For any given QUIC-LB configuration that enables
 149 | connection-ID-aware load balancing, there must be a choice of (1) routing
 150 | algorithm, (2) server ID allocation strategy, and (3) algorithm parameters.
 151 | 
 152 | Fundamentally, servers generate connection IDs that encode their server ID.
 153 | Load balancers decode the server ID from the CID in incoming packets to route
 154 | to the correct server.
 155 | 
 156 | {{!RFC8999}} specifies that endpoints generate their own connection IDs,
 157 | implying that all QUIC versions will have a mechanism to communicate their
 158 | connection IDs to the peer. In QUIC versions 1 and 2, the server does so using
 159 | the Source Connection ID field of its long header packets for the first
 160 | connection ID, and NEW_CONNECTION_ID frames for subsequent CIDs.
 161 | 
 162 | There are situations where a server pool might be operating two or more routing
 163 | algorithms or parameter sets simultaneously.  The load balancer uses the first
 164 | three bits of the connection ID to multiplex incoming Destination Connection IDs
 165 | (DCIDs) over these schemes (see {{config-rotation}}).
 166 | 
 167 | # First CID octet {#first-octet}
 168 | 
 169 | The Connection ID construction schemes defined in this document reserve the
 170 | first octet of a CID for two special purposes: one mandatory (config rotation)
 171 | and one optional (length self-description).
 172 | 
 173 | Subsequent sections of this document refer to the contents of this octet as the
 174 | "first octet."
 175 | 
 176 | ## Config Rotation {#config-rotation}
 177 | 
 178 | The first three bits of any connection ID MUST encode an identifier for the
 179 | configuration that the connection ID uses. This enables incremental deployment
 180 | of new QUIC-LB settings (e.g., keys). A configuration MUST NOT use the
 181 | reserved identifier 0b111 (see {{config-failover}} below).
 182 | 
 183 | When new configuration is distributed to servers, there will be a transition
 184 | period when connection IDs reflecting old and new configuration coexist in the
 185 | network.  The rotation bits allow load balancers to apply the correct routing
 186 | algorithm and parameters to incoming packets.
 187 | 
 188 | Configuration Agents SHOULD deliver new configurations to load balancers before
 189 | doing so to servers, so that load balancers are ready to process CIDs using the
 190 | new parameters when they arrive.
 191 | 
 192 | A Configuration Agent SHOULD NOT use a codepoint to represent a new
 193 | configuration until it takes precautions to make sure that all connections using
 194 | CIDs with an old configuration at that codepoint have closed or transitioned.
 195 | 
 196 | Servers MUST NOT generate new connection IDs using an old configuration after
 197 | receiving a new one from the configuration agent. Servers MUST use that QUIC
 198 | version's methods to update the client with CIDs (e.g., NEW_CONNECTION_ID
 199 | frames) using the new configuration and retire CIDs using the old configuration.
 200 | 
 201 | It is also possible to use these bits for more long-lived distinction of different
 202 | configurations, but this has privacy implications (see {{multiple-configs}}).
 203 | 
 204 | ## Configuration Failover {#config-failover}
 205 | 
 206 | In some deployments, an infrastructure will not receive traffic unless all
 207 | servers have received a configuration, and load balancers have a superset of all
 208 | configurations that are active in the server pool, thus guaranteeing that any
 209 | CID generated by a server is decodable by any load balancer. Servers and load
 210 | balancers deployed under all of these assumptions can ignore the provisions in
 211 | this subsection.
 212 | 
 213 | Load balancers treat connection IDs for which they have no corresponding config
 214 | ID as unroutable (see {{unroutable}}). If they have no configuration at all,
 215 | then all connection IDs are unroutable.
 216 | 
 217 | Servers with no active configuration MUST issue connection IDs with the reserved
 218 | value of the three most significant bits set to 0b111 to signify the connection
 219 | ID is unroutable. These connection IDs MUST self-encode their length (see
 220 | {{length-self-description}}).
 221 | 
 222 | Servers with no active configuration SHOULD provide the client exactly one CID
 223 | over the life of the connection. In QUIC versions 1 and 2, therefore, servers
 224 | SHOULD NOT send any NEW_CONNECTION_ID frames, instead delivering a single CID
 225 | via the Source Connection ID of long headers it sends.
 226 | 
 227 | Servers with no active configuration SHOULD send the "disable_active_migration"
 228 | transport parameter, or a similar message in future QUIC versions.
 229 | 
 230 | When using codepoint 0b111, all bytes but the first SHOULD have no greater
 231 | chance of collision than random bytes. The connection ID SHOULD be of at least
 232 | length 8 to provide 7 bytes of entropy after the first octet with a low chance
 233 | of collision.
 234 | 
 235 | ## Length Self-Description {#length-self-description}
 236 | 
 237 | Local hardware cryptographic offload devices may accelerate QUIC servers by
 238 | receiving keys from the QUIC implementation indexed to the connection ID.
 239 | However, on physical devices operating multiple QUIC servers, it might be
 240 | impractical to efficiently look up keys if the connection ID varies in length and
 241 | does not self-encode its own length.
 242 | 
 243 | Note that this is a function of particular server devices and is irrelevant to
 244 | load balancers. As such, load balancers MAY omit this from their configuration.
 245 | However, the remaining 5 bits in the first octet of the Connection ID are
 246 | reserved to express the length of the following connection ID, not including
 247 | the first octet.
 248 | 
 249 | A server not using this functionality SHOULD choose the five bits so as to have
 250 | no observable relationship to previous connection IDs issued for that
 251 | connection.
 252 | 
 253 | ## Format
 254 | 
 255 | ~~~
 256 | First Octet {
 257 |   Config Rotation (3),
 258 |   CID Len or Random Bits (5),
 259 | }
 260 | ~~~
 261 | {: #first-octet-format title="First Octet Format"}
 262 | 
 263 | The first octet has the following fields:
 264 | 
 265 | Config Rotation: Indicates the configuration used to interpret the CID.
 266 | 
 267 | CID Len or Random Bits: If Length Self-Description is in use, encodes the length
 268 | of the Connection ID following the First Octet. Otherwise, contains random bits.
 269 | 
 270 | # Unroutable Connection IDs {#unroutable}
 271 | 
 272 | ## Definition
 273 | 
 274 | QUIC-LB servers with a valid configuration will generate Connection IDs that are
 275 | decodable to extract a server ID in accordance with a specified algorithm and
 276 | parameters.  However, QUIC often uses client-generated Connection IDs prior to
 277 | receiving a packet from the server.
 278 | 
 279 | Furthermore, servers without a valid configuration, or a configuration not
 280 | present at the load balancer, will also generate connection IDs that are not
 281 | decodable, and these CIDs are likely to persist for the duration of the
 282 | connection.
 283 | 
 284 | These CIDs might not conform to the expectations of the routing algorithm and
 285 | therefore not be routable by the load balancer. Those that are not routable are
 286 | "unroutable DCIDs" and receive similar treatment regardless of why they're
 287 | unroutable:
 288 | 
 289 | * The config rotation bits ({{config-rotation}}) do not correspond to an active
 290 | configuration. Note: a packet with a DCID with config ID codepoint 0b111 (see
 291 | {{config-failover}}) is always unroutable.
 292 | * If the packet header encodes the DCID length, the DCID is not long enough for
 293 | the decoder to process.
 294 | * The extracted server mapping does not correspond to an active server.
 295 | 
 296 | If the load balancer has knowledge that all servers in the pool are encoding
 297 | CID length in the first octet (see {{length-self-description}}), it MAY
 298 | perform additional checks based on that self-encoded length:
 299 | 
 300 | * In a long header, verify that the self-encoded length is consistent with the
 301 | CID length field in the header (i.e., the self-encoded length is one less).
 302 | * Verify that the self-encoded length is consistent with the QUIC version, if
 303 | known.
 304 | * Verify that the self-encoded length is large enough for the decoder to process
 305 | using the indicated config ID.
 306 | 
 307 | DCIDs that do not meet any of these criteria are routable.
 308 | 
 309 | ## Load Balancer Forwarding {#load-balancer-forwarding}
 310 | 
 311 | Load balancers execute the following steps in order until one results in a
 312 | routing decision. The steps refer to state that some load balancers will
 313 | maintain, depending on the deployment's underlying assumptions. See
 314 | {{fallback-algorithm}} for further discussion of this state.
 315 | 
 316 | 1. If the packet contains a routable CID, route the packet accordingly.
 317 | 1. If the packet has a long header and matches an entry in a table of routing
 318 | decisions indexed by a concatenation of 4-tuple and Source CID, route the packet
 319 | accordingly.
 320 | 1. If the packet matches an entry in a table of routing decisions by destination
 321 | CID, route the packet accordingly.
 322 | 1. If the packet matches an entry in a table of routing decisions by 4-tuple, route
 323 | the packet accordingly.
 324 | 1. Use the fallback algorithm to make a routing decision and, if applicable,
 325 | record the results in the tables indexed by 4-tuple and/or CID. In some cases,
 326 | described below, the load balancer might buffer the packet to defer a decision.
 327 | 
 328 | ## Fallback Algorithms {#fallback-algorithm}
 329 | 
 330 | There are conditions described above where a load balancer routes a packet using
 331 | a "fallback algorithm." A standardized algorithm design is not necessary for
 332 | interoperability, so load balancers can implement any algorithm that meets the
 333 | relevant requirements below.
 334 | 
 335 | There is a baseline case that has relatively simple requirements of the chosen
 336 | fallback algorithm, and an advanced case with more capabilities and more complex
 337 | requirements.
 338 | 
 339 | ### Baseline Fallback Algorithm
 340 | 
 341 | All load balancers MUST implement a baseline fallback algorithm that takes only
 342 | the 4-tuple as an input and outputs a routing decision.
 343 | 
 344 | If it is impossible for the server to generate CIDs that the load balancer
 345 | cannot decode (see {{config-failover}}), there are no further requirements in
 346 | this subsection.
 347 | 
 348 | Otherwise, the load balancer SHOULD maintain a table of 4-tuples that carried
 349 | unroutable DCIDs and the resulting routing decision. Provided the table does
 350 | not overflow, and the load balancer does not lose state, this allows connections
 351 | to survive when the server pool changes, which would sometimes change the output
 352 | of the fallback algorithm.
 353 | 
 354 | The load balancer MAY maintain a table of observed unroutable DCIDs and the
 355 | resulting routing decision. Provided the table does not overflow, these
 356 | connections will be robust to NAT rebinding.
 357 | 
 358 | Load balancers SHOULD maintain per-flow timers to periodically purge state in
 359 | the tables described above.
 360 | 
 361 | ### Advanced Fallback Algorithm
 362 | 
 363 | Some architectures might require a load balancer to choose a server pool based
 364 | on deep packet inspection of a client packet. For example, it may use the TLS
 365 | 1.3 Server Name Indication (SNI) ({{?RFC6066}}) field. The advanced fallback
 366 | algorithm enables this capability but levies several additional requirements to
 367 | make consistent routing decisions.
 368 | 
 369 | For packets not known to belong to a QUIC version the load balancer can parse,
 370 | load balancers MUST use the baseline fallback algorithm if the DCID is
 371 | unroutable.
 372 | 
 373 | For known QUIC versions, the fallback algorithm MAY parse packets and use that
 374 | information to make a routing decision.
 375 | 
 376 | If so, it MUST have the ability to buffer packets with unroutable DCIDs to await
 377 | further packets that allow it to make a routing decision, as the fields of
 378 | interest can be an arbitrary number of packets into the connection.
 379 | 
 380 | 4-tuple routing is not sufficient for this use case, because a client can use
 381 | the same 4-tuple for two connections that should be routed differently (e.g.
 382 | because they target different SNIs), as long as the packet contains a source
 383 | connection ID of nonzero length.
 384 | 
 385 | Therefore, the load balancer SHOULD maintain two tables that map different
 386 | values to a routing decision:
 387 | 
 388 | - a table indexed by a concatenation of the 4-tuple and source CID, which might
 389 | be zero-length, to route subsequent long header packets that do not contain the
 390 | server-generated connection ID;
 391 | 
 392 | - a table indexed by destination CID, if and only if it is possible for the
 393 | server to generate unroutable CIDs. This table can be shared with the one in use
 394 | for the baseline fallback algorithm.
 395 | 
 396 | If either table overflows, or if the load balancer loses state, it is likely the
 397 | load balancer will misroute packets.
 398 | 
 399 | Load balancers SHOULD maintain per-flow timers to periodically purge state in
 400 | the tables described above.
 401 | 
 402 | # Server ID Encoding in Connection IDs
 403 | 
 404 | ## Server ID Allocation {#sid-allocation}
 405 | 
 406 | Load Balancer configurations include a mapping of server IDs to forwarding
 407 | addresses. The corresponding server configurations contain one or
 408 | more unique server IDs.
 409 | 
 410 | The configuration agent chooses a server ID length for each configuration that
 411 | MUST be at least one octet.
 412 | 
 413 | A QUIC-LB configuration MAY significantly over-provision the server ID space
 414 | (i.e., provide far more codepoints than there are servers) to increase the
 415 | probability that a randomly generated Destination Connection ID is unroutable.
 416 | 
 417 | The configuration agent SHOULD provide a means for servers to express the
 418 | number of server IDs it can usefully employ, because a single routing address
 419 | actually corresponds to multiple server entities (see {{lb-chains}}).
 420 | 
 421 | Conceptually, each configuration has its own set of server ID allocations,
 422 | though two static configurations with identical server ID lengths MAY use a
 423 | common allocation between them.
 424 | 
 425 | A server encodes one of its assigned server IDs in any CID it generates using
 426 | the relevant configuration.
 427 | 
 428 | ## CID format
 429 | 
 430 | All connection IDs use the following format:
 431 | 
 432 | ~~~
 433 | QUIC-LB Connection ID {
 434 |     First Octet (8),
 435 |     Plaintext Block (40..152),
 436 | }
 437 | Plaintext Block {
 438 |     Server ID (8..),
 439 |     Nonce (32..),
 440 | }
 441 | ~~~
 442 | {: #plaintext-cid-format title="CID Format"}
 443 | 
 444 | The First Octet field serves one or two purposes, as defined in {{first-octet}}.
 445 | 
 446 | The Server ID field encodes the information necessary for the load balancer to
 447 | route a packet with that connection ID. It is often encrypted.
 448 | 
 449 | The server uses the Nonce field to make sure that each connection ID it
 450 | generates is unique, even though they all use the same Server ID.
 451 | 
 452 | ## Configuration Agent Actions
 453 | 
 454 | The configuration agent assigns a server ID to every server in its pool in
 455 | accordance with {{sid-allocation}}, and determines a server ID length (in
 456 | octets) sufficiently large to encode all server IDs, including potential future
 457 | servers.
 458 | 
 459 | Each configuration specifies the length of the Server ID and Nonce fields, with
 460 | limits defined for each algorithm.
 461 | 
 462 | Optionally, it also defines a 16-octet key. Note that failure to define a key
 463 | means that observers can determine the assigned server of any connection,
 464 | significantly increasing the linkability of QUIC address migration.
 465 | 
 466 | The nonce length MUST be at least 4 octets. The server ID length MUST be at
 467 | least 1 octet.
 468 | 
 469 | As QUIC version 1 limits connection IDs to 20 octets, the server ID and nonce
 470 | lengths MUST sum to 19 octets or less.
 471 | 
 472 | ## Server Actions
 473 | 
 474 | The server writes the first octet and its server ID into their respective
 475 | fields.
 476 | 
 477 | If there is no key in the configuration, the server MUST fill the Nonce field
 478 | with bytes that have no observable relationship to the field in previously
 479 | issued connection IDs. If there is a key, the server fills the nonce field with
 480 | a nonce of its choosing. See {{cid-entropy}} for details.
 481 | 
 482 | The server MAY append additional bytes to the connection ID, up to the limit
 483 | specified in that version of QUIC, for its own use. These bytes MUST NOT
 484 | provide observers with any information that could link two connection IDs to
 485 | the same connection, client, or server. In particular, all servers using a
 486 | configuration MUST consistently add the same length to each connection ID,
 487 | to preserve the linkability objectives of QUIC-LB. Any additional bytes SHOULD
 488 | NOT provide any observable correlation to previous connection IDs for that
 489 | connection (e.g., the bytes can be chosen at random).
 490 | 
 491 | If there is no key in the configuration, the Connection ID is complete.
 492 | Otherwise, there are further steps, as described in the two following
 493 | subsections.
 494 | 
 495 | Encryption below uses the AES-128-ECB cipher {{NIST-AES-ECB}}. Future standards
 496 | could add new algorithms that use other ciphers to provide cryptographic agility
 497 | in accordance with {{?RFC7696}}. QUIC-LB implementations SHOULD be extensible to
 498 | support new algorithms.
 499 | 
 500 | ### Special Case: Single Pass Encryption
 501 | 
 502 | When the nonce length and server ID length sum to exactly 16 octets, the server
 503 | MUST use a single-pass encryption algorithm. All connection ID octets except the
 504 | first form an AES-ECB block. This block is encrypted once, and the result forms
 505 | the second through seventeenth most significant bytes of the connection ID.
 506 | 
 507 | ### General Case: Four-Pass Encryption
 508 | 
 509 | Any other field length requires four passes for encryption and at least three
 510 | for decryption. To understand this algorithm, it is useful to define two
 511 | functions that minimize the amount of bit-shifting necessary in the event that
 512 | there are an odd number of octets.
 513 | 
 514 | When configured with both a key, and a nonce length and server ID length that
 515 | sum to any number other than 16, the server MUST follow the algorithm below to
 516 | encrypt the connection ID.
 517 | 
 518 | #### Overview
 519 | 
 520 | The 4-pass algorithm is a four-round Feistel Network with the round function
 521 | being AES-ECB. Most modern applications of Feistel Networks have more than four
 522 | rounds. The implications of this choice, which is meant to limit the per-packet
 523 | compute overhead at load balancers, are discussed in
 524 | {{distinguishing-attacks}}.
 525 | 
 526 | The server concatenates the server ID and nonce into a single field, which is
 527 | then split into equal halves. In successive passes, one of these halves is
 528 | expanded into a 16B plaintext, encrypted with AES-ECB, and the result XORed with
 529 | the other half. The diagram below shows the conceptual processing of a plaintext
 530 | server ID and nonce into a connection ID. 'FO' stands for 'First Octet'.
 531 | 
 532 | ~~~ aasvg
 533 |    +-----+-----------+-----------------------+
 534 |    | FO  | Server ID |         Nonce         |
 535 |    +--+--+-----------+-----+-----------------+
 536 |       |                    |
 537 |       |                    V
 538 |       |  +-----------------+-----------------+
 539 |       |  |      left_0     |      right_0    |
 540 |       |  +--+--------------+--------------+--+
 541 |       |     |                             |
 542 |       |     |                             |
 543 |       |     |         .--------.          V
 544 |       |     +-------->| AES-ECB +-------->⊕
 545 |       |     |         '--------'          |
 546 |       |     V             .--------.      | right_1
 547 |       |     ⊕<-----------+ AES-ECB |<-----+
 548 |       |     |             '--------'      |
 549 |       |     | left_1  .--------.          V
 550 |       |     +-------->| AES-ECB +-------->⊕
 551 |       |     |         '--------'          |
 552 |       |     V             .--------.      |
 553 |       |     ⊕<-----------+ AES-ECB |<-----+
 554 |       |     |             '--------'      |
 555 |       |     |                             |
 556 |       |     V                             V
 557 |       |  +-----------------+-----------------+
 558 |       |  |      left_2     |      right_2    |
 559 |       |  +-------+---------+--------+--------+
 560 |       |          |                  |
 561 |       V          V                  V
 562 |    +-----+-----------------------------------+
 563 |    | FO  |            Ciphertext             |
 564 |    +-----+-----------------------------------+
 565 | ~~~
 566 | 
 567 | #### Useful functions
 568 | 
 569 | Two functions are useful to define:
 570 | 
 571 | The expand(length, pass, input_bytes) function concatenates three arguments and
 572 | outputs 16 zero-padded octets.
 573 | 
 574 | The output of expand is as follows:
 575 | 
 576 | ~~~pseudocode
 577 | ExpandResult {
 578 |      input_bytes(...),
 579 |      ZeroPad(...),
 580 |      length(8),
 581 |      pass(8)
 582 | }
 583 | ~~~
 584 | 
 585 | in which:
 586 | 
 587 | * 'input_bytes' is drawn from one half of the plaintext. It forms the N most
 588 | significant octets of the output, where N is half the 'length' argument, rounded
 589 | up, and thus a number between 3 and 10, inclusive.
 590 | 
 591 | * 'ZeroPad' is a set of 14-N octets set to zero.
 592 | 
 593 | * 'length' is an 8-bit integer that reports the sum of the configured nonce
 594 | length and server id length in octets, and forms the fifteenth octet of the
 595 | output. The 'length' argument MUST NOT exceed 19 and MUST NOT be less than 5.
 596 | 
 597 | * 'pass' is an 8-bit integer that reports the 'pass' argument of the algorithm,
 598 | and forms the sixteenth (least significant) octet of the output. It guarantees
 599 | that the cryptographic input of every pass of the algorithm is unique.
 600 | 
 601 | For example,
 602 | 
 603 | ~~~pseudocode
 604 | expand(0x06, 0x02, 0xaaba3c) = 0xaaba3c00000000000000000000000602
 605 | ~~~
 606 | 
 607 | Similarly, truncate(input, n) returns the first n octets of 'input'.
 608 | 
 609 | ~~~pseudocode
 610 | truncate(0x2094842ca49256198c2deaa0ba53caa0, 4) = 0x2094842c
 611 | ~~~
 612 | 
 613 | Let 'half_len' be equal to 'plaintext_len' / 2, rounded up.
 614 | 
 615 | #### Algorithm Description
 616 | 
 617 | The example at the end of this section helps to clarify the steps described
 618 | below.
 619 | 
 620 | 1. The server concatenates the server ID and nonce to create plaintext_CID. The
 621 | length of the result in octets is plaintext_len.
 622 | 
 623 | 2. The server splits plaintext_CID into components left_0 and right_0 of equal
 624 | length half_len. If plaintext_len is odd, right_0 clears its first four bits,
 625 | and left_0 clears its last four bits. For example, 0x7040b81b55ccf3 would split
 626 | into a left_0 of 0x7040b810 and right_0 of 0x0b55ccf3.
 627 | 
 628 | 3. Encrypt the result of expand(plaintext_len, 1, left_0) using an AES-ECB-128
 629 | cipher to obtain a ciphertext.
 630 | 
 631 | 4. XOR the first half_len octets of the ciphertext with right_0 to form right_1.
 632 | Steps 3 and 4 can be summarized as
 633 | 
 634 | ~~~pseudocode
 635 |     result = AES_ECB(key, expand(plaintext_len, 1, left_0))
 636 |     right_1 = XOR(right_0, truncate(result, half_len))
 637 | ~~~
 638 | 
 639 | {:start="5"}
 640 | 5. If the plaintext_len is odd, clear the first four bits of right_1.
 641 | 
 642 | 6. Repeat steps 3 and 4, but use them to compute left_1 by expanding and
 643 | encrypting right_1 with pass = 2, and XOR the results with left_0.
 644 | 
 645 | ~~~pseudocode
 646 |     result = AES_ECB(key, expand(plaintext_len, 2, right_1))
 647 |     left_1 = XOR(left_0, truncate(result, half_len))
 648 | ~~~
 649 | 
 650 | {:start="7"}
 651 | 7. If the plaintext_len is odd, clear the last four bits of left_1.
 652 | 
 653 | 8. Repeat steps 3 and 4, but use them to compute right_2 by expanding and
 654 | encrypting left_1 with pass = 3, and XOR the results with right_1.
 655 | 
 656 | ~~~pseudocode
 657 |     result = AES_ECB(key, expand(plaintext_len, 3, left_1))
 658 |     right_2 = XOR(right_1, truncate(result, half_len))
 659 | ~~~
 660 | 
 661 | {:start="9"}
 662 | 9. If the plaintext_len is odd, clear the first four bits of right_2.
 663 | 
 664 | 10. Repeat steps 3 and 4, but use them to compute left_2 by expanding and
 665 | encrypting right_2 with pass = 4, and XOR the results with left_1.
 666 | 
 667 | ~~~pseudocode
 668 |     result = AES_ECB(key, expand(plaintext_len, 4, right_2))
 669 |     left_2 = XOR(left_1, truncate(result, half_len))
 670 | ~~~
 671 | 
 672 | {:start="11"}
 673 | 11. If the plaintext_len is odd, clear the last four bits of left_2.
 674 | 
 675 | 12. The server concatenates left_2 with right_2 to form the ciphertext CID,
 676 | which it appends to the first octet. If plaintext_len is odd, the four
 677 | least significant bits of left_2 and four most significant bits of right_2,
 678 | which are all zero, are stripped off before concatenation to make the
 679 | resulting ciphertext the same length as the original plaintext.
 680 | 
 681 | #### Encryption Example
 682 | 
 683 | The following example executes the steps for the provided inputs. Note that the
 684 | plaintext is of odd octet length, so the middle octet will be split evenly
 685 | between left_0 and right_0.
 686 | 
 687 | ~~~pseudocode
 688 | server_id = 0x31441a
 689 | nonce = 0x9c69c275
 690 | key = 0xfdf726a9893ec05c0632d3956680baf0
 691 | 
 692 | // step 1
 693 | plaintext_CID = 0x31441a9c69c275
 694 | plaintext_len = 7
 695 | 
 696 | // step 2
 697 | half_len = 4
 698 | left_0 = 0x31441a90
 699 | right_0 = 0x0c69c275
 700 | 
 701 | // step 3
 702 | aes_input = 0x31441a90000000000000000000000701
 703 | aes_output = 0xa255dd8cdacf01948d3a848c3c7fee23
 704 | 
 705 | // step 4
 706 | right_1 = 0x0c69c275 ^ 0xa255dd8c = 0xae3c1ff9
 707 | 
 708 | // step 5 (clear bits)
 709 | right_1 = 0x0e3c1ff9
 710 | 
 711 | // step 6
 712 | aes_input = 0x0e3c1ff9000000000000000000000702
 713 | aes_output = 0xe5e452cb9e1bedb0b2bf830506bf4c4e
 714 | left_1 = 0x31441a90 ^ 0xe5e452cb = 0xd4a0485b
 715 | 
 716 | // step 7 (clear bits)
 717 | left_1 = 0xd4a04850
 718 | 
 719 | // step 8
 720 | aes_input = 0xd4a04850000000000000000000000703
 721 | aes_output = 0xb7821ab3024fed0913b6a04d18e3216f
 722 | right_2 = 0x0e3c1ff9 ^ 0xb7821ab3 = 0xb9be054a
 723 | 
 724 | // step 9 (clear bits)
 725 | right_2 = 0x09be054a
 726 | 
 727 | // step 10
 728 | aes_input = 0x09be054a000000000000000000000704
 729 | aes_output = 0xb334357cfdf81e3fafe180154eaf7378
 730 | left_2 = 0xd4a04850 ^ 0xb334357c = 0x67947d2c
 731 | 
 732 | // step 11 (clear bits)
 733 | left_2 = 0x67947d20
 734 | 
 735 | // step 12
 736 | cid = first_octet || left_2 || right_2 = 0x0767947d29be054a
 737 | ~~~
 738 | 
 739 | ## Load Balancer Actions
 740 | 
 741 | On each incoming packet, the load balancer extracts consecutive octets,
 742 | beginning with the second octet. If there is no key, the first octets
 743 | correspond to the server ID.
 744 | 
 745 | If there is a key, the load balancer takes one of two actions:
 746 | 
 747 | ### Special Case: Single Pass Encryption
 748 | 
 749 | If server ID length and nonce length sum to exactly 16 octets, they form a
 750 | ciphertext block. The load balancer decrypts the block using the AES-ECB key
 751 | and extracts the server ID from the most significant bytes of the resulting
 752 | plaintext.
 753 | 
 754 | ### General Case: Four-Pass Encryption
 755 | 
 756 | First, split the ciphertext CID (excluding the first octet) into its equal-
 757 | length components left_2 and right_2. Then follow the process below:
 758 | 
 759 | ~~~pseudocode
 760 |     result = AES_ECB(key, expand(plaintext_len, 4, right_2))
 761 |     left_1 = XOR(left_2, truncate(result, half_len))
 762 |     if (plaintext_len_is_odd()) clear_last_bits(left_1, 4)
 763 | 
 764 |     result = AES_ECB(key, expand(plaintext_len, 3, left_1))
 765 |     right_1 = XOR(right_2, truncate(result, half_len))
 766 |     if (plaintext_len_is_odd()) clear_first_bits(right_1, 4)
 767 | 
 768 |     result = AES_ECB(key, expand(plaintext_len, 2, right_1))
 769 |     left_0 = XOR(left_1, truncate(result, half_len))
 770 |     if (plaintext_len_is_odd()) clear_last_bits(left_0, 4)
 771 | ~~~
 772 | 
 773 | As the load balancer has no need for the nonce, it can conclude after 3 passes
 774 | as long as the server ID is entirely contained in left_0 (i.e., the nonce is at
 775 | least as large as the server ID). If the server ID is longer, a fourth pass
 776 | is necessary:
 777 | 
 778 | ~~~pseudocode
 779 |     result = AES_ECB(key, expand(plaintext_len, 1, left_0))
 780 |     right_0 = XOR(right_1, truncate(result, half_len))
 781 |     if (plaintext_len_is_odd()) clear_first_bits(right_0, 4)
 782 | ~~~
 783 | 
 784 | and the load balancer has to concatenate left_0 and right_0 to obtain the
 785 | complete server ID.
 786 | 
 787 | # Per-connection state {#per-connection-state}
 788 | 
 789 | The CID allocation methods QUIC-LB defines require no per-connection state at
 790 | the load balancer, with a few conditional exceptions described in
 791 | {{unroutable}}. Otherwise, the load balancer can extract the server ID from
 792 | the connection ID of each incoming packet and route that packet accordingly.
 793 | 
 794 | However, once a routing decision has been made, the load balancer MAY
 795 | associate the 4-tuple or connection ID with the decision. This has two
 796 | advantages:
 797 | 
 798 | * The load balancer only extracts the server ID once until the 4-tuple or
 799 | connection ID changes. When the CID is encrypted, this might reduce
 800 | computational load.
 801 | 
 802 | * Incoming Stateless Reset packets and ICMP messages are easily routed to the
 803 | correct origin server.
 804 | 
 805 | In addition to the increased state requirements, however, load balancers cannot
 806 | detect the packets that indicate the end of the connection, so they rely on a
 807 | timeout to delete connection state. There are numerous considerations around
 808 | setting such a timeout.
 809 | 
 810 | In the event a connection ends, freeing an IP and port, and a different
 811 | connection migrates to that IP and port before the timeout, the load balancer
 812 | will misroute the different connection's packets to the original server. A short
 813 | timeout limits the likelihood of such a misrouting.
 814 | 
 815 | Furthermore, if a short timeout causes premature deletion of state, the routing
 816 | is easily recoverable by decoding an incoming Connection ID. However, a short
 817 | timeout also reduces the chance that an incoming Stateless Reset is correctly
 818 | routed.
 819 | 
 820 | Note that some heuristics to purge state early can introduce Denial of Service
 821 | vulnerabilities. For example, one heuristic might delete flow state once the
 822 | load balancer observes a routable CID on that flow. An attacker that can observe
 823 | a target flow can store a routable CID from a previous connection and spoof the
 824 | target flow's 4-tuple with the routable CID, causing premature deletion of that
 825 | state.
 826 | 
 827 | Servers MAY implement the technique described in {{Section 14.4.1 of RFC9000}}
 828 | in case the load balancer is stateless, to increase the likelihood a Source
 829 | Connection ID is included in ICMP responses to Path Maximum Transmission Unit
 830 | (PMTU) probes.  Load balancers MAY parse the echoed packet to extract the Source
 831 | Connection ID, if it contains a QUIC long header, and extract the Server ID as
 832 | if it were in a Destination CID.
 833 | 
 834 | # Additional Use Cases
 835 | 
 836 | This section discusses considerations for some deployment scenarios not implied
 837 | by the specification above.
 838 | 
 839 | ## Load balancer chains {#lb-chains}
 840 | 
 841 | Some network architectures may have multiple tiers of low-state load balancers,
 842 | where a first tier of devices makes a routing decision to the next tier, and so
 843 | on, until packets reach the server. Although QUIC-LB is not explicitly designed
 844 | for this use case, it is possible to support it.
 845 | 
 846 | If each load balancer is assigned a range of server IDs that is a subset of the
 847 | range of IDs assigned to devices that are closer to the client, then the first
 848 | devices to process an incoming packet can extract the server ID and then map it
 849 | to the correct forwarding address. Note that this solution is extensible to
 850 | arbitrarily large numbers of load-balancing tiers, as the maximum server ID
 851 | space is quite large.
 852 | 
 853 | If the number of necessary server IDs per next hop is uniform, a simple
 854 | implementation would use successively longer server IDs at each tier of load
 855 | balancing, and the server configuration would match the last tier. Load
 856 | balancers closer to the client can then treat any parts of the server ID they
 857 | did not use as part of the nonce.
 858 | 
 859 | ## Server Process Demultiplexing
 860 | 
 861 | QUIC servers might have QUIC running on multiple processes or threads listening
 862 | on the same address, and have a need to demultiplex between them. In principle,
 863 | this demultiplexer is a Layer 4 load balancer, and the guidance in {{lb-chains}}
 864 | applies. However, in many deployments the demultiplexer lacks the capability to
 865 | perform decryption operations. Internal server coordination is out of scope of
 866 | this specification, but this non-normative section proposes some approaches
 867 | that could work given certain server capabilities:
 868 | 
 869 | * Some bytes of the server ID are reserved to encode the process ID. The
 870 | demultiplexer might operate based on the 4-tuple or other legacy indicator, but
 871 | the receiving server process extracts the server ID, and if it does not match
 872 | the one for that process, the process could "toss" the packet to the correct
 873 | destination process.
 874 | 
 875 | * Each process could register the connection IDs it generates with the
 876 | demultiplexer, which routes those connection IDs accordingly.
 877 | 
 878 | * In a combination of the two approaches above, the demultiplexer generally
 879 | routes by 4-tuple. After a migration, the process tosses the first flight of
 880 | packets and registers the new connection ID with the demultiplexer. This
 881 | alternative limits the bandwidth consumption of tossing and the memory footprint
 882 | of a full connection ID table.
 883 | 
 884 | * When generating a connection ID, the server writes the process ID to the
 885 | random field of the first octet, or if this is being used for length encoding,
 886 | in an octet it appends after the ciphertext. It then applies a keyed hash (with
 887 | a key locally generated for the sole use of that server). The hash result is
 888 | used as a bitmask to XOR with the bits encoding the process ID. On packet
 889 | receipt, the demultiplexer applies the same keyed hash to generate the same
 890 | mask and recovers the process ID. (Note that this approach is conceptually
 891 | similar to QUIC header protection). It is important that the server also appends
 892 | the process ID to the server ID in the plaintext, so that different processes do
 893 | not generate the same ciphertext. The load balancer will consider this data to
 894 | be part of the nonce.
 895 | 
 896 | ## Moving connections between servers
 897 | 
 898 | Some deployments may transparently move a connection from one server to another.
 899 | The means of transferring connection state between servers is out of scope of
 900 | this document.
 901 | 
 902 | To support a handover, a server involved in the transition could issue CIDs that
 903 | map to the new server via a NEW_CONNECTION_ID frame, and retire CIDs associated
 904 | with the old server using the "Retire Prior To" field in that frame.
 905 | 
 906 | # Version Invariance of QUIC-LB {#version-invariance}
 907 | 
 908 | The server ID encodings, and requirements for their handling, are designed to be
 909 | QUIC version independent (see {{?RFC8999}}). A QUIC-LB load balancer will
 910 | generally not require changes as servers deploy new versions of QUIC. However,
 911 | there are several unlikely future design decisions that could impact the
 912 | operation of QUIC-LB.
 913 | 
 914 | A QUIC version might define limits on connection ID length that make some or all
 915 | of the mechanisms in this document unusable.  For example, a maximum connection
 916 | ID length could be below the minimum necessary to use all or part of this
 917 | specification; or, the minimum connection ID length could be larger than the
 918 | largest value in this specification. Similarly, the length self-encoding
 919 | specification cannot accommodate connection IDs longer than 32 bytes.
 920 | 
 921 | The advanced fallback implementation supports a requirement to inspect version-
 922 | specific elements of packets to make a routing decision, such as the Server Name
 923 | Indication (SNI) extension in the TLS Client Hello.  The format and
 924 | cryptographic protection of this information may change in future versions or
 925 | extensions of TLS or QUIC, and therefore this functionality is inherently
 926 | version-dependent. Such a load balancer, when it receives packets from an
 927 | unknown QUIC version, might misdirect initial packets to the wrong tenant. While
 928 | this can be inefficient, the design in this document preserves the ability for
 929 | tenants to deploy new versions provided they have an out-of-band means of
 930 | providing a connection ID for the client to use.
 931 | 
 932 | {{load-balancer-forwarding}} provides guidance about how load balancers should
 933 | handle unroutable DCIDs. This guidance, and the implementation of an algorithm
 934 | to handle these DCIDs, rests on some assumptions about packets that contain
 935 | client-generated DCIDs that are not specified in RFC 8999:
 936 | 
 937 | 1. they do not have short headers;
 938 | 1. the 4-tuple remains constant;
 939 | 1. if the load-balancer uses the Advanced Fallback Algorithm, the packets have
 940 | a constant Source Connection ID.
 941 | 
 942 | While this document does not update the commitments in {{RFC8999}}, the
 943 | additional assumptions are minimal and narrowly scoped, and provide a likely
 944 | set of constants that load balancers can use with minimal risk of version-
 945 | dependence.
 946 | 
 947 | If these assumptions are not valid, this specification is likely to lead to loss
 948 | of packets that contain unroutable DCIDs, and in extreme cases connection
 949 | failure.  A QUIC version that violates the assumptions in this section therefore
 950 | cannot be safely deployed with a load balancer that follows this specification.
 951 | An updated or alternative version of this specification might address these
 952 | shortcomings for such a QUIC version.
 953 | 
 954 | # Security Considerations {#security-considerations}
 955 | 
 956 | QUIC-LB is intended to prevent linkability.  Attacks would therefore attempt to
 957 | subvert this purpose.
 958 | 
 959 | Note that without a key for the encoding, QUIC-LB makes no attempt to obscure
 960 | the server mapping, and therefore does not address these concerns. Without a
 961 | key, QUIC-LB merely allows consistent CID encoding for compatibility across a
 962 | network infrastructure, which makes QUIC robust to NAT rebinding. Servers that
 963 | are encoding their server ID without a key algorithm SHOULD only use it to
 964 | generate new CIDs for the Server Initial Packet and SHOULD NOT send CIDs in QUIC
 965 | NEW_CONNECTION_ID frames, except that it sends one new Connection ID in the
 966 | event of config rotation {{config-rotation}}. Doing so might falsely suggest to
 967 | the client that said CIDs were generated in a secure fashion.
 968 | 
 969 | A linkability attack would find some means of determining that two connection
 970 | IDs route to the same server. Due to the limitations of measures at the QUIC layer,
 971 | there is no scheme that strictly prevents linkability for all traffic patterns.
 972 | 
 973 | To see why, consider two limits. At one extreme, one client is connected to the
 974 | server pool and migrates its address. An observer can easily link the two
 975 | addresses, and there is no remedy at the QUIC layer.
 976 | 
 977 | At the other extreme, a very large number of clients are connected to each
 978 | server, and they all migrate address constantly. At this limit, even an
 979 | unencrypted server ID encoding is unlikely to definitively link two addresses.
 980 | 
 981 | Therefore, efforts to frustrate any analysis of server ID encoding have
 982 | diminishing returns. Nevertheless, this specification seeks to minimize the
 983 | probability two addresses can be linked.
 984 | 
 985 | ## Attackers not between the load balancer and server
 986 | 
 987 | Any attacker might open a connection to the server infrastructure and
 988 | aggressively simulate migration to obtain a large sample of IDs that map to the
 989 | same server. It could then apply analytical techniques to try to obtain the
 990 | server encoding.
 991 | 
 992 | An encrypted encoding provides robust protection against this. An unencrypted
 993 | one provides none.
 994 | 
 995 | Were this analysis to obtain the server encoding, then on-path observers might
 996 | apply this analysis to correlate different client IP addresses.
 997 | 
 998 | ## Attackers between the load balancer and server
 999 | 
1000 | Attackers in this privileged position are intrinsically able to map two
1001 | connection IDs to the same server. These algorithms ensure that two connection
1002 | IDs for the same connection cannot be identified as such as long as the server
1003 | chooses the first octet and any plaintext nonce correctly.
1004 | 
1005 | ## Multiple Configuration IDs {#multiple-configs}
1006 | 
1007 | During the period in which there are multiple deployed configuration IDs (see
1008 | {{config-rotation}}), there is a slight increase in linkability. The server
1009 | space is effectively divided into segments with CIDs that have different config
1010 | rotation bits. Entities that manage servers SHOULD strive to minimize these
1011 | periods by quickly deploying new configurations across the server pool.
1012 | 
1013 | ## Limited configuration scope
1014 | 
1015 | A simple deployment of QUIC-LB in a cloud provider might use the same global
1016 | QUIC-LB configuration across all its load balancers that route to customer
1017 | servers. An attacker could then simply become a customer, obtain the
1018 | configuration, and then extract server IDs of other customers' connections at
1019 | will.
1020 | 
1021 | To avoid this, the configuration agent SHOULD issue QUIC-LB configurations to
1022 | mutually distrustful servers that have different keys for encryption
1023 | algorithms. In many cases, the load balancers can distinguish these
1024 | configurations by external IP address.
1025 | 
1026 | However, assigning multiple entities to an IP address is complementary with
1027 | concealing DNS requests (e.g., DoH {{?RFC8484}}) and the TLS Server Name
1028 | Indication (SNI) ({{?I-D.ietf-tls-esni}}) to obscure the ultimate destination
1029 | of traffic. While the load balancer's fallback algorithm
1030 | ({{fallback-algorithm}}) can use the SNI to make a routing decision on the
1031 | first packet, there are three ways to route subsequent packets:
1032 | 
1033 | * all co-tenants can use the same QUIC-LB configuration, leaking the server
1034 | mapping to each other as described above;
1035 | 
1036 | * co-tenants can be issued one of up to seven configurations distinguished by
1037 | the config rotation bits ({{config-rotation}}), exposing information about the
1038 | target domain to the entire network; or
1039 | 
1040 | * tenants can use the 0b111 codepoint in their CIDs (in which case they SHOULD
1041 | disable migration in their connections), which neutralizes the value of
1042 | QUIC-LB but preserves privacy.
1043 | 
1044 | When configuring QUIC-LB, administrators evaluate the privacy tradeoff by
1045 | considering the relative value of each of these properties, given the trust
1046 | model between tenants, the presence of methods to obscure the domain name, and
1047 | value of address migration in the tenant use cases.
1048 | 
1049 | As the plaintext algorithm makes no attempt to conceal the server mapping,
1050 | these deployments MAY simply use a common configuration.
1051 | 
1052 | ## Stateless Reset Oracle
1053 | 
1054 | Section 21.9 of {{RFC9000}} discusses the Stateless Reset Oracle attack.  For a
1055 | server deployment to be vulnerable, an attacking client must be able to cause
1056 | two packets with the same Destination CID to arrive at two different servers
1057 | that share the same cryptographic context for Stateless Reset tokens. As QUIC-LB
1058 | requires deterministic routing of DCIDs over the life of a connection, it is a
1059 | sufficient means of avoiding an Oracle without additional measures.
1060 | 
1061 | Note also that when a server starts using a new QUIC-LB config rotation
1062 | codepoint, new CIDs might not be unique with respect to previous configurations
1063 | that occupied that codepoint, and therefore different clients may have observed
1064 | the same CID and stateless reset token. A straightforward method of managing
1065 | stateless reset keys is to maintain a separate key for each config rotation
1066 | codepoint, and replace each key when the configuration for that codepoint
1067 | changes. Thus, when a server transitions from one config to another, it will
1068 | be able to generate correct tokens for connections using either type of CID.
1069 | 
1070 | ## Connection ID Entropy {#cid-entropy}
1071 | 
1072 | If a server ever reuses a nonce in generating a CID for a given configuration,
1073 | it risks exposing sensitive information. Given the same server ID, the CID will
1074 | be identical (aside from a possible difference in the first octet).  This can
1075 | risk exposure of the QUIC-LB key. If two clients receive the same connection ID,
1076 | they also have each other's stateless reset token unless that key has changed in
1077 | the interim.
1078 | 
1079 | The encrypted mode needs to generate different cipher text for each generated
1080 | Connection ID instance to protect the Server ID. To do so, at least four octets
1081 | of the CID are reserved for a nonce that, if used only once, will result in
1082 | unique cipher text for each Connection ID.
1083 | 
1084 | If servers simply increment the nonce by one with each generated connection ID,
1085 | then it is safe to use the existing keys until any server's nonce counter
1086 | exhausts the allocated space and rolls over. To maximize entropy, servers SHOULD
1087 | start with a random nonce value, in which case the configuration is usable until
1088 | the nonce value wraps around to zero and then reaches the initial value again.
1089 | 
1090 | Whether or not it implements the counter method, the server MUST NOT reuse a
1091 | nonce until it switches to a configuration with new keys.
1092 | 
1093 | Servers are forbidden from generating linkable plaintext nonces, because
1094 | observable correlations between plaintext nonces would provide trivial
1095 | linkability between individual connections, rather than just to a common server.
1096 | 
1097 | For any algorithm, configuration agents SHOULD implement an out-of-band method
1098 | to discover when servers are in danger of exhausting their nonce space, and
1099 | SHOULD respond by issuing a new configuration. A server that has exhausted its
1100 | nonces MUST either switch to a different configuration, or if none exists, use
1101 | the 4-tuple routing config rotation codepoint.
1102 | 
1103 | When sizing a nonce that is to be randomly generated, the configuration agent
1104 | SHOULD consider that a server generating an N-bit nonce will create a duplicate
1105 | about every 2^(N/2) attempts, and therefore compare the expected rate at which
1106 | servers will generate CIDs with the lifetime of a configuration.
1107 | 
1108 | ## Distinguishing Attacks {#distinguishing-attacks}
1109 | 
1110 | The Four Pass Encryption algorithm is structured as a 4-round Feistel network
1111 | with a non-bijective round function. As such, it does not offer a very high
1112 | security level against distinguishing attacks, as explained in [Patarin2008].
1113 | Attackers can mount these attacks if they are in possession of O(SQRT(len/2))
1114 | pairs of ciphertext and known corresponding plain text, where "len" is the
1115 | sum of the lengths of the Server ID and the Nonce.
1116 | 
1117 | The authors considered increasing the number of passes from 4 to 12,
1118 | which would definitely block these attacks. However, this would require
1119 | 12 rounds of AES decryption by load balancers accessing the CID, a cost deemed
1120 | prohibitive in the planned deployments.
1121 | 
1122 | The attacks described in [Patarin2008] rely on known plain text. In a normal
1123 | deployment, the plain text is only known by the server that generates the ID
1124 | and by the load balancer that decrypts the content of the CID. Attackers
1125 | would have to compensate by guesses about the allocation of server identifiers
1126 | or the generation of nonces. These attacks are thus mitigated by making nonces
1127 | hard to guess, as specified in {{cid-entropy}}, and by rules related to mixed
1128 | deployments that use both clear text CID and encrypted CID, for example when
1129 | transitioning from clear text to encryption. Such deployments MUST use different
1130 | server ID allocations for the clear text and the encrypted versions.
1131 | 
1132 | These attacks cannot be mounted against the Single Pass Encryption algorithm.
1133 | 
1134 | ## Early deletion of load balancer connection state
1135 | 
1136 | Potential vulnerabilities related to heuristics that delete per-connection state
1137 | are described in {{per-connection-state}}. Under certain assumptions about
1138 | server configuration and fallback algorithm, this state might be critical to
1139 | maintaining connectivity. Under other assumptions, the state provides robustness
1140 | to improbable network events.
1141 | 
1142 | # IANA Considerations
1143 | 
1144 | There are no IANA requirements.
1145 | 
1146 | --- back
1147 | 
1148 | # QUIC-LB YANG Model {#yang-model}
1149 | 
1150 | These YANG models conform to {{?RFC6020}} and express a complete QUIC-LB
1151 | configuration. There is one model for the server and one for the middlebox
1152 | (i.e., the load balancer and/or Retry Service).
1153 | 
1154 | ~~~
1155 | module ietf-quic-lb-server {
1156 |   yang-version "1.1";
1157 |   namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb";
1158 |   prefix "quic-lb";
1159 | 
1160 |   import ietf-yang-types {
1161 |     prefix yang;
1162 |     reference
1163 |       "RFC 6991: Common YANG Data Types.";
1164 |   }
1165 | 
1166 |   import ietf-inet-types {
1167 |     prefix inet;
1168 |     reference
1169 |       "RFC 6991: Common YANG Data Types.";
1170 |   }
1171 | 
1172 |   organization
1173 |     "IETF QUIC Working Group";
1174 | 
1175 |   contact
1176 |     "WG Web:   <http://datatracker.ietf.org/wg/quic>
1177 |      WG List:  <quic@ietf.org>
1178 | 
1179 |      Authors: Martin Duke (martin.h.duke at gmail dot com)
1180 |               Nick Banks (nibanks at microsoft dot com)
1181 |               Christian Huitema (huitema at huitema.net)";
1182 | 
1183 |   description
1184 |     "This module enables the explicit cooperation of QUIC servers
1185 |      with trusted intermediaries without breaking important
1186 |      protocol features.
1187 | 
1188 |      Copyright (c) 2022 IETF Trust and the persons identified as
1189 |      authors of the code.  All rights reserved.
1190 | 
1191 |      Redistribution and use in source and binary forms, with or
1192 |      without modification, is permitted pursuant to, and subject to
1193 |      the license terms contained in, the Simplified BSD License set
1194 |      forth in Section 4.c of the IETF Trust's Legal Provisions
1195 |      Relating to IETF Documents
1196 |      (https://trustee.ietf.org/license-info).
1197 | 
1198 |      This version of this YANG module is part of RFC XXXX
1199 |      (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
1200 |      for full legal notices.
1201 | 
1202 |      The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
1203 |      NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
1204 |      'MAY', and 'OPTIONAL' in this document are to be interpreted as
1205 |      described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
1206 |      they appear in all capitals, as shown here.";
1207 | 
1208 |   revision "2023-07-14" {
1209 |     description
1210 |       "Updated to design in version 17 of the draft";
1211 |     reference
1212 |       "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs";
1213 |   }
1214 | 
1215 |   container quic-lb {
1216 |     presence "The container for QUIC-LB configuration.";
1217 | 
1218 |     description
1219 |       "QUIC-LB container.";
1220 | 
1221 |     typedef quic-lb-key {
1222 |       type yang:hex-string {
1223 |         length 47;
1224 |       }
1225 |       description
1226 |         "A 16-byte key, encoded as a 47-character hex-string";
1227 |     }
1228 | 
1229 |     leaf config-id {
1230 |       type uint8 {
1231 |         range "0..6";
1232 |       }
1233 |       mandatory true;
1234 |       description
1235 |         "Identifier for this CID configuration.";
1236 |     }
1237 | 
1238 |     leaf first-octet-encodes-cid-length {
1239 |       type boolean;
1240 |       default false;
1241 |       description
1242 |         "If true, the six least significant bits of the first
1243 |          CID octet encode the CID length minus one.";
1244 |     }
1245 | 
1246 |     leaf server-id-length {
1247 |       type uint8 {
1248 |         range "1..15";
1249 |       }
1250 |       must '. <= (19 - ../nonce-length)' {
1251 |         error-message
1252 |           "Server ID and nonce lengths must sum
1253 |            to no more than 19.";
1254 |       }
1255 |       mandatory true;
1256 |       description
1257 |         "Length (in octets) of a server ID. Further range-limited
1258 |          by nonce-length.";
1259 |     }
1260 | 
1261 |     leaf nonce-length {
1262 |       type uint8 {
1263 |         range "4..18";
1264 |       }
1265 |       mandatory true;
1266 |       description
1267 |         "Length, in octets, of the nonce. Short nonces mean there
1268 |          will be frequent configuration updates.";
1269 |     }
1270 | 
1271 |     leaf cid-key {
1272 |       type quic-lb-key;
1273 |       description
1274 |         "Key for encrypting the connection ID.";
1275 |     }
1276 | 
1277 |     leaf server-id {
1278 |       type yang:hex-string;
1279 |       must "string-length(.) = 3 * ../server-id-length - 1";
1280 |       mandatory true;
1281 |       description
1282 |         "An allocated server ID, exactly server-id-length octets";
1283 |     }
1284 |   }
1285 | }
1286 | ~~~
1287 | 
1288 | ~~~
1289 | module ietf-quic-lb-middlebox {
1290 |   yang-version "1.1";
1291 |   namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb";
1292 |   prefix "quic-lb";
1293 | 
1294 |   import ietf-yang-types {
1295 |     prefix yang;
1296 |     reference
1297 |       "RFC 6991: Common YANG Data Types.";
1298 |   }
1299 | 
1300 |   import ietf-inet-types {
1301 |     prefix inet;
1302 |     reference
1303 |       "RFC 6991: Common YANG Data Types.";
1304 |   }
1305 | 
1306 |   organization
1307 |     "IETF QUIC Working Group";
1308 | 
1309 |   contact
1310 |     "WG Web:   <http://datatracker.ietf.org/wg/quic>
1311 |      WG List:  <quic@ietf.org>
1312 | 
1313 |      Authors: Martin Duke (martin.h.duke at gmail dot com)
1314 |               Nick Banks (nibanks at microsoft dot com)
1315 |               Christian Huitema (huitema at huitema.net)";
1316 | 
1317 |   description
1318 |     "This module enables the explicit cooperation of QUIC servers
1319 |      with trusted intermediaries without breaking important
1320 |      protocol features.
1321 | 
1322 |      Copyright (c) 2021 IETF Trust and the persons identified as
1323 |      authors of the code.  All rights reserved.
1324 | 
1325 |      Redistribution and use in source and binary forms, with or
1326 |      without modification, is permitted pursuant to, and subject to
1327 |      the license terms contained in, the Simplified BSD License set
1328 |      forth in Section 4.c of the IETF Trust's Legal Provisions
1329 |      Relating to IETF Documents
1330 |      (https://trustee.ietf.org/license-info).
1331 | 
1332 |      This version of this YANG module is part of RFC XXXX
1333 |      (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
1334 |      for full legal notices.
1335 | 
1336 |      The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
1337 |      NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
1338 |      'MAY', and 'OPTIONAL' in this document are to be interpreted as
1339 |      described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
1340 |      they appear in all capitals, as shown here.";
1341 | 
1342 |   revision "2021-02-11" {
1343 |     description
1344 |       "Updated to design in version 13 of the draft";
1345 |     reference
1346 |       "RFC XXXX, QUIC-LB: Generating Routable QUIC Connection IDs";
1347 |   }
1348 | 
1349 |   container quic-lb {
1350 |     presence "The container for QUIC-LB configuration.";
1351 | 
1352 |     description
1353 |       "QUIC-LB container.";
1354 | 
1355 |     typedef quic-lb-key {
1356 |       type yang:hex-string {
1357 |         length 47;
1358 |       }
1359 |       description
1360 |         "A 16-byte key, encoded as a 47-character hex-string";
1361 |     }
1362 | 
1363 |     list cid-configs {
1364 |       key "config-rotation-bits";
1365 |       description
1366 |         "List of up to seven load balancer configurations";
1367 | 
1368 |       leaf config-rotation-bits {
1369 |         type uint8 {
1370 |           range "0..6";
1371 |         }
1372 |         mandatory true;
1373 |         description
1374 |           "Identifier for this CID configuration.";
1375 |       }
1376 | 
1377 |       leaf server-id-length {
1378 |         type uint8 {
1379 |           range "1..15";
1380 |         }
1381 |         must '. <= (19 - ../nonce-length)' {
1382 |           error-message
1383 |             "Server ID and nonce lengths must sum to
1384 |              no more than 19.";
1385 |         }
1386 |         mandatory true;
1387 |         description
1388 |           "Length (in octets) of a server ID. Further range-limited
1389 |            by nonce-length.";
1390 |       }
1391 | 
1392 |       leaf cid-key {
1393 |         type quic-lb-key;
1394 |         description
1395 |           "Key for encrypting the connection ID.";
1396 |       }
1397 | 
1398 |       leaf nonce-length {
1399 |         type uint8 {
1400 |           range "4..18";
1401 |         }
1402 |         mandatory true;
1403 |         description
1404 |           "Length, in octets, of the nonce. Short nonces mean there
1405 |            will be frequent configuration updates.";
1406 |       }
1407 | 
1408 |       list server-id-mappings {
1409 |         key "server-id";
1410 |         description "Statically allocated Server IDs";
1411 | 
1412 |         leaf server-id {
1413 |           type yang:hex-string;
1414 |           must "string-length(.) = 3 * ../../server-id-length - 1";
1415 |           mandatory true;
1416 |           description
1417 |             "An allocated server ID";
1418 | 
1419 |         }
1420 | 
1421 |         leaf server-address {
1422 |           type inet:ip-address;
1423 |           mandatory true;
1424 |           description
1425 |             "Destination address corresponding to the server ID";
1426 |         }
1427 |       }
1428 |     }
1429 |   }
1430 | }
1431 | ~~~
1432 | 
1433 | ## Tree Diagram
1434 | 
1435 | This summary of the YANG models uses the notation in {{?RFC8340}}.
1436 | 
1437 | ~~~
1438 | module: ietf-quic-lb-server
1439 |   +--rw quic-lb!
1440 |      +--rw config-id                         uint8
1441 |      +--rw first-octet-encodes-cid-length?   boolean
1442 |      +--rw server-id-length                  uint8
1443 |      +--rw nonce-length                      uint8
1444 |      +--rw cid-key?                          quic-lb-key
1445 |      +--rw server-id                         yang:hex-string
1446 | ~~~
1447 | 
1448 | ~~~
1449 | module: ietf-quic-lb-middlebox
1450 |   +--rw quic-lb!
1451 |      +--rw cid-configs* [config-rotation-bits]
1452 |      |  +--rw config-rotation-bits    uint8
1453 |      |  +--rw server-id-length        uint8
1454 |      |  +--rw cid-key?                quic-lb-key
1455 |      |  +--rw nonce-length            uint8
1456 |      |  +--rw server-id-mappings* [server-id]
1457 |      |     +--rw server-id         yang:hex-string
1458 |      |     +--rw server-address    inet:ip-address
1459 | ~~~
1460 | 
1461 | # Load Balancer Test Vectors {#test-vectors}
1462 | 
1463 | This section uses the following abbreviations:
1464 | 
1465 | ~~~
1466 | cid      Connection ID
1467 | cr_bits  Config Rotation Bits
1468 | LB       Load Balancer
1469 | sid      Server ID
1470 | ~~~
1471 | 
1472 | In all cases, the server is configured to encode the CID length.
1473 | 
1474 | ## Unencrypted CIDs
1475 | 
1476 | ~~~pseudocode
1477 | cr_bits sid nonce cid
1478 | 0 c4605e 4504cc4f 07c4605e4504cc4f
1479 | 1 350d28b420 3487d970b 20a350d28b4203487d970b
1480 | ~~~
1481 | 
1482 | ## Encrypted CIDs
1483 | 
1484 | The key for all of these examples is 8f95f09245765f80256934e50c66207f. The
1485 | test vectors include an example that uses the 16-octet single-pass special
1486 | case, as well as an instance where the server ID length exceeds the nonce
1487 | length, requiring a fourth decryption pass.
1488 | 
1489 | ~~~pseudocode
1490 | cr_bits sid nonce cid
1491 | 0 ed793a ee080dbf 0720b1d07b359d3c
1492 | 1 ed793a51d49b8f5fab65 ee080dbf48
1493 |                          2fcc381bc74cb4fbad2823a3d1f8fed2
1494 | 2 ed793a51d49b8f5f ee080dbf48c0d1e5
1495 |                          504dd2d05a7b0de9b2b9907afb5ecf8cc3
1496 | 3 ed793a51d49b8f5fab ee080dbf48c0d1e55d
1497 |                          125779c9cc86beb3a3a4a3ca96fce4bfe0cdbc
1498 | ~~~
1499 | 
1500 | # Interoperability with DTLS over UDP
1501 | 
1502 | Some environments may contain DTLS traffic as well as QUIC operating over UDP,
1503 | which may be hard to distinguish.
1504 | 
1505 | In most cases, the packet parsing rules above will cause a QUIC-LB load
1506 | balancer to route DTLS traffic in an appropriate way. DTLS 1.3 implementations
1507 | that use the connection_id extension {{?RFC9146}} might use the techniques in
1508 | this document to generate connection IDs and achieve robust routability for DTLS
1509 | associations if they meet a few additional requirements. This non-normative
1510 | appendix describes this interaction.
1511 | 
1512 | ## DTLS 1.0 and 1.2
1513 | 
1514 | DTLS 1.0 {{?RFC4347}} and 1.2 {{?RFC6347}} use packet formats that a QUIC-LB
1515 | router will interpret as short header packets with CIDs that request 4-tuple
1516 | routing.  As such, they will route such packets consistently as long as the
1517 | 4-tuple does not change. Note that DTLS 1.0 has been deprecated by the IETF.
1518 | 
1519 | The first octet of every DTLS 1.0 or 1.2 datagram contains the content type.
1520 | A QUIC-LB load balancer will interpret any content type less than 128 as a short
1521 | header packet, meaning that the subsequent octets should contain a connection
1522 | ID.
1523 | 
1524 | Existing TLS content types comfortably fit in the range below 128. Assignment of
1525 | codepoints greater than 64 would require coordination in accordance with
1526 | {{?RFC7983}}, and anyway would likely create problems demultiplexing DTLS and
1527 | version 1 of QUIC. Therefore, this document believes it is extremely unlikely
1528 | that TLS content types of 128 or greater will be assigned. Nevertheless, such
1529 | an assignment would cause a QUIC-LB load balancer to interpret the packet as a
1530 | QUIC long header with an essentially random connection ID, which is likely to be
1531 | routed irregularly.
1532 | 
1533 | The second octet of every DTLS 1.0 or 1.2 datagram is the bitwise complement
1534 | of the DTLS Major version (i.e. version 1.x = 0xfe). A QUIC-LB load balancer
1535 | will interpret this as a connection ID that requires 4-tuple based load
1536 | balancing, meaning that the routing will be consistent as long as the 4-tuple
1537 | remains the same.
1538 | 
1539 | {{?RFC9146}} defines an extension to add connection IDs to DTLS 1.2.
1540 | Unfortunately, a QUIC-LB load balancer will not correctly parse the connection
1541 | ID and will continue 4-tuple routing. A modified QUIC-LB load balancer that
1542 | correctly identifies DTLS and parses a DTLS 1.2 datagram for the connection ID
1543 | is outside the scope of this document.
1544 | 
1545 | ## DTLS 1.3
1546 | 
1547 | DTLS 1.3 {{?RFC9147}} changes the structure of datagram headers in relevant
1548 | ways.
1549 | 
1550 | Handshake packets continue to have a TLS content type in the first octet and
1551 | 0xfe in the second octet, so they will be 4-tuple routed, which should not
1552 | present problems for likely NAT rebinding or address change events.
1553 | 
1554 | Non-handshake packets always have zero in their most significant bit and will
1555 | therefore always be treated as QUIC short headers. If the connection ID is
1556 | present, it follows in the succeeding octets. Therefore, a DTLS 1.3 association
1557 | where the server utilizes Connection IDs and the encodings in this document
1558 | will be routed correctly in the presence of client address and port changes.
1559 | 
1560 | However, if the client does not include the connection_id extension in its
1561 | ClientHello, the server is unable to use connection IDs. In this case, non-
1562 | handshake packets will appear to contain random connection IDs and be routed
1563 | randomly. Thus, unmodified QUIC-LB load balancers will not work with DTLS 1.3
1564 | if the client does not advertise support for connection IDs, or the server does
1565 | not request the use of a compliant connection ID.
1566 | 
1567 | A QUIC-LB load balancer might be modified to identify DTLS 1.3 packets and
1568 | correctly parse the fields to identify when there is no connection ID and
1569 | revert to 4-tuple routing, removing the server requirement above. However, such
1570 | a modification is outside the scope of this document, and classifying some
1571 | packets as DTLS might be incompatible with future versions of QUIC.
1572 | 
1573 | ## Future Versions of DTLS
1574 | 
1575 | As DTLS does not have an IETF consensus document that defines what parts of
1576 | DTLS will be invariant in future versions, it is difficult to speculate about
1577 | the applicability of this section to future versions of DTLS.
1578 | 
1579 | # Acknowledgments
1580 | 
1581 | Manasi Deval, Erik Fuller, Toma Gavrichenkov, Greg Greenway, Jana Iyengar,
1582 | Subodh Iyengar, Stefan Kolbl, Ladislav Lhotka, Jan Lindblad, Ling Tao Nju,
1583 | Ilari Liusvaara, Kazuho Oku, Udip Pant, Zaheduzzaman Sarker, Ian Swett, Andy
1584 | Sykes, Martin Thomson, Dmitri Tikhonov, Victor Vasiliev, Xingcan Lan, Yu Zhu,
1585 | and William Zeng Ke all provided useful input to this document.
1586 | 
1587 | # Change Log
1588 | 
1589 | > **RFC Editor's Note:**  Please remove this section prior to
1590 | > publication of a final version of this document.
1591 | 
1592 | ## since draft-ietf-quic-load-balancers-20
1593 | 
1594 | - Changed definition of Unroutable DCIDs, and rewrote sections on config
1595 | failover and fallback routing to avoid misrouted connections.
1596 | - Deleted text on dropping packets
1597 | - Rewrote version invariance section
1598 | 
1599 | ## since draft-ietf-quic-load-balancers-19
1600 | 
1601 | - Further guidance on multiple server processes/threads
1602 | - Fixed error in encryption example.
1603 | - Clarified fallback algorithms and known QUIC versions.
1604 | 
1605 | ## since draft-ietf-quic-load-balancers-18
1606 | 
1607 | - Rearranged the output of the expand function to reduce CPU load of decrypt
1608 | 
1609 | ## since draft-ietf-quic-load-balancers-17
1610 | 
1611 | - fixed regressions in draft-17 publication
1612 | 
1613 | ## since draft-ietf-quic-load-balancers-16
1614 | 
1615 | - added a config ID bit (now there are 3).
1616 | 
1617 | ## since draft-ietf-quic-load-balancers-15
1618 | 
1619 | - aasvg fixes.
1620 | 
1621 | ## since draft-ietf-quic-load-balancers-14
1622 | 
1623 | - Revised process demultiplexing text
1624 | - Restored lost text in Security Considerations
1625 | - Editorial comments from Martin Thomson.
1626 | - Tweaked 4-pass algorithm to avoid accidental plaintext similarities
1627 | 
1628 | ## since draft-ietf-quic-load-balancers-13
1629 | 
1630 | - Incorporated Connection ID length in argument of truncate function
1631 | - Added requirements for codepoint 0b11.
1632 | - Describe Distinguishing Attack in Security Considerations.
1633 | - Added non-normative language about server process demultiplexers
1634 | 
1635 | ## since draft-ietf-quic-load-balancers-12
1636 | 
1637 | - Separated Retry Service design into a separate draft
1638 | 
1639 | ## since draft-ietf-quic-load-balancers-11
1640 | 
1641 | - Fixed mistakes in test vectors
1642 | 
1643 | ## since draft-ietf-quic-load-balancers-10
1644 | 
1645 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to
1646 | implement
1647 | - Revised test vectors
1648 | - Split YANG model into a server and middlebox version
1649 | 
1650 | ## since draft-ietf-quic-load-balancers-09
1651 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and
1652 | "Encrypted Long"
1653 | - Added section on per-connection state
1654 | - Changed "Encrypted Short" to a 4-pass algorithm.
1655 | - Recommended a random initial nonce when incrementing.
1656 | - Clarified what SNI LBs should do with unknown QUIC versions.
1657 | 
1658 | ## since draft-ietf-quic-load-balancers-08
1659 | - Eliminate Dynamic SID allocation
1660 | - Eliminated server use bytes
1661 | 
1662 | ## since draft-ietf-quic-load-balancers-07
1663 | - Shortened SSCID nonce minimum length to 4 bytes
1664 | - Removed RSCID from Retry token body
1665 | - Simplified CID formats
1666 | - Shrunk size of SID table
1667 | 
1668 | ## since draft-ietf-quic-load-balancers-06
1669 | - Added interoperability with DTLS
1670 | - Changed "non-compliant" to "unroutable"
1671 | - Changed "arbitrary" algorithm to "fallback"
1672 | - Revised security considerations for mistrustful tenants
1673 | - Added retry service considerations for non-Initial packets
1674 | 
1675 | ## since draft-ietf-quic-load-balancers-05
1676 | - Added low-config CID for further discussion
1677 | - Complete revision of shared-state Retry Token
1678 | - Added YANG model
1679 | - Updated configuration limits to ensure CID entropy
1680 | - Switched to notation from quic-transport
1681 | 
1682 | ## since draft-ietf-quic-load-balancers-04
1683 | - Rearranged the shared-state retry token to simplify token processing
1684 | - More compact timestamp in shared-state retry token
1685 | - Revised server requirements for shared-state retries
1686 | - Eliminated zero padding from the test vectors
1687 | - Added server use bytes to the test vectors
1688 | - Additional compliant DCID criteria
1689 | 
1690 | ## since-draft-ietf-quic-load-balancers-03
1691 | - Improved Config Rotation text
1692 | - Added stream cipher test vectors
1693 | - Deleted the Obfuscated CID algorithm
1694 | 
1695 | ## since-draft-ietf-quic-load-balancers-02
1696 | - Replaced stream cipher algorithm with three-pass version
1697 | - Updated Retry format to encode info for required TPs
1698 | - Added discussion of version invariance
1699 | - Cleaned up text about config rotation
1700 | - Added Reset Oracle and limited configuration considerations
1701 | - Allow dropped long-header packets for known QUIC versions
1702 | 
1703 | ## since-draft-ietf-quic-load-balancers-01
1704 | - Test vectors for load balancer decoding
1705 | - Deleted remnants of in-band protocol
1706 | - Light edit of Retry Services section
1707 | - Discussed load balancer chains
1708 | 
1709 | ## since-draft-ietf-quic-load-balancers-00
1710 | - Removed in-band protocol from the document
1711 | 
1712 | ## Since draft-duke-quic-load-balancers-06
1713 | - Switch to IETF WG draft.
1714 | 
1715 | ## Since draft-duke-quic-load-balancers-05
1716 | - Editorial changes
1717 | - Made load balancer behavior independent of QUIC version
1718 | - Got rid of token in stream cipher encoding, because server might not have it
1719 | - Defined "non-compliant DCID" and specified rules for handling them.
1720 | - Added pseudocode for config schema
1721 | 
1722 | ## Since draft-duke-quic-load-balancers-04
1723 | - Added standard for retry services
1724 | 
1725 | ## Since draft-duke-quic-load-balancers-03
1726 | - Renamed Plaintext CID algorithm as Obfuscated CID
1727 | - Added new Plaintext CID algorithm
1728 | - Updated to allow 20B CIDs
1729 | - Added self-encoding of CID length
1730 | 
1731 | ## Since draft-duke-quic-load-balancers-02
1732 | - Added Config Rotation
1733 | - Added failover mode
1734 | - Tweaks to existing CID algorithms
1735 | - Added Block Cipher CID algorithm
1736 | - Reformatted QUIC-LB packets
1737 | 
1738 | ## Since draft-duke-quic-load-balancers-01
1739 | - Complete rewrite
1740 | - Supports multiple security levels
1741 | - Lightweight messages
1742 | 
1743 | ## Since draft-duke-quic-load-balancers-00
1744 | - Converted to markdown
1745 | - Added variable length connection IDs
1746 | 


--------------------------------------------------------------------------------
/draft-ietf-quic-retry-offload.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "QUIC Retry Offload"
  3 | abbrev: QUIC Retry Offload
  4 | docname: draft-ietf-quic-retry-offload-latest
  5 | date: {DATE}
  6 | category: std
  7 | ipr: trust200902
  8 | area: Transport
  9 | workgroup: QUIC
 10 | 
 11 | stand_alone: yes
 12 | pi: [toc, sortrefs, symrefs, docmapping]
 13 | 
 14 | author:
 15 |   -
 16 |     ins: M. Duke
 17 |     name: Martin Duke
 18 |     org: Google
 19 |     email: martin.h.duke@gmail.com
 20 | 
 21 |   -
 22 |     ins: N. Banks
 23 |     name: Nick Banks
 24 |     org: Microsoft
 25 |     email: nibanks@microsoft.com
 26 | 
 27 | normative:
 28 | 
 29 |   TIME_T:
 30 |     title: "Open Group Standard: Vol. 1: Base Definitions, Issue 7"
 31 |     date: 2018
 32 |     seriesinfo: IEEE Std 1003.1
 33 |     target: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_16
 34 | 
 35 | --- abstract
 36 | 
 37 | QUIC uses Retry packets to reduce load on stressed servers, by forcing the
 38 | client to prove ownership of its address before the server commits state.
 39 | QUIC also has an anti-tampering mechanism to prevent the unauthorized injection
 40 | of Retry packets into a connection. However, a server operator may want to
 41 | offload production of Retry packets to an anti-Denial-of-Service agent or
 42 | hardware accelerator. "Retry Offload" is a mechanism for coordination between
 43 | a server and an external generator of Retry packets that can succeed despite
 44 | the anti-tampering mechanism.
 45 | 
 46 | --- middle
 47 | 
 48 | # Introduction
 49 | 
 50 | QUIC {{!RFC9000}} servers send Retry packets to avoid prematurely allocating
 51 | resources when under stress, such as during a Denial of Service (DoS) attack.
 52 | Because both Initial packets and Retry packets have weak authentication
 53 | properties, the Retry packet contains an encrypted token that helps the client
 54 | and server to validate, via transport parameters, that an attacker did not
 55 | inject or modify a packet of either type for this connection attempt.
 56 | 
 57 | However, a server under stress is less inclined to process incoming Initial
 58 | packets and compute the Retry token in the first place. An analogous mechanism
 59 | for TCP is syncookies {{?RFC4987}}. As TCP has weaker authentication properties
 60 | than QUIC, syncookie generation can often be offloaded to a hardware device, or
 61 | to an anti-Denial-of-Service provider that is topologically far from the
 62 | protected server. As such an offload would behave exactly like an attacker,
 63 | QUIC's authentication methods make such a capability impossible.
 64 | 
 65 | This document seeks to enable offloading of Retry generation to QUIC via
 66 | explicit coordination between servers and the hardware or provider offload,
 67 | which this document refers to as a "Retry Offload." It has two different
 68 | modes, to conform to two different use cases.
 69 | 
 70 | The no-shared-state mode has minimal coordination and does not require key
 71 | sharing. While operationally easier to configure and manage, it places severe
 72 | constraints on the operational profile of the offload. In particular, the
 73 | offload must control all ingress to the server and fail closed.
 74 | 
 75 | The shared-state mode removes the operational constraints, but also requires
 76 | more sophisticated key management.
 77 | 
 78 | Both modes specify a common format for encoding information in the Retry token,
 79 | so that the server can correctly populate the relevant transport parameter
 80 | fields.
 81 | 
 82 | ## Terminology
 83 | 
 84 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 85 | "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 86 | interpreted as described in RFC 2119 {{!RFC2119}}.
 87 | 
 88 | In this document, these words will appear with that interpretation only when in
 89 | ALL CAPS.  Lower case uses of these words are not to be interpreted as carrying
 90 | significance described in RFC 2119.
 91 | 
 92 | For brevity, "Connection ID" will often be abbreviated as "CID".
 93 | 
 94 | A "Retry Offload" is a hardware or software device that is conceptually separate
 95 | from a QUIC server that terminates QUIC connections. This document assumes that
 96 | the Retry Offload and the server have an administrative relationship that allows
 97 | them to accept common configuration.
 98 | 
 99 | A "configuration agent" is some entity that determines the common configuration
100 | to be distributed to the servers and the Retry Offload.
101 | 
102 | This document uses "QUIC" to refer to the protocol in QUIC version 1
103 | {{RFC9000}}. Retry offloads can be applied to other versions of QUIC that use
104 | Retry packets and have identical information requirements for Retry validation.
105 | However, note that source and destination connection IDs are the only relevant
106 | data fields that are invariant across QUIC versions {{?RFC8999}}.
107 | 
108 | ## Notation
109 | 
110 | All wire formats will be depicted using the notation defined in Section 1.3 of
111 | {{RFC9000}}.
112 | 
113 | The example below illustrates the basic framework:
114 | 
115 | ~~~
116 | Example Structure {
117 |   One-bit Field (1),
118 |   7-bit Field with Fixed Value (7) = 61,
119 |   Field with Variable-Length Integer (i),
120 |   Arbitrary-Length Field (..),
121 |   Variable-Length Field (8..24),
122 |   Field With Minimum Length (16..),
123 |   Field With Maximum Length (..128),
124 |   [Optional Field (64)],
125 |   Repeated Field (8) ...,
126 | }
127 | ~~~
128 | {: #fig-ex-format title="Example Format"}
129 | 
130 | # Common Requirements {#common-requirements}
131 | 
132 | Regardless of mechanism, a Retry Offload has an active mode, where it is
133 | generating Retry packets, and an inactive mode, where it is not, based on its
134 | assessment of server load and the likelihood an attack is underway. The choice
135 | of mode MAY be made on a per-packet or per-connection basis, through a
136 | stochastic process or based on client address.
137 | 
138 | A configuration agent MUST distribute a list of QUIC versions the Retry Offload
139 | supports. It MAY also distribute either an "Allow-List" or a "Deny-List" of
140 | other QUIC versions. It MUST NOT distribute both an Allow-List and a Deny-List.
141 | 
142 | The Allow-List or Deny-List MUST NOT include any versions included for Retry
143 | Offload support.
144 | 
145 | The configuration agent MUST provide a means for the entity that controls the
146 | Retry Offload to report its supported version(s) to the configuration agent. If
147 | the entity has not reported this information, it MUST NOT activate the Retry
148 | Offload and the configuration agent MUST NOT distribute configuration that
149 | activates it.
150 | 
151 | The configuration agent MAY delete versions from the final supported version
152 | list if policy does not require the Retry Offload to operate on those versions.
153 | 
154 | The configuration agent MUST provide a means for the entities that control
155 | servers behind the Retry Offload to report either an Allow-List or a Deny-List.
156 | 
157 | If all entities supply Allow-Lists, the consolidated list MUST be the union of
158 | these sets. If all entities supply Deny-Lists, the consolidated list MUST be
159 | the intersection of these sets.
160 | 
161 | If entities provide a mixture of Allow-Lists and Deny-Lists, the consolidated
162 | list MUST be a Deny-List that is the intersection of all provided Deny-Lists and
163 | the inverses of all Allow-Lists.
164 | 
165 | If no entities that control servers have reported Allow-Lists or Deny-Lists,
166 | the default is a Deny-List with the null set (i.e., all unsupported versions
167 | will be admitted). This preserves the future extensibility of QUIC.
168 | 
169 | A Retry Offload MUST forward all packets for a QUIC version it does not
170 | support if that version is present on the Allow-List or absent from the
171 | Deny-List. Note that if servers support versions the Retry Offload does not,
172 | this may increase load on the servers.
173 | 
174 | Note that future versions of QUIC might not have Retry packets, require
175 | different information in Retry, or use different packet type indicators.
176 | 
177 | ## Consistent Treatment of Initials
178 | 
179 | Retry Offloads SHOULD treat Initial packets from the same connection with a
180 | uniform policy. Initial packets of the first and second client flight can be
181 | difficult to distinguish without expensive decryption of the contents, which is
182 | unsuitable under the conditions of a DDoS attack. If the first packet of a
183 | connection is admitted without Retry, but the second triggers a Retry, that
184 | Retry packet will be ignored and the loss of an Initial coalesced with other
185 | packets can impair performance. In some situations, the client does not yet have
186 | handshake keys, and dropping further client Initial packets creates a deadlock
187 | where the connection cannot progress.
188 | 
189 | The simplest means to ensure this is to require, when active, a Retry Token
190 | for all incoming Initial packets, and send a Retry packet otherwise. If the
191 | Retry Offload is to be more selective, one technique keeps state on which
192 | address/port 4-tuples have been admitted. Another would be to apply a secure
193 | hash to the source IP address, port, and connection ID to deterministically
194 | compute whether the Initial requires a Retry Token or not. These source
195 | values remain consistent over the handshake.
196 | 
197 | However, even with these techniques there is a potential problem when a Retry
198 | Offload switches from inactive to active mode. The Retry Offload could admit
199 | the first packet while in inactive mode, and then drop subsequent Initials in
200 | active mode.
201 | 
202 | If the Retry Offload is always on-path, it MAY keep state on incoming
203 | connections while in inactive mode to avoid this problem. If it cannot or will
204 | not keep such state, it SHOULD implement "transition mode" for an interval
205 | chosen to include the likely Initial packet exchange of most clients (200ms is a
206 | sensible default).
207 | 
208 | In transition mode, Retry Offloads process Initial packets with Retry tokens
209 | as in active mode. When the Retry Offload receives an Initial packet with no
210 | token, it issues a Retry AND forwards the packet to the server. If the client
211 | has already received a packet from the server, it will ignore the Retry and the
212 | connection will progress normally. If not, the client will reconnect based on
213 | the Retry, the server's response to the first Initial will be discarded, and
214 | the connection will progress normally based on the client's second Initial.
215 | {{mid-handshake}} explores the various possible packet sequences in
216 | transition mode.
217 | 
218 | Note that transition mode provides no actual DDoS relief to the server, so its
219 | duration should be as short as possible. The Retry Offload can choose not to
220 | implement transition mode and cause some client connections to fail.
221 | 
222 | Servers operating behind a Retry Offload SHOULD implement a mechanism that
223 | operates whenever a client Initial arrives with a valid Retry token. If there
224 | is another connection with identical client Connection ID, IP, and Port, but
225 | with an unvalidated address, that connection is immediately and silently
226 | terminated. This mechanism eliminates incorrect connection state that is an
227 | artifact of transition mode, as explained in {{mid-handshake}}.
228 | 
229 | ## Considerations for Non-Initial Packets
230 | 
231 | Initial Packets are especially effective at consuming server resources
232 | because they cause the server to create connection state. Even when mitigating
233 | this load with Retry Packets, the act of validating an Initial Token and sending
234 | a Retry Packet is more expensive than the response to a non-Initial packet with
235 | an unknown Connection ID: simply dropping it and/or sending a Stateless Reset.
236 | 
237 | Nevertheless, a Retry Offload in Active Mode might desire to shield servers
238 | from non-Initial packets that do not correspond to a previously admitted
239 | Initial Packet. This has a number of considerations.
240 | 
241 | * If a Retry Offload maintains no per-flow state, it cannot distinguish between
242 | valid and invalid non-Initial packets and MUST forward all non-Initial Packets
243 | to the server.
244 | 
245 | * For QUIC versions the Retry Offload does not support and are present on the
246 | Allow-List (or absent from the Deny-List), the Retry Offload cannot distinguish
247 | Initial Packets from other long headers and therefore MUST admit all long
248 | headers.
249 | 
250 | * If a Retry Offload keeps per-flow state, it can identify 4-tuples that have
251 | been previously approved, admit non-Initial packets from those flows, and
252 | drop all others. However, dropping short headers will effectively break Address
253 | Migration and NAT Rebinding when in Active Mode, as post-migration packets will
254 | arrive with a previously unknown 4-tuple. This policy will also break connection
255 | attempts using any new QUIC versions that begin connections with a short header.
256 | 
257 | * If a Retry Offload is integrated with a QUIC-LB routable load balancer
258 | {{?I-D.ietf-quic-load-balancers}}, it can verify that the Destination Connection
259 | ID is routable, and only admit non-Initial packets with routable DCIDs. As the
260 | Connection ID encoding is invariant across QUIC versions, the Retry Offload can
261 | do this for all short headers.
262 | 
263 | Nothing in this section prevents Retry Offloads from making basic syntax
264 | correctness checks on packets with QUIC versions that it understands (e.g.,
265 | enforcing the Initial Packet datagram size minimum in version 1).
266 | 
267 | # No-Shared-State Retry Offload
268 | 
269 | The no-shared-state Retry Offload requires no coordination, except that the
270 | server must be configured to accept this offload and know which QUIC versions
271 | the Retry Offload supports. The scheme uses the first bit of the token to
272 | distinguish between tokens from Retry packets (codepoint '0') and tokens from
273 | NEW_TOKEN frames (codepoint '1').
274 | 
275 | ## Configuration Agent Actions
276 | 
277 | See {{common-requirements}}.
278 | 
279 | ## Offload Requirements {#nss-offload-requirements}
280 | 
281 | A no-shared-state Retry Offload MUST be present on all paths from potential
282 | clients to the server. These paths MUST fail to pass QUIC traffic should the
283 | offload fail for any reason. That is, if the offload is not operational, the
284 | server MUST NOT be exposed to client traffic. Otherwise, servers that have
285 | already disabled their Retry capability would be vulnerable to attack.
286 | 
287 | The path between offload and server MUST be free of any potential attackers.
288 | Note that this and other requirements above severely restrict the operational
289 | conditions in which a no-shared-state Retry Offload can safely operate.
290 | 
291 | Retry tokens generated by the offload MUST have the format below.
292 | 
293 | ~~~
294 | No-Shared-State Retry Offload Token {
295 |   Token Type (1) = 0,
296 |   ODCIL (7) = 8..20,
297 |   Original Destination Connection ID (64..160),
298 |   Opaque Data (..),
299 | }
300 | ~~~
301 | {: #nss-retry-offload-token-format title="Format of no-shared-state Retry Offload tokens"}
302 | 
303 | The first bit of retry tokens generated by the offload MUST be zero. The token
304 | has the following additional fields:
305 | 
306 | ODCIL: The length of the original destination connection ID from the triggering
307 | Initial packet. This is in cleartext to be readable for the server, but
308 | authenticated later in the token. The Retry Offload SHOULD reject any token
309 | in which the value is less than 8.
310 | 
311 | Original Destination Connection ID: This is also in cleartext and authenticated
312 | later.
313 | 
314 | Opaque Data: This data contains the information necessary to authenticate the
315 | Retry token in accordance with the QUIC specification. A straightforward
316 | implementation would encode the Retry Source Connection ID, client IP address,
317 | and a timestamp in the Opaque Data. A more space-efficient implementation would
318 | use the Retry Source Connection ID and Client IP as associated data in an
319 | encryption operation, and encode only the timestamp and the authentication tag
320 | in the Opaque Data. If the Initial packet alters the Connection ID or source IP
321 | address, authentication of the token will fail.
322 | 
323 | Upon receipt of an Initial packet with a token that begins with '0', the Retry
324 | Offload MUST validate the token in accordance with the QUIC specification.
325 | 
326 | In active mode, the offload MUST issue Retry packets for all client Initial
327 | packets that contain no token, or a token that has the first bit set to '1'. It
328 | MUST NOT forward the packet to the server. The offload MUST validate all tokens
329 | with the first bit set to '0'. If successful, the offload MUST forward the
330 | packet with the token intact. If unsuccessful, it MUST drop the packet. The
331 | Retry Offload MAY send an Initial Packet containing a CONNECTION_CLOSE frame
332 | with the INVALID_TOKEN error code when dropping the packet.
333 | 
334 | Note that this scheme has a performance drawback. When the Retry Offload is in
335 | active mode, clients with a token from a NEW_TOKEN frame will suffer a 1-RTT
336 | penalty even though their token provides proof of address.
337 | 
338 | In inactive mode, the offload MUST forward all packets that have no token or a
339 | token with the first bit set to '1'. It MUST validate all tokens with the first
340 | bit set to '0'. If successful, the offload MUST forward the packet with the
341 | token intact. If unsuccessful, it MUST drop the packet.
342 | 
343 | ## Server Requirements
344 | 
345 | A server behind a no-shared-state Retry Offload MUST NOT send Retry packets
346 | for a QUIC version the Retry Offload understands. It MAY send Retry for QUIC
347 | versions the Retry Offload does not understand.
348 | 
349 | Tokens sent in NEW_TOKEN frames MUST have the first bit set to '1'.
350 | 
351 | If a server receives an Initial Packet with the first bit in the token set to
352 | '1', it could be from a server-generated NEW_TOKEN frame and should be processed
353 | in accordance with the QUIC specification. If a server receives an Initial
354 | Packet with the first bit set to '0', it is a Retry token and the server MUST NOT
355 | attempt to validate it. Instead, it MUST assume the address is validated, MUST
356 | include the packet's Destination Connection ID in a Retry Source Connection ID
357 | transport parameter, and MUST extract the Original Destination Connection ID
358 | from the token cleartext for use in the transport parameter of the same name.
359 | 
360 | # Shared-State Retry Offload {#shared-state-retry}
361 | 
362 | A shared-state Retry Offload uses a shared key, so that the server can decode
363 | the offload's retry tokens. It does not require that all traffic pass through
364 | the Retry Offload, so servers MAY send Retry packets in response to Initial
365 | packets without a valid token.
366 | 
367 | Both server and offload MUST have time synchronized within two seconds of each
368 | other to prevent tokens being incorrectly marked as expired.
369 | 
370 | The tokens are protected using AES128-GCM AEAD, as explained in
371 | {{token-protection-with-aead}}. All tokens, generated by either the server or
372 | Retry Offload, MUST use the following format, which includes:
373 | 
374 | - A 1 bit token type identifier.
375 | - A 7 bit token key identifier.
376 | - A 96 bit unique token number transmitted in clear text, but protected as part
377 | of the AEAD associated data.
378 | - A token body, encoding the Original Destination Connection ID and the
379 | Timestamp, optionally followed by server specific Opaque Data.
380 | 
381 | The token protection uses a 128 bit representation of the source IP address
382 | from the triggering Initial packet.  The client IP address is 16 octets. If an
383 | IPv4 address, the last 12 octets are zeroes. It also uses the Source Connection
384 | ID of the Retry packet, which will cause an authentication failure if it
385 | differs from the Destination Connection ID of the packet bearing the token.
386 | 
387 | If there is a Network Address Translator (NAT) in the server infrastructure that
388 | changes the client IP, the Retry Offload MUST either be positioned behind the
389 | NAT, or the NAT must have the token key to rewrite the Retry token accordingly.
390 | Note also that a host that obtains a token through a NAT and then attempts to
391 | connect over a path that does not have an identically configured NAT will fail
392 | address validation.
393 | 
394 | The 96 bit unique token number is set to a random value using a
395 | cryptography-grade random number generator.
396 | 
397 | The token key identifier and the corresponding AEAD key and AEAD IV are
398 | provisioned by the configuration agent.
399 | 
400 | The token body is encoded as follows:
401 | 
402 | ~~~
403 | Shared-State Retry Offload Token Body {
404 |    Timestamp (64),
405 |    [ODCIL (8) = 8..20],
406 |    [Original Destination Connection ID (64..160)],
407 |    [Port (16)],
408 |    Opaque Data (..),
409 | }
410 | ~~~
411 | {: #ss-retry-offload-token-body title="Body of shared-state Retry Offload tokens"}
412 | The token body has the following fields:
413 | 
414 | Timestamp: The Timestamp is a 64-bit integer, in network order, that expresses
415 | the expiration time of the token as a number of seconds in POSIX time (see Sec.
416 | 4.16 of {{TIME_T}}).
417 | 
418 | ODCIL: The original destination connection ID length. Tokens in NEW_TOKEN frames
419 | do not have this field.
420 | 
421 | Original Destination Connection ID: The server or Retry Offload copies this
422 | from the field in the client Initial packet. Tokens in NEW_TOKEN frames do not
423 | have this field.
424 | 
425 | Port: The Source Port of the UDP datagram that triggered the Retry packet.
426 | This field MUST be present if and only if the ODCIL is greater than zero. This
427 | field is therefore always absent in tokens in NEW_TOKEN frames.
428 | 
429 | Opaque Data: The server may use this field to encode additional information,
430 | such as congestion window, RTT, or MTU. The Retry Offload MUST have zero-length
431 | opaque data.
432 | 
433 | Some implementations of QUIC encode in the token the Initial Packet Number used
434 | by the client, in order to verify that the client sends the retried Initial
435 | with a PN larger than the triggering Initial. Such implementations will encode
436 | the Initial Packet Number as part of the opaque data. As tokens may be
437 | generated by the Retry Offload, servers MUST NOT reject tokens because they lack
438 | opaque data and therefore the packet number.
439 | 
440 | Shared-state Retry Offloads use the AES-128-GCM AEAD cipher. Future standards could
441 | add new algorithms that use other ciphers to provide cryptographic agility in
442 | accordance with {{?RFC7696}}. Retry Offload and server implementations SHOULD be
443 | extensible to support new algorithms.
444 | 
445 | ### Token Protection with AEAD {#token-protection-with-aead}
446 | 
447 | On the wire, the token is presented as:
448 | 
449 | ~~~
450 | Shared-State Retry Offload Token {
451 |   Token Type (1),
452 |   Key Sequence (7),
453 |   Unique Token Number (96),
454 |   Encrypted Shared-State Retry Offload Token Body (64..),
455 |   AEAD Integrity Check Value (128),
456 | }
457 | ~~~
458 | {: #ss-retry-offload-token-wire-image title="Wire image of shared-state Retry Offload tokens"}
459 | 
460 | The tokens are protected using AES128-GCM as follows:
461 | 
462 | * The Key Sequence is the 7 bit identifier to retrieve the token key and IV.
463 | 
464 | * The AEAD IV is 96 bits, generated by the configuration agent.
465 | 
466 | * The AEAD nonce, N, is formed by XORing the AEAD IV with the 96 bit unique
467 | token number.
468 | 
469 | * The associated data is formatted as a pseudoheader by combining the
470 | cleartext part of the token with the IP address of the client. The format of
471 | the pseudoheader depends on whether the Token Type bit is '1' (a NEW_TOKEN
472 | token) or '0' (a Retry token).
473 | 
474 | ~~~
475 | Shared-State Retry Offload Token Pseudoheader {
476 |   IP Address (128),
477 |   Token Type (1),
478 |   Key Sequence (7),
479 |   Unique Token Number (96),
480 |   [RSCIL (8)],
481 |   [Retry Source Connection ID (0..20)],
482 | }
483 | ~~~
484 | {: #ss-retry-offload-token-pseudoheader title="Pseudoheader for shared-state Retry Offload tokens"}
485 | 
486 | RSCIL: The Retry Source Connection ID Length in octets. This field is only
487 | present when the Token Type is '0'.
488 | 
489 | Retry Source Connection ID: To create a Retry Token, populate this field with
490 | the Source Connection ID the Retry packet will use. To validate a Retry token,
491 | populate it with the Destination Connection ID of the Initial packet that
492 | carries the token. This field is only present when the Token Type is '0'.
493 | 
494 | * The input plaintext for the AEAD is the token body. The output ciphertext of
495 | the AEAD is transmitted in place of the token body.
496 | * The AEAD Integrity Check Value (ICV), defined in Section 6 of {{?RFC4106}}, is
497 | computed as part of the AEAD encryption process, and is verified during
498 | decryption.
499 | 
500 | ## Configuration Agent Actions
501 | 
502 | The configuration agent generates and distributes a "token key", a "token IV",
503 | a key sequence, and the information described in {{common-requirements}}.
504 | 
505 | ## Offload Requirements {#ss-offload}
506 | 
507 | In inactive mode, the Retry Offload forwards all packets without further
508 | inspection or processing. The rest of this section only applies to an offload in
509 | active mode.
510 | 
511 | Retry Offloads MUST NOT issue Retry packets except where explicitly allowed
512 | below, to avoid sending a Retry packet in response to a Retry token.
513 | 
514 | The offload MUST generate Retry tokens with the format described above when it
515 | receives a client Initial packet with no token.
516 | 
517 | If there is a token of either type, the offload MUST attempt to decrypt it.
518 | 
519 | To decrypt a token, the offload checks the Token Type and constructs a
520 | pseudoheader with the appropriate format for that type, using the bearing
521 | packet's Destination Connection ID to populate the Retry Source Connection ID
522 | field, if any.
523 | 
524 | A token is invalid if:
525 | 
526 | * it uses an unknown key sequence,
527 | 
528 | * the AEAD ICV does not match the expected value (By construction, it will only
529 | match if the client IP Address, and any Retry Source Connection ID, also
530 | matches),
531 | 
532 | * the ODCIL, if present, is invalid for a client-generated CID (less than 8 or
533 | more than 20 in QUIC version 1),
534 | 
535 | * the Timestamp of a token points to a time in the past (however, in order to
536 | allow for clock skew, it SHOULD NOT consider tokens to be expired if the
537 | Timestamp encodes less than two seconds in the past), or
538 | 
539 | * the port number, if present, does not match the source port in the
540 | encapsulating UDP header.
541 | 
542 | Packets with valid tokens MUST be forwarded to the server.
543 | 
544 | The offload MUST drop packets with invalid tokens. If the token is of type '1'
545 | (NEW_TOKEN), it MUST respond with a Retry packet. If of type '0', it MUST NOT
546 | respond with a Retry packet.
547 | 
548 | ## Server Requirements
549 | 
550 | The server MAY issue Retry or NEW_TOKEN tokens in accordance with {{RFC9000}}.
551 | When doing so, it MUST follow the format above.
552 | 
553 | The server MUST validate all tokens that arrive in Initial packets, as they may
554 | have bypassed the Retry Offload. It determines validity using the procedure
555 | in {{ss-offload}}.
556 | 
557 | If the token is a valid Retry token, the server populates the
558 | original_destination_connection_id transport parameter using the
559 | corresponding token field. It populates the retry_source_connection_id transport
560 | parameter with the Destination Connection ID of the packet bearing the token.
561 | 
562 | In all other respects, the server processes both valid and invalid tokens in
563 | accordance with {{RFC9000}}.
564 | 
565 | For QUIC versions the offload does not support, the server MAY use any token
566 | format.
567 | 
568 | # Security Considerations {#security-considerations}
569 | 
570 | ## Shared-State Retry Keys
571 | 
572 | The Shared-State Retry Offload defined in {{shared-state-retry}} describes the
573 | format of retry tokens or new tokens protected and encrypted using AES128-GCM.
574 | Each token includes a 96 bit randomly generated unique token number, and a 7
575 | bit identifier used to get the AES-GCM encryption context. The AES-GCM
576 | encryption context contains a 128 bit key and an AEAD IV. There are three
577 | important security considerations for these tokens:
578 | 
579 | * An attacker that obtains a copy of the encryption key will be able to decrypt
580 |   and forge tokens.
581 | 
582 | * Attackers may be able to retrieve the key if they capture a sufficiently large
583 |   number of retry tokens encrypted with a given key.
584 | 
585 | * Confidentiality of the token data will fail if separate tokens reuse the
586 |   same 96 bit unique token number and the same key.
587 | 
588 | To protect against disclosure of keys to attackers, offloads and servers MUST
589 | ensure that the keys are stored securely. To limit the consequences of potential
590 | exposures, the lifetime of any given key should be limited.
591 | 
592 | Section 6.6 of {{?RFC9001}} states that "Endpoints MUST count the number of
593 | encrypted packets for each set of keys. If the total number of encrypted packets
594 | with the same key exceeds the confidentiality limit for the selected AEAD, the
595 | endpoint MUST stop using those keys." It goes on with the specific limit: "For
596 | AEAD_AES_128_GCM and AEAD_AES_256_GCM, the confidentiality limit is 2^23
597 | encrypted packets; see Appendix B.1." It is prudent to adopt the same limit
598 | here, and configure the offload in such a way that no more than 2^23 tokens are
599 | generated with the same key.
600 | 
601 | In order to protect against collisions, the 96 bit unique token numbers should
602 | be generated using a cryptographically secure pseudorandom number generator
603 | (CSPRNG), as specified in Appendix C.1 of the TLS 1.3 specification
604 | {{!RFC8446}}. With proper random numbers, if fewer than 2^40 tokens are
605 | generated with a single key, the risk of collisions is lower than 0.001%.
606 | 
607 | # IANA Considerations
608 | 
609 | There are no IANA requirements.
610 | 
611 | --- back
612 | 
613 | # Retry Offload YANG Model {#yang-model}
614 | 
615 | These YANG models conform to {{?RFC7950}} and express a complete Retry Offload
616 | configuration.
617 | 
618 | ~~~
619 | module ietf-retry-offload {
620 |   yang-version "1.1";
621 |   namespace "urn:ietf:params:xml:ns:yang:ietf-quic-lb";
622 |   prefix "quic-lb";
623 | 
624 |   import ietf-yang-types {
625 |     prefix yang;
626 |     reference
627 |       "RFC 6991: Common YANG Data Types.";
628 |   }
629 | 
630 |   import ietf-inet-types {
631 |     prefix inet;
632 |     reference
633 |       "RFC 6991: Common YANG Data Types.";
634 |   }
635 | 
636 |   organization
637 |     "IETF QUIC Working Group";
638 | 
639 |   contact
640 |     "WG Web:   <http://datatracker.ietf.org/wg/quic>
641 |      WG List:  <quic@ietf.org>
642 | 
643 |      Authors: Martin Duke (martin.h.duke at gmail dot com)
644 |               Nick Banks (nibanks at microsoft dot com)
645 |               Christian Huitema (huitema at huitema.net)";
646 | 
647 |   description
648 |     "This module enables the explicit cooperation of QUIC servers
649 |      with offloads that generate Retry packets on their behalf.
650 | 
651 |      Copyright (c) 2022 IETF Trust and the persons identified as
652 |      authors of the code.  All rights reserved.
653 | 
654 |      Redistribution and use in source and binary forms, with or
655 |      without modification, is permitted pursuant to, and subject to
656 |      the license terms contained in, the Simplified BSD License set
657 |      forth in Section 4.c of the IETF Trust's Legal Provisions
658 |      Relating to IETF Documents
659 |      (https://trustee.ietf.org/license-info).
660 | 
661 |      This version of this YANG module is part of RFC XXXX
662 |      (https://www.rfc-editor.org/info/rfcXXXX); see the RFC itself
663 |      for full legal notices.
664 | 
665 |      The key words 'MUST', 'MUST NOT', 'REQUIRED', 'SHALL', 'SHALL
666 |      NOT', 'SHOULD', 'SHOULD NOT', 'RECOMMENDED', 'NOT RECOMMENDED',
667 |      'MAY', and 'OPTIONAL' in this document are to be interpreted as
668 |      described in BCP 14 (RFC 2119) (RFC 8174) when, and only when,
669 |      they appear in all capitals, as shown here.";
670 | 
671 |   revision "2022-02-11" {
672 |     description
673 |       "Initial version";
674 |     reference
675 |       "RFC XXXX, QUIC Retry Offloads";
676 |   }
677 | 
678 |   container retry-offload-config {
679 |     description
680 |       "Configuration of Retry Offload. If supported-versions is empty,
681 |        there is no Retry Offload. If token-keys is empty, it uses the
682 |        non-shared-state offload. If present, it uses shared-state
683 |        tokens.";
684 | 
685 |     leaf-list supported-versions {
686 |       type uint32;
687 |       description
688 |         "QUIC versions that the Retry Offload supports. If empty,
689 |          there is no Retry Offload.";
690 |     }
691 | 
692 |     leaf unsupported-version-default {
693 |       type enumeration {
694 |         enum allow {
695 |           description "Unsupported versions admitted by default";
696 |         }
697 |         enum deny {
698 |           description "Unsupported versions denied by default";
699 |         }
700 |       }
701 |       default allow;
702 |       description
703 |         "Are unsupported versions not in version-exceptions allowed
704 |          or denied?";
705 |     }
706 | 
707 |     leaf-list version-exceptions {
708 |       type uint32;
709 |       description
710 |         "Exceptions to the default-deny or default-allow rule.";
711 |     }
712 | 
713 |     list token-keys {
714 |       key "key-sequence-number";
715 |       description
716 |         "list of active keys, for key rotation purposes. Existence
717 |          implies shared-state format";
718 | 
719 |       leaf key-sequence-number {
720 |         type uint8 {
721 |           range "0..127";
722 |         }
723 |         mandatory true;
724 |         description
725 |           "Identifies the key used to encrypt the token";
726 |       }
727 | 
728 |       leaf token-key {
729 |         type yang:hex-string { length 47; }
730 |         mandatory true;
731 |         description
732 |           "16-byte key to encrypt the token, encoded in 47 bytes";
733 |       }
734 | 
735 |       leaf token-iv {
736 |         type yang:hex-string {
737 |           length 35;
738 |         }
739 |         mandatory true;
740 |         description
741 |           "12-byte IV to encrypt the token, encoded in 35 bytes";
742 |       }
743 |     }
744 |   }
745 | }
746 | ~~~
747 | 
748 | ## Tree Diagram
749 | 
750 | This summary of the YANG models uses the notation in {{?RFC8340}}.
751 | 
752 | ~~~
753 | module: retry-offload-config
754 |   +--rw retry-offload-config
755 |      +--rw supported-versions*            uint32
756 |      +--rw unsupported-version-default?   enumeration
757 |      +--rw version-exceptions*            uint32
758 |      +--rw token-keys* [key-sequence-number]
759 |         +--rw key-sequence-number    uint8
760 |         +--rw token-key              retry-offload-key
761 |         +--rw token-iv               yang:hex-string
762 | ~~~
763 | 
764 | ## Shared State Retry Token Test Vectors
765 | 
766 | In this case, the shared-state retry token is issued by Retry Offload, so the
767 | opaque data of shared-state retry token body would be null
768 | ({{shared-state-retry}}).
769 | 
770 | ~~~
771 | Configuration:
772 | key_seq 0x00
773 | encrypt_key 0x30313233343536373839303132333435
774 | AEAD_IV 0x313233343536373839303132
775 | 
776 | Shared-State Retry Offload Token Body:
777 | ODCIL 0x12
778 | RSCIL 0x10
779 | port 0x1a0a
780 | original_destination_connection_id 0x0c3817b544ca1c94313bba41757547eec937
781 | retry_source_connection_id 0x0301e770d24b3b13070dd5c2a9264307
782 | timestamp 0x0000000060c7bf4d
783 | 
784 | Shared-State Retry Offload Token:
785 | unique_token_number 0x59ef316b70575e793e1a8782
786 | key_sequence 0x00
787 | encrypted_shared_state_retry_offload_token_body
788 | 0x7d38b274aa4427c7a1557c3fa666945931defc65da387a83855196a7cb73caac1e28e5346fd76868de94f8b62294
789 | AEAD_ICV 0xf91174fdd711543a32d5e959867f9c22
790 | 
791 | AEAD related parameters:
792 | client_ip_addr 127.0.0.1
793 | client_port 6666
794 | AEAD_nonce 0x68dd025f45616941072ab6b0
795 | AEAD_associated_data 0x7f00000100000000000000000000000059ef316b70575e793e1a878200
796 | ~~~
797 | 
798 | # Transition Mode Scenarios {#mid-handshake}
799 | 
800 | The logic motivating transition mode behavior involves detailed reasoning about
801 | endpoint behavior during the handshake. This non-normative appendix walks
802 | through the scenarios.
803 | 
804 | Dropping Initial packets in the client's second flight can cause performance
805 | problems or deadlocks. In the case where the client and server first flight end
806 | with both sides having handshake keys, there will generally be no impact on
807 | performance. However, if an Initial ACK is critical to progress, as it can be in
808 | the case of multiple-packet TLS messages, Hello Retry Requests, and similar
809 | cases, dropping subsequent Initial ACKs results in deadlock.
810 | 
811 | In transition mode, the Retry Offload forwards Initials with no token while also
812 | generating a Retry. This allows handshakes to progress without further incident.
813 | 
814 | ## Handshakes in Progress
815 | 
816 | If the client hello was admitted in inactive mode, then the client has already
817 | received a packet from the server. Although subsequent client Initial packets
818 | will trigger a Retry, the client will ignore these packets. Those Initials will
819 | also be processed by the server to continue the handshake.
820 | 
821 | ## New Connections
822 | 
823 | After sending a Client Hello in Initial Packet A, a client will rapidly receive
824 | a Retry Packet from the Offload and attempt to reconnect accordingly with
825 | Initial Packet B.
826 | 
827 | The client will discard any server response to Initial A. If a Retry, it is a
828 | second Retry on the connection. If an Initial, it is encrypted with keys
829 | derived from Initial A, which have already been discarded, and will be a
830 | decryption failure.
831 | 
832 | Initial B's destination connection ID will be new, so the server will process
833 | it as a new connection and proceed normally.
834 | 
835 | Unfortunately, the server connection state initiated by Initial A will remain.
836 | For this reason, this document suggests that servers silently terminate the
837 | older connection. Requiring the address to be validated avoids cases where an
838 | attacker simply replays a client Initial with a new Destination Connection ID
839 | to terminate a valid connection.
840 | 
841 | Note that there are corner cases involving further packet loss that result in
842 | connection timeout. For instance, if the Retry Offload's response to Initial A
843 | is lost, then the connection will proceed based on Initial A. If the Retry
844 | Offload then switches from transition mode to active mode before the client's
845 | second flight arrives, the Retry Offload will drop the Initial packet in that
846 | flight, and the connection might deadlock.
847 | 
848 | # Acknowledgments
849 | 
850 | Christian Huitema, Ling Tao Nju, and William Zeng Ke all provided useful input
851 | to this document.
852 | 
853 | # Change Log
854 | 
855 | > **RFC Editor's Note:**  Please remove this section prior to
856 | > publication of a final version of this document.
857 | 
858 | ## since draft-duke-quic-retry-offload-00
859 | - Converted to adopted IETF draft
860 | - Cleaner transition from inactive to active mode
861 | 
862 | ## since draft-ietf-quic-load-balancers-12
863 | - Separated from the QUIC-LB draft
864 | - Renamed "Retry Service" to "Retry Offload"
865 | 
866 | ## since draft-ietf-quic-load-balancers-11
867 | 
868 | - Fixed mistakes in test vectors
869 | 
870 | ## since draft-ietf-quic-load-balancers-10
871 | 
872 | - Refactored algorithm descriptions; made the 4-pass algorithm easier to
873 | implement
874 | - Revised test vectors
875 | - Split YANG model into a server and middlebox version
876 | 
877 | ## since draft-ietf-quic-load-balancers-09
878 | - Renamed "Stream Cipher" and "Block Cipher" to "Encrypted Short" and
879 | "Encrypted Long"
880 | - Added section on per-connection state
881 | - Changed "Encrypted Short" to a 4-pass algorithm.
882 | - Recommended a random initial nonce when incrementing.
883 | - Clarified what SNI LBs should do with unknown QUIC versions.
884 | 
885 | ## since draft-ietf-quic-load-balancers-08
886 | - Eliminate Dynamic SID allocation
887 | - Eliminated server use bytes
888 | 
889 | ## since draft-ietf-quic-load-balancers-07
890 | - Shortened SSCID nonce minimum length to 4 bytes
891 | - Removed RSCID from Retry token body
892 | - Simplified CID formats
893 | - Shrunk size of SID table
894 | 
895 | ## since draft-ietf-quic-load-balancers-06
896 | - Added interoperability with DTLS
897 | - Changed "non-compliant" to "unroutable"
898 | - Changed "arbitrary" algorithm to "fallback"
899 | - Revised security considerations for mistrustful tenants
900 | - Added Retry Offload considerations for non-Initial packets
901 | 
902 | ## since draft-ietf-quic-load-balancers-05
903 | - Added low-config CID for further discussion
904 | - Complete revision of shared-state Retry Token
905 | - Added YANG model
906 | - Updated configuration limits to ensure CID entropy
907 | - Switched to notation from quic-transport
908 | 
909 | ## since draft-ietf-quic-load-balancers-04
910 | - Rearranged the shared-state retry token to simplify token processing
911 | - More compact timestamp in shared-state retry token
912 | - Revised server requirements for shared-state retries
913 | - Eliminated zero padding from the test vectors
914 | - Added server use bytes to the test vectors
915 | - Additional compliant DCID criteria
916 | 
917 | ## since-draft-ietf-quic-load-balancers-03
918 | - Improved Config Rotation text
919 | - Added stream cipher test vectors
920 | - Deleted the Obfuscated CID algorithm
921 | 
922 | ## since-draft-ietf-quic-load-balancers-02
923 | - Replaced stream cipher algorithm with three-pass version
924 | - Updated Retry format to encode info for required TPs
925 | - Added discussion of version invariance
926 | - Cleaned up text about config rotation
927 | - Added Reset Oracle and limited configuration considerations
928 | - Allow dropped long-header packets for known QUIC versions
929 | 
930 | ## since-draft-ietf-quic-load-balancers-01
931 | - Test vectors for load balancer decoding
932 | - Deleted remnants of in-band protocol
933 | - Light edit of Retry Offloads section
934 | - Discussed load balancer chains
935 | 
936 | ## since-draft-ietf-quic-load-balancers-00
937 | - Removed in-band protocol from the document
938 | 
939 | ## Since draft-duke-quic-load-balancers-06
940 | - Switch to IETF WG draft.
941 | 
942 | ## Since draft-duke-quic-load-balancers-05
943 | - Editorial changes
944 | - Made load balancer behavior independent of QUIC version
945 | - Got rid of token in stream cipher encoding, because server might not have it
946 | - Defined "non-compliant DCID" and specified rules for handling them.
947 | - Added pseudocode for config schema
948 | 
949 | ## Since draft-duke-quic-load-balancers-04
950 | - Added standard for Retry Offloads
951 | 
952 | ## Since draft-duke-quic-load-balancers-03
953 | - Renamed Plaintext CID algorithm as Obfuscated CID
954 | - Added new Plaintext CID algorithm
955 | - Updated to allow 20B CIDs
956 | - Added self-encoding of CID length
957 | 
958 | ## Since draft-duke-quic-load-balancers-02
959 | - Added Config Rotation
960 | - Added failover mode
961 | - Tweaks to existing CID algorithms
962 | - Added Block Cipher CID algorithm
963 | - Reformatted QUIC-LB packets
964 | 
965 | ## Since draft-duke-quic-load-balancers-01
966 | - Complete rewrite
967 | - Supports multiple security levels
968 | - Lightweight messages
969 | 
970 | ## Since draft-duke-quic-load-balancers-00
971 | - Converted to markdown
972 | - Added variable length connection IDs
973 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": {
3 |     "aasvg": "^0.3.3"
4 |   }
5 | }
6 | 


--------------------------------------------------------------------------------
/quic_lb_protocol.md:
--------------------------------------------------------------------------------
  1 | NOTE: This file describes the deleted in-band QUIC-LB protocol, should we ever
  2 | revive a form of it.
  3 | 
  4 | # Protocol Description {#protocol-description}
  5 | 
  6 | There are multiple means of configuration that correspond to differing
  7 | deployment models and increasing levels of concern about the security of the
  8 | load balancer-server path.
  9 | 
 10 | ## Out of band sharing
 11 | 
 12 | When there are concerns about the integrity of the path between load balancer
 13 | and server, operators MAY share routing information using an out-of-band
 14 | technique, which is out of the scope of this specification.
 15 | 
 16 | To simplify configuration, the global parameters can be shared out-of-band,
 17 | while the load balancer sends the unique server IDs via the truncated message
 18 | formats presented below.
 19 | 
 20 | ## QUIC-LB Message Exchange
 21 | 
 22 | QUIC-LB load balancers and servers exchange messages via the QUIC-LBv1 protocol,
 23 | which uses the QUIC invariants with version number 0xF1000000. The QUIC-LB
 24 | load balancers send the encoding parameters to servers and periodically
 25 | retransmit until that server responds with an acknowledgement. Specifics of this
 26 | retransmission are implementation-dependent.
 27 | 
 28 | ## QUIC-LB Packet {#quic-lb-packet}
 29 | 
 30 | A QUIC-LB packet uses a long header.  It carries configuration information from
 31 | the load balancer and acknowledgements from the servers.  They are sent when a
 32 | load balancer boots up, detects a new server in the pool or needs to update the
 33 | server configuration.
 34 | 
 35 | ~~~~~
 36 | 0                   1                   2                   3
 37 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 38 | +-+-+-+-+-+-+-+-+
 39 | |1|C R| Reserved|
 40 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 41 | |                        Version (32)                           |
 42 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 43 | |  0x00 | 0x00  |
 44 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 45 | |                                                               |
 46 | +                  Authentication Token (64)                    +
 47 | |                                                               |
 48 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 49 | | Message Type  |
 50 | +-+-+-+-+-+-+-+-+
 51 | ~~~~~
 52 | {: #quic-lb-packet-format title="QUIC-LB Packet Format"}
 53 | 
 54 | The Version field allows QUIC-LB to use the Version Negotiation mechanism.  All
 55 | messages in this specification are specific to QUIC-LBv1.  It should be set to
 56 | 0xF1000000.
 57 | 
 58 | Load balancers MUST cease sending QUIC-LB packets of this version to a server
 59 | when that server sends a Version Negotiation packet that does not advertise the
 60 | version.
 61 | 
 62 | The DCIL and SCIL fields are both set to 0x00.
 63 | 
 64 | CR
 65 | 
 66 | : The 2-bit CR field indicates the Config Rotation described in
 67 |   {{config-rotation}}.
 68 | 
 69 | Authentication Token
 70 | 
 71 | : The Authentication Token is an 8-byte field that both entities obtain at
 72 |   configuration time. It is used to verify that the sender is not an inside
 73 |   off-path attacker. Servers and load balancers SHOULD silently discard QUIC-LB
 74 |   packets with an incorrect token.
 75 | 
 76 | Message Type
 77 | 
 78 | : The Message Type indicates the type of message payload that follows the
 79 |   QUIC-LB header.
 80 | 
 81 | ## Message Types and Formats
 82 | 
 83 | As described in {{quic-lb-packet}}, QUIC-LB packets contain a single message.
 84 | This section describes the format and semantics of the QUIC-LB message types.
 85 | 
 86 | ### ACK_LB Message {#message-ack-lb}
 87 | 
 88 | A server uses the ACK_LB message (type=0x00) to acknowledge a QUIC-LB packet
 89 | received from the load balancer.  The ACK_LB message has no additional payload
 90 | beyond the QUIC-LB packet header.
 91 | 
 92 | Load balancers SHOULD continue to retransmit a QUIC-LB packet until a valid
 93 | ACK_LB message, FAIL message or Version Negotiation Packet is received from the
 94 | server.
 95 | 
 96 | ### FAIL Message {#message-fail}
 97 | 
 98 | A server uses the FAIL message (type=0x01) to indicate the configuration
 99 | received from the load balancer is unsupported.
100 | 
101 | ~~~~~
102 | 0                   1                   2                   3
103 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
104 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 | |   Supp. Type  |  Supp. Type   |  ...
106 | +-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107 | ~~~~~
108 | 
109 | Servers MUST send a FAIL message upon receipt of a message type which they do
110 | not support, or if they do not possess all of the implied out-of-band
111 | configuration to support a particular message type.
112 | 
113 | The payload of the FAIL message consists of a list of all the message types
114 | supported by the server.
115 | 
116 | Upon receipt of a FAIL message, Load Balancers MUST either send a QUIC-LB
117 | message the server supports or remove the server from the server pool.
118 | 
119 | ### ROUTING_INFO Message {#message-routing-info}
120 | 
121 | A load balancer uses the ROUTING_INFO message (type=0x02) to exchange all the
122 | parameters for the Obfuscated CID algorithm.
123 | 
124 | ~~~~~
125 | 0                   1                   2                   3
126 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
127 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
128 | |                                                               |
129 | +                                                               +
130 | |                                                               |
131 | +                       Routing Bit Mask (152)                  +
132 | |                                                               |
133 | +                                                               +
134 | |                                                               |
135 | +                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
136 | |                               |         Modulus (16)          |
137 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
138 | |         Divisor (16)          |
139 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
140 | ~~~~~
141 | 
142 | Routing Bit Mask
143 | 
144 | : The Routing Bit Mask encodes a '1' at every bit position in the server
145 |  connection ID that will encode routing information.
146 | 
147 | These bits, along with the Modulus and Divisor, are chosen by the load balancer
148 | as described in {{obfuscated-cid-algorithm}}.
149 | 
150 | ### STREAM_CID Message {#message-stream-cid}
151 | 
152 | A load balancer uses the STREAM_CID message (type=0x03) to exchange all the
153 | parameters for using Stream Cipher CIDs.
154 | 
155 | ~~~~~
156 | 0                   1                   2                   3
157 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
158 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
159 | | Nonce Len (8) |    SIDL (8)   |
160 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
161 | |                       Server ID (variable)                    |
162 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
163 | |                                                               |
164 | +                             Key (128)                         +
165 | |                                                               |
166 | +                                                               +
167 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
168 | ~~~~~
169 | {: #Stream-cid-format title="Stream CID Payload"}
170 | 
171 | Nonce Len
172 | 
173 | : The Nonce Len field is a one-octet unsigned integer that describes the
174 |   nonce length necessary to use this routing algorithm, in octets.
175 | 
176 | SIDL
177 | 
178 | : The SIDL field is a one-octet unsigned integer that describes the server ID
179 |   length necessary to use this routing algorithm, in octets.
180 | 
181 | Server ID
182 | 
183 | : The Server ID is the unique value assigned to the receiving server. Its
184 |   length is determined by the SIDL field.
185 | 
186 | Key
187 | 
188 | : The Key is a 16-octet field that contains the key that the load balancer
189 |   will use to decrypt server IDs on QUIC packets.  See
190 |   {{security-considerations}} to understand why sending keys in plaintext may
191 |   be a safe strategy.
192 | 
193 | ### BLOCK_CID Message {#message-block-cid}
194 | 
195 | A load balancer uses the BLOCK_CID message (type=0x04) to exchange all the
196 | parameters for using Block Cipher CIDs.
197 | 
198 | ~~~~~
199 | 0                   1                   2                   3
200 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
201 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
202 | |   ZP Len (8)  |    SIDL (8)   |
203 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
204 | |                       Server ID (variable)                    |
205 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
206 | |                                                               |
207 | +                             Key (128)                         +
208 | |                                                               |
209 | +                                                               +
210 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
211 | ~~~~~
212 | {: #block-cid-format title="Block CID Payload"}
213 | 
214 | ZP Len
215 | 
216 | : The ZP Len field is a one-octet unsigned integer that describes the
217 |   zero-padding length necessary to use this routing algorithm, in octets.
218 | 
219 | SIDL
220 | 
221 | : The SIDL field is a one-octet unsigned integer that describes the server ID
222 |   length necessary to use this routing algorithm, in octets.
223 | 
224 | Server ID
225 | 
226 | : The Server ID is the unique value assigned to the receiving server. Its
227 |   length is determined by the SIDL field.
228 | 
229 | Key
230 | 
231 | : The Key is a 16-octet field that contains the key that the load balancer
232 |   will use to decrypt server IDs on QUIC packets.  See
233 |   {{security-considerations}} to understand why sending keys in plaintext may
234 |   be a safe strategy.
235 | 
236 | ### SERVER_ID Message {#message-server-id}
237 | 
238 | A load balancer uses the SERVER_ID message (type=0x05) to exchange
239 | explicit server IDs.
240 | 
241 | ~~~~~
242 | 0                   1                   2                   3
243 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
244 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
245 | |    SIDL (8)   |       Server ID (variable)    |
246 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
247 | ~~~~~
248 | 
249 | Load balancers send the SERVER_ID message when all global values for Stream or
250 | Block CIDs are sent out-of-band, so that only the server-unique values must be
251 | sent in-band. It also provides all necessary parameters for Plaintext CIDs. The
252 | fields are identical to their counterparts in the {{message-stream-cid}}
253 | payload.
254 | 
255 | ### MODULUS Message {#message-modulus}
256 | 
257 | A load balancer uses the MODULUS message (type=0x06) to exchange just the
258 | modulus used in the Obfuscated CID algorithm.
259 | 
260 | ~~~~~
261 | 0                   1                   2                   3
262 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
263 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
264 | |           Modulus (16)        |
265 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
266 | ~~~~~
267 | 
268 | Load balancers send the MODULUS when all global values for Obfuscated CIDs
269 | are sent out-of-band, so that only the server-unique values must be sent
270 | in-band. The Modulus field is identical to its counterpart in the
271 | ROUTING_INFO message.
272 | 
273 | ### PLAINTEXT Message {#message-plaintext}
274 | 
275 | A load balancer uses the PLAINTEXT message (type=0x07) to exchange all
276 | parameters needed for the Plaintext CID algorithm.
277 | 
278 | ~~~~~
279 | 0                   1                   2                   3
280 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
281 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
282 | |    SIDL (8)   |
283 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
284 | |                                                               |
285 | +                      Server ID (variable)                     +
286 | |                                                               |
287 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
288 | ~~~~~
289 | 
290 | The SIDL field indicates the length of the server ID field. The
291 | Server ID field indicates the encoding that represents the
292 | destination server.
293 | 
294 | ### RETRY_SERVICE_STATELESS message
295 | 
296 | A no-shared-state retry service uses this message (type=0x08) to notify the
297 | server of the existence of this service. This message has no fields.
298 | 
299 | ### RETRY_SERVICE_STATEFUL message
300 | 
301 | A shared-state retry service uses this message (type=0x09) to tell the server
302 | about its existence, and share the key needed to decrypt server-generated retry
303 | tokens.
304 | 
305 | ~~~~~
306 | 0                   1                   2                   3
307 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
308 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
309 | |                                                               |
310 | +                                                               +
311 | |                                                               |
312 | +                           Key (128)                           +
313 | |                                                               |
314 | +                                                               +
315 | |                                                               |
316 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
317 | ~~~~~
318 | 


--------------------------------------------------------------------------------