├── .editorconfig
├── .gitattributes
├── .github
    ├── CODEOWNERS
    ├── ISSUE_TEMPLATE
    │   └── quic-draft-issue.md
    ├── in-solidarity.yml
    ├── release-drafter.yml
    └── workflows
    │   ├── archive.yml
    │   ├── assign-to-project.yml
    │   ├── ghpages.yml
    │   ├── publish.yml
    │   └── update.yml
├── .gitignore
├── .lint.py
├── .travis.yml
├── CONTRIBUTING.md
├── Makefile
├── README.md
├── ietf.json
├── protection-samples.js
├── rfc8999.md
├── rfc9000.md
├── rfc9001.md
├── rfc9002.md
├── rfc9114.md
├── rfc9204.md
├── tag.sh
├── writeups
    └── base-drafts.md
└── xml2rfc-tidy.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # See http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*.md]
 6 | charset = utf-8
 7 | end_of_line = lf
 8 | indent_size = 2
 9 | indent_style = space
10 | insert_final_newline = true
11 | max_line_length = 80
12 | trim_trailing_whitespace = true
13 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Set the default behavior, in case people don't have core.autocrlf set.
 2 | * text=auto
 3 | 
 4 | # Explicitly declare text files you want to always be normalized and converted
 5 | # to native line endings on checkout.
 6 | *.md text
 7 | *.xml text
 8 | 
 9 | # Declare files that will always have LF line endings on checkout.
10 | *.sh text eol=lf
11 | *.mk text eol=lf


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
 1 | *                               @quicwg/chairs
 2 | 
 3 | draft-ietf-quic-http.md         @MikeBishop
 4 | 
 5 | draft-ietf-quic-invariants.md   @martinthomson 
 6 | 
 7 | draft-ietf-quic-qpack.md        @MikeBishop @afrind
 8 | 
 9 | draft-ietf-quic-recovery.md     @janaiyengar @ianswett
10 | 
11 | draft-ietf-quic-tls.md          @martinthomson @seanturner
12 | 
13 | draft-ietf-quic-transport.md    @janaiyengar @martinthomson
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/quic-draft-issue.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: QUIC Draft Issue
 3 | about: File an issue with a QUIC draft document
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | Before opening an issue, please familiarise yourself with the QUIC WG [Contribution Guidelines](https://github.com/quicwg/base-drafts/blob/master/CONTRIBUTING.md) and [Late-Stage Process](https://github.com/quicwg/base-drafts/blob/master/CONTRIBUTING.md#late-stage-process).
11 | 
12 | All documents in this repository follow this process. Before filing a new issue against any of them, please consider a few things:
13 | 
14 | * Issues should be just that; issues with our deliverables, **not proposals, questions or support requests**.
15 | * Please review the issues list to make sure that you aren't filing a duplicate. Design issues that revisit a topic where there's already declared consensus (see https://github.com/quicwg/base-drafts/issues?q=is%3Aclosed+label%3Ahas-consensus) need to provide compelling reasons to warrant reopening the discussion.
16 | * If you're not sure how to phrase your issue, please ask on the [mailing list](https://www.ietf.org/mailman/listinfo/quic).
17 | 


--------------------------------------------------------------------------------
/.github/in-solidarity.yml:
--------------------------------------------------------------------------------
1 | _extends: ietf/terminology
2 | 


--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
 1 | categories:
 2 |   - title: Transport
 3 |     label: -transport
 4 |   - title: Recovery
 5 |     label: -recovery
 6 |   - title: TLS
 7 |     label: -tls
 8 |   - title: HTTP/3
 9 |     label: -http
10 | change-template: '- $TITLE (#$NUMBER)'  # quoted: a plain scalar cannot start with "- "
11 | template: |
12 |   ## What’s Changed
13 | 
14 |   $CHANGES


--------------------------------------------------------------------------------
/.github/workflows/archive.yml:
--------------------------------------------------------------------------------
 1 | name: "Archive Issues and Pull Requests"
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '0 0 * * 0,2,4'
 6 |   repository_dispatch:
 7 |     types: [archive]
 8 |   workflow_dispatch:
 9 |     inputs:
10 |       archive_full:
11 |         description: 'Recreate the archive from scratch'
12 |         default: false
13 |         type: boolean
14 | 
15 | jobs:
16 |   build:
17 |     name: "Archive Issues and Pull Requests"
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |     - name: "Checkout"
21 |       uses: actions/checkout@v4
22 | 
23 |     # Note: No caching for this build!
24 | 
25 |     - name: "Update Archive"
26 |       uses: martinthomson/i-d-template@v1
27 |       env:
28 |         ARCHIVE_FULL: ${{ inputs.archive_full }}
29 |       with:
30 |         make: archive
31 |         token: ${{ github.token }}
32 | 
33 |     - name: "Update GitHub Pages"
34 |       uses: martinthomson/i-d-template@v1
35 |       with:
36 |         make: gh-archive
37 |         token: ${{ github.token }}
38 | 
39 |     - name: "Save Archive"
40 |       uses: actions/upload-artifact@v4
41 |       with:
42 |         path: archive.json
43 | 


--------------------------------------------------------------------------------
/.github/workflows/assign-to-project.yml:
--------------------------------------------------------------------------------
 1 | name: Auto Assign to Late Stage Processing Project
 2 | 
 3 | on:
 4 |   issues:
 5 |     types: [opened, labeled]
 6 | env:
 7 |   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 8 | 
 9 | jobs:
10 |   assign_one_project:
11 |     runs-on: ubuntu-latest
12 |     name: Assign to One Project
13 |     steps:
14 |     - name: Assign NEW issues to Late Stage Processing
15 |       uses: srggrs/assign-one-project-github-action@1.2.0
16 |       if: github.event.action == 'opened'
17 |       with:
18 |         project: 'https://github.com/quicwg/base-drafts/projects/5'
19 | 


--------------------------------------------------------------------------------
/.github/workflows/ghpages.yml:
--------------------------------------------------------------------------------
 1 | name: "Update Editor's Copy"
 2 | 
 3 | on:
 4 |   push:
 5 |     paths-ignore:
 6 |     - README.md
 7 |     - CONTRIBUTING.md
 8 |     - LICENSE.md
 9 |     - .gitignore
10 |   pull_request:
11 |     paths-ignore:
12 |     - README.md
13 |     - CONTRIBUTING.md
14 |     - LICENSE.md
15 |     - .gitignore
16 | 
17 | jobs:
18 |   build:
19 |     name: "Update Editor's Copy"
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |     - name: "Checkout"
23 |       uses: actions/checkout@v4
24 | 
25 |     - name: "Setup"
26 |       id: setup
27 |       run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
28 | 
29 |     - name: "Caching"
30 |       uses: actions/cache@v4
31 |       with:
32 |         path: |
33 |           .refcache
34 |           .venv
35 |           .gems
36 |           node_modules
37 |           .targets.mk
38 |         key: i-d-${{ steps.setup.outputs.date }}
39 |         restore-keys: i-d-
40 | 
41 |     - name: "Build Drafts"
42 |       uses: martinthomson/i-d-template@v1
43 |       with:
44 |         token: ${{ github.token }}
45 | 
46 |     - name: "Update GitHub Pages"
47 |       uses: martinthomson/i-d-template@v1
48 |       if: ${{ github.event_name == 'push' }}
49 |       with:
50 |         make: gh-pages
51 |         token: ${{ github.token }}
52 | 
53 |     - name: "Archive Built Drafts"
54 |       uses: actions/upload-artifact@v4
55 |       with:
56 |         path: |
57 |           draft-*.html
58 |           draft-*.txt
59 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: "Publish New Draft Version"
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "draft-*"
 7 |   workflow_dispatch:
 8 |     inputs:
 9 |       email:
10 |         description: "Submitter email"
11 |         default: ""
12 |         type: string
13 | 
14 | jobs:
15 |   build:
16 |     name: "Publish New Draft Version"
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |     - name: "Checkout"
20 |       uses: actions/checkout@v4
21 | 
22 |     # See https://github.com/actions/checkout/issues/290
23 |     - name: "Get Tag Annotations"
24 |       run: git fetch -f origin ${{ github.ref }}:${{ github.ref }}
25 | 
26 |     - name: "Setup"
27 |       id: setup
28 |       run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
29 | 
30 |     - name: "Caching"
31 |       uses: actions/cache@v4
32 |       with:
33 |         path: |
34 |           .refcache
35 |           .venv
36 |           .gems
37 |           node_modules
38 |           .targets.mk
39 |         key: i-d-${{ steps.setup.outputs.date }}
40 |         restore-keys: i-d-
41 | 
42 |     - name: "Build Drafts"
43 |       uses: martinthomson/i-d-template@v1
44 |       with:
45 |         token: ${{ github.token }}
46 | 
47 |     - name: "Upload to Datatracker"
48 |       uses: martinthomson/i-d-template@v1
49 |       with:
50 |         make: upload
51 |       env:
52 |         UPLOAD_EMAIL: ${{ inputs.email }}
53 | 
54 |     - name: "Archive Submitted Drafts"
55 |       uses: actions/upload-artifact@v4
56 |       with:
57 |         path: "versioned/draft-*-[0-9][0-9].*"
58 | 


--------------------------------------------------------------------------------
/.github/workflows/update.yml:
--------------------------------------------------------------------------------
 1 | name: "Update Generated Files"
 2 | # This rule is not run automatically.
 3 | # It can be run manually to update all of the files that are part
 4 | # of the template, specifically:
 5 | #  - README.md
 6 | #  - CONTRIBUTING.md
 7 | #  - .note.xml
 8 | #  - .github/CODEOWNERS
 9 | #  - Makefile
10 | #
11 | #
12 | # This might be useful if you have:
13 | #  - added, removed, or renamed drafts (including after adoption)
14 | #  - added, removed, or changed draft editors
15 | #  - changed the title of drafts
16 | #
17 | # Note that this removes any customizations you have made to
18 | # the affected files.
19 | on: workflow_dispatch
20 | 
21 | jobs:
22 |   build:
23 |     name: "Update Files"
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |     - name: "Checkout"
27 |       uses: actions/checkout@v4
28 | 
29 |     - name: "Update Generated Files"
30 |       uses: martinthomson/i-d-template@v1
31 |       with:
32 |         make: update-files
33 |         token: ${{ github.token }}
34 | 
35 |     - name: "Push Update"
36 |       run: git push
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.html
 2 | *.pdf
 3 | *.redxml
 4 | *.swp
 5 | *.txt
 6 | *.upload
 7 | *~
 8 | .refcache
 9 | .tags
10 | .targets.mk
11 | /*-[0-9][0-9].xml
12 | /lib
13 | /node_modules/
14 | /old/
15 | Gemfile.lock
16 | archive.json
17 | package-lock.json
18 | report.xml
19 | rfc8999.xml
20 | rfc9000.xml
21 | rfc9001.xml
22 | rfc9002.xml
23 | rfc9114.xml
24 | rfc9204.xml
25 | 


--------------------------------------------------------------------------------
/.lint.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | import argparse
 5 | import re
 6 | 
 7 | parser = argparse.ArgumentParser(description="Lint markdown drafts.")
 8 | parser.add_argument("files", metavar="file", nargs="+", help="Files to lint")
 9 | parser.add_argument("-l", dest="maxLineLength", default=80)
10 | parser.add_argument("-f", dest="maxFigureLineLength", default=66)
11 | 
12 | args = parser.parse_args()
13 | 
14 | foundError = False
15 | 
16 | for inputfile in args.files:
17 |     insideFigure = False
18 |     beforeAbstract = True
19 | 
20 |     with open(inputfile, mode="rt", newline=None, encoding="utf-8") as draft:
21 |         linenumber = 0
22 |         lines = draft.readlines()
23 | 
24 |         abstract = re.compile("^--- abstract")
25 |         table = re.compile("^\s*(?:\||{:)")
26 |         figure = re.compile("^[~`]{3,}")
27 | 
28 |         for line in lines:
29 |             line = line.rstrip("\r\n")
30 |             linenumber += 1
31 | 
32 |             def err(msg):
33 |                 global foundError
34 |                 foundError = True
35 |                 sys.stderr.write("{0}:{1}: {2}\n".format(inputfile, linenumber, msg))
36 |                 sys.stderr.write("{0}\n".format(line))
37 | 
38 |             if line.find("\t") >= 0:
39 |                 err("Line contains HTAB")
40 | 
41 |             # Skip everything before abstract
42 |             if beforeAbstract:
43 |                 matchObj = abstract.match(line)
44 |                 if matchObj:
45 |                     beforeAbstract = False
46 |                 continue
47 | 
48 |             # Skip tables
49 |             matchObj = table.match(line)
50 |             if matchObj:
51 |                 continue
52 | 
53 |             # Toggle figure state
54 |             matchObj = figure.match(line)
55 |             if matchObj:
56 |                 insideFigure = not insideFigure
57 |                 continue
58 | 
59 |             # Check length
60 |             length = len(line)
61 |             limit = (
62 |                 int(args.maxFigureLineLength)
63 |                 if insideFigure
64 |                 else int(args.maxLineLength)
65 |             )
66 |             if length > limit:
67 |                 err("Line is {0} characters; limit is {1}".format(length, limit))
68 | 
69 | sys.exit(1 if foundError else 0)
70 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | sudo: false
 3 | dist: trusty
 4 | 
 5 | addons:
 6 |   apt:
 7 |     packages:
 8 |      - python-pip
 9 |      - xsltproc
10 | 
11 | env:
12 |   global:
13 |    - GOPATH="${TRAVIS_BUILD_DIR}/.go_workspace"
14 |    - mmark_src=github.com/miekg/mmark/mmark
15 |    - mmark=./mmark
16 | 
17 | install:
18 |  - pip install xml2rfc
19 |  - if head -1 -q *.md | grep '^\-\-\-' >/dev/null 2>&1; then gem install --no-doc kramdown-rfc2629; fi
20 |  - if head -1 -q *.md | grep '^%%%' >/dev/null 2>&1; then go get "$mmark_src" && go build "$mmark_src"; fi
21 | 
22 | script: make ghpages
23 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # QUIC version 1 is done
 2 | 
 3 | The base-drafts repository is the historical home of the QUIC version 1
 4 | specifications that were written by the IETF QUIC Working Group.
 5 | 
 6 | The set of documents is described [here](https://github.com/quicwg/base-drafts/blob/main/README.md).
 7 | 
 8 | **Be aware that all contributions fall under the "[NOTE WELL](#note-well)" terms
 9 | outlined below and our [Code of Conduct](#code-of-conduct) applies.**
10 | 
11 | # Engaging with the QUIC community
12 | 
13 | The scope of work in the QUIC Working Group is described in our
14 | [charter](https://datatracker.ietf.org/wg/quic/about/) and it extends beyond the
15 | development of the documents held in this repository. Anyone is welcome to
16 | contribute to the QUIC Working Group; you don't have to join the Working Group,
17 | because there is no "membership" -- anyone who participates in the work **is** a
18 | part of the QUIC Working Group.
19 | 
20 | Before doing so, please familiarize yourself with our
21 | [charter](https://datatracker.ietf.org/wg/quic/about/). If you're new to IETF
22 | work, you may also want to read the [Tao of the
23 | IETF](https://www.ietf.org/tao.html).
24 | 
25 | ## Following Discussion
26 | 
27 | The Working Group has a few venues for discussion:
28 | 
29 | * We plan to meet at all [IETF meetings](https://www.ietf.org/meeting/) for the
30 |   foreseeable future, and possibly hold interim meetings between them as
31 |   required. Agendas, minutes, and presentations are available in our [meeting
32 |   materials repository](https://github.com/quicwg/wg-materials) and the
33 |   [official proceedings](https://datatracker.ietf.org/wg/quic/meetings/).
34 | 
35 | * Our [mailing list](https://www.ietf.org/mailman/listinfo/quic) is used for
36 |   most communication, including notifications of meetings, new drafts, consensus
37 |   calls and other business, as well as issue discussion.
38 | 
39 | * We maintain several repositories in our GitHub organization
40 |   [Github](https://github.com/quicwg/). Specific issues are discussed on the
41 |   relevant issues list. If you don't want to use Github to follow these
42 |   discussions, you can subscribe to the [issue announce
43 |   list](https://www.ietf.org/mailman/listinfo/quic-issues).
44 | 
45 | * The [quicdev Slack](https://quicdev.slack.com/) is used for more realtime
46 |   communication, typically amongst implementers, operators and researchers.
47 |   Contact the [WG chairs](mailto:quic-chairs@ietf.org) for an invitation. Note
48 |   that discussions on Slack are subject to the contribution guideline described
49 |   in this document.
50 | 
51 | To be active in the Working Group, you can participate in any of these places.
52 | Most activity takes place on the mailing list, but if you just want to comment
53 | on and raise issues, that's fine too.
54 | 
55 | ## Code of Conduct
56 | 
57 | The [IETF Guidelines for Conduct](https://tools.ietf.org/html/rfc7154) applies to all Working Group
58 | communications and meetings.
59 | 
60 | 
61 | ## NOTE WELL
62 | 
63 | Any submission to the [IETF](https://www.ietf.org/) intended by the Contributor for publication as
64 | all or part of an IETF Internet-Draft or RFC and any statement made within the context of an IETF
65 | activity is considered an "IETF Contribution". Such statements include oral statements in IETF
66 | sessions, as well as written and electronic communications made at any time or place, which are
67 | addressed to:
68 | 
69 |  * The IETF plenary session
70 |  * The IESG, or any member thereof on behalf of the IESG
71 |  * Any IETF mailing list, including the IETF list itself, any working group
72 |    or design team list, or any other list functioning under IETF auspices
73 |  * Any IETF working group or portion thereof
74 |  * Any Birds of a Feather (BOF) session
75 |  * The IAB or any member thereof on behalf of the IAB
76 |  * The RFC Editor or the Internet-Drafts function
77 |  * All IETF Contributions are subject to the rules of
78 |    [RFC 5378](https://tools.ietf.org/html/rfc5378) and
79 |    [RFC 8179](https://tools.ietf.org/html/rfc8179).
80 | 
81 | Statements made outside of an IETF session, mailing list or other function, that are clearly not
82 | intended to be input to an IETF activity, group or function, are not IETF Contributions in the
83 | context of this notice.
84 | 
85 | Please consult [RFC 5378](https://tools.ietf.org/html/rfc5378) and [RFC 8179](https://tools.ietf.org/html/rfc8179) for details.
86 | 
87 | A participant in any IETF activity is deemed to accept all IETF rules of process, as documented in
88 | Best Current Practices RFCs and IESG Statements.
89 | 
90 | A participant in any IETF activity acknowledges that written, audio and video records of meetings
91 | may be made and may be available to the public.
92 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | MD_PREPROCESSOR := sed -e 's/{DATE}/$(shell date '+%Y-%m-%d')/g'
 2 | TIDY := true
 3 | 
 4 | LIBDIR := lib
 5 | include $(LIBDIR)/main.mk
 6 | 
 7 | $(LIBDIR)/main.mk:
 8 | ifneq (,$(shell git submodule status $(LIBDIR) 2>/dev/null))
 9 | 	git submodule sync
10 | 	git submodule update $(CLONE_ARGS) --init
11 | else
12 | 	git clone -q --depth 10 $(CLONE_ARGS) \
13 | 	    -b main https://github.com/martinthomson/i-d-template $(LIBDIR)
14 | endif
15 | 
16 | latest:: lint
17 | .PHONY: lint
18 | 
19 | lint::
20 | 	@$(trace) wslint $(python) ./.lint.py $(addsuffix .md,$(drafts))
21 | 
22 | show-next:
23 | 	@echo $(drafts_next)
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # QUIC Protocol Drafts
 2 | 
 3 | The base-drafts repository is the historical home of the QUIC version 1
 4 | specifications that were written by the QUIC Working Group.
 5 | 
 6 | **The documents have now been published as RFCs. Technical or editorial
 7 | errata can be reported to the RFC Editor using the [errata
 8 | tool](https://www.rfc-editor.org/errata.php).**
 9 | 
10 | **The QUIC Working Group welcomes discussion about new versions of QUIC, and new
11 | extensions to QUIC, or other proposals related to the QUIC transport. See
12 | [Engaging with the QUIC
13 | community](https://github.com/quicwg/base-drafts/blob/main/CONTRIBUTING.md#engaging-with-the-quic-community)
14 | for guidance.**
15 | 
16 | ## QUIC Invariants
17 | 
18 | * [RFC 8999](https://quicwg.org/base-drafts/rfc8999.html)
19 | * [Datatracker](https://datatracker.ietf.org/doc/html/draft-ietf-quic-invariants)
20 | 
21 | ## Core Transport Protocol
22 | 
23 | * [RFC 9000](https://quicwg.org/base-drafts/rfc9000.html)
24 | * [Working Group Draft](https://datatracker.ietf.org/doc/html/draft-ietf-quic-transport)
25 | 
26 | ## Loss Detection & Congestion Control
27 | 
28 | * [RFC 9002](https://quicwg.org/base-drafts/rfc9002.html)
29 | * [Working Group Draft](https://datatracker.ietf.org/doc/html/draft-ietf-quic-recovery)
30 | 
31 | ## TLS Mapping
32 | 
33 | * [RFC 9001](https://quicwg.org/base-drafts/rfc9001.html)
34 | * [Datatracker](https://datatracker.ietf.org/doc/html/draft-ietf-quic-tls)
35 | 
36 | ## HTTP Mapping
37 | 
38 | * [RFC 9114](https://quicwg.org/base-drafts/rfc9114.html)
39 | * [Datatracker](https://datatracker.ietf.org/doc/html/draft-ietf-quic-http)
40 | 
41 | ## QPACK
42 | 
43 | * [RFC 9204](https://quicwg.org/base-drafts/rfc9204.html)
44 | * [Datatracker](https://datatracker.ietf.org/doc/html/draft-ietf-quic-qpack)
45 | 


--------------------------------------------------------------------------------
/ietf.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "group": "quic",
 3 |     "group_info": {
 4 |         "name": "QUIC",
 5 |         "type": "wg",
 6 |         "email": "quic@ietf.org",
 7 |         "activity_exclude_labels": ["editorial"]
 8 |     },
 9 |     "repo_type": "specs",
10 |     "revisions_tagged": true,
11 |     "activity_summary_to": ["group_email"]
12 | }


--------------------------------------------------------------------------------
/protection-samples.js:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | ':' //; exec "$(command -v nodejs || command -v node)" "$0" "$@"
  3 | 
  4 | // This script performs simple encryption and decryption for Initial packets.
  5 | // It's crude, but it should be sufficient to generate examples.
  6 | 
  7 | 
  8 | 'use strict';
  9 | require('buffer');
 10 | const assert = require('assert');
 11 | const crypto = require('crypto');
 12 | 
 13 | const INITIAL_SALT = Buffer.from('38762cf7f55934b34d179ae6a4c80cadccbb7f0a', 'hex');
 14 | const RETRY_KEY = Buffer.from('be0c690b9f66575a1d766b54e368c84e', 'hex');
 15 | const RETRY_NONCE = Buffer.from('461599d35d632bf2239825bb', 'hex');
 16 | const SHA256 = 'sha256';
 17 | const AES_GCM = 'aes-128-gcm';
 18 | const AES_ECB = 'aes-128-ecb';
 19 | 
 20 | const version = '00000001';
 21 | 
 22 | function chunk(s, n) {
 23 |   return (new Array(Math.ceil(s.length / n)))
 24 |     .fill()
 25 |     .map((_, i) => s.slice(i * n, i * n + n));
 26 | }
 27 | 
 28 | function log(m, k) {
 29 |   console.log(m + ' [' + k.length + ']: ' + chunk(k.toString('hex'), 32).join(' '));
 30 | };
 31 | 
 32 | class HMAC {
 33 |   constructor(hash) {
 34 |     this.hash = hash;
 35 |   }
 36 | 
 37 |   digest(key, input) {
 38 |     var hmac = crypto.createHmac(this.hash, key);
 39 |     hmac.update(input);
 40 |     return hmac.digest();
 41 |   }
 42 | }
 43 | 
 44 | /* HKDF as defined in RFC5869, with HKDF-Expand-Label from RFC8446. */
 45 | class QHKDF {
 46 |   constructor(hmac, prk) {
 47 |     this.hmac = hmac; // provides digest(key, input); used by expand()
 48 |     this.prk = prk; // passed as the HMAC key for every block expand() produces
 49 |   }
 50 | 
 51 |   static extract(hash, salt, ikm) { // new instance whose prk is the HMAC of ikm keyed by salt
 52 |     var hmac = new HMAC(hash);
 53 |     return new QHKDF(hmac, hmac.digest(salt, ikm));
 54 |   }
 55 | 
 56 |   expand(info, len) { // returns the first len bytes of the chained HMAC output
 57 |     var output = Buffer.alloc(0);
 58 |     var T = Buffer.alloc(0); // previous block; empty for the first round
 59 |     info = Buffer.from(info, 'ascii');
 60 |     var counter = 0;
 61 |     var cbuf = Buffer.alloc(1); // one-octet block counter; first value written is 1
 62 |     while (output.length < len) {
 63 |       cbuf.writeUIntBE(++counter, 0, 1);
 64 |       T = this.hmac.digest(this.prk, Buffer.concat([T, info, cbuf]));
 65 |       output = Buffer.concat([output, T]);
 66 |     }
 67 | 
 68 |     return output.slice(0, len);
 69 |   }
 70 | 
 71 |   expand_label(label, len) { // expand() over: 2-byte len, 1-byte label size, "tls13 " + label, zero octet
 72 |     const prefix = "tls13 ";
 73 |     var info = Buffer.alloc(2 + 1 + prefix.length + label.length + 1);
 74 |     // Note that Buffer.write returns the number of bytes written, whereas
 75 |     // Buffer.writeUIntBE returns the end offset of the write.  Consistency FTW.
 76 |     var offset = info.writeUIntBE(len, 0, 2);
 77 |     offset = info.writeUIntBE(prefix.length + label.length, offset, 1);
 78 |     offset += info.write(prefix + label, offset);
 79 |     info.writeUIntBE(0, offset, 1); // final octet, the trailing "+ 1" in the allocation
 80 |     log('info for ' + label, info);
 81 |     return this.expand(info, len);
 82 |   }
 83 | }
 84 | 
 85 | // XOR b into a.
 86 | function xor(a, b) {
 87 |     a.forEach((_, i) => {
 88 |       a[i] ^= b[i];
 89 |     });
 90 | }
 91 | 
 92 | function applyNonce(iv, counter) { // returns iv with counter XORed into its last 6 bytes
 93 |   var nonce = Buffer.from(iv); // work on a new buffer (iv is a Buffer at the call site in this file)
 94 |   const m = nonce.readUIntBE(nonce.length - 6, 6);
 95 |   const x = ((m ^ counter) & 0xffffff) + // low 24 bits
 96 |      ((((m / 0x1000000) ^ (counter / 0x1000000)) & 0xffffff) * 0x1000000); // high 24 bits, handled separately because ^ operates on 32-bit integers
 97 |   nonce.writeUIntBE(x, nonce.length - 6, 6);
 98 |   return nonce;
 99 | }
100 | 
101 | class InitialProtection {
102 |   constructor(label, cid) {
103 |     var qhkdf = QHKDF.extract(SHA256, INITIAL_SALT, cid);
104 |     log('initial_secret', qhkdf.prk);
105 |     qhkdf = new QHKDF(qhkdf.hmac, qhkdf.expand_label(label, 32));
106 |     log(label + ' secret', qhkdf.prk);
107 |     this.key = qhkdf.expand_label("quic key", 16);
108 |     log(label + ' key', this.key);
109 |     this.iv = qhkdf.expand_label("quic iv", 12);
110 |     log(label + ' iv', this.iv);
111 |     this.hp = qhkdf.expand_label("quic hp", 16);
112 |     log(label + ' hp', this.hp);
113 |   }
114 | 
115 |   generateNonce(counter) {
116 |     return applyNonce(this.iv, counter);
117 |   }
118 | 
119 |   // Returns the encrypted data with authentication tag appended.  The AAD is
120 |   // used, but not added to the output.
121 |   encipher(pn, aad, data) {
122 |     console.log('encipher pn', pn);
123 |     log('encipher aad', aad);
124 |     log('encipher data', data);
125 |     var nonce = this.generateNonce(pn);
126 |     var gcm = crypto.createCipheriv(AES_GCM, this.key, nonce);
127 |     gcm.setAAD(aad);
128 |     var e = gcm.update(data);
129 |     gcm.final();
130 |     e = Buffer.concat([e, gcm.getAuthTag()]);
131 |     log('enciphered', e);
132 |     return e;
133 |   }
134 | 
135 |   decipher(pn, aad, data) {
136 |     console.log('decipher pn', pn);
137 |     log('decipher aad', aad);
138 |     log('decipher data', data);
139 |     var nonce = this.generateNonce(pn);
140 |     var gcm = crypto.createDecipheriv(AES_GCM, this.key, nonce);
141 |     gcm.setAAD(aad);
142 |     gcm.setAuthTag(data.slice(data.length - 16));
143 |     var d = gcm.update(data.slice(0, data.length - 16));
144 |     gcm.final();
145 |     log('deciphered', d);
146 |     return d;
147 |   }
148 | 
149 |   // Calculates the header protection mask.  Returns 16 bytes of output.
150 |   hpMask(sample) {
151 |     log('hp sample', sample);
152 |     // var ctr = crypto.createCipheriv('aes-128-ctr', this.hp, sample);
153 |     // var mask = ctr.update(Buffer.alloc(5));
154 |     var ecb = crypto.createCipheriv(AES_ECB, this.hp, Buffer.alloc(0));
155 |     var mask = ecb.update(sample);
156 |     log('hp mask', mask);
157 |     return mask;
158 |   }
159 | 
160 |   // hdr is everything before the length field
161 |   // hdr[0] has the packet number length already in place
162 |   // pn is the packet number
163 |   // data is the payload (i.e., encoded frames)
164 |   encrypt(hdr, pn, data) {
165 |     var pn_len = 1 + (hdr[0] & 0x3);
166 |     if (pn_len + data.length < 4) {
167 |       throw new Error('insufficient length of packet number and payload');
168 |     }
169 | 
170 |     var aad = Buffer.alloc(hdr.length + 2 + pn_len);
171 |     var offset = hdr.copy(aad);
172 |     // Add a length that covers the packet number encoding and the auth tag.
173 |     offset = aad.writeUIntBE(0x4000 | (pn_len + data.length + 16), offset, 2);
174 |     var pn_offset = offset;
175 |     var pn_mask = 0xffffffff >> (8 * (4 - pn_len));
176 |     offset = aad.writeUIntBE(pn & pn_mask, offset, pn_len)
177 |     log('header', aad);
178 | 
179 |     var payload = this.encipher(pn, aad, data);
180 | 
181 |     var mask = this.hpMask(payload.slice(4 - pn_len, 20 - pn_len));
182 |     aad[0] ^= mask[0] & (0x1f >> (aad[0] >> 7));
183 |     xor(aad.slice(pn_offset), mask.slice(1));
184 |     log('masked header', aad);
185 |     return Buffer.concat([aad, payload]);
186 |   }
187 | 
188 |   cidLen(v) {
189 |     if (!v) {
190 |       return 0;
191 |     }
192 |     return v + 3;
193 |   }
194 | 
195 |   decrypt(data) {
196 |     log('decrypt', data);
197 |     if (data[0] & 0x40 !== 0x40) {
198 |       throw new Error('missing QUIC bit');
199 |     }
200 |     if (data[0] & 0x80 === 0) {
201 |       throw new Error('short header unsupported');
202 |     }
203 |     var hdr_len = 1 + 4;
204 |     hdr_len += 1 + data[hdr_len]; // DCID
205 |     hdr_len += 1 + data[hdr_len]; // SCID
206 |     if ((data[0] & 0x30) === 0) { // Initial packet: token.
207 |       if ((data[hdr_len] & 0xc0) !== 0) {
208 |         throw new Error('multi-byte token length unsupported');
209 |       }
210 |       hdr_len += 1 + data[hdr_len];  // oops: this only handles single octet lengths.
211 |     }
212 |     // Skip the length.
213 |     hdr_len += 1 << (data[hdr_len] >> 6);
214 |     // Now we're at the encrypted bit.
215 |     var mask = this.hpMask(data.slice(hdr_len + 4, hdr_len + 20));
216 | 
217 |     var octet0 = data[0] ^ (mask[0] & (0x1f >> (data[0] >> 7)));
218 |     var pn_len = (octet0 & 3) + 1;
219 |     var hdr = Buffer.from(data.slice(0, hdr_len + pn_len));
220 |     hdr[0] = octet0;
221 |     log('header', hdr);
222 |     xor(hdr.slice(hdr_len), mask.slice(1));
223 |     log('unmasked header', hdr);
224 |     var pn = hdr.readUIntBE(hdr_len, pn_len);
225 |     // Important: this doesn't recover PN based on expected value.
226 |     // The expectation being that Initial packets won't ever need that.
227 |     return this.decipher(pn, hdr, data.slice(hdr.length));
228 |   }
229 | }
230 | 
231 | function pad(hdr, body) {
232 |   var pn_len = (hdr[0] & 3) + 1;
233 |   var size = 1200 - hdr.length - 2 - pn_len - 16; // Assume 2 byte length.
234 |   if (size < 0) {
235 |     return body;
236 |   }
237 |   var padded = Buffer.allocUnsafe(size);
238 |   console.log('pad amount', size);
239 |   body.copy(padded);
240 |   padded.fill(0, body.length);
241 |   log('padded', padded);
242 |   return padded;
243 | }
244 | 
245 | function test(role, cid, hdr, pn, body) {
246 |   cid = Buffer.from(cid, 'hex');
247 |   log('connection ID', cid);
248 |   hdr = Buffer.from(hdr, 'hex');
249 |   log('header', hdr);
250 |   console.log('packet number = ' + pn);
251 |   body = Buffer.from(body, 'hex');
252 |   log('body', hdr);
253 | 
254 |   if (role === 'client' && (hdr[0] & 0x30) === 0) {
255 |     body = pad(hdr, body);
256 |   }
257 | 
258 |   var endpoint = new InitialProtection(role + ' in', cid);
259 |   var packet = endpoint.encrypt(hdr, pn, body);
260 |   log('encrypted packet', packet);
261 | 
262 |   var content = endpoint.decrypt(packet);
263 |   log('decrypted content', content);
264 |   if (content.compare(body) !== 0) {
265 |     throw new Error('decrypted result not the same as the original');
266 |   }
267 | }
268 | 
269 | function hex_cid(cid) {
270 |   return '0' + (cid.length / 2).toString(16) + cid;
271 | }
272 | 
273 | // Verify that the retry keys are correct.
274 | function derive_retry() { // Re-derives the retry key and nonce from a fixed secret and asserts they equal RETRY_KEY / RETRY_NONCE.
275 |   let secret = Buffer.from('d9c9943e6101fd200021506bcc02814c73030f25c79d71ce876eca876e6fca8e', 'hex');
276 |   let qhkdf = new QHKDF(new HMAC(SHA256), secret);
277 |   let key = qhkdf.expand_label("quic key", 16); // 16 bytes requested
278 |   log('retry key', key);
279 |   assert.deepStrictEqual(key, RETRY_KEY); // throws if the hard-coded constant does not match
280 |   let nonce = qhkdf.expand_label("quic iv", 12); // 12 bytes requested
281 |   log('retry nonce', nonce);
282 |   assert.deepStrictEqual(nonce, RETRY_NONCE);
283 | }
284 | 
285 | function retry(dcid, scid, odcid) { // Builds a sample Retry packet from hex connection IDs and logs it with its integrity tag.
286 |   var pfx = Buffer.from(hex_cid(odcid), 'hex'); // length-prefixed original DCID; part of the AAD only
287 |   var encoded = Buffer.from('ff' + version + hex_cid(dcid) + hex_cid(scid), 'hex');
288 |   var token = Buffer.from('token', 'ascii'); // fixed sample token
289 |   var header = Buffer.concat([encoded, token]);
290 |   log('retry header', header);
291 |   var aad = Buffer.concat([pfx, header]);
292 |   log('retry aad', aad);
293 | 
294 |   var gcm = crypto.createCipheriv(AES_GCM, RETRY_KEY, RETRY_NONCE);
295 |   gcm.setAAD(aad);
296 |   gcm.update(''); // empty plaintext: only the authentication tag is wanted
297 |   gcm.final();
298 |   log('retry', Buffer.concat([header, gcm.getAuthTag()])); // logged value is header + tag; pfx is not included
299 | }
300 | 
301 | // A simple ChaCha20-Poly1305 packet.
302 | function chacha20(pn, payload) {
303 |   log('chacha20poly1305 pn=' + pn.toString(), payload);
304 |   let header = Buffer.alloc(4);
305 |   header.writeUIntBE(0x42, 0, 1);
306 |   header.writeUIntBE(pn & 0xffffff, 1, 3);
307 |   log('unprotected header', header);
308 |   const key = Buffer.from('c6d98ff3441c3fe1b2182094f69caa2e' +
309 |                           'd4b716b65488960a7a984979fb23e1c8', 'hex');
310 |   const iv = Buffer.from('e0459b3474bdd0e44a41c144', 'hex');
311 |   const nonce = applyNonce(iv, pn);
312 |   log('nonce', nonce);
313 |   let aead = crypto.createCipheriv('ChaCha20-Poly1305', key, nonce, { authTagLength: 16 });
314 |   aead.setAAD(header);
315 |   const e = aead.update(payload);
316 |   aead.final();
317 |   let ct = Buffer.concat([e, aead.getAuthTag()]);
318 |   log('ciphertext', ct);
319 | 
320 |   const sample = ct.slice(1, 17);
321 |   log('sample', sample);
322 |   const hp = Buffer.from('25a282b9e82f06f21f488917a4fc8f1b' +
323 |                          '73573685608597d0efcb076b0ab7a7a4', 'hex');
324 |   let chacha = crypto.createCipheriv('ChaCha20', hp, sample);
325 |   const mask = chacha.update(Buffer.alloc(5));
326 |   log('mask', mask);
327 |   let packet = Buffer.concat([header, ct]);
328 |   header[0] ^= mask[0] & 0x1f;
329 |   xor(header.slice(1), mask.slice(1));
330 |   log('header', header);
331 |   log('protected packet', Buffer.concat([header, ct]));
332 | }
333 | 
334 | var cid = '8394c8f03e515708'; // client's destination connection ID; test() hands it to InitialProtection
335 | 
336 | var ci_hdr = 'c3' + version + hex_cid(cid) + '0000'; // first byte 0xc3 (4-byte packet number), DCID = cid, zero-length SCID and token
337 | // This is a client Initial.
338 | var crypto_frame = '060040f1' +
339 |     '010000ed0303ebf8fa56f12939b9584a3896472ec40bb863cfd3e86804fe3a47' +
340 |     'f06a2b69484c00000413011302010000c000000010000e00000b6578616d706c' +
341 |     '652e636f6dff01000100000a00080006001d0017001800100007000504616c70' +
342 |     '6e000500050100000000003300260024001d00209370b2c9caa47fbabaf4559f' +
343 |     'edba753de171fa71f50f1ce15d43e994ec74d748002b0003020304000d001000' +
344 |     '0e0403050306030203080408050806002d00020101001c000240010039003204' +
345 |     '08ffffffffffffffff05048000ffff07048000ffff0801100104800075300901' +
346 |     '100f088394c8f03e51570806048000ffff';
347 | 
348 | test('client', cid, ci_hdr, 2, crypto_frame); // packet number 2
349 | 
350 | // This should be a valid server Initial.
351 | var frames = '02000000000600405a' +
352 |     '020000560303eefce7f7b37ba1d163' +
353 |     '2e96677825ddf73988cfc79825df566dc5430b9a04' +
354 |     '5a1200130100002e00330024001d00209d3c940d89' +
355 |     '690b84d08a60993c144eca684d1081287c834d5311' +
356 |     'bcf32bb9da1a002b00020304';
357 | var scid = 'f067a5502a4262b5';
358 | var si_hdr = 'c1' + version + '00' + hex_cid(scid) + '00'; // first byte 0xc1 (2-byte packet number), empty DCID, SCID = scid, zero-length token
359 | test('server', cid, si_hdr, 1, frames); // same cid as the client packet; packet number 1
360 | 
361 | derive_retry();
362 | retry('', scid, cid); // empty DCID, SCID = scid, original DCID = cid
363 | chacha20(654360564, Buffer.from('01', 'hex')); // one-byte payload 0x01
364 | 


--------------------------------------------------------------------------------
/rfc8999.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Version-Independent Properties of QUIC"
  3 | abbrev: QUIC Invariants
  4 | number: 8999
  5 | docName: draft-ietf-quic-invariants-13
  6 | date: 2021-05
  7 | category: std
  8 | consensus: true
  9 | ipr: trust200902
 10 | area: Transport
 11 | workgroup: QUIC
 12 | keyword:
 13 |   - crypto
 14 |   - next generation
 15 |   - protocol
 16 |   - secure
 17 |   - transport
 18 |   - UDP
 19 | 
 20 | stand_alone: yes
 21 | pi: [toc, sortrefs, symrefs, docmapping]
 22 | 
 23 | author:
 24 |   -
 25 |     ins: M. Thomson
 26 |     name: Martin Thomson
 27 |     org: Mozilla
 28 |     email: mt@lowentropy.net
 29 | 
 30 | informative:
 31 | 
 32 |   QUIC-TRANSPORT:
 33 |     title: "QUIC: A UDP-Based Multiplexed and Secure Transport"
 34 |     date: 2021-05
 35 |     seriesinfo:
 36 |       RFC: 9000
 37 |       DOI: 10.17487/RFC9000
 38 |     author:
 39 |       -
 40 |         ins: J. Iyengar
 41 |         name: Jana Iyengar
 42 |         org: Google
 43 |         role: editor
 44 |       -
 45 |         ins: M. Thomson
 46 |         name: Martin Thomson
 47 |         org: Mozilla
 48 |         role: editor
 49 | 
 50 |   QUIC-TLS:
 51 |     title: "Using TLS to Secure QUIC"
 52 |     date: 2021-05
 53 |     seriesinfo:
 54 |       RFC: 9001
 55 |       DOI: 10.17487/RFC9001
 56 |     author:
 57 |       -
 58 |         ins: M. Thomson
 59 |         name: Martin Thomson
 60 |         org: Mozilla
 61 |         role: editor
 62 |       -
 63 |         ins: S. Turner
 64 |         name: Sean Turner
 65 |         org: sn3rd
 66 |         role: editor
 67 | 
 68 | 
 69 | --- abstract
 70 | 
 71 | This document defines the properties of the QUIC transport protocol that are
 72 | common to all versions of the protocol.
 73 | 
 74 | 
 75 | --- middle
 76 | 
 77 | # An Extremely Abstract Description of QUIC
 78 | 
 79 | QUIC is a connection-oriented protocol between two endpoints.  Those endpoints
 80 | exchange UDP datagrams.  These UDP datagrams contain QUIC packets.  QUIC
 81 | endpoints use QUIC packets to establish a QUIC connection, which is shared
 82 | protocol state between those endpoints.
 83 | 
 84 | 
 85 | # Fixed Properties of All QUIC Versions
 86 | 
 87 | In addition to providing secure, multiplexed transport, QUIC {{QUIC-TRANSPORT}}
 88 | allows for the option to negotiate a version.  This allows the protocol to
 89 | change over time in response to new requirements.  Many characteristics of the
 90 | protocol could change between versions.
 91 | 
 92 | This document describes the subset of QUIC that is intended to remain stable as
 93 | new versions are developed and deployed.  All of these invariants are
 94 | independent of the IP version.
 95 | 
 96 | The primary goal of this document is to ensure that it is possible to deploy new
 97 | versions of QUIC.  By documenting the properties that cannot change, this
 98 | document aims to preserve the ability for QUIC endpoints to negotiate changes to
 99 | any other aspect of the protocol.  As a consequence, this also guarantees a
100 | minimal amount of information that is made available to entities other than
101 | endpoints.  Unless specifically prohibited in this document, any aspect of the
102 | protocol can change between different versions.
103 | 
104 | {{bad-assumptions}} contains a non-exhaustive list of some incorrect assumptions
105 | that might be made based on knowledge of QUIC version 1; these do not apply to
106 | every version of QUIC.
107 | 
108 | 
109 | # Conventions and Definitions
110 | 
111 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
112 | "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this
113 | document are to be interpreted as described in BCP 14 {{!RFC2119}} {{!RFC8174}}
114 | when, and only when, they appear in all capitals, as shown here.
115 | 
116 | This document defines requirements on future QUIC versions, even where normative
117 | language is not used.
118 | 
119 | This document uses terms and notational conventions from {{QUIC-TRANSPORT}}.
120 | 
121 | 
122 | # Notational Conventions
123 | 
124 | The format of packets is described using the notation defined in this section.
125 | This notation is the same as that used in {{QUIC-TRANSPORT}}.
126 | 
127 | Complex fields are named and then followed by a list of fields surrounded by a
128 | pair of matching braces. Each field in this list is separated by commas.
129 | 
130 | Individual fields include length information, plus indications about fixed
131 | value, optionality, or repetitions. Individual fields use the following
132 | notational conventions, with all lengths in bits:
133 | 
134 | x (A):
135 | : Indicates that x is A bits long
136 | 
137 | x (A..B):
138 | : Indicates that x can be any length from A to B; A can be omitted to indicate
139 |   a minimum of zero bits, and B can be omitted to indicate no set upper limit;
140 |   values in this format always end on a byte boundary
141 | 
142 | x (L) = C:
143 | : Indicates that x has a fixed value of C; the length of x is described by
144 |   L, which can use any of the length forms above
145 | 
146 | x (L) ...:
147 | : Indicates that x is repeated zero or more times and that each instance has a
148 |   length of L
149 | 
150 | This document uses network byte order (that is, big endian) values.  Fields
151 | are placed starting from the high-order bits of each byte.
152 | 
153 | {{fig-ex-format}} shows an example structure:
154 | 
155 | ~~~
156 | Example Structure {
157 |   One-bit Field (1),
158 |   7-bit Field with Fixed Value (7) = 61,
159 |   Arbitrary-Length Field (..),
160 |   Variable-Length Field (8..24),
161 |   Repeated Field (8) ...,
162 | }
163 | ~~~
164 | {: #fig-ex-format title="Example Format"}
165 | 
166 | 
167 | # QUIC Packets
168 | 
169 | QUIC endpoints exchange UDP datagrams that contain one or more QUIC packets.
170 | This section describes the invariant characteristics of a QUIC packet.  A
171 | version of QUIC could permit multiple QUIC packets in a single UDP datagram, but
172 | the invariant properties only describe the first packet in a datagram.
173 | 
174 | QUIC defines two types of packet headers: long and short.  Packets with a long
175 | header are identified by the most significant bit of the first byte being set;
176 | packets with a short header have that bit cleared.
177 | 
178 | QUIC packets might be integrity protected, including the header.  However, QUIC
179 | Version Negotiation packets are not integrity protected; see {{vn}}.
180 | 
181 | Aside from the values described here, the payload of QUIC packets is
182 | version specific and of arbitrary length.
183 | 
184 | 
185 | ## Long Header
186 | 
187 | Long headers take the form described in {{fig-long}}.
188 | 
189 | ~~~
190 | Long Header Packet {
191 |   Header Form (1) = 1,
192 |   Version-Specific Bits (7),
193 |   Version (32),
194 |   Destination Connection ID Length (8),
195 |   Destination Connection ID (0..2040),
196 |   Source Connection ID Length (8),
197 |   Source Connection ID (0..2040),
198 |   Version-Specific Data (..),
199 | }
200 | ~~~
201 | {: #fig-long title="QUIC Long Header"}
202 | 
203 | A QUIC packet with a long header has the high bit of the first byte set to 1.
204 | All other bits in that byte are version specific.
205 | 
206 | The next four bytes include a 32-bit Version field.  Versions are described in
207 | {{version}}.
208 | 
209 | The next byte contains the length in bytes of the Destination Connection ID
210 | field that follows it.  This length is encoded as an 8-bit unsigned integer.
211 | The Destination Connection ID field follows the Destination Connection ID Length
212 | field and is between 0 and 255 bytes in length.  Connection IDs are described in
213 | {{connection-id}}.
214 | 
215 | The next byte contains the length in bytes of the Source Connection ID field
216 | that follows it.  This length is encoded as an 8-bit unsigned integer.  The
217 | Source Connection ID field follows the Source Connection ID Length field and is
218 | between 0 and 255 bytes in length.
219 | 
220 | The remainder of the packet contains version-specific content.
221 | 
222 | 
223 | ## Short Header
224 | 
225 | Short headers take the form described in {{fig-short}}.
226 | 
227 | ~~~~~
228 | Short Header Packet {
229 |   Header Form (1) = 0,
230 |   Version-Specific Bits (7),
231 |   Destination Connection ID (..),
232 |   Version-Specific Data (..),
233 | }
234 | ~~~~~
235 | {: #fig-short title="QUIC Short Header"}
236 | 
237 | A QUIC packet with a short header has the high bit of the first byte set to 0.
238 | 
239 | A QUIC packet with a short header includes a Destination Connection ID
240 | immediately following the first byte.  The short header does not include the
241 | Destination Connection ID Length, Source Connection ID Length, Source Connection
242 | ID, or Version fields.  The length of the Destination Connection ID is not
243 | encoded in packets with a short header and is not constrained by this
244 | specification.
245 | 
246 | The remainder of the packet has version-specific semantics.
247 | 
248 | 
249 | ## Connection ID
250 | 
251 | A connection ID is an opaque field of arbitrary length.
252 | 
253 | The primary function of a connection ID is to ensure that changes in addressing
254 | at lower protocol layers (UDP, IP, and below) do not cause packets for a QUIC
255 | connection to be delivered to the wrong QUIC endpoint.  The connection ID
256 | is used by endpoints and the intermediaries that support them to ensure that
257 | each QUIC packet can be delivered to the correct instance of an endpoint.  At
258 | the endpoint, the connection ID is used to identify the QUIC connection for
259 | which the packet is intended.
260 | 
261 | The connection ID is chosen by each endpoint using version-specific methods.
262 | Packets for the same QUIC connection might use different connection ID values.
263 | 
264 | 
265 | ## Version
266 | 
267 | The Version field contains a 4-byte identifier.  This value can be used by
268 | endpoints to identify a QUIC version.  A Version field with a value of
269 | 0x00000000 is reserved for version negotiation; see {{vn}}.  All other values
270 | are potentially valid.
271 | 
272 | The properties described in this document apply to all versions of QUIC. A
273 | protocol that does not conform to the properties described in this document is
274 | not QUIC.  Future documents might describe additional properties that apply to
275 | a specific QUIC version or to a range of QUIC versions.
276 | 
277 | 
278 | # Version Negotiation {#vn}
279 | 
280 | A QUIC endpoint that receives a packet with a long header and a version it
281 | either does not understand or does not support might send a Version Negotiation
282 | packet in response.  Packets with a short header do not trigger version
283 | negotiation.
284 | 
285 | A Version Negotiation packet sets the high bit of the first byte, and thus it
286 | conforms with the format of a packet with a long header as defined in
287 | {{long-header}}.  A Version Negotiation packet is identifiable as such by the
288 | Version field, which is set to 0x00000000.
289 | 
290 | ~~~
291 | Version Negotiation Packet {
292 |   Header Form (1) = 1,
293 |   Unused (7),
294 |   Version (32) = 0,
295 |   Destination Connection ID Length (8),
296 |   Destination Connection ID (0..2040),
297 |   Source Connection ID Length (8),
298 |   Source Connection ID (0..2040),
299 |   Supported Version (32) ...,
300 | }
301 | ~~~
302 | {: #version-negotiation-format title="Version Negotiation Packet"}
303 | 
304 | Only the most significant bit of the first byte of a Version Negotiation packet
305 | has any defined value.  The remaining 7 bits, labeled "Unused", can be set to
306 | any value when sending and MUST be ignored on receipt.
307 | 
308 | After the Source Connection ID field, the Version Negotiation packet contains a
309 | list of Supported Version fields, each identifying a version that the endpoint
310 | sending the packet supports.  A Version Negotiation packet contains no other
311 | fields.  An endpoint MUST ignore a packet that contains no Supported Version
312 | fields or contains a truncated Supported Version value.
313 | 
314 | Version Negotiation packets do not use integrity or confidentiality protection.
315 | Specific QUIC versions might include protocol elements that allow endpoints to
316 | detect modification or corruption in the set of supported versions.
317 | 
318 | An endpoint MUST include the value from the Source Connection ID field of the
319 | packet it receives in the Destination Connection ID field.  The value for the
320 | Source Connection ID field MUST be copied from the Destination Connection ID
321 | field of the received packet, which is initially randomly selected by a client.
322 | Echoing both connection IDs gives clients some assurance that the server
323 | received the packet and that the Version Negotiation packet was not generated by
324 | an attacker that is unable to observe packets.
325 | 
326 | An endpoint that receives a Version Negotiation packet might change the version
327 | that it decides to use for subsequent packets.  The conditions under which an
328 | endpoint changes its QUIC version will depend on the version of QUIC that it
329 | chooses.
330 | 
331 | See {{QUIC-TRANSPORT}} for a more thorough description of how an endpoint that
332 | supports QUIC version 1 generates and consumes a Version Negotiation packet.
333 | 
334 | 
335 | # Security and Privacy Considerations
336 | 
337 | It is possible that middleboxes could observe traits of a specific version of
338 | QUIC and assume that when other versions of QUIC exhibit similar traits the same
339 | underlying semantic is being expressed.  There are potentially many such traits;
340 | see {{bad-assumptions}}.  Some effort has been made to either eliminate or
341 | obscure some observable traits in QUIC version 1, but many of these remain.
342 | Other QUIC versions might make different design decisions and so exhibit
343 | different traits.
344 | 
345 | The QUIC version number does not appear in all QUIC packets, which means that
346 | reliably extracting information from a flow based on version-specific traits
347 | requires that middleboxes retain state for every connection ID they see.
348 | 
349 | The Version Negotiation packet described in this document is not
350 | integrity protected; it only has modest protection against insertion by
351 | attackers.  An endpoint MUST authenticate the semantic content of a Version
352 | Negotiation packet if it attempts a different QUIC version as a result.
353 | 
354 | 
355 | --- back
356 | 
357 | # Incorrect Assumptions {#bad-assumptions}
358 | 
359 | There are several traits of QUIC version 1 {{QUIC-TRANSPORT}} that are not
360 | protected from observation but are nonetheless considered to be changeable when
361 | a new version is deployed.
362 | 
363 | This section lists a sampling of incorrect assumptions that might be made about
364 | QUIC based on knowledge of QUIC version 1.  Some of these statements are not
365 | even true for QUIC version 1.  This is not an exhaustive list; it is intended to
366 | be illustrative only.
367 | 
368 | **Any and all of the following statements can be false for a given QUIC
369 | version:**
370 | 
371 | * QUIC uses TLS {{QUIC-TLS}} and some TLS messages are visible on the wire.
372 | 
373 | * QUIC long headers are only exchanged during connection establishment.
374 | 
375 | * Every flow on a given 5-tuple will include a connection establishment phase.
376 | 
377 | * The first packets exchanged on a flow use the long header.
378 | 
379 | * The last packet before a long period of quiescence might be assumed
380 |   to contain only an acknowledgment.
381 | 
382 | * QUIC uses an Authenticated Encryption with Associated Data (AEAD) function
383 |   (AEAD_AES_128_GCM; see {{?RFC5116}}) to protect the packets it exchanges
384 |   during connection establishment.
385 | 
386 | * QUIC packet numbers are encrypted and appear as the first encrypted bytes.
387 | 
388 | * QUIC packet numbers increase by one for every packet sent.
389 | 
390 | * QUIC has a minimum size for the first handshake packet sent by a client.
391 | 
392 | * QUIC stipulates that a client speak first.
393 | 
394 | * QUIC packets always have the second bit of the first byte (0x40) set.
395 | 
396 | * A QUIC Version Negotiation packet is only sent by a server.
397 | 
398 | * A QUIC connection ID changes infrequently.
399 | 
400 | * QUIC endpoints change the version they speak if they are sent a Version
401 |   Negotiation packet.
402 | 
403 | * The Version field in a QUIC long header is the same in both directions.
404 | 
405 | * A QUIC packet with a particular value in the Version field means that the
406 |   corresponding version of QUIC is in use.
407 | 
408 | * Only one connection at a time is established between any pair of QUIC
409 |   endpoints.
410 | 


--------------------------------------------------------------------------------
/rfc9002.md:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: QUIC Loss Detection and Congestion Control
   3 | abbrev: QUIC Loss Detection
   4 | number: 9002
   5 | docName: draft-ietf-quic-recovery-34
   6 | date: 2021-05
   7 | category: std
   8 | consensus: true
   9 | ipr: trust200902
  10 | area: Transport
  11 | workgroup: QUIC
  12 | keyword:
  13 |   - bbr
  14 |   - delay-sensitive congestion control
  15 |   - fec
  16 |   - loss-tolerant congestion control
  17 |   - next generation
  18 | 
  19 | stand_alone: yes
  20 | pi: [toc, sortrefs, symrefs, docmapping]
  21 | 
  22 | author:
  23 |  -
  24 |     ins: J. Iyengar
  25 |     name: Jana Iyengar
  26 |     org: Fastly
  27 |     email: jri.ietf@gmail.com
  28 |     role: editor
  29 |  -
  30 |     ins: I. Swett
  31 |     name: Ian Swett
  32 |     org: Google
  33 |     email: ianswett@google.com
  34 |     role: editor
  35 | 
  36 | normative:
  37 | 
  38 |   QUIC-TRANSPORT:
  39 |     title: "QUIC: A UDP-Based Multiplexed and Secure Transport"
  40 |     date: 2021-05
  41 |     seriesinfo:
  42 |       RFC: 9000
  43 |       DOI: 10.17487/RFC9000
  44 |     author:
  45 |       -
  46 |         ins: J. Iyengar
  47 |         name: Jana Iyengar
  48 |         org: Fastly
  49 |         role: editor
  50 |       -
  51 |         ins: M. Thomson
  52 |         name: Martin Thomson
  53 |         org: Mozilla
  54 |         role: editor
  55 | 
  56 |   QUIC-TLS:
  57 |     title: "Using TLS to Secure QUIC"
  58 |     date: 2021-05
  59 |     seriesinfo:
  60 |       RFC: 9001
  61 |       DOI: 10.17487/RFC9001
  62 |     author:
  63 |       -
  64 |         ins: M. Thomson
  65 |         name: Martin Thomson
  66 |         org: Mozilla
  67 |         role: editor
  68 |       -
  69 |         ins: S. Turner
  70 |         name: Sean Turner
  71 |         org: sn3rd
  72 |         role: editor
  73 | 
  74 |   RFC8085:
  75 | 
  76 | informative:
  77 | 
  78 |   FACK:
  79 |     title: "Forward acknowledgement: Refining TCP Congestion Control"
  80 |     author:
  81 |       -
  82 |         initials: M.
  83 |         surname: Mathis
  84 |       -
  85 |         initials: J.
  86 |         surname: Mahdavi
  87 |     date: 1996-08
  88 |     refcontent: ACM SIGCOMM Computer Communication Review
  89 |     seriesinfo:
  90 |       DOI: 10.1145/248157.248181
  91 | 
  92 |   RETRANSMISSION:
  93 |     title: "Improving Round-Trip Time Estimates in Reliable Transport Protocols"
  94 |     author:
  95 |       -
  96 |         initials: P.
  97 |         surname: Karn
  98 |       -
  99 |         initials: C.
 100 |         surname: Partridge
 101 |     date: 1991-11
 102 |     refcontent: ACM Transactions on Computer Systems
 103 |     seriesinfo:
 104 |       DOI: 10.1145/118544.118549
 105 | 
 106 |   RFC3465:
 107 | 
 108 | --- abstract
 109 | 
 110 | This document describes loss detection and congestion control mechanisms for
 111 | QUIC.
 112 | 
 113 | 
 114 | --- middle
 115 | 
 116 | # Introduction
 117 | 
 118 | 
 119 | QUIC is a secure, general-purpose transport protocol, described in
 120 | {{QUIC-TRANSPORT}}. This document describes loss detection and congestion
 121 | control mechanisms for QUIC.
 122 | 
 123 | # Conventions and Definitions
 124 | 
 125 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 126 | "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this
 127 | document are to be interpreted as described in BCP 14 {{!RFC2119}} {{!RFC8174}}
 128 | when, and only when, they appear in all capitals, as shown here.
 129 | 
 130 | Definitions of terms that are used in this document:
 131 | 
 132 | Ack-eliciting frames:
 133 | 
 134 | : All frames other than ACK, PADDING, and CONNECTION_CLOSE are considered
 135 |   ack-eliciting.
 136 | 
 137 | Ack-eliciting packets:
 138 | 
 139 | : Packets that contain ack-eliciting frames elicit an ACK from the receiver
 140 |   within the maximum acknowledgment delay and are called ack-eliciting packets.
 141 | 
 142 | In-flight packets:
 143 | 
 144 | : Packets are considered in flight when they are ack-eliciting or contain a
 145 |   PADDING frame, and they have been sent but are not acknowledged, declared
 146 |   lost, or discarded along with old keys.
 147 | 
 148 | # Design of the QUIC Transmission Machinery
 149 | 
 150 | All transmissions in QUIC are sent with a packet-level header, which indicates
 151 | the encryption level and includes a packet sequence number (referred to below as
 152 | a packet number).  The encryption level indicates the packet number space, as
 153 | described in {{Section 12.3 of QUIC-TRANSPORT}}.  Packet numbers never repeat
 154 | within a packet number space for the lifetime of a connection.  Packet numbers
 155 | are sent in monotonically increasing order within a space, preventing ambiguity.
 156 | It is permitted for some packet numbers to never be used, leaving intentional
 157 | gaps.
 158 | 
 159 | This design obviates the need for disambiguating between transmissions and
 160 | retransmissions; this eliminates significant complexity from QUIC's
 161 | interpretation of TCP loss detection mechanisms.
 162 | 
 163 | QUIC packets can contain multiple frames of different types. The recovery
 164 | mechanisms ensure that data and frames that need reliable delivery are
 165 | acknowledged or declared lost and sent in new packets as necessary. The types
 166 | of frames contained in a packet affect recovery and congestion control logic:
 167 | 
 168 | * All packets are acknowledged, though packets that contain no
 169 |   ack-eliciting frames are only acknowledged along with ack-eliciting
 170 |   packets.
 171 | 
 172 | * Long header packets that contain CRYPTO frames are critical to the
 173 |   performance of the QUIC handshake and use shorter timers for
 174 |   acknowledgment.
 175 | 
 176 | * Packets containing frames besides ACK or CONNECTION_CLOSE frames count toward
 177 |   congestion control limits and are considered to be in flight.
 178 | 
 179 | * PADDING frames cause packets to contribute toward bytes in flight without
 180 |   directly causing an acknowledgment to be sent.
 181 | 
 182 | # Relevant Differences Between QUIC and TCP
 183 | 
 184 | Readers familiar with TCP's loss detection and congestion control will find
 185 | algorithms here that parallel well-known TCP ones. However, protocol differences
 186 | between QUIC and TCP contribute to algorithmic differences. These protocol
 187 | differences are briefly described below.
 188 | 
 189 | ## Separate Packet Number Spaces
 190 | 
 191 | QUIC uses separate packet number spaces for each encryption level,
 192 | except that 0-RTT and all generations of 1-RTT keys use the same packet
 193 | number space.  Using separate packet number spaces ensures that the
 194 | acknowledgment of packets sent with one level of encryption will not
 195 | cause spurious retransmission of packets sent with a different
 196 | encryption level.  Congestion control and round-trip time (RTT)
 197 | measurement are unified across packet number spaces.
 198 | 
 199 | ## Monotonically Increasing Packet Numbers
 200 | 
 201 | TCP conflates transmission order at the sender with delivery order at the
 202 | receiver, resulting in the retransmission ambiguity problem
 203 | {{RETRANSMISSION}}.  QUIC separates transmission order from delivery order:
 204 | packet numbers indicate transmission order, and delivery order is determined by
 205 | the stream offsets in STREAM frames.
 206 | 
 207 | QUIC's packet number is strictly increasing within a packet number space
 208 | and directly encodes transmission order.  A higher packet number signifies
 209 | that the packet was sent later, and a lower packet number signifies that
 210 | the packet was sent earlier.  When a packet containing ack-eliciting
 211 | frames is detected lost, QUIC includes necessary frames in a new packet
 212 | with a new packet number, removing ambiguity about which packet is
 213 | acknowledged when an ACK is received.  Consequently, more accurate RTT
 214 | measurements can be made, spurious retransmissions are trivially detected, and
 215 | mechanisms such as Fast Retransmit can be applied universally, based only on
 216 | packet number.
 217 | 
 218 | This design point significantly simplifies loss detection mechanisms for QUIC.
 219 | Most TCP mechanisms implicitly attempt to infer transmission ordering based on
 220 | TCP sequence numbers -- a nontrivial task, especially when TCP timestamps are
 221 | not available.
 222 | 
 223 | ## Clearer Loss Epoch
 224 | 
 225 | QUIC starts a loss epoch when a packet is lost. The loss epoch ends when any
 226 | packet sent after the start of the epoch is acknowledged.  TCP waits for the gap
 227 | in the sequence number space to be filled, and so if a segment is lost multiple
 228 | times in a row, the loss epoch may not end for several round trips. Because both
 229 | should reduce their congestion windows only once per epoch, QUIC will do it once
 230 | for every round trip that experiences loss, while TCP may only do it once across
 231 | multiple round trips.
 232 | 
 233 | ## No Reneging
 234 | 
 235 | QUIC ACK frames contain information similar to that in TCP Selective
 236 | Acknowledgments (SACKs) {{?RFC2018}}. However, QUIC does not allow a packet
 237 | acknowledgment to be reneged, greatly simplifying implementations on both sides
 238 | and reducing memory pressure on the sender.
 239 | 
 240 | ## More ACK Ranges
 241 | 
 242 | QUIC supports many ACK ranges, as opposed to TCP's three SACK ranges.  In
 243 | high-loss environments, this speeds recovery, reduces spurious retransmits, and
 244 | ensures forward progress without relying on timeouts.
 245 | 
 246 | ## Explicit Correction For Delayed Acknowledgments
 247 | 
 248 | QUIC endpoints measure the delay incurred between when a packet is received and
 249 | when the corresponding acknowledgment is sent, allowing a peer to maintain a
 250 | more accurate RTT estimate; see {{Section 13.2 of QUIC-TRANSPORT}}.
 251 | 
 252 | ## Probe Timeout Replaces RTO and TLP
 253 | 
 254 | QUIC uses a probe timeout (PTO; see {{pto}}), with a timer based on TCP's
 255 | retransmission timeout (RTO) computation; see {{?RFC6298}}.  QUIC's PTO includes
 256 | the peer's maximum expected acknowledgment delay instead of using a fixed
 257 | minimum timeout.
 258 | 
 259 | Similar to the RACK-TLP loss detection algorithm for TCP {{?RFC8985}}, QUIC does
 260 | not collapse the congestion window when the PTO expires, since a single packet
 261 | loss at the tail does not indicate persistent congestion.  Instead, QUIC
 262 | collapses the congestion window when persistent congestion is declared; see
 263 | {{persistent-congestion}}. In doing this, QUIC avoids unnecessary congestion
 264 | window reductions, obviating the need for correcting mechanisms such as Forward
 265 | RTO-Recovery (F-RTO) {{?RFC5682}}. Since QUIC does not collapse the congestion
 266 | window on a PTO expiration, a QUIC sender is not limited from sending more
 267 | in-flight packets after a PTO expiration if it still has available congestion
 268 | window. This occurs when a sender is application limited and the PTO timer
 269 | expires. This is more aggressive than TCP's RTO mechanism when application
 270 | limited, but identical when not application limited.
 271 | 
 272 | QUIC allows probe packets to temporarily exceed the congestion window whenever
 273 | the PTO timer expires.
 274 | 
 275 | ## The Minimum Congestion Window Is Two Packets
 276 | 
 277 | TCP uses a minimum congestion window of one packet. However, loss of that single
 278 | packet means that the sender needs to wait for a PTO to recover ({{pto}}), which
 279 | can be much longer than an RTT.  Sending a single ack-eliciting packet also
 280 | increases the chances of incurring additional latency when a receiver delays its
 281 | acknowledgment.
 282 | 
 283 | QUIC therefore recommends that the minimum congestion window be two
 284 | packets. While this increases network load, it is considered safe since the
 285 | sender will still reduce its sending rate exponentially under persistent
 286 | congestion ({{pto}}).
 287 | 
 288 | ## Handshake Packets Are Not Special
 289 | 
 290 | TCP treats the loss of a SYN or SYN-ACK packet as persistent congestion and
 291 | reduces the congestion window to one packet; see {{?RFC5681}}. QUIC treats loss
 292 | of a packet containing handshake data the same as other losses.
 293 | 
 294 | # Estimating the Round-Trip Time {#compute-rtt}
 295 | 
 296 | At a high level, an endpoint measures the time from when a packet was sent to
 297 | when it is acknowledged as an RTT sample. The endpoint uses RTT samples and
 298 | peer-reported host delays (see {{Section 13.2 of QUIC-TRANSPORT}}) to generate a
 299 | statistical description of the network path's RTT. An endpoint computes the
 300 | following three values for each path: the minimum value over a period of time
 301 | (min_rtt), an exponentially weighted moving average (smoothed_rtt), and the mean
 302 | deviation (referred to as "variation" in the rest of this document) in the
 303 | observed RTT samples (rttvar).
 304 | 
 305 | ## Generating RTT Samples {#latest-rtt}
 306 | 
 307 | An endpoint generates an RTT sample on receiving an ACK frame that meets the
 308 | following two conditions:
 309 | 
 310 | - the largest acknowledged packet number is newly acknowledged, and
 311 | 
 312 | - at least one of the newly acknowledged packets was ack-eliciting.
 313 | 
 314 | The RTT sample, latest_rtt, is generated as the time elapsed since the largest
 315 | acknowledged packet was sent:
 316 | 
 317 | ~~~pseudocode
 318 | latest_rtt = ack_time - send_time_of_largest_acked
 319 | ~~~
 320 | 
 321 | An RTT sample is generated using only the largest acknowledged packet in the
 322 | received ACK frame.  This is because a peer reports acknowledgment delays for
 323 | only the largest acknowledged packet in an ACK frame.  While the reported
 324 | acknowledgment delay is not used by the RTT sample measurement, it is used to
 325 | adjust the RTT sample in subsequent computations of smoothed_rtt and rttvar
 326 | ({{smoothed-rtt}}).
 327 | 
 328 | To avoid generating multiple RTT samples for a single packet, an ACK frame
 329 | SHOULD NOT be used to update RTT estimates if it does not newly acknowledge the
 330 | largest acknowledged packet.
 331 | 
 332 | An RTT sample MUST NOT be generated on receiving an ACK frame that does not
 333 | newly acknowledge at least one ack-eliciting packet. A peer usually does not
 334 | send an ACK frame when only non-ack-eliciting packets are received. Therefore,
 335 | an ACK frame that contains acknowledgments for only non-ack-eliciting packets
 336 | could include an arbitrarily large ACK Delay value.  Ignoring
 337 | such ACK frames avoids complications in subsequent smoothed_rtt and rttvar
 338 | computations.
 339 | 
 340 | A sender might generate multiple RTT samples per RTT when multiple ACK frames
 341 | are received within an RTT.  As suggested in {{?RFC6298}}, doing so might result
 342 | in inadequate history in smoothed_rtt and rttvar.  Ensuring that RTT estimates
 343 | retain sufficient history is an open research question.
 344 | 
 345 | ## Estimating min_rtt {#min-rtt}
 346 | 
 347 | min_rtt is the sender's estimate of the minimum RTT observed for a given network
 348 | path over a period of time. In this document, min_rtt is used by loss detection
 349 | to reject implausibly small RTT samples.
 350 | 
 351 | min_rtt MUST be set to the latest_rtt on the first RTT sample. min_rtt MUST be
 352 | set to the lesser of min_rtt and latest_rtt ({{latest-rtt}}) on all other
 353 | samples.
 354 | 
 355 | An endpoint uses only locally observed times in computing the min_rtt and does
 356 | not adjust for acknowledgment delays reported by the peer. Doing so allows the
 357 | endpoint to set a lower bound for the smoothed_rtt based entirely on what it
 358 | observes (see {{smoothed-rtt}}) and limits potential underestimation due to
 359 | erroneously reported delays by the peer.
 360 | 
 361 | The RTT for a network path may change over time. If a path's actual RTT
 362 | decreases, the min_rtt will adapt immediately on the first low sample.  If the
 363 | path's actual RTT increases, however, the min_rtt will not adapt to it, allowing
 364 | future RTT samples that are smaller than the new RTT to be included in
 365 | smoothed_rtt.
 366 | 
 367 | Endpoints SHOULD set the min_rtt to the newest RTT sample after persistent
 368 | congestion is established. This avoids repeatedly declaring persistent
 369 | congestion when the RTT increases. This also allows a connection to reset
 370 | its estimate of min_rtt and smoothed_rtt after a disruptive network event;
 371 | see {{smoothed-rtt}}.
 372 | 
 373 | Endpoints MAY reestablish the min_rtt at other times in the connection, such as
 374 | when traffic volume is low and an acknowledgment is received with a low
 375 | acknowledgment delay. Implementations SHOULD NOT refresh the min_rtt
 376 | value too often since the actual minimum RTT of the path is not
 377 | frequently observable.
 378 | 
 379 | 
 380 | ## Estimating smoothed_rtt and rttvar {#smoothed-rtt}
 381 | 
 382 | smoothed_rtt is an exponentially weighted moving average of an endpoint's RTT
 383 | samples, and rttvar estimates the variation in the RTT samples using a mean
 384 | variation.
 385 | 
 386 | The calculation of smoothed_rtt uses RTT samples after adjusting them for
 387 | acknowledgment delays. These delays are decoded from the ACK Delay field of
 388 | ACK frames as described in {{Section 19.3 of QUIC-TRANSPORT}}.
 389 | 
 390 | The peer might report acknowledgment delays that are larger than the peer's
 391 | max_ack_delay during the handshake ({{Section 13.2.1 of QUIC-TRANSPORT}}). To
 392 | account for this, the endpoint SHOULD ignore max_ack_delay until the handshake
 393 | is confirmed, as defined in {{Section 4.1.2 of QUIC-TLS}}. When they occur,
 394 | these large acknowledgment delays are likely to be non-repeating and limited to
 395 | the handshake. The endpoint can therefore use them without limiting them to the
 396 | max_ack_delay, avoiding unnecessary inflation of the RTT estimate.
 397 | 
 398 | Note that a large acknowledgment delay can result in a substantially inflated
 399 | smoothed_rtt if there is an error either in the peer's reporting of the
 400 | acknowledgment delay or in the endpoint's min_rtt estimate.  Therefore, prior
 401 | to handshake confirmation, an endpoint MAY ignore RTT samples if adjusting
 402 | the RTT sample for acknowledgment delay causes the sample to be less than the
 403 | min_rtt.
 404 | 
 405 | After the handshake is confirmed, any acknowledgment delays reported by the
 406 | peer that are greater than the peer's max_ack_delay are attributed to
 407 | unintentional but potentially repeating delays, such as scheduler latency at the
 408 | peer or loss of previous acknowledgments.  Excess delays could also be due to
 409 | a noncompliant receiver.  Therefore, these extra delays are considered
 410 | effectively part of path delay and incorporated into the RTT estimate.
 411 | 
 412 | Therefore, when adjusting an RTT sample using peer-reported acknowledgment
 413 | delays, an endpoint:
 414 | 
 415 | - MAY ignore the acknowledgment delay for Initial packets, since these
 416 |   acknowledgments are not delayed by the peer ({{Section 13.2.1 of
 417 |   QUIC-TRANSPORT}});
 418 | 
 419 | - SHOULD ignore the peer's max_ack_delay until the handshake is confirmed;
 420 | 
 421 | - MUST use the lesser of the acknowledgment delay and the peer's max_ack_delay
 422 |   after the handshake is confirmed; and
 423 | 
 424 | - MUST NOT subtract the acknowledgment delay from the RTT sample if the
 425 |   resulting value is smaller than the min_rtt.  This limits the underestimation
 426 |   of the smoothed_rtt due to a misreporting peer.
 427 | 
 428 | Additionally, an endpoint might postpone the processing of acknowledgments when
 429 | the corresponding decryption keys are not immediately available. For example, a
 430 | client might receive an acknowledgment for a 0-RTT packet that it cannot
 431 | decrypt because 1-RTT packet protection keys are not yet available to it. In
 432 | such cases, an endpoint SHOULD subtract such local delays from its RTT sample
 433 | until the handshake is confirmed.
 434 | 
 435 | Similar to {{?RFC6298}}, smoothed_rtt and rttvar are computed as follows.
 436 | 
 437 | An endpoint initializes the RTT estimator during connection establishment and
 438 | when the estimator is reset during connection migration; see {{Section 9.4 of
 439 | QUIC-TRANSPORT}}. Before any RTT samples are available for a new path or when
 440 | the estimator is reset, the estimator is initialized using the initial RTT; see
 441 | {{pto-handshake}}.
 442 | 
 443 | smoothed_rtt and rttvar are initialized as follows, where kInitialRtt contains
 444 | the initial RTT value:
 445 | 
 446 | ~~~pseudocode
 447 | smoothed_rtt = kInitialRtt
 448 | rttvar = kInitialRtt / 2
 449 | ~~~
 450 | 
 451 | RTT samples for the network path are recorded in latest_rtt; see
 452 | {{latest-rtt}}. On the first RTT sample after initialization, the estimator is
 453 | reset using that sample. This ensures that the estimator retains no history of
 454 | past samples.  Packets sent on other paths do not contribute RTT samples to the
 455 | current path, as described in {{Section 9.4 of QUIC-TRANSPORT}}.
 456 | 
 457 | On the first RTT sample after initialization, smoothed_rtt and rttvar are set as
 458 | follows:
 459 | 
 460 | ~~~pseudocode
 461 | smoothed_rtt = latest_rtt
 462 | rttvar = latest_rtt / 2
 463 | ~~~
 464 | 
 465 | On subsequent RTT samples, smoothed_rtt and rttvar evolve as follows:
 466 | 
 467 | ~~~pseudocode
 468 | ack_delay = decoded acknowledgment delay from ACK frame
 469 | if (handshake confirmed):
 470 |   ack_delay = min(ack_delay, max_ack_delay)
 471 | adjusted_rtt = latest_rtt
 472 | if (latest_rtt >= min_rtt + ack_delay):
 473 |   adjusted_rtt = latest_rtt - ack_delay
 474 | rttvar_sample = abs(smoothed_rtt - adjusted_rtt)
 475 | rttvar = 3/4 * rttvar + 1/4 * rttvar_sample
 476 | smoothed_rtt = 7/8 * smoothed_rtt + 1/8 * adjusted_rtt
 477 | ~~~
 478 | 
 479 | # Loss Detection {#loss-detection}
 480 | 
 481 | QUIC senders use acknowledgments to detect lost packets and a PTO to ensure
 482 | acknowledgments are received; see {{pto}}. This section provides a description
 483 | of these algorithms.
 484 | 
 485 | If a packet is lost, the QUIC transport needs to recover from that loss, such
 486 | as by retransmitting the data, sending an updated frame, or discarding the
 487 | frame.  For more information, see {{Section 13.3 of QUIC-TRANSPORT}}.
 488 | 
 489 | Loss detection is separate per packet number space, unlike RTT measurement and
 490 | congestion control, because RTT and congestion control are properties of the
 491 | path, whereas loss detection also relies upon key availability.
 492 | 
 493 | ## Acknowledgment-Based Detection {#ack-loss-detection}
 494 | 
 495 | Acknowledgment-based loss detection implements the spirit of TCP's Fast
 496 | Retransmit {{?RFC5681}}, Early Retransmit {{?RFC5827}}, Forward Acknowledgment
 497 | {{FACK}}, SACK loss recovery {{?RFC6675}}, and RACK-TLP {{?RFC8985}}. This
 498 | section provides an overview of how these algorithms are implemented in QUIC.
 499 | 
 500 | A packet is declared lost if it meets all of the following conditions:
 501 | 
 502 | * The packet is unacknowledged, in flight, and was sent prior to an
 503 |   acknowledged packet.
 504 | 
 505 | * The packet was sent kPacketThreshold packets before an acknowledged packet
 506 |   ({{packet-threshold}}), or it was sent long enough in the past
 507 |   ({{time-threshold}}).
 508 | 
 509 | The acknowledgment indicates that a packet sent later was delivered, and the
 510 | packet and time thresholds provide some tolerance for packet reordering.
 511 | 
 512 | Spuriously declaring packets as lost leads to unnecessary retransmissions and
 513 | may result in degraded performance due to the actions of the congestion
 514 | controller upon detecting loss.  Implementations can detect spurious
 515 | retransmissions and increase the packet or time reordering threshold to
 516 | reduce future spurious retransmissions and loss events. Implementations with
 517 | adaptive time thresholds MAY choose to start with smaller initial reordering
 518 | thresholds to minimize recovery latency.
 519 | 
 520 | ### Packet Threshold {#packet-threshold}
 521 | 
 522 | The RECOMMENDED initial value for the packet reordering threshold
 523 | (kPacketThreshold) is 3, based on best practices for TCP loss detection
 524 | {{?RFC5681}} {{?RFC6675}}.  In order to remain similar to TCP,
 525 | implementations SHOULD NOT use a packet threshold less than 3; see {{?RFC5681}}.
 526 | 
 527 | Some networks may exhibit higher degrees of packet reordering, causing a sender
 528 | to detect spurious losses. Additionally, packet reordering could be more common
 529 | with QUIC than TCP because network elements that could observe and reorder TCP
 530 | packets cannot do that for QUIC and also because QUIC packet numbers are
 531 | encrypted.  Algorithms that increase the reordering threshold after spuriously
 532 | detecting losses, such as RACK {{?RFC8985}}, have proven to be useful in TCP and
 533 | are expected to be at least as useful in QUIC.
 534 | 
 535 | ### Time Threshold {#time-threshold}
 536 | 
 537 | Once a later packet within the same packet number space has been acknowledged,
 538 | an endpoint SHOULD declare an earlier packet lost if it was sent a threshold
 539 | amount of time in the past. To avoid declaring packets as lost too early, this
 540 | time threshold MUST be set to at least the local timer granularity, as
 541 | indicated by the kGranularity constant.  The time threshold is:
 542 | 
 543 | ~~~pseudocode
 544 | max(kTimeThreshold * max(smoothed_rtt, latest_rtt), kGranularity)
 545 | ~~~
 546 | 
 547 | If packets sent prior to the largest acknowledged packet cannot yet be declared
 548 | lost, then a timer SHOULD be set for the remaining time.
 549 | 
 550 | Using max(smoothed_rtt, latest_rtt) protects against the following two cases:
 551 | 
 552 | * the latest RTT sample is lower than the smoothed RTT, perhaps due to
 553 |   reordering where the acknowledgment encountered a shorter path;
 554 | 
 555 | * the latest RTT sample is higher than the smoothed RTT, perhaps due to a
 556 |   sustained increase in the actual RTT, but the smoothed RTT has not yet caught
 557 |   up.
 558 | 
 559 | The RECOMMENDED time threshold (kTimeThreshold), expressed as an RTT multiplier,
 560 | is 9/8. The RECOMMENDED value of the timer granularity (kGranularity) is 1
 561 | millisecond.
 562 | 
 563 | <aside markdown="block">
 564 | Note: TCP's RACK {{?RFC8985}} specifies a slightly larger threshold, equivalent
 565 | to 5/4, for a similar purpose. Experience with QUIC shows that 9/8 works well.
 566 | </aside>
 567 | 
 568 | Implementations MAY experiment with absolute thresholds, thresholds from
 569 | previous connections, adaptive thresholds, or the inclusion of RTT variation.
 570 | Smaller thresholds reduce reordering resilience and increase spurious
 571 | retransmissions, and larger thresholds increase loss detection delay.
 572 | 
 573 | 
 574 | ## Probe Timeout {#pto}
 575 | 
 576 | A Probe Timeout (PTO) triggers the sending of one or two probe datagrams when
 577 | ack-eliciting packets are not acknowledged within the expected period of
 578 | time or the server may not have validated the client's address.  A PTO enables
 579 | a connection to recover from loss of tail packets or acknowledgments.
 580 | 
 581 | As with loss detection, the PTO is per packet number space. That is, a
 582 | PTO value is computed per packet number space.
 583 | 
 584 | A PTO timer expiration event does not indicate packet loss and MUST NOT cause
 585 | prior unacknowledged packets to be marked as lost. When an acknowledgment is
 586 | received that newly acknowledges packets, loss detection proceeds as dictated
 587 | by the packet and time threshold mechanisms; see {{ack-loss-detection}}.
 588 | 
 589 | The PTO algorithm used in QUIC implements the reliability functions of Tail Loss
 590 | Probe {{?RFC8985}}, RTO {{?RFC5681}}, and F-RTO algorithms for TCP
 591 | {{?RFC5682}}. The timeout computation is based on TCP's RTO period {{?RFC6298}}.
 592 | 
 593 | ### Computing PTO
 594 | 
 595 | When an ack-eliciting packet is transmitted, the sender schedules a timer for
 596 | the PTO period as follows:
 597 | 
 598 | ~~~pseudocode
 599 | PTO = smoothed_rtt + max(4*rttvar, kGranularity) + max_ack_delay
 600 | ~~~
 601 | 
 602 | The PTO period is the amount of time that a sender ought to wait for an
 603 | acknowledgment of a sent packet.  This time period includes the estimated
 604 | network RTT (smoothed_rtt), the variation in the estimate (4*rttvar),
 605 | and max_ack_delay, to account for the maximum time by which a receiver might
 606 | delay sending an acknowledgment.
 607 | 
 608 | When the PTO is armed for Initial or Handshake packet number spaces, the
 609 | max_ack_delay in the PTO period computation is set to 0, since the peer is
 610 | expected to not delay these packets intentionally; see {{Section 13.2.1 of
 611 | QUIC-TRANSPORT}}.
 612 | 
 613 | The PTO period MUST be at least kGranularity to avoid the timer expiring
 614 | immediately.
 615 | 
 616 | When ack-eliciting packets in multiple packet number spaces are in flight, the
 617 | timer MUST be set to the earlier value of the Initial and Handshake packet
 618 | number spaces.
 619 | 
 620 | An endpoint MUST NOT set its PTO timer for the Application Data packet number
 621 | space until the handshake is confirmed. Doing so prevents the endpoint from
 622 | retransmitting information in packets when either the peer does not yet have the
 623 | keys to process them or the endpoint does not yet have the keys to process their
 624 | acknowledgments. For example, this can happen when a client sends 0-RTT packets
 625 | to the server; it does so without knowing whether the server will be able to
 626 | decrypt them. Similarly, this can happen when a server sends 1-RTT packets
 627 | before confirming that the client has verified the server's certificate and can
 628 | therefore read these 1-RTT packets.
 629 | 
 630 | A sender SHOULD restart its PTO timer every time an ack-eliciting packet is
 631 | sent or acknowledged, or when Initial or Handshake keys are discarded
 632 | ({{Section 4.9 of QUIC-TLS}}). This ensures the PTO is always set based on the
 633 | latest estimate of the RTT and for the correct packet across packet
 634 | number spaces.
 635 | 
 636 | When a PTO timer expires, the PTO backoff MUST be increased, resulting in the
 637 | PTO period being set to twice its current value. The PTO backoff factor is reset
 638 | when an acknowledgment is received, except in the following case. A server
 639 | might take longer to respond to packets during the handshake than otherwise.  To
 640 | protect such a server from repeated client probes, the PTO backoff is not reset
 641 | at a client that is not yet certain that the server has finished validating the
 642 | client's address. That is, a client does not reset the PTO backoff factor on
 643 | receiving acknowledgments in Initial packets.
 644 | 
 645 | This exponential reduction in the sender's rate is important because consecutive
 646 | PTOs might be caused by loss of packets or acknowledgments due to severe
 647 | congestion.  Even when there are ack-eliciting packets in flight in multiple
 648 | packet number spaces, the exponential increase in PTO occurs across all spaces
 649 | to prevent excess load on the network.  For example, a timeout in the Initial
 650 | packet number space doubles the length of the timeout in the Handshake packet
 651 | number space.
 652 | 
 653 | The total length of time over which consecutive PTOs expire is limited by the
 654 | idle timeout.
 655 | 
 656 | The PTO timer MUST NOT be set if a timer is set for time threshold
 657 | loss detection; see {{time-threshold}}.  A timer that is set for time
 658 | threshold loss detection will expire earlier than the PTO timer
 659 | in most cases and is less likely to spuriously retransmit data.
 660 | 
 661 | ### Handshakes and New Paths {#pto-handshake}
 662 | 
 663 | Resumed connections over the same network MAY use the previous connection's
 664 | final smoothed RTT value as the resumed connection's initial RTT.  When no
 665 | previous RTT is available, the initial RTT SHOULD be set to 333 milliseconds.
 666 | This results in handshakes starting with a PTO of 1 second, as recommended for
 667 | TCP's initial RTO; see {{Section 2 of RFC6298}}.
 668 | 
 669 | A connection MAY use the delay between sending a PATH_CHALLENGE and receiving a
 670 | PATH_RESPONSE to set the initial RTT (see kInitialRtt in
 671 | {{constants-of-interest}}) for a new path, but the delay SHOULD NOT be
 672 | considered an RTT sample.
 673 | 
 674 | When the Initial keys and Handshake keys are discarded (see
 675 | {{discarding-packets}}), any Initial packets and Handshake packets can
 676 | no longer be acknowledged, so they are removed from bytes in
 677 | flight. When Initial or Handshake keys are discarded, the PTO and loss
 678 | detection timers MUST be reset, because discarding keys indicates
 679 | forward progress and the loss detection timer might have been set for
 680 | a now-discarded packet number space.
 681 | 
 682 | #### Before Address Validation
 683 | 
 684 | Until the server has validated the client's address on the path, the amount of
 685 | data it can send is limited to three times the amount of data received,
 686 | as specified in {{Section 8.1 of QUIC-TRANSPORT}}. If no additional data can be
 687 | sent, the server's PTO timer MUST NOT be armed until datagrams have been
 688 | received from the client because packets sent on PTO count against the
 689 | anti-amplification limit.
 690 | 
 691 | When the server receives a datagram from the client, the amplification limit is
 692 | increased and the server resets the PTO timer.  If the PTO timer is then set to
 693 | a time in the past, it is executed immediately. Doing so avoids sending new
 694 | 1-RTT packets prior to packets critical to the completion of the handshake.
 695 | In particular, this can happen when 0-RTT is accepted but the server fails to
 696 | validate the client's address.
 697 | 
 698 | Since the server could be blocked until more datagrams are received from the
 699 | client, it is the client's responsibility to send packets to unblock the server
 700 | until it is certain that the server has finished its address validation (see
 701 | {{Section 8 of QUIC-TRANSPORT}}). That is, the client MUST set the PTO timer
 702 | if the client has not received an acknowledgment for any of its Handshake
 703 | packets and the handshake is not confirmed (see {{Section 4.1.2 of QUIC-TLS}}),
 704 | even if there are no packets in flight. When the PTO fires, the client MUST
 705 | send a Handshake packet if it has Handshake keys; otherwise, it MUST send an
 706 | Initial packet in a UDP datagram with a payload of at least 1200 bytes.
 707 | 
 708 | ### Speeding up Handshake Completion
 709 | 
 710 | When a server receives an Initial packet containing duplicate CRYPTO data,
 711 | it can assume the client did not receive all of the server's CRYPTO data sent
 712 | in Initial packets, or the client's estimated RTT is too small. When a
 713 | client receives Handshake or 1-RTT packets prior to obtaining Handshake keys,
 714 | it may assume some or all of the server's Initial packets were lost.
 715 | 
 716 | To speed up handshake completion under these conditions, an endpoint MAY, for a
 717 | limited number of times per connection, send a packet containing
 718 | unacknowledged CRYPTO data earlier than the PTO expiry, subject to the address
 719 | validation limits in {{Section 8.1 of QUIC-TRANSPORT}}. Doing so at most once
 720 | for each connection is adequate to quickly recover from a single packet loss.
 721 | An endpoint that always retransmits packets in response to receiving packets
 722 | that it cannot process risks creating an infinite exchange of packets.
 723 | 
 724 | Endpoints can also use coalesced packets (see {{Section 12.2 of
 725 | QUIC-TRANSPORT}}) to ensure that each datagram elicits at least one
 726 | acknowledgment. For example, a client can coalesce an Initial packet containing
 727 | PING and PADDING frames with a 0-RTT data packet, and a server can coalesce an
 728 | Initial packet containing a PING frame with one or more packets in its first
 729 | flight.
 730 | 
 731 | ### Sending Probe Packets
 732 | 
 733 | When a PTO timer expires, a sender MUST send at least one ack-eliciting packet
 734 | in the packet number space as a probe.  An endpoint MAY send up to two
 735 | full-sized datagrams containing ack-eliciting packets to avoid an expensive
 736 | consecutive PTO expiration due to a single lost datagram or to transmit data
 737 | from multiple packet number spaces. All probe packets sent on a PTO MUST be
 738 | ack-eliciting.
 739 | 
 740 | In addition to sending data in the packet number space for which the timer
 741 | expired, the sender SHOULD send ack-eliciting packets from other packet number
 742 | spaces with in-flight data, coalescing packets if possible.  This is
 743 | particularly valuable when the server has both Initial and Handshake data in
 744 | flight or when the client has both Handshake and Application Data in flight
 745 | because the peer might only have receive keys for one of the two packet number
 746 | spaces.
 747 | 
 748 | If the sender wants to elicit a faster acknowledgment on PTO, it can skip a
 749 | packet number to eliminate the acknowledgment delay.
 750 | 
 751 | An endpoint SHOULD include new data in packets that are sent on PTO expiration.
 752 | Previously sent data MAY be sent if no new data can be sent. Implementations
 753 | MAY use alternative strategies for determining the content of probe packets,
 754 | including sending new or retransmitted data based on the application's
 755 | priorities.
 756 | 
 757 | It is possible the sender has no new or previously sent data to send.
 758 | As an example, consider the following sequence of events: new application data
 759 | is sent in a STREAM frame, deemed lost, then retransmitted in a new packet,
 760 | and then the original transmission is acknowledged.  When there is no data to
 761 | send, the sender SHOULD send a PING or other ack-eliciting frame in a single
 762 | packet, rearming the PTO timer.
 763 | 
 764 | Alternatively, instead of sending an ack-eliciting packet, the sender MAY mark
 765 | any packets still in flight as lost.  Doing so avoids sending an additional
 766 | packet but increases the risk that loss is declared too aggressively, resulting
 767 | in an unnecessary rate reduction by the congestion controller.
 768 | 
 769 | Consecutive PTO periods increase exponentially, and as a result, connection
 770 | recovery latency increases exponentially as packets continue to be dropped in
 771 | the network.  Sending two packets on PTO expiration increases resilience to
 772 | packet drops, thus reducing the probability of consecutive PTO events.
 773 | 
 774 | When the PTO timer expires multiple times and new data cannot be sent,
 775 | implementations must choose between sending the same payload every time
 776 | or sending different payloads.  Sending the same payload may be simpler
 777 | and ensures the highest priority frames arrive first.  Sending different
 778 | payloads each time reduces the chances of spurious retransmission.
 779 | 
 780 | 
 781 | ## Handling Retry Packets
 782 | 
 783 | A Retry packet causes a client to send another Initial packet, effectively
 784 | restarting the connection process.  A Retry packet indicates that the Initial
 785 | packet was received but not processed.  A Retry packet cannot be treated as an
 786 | acknowledgment because it does not indicate that a packet was processed or
 787 | specify the packet number.
 788 | 
 789 | Clients that receive a Retry packet reset congestion control and loss recovery
 790 | state, including resetting any pending timers.  Other connection state, in
 791 | particular cryptographic handshake messages, is retained; see
 792 | {{Section 17.2.5 of QUIC-TRANSPORT}}.
 793 | 
 794 | The client MAY compute an RTT estimate to the server as the time period from
 795 | when the first Initial packet was sent to when a Retry or a Version Negotiation
 796 | packet is received.  The client MAY use this value in place of its default for
 797 | the initial RTT estimate.
 798 | 
 799 | ## Discarding Keys and Packet State {#discarding-packets}
 800 | 
 801 | When Initial and Handshake packet protection keys are discarded
 802 | (see {{Section 4.9 of QUIC-TLS}}), all packets that were sent with those keys
 803 | can no longer be acknowledged because their acknowledgments cannot be processed.
 804 | The sender MUST discard all recovery state associated with those packets
 805 | and MUST remove them from the count of bytes in flight.
 806 | 
 807 | Endpoints stop sending and receiving Initial packets once they start exchanging
 808 | Handshake packets; see {{Section 17.2.2.1 of QUIC-TRANSPORT}}. At this point,
 809 | recovery state for all in-flight Initial packets is discarded.
 810 | 
 811 | When 0-RTT is rejected, recovery state for all in-flight 0-RTT packets is
 812 | discarded.
 813 | 
 814 | If a server accepts 0-RTT, but does not buffer 0-RTT packets that arrive
 815 | before Initial packets, early 0-RTT packets will be declared lost, but that
 816 | is expected to be infrequent.
 817 | 
 818 | It is expected that keys are discarded at some time after the packets
 819 | encrypted with them are either acknowledged or declared lost. However,
 820 | Initial and Handshake secrets are discarded as soon as Handshake and
 821 | 1-RTT keys are proven to be available to both client and server; see
 822 | {{Section 4.9.1 of QUIC-TLS}}.
 823 | 
 824 | # Congestion Control {#congestion-control}
 825 | 
 826 | This document specifies a sender-side congestion controller for QUIC similar to
 827 | TCP NewReno {{?RFC6582}}.
 828 | 
 829 | The signals QUIC provides for congestion control are generic and are designed to
 830 | support different sender-side algorithms. A sender can unilaterally choose a
 831 | different algorithm to use, such as CUBIC {{?RFC8312}}.
 832 | 
 833 | If a sender uses a different controller than that specified in this document,
 834 | the chosen controller MUST conform to the congestion control guidelines
 835 | specified in {{Section 3.1 of RFC8085}}.
 836 | 
 837 | Similar to TCP, packets containing only ACK frames do not count toward bytes
 838 | in flight and are not congestion controlled.  Unlike TCP, QUIC can detect the
 839 | loss of these packets and MAY use that information to adjust the congestion
 840 | controller or the rate of ACK-only packets being sent, but this document does
 841 | not describe a mechanism for doing so.
 842 | 
 843 | The congestion controller is per path, so packets sent on other paths do not
 844 | alter the current path's congestion controller, as described in
 845 | {{Section 9.4 of QUIC-TRANSPORT}}.
 846 | 
 847 | The algorithm in this document specifies and uses the controller's congestion
 848 | window in bytes.
 849 | 
 850 | An endpoint MUST NOT send a packet if it would cause bytes_in_flight (see
 851 | {{vars-of-interest}}) to be larger than the congestion window, unless the packet
 852 | is sent on a PTO timer expiration (see {{pto}}) or when entering recovery
 853 | (see {{recovery-period}}).
 854 | 
 855 | ## Explicit Congestion Notification {#congestion-ecn}
 856 | 
 857 | If a path has been validated to support Explicit Congestion Notification (ECN)
 858 | {{!RFC3168}} {{?RFC8311}}, QUIC treats a Congestion Experienced (CE) codepoint
 859 | in the IP header as a signal of congestion. This document specifies an
 860 | endpoint's response when the peer-reported ECN-CE count increases; see {{Section
 861 | 13.4.2 of QUIC-TRANSPORT}}.
 862 | 
 863 | ## Initial and Minimum Congestion Window {#initial-cwnd}
 864 | 
 865 | QUIC begins every connection in slow start with the congestion window set to an
 866 | initial value.  Endpoints SHOULD use an initial congestion window of ten times
 867 | the maximum datagram size (max_datagram_size), while limiting the window to the
 868 | larger of 14,720 bytes or twice the maximum datagram size. This follows the
 869 | analysis and recommendations in {{?RFC6928}}, increasing the byte limit to
 870 | account for the smaller 8-byte overhead of UDP compared to the 20-byte overhead
 871 | for TCP.
 872 | 
 873 | If the maximum datagram size changes during the connection, the initial
 874 | congestion window SHOULD be recalculated with the new size.  If the maximum
 875 | datagram size is decreased in order to complete the handshake, the
 876 | congestion window SHOULD be set to the new initial congestion window.
 877 | 
 878 | Prior to validating the client's address, the server can be further limited by
 879 | the anti-amplification limit as specified in {{Section 8.1 of QUIC-TRANSPORT}}.
 880 | Though the anti-amplification limit can prevent the congestion window from
 881 | being fully utilized and therefore slow down the increase in congestion window,
 882 | it does not directly affect the congestion window.
 883 | 
 884 | The minimum congestion window is the smallest value the congestion window can
 885 | attain in response to loss, an increase in the peer-reported ECN-CE count,
 886 | or persistent congestion.  The RECOMMENDED value is 2 * max_datagram_size.
 887 | 
 888 | ## Congestion Control States
 889 | 
 890 | The NewReno congestion controller described in this document has three
 891 | distinct states, as shown in {{fig-cc-fsm}}.
 892 | 
 893 | ~~~
 894 |                  New path or      +------------+
 895 |             persistent congestion |   Slow     |
 896 |         (O)---------------------->|   Start    |
 897 |                                   +------------+
 898 |                                         |
 899 |                                 Loss or |
 900 |                         ECN-CE increase |
 901 |                                         v
 902 |  +------------+     Loss or       +------------+
 903 |  | Congestion |  ECN-CE increase  |  Recovery  |
 904 |  | Avoidance  |------------------>|   Period   |
 905 |  +------------+                   +------------+
 906 |            ^                            |
 907 |            |                            |
 908 |            +----------------------------+
 909 |               Acknowledgment of packet
 910 |                 sent during recovery
 911 | ~~~
 912 | {: #fig-cc-fsm title="Congestion Control States and Transitions"}
 913 | 
 914 | These states and the transitions between them are described in subsequent
 915 | sections.
 916 | 
 917 | ### Slow Start
 918 | 
 919 | A NewReno sender is in slow start any time the congestion window is below the
 920 | slow start threshold. A sender begins in slow start because the slow start
 921 | threshold is initialized to an infinite value.
 922 | 
 923 | While a sender is in slow start, the congestion window increases by the number
 924 | of bytes acknowledged when each acknowledgment is processed. This results in
 925 | exponential growth of the congestion window.
 926 | 
 927 | The sender MUST exit slow start and enter a recovery period when a packet is
 928 | lost or when the ECN-CE count reported by its peer increases.
 929 | 
 930 | A sender reenters slow start any time the congestion window is less than the
 931 | slow start threshold, which only occurs after persistent congestion is
 932 | declared.
 933 | 
 934 | ### Recovery {#recovery-period}
 935 | 
 936 | A NewReno sender enters a recovery period when it detects the loss of a packet
 937 | or when the ECN-CE count reported by its peer increases. A sender that is
 938 | already in a recovery period stays in it and does not reenter it.
 939 | 
 940 | On entering a recovery period, a sender MUST set the slow start threshold to
 941 | half the value of the congestion window when loss is detected. The congestion
 942 | window MUST be set to the reduced value of the slow start threshold before
 943 | exiting the recovery period.
 944 | 
 945 | Implementations MAY reduce the congestion window immediately upon entering a
 946 | recovery period or use other mechanisms, such as Proportional Rate Reduction
 947 | {{?PRR=RFC6937}}, to reduce the congestion window more gradually. If the
 948 | congestion window is reduced immediately, a single packet can be sent prior to
 949 | reduction. This speeds up loss recovery if the data in the lost packet is
 950 | retransmitted and is similar to TCP as described in {{Section 5 of RFC6675}}.
 951 | 
 952 | The recovery period aims to limit congestion window reduction to once per round
 953 | trip. Therefore, during a recovery period, the congestion window does not change
 954 | in response to new losses or increases in the ECN-CE count.
 955 | 
 956 | A recovery period ends and the sender enters congestion avoidance when a packet
 957 | sent during the recovery period is acknowledged. This is slightly different
 958 | from TCP's definition of recovery, which ends when the lost segment that
 959 | started recovery is acknowledged {{?RFC5681}}.
 960 | 
 961 | ### Congestion Avoidance
 962 | 
 963 | A NewReno sender is in congestion avoidance any time the congestion window is
 964 | at or above the slow start threshold and not in a recovery period.
 965 | 
 966 | A sender in congestion avoidance uses an Additive Increase Multiplicative
 967 | Decrease (AIMD) approach that MUST limit the increase to the congestion window
 968 | to at most one maximum datagram size for each congestion window that is
 969 | acknowledged.
 970 | 
 971 | The sender exits congestion avoidance and enters a recovery period when a
 972 | packet is lost or when the ECN-CE count reported by its peer increases.
 973 | 
 974 | ## Ignoring Loss of Undecryptable Packets
 975 | 
 976 | During the handshake, some packet protection keys might not be available when
 977 | a packet arrives, and the receiver can choose to drop the packet. In particular,
 978 | Handshake and 0-RTT packets cannot be processed until the Initial packets
 979 | arrive, and 1-RTT packets cannot be processed until the handshake completes.
 980 | Endpoints MAY ignore the loss of Handshake, 0-RTT, and 1-RTT packets that might
 981 | have arrived before the peer had packet protection keys to process those
 982 | packets. Endpoints MUST NOT ignore the loss of packets that were sent after
 983 | the earliest acknowledged packet in a given packet number space.
 984 | 
 985 | ## Probe Timeout
 986 | 
 987 | Probe packets MUST NOT be blocked by the congestion controller.  A sender MUST
 988 | however count these packets as being additionally in flight, since these packets
 989 | add network load without establishing packet loss.  Note that sending probe
 990 | packets might cause the sender's bytes in flight to exceed the congestion window
 991 | until an acknowledgment is received that establishes loss or delivery of
 992 | packets.
 993 | 
 994 | ## Persistent Congestion {#persistent-congestion}
 995 | 
 996 | When a sender establishes loss of all packets sent over a long enough duration,
 997 | the network is considered to be experiencing persistent congestion.
 998 | 
 999 | ### Duration {#pc-duration}
1000 | 
1001 | The persistent congestion duration is computed as follows:
1002 | 
1003 | ~~~pseudocode
1004 | (smoothed_rtt + max(4*rttvar, kGranularity) + max_ack_delay) *
1005 |     kPersistentCongestionThreshold
1006 | ~~~
1007 | 
1008 | Unlike the PTO computation in {{pto}}, this duration includes the max_ack_delay
1009 | irrespective of the packet number spaces in which losses are established.
1010 | 
1011 | This duration allows a sender to send as many packets before establishing
1012 | persistent congestion, including some in response to PTO expiration, as TCP does
1013 | with Tail Loss Probes {{?RFC8985}} and an RTO {{?RFC5681}}.
1014 | 
1015 | Larger values of kPersistentCongestionThreshold cause the sender to become less
1016 | responsive to persistent congestion in the network, which can result in
1017 | aggressive sending into a congested network. Too small a value can result in a
1018 | sender declaring persistent congestion unnecessarily, resulting in reduced
1019 | throughput for the sender.
1020 | 
1021 | The RECOMMENDED value for kPersistentCongestionThreshold is 3, which results in
1022 | behavior that is approximately equivalent to a TCP sender declaring an RTO after
1023 | two TLPs.
1024 | 
1025 | This design does not use consecutive PTO events to establish persistent
1026 | congestion, since application patterns impact PTO expiration. For example, a
1027 | sender that sends small amounts of data with silence periods between them
1028 | restarts the PTO timer every time it sends, potentially preventing the PTO timer
1029 | from expiring for a long period of time, even when no acknowledgments are being
1030 | received. The use of a duration enables a sender to establish persistent
1031 | congestion without depending on PTO expiration.
1032 | 
1033 | ### Establishing Persistent Congestion
1034 | 
1035 | A sender establishes persistent congestion after the receipt of an
1036 | acknowledgment if two packets that are ack-eliciting are declared lost, and:
1037 | 
1038 | * across all packet number spaces, none of the packets sent between the send
1039 |   times of these two packets are acknowledged;
1040 | 
1041 | * the duration between the send times of these two packets exceeds the
1042 |   persistent congestion duration ({{pc-duration}}); and
1043 | 
1044 | * a prior RTT sample existed when these two packets were sent.
1045 | 
1046 | These two packets MUST be ack-eliciting, since a receiver is required to
1047 | acknowledge only ack-eliciting packets within its maximum acknowledgment delay;
1048 | see {{Section 13.2 of QUIC-TRANSPORT}}.
1049 | 
1050 | The persistent congestion period SHOULD NOT start until there is at least one
1051 | RTT sample. Before the first RTT sample, a sender arms its PTO timer based on
1052 | the initial RTT ({{pto-handshake}}), which could be substantially larger than
1053 | the actual RTT. Requiring a prior RTT sample prevents a sender from establishing
1054 | persistent congestion with potentially too few probes.
1055 | 
1056 | Since network congestion is not affected by packet number spaces, persistent
1057 | congestion SHOULD consider packets sent across packet number spaces. A sender
1058 | that does not have state for all packet number spaces or an implementation that
1059 | cannot compare send times across packet number spaces MAY use state for just the
1060 | packet number space that was acknowledged. This might result in erroneously
1061 | declaring persistent congestion, but it will not lead to a failure to detect
1062 | persistent congestion.
1063 | 
1064 | When persistent congestion is declared, the sender's congestion window MUST be
1065 | reduced to the minimum congestion window (kMinimumWindow), similar to a TCP
1066 | sender's response on an RTO {{RFC5681}}.
1067 | 
1068 | ### Example
1069 | 
1070 | The following example illustrates how a sender might establish persistent
1071 | congestion. Assume:
1072 | 
1073 | ~~~pseudocode
1074 | smoothed_rtt + max(4*rttvar, kGranularity) + max_ack_delay = 2
1075 | kPersistentCongestionThreshold = 3
1076 | ~~~
1077 | 
1078 | Consider the following sequence of events:
1079 | 
1080 | | Time   |              Action               |
1081 | |:-------|:----------------------------------|
1082 | | t=0    | Send packet #1 (application data) |
1083 | | t=1    | Send packet #2 (application data) |
1084 | | t=1.2  | Receive acknowledgment of #1      |
1085 | | t=2    | Send packet #3 (application data) |
1086 | | t=3    | Send packet #4 (application data) |
1087 | | t=4    | Send packet #5 (application data) |
1088 | | t=5    | Send packet #6 (application data) |
1089 | | t=6    | Send packet #7 (application data) |
1090 | | t=8    | Send packet #8 (PTO 1)            |
1091 | | t=12   | Send packet #9 (PTO 2)            |
1092 | | t=12.2 | Receive acknowledgment of #9      |
1093 | 
1094 | Packets 2 through 8 are declared lost when the acknowledgment for packet 9 is
1095 | received at `t = 12.2`.
1096 | 
1097 | The congestion period is calculated as the time between the oldest and newest
1098 | lost packets: `8 - 1 = 7`.  The persistent congestion duration is `2 * 3 = 6`.
1099 | Because the threshold was reached and because none of the packets between the
1100 | oldest and the newest lost packets were acknowledged, the network is considered
1101 | to have experienced persistent congestion.
1102 | 
1103 | While this example shows PTO expirations, they are not required for persistent
1104 | congestion to be established.
1105 | 
1106 | 
1107 | ## Pacing {#pacing}
1108 | 
1109 | A sender SHOULD pace sending of all in-flight packets based on input from the
1110 | congestion controller.
1111 | 
1112 | Sending multiple packets into the network without any delay between them creates
1113 | a packet burst that might cause short-term congestion and losses. Senders MUST
1114 | either use pacing or limit such bursts. Senders SHOULD limit bursts to the
1115 | initial congestion window; see {{initial-cwnd}}. A sender with knowledge that
1116 | the network path to the receiver can absorb larger bursts MAY use a higher
1117 | limit.
1118 | 
1119 | An implementation should take care to architect its congestion controller to
1120 | work well with a pacer.  For instance, a pacer might wrap the congestion
1121 | controller and control the availability of the congestion window, or a pacer
1122 | might pace out packets handed to it by the congestion controller.
1123 | 
1124 | Timely delivery of ACK frames is important for efficient loss recovery. To avoid
1125 | delaying their delivery to the peer, packets containing only ACK frames SHOULD
1126 | therefore not be paced.
1127 | 
1128 | Endpoints can implement pacing as they choose. A perfectly paced sender spreads
1129 | packets exactly evenly over time. For a window-based congestion controller, such
1130 | as the one in this document, that rate can be computed by averaging the
1131 | congestion window over the RTT. Expressed as a rate in units of
1132 | bytes per time, where congestion_window is in bytes:
1133 | 
1134 | ~~~pseudocode
1135 | rate = N * congestion_window / smoothed_rtt
1136 | ~~~
1137 | 
1138 | Or expressed as an inter-packet interval in units of time:
1139 | 
1140 | ~~~pseudocode
1141 | interval = ( smoothed_rtt * packet_size / congestion_window ) / N
1142 | ~~~
1143 | 
1144 | Using a value for `N` that is small, but at least 1 (for example, 1.25) ensures
1145 | that variations in RTT do not result in underutilization of the
1146 | congestion window.
1147 | 
1148 | Practical considerations, such as packetization, scheduling delays, and
1149 | computational efficiency, can cause a sender to deviate from this rate over time
1150 | periods that are much shorter than an RTT.
1151 | 
1152 | One possible implementation strategy for pacing uses a leaky bucket algorithm,
1153 | where the capacity of the "bucket" is limited to the maximum burst size and the
1154 | rate the "bucket" fills is determined by the above function.
1155 | 
1156 | ## Underutilizing the Congestion Window
1157 | 
1158 | When bytes in flight is smaller than the congestion window and sending is not
1159 | pacing limited, the congestion window is underutilized. This can happen due to
1160 | insufficient application data or flow control limits. When this occurs,
1161 | the congestion window SHOULD NOT be increased in either slow start or
1162 | congestion avoidance.
1163 | 
1164 | A sender that paces packets (see {{pacing}}) might delay sending packets
1165 | and not fully utilize the congestion window due to this delay. A sender
1166 | SHOULD NOT consider itself application limited if it would have fully
1167 | utilized the congestion window without pacing delay.
1168 | 
1169 | A sender MAY implement alternative mechanisms to update its congestion window
1170 | after periods of underutilization, such as those proposed for TCP in
1171 | {{?RFC7661}}.
1172 | 
1173 | 
1174 | # Security Considerations
1175 | 
1176 | ## Loss and Congestion Signals
1177 | 
1178 | Loss detection and congestion control fundamentally involve the consumption of
1179 | signals, such as delay, loss, and ECN markings, from unauthenticated
1180 | entities. An attacker can cause endpoints to reduce their sending rate by
1181 | manipulating these signals: by dropping packets, by altering path delay
1182 | strategically, or by changing ECN codepoints.
1183 | 
1184 | ## Traffic Analysis
1185 | 
1186 | Packets that carry only ACK frames can be heuristically identified by observing
1187 | packet size.  Acknowledgment patterns may expose information about link
1188 | characteristics or application behavior.  To reduce leaked information,
1189 | endpoints can bundle acknowledgments with other frames, or they can use PADDING
1190 | frames at a potential cost to performance.
1191 | 
1192 | ## Misreporting ECN Markings
1193 | 
1194 | A receiver can misreport ECN markings to alter the congestion response of a
1195 | sender.  Suppressing reports of ECN-CE markings could cause a sender to
1196 | increase its send rate.  This increase could result in congestion and loss.
1197 | 
1198 | A sender can detect suppression of reports by marking occasional packets that it
1199 | sends with an ECN-CE marking. If a packet sent with an ECN-CE marking is not
1200 | reported as having been CE marked when the packet is acknowledged, then the
1201 | sender can disable ECN for that path by not setting ECN-Capable Transport (ECT)
1202 | codepoints in subsequent packets sent on that path {{!RFC3168}}.
1203 | 
1204 | Reporting additional ECN-CE markings will cause a sender to reduce its sending
1205 | rate, which is similar in effect to advertising reduced connection flow control
1206 | limits and so no advantage is gained by doing so.
1207 | 
1208 | Endpoints choose the congestion controller that they use. Congestion controllers
1209 | respond to reports of ECN-CE by reducing their rate, but the response may vary.
1210 | Markings can be treated as equivalent to loss {{!RFC3168}}, but other
1211 | responses can be specified, such as {{?RFC8511}} or {{?RFC8311}}.
1212 | 
1213 | 
1214 | --- back
1215 | 
1216 | # Loss Recovery Pseudocode
1217 | 
1218 | We now describe an example implementation of the loss detection mechanisms
1219 | described in {{loss-detection}}.
1220 | 
1221 | The pseudocode segments in this section are licensed as Code Components; see the
1222 | copyright notice.
1223 | 
1224 | ## Tracking Sent Packets {#tracking-sent-packets}
1225 | 
1226 | To correctly implement congestion control, a QUIC sender tracks every
1227 | ack-eliciting packet until the packet is acknowledged or lost.
1228 | It is expected that implementations will be able to access this information by
1229 | packet number and crypto context and store the per-packet fields
1230 | ({{sent-packets-fields}}) for loss recovery and congestion control.
1231 | 
1232 | After a packet is declared lost, the endpoint can still maintain state for it
1233 | for an amount of time to allow for packet reordering; see {{Section 13.3 of
1234 | QUIC-TRANSPORT}}. This enables a sender to detect spurious retransmissions.
1235 | 
1236 | Sent packets are tracked for each packet number space, and ACK
1237 | processing only applies to a single space.
1238 | 
1239 | ### Sent Packet Fields {#sent-packets-fields}
1240 | 
1241 | packet_number:
1242 | : The packet number of the sent packet.
1243 | 
1244 | ack_eliciting:
1245 | : A Boolean that indicates whether a packet is ack-eliciting.
1246 |   If true, it is expected that an acknowledgment will be received,
1247 |   though the peer could delay sending the ACK frame containing it
1248 |   by up to the max_ack_delay.
1249 | 
1250 | in_flight:
1251 | : A Boolean that indicates whether the packet counts toward bytes in
1252 |   flight.
1253 | 
1254 | sent_bytes:
1255 | : The number of bytes sent in the packet, not including UDP or IP
1256 |   overhead, but including QUIC framing overhead.
1257 | 
1258 | time_sent:
1259 | : The time the packet was sent.
1260 | 
1261 | 
1262 | ## Constants of Interest {#constants-of-interest}
1263 | 
1264 | Constants used in loss recovery are based on a combination of RFCs, papers, and
1265 | common practice.
1266 | 
1267 | kPacketThreshold:
1268 | : Maximum reordering in packets before packet threshold loss detection
1269 |   considers a packet lost. The value recommended in {{packet-threshold}} is 3.
1270 | 
1271 | kTimeThreshold:
1272 | 
1273 | : Maximum reordering in time before time threshold loss detection
1274 |   considers a packet lost. Specified as an RTT multiplier. The value
1275 |   recommended in {{time-threshold}} is 9/8.
1276 | 
1277 | kGranularity:
1278 | 
1279 | : Timer granularity. This is a system-dependent value, and {{time-threshold}}
1280 |   recommends a value of 1 ms.
1281 | 
1282 | kInitialRtt:
1283 | : The RTT used before an RTT sample is taken. The value recommended in
1284 | {{pto-handshake}} is 333 ms.
1285 | 
1286 | kPacketNumberSpace:
1287 | : An enum to enumerate the three packet number spaces:
1288 | 
1289 | ~~~
1290 | enum kPacketNumberSpace {
1291 |   Initial,
1292 |   Handshake,
1293 |   ApplicationData,
1294 | }
1295 | ~~~
1296 | 
1297 | ## Variables of Interest {#ld-vars-of-interest}
1298 | 
1299 | Variables required to implement the loss detection mechanisms
1300 | are described in this section.
1301 | 
1302 | latest_rtt:
1303 | : The most recent RTT measurement made when receiving an acknowledgment for
1304 |   a previously unacknowledged packet.
1305 | 
1306 | smoothed_rtt:
1307 | : The smoothed RTT of the connection, computed as described in
1308 |   {{smoothed-rtt}}.
1309 | 
1310 | rttvar:
1311 | : The RTT variation, computed as described in {{smoothed-rtt}}.
1312 | 
1313 | min_rtt:
1314 | : The minimum RTT seen over a period of time, ignoring acknowledgment delay, as
1315 |   described in {{min-rtt}}.
1316 | 
1317 | first_rtt_sample:
1318 | : The time that the first RTT sample was obtained.
1319 | 
1320 | max_ack_delay:
1321 | : The maximum amount of time by which the receiver intends to delay
1322 |   acknowledgments for packets in the Application Data packet number
1323 |   space, as defined by the eponymous transport parameter ({{Section 18.2
1324 |   of QUIC-TRANSPORT}}). Note that the actual ack_delay in a received
1325 |   ACK frame may be larger due to late timers, reordering, or loss.
1326 | 
1327 | loss_detection_timer:
1328 | : Multi-modal timer used for loss detection.
1329 | 
1330 | pto_count:
1331 | : The number of times a PTO has been sent without receiving an acknowledgment.
1332 | 
1333 | time_of_last_ack_eliciting_packet\[kPacketNumberSpace]:
1334 | : The time the most recent ack-eliciting packet was sent.
1335 | 
1336 | largest_acked_packet\[kPacketNumberSpace]:
1337 | : The largest packet number acknowledged in the packet number space so far.
1338 | 
1339 | loss_time\[kPacketNumberSpace]:
1340 | : The time at which the next packet in that packet number space can be
1341 |   considered lost based on exceeding the reordering window in time.
1342 | 
1343 | sent_packets\[kPacketNumberSpace]:
1344 | : An association of packet numbers in a packet number space to information
1345 |   about them.  Described in detail above in {{tracking-sent-packets}}.
1346 | 
1347 | 
1348 | ## Initialization
1349 | 
1350 | At the beginning of the connection, initialize the loss detection variables as
1351 | follows:
1352 | 
1353 | ~~~pseudocode
1354 | loss_detection_timer.reset()
1355 | pto_count = 0
1356 | latest_rtt = 0
1357 | smoothed_rtt = kInitialRtt
1358 | rttvar = kInitialRtt / 2
1359 | min_rtt = 0
1360 | first_rtt_sample = 0
1361 | for pn_space in [ Initial, Handshake, ApplicationData ]:
1362 |   largest_acked_packet[pn_space] = infinite
1363 |   time_of_last_ack_eliciting_packet[pn_space] = 0
1364 |   loss_time[pn_space] = 0
1365 | ~~~
1366 | 
1367 | 
1368 | ## On Sending a Packet
1369 | 
1370 | After a packet is sent, information about the packet is stored.  The parameters
1371 | to OnPacketSent are described in detail above in {{sent-packets-fields}}.
1372 | 
1373 | Pseudocode for OnPacketSent follows:
1374 | 
1375 | ~~~pseudocode
1376 | OnPacketSent(packet_number, pn_space, ack_eliciting,
1377 |              in_flight, sent_bytes):
1378 |   sent_packets[pn_space][packet_number].packet_number =
1379 |                                            packet_number
1380 |   sent_packets[pn_space][packet_number].time_sent = now()
1381 |   sent_packets[pn_space][packet_number].ack_eliciting =
1382 |                                            ack_eliciting
1383 |   sent_packets[pn_space][packet_number].in_flight = in_flight
1384 |   sent_packets[pn_space][packet_number].sent_bytes = sent_bytes
1385 |   if (in_flight):
1386 |     if (ack_eliciting):
1387 |       time_of_last_ack_eliciting_packet[pn_space] = now()
1388 |     OnPacketSentCC(sent_bytes)
1389 |     SetLossDetectionTimer()
1390 | ~~~
1391 | 
1392 | ## On Receiving a Datagram
1393 | 
1394 | When a server is blocked by anti-amplification limits, receiving
1395 | a datagram unblocks it, even if none of the packets in the
1396 | datagram are successfully processed. In such a case, the PTO
1397 | timer will need to be rearmed.
1398 | 
1399 | Pseudocode for OnDatagramReceived follows:
1400 | 
1401 | ~~~pseudocode
1402 | OnDatagramReceived(datagram):
1403 |   // If this datagram unblocks the server, arm the
1404 |   // PTO timer to avoid deadlock.
1405 |   if (server was at anti-amplification limit):
1406 |     SetLossDetectionTimer()
1407 |     if loss_detection_timer.timeout < now():
1408 |       // Execute PTO if it would have expired
1409 |       // while the amplification limit applied.
1410 |       OnLossDetectionTimeout()
1411 | ~~~
1412 | 
1413 | ## On Receiving an Acknowledgment
1414 | 
1415 | When an ACK frame is received, it may newly acknowledge any number of packets.
1416 | 
1417 | Pseudocode for OnAckReceived and UpdateRtt follows:
1418 | 
1419 | ~~~pseudocode
1420 | IncludesAckEliciting(packets):
1421 |   for packet in packets:
1422 |     if (packet.ack_eliciting):
1423 |       return true
1424 |   return false
1425 | 
1426 | OnAckReceived(ack, pn_space):
1427 |   if (largest_acked_packet[pn_space] == infinite):
1428 |     largest_acked_packet[pn_space] = ack.largest_acked
1429 |   else:
1430 |     largest_acked_packet[pn_space] =
1431 |         max(largest_acked_packet[pn_space], ack.largest_acked)
1432 | 
1433 |   // DetectAndRemoveAckedPackets finds packets that are newly
1434 |   // acknowledged and removes them from sent_packets.
1435 |   newly_acked_packets =
1436 |       DetectAndRemoveAckedPackets(ack, pn_space)
1437 |   // Nothing to do if there are no newly acked packets.
1438 |   if (newly_acked_packets.empty()):
1439 |     return
1440 | 
1441 |   // Update the RTT if the largest acknowledged is newly acked
1442 |   // and at least one ack-eliciting was newly acked.
1443 |   if (newly_acked_packets.largest().packet_number ==
1444 |           ack.largest_acked &&
1445 |       IncludesAckEliciting(newly_acked_packets)):
1446 |     latest_rtt =
1447 |       now() - newly_acked_packets.largest().time_sent
1448 |     UpdateRtt(ack.ack_delay)
1449 | 
1450 |   // Process ECN information if present.
1451 |   if (ACK frame contains ECN information):
1452 |       ProcessECN(ack, pn_space)
1453 | 
1454 |   lost_packets = DetectAndRemoveLostPackets(pn_space)
1455 |   if (!lost_packets.empty()):
1456 |     OnPacketsLost(lost_packets)
1457 |   OnPacketsAcked(newly_acked_packets)
1458 | 
1459 |   // Reset pto_count unless the client is unsure if
1460 |   // the server has validated the client's address.
1461 |   if (PeerCompletedAddressValidation()):
1462 |     pto_count = 0
1463 |   SetLossDetectionTimer()
1464 | 
1465 | 
1466 | UpdateRtt(ack_delay):
1467 |   if (first_rtt_sample == 0):
1468 |     min_rtt = latest_rtt
1469 |     smoothed_rtt = latest_rtt
1470 |     rttvar = latest_rtt / 2
1471 |     first_rtt_sample = now()
1472 |     return
1473 | 
1474 |   // min_rtt ignores acknowledgment delay.
1475 |   min_rtt = min(min_rtt, latest_rtt)
1476 |   // Limit ack_delay by max_ack_delay after handshake
1477 |   // confirmation.
1478 |   if (handshake confirmed):
1479 |     ack_delay = min(ack_delay, max_ack_delay)
1480 | 
1481 |   // Adjust for acknowledgment delay if plausible.
1482 |   adjusted_rtt = latest_rtt
1483 |   if (latest_rtt >= min_rtt + ack_delay):
1484 |     adjusted_rtt = latest_rtt - ack_delay
1485 | 
1486 |   rttvar = 3/4 * rttvar + 1/4 * abs(smoothed_rtt - adjusted_rtt)
1487 |   smoothed_rtt = 7/8 * smoothed_rtt + 1/8 * adjusted_rtt
1488 | ~~~
1489 | 
1490 | ## Setting the Loss Detection Timer
1491 | 
1492 | QUIC loss detection uses a single timer for all timeout loss detection.  The
1493 | duration of the timer is based on the timer's mode, which is set in the packet
1494 | and timer events further below.  The function SetLossDetectionTimer defined
1495 | below shows how the single timer is set.
1496 | 
1497 | This algorithm may result in the timer being set in the past, particularly if
1498 | timers wake up late. Timers set in the past fire immediately.
1499 | 
1500 | Pseudocode for SetLossDetectionTimer follows (where the "^" operator represents
1501 | exponentiation):
1502 | 
1503 | ~~~pseudocode
1504 | GetLossTimeAndSpace():
1505 |   time = loss_time[Initial]
1506 |   space = Initial
1507 |   for pn_space in [ Handshake, ApplicationData ]:
1508 |     if (loss_time[pn_space] != 0 &&
1509 |         (time == 0 || loss_time[pn_space] < time)):
1510 |       time, space = loss_time[pn_space], pn_space
1511 |   return time, space
1512 | 
1513 | GetPtoTimeAndSpace():
1514 |   duration = (smoothed_rtt + max(4 * rttvar, kGranularity))
1515 |       * (2 ^ pto_count)
1516 |   // Anti-deadlock PTO starts from the current time
1517 |   if (no ack-eliciting packets in flight):
1518 |     assert(!PeerCompletedAddressValidation())
1519 |     if (has handshake keys):
1520 |       return (now() + duration), Handshake
1521 |     else:
1522 |       return (now() + duration), Initial
1523 |   pto_timeout = infinite
1524 |   pto_space = Initial
1525 |   for space in [ Initial, Handshake, ApplicationData ]:
1526 |     if (no ack-eliciting packets in flight in space):
1527 |       continue
1528 |     if (space == ApplicationData):
1529 |       // Skip Application Data until handshake confirmed.
1530 |       if (handshake is not confirmed):
1531 |         return pto_timeout, pto_space
1532 |       // Include max_ack_delay and backoff for Application Data.
1533 |       duration += max_ack_delay * (2 ^ pto_count)
1534 | 
1535 |     t = time_of_last_ack_eliciting_packet[space] + duration
1536 |     if (t < pto_timeout):
1537 |       pto_timeout = t
1538 |       pto_space = space
1539 |   return pto_timeout, pto_space
1540 | 
1541 | PeerCompletedAddressValidation():
1542 |   // Assume clients validate the server's address implicitly.
1543 |   if (endpoint is server):
1544 |     return true
1545 |   // Servers complete address validation when a
1546 |   // protected packet is received.
1547 |   return has received Handshake ACK ||
1548 |        handshake confirmed
1549 | 
1550 | SetLossDetectionTimer():
1551 |   earliest_loss_time, _ = GetLossTimeAndSpace()
1552 |   if (earliest_loss_time != 0):
1553 |     // Time threshold loss detection.
1554 |     loss_detection_timer.update(earliest_loss_time)
1555 |     return
1556 | 
1557 |   if (server is at anti-amplification limit):
1558 |     // The server's timer is not set if nothing can be sent.
1559 |     loss_detection_timer.cancel()
1560 |     return
1561 | 
1562 |   if (no ack-eliciting packets in flight &&
1563 |       PeerCompletedAddressValidation()):
1564 |     // There is nothing to detect lost, so no timer is set.
1565 |     // However, the client needs to arm the timer if the
1566 |     // server might be blocked by the anti-amplification limit.
1567 |     loss_detection_timer.cancel()
1568 |     return
1569 | 
1570 |   timeout, _ = GetPtoTimeAndSpace()
1571 |   loss_detection_timer.update(timeout)
1572 | ~~~
1573 | 
1574 | 
1575 | ## On Timeout
1576 | 
1577 | When the loss detection timer expires, the timer's mode determines the action
1578 | to be performed.
1579 | 
1580 | Pseudocode for OnLossDetectionTimeout follows:
1581 | 
1582 | ~~~pseudocode
1583 | OnLossDetectionTimeout():
1584 |   earliest_loss_time, pn_space = GetLossTimeAndSpace()
1585 |   if (earliest_loss_time != 0):
1586 |     // Time threshold loss Detection
1587 |     lost_packets = DetectAndRemoveLostPackets(pn_space)
1588 |     assert(!lost_packets.empty())
1589 |     OnPacketsLost(lost_packets)
1590 |     SetLossDetectionTimer()
1591 |     return
1592 | 
1593 |   if (no ack-eliciting packets in flight):
1594 |     assert(!PeerCompletedAddressValidation())
1595 |     // Client sends an anti-deadlock packet: Initial is padded
1596 |     // to earn more anti-amplification credit,
1597 |     // a Handshake packet proves address ownership.
1598 |     if (has Handshake keys):
1599 |       SendOneAckElicitingHandshakePacket()
1600 |     else:
1601 |       SendOneAckElicitingPaddedInitialPacket()
1602 |   else:
1603 |     // PTO. Send new data if available, else retransmit old data.
1604 |     // If neither is available, send a single PING frame.
1605 |     _, pn_space = GetPtoTimeAndSpace()
1606 |     SendOneOrTwoAckElicitingPackets(pn_space)
1607 | 
1608 |   pto_count++
1609 |   SetLossDetectionTimer()
1610 | ~~~
1611 | 
1612 | 
1613 | ## Detecting Lost Packets
1614 | 
1615 | DetectAndRemoveLostPackets is called every time an ACK is received or the time
1616 | threshold loss detection timer expires. This function operates on the
1617 | sent_packets for that packet number space and returns a list of packets newly
1618 | detected as lost.
1619 | 
1620 | Pseudocode for DetectAndRemoveLostPackets follows:
1621 | 
1622 | ~~~pseudocode
1623 | DetectAndRemoveLostPackets(pn_space):
1624 |   assert(largest_acked_packet[pn_space] != infinite)
1625 |   loss_time[pn_space] = 0
1626 |   lost_packets = []
1627 |   loss_delay = kTimeThreshold * max(latest_rtt, smoothed_rtt)
1628 | 
1629 |   // Minimum time of kGranularity before packets are deemed lost.
1630 |   loss_delay = max(loss_delay, kGranularity)
1631 | 
1632 |   // Packets sent before this time are deemed lost.
1633 |   lost_send_time = now() - loss_delay
1634 | 
1635 |   foreach unacked in sent_packets[pn_space]:
1636 |     if (unacked.packet_number > largest_acked_packet[pn_space]):
1637 |       continue
1638 | 
1639 |     // Mark packet as lost, or set time when it should be marked.
1640 |     // Note: The use of kPacketThreshold here assumes that there
1641 |     // were no sender-induced gaps in the packet number space.
1642 |     if (unacked.time_sent <= lost_send_time ||
1643 |         largest_acked_packet[pn_space] >=
1644 |           unacked.packet_number + kPacketThreshold):
1645 |       sent_packets[pn_space].remove(unacked.packet_number)
1646 |       lost_packets.insert(unacked)
1647 |     else:
1648 |       if (loss_time[pn_space] == 0):
1649 |         loss_time[pn_space] = unacked.time_sent + loss_delay
1650 |       else:
1651 |         loss_time[pn_space] = min(loss_time[pn_space],
1652 |                                   unacked.time_sent + loss_delay)
1653 |   return lost_packets
1654 | ~~~
1655 | 
1656 | 
1657 | ## Upon Dropping Initial or Handshake Keys
1658 | 
1659 | When Initial or Handshake keys are discarded, packets from the space
1660 | are discarded and loss detection state is updated.
1661 | 
1662 | Pseudocode for OnPacketNumberSpaceDiscarded follows:
1663 | 
1664 | ~~~pseudocode
1665 | OnPacketNumberSpaceDiscarded(pn_space):
1666 |   assert(pn_space != ApplicationData)
1667 |   RemoveFromBytesInFlight(sent_packets[pn_space])
1668 |   sent_packets[pn_space].clear()
1669 |   // Reset the loss detection and PTO timer
1670 |   time_of_last_ack_eliciting_packet[pn_space] = 0
1671 |   loss_time[pn_space] = 0
1672 |   pto_count = 0
1673 |   SetLossDetectionTimer()
1674 | ~~~
1675 | 
1676 | 
1677 | # Congestion Control Pseudocode
1678 | 
1679 | We now describe an example implementation of the congestion controller described
1680 | in {{congestion-control}}.
1681 | 
1682 | The pseudocode segments in this section are licensed as Code Components; see the
1683 | copyright notice.
1684 | 
1685 | ## Constants of Interest {#cc-consts-of-interest}
1686 | 
1687 | Constants used in congestion control are based on a combination of RFCs, papers,
1688 | and common practice.
1689 | 
1690 | kInitialWindow:
1691 | : Default limit on the initial bytes in flight as described in {{initial-cwnd}}.
1692 | 
1693 | kMinimumWindow:
1694 | : Minimum congestion window in bytes as described in {{initial-cwnd}}.
1695 | 
1696 | kLossReductionFactor:
1697 | : Scaling factor applied to reduce the congestion window when a new loss event
1698 |   is detected. {{congestion-control}} recommends a value of 0.5.
1699 | 
1700 | kPersistentCongestionThreshold:
1701 | : Period of time for persistent congestion to be established, specified as a PTO
1702 |   multiplier. {{persistent-congestion}} recommends a value of 3.
1703 | 
1704 | 
1705 | ## Variables of Interest {#vars-of-interest}
1706 | 
1707 | Variables required to implement the congestion control mechanisms
1708 | are described in this section.
1709 | 
1710 | max_datagram_size:
1711 | : The sender's current maximum payload size. This does not include UDP or IP
1712 |   overhead.  The max datagram size is used for congestion window
1713 |   computations. An endpoint sets the value of this variable based on its Path
1714 |   Maximum Transmission Unit (PMTU; see {{Section 14.2 of QUIC-TRANSPORT}}), with
1715 |   a minimum value of 1200 bytes.
1716 | 
1717 | ecn_ce_counters\[kPacketNumberSpace]:
1718 | : The highest value reported for the ECN-CE counter in the packet number space
1719 |   by the peer in an ACK frame. This value is used to detect increases in the
1720 |   reported ECN-CE counter.
1721 | 
1722 | bytes_in_flight:
1723 | : The sum of the size in bytes of all sent packets that contain at least one
1724 |   ack-eliciting or PADDING frame and have not been acknowledged or declared
1725 |   lost. The size does not include IP or UDP overhead, but does include the QUIC
1726 |   header and Authenticated Encryption with Associated Data (AEAD) overhead.
1727 |   Packets only containing ACK frames do not count toward bytes_in_flight to
1728 |   ensure congestion control does not impede congestion feedback.
1729 | 
1730 | congestion_window:
1731 | : Maximum number of bytes allowed to be in flight.
1732 | 
1733 | congestion_recovery_start_time:
1734 | : The time the current recovery period started due to the detection of loss
1735 |   or ECN. When a packet sent after this time is acknowledged, QUIC exits
1736 |   congestion recovery.
1737 | 
1738 | ssthresh:
1739 | : Slow start threshold in bytes.  When the congestion window is below ssthresh,
1740 |   the mode is slow start and the window grows by the number of bytes
1741 |   acknowledged.
1742 | 
1743 | The congestion control pseudocode also accesses some of the variables from the
1744 | loss recovery pseudocode.
1745 | 
1746 | ## Initialization
1747 | 
1748 | At the beginning of the connection, initialize the congestion control
1749 | variables as follows:
1750 | 
1751 | ~~~pseudocode
1752 | congestion_window = kInitialWindow
1753 | bytes_in_flight = 0
1754 | congestion_recovery_start_time = 0
1755 | ssthresh = infinite
1756 | for pn_space in [ Initial, Handshake, ApplicationData ]:
1757 |   ecn_ce_counters[pn_space] = 0
1758 | ~~~
1759 | 
1760 | 
1761 | ## On Packet Sent
1762 | 
1763 | Whenever a packet is sent and it contains non-ACK frames, the packet
1764 | increases bytes_in_flight.
1765 | 
1766 | ~~~pseudocode
1767 | OnPacketSentCC(sent_bytes):
1768 |   bytes_in_flight += sent_bytes
1769 | ~~~
1770 | 
1771 | 
1772 | ## On Packet Acknowledgment
1773 | 
1774 | This is invoked from loss detection's OnAckReceived and is supplied with the
1775 | newly acked_packets from sent_packets.
1776 | 
1777 | In congestion avoidance, implementers that use an integer representation
1778 | for congestion_window should be careful with division and can use
1779 | the alternative approach suggested in {{Section 2.1 of RFC3465}}.
1780 | 
1781 | ~~~pseudocode
1782 | InCongestionRecovery(sent_time):
1783 |   return sent_time <= congestion_recovery_start_time
1784 | 
1785 | OnPacketsAcked(acked_packets):
1786 |   for acked_packet in acked_packets:
1787 |     OnPacketAcked(acked_packet)
1788 | 
1789 | OnPacketAcked(acked_packet):
1790 |   if (!acked_packet.in_flight):
1791 |     return
1792 |   // Remove from bytes_in_flight.
1793 |   bytes_in_flight -= acked_packet.sent_bytes
1794 |   // Do not increase congestion_window if application
1795 |   // limited or flow control limited.
1796 |   if (IsAppOrFlowControlLimited()):
1797 |     return
1798 |   // Do not increase congestion window in recovery period.
1799 |   if (InCongestionRecovery(acked_packet.time_sent)):
1800 |     return
1801 |   if (congestion_window < ssthresh):
1802 |     // Slow start.
1803 |     congestion_window += acked_packet.sent_bytes
1804 |   else:
1805 |     // Congestion avoidance.
1806 |     congestion_window +=
1807 |       max_datagram_size * acked_packet.sent_bytes
1808 |       / congestion_window
1809 | ~~~
1810 | 
1811 | 
1812 | ## On New Congestion Event
1813 | 
1814 | This is invoked from ProcessECN and OnPacketsLost when a new congestion event is
1815 | detected. If not already in recovery, this starts a recovery period and
1816 | reduces the slow start threshold and congestion window immediately.
1817 | 
1818 | ~~~pseudocode
1819 | OnCongestionEvent(sent_time):
1820 |   // No reaction if already in a recovery period.
1821 |   if (InCongestionRecovery(sent_time)):
1822 |     return
1823 | 
1824 |   // Enter recovery period.
1825 |   congestion_recovery_start_time = now()
1826 |   ssthresh = congestion_window * kLossReductionFactor
1827 |   congestion_window = max(ssthresh, kMinimumWindow)
1828 |   // A packet can be sent to speed up loss recovery.
1829 |   MaybeSendOnePacket()
1830 | ~~~
1831 | 
1832 | 
1833 | ## Process ECN Information
1834 | 
1835 | This is invoked when an ACK frame with an ECN section is received from the peer.
1836 | 
1837 | ~~~pseudocode
1838 | ProcessECN(ack, pn_space):
1839 |   // If the ECN-CE counter reported by the peer has increased,
1840 |   // this could be a new congestion event.
1841 |   if (ack.ce_counter > ecn_ce_counters[pn_space]):
1842 |     ecn_ce_counters[pn_space] = ack.ce_counter
1843 |     sent_time = sent_packets[pn_space][ack.largest_acked].time_sent
1844 |     OnCongestionEvent(sent_time)
1845 | ~~~
1846 | 
1847 | 
1848 | ## On Packets Lost
1849 | 
1850 | This is invoked when DetectAndRemoveLostPackets deems packets lost.
1851 | 
1852 | ~~~pseudocode
1853 | OnPacketsLost(lost_packets):
1854 |   sent_time_of_last_loss = 0
1855 |   // Remove lost packets from bytes_in_flight.
1856 |   for lost_packet in lost_packets:
1857 |     if lost_packet.in_flight:
1858 |       bytes_in_flight -= lost_packet.sent_bytes
1859 |       sent_time_of_last_loss =
1860 |         max(sent_time_of_last_loss, lost_packet.time_sent)
1861 |   // Congestion event if in-flight packets were lost
1862 |   if (sent_time_of_last_loss != 0):
1863 |     OnCongestionEvent(sent_time_of_last_loss)
1864 | 
1865 |   // Reset the congestion window if the loss of these
1866 |   // packets indicates persistent congestion.
1867 |   // Only consider packets sent after getting an RTT sample.
1868 |   if (first_rtt_sample == 0):
1869 |     return
1870 |   pc_lost = []
1871 |   for lost in lost_packets:
1872 |     if lost.time_sent > first_rtt_sample:
1873 |       pc_lost.insert(lost)
1874 |   if (InPersistentCongestion(pc_lost)):
1875 |     congestion_window = kMinimumWindow
1876 |     congestion_recovery_start_time = 0
1877 | ~~~
1878 | 
1879 | 
1880 | ## Removing Discarded Packets from Bytes in Flight
1881 | 
1882 | When Initial or Handshake keys are discarded, packets sent in that space no
1883 | longer count toward bytes in flight.
1884 | 
1885 | Pseudocode for RemoveFromBytesInFlight follows:
1886 | 
1887 | ~~~pseudocode
1888 | RemoveFromBytesInFlight(discarded_packets):
1889 |   // Remove any unacknowledged packets from flight.
1890 |   foreach packet in discarded_packets:
1891 |     if packet.in_flight:
1892 |       bytes_in_flight -= packet.sent_bytes
1893 | ~~~
1894 | 
1895 | 
1896 | # Contributors
1897 | {: numbered="false"}
1898 | 
1899 | The IETF QUIC Working Group received an enormous amount of support from many
1900 | people. The following people provided substantive contributions to this
1901 | document:
1902 | 
1903 | <ul spacing="compact">
1904 | <li><t><contact fullname="Alessandro Ghedini"/></t></li>
1905 | <li><t><contact fullname="Benjamin Saunders"/></t></li>
1906 | <li><t><contact fullname="Gorry Fairhurst"/></t></li>
1907 | <li><t><contact asciiFullname="Kazu Yamamoto" fullname="山本和彦"/></t></li>
1908 | <li><t><contact asciiFullname="Kazuho Oku" fullname="奥 一穂"/></t></li>
1909 | <li><t><contact fullname="Lars Eggert"/></t></li>
1910 | <li><t><contact fullname="Magnus Westerlund"/></t></li>
1911 | <li><t><contact fullname="Marten Seemann"/></t></li>
1912 | <li><t><contact fullname="Martin Duke"/></t></li>
1913 | <li><t><contact fullname="Martin Thomson"/></t></li>
1914 | <li><t><contact fullname="Mirja Kühlewind"/></t></li>
1915 | <li><t><contact fullname="Nick Banks"/></t></li>
1916 | <li><t><contact fullname="Praveen Balasubramanian"/></t></li>
1917 | </ul>
1918 | 


--------------------------------------------------------------------------------
/tag.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Tag files for submission.
 3 | #
 4 | # You shouldn't need to use this unless you are tagging files for which you are
 5 | # not an author.  Use `git tag -a` instead.
 6 | #
 7 | # This script exists because
 8 | # https://trac.tools.ietf.org/tools/ietfdb/ticket/2390 still isn't fixed.
 9 | #
10 | # Usage: tag.sh [name...]
11 | #   Each name is a draft short name (the part after "draft-ietf-quic-").
12 | #   With no arguments, all of the default drafts listed below are tagged.
13 | 
14 | # Short names of the drafts to tag.
15 | if [[ $# -eq 0 ]]; then
16 |     files=(invariants transport tls recovery http qpack)
17 | else
18 |     files=("$@")
19 | fi
20 | 
21 | # enabled NEEDLE [CANDIDATE...]
22 | # Succeeds when NEEDLE equals one of the CANDIDATE arguments.
23 | enabled() {
24 |     # Locals keep this helper from overwriting the caller's variables.
25 |     local needle="$1" candidate
26 |     shift
27 |     for candidate; do
28 |         [[ "$candidate" == "$needle" ]] && return 0
29 |     done
30 |     return 1
31 | }
32 | 
33 | # Email address passed to git as user.email when tagging each draft.
34 | declare -A authors=( \
35 |     [transport]=mt@lowentropy.net \
36 |     [tls]=mt@lowentropy.net \
37 |     [recovery]=ianswett@google.com \
38 |     [http]=mbishop@evequefou.be \
39 |     [invariants]=mt@lowentropy.net \
40 |     [qpack]=afrind@fb.com \
41 | )
42 | 
43 | if ! make; then
44 |     echo "FAILED TO BUILD STOP" 1>&2
45 |     exit 1
46 | fi
47 | 
48 | # The output of `make show-next` is split on whitespace; each word is
49 | # treated as the name of a tag to create.
50 | all=($(make show-next))
51 | tags=()
52 | thisuser=$(git config --get user.name)
53 | 
54 | for t in "${all[@]}"; do
55 |     # Reduce e.g. "draft-ietf-quic-tls-31" to the short name "tls".
56 |     r="${t%-[0-9][0-9]}"
57 |     r="${r#draft-ietf-quic-}"
58 |     if enabled "$r" "${files[@]}"; then
59 |         # Without a known author there is no email to tag with.
60 |         if [[ -z "${authors[$r]}" ]]; then
61 |             echo "No author email known for $r; not tagging $t" 1>&2
62 |             continue
63 |         fi
64 |         message="Tag for $t created by $thisuser"
65 |         # Queue the tag for pushing only if it was actually created.
66 |         if git -c user.email="${authors[$r]}" tag -am "$message" "$t"; then
67 |             tags+=("$t")
68 |         fi
69 |     fi
70 | done
71 | for t in "${tags[@]}"; do
72 |     git push origin "$t"
73 | done
74 | 


--------------------------------------------------------------------------------
/writeups/base-drafts.md:
--------------------------------------------------------------------------------
  1 | # Shepherd Writeup for QUIC "base drafts"
  2 | 
  3 | ## 1. Summary
  4 | <!--
  5 | (1) What type of RFC is being requested (BCP, Proposed Standard, Internet
  6 | Standard, Informational, Experimental, or Historic)? Why is this the proper type
  7 | of RFC? Is this type of RFC indicated in the title page header?
  8 | -->
  9 | 
 10 | This publication request covers the following I-Ds that together define the
 11 | QUIC protocol:
 12 | 
 13 | * **QUIC: A UDP-Based Multiplexed and Secure Transport**,
 14 |   draft-ietf-quic-transport-31
 15 | * **QUIC Loss Detection and Congestion Control**, draft-ietf-quic-recovery-31
 16 | * **Using TLS to Secure QUIC**, draft-ietf-quic-tls-31
 17 | * **Version-Independent Properties of QUIC**, draft-ietf-quic-invariants-11
 18 | * **Hypertext Transfer Protocol Version 3 (HTTP/3)**, draft-ietf-quic-http-31
 19 | * **QPACK: Header Compression for HTTP/3**, draft-ietf-quic-qpack-18
 20 | 
 21 | All of these I-Ds are intended to become Proposed Standard RFCs, and that
 22 | intended status is indicated in their respective title page headers.
 23 | 
 24 | 
 25 | ## 2. Document Announcement Write-Up
 26 | <!--
 27 | (2) The IESG approval announcement includes a Document Announcement Write-Up.
 28 | Please provide such a Document Announcement Write-Up. Recent examples can be
 29 | found in the "Action" announcements for approved documents. The approval
 30 | announcement contains the following sections:
 31 | -->
 32 | 
 33 | 
 34 | ### Technical Summary:
 35 | <!--
 36 | Relevant content can frequently be found in the abstract and/or introduction of
 37 | the document. If not, this may be an indication that there are deficiencies in
 38 | the abstract or introduction.
 39 | -->
 40 | 
 41 | QUIC is a standards-track, UDP-based, stream-multiplexing, encrypted transport
 42 | protocol. Its main features are minimizing connection establishment and overall
 43 | transport latency for applications such as HTTP/3, providing multiplexing
 44 | without head-of-line blocking, requiring only changes to path endpoints to
 45 | enable deployment, and providing always-secure transport using TLS 1.3.
 46 | 
 47 | This document set specifies the QUIC transport protocol and its
 48 | version-independent invariants, its loss detection and recovery approach, its
 49 | use of TLS 1.3 for providing security, and a new version of HTTP that uses QUIC
 50 | (HTTP/3), along with QPACK for header compression in that protocol.
 51 | 
 52 | 
 53 | ### Working Group Summary:
 54 | <!--
 55 | Was there anything in WG process that is worth noting? For example, was there
 56 | controversy about particular points or were there decisions where the consensus
 57 | was particularly rough?
 58 | -->
 59 | 
 60 | As can be expected, discussion on many aspects of QUIC was quite intense. The
 61 | resulting consensus, however, was judged by the chairs to be both strong and broad.
 62 | 
 63 | 
 64 | ### Document Quality:
 65 | <!--
 66 | Are there existing implementations of the protocol? Have a significant number of
 67 | vendors indicated their plan to implement the specification? Are there any
 68 | reviewers that merit special mention as having done a thorough review, e.g., one
 69 | that resulted in important changes or a conclusion that the document had no
 70 | substantive issues? If there was a MIB Doctor, YANG Doctor, Media Type or other
 71 | expert review, what was its course (briefly)? In the case of a Media Type
 72 | review, on what date was the request posted?
 73 | -->
 74 | 
 75 | There are over twenty implementations of QUIC that are participating in interop
 76 | testing, including all major web browsers and many server, CDN and standalone
 77 | library implementations.
 78 | 
 79 | The acknowledgements sections of the I-Ds highlight the individuals that made
 80 | major contributions to a given document.
 81 | 
 82 | 
 83 | ### Personnel:
 84 | <!-- Who is the Document Shepherd? Who is the Responsible Area Director? -->
 85 | 
 86 | The document shepherds for the individual I-Ds are:
 87 | 
 88 | * **Lucas Pardue**:
 89 |   * draft-ietf-quic-http-31
 90 |   * draft-ietf-quic-qpack-18
 91 | * **Lars Eggert**:
 92 |   * draft-ietf-quic-transport-31
 93 |   * draft-ietf-quic-recovery-31
 94 | * **Mark Nottingham**:
 95 |   * draft-ietf-quic-tls-31
 96 |   * draft-ietf-quic-invariants-11
 97 | 
 98 | The responsible AD for the document set is Magnus Westerlund.
 99 | 
100 | 
101 | ## 3. Document Shepherd Review
102 | <!--
103 | (3) Briefly describe the review of this document that was performed by the
104 | Document Shepherd. If this version of the document is not ready for publication,
105 | please explain why the document is being forwarded to the IESG.
106 | -->
107 | 
108 | The document shepherds extensively reviewed the documents before this
109 | publication request.
110 | 
111 | 
112 | ## 4. Document Shepherd Review Concerns
113 | <!--
114 | (4) Does the document Shepherd have any concerns about the depth or breadth of
115 | the reviews that have been performed?
116 | -->
117 | 
118 | The document shepherds have no concerns about the depth or breadth of the
119 | reviews for these documents.
120 | 
121 | 
122 | ## 5. Broader Reviews
123 | <!--
124 | (5) Do portions of the document need review from a particular or from broader
125 | perspective, e.g., security, operational complexity, AAA, DNS, DHCP, XML, or
126 | internationalization? If so, describe the review that took place.
127 | -->
128 | 
129 | Parts of the document set benefited from specialized reviews from the TLS, HTTP
130 | and transport IETF communities.
131 | 
132 | 
133 | ## 6. Document Shepherd General Concerns
134 | <!--
135 | (6) Describe any specific concerns or issues that the Document Shepherd has with
136 | this document that the Responsible Area Director and/or the IESG should be aware
137 | of? For example, perhaps he or she is uncomfortable with certain parts of the
138 | document, or has concerns whether there really is a need for it. In any event,
139 | if the WG has discussed those issues and has indicated that it still wishes to
140 | advance the document, detail those concerns here.
141 | -->
142 | 
143 | The document shepherds have no general concerns about these documents.
144 | 
145 | 
146 | ## 7. IPR Disclosure Obligation
147 | <!--
148 | (7) Has each author confirmed that any and all appropriate IPR disclosures
149 | required for full conformance with the provisions of BCP 78 and BCP 79 have
150 | already been filed. If not, explain why?
151 | -->
152 | 
153 | The editors of the I-Ds have all declared that they have filed any and all
154 | appropriate IPR disclosures required for full conformance with the provisions of
155 | BCP 78 and BCP 79.
156 | 
157 | 
158 | ## 8. Filed IPR Disclosures
159 | <!--
160 | (8) Has an IPR disclosure been filed that references this document? If so,
161 | summarize any WG discussion and conclusion regarding the IPR disclosures.
162 | -->
163 | 
164 | draft-ietf-quic-recovery has had an IPR disclosure filed on it. No resulting
165 | technical changes were argued for.
166 | 
167 | 
168 | ## 9. Strength of Consensus
169 | <!--
170 | (9) How solid is the WG consensus behind this document? Does it represent the
171 | strong concurrence of a few individuals, with others being silent, or does the
172 | WG as a whole understand and agree with it?
173 | -->
174 | 
175 | The consensus behind the document set is very strong, also as evidenced by the
176 | substantial number of existing implementations.
177 | 
178 | The WG last calls were forwarded to the TLS and HTTP WGs, due to the topical
179 | relationships.
180 | 
181 | 
182 | ## 10. Discontent
183 | <!--
184 | (10) Has anyone threatened an appeal or otherwise indicated extreme discontent?
185 | If so, please summarise the areas of conflict in separate email messages to the
186 | Responsible Area Director. (It should be in a separate email because this
187 | questionnaire is publicly available.)
188 | -->
189 | 
190 | No discontent was voiced.
191 | 
192 | 
193 | ## 11. Document Nits
194 | <!--
195 | (11) Identify any ID nits the Document Shepherd has found in this document. (See
196 | http://www.ietf.org/tools/idnits/ and the Internet-Drafts Checklist).
197 | Boilerplate checks are not enough; this check needs to be thorough.
198 | -->
199 | 
200 | The IDNits tool does not appear to be functioning correctly, both locally and using the Web service, so it's difficult to ascertain whether its results are accurate (there are many "Failure fetching the file, proceeding without it." errors).
201 | 
202 | 
203 | ## 12. Formal Review Criteria
204 | <!--
205 | (12) Describe how the document meets any required formal review criteria, such
206 | as the MIB Doctor, YANG Doctor, media type, and URI type reviews.
207 | -->
208 | 
209 | No formal review requirements are applicable to this document set.
210 | 
211 | 
212 | ## 13. Split References
213 | <!--
214 | (13) Have all references within this document been identified as either
215 | normative or informative?
216 | -->
217 | 
218 | All references within this document set have been identified as either normative
219 | or informative.
220 | 
221 | 
222 | ## 14. Normative References
223 | <!--
224 | (14) Are there normative references to documents that are not ready for
225 | advancement or are otherwise in an unclear state? If such normative references
226 | exist, what is the plan for their completion?
227 | -->
228 | 
229 | The document set contains the following normative references to I-Ds:
230 | 
231 | * draft-ietf-httpbis-cache
232 | * draft-ietf-httpbis-semantics
233 | 
234 | All of these are on track for timely publication in their respective WGs.
235 | 
236 | 
237 | ## 15. Downward References
238 | <!--
239 | (15) Are there downward normative references (see RFC 3967)? If so,
240 | list these downward references to support the Area Director in the Last Call
241 | procedure.
242 | -->
243 | 
244 | draft-ietf-quic-tls-31 has a downref to RFC8439 (CHACHA). RFC7539,
245 | which RFC8439 obsoletes, is already listed in the IETF Downref registry.
246 | (draft-ietf-quic-tls-31 also cites a number of NIST standards.)
247 | 
248 | ## 16. RFC Status Changes
249 | <!--
250 | (16) Will publication of this document change the status of any existing RFCs?
251 | Are those RFCs listed on the title page header, listed in the abstract, and
252 | discussed in the introduction? If the RFCs are not listed in the Abstract and
253 | Introduction, explain why, and point to the part of the document where the
254 | relationship of this document to the other RFCs is discussed. If this
255 | information is not in the document, explain why the WG considers it unnecessary.
256 | -->
257 | 
258 | Publication of this document set will not change the status of any existing
259 | RFCs.
260 | 
261 | 
262 | ## 17. IANA Considerations Review
263 | <!--
264 | (17) Describe the Document Shepherd's review of the IANA considerations section,
265 | especially with regard to its consistency with the body of the document. Confirm
266 | that all protocol extensions that the document makes are associated with the
267 | appropriate reservations in IANA registries. Confirm that any referenced IANA
268 | registries have been clearly identified. Confirm that newly created IANA
269 | registries include a detailed specification of the initial contents for the
270 | registry, that allocations procedures for future registrations are defined, and
271 | a reasonable name for the new registry has been suggested (see RFC 8126).
272 | -->
273 | 
274 | The IANA considerations of the document set have been reviewed and no issues
275 | were identified.
276 | 
277 | 
278 | ## 18. New "Expert Review" Registries
279 | <!--
280 | (18) List any new IANA registries that require Expert Review for future
281 | allocations. Provide any public guidance that the IESG would find useful in
282 | selecting the IANA Experts for these new registries.
283 | -->
284 | 
285 | The document set defines several IANA registries that allow for "Provisional
286 | Registrations" and "Permanent Registrations", which both require Expert review.
287 | The IESG should select subject matter experts for these registration types;
288 | candidates include the document editors and the individuals named as
289 | contributors in the acknowledgment sections.
290 | 
291 | 
292 | ## 19. Validation of Formal Language Parts
293 | <!--
294 | (19) Describe reviews and automated checks performed by the Document Shepherd to
295 | validate sections of the document written in a formal language, such as XML
296 | code, BNF rules, MIB definitions, YANG modules, etc.
297 | -->
298 | 
299 | No formal code exists in the document set. draft-ietf-quic-transport,
300 | draft-ietf-quic-recovery and draft-ietf-quic-qpack contain Python-like
301 | pseudocode, but not at a level of detail that lends itself to automated checks.
302 | 
303 | 
304 | ## 20. YANG
305 | <!--
306 | (20) If the document contains a YANG module, has the module been checked with
307 | any of the recommended validation tools
308 | (https://trac.ietf.org/trac/ops/wiki/yang-review-tools) for syntax and
309 | formatting validation? If there are any resulting errors or warnings, what is
310 | the justification for not fixing them at this time? Does the YANG module comply
311 | with the Network Management Datastore Architecture (NMDA) as specified in
312 | RFC8342?
313 | -->
314 | 
315 | The document set does not contain a YANG model.
316 | 


--------------------------------------------------------------------------------
/xml2rfc-tidy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Tidy an xml2rfc file.
  3 | #
  4 | # This:
  5 | # * removes non-semantic content (comments, processing instructions, DOCTYPE
  6 | #   declarations, broken entity references)
  7 | # * wraps BCP 14 language in <bcp14> elements
  8 | # * indents elements neatly
  9 | 
 10 | import sys
 11 | import xml.sax
 12 | import re
 13 | from xml.sax.saxutils import escape, quoteattr
 14 | 
 15 | 
 16 | class Tidy(xml.sax.handler.ContentHandler):
 17 |     pattern = re.compile(
 18 |         r"\b((?:(?:MUST|SHOULD|SHALL)(?:\s+NOT)?)|(?:(?:NOT\s+)?RECOMMENDED)|MAY|OPTIONAL|REQUIRED)\b"
 19 |     )
 20 | 
 21 |     def __init__(self):
 22 |         self.tags = []
 23 |         self.nesting = 0
 24 |         self.c = ""
 25 |         self.state = ""
 26 | 
 27 |     def startDocument(self):
 28 |         print('<?xml version="1.0" encoding="UTF-8"?>')
 29 | 
 30 |     def preserve(tag):
 31 |         return tag in ["artwork", "sourcecode"]
 32 | 
 33 |     def textElement(tag):
 34 |         return tag in [
 35 |             "annotation",
 36 |             "blockquote",
 37 |             "dd",
 38 |             "dt",
 39 |             "em",
 40 |             "li",
 41 |             "preamble",
 42 |             "refcontent",
 43 |             "strong",
 44 |             "sub",
 45 |             "sup",
 46 |             "t",
 47 |             "td",
 48 |             "th",
 49 |             "tt",
 50 |         ]
 51 | 
 52 |     def inline(tag):
 53 |         return tag in [
 54 |             "code",
 55 |             "contact",
 56 |             "cref",
 57 |             "em",
 58 |             "eref",
 59 |             "iref",
 60 |             "sub",
 61 |             "sup",
 62 |             "tt",
 63 |             "xref",
 64 |         ]
 65 | 
 66 |     def flush(self, tag, start=None):
 67 |         if Tidy.preserve(tag):
 68 |             c = f"<![CDATA[{self.c}]]>"
 69 |         else:
 70 |             c = escape(self.c)
 71 |             if Tidy.textElement(tag):
 72 |                 if self.state == "open":
 73 |                     # The element is opening, so strip left is safe.
 74 |                     c = c.lstrip()
 75 |                 if start is None or not Tidy.inline(start):
 76 |                     # The element is closing, or the element that is starting
 77 |                     # isn't inline, so strip right is safe.
 78 |                     c = c.rstrip()
 79 |                 c = Tidy.pattern.sub(r"<bcp14>\1</bcp14>", c)
 80 |             else:
 81 |                 c = c.strip()
 82 | 
 83 |         if c != "":
 84 |             if self.state == "open":
 85 |                 print(">", end="")
 86 |             print(c, end="")
 87 |             self.state = "text"
 88 |             self.nl = False
 89 | 
 90 |         self.c = ""
 91 | 
 92 |     def currentTag(self):
 93 |         return next(reversed(self.tags), False)
 94 | 
 95 |     def startElement(self, tag, attributes):
 96 |         parent = self.currentTag()
 97 |         self.flush(parent, tag)
 98 | 
 99 |         if self.state == "open":
100 |             print(">", end="")
101 |             if not Tidy.inline(tag):
102 |                 print()
103 | 
104 |         self.tags.append(tag)
105 |         if not Tidy.inline(tag):
106 |             print("  " * self.nesting, end="")
107 |             self.nesting = self.nesting + 1
108 | 
109 |         print(f"<{tag}", end="")
110 |         for name, value in attributes.items():
111 |             print(f" {name}={quoteattr(value)}", end="")
112 | 
113 |         self.state = "open"
114 |         self.nl = False
115 | 
116 |     def endElement(self, tag):
117 |         self.flush(self.tags.pop())
118 | 
119 |         if not Tidy.inline(tag):
120 |             self.nesting = self.nesting - 1
121 |             if self.nl and not Tidy.inline(self.currentTag()):
122 |                 print("  " * self.nesting, end="")
123 |         if self.state == "open":
124 |             print("/>", end="")
125 |         else:
126 |             print(f"</{tag}>", end="")
127 |         self.nl = not Tidy.inline(tag)
128 |         if self.nl:
129 |             print()
130 |         self.state = "close"
131 | 
132 |     def characters(self, content):
133 |         self.c = self.c + content
134 | 
135 |     def processingInstruction(self, target, data):
136 |         pass
137 | 
138 | 
# Command-line entry point: tidy the file named by the first argument, or
# standard input when no argument is given, and write the result to stdout.
if __name__ == "__main__":
    # The XML declaration written by Tidy.startDocument() announces UTF-8, so
    # make stdout produce UTF-8 bytes whatever the locale encoding is.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    parser = xml.sax.make_parser()
    parser.setContentHandler(Tidy())
    if len(sys.argv) >= 2:
        parser.parse(sys.argv[1])
    else:
        # Give the parser raw bytes so it determines the encoding from the
        # document itself rather than from a locale-decoded text stream.
        parser.parse(getattr(sys.stdin, "buffer", sys.stdin))
145 | 


--------------------------------------------------------------------------------