├── .github └── workflows │ ├── archive.yml │ ├── ghpages.yml │ └── publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE.md ├── Makefile ├── README.md ├── draft-ietf-taps-arch.md ├── draft-ietf-taps-impl.md ├── draft-ietf-taps-interface.md ├── metadata.min.js └── roadshows ├── ief104-taps-connectiom-pooling.key ├── ief104-taps-connectiom-pooling.pdf ├── ietf101-taps-interface.key ├── ietf101-taps-interface.pdf ├── ietf102-taps-interface.key ├── ietf102-taps-interface.pdf ├── ietf103-taps-draft_discussion.pdf ├── ietf103-taps-draft_discussion.pptx ├── interim-jan19.taps-properties-registry.key └── interim-jan19.taps-properties-registry.pdf /.github/workflows/archive.yml: -------------------------------------------------------------------------------- 1 | name: "Archive Issues and Pull Requests" 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * 0,2,4' 6 | repository_dispatch: 7 | types: [archive] 8 | 9 | jobs: 10 | build: 11 | name: "Archive Issues and Pull Requests" 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: "Checkout" 15 | uses: actions/checkout@v2 16 | 17 | - name: "Update Archive" 18 | uses: martinthomson/i-d-template@v1 19 | with: 20 | make: archive 21 | token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - name: "Update GitHub Pages" 24 | uses: martinthomson/i-d-template@v1 25 | with: 26 | make: gh-archive 27 | token: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | - name: "Save Archive" 30 | uses: actions/upload-artifact@v2 31 | with: 32 | path: archive.json 33 | -------------------------------------------------------------------------------- /.github/workflows/ghpages.yml: -------------------------------------------------------------------------------- 1 | name: "Update Editor's Copy" 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - README.md 7 | - CONTRIBUTING.md 8 | - LICENSE.md 9 | - .gitignore 10 | pull_request: 11 | paths-ignore: 12 | - README.md 13 | - CONTRIBUTING.md 14 | - LICENSE.md 15 | - .gitignore 16 | 17 | jobs: 18 | build: 19 | name: "Update Editor's 
Copy" 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: "Checkout" 23 | uses: actions/checkout@v2 24 | 25 | - name: "Cache Setup" 26 | id: cache-setup 27 | run: | 28 | mkdir -p "$HOME"/.cache/xml2rfc 29 | echo "::set-output name=path::$HOME/.cache/xml2rfc" 30 | date -u "+::set-output name=date::%FT%T" 31 | 32 | - name: "Cache References" 33 | uses: actions/cache@v2 34 | with: 35 | path: ${{ steps.cache-setup.outputs.path }} 36 | key: refcache-${{ steps.cache-setup.outputs.date }} 37 | restore-keys: | 38 | refcache-${{ steps.cache-setup.outputs.date }} 39 | refcache- 40 | 41 | - name: "Build Drafts" 42 | uses: martinthomson/i-d-template@v1 43 | 44 | - name: "Update GitHub Pages" 45 | uses: martinthomson/i-d-template@v1 46 | if: ${{ github.event_name == 'push' }} 47 | with: 48 | make: gh-pages 49 | token: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | - name: "Save HTML" 52 | uses: actions/upload-artifact@v2 53 | with: 54 | path: "*.html" 55 | 56 | - name: "Save Text" 57 | uses: actions/upload-artifact@v2 58 | with: 59 | path: "*.txt" 60 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: "Publish New Draft Version" 2 | 3 | on: 4 | push: 5 | tags: 6 | - "draft-*" 7 | 8 | jobs: 9 | build: 10 | name: "Publish New Draft Version" 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: "Checkout" 14 | uses: actions/checkout@v2 15 | 16 | # See https://github.com/actions/checkout/issues/290 17 | - name: "Get Tag Annotations" 18 | run: git fetch -f origin ${{ github.ref }}:${{ github.ref }} 19 | 20 | - name: "Cache Setup" 21 | id: cache-setup 22 | run: | 23 | mkdir -p "$HOME"/.cache/xml2rfc 24 | echo "::set-output name=path::$HOME/.cache/xml2rfc" 25 | date -u "+::set-output name=date::%FT%T" 26 | 27 | - name: "Cache References" 28 | uses: actions/cache@v2 29 | with: 30 | path: ${{ steps.cache-setup.outputs.path }} 31 | key: 
refcache-${{ steps.cache-setup.outputs.date }} 32 | restore-keys: | 33 | refcache-${{ steps.cache-setup.outputs.date }} 34 | refcache- 35 | 36 | - name: "Build Drafts" 37 | uses: martinthomson/i-d-template@v1 38 | 39 | - name: "Upload to Datatracker" 40 | uses: martinthomson/i-d-template@v1 41 | with: 42 | make: upload 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.redxml 2 | *.txt 3 | *.html 4 | draft-*.pdf 5 | *.upload 6 | .tags 7 | *~ 8 | *.swp 9 | /*-[0-9][0-9].xml 10 | .refcache 11 | .targets.mk 12 | venv/ 13 | issues.json 14 | pulls.json 15 | report.xml 16 | lib 17 | draft-ietf-taps-arch.xml 18 | draft-brunstrom-taps-impl.xml 19 | draft-ietf-taps-interface.xml 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This repository relates to activities in the Internet Engineering Task Force 4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered 5 | Contributions to the IETF Standards Process, as defined in the intellectual 6 | property policies of IETF currently designated as 7 | [BCP 78](https://www.rfc-editor.org/info/bcp78), 8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the 9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html). 10 | 11 | Any edit, commit, pull request, issue, comment or other change made to this 12 | repository constitutes Contributions to the IETF Standards Process 13 | (https://www.ietf.org/). 14 | 15 | You agree to comply with all applicable IETF policies and procedures, including, 16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being 17 | subject to a Simplified BSD License) in Contributions. 
18 | 19 | 20 | ## Other Resources 21 | 22 | Discussion of this work occurs on the 23 | [taps working group mailing list](https://mailarchive.ietf.org/arch/browse/taps/) 24 | ([subscribe](https://www.ietf.org/mailman/listinfo/taps)). In addition to 25 | contributions in github, you are encouraged to participate in discussions there. 26 | 27 | **Note**: Some working groups adopt a policy whereby substantive discussion of 28 | technical issues needs to occur on the mailing list. 29 | 30 | You might also like to familiarize yourself with other 31 | [working group documents](https://datatracker.ietf.org/wg/taps/documents/). 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | See the 4 | [guidelines for contributions](https://github.com/taps-api/drafts/blob/master/CONTRIBUTING.md). 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LIBDIR := lib 2 | include $(LIBDIR)/main.mk 3 | 4 | $(LIBDIR)/main.mk: 5 | ifneq (,$(shell git submodule status $(LIBDIR) 2>/dev/null)) 6 | git submodule sync 7 | git submodule update $(CLONE_ARGS) --init 8 | else 9 | git clone -q --depth 10 $(CLONE_ARGS) \ 10 | -b main https://github.com/martinthomson/i-d-template $(LIBDIR) 11 | endif 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TAPS Drafts 2 | 3 | This is the working area for the IETF TAPS working group Internet-Drafts for the TAPS Architecture, Abstract Interface, and Interface Implementation documents. 
4 | 5 | **Contributions to this repository (commits and pull requests, as well as issues and comments) are covered under the IETF Note Well.** See [CONTRIBUTING.md](CONTRIBUTING.md) for more. 6 | 7 | ## Architecture 8 | 9 | * [Editor's Copy](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-arch.html) 10 | * [Individual Draft](https://tools.ietf.org/html/draft-ietf-taps-arch) 11 | * [Compare Editor's Copy to Individual Draft](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-arch.diff) 12 | 13 | ## Interface 14 | 15 | * [Editor's Copy](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-interface.html) 16 | * [Individual Draft](https://tools.ietf.org/html/draft-ietf-taps-interface) 17 | * [Compare Editor's Copy to Individual Draft](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-interface.diff) 18 | 19 | ## Implementation 20 | 21 | * [Editor's Copy](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-impl.html) 22 | * [Individual Draft](https://tools.ietf.org/html/draft-ietf-taps-impl) 23 | * [Compare Editor's Copy to Individual Draft](https://ietf-tapswg.github.io/api-drafts/#go.draft-ietf-taps-impl.diff) 24 | 25 | 26 | ## Building the Draft 27 | 28 | Formatted text and HTML versions of the draft can be built using `make`. 29 | 30 | ```sh 31 | $ make 32 | ``` 33 | 34 | This requires that you have the necessary software installed. See 35 | [the instructions](https://github.com/martinthomson/i-d-template/blob/master/doc/SETUP.md). 36 | 37 | 38 | ## Contributing 39 | 40 | See the 41 | [guidelines for contributions](https://github.com/taps-api/drafts/blob/master/CONTRIBUTING.md). 
42 | -------------------------------------------------------------------------------- /draft-ietf-taps-arch.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Architecture and Requirements for Transport Services 3 | abbrev: TAPS Architecture 4 | docname: draft-ietf-taps-arch-latest 5 | date: 6 | category: std 7 | 8 | ipr: trust200902 9 | area: Transport 10 | workgroup: TAPS Working Group 11 | keyword: Internet-Draft 12 | 13 | stand_alone: yes 14 | pi: [toc, sortrefs, symrefs] 15 | 16 | author: 17 | - 18 | ins: T. Pauly 19 | name: Tommy Pauly 20 | role: editor 21 | org: Apple Inc. 22 | street: One Apple Park Way 23 | city: Cupertino, California 95014 24 | country: United States of America 25 | email: tpauly@apple.com 26 | - 27 | ins: B. Trammell 28 | name: Brian Trammell 29 | role: editor 30 | org: Google Switzerland GmbH 31 | email: ietf@trammell.ch 32 | street: Gustav-Gull-Platz 1 33 | city: 8004 Zurich 34 | country: Switzerland 35 | - 36 | ins: A. Brunstrom 37 | name: Anna Brunstrom 38 | org: Karlstad University 39 | street: Universitetsgatan 2 40 | city: 651 88 Karlstad 41 | country: Sweden 42 | email: anna.brunstrom@kau.se 43 | - 44 | ins: G. Fairhurst 45 | name: Godred Fairhurst 46 | org: University of Aberdeen 47 | street: Department of Engineering 48 | street: Fraser Noble Building 49 | city: Aberdeen, AB24 3UE 50 | country: Scotland 51 | email: gorry@erg.abdn.ac.uk 52 | uri: http://www.erg.abdn.ac.uk/ 53 | - 54 | ins: C. Perkins 55 | name: Colin Perkins 56 | org: University of Glasgow 57 | street: School of Computing Science 58 | city: Glasgow G12 8QQ 59 | country: United Kingdom 60 | email: csp@csperkins.org 61 | 62 | informative: 63 | POSIX: 64 | title: "IEEE Std. 1003.1-2008 Standard for Information Technology -- Portable Operating System Interface (POSIX). 
Open Group Technical Standard: Base Specifications, Issue 7" 65 | url: 66 | date: 2008 67 | 68 | --- abstract 69 | 70 | This document describes the architecture of the Transport Services System, which exposes transport protocol features to applications for network communication. The Transport Services Application Programming Interface (API) is based on an asynchronous, event-driven interaction pattern. This API uses messages for representing data transfer to applications, and describes how a Transport Services Implementation can use multiple IP addresses, multiple protocols, and multiple paths, and provide multiple application streams. This document provides the architecture and requirements. It defines common terminology and concepts to be used in definitions of a Transport Services API and a Transport Services Implementation. 71 | 72 | --- middle 73 | 74 | # Introduction 75 | 76 | Many application programming interfaces (APIs) to provide transport interfaces to networks have been deployed, perhaps the most widely known and imitated being the BSD Socket {{POSIX}} interface (Socket API). 77 | The naming of objects and functions across these APIs is not consistent and varies depending on the protocol being used. 78 | For example, sending and receiving streams of data is conceptually the same for both an unencrypted Transmission Control Protocol (TCP) stream and operating on an encrypted Transport Layer Security (TLS) {{?RFC8446}} stream over TCP, but applications cannot use the same socket ```send()``` and ```recv()``` calls on top of both kinds of connections. 79 | Similarly, terminology for the implementation of transport protocols varies based on the context of the protocols themselves: terms such as "flow", "stream", "message", and "connection" can take on many different meanings. 
80 | This variety can lead to confusion when trying to understand the similarities and differences between protocols, and how applications can use them effectively. 81 | 82 | The goal of the Transport Services System architecture is to provide a flexible 83 | and reusable system with a common interface for transport protocols. 84 | An application uses the Transport Services System through an abstract Connection (we use capitalization to distinguish these from the underlying connections of, e.g., TCP). 85 | This provides 86 | flexible connection establishment allowing an application to request or require a set of properties. 87 | 88 | As applications adopt this interface, they will benefit from a wide set of transport features that can evolve over time, 89 | and ensure that the system providing the interface can optimize its behavior based on the application requirements 90 | and network conditions, without requiring changes to the applications. This flexibility enables faster deployment of new features and protocols. 91 | 92 | This architecture can also support applications by offering racing mechanisms (attempting multiple IP addresses, protocols, or network paths in parallel), which otherwise need to be implemented in each application separately (see {{racing}}). Racing selects one or more candidates each with equivalent protocol stacks that are used to identify 93 | an optimal combination of transport protocol instance such as TCP, UDP, or another transport, together with configuration of parameters and 94 | interfaces. 95 | A Connection represents an object that, once established, can be used to send and receive messages. 96 | A Connection can also be created from another Connection, by cloning, and then forms a part of a Connection Group whose Connections share properties. 97 | 98 | This document was developed in parallel with the specification of the Transport Services API {{?I-D.ietf-taps-interface}} and implementation guidelines {{?I-D.ietf-taps-impl}}. 
Although following the Transport Services architecture does not require all APIs and implementations to be identical, a common minimal set of features represented in a consistent fashion will enable applications to be easily ported from one implementation of the Transport Services System to another. 99 | 100 | ## Background 101 | 102 | The architecture of the Transport Services System is based on the survey of services provided by IETF transport protocols and congestion control mechanisms {{?RFC8095}}, and the distilled minimal set of the features offered by transport protocols {{?RFC8923}}. These documents identified common features and patterns across all transport protocols developed thus far in the IETF. 103 | 104 | Since transport security is an increasingly relevant aspect of using transport protocols on the Internet, this document also considers the impact of transport security protocols on the feature-set exposed by Transport Services {{?RFC8922}}. 105 | 106 | One of the key insights to come from identifying the minimal set of features provided by transport protocols {{?RFC8923}} was that features either require application interaction and guidance (referred to in that document as Functional or Optimizing Features), or else can be handled automatically by an implementation of the Transport Services System (referred to as Automatable Features). Among the identified Functional and Optimizing Features, some are common across all or nearly all transport protocols, while others present features that, if specified, would only be useful with a subset of protocols, but would not harm the functionality of other protocols. For example, some protocols can deliver messages faster for applications that do not require messages to arrive in the order in which they were sent. This functionality needs to be explicitly allowed by the application, since reordering messages would be undesirable in many cases. 
107 | 108 | ## Overview 109 | 110 | This document describes the Transport Services System in three sections: 111 | 112 | - {{model}} describes how the Transport Services API model differs from that of traditional socket-based APIs. Specifically, it offers asynchronous event-driven interaction, the use of messages for data transfer, and the flexibility to use different transport protocols and paths without requiring major changes to the application. 113 | 114 | - {{requirements}} explains the fundamental requirements for a Transport Services System. These principles are intended to make sure that transport protocols can continue to be enhanced and evolve without requiring significant changes by application developers. 115 | 116 | - {{concepts}} presents the Transport Services Implementation and defines the concepts that are used by the API {{?I-D.ietf-taps-interface}} and described in the implementation guidelines {{?I-D.ietf-taps-impl}}. This introduces the Preconnection, which allows applications to configure Connection Properties. 117 | 118 | ## Specification of Requirements 119 | 120 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 121 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 122 | "OPTIONAL" in this document are to be interpreted as described in BCP 14 123 | {{!RFC2119}} {{!RFC8174}} when, and only when, 124 | they appear in all capitals, as shown here. 125 | 126 | ## Glossary of Key Terms 127 | 128 | This subsection provides a glossary of key terms related to the Transport Services architecture. It provides a short description of key terms that are later defined in this document. 129 | 130 | - Application: An entity that uses the transport layer for end-to-end delivery of data across the network {{?RFC8095}}. 131 | - Cached State: The state and history that the Transport Services Implementation keeps for each set of the associated Endpoints that have been used previously. 
132 | - Candidate Path: One path that is available to an application and conforms to the Selection Properties and System Policy during racing. 133 | - Candidate Protocol Stack: One Protocol Stack that can be used by an application for a Connection during racing. 134 | - Client: The peer responsible for initiating a Connection. 135 | - Clone: A Connection that was created from another Connection, and forms a part of a Connection Group. 136 | - Connection: Shared state of two or more Endpoints that persists across Messages that are transmitted and received between these Endpoints {{?RFC8303}}. When this document (and other Transport Services documents) use the capitalized "Connection" term, it refers to a Connection object that is being offered by the Transport Services system, as opposed to more generic uses of the word "connection". 137 | - Connection Context: A set of stored properties across Connections, such as cached protocol state, cached path state, and heuristics, which can include one or more Connection Groups. 138 | - Connection Group: A set of Connections that share properties and caches. 139 | - Connection Property: A Transport Property that controls per-Connection behavior of a Transport Services Implementation. 140 | - Endpoint: An entity that communicates with one or more other endpoints using a transport protocol. 141 | - Endpoint Identifier: An identifier that specifies one side of a Connection (local or remote), such as a hostname or URL. 142 | - Equivalent Protocol Stacks: Protocol Stacks that can be safely swapped or raced in parallel during establishment of a Connection. 143 | - Event: A primitive that is invoked by an Endpoint {{?RFC8303}}. 144 | - Framer: A data translation layer that can be added to a Connection to define how application-layer Messages are transmitted over a Protocol Stack. 145 | - Local Endpoint: The local Endpoint. 
146 | - Local Endpoint Identifier: A representation of the application's identifier for itself that it uses for a Connection. 147 | - Message: A unit of data that can be transferred between two Endpoints over a Connection. 148 | - Message Property: A property that can be used to specify details about Message transmission, or obtain details about the transmission after receiving a Message. 149 | - Parameter: A value passed between an application and a transport protocol by a primitive {{?RFC8303}}. 150 | - Path: A representation of an available set of properties that a Local Endpoint can use to communicate with a Remote Endpoint. 151 | - Peer: An Endpoint application party to a Connection. 152 | - Preconnection: An object that represents a Connection that has not yet been established. 153 | - Preference: A preference to prohibit, avoid, ignore, prefer, or require a specific Transport Feature. 154 | - Primitive: A function call that is used to locally communicate between an application and an Endpoint, which is related to one or more Transport Features {{?RFC8303}}. 155 | - Protocol Instance: A single instance of one protocol, including any state necessary to establish connectivity or send and receive Messages. 156 | - Protocol Stack: A set of Protocol Instances that are used together to establish connectivity or send and receive Messages. 157 | - Racing: The attempt to select between multiple Protocol Stacks based on the Selection and Connection Properties communicated by the application, along with any Security Parameters. 158 | - Remote Endpoint: The peer that a local Endpoint can communicate with when a Connection is established. 159 | - Remote Endpoint Identifier: A representation of the application's identifier for a peer that can participate in establishing a Connection. 160 | - Rendezvous: The action of establishing a peer-to-peer Connection with a Remote Endpoint. 
161 | - Security Parameters: Parameters that define an application's requirements for authentication and encryption on a Connection. 162 | - Server: The peer responsible for responding to a Connection initiation. 163 | - Socket: The combination of a destination IP address and a destination port number {{?RFC8303}}. 164 | - System Policy: The input from an operating system or other global preferences that can constrain or influence how an implementation will gather Candidate Paths and Protocol Stacks and race the candidates during establishment of a Connection. 165 | - Selection Property: A Transport Property that can be set to influence the selection of paths between the Local and Remote Endpoints. 166 | - Transport Feature: A specific end-to-end feature that the transport layer provides to an application. 167 | - Transport Property: A property that expresses requirements, prohibitions and preferences {{?RFC8095}}. 168 | - Transport Service: A set of transport features, without an association to any given framing protocol, that provides a complete service to an application. 169 | - Transport Services Implementation: This consists of all objects and protocol instances used internally to a system or library to implement the functionality needed to provide a transport service across a network, as required by the abstract interface. 170 | - Transport Services System: The Transport Services Implementation and the Transport Services API. 171 | 172 | # API Model {#model} 173 | 174 | The traditional model of using sockets can be represented as follows (see figure 1): 175 | 176 | - Applications create connections and transfer data using the Socket API. 177 | - The Socket API provides the interface to the implementations of TCP and UDP 178 | (typically implemented in the system's kernel). 179 | - TCP and UDP in the kernel send and receive data over the available network-layer interfaces. 
180 | - Sockets are bound directly to transport-layer and network-layer addresses, 181 | obtained via a separate resolution step, usually performed by a 182 | system-provided DNS stub resolver. 183 | 184 | ~~~~~~~~~~ 185 | 186 | +-----------------------------------------------------+ 187 | | Application | 188 | +-----------------------------------------------------+ 189 | | | | 190 | +------------+ +------------+ +--------------+ 191 | | DNS stub | | Stream API | | Datagram API | 192 | | resolver | +------------+ +--------------+ 193 | +------------+ | | 194 | +---------------------------------+ 195 | | TCP UDP | 196 | | Kernel Networking Stack | 197 | +---------------------------------+ 198 | | 199 | +-----------------------------------------------------+ 200 | | Network Layer Interface | 201 | +-----------------------------------------------------+ 202 | 203 | ~~~~~~~~~~ 204 | {: #fig-sockets title="Socket API Model"} 205 | 206 | The architecture of the Transport Services System is an evolution of this general model of interaction. It both modernizes the API presented to applications by the transport layer and enriches the capabilities of the Transport Services Implementation below this API. 
207 | 208 | ~~~~~~~~~~ 209 | 210 | +-----------------------------------------------------+ 211 | | Application | 212 | +-----------------------------------------------------+ 213 | | 214 | +-----------------------------------------------------+ 215 | | Transport Services API | 216 | +-----------------------------------------------------+ 217 | | 218 | +-----------------------------------------------------+ 219 | | Transport Services Implementation | 220 | | (Using: DNS, UDP, TCP, SCTP, DCCP, TLS, QUIC, etc) | 221 | +-----------------------------------------------------+ 222 | | 223 | +-----------------------------------------------------+ 224 | | Network Layer Interface | 225 | +-----------------------------------------------------+ 226 | 227 | ~~~~~~~~~~ 228 | {: #fig-taps title="Transport Services API Model"} 229 | 230 | The Transport Services API {{?I-D.ietf-taps-interface}} defines the interface for an application to create Connections and transfer data. It combines interfaces for multiple interaction patterns into a unified whole (see figure 2). 231 | This offers generic functions and also the protocol-specific mappings for TCP, UDP, UDP-Lite, and other protocol layers. These mappings are extensible. Future documents could define similar mappings for new layers and for other transport protocols, such as QUIC {{?RFC9000}}. 232 | By combining name resolution with connection establishment and data transfer in a single API, it allows for more flexible implementations to provide path and transport protocol agility on the application's behalf. 233 | 234 | The Transport Services Implementation {{?I-D.ietf-taps-impl}} is the component of the Transport Services System that implements the transport layer protocols and other functions needed to send and receive data. It is responsible for mapping the API to a specific available transport Protocol Stack and managing the available network interfaces and paths. 
235 | 236 | There are key differences between the architecture of the Transport Services System and the architecture of the Socket API: the API of the Transport Services System is asynchronous and event-driven; it uses messages for representing data transfer to applications; and it describes how a Transport Services Implementation can resolve Endpoint Identifiers to use multiple IP addresses, multiple protocols, multiple paths, and provide multiple application streams. 237 | 238 | ## Event-Driven API 239 | 240 | Originally, the Socket API presented a blocking interface for establishing connections and transferring data. However, most modern applications interact with the network asynchronously. Emulation of an asynchronous interface using the Socket API can use a try-and-fail model: If the application wants to read, but data has not yet been received from the peer, the call to read will fail. The application then waits and can try again later. 241 | 242 | In contrast to the Socket API, all interactions using the Transport Services API are expected to be asynchronous. The API is defined around an event-driven model (see {{events}}), which models this asynchronous interaction. Other forms of asynchronous communication could also be available to applications, depending on the platform implementing the interface. 243 | 244 | For example, when an application that uses the Transport Services API wants to receive data, it issues an asynchronous call to receive new data from the Connection. When delivered data becomes available, this data is delivered to the application using asynchronous events that contain the data. Error handling is also asynchronous, resulting in asynchronous error events. 245 | 246 | This API also delivers events regarding the lifetime of a connection and changes in the available network links, which were not previously made explicit in the Socket API. 
247 | 248 | Using asynchronous events allows for a more natural interaction model when establishing connections and transferring data. Events in time more closely reflect the nature of interactions over networks, as opposed to how the Socket API represents network resources as file system objects that may be temporarily unavailable. 249 | 250 | Separate from events, callbacks are also provided for asynchronous interactions with the Transport Services API that are not directly related to events on the network or network interfaces. 251 | 252 | ## Data Transfer Using Messages 253 | 254 | The Socket API provides a message interface for datagram protocols like UDP, but provides an unstructured stream abstraction for TCP. While TCP has the ability to send and receive data as a byte-stream, most applications need to interpret structure within this byte-stream. For example, HTTP/1.1 uses character delimiters to segment messages over a byte-stream {{?RFC9112}}; TLS record headers carry a version, content type, and length {{?RFC8446}}; and HTTP/2 uses frames to segment its headers and bodies {{?RFC9113}}. 255 | 256 | The Transport Services API represents data as messages, so that it more closely matches the way applications use the network. A message-based abstraction provides many benefits, such as: 257 | 258 | * providing additional information to the Protocol Stack; 259 | * the ability to associate deadlines with messages, for applications that care about timing; 260 | * the ability to control reliability, which messages to retransmit when there is packet loss, and how best to make use of the data that arrived; 261 | * the ability to automatically assign messages and connections to underlying transport connections to utilize multi-streaming and pooled connections. 262 | 263 | Allowing applications to interact with messages is backwards-compatible with existing protocols and APIs because it does not change the wire format of any protocol. 
Instead, it provides the Protocol Stack with additional information to allow it to make better use of modern transport services, while simplifying the application's role in parsing data. For protocols that inherently use a streaming abstraction, framers ({{datatransfer}}) bridge the gap between the two abstractions. 264 | 265 | ## Flexible Implementation 266 | 267 | The Socket API for protocols like TCP is generally limited to connecting to a single address over a single interface (IP source address). 268 | It also presents a single stream to the application. Software layers built upon this API often propagate this limitation of a single-address single-stream model. The Transport Services architecture is designed: 269 | 270 | - to handle multiple candidate endpoints, protocols, and paths; 271 | - to support candidate protocol racing to select the most optimal stack in each situation; 272 | - to support multipath and multistreaming protocols; 273 | - to provide state caching and application control over it. 274 | 275 | A Transport Services Implementation is intended to be flexible at connection establishment time, considering many different options and trying to select the most optimal combinations by racing them and measuring the results (see {{gathering}} and {{racing}}). This requires applications to specify identifiers for the Local and Remote Endpoint that are higher-level than IP addresses, such as a hostname or URL, which are used by a Transport Services Implementation for resolution, path selection, and racing. An implementation can further implement fallback mechanisms if connection establishment of one protocol fails or performance is detected to be unsatisfactory. 276 | 277 | Information used in connection establishment (e.g. cryptographic resumption tokens, information about usability of certain protocols on the path, results of racing in previous connections) is cached in the Transport Services Implementation. 
Applications have control over whether this information is used for a specific establishment, in order to allow tradeoffs between efficiency and linkability. 278 | 279 | Flexibility after connection establishment is also important. Transport protocols that can migrate between multiple network-layer interfaces need to be able to process and react to interface changes. Protocols that support multiple application-layer streams need to support initiating and receiving new streams using existing connections. 280 | 281 | ## Coexistence 282 | 283 | While the architecture of the Transport Services System is designed as an enhanced replacement for the Socket API, it need not replace it entirely on a system or platform; indeed, coexistence has been recommended for incremental deployability {{?RFC8170}}. The architecture is therefore designed such that it can run alongside (or, indeed, on top of) an existing Socket API implementation; only applications built to the Transport Services API are managed by the system's Transport Services Implementation. 284 | 285 | # API and Implementation Requirements {#requirements} 286 | 287 | One goal of the architecture is to redefine the interface between applications and transports in a way that allows the transport layer to evolve and improve without fundamentally changing the contract with the application. This requires a careful consideration of how to expose the capabilities of protocols. The architecture also encompasses system policies that can influence and inform how transport protocols use a network path or interface. 288 | 289 | There are several ways the Transport Services System can offer flexibility to an application: it can provide access to transport protocols and protocol features; it can use these protocols across multiple paths that could have different performance and functional characteristics; and it can communicate with different remote systems to optimize performance, robustness to failure, or some other metric. 
Beyond these, if the Transport Services API remains the same over time, new protocols and features can be added to the Transport Services Implementation without requiring changes in applications for adoption. Similarly, this can provide a common basis for utilizing information about a network path or interface, enabling evolution below the transport layer. 290 | 291 | The normative requirements described in this section allow Transport Services APIs and Transport Services Implementations to provide this functionality without causing incompatibility or introducing security vulnerabilities. 292 | 293 | ## Provide Common APIs for Common Features 294 | 295 | Any functionality that is common across multiple transport protocols SHOULD be made accessible through a unified set of calls using the Transport Services API. As a baseline, any Transport Services API SHOULD allow access to the minimal set of features offered by transport protocols {{?RFC8923}}. If that minimal set is updated or expanded in the future, the Transport Services API ought to be extended to match. 296 | 297 | An application can specify constraints and preferences for the protocols, features, and network interfaces it will use via Properties. Properties are used by an application to declare its preferences for how the transport service should operate at each stage in the lifetime of a connection. Transport Properties are subdivided into Selection Properties, which specify which paths and Protocol Stacks can be used and are preferred by the application; Connection Properties, which inform decisions made during connection establishment and fine-tune the established connection; and Message Properties, set on individual Messages. 298 | 299 | It is RECOMMENDED that the Transport Services API offers properties that are common to multiple transport protocols. This enables a Transport Services System to appropriately select between protocols that offer equivalent features.
Similarly, it is RECOMMENDED that the Properties offered by the Transport Services API are applicable to a variety of network layer interfaces and paths, which permits racing of different network paths without affecting the applications using the API. Each is expected to have a default value. 300 | 301 | It is RECOMMENDED that the default values for Properties are selected to ensure correctness for the widest set of applications, while providing the widest set of options for selection. For example, since both applications that require reliability and those that do not require reliability can function correctly when a protocol provides reliability, reliability ought to be enabled by default. As another example, the default value for a Property regarding the selection of network interfaces ought to permit as many interfaces as possible. 302 | 303 | Applications using the Transport Services API need to be designed to be robust to the automated selection provided by the Transport Services System. This automated selection is constrained by the properties and preferences expressed by the application and requires applications to explicitly set properties that define any necessary constraints on protocol, path, and interface selection. 304 | 305 | ## Allow Access to Specialized Features 306 | 307 | There are applications that will need to control fine-grained details of transport protocols to optimize their behavior and ensure compatibility with remote systems. It is therefore RECOMMENDED that the Transport Services API and the Transport Services Implementation permit more specialized protocol features to be used. 308 | 309 | A specialized feature could be needed by an application only when using a specific protocol, and not when using others. For example, if an application is using TCP, it could require control over the User Timeout Option for TCP {{?RFC5482}}; these options would not take effect for other transport protocols. 
In such cases, the API ought to expose the features in such a way that they take effect when a particular protocol is selected, but do not imply that only that protocol could be used. For example, if the API allows an application to specify a preference to use the User Timeout Option, communication would not fail when a protocol such as UDP is selected. 310 | 311 | Other specialized features, however, can also be strictly required by an application and thus further constrain the set of protocols that can be used. For example, if an application requires support for automatic handover or failover for a connection, only Protocol Stacks that provide this feature are eligible to be used, e.g., Protocol Stacks that include a multipath protocol or a protocol that supports connection migration. A Transport Services API needs to allow applications to define such requirements and constrain the options available to a Transport Services Implementation. Since such options are not part of the core/common features, it will generally be simple for an application to modify its set of constraints and change the set of allowable protocol features without changing the core implementation. 312 | 313 | To control these specialized features, the application can declare its preference – whether the presence of a specific feature is prohibited, should be avoided, can be ignored, is preferred, or is required in the pre-establishment phase. An implementation of a Transport Services API would honor this preference and allow the application to query the availability of each specialized feature after a successful establishment. 314 | 315 | ## Select Between Equivalent Protocol Stacks {#equivalence} 316 | 317 | A Transport Services Implementation can attempt and select between multiple Protocol Stacks based on the Selection and Connection Properties communicated by the application, along with any Security Parameters. 
The implementation can only attempt to use multiple Protocol Stacks when they are "equivalent", which means that the stacks can provide the same Transport Properties and interface expectations as requested by the application. Equivalent Protocol Stacks can be safely swapped or raced in parallel (see {{racing}}) during connection establishment. 318 | 319 | The following two examples show non-equivalent Protocol Stacks: 320 | 321 | - If the application requires preservation of message boundaries, a Protocol Stack that runs UDP as the top-level interface to the application is not equivalent to a Protocol Stack that runs TCP as the top-level interface. A UDP stack would allow an application to read out message boundaries based on datagrams sent from the remote system, whereas TCP does not preserve message boundaries on its own, but needs a framing protocol on top to determine message boundaries. 322 | 323 | - If the application specifies that it requires reliable transmission of data, then a Protocol Stack using UDP without any reliability layer on top would not be allowed to replace a Protocol Stack using TCP. 324 | 325 | The following example shows Equivalent Protocol Stacks: 326 | 327 | - If the application does not require reliable transmission of data, then a Protocol Stack that adds reliability could be regarded as an Equivalent Protocol Stack as long as providing this would not conflict with any other application-requested properties. 328 | 329 | A Transport Services Implementation can race different security 330 | protocols, e.g., if the System Policy is explicitly configured to consider them equivalent. 331 | A Transport Services Implementation SHOULD only race Protocol Stacks where the transport security protocols within the stacks are identical. 332 | To ensure that security protocols are not incorrectly swapped, a Transport Services Implementation MUST only select Protocol Stacks that meet application requirements ({{?RFC8922}}). 
333 | A Transport Services Implementation MUST NOT automatically fall back from secure protocols to insecure protocols, or to weaker versions of secure protocols. 334 | A Transport Services Implementation MAY allow applications to explicitly specify which versions of a protocol ought to be permitted, e.g., to allow a minimum version of TLS 1.2 in case TLS 1.3 is not available. 335 | 336 | A Transport Services Implementation MAY specify security properties relating to how the system operates (e.g., requirements, prohibitions, and preferences for the use of DNS Security Extensions (DNSSEC) or DNS over HTTPS (DoH)). 337 | 338 | ## Maintain Interoperability 339 | 340 | It is important to note that neither the Transport Services API {{?I-D.ietf-taps-interface}} nor the guidelines for implementation of the Transport Services System {{?I-D.ietf-taps-impl}} define new protocols or protocol capabilities that affect what is communicated across the network. A Transport Services System MUST NOT require that a peer on the other side of a connection uses the same API or implementation. A Transport Services Implementation acting as a connection initiator is able to communicate with any existing Endpoint that implements the transport protocol(s) and all the required properties selected. Similarly, a Transport Services Implementation acting as a Listener can receive connections for any protocol that is supported from an existing initiator that implements the protocol, independent of whether the initiator uses the Transport Services System or not. 341 | 342 | A Transport Services Implementation makes decisions that select protocols and interfaces. In normal use, a given version of a Transport Services System SHOULD result in consistent protocol and interface selection decisions for the same network conditions given the same set of Properties. This is intended to provide predictable outcomes to the application using the API.
343 | 344 | ## Support Monitoring 345 | 346 | The Transport Services API increases the layer of abstraction for applications, and it enables greater automation below the API. Such increased 347 | abstraction comes at the cost of increased complexity when application programmers, users or system administrators 348 | try to understand why any issues and failures may be happening. Transport Services Systems should therefore offer monitoring functions that 349 | provide relevant debug and diagnostics information. For example, such monitoring functions could indicate the protocol(s) in use, the 350 | number of open connections per protocol, and any statistics that these protocols may offer. 351 | 352 | 353 | # Transport Services Architecture and Concepts {#concepts} 354 | 355 | This section of the document describes the architecture non-normatively and explains the operation of a Transport Services Implementation. The concepts defined in this document are intended primarily for use in the documents and specifications that describe the Transport Services System. This includes the architecture, the Transport Services API and the associated Transport Services Implementation. While the specific terminology can be used in some implementations, it is expected that there will remain a variety of terms used by running code. 356 | 357 | The architecture divides the concepts for the Transport Services System into two categories: 358 | 359 | 1. API concepts, which are intended to be exposed to applications; and 360 | 2. System-implementation concepts, which are intended to be internally used by a Transport Services Implementation. 361 | 362 | The following diagram summarizes the top-level concepts in a Transport Services System and how they relate to one another.
363 | 364 | ~~~~~~~~~~ 365 | 366 | +-----------------------------------------------------+ 367 | | Application | 368 | +-+----------------+------^-------+--------^----------+ 369 | | | | | | 370 | pre- | data | events 371 | establishment | transfer | | 372 | | establishment | termination | 373 | | | | | | 374 | | +--v------v-------v+ | 375 | +-v-------------+ Connection(s) +-------+----------+ 376 | | Transport +--------+---------+ | 377 | | Services | | 378 | | API | +-------------+ | 379 | +------------------------+--+ Framer(s) |-----------+ 380 | | +-------------+ 381 | +------------------------|----------------------------+ 382 | | Transport | | 383 | | System | +-----------------+ | 384 | | Implementation | | Cached | | 385 | | | | State | | 386 | | (Candidate Gathering) | +-----------------+ | 387 | | | | 388 | | (Candidate Racing) | +-----------------+ | 389 | | | | System | | 390 | | | | Policy | | 391 | | +----------v-----+ +-----------------+ | 392 | | | Protocol | | 393 | +-------------+ Stack(s) +----------------------+ 394 | +-------+--------+ 395 | V 396 | +-----------------------------------------------------+ 397 | | Network Layer Interface | 398 | +-----------------------------------------------------+ 399 | ~~~~~~~~~~ 400 | {: #fig-abstractions title="Concepts and Relationships in the Architecture of the Transport Services System"} 401 | 402 | The Transport Services Implementation includes the Cached State and System Policy. 403 | 404 | The System Policy provides input from an operating system or other global preferences that can constrain or influence how an implementation will gather Candidate Paths and Protocol Stacks and race the candidates when establishing a Connection. 
As the details of System Policy configuration and enforcement are largely platform- and implementation-dependent, and do not affect application-level interoperability, the Transport Services API {{?I-D.ietf-taps-interface}} does not specify an interface for reading or writing System Policy. 405 | 406 | The Cached State is the state and history that the Transport Services Implementation keeps for each set of associated Endpoints that have previously been used. An application ought to explicitly request any required or desired properties via the Transport Services API. 407 | 408 | ## Transport Services API Concepts 409 | 410 | Fundamentally, a Transport Services API needs to provide Connection objects ({{objects}}) that allow applications to establish communication, and then send and receive data. These could be exposed as handles or referenced objects, depending on the chosen programming language. 411 | 412 | Beyond the Connection objects, there are several high-level groups of actions that any Transport Services API needs to provide: 413 | 414 | * Pre-establishment ({{preestablishment}}) encompasses the properties that an application can pass to describe its intent, requirements, prohibitions, and preferences for its networking operations. These properties apply to multiple transport protocols, unless otherwise specified. Properties specified during pre-establishment can have a large impact on the rest of the interface: they modify how establishment occurs, they influence the expectations around data transfer, and they determine the set of events that will be supported. 415 | 416 | * Establishment ({{establishment}}) focuses on the actions that an application takes on the Connection objects to prepare for data transfer. 417 | 418 | * Data Transfer ({{datatransfer}}) consists of how an application represents the data to be sent and received, the functions required to send and receive that data, and how the application is notified of the status of its data transfer.
419 | 420 | * Event Handling ({{events}}) defines categories of notifications that an application can receive during the lifetime of a Connection. Events also provide opportunities for the application to interact with the underlying transport by querying state or updating maintenance options. 421 | 422 | * Termination ({{termination}}) focuses on the methods by which data transmission is stopped, and connection state is torn down. 423 | 424 | The diagram below provides a high-level view of the actions and events during the lifetime of a Connection object. Note that some actions are alternatives (e.g., whether to initiate a connection or to listen for incoming connections), while others are optional (e.g., setting Connection and Message Properties in pre-establishment) or have been omitted for brevity and simplicity. 425 | 426 | 427 | ~~~~~~~~~~ 428 | 429 | Pre-establishment : Established : Termination 430 | ----------------- : ----------- : ----------- 431 | : : 432 | +-- Local Endpoint : Message : 433 | +-- Remote Endpoint : Receive() | : 434 | +-- Transport Properties : Send() | : 435 | +-- Security Parameters : | : 436 | | : | : 437 | | InitiateWithSend() | Close() : 438 | | +---------------+ Initiate() +-----+------+ Abort() : 439 | +---+ Preconnection |------------->| Connection |-----------> Closed 440 | +---------------+ Rendezvous() +------------+ : 441 | Listen() | : | | : 442 | | : | v : 443 | v : | Connection : 444 | +----------+ : | Ready : 445 | | Listener |----------------------+ : 446 | +----------+ Connection Received : 447 | : : 448 | ~~~~~~~~~~ 449 | {: #fig-lifetime title="The lifetime of a Connection object"} 450 | 451 | In this diagram, the lifetime of a Connection object is divided into three phases: 452 | pre-establishment, the Established state, and Termination. 
453 | 454 | Pre-establishment is based around a Preconnection object, that contains various 455 | sub-objects that describe the properties and parameters of desired Connections 456 | (Local and Remote Endpoints, Transport Properties, and Security Parameters). 457 | A Preconnection can be used to start listening for inbound connections, 458 | in which case a Listener object is created, or can be used to establish a new 459 | connection directly using `Initiate` (for outbound connections) or `Rendezvous` 460 | (for peer-to-peer connections). 461 | 462 | Once a Connection is in the Established state, an application can send and receive 463 | Message objects, and receive state updates. 464 | 465 | Closing or aborting a connection, either locally or from the peer, can terminate 466 | a connection. 467 | 468 | ### Endpoint Objects 469 | 470 | An Endpoint Identifier specifies one side of a transport connection. 471 | Endpoints can be Local Endpoints or Remote Endpoints, and the Endpoint Identifiers can respectively represent an identity 472 | that the application uses for the source or destination of a connection. 473 | An Endpoint Identifier can be specified at various levels of abstraction. 474 | An Endpoint Identifier at a higher level of abstraction (such as a hostname) can be resolved to more concrete identities 475 | (such as IP addresses). A Remote Endpoint Identifier can also represent a multicast group or anycast address. 476 | In the case of multicast, this selects a multicast transport for communication. 477 | 478 | * Remote Endpoint Identifier: The Remote Endpoint Identifier represents the application's identifier for a peer that can participate in a transport connection; for example, the combination of a DNS name for the peer and a service name/port. 479 | 480 | * Local Endpoint Identifier: The Local Endpoint Identifier represents the application's identifier for itself that it uses for transport connections; for example, a local IP address and port. 
481 | 482 | ### Connections and Related Objects {#objects} 483 | 484 | * Connection: A Connection object represents one or more active transport protocol instances that can send and/or receive Messages between Local and Remote Endpoints. It is an abstraction that represents the communication. The Connection object holds state pertaining to the underlying transport protocol instances and any ongoing data transfers. For example, an active Connection can represent a connection-oriented protocol such as TCP, or can represent a fully-specified 5-tuple for a connectionless protocol such as UDP, where the Connection remains an abstraction at the endpoints. It can also represent a pool of transport protocol instances, e.g., a set of TCP and QUIC connections to equivalent endpoints, or a stream of a multi-streaming transport protocol instance. Connections can be created from a Preconnection or by a Listener. 485 | 486 | * Preconnection: A Preconnection object is a representation of a Connection that has not yet been established. It has state that describes parameters of the Connection: the Local Endpoint Identifier from which that Connection will be established, the Remote Endpoint Identifier ({{preestablishment}}) to which it will connect, and Transport Properties that influence the paths and protocols a Connection will use. A Preconnection can be either fully specified (representing a single possible Connection), or it can be partially specified (representing a family of possible Connections). The Local Endpoint ({{preestablishment}}) is required for a Preconnection used to `Listen` for incoming Connections, but optional if it is used to `Initiate` a Connection. The Remote Endpoint Identifier is required in a Preconnection that is used to `Initiate` a Connection, but is optional if it is used to `Listen` for incoming Connections.
The Local Endpoint Identifier and the Remote Endpoint Identifier are both required if a peer-to-peer `Rendezvous` is to occur based on the Preconnection. 487 | 488 | * Transport Properties: Transport Properties allow the application to express its requirements, prohibitions, and preferences and configure a Transport Services Implementation. There are three kinds of Transport Properties: 489 | 490 | * Selection Properties ({{preestablishment}}): Selection Properties can only be specified on a Preconnection. 491 | 492 | * Connection Properties ({{preestablishment}}): Connection Properties can be specified on a Preconnection and changed on the Connection. 493 | 494 | * Message Properties ({{datatransfer}}): Message Properties can be specified as defaults on a Preconnection or a Connection, and can also be specified during data transfer to affect specific Messages. 495 | 496 | * Listener: A Listener object accepts incoming transport protocol connections from Remote Endpoints and generates corresponding Connection objects. It is created from a Preconnection object that specifies the type of incoming Connections it will accept. 497 | 498 | ### Pre-establishment {#preestablishment} 499 | 500 | * Selection Properties: The Selection Properties consist of the properties that an application can set to influence the selection of paths between the Local and Remote Endpoints, to influence the selection of transport protocols, or to configure the behavior of generic transport protocol features. These properties can take the form of requirements, prohibitions, or preferences. Examples of properties that influence path selection include the interface type (such as a Wi-Fi connection, or a Cellular LTE connection), requirements around the largest Message that can be sent, or preferences for throughput and latency. Examples of properties that influence protocol selection and configuration of transport protocol features include reliability, multipath support, and fast open support.
501 | 502 | * Connection Properties: The Connection Properties are used to configure protocol-specific options and control per-connection behavior of a Transport Services Implementation; for example, a protocol-specific Connection Property can express that if TCP is used, the implementation ought to use the User Timeout Option. Note that the presence of such a property does not require that a specific protocol will be used. In general, these properties do not explicitly determine the selection of paths or protocols, but can be used by an implementation during connection establishment. Connection Properties are specified on a Preconnection prior to Connection establishment, and can be modified on the Connection later. Changes made to Connection Properties after Connection establishment take effect on a best-effort basis. 503 | 504 | * Security Parameters: Security Parameters define an application's requirements for authentication and encryption on a Connection. They are used by Transport Security protocols (such as those described in {{?RFC8922}}) to establish secure Connections. Examples of parameters that can be set include local identities, private keys, supported cryptographic algorithms, and requirements for validating trust of remote identities. Security Parameters are primarily associated with a Preconnection object, but properties related to identities can be associated directly with Endpoints. 505 | 506 | ### Establishment Actions {#establishment} 507 | 508 | * Initiate: The primary action that an application can take to create a Connection to a Remote Endpoint, and prepare any required local or remote state to enable the transmission of Messages. For some protocols, this will initiate a client-to-server style handshake; for other protocols, this will just establish local state (e.g., with connectionless protocols such as UDP). 
The process of identifying options for connecting, such as resolution of the Remote Endpoint Identifier, occurs in response to the `Initiate` call. 509 | 510 | * Listen: Enables a Listener to accept incoming connections. The Listener will then create Connection objects as incoming connections are accepted ({{events}}). Listeners by default register with multiple paths, protocols, and Local Endpoints, unless constrained by Selection Properties and/or the specified Local Endpoint Identifier(s). Connections can be accepted on any of the available paths or endpoints. 511 | 512 | * Rendezvous: The action of establishing a peer-to-peer connection with a 513 | Remote Endpoint. It simultaneously attempts to initiate a connection to 514 | a Remote Endpoint while listening for an incoming connection from that 515 | Endpoint. The process of identifying options for the connection, such 516 | as resolution of the Remote Endpoint Identifier(s), occurs in response to the `Rendezvous` call. 517 | As with Listeners, the set of local paths and endpoints is constrained 518 | by Selection Properties. If successful, the `Rendezvous` call generates and asynchronously returns a 519 | Connection object to represent the established peer-to-peer connection. 520 | The processes by which connections are initiated during a `Rendezvous` 521 | action will depend on the set of Local and Remote Endpoints configured on 522 | the Preconnection. For example, if the Local and Remote Endpoints are TCP 523 | host candidates, then a TCP simultaneous open {{?RFC9293}} might be performed. 524 | However, if the set of Local Endpoints includes server reflexive 525 | candidates, such as those provided by STUN (Session Traversal Utilities 526 | for NAT) {{?RFC5389}}, a `Rendezvous` action will race 527 | candidates in the style of the ICE (Interactive Connection Establishment) 528 | algorithm {{?RFC8445}} to perform NAT 529 | binding discovery and initiate a peer-to-peer connection. 
530 | 531 | ### Data Transfer Objects and Actions {#datatransfer} 532 | 533 | * Message: A Message object is a unit of data that can be represented as bytes that can be transferred between two endpoints over a transport connection. The bytes within a Message are assumed to be ordered. If an application does not care about the order in which a peer receives two distinct spans of bytes, those spans of bytes are considered independent Messages. Messages are sent in the payload of IP packets. One packet can carry one or more Messages or parts of a Message. 534 | 535 | * Message Properties: Message Properties are used to specify details about Message transmission. They can be specified directly on individual Messages, or can be set on a Preconnection or Connection as defaults. These properties might only apply to how a Message is sent (such as how the transport will treat prioritization and reliability), but can also include properties that specific protocols encode and communicate to the Remote Endpoint. When receiving Messages, Message Properties can contain information about the received Message, such as metadata generated at the receiver and information signalled by the Remote Endpoint. For example, a Message can be marked with a Message Property indicating that it is the final Message on a Connection. 536 | 537 | * Send: The action to transmit a Message over a Connection to the Remote Endpoint. The interface to `Send` can accept Message Properties specific to how the Message content is to be sent. The status of the `Send` operation is delivered back to the sending application in an event ({{events}}). 538 | 539 | * Receive: An action that indicates that the application is ready to asynchronously accept a Message over a Connection from a Remote Endpoint, while the Message content itself will be delivered in an event ({{events}}). The interface to `Receive` can include Message Properties specific to the Message that is to be delivered to the application. 
540 | 541 | * Framer: A Framer is a data translation layer that can be added to a Connection. Framers allow extending a Connection's Protocol Stack to define how to encapsulate or encode outbound Messages, and how to decapsulate or decode inbound data into Messages. In this way, message boundaries can be preserved when using a Connection object, even with a protocol that otherwise presents unstructured streams, such as TCP. This is designed based on the fact that many of the current application protocols evolved over TCP, which does not provide message boundary preservation, and since many of these protocols require message boundaries to function, each application layer protocol has defined its own framing. For example, when an HTTP application sends and receives HTTP messages over a byte-stream transport, it must parse the boundaries of HTTP messages from the stream of bytes. 542 | 543 | ### Event Handling {#events} 544 | 545 | The following categories of events can be delivered to an application: 546 | 547 | * Connection Ready: Signals to an application that a given Connection is ready to send and/or receive Messages. If the Connection relies on handshakes to establish state between peers, then it is assumed that these steps have been taken. 548 | 549 | * Connection Closed: Signals to an application that a given Connection is no longer usable for sending or receiving Messages. The event delivers a reason or error to the application that describes the nature of the termination. 550 | 551 | * Connection Received: Signals to an application that a given Listener has received a Connection. 552 | 553 | * Message Received: Delivers received Message content to the application, based on a `Receive` action. To allow an application to limit the occurrence of such events, each call to `Receive` will be paired with a single `Receive` event. This can include an error if the `Receive` action cannot be satisfied, e.g., due to the Connection being closed. 
554 | 555 | * Message Sent: Notifies the application of the status of its `Send` action. This might indicate a failure if the Message cannot be sent, or an indication that the Message has been processed by the Transport Services System. 556 | 557 | * Path Properties Changed: Notifies the application that a property of the Connection has changed that might influence how and where data is sent and/or received. 558 | 559 | ### Termination Actions {#termination} 560 | 561 | * Close: The action an application takes on a Connection to indicate that it no longer intends to send data, is no longer willing to receive data, and that the protocol should signal this state to the Remote Endpoint if the transport protocol allows this. (Note that this is distinct from the concept of "half-closing" a bidirectional connection, such as when a FIN is sent in one direction of a TCP connection {{?RFC9293}}. The end of a stream can also be indicated using Message Properties when sending.) 562 | 563 | * Abort: The action the application takes on a Connection to indicate a `Close` and also indicate that the Transport Services System should not attempt to deliver any outstanding data, and immediately drop the connection. This is intended for immediate, usually abnormal, termination of a connection. 564 | 565 | ### Connection Groups 566 | 567 | A Connection Group is a set of Connections that shares Connection Properties and cached state generated by protocols. 568 | A Connection Group represents state for managing Connections within a single application, and does not require end-to-end protocol signaling. For transport protocols that support multiplexing, only Connections within the same Connection Group are allowed to be multiplexed together. 569 | 570 | The API allows a Connection to be created from another Connection. This adds the new Connection to the Connection Group. 
A change to one of the Connection Properties on any Connection in the Connection Group automatically changes the Connection Property for all others. All Connections in a Connection Group share the same set of Connection Properties except for the Connection Priority. These Connection Properties are said to be entangled. 571 | 572 | Passive Connections can also be added to a Connection Group, e.g., when a Listener receives a new Connection that is just a new stream of an already active multi-streaming protocol 573 | instance. 574 | 575 | While Connection Groups are managed by the Transport Services Implementation, an application can define different Connection Contexts for different Connection Groups to explicitly control caching boundaries, as discussed in {{conn-context}}. 576 | 577 | ## Transport Services Implementation 578 | 579 | This section defines the key architectural concepts for the Transport Services Implementation within the Transport Services System. 580 | 581 | The Transport Services System consists of the Transport Services Implementation and the Transport Services API. 582 | The Transport Services Implementation consists of all objects and protocol instances used internally to a system or library to implement the functionality needed to provide a transport service across a network, as required by the abstract interface. 583 | 584 | * Path: Represents an available set of properties that a Local Endpoint can use to communicate with a Remote Endpoint, such as routes, addresses, and physical and virtual network interfaces. 585 | 586 | * Protocol Instance: A single instance of one protocol, including any state necessary to establish connectivity or send and receive Messages. 587 | 588 | * Protocol Stack: A set of Protocol Instances (including relevant application, security, transport, or Internet protocols) that are used together to establish connectivity or send and receive Messages. 
A single stack can be simple (a single transport protocol instance over IP), or it can be complex (multiple application protocol streams going through a single security and transport protocol, over IP; or, a multi-path transport protocol over multiple transport sub-flows). 589 | 590 | * Candidate Path: One path that is available to an application and conforms to the Selection Properties and System Policy, of which there can be several. Candidate Paths are identified during the gathering phase ({{gathering}}) and can be used during the racing phase ({{racing}}). 591 | 592 | * Candidate Protocol Stack: One Protocol Stack that can be used by an application for a Connection, for which there can be several candidates. Candidate Protocol Stacks are identified during the gathering phase ({{gathering}}) and are started during the racing phase ({{racing}}). 593 | 594 | * System Policy: The input from an operating system or other global preferences that can constrain or influence how an implementation will gather candidate paths and Protocol Stacks ({{gathering}}) and race the candidates during establishment ({{racing}}). Specific aspects of the System Policy either apply to all Connections or only certain ones, depending on the runtime context and properties of the Connection. 595 | 596 | * Cached State: The state and history that the implementation keeps for each set of associated Endpoints that have been used previously. This can include DNS results, TLS session state, previous success and quality of transport protocols over certain paths, as well as other information. This caching does not imply that the same decisions are necessarily made for subsequent connections, rather, it means that cached state is used by a Transport Services Implementation to inform functions such as choosing the candidates to be raced, selecting appropriate transport parameters, etc. 
An application SHOULD NOT rely on specific caching behaviour, instead it ought to explicitly request any required or desired properties via the Transport Services API. 597 | 598 | ### Candidate Gathering {#gathering} 599 | 600 | * Candidate Path Selection: Candidate Path Selection represents the act of choosing one or more paths that are available to use based on the Selection Properties and any available Local and Remote Endpoint Identifiers provided by the application, as well as the policies and heuristics of a Transport Services Implementation. 601 | 602 | * Candidate Protocol Selection: Candidate Protocol Selection represents the act of choosing one or more sets of Protocol Stacks that are available to use based on the Transport Properties provided by the application, and the heuristics or policies within the Transport Services Implementation. 603 | 604 | ### Candidate Racing {#racing} 605 | 606 | Connection establishment attempts for a set of candidates may be performed simultaneously, synchronously, serially, or using some combination of all of these. We refer to this process as racing, borrowing terminology from Happy Eyeballs {{?RFC8305}}. 607 | 608 | * Protocol Option Racing: Protocol Option Racing is the act of attempting to establish, or scheduling attempts to establish, multiple Protocol Stacks that differ based on the composition of protocols or the options used for protocols. 609 | 610 | * Path Racing: Path Racing is the act of attempting to establish, or scheduling attempts to establish, multiple Protocol Stacks that differ based on a selection from the available Paths. Since different Paths will have distinct configurations (see {{?RFC7556}}) 611 | for local addresses and DNS servers, attempts across different Paths will perform separate DNS resolution steps, which can lead to further racing of the resolved Remote Endpoint Identifiers. 
612 | 613 | * Remote Endpoint Racing: Remote Endpoint Racing is the act of attempting to establish, or scheduling attempts to establish, multiple Protocol Stacks that differ based on the specific representation of the Remote Endpoint Identifier, such as a particular IP address that was resolved from a DNS hostname. 614 | 615 | ### Separating Connection Contexts {#conn-context} 616 | 617 | A Transport Services Implementation can by default share stored properties across Connections within an application, such as cached protocol state, cached path state, and heuristics. This provides efficiency and convenience for the application, since the Transport Services System can automatically optimize behavior. 618 | 619 | The Transport Services API can allow applications to explicitly define Connection Contexts that force separation of Cached State and Protocol Stacks. 620 | For example, a web browser application could use Connection Contexts with separate caches when implementing different tabs. Possible reasons to isolate Connections using separate Connection Contexts include: 621 | 622 | - Privacy concerns about re-using cached protocol state that can lead to linkability. Sensitive state could include TLS session state {{?RFC8446}} and HTTP cookies {{?RFC6265}}. These concerns could be addressed using Connection Contexts with separate caches, such as for different browser tabs. 623 | - Privacy concerns about allowing Connections to multiplex together, which can tell a Remote Endpoint that all of the Connections are coming from the same application. Using Connection Contexts avoids the Connections being multiplexed in an HTTP/2 or QUIC stream. 624 | 625 | # IANA Considerations 626 | 627 | This document has no actions for IANA. 628 | 629 | # Security and Privacy Considerations 630 | 631 | The Transport Services System does not recommend use of specific security 632 | protocols or algorithms.
Its goal is to offer ease of use for existing protocols 633 | by providing a generic security-related interface. Each provided interface 634 | translates to an existing protocol-specific interface provided by supported 635 | security protocols. For example, trust verification callbacks are common parts 636 | of TLS APIs; a Transport Services API exposes similar functionality 637 | {{?RFC8922}}. 638 | 639 | As described above in {{equivalence}}, if a Transport Services Implementation races 640 | between two different Protocol Stacks, both need to use the same security protocols 641 | and options. However, a Transport Services Implementation can race different security 642 | protocols, e.g., if the application explicitly specifies that it considers them 643 | equivalent. 644 | 645 | The application controls whether 646 | information from previous racing attempts, or other information 647 | about past communications that was cached by 648 | the Transport Services System is used during establishment. 649 | This allows applications to make 650 | tradeoffs between efficiency (through racing) and privacy (via information that 651 | might leak from the cache toward an on-path observer). Some applications have 652 | features (e.g., "incognito mode") that align with this functionality. 653 | 654 | Applications need to ensure that they use security APIs appropriately. In cases 655 | where applications use an interface to provide sensitive keying material, e.g., 656 | access to private keys or copies of pre-shared keys (PSKs), key use needs to be 657 | validated and scoped to the intended protocols and roles. For example, if an 658 | application provides a certificate to only be used as client authentication for 659 | outbound TLS and QUIC connections, the Transport Services System MUST NOT use this 660 | automatically in other contexts (such as server authentication for inbound 661 | connections, or in another security protocol handshake that is not equivalent to TLS).
662 | 663 | A Transport Services System MUST NOT automatically fall back from 664 | secure protocols to insecure protocols, or to weaker versions of secure 665 | protocols (see {{equivalence}}). For example, if an application requests a specific version of TLS, 666 | but the desired version of TLS is not available, its connection will fail. 667 | As described in {{equivalence}}, the Transport Services API can allow applications 668 | to specify minimum versions that are allowed to be used by the Transport Services System. 669 | 670 | # Acknowledgements 671 | 672 | This work has received funding from the European Union's Horizon 2020 research 673 | and innovation programme under grant agreements No. 644334 (NEAT), No. 688421 674 | (MAMI) and No. 815178 (5GENESIS). 675 | 676 | This work has been supported by Leibniz Prize project funds of DFG - German 677 | Research Foundation: Gottfried Wilhelm Leibniz-Preis 2011 (FKZ FE 570/4-1). 678 | 679 | This work has been supported by the UK Engineering and Physical Sciences 680 | Research Council under grant EP/R04144X/1. 681 | 682 | Thanks to Reese Enghardt, Max Franke, Mirja Kuehlewind, Jonathan Lennox, and 683 | Michael Welzl for the discussions and feedback that helped shape the architecture 684 | of the system described here. 685 | Particular thanks is also due to Philipp S. Tiesel and Christopher A. Wood, 686 | who were both co-authors of this specification as it progressed 687 | through the TAPS working group. 688 | Thanks as well to Stuart Cheshire, Josh Graessley, David Schinazi, 689 | and Eric Kinnear for their implementation and design efforts, including Happy 690 | Eyeballs, that heavily influenced this work.
691 | -------------------------------------------------------------------------------- /draft-ietf-taps-impl.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Implementing Interfaces to Transport Services 3 | abbrev: TAPS Implementation 4 | docname: draft-ietf-taps-impl-latest 5 | date: 6 | category: info 7 | 8 | ipr: trust200902 9 | area: Transport 10 | workgroup: TAPS Working Group 11 | keyword: Internet-Draft 12 | 13 | stand_alone: yes 14 | pi: [toc, sortrefs, symrefs] 15 | 16 | author: 17 | - 18 | ins: A. Brunstrom 19 | name: Anna Brunstrom 20 | role: editor 21 | org: Karlstad University 22 | street: Universitetsgatan 2 23 | city: 651 88 Karlstad 24 | country: Sweden 25 | email: anna.brunstrom@kau.se 26 | - 27 | ins: T. Pauly 28 | name: Tommy Pauly 29 | role: editor 30 | org: Apple Inc. 31 | street: One Apple Park Way 32 | city: Cupertino, California 95014 33 | country: United States of America 34 | email: tpauly@apple.com 35 | - 36 | ins: R. Enghardt 37 | name: Reese Enghardt 38 | org: Netflix 39 | street: 121 Albright Way 40 | city: Los Gatos, CA 95032 41 | country: United States of America 42 | email: ietf@tenghardt.net 43 | - 44 | ins: P. Tiesel 45 | name: Philipp S. Tiesel 46 | org: SAP SE 47 | street: George-Stephenson-Straße 7-13 48 | city: 10557 Berlin 49 | country: Germany 50 | email: philipp@tiesel.net 51 | - 52 | ins: M. Welzl 53 | name: Michael Welzl 54 | org: University of Oslo 55 | street: PO Box 1080 Blindern 56 | city: 0316 Oslo 57 | country: Norway 58 | email: michawe@ifi.uio.no 59 | 60 | normative: 61 | I-D.ietf-taps-arch: 62 | I-D.ietf-taps-interface: 63 | 64 | informative: 65 | NEAT-flow-mapping: 66 | title: Transparent Flow Mapping for NEAT 67 | seriesinfo: IFIP NETWORKING 2017 Workshop on Future of Internet Transport (FIT 2017) 68 | authors: 69 | - 70 | ins: F. Weinrank 71 | - 72 | ins: M. 
Tuexen 73 | date: 2017 74 | TCP-COUPLING: 75 | title: "ctrlTCP: Reducing Latency through Coupled, Heterogeneous Multi-Flow TCP Congestion Control" 76 | seriesinfo: 77 | IEEE INFOCOM Global Internet Symposium (GI) workshop (GI 2018) 78 | authors: 79 | - 80 | ins: S. Islam 81 | name: Safiqul Islam 82 | - 83 | ins: M. Welzl 84 | name: Michael Welzl 85 | - 86 | ins: K. Hiorth 87 | name: Kristian Hiorth 88 | - 89 | ins: D. Hayes 90 | name: David Hayes 91 | - 92 | ins: G. Armitage 93 | name: Grenville Armitage 94 | - 95 | ins: S. Gjessing 96 | name: Stein Gjessing 97 | date: 2018-04-15 98 | 99 | 100 | --- abstract 101 | 102 | The Transport Services system enables applications to use transport protocols flexibly for network communication 103 | and defines a protocol-independent Transport Services Application Programming Interface (API) that is based on an asynchronous, 104 | event-driven interaction pattern. This document serves as a guide to implementing such a system. 105 | 106 | --- middle 107 | 108 | # Introduction 109 | 110 | The Transport Services architecture {{I-D.ietf-taps-arch}} defines a system that allows applications to flexibly use transport networking protocols. The API that such a system exposes to applications is defined as the Transport Services API {{I-D.ietf-taps-interface}}. This API is designed to be generic across multiple transport protocols and sets of protocol features. 111 | 112 | This document serves as a guide to implementing a system that provides a Transport Services API. This guide offers suggestions to developers, but it is not prescriptive: implementations are free to take any desired form as long as the API specification in {{I-D.ietf-taps-interface}} is honored. It is the job of an implementation of a Transport Services system to turn the requests of an application into decisions on how to establish connections, and how to transfer data over those connections once established. 
The terminology used in this document is based on the Transport Services architecture {{I-D.ietf-taps-arch}}. 113 | 114 | # Implementing Connection Objects 115 | 116 | The connection objects that are exposed to applications for Transport Services are: 117 | 118 | - the Preconnection, the bundle of properties that describes the application constraints on, and preferences for, the transport; 119 | - the Connection, the basic object that represents a flow of data as Messages in either direction between the Local and Remote Endpoints; 120 | - and the Listener, a passive waiting object that delivers new Connections. 121 | 122 | Preconnection objects should be implemented as bundles of properties that an application can both read and write. A Preconnection object influences a Connection only at one point in time: when the Connection is created. Connection objects represent the interface between the application and the implementation to manage transport state, and conduct data transfer. During the process of establishment ({{conn-establish}}), the Connection will not necessarily be immediately bound to a transport protocol instance, since multiple candidate Protocol Stacks might be raced. 123 | 124 | Once a Preconnection has been used to create an outbound Connection or a Listener, the implementation should ensure that the copy of the properties held by the Connection or Listener cannot be mutated by the application making changes to the original Preconnection object. This may involve the implementation performing a deep-copy, copying the object with all the objects that it references. 125 | 126 | Once the Connection is established, the Transport Services Implementation maps actions and events to the details of the chosen Protocol Stack. 
For example, the same Connection object may ultimately represent a single transport protocol instance (e.g., a TCP connection, a TLS session over TCP, a UDP flow with fully-specified Local and Remote Endpoint Identifiers, a DTLS session, an SCTP stream, a QUIC stream, or an HTTP/2 stream). 127 | The Connection Properties held by a Connection or Listener are independent of other Connections that are not part of the same Connection Group. 128 | 129 | Connection establishment is only a local operation for connectionless protocols, which serves to simplify the local send/receive functions and to filter the traffic for the specified addresses and ports {{?RFC8085}} (for example using UDP or UDP-Lite transport without a connection handshake procedure). 130 | 131 | Once `Initiate` has been called, the Selection Properties and Endpoint information of the created Connection are immutable (i.e., an application is not able to later modify the properties of a Connection by manipulating the original Preconnection object). 132 | Listener objects are created with a Preconnection, at which point their configuration should be considered immutable by the implementation. The process of listening is described in {{listen}}. 133 | 134 | # Implementing Pre-Establishment 135 | 136 | The pre-establishment phase allows applications to specify properties for the Connections that they are about to make, or to query the API about potential Connections they could make. 137 | 138 | During pre-establishment the application specifies one or more Endpoints to be used for communication as well as protocol preferences and constraints via Selection Properties and, if desired, also Connection Properties.
{{Section 4 of I-D.ietf-taps-interface}} states that Connection Properties should preferably be configured during pre-establishment, because they can serve as input to decisions that are made by the implementation (e.g., the capacity profile can guide usage of a protocol offering scavenger-type congestion control). 139 | 140 | The implementation stores these properties as a part of the Preconnection object for use during connection establishment. For Selection Properties that are not provided by the application, the implementation uses the default values specified in the Transport Services API ({{I-D.ietf-taps-interface}}). 141 | 142 | ## Configuration-time errors 143 | 144 | The Transport Services system should have a list of supported protocols available, which each have transport features reflecting the capabilities of the protocol. Once an application specifies its Transport Properties, the Transport Services system matches the required and prohibited properties against the transport features of the available protocols (see {{Section 6.2 of I-D.ietf-taps-interface}} for the definition of property preferences). 145 | 146 | In the following cases, failure should be detected during pre-establishment: 147 | 148 | - A request by an application for properties that cannot be satisfied by any of the available protocols. For example, if an application requires `perMsgReliability`, but no such feature is available in any protocol on the host running the Transport Services system this should result in an error. 149 | - A request by an application for properties that are in conflict with each other, such as specifying required and prohibited properties that cannot be satisfied by any protocol. For example, if an application prohibits `reliability` but then requires `perMsgReliability`, this mismatch should result in an error. 150 | 151 | To avoid allocating resources that are not finally needed, it is important that configuration-time errors fail as early as possible. 
152 | 153 | ## Role of system policy 154 | 155 | The properties specified during pre-establishment have a close relationship to system policy. The implementation is responsible for combining and reconciling several different sources of preferences when establishing Connections. These include, but are not limited to: 156 | 157 | 1. Application preferences, i.e., preferences specified during the pre-establishment via Selection Properties. 158 | 2. Dynamic system policy, i.e., policy compiled from internally and externally acquired information about available network interfaces, supported transport protocols, and current/previous Connections. Examples of ways to externally retrieve policy-support information are through OS-specific statistics/measurement tools and tools that reside on middleboxes and routers. 159 | 3. Default implementation policy, i.e., predefined policy by OS or application. 160 | 161 | In general, any protocol or path used for a Connection must conform to all three sources of constraints. A violation that occurs at any of the policy layers should cause a protocol or path to be considered ineligible for use. If such a violation prevents a Connection from being established, this should be communicated to the application, e.g. via the `EstablishmentError` event. For an example of application preferences leading to constraints, an application may prohibit the use of metered network interfaces for a given Connection to avoid user cost. Similarly, the system policy at a given time may prohibit the use of such a metered network interface from the application's process. Lastly, the implementation itself may default to disallowing certain network interfaces unless explicitly requested by the application. 162 | 163 | It is expected that the database of system policies and the method of looking up these policies will vary across various platforms. 
An implementation should attempt to look up the relevant policies for the system in a dynamic way to make sure it is reflecting an accurate version of the system policy, since the system's policy regarding the application's traffic may change over time due to user or administrative changes. 164 | 165 | # Implementing Connection Establishment {#conn-establish} 166 | 167 | The process of establishing a network connection begins when an application expresses intent to communicate with a Remote Endpoint by calling `Initiate`, at which point the Preconnection object contains all constraints or requirements the application has configured. The establishment process can be considered complete once there is at least one Protocol Stack that has completed any required setup to the point that it can transmit and receive the application's data. 168 | 169 | Connection establishment is divided into two top-level steps: Candidate Gathering (defined in {{Section 4.2.1 of I-D.ietf-taps-arch}}), to identify the paths, protocols, and endpoints to use (see {{gathering}}); and Candidate Racing (defined in {{Section 4.2.2 of I-D.ietf-taps-arch}}), in which the necessary protocol handshakes are conducted so that the Transport Services system can select which set to use (see {{racing}}). Candidate Racing involves attempting multiple options for connection establishment, and choosing the first option to succeed as the Protocol Stack to use for the connection. These attempts are usually staggered, starting each next option after a delay, but they can also be performed in parallel or only after waiting for failures. 170 | 171 | For ease of illustration, this document structures the candidates for racing as a tree (see {{tree-structure}}). 172 | This is not meant to restrict implementations from structuring racing candidates differently. 
173 | 174 | The most simple example of this process might involve identifying the single IP address to which the implementation wishes to connect, using the system's current default path (i.e., using the default interface), and starting a TCP handshake to establish a stream to the specified IP address. However, each step may also differ depending on the requirements of the connection: if the Endpoint Identifier is a hostname and port, then there may be multiple resolved addresses that are available; there may also be multiple paths available, (in this case using an interface other than the default system interface); and some protocols may not need any transport handshake to be considered "established" (such as UDP), while other connections may utilize layered protocol handshakes, such as TLS over TCP. 175 | 176 | Whenever an implementation has multiple options for connection establishment, it can view the set of all individual connection establishment options as a single, aggregate connection establishment. The aggregate set conceptually includes every valid combination of endpoints, paths, and protocols. As an example, consider an implementation that initiates a TCP connection to a hostname + port Endpoint Identifier, and has two valid interfaces available (Wi-Fi and LTE). The hostname resolves to a single IPv4 address on the Wi-Fi network, and resolves to the same IPv4 address on the LTE network, as well as a single IPv6 address. 
The aggregate set of connection establishment options can be viewed as follows: 177 | 178 | ~~~~~~~~~~ 179 | Aggregate [Endpoint Identifier: www.example.com:443] [Interface: Any] [Protocol: TCP] 180 | |-> [Endpoint Identifier: [2001:db8:23::1]:443] [Interface: Wi-Fi] [Protocol: TCP] 181 | |-> [Endpoint Identifier: 192.0.2.1:443] [Interface: LTE] [Protocol: TCP] 182 | |-> [Endpoint Identifier: [2001:db8:42::1]:443] [Interface: LTE] [Protocol: TCP] 183 | ~~~~~~~~~~ 184 | 185 | Any one of these sub-entries on the aggregate connection attempt would satisfy the original application intent. The concern of this section is the algorithm defining which of these options to try, when, and in what order. 186 | 187 | During Candidate Gathering ({{gathering}}), an implementation prunes and sorts branches according 188 | to the Selection Property preferences ({{Section 6.2 of I-D.ietf-taps-interface}}). 189 | It first excludes all protocols and paths that match a Prohibit property or do not 190 | match all Require properties. Then it will sort branches according to Preferred 191 | properties, Avoided properties, and possibly other criteria. 192 | 193 | ## Structuring Candidates as a Tree {#tree-structure} 194 | 195 | As noted above, the consideration of multiple candidates in a gathering and racing process can be conceptually structured as a tree; this terminological convention is used throughout this document. 196 | 197 | Each leaf node of the tree represents a single, coherent connection attempt, with an endpoint, a network path, and a set of protocols that can directly negotiate and send data on the network. Each node in the tree that is not a leaf represents a connection attempt that is either underspecified, or else includes multiple distinct options. For example, when connecting on an IP network, a connection attempt to a hostname and port is underspecified, because the connection attempt requires a resolved IP address as its Remote Endpoint Identifier.
In this case, the node represented by the connection attempt to the hostname is a parent node, with child nodes for each IP address. Similarly, an implementation that is allowed to connect using multiple interfaces will have a parent node of the tree for the decision between the network paths, with a branch for each interface. 198 | 199 | The example aggregate connection attempt above can be drawn as a tree by grouping the addresses resolved on the same interface into branches: 200 | 201 | ~~~~~~~~~~ 202 | || 203 | +==============================+ 204 | | www.example.com:443/any path | 205 | +==============================+ 206 | // \\ 207 | +===========================+ +===========================+ 208 | | www.example.com:443/Wi-Fi | | www.example.com:443/LTE | 209 | +===========================+ +===========================+ 210 | || // \\ 211 | +============================+ +=====================+ +==========================+ 212 | | [2001:db8:23::1]:443/Wi-Fi | | 192.0.2.1:443/LTE | | [2001:db8:42::1]:443/LTE | 213 | +============================+ +=====================+ +==========================+ 214 | ~~~~~~~~~~ 215 | 216 | The rest of this section will use a notation scheme to represent this tree. The root node (or parent node) of the tree will be represented by a single integer, such as "1". ("1" is used assuming that this is the first connection made by the system; future connections created by the application would allocate numbers in an increasing manner.) Each child of that node will have an integer that identifies it, from 1 to the number of children. That child node will be uniquely identified by concatenating its integer to its parent's identifier with a dot in between, such as "1.1" and "1.2". Each node will be summarized by a tuple of three elements: endpoint, path (labeled here by interface), and protocol. In Protocol Stacks, the layers are separated by '/' and ordered with the protocol closest to the application first. 
The above example can now be written more succinctly as: 217 | 218 | ~~~~~~~~~~ 219 | 1 [www.example.com:443, any path, TCP] 220 | 1.1 [www.example.com:443, Wi-Fi, TCP] 221 | 1.1.1 [[2001:db8:23::1]:443, Wi-Fi, TCP] 222 | 1.2 [www.example.com:443, LTE, TCP] 223 | 1.2.1 [192.0.2.1:443, LTE, TCP] 224 | 1.2.2 [[2001:db8:42::1]:443, LTE, TCP] 225 | ~~~~~~~~~~ 226 | 227 | When an implementation is asked to establish a single connection, only one of the leaf nodes in the candidate set is needed to transfer data. Thus, once a single leaf node becomes ready to use, then the connection establishment tree is considered ready. One way to implement this is by having every leaf node update the state of its parent node when it becomes ready, until the root node of the tree is ready, which then notifies the application that the Connection as a whole is ready to use. 228 | 229 | A connection establishment tree may consist of only a single node, such as a connection attempt to an IP address over a single interface with a single protocol. 230 | 231 | ~~~~~~~~~~ 232 | 1 [[2001:db8:23::1]:443, Wi-Fi, TCP] 233 | ~~~~~~~~~~ 234 | 235 | A root node may also only have one child (or leaf) node, such as when a hostname resolves to only a single IP address. 236 | 237 | ~~~~~~~~~~ 238 | 1 [www.example.com:443, Wi-Fi, TCP] 239 | 1.1 [[2001:db8:23::1]:443, Wi-Fi, TCP] 240 | ~~~~~~~~~~ 241 | 242 | ### Branch Types 243 | 244 | There are three types of branching from a parent node into one or more child nodes. Any parent node of the tree must only use one type of branching. 245 | 246 | #### Derived Endpoints 247 | 248 | If a connection originally targets a single Endpoint Identifier, there may be multiple endpoint candidates of different types that can be derived from the original. This creates an ordered list of the derived endpoint candidates according to application preference, system policy and expected performance.
249 | 250 | DNS hostname-to-address resolution is the most common method of endpoint derivation. When trying to connect to a hostname Endpoint Identifier on a traditional IP network, the implementation should send all applicable DNS queries. Commonly, this will include both A (IPv4) and AAAA (IPv6) records if both address families are supported on the local interface. This can also include SRV records {{?RFC2782}}, SVCB and HTTPS records {{?I-D.ietf-dnsop-svcb-https}}, or other future record types. The algorithm for ordering and racing these addresses should follow the recommendations in Happy Eyeballs {{!RFC8305}}. 251 | 252 | ~~~~~~~~~~ 253 | 1 [www.example.com:443, Wi-Fi, TCP] 254 | 1.1 [[2001:db8::1]:443, Wi-Fi, TCP] 255 | 1.2 [192.0.2.1:443, Wi-Fi, TCP] 256 | 1.3 [[2001:db8::2]:443, Wi-Fi, TCP] 257 | 1.4 [[2001:db8::3]:443, Wi-Fi, TCP] 258 | ~~~~~~~~~~ 259 | 260 | DNS-Based Service Discovery {{?RFC6763}} can also provide an endpoint derivation step. When trying to connect to a named service, the client may discover one or more hostname and port pairs on the local network using multicast DNS {{?RFC6762}}. These hostnames should each be treated as a branch that can be attempted independently from other hostnames. Each of these hostnames might resolve to one or more addresses, which would create multiple layers of branching. 261 | 262 | ~~~~~~~~~~ 263 | 1 [term-printer._ipp._tcp.meeting.example.com, Wi-Fi, TCP] 264 | 1.1 [term-printer.meeting.example.com:631, Wi-Fi, TCP] 265 | 1.1.1 [31.133.160.18:631, Wi-Fi, TCP] 266 | ~~~~~~~~~~ 267 | 268 | Applications can influence which derived Endpoints are allowed and preferred via Selection Properties set on the Preconnection. For example, setting a preference for `useTemporaryLocalAddress` would prefer the use of IPv6 over IPv4, and requiring `useTemporaryLocalAddress` would eliminate IPv4 options, since IPv4 does not support temporary addresses.
269 | 270 | #### Network Paths 271 | 272 | If a client has multiple network paths available to it, e.g., a mobile client with interfaces for both Wi-Fi and Cellular connectivity, it can attempt a connection over any of the paths. This represents a branch point in the connection establishment. Similar to a derived endpoint, the paths should be ranked based on preference, system policy, and performance. Attempts should be started on one path (e.g., a specific interface), and then successively on other paths (or interfaces) after delays based on the expected path round-trip-time or other available metrics. 273 | 274 | ~~~~~~~~~~ 275 | 1 [192.0.2.1:443, any path, TCP] 276 | 1.1 [192.0.2.1:443, Wi-Fi, TCP] 277 | 1.2 [192.0.2.1:443, LTE, TCP] 278 | ~~~~~~~~~~ 279 | 280 | The same approach applies to any situation in which the client is aware of multiple links or views of the network. A single interface may be shared by 281 | multiple network paths, each with a coherent set of addresses, routes, DNS server, and more. A path may also represent a virtual interface service such as a Virtual Private Network (VPN). 282 | 283 | The list of available paths should be constrained by any requirements the application sets, as well as by the system policy. 284 | 285 | #### Protocol Options 286 | 287 | Differences in possible protocol compositions and options can also provide a branching point in connection establishment. This allows clients to be resilient to situations in which a certain protocol is not functioning on a server or network. 288 | 289 | This approach is commonly used for connections with optional proxy server configurations. A single connection might have several options available: an HTTP-based proxy, a SOCKS-based proxy, or no proxy. As above, these options should be ranked based on preference, system policy, and performance and attempted in succession. 
290 | 291 | ~~~~~~~~~~ 292 | 1 [www.example.com:443, any path, HTTP/TCP] 293 | 1.1 [192.0.2.8:443, any path, HTTP/HTTP Proxy/TCP] 294 | 1.2 [192.0.2.7:10234, any path, HTTP/SOCKS/TCP] 295 | 1.3 [www.example.com:443, any path, HTTP/TCP] 296 | 1.3.1 [192.0.2.1:443, any path, HTTP/TCP] 297 | ~~~~~~~~~~ 298 | 299 | This approach also allows a client to attempt different sets of application and transport protocols that, when available, could provide preferable features. For example, the protocol options could involve QUIC {{?RFC9000}} over UDP on one branch, and HTTP/2 {{!RFC7540}} over TLS over TCP on the other: 300 | 301 | ~~~~~~~~~~ 302 | 1 [www.example.com:443, any path, HTTP] 303 | 1.1 [www.example.com:443, any path, HTTP3/QUIC/UDP] 304 | 1.1.1 [192.0.2.1:443, any path, HTTP3/QUIC/UDP] 305 | 1.2 [www.example.com:443, any path, HTTP2/TLS/TCP] 306 | 1.2.1 [192.0.2.1:443, any path, HTTP2/TLS/TCP] 307 | ~~~~~~~~~~ 308 | 309 | Another example is racing SCTP with TCP: 310 | 311 | ~~~~~~~~~~ 312 | 1 [www.example.com:4740, any path, reliable-inorder-stream] 313 | 1.1 [www.example.com:4740, any path, SCTP] 314 | 1.1.1 [192.0.2.1:4740, any path, SCTP] 315 | 1.2 [www.example.com:4740, any path, TCP] 316 | 1.2.1 [192.0.2.1:4740, any path, TCP] 317 | ~~~~~~~~~~ 318 | 319 | Implementations that support racing protocols and protocol options should maintain a history of which protocols and protocol options were successfully established, on a per-network and per-endpoint basis (see {{performance-caches}}). This information can influence future racing decisions to prioritize or prune branches. 320 | 321 | ### Branching Order-of-Operations 322 | 323 | Branch types ought to occur in a specific order relative to one another to avoid creating leaf nodes with invalid or incompatible settings. 
In the example above, it would be invalid to branch for derived endpoints (the DNS results for www.example.com) before branching between interface paths, since there are situations when the results will be different across networks due to private names or different supported IP versions. Implementations need to be careful to branch in a consistent order that results in usable leaf nodes whenever there are multiple branch types that could be used from a single node. 324 | 325 | This document recommends the following order of operations for branching: 326 | 327 | 1. Network Paths 328 | 2. Protocol Options 329 | 3. Derived Endpoints 330 | 331 | where a lower number indicates higher precedence and therefore higher placement in the tree. Branching between paths is the first in the list because results across multiple interfaces are likely not related to one another: endpoint resolution may return different results, especially when using locally resolved host and service names, and which protocols are supported and preferred may differ across interfaces. Thus, if multiple paths are attempted, the overall connection establishment process can be seen as a race between the available paths or interfaces. 332 | 333 | Protocol options are next checked in order. Whether or not a set of protocols, or protocol-specific options, can successfully connect is generally not dependent on which specific IP address is used. Furthermore, the Protocol Stacks being attempted may influence or altogether change the Endpoint Identifiers being used. Adding a proxy to a connection's branch will change the Endpoint Identifier to the proxy's IP address or hostname. Choosing an alternate protocol may also modify the ports that should be selected. 334 | 335 | Branching for derived endpoints is the final step, and may have multiple layers of derivation or resolution, such as DNS service resolution and DNS hostname resolution.
336 | 337 | For example, if the application has indicated both a preference for WiFi over LTE and for a feature only available in SCTP, branches will be first sorted according to path selection, with WiFi attempted first. Then, branches with SCTP will be attempted first within their subtree according to the properties influencing protocol selection. However, if the implementation has current cache information that SCTP is not available on the path over WiFi, there would be no SCTP node in the WiFi subtree. Here, the path over WiFi will be attempted first, and, if connection establishment succeeds, TCP will be used. Thus, the Selection Property preferring WiFi takes precedence over the Property that led to a preference for SCTP. 338 | 339 | ~~~~~~~~~~ 340 | 1 [www.example.com:80, any path, reliable-inorder-stream] 341 | 1.1 [192.0.2.1:443, Wi-Fi, reliable-inorder-stream] 342 | 1.1.1 [192.0.2.1:443, Wi-Fi, TCP] 343 | 1.2 [192.0.3.1:443, LTE, reliable-inorder-stream] 344 | 1.2.1 [192.0.3.1:443, LTE, SCTP] 345 | 1.2.2 [192.0.3.1:443, LTE, TCP] 346 | ~~~~~~~~~~ 347 | 348 | ### Sorting Branches {#branch-sorting} 349 | 350 | Implementations should sort the branches of the tree of connection options in order of their preference rank, from most preferred to least preferred as 351 | specified by Selection Properties {{I-D.ietf-taps-interface}}. 352 | Leaf nodes on branches with higher rankings represent connection attempts that will be raced first. 353 | 354 | In addition to the properties provided by the application, an implementation may include additional criteria such as cached performance estimates, see {{performance-caches}}, or system policy, see {{role-of-system-policy}}, in the ranking.
355 | Two examples of how Selection and Connection Properties may be used to sort branches are provided below: 356 | 357 | * "Interface Instance or Type" (property name `interface`): 358 | If the application specifies an interface type to be preferred or avoided, implementations should accordingly rank the paths. 359 | If the application specifies an interface type to be required or prohibited, an implementation is expected to exclude the non-conforming paths. 360 | 361 | * "Capacity Profile" (property name `connCapacityProfile`): 362 | An implementation can use the capacity profile to prefer paths that match an application's expected traffic profile. This match will use cached performance estimates, see {{performance-caches}}. Some examples of path preferences based on capacity profiles include: 363 | * Low Latency/Interactive: 364 | Prefer paths with the lowest expected Round Trip Time, based on observed Round Trip Time estimates; 365 | * Low Latency/Non-Interactive: 366 | Prefer paths with a low expected Round Trip Time, but can tolerate delay variation; 367 | * Constant-Rate Streaming: 368 | Prefer paths that are expected to satisfy the requested stream send or receive bitrate, based on the observed maximum throughput; 369 | * Capacity-Seeking: 370 | Prefer adapting to paths to determine the highest available capacity, based on the observed maximum throughput. 371 | 372 | As another example, branch sorting can also be influenced by bounds on the send or receive rate (Selection Properties `minSendRate` / `minRecvRate` / `maxSendRate` / `maxRecvRate`): if the application indicates a bound on the expected send or receive bitrate, an implementation may prefer a path that can likely provide the desired bandwidth, based on cached maximum throughput, see {{performance-caches}}. The application may know the send or receive bitrate from metadata in adaptive HTTP streaming, such as MPEG-DASH. 
373 | 374 | Implementations process the Properties ({{Section 6.2 of I-D.ietf-taps-interface}}) in the following order: Prohibit, Require, Prefer, Avoid. 375 | If Selection Properties contain any prohibited properties, the implementation should first purge branches containing nodes with these properties. For required properties, it should only keep branches that satisfy these requirements. Finally, it should order the branches according to the preferred properties, and finally use any avoided properties as a tiebreaker. 376 | When ordering branches, an implementation can give more weight to properties that the application has explicitly set, than to the properties that are default. 377 | 378 | The available protocols and paths on a specific system and in a specific context can change; therefore, the result of sorting and the outcome of racing may vary, even when using the same Selection and Connection Properties. However, an implementation ought to provide a consistent outcome to applications, e.g., by preferring protocols and paths that are already used by existing Connections that specified similar Properties. 379 | 380 | 381 | ## Candidate Gathering {#gathering} 382 | 383 | The step of gathering candidates involves identifying which paths, protocols, and endpoints may be used for a given Connection. This list is determined by the requirements, prohibitions, and preferences of the application as specified in the Selection Properties. 384 | 385 | ### Gathering Endpoint Candidates 386 | 387 | Both Local and Remote Endpoint Candidates must be discovered during connection establishment. To support Interactive Connectivity Establishment (ICE) {{?RFC8445}}, or similar protocols that involve out-of-band indirect signalling to exchange candidates with the Remote Endpoint, it is important to query the set of candidate Local Endpoints, and provide the Protocol Stack with a set of candidate Remote Endpoints, before the Local Endpoint attempts to establish connections. 
388 | 389 | #### Local Endpoint candidates 390 | 391 | The set of possible Local Endpoints is gathered. In a simple case, this merely enumerates the local interfaces and protocols, and allocates ephemeral source ports. For example, a system that has WiFi and Ethernet and supports IPv4 and IPv6 might gather four candidate Local Endpoints (IPv4 on Ethernet, IPv6 on Ethernet, IPv4 on WiFi, and IPv6 on WiFi) that can form the source for a transient. 392 | 393 | If NAT traversal is required, the process of gathering Local Endpoints becomes broadly equivalent to the ICE Candidate Gathering phase (see {{Section 5.1.1 of RFC8445}}). The endpoint determines its server reflexive Local Endpoints (i.e., the translated address of a Local Endpoint, on the other side of a NAT, e.g., via a STUN server {{?RFC5389}}) and relayed Local Endpoints (e.g., via a TURN server {{?RFC5766}} or other relay), for each interface and network protocol. These are added to the set of candidate Local Endpoint Identifiers for this connection. 394 | 395 | Gathering Local Endpoints is primarily a local operation, although it might involve exchanges with a STUN server to derive server reflexive Local Endpoints, or with a TURN server or other relay to derive relayed Local Endpoints. However, it does not involve communication with the Remote Endpoint. 396 | 397 | #### Remote Endpoint Candidates 398 | 399 | The Remote Endpoint Identifier is typically a name that needs to be resolved into a set of possible addresses that can be used for communication. Resolving the Remote Endpoint is the process of recursively performing such name lookups, until fully resolved, to return the set of candidates for the Remote Endpoint of this Connection. 400 | 401 | How this resolution is done will depend on the type of the Remote Endpoint, and can also be specific to each Local Endpoint.
A common case is when the Remote Endpoint Identifier is a DNS name, in which case it is resolved to give a set of IPv4 and IPv6 addresses representing that name. Some types of Remote Endpoint Identifiers might require more complex resolution. Resolving the Remote Endpoint for a peer-to-peer connection might involve communication with a rendezvous server, which in turn contacts the peer to gain consent to communicate and retrieve its set of candidate Local Endpoints, which are returned and form the candidate remote addresses for contacting that peer. 402 | 403 | Resolving the Remote Endpoint is not a local operation. It will involve a directory service, and can require communication with the Remote Endpoint to rendezvous and exchange peer addresses. This can expose some or all of the candidate Local Endpoints to the Remote Endpoint. 404 | 405 | ## Candidate Racing {#racing} 406 | 407 | The primary goal of the Candidate Racing process is to successfully negotiate a Protocol Stack to an endpoint over an interface to connect a single leaf node of the tree with as little delay and as few unnecessary connection attempts as possible. Optimizing these two factors improves the user experience, while minimizing network load. 408 | 409 | This section covers the dynamic aspect of connection establishment. The tree described above is a useful conceptual and architectural model. However, an implementation is unable to know all of the nodes that will be used until steps like name resolution have occurred, and many of the possible branches ultimately might not be attempted. 410 | 411 | There are three different approaches to racing the attempts for different nodes of the connection establishment tree: 412 | 413 | 1. Simultaneous 414 | 2. Staggered 415 | 3. Failover 416 | 417 | Each approach is appropriate in different use-cases and branch types. However, to avoid consuming unnecessary network resources, implementations should not use simultaneous racing as a default approach.
418 | 419 | The timing algorithms for racing should remain independent across branches of the tree. Any timer or racing logic is isolated to a given parent node, and is not ordered precisely with regards to children of other nodes. 420 | 421 | ### Simultaneous 422 | 423 | Simultaneous racing is when multiple alternate branches are started without waiting for any one branch to make progress before starting the next alternative. This means the attempts are effectively simultaneous. Simultaneous racing should be avoided by implementations, since it consumes extra network resources and establishes state that might not be used. 424 | 425 | ### Staggered 426 | 427 | Staggered racing can be used whenever a single node of the tree has multiple child nodes. Based on the order determined when building the tree, the first child node will be initiated immediately, followed by the next child node after some delay. Once that second child node is initiated, the third child node (if present) will begin after another delay, and so on until all child nodes have been initiated, or one of the child nodes successfully completes its negotiation. 428 | 429 | Staggered racing attempts can proceed in parallel. Implementations should not terminate an earlier child connection attempt upon starting a secondary child. 430 | 431 | If a child node fails to establish connectivity (as in {{determining-successful-establishment}}) before the delay time has expired for the next child, the next child should be started immediately. 432 | 433 | Staggered racing between IP addresses for a generic Connection should follow the Happy Eyeballs algorithm described in {{!RFC8305}}. {{!RFC8421}} provides guidance for racing when performing Interactive Connectivity Establishment (ICE). 434 | 435 | Generally, the delay before starting a given child node ought to be based on the length of time the previously started child node is expected to take before it succeeds or makes progress in connection establishment. 
Algorithms like Happy Eyeballs choose a delay based on how long the transport connection handshake is expected to take. When performing staggered races in multiple branch types (such as racing between network interfaces, and then racing between IP addresses), a longer delay may be chosen for some branch types. For example, when racing between network interfaces, the delay should also take into account the amount of time it takes to prepare the network interface (such as radio association) and name resolution over that interface, in addition to the delay that would be added for a single transport connection handshake. 436 | 437 | Since the staggered delay can be chosen based on dynamic information, such as predicted Round Trip Time, implementations should define upper and lower bounds for delay times. These bounds are implementation-specific, and may differ based on which branch type is being used. 438 | 439 | ### Failover 440 | 441 | If an implementation or application has a strong preference for one branch over another, the branching node may choose to wait until one child has failed before starting the next. Failure of a leaf node is determined by its protocol negotiation failing or timing out; failure of a parent branching node is determined by all of its children failing. 442 | 443 | An example in which failover is recommended is a race between a preferred Protocol Stack that uses a proxy and an alternate Protocol Stack that bypasses the proxy. Failover is useful in case the proxy is down or misconfigured, but any more aggressive type of racing may end up unnecessarily avoiding a proxy that was preferred by policy. 444 | 445 | ## Completing Establishment 446 | 447 | The process of connection establishment completes when one leaf node of the tree has successfully completed negotiation with the Remote Endpoint, or else all nodes of the tree have failed to connect. 
The first leaf node to complete its connection is then used by the application to send and receive data. This is signalled to the application using the `Ready` event in the API ({{Section 7.1 of I-D.ietf-taps-interface}}). 448 | 449 | Successes and failures of a given attempt should be reported up to parent nodes (towards the root of the tree). For example, in the following case, if 1.1.1 fails to connect, it reports the failure to 1.1. Since 1.1 has no other child nodes, it also has failed and reports that failure to 1. Because 1.2 has not yet failed, 1 is not considered to have failed. Since 1.2 has not yet started, it is started and the process continues. Similarly, if 1.1.1 successfully connects, then it marks 1.1 as connected, which propagates to the root node 1. At this point, the Connection as a whole is considered to be successfully connected and ready to process application data. 450 | 451 | ~~~~~~~~~~ 452 | 1 [www.example.com:443, Any, TCP] 453 | 1.1 [www.example.com:443, Wi-Fi, TCP] 454 | 1.1.1 [192.0.2.1:443, Wi-Fi, TCP] 455 | 1.2 [www.example.com:443, LTE, TCP] 456 | ... 457 | ~~~~~~~~~~ 458 | 459 | If a leaf node has successfully completed its connection, all other attempts should be made ineligible for use by the application for the original request. 460 | New connection attempts that involve transmitting data on the network ought not to be started after another leaf node has already successfully completed, because the Connection as a whole has now been established. 461 | An implementation could choose to let certain handshakes and negotiations complete to gather metrics that influence future connections. 462 | Keeping additional connections is generally not recommended, because those attempts were slower to connect and may exhibit less desirable properties. 
463 | 464 | ### Determining Successful Establishment {#determining-successful-establishment} 465 | 466 | On a per-protocol basis, implementations may select different criteria by which a leaf node is considered to be successfully connected. If the only protocol being used is a transport protocol with a clear handshake, like TCP, then the obvious choice is to declare that node "connected" when the three-way handshake has been completed. If the only protocol being used is a connectionless protocol, like UDP, the implementation may consider the node fully "connected" the moment it determines a route is present, before sending any packets on the network, see further {{connectionless-racing}}. 467 | 468 | When the `Initiate` action is called without any Messages being sent at the same time, depending on the 469 | protocols involved, it is not guaranteed that the Remote Endpoint will be notified of this, and hence a passive 470 | endpoint's application may not receive a `ConnectionReceived` event until it receives the first Message on the new Connection. 471 | 472 | For Protocol Stacks with multiple handshakes, the decision becomes more nuanced. If the Protocol Stack involves both TLS and TCP, an implementation could determine that a leaf node is connected after the TCP handshake is complete, or it can wait for the TLS handshake to complete as well. The benefit of declaring completion when the TCP handshake finishes, and thus stopping the race for other branches of the tree, is reduced burden on the network and Remote Endpoints from further connection attempts that are likely to be abandoned. On the other hand, by waiting until the TLS handshake is complete, an implementation avoids the scenario in which a TCP handshake completes quickly, but TLS negotiation is either very slow or fails altogether in particular network conditions or to a particular endpoint.
To avoid the issue of TLS possibly failing, the implementation should not generate a `Ready` event for the Connection until the TLS handshake is complete. 473 | 474 | If all of the leaf nodes fail to connect during racing, i.e. none of the configurations that satisfy all requirements given in the Transport Properties actually work over the available paths, then the Transport Services system should report an `EstablishmentError` to the application. An `EstablishmentError` event should also be generated in case the Transport Services system finds no usable candidates to race. 475 | 476 | ## Establishing multiplexed connections {#establish-mux} 477 | 478 | Multiplexing several Connections over a single underlying transport connection requires that the Connections to be multiplexed belong to the same Connection Group (as is indicated by the application using the `Clone` action). When the underlying transport connection supports multi-streaming, the Transport Services System can map each Connection in the Connection Group to a different stream of this connection. 479 | 480 | For such streams, there is often no explicit connection 481 | establishment procedure for the new stream prior to sending data on it (e.g., with SCTP). In this case, the same 482 | considerations apply to determining stream establishment as apply to establishing a UDP connection, as 483 | discussed in {{determining-successful-establishment}}. 484 | This means that there might not 485 | be any "establishment" message (like a TCP SYN). 486 | 487 | ## Handling connectionless protocols {#connectionless-racing} 488 | 489 | While protocols that use an explicit handshake to validate a connection to a peer can be used for racing multiple establishment attempts in parallel, connectionless protocols such as raw UDP do not offer a way to validate the presence of a peer or the usability of a Connection without application feedback. 
An implementation should consider such a Protocol Stack to be established as soon as the Transport Services system has selected a path on which to send data. 490 | 491 | However, this can cause a problem if a specific peer is not reachable over the network using the connectionless protocol, or data cannot be exchanged with the peer for any other reason. To handle the lack of an explicit handshake in the underlying protocol, an application can use a Message Framer ({{message-framers}}) on top of a connectionless protocol to only mark a specific connection attempt as ready when some data has been received, or after some application-level handshake has been performed by the Message Framer. 492 | 493 | ## Implementing Listeners {#listen} 494 | 495 | When an implementation is asked to Listen, it registers with the system to wait for incoming traffic to the Local Endpoint. If no Local Endpoint Identifier is specified, the implementation should use an ephemeral port. 496 | 497 | If the Selection Properties do not require a single network interface or path, but allow the use of multiple paths, the Listener object should register for incoming traffic on all of the network interfaces or paths that conform to the Properties. The set of available paths can change over time, so the implementation should monitor network path changes, and change the registration of the Listener across all usable paths as appropriate. When using multiple paths, the Listener is generally expected to use the same port for listening on each. 498 | 499 | If the Selection Properties allow multiple protocols to be used for listening, and the implementation supports it, the Listener object should support receiving inbound connections for each eligible protocol on each eligible path.
500 | 501 | ### Implementing Listeners for Connected Protocols 502 | 503 | Connected protocols such as TCP and TLS-over-TCP have a strong mapping between the Local and Remote Endpoint Identifiers (four-tuple) and their protocol connection state. These map into Connection objects. Whenever a new inbound handshake is being started, the Listener should generate a new Connection object and pass it to the application. 504 | 505 | ### Implementing Listeners for Connectionless Protocols 506 | 507 | Connectionless protocols such as UDP and UDP-lite generally do not provide the same mechanisms that connected protocols do to offer Connection objects. Implementations should wait for incoming packets for connectionless protocols on a listening port and should perform four-tuple matching of packets to existing Connection objects if possible. If a matching Connection object does not exist, an incoming packet from a connectionless protocol should cause a new Connection object to be created. 508 | 509 | 510 | ### Implementing Listeners for Multiplexed Protocols 511 | 512 | Protocols that provide multiplexing of streams can listen for entirely new connections as well as for new sub-connections (streams of an already existing connection). A new stream arrival on an existing connection is presented to the application as a new Connection. This new Connection is grouped with all other Connections that are multiplexed via the same protocol. 513 | 514 | # Implementing Sending and Receiving Data 515 | 516 | The most basic mapping for sending a Message is an abstraction of datagrams, in which the transport protocol naturally deals in discrete packets (such as UDP). Each Message here corresponds to a single datagram. 517 | 518 | For protocols that expose byte-streams (such as TCP), the only delineation provided by the protocol is the end of the stream in a given direction. Each Message in this case corresponds to the entire stream of bytes in a direction.
These Messages may be quite long, in which case they can be sent in multiple parts. 519 | 520 | Protocols that provide framing (such as length-value protocols, or protocols that use delimiters like HTTP/1.1) may support Message sizes that do not fit within a single datagram. Each Message for framing protocols corresponds to a single frame, which may be sent either as a complete Message in the underlying protocol, or in multiple parts. 521 | 522 | Messages themselves generally consist of bytes passed in the messageData parameter intended to be processed at an application layer. However, Message objects presented through the API 523 | can carry associated Message Properties passed through the messageContext parameter. 524 | When these are Protocol Specific Properties, they can include metadata that exists separately from a byte 525 | encoding. For example, these Properties can include name-value pairs of information, like HTTP header fields. In such cases, Messages might be "empty", 526 | insofar as they contain zero bytes in the messageData parameter, but can still include data in the messageContext that is interpreted by the Protocol Stack. 527 | 528 | ## Sending Messages 529 | 530 | The effect of the application sending a Message is determined by the top-level protocol in the established Protocol Stack. That is, if the top-level protocol provides an abstraction of framed Messages over a connection, the receiving application will be able to obtain multiple Messages on that connection, even if the framing protocol is built on a byte-stream protocol like TCP. 531 | 532 | ### Message Properties {#msg-properties} 533 | 534 | The API allows various properties to be associated with each Message, which should be implemented as discussed below. 535 | 536 | - `msgLifetime`: this should be implemented by removing the Message from the queue of pending Messages after the Lifetime has expired. 
A queue of pending Messages within the Transport Services Implementation that have yet to be handed to the Protocol Stack can always support this property, but once a Message has been sent into the send buffer of a protocol, only certain protocols may support removing it from their send buffer. For example, a Transport Services Implementation cannot remove bytes from a TCP send buffer, while it can remove data from a SCTP send buffer using the partial reliability extension {{?RFC8303}}. When there is no standing queue of Messages within the system, and the Protocol Stack does not support the removal of a Message from the stack's send buffer, this property may be ignored. 537 | 538 | - `msgPriority`: this represents the ability to prioritize a Message over other Messages. This can be implemented by the Transport Services system by re-ordering Messages that have yet to be handed to the Protocol Stack, or by giving relative priority hints to protocols that support priorities per Message. For example, an implementation of HTTP/2 could choose to send Messages of different priority on streams of different priority. 539 | 540 | - `msgOrdered`: when this is false, this disables the requirement of in-order-delivery for protocols that support configurable ordering. When the Protocol Stack does not support configurable ordering, this property may be ignored. 541 | 542 | - `safelyReplayable`: when this is true, this means that the Message can be used by a transport mechanism that might deliver it multiple times -- e.g., as a result of racing multiple transports or as part of TCP Fast Open. Also, protocols that do not protect against duplicated Messages, such as UDP (when used directly, without a protocol layered atop), can only be used with Messages that are Safely Replayable. When a Transport Services system is permitted to replay Messages, replay protection could be provided by the application. 
543 | 544 | - `final`: when this is true, this means that the sender will not send any further Messages. The Connection need not be closed (in case the Protocol Stack supports half-close operation, like TCP). Any Messages sent after a Message marked `final` will result in a SendError. 545 | 546 | - `msgChecksumLen`: when this is set to any value other than `Full Coverage`, it sets the minimum protection in protocols that allow limiting the checksum length (e.g. UDP-Lite). If the Protocol Stack does not support checksum length limitation, this property may be ignored. 547 | 548 | - `msgReliable`: When true, the property specifies that the Message must be reliably transmitted. When false, and if unreliable transmission is supported by the underlying protocol, then the Message should be unreliably transmitted. If the underlying 549 | protocol does not support unreliable transmission, the Message should be reliably transmitted. 550 | 551 | - `msgCapacityProfile`: When true, this expresses a wish to override the 552 | Generic Connection Property `connCapacityProfile` for this Message. Depending on the 553 | value, this can, for example, be implemented by changing the DSCP value of the 554 | associated packet (note that the guidelines in {{Section 6 of ?RFC7657}} apply; e.g., 555 | the DSCP value should not be changed for different packets within a reliable 556 | transport protocol session or DCCP connection). 557 | 558 | - `noFragmentation`: Setting this avoids network-layer fragmentation. Messages exceeding the transport’s current estimate of its maximum packet size (the `singularTransmissionMsgMaxLen` Connection Property) can result in transport segmentation when permitted, or generate an error. When used with transports running over IP version 4, the Don't Fragment bit should be set to avoid on-path IP fragmentation ({{!RFC8304}}). 
559 | 560 | - `noSegmentation`: When set, this property limits the Message size to the transport’s current estimate of its maximum packet size (the `singularTransmissionMsgMaxLen` Connection Property). Messages larger than this size generate an error. Setting this avoids transport-layer segmentation and network-layer fragmentation. When used with transports running over IP version 4, the Don't Fragment bit should be set to avoid on-path IP fragmentation ({{!RFC8304}}). 561 | 562 | ### Send Completion 563 | 564 | The application should be notified (using a `Sent`, `Expired` or `SendError` event) whenever a Message or partial Message has been consumed by the Protocol Stack, or has failed to send. The time at which a Message is considered to have been consumed by the Protocol Stack may vary depending on the protocol. For example, for a basic datagram protocol like UDP, this may correspond to the time when the packet is sent into the interface driver. For a protocol that buffers data in queues, like TCP, this may correspond to when the data has entered the send buffer. The time at which a Message failed to send is when the Transport Services Implementation (including the Protocol Stack) has experienced a failure related to sending; this can depend on protocol-specific timeouts. 565 | 566 | ### Batching Sends 567 | 568 | Sending multiple Messages can incur high overhead if each needs to be enqueued separately (e.g., each Message might involve a context switch between the 569 | application and the Transport Services System). To avoid this, the application can indicate a batch of `Send` actions through the API. When this is used, 570 | the implementation can defer the processing of Messages until the batch is complete. 571 | 572 | ## Receiving Messages 573 | 574 | Similar to sending, receiving a Message is determined by the top-level protocol in the established Protocol Stack. 
The main difference with receiving is that the size and boundaries of the Message are not known beforehand. The application can communicate in its `Receive` action the parameters for the Message, which can help the Transport Services Implementation know how much data to deliver and when. For example, if the application only wants to receive a complete Message, the implementation should wait until an entire Message (datagram, stream, or frame) is read before delivering any Message content to the application. This requires the implementation to understand where Messages end, either via a supplied Message Framer or because the top-level protocol in the established Protocol Stack preserves message boundaries. The application can also control the flow of received data by specifying the minimum and maximum number of bytes of Message content it wants to receive at one time. 575 | 576 | If a Connection finishes before a requested `Receive` action can be satisfied, the Transport Services system should deliver any partial Message content outstanding, or if none is available, an indication that there will be no more received Messages. 577 | 578 | ## Handling of data for fast-open protocols {#fastopen} 579 | 580 | Several protocols allow sending higher-level protocol or application data during their protocol establishment, such as TCP Fast Open {{!RFC7413}} and TLS 1.3 {{!RFC8446}}. This approach is referred to as sending Zero-RTT (0-RTT) data. This is a desirable feature, but poses challenges to an implementation that uses racing during connection establishment. 581 | 582 | The application can express its preference for sending messages as 0-RTT data by using the `zeroRttMsg` Selection Property on the Preconnection. Then, the application can provide the message to send as 0-RTT data via the `InitiateWithSend` action. In order to be sent as 0-RTT data, the message needs to be marked with the `safelyReplayable` send parameter. 
In general, 0-RTT data may be replayed (for example, if a TCP SYN contains data, and the SYN is retransmitted, the data will be retransmitted as well but may be considered as a new connection instead of a retransmission). When racing connections, different leaf nodes have the opportunity to send the same data independently. If data is truly safely replayable, this is permissible. 583 | 584 | Once the application has provided its 0-RTT data, a Transport Services Implementation should keep a copy of this data and provide it to each new leaf node that is started and for which a protocol instance supporting 0-RTT is being used. Note that the amount of data that can actually be sent as 0-RTT data varies by protocol, so any given Protocol Stack might only consume part of the saved data prior to becoming established. The implementation needs to keep track of how much data a particular Protocol Stack has consumed, and ensure that any pending 0-RTT-eligible data from the application is handled before subsequent Messages. 585 | 586 | It is also possible for Protocol Stacks within a particular leaf node to use 0-RTT handshakes in a lower-level protocol without any safely replayable application data if a higher-level protocol in the stack has idempotent handshake data to send. For example, TCP Fast Open could use a Client Hello from TLS as its 0-RTT data, without any data being provided by the application. 587 | 588 | 0-RTT handshakes often rely on previous state, such as TCP Fast Open cookies, previously established TLS tickets, or out-of-band distributed pre-shared keys (PSKs). Implementations should be aware of security concerns around using these tokens across multiple addresses or paths when racing. In the case of TLS, any given ticket or PSK should only be used on one leaf node, since servers will likely reject duplicate tickets in order to prevent replays (see {{Section 8.1 of ?RFC8446}}). 
If implementations have multiple tickets available from a previous connection, each leaf node attempt can use a different ticket. In effect, each leaf node will send the same early application data, yet encoded (encrypted) differently on the wire. 589 | 590 | # Implementing Message Framers {#message-framers} 591 | 592 | Message Framers are functions that define 593 | simple transformations between application Message data and raw transport 594 | protocol data. Generally, a Message Framer implements a simple 595 | application protocol that can either be provided by the Transport Services 596 | implementation or by the application. It is optional for Transport Services system implementations to provide Message Framers: the specification {{I-D.ietf-taps-interface}} does not prescribe any particular Message Framers to be implemented. 597 | A Framer can encapsulate or encode outbound Messages, 598 | decapsulate or decode inbound data into Messages, and implement parts of 599 | protocols that do not directly map to application Messages (such as 600 | protocol handshakes or preludes before Message exchange). 601 | 602 | While many protocols can be represented as Message Framers, for the 603 | purposes of the Transport Services API, these are ways for applications 604 | or application frameworks to define their own Message parsing to be 605 | included within a Connection's Protocol Stack. As an example, TLS 606 | is a protocol that is by default built into the Transport Services 607 | API, even though it could also serve the purpose of framing data over TCP. 
608 | 609 | Most Message Framers fall into one of two categories: 610 | 611 | - Header-prefixed record formats, such as a basic Type-Length-Value (TLV) structure 612 | 613 | - Delimiter-separated formats, such as HTTP/1.1 614 | 615 | Common Message Framers can be provided by a Transport Services Implementation, 616 | but an implementation ought to allow custom Message Framers to be defined by 617 | the application or some other piece of software. This section describes one 618 | possible API for defining Message Framers, as an example. 619 | 620 | ## Defining Message Framers 621 | 622 | A Message Framer is primarily defined by the code that handles events 623 | for a framer implementation, specifically how it handles inbound and outbound data 624 | parsing. The function that implements custom framing logic will be referred to 625 | as the "framer implementation", which may be provided by a Transport Services 626 | implementation or the application itself. The Message Framer refers to the object 627 | or function within the main Connection implementation that delivers events 628 | to the custom framer implementation whenever data is ready to be parsed or framed. 629 | 630 | The API examples in this section use the notation conventions for the Transport 631 | Services API defined in {{Section 1.1 of I-D.ietf-taps-interface}}. 632 | 633 | The Transport Services Implementation needs to ensure that all of the 634 | events and actions taken on a Message Framer are synchronized to ensure 635 | consistent behavior. For example, some of the actions defined below (such as 636 | PrependFramer and StartPassthrough) modify how data flows in a protocol 637 | stack, and require synchronization with sending and parsing data in the 638 | Message Framer. 639 | 640 | When a Connection establishment attempt begins, an event can be delivered to 641 | notify the framer implementation that a new Connection is being created. 
642 | Similarly, a stop event can be delivered when a Connection is being torn down. 643 | The framer implementation can use the Connection object to look up specific 644 | properties of the Connection or the network being used that may influence how 645 | to frame Messages. 646 | 647 | ~~~ 648 | MessageFramer -> Start 649 | MessageFramer -> Stop 650 | ~~~ 651 | 652 | When a Message Framer generates a `Start` event, the framer implementation 653 | has the opportunity to start writing some data prior to the Connection delivering 654 | its `Ready` event. This allows the implementation to communicate control data to the 655 | Remote Endpoint that can be used to parse Messages. 656 | 657 | Once the framer implementation has completed its setup or handshake, it can indicate to 658 | the application that it is ready for handling data with this call. 659 | 660 | ~~~ 661 | MessageFramer.MakeConnectionReady(connection) 662 | ~~~ 663 | 664 | Similarly, when a Message Framer generates a `Stop` event, the framer implementation has the opportunity to write some final data or clear up its local state before the `Closed` event is delivered to the Application. The framer implementation can indicate that it has finished with this call. 665 | 666 | ~~~ 667 | MessageFramer.MakeConnectionClosed(connection) 668 | ~~~ 669 | 670 | At any time if the implementation encounters a fatal error, it can also cause the Connection 671 | to fail and provide an error. 672 | 673 | ~~~ 674 | MessageFramer.FailConnection(connection, error) 675 | ~~~ 676 | 677 | Should the framer implementation deem the candidate selected during racing unsuitable, it can signal this to the Transport Services API by failing the Connection prior to marking it as ready. 678 | If there are no other candidates available, the Connection will fail. Otherwise, the Connection will select a different candidate and the Message Framer will generate a new `Start` event. 
679 | 680 | Before an implementation marks a Message Framer as ready, it can also dynamically 681 | add a protocol or framer above it in the stack. This allows protocols that need to add TLS conditionally, 682 | like STARTTLS {{?RFC3207}}, to modify the Protocol Stack based on a handshake result. 683 | 684 | ~~~ 685 | otherFramer := NewMessageFramer() 686 | MessageFramer.PrependFramer(connection, otherFramer) 687 | ~~~ 688 | 689 | A Message Framer might also choose to go into a passthrough mode once an initial exchange or handshake has been completed, such as the STARTTLS case mentioned above. 690 | This can also be useful for proxy protocols like SOCKS {{?RFC1928}} or HTTP CONNECT {{?RFC7230}}. In such cases, a Message Framer implementation can intercept 691 | sending and receiving of Messages at first, but then indicate that no more processing is needed. 692 | 693 | ~~~ 694 | MessageFramer.StartPassthrough() 695 | ~~~ 696 | 697 | ## Sender-side Message Framing {#send-framing} 698 | 699 | Message Framers generate an event whenever a Connection sends a new Message. The parameters to the event 700 | align with the `Send` action in the API ({{Section 9.2 of I-D.ietf-taps-interface}}). 701 | 702 | ~~~ 703 | MessageFramer 704 | | 705 | V 706 | NewSentMessage 707 | ~~~ 708 | 709 | Upon receiving this event, a framer implementation is responsible for 710 | performing any necessary transformations and sending the resulting data back to the Message Framer, which will in turn send it to the next protocol. 711 | To improve performance, implementations should ensure that there is a way to pass the original data 712 | through without copying. 
713 | 714 | ~~~ 715 | MessageFramer.Send(connection, messageData) 716 | ~~~ 717 | 718 | To provide an example, a simple protocol that adds the length of the Message data as a header would receive 719 | the `NewSentMessage` event, create a data representation of the length of the Message 720 | data, and then send a block of data that is the concatenation of the length header and the original 721 | Message data. 722 | 723 | ## Receiver-side Message Framing {#receive-framing} 724 | 725 | In order to parse a received flow of data into Messages, the Message Framer 726 | notifies the framer implementation whenever new data is available to parse. 727 | 728 | The parameters to the events and calls for receiving data with a framer 729 | align with the `Receive` action in the API ({{Section 9.3 of I-D.ietf-taps-interface}}). 730 | 731 | ~~~ 732 | MessageFramer -> HandleReceivedData 733 | ~~~ 734 | 735 | Upon receiving this event, the framer implementation can inspect the inbound data. The 736 | data is parsed from a particular cursor representing the unprocessed data. The 737 | application requests a specific amount of data it needs to have available in order to parse. 738 | If the data is not available, the parse fails. 739 | 740 | ~~~ 741 | MessageFramer.Parse(connection, minimumIncompleteLength, maximumLength) 742 | | 743 | V 744 | (messageData, messageContext, endOfMessage) 745 | ~~~ 746 | 747 | The framer implementation can directly advance the receive cursor once it has 748 | parsed data to effectively discard data (for example, discard a header 749 | once the content has been parsed). 750 | 751 | To deliver a Message to the application, the framer implementation can either directly 752 | deliver data that it has allocated, or deliver a range of data directly from the underlying 753 | transport and simultaneously advance the receive cursor. 
754 | 755 | ~~~ 756 | MessageFramer.AdvanceReceiveCursor(connection, length) 757 | MessageFramer.DeliverAndAdvanceReceiveCursor(connection, messageContext, length, endOfMessage) 758 | MessageFramer.Deliver(connection, messageContext, messageData, endOfMessage) 759 | ~~~ 760 | 761 | Note that `MessageFramer.DeliverAndAdvanceReceiveCursor` allows the framer implementation 762 | to earmark bytes as part of a Message even before they are received by the transport. This allows the delivery 763 | of very large Messages without requiring the implementation to directly inspect all of the bytes. 764 | 765 | To provide an example, a simple protocol that parses the length of the Message data as a header value would 766 | receive the `HandleReceivedData` event, and call `Parse` with a minimum and maximum 767 | set to the length of the header field. Once the parse succeeded, it would call 768 | `AdvanceReceiveCursor` with the length of the header field, and then call 769 | `DeliverAndAdvanceReceiveCursor` with the length of the body that was parsed from 770 | the header, marking the new Message as complete. 771 | 772 | # Implementing Connection Management 773 | 774 | Once a Connection is established, the Transport Services API allows applications to interact with the Connection by modifying or inspecting 775 | Connection Properties. A Connection can also generate error events in the form of `SoftError` events. 776 | 777 | The set of Connection Properties that are supported for setting and getting on a Connection are described in {{I-D.ietf-taps-interface}}. For 778 | any properties that are generic, and thus could apply to all protocols being used by a Connection, the Transport Services Implementation should store the properties 779 | in storage common to all protocols, and notify the Protocol Stack as a whole whenever the properties have been modified by the application. 
{{!RFC8303}} and {{!RFC8304}} offer guidance on how to do this for TCP, MPTCP, SCTP, UDP and UDP-Lite; see {{specific-protocol-considerations}} for a description of a back-tracking method to find the relevant protocol primitives using these documents. 780 | For Protocol-specific Properties, such as the User Timeout that applies to TCP, the Transport Services Implementation only needs to update the relevant protocol instance. 781 | 782 | Some Connection Properties might apply to multiple protocols within a Protocol Stack. Depending on the specific property, 783 | it might be appropriate to apply the property across multiple protocols simultaneously, or else only apply it to one protocol. 784 | In general, the Transport Services Implementation should allow the protocol closest to the application to interpret 785 | Connection Properties, and potentially modify the set of Connection Properties passed down to the next protocol in the 786 | stack. For example, if the application has requested to use keepalives with the `keepAlive` property, and the Protocol 787 | Stack contains both HTTP/2 and TCP, the HTTP/2 protocol can choose to enable its own keepalives to satisfy the application 788 | request, and disable TCP-level keepalives. For cases where the application needs to have fine-grained per-protocol control, 789 | the Transport Services Implementation can expose Protocol-specific Properties. 790 | 791 | If an error is encountered in setting a property (for example, if the application tries to set a TCP-specific property on a Connection that is 792 | not using TCP), the action must fail gracefully. The application must be informed of the error, but the Connection itself must not be terminated. 793 | 794 | When protocol instances in the Protocol Stack report generic or protocol-specific 795 | errors, the API will deliver them to the application as `SoftError` events. These allow the application to be informed of ICMP errors, and other similar events. 
796 | 797 | ## Pooled Connection {#pooled-connections} 798 | 799 | For applications that do not need in-order delivery of Messages, the Transport Services Implementation may distribute Messages of a single Connection across several underlying transport connections or multiple streams of multi-streaming connections between endpoints, as long as all of these satisfy the Selection Properties. 800 | The Transport Services Implementation will then hide this connection management and only expose a single Connection object, which we here call a "Pooled Connection". This is in contrast to Connection Groups, which explicitly expose combined treatment of Connections, giving the application control over multiplexing, for example. 801 | 802 | Pooled Connections can be useful when the application using the Transport Services system implements a protocol such as HTTP, which employs request/response pairs and does not require in-order delivery of responses. 803 | This enables implementations of Transport Services systems to realize transparent connection coalescing, connection migration, and to perform per-message endpoint and path selection by choosing among multiple underlying connections. 804 | 805 | ## Handling Path Changes 806 | 807 | When a path change occurs, e.g., when the IP address of an interface changes or a new interface becomes available, the Transport Services Implementation is responsible for notifying the Protocol Instance of the change. The path change may interrupt connectivity on a path for an active Connection or provide an opportunity for a transport that supports multipath or migration to adapt to the new paths. Note that, in the model of the Transport Services API, migration is considered a part of multipath connectivity; it is just a limiting policy on multipath usage. If the `multipath` Selection Property is set to `Disabled`, migration is disallowed. 
808 | 809 | For protocols that do not support multipath or migration, the Protocol Instances should be informed of the path change, but should not be forcibly disconnected if the previously used path becomes unavailable. There are many common usage scenarios that can lead to a path becoming temporarily unavailable, and then recovering before the transport protocol reaches a timeout error. These are particularly common using mobile devices. Examples include: an Ethernet cable becoming unplugged and then plugged back in; a device losing a Wi-Fi signal while a user is in an elevator, and reattaching when the user leaves the elevator; and a user losing the radio signal while riding a train through a tunnel. If the device is able to rejoin a network with the same IP address, a stateful transport connection can generally resume. Thus, while it is useful for a Protocol Instance to be aware of a temporary loss of connectivity, the Transport Services Implementation should not aggressively close Connections in these scenarios. 810 | 811 | If the Protocol Stack includes a transport protocol that supports multipath connectivity, the Transport Services Implementation should also inform the Protocol Instance about potentially new paths that become permissible based on the `multipath` Selection Property and the `multipathPolicy` Connection Property choices made by the application. 812 | A protocol can then establish new subflows over new paths while an active path is still available or, if migration is supported, also after a break has been detected, and should attempt to tear down subflows over paths that are no longer used. The Connection Property `multipathPolicy` of the Transport Services API 813 | allows an application to indicate when and how different paths should be used. However, detailed handling of these policies is implementation-specific. 
814 | For example, if the `multipath` Selection Property is set to `active`, the decision about when to create a new path or to announce a new path or set of paths to the Remote Endpoint, e.g., in the form of additional IP addresses, is implementation-specific. 815 | If the Protocol Stack includes a transport protocol that does not support multipath, but does support migrating between paths, the update to the set of available paths can trigger the connection to be migrated. 816 | 817 | In the case of a Pooled Connection {{pooled-connections}}, the Transport Services Implementation may add connections over new paths to the pool if permissible based on the multipath policy and Selection Properties. 818 | In the case that a previously used path becomes unavailable, the Transport Services system may disconnect all connections that require this path, but should not disconnect the pooled Connection object exposed to the application. 819 | The strategy to do so is implementation-specific, but should be consistent with the behavior of multipath transports. 820 | 821 | # Implementing Connection Termination 822 | 823 | For `Close` (which leads to a `Closed` event) and `Abort` (which leads to a `ConnectionError` event), 824 | the application might find it useful to be informed when a peer closes or aborts a 825 | Connection. Whether this is possible depends on the underlying protocol, and no guarantees 826 | can be given. When an underlying transport connection supports multi-streaming (such as SCTP), the Transport Services system can use a stream reset procedure to cause a Finish event upon a `Close` action from the peer {{NEAT-flow-mapping}}. 827 | 828 | # Cached State 829 | 830 | Beyond a single Connection's lifetime, it is useful for an implementation to keep state and history. This cached 831 | state can help improve future Connection establishment due to re-using results and credentials, and favoring paths and protocols that performed well in the past. 
832 | 833 | Cached state may be associated with different endpoints for the same Connection, depending on the protocol generating the cached content. 834 | For example, session tickets for TLS are associated with specific endpoints, and thus should be cached based on a connection's 835 | hostname Endpoint Identifier (if applicable). However, performance characteristics of a path are more likely tied to the IP address 836 | and subnet being used. 837 | 838 | ## Protocol state caches 839 | 840 | Some protocols will have long-term state to be cached in association with endpoints. This state often has some time after which 841 | it is expired, so the implementation should allow each protocol to specify an expiration for cached content. 842 | 843 | Examples of cached protocol state include: 844 | 845 | - The DNS protocol can cache resolved addresses (such as those retrieved from A and AAAA queries), associated with a Time To Live (TTL) to 846 | be used for future hostname resolutions without needing to ask the DNS resolver again. 847 | - TLS caches session state and tickets based on a hostname, which can be used for resuming sessions with a server. 848 | - TCP can cache cookies for use in TCP Fast Open. 849 | 850 | Cached protocol state is primarily used during Connection establishment for a single Protocol Stack, but may be used to influence an 851 | implementation's preference between several candidate Protocol Stacks. For example, if two IP address Endpoint Identifiers are otherwise 852 | equally preferred, an implementation may choose to attempt a connection to an address for which it has a TCP Fast Open cookie. 853 | 854 | Applications can use the Transport Services API to request that a Connection Group maintain a separate cache for 855 | protocol state. Connections in the group will not use cached state 856 | from Connections outside the group, and Connections outside the group will not 857 | use state cached from Connections inside the group. 
This may be necessary, for 858 | example, if application-layer identifiers rotate and clients wish to avoid 859 | linkability via trackable TLS tickets or TFO cookies. 860 | 861 | ## Performance caches 862 | 863 | In addition to protocol state, Protocol Instances should provide data into a performance-oriented cache to help guide future protocol and path selection. Some performance information can be gathered generically across several protocols to allow predictive comparisons between protocols on given paths: 864 | 865 | - Observed Round Trip Time 866 | - Connection establishment latency 867 | - Connection establishment success rate 868 | 869 | These items can be cached on a per-address and per-subnet granularity, and averaged between different values. The information should be cached on a per-network basis, since it is expected that different network attachments will have different performance characteristics. Besides Protocol Instances, other system entities may also provide data into performance-oriented caches. This could for instance be signal strength information reported by radio modems like Wi-Fi and mobile broadband or information about the battery-level of the device. Furthermore, the system may cache the observed maximum throughput on a path as an estimate of the available bandwidth. 870 | 871 | An implementation should use this information, when possible, to influence preference between candidate paths, endpoints, and protocol options. Eligible options that historically had significantly better performance than others should be selected first when gathering candidates (see {{gathering}}) to ensure better performance for the application. 872 | 873 | The reasonable lifetime for cached performance values will vary depending on the nature of the value. 
Certain information, like the connection establishment success rate to a Remote Endpoint using a given Protocol Stack, can be stored for a long period of time (hours or longer), since it is expected that the capabilities of the Remote Endpoint are not changing very quickly. On the other hand, the Round Trip Time observed by TCP over a particular network path may vary over a relatively short time interval. For such values, the implementation should remove them from the cache more quickly, or treat older values with less confidence/weight. 874 | 875 | {{?RFC9040}} provides guidance about sharing of TCP Control Block information between connections on initialization. 876 | 877 | # Specific Transport Protocol Considerations {#specific-protocol-considerations} 878 | 879 | Each protocol that is supported by a Transport Services Implementation should have a well-defined API mapping. 880 | API mappings for a protocol are important for Connections in which a given protocol is the "top" of the Protocol Stack. 881 | For example, the mapping of the `Send` function for TCP applies to Connections in which the application directly sends over TCP. 882 | 883 | Each protocol has a notion of Connectedness. Possible definitions of 884 | Connectedness for various types of protocols are: 885 | 886 | - Connectionless. Connectionless protocols do not establish explicit state between endpoints, and do not perform a handshake during Connection establishment. 887 | - Connected. Connected (also called "connection-oriented") protocols establish state between endpoints, and perform a handshake during connection establishment. The handshake may be 0-RTT to send data or resume a session, but bidirectional traffic is required to confirm connectedness. 888 | - Multiplexing Connected. Multiplexing Connected protocols share properties with Connected protocols, but also explicitly support opening multiple application-level flows. 
This means that they can support cloning new Connection objects without a new explicit handshake. 889 | 890 | Protocols also have a notion of Data Unit. Possible values for Data Unit are: 891 | 892 | - Byte-stream. Byte-stream protocols do not define any message boundaries of their own apart from the end of a stream in each direction. 893 | - Datagram. Datagram protocols define message boundaries at the same level of transmission, such that only complete (not partial) messages are supported. 894 | - Message. Message protocols support message boundaries that can be sent and received either as complete or partial messages. Maximum message lengths can be defined, and messages can be partially reliable. 895 | 896 | Below, terms in capitals with a dot (e.g., "CONNECT.SCTP") refer to the primitives with the same name in {{Section 4 of !RFC8303}}. For further implementation details, the description of these primitives in {{!RFC8303}} points to {{Section 3 of !RFC8303}} and {{Section 3 of !RFC8304}}, which refers back to the relevant specifications for each protocol. This back-tracking method applies to all elements of {{!RFC8923}} (see appendix D of {{I-D.ietf-taps-interface}}): they are listed in appendix A of {{!RFC8923}} with an implementation hint in the same style, pointing back to {{Section 4 of !RFC8303}}. 897 | 898 | This document presents the protocol mappings defined in {{!RFC8923}}. Other protocol mappings can be provided as separate documents, following the mapping template in {{appendix-mapping-template}}. 899 | 900 | ## TCP {#tcp} 901 | 902 | Connectedness: Connected 903 | 904 | Data Unit: Byte-stream 905 | 906 | Connection Object: 907 | : TCP connections between two hosts map directly to Connection objects. 908 | 909 | Initiate: 910 | : CONNECT.TCP. Calling `Initiate` on a TCP Connection causes it to reserve a local port, and send a SYN to the Remote Endpoint. 911 | 912 | InitiateWithSend: 913 | : CONNECT.TCP with parameter `user message`. 
Early safely replayable data is sent on a TCP Connection in the SYN, as TCP Fast Open data. 914 | 915 | Ready: 916 | : A TCP Connection is ready once the three-way handshake is complete. 917 | 918 | EstablishmentError: 919 | : Failure of CONNECT.TCP. TCP can throw various errors during connection setup. Specifically, it is important to handle a RST being sent by the peer during the handshake. 920 | 921 | ConnectionError: 922 | : Once established, TCP throws errors whenever the connection is disconnected, such as due to receiving a RST from the peer. 923 | 924 | Listen: 925 | : LISTEN.TCP. Calling `Listen` for TCP binds a local port and prepares it to receive inbound SYN packets from peers. 926 | 927 | ConnectionReceived: 928 | : TCP Listeners will deliver new connections once they have replied to an inbound SYN with a SYN-ACK. 929 | 930 | Clone: 931 | : Calling `Clone` on a TCP Connection creates a new Connection with equivalent parameters. These Connections, and Connections generated via later calls to `Clone` on an Established Connection, form a Connection Group. To realize entanglement for these Connections, with the exception of `connPriority`, changing a Connection Property on one of them must affect the Connection Properties of the others too. No guarantees of honoring the Connection Property `connPriority` are given, and thus it is safe for an implementation of a Transport Services system to ignore this property. When it is reasonable to assume that Connections traverse the same path (e.g., when they share the same encapsulation), support for it can also experimentally be implemented using a congestion control coupling mechanism (see for example {{TCP-COUPLING}} or {{?RFC3124}}). 932 | 933 | Send: 934 | : SEND.TCP. TCP does not on its own preserve message boundaries. Calling `Send` on a TCP connection lays out the bytes on the TCP send stream without any other delineation. 
Any Message marked as Final will cause TCP to send a FIN once the Message has been completely written, by calling CLOSE.TCP immediately upon successful termination of SEND.TCP. Note that transmitting a Message marked as Final should not cause the `Closed` event to be delivered to the application, as it will still be possible to receive data until the peer closes or aborts the TCP connection. 935 | 936 | Receive: 937 | : With RECEIVE.TCP, TCP delivers a stream of bytes without any Message delineation. All data delivered in the `Received` or `ReceivedPartial` event will be part of a single stream-wide Message that is marked Final (unless a Message Framer is used). EndOfMessage will be delivered when the TCP Connection has received a FIN (CLOSE-EVENT.TCP) from the peer. Note that reception of a FIN should not cause the `Closed` event to be delivered to the application, as it will still be possible for the application to send data. 938 | 939 | Close: 940 | : Calling `Close` on a TCP Connection indicates that the Connection should be gracefully closed (CLOSE.TCP) by sending a FIN to the peer. It will then still be possible to receive data until the peer closes or aborts the TCP connection. The `Closed` event will be issued upon reception of a FIN. 941 | 942 | Abort: 943 | : Calling `Abort` on a TCP Connection indicates that the Connection should be immediately closed by sending a RST to the peer (ABORT.TCP). 944 | 945 | CloseGroup: 946 | : Calling `CloseGroup` on a TCP Connection (CLOSE.TCP) is identical to calling `Close` on this Connection and on all Connections in the same ConnectionGroup. 947 | 948 | AbortGroup: 949 | : Calling `AbortGroup` on a TCP Connection (ABORT.TCP) is identical to calling `Abort` on this Connection and on all Connections in the same ConnectionGroup. 950 | 951 | ## MPTCP 952 | 953 | Connectedness: Connected 954 | 955 | Data Unit: Byte-stream 956 | 957 | The Transport Services API mappings for MPTCP are identical to TCP. 
MPTCP adds support for multipath properties, 958 | such as `multipath` and `multipathPolicy`, and actions for managing paths, such as `AddRemote` and `RemoveRemote`. 959 | 960 | ## UDP 961 | 962 | Connectedness: Connectionless 963 | 964 | Data Unit: Datagram 965 | 966 | Connection Object: 967 | : UDP Connections represent a pair of specific IP addresses and ports on two hosts. 968 | 969 | Initiate: 970 | : CONNECT.UDP. Calling `Initiate` on a UDP Connection causes it to reserve a local port, but does not generate any traffic. 971 | 972 | InitiateWithSend: 973 | : Early data on a UDP Connection does not have any special meaning. The data is sent whenever the Connection is `Ready`. 974 | 975 | Ready: 976 | : A UDP Connection is ready once the system has reserved a local port and has a path to send to the Remote Endpoint. 977 | 978 | EstablishmentError: 979 | : UDP Connections can only generate errors on initiation due to port conflicts on the local system. 980 | 981 | ConnectionError: 982 | : UDP Connections can only generate Connection errors in response to `Abort` calls. (Once in use, UDP Connections can also generate `SoftError` events (ERROR.UDP) upon receiving ICMP notifications indicating failures in the network.) 983 | 984 | Listen: 985 | : LISTEN.UDP. Calling `Listen` for UDP binds a local port and prepares it to receive inbound UDP datagrams from peers. 986 | 987 | ConnectionReceived: 988 | : UDP Listeners will deliver new connections once they have received traffic from a new Remote Endpoint. 989 | 990 | Clone: 991 | : Calling `Clone` on a UDP Connection creates a new Connection with equivalent parameters. The two Connections are otherwise independent. 992 | 993 | Send: 994 | : SEND.UDP. Calling `Send` on a UDP connection sends the data as the payload of a complete UDP datagram. Marking Messages as Final does not change anything in the datagram's contents. 
Upon sending a UDP datagram, some relevant fields and flags in the IP header can be controlled: DSCP (SET_DSCP.UDP), DF in IPv4 (SET_DF.UDP) and ECN flag (SET_ECN.UDP). 995 | 996 | Receive: 997 | : RECEIVE.UDP. UDP only delivers complete Messages to `Received`, each of which represents a single datagram received in a UDP packet. Upon receiving a UDP datagram, the ECN flag from the IP header can be obtained (GET_ECN.UDP). 998 | 999 | Close: 1000 | : Calling `Close` on a UDP Connection (ABORT.UDP) releases the local port reservation. The Connection then issues a `Closed` event. 1001 | 1002 | Abort: 1003 | : Calling `Abort` on a UDP Connection (ABORT.UDP) is identical to calling `Close`, except that the Connection will send a `ConnectionError` event rather than a `Closed` event. 1004 | 1005 | CloseGroup: 1006 | : Calling `CloseGroup` on a UDP Connection (ABORT.UDP) is identical to calling `Close` on this Connection and on all Connections in the same ConnectionGroup. 1007 | 1008 | AbortGroup: 1009 | : Calling `AbortGroup` on a UDP Connection (ABORT.UDP) is identical to calling `Abort` on this Connection and on all Connections in the same ConnectionGroup. 1010 | 1011 | ## UDP-Lite 1012 | 1013 | Connectedness: Connectionless 1014 | 1015 | Data Unit: Datagram 1016 | 1017 | The Transport Services API mappings for UDP-Lite are identical to UDP. In addition, 1018 | UDP-Lite supports the `msgChecksumLen` and `recvChecksumLen` Properties 1019 | that allow an application to specify the minimum number of bytes in a Message that 1020 | need to be covered by a checksum. 1021 | 1022 | This includes: CONNECT.UDP-Lite; LISTEN.UDP-Lite; SEND.UDP-Lite; RECEIVE.UDP-Lite; ABORT.UDP-Lite; ERROR.UDP-Lite; SET_DSCP.UDP-Lite; SET_DF.UDP-Lite; SET_ECN.UDP-Lite; GET_ECN.UDP-Lite.
1023 | 1024 | ## UDP Multicast Receive 1025 | 1026 | Connectedness: Connectionless 1027 | 1028 | Data Unit: Datagram 1029 | 1030 | Connection Object: 1031 | : Established UDP Multicast Receive connections represent a pair of specific IP addresses and ports. The `direction` Selection Property must be set to `unidirectional receive`, and the Local Endpoint must be configured with a group IP address and a port. 1032 | 1033 | Initiate: 1034 | : Calling `Initiate` on a UDP Multicast Receive Connection causes an immediate `EstablishmentError`. This is an unsupported operation. 1035 | 1036 | InitiateWithSend: 1037 | : Calling `InitiateWithSend` on a UDP Multicast Receive Connection causes an immediate `EstablishmentError`. This is an unsupported operation. 1038 | 1039 | Ready: 1040 | : A UDP Multicast Receive Connection is ready once the system has received traffic for the appropriate group and port. 1041 | 1042 | EstablishmentError: 1043 | : UDP Multicast Receive Connections generate an `EstablishmentError` indicating that joining a multicast group failed if `Initiate` is called. 1044 | 1045 | ConnectionError: 1046 | : The only `ConnectionError` generated by a UDP Multicast Receive Connection is in response to an `Abort` call. 1047 | 1048 | Listen: 1049 | : LISTEN.UDP. Calling `Listen` for UDP Multicast Receive binds a local port, prepares it to receive inbound UDP datagrams from peers, and issues a multicast host join. If a Remote Endpoint Identifier with an address is supplied, the join is Source-specific Multicast, and the path selection is based on the route to the Remote Endpoint. If a Remote Endpoint Identifier is not supplied, the join is Any-source Multicast, and the path selection is based on the outbound route to the group supplied in the Local Endpoint. 1050 | 1051 | There are cases where it is required to open multiple connections for the same address(es).
1052 | For example, one Connection might be opened for a multicast group to serve as a multicast control bus, 1053 | and another application later opens a separate Connection to the same group to send signals to and/or receive signals from the common bus. 1054 | In such cases, the Transport Services system needs to explicitly enable re-use of the same set of addresses (equivalent to setting SO_REUSEADDR 1055 | in the socket API). 1056 | 1057 | ConnectionReceived: 1058 | : UDP Multicast Receive Listeners will deliver new Connections once they have received traffic from a new Remote Endpoint. 1059 | 1060 | Clone: 1061 | : Calling `Clone` on a UDP Multicast Receive Connection creates a new Connection with equivalent parameters. The two Connections are otherwise independent. 1062 | 1063 | Send: 1064 | : SEND.UDP. Calling `Send` on a UDP Multicast Receive connection causes an immediate `SendError`. This is an unsupported operation. 1065 | 1066 | Receive: 1067 | : RECEIVE.UDP. The `Receive` operation in a UDP Multicast Receive connection only delivers complete Messages to `Received`, each of which represents a single datagram received in a UDP packet. Upon receiving a UDP datagram, the ECN flag from the IP header can be obtained (GET_ECN.UDP). 1068 | 1069 | Close: 1070 | : Calling `Close` on a UDP Multicast Receive Connection (ABORT.UDP) releases the local port reservation and leaves the group. The Connection then issues a `Closed` event. 1071 | 1072 | Abort: 1073 | : Calling `Abort` on a UDP Multicast Receive Connection (ABORT.UDP) is identical to calling `Close`, except that the Connection will send a `ConnectionError` event rather than a `Closed` event. 1074 | 1075 | CloseGroup: 1076 | : Calling `CloseGroup` on a UDP Multicast Receive Connection (ABORT.UDP) is identical to calling `Close` on this Connection and on all Connections in the same ConnectionGroup.
1077 | 1078 | AbortGroup: 1079 | : Calling `AbortGroup` on a UDP Multicast Receive Connection (ABORT.UDP) is identical to calling `Abort` 1080 | on this Connection and on all Connections in the same ConnectionGroup. 1081 | 1082 | ## SCTP 1083 | 1084 | Connectedness: Connected 1085 | 1086 | Data Unit: Message 1087 | 1088 | Connection Object: 1089 | : Connection objects can be mapped to an SCTP association or a stream in an SCTP association. Mapping Connection objects to SCTP streams is called "stream mapping" and has additional requirements as follows. The following explanation assumes a client-server communication model. 1090 | 1091 | Stream mapping requires an association to already be in place between the client and the server, and it requires the server to understand that a new incoming stream should be represented as a new Connection object by the Transport Services system. A new SCTP stream is created by sending an SCTP message with a new stream id. Thus, to implement stream mapping, the Transport Services API must provide a newly created Connection object to the application upon the reception of such a message. The necessary semantics to implement a Transport Services system's `Close` and `Abort` primitives are provided by the stream reconfiguration (reset) procedure described in {{?RFC6525}}. This also allows a stream id to be re-used after resetting ("closing") the stream. To implement this functionality, SCTP stream reconfiguration {{?RFC6525}} must be supported by both the client and the server side. 1092 | 1093 | To avoid head-of-line blocking, stream mapping should only be implemented when both sides support message interleaving {{?RFC8260}}. This allows a sender to schedule transmissions between multiple streams without risking that transmission of a large message on one stream might block transmissions on other streams for a long time.
1094 | 1095 | To avoid conflicts between stream ids, the following procedure is recommended: the first Connection, for which the SCTP association has been created, must always use stream id zero. All additional Connections are assigned to unused stream ids in growing order. To avoid a conflict when both endpoints map new Connections simultaneously, the peer which initiated the association must use even stream ids whereas the remote side must map its Connections to odd stream ids. Both sides maintain a status map of the assigned stream ids. Generally, new streams should consume the lowest available (even or odd, depending on the side) stream id; this rule is relevant when lower ids become available because Connection objects associated with the streams are closed. 1096 | 1097 | SCTP stream mapping as described here has been implemented in a research prototype; a description of this implementation is given in {{NEAT-flow-mapping}}. 1098 | 1099 | Initiate: 1100 | : If this is the only Connection object that is assigned to the SCTP Association or stream mapping is 1101 | not used, CONNECT.SCTP is called. Else, unless the Selection Property `activeReadBeforeSend` 1102 | is Preferred or Required, a new stream is used: if there are enough streams 1103 | available, `Initiate` is a local operation that assigns a new stream id to the Connection object. 1104 | The number of streams is negotiated as a parameter of the prior CONNECT.SCTP call, and it represents a 1105 | trade-off between local resource usage and the number of Connection objects that can be mapped 1106 | without requiring a reconfiguration signal. When running out of streams, ADD_STREAM.SCTP must be called. 1107 | 1108 | InitiateWithSend: 1109 | : If this is the only Connection object that is assigned to the SCTP association or stream mapping is not used, CONNECT.SCTP is called with the "user message" parameter.
Else, a new stream 1110 | is used (see `Initiate` for how to handle running out of streams), and this just sends the first message 1111 | on a new stream. 1112 | 1113 | Ready: 1114 | : `Initiate` or `InitiateWithSend` returns without an error, i.e. SCTP's four-way handshake has completed. If an association with the peer already exists, stream mapping is used and enough streams are available, a Connection object instantly becomes `Ready` after calling `Initiate` or `InitiateWithSend`. 1115 | 1116 | EstablishmentError: 1117 | : Failure of CONNECT.SCTP. 1118 | 1119 | ConnectionError: 1120 | : TIMEOUT.SCTP or ABORT-EVENT.SCTP. 1121 | 1122 | Listen: 1123 | : LISTEN.SCTP. If an association with the peer already exists and stream mapping is used, `Listen` just expects to receive a new message with a new stream id (chosen in accordance with the stream id assignment procedure described above). 1124 | 1125 | ConnectionReceived: 1126 | : LISTEN.SCTP returns without an error (a result of successful CONNECT.SCTP from the peer), or, in case of stream mapping, the first message has arrived on a new stream (in this case, `Receive` is also invoked). 1127 | 1128 | Clone: 1129 | : Calling `Clone` on an SCTP association creates a new Connection object and assigns it a new stream id in accordance with the stream id assignment procedure described above. If there are not enough streams available, ADD_STREAM.SCTP must be called. 1130 | 1131 | Send: 1132 | : SEND.SCTP. Message Properties such as `msgLifetime` and `msgOrdered` map to parameters of this primitive. 1133 | 1134 | Receive: 1135 | : RECEIVE.SCTP. The "partial flag" of RECEIVE.SCTP invokes a `ReceivedPartial` event. 1136 | 1137 | Close: 1138 | : If this is the only Connection object that is assigned to the SCTP association, CLOSE.SCTP is called, and the `Closed` event will be delivered to the application upon the ensuing CLOSE-EVENT.SCTP.
Else, the Connection object is one out of several Connection objects that are assigned to the same SCTP association, and RESET_STREAM.SCTP must be called, which informs the peer that the stream will no longer be used for mapping and can be used by future `Initiate`, `InitiateWithSend` or `Listen` calls. At the peer, the event RESET_STREAM-EVENT.SCTP will fire, which the peer must answer by issuing RESET_STREAM.SCTP too. The resulting local RESET_STREAM-EVENT.SCTP informs the Transport Services system that the stream id can now be re-used by the next `Initiate`, `InitiateWithSend` or `Listen` calls, and invokes a `Closed` event towards the application. 1139 | 1140 | Abort: 1141 | : If this is the only Connection object that is assigned to the SCTP association, ABORT.SCTP is called. Else, the Connection object is one out of several Connection objects that are assigned to the same SCTP association, and shutdown proceeds as described under `Close`. 1142 | 1143 | CloseGroup: 1144 | : Calling `CloseGroup` calls CLOSE.SCTP, closing all Connections in the SCTP association. 1145 | 1146 | AbortGroup: 1147 | : Calling `AbortGroup` calls ABORT.SCTP, immediately closing all Connections in the SCTP association. 1148 | 1149 | In addition to the API mappings described above, when there are multiple Connection objects assigned to the same SCTP association, SCTP can support Connection properties such as `connPriority` and `connScheduler` where CONFIGURE_STREAM_SCHEDULER.SCTP can be called to adjust the priorities of streams in the SCTP association. 1150 | 1151 | # IANA Considerations 1152 | 1153 | This document has no actions for IANA. 1154 | 1155 | # Security Considerations 1156 | 1157 | {{I-D.ietf-taps-arch}} provides general security considerations and requirements for any system that implements the Transport Services architecture. {{I-D.ietf-taps-interface}} provides further discussion on security and privacy implications of the Transport Services API.
This document provides additional guidance on implementation specifics for the Transport Services API and as such the security considerations in both of these documents apply. The next two subsections discuss further considerations that are specific to mechanisms specified in this document. 1158 | 1159 | ## Considerations for Candidate Gathering 1160 | 1161 | The Security Considerations of the Transport Services Architecture {{I-D.ietf-taps-arch}} forbids gathering and racing with Protocol Stacks that do not have equivalent security properties. Therefore, implementations need to avoid downgrade attacks that allow network interference to cause the implementation to select less secure, or entirely insecure, combinations of paths and protocols. 1162 | 1163 | ## Considerations for Candidate Racing 1164 | 1165 | See {{fastopen}} for security considerations around racing with 0-RTT data. 1166 | 1167 | An attacker that knows a particular device is racing several options during connection establishment may be able to block packets for the first connection attempt, thus inducing the device to fall back to a secondary attempt. This is a problem if the secondary attempts have worse security properties that enable further attacks. Implementations should ensure that all options have equivalent security properties to avoid incentivizing attacks. 1168 | 1169 | Since results from the network can determine how a connection attempt tree is built, such as when DNS returns a list of resolved endpoints, it is possible for the network to cause an implementation to consume significant on-device resources. Implementations should limit the maximum amount of state allowed for any given node, including the number of child nodes, especially when the state is based on results from the network. 1170 | 1171 | # Acknowledgements 1172 | 1173 | This work has received funding from the European Union's Horizon 2020 research and 1174 | innovation programme under grant agreement No. 
644334 (NEAT) and No. 815178 (5GENESIS). 1175 | 1176 | This work has been supported by Leibniz Prize project funds of DFG - German 1177 | Research Foundation: Gottfried Wilhelm Leibniz-Preis 2011 (FKZ FE 570/4-1). 1178 | 1179 | This work has been supported by the UK Engineering and Physical Sciences 1180 | Research Council under grant EP/R04144X/1. 1181 | 1182 | This work has been supported by the Research Council of Norway under its "Toppforsk" 1183 | programme through the "OCARINA" project. 1184 | 1185 | 1186 | Thanks to Colin Perkins, Tom Jones, Karl-Johan Grinnemo, and Gorry Fairhurst for their contributions to the design of this specification. 1187 | Thanks also to Stuart Cheshire, Josh Graessley, David Schinazi, and Eric Kinnear for their implementation and design efforts, including Happy Eyeballs, that heavily influenced this work. 1188 | 1189 | --- back 1190 | 1191 | # API Mapping Template {#appendix-mapping-template} 1192 | 1193 | Any protocol mapping for the Transport Services API should follow a common template. 1194 | 1195 | Connectedness: (Connectionless/Connected/Multiplexing Connected) 1196 | 1197 | Data Unit: (Byte-stream/Datagram/Message) 1198 | 1199 | Connection Object: 1200 | 1201 | Initiate: 1202 | 1203 | InitiateWithSend: 1204 | 1205 | Ready: 1206 | 1207 | EstablishmentError: 1208 | 1209 | ConnectionError: 1210 | 1211 | Listen: 1212 | 1213 | ConnectionReceived: 1214 | 1215 | Clone: 1216 | 1217 | Send: 1218 | 1219 | Receive: 1220 | 1221 | Close: 1222 | 1223 | Abort: 1224 | 1225 | CloseGroup: 1226 | 1227 | AbortGroup: 1228 | 1229 | 1230 | # Reasons for errors {#appendix-reasons-errors} 1231 | 1232 | The Transport Services API {{I-D.ietf-taps-interface}} allows several generic error types to specify a more detailed reason about why an error occurred. This appendix lists some of the possible reasons.
1233 | 1234 | * InvalidConfiguration: 1235 | The transport properties and Endpoint Identifiers provided by the application are either contradictory or incomplete. Examples include the lack of a Remote Endpoint Identifier on an active open or using a multicast group address while not requesting a unidirectional receive. 1236 | 1237 | * NoCandidates: 1238 | The configuration is valid, but none of the available transport protocols can satisfy the transport properties provided by the application. 1239 | 1240 | * ResolutionFailed: 1241 | The remote or local specifier provided by the application cannot be resolved. 1242 | 1243 | * EstablishmentFailed: 1244 | The Transport Services system was unable to establish a transport-layer connection to the Remote Endpoint specified by the application. 1245 | 1246 | * PolicyProhibited: 1247 | The system policy prevents the Transport Services system from performing the action requested by the application. 1248 | 1249 | * NotCloneable: 1250 | The Protocol Stack is not capable of being cloned. 1251 | 1252 | * MessageTooLarge: 1253 | The Message is too big for the Transport Services system to handle. 1254 | 1255 | * ProtocolFailed: 1256 | The underlying Protocol Stack failed. 1257 | 1258 | * InvalidMessageProperties: 1259 | The Message Properties either contradict the Transport Properties or they cannot be satisfied by the Transport Services system. 1260 | 1261 | * DeframingFailed: 1262 | The data that was received by the underlying Protocol Stack could not be processed by the Message Framer. 1263 | 1264 | * ConnectionAborted: 1265 | The connection was aborted by the peer. 1266 | 1267 | * Timeout: 1268 | Delivery of a Message was not possible after a timeout. 1269 | 1270 | # Existing Implementations {#appendix-implementations} 1271 | 1272 | This appendix gives an overview of existing implementations, at the time of writing, of Transport Services systems that are (to some degree) in line with this document.
1273 | 1274 | 1275 | * Apple's Network.framework: 1276 | * Network.framework is a transport-level API built for C, Objective-C, and Swift. It is a connect-by-name API that supports transport security protocols. It provides userspace implementations of TCP, UDP, TLS, DTLS, proxy protocols, and allows extension via custom framers. 1277 | * Documentation: 1278 | 1279 | * NEAT and NEATPy: 1280 | * NEAT is the output of the European H2020 research project "NEAT"; it is a user-space library for protocol-independent communication on top of TCP, UDP and SCTP, with many more features, such as a policy manager. 1281 | * Code: 1282 | * Code at the Software Heritage Archive: 1283 | * NEAT project: 1284 | * NEATPy is a Python shim over NEAT which updates the NEAT API to be in line with version 6 of the Transport Services API draft. 1285 | * Code: 1286 | * Code at the Software Heritage Archive: 1287 | 1288 | * PyTAPS: 1289 | * A TAPS implementation based on Python asyncio, offering protocol-independent communication to applications on top of TCP, UDP and TLS, with support for multicast. 1290 | * Code: 1291 | * Code at the Software Heritage Archive: 1292 | -------------------------------------------------------------------------------- /metadata.min.js: -------------------------------------------------------------------------------- 1 | async function addMetadata(){try{const e=document.styleSheets[0].cssRules;for(let t=0;t=0?document.URL.replace(/html$/,"json"):document.URL+".json";const o=await fetch(t);a=await o.json()}}if(!a)return;e.style.display="block";const s="",d="https://datatracker.ietf.org/doc",n="https://datatracker.ietf.org/ipr/search",c="https://www.rfc-editor.org/info",l=a.doc_id.toLowerCase(),i=a.doc_id.slice(0,3).toLowerCase(),f=a.doc_id.slice(3).replace(/^0+/,""),u={status:"Status",obsoletes:"Obsoletes",obsoleted_by:"Obsoleted By",updates:"Updates",updated_by:"Updated By",see_also:"See Also",errata_url:"Errata"};let h="
";["status","obsoletes","obsoleted_by","updates","updated_by","see_also","errata_url"].forEach(e=>{if("status"==e){a[e]=a[e].toLowerCase();var t=a[e].split(" "),o=t.length,w="",p=1;for(let e=0;e"+a[e][t].slice(3)+", ":m+""+a[e][t].slice(3)+"",b++);a[e]=m}else if("see_also"==e){var y,L="",C=1;y=a[e].length;for(let t=0;t"+_+" "+v+", ":L+""+v+", ":"RFC"!=_?L+""+_+" "+v+"":L+""+v+"",C++}a[e]=L}else if("errata_url"==e){var R="";R=a[e]?R+"Errata exist | Datatracker| IPR | Info page":"Datatracker | IPR | Info page",a[e]=R}""!=a[e]?"Errata"==u[e]?h+=`
More info:
${a[e]}
`:h+=`
${u[e]}:
${a[e]}
`:"Errata"==u[e]&&(h+=`
More info:
${a[e]}
`)}),h+="
",e.innerHTML=h}catch(e){console.log(e)}else console.log("Could not locate metadata
element");function r(e){return e.charAt(0).toUpperCase()+e.slice(1)}}window.removeEventListener("load",addMetadata),window.addEventListener("load",addMetadata); -------------------------------------------------------------------------------- /roadshows/ief104-taps-connectiom-pooling.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ief104-taps-connectiom-pooling.key -------------------------------------------------------------------------------- /roadshows/ief104-taps-connectiom-pooling.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ief104-taps-connectiom-pooling.pdf -------------------------------------------------------------------------------- /roadshows/ietf101-taps-interface.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf101-taps-interface.key -------------------------------------------------------------------------------- /roadshows/ietf101-taps-interface.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf101-taps-interface.pdf -------------------------------------------------------------------------------- /roadshows/ietf102-taps-interface.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf102-taps-interface.key -------------------------------------------------------------------------------- /roadshows/ietf102-taps-interface.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf102-taps-interface.pdf -------------------------------------------------------------------------------- /roadshows/ietf103-taps-draft_discussion.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf103-taps-draft_discussion.pdf -------------------------------------------------------------------------------- /roadshows/ietf103-taps-draft_discussion.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/ietf103-taps-draft_discussion.pptx -------------------------------------------------------------------------------- /roadshows/interim-jan19.taps-properties-registry.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/interim-jan19.taps-properties-registry.key -------------------------------------------------------------------------------- /roadshows/interim-jan19.taps-properties-registry.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ietf-tapswg/api-drafts/36504d23316ee0d2c6cee5928359b2525965f2f2/roadshows/interim-jan19.taps-properties-registry.pdf --------------------------------------------------------------------------------