├── .gitignore ├── CONTRIBUTING.md ├── Makefile ├── README.md ├── draft-zartbot-sr-udp.html ├── draft-zartbot-sr-udp.md ├── draft-zartbot-sr-udp.txt ├── draft-zartbot-srou-signalling.html ├── draft-zartbot-srou-signalling.md ├── draft-zartbot-srou-signalling.txt ├── example_apps ├── client ├── interim_fwd └── server ├── prototype ├── forwarder │ └── fwd.go └── quic_go.diff └── slides └── QUIC-SR.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | .refcache/ 3 | .targets.mk 4 | *.xml 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This repository relates to activities in the Internet Engineering Task Force 4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered 5 | Contributions to the IETF Standards Process, as defined in the intellectual 6 | property policies of IETF currently designated as 7 | [BCP 78](https://www.rfc-editor.org/info/bcp78), 8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the 9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html). 10 | 11 | Any edit, commit, pull request, issue, comment or other change made to this 12 | repository constitutes Contributions to the IETF Standards Process 13 | (https://www.ietf.org/). 14 | 15 | You agree to comply with all applicable IETF policies and procedures, including, 16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being 17 | subject to a Simplified BSD License) in Contributions. 18 | 19 | 20 | ## Other Resources 21 | 22 | Discussion of this work occurs on the 23 | [{WG_NAME} working group mailing list](https://mailarchive.ietf.org/arch/browse/{WG_NAME}/) 24 | ([subscribe](https://www.ietf.org/mailman/listinfo/{WG_NAME})). 
In addition to 25 | contributions in GitHub, you are encouraged to participate in discussions there. 26 | 27 | **Note**: Some working groups adopt a policy whereby substantive discussion of 28 | technical issues needs to occur on the mailing list. 29 | 30 | You might also like to familiarize yourself with other 31 | [working group documents](https://datatracker.ietf.org/wg/{WG_NAME}/documents/). 32 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LIBDIR := lib 2 | include $(LIBDIR)/main.mk 3 | 4 | $(LIBDIR)/main.mk: 5 | ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null)) 6 | git submodule sync 7 | git submodule update $(CLONE_ARGS) --init 8 | else 9 | git clone -q --depth 10 $(CLONE_ARGS) \ 10 | -b master https://github.com/martinthomson/i-d-template $(LIBDIR) 11 | endif 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # draft-quic-sr 2 | RFC draft for segment routing over UDP/QUIC 3 | 4 | ## Abstract 5 | 6 | This document defines the Segment Routing Header (RFC8754) extension 7 | in the QUIC transport protocol. 8 | It will provide a new general-purpose transport layer with the following features: 9 | * Secure [QUIC TLS] 10 | * Reliable [QUIC Transport] 11 | * Programmable [Segment Routing] 12 | 13 | ## Presentation 14 | 15 | An easy-to-understand presentation is available at: 16 | 17 | 18 | 19 | 20 | ## Contribution 21 | Discussion of this work is encouraged to happen on the GitHub repository which 22 | contains the draft: 23 | 24 | Issues and PRs are welcome: 25 | 26 | 27 | ## Use case 28 | 29 | 1. Traffic Engineering over IPv4 internet 30 | 2. Client-less VPC access 31 | 3. CNI(Container Network Interface) 32 | 4. Wired and Wireless Converged Access 33 | 5. 
Cloud native network service platform 34 | 35 | ## Prototype 36 | A working IPv4 based QUIC-SR application is available at 37 | 38 | 39 | We made a small patch to quic-go to provide userspace QUIC support. 40 | 41 | //quic-go create session 42 | session, err := quic.DialAddr(*remoteSock, tlsConf, config) 43 | 44 | //update the QUIC-SR segment list; it can be modified at runtime. 45 | session.SetQUICSR([]string{"1.1.1.1:2345", "2.2.2.2:4567"}, []byte{0x1, 0x2, 0x3}) 46 | -------------------------------------------------------------------------------- /draft-zartbot-sr-udp.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Segment Routing over UDP(SRoU)" 3 | abbrev: Segment Routing over UDP(SRoU) 4 | docname: draft-zartbot-sr-udp-00 5 | date: {DATE} 6 | category: exp 7 | ipr: trust200902 8 | area: Transport 9 | workgroup: SPRING 10 | 11 | stand_alone: yes 12 | pi: [toc, sortrefs, symrefs, docmapping] 13 | 14 | author: 15 | - 16 | ins: K. Fang 17 | name: Kevin Fang 18 | org: Cisco Systems, Inc. 19 | email: zartbot.ietf@gmail.com 20 | - 21 | ins: Y. Li 22 | name: Yinghao Li 23 | org: Google, Inc. 24 | email: liyinghao@gmail.com 25 | - 26 | ins: F. Cai 27 | name: Feng Cai 28 | org: Cisco Systems, Inc. 29 | email: fecai@cisco.com 30 | - 31 | ins: X. Jiang 32 | name: Xing Jiang 33 | org: Cisco Systems, Inc. 34 | email: jamjiang@cisco.com 35 | 36 | --- abstract 37 | 38 | This document defines the Segment Routing Header{{!RFC8754}} extension 39 | in UDP transport protocol with Network Address Translation Traversal. 40 | 41 | --- middle 42 | 43 | # Introduction 44 | 45 | Many UDP based transport protocols (e.g., IPSec/DTLS/QUIC) could provide a secure 46 | transport layer to handle overlay traffic. However, they are not flexible enough 47 | for source based path enforcement. 
48 | 49 | This document defines a new Segment Routing Header in the UDP payload to enable 50 | segment routing over UDP(SRoU) for IPSec/DTLS/QUIC or other UDP based traffic. 51 | 52 | Segment Routing over UDP(SRoU) interworking with QUIC could provide a generic 53 | programmable and secure transport layer for next generation applications. 54 | 55 | Discussion of this work is encouraged to happen on the GitHub repository which 56 | contains the draft: 57 | 58 | ## Specification of Requirements 59 | 60 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 61 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 62 | "OPTIONAL" in this document are to be interpreted as described in BCP 14 63 | {{?RFC2119}} {{?RFC8174}} when, and only when, 64 | they appear in all capitals, as shown here. 65 | 66 | ## Motivation 67 | 68 | Segment Routing provides source-based path enforcement and transport level 69 | programmability but lacks IPv4 support for transport over the Internet. 70 | 71 | MPLS-over-UDP{{!RFC7510}} and MPLS Segment Routing over IP{{!RFC8663}} 72 | define SR-MPLS over IPv4 networks, but they lack NAT traversal capabilities. 73 | 74 | Many SDWAN vendors have defined private protocols for routing control over 75 | multiple public clouds and the Internet, which makes multi-vendor interoperability hard. 76 | 77 | Many applications may require intelligent traffic steering (CDN/LB case); 78 | SRoU with QUIC could be used in these cases. 79 | 80 | 81 | # SR over UDP(SRoU) Packet encapsulation 82 | 83 | SRoU defines a generic segment routing enabled transport layer; the SR Header 84 | is inserted in the UDP payload. 
85 | 86 | 87 | ~~~ 88 | 89 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 90 | | IP Header | 91 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 92 | | UDP Header | 93 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 94 | | SRoU Header | 95 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 96 | | | 97 | | Payload | 98 | | | 99 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 100 | ~~~ 101 | {: #srou-encap title="SRoU encapsulation"} 102 | 103 | ## SR over UDP(SRoU) Header 104 | 105 | SR over UDP must be present at the head of UDP payload. 106 | 107 | ~~~ 108 | 0 1 2 3 109 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 110 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 111 | | Magic Number | SRoU Length | Flow ID Length| Protocol-ID | 112 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 113 | | | 114 | | Flow ID( Variable length) | 115 | | | 116 | | | 117 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 118 | | Source Address | 119 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 120 | | Source Port | 121 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 122 | 123 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 124 | | Segment Type | SR Hdr Len | Last Entry | Segments Left | 125 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 126 | | | 127 | | Segment List[0] (length based on segment type) | 128 | | | 129 | | | 130 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 131 | | | 132 | | | 133 | ... 
134 | | | 135 | | | 136 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 137 | | | 138 | | Segment List[0] (length based on segment type) | 139 | | | 140 | | | 141 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 142 | // // 143 | // Optional Type Length Value objects (variable) // 144 | // // 145 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 146 | ~~~ 147 | {: #srh-format title="SRoU Header"} 148 | 149 | Magic Number: 150 | 1 Byte field 151 | For QUIC: could set to ALL ZERO to diffenciate with original header. 152 | For IPSec: could set to 0xFE value and avoid SPI allocation in 153 | this range. 154 | *0x00 may conflict with NON-ESP HEADER 155 | *0xFF may conflict with KeepAlive Message 156 | 157 | SRoU Length: 158 | 1 Byte, The byte length of a SRoU header. 159 | 160 | FlowID Length: 161 | 1 Byte, The byte length of FlowID field. 162 | 163 | Protocol-ID: 164 | 165 | | Type | Name |Section | 166 | |-----:|:------------|:------------------------------------| 167 | | 0x0 | OAM | for Link state probe and other OAM | 168 | | 0x1 | IPv4 | Indicate inner payload is IPv4 pkt | 169 | | 0x2 | IPv6 | Indicate inner payload is IPv6 pkt | 170 | {: #protocol-id title="Protocol ID field"} 171 | 172 | Source Address: 173 | Protocol-ID = 1, this field is 4-Bytes IPv4 address 174 | Protocol-ID = 2, this field is 16-Bytes IPv6 address 175 | 176 | Source Port: 177 | Source UDP Port Number 178 | 179 | Segment Type: 180 | 181 | | Type | Name | Len |Section | 182 | |-----:|:------------------------------|:-----|:-----------------------| 183 | | 0x0 | Reserved | | | 184 | | 0x1 | IPv4 Address+ Port | 48b |{{ipv4-locator}} | 185 | | 0x2 | SRv6 | 128b |{{srv6-locator}} | 186 | | 0x3 | Compressed Segment List | 128b |{{cSID}} | 187 | {: #segment-types title="Segment Types"} 188 | 189 | SR Hdr Len: 190 | : SR Header length, include the SR Header flags Segment-List and Optional TLV. 
191 | 192 | Last Entry: 193 | : contains the index(zero based), in the Segment List, of 194 | the last element of the Segment List. 195 | 196 | Segments Left: 197 | : 8-bit unsigned integer. Number of route segments remaining, 198 | i.e., number of explicitly listed intermediate nodes still 199 | to be visited before reaching the final destination. 200 | 201 | Segment List[0..n]: 202 | : 128-bit/48-bit/144-bit addresses to represent the SR Policy. 203 | Detailed forwarding behavior is defined in {{pkt-proccessing}}. 204 | 205 | TLV: 206 | : Optional TLVs used for future extension. Currently only the following 207 | TLVs are defined. 208 | 209 | | Type | Value | Len |Section | 210 | |-----:|:--------------------|:---------|:-----------------------| 211 | | 0x0 | SR Integrity | 32b |{{sr-integrity}} | 212 | | 0x1 | Micro Segment Policy| variable |{{useg-policy}} | 213 | | 0x2 | End.PacketInfo | variable |{{end-packet-info}} | 214 | {: #optional-tlv-types title="Optional TLV"} 215 | 216 | 217 | # Packet Processing {#pkt-proccessing} 218 | 219 | This section describes the packet processing procedure. The following 220 | topology will be used in this section. 
221 | 222 | ~~~ 223 | H1---R1----------I1------R3----------+---R4---H2 224 | | | 225 | |-----------R2------------------| 226 | | 227 | | 228 | I2 229 | 230 | I1,I2: Interim Node that support SRoU 231 | R1~R4: Traditional Router 232 | H1,H2: Host 233 | ~~~ 234 | {: #pp-topology title="Topology for packet proccesing"} 235 | 236 | | Host | Address | SRoU Port| Post NAT | 237 | |-----:|:-------------------------|:---------|:-----------------| 238 | | H1 | 192.168.1.2 | 5111 | 10.1.1.1:23456 | 239 | | R1 | 192.168.1.1/10.1.1.1 | | | 240 | | R2 | 10.1.2.2 | | | 241 | | R3 | 10.1.3.3 | | | 242 | | R4 | 10.1.4.4 | | | 243 | | H2 | 10.99.2.2 | 443 | 10.1.4.4:443 | 244 | | I1 | 10.99.1.1 | 8811 | | 245 | | I2 | 192.168.99.2 | 8822 | 10.1.2.2:12345 | 246 | {: #ipv4-addr title="IP address table"} 247 | 248 | 249 | ## Type:0x1, IPv4 Locator Mode {#ipv4-locator} 250 | 251 | In this mode, the endpoint could directly insert the interim node IPv4 252 | addresses and port into the segment-list. 253 | 254 | ### General forwarding case 255 | For example, H1 intend to send packet to H2 via R1-->I2---->H2, 256 | In this case SRoU packet will be NATed twice to show the NAT traversal workflow. 257 | I2's public address could use STUN{{!RFC5389}} protocol detected and sync to all 258 | SRoU enabled devices. 259 | 260 | H1 send packet with SRoU Header as below, H1 could use STUN detect it's source 261 | public address, but consider the simplicity, the 1st hop SRoU forwarder cloud 262 | update the source ip/port field in SRoU header. 
263 | 264 | ~~~ 265 | IP/UDP Header { 266 | Source IP: 192.168.1.2, 267 | Destination IP: 10.1.2.2(SegmentList[1],I2 Pre-NAT public address), 268 | Source Port: 5111, 269 | Destination Port: 12345(SegmentList[1],I2 Pre-NAT public port), 270 | } 271 | SRoU Header { 272 | Magic Num = 0x0 273 | SRoU Length = 29 274 | FlowID Length = 0x3 275 | Protocol-ID = 0x1(IPv4), 276 | FlowID = 0x123, 277 | Source Address = 192.168.1.2, 278 | Source Port = 5111, 279 | Segment Left = 0x1, 280 | Last Entry = 0x1, 281 | SegmenetList[0] = 10.1.4.4:443(H2), 282 | SegmenetList[1] = 10.1.2.2:12345(I2), 283 | } 284 | ~~~ 285 | {: #type-1-h1-i2 title="Type:0x1 H1-->I2 Packet Header"} 286 | 287 | 288 | R1 is a NAT Device it will change the Source IP/Port to 10.1.1.1:23456. 289 | But this router may not have ALG function to modify SRoU Header.Then packet 290 | will send to 10.1.2.2:12345. It will be NAT again to I2. 291 | 292 | After twice NAT, I2 Recieved packet as below: 293 | 294 | ~~~ 295 | IP/UDP Header { 296 | Source IP: 10.1.1.1(H1 post NAT addr), 297 | Destination IP: 192.168.99.2(I2 private addr), 298 | Source Port: 23456(H1 post NAT port), 299 | Destination Port: 8822(I2 private port), 300 | } 301 | SRoU Header { 302 | Magic Num = 0x0 303 | SRoU Length = 29 304 | FlowID Length = 0x3 305 | Protocol-ID = 0x1(IPv4), 306 | FlowID = 0x123, 307 | Source Address = 192.168.1.2, 308 | Source Port = 5111, 309 | Segment Left = 0x1, 310 | Last Entry = 0x1, 311 | SegmenetList[0] = 10.1.4.4:443(H2), 312 | SegmenetList[1] = 10.1.2.2:12345(I2), 313 | } 314 | ~~~ 315 | {: #type-1-i2-recieved title="Type:0x1 H1-->I2, I2 Recieved Packet Header"} 316 | 317 | 318 | if the (LastEntry == Segment Left) indicate I2 is the 1st hop SRoU forwarder, 319 | It MUST apply ALG to update the Source Address/Port field by the IP/UDP header. 320 | Then it will execute Segment Left - 1, and copy SegmentList[0] to DA/Dport. 
321 | Because some interim routers such as R2 may perform uRPF checking, the SA/Sport will also 322 | be updated to the I2 SRoU socket address. 323 | 324 | I2--->H2 packet: 325 | 326 | ~~~ 327 | IP/UDP Header { 328 | Source IP: 192.168.99.2(I2 Private), 329 | Destination IP: 10.1.4.4(SegmentList[0]), 330 | Source Port: 8822(I2 Private), 331 | Destination Port: 443(SegmentList[0]), 332 | } 333 | SRoU Header { 334 | Magic Num = 0x0 335 | SRoU Length = 29 336 | FlowID Length = 0x3 337 | Protocol-ID = 0x1(IPv4), 338 | FlowID = 0x123, 339 | Source Address = 10.1.1.1(updated by I2 ALG), 340 | Source Port = 23456(updated by I2 ALG), 341 | Segment Left = 0x0(SL--), 342 | Last Entry = 0x1, 343 | SegmentList[0] = 10.1.4.4:443(H2), 344 | SegmentList[1] = 10.1.2.2:12345(I2), 345 | } 346 | ~~~ 347 | {: #type-1-i2h2 title="Type:0x1 I2-->H2 Packet Header"} 348 | 349 | H2 will receive the packet, and if Segments Left == 0, it MUST copy the 350 | Source Address and Port into the IP/UDP Header, strip out the SRoU Header and 351 | send the packet to the UDP socket. It may cache the reversed segment list for symmetric routing. 352 | 353 | H2 sends to the UDP socket: 354 | 355 | ~~~ 356 | IP/UDP Header { 357 | Source IP: 10.1.1.1(Copied from SRoU Src field), 358 | Destination IP: 10.99.2.2(Static NAT by R4), 359 | Source Port: 23456(Copied from SRoU Src field), 360 | Destination Port: 443(SegmentList[0]), 361 | } 362 | UDP Payload { 363 | } 364 | ~~~ 365 | {: #type-1-h2tx title="Type:0x1 H2 Send to UDP socket"} 366 | 367 | ### Programmable forwarding case 368 | 369 | In type:0x1 mode, each segment has 48 bits (32-bit IPv4 address with 16-bit port). 370 | It could be defined for programmable forwarding with a special IPv4 prefix (i.e., 
371 | 255.0.0.0/8): 372 | 373 | Prefix:<32bit functions and args> 374 | 375 | For example, we could define an EVPN/VPNv4/VPNv6 function in the SRoU header by SID: 376 | 377 | 255:<16bit End.X function>:<24bit VPN ID> 378 | 379 | 380 | ## Type:0x2, SRv6 format {#srv6-locator} 381 | 382 | IPv6 does not need to consider the NAT traversal case. In this mode the 383 | forwarding action is almost the same as SRv6. This is only used for application driven 384 | traffic steering (like the CDN/LB use case). It has some benefit when interworking with 385 | QUIC: the pure userspace implementation could provide additional flexibility. 386 | 387 | For example, an IoT sensor with a legacy kernel stack that does not support SRv6 could 388 | use SRoU to insert the SRH in the UDP payload; the 1st hop SRoU forwarder could convert it 389 | to a standard SRv6 packet. 390 | 391 | ## Type:0x3, Compressed Segment List {#cSID} 392 | 393 | ### Service Registration & Mapping 394 | I1,I2 use the SRoU port as source port to initiate a STUN{{!RFC5389}} session to the SR 395 | mapping server. The mapping server could detect the Post NAT address, assign 396 | a SID for each host, and distribute the IP/port--SID mapping database to all the SRoU 397 | enabled hosts. 398 | 399 | |Host | Socket | SID | 400 | |-----:|:-----------------------|:---------| 401 | | I1 | 10.99.1.1:8811 | 1111 | 402 | | I2 | 10.1.2.2:12345 | 2222 | 403 | {: #sid_map title="sid mapping"} 404 | 405 | In this mode the socket information could be combined with IPv4 and IPv6. 406 | 407 | ## Optional TLV 408 | 409 | ### SR Integrity TLV {#sr-integrity} 410 | SR Integrity Tag to validate the SRH. All fields in the SRH except the 411 | Segments Left field need to be checked. 
412 | 413 | ### Micro-segmentation(uSeg) {#useg-policy} 414 | Option-TLV could define a Sub-TLV to support a Micro-segmentation security policy: 415 | 416 | ~~~ 417 | OptionTLV { 418 | 0x1, uSeg{ 419 | 0x0, SRC_GROUP_ID, 420 | 0x1, DST_GROUP_ID, 421 | 0x2, APP_GROUP_ID, 422 | 0x3, SRC_DEVICE_ID, 423 | 0x4, DST_DEVICE_ID, 424 | 0x5, APP_ID, 425 | } 426 | } 427 | ~~~ 428 | 429 | Customers could also encode this micro-segment policy header in the FlowID field. 430 | 431 | ### End.PacketInfo {#end-packet-info} 432 | This optional TLV defines extended packet info and the Segment-end packet edit 433 | function. Sub-TLVs are defined as below: 434 | 435 | #### Type:0x0, VPN-ID 436 | The SDWAN Router could use {{!I-D.ietf-quic-datagram}} as a VPN tunnel. This 437 | Sub-TLV defines the VPN-ID inside the tunnel. 438 | 439 | If the SRoU header has this sub-TLV, the device MUST decrypt the inner payload and 440 | use the VPN-ID for inner packet destination lookup. 441 | 442 | #### Type:0x1, Original Destination Address/Port 443 | In SR Type 0x3, the original destination address/port could not be encoded in the 128-bit 444 | field; it could be stored in an optional TLV. 
445 | 446 | 447 | # OAM 448 | 449 | SRoU OAM Packet format is defined as below: 450 | 451 | ~~~ 452 | 0 1 2 3 453 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 454 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 455 | | Magic Number | SRoU Length | Flow ID Length| P-ID =0x0 | 456 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 457 | | | 458 | | Flow ID( Variable length) | 459 | | | 460 | | | 461 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 462 | | OAM-Type | OAM Payload(Variable Length based on Type) | 463 | +-+-+-+-+-+-+-+-+ + 464 | | | 465 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 466 | ~~~ 467 | {: #oam-format title="SRoU OAM Header"} 468 | 469 | OAM-Type: 470 | 471 | |ID | Type | Usage | 472 | |-----:|:----------------------:|:--------------------------------------------| 473 | | 0 | PM | Latency & Loss & Jitter Measurement | 474 | | 1 | IPv4 STUN Request | | 475 | | 2 | IPv4 STUN Response | | 476 | | 3 | IPv6 STUN Request | *Reserved for NAT66 Case(Not implement yet) | 477 | | 4 | IPv6 STUN Response | *Reserved for NAT66 Case(Not implement yet) | 478 | {: #oam_type title="oam message type"} 479 | 480 | ## Performance Measurement 481 | 482 | The performance measurement message is follow TWAMP algorithm.This OAM message 483 | could be used for Authentication mode( with HMAC-SHA256) or 484 | Crypt mode(AES-GCM-256). 485 | Crypto key could be synchonized via out-of-band channel. 486 | 487 | PMType: 488 | 489 | |ID | Type | Usage | 490 | |-----:|:---------------:|:--------------------------------------------| 491 | | 0 | PM_Request | Performance Measurement | 492 | | 1 | PM_Response | Performance Measurement | 493 | {: #oam_pm_type title="oam perf measurement message type"} 494 | 495 | 496 | ### PM_Request Message 497 | The initiator send packet with PMType = 0, and it contains sending 498 | timestamp. 
499 | 500 | ~~~ 501 | 0 1 2 3 502 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 503 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 504 | | OAM Type=0 |PMType=0 | MBZ | 505 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 506 | | Sequence Number | 507 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 508 | | TimeStamp | 509 | | | 510 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 511 | | HMAC(Optional) | 512 | | | 513 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 514 | ~~~ 515 | {: #oam-ls-req-format title="SRoU OAM Perf Measurement Request Message"} 516 | 517 | Sequence Number: 518 | Start from zero, add one after send request packet. 519 | 520 | TimeStamp: 521 | Packet sending timestamp. 522 | 523 | HMAC(Optional): 524 | HMAC-SHA256 used for message authentication. 525 | 526 | 527 | ### PM_Response Message 528 | The responder message is with PMType = 1, and it contains: 529 | 530 | ~~~ 531 | 0 1 2 3 532 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 533 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 534 | | OAM Type=0 |PMType=1 | MBZ | 535 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 536 | | Sequence Number | 537 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 538 | | TimeStamp | 539 | | | 540 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 541 | | Recieved TimeStamp | 542 | | | 543 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 544 | | Sender Sequence Number | 545 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 546 | | Sender TimeStamp | 547 | | | 548 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 549 | | HMAC(Optional) | 550 | | | 551 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 552 | ~~~ 553 | {: #oam-pm-resp-format title="SRoU OAM Perf Measurement Response 
Message"} 554 | 555 | Sequence Number: 556 | Start from zero, add one after send response packet. 557 | 558 | TimeStamp: 559 | sending timestamp. 560 | 561 | Recieved Timestamp: 562 | The correspond request message recieved timestamp. 563 | 564 | Sender Sequence Number: 565 | Exactly copied from the correspond request message 566 | 567 | Sender Timestamp: 568 | Exactly copied from the correspond request message 569 | 570 | HMAC(Optional): 571 | HMAC-SHA256 used for message authentication. 572 | 573 | 574 | ## STUN Service 575 | 576 | SRoU forwarding endpoint may stay behind NAT, it request STUN service to 577 | discover the public network address. 578 | 579 | Initiator send address and port with ALL-ZERO to STUN Server, STUN server 580 | copy the recieve source address and port in this payload, and generate HMAC. 581 | The STUN Server's key could be propogate to initiator by a out-of-band channel. 582 | 583 | ~~~ 584 | 0 1 2 3 585 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 586 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 587 | | OAM Type=0 | MBZ | 588 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 589 | | IP Address | 590 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 591 | | Port | 592 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 593 | | HMAC | 594 | | | 595 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 596 | ~~~ 597 | {: #oam-stun-format title="SRoU OAM STUN Header"} 598 | 599 | 600 | # Usage 601 | 602 | ## Traffic engineering over Internet 603 | 604 | ~~~ 605 | Client-------R1------------Internet--------------R2-----------Server 606 | | | 607 | | | 608 | R3----V1----PubliCloud--------V2-----| 609 | ~~~ 610 | {: #use-1 title="Traffic Engineering over internet"} 611 | 612 | Many video/conferencing application requires traffic engineering over IPv4 613 | Internet, Webex/Zoom/Teams may setup V1,V2 in public cloud, The client and 614 | 
server could encode the V1/V2 information in the SRoU header for traffic engineering. 615 | 616 | ## Multipath forwarding 617 | 618 | With the same topology as in {{use-1}}, a customer could ask the server to transmit packets 619 | over different paths; the two paths have the same Flow-ID. QUIC could be used in this case 620 | to provide multistream/multihoming support. 621 | 622 | ## Micro Segmentation 623 | 624 | With the same topology as in {{use-1}}, the interim routers R1/R2/R3 and V1/V2 could 625 | insert a uSeg Sub-TLV based on the client and server uSeg identity, and other interim 626 | network equipment could implement a security policy or QoS policy based on this 627 | sub-TLV. 628 | 629 | ## Container Network 630 | 631 | ~~~ 632 | C1----SideCar1-----L1-----S1------L2----SideCar2-------C2 633 | | | 634 | |------S2-------| 635 | C1,C2: Container 636 | L1,L2: Leaf switch 637 | S1,S2: Spine switch 638 | ~~~ 639 | {: #use-3 title="Service-Mesh & Container Network"} 640 | 641 | SRoU with QUIC could also be used for the container network interface, especially 642 | for a service-mesh sidecar. The sidecar could learn the datacenter underlay 643 | topology via BGP-LinkState, and use the SRH to select the best path to avoid congestion. 644 | At the same time, all traffic is encrypted by {{!I-D.ietf-quic-tls}}. 645 | 646 | ## MPLS-SR with SDWAN 647 | 648 | ~~~ 649 | S1---INET(ipv4)----PE1------MPLS------PE2----S2 650 | 651 | S1,S2: SDWAN Router 652 | PE1,PE2: SR enabled MPLS PE 653 | ~~~ 654 | {: #sr-sdwan-topology title="MPLS-SR with SDWAN"} 655 | 656 | S1 will set up an IPSec SA with S2 for end-to-end encryption, 657 | and it will use a BSID between PE1--PE2 for traffic engineering. 658 | 659 | An MPLS based BSID and an IPv4 based locator could be encoded in a uSID. A distributed 660 | mapping table could be used to translate the uSID to a packet action. 
661 | 662 | ~~~ 663 | IP/UDP Header { 664 | Source IP: S1, 665 | Destination IP: PE1, 666 | Source Port: srcport, 667 | Destination Port: IPSec, 668 | } 669 | SRoU Header { 670 | SegmentType = 0x1, 671 | SR_HDR_Len = 2, 672 | Last Entry = 0x0, 673 | Segment Left = 0, 674 | SegmentList[0] = uSID: FC0:2222:3333:4444:: 675 | } 676 | ~~~ 677 | {: #type-1-s1-pe1 title="Type:0x1 S1-->PE1 Packet Header"} 678 | 679 | ## Cloud Native Network platform 680 | 681 | Each SRoU forwarder relies only on a UDP socket, so it could be implemented 682 | as a container. Customers could deploy such SRoU enabled containers in multiple 683 | clouds to provide a cloud-agnostic solution. All containers could be managed 684 | by K8S. 685 | 686 | A distributed K-V store could be used for SRoU forwarder service registration and 687 | routing (announcing prefixes). All the SRoU forwarders could measure their peers' 688 | reachability/jitter/loss and update the link-state in the K-V store. Forwarding 689 | policy could also be synchronized via the K-V store. Detailed information will be 690 | provided in another I-D (ETCD based disaggregated SDN control plane). 691 | 692 | An SRoU forwarder could also be implemented in BPF for container communication. It 693 | will provide host level traffic engineering for massive scale datacenters to 694 | reduce East-West traffic congestion. 695 | 696 | The best practice for SRoU is to work with QUIC. 697 | SRoU with the QUIC transport protocol provides the following benefits for SDWAN: 698 | 699 | * Stream multiplexing 700 | * Stream and connection-level flow control 701 | * Low-latency connection establishment 702 | * Connection migration and resilience to NAT rebinding 703 | * Authenticated and encrypted header and payload 704 | 705 | SRoU adds traffic-engineering and VPN capabilities for SDWAN. 
706 | Many existing SDWAN features could gain the benefits like: 707 | 708 | * TCP optimization 709 | * Packet duplication 710 | 711 | # Security Considerations 712 | 713 | The SRoU forwarder must validate the packet, FlowID could be used for source 714 | validation. It could be a token based solution, this token could be assigned 715 | by controller with a dedicated expire time. Source/Dest device ID and group 716 | cloud encode in flowid and signed by controller, just like JWT. 717 | 718 | A blacklist on controller k-v store could be implemented to block device when 719 | the token does not expire. 720 | 721 | # IANA Considerations 722 | 723 | ## SRoU with QUIC 724 | 725 | The magic number in SRoU must be ZERO to distiguish with QUIC Long/Short 726 | packet format. 727 | 728 | 729 | # Acknowledgements 730 | {:numbered="false"} 731 | 732 | The following people provided substantial contributions to this document: 733 | 734 | - Bin Shi, Cisco Systems, Inc. 735 | - Yijen Wang, Cisco Systems, Inc. 736 | - Pix Xu, Cisco Systems, Inc. 737 | 738 | 739 | 740 | -------------------------------------------------------------------------------- /draft-zartbot-sr-udp.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SPRING K. Fang 6 | Internet-Draft Cisco Systems, Inc. 7 | Intended status: Experimental Y. Li 8 | Expires: 5 February 2021 Google, Inc. 9 | F. Cai 10 | X. Jiang 11 | Cisco Systems, Inc. 12 | 4 August 2020 13 | 14 | 15 | Segment Routing over UDP(SRoU) 16 | draft-zartbot-sr-udp-00 17 | 18 | Abstract 19 | 20 | This document defines the Segment Routing Header[RFC8754] extension 21 | in UDP transport protocol with Network Address Translation Traversal. 22 | 23 | Status of This Memo 24 | 25 | This Internet-Draft is submitted in full conformance with the 26 | provisions of BCP 78 and BCP 79. 27 | 28 | Internet-Drafts are working documents of the Internet Engineering 29 | Task Force (IETF). 
Note that other groups may also distribute 30 | working documents as Internet-Drafts. The list of current Internet- 31 | Drafts is at https://datatracker.ietf.org/drafts/current/. 32 | 33 | Internet-Drafts are draft documents valid for a maximum of six months 34 | and may be updated, replaced, or obsoleted by other documents at any 35 | time. It is inappropriate to use Internet-Drafts as reference 36 | material or to cite them other than as "work in progress." 37 | 38 | This Internet-Draft will expire on 5 February 2021. 39 | 40 | Copyright Notice 41 | 42 | Copyright (c) 2020 IETF Trust and the persons identified as the 43 | document authors. All rights reserved. 44 | 45 | This document is subject to BCP 78 and the IETF Trust's Legal 46 | Provisions Relating to IETF Documents (https://trustee.ietf.org/ 47 | license-info) in effect on the date of publication of this document. 48 | Please review these documents carefully, as they describe your rights 49 | and restrictions with respect to this document. Code Components 50 | extracted from this document must include Simplified BSD License text 51 | as described in Section 4.e of the Trust Legal Provisions and are 52 | provided without warranty as described in the Simplified BSD License. 53 | 54 | 55 | 56 | Fang, et al. Expires 5 February 2021 [Page 1] 57 | 58 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 59 | 60 | 61 | Table of Contents 62 | 63 | 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2 64 | 1.1. Specification of Requirements . . . . . . . . . . . . . . 3 65 | 1.2. Motivation . . . . . . . . . . . . . . . . . . . . . . . 3 66 | 2. SR over UDP(SRoU) Packet encapsulation . . . . . . . . . . . 3 67 | 2.1. SR over UDP(SRoU) Header . . . . . . . . . . . . . . . . 4 68 | 3. Packet Processing . . . . . . . . . . . . . . . . . . . . . . 7 69 | 3.1. Type:0x1, IPv4 Locator Mode . . . . . . . . . . . . . . . 8 70 | 3.1.1. General forwarding case . . . . . . . . . . . . . . . 8 71 | 3.1.2. 
Programable forwarding case . . . . . . . . . . . . . 11 72 | 3.2. Type:0x2, SRv6 format . . . . . . . . . . . . . . . . . . 11 73 | 3.3. Type:0x3, Compressed Segment List . . . . . . . . . . . . 11 74 | 3.3.1. Service Registration & Mapping . . . . . . . . . . . 11 75 | 3.4. Optional TLV . . . . . . . . . . . . . . . . . . . . . . 12 76 | 3.4.1. SR Integrity TLV . . . . . . . . . . . . . . . . . . 12 77 | 3.4.2. Micro-segmentation(uSeg) . . . . . . . . . . . . . . 12 78 | 3.4.3. End.PacketInfo . . . . . . . . . . . . . . . . . . . 12 79 | 4. OAM . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 12 80 | 4.1. Performance Measurement . . . . . . . . . . . . . . . . . 13 81 | 4.1.1. PM_Request Message . . . . . . . . . . . . . . . . . 14 82 | 4.1.2. PM_Response Message . . . . . . . . . . . . . . . . . 14 83 | 4.2. STUN Service . . . . . . . . . . . . . . . . . . . . . . 15 84 | 5. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . 16 85 | 5.1. Traffic engineering over Internet . . . . . . . . . . . . 16 86 | 5.2. Multipath forwarding . . . . . . . . . . . . . . . . . . 16 87 | 5.3. Micro Segmentation . . . . . . . . . . . . . . . . . . . 16 88 | 5.4. Container Network . . . . . . . . . . . . . . . . . . . . 16 89 | 5.5. MPLS-SR with SDWAN . . . . . . . . . . . . . . . . . . . 17 90 | 5.6. Cloud Native Network platform . . . . . . . . . . . . . . 18 91 | 6. Security Considerations . . . . . . . . . . . . . . . . . . . 18 92 | 7. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 19 93 | 7.1. SRoU with QUIC . . . . . . . . . . . . . . . . . . . . . 19 94 | Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . . 19 95 | References . . . . . . . . . . . . . . . . . . . . . . . . . . . 19 96 | Normative References . . . . . . . . . . . . . . . . . . . . . 19 97 | Informative References . . . . . . . . . . . . . . . . . . . . 20 98 | Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 20 99 | 100 | 1. 
Introduction 101 | 102 | Many UDP-based transport protocols (e.g., IPSec/DTLS/QUIC) could provide 103 | a secure transportation layer to handle overlay traffic. However, they 104 | are not flexible enough for source-based path enforcement. 105 | 106 | This document defines a new Segment Routing Header in UDP payload to 107 | enable segment routing over UDP(SRoU) for IPSec/DTLS/QUIC or other 108 | UDP-based traffic. 109 | 110 | 111 | 112 | Fang, et al. Expires 5 February 2021 [Page 2] 113 | 114 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 115 | 116 | 117 | Segment Routing over UDP(SRoU) interworking with QUIC could provide a 118 | generic programmable and secure transport layer for next generation 119 | applications. 120 | 121 | Discussion of this work is encouraged to happen on GitHub repository 122 | which contains the draft: https://github.com/zartbot/draft-quic-sr 123 | (https://github.com/zartbot/draft-quic-sr) 124 | 125 | 1.1. Specification of Requirements 126 | 127 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 128 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 129 | "OPTIONAL" in this document are to be interpreted as described in BCP 130 | 14 [RFC2119] [RFC8174] when, and only when, they appear in all 131 | capitals, as shown here. 132 | 133 | 1.2. Motivation 134 | 135 | Segment Routing provides source-based path enforcement and 136 | transportation level programmability but lacks IPv4 support for 137 | transport over internet. 138 | 139 | MPLS-over-UDP[RFC7510] and MPLS Segment Routing over IP[RFC8663] 140 | defined SR-MPLS over IPv4 network, but they lack NAT traversal 141 | capabilities. 142 | 143 | Many SDWAN vendors defined their private protocols for routing 144 | control over multiple public cloud and internet, which makes 145 | multi-vendor interoperability hard. 146 | 147 | Many applications may require intelligent traffic steering(CDN/LB 148 | case), SRoU with QUIC could be used in these cases.
149 | 150 | 2. SR over UDP(SRoU) Packet encapsulation 151 | 152 | The SRoU defined a generic segment routing enabled transport 153 | layer,the SR Header insert in UDP payload. 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | Fang, et al. Expires 5 February 2021 [Page 3] 169 | 170 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 171 | 172 | 173 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 174 | | IP Header | 175 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 176 | | UDP Header | 177 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 178 | | SRoU Header | 179 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 180 | | | 181 | | Payload | 182 | | | 183 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 184 | 185 | Figure 1: SRoU encapsulation 186 | 187 | 2.1. SR over UDP(SRoU) Header 188 | 189 | SR over UDP must be present at the head of UDP payload. 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | Fang, et al. 
Expires 5 February 2021 [Page 4] 225 | 226 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 227 | 228 | 229 | 0 1 2 3 230 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 231 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 232 | | Magic Number | SRoU Length | Flow ID Length| Protocol-ID | 233 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 234 | | | 235 | | Flow ID( Variable length) | 236 | | | 237 | | | 238 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 239 | | Source Address | 240 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 241 | | Source Port | 242 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 243 | 244 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 245 | | Segment Type | SR Hdr Len | Last Entry | Segments Left | 246 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 247 | | | 248 | | Segment List[0] (length based on segment type) | 249 | | | 250 | | | 251 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 252 | | | 253 | | | 254 | ... 255 | | | 256 | | | 257 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 258 | | | 259 | | Segment List[0] (length based on segment type) | 260 | | | 261 | | | 262 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 263 | // // 264 | // Optional Type Length Value objects (variable) // 265 | // // 266 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 267 | 268 | Figure 2: SRoU Header 269 | 270 | Magic Number: 1 Byte field For QUIC: could set to ALL ZERO to 271 | diffenciate with original header. For IPSec: could set to 0xFE value 272 | and avoid SPI allocation in this range. *0x00 may conflict with NON- 273 | ESP HEADER *0xFF may conflict with KeepAlive Message 274 | 275 | SRoU Length: 1 Byte, The byte length of a SRoU header. 276 | 277 | 278 | 279 | 280 | Fang, et al. 
Expires 5 February 2021 [Page 5] 281 | 282 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 283 | 284 | 285 | FlowID Length: 1 Byte, The byte length of FlowID field. 286 | 287 | Protocol-ID: 288 | 289 | +======+======+====================================+ 290 | | Type | Name | Section | 291 | +======+======+====================================+ 292 | | 0x0 | OAM | for Link state probe and other OAM | 293 | +------+------+------------------------------------+ 294 | | 0x1 | IPv4 | Indicate inner payload is IPv4 pkt | 295 | +------+------+------------------------------------+ 296 | | 0x2 | IPv6 | Indicate inner payload is IPv6 pkt | 297 | +------+------+------------------------------------+ 298 | 299 | Table 1: Protocol ID field 300 | 301 | Source Address: Protocol-ID = 1, this field is 4-Bytes IPv4 address 302 | Protocol-ID = 2, this field is 16-Bytes IPv6 address 303 | 304 | Source Port: Source UDP Port Number 305 | 306 | Segment Type: 307 | 308 | +======+=========================+======+=============+ 309 | | Type | Name | Len | Section | 310 | +======+=========================+======+=============+ 311 | | 0x0 | Reserved | | | 312 | +------+-------------------------+------+-------------+ 313 | | 0x1 | IPv4 Address+ Port | 48b | Section 3.1 | 314 | +------+-------------------------+------+-------------+ 315 | | 0x2 | SRv6 | 128b | Section 3.2 | 316 | +------+-------------------------+------+-------------+ 317 | | 0x3 | Compressed Segment List | 128b | Section 3.3 | 318 | +------+-------------------------+------+-------------+ 319 | 320 | Table 2: Segment Types 321 | 322 | SR Hdr Len: SR Header length, include the SR Header flags Segment- 323 | List and Optional TLV. 324 | 325 | Last Entry: contains the index(zero based), in the Segment List, of 326 | the last element of the Segment List. 327 | 328 | Segments Left: 8-bit unsigned integer. 
Number of route segments 329 | remaining, i.e., number of explicitly listed intermediate nodes 330 | still to be visited before reaching the final destination. 331 | 332 | Segment List[0..n]: 128-bit/48-bit/144-bit addresses to represent 333 | 334 | 335 | 336 | Fang, et al. Expires 5 February 2021 [Page 6] 337 | 338 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 339 | 340 | 341 | the SR Policy. Detailed forwarding behavior will be defined in 342 | Section 3 343 | 344 | TLV: Optional TLV used for future extension. Currently only 345 | the following TLVs are defined. 346 | 347 | +======+======================+==========+===============+ 348 | | Type | Value | Len | Section | 349 | +======+======================+==========+===============+ 350 | | 0x0 | SR Integrity | 32b | Section 3.4.1 | 351 | +------+----------------------+----------+---------------+ 352 | | 0x1 | Micro Segment Policy | variable | Section 3.4.2 | 353 | +------+----------------------+----------+---------------+ 354 | | 0x2 | End.PacketInfo | variable | Section 3.4.3 | 355 | +------+----------------------+----------+---------------+ 356 | 357 | Table 3: Optional TLV 358 | 359 | 3. Packet Processing 360 | 361 | This section describes the packet processing procedure. The 362 | following topology will be used in this section. 363 | 364 | H1---R1----------I1------R3----------+---R4---H2 365 | | | 366 | |-----------R2------------------| 367 | | 368 | | 369 | I2 370 | 371 | I1,I2: Interim Nodes that support SRoU 372 | R1~R4: Traditional Router 373 | H1,H2: Host 374 | 375 | Figure 3: Topology for packet processing 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | Fang, et al.
Expires 5 February 2021 [Page 7] 393 | 394 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 395 | 396 | 397 | +======+======================+===========+================+ 398 | | Host | Address | SRoU Port | Post NAT | 399 | +======+======================+===========+================+ 400 | | H1 | 192.168.1.2 | 5111 | 10.1.1.1:23456 | 401 | +------+----------------------+-----------+----------------+ 402 | | R1 | 192.168.1.1/10.1.1.1 | | | 403 | +------+----------------------+-----------+----------------+ 404 | | R2 | 10.1.2.2 | | | 405 | +------+----------------------+-----------+----------------+ 406 | | R3 | 10.1.3.3 | | | 407 | +------+----------------------+-----------+----------------+ 408 | | R4 | 10.1.4.4 | | | 409 | +------+----------------------+-----------+----------------+ 410 | | H2 | 10.99.2.2 | 443 | 10.1.4.4:443 | 411 | +------+----------------------+-----------+----------------+ 412 | | I1 | 10.99.1.1 | 8811 | | 413 | +------+----------------------+-----------+----------------+ 414 | | I2 | 192.168.99.2 | 8822 | 10.1.2.2:12345 | 415 | +------+----------------------+-----------+----------------+ 416 | 417 | Table 4: IP address table 418 | 419 | 3.1. Type:0x1, IPv4 Locator Mode 420 | 421 | In this mode, the endpoint could directly insert the interim node 422 | IPv4 addresses and port into the segment-list. 423 | 424 | 3.1.1. General forwarding case 425 | 426 | For example, H1 intends to send a packet to H2 via R1->I2--->H2. In this 427 | case the SRoU packet will be NATed twice to show the NAT traversal 428 | workflow. I2's public address could be detected using the STUN[RFC5389] protocol 429 | and synced to all SRoU enabled devices. 430 | 431 | H1 sends a packet with the SRoU Header as below. H1 could use STUN to detect 432 | its public source address, but for simplicity, the 1st hop 433 | SRoU forwarder could update the source ip/port field in the SRoU header.
434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | Fang, et al. Expires 5 February 2021 [Page 8] 449 | 450 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 451 | 452 | 453 | IP/UDP Header { 454 | Source IP: 192.168.1.2, 455 | Destination IP: 10.1.2.2(SegmentList[1],I2 Pre-NAT public address), 456 | Source Port: 5111, 457 | Destination Port: 12345(SegmentList[1],I2 Pre-NAT public port), 458 | } 459 | SRoU Header { 460 | Magic Num = 0x0 461 | SRoU Length = 29 462 | FlowID Length = 0x3 463 | Protocol-ID = 0x1(IPv4), 464 | FlowID = 0x123, 465 | Source Address = 192.168.1.2, 466 | Source Port = 5111, 467 | Segment Left = 0x1, 468 | Last Entry = 0x1, 469 | SegmenetList[0] = 10.1.4.4:443(H2), 470 | SegmenetList[1] = 10.1.2.2:12345(I2), 471 | } 472 | 473 | Figure 4: Type:0x1 H1-->I2 Packet Header 474 | 475 | R1 is a NAT Device it will change the Source IP/Port to 476 | 10.1.1.1:23456. But this router may not have ALG function to modify 477 | SRoU Header.Then packet will send to 10.1.2.2:12345. It will be NAT 478 | again to I2. 479 | 480 | After twice NAT, I2 Recieved packet as below: 481 | 482 | IP/UDP Header { 483 | Source IP: 10.1.1.1(H1 post NAT addr), 484 | Destination IP: 192.168.99.2(I2 private addr), 485 | Source Port: 23456(H1 post NAT port), 486 | Destination Port: 8822(I2 private port), 487 | } 488 | SRoU Header { 489 | Magic Num = 0x0 490 | SRoU Length = 29 491 | FlowID Length = 0x3 492 | Protocol-ID = 0x1(IPv4), 493 | FlowID = 0x123, 494 | Source Address = 192.168.1.2, 495 | Source Port = 5111, 496 | Segment Left = 0x1, 497 | Last Entry = 0x1, 498 | SegmenetList[0] = 10.1.4.4:443(H2), 499 | SegmenetList[1] = 10.1.2.2:12345(I2), 500 | } 501 | 502 | 503 | 504 | Fang, et al. 
Expires 5 February 2021 [Page 9] 505 | 506 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 507 | 508 | 509 | Figure 5: Type:0x1 H1-->I2, I2 Received Packet Header 510 | 511 | If (LastEntry == Segment Left), it indicates that I2 is the 1st hop SRoU 512 | forwarder; it MUST apply ALG to update the Source Address/Port field 513 | from the IP/UDP header. Then it will decrement Segment Left by 1, and 514 | copy SegmentList[0] to DA/Dport. Considering that some interim routers like 515 | R2 have URPF checking, the SA/Sport will also be updated to the I2 SRoU 516 | socket address. 517 | 518 | I2-->H2 packet: 519 | 520 | IP/UDP Header { 521 | Source IP: 192.168.99.2(I2 Private), 522 | Destination IP: 10.1.4.4(SegmentList[0]), 523 | Source Port: 8822(I2 Private), 524 | Destination Port: 443(SegmentList[0]), 525 | } 526 | SRoU Header { 527 | Magic Num = 0x0 528 | SRoU Length = 29 529 | FlowID Length = 0x3 530 | Protocol-ID = 0x1(IPv4), 531 | FlowID = 0x123, 532 | Source Address = 10.1.1.1(update by I2 ALG), 533 | Source Port = 23456(update by I2 ALG), 534 | Segment Left = 0x0(SL--), 535 | Last Entry = 0x1, 536 | SegmentList[0] = 10.1.4.4:443(H2), 537 | SegmentList[1] = 10.1.2.2:12345(I2), 538 | } 539 | 540 | Figure 6: Type:0x1 I2-->H2 Packet Header 541 | 542 | H2 will receive the packet, and if the segment left == 0, it MUST 543 | copy the Source Address and Port into IP/UDP Header and strip out the 544 | SRoU Header and send to udp socket. It may cache the reversed 545 | segmentlist for symmetric routing. 546 | 547 | H2 sends to UDP socket 548 | 549 | IP/UDP Header { 550 | Source IP: 10.1.1.1(Copied from SRoU Src field), 551 | Destination IP: 10.99.2.2(Static NAT by R4), 552 | Source Port: 23456(Copied from SRoU Src field), 553 | Destination Port: 443(SegmentList[0]), 554 | } 555 | UDP Payload { 556 | } 557 | 558 | 559 | 560 | Fang, et al.
Expires 5 February 2021 [Page 10] 561 | 562 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 563 | 564 | 565 | Figure 7: Type:0x1 H2 Send to UDP socket 566 | 567 | 3.1.2. Programable forwarding case 568 | 569 | In type:0x1 mode, each segment has 48bit(32bit IPv4 address with 570 | 16bit port). It could be defined for programable forwarding with 571 | special IPv4 prefix( ie. 255.0.0.0/8): 572 | 573 | Prefix:<32bit functions and args> 574 | 575 | For exmaple we could defined EVPN/VPNv4/VPNv6 function in SRoU header 576 | by SID: 577 | 578 | 255:<16bit End.X function>:<24bit VPN ID> 579 | 580 | 3.2. Type:0x2, SRv6 format 581 | 582 | IPv6 does not need to consider the NAT traversal case, In this mode 583 | almost forwarding action is same as SRv6. This is only used for 584 | application driven traffic steering(like CDN/LB usecase.). It has 585 | some benefit interworking with QUIC, the pure userspace 586 | implementation could provide additional flexibility. 587 | 588 | For example some IOT sensor with legacy kernel stack does not support 589 | SRv6 could use SRoU insert SRH in UDP payload, the 1st hop SRoU 590 | forwarder could convert it to standard SRv6 packet. 591 | 592 | 3.3. Type:0x3, Compressed Segment List 593 | 594 | 3.3.1. Service Registration & Mapping 595 | 596 | I1,I2 use SRoU port as source port to inital STUN[RFC5389] session to 597 | SR mapping server, the mapping server could detect the Post NAT 598 | address and assign SID for each host, and distribute IP/port-SID 599 | mapping database to all the SRoU enabled host. 600 | 601 | +======+================+======+ 602 | | Host | Socket | SID | 603 | +======+================+======+ 604 | | I1 | 10.99.1.1:8811 | 1111 | 605 | +------+----------------+------+ 606 | | I2 | 10.1.2.2:12345 | 2222 | 607 | +------+----------------+------+ 608 | 609 | Table 5: sid mapping 610 | 611 | In this mode the socket information could combined with IPv4 and 612 | IPv6. 613 | 614 | 615 | 616 | Fang, et al. 
Expires 5 February 2021 [Page 11] 617 | 618 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 619 | 620 | 621 | 3.4. Optional TLV 622 | 623 | 3.4.1. SR Integrity TLV 624 | 625 | SR Integrity Tag to validate the SRH. All fields in the SRH except 626 | Segments Left fields need to be checked. 627 | 628 | 3.4.2. Micro-segmentation(uSeg) 629 | 630 | Option-TLV could defined Sub-TLV to support Micro-segmentation 631 | Security policy 632 | 633 | OptionTLV { 634 | 0x1, uSeg{ 635 | 0x0, SRC_GROUP_ID, 636 | 0x1, DST_GROUP_ID, 637 | 0x2, APP_GROUP_ID, 638 | 0x3, SRC_DEVICE_ID, 639 | 0x4, DST_DEVICE_ID, 640 | 0x5, APP_ID, 641 | } 642 | } 643 | 644 | Customer also could encode this microsegment policy header in flowID 645 | field. 646 | 647 | 3.4.3. End.PacketInfo 648 | 649 | This optional TLV defines extened packet info and Segment-end packet 650 | edit function. Sub-TLV defines as below: 651 | 652 | 3.4.3.1. Type:0x0, VPN-ID 653 | 654 | The SDWAN Router could use [I-D.ietf-quic-datagram] as VPN tunnel, 655 | This Sub-TLV defined the VPN-ID inside the tunnel. 656 | 657 | If SRoU header has this sub-TLV, the device MUST decrypt inner 658 | payload and use the VPN-ID for inner packet destination lookup. 659 | 660 | 3.4.3.2. Type:0x1, Orginal Destination Address/Port 661 | 662 | In SR Type 0x3, The original destination address/port cloud not 663 | encode in 128bit field, it could be store in option TLV. 664 | 665 | 4. OAM 666 | 667 | SRoU OAM Packet format is defined as below: 668 | 669 | 670 | 671 | 672 | Fang, et al. 
Expires 5 February 2021 [Page 12] 673 | 674 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 675 | 676 | 677 | 0 1 2 3 678 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 679 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 680 | | Magic Number | SRoU Length | Flow ID Length| P-ID =0x0 | 681 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 682 | | | 683 | | Flow ID( Variable length) | 684 | | | 685 | | | 686 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 687 | | OAM-Type | OAM Payload(Variable Length based on Type) | 688 | +-+-+-+-+-+-+-+-+ + 689 | | | 690 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 691 | 692 | Figure 8: SRoU OAM Header 693 | 694 | OAM-Type: 695 | 696 | +====+====================+=====================================+ 697 | | ID | Type | Usage | 698 | +====+====================+=====================================+ 699 | | 0 | PM | Latency & Loss & Jitter Measurement | 700 | +----+--------------------+-------------------------------------+ 701 | | 1 | IPv4 STUN Request | | 702 | +----+--------------------+-------------------------------------+ 703 | | 2 | IPv4 STUN Response | | 704 | +----+--------------------+-------------------------------------+ 705 | | 3 | IPv6 STUN Request | *Reserved for NAT66 Case(Not | 706 | | | | implement yet) | 707 | +----+--------------------+-------------------------------------+ 708 | | 4 | IPv6 STUN Response | *Reserved for NAT66 Case(Not | 709 | | | | implement yet) | 710 | +----+--------------------+-------------------------------------+ 711 | 712 | Table 6: oam message type 713 | 714 | 4.1. Performance Measurement 715 | 716 | The performance measurement message is follow TWAMP algorithm.This 717 | OAM message could be used for Authentication mode( with HMAC-SHA256) 718 | or Crypt mode(AES-GCM-256). Crypto key could be synchonized via out- 719 | of-band channel. 
720 | 721 | PMType: 722 | 723 | 724 | 725 | 726 | 727 | 728 | Fang, et al. Expires 5 February 2021 [Page 13] 729 | 730 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 731 | 732 | 733 | +====+=============+=========================+ 734 | | ID | Type | Usage | 735 | +====+=============+=========================+ 736 | | 0 | PM_Request | Performance Measurement | 737 | +----+-------------+-------------------------+ 738 | | 1 | PM_Response | Performance Measurement | 739 | +----+-------------+-------------------------+ 740 | 741 | Table 7: oam perf measurement message type 742 | 743 | 4.1.1. PM_Request Message 744 | 745 | The initiator send packet with PMType = 0, and it contains sending 746 | timestamp. 747 | 748 | 0 1 2 3 749 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 750 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 751 | | OAM Type=0 |PMType=0 | MBZ | 752 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 753 | | Sequence Number | 754 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 755 | | TimeStamp | 756 | | | 757 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 758 | | HMAC(Optional) | 759 | | | 760 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 761 | 762 | Figure 9: SRoU OAM Perf Measurement Request Message 763 | 764 | Sequence Number: Start from zero, add one after send request packet. 765 | 766 | TimeStamp: Packet sending timestamp. 767 | 768 | HMAC(Optional): HMAC-SHA256 used for message authentication. 769 | 770 | 4.1.2. PM_Response Message 771 | 772 | The responder message is with PMType = 1, and it contains: 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | Fang, et al. 
Expires 5 February 2021 [Page 14] 785 | 786 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 787 | 788 | 789 | 0 1 2 3 790 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 791 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 792 | | OAM Type=0 |PMType=1 | MBZ | 793 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 794 | | Sequence Number | 795 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 796 | | TimeStamp | 797 | | | 798 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 799 | | Recieved TimeStamp | 800 | | | 801 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 802 | | Sender Sequence Number | 803 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 804 | | Sender TimeStamp | 805 | | | 806 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 807 | | HMAC(Optional) | 808 | | | 809 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 810 | 811 | Figure 10: SRoU OAM Perf Measurement Response Message 812 | 813 | Sequence Number: Start from zero, add one after send response packet. 814 | 815 | TimeStamp: sending timestamp. 816 | 817 | Recieved Timestamp: The correspond request message recieved 818 | timestamp. 819 | 820 | Sender Sequence Number: Exactly copied from the correspond request 821 | message 822 | 823 | Sender Timestamp: Exactly copied from the correspond request message 824 | 825 | HMAC(Optional): HMAC-SHA256 used for message authentication. 826 | 827 | 4.2. STUN Service 828 | 829 | SRoU forwarding endpoint may stay behind NAT, it request STUN service 830 | to discover the public network address. 831 | 832 | Initiator send address and port with ALL-ZERO to STUN Server, STUN 833 | server copy the recieve source address and port in this payload, and 834 | generate HMAC. The STUN Server's key could be propogate to initiator 835 | by a out-of-band channel. 
836 | 837 | 838 | 839 | 840 | Fang, et al. Expires 5 February 2021 [Page 15] 841 | 842 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 843 | 844 | 845 | 0 1 2 3 846 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 847 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 848 | | OAM Type=0 | MBZ | 849 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 850 | | IP Address | 851 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 852 | | Port | 853 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 854 | | HMAC | 855 | | | 856 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 857 | 858 | Figure 11: SRoU OAM STUN Header 859 | 860 | 5. Usage 861 | 862 | 5.1. Traffic engineering over Internet 863 | 864 | Client-------R1------------Internet--------------R2-----------Server 865 | | | 866 | | | 867 | R3----V1----PubliCloud--------V2-----| 868 | 869 | Figure 12: Traffic Engineering over internet 870 | 871 | Many video/conferencing application requires traffic engineering over 872 | IPv4 Internet, Webex/Zoom/Teams may setup V1,V2 in public cloud, The 873 | client and server could encode the V1/V2 information in SRoU header 874 | for traffic engineering 875 | 876 | 5.2. Multipath forwarding 877 | 878 | Same as previously topoloy Figure 12, customer cloud ask server 879 | transmit packet over different path, two path have same Flow-ID, QUIC 880 | could be used in this case to provide multistream/multihoming 881 | support. 882 | 883 | 5.3. Micro Segmentation 884 | 885 | Same as previously topoloy Figure 12, the interim Router: R1/R2/R3, 886 | V1,V2 could insert uSeg Sub-TLV based on client and server uSeg 887 | identity, and other interim network equipment could based on this 888 | sub-TLV implement security policy or QoS policy. 889 | 890 | 5.4. Container Network 891 | 892 | 893 | 894 | 895 | 896 | Fang, et al. 
Expires 5 February 2021 [Page 16] 897 | 898 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 899 | 900 | 901 | C1----SideCar1-----L1-----S1------L2----SideCar2-------C2 902 | | | 903 | |------S2-------| 904 | C1,C2: Container 905 | L1,L2: Leaf switch 906 | S1,S2: Spine switch 907 | 908 | Figure 13: Service-Mesh & Container Network 909 | 910 | SRoU with QUIC also could be used for container network interface, 911 | especially for service-mesh sidecar. The sidecar could aware the 912 | Datacenter underlay topology by BGP-LinkState, and use SRH select 913 | best path to avoid congestion. At the same time, all traffic are 914 | encrypted by [I-D.ietf-quic-tls]. 915 | 916 | 5.5. MPLS-SR with SDWAN 917 | 918 | S1---INET(ipv4)----PE1------MPLS------PE2----S2 919 | 920 | S1,S2: SDWAN Router 921 | PE1,PE2: SR enabled MPLS PE 922 | 923 | Figure 14: MPLS-SR with SDWAN 924 | 925 | S1 will setup IPSec SA with S2 for end-to-end encryption, And it will 926 | use BSID between PE1-PE2 for traffic engineering. 927 | 928 | MPLS based BSID and IPv4 based locator could be encoded in uSID.A 929 | distributed mapping table could be used to translate uSID to packet 930 | action. 931 | 932 | IP/UDP Header { 933 | Source IP: H1, 934 | Destination IP: PE1, 935 | Source Port: srcport, 936 | Destination Port: IPSec, 937 | } 938 | SRoU Header { 939 | SegmentType = 0x1, 940 | SR_HDR_Len = 2, 941 | Last Entry = 0x0, 942 | Segment Left = 0, 943 | SegmenetList[0] = uSID: FC0:2222:3333:4444:: 944 | } 945 | 946 | Figure 15: Type:0x1 S1-->PE1 Packet Header 947 | 948 | 949 | 950 | 951 | 952 | Fang, et al. Expires 5 February 2021 [Page 17] 953 | 954 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 955 | 956 | 957 | 5.6. Cloud Native Network platform 958 | 959 | Each of the SRoU forwarder only rely on a UDP socket, it could be 960 | implement by a container. Customer could deploy such SRoU enable 961 | container in multiple cloud to provide a cloud-angonostic solution. 
962 | All containers could be managed by K8S. 963 | 964 | A distributed K-V store could be used for SRoU forwarder service 965 | registration, routing(announce prefix), all the SRoU forwarders could 966 | measure peer's reachability/jitter/loss and update link-state to the 967 | K-V store. Forwarding policy also could be synced by the K-V store. 968 | Detailed information will be provided in another I.D(ETCD based 969 | disaggregated SDN control plane). 970 | 971 | SRoU forwarder also could be implemented by BPF for container 972 | communication. It will provide host level traffic engineering for 973 | massive scale datacenter to reduce the West-East traffic congestion. 974 | 975 | The best practice for SRoU is working with QUIC. SRoU with QUIC 976 | transport protocol provides the following benefits for SDWAN : 977 | 978 | * Stream multiplexing 979 | 980 | * Stream and connection-level flow control 981 | 982 | * Low-latency connection establishment 983 | 984 | * Connection migration and resilience to NAT rebinding 985 | 986 | * Authenticated and encrypted header and payload 987 | 988 | SRoU adds traffic-engineering and VPN capabilities for SDWAN. Many 989 | existing SDWAN features could gain the benefits like: 990 | 991 | * TCP optimization 992 | 993 | * Packet duplication 994 | 995 | 6. Security Considerations 996 | 997 | The SRoU forwarder must validate the packet; FlowID could be used for 998 | source validation. It could be a token based solution, where the token 999 | is assigned by the controller with a dedicated expiry time. 1000 | Source/Dest device ID and group could be encoded in the FlowID and signed by 1001 | the controller, just like JWT. 1002 | 1003 | A blacklist on the controller k-v store could be implemented to block 1004 | a device whose token has not yet expired. 1005 | 1006 | 1007 | 1008 | Fang, et al. Expires 5 February 2021 [Page 18] 1009 | 1010 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 1011 | 1012 | 1013 | 7. IANA Considerations 1014 | 1015 | 7.1.
SRoU with QUIC 1016 | 1017 | The magic number in SRoU must be ZERO to distinguish it from the QUIC Long/ 1018 | Short packet formats. 1019 | 1020 | Acknowledgements 1021 | 1022 | The following people provided substantial contributions to this 1023 | document: 1024 | 1025 | * Bin Shi, Cisco Systems, Inc. 1026 | 1027 | * Yijen Wang, Cisco Systems, Inc. 1028 | 1029 | * Pix Xu, Cisco Systems, Inc. 1030 | 1031 | References 1032 | 1033 | Normative References 1034 | 1035 | [I-D.ietf-quic-datagram] 1036 | Pauly, T., Kinnear, E., and D. Schinazi, "An Unreliable 1037 | Datagram Extension to QUIC", Work in Progress, Internet- 1038 | Draft, draft-ietf-quic-datagram-00, 26 February 2020, 1039 | . 1041 | 1042 | [I-D.ietf-quic-tls] 1043 | Thomson, M. and S. Turner, "Using TLS to Secure QUIC", 1044 | Work in Progress, Internet-Draft, draft-ietf-quic-tls-29, 1045 | 9 June 2020, . 1047 | 1048 | [RFC5389] Rosenberg, J., Mahy, R., Matthews, P., and D. Wing, 1049 | "Session Traversal Utilities for NAT (STUN)", RFC 5389, 1050 | DOI 10.17487/RFC5389, October 2008, 1051 | . 1052 | 1053 | [RFC7510] Xu, X., Sheth, N., Yong, L., Callon, R., and D. Black, 1054 | "Encapsulating MPLS in UDP", RFC 7510, 1055 | DOI 10.17487/RFC7510, April 2015, 1056 | . 1057 | 1058 | 1059 | 1060 | 1061 | 1062 | 1063 | 1064 | Fang, et al. Expires 5 February 2021 [Page 19] 1065 | 1066 | Internet-Draft Segment Routing over UDP(SRoU) August 2020 1067 | 1068 | 1069 | [RFC8663] Xu, X., Bryant, S., Farrel, A., Hassan, S., Henderickx, 1070 | W., and Z. Li, "MPLS Segment Routing over IP", RFC 8663, 1071 | DOI 10.17487/RFC8663, December 2019, 1072 | . 1073 | 1074 | [RFC8754] Filsfils, C., Ed., Dukes, D., Ed., Previdi, S., Leddy, J., 1075 | Matsushima, S., and D. Voyer, "IPv6 Segment Routing Header 1076 | (SRH)", RFC 8754, DOI 10.17487/RFC8754, March 2020, 1077 | .
1078 | 1079 | Informative References 1080 | 1081 | [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate 1082 | Requirement Levels", BCP 14, RFC 2119, 1083 | DOI 10.17487/RFC2119, March 1997, 1084 | . 1085 | 1086 | [RFC8174] Leiba, B., "Ambiguity of Uppercase vs Lowercase in RFC 1087 | 2119 Key Words", BCP 14, RFC 8174, DOI 10.17487/RFC8174, 1088 | May 2017, . 1089 | 1090 | Authors' Addresses 1091 | 1092 | Kevin Fang 1093 | Cisco Systems, Inc. 1094 | 1095 | Email: zartbot.ietf@gmail.com 1096 | 1097 | 1098 | Yinghao Li 1099 | Google, Inc. 1100 | 1101 | Email: liyinghao@gmail.com 1102 | 1103 | 1104 | Feng Cai 1105 | Cisco Systems, Inc. 1106 | 1107 | Email: fecai@cisco.com 1108 | 1109 | 1110 | Xing Jiang 1111 | Cisco Systems, Inc. 1112 | 1113 | Email: jamjiang@cisco.com 1114 | 1115 | 1116 | 1117 | 1118 | 1119 | 1120 | Fang, et al. Expires 5 February 2021 [Page 20] 1121 | -------------------------------------------------------------------------------- /draft-zartbot-srou-signalling.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Distributed KV Store based Routing protocol for SR over UDP(SRoU) 8 | 9 | 10 | 11 | 12 | 16 | 17 | 18 | 19 | 20 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 | 808 | 809 | 810 | 811 | 812 | 813 |
Internet-DraftDistributed KV Store based Routing protocol for SR over UDP(SRoU)August 2020
Fang, et al.Expires 20 February 2021[Page]
814 |
815 |
816 |
817 |
Workgroup:
818 |
SPRING
819 |
Internet-Draft:
820 |
draft-zartbot-srou-control-00
821 |
Published:
822 |
823 | 824 |
825 |
Intended Status:
826 |
Experimental
827 |
Expires:
828 |
829 |
Authors:
830 |
831 |
832 |
K. Fang
833 |
Cisco Systems, Inc.
834 |
835 |
836 |
Y. Li
837 |
Google, Inc.
838 |
839 |
840 |
F. Cai
841 |
Cisco Systems, Inc.
842 |
843 |
844 |
X. Jiang
845 |
Cisco Systems, Inc.
846 |
847 |
848 |
849 |
850 |

Distributed KV Store based Routing protocol for SR over UDP(SRoU)

851 |
852 |

Abstract

853 |

This document defines the Distributed KV store based routing protocol for 854 | Segment Routing over UDP.

855 |
856 |
857 |
858 |

859 | Status of This Memo 860 |

861 |

862 | This Internet-Draft is submitted in full conformance with the 863 | provisions of BCP 78 and BCP 79.

864 |

865 | Internet-Drafts are working documents of the Internet Engineering Task 866 | Force (IETF). Note that other groups may also distribute working 867 | documents as Internet-Drafts. The list of current Internet-Drafts is 868 | at https://datatracker.ietf.org/drafts/current/.

869 |

870 | Internet-Drafts are draft documents valid for a maximum of six months 871 | and may be updated, replaced, or obsoleted by other documents at any 872 | time. It is inappropriate to use Internet-Drafts as reference 873 | material or to cite them other than as "work in progress."

874 |

875 | This Internet-Draft will expire on 20 February 2021.

876 |
877 |
878 | 898 |
899 |
900 |

901 | Table of Contents 902 |

903 | 978 |
979 |
980 |
981 |
982 |

983 | 1. Introduction 984 |

985 |

This draft provides control plane support for SRoU (Segment Routing over UDP).

986 |

Discussion of this work is encouraged to happen on GitHub repository which 987 | contains the draft: https://github.com/zartbot/draft-quic-sr

988 |
989 |
990 |

991 | 1.1. Specification of Requirements 992 |

993 |

The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 994 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 995 | "OPTIONAL" in this document are to be interpreted as described in BCP 14 996 | [RFC2119] [RFC8174] when, and only when, 997 | they appear in all capitals, as shown here.

998 |
999 |
1000 |
1001 |
1002 |

1003 | 1.2. Motivation 1004 |

1005 |

SRoU supports UDP transport sessions over the internet, but it lacks reachability 1006 | detection and routing control, and existing routing protocols like BGP-EVPN do not 1007 | provide dynamic NAT traversal capability.

1008 |

This document provides a distributed KV store based routing protocol for SRoU.

1009 |
1010 |
1011 |
1012 |
1013 |

1014 | 1.3. Overview 1015 |

1016 |

The routing protocol is based on source routing: each ingress node 1017 | could get the overlay prefix and destination location mapping from the distributed KV 1018 | store, then fetch the linkstate database from this KV store 1019 | and execute the A* algorithm to search for a candidate path which meets the SLA 1020 | requirement.

1021 |
1022 |
1023 |
1024 |
1025 |
1026 |
1027 |

1028 | 2. Node abstraction and registration 1029 |

1030 |

Each node has the following attributes:

1031 |
    1032 |
  1. Role: the system contains different node types; the role attribute is a uint16 1033 | value which is one of: 1034 |
  2. 1035 |
1036 |
1037 | 1038 | 1042 | 1043 | 1044 | 1045 | 1046 | 1047 | 1048 | 1049 | 1050 | 1051 | 1052 | 1053 | 1054 | 1055 | 1056 | 1057 | 1058 | 1059 | 1060 | 1061 | 1062 | 1063 | 1064 | 1065 | 1066 |
1039 | Table 1: 1040 | Node Role 1041 |
TypeNameSection
0x0STUNThis node is used as a STUN server to help other nodes discovery their public address.This node must deploy with a public internet address or behind static 1:1 NAT
0x1FabricThis node type is used as a interim node to relay the SRoU traffic, this node MUST initial TWAMP link probe to other Fabric node and report linkstate to KV Store.
0x2LinecardThis node type is used to connect existing network, it could use TWAMP probe other Fabric Node or Linecard node
1067 |
1068 |
    1069 |
  1. SiteID: uint32 number, identifying nodes which belong to the same site or 1070 | Autonomous System. 1071 |
  2. 1072 |
  3. SystemName: unique string type to indicate a node. 1073 |
  4. 1074 |
  5. Label: unique 24bit value, allocation algorithm is described in the following 1075 | section. 1076 |
  6. 1077 |
  7. Location: Optional field. It contains two float32 values (latitude and 1078 | longitude) to indicate the geographic location. 1079 |
  8. 1080 |
1081 |
1082 |
1083 |

1084 | 2.1. Node Label allocation 1085 |

1086 |

Each node initiates a TLS session to the distributed KV store and acquires a distributed 1087 | lock with key "/lock/systemlabel". Once it holds the lock, the node fetches the prefix "/systemlabel" to 1088 | get all label mappings. It then assigns the smallest 1089 | integer "X" not present in the list as its system label, registers it to the KV store 1090 | by key="/systemlabel/X", and releases the distributed lock. All 1091 | fabric nodes MUST watch "/systemlabel" to update their local node mapping 1092 | tables; a Linecard node may fetch the "/systemlabel" key when it needs to optimize 1093 | the local route.

1094 |

This System Label could be used for cSID encoding, or converted by a VPN-based client linecard 1095 | node into its tunnel address.

1096 |
1097 |
1098 |
1099 |
1100 |

1101 | 2.2. Node registration 1102 |

1103 |

Each node will send Key="/node/role/systemName" and Value=" SiteID,SystemLabel, 1104 | Lat,Long" to the distributed KV store.

1105 |
1106 |
1107 |
1108 |
1109 |
1110 |
1111 |

1112 | 3. SRoU Locator and Route 1113 |

1114 |

Each node may have multiple underlay sockets, which may be behind a dynamic NAT. 1115 | It MUST fetch the STUN list from "/node/stun" and "/service/stun" to get 1116 | the STUN server address list, then send an SRoU OAM-STUN packet to a randomly 1117 | selected STUN server to get its public address.

1118 |

Once the socket gets its public address, the node encodes the UDP socket information as an 1119 | SRoU Locator:

1120 |

"SystemName/Color/LocalIP:Port/PublicIP:Port/LocalInterface/TXBW/RXBW"

1121 |

If the local socket has public address and port information, it could be added 1122 | in the service list.

1123 |

The node MUST update its local service list to the distributed KV store by: 1124 | Key= "/service/role/systemName" 1125 | Value= "SRoULocator1,SRoULocator2"

1126 |
1127 |
1128 |
1129 |
1130 |

1131 | 4. Node Keepalive 1132 |

1133 |

Each KV pair registration MUST have a lease time and a keepalive timer. Once the 1134 | node is out of service and disconnected, the KV store MUST withdraw the KV pair 1135 | after the lease times out.

1136 |
1137 |
1138 | 1157 |
1158 |
1159 |

1160 | 6. Security Key 1161 |

1162 |

Each node may update its node key, per-socket key, or per-session-pair key to 1163 | the KV Store:

1164 |

Key="/key/SystemName" 1165 | Value="Key1,Key2"

1166 |

Key="/key/socket/SRoU_Locator" 1167 | Value="Key1,Key2"

1168 |

Key="/key/session/SRC_SRoU_Locator->DST_SRoU_Locator" 1169 | Value="Key1,Key2"

1170 |

During rekey, the node must update both OldKey and NewKey to the KV Store and 1171 | accept both keys for a while, until the entire system has synced to the new key.

1172 |
1173 |
1174 |
1175 |
1176 |

1177 | 7. Overlay Routing 1178 |

1179 |

The Route Distinguisher (RD) could be encoded as SystemName + local VNID. 1180 | The overlay routing prefix is encoded as below:

1181 |

Type-2 EVPN Route 1182 | Key="/route/2/exportRT/RD/MAC/IP" 1183 | Value="VNID/SystemName/PolicyTag"

1184 |

Type-5 EVPN Route

1185 |

Key="/route/5/exportRT/RD/IPPrefix/IPMask" 1186 | Value="VNID/SystemName/PolicyTag"

1187 |

Each linecard node could, based on its import RT list, watch the key 1188 | prefixes "/route/2/importRT" and "/route/5/importRT" to sync the routing table.

1189 |

Each linecard node could selectively fetch "/stats/linkstate" to get the 1190 | topology information and execute a flexible algorithm (SPF, A* search) to calculate 1191 | the candidate path, then install it in its forwarding table.

1192 |
1193 |
1194 |
1195 |
1196 |

1197 | 8. Control Policy 1198 |

1199 |
1200 |
1201 |

1202 | 8.1. Route control 1203 |

1204 |

Inspired by BGP FlowSpec, network operators could push a control policy to 1205 | the entire system by using:

1206 |

Key="/control/RT/2/SRC_MAC/SRC_IP/DST_MAC/DST_IP" 1207 | Key="/control/RT/5/SRC_Prefix/SRC_Mask/DST_Prefix/DST_Mask" 1208 | Value="Action" /"SR Locator list"

1209 |
1210 |
1211 |
1212 |
1213 |

1214 | 8.2. Access Control 1215 |

1216 |

Each node may use the SRoU flowID field as a token for access control. 1217 | This token could be granted or revoked by a policy engine.

1218 |

Key="/token/permit/flowid" 1219 | Key="/token/block/flowid"

1220 |

Each node could sync this table to execute the access control policy.

1221 |
1222 |
1223 |
1224 |
1225 |

1226 | 8.3. User identity 1227 |

1228 |

Each endpoint may have its own identity or group policy tags, which could be 1229 | updated by

1230 |

key="/identity/userid/user_device_id" 1231 | value="group policy tags"

1232 |

Group policy could be updated and stored in ETCD by

1233 |

key="/policy/src_grp/dst_grp" 1234 | value="actions"

1235 |
1236 |
1237 |
1238 |
1239 |
1240 |
1241 |

1242 | 9. Distributed KV Store 1243 |

1244 |

ETCD is used in our prototype. We deploy an etcd cluster in the main datacenter 1245 | and place many proxy nodes on the public cloud to make sure every node is 1246 | able to connect to the entire system. In some on-prem deployments, each of 1247 | the nodes could act as an ETCD proxy to help other nodes register to the KV store.

1248 |
1249 |
1250 |
1251 |
1252 |

1253 | 10. Security Considerations 1254 |

1255 |

All control connections are TLS based and MUST validate the server and 1256 | client certificates.

1257 |
1258 |
1259 |
1260 |
1261 |

1262 | 11. IANA Considerations 1263 |

1264 |
1265 |
1266 |
1267 |
1268 |

1269 | Acknowledgements 1270 |

1271 |

The following people provided substantial contributions to this document:

1272 |
    1273 |
  • Yijen Wang, Cisco Systems, Inc. 1274 |
  • 1275 |
1276 |
1277 |
1278 |
1279 |

1280 | Informative References 1281 |

1282 |
1283 |
[RFC2119]
1284 |
1285 | Bradner, S., "Key words for use in RFCs to Indicate Requirement Levels", BCP 14, RFC 2119, DOI 10.17487/RFC2119, , <https://www.rfc-editor.org/info/rfc2119>.
1286 |
1287 |
[RFC8174]
1288 |
1289 | Leiba, B., "Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words", BCP 14, RFC 8174, DOI 10.17487/RFC8174, , <https://www.rfc-editor.org/info/rfc8174>.
1290 |
1291 |
1292 |
1293 |
1294 |
1295 |

1296 | Authors' Addresses 1297 |

1298 |
1299 |
Kevin Fang
1300 |
Cisco Systems, Inc.
1301 | 1305 |
1306 |
1307 |
Yinghao Li
1308 |
Google, Inc.
1309 | 1313 |
1314 |
1315 |
Feng Cai
1316 |
Cisco Systems, Inc.
1317 | 1321 |
1322 |
1323 |
Xing Jiang
1324 |
Cisco Systems, Inc.
1325 | 1329 |
1330 |
1331 |
1332 | 1340 | 1341 | 1342 | -------------------------------------------------------------------------------- /draft-zartbot-srou-signalling.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Distributed KV Store based Routing protocol for SR over UDP(SRoU)" 3 | abbrev: Distributed KV Store based Routing protocol for SR over UDP(SRoU) 4 | docname: draft-zartbot-srou-control-00 5 | date: {DATE} 6 | category: exp 7 | ipr: trust200902 8 | area: Transport 9 | workgroup: SPRING 10 | 11 | stand_alone: yes 12 | pi: [toc, sortrefs, symrefs, docmapping] 13 | 14 | author: 15 | - 16 | ins: K. Fang 17 | name: Kevin Fang 18 | org: Cisco Systems, Inc. 19 | email: zartbot.ietf@gmail.com 20 | - 21 | ins: Y. Li 22 | name: Yinghao Li 23 | org: Google, Inc. 24 | email: liyinghao@gmail.com 25 | - 26 | ins: F. Cai 27 | name: Feng Cai 28 | org: Cisco Systems, Inc. 29 | email: fecai@cisco.com 30 | - 31 | ins: X. Jiang 32 | name: Xing Jiang 33 | org: Cisco Systems, Inc. 34 | email: jamjiang@cisco.com 35 | 36 | 37 | --- abstract 38 | 39 | This document defines the Distributed KV store based routing protocol for 40 | Segment Routing over UDP. 41 | 42 | --- middle 43 | 44 | # Introduction 45 | 46 | This draft provides a contol plane support for SRoU(Segment Routing over UDP). 47 | 48 | Discussion of this work is encouraged to happen on GitHub repository which 49 | contains the draft: 50 | 51 | ## Specification of Requirements 52 | 53 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 54 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 55 | "OPTIONAL" in this document are to be interpreted as described in BCP 14 56 | {{?RFC2119}} {{?RFC8174}} when, and only when, 57 | they appear in all capitals, as shown here. 
58 | 59 | ## Motivation 60 | 61 | SRoU support udp transport session over internet, but it lack of reachability 62 | detection and routing control, existing routing protocol like BGP-EVPN did not 63 | provide Dynamic NAT traversal capability. 64 | 65 | This document provide a distributed KV store based routing protocol for SRoU. 66 | 67 | ## Overview 68 | 69 | The routing protocol is based on source routing, each of the ingress node 70 | cloud get the overlay prefix and dest location mapping from distributed KV 71 | store, then the ingress node could fetch linkstate database from this KV store 72 | and execute A* algorithm to search the candidate path which meet the SLA 73 | requirement. 74 | 75 | 76 | # Node abstraction and registration 77 | 78 | Each Node has the following attribute 79 | 80 | 1. Role: the system contains different node type, role attribute is a uint16 81 | value which contains: 82 | 83 | | Type | Name |Section | 84 | |-----:|:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| 85 | | 0x0 | STUN | This node is used as a STUN server to help other nodes discovery their public address.This node must deploy with a public internet address or behind static 1:1 NAT | 86 | | 0x1 | Fabric | This node type is used as a interim node to relay the SRoU traffic, this node MUST initial TWAMP link probe to other Fabric node and report linkstate to KV Store. | 87 | | 0x2 | Linecard | This node type is used to connect existing network, it could use TWAMP probe other Fabric Node or Linecard node | 88 | |-----:|:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| 89 | {: #node role title="Node Role"} 90 | 91 | 2. SiteID: uint32 number, defined the node which belongs to same site or 92 | Automomous System. 93 | 3. 
SystemName: unique string type to indicate a node. 94 | 4. Label: unique 24bit value, allocation algorithm is described in the following 95 | section. 96 | 5. Location: Optional filed. It contains two float32 value(latitude and 97 | longitude) to indicate the Geo location. 98 | 99 | ## Node Label allocation 100 | 101 | Each node initial TLS session to Distributed KV Store, and fetch a distributed 102 | lock with key "/lock/systemlabel". The node will fetch prefix "/systemlabel" to 103 | get all label mapping once it get the lock. Then it will assign the smallest 104 | unpresent int "X" in the list as it's system label, and register it to KV store 105 | by key="/systemlabel/X", then it could release the distributed lock. All of the 106 | fabric node MUST listen the "/systemlabel" to update it's local node mapping 107 | table, Linecard node may fetch the "/systemlabel" key when it need to optimize 108 | the local route. 109 | 110 | This System Label could be used for cSID encoding or VPN based client linecard 111 | node convert to it's tunnel address. 112 | 113 | ## Node registration 114 | 115 | Each node will send Key="/node/role/systemName" and Value=" SiteID,SystemLabel, 116 | Lat,Long" to the distributed KV store. 117 | 118 | 119 | # SRoU Locator and Route 120 | Each node may have multiple underlay socket which may behind the dynamic NAT, 121 | it MUST fetch the STUN list from "/node/stun" and "/service/stun" to get 122 | the STUN server address list, then send the SRoU OAM-STUN packet to the random 123 | selected stun server to get the public address. 124 | 125 | Once the socket get the public address, it will encode the udp socket info as a 126 | SRoU Locator: 127 | 128 | "SystemName/Color/LocalIP:Port/PublicIP:Port/LocalInterface/TXBW/RXBW" 129 | 130 | If the local socket has public address and port information, it could be added 131 | in the service list. 
132 | 133 | The node MUST update it local servicelist to distributed KV store by: 134 | Key= "/service/role/systemName" 135 | Value= "SRoULocator1,SRoULocator2" 136 | 137 | # Node Keepalive 138 | 139 | Each KV pair registration MUST have a leasetime and keepalive timer, Once the 140 | Node out of service and disconnected, the KV store MUST withdraw the KV pair 141 | after lease timeout. 142 | 143 | # Link State 144 | Each Fabric Node must watch the "/service/fabric" key prefix to update its local 145 | SRoU Service list database. It MUST initial TWAMP session over the service udp 146 | socket to measure the link performance and reachablity. 147 | 148 | Linkstate measurement result COULD send to the KV store to construct the 149 | linkstate Database by the following Key Value type: 150 | 151 | Key="/stats/linkstate/SRC_SRoU_Locator->DST_SRoU_Locator" 152 | value= TWAMP measured jitter/delay/loss result and underlay interface load. 153 | 154 | The Node CPU,Memory usage also could be updated by: 155 | Key="/stats/node/SystemName" 156 | Value="CPULoad,MemoryUsage" 157 | 158 | An telemetry analytics node could watch key prefix ="/stats" for assurance 159 | and AIOps based routing optimization. 160 | 161 | # Sercurity Key 162 | 163 | Each node may update it node key or per socket key , or per session pair key to 164 | the KV Store: 165 | 166 | Key="/key/SystemName" 167 | Value="Key1,Key2" 168 | 169 | Key="/key/socket/SRoU_Locator" 170 | Value="Key1,Key2" 171 | 172 | Key="/key/session/SRC_SRoU_Locator->DST_SRoU_Locator" 173 | Value="Key1,Key2" 174 | 175 | During Rekey, the node must update both OldKey and newKey to the KV Store and 176 | accept both Key in a while to wait the entire system sync to the new key. 
177 | 178 | # Overlay Routing 179 | 180 | RouteDistinguish could encode by SystemName + local VNID 181 | The overlay routing prefix is encoded as below: 182 | 183 | Type-2 EVPN Route 184 | Key="/route/2/exportRT/RD/MAC/IP" 185 | Value="VNID/SystemName/PolicyTag" 186 | 187 | Type-5 EVPN Route 188 | 189 | Key="/route/5/exportRT/RD/IPPrefix/IPMask" 190 | Value="VNID/SystemName/PolicyTag" 191 | 192 | Each of the linecard node could based on import RT list to watch key 193 | prefix ="/route/2/importRT" and "/route/5/importRT" to sync the routing table. 194 | 195 | Each linecard node could selective fetch the "/stats/linkstate" to get the 196 | toplogy information and execute flexibile algorithm(SPF,A* search) to calculate 197 | the candidate path, then enforce it to its forwarding table. 198 | 199 | # Control Policy 200 | 201 | ## Route control 202 | Inspired by BGP FlowSpec,Network operator could update the control policy to 203 | the entire system by using: 204 | 205 | Key="/control/RT/2/SRC_MAC/SRC_IP/DST_MAC/DST_IP" 206 | Key="/control/RT/5/SRC_Prefix/SRC_Mask/DST_Prefix/DST_Mask" 207 | Value="Action" /"SR Locator list" 208 | 209 | ## Access Control 210 | 211 | Each node may use the SRoU flowID field as a token based access control. 212 | This token could grant or revoke by a policy engine. 213 | 214 | Key="/token/permit/flowid" 215 | Key="/token/block/flowid" 216 | 217 | Each node could sync this table to execute the access control policy. 
218 | 219 | ## User identity 220 | 221 | Each of the endpoint may have it's identity or group policy tags, it could be 222 | updated by 223 | 224 | key="/identity/userid/user_device_id" 225 | value="group policy tags" 226 | 227 | Group policy could be updated and store in ETCD by 228 | 229 | key="/policy/src_grp/dst_grp" 230 | value="actions" 231 | 232 | 233 | # Distributed KV Store 234 | ETCD is used in our prototype, we deploy an etcd cluster in main datacenter 235 | and place many of the proxy node on public cloud to make sure the node could 236 | be available connect to the entire system. In some on-prem deployment, each of 237 | the nodes could act as a ETCD proxy to help other node register to KV store. 238 | 239 | # Security Considerations 240 | 241 | All of the control connection is TLS based and MUST validate the server and 242 | client certification. 243 | 244 | 245 | # IANA Considerations 246 | 247 | 248 | # Acknowledgements 249 | {:numbered="false"} 250 | 251 | The following people provided substantial contributions to this document: 252 | 253 | - Yijen Wang, Cisco Systems, Inc. 254 | 255 | 256 | 257 | -------------------------------------------------------------------------------- /draft-zartbot-srou-signalling.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SPRING K. Fang 6 | Internet-Draft Cisco Systems, Inc. 7 | Intended status: Experimental Y. Li 8 | Expires: 20 February 2021 Google, Inc. 9 | F. Cai 10 | X. Jiang 11 | Cisco Systems, Inc. 12 | 19 August 2020 13 | 14 | 15 | Distributed KV Store based Routing protocol for SR over UDP(SRoU) 16 | draft-zartbot-srou-control-00 17 | 18 | Abstract 19 | 20 | This document defines the Distributed KV store based routing protocol 21 | for Segment Routing over UDP. 22 | 23 | Status of This Memo 24 | 25 | This Internet-Draft is submitted in full conformance with the 26 | provisions of BCP 78 and BCP 79. 
27 | 28 | Internet-Drafts are working documents of the Internet Engineering 29 | Task Force (IETF). Note that other groups may also distribute 30 | working documents as Internet-Drafts. The list of current Internet- 31 | Drafts is at https://datatracker.ietf.org/drafts/current/. 32 | 33 | Internet-Drafts are draft documents valid for a maximum of six months 34 | and may be updated, replaced, or obsoleted by other documents at any 35 | time. It is inappropriate to use Internet-Drafts as reference 36 | material or to cite them other than as "work in progress." 37 | 38 | This Internet-Draft will expire on 20 February 2021. 39 | 40 | Copyright Notice 41 | 42 | Copyright (c) 2020 IETF Trust and the persons identified as the 43 | document authors. All rights reserved. 44 | 45 | This document is subject to BCP 78 and the IETF Trust's Legal 46 | Provisions Relating to IETF Documents (https://trustee.ietf.org/ 47 | license-info) in effect on the date of publication of this document. 48 | Please review these documents carefully, as they describe your rights 49 | and restrictions with respect to this document. Code Components 50 | extracted from this document must include Simplified BSD License text 51 | as described in Section 4.e of the Trust Legal Provisions and are 52 | provided without warranty as described in the Simplified BSD License. 53 | 54 | 55 | 56 | Fang, et al. Expires 20 February 2021 [Page 1] 57 | 58 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 59 | 60 | 61 | Table of Contents 62 | 63 | 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2 64 | 1.1. Specification of Requirements . . . . . . . . . . . . . . 2 65 | 1.2. Motivation . . . . . . . . . . . . . . . . . . . . . . . 2 66 | 1.3. Overview . . . . . . . . . . . . . . . . . . . . . . . . 3 67 | 2. Node abstraction and registration . . . . . . . . . . . . . . 3 68 | 2.1. Node Label allocation . . . . . . . . . . . . . . . . . . 4 69 | 2.2. Node registration . . . . 
. . . . . . . . . . . . . . . . 4 70 | 3. SRoU Locator and Route . . . . . . . . . . . . . . . . . . . 4 71 | 4. Node Keepalive . . . . . . . . . . . . . . . . . . . . . . . 4 72 | 5. Link State . . . . . . . . . . . . . . . . . . . . . . . . . 5 73 | 6. Sercurity Key . . . . . . . . . . . . . . . . . . . . . . . . 5 74 | 7. Overlay Routing . . . . . . . . . . . . . . . . . . . . . . . 5 75 | 8. Control Policy . . . . . . . . . . . . . . . . . . . . . . . 6 76 | 8.1. Route control . . . . . . . . . . . . . . . . . . . . . . 6 77 | 8.2. Access Control . . . . . . . . . . . . . . . . . . . . . 6 78 | 8.3. User identity . . . . . . . . . . . . . . . . . . . . . . 6 79 | 9. Distributed KV Store . . . . . . . . . . . . . . . . . . . . 6 80 | 10. Security Considerations . . . . . . . . . . . . . . . . . . . 7 81 | 11. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 7 82 | Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . . 7 83 | Informative References . . . . . . . . . . . . . . . . . . . . . 7 84 | Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 7 85 | 86 | 1. Introduction 87 | 88 | This draft provides a contol plane support for SRoU(Segment Routing 89 | over UDP). 90 | 91 | Discussion of this work is encouraged to happen on GitHub repository 92 | which contains the draft: https://github.com/zartbot/draft-quic-sr 93 | (https://github.com/zartbot/draft-quic-sr) 94 | 95 | 1.1. Specification of Requirements 96 | 97 | The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", 98 | "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and 99 | "OPTIONAL" in this document are to be interpreted as described in BCP 100 | 14 [RFC2119] [RFC8174] when, and only when, they appear in all 101 | capitals, as shown here. 102 | 103 | 1.2. 
Motivation 104 | 105 | SRoU support udp transport session over internet, but it lack of 106 | reachability detection and routing control, existing routing protocol 107 | like BGP-EVPN did not provide Dynamic NAT traversal capability. 108 | 109 | 110 | 111 | 112 | Fang, et al. Expires 20 February 2021 [Page 2] 113 | 114 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 115 | 116 | 117 | This document provide a distributed KV store based routing protocol 118 | for SRoU. 119 | 120 | 1.3. Overview 121 | 122 | The routing protocol is based on source routing, each of the ingress 123 | node cloud get the overlay prefix and dest location mapping from 124 | distributed KV store, then the ingress node could fetch linkstate 125 | database from this KV store and execute A* algorithm to search the 126 | candidate path which meet the SLA requirement. 127 | 128 | 2. Node abstraction and registration 129 | 130 | Each Node has the following attribute 131 | 132 | 1. Role: the system contains different node type, role attribute is 133 | a uint16 value which contains: 134 | 135 | +------+----------+---------------------------------------------+ 136 | | Type | Name | Section | 137 | +======+==========+=============================================+ 138 | | 0x0 | STUN | This node is used as a STUN server to help | 139 | | | | other nodes discovery their public | 140 | | | | address.This node must deploy with a public | 141 | | | | internet address or behind static 1:1 NAT | 142 | +------+----------+---------------------------------------------+ 143 | | 0x1 | Fabric | This node type is used as a interim node to | 144 | | | | relay the SRoU traffic, this node MUST | 145 | | | | initial TWAMP link probe to other Fabric | 146 | | | | node and report linkstate to KV Store. 
| 147 | +------+----------+---------------------------------------------+ 148 | | 0x2 | Linecard | This node type is used to connect existing | 149 | | | | network, it could use TWAMP probe other | 150 | | | | Fabric Node or Linecard node | 151 | +------+----------+---------------------------------------------+ 152 | 153 | Table 1: Node Role 154 | 155 | 1. SiteID: uint32 number, defined the node which belongs to same 156 | site or Automomous System. 157 | 158 | 2. SystemName: unique string type to indicate a node. 159 | 160 | 3. Label: unique 24bit value, allocation algorithm is described in 161 | the following section. 162 | 163 | 4. Location: Optional filed. It contains two float32 value(latitude 164 | and longitude) to indicate the Geo location. 165 | 166 | 167 | 168 | Fang, et al. Expires 20 February 2021 [Page 3] 169 | 170 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 171 | 172 | 173 | 2.1. Node Label allocation 174 | 175 | Each node initial TLS session to Distributed KV Store, and fetch a 176 | distributed lock with key "/lock/systemlabel". The node will fetch 177 | prefix "/systemlabel" to get all label mapping once it get the lock. 178 | Then it will assign the smallest unpresent int "X" in the list as 179 | it's system label, and register it to KV store by key="/systemlabel/ 180 | X", then it could release the distributed lock. All of the fabric 181 | node MUST listen the "/systemlabel" to update it's local node mapping 182 | table, Linecard node may fetch the "/systemlabel" key when it need to 183 | optimize the local route. 184 | 185 | This System Label could be used for cSID encoding or VPN based client 186 | linecard node convert to it's tunnel address. 187 | 188 | 2.2. Node registration 189 | 190 | Each node will send Key="/node/role/systemName" and Value=" 191 | SiteID,SystemLabel, Lat,Long" to the distributed KV store. 192 | 193 | 3. 
SRoU Locator and Route 194 | 195 | Each node may have multiple underlay socket which may behind the 196 | dynamic NAT, it MUST fetch the STUN list from "/node/stun" and 197 | "/service/stun" to get the STUN server address list, then send the 198 | SRoU OAM-STUN packet to the random selected stun server to get the 199 | public address. 200 | 201 | Once the socket get the public address, it will encode the udp socket 202 | info as a SRoU Locator: 203 | 204 | "SystemName/Color/LocalIP:Port/PublicIP:Port/LocalInterface/TXBW/ 205 | RXBW" 206 | 207 | If the local socket has public address and port information, it could 208 | be added in the service list. 209 | 210 | The node MUST update it local servicelist to distributed KV store by: 211 | Key= "/service/role/systemName" Value= "SRoULocator1,SRoULocator2" 212 | 213 | 4. Node Keepalive 214 | 215 | Each KV pair registration MUST have a leasetime and keepalive timer, 216 | Once the Node out of service and disconnected, the KV store MUST 217 | withdraw the KV pair after lease timeout. 218 | 219 | 220 | 221 | 222 | 223 | 224 | Fang, et al. Expires 20 February 2021 [Page 4] 225 | 226 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 227 | 228 | 229 | 5. Link State 230 | 231 | Each Fabric Node must watch the "/service/fabric" key prefix to 232 | update its local SRoU Service list database. It MUST initial TWAMP 233 | session over the service udp socket to measure the link performance 234 | and reachablity. 235 | 236 | Linkstate measurement result COULD send to the KV store to construct 237 | the linkstate Database by the following Key Value type: 238 | 239 | Key="/stats/linkstate/SRC_SRoU_Locator->DST_SRoU_Locator" value= 240 | TWAMP measured jitter/delay/loss result and underlay interface load. 
241 | 242 | The Node CPU and Memory usage could also be updated by: Key="/stats/node/ 243 | SystemName" Value="CPULoad,MemoryUsage" 244 | 245 | A telemetry analytics node could watch key prefix ="/stats" for 246 | assurance and AIOps based routing optimization. 247 | 248 | 6. Security Key 249 | 250 | Each node may update its node key, per socket key, or per session 251 | pair key to the KV Store: 252 | 253 | Key="/key/SystemName" Value="Key1,Key2" 254 | 255 | Key="/key/socket/SRoU_Locator" Value="Key1,Key2" 256 | 257 | Key="/key/session/SRC_SRoU_Locator->DST_SRoU_Locator" 258 | Value="Key1,Key2" 259 | 260 | During Rekey, the node must update both OldKey and newKey to the KV 261 | Store and accept both keys for a while, until the entire system has synced 262 | to the new key. 263 | 264 | 7. Overlay Routing 265 | 266 | The Route Distinguisher could be encoded as SystemName + local VNID. The overlay 267 | routing prefix is encoded as below: 268 | 269 | Type-2 EVPN Route Key="/route/2/exportRT/RD/MAC/IP" 270 | Value="VNID/SystemName/PolicyTag" 271 | 272 | Type-5 EVPN Route 273 | 274 | Key="/route/5/exportRT/RD/IPPrefix/IPMask" Value="VNID/SystemName/ 275 | PolicyTag" 276 | 277 | 278 | 279 | 280 | Fang, et al. Expires 20 February 2021 [Page 5] 281 | 282 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 283 | 284 | 285 | Each linecard node could, based on its import RT list, watch key 286 | prefix ="/route/2/importRT" and "/route/5/importRT" to sync the 287 | routing table. 288 | 289 | Each linecard node could selectively fetch the "/stats/linkstate" to 290 | get the topology information and execute a flexible algorithm (SPF, A* 291 | search) to calculate the candidate path, then enforce it to its 292 | forwarding table. 293 | 294 | 8. Control Policy 295 | 296 | 8.1.
Route control 297 | 298 | Inspired by BGP FlowSpec, a network operator could update the control 299 | policy to the entire system by using: 300 | 301 | Key="/control/RT/2/SRC_MAC/SRC_IP/DST_MAC/DST_IP" 302 | Key="/control/RT/5/SRC_Prefix/SRC_Mask/DST_Prefix/DST_Mask" 303 | Value="Action" /"SR Locator list" 304 | 305 | 8.2. Access Control 306 | 307 | Each node may use the SRoU flowID field as a token based access 308 | control. This token could be granted or revoked by a policy engine. 309 | 310 | Key="/token/permit/flowid" Key="/token/block/flowid" 311 | 312 | Each node could sync this table to execute the access control policy. 313 | 314 | 8.3. User identity 315 | 316 | Each endpoint may have its own identity or group policy tags, which 317 | could be updated by 318 | 319 | key="/identity/userid/user_device_id" value="group policy tags" 320 | 321 | Group policy could be updated and stored in ETCD by 322 | 323 | key="/policy/src_grp/dst_grp" value="actions" 324 | 325 | 9. Distributed KV Store 326 | 327 | ETCD is used in our prototype; we deploy an etcd cluster in the main 328 | datacenter and place many proxy nodes on public cloud to make 329 | sure each node is able to connect to the entire system. In 330 | some on-prem deployments, each of the nodes could act as an ETCD proxy 331 | to help other nodes register to the KV store. 332 | 333 | 334 | 335 | 336 | Fang, et al. Expires 20 February 2021 [Page 6] 337 | 338 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 339 | 340 | 341 | 10. Security Considerations 342 | 343 | All of the control connections are TLS based and MUST validate the 344 | server and client certificates. 345 | 346 | 11. IANA Considerations 347 | 348 | Acknowledgements 349 | 350 | The following people provided substantial contributions to this 351 | document: 352 | 353 | * Yijen Wang, Cisco Systems, Inc.
354 | 355 | Informative References 356 | 357 | [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate 358 | Requirement Levels", BCP 14, RFC 2119, 359 | DOI 10.17487/RFC2119, March 1997, 360 | . 361 | 362 | [RFC8174] Leiba, B., "Ambiguity of Uppercase vs Lowercase in RFC 363 | 2119 Key Words", BCP 14, RFC 8174, DOI 10.17487/RFC8174, 364 | May 2017, . 365 | 366 | Authors' Addresses 367 | 368 | Kevin Fang 369 | Cisco Systems, Inc. 370 | 371 | Email: zartbot.ietf@gmail.com 372 | 373 | 374 | Yinghao Li 375 | Google, Inc. 376 | 377 | Email: liyinghao@gmail.com 378 | 379 | 380 | Feng Cai 381 | Cisco Systems, Inc. 382 | 383 | Email: fecai@cisco.com 384 | 385 | 386 | Xing Jiang 387 | Cisco Systems, Inc. 388 | 389 | 390 | 391 | 392 | Fang, et al. Expires 20 February 2021 [Page 7] 393 | 394 | Internet-DrafDistributed KV Store based Routing protocol for August 2020 395 | 396 | 397 | Email: jamjiang@cisco.com 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | Fang, et al. 
Expires 20 February 2021 [Page 8] 449 | -------------------------------------------------------------------------------- /example_apps/client: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zartbot/draft-quic-sr/d93f7315e1c238f7828bca6fbe4db16a2efac5cd/example_apps/client -------------------------------------------------------------------------------- /example_apps/interim_fwd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zartbot/draft-quic-sr/d93f7315e1c238f7828bca6fbe4db16a2efac5cd/example_apps/interim_fwd -------------------------------------------------------------------------------- /example_apps/server: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zartbot/draft-quic-sr/d93f7315e1c238f7828bca6fbe4db16a2efac5cd/example_apps/server -------------------------------------------------------------------------------- /prototype/forwarder/fwd.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "flag" 7 | "log" 8 | "net" 9 | 10 | "github.com/sirupsen/logrus" 11 | ) 12 | 13 | var ( 14 | port = flag.String("port", "", "server port") 15 | debugflag = flag.Bool("debug", false, "debug flags") 16 | ) 17 | 18 | func main() { 19 | 20 | flag.Parse() 21 | // check if we have anything 22 | 23 | if "" == *port { 24 | flag.Usage() 25 | log.Fatalln("\nremote server is not specified") 26 | } 27 | pc, err := net.ListenPacket("udp", ":"+*port) 28 | if err != nil { 29 | logrus.Fatal(err) 30 | } 31 | for { 32 | buf := make([]byte, 1500) 33 | n, addr, err := pc.ReadFrom(buf) 34 | if err != nil { 35 | continue 36 | } 37 | go serve(pc, addr, buf[:n]) 38 | } 39 | 40 | } 41 | 42 | func serve(pc net.PacketConn, addr net.Addr, buf []byte) { 43 | //non quic packet 44 | if buf[0] != 0 { 45 
| return 46 | } 47 | 48 | srcAddrLoc := uint8(buf[2]) + 4 49 | 50 | af := uint8(buf[3]) 51 | if af == 0 { 52 | return 53 | } 54 | 55 | /* update SRHeader field 56 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 57 | | SR Type | SRH Len | LastEntry | Segment Left | 58 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+*/ 59 | 60 | var segLen uint8 = 6 61 | if af == 2 { 62 | segLen = 18 63 | } 64 | 65 | srhLoc := srcAddrLoc + segLen 66 | segmentLeft := uint8(buf[srhLoc+3]) 67 | lastEntry := buf[srhLoc+2] 68 | 69 | if segmentLeft == lastEntry { 70 | //NAT ALG applied on 1st hop 71 | segment := StrToByte(addr.String()) 72 | copy(buf[srcAddrLoc:srhLoc], segment) 73 | } 74 | //reduce segmentleft 75 | buf[srhLoc+3]-- 76 | //logrus.Warn("SL|LE:", segmentLeft, "|", lastEntry) 77 | if segmentLeft >= 1 { 78 | start := srhLoc + 4 + (segmentLeft-1)*segLen 79 | dst := buf[start : start+segLen] 80 | addr1 := ByteToNetAddr(dst) 81 | //logrus.Warn("start:", start, "|", addr1.String()) 82 | //fmt.Println(hex.Dump(buf)) 83 | pc.WriteTo(buf, addr1) 84 | } 85 | } 86 | 87 | //StrToByte is used convert string uaddr to Net.addr format 88 | func StrToByte(str string) []byte { 89 | uaddr, err := net.ResolveUDPAddr("udp", str) 90 | if err != nil { 91 | return nil 92 | } 93 | port := uint16(uaddr.Port) 94 | ipv4 := uaddr.IP.To4() 95 | 96 | //IPv6 address return [16]Byte Addr + [2]byte Port 97 | if ipv4 == nil { 98 | buf := bytes.NewBuffer([]byte(uaddr.IP)) 99 | buf.WriteByte(byte(port >> 8)) 100 | buf.WriteByte(byte(port & 0xff)) 101 | return buf.Bytes() 102 | } 103 | 104 | //IPv4 address return [4]Byte Addr + [2]byte Port 105 | buf := bytes.NewBuffer([]byte(ipv4)) 106 | buf.WriteByte(byte(port >> 8)) 107 | buf.WriteByte(byte(port & 0xff)) 108 | return buf.Bytes() 109 | } 110 | 111 | //ByteToNetAddr is used to parse segment to net.Addr format 112 | func ByteToNetAddr(b []byte) net.Addr { 113 | blen := len(b) 114 | if blen == 6 { 115 | ip := net.IP(b[0:4]) 
116 | port := binary.BigEndian.Uint16(b[4:6]) 117 | 118 | return &net.UDPAddr{ 119 | IP: ip, 120 | Port: int(port), 121 | } 122 | 123 | } else if blen == 18 { 124 | ip := net.IP(b[0:16]) 125 | port := binary.BigEndian.Uint16(b[16:18]) 126 | 127 | return &net.UDPAddr{ 128 | IP: ip, 129 | Port: int(port), 130 | } 131 | } 132 | return nil 133 | } 134 | -------------------------------------------------------------------------------- /prototype/quic_go.diff: -------------------------------------------------------------------------------- 1 | diff --git a/conn.go b/conn.go 2 | index 700c1471..afc2089b 100644 3 | --- a/conn.go 4 | +++ b/conn.go 5 | @@ -1,6 +1,8 @@ 6 | package quic 7 | 8 | import ( 9 | + "bytes" 10 | + "encoding/binary" 11 | "net" 12 | "sync" 13 | ) 14 | @@ -12,6 +14,7 @@ type connection interface { 15 | LocalAddr() net.Addr 16 | RemoteAddr() net.Addr 17 | SetCurrentRemoteAddr(net.Addr) 18 | + SetQUICSR([]string, []byte, bool) 19 | } 20 | 21 | type conn struct { 22 | @@ -19,17 +22,35 @@ type conn struct { 23 | 24 | pconn net.PacketConn 25 | currentAddr net.Addr 26 | + nhopAddr net.Addr 27 | + QUICSRH []byte 28 | + segmentList []string 29 | + flowid []byte 30 | + dupSend bool 31 | } 32 | 33 | var _ connection = &conn{} 34 | 35 | func (c *conn) Write(p []byte) error { 36 | - _, err := c.pconn.WriteTo(p, c.currentAddr) 37 | + if c.QUICSRH == nil { 38 | + _, err := c.pconn.WriteTo(p, c.currentAddr) 39 | + if c.dupSend { 40 | + _, err = c.pconn.WriteTo(p, c.currentAddr) 41 | + } 42 | + return err 43 | + } 44 | + buf := bytes.NewBuffer(c.QUICSRH) 45 | + buf.Write(p) 46 | + _, err := c.pconn.WriteTo(buf.Bytes(), c.nhopAddr) 47 | + if c.dupSend { 48 | + _, err = c.pconn.WriteTo(buf.Bytes(), c.nhopAddr) 49 | + } 50 | return err 51 | } 52 | 53 | func (c *conn) Read(p []byte) (int, net.Addr, error) { 54 | return c.pconn.ReadFrom(p) 55 | + 56 | } 57 | 58 | func (c *conn) SetCurrentRemoteAddr(addr net.Addr) { 59 | @@ -52,3 +73,207 @@ func (c *conn) RemoteAddr() 
net.Addr { 60 | func (c *conn) Close() error { 61 | return c.pconn.Close() 62 | } 63 | + 64 | +/* 65 | + 0 1 2 3 66 | + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 67 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 68 | + |0 0 0 0 0 0 0 0| SRoU Length | FlowID Length | Protocol ID | 69 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 70 | + | | 71 | + + + 72 | + | | 73 | + + Flow ID + 74 | + | | 75 | + + + 76 | + | | 77 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 78 | + 79 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 80 | + | Source IP Address | 81 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 82 | + | Source Port | 83 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 84 | + 85 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 86 | + | SR Type | SRH Len | LastEntry | Segment Left | 87 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 88 | + | SegmentList[0] | 89 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 90 | + | SegmentList[1] | 91 | + 92 | + ~ ... ~ 93 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 94 | + | SegmentList[N] | 95 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 96 | + | Options | 97 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 98 | + 99 | + Protocol-ID = 0:OAM 1:IPv4 2:IPv6 3:STUN-OAM-IPv4 4:STUN-OAM-IPv6 100 | + SRType: 0: Linkstate probe, used for link state test packet. 
101 | + SRType: 1: 32+16 bit IPv4 socket 102 | + SRType: 2: 128+16 bit IPv6 socket 103 | + SRType: 3: 128 bit Label, interworking with SRv6 104 | + 105 | + OAM Packet: 106 | + SRType= 0 107 | + SR Hdr Len : 0=STUN Packet, 40: ProbeOAM 108 | + 109 | +*/ 110 | + 111 | +func (c *conn) SetQUICSR(seglist []string, flowid []byte, dupSend bool) { 112 | + c.dupSend = dupSend 113 | + 114 | + if len(seglist) == 0 { 115 | + return 116 | + } 117 | + 118 | + //add header and flowid 119 | + buf := bytes.NewBuffer([]byte{0, 0, 0, 0}) 120 | + flowidLen, _ := buf.Write(flowid) 121 | + 122 | + var aftype uint8 = 1 123 | + 124 | + //check destination AF and add allocate source segment 125 | + //source segment will be add by 1st segment router 126 | + //we preallocate it to avoid MTU issue in middle routers 127 | + uaddr, _ := net.ResolveUDPAddr("udp", c.currentAddr.String()) 128 | + if uaddr.IP.To4() == nil { 129 | + buf.Write(make([]byte, 18)) 130 | + aftype = 2 131 | + } else { 132 | + buf.Write(make([]byte, 6)) 133 | + } 134 | + 135 | + //add SRH 136 | + srh := c.buildSRH(seglist) 137 | + if srh == nil { 138 | + //TODO: need error handling 139 | + return 140 | + } 141 | + 142 | + buf.Write(srh) 143 | + 144 | + nhop := ByteToNetAddr(StrToByte(seglist[0])) 145 | + 146 | + quicSRH := buf.Bytes() 147 | + quicSRH[0] = uint8(0) 148 | + quicSRH[1] = uint8(len(quicSRH)) 149 | + quicSRH[2] = uint8(flowidLen) 150 | + quicSRH[3] = aftype 151 | + 152 | + c.mutex.Lock() 153 | + c.QUICSRH = make([]byte, len(quicSRH)) 154 | + copy(c.QUICSRH, quicSRH) 155 | + c.segmentList = make([]string, len(seglist)) 156 | + copy(c.segmentList, seglist) 157 | + c.flowid = make([]byte, len(flowid)) 158 | + copy(c.flowid, flowid) 159 | + c.nhopAddr = nhop 160 | + c.mutex.Unlock() 161 | +} 162 | + 163 | +//buildSRH is used to build segment routing header by seglist 164 | +func (c *conn) buildSRH(seglist []string) []byte { 165 | + segListLen := len(seglist) 166 | + if segListLen == 0 { 167 | + return nil 168 | + } 
169 | + //append srh 170 | + buf := bytes.NewBuffer([]byte{0x0, 0x0, 0x0, 0x0}) 171 | + 172 | + //append remote socket info in seg[0] 173 | + segment := StrToByte(c.currentAddr.String()) 174 | + if segment == nil { 175 | + return nil 176 | + } 177 | + 178 | + buf.Write(segment) 179 | + //default IPv4 adress family is IPv4 with 6Bytes(4byte IP + 2byte port) segment length 180 | + var aftype uint8 = 1 181 | + seglen := 6 182 | + 183 | + //update for IPv6 address family 18Bytes (16bytes IPv6 + 2Bytes Port) segment length 184 | + if len(segment) == 18 { 185 | + aftype = 2 186 | + seglen = 18 187 | + } 188 | + 189 | + for i := segListLen; i > 0; i-- { 190 | + segment = StrToByte(seglist[i-1]) 191 | + if segment == nil { 192 | + return nil 193 | + } 194 | + //invalid address type 195 | + if len(segment) != seglen { 196 | + return nil 197 | + } 198 | + buf.Write(segment) 199 | + } 200 | + result := buf.Bytes() 201 | + 202 | + /* update SRHeader field 203 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 204 | + | SR Type | SRH Len | LastEntry | Segment Left | 205 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+*/ 206 | + result[0] = uint8(aftype) 207 | + result[1] = uint8(len(result)) 208 | + result[2] = uint8(segListLen) 209 | + result[3] = uint8(segListLen) 210 | + 211 | + return result 212 | +} 213 | + 214 | +//StrToByte is used to build segment byte array 215 | +// Args: 216 | +// str : 192.168.1.2:1234 or [2001:1234::1]:1234 217 | +// 218 | +// Return: 219 | +// []byte{ Address, Port} with network order 220 | +// if str is invalid this function will return nil 221 | +func StrToByte(str string) []byte { 222 | + uaddr, err := net.ResolveUDPAddr("udp", str) 223 | + if err != nil { 224 | + return nil 225 | + } 226 | + port := uint16(uaddr.Port) 227 | + ipv4 := uaddr.IP.To4() 228 | + 229 | + //IPv6 address return [16]Byte Addr + [2]byte Port 230 | + if ipv4 == nil { 231 | + buf := bytes.NewBuffer([]byte(uaddr.IP)) 232 | + 
buf.WriteByte(byte(port >> 8)) 233 | + buf.WriteByte(byte(port & 0xff)) 234 | + return buf.Bytes() 235 | + } 236 | + 237 | + //IPv4 address return [4]Byte Addr + [2]byte Port 238 | + buf := bytes.NewBuffer([]byte(ipv4)) 239 | + buf.WriteByte(byte(port >> 8)) 240 | + buf.WriteByte(byte(port & 0xff)) 241 | + return buf.Bytes() 242 | +} 243 | + 244 | +//ByteToNetAddr is used to parse segment to net.Addr format 245 | +func ByteToNetAddr(b []byte) net.Addr { 246 | + blen := len(b) 247 | + if blen == 6 { 248 | + ip := net.IP(b[0:4]) 249 | + port := binary.BigEndian.Uint16(b[4:6]) 250 | + 251 | + return &net.UDPAddr{ 252 | + IP: ip, 253 | + Port: int(port), 254 | + } 255 | + 256 | + } else if blen == 18 { 257 | + ip := net.IP(b[0:16]) 258 | + port := binary.BigEndian.Uint16(b[16:18]) 259 | + 260 | + return &net.UDPAddr{ 261 | + IP: ip, 262 | + Port: int(port), 263 | + } 264 | + } 265 | + return nil 266 | +} 267 | diff --git a/interface.go b/interface.go 268 | index eeadf220..c7ccc96a 100644 269 | --- a/interface.go 270 | +++ b/interface.go 271 | @@ -187,6 +187,8 @@ type Session interface { 272 | // It blocks until the handshake completes. 273 | // Warning: This API should not be considered stable and might change soon. 274 | ConnectionState() ConnectionState 275 | + 276 | + SetQUICSR([]string, []byte, bool) 277 | } 278 | 279 | // An EarlySession is a session that is handshaking. 
280 | diff --git a/internal/mocks/quic/early_session.go b/internal/mocks/quic/early_session.go 281 | index e5fcf0ae..d49c6040 100644 282 | --- a/internal/mocks/quic/early_session.go 283 | +++ b/internal/mocks/quic/early_session.go 284 | @@ -211,3 +211,15 @@ func (mr *MockEarlySessionMockRecorder) RemoteAddr() *gomock.Call { 285 | mr.mock.ctrl.T.Helper() 286 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoteAddr", reflect.TypeOf((*MockEarlySession)(nil).RemoteAddr)) 287 | } 288 | + 289 | +// SetQUICSR mocks base method 290 | +func (m *MockEarlySession) SetQUICSR(arg0 []string, arg1 []byte, arg2 bool) { 291 | + m.ctrl.T.Helper() 292 | + m.ctrl.Call(m, "SetQUICSR", arg0, arg1, arg2) 293 | +} 294 | + 295 | +// SetQUICSR indicates an expected call of SetQUICSR 296 | +func (mr *MockEarlySessionMockRecorder) SetQUICSR(arg0, arg1, arg2 interface{}) *gomock.Call { 297 | + mr.mock.ctrl.T.Helper() 298 | + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetQUICSR", reflect.TypeOf((*MockEarlySession)(nil).SetQUICSR), arg0, arg1, arg2) 299 | +} 300 | diff --git a/mock_connection_test.go b/mock_connection_test.go 301 | index 4eb2b3ea..81607c21 100644 302 | --- a/mock_connection_test.go 303 | +++ b/mock_connection_test.go 304 | @@ -117,3 +117,15 @@ func (mr *MockConnectionMockRecorder) Write(arg0 interface{}) *gomock.Call { 305 | mr.mock.ctrl.T.Helper() 306 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Write", reflect.TypeOf((*MockConnection)(nil).Write), arg0) 307 | } 308 | + 309 | +// SetQUICSR mocks base method 310 | +func (m *MockConnection) SetQUICSR(arg0 []string, arg1 []byte, arg2 bool) { 311 | + m.ctrl.T.Helper() 312 | + m.ctrl.Call(m, "SetQUICSR", arg0, arg1, arg2) 313 | +} 314 | + 315 | +// SetQUICSR indicates an expected call of SetQUICSR 316 | +func (mr *MockConnectionMockRecorder) SetQUICSR(arg0, arg1, arg2 interface{}) *gomock.Call { 317 | + mr.mock.ctrl.T.Helper() 318 | + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, 
"SetQUICSR", reflect.TypeOf((*MockConnection)(nil).SetQUICSR), arg0, arg1, arg2) 319 | +} 320 | diff --git a/mock_quic_session_test.go b/mock_quic_session_test.go 321 | index 23e4f9c4..e04f3b17 100644 322 | --- a/mock_quic_session_test.go 323 | +++ b/mock_quic_session_test.go 324 | @@ -316,3 +316,15 @@ func (mr *MockQuicSessionMockRecorder) shutdown() *gomock.Call { 325 | mr.mock.ctrl.T.Helper() 326 | return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "shutdown", reflect.TypeOf((*MockQuicSession)(nil).shutdown)) 327 | } 328 | + 329 | +// SetQUICSR mocks base method 330 | +func (m *MockQuicSession) SetQUICSR(arg0 []string, arg1 []byte, arg2 bool) { 331 | + m.ctrl.T.Helper() 332 | + m.ctrl.Call(m, "SetQUICSR", arg0, arg1, arg2) 333 | +} 334 | + 335 | +// SetQUICSR indicates an expected call of SetQUICSR 336 | +func (mr *MockQuicSessionMockRecorder) SetQUICSR(arg0, arg1, arg2 interface{}) *gomock.Call { 337 | + mr.mock.ctrl.T.Helper() 338 | + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetQUICSR", reflect.TypeOf((*MockQuicSession)(nil).SetQUICSR), arg0, arg1, arg2) 339 | +} 340 | diff --git a/packet_handler_map.go b/packet_handler_map.go 341 | index 1daec42b..fea8ddd6 100644 342 | --- a/packet_handler_map.go 343 | +++ b/packet_handler_map.go 344 | @@ -236,7 +236,22 @@ func (h *packetHandlerMap) listen() { 345 | h.close(err) 346 | return 347 | } 348 | - h.handlePacket(addr, buffer, data[:n]) 349 | + //Hack for quic-SR 350 | + if data[0] == 0 { 351 | + //quic-SR packet 352 | + qsrLen := data[1] 353 | + srcAddrLoc := uint8(data[2]) + 4 354 | + af := uint8(data[3]) 355 | + var segLen uint8 = 6 356 | + if af == 2 { 357 | + segLen = 18 358 | + } 359 | + src := data[srcAddrLoc : srcAddrLoc+segLen] 360 | + srcAddr := ByteToNetAddr(src) 361 | + h.handlePacket(srcAddr, buffer, data[qsrLen:n]) 362 | + } else { 363 | + h.handlePacket(addr, buffer, data[:n]) 364 | + } 365 | } 366 | } 367 | 368 | diff --git a/server.go b/server.go 369 | index b20f0b13..7f2a4d22 100644 
370 | --- a/server.go 371 | +++ b/server.go 372 | @@ -50,6 +50,7 @@ type quicSession interface { 373 | destroy(error) 374 | shutdown() 375 | closeForRecreating() protocol.PacketNumber 376 | + SetQUICSR([]string, []byte,bool) 377 | } 378 | 379 | // A Listener of QUIC 380 | diff --git a/session.go b/session.go 381 | index 040bc12a..009339cf 100644 382 | --- a/session.go 383 | +++ b/session.go 384 | @@ -1652,3 +1652,7 @@ func (s *session) getPerspective() protocol.Perspective { 385 | func (s *session) GetVersion() protocol.VersionNumber { 386 | return s.version 387 | } 388 | + 389 | +func (s *session) SetQUICSR(seglist []string, flowid []byte, dupSend bool) { 390 | + s.conn.SetQUICSR(seglist, flowid, dupSend) 391 | +} 392 | -------------------------------------------------------------------------------- /slides/QUIC-SR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zartbot/draft-quic-sr/d93f7315e1c238f7828bca6fbe4db16a2efac5cd/slides/QUIC-SR.pdf --------------------------------------------------------------------------------