├── .gitignore ├── README.md ├── control-plane ├── bfrt.py ├── brokerctl ├── config.json ├── configd.py ├── default.nix ├── mib.py ├── packet_broker.py ├── sample-config-tofino-model.json ├── schema.json └── setup.py ├── include ├── drop.p4 ├── egress.p4 ├── filter.p4 ├── forward.p4 ├── hash.p4 ├── headers.p4 ├── metadata.p4 ├── mirror.p4 ├── parser.p4 ├── protocol_headers.p4 ├── types.p4 └── vlan.p4 └── packet_broker.p4 /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # packet-broker 2 | 3 | A P4 program that provides "packet broker" functionality on a 4 | Tofino-based system. Access to the Barefoot SDE is required for 5 | compiling and running the P4 program contained in this repository. 6 | The SDE is provided by Intel (who acquired Barefoot Networks in 2019). 7 | Currently, this requires going through an application process and 8 | entering an NDA with Intel. 9 | 10 | Contents 11 | 12 | * [Overview](#overview) 13 | * [Port Designations on Tofino Platforms](#tofino-ports) 14 | * [Architecture](#architecture) 15 | * [Building](#building) 16 | * [Running](#running) 17 | * [Header Parsing](#header-parsing) 18 | * [Configuration](#configuration) 19 | * [Interacting with the Control Plane with `brokerctl`](#brokerctl) 20 | 21 | ## Overview 22 | 23 | The main purpose of the broker is to aggregate traffic from a set of 24 | ingress ports to a group of egress ports. The traffic is distributed 25 | to the members of the egress port group based on its flow signature, 26 | mapping all packets that belong to the same flow to the same port. 27 | 28 | The definition of a flow depends on the type of packet. For a IPv4 or 29 | IPv6 packet (Ethertype `0x0800` and `0x86dd`, respectively), the basic 30 | flow signature is composed of the source and destination IPv4/IPv6 31 | addresses as well as the protocol identifier (the _IP protocol_ and 32 | _next-header_ fields for IPv4 and IPv6, respectively). If the 33 | protocol is UDP or TCP, the source and destination ports are part of 34 | the flow as well. 35 | 36 | For non IPv4/IPv6 packets, the flow signature is composed of the 37 | Ethernet source and destination addresses as well as the Ethertype 38 | field. 39 | 40 | The identity of the port on which a particular packet was received is 41 | lost during the aggregation process. The broker uses VLAN tags to 42 | preserve this information as follows. 43 | 44 | * Untagged packets 45 | 46 | A VLAN header is inserted into the packet with a given VLAN 47 | ID. This action is called _push_. 48 | 49 | * Tagged packets 50 | 51 | The VLAN ID is replaced with a given value. This action is called 52 | _rewrite_. 53 | 54 | In the current implementation, this functionality is mandatory. Every 55 | ingress port must specify how VLAN tags are rewritten and/or pushed to 56 | packets arriving on that port. Accordingly, all packets leaving the 57 | broker contain a VLAN tag, i.e. each egress port group is effectively 58 | a VLAN trunk. 59 | 60 | In addition, the broker can optionally rewrite MAC source and 61 | destination addresses for VLANs (addresses of untagged packets cannot 62 | be rewritten) and drop incoming packets based on source IPv4/IPv6 63 | addresses. The latter functionality is referred to as a 64 | _source-filter_. 65 | 66 | Packets which do not match any of the VLAN actions (i.e. push 67 | or rewrite) defined for the ingress port are dropped. Instead of 68 | actually dropping the packets, they can optionally be sent to a 69 | specified port instead. This feature is called _deflect-on-drop_. 70 | 71 | Finally, the broker also provides the ability to create copies of 72 | packets that match a specific flow pattern and send them to an 73 | arbitrary port for inspection. The mirroring takes place before the 74 | packets are modified by any action described above as well as before 75 | any of the source filters are applied. 76 | 77 | 78 | ## Port Designations on Tofino Platforms 79 | 80 | The first-generation Tofino ASIC has 256 10/25G SerDes Lanes, grouped 81 | into 64 ports with 4 lanes each. It has an additional port, referred 82 | to as _CPU Eth_, which also supports 1G operation on each lane as well 83 | as a PCIe interface, referred to as _CPU PCIe_, consisting of 4 Gen3 84 | lanes. The main purpose of the latter two ports is to exchange 85 | packets with the host CPU, hence their names. 86 | 87 | The ASIC has 4 packet-processing pipelines, numbered from 0 to 3, each 88 | of which has 16 ports hardwired to it. Each SerDes line is uniquely 89 | identified by its _device id_ (also referred to as physical id in this 90 | document). This ID is a 9-bit number whose two most-significant bits 91 | denote the number of the pipe to which the lane is connected. The 92 | lower 7 bits denote the number of the SerDes lane within the pipe 93 | starting with 0, i.e. the lanes on pipe 0 have device ids 0 through 94 | 63, those on pipe 1 have ids 128 through 191 etc. 95 | 96 | The four lanes of the CPU Eth port are associated with pipe 0 and have 97 | the device ids 64-67. 98 | 99 | The CPU PCIe port is special in the sense that its device id depends 100 | on whether the ASIC is used in a 4 pipe or a 2 pipe configuration. In 101 | the former, the port's id is 320 while in the latter it is 192. 102 | 103 | How many of the pipelines are used and which of the ports are exposed 104 | on the front plate of the chassis as physical connectors depends on 105 | the device type. Examples for such devices are the WEDGE100BF-64X and 106 | WEDGE100BF-32X from Edgecore Networks. 107 | 108 | The WEDGE100BF-64X uses all four pipes and exposes 64 QSFP ports for 109 | all regular 256 SerDes Lanes. On that model, the CPU Eth port is 110 | exposed as an additional QSFP port on the front panel as well. 111 | 112 | The WEDGE100BF-32X uses two pipes and exposes 32 QSFP ports on the 113 | front panel. 114 | 115 | The QSFP ports are numbered 1-65 and 1-32 on the 64X and the 32X, 116 | respectively (port 65 on the 64X is the CPU Eth port). Instead of 117 | using device ids, the physical lanes within these ports are addressed 118 | by specifying the port and lane number separated by a slash, where the 119 | lane number ranges from 0 to 3. For example, `2/3` refers to the 120 | fourth lane in QSFP port 2. 121 | 122 | This is also the notation used in the configuration file of the packet 123 | broker wherever a QSFP ports needs to be referenced. 124 | 125 | On the 32X, the CPU Eth port is not wired to the front panel as on the 126 | 64X. Instead, two of its lanes are connected to a dual-port 10G Intel 127 | NIC on the main board (note that it may be necessary to enable these 128 | ports in the BIOS before they become available). The port itself can 129 | be addressed like one of the physical Ports with port id 33. Once the 130 | system has booted, there will be two 10G ports available as devices 131 | `enp4s0f0` and `enp4s0f1`. The association with the ports on the ASIC 132 | is as follows 133 | 134 | * 33/0 -> `enp4s0f1` 135 | * 33/2 -> `enp4s0f0` 136 | 137 | They can be used like any other port in the packet broker 138 | configuration. 139 | 140 | In contrast to this, the CPU PCIe port can only be referred to by its 141 | device id (320 or 192, see above). 142 | 143 | ## Architecture 144 | 145 | The P4 program provides the _data plane_ for the packet broker, 146 | i.e. it applies the algorithm specified in the program to packets 147 | entering the device. An additional component called _control plane_ 148 | is needed to populate the tables that drive the match-action-units in 149 | the processing pipelines according to a high-level configuration by 150 | the user. 151 | 152 | ### Data Plane 153 | 154 | The data plane consists of a process called `bf_switchd`, which takes 155 | the artifacts of the P4 program produced by the compiler and loads 156 | them onto the ASIC. Apart from that, it provides two additional 157 | services. One is to listen to connections on a TCP port to 158 | communicate with the control plane using gRPC. The other is to provide 159 | a CLI (called `bfshell`) to interact with the various components of 160 | the ASIC, e.g. to show the status of ports or information about QSFP 161 | plugins. 162 | 163 | The P4 compiler as well as the `bf_switchd` and `bfshell` programs are 164 | part of the Barefoot SDE and are not provided by this repository. 165 | 166 | ### Control Plane 167 | 168 | When the P4 program is launched by `bf_switchd`, it doesn't do 169 | anything yet, because all match-action tables are empty and all ports 170 | are physically shut down. 171 | 172 | The task of the control plane is to take a configuration file and 173 | translate it into instructions to manipulate ports and match-action 174 | tables. These instructions are then sent to `bf_switchd` for execution 175 | through an interface based on gRPC. In addition to that, the control 176 | plane also queries `bf_switchd` for information about the current 177 | state of the device either from a request issued by a user or 178 | periodically to update interface statistics in a SNMP MIB. 179 | 180 | The control plane of the packet broker consists of a daemon called 181 | `configd`, which is running permanently, and a program called 182 | `brokerctl` which is used by the operator to interact with the daemon. 183 | 184 | ## Building 185 | 186 | ### Data Plane 187 | 188 | To compile `packet_broker.p4`, download, build and install the 189 | Barefoot SDE according to the documentation. The program has been 190 | tested with versions 9.1.1, 9.2.0 and 9.3.0 of the SDE. Unfortunately, 191 | it is not possible to publish any details about this process here due 192 | to the NDA. 193 | 194 | In the remainder of this documentation it is assumed that the 195 | environment variables `SDE` and `SDE_INSTALL` are set correctly 196 | according to the SDE documentation. 197 | 198 | After a successful compilation, the build artifacts are stored in 199 | `$SDE/install`. 200 | 201 | ### Control Plane 202 | 203 | The control plane consists of a collection of Python scripts and 204 | modules in the `control-plane` sub-directory of this repository. A 205 | standard `setup.py` file is supplied for installation with 206 | `setuptools`. Dependencies on non-standard modules are declared in 207 | `setup.py` but there is also an implicit dependence on Python modules 208 | supplied by the SDE, which is covered below. This dependency 209 | currently forces the control plane to use Python version 2.7. 210 | 211 | Assuming that `pip` and `virtualenv` are available, the following 212 | procedure should successfully install the control plane in a Python 213 | virtual environment 214 | 215 | ``` 216 | $ cd control-plane 217 | $ virtualenv /usr/local/packet-broker 218 | $ source /usr/local/packet-broker/bin/activate 219 | $ pip install . 220 | ``` 221 | 222 | ## Running 223 | 224 | ### Data Plane 225 | 226 | The `bf_switchd` process requires the kernel module `bf_kpkt` to be 227 | loaded. This module also makes the CPU PCIe port available as a 228 | regular Linux network interface called `/dev/bf_pci0` (unless it is 229 | being renamed by `udev`). The module can be loaded with 230 | 231 | ``` 232 | $ sudo $SDE_INSTALL/bin/bf_kpkt_mod_load 233 | ``` 234 | 235 | The `packet_broker` P4 program is run with 236 | 237 | ``` 238 | $ sudo $SDE_INSTALL/bin/run_switchd.sh -p packet_broker 239 | ``` 240 | 241 | ### Control Plane 242 | 243 | The control plane daemon needs to be able to access the run time Python 244 | modules from the SDE, which can be arranged with 245 | 246 | ``` 247 | $ source /usr/local/packet-broker/bin/activate 248 | $ export PYTHONPATH=$SDE_INSTALL/lib/python2.7/site-packages/tofino 249 | $ /usr/local/packet-broker/bin/configd.py 250 | ``` 251 | 252 | The following options are available 253 | 254 | * `--config-dir ` 255 | 256 | Path of the directory containing configuration and schema files, 257 | default `/etc/packet-broker` 258 | 259 | * `--ifmibs-dir ` 260 | 261 | Path of the directory where shared memory regions for interface 262 | MIBs are created, default `/var/run/packet-broker`. This 263 | directory must exist when `configd.py` is started. 264 | 265 | * `--stats-update-interval ` 266 | 267 | Interval in seconds, at which the interface statistics in the 268 | ifTable MIB are synchronized with the hardware, default 5 269 | 270 | * `--connect-retries ` 271 | 272 | The number of retries the gRPC client attempts to connect to the 273 | server at one-second intervals, default 30 274 | 275 | * `--listen-on
` 276 | 277 | The local addresses to listen on for communication with the 278 | `brokerctl` command, default is to listen on all local addresses 279 | 280 | * `--port ` 281 | 282 | The port to use for communication with the `brokerctl` command, 283 | default 7000 284 | 285 | 286 | ### SNMP Support 287 | 288 | The `configd.py` process generates shared memory segments which are 289 | compatible with [an implementation of a SNMP 290 | subagent](https://github.com/alexandergall/snabb-snmp-subagent), which 291 | uses the AgentX protocol to interface with a SNMP daemon to provide 292 | the `ifTable` and `ifXTable` MIBs for the interfaces managed by the 293 | packet broker. Details TBD. 294 | 295 | ## Header Parsing 296 | 297 | The packet broker classifies packets received on ingress according to 298 | the [P4 299 | parser](https://github.com/alexandergall/packet-broker/blob/master/include/parser.p4). To 300 | summarize: 301 | 302 | * IPv4 and IPv6 in untagged or single-tagged packets with Ethertype 303 | 0x8100. Packets with two or more VLAN tags are treated as non-IP 304 | packets. 305 | * Arbitrary IPv4 options are detected and skipped. 306 | * Only fragmentation headers are recognized and parsed for IPv6 and 307 | only if they are the first extension header after the base 308 | header. The presence of any other extension headers results in 309 | TCP/UDP ports to not be available for hash calculations when 310 | forwarding packets to a group of egress ports. 311 | * Non-initial fragments are recognized for IPv4 and IPv6. 312 | 313 | ## Configuration 314 | 315 | The packet broker is configured from a file called `config.json` 316 | located in the directory specified with the `--config-dir` option of 317 | `configd.py`. By default, this is `/etc/packet-broker/config.json`. 318 | The file must contain a valid JSON expression which validates against 319 | the schema provided in `control-plane/schema.json`. The schema file 320 | itself must be present in the configuration directory, 321 | e.g. `/etc/packet-broker/schema.json`. 322 | 323 | The overall structure is the following 324 | 325 | ``` 326 | { 327 | "ports": { 328 | "ingress": { 329 | }, 330 | "egress": { 331 | }, 332 | "other": { 333 | } 334 | }, 335 | "source-filter": [ 336 | ], 337 | "flow-mirror": [ 338 | ], 339 | "features": { 340 | } 341 | } 342 | ``` 343 | 344 | Each of these blocks is described in detail in the following sections. 345 | 346 | ### Ports 347 | 348 | The `ports` section defines which ports should be used by the packet 349 | broker. The ports are split into three functional groups `ingress`, 350 | `egress` and `other` as described below. The ports in all of the 351 | groups share the following basic configuration 352 | 353 | ``` 354 | "/": { 355 | "config": { 356 | "description": , 357 | "speed": , 358 | "fec": , 359 | "mtu": , 360 | "shutdown": true | false 361 | } 362 | } 363 | ``` 364 | 365 | The interface is identified by its `` and `` in the 366 | slash-notation introduced above. The `` corresponds to the 367 | labeling of the QSFP ports on the front panel of the device and 368 | `` ranges from 0 to 3, e.g. `1/0` refers to lane 0 on QSFP port 369 | 1. 370 | 371 | * `description`, **optional**, default is an empty string 372 | 373 | An arbitrary string that identifies the purpose of the 374 | interface. This string will also appear as the `ifAlias` object 375 | of the row representing the interface in the `ifXTable` if the 376 | SNMP functionality is enabled. 377 | 378 | * `speed`, **mandatory** 379 | 380 | The bit rate at which to run the SerDes lane, must be one of 381 | 382 | * `BF_SPEED_1G` 383 | * `BF_SPEED_10G` 384 | * `BF_SPEED_25G` 385 | * `BF_SPEED_40G` 386 | * `BF_SPEED_40G_NB` 387 | * `BF_SPEED_40G_NON_BREAKABLE` 388 | * `BF_SPEED_50G` 389 | * `BF_SPEED_100G` 390 | 391 | Note that certain restrictions exist as to which lanes these 392 | setting can be applied to. The most important restrictions are 393 | 394 | * `BF_SPEED_40G` and `BF_SPEED_100G` can only be applied to lane 0 395 | * `BF_SPEED_50G` can only be applied to lanes 0 and 2 396 | 397 | * `fec`, **optional**, default is `BF_FEC_TYP_NONE` 398 | 399 | The FEC algorithm to use, one of 400 | 401 | * `BF_FEC_TYP_NONE` to disable FEC 402 | * `BF_FEC_TYP_FC` to select the Fire code FEC 403 | * `BF_FEC_TYP_RS` to select the Reed Solomon FEC 404 | 405 | * `mtu`, **mandatory** 406 | 407 | The MTU, including all packet headers. Must be in the range 1200 408 | to 10240. 409 | 410 | * `shutdown`, **optional**, default is `false` 411 | 412 | A boolean (`true` or `false`) that determines the operational 413 | state of the interface. 414 | 415 | #### Ingress 416 | 417 | The `ingress` section is mandatory. It contains a list of interfaces 418 | on which the packet broker expects packets to arrive for processing. 419 | Apart from the basic port configuration, each port requires additional 420 | options that define the behavior with respect to VLAN tagging and MAC 421 | rewriting as follows 422 | 423 | ``` 424 | "/": { 425 | "config": { 426 | }, 427 | "egress-group": , 428 | "vlans": { 429 | "push": , 430 | "rewrite": [ 431 | ] 432 | } 433 | } 434 | ``` 435 | 436 | The `egress-group` field is mandatory and must reference a group of 437 | ports defined in the `egress` section. All packets arriving on this 438 | port that pass the criteria set by the rules in the `vlans` section as 439 | described below will be forwarded to one of the members of this port 440 | group according to their flow signature. 441 | 442 | The `push` and `rewrite` sections are both optional, but specifying 443 | neither of them results in all packets being dropped. 444 | 445 | If `push` is specified, a 802.1Q header (Ethertype `0x8100`) is added to 446 | all untagged packets with the VLAN ID set to `` and all other 447 | fields (`PCP`, `DEI`) set to zero. It has no effect on packets that 448 | already have a 802.1Q header. 449 | 450 | The `rewrite` section, if specified, only applies to packets with a 451 | 802.1Q header. It has no effect on untagged packets. This section 452 | must contain a list of objects of the form 453 | 454 | ``` 455 | { 456 | "in": , 457 | "out": 458 | "mac-rewrite": { 459 | "src": { 460 | "": "", 461 | ... 462 | }, 463 | "dst": { 464 | "": "", 465 | ... 466 | } 467 | } 468 | } 469 | ``` 470 | 471 | The `in` and `out` fields are mandatory and have the following effect. 472 | A packet whose VLAN ID matches `` is accepted and its VLAN 473 | ID is replaced with ``. 474 | 475 | The `mac-rewrite` section is optional. If present, it rewrites source 476 | and/or destination MAC addresses as specified by the `src` and `dst` 477 | lists, respectively, for packets whose VLAN ID matches ``. 478 | Addresses that do not appear as `` in any of the `src` or 479 | `dst` sections, remain unchanged. 480 | 481 | Consider the following example 482 | 483 | ``` 484 | "vlans": { 485 | "rewrite": [ 486 | { "in": 600, 487 | "out": 207 488 | }, 489 | { "in": 333, 490 | "out": 211, 491 | "mac-rewrite": { 492 | "src": { 493 | "ac:4b:c8:40:e2:b9": "02:00:00:00:00:01" 494 | } 495 | } 496 | } 497 | ] 498 | } 499 | ``` 500 | 501 | This will replace VLAN ID 600 by 207 without rewriting any addresses 502 | in VLAN 600. It will also replace VLAN ID 333 by VLAN ID 211 and 503 | replace all occurences of `ac:4b:c8:40:e2:b9` as the MAC source 504 | address of packets with VLAN ID 333 with `02:00:00:00:00:01`. 505 | 506 | All tagged packets whose VLAN ID doesn't match any of the `in` fields 507 | are dropped. To accept all packets for a VLAN without changing the 508 | VLAN ID, an explicit `rewrite` clause must be present with 509 | `` set to ``, e.g. 510 | 511 | ``` 512 | "vlans": } 513 | "rewrite": [ 514 | { 515 | "in": 600, 516 | "out": 600 517 | } 518 | ] 519 | } 520 | ``` 521 | 522 | #### Egress 523 | 524 | The `egress` section is mandatory. It defines groups of interfaces to 525 | which packets arriving on ingress interfaces can be sent to. 526 | 527 | ``` 528 | "egress": { 529 | "group-id": , 530 | "members": { 531 | } 532 | } 533 | ``` 534 | 535 | The `group-id` field is mandatory and must specify an integer by which 536 | the group can be uniquely identified by the `egress-group` field of 537 | ports defined in the `ingress` port section. 538 | 539 | The `members` field is mandatory and must contain at least one 540 | standard port definition. 541 | 542 | #### Other 543 | 544 | The `other` ports section is optional and contains only standard port 545 | definition clauses. These ports can be used as egress ports for the 546 | `flow-mirror` and `deflect-on-drop` features. 547 | 548 | ### Source Filter 549 | 550 | The `source-filter` section is optional. It contains a list of strings 551 | which must represent valid IPv4 or IPv6 prefixes, for example 552 | 553 | ``` 554 | "source-filter": [ 555 | "192.168.1.0/24", 556 | "2001:db8::/64" 557 | ] 558 | ``` 559 | 560 | The list is applied to all IPv4 and IPv6 packets (tagged or untagged) 561 | received on any of the ingress ports. All packets whose source IPv4 562 | or IPv6 address match any of the prefixes specified in this list are 563 | dropped. 564 | 565 | ### Flow Mirror 566 | 567 | The `flow-mirror` section is optional. It contains a list of flow 568 | patterns for the purpose of packet mirroring. A copy of every packet 569 | arriving on any of the ingress interfaces or a subset thereof which 570 | matches any of the flow patterns in this list is sent to the port 571 | specified in the `flow-mirror` section of the `features` section. 572 | 573 | A flow pattern is defined as follows 574 | 575 | ``` 576 | { 577 | "ingress-ports": [ , ... ], 578 | "non-ip": true|false, 579 | "src": , 580 | "dst": , 581 | "src_port": { "port": , "mask": }, 582 | "dst_port": { "port": , "mask": }, 583 | "bidir": true|false, 584 | "enable": true|false 585 | } 586 | ``` 587 | 588 | The `ingress-ports` list is optional. If omitted, the mirroring rules 589 | are applied to all ingress ports. Otherwise, the rules are only 590 | applied to the ports in the list. 591 | 592 | If the optional property `non-ip` is present and set to `true`, all 593 | packets that are neither IPv4 (Ethertype `0x0800`) or IPv6 (Ethertype 594 | `0x86dd`) are mirrored and all match fields are ignored. 595 | 596 | If `non-ip` is omitted or set to `false`, the fields `src`, `dst`, 597 | `src_port`, and `dst_port` must be present and determine which packets 598 | are selected for mirroring. The fields `bidir` and `enable` are 599 | optional. Ternary matches are used when comparing the patterns with 600 | the corresponding fields in the packets arriving on the ingress ports. 601 | This means that each pattern consists of a value and a mask, where the 602 | mask is as wide as the value in terms of the number of bits. Only 603 | those bits whose corresponding bit in the mask is equal to 1 are 604 | relevant. All bits in the value whose corresponding bit in the mask is 605 | 0 are ignored. A mask value of 0 effectively ignores the entire field. 606 | 607 | The `src` and `dst` field must use standard prefix notation, e.g. 608 | `"192.168.10.0/24"` or `"2001:db8:1::/64"`. The mask is derived from 609 | the prefix length. The prefixes in both fields must belong to the 610 | same address family (IPv4 or IPv6). 611 | 612 | The `src_port` and `dst_port` fields match UDP or TCP port numbers, 613 | which must be in the range from 0 to 65535. The mask must be 614 | specified explicitly as a decimal number in the same range. 615 | 616 | If the `bidir` field is set to `true`, an additional flow pattern is 617 | automatically generated with all source and destination fields 618 | reversed. The default is `false`. 619 | 620 | If the `enable` field is set to `false`, the flow pattern is not 621 | programmed into the hardware and is thus effectively ignored. The 622 | default is `true`. 623 | 624 | ### Features 625 | 626 | This section is used to configure features that are not directly 627 | associated with specific ports. It is optional with default values 628 | given below. The basic structure is as follows 629 | 630 | ``` 631 | "features": { 632 | "deflect-on-drop": , 633 | "flow-mirror": { 634 | "port": , 635 | "max-packet-length": 636 | }, 637 | "drop-non-initial-fragments": true | false, 638 | "exclude-ports-from-hash": true | false, 639 | "drop-non-ip": true | false 640 | } 641 | ``` 642 | 643 | If the `deflect-on-drop` feature is configured, all packets that are 644 | marked to be dropped are forwarded to the specified port instead. The 645 | port can be specified either as a string of the form `"/"` 646 | just as in the `ports` section or as a number representing the 647 | [physical port id](#tofino-ports). The latter is really only needed to 648 | select the CPU PCIe ports, which are the only ports that do not have a 649 | representation as a `/` pair (they are identified by 192 650 | and 320 for the 32X and 64X platforms, respectively). 651 | 652 | A packet is marked to be dropped if any of the following conditions 653 | are met 654 | 655 | * The packet is untagged but the ingress port doesn't have a `push` 656 | directive 657 | * The packet is tagged but the ingress port either doesn't have a 658 | `rewrite` section or the VLAN ID does not match any of the `"in"` 659 | fields 660 | * The packet is an IPv4 or IPv6 packet and belongs to any of the 661 | prefixes specified in the `source-filter` section 662 | * The packet is a non-initial fragment of a fragmented IPv4 or IPv6 663 | packet and the `drop-non-initial-fragments` feature is enabled 664 | * The packet is neither a IPv4 or IPv6 packet (Ethertypes 0x0800 or 665 | 0x86dd either tagged or untagged) and the `drop-non-ip` feature 666 | is enabled 667 | * The P4 parser is unable to parse the packet headers (e.g. if the 668 | header is truncated) 669 | 670 | The `deflect-on-drop` feature is disabled by default. 671 | 672 | The `flow-mirror` section sets parameters common to all flow mirror 673 | rules. The `port` field specifies the destination port for mirrored 674 | packets and it accepts both logical and physical port ids like the 675 | `defelect-on-drop` field. If the `port` field is omitted, flow 676 | mirroring is effectively disabled, which is the default. Note that 677 | the Tofino architecture mandates that the egress port for mirrored 678 | packets must be a single port, i.e. hash-based distribution to a group 679 | of ports is not supported. 680 | 681 | The `max-packet-length` field is used to limit the size of mirrored 682 | packets to the given number of bytes. It must be in the range from 0 683 | to 16384. The default is 16384, i.e. mirrored packets are not 684 | truncated. 685 | 686 | If the `drop-non-initial-fragments` feature is enabled, non-initial 687 | fragments of fragmented IPv4 or IPv6 packets are dropped. A packet is 688 | considered to be a non-initial fragment if the following condition 689 | holds 690 | 691 | * IPv4: The fragment offset field of the IPv4 header is non-zero 692 | * IPv6: The IPv6 packet contains a fragmentation header as first 693 | extension header and the fragment offset field in the header is 694 | non-zero 695 | 696 | The default is to not drop non-initial fragments. 697 | 698 | If the `exclude-ports-from-hash` feature is enabled, the TCP/UDP ports 699 | are ignored when calculating the flow-based hash for IPv4 and IPv6 700 | packets. This is useful if `drop-non-initial-fragments` is disabled 701 | and it is desired that non-initial fragments are mapped to the same 702 | egress port as the initial fragments. The default is to include the 703 | ports in the hash calculation. 704 | 705 | If the `drop-non-ip` feature is enabled, all untagged or single-tagged 706 | packets whose Ethertype field is not equal to either 0x0800 (IPv4) or 707 | 0x86dd (IPv6) are dropped. The default is to not drop non-IP packets. 708 | 709 | ## Interacting with the Control Plane with `brokerctl` 710 | 711 | The `configd.py` process loads its initial configuration from the file 712 | `conifg.json` in the directory specified by the `--config-dir` command 713 | line option, `/etc/packet-broker` by default. It does not check the 714 | configuration file for changes automatically after that. Any 715 | interaction with the daemon after startup must be performed by the 716 | `brokerctl` command. Its basic usage is as follows 717 | 718 | ``` 719 | usage: brokerctl [-h] [--port PORT] [--host HOST] 720 | {reload,add,remove,dump,show} ... 721 | 722 | Packet Broker controller 723 | 724 | optional arguments: 725 | -h, --help show this help message and exit 726 | --port PORT 727 | --host HOST 728 | 729 | Available commands: 730 | {reload,add,remove,dump,show} 731 | reload Reload configuration 732 | add Add dynamic table entries 733 | remove Remove dynamic table entries 734 | dump Dump tables from hardware 735 | show Show running configuration 736 | ``` 737 | 738 | `brokerctl` connects to `configd.py` via TCP on port 7000 by 739 | default. It uses a simple JSON encoding to submit a command to the 740 | daemon and receive a completion message or error code from the daemon. 741 | By default, `brokerctl` connects to a daemon listening on `127.0.0.1` 742 | (IPv6 is currently not supported). A different address can be supplied 743 | with the `--host` option to communicate with a daemon running on a 744 | remote host. There are currently no security mechanisms in place to 745 | secure the connection, thus it is recommended to use a firewall or ACL 746 | for protection. 747 | 748 | A command can be sent to multiple daemons simultaneously by specifying 749 | multiple `--host` options. Hosts can either be specified as literal 750 | IPv4 addresses or domain names which can be resolved to an IPv4 751 | address. 752 | 753 | After submitting a command, `brokerctl` waits until it receives a 754 | reply from the daemon and displays it as a pretty-printed JSON object. 755 | The format is as follows 756 | 757 | ``` 758 | { 759 | "": { 760 | "success": true | false, 761 | "msgs": [], 762 | "result": {} 763 | }, 764 | "": { 765 | "success": true | false, 766 | "msgs": [], 767 | "result": {} 768 | }, 769 | ... 770 | } 771 | 772 | ``` 773 | 774 | The output contains one section for each address for which a `--host` 775 | option was specified when `brokerctl` was invoked. Each such section 776 | contains an indicator whether the command completed successfully, an 777 | optional list of messages and the result of the command itself. 778 | 779 | The messages (if any) are essentially a copy of the log messages 780 | generated by `configd.py` while executing the command. In most cases, 781 | no messages are generated when a command completes successfully. 782 | 783 | A message is of the form 784 | 785 | ``` 786 | { 787 | "msg": , 788 | "level": 789 | } 790 | ``` 791 | 792 | Here, `` is an arbitrary string set by the daemon and 793 | `` is the numerical value of the logging level as used by 794 | the Python `logging` module when the message was created by the 795 | daemon. In future versions of `brokerctl`, this could be used to feed 796 | the daemon's response directly into another instance of the Python 797 | `logging` module maintained by `brokerctl` itself. 798 | 799 | For example: 800 | 801 | ``` 802 | $ brokerctl add source-filter "foo" 803 | INFO:brokerctl: Trying 127.0.0.1 804 | INFO:brokerctl: Connected 805 | { 806 | "127.0.0.1": { 807 | "success": false, 808 | "msgs": [ 809 | { 810 | "msg": "Command 'add' failed: u'foo' does not appear to be an IPv4 or IPv6 network", 811 | "level": 40 812 | } 813 | ], 814 | "result": null 815 | } 816 | } 817 | ``` 818 | 819 | Each of the available commands is documented below. 820 | 821 | ### `reload` 822 | 823 | This command doesn't take any arguments. It notifies `configd.py` to 824 | re-load the configuration file and update the hardware tables 825 | accordingly. Successful completion is indicated by the following 826 | response: 827 | 828 | ``` 829 | { 830 | "success": true, 831 | "msgs": [], 832 | "result": null 833 | } 834 | ``` 835 | 836 | Any syntactic or semantic error in the configuration will result in a 837 | failure (`success` set to `false`) and a message which provides 838 | details about the error (unfortunately, the Python JSON modules tend 839 | to produce hard to understand messages in case of syntax errors and 840 | failures to validate the configuration against the schema). 841 | 842 | ### `show` 843 | 844 | The `show` command is used to display various components of the 845 | currently active configuration. Its usage is 846 | 847 | ``` 848 | usage: brokerctl show [-h] 849 | {ingress,features,flow-mirror,source-filter,groups,ports} 850 | ... 851 | 852 | optional arguments: 853 | -h, --help show this help message and exit 854 | 855 | {ingress,features,flow-mirror,source-filter,groups,ports} 856 | Show running configuration 857 | ingress Ingress processing 858 | features Features 859 | flow-mirror Flow mirror rules 860 | source-filter Source filters 861 | groups Port configurations 862 | ports Port configurations 863 | ``` 864 | 865 | The output is generated from the daemon's in-memory copy of the 866 | configuration file. For the `flow-mirror` argument, only the entries 867 | with `enable` set to `true` are displayed. The output for 868 | `source-filter` includes the list of filters that have been added with 869 | the `add` command as well. 870 | 871 | ### `add` 872 | 873 | The `add` command is used to modify certain tables in a dynamic manner 874 | (i.e. without modifying the configuration file). Its usage is given by 875 | 876 | ``` 877 | 878 | optional arguments: 879 | -h, --help show this help message and exit 880 | 881 | Available items to add: 882 | {source-filter} Add dynamic table entries 883 | source-filter Source filters 884 | ``` 885 | 886 | Currently, only source filters can be configured dynamically. The 887 | `source-filter` sub-command takes an IPv4 or IPv6 prefix as its only 888 | argument, e.g. 889 | 890 | ``` 891 | $ brokerctl add 192.168.0.0/24 892 | INFO:brokerctl: Trying localhost 893 | INFO:brokerctl: Connected 894 | { 895 | "localhost": { 896 | "success": true, 897 | "msgs": [ 898 | { 899 | "msg": "Added source filter 192.168.0.0/24", 900 | "level": 20 901 | } 902 | ], 903 | "result": null 904 | } 905 | } 906 | ``` 907 | 908 | Source filters added this way are persistent across restarts of 909 | `configd.py` by writing them to the file `source_filter_dynamic` 910 | located in the configuration directory, e.g. 911 | 912 | ``` 913 | $ cat /etc/packet-broker/source_filter_dynamic 914 | ## Automatically generated file. DO NOT EDIT. 915 | 192.168.0.0/24 916 | $ 917 | ``` 918 | 919 | Filters added in this manner are displayed by `brokerctl show 920 | source-filter` in a separate table called `source-filter-dynamic`, 921 | e.g. 922 | 923 | ``` 924 | $ brokerctl show source-filter 925 | INFO:brokerctl: Trying localhost 926 | INFO:brokerctl: Connected 927 | { 928 | "localhost": { 929 | "success": true, 930 | "msgs": [], 931 | "result": { 932 | "source-filter": [], 933 | "source-filter-dynamic": [] 934 | } 935 | } 936 | } 937 | ``` 938 | 939 | ### `remove` 940 | 941 | The `remove` command removes items that have previously been added 942 | with the `add` command. Its usage is 943 | 944 | ``` 945 | usage: brokerctl remove [-h] {source-filter} ... 946 | 947 | optional arguments: 948 | -h, --help show this help message and exit 949 | 950 | {source-filter} Remove dynamic table entries 951 | source-filter Source filters 952 | ``` 953 | 954 | For example 955 | 956 | ``` 957 | $ brokerctl remove source-filter 192.168.0.0/24 958 | INFO:brokerctl: Trying localhost 959 | INFO:brokerctl: Connected 960 | { 961 | "localhost": { 962 | "success": true, 963 | "msgs": [ 964 | { 965 | "msg": "Removed source filter 192.168.0.0/24", 966 | "level": 20 967 | } 968 | ], 969 | "result": null 970 | } 971 | } 972 | ``` 973 | 974 | ### `dump` 975 | 976 | The `dump` command reads the key/data pairs of a given match-action 977 | table from the hardware and displays them. The names of key and data 978 | fields in the output corresponds to the corresponding names used in 979 | the definition of the table in the P4 source code. The usage is 980 | 981 | ``` 982 | usage: brokerctl dump [-h] 983 | {ingress,select-output,flow-mirror,source-filter,mac-rewrite,forward} 984 | ... 985 | 986 | optional arguments: 987 | -h, --help show this help message and exit 988 | 989 | {ingress,select-output,flow-mirror,source-filter,mac-rewrite,forward} 990 | Dump tables from hardware 991 | ingress Ingress VLAN push/rewrite rules 992 | select-output Ingress port to output group mapping 993 | flow-mirror Flow mirror rules 994 | source-filter Source filters 995 | mac-rewrite Ingress source MAC rewrite rules 996 | forward Output group to port mapping 997 | ``` 998 | 999 | An understanding of the source code is necessary to interpret the 1000 | result. 1001 | 1002 | Depending on the type of table, the output can contain additional 1003 | fields which are maintained directly by the hardware. For example, 1004 | the `source-filter` feature also collects the number of bytes and 1005 | packets dropped by a specific rule. For example 1006 | 1007 | ``` 1008 | $ brokerctl dump source-filter 1009 | { 1010 | "localhost": { 1011 | "success": true, 1012 | "msgs": [], 1013 | "result": [ 1014 | { 1015 | "prefix": "78.128.113.42/32", 1016 | "counters": { 1017 | "packets": 178863020, 1018 | "bytes": 11447427238 1019 | } 1020 | } 1021 | } 1022 | } 1023 | } 1024 | ``` 1025 | -------------------------------------------------------------------------------- /control-plane/bfrt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | import inspect 5 | 6 | import bfrt_grpc.client as gc 7 | 8 | class Table: 9 | def __init__(self, bfrt, name, loc): 10 | self.bfrt = bfrt 11 | self.name = name 12 | self.loc = loc 13 | self.table = bfrt.info.table_get(loc) 14 | ## Not used in the code. This dict contains the TableInfo 15 | ## object for the table. It can be used to inspect the 16 | ## properties, e.g. to find the names of all valid actions: 17 | ## self.table_info.action_name_list_get() 18 | self.table_info = bfrt.info.parsed_info.table_info_dict[loc] 19 | 20 | def _mk_key(self, keys): 21 | if keys is not None: 22 | return [ self.table.make_key( 23 | list(map(lambda key: gc.KeyTuple(**key), keys))) ] 24 | else: 25 | return None 26 | 27 | def _mk_data_tuple(self, data): 28 | return list(map(lambda elt: gc.DataTuple(**elt), data)) 29 | 30 | def _mk_action(self, name, data): 31 | if name is not None: 32 | return self.table.make_data( 33 | self._mk_data_tuple(data), 34 | name) 35 | else: 36 | return self.table.make_data( 37 | self._mk_data_tuple(data)) 38 | 39 | ### For debugging 40 | def dump(self): 41 | from pprint import pprint 42 | print("DUMP of table " + self.name) 43 | for data, key in self.entry_get_iterator(None): 44 | pprint(key.to_dict()) 45 | pprint(data.to_dict()) 46 | print("DUMP END") 47 | 48 | def clear(self): 49 | self.table.entry_del(self.bfrt.target, None) 50 | 51 | ### Look up a single key. Return the data dictionary of the 52 | ### result or None if no entries match. 53 | def entry_get(self, keys, data_fields = [], from_hw = False): 54 | resp = self.entry_get_iterator(keys, data_fields, from_hw) 55 | try: 56 | data = next(resp)[0].to_dict() 57 | except: 58 | return None 59 | return data 60 | 61 | ### Like entry_get(), but just return the iterable object of 62 | ### results. 63 | def entry_get_iterator(self, keys, data_fields = [], from_hw = False): 64 | return self.table.entry_get(self.bfrt.target, 65 | self._mk_key(keys), 66 | { "from_hw": from_hw }, 67 | self.table.make_data( 68 | self._mk_data_tuple(data_fields))) 69 | 70 | def entry_add(self, keys, action_name, action_data = []): 71 | self.table.entry_add(self.bfrt.target, 72 | self._mk_key(keys), 73 | [ self._mk_action(action_name, action_data) ]) 74 | 75 | def entry_del(self, keys): 76 | self.table.entry_del(self.bfrt.target, self._mk_key(keys)) 77 | 78 | def entry_mod(self, keys, action_name, action_data = []): 79 | self.table.entry_mod(self.bfrt.target, 80 | self._mk_key(keys), 81 | [ self._mk_action(action_name, action_data) ]) 82 | 83 | def default_entry_set(self, action_name, action_data = []): 84 | self.table.default_entry_set(self.bfrt.target, 85 | self._mk_action(action_name, action_data)) 86 | 87 | def default_entry_reset(self): 88 | self.table.default_entry_reset(self.bfrt.target) 89 | 90 | class Bfrt: 91 | def __init__(self, program, retries, addr = 'localhost:50052', 92 | client_id = 0, device_id = 0): 93 | ## Due to a bug in client.py, the num_tries parameter is currently 94 | ## fixed at 5. 95 | re_retries = int((retries-1)/5) + 1 96 | for i in range (0, re_retries): 97 | ## The "is_master" argument has been removed in SED 9.4.0 98 | args = dict(client_id = client_id, num_tries=retries, 99 | device_id = device_id) 100 | if "is_master" in inspect.getfullargspec(gc.ClientInterface.__init__).args: 101 | args["is_master"] = True 102 | try: 103 | self.intf = gc.ClientInterface(addr, **args) 104 | except: 105 | if i < re_retries - 1: 106 | continue 107 | else: 108 | raise Exception("connection attempts exceeded") 109 | else: 110 | break 111 | self.intf.bind_pipeline_config(program) 112 | self.target = gc.Target(device_id = device_id, pipe_id = 0xffff) 113 | self.info = self.intf.bfrt_info_get() 114 | 115 | def table(self, name, loc): 116 | return Table(self, name, loc) 117 | -------------------------------------------------------------------------------- /control-plane/brokerctl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import sys 4 | import logging 5 | import socket 6 | import json 7 | 8 | def reload(args): 9 | return None 10 | 11 | def show(args): 12 | item = args.command_1 13 | if item == 'source-filter': 14 | return [ item, 'source-filter-dynamic' ] 15 | return [ item ] 16 | 17 | def dump(args): 18 | item = args.command_1 19 | return { 20 | 'source-filter': [ 'filter_ipv4', 'filter_ipv6' ], 21 | 'flow-mirror': [ 'mirror_ipv4', 'mirror_ipv6' ], 22 | 'ingress': [ 'ingress_tagged', 'ingress_untagged' ], 23 | 'mac-rewrite': [ 'ingress_src_mac_rewrite', 'ingress_dst_mac_rewrite' ], 24 | 'select-output': [ 'select_output' ], 25 | 'forward': [ 'forward'], 26 | 'port-groups': [ 'port_groups' ], 27 | 'port-groups-sel': [ 'port_groups_sel' ] 28 | }[item] 29 | 30 | def add_remove(args): 31 | item = args.command_1 32 | if item == 'source-filter': 33 | return { item: args.prefix } 34 | 35 | def create_subparser(prsr, spec, level = 0): 36 | for name, args in spec.get('arguments', {}).items(): 37 | prsr.add_argument(name, **args) 38 | if 'commands' in spec.keys(): 39 | sprsr = prsr.add_subparsers(help = spec.get('help', None), 40 | dest = 'command_' + str(level), 41 | description = spec.get('description', None), 42 | title = spec.get('title', None)) 43 | for command, spec in spec['commands'].items(): 44 | prsr = sprsr.add_parser(command, help = spec['help']) 45 | if spec.get('func', None) is not None: 46 | prsr.set_defaults(func=spec['func']) 47 | create_subparser(prsr, spec, level+1) 48 | 49 | def request(host, port, command, args = None): 50 | req = { 51 | 'command': command, 52 | 'args': args 53 | } 54 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 55 | logger.info("Trying {}".format(host)) 56 | s.connect((host, port)) 57 | logger.info("Connected") 58 | s.send((json.dumps(req) + "\n").encode()) 59 | f = s.makefile() 60 | line = f.readline() 61 | resp = json.loads(line) 62 | s.close() 63 | return(resp) 64 | 65 | parse_tree = { 66 | 'title': 'Available commands', 67 | 'arguments': { 68 | '--host': { 69 | 'action': 'append' 70 | }, 71 | '--port': { 72 | 'type': int, 73 | 'default': 7000, 74 | 'action': 'store' 75 | } 76 | }, 77 | 'commands': { 78 | 'reload': { 79 | 'help': "Reload configuration", 80 | 'func': reload 81 | }, 82 | 'show': { 83 | 'help': "Show running configuration", 84 | 'func': show, 85 | 'commands': { 86 | 'ports': { 87 | 'help': "Port configurations", 88 | }, 89 | 'groups': { 90 | 'help': "Port configurations", 91 | }, 92 | 'ingress': { 93 | 'help': "Ingress processing", 94 | }, 95 | 'source-filter': { 96 | 'help': "Source filters", 97 | }, 98 | 'flow-mirror': { 99 | 'help': "Flow mirror rules", 100 | }, 101 | 'features': { 102 | 'help': "Features", 103 | } 104 | } 105 | }, 106 | 'dump': { 107 | 'help': "Dump tables from hardware", 108 | 'func': dump, 109 | 'commands': { 110 | 'source-filter': { 111 | 'help': "Source filters" 112 | }, 113 | 'flow-mirror': { 114 | 'help': "Flow mirror rules" 115 | }, 116 | 'ingress': { 117 | 'help': "Ingress VLAN push/rewrite rules" 118 | }, 119 | 'mac-rewrite': { 120 | 'help': "Ingress source MAC rewrite rules" 121 | }, 122 | 'select-output': { 123 | 'help': "Ingress port to output group mapping" 124 | }, 125 | 'forward': { 126 | 'help': "Output group to port mapping" 127 | } 128 | } 129 | }, 130 | 'add': { 131 | 'help': "Add dynamic table entries", 132 | 'title': "Available items to add", 133 | 'func': add_remove, 134 | 'commands': { 135 | 'source-filter': { 136 | 'help': "Source filters", 137 | 'arguments': { 138 | 'prefix': { 139 | 'nargs': '*', 140 | 'help': "An IPv4 or IPv6 prefix" 141 | } 142 | } 143 | } 144 | } 145 | }, 146 | 'remove': { 147 | 'help': "Remove dynamic table entries", 148 | 'func': add_remove, 149 | 'commands': { 150 | 'source-filter': { 151 | 'help': "Source filters", 152 | 'arguments': { 153 | 'prefix': { 154 | 'nargs': '*', 155 | 'help': "An IPv4 or IPv6 prefix" 156 | } 157 | } 158 | } 159 | } 160 | } 161 | } 162 | } 163 | 164 | logging.basicConfig(level = logging.INFO, 165 | format='%(levelname)s:%(name)s: %(message)s') 166 | logger = logging.getLogger('brokerctl') 167 | 168 | prsr = argparse.ArgumentParser(description = "Packet Broker controller") 169 | create_subparser(prsr, parse_tree) 170 | args = prsr.parse_args() 171 | if not args.host: 172 | args.host = [ 'localhost' ] 173 | 174 | result = {} 175 | rc = 0 176 | for host in args.host: 177 | result[host] = request(host, args.port, args.command_0, args.func(args)) 178 | if not result[host]['success']: 179 | rc = 1 180 | print(json.dumps(result, indent=2)) 181 | sys.exit(rc) 182 | -------------------------------------------------------------------------------- /control-plane/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "ports": { 3 | "ingress": { 4 | }, 5 | "egress": [ 6 | ], 7 | "other": { 8 | } 9 | }, 10 | "source-filter": [ 11 | ], 12 | "flow-mirror": [ 13 | ], 14 | "features": { 15 | "deflect-on-drop": "192", 16 | "flow-mirror": { 17 | "port": "33/0", 18 | "max-packet-length": 0 19 | }, 20 | "drop-non-initial-fragments": false, 21 | "exclude-ports-from-hash": false 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /control-plane/configd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | import argparse 3 | import sys 4 | import os 5 | import signal 6 | import time 7 | import logging 8 | import socket, select 9 | import json 10 | import bfrt, packet_broker 11 | 12 | class JSONEncoder(json.JSONEncoder): 13 | def default(self, obj): 14 | if hasattr(obj, 'compressed'): 15 | return obj.compressed 16 | else: 17 | pprint.pprint(obj) 18 | raise TypeError 19 | 20 | parser = argparse.ArgumentParser(description='Packet-broker configuration daemon') 21 | parser.add_argument('--config-dir', help= 22 | """Path of the directory containing configuration 23 | and schema files""", 24 | required=False, default="/etc/packet-broker") 25 | parser.add_argument('--ifmibs-dir', help= 26 | """Path of the directory where shared memory 27 | regions for interface MIBs are created""", 28 | required=False, default="/var/run/packet-broker") 29 | parser.add_argument('--stats-update-interval', help= 30 | """Interval in seconds, at which the interface 31 | statistics in the ifTable MIB are synchronized 32 | with the hardware counters""", 33 | type=int, required=False, default=5) 34 | parser.add_argument('--connect-retries', help= 35 | """The number of retries the gRPC client attempts 36 | to connect to the server at one-second intervals""", 37 | type=int, required=False, default=30) 38 | parser.add_argument('--listen-on', help= 39 | """The addresses to listen on for communication with 40 | the brokerctl command""", 41 | type=str, default='') 42 | parser.add_argument('--port', help= 43 | """The port to use for communication with the 44 | brokerctl command""", 45 | type=int, default=7000) 46 | args = parser.parse_args() 47 | 48 | ## Make outputs unbuffered for logging purposes 49 | if sys.version_info < (3, 0): 50 | sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) 51 | sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', 0) 52 | else: 53 | sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', buffering=1) 54 | sys.stderr = os.fdopen(sys.stderr.fileno(), 'w', buffering=1) 55 | 56 | logging.basicConfig(level = logging.INFO, 57 | format='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s: %(message)s', 58 | datefmt='%Y-%m-%d %H:%M:%S') 59 | logger = logging.getLogger('configd') 60 | 61 | ## XXX: make dual-stack 62 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 63 | s.bind((args.listen_on, args.port)) 64 | s.listen(5) 65 | logger.info("Listening on {}/{} for connections". 66 | format(args.listen_on if args.listen_on else 'any', 67 | str(args.port))) 68 | 69 | bfrt = bfrt.Bfrt("packet_broker", retries = args.connect_retries) 70 | broker = packet_broker.PacketBroker(bfrt, args.config_dir, args.ifmibs_dir) 71 | 72 | def tear_down_and_exit(rc): 73 | ## The method has been renamed in SDE 9.4.0 74 | if hasattr(bfrt.intf, '_tear_down_stream'): 75 | bfrt.intf._tear_down_stream() 76 | else: 77 | bfrt.intf.tear_down_stream() 78 | sys.exit(rc) 79 | 80 | if not broker.handle_request(('self', 0), { 'command': 'reload'})['success']: 81 | tear_down_and_exit(1) 82 | 83 | signals = dict((getattr(signal, n), n) \ 84 | for n in dir(signal) if n.startswith('SIG') and '_' not in n ) 85 | 86 | def exit_handler(signal, frame): 87 | logger.info("Received {}, exiting".format(signals[signal])) 88 | s.close() 89 | tear_down_and_exit(0) 90 | 91 | signal.signal(signal.SIGTERM, exit_handler) 92 | signal.signal(signal.SIGINT, exit_handler) 93 | 94 | stats_stamp = time.time() 95 | while True: 96 | r, w, e = select.select([s], [], [], args.stats_update_interval) 97 | if s in r: 98 | c, peer = s.accept() 99 | f = c.makefile() 100 | line = f.readline() 101 | if not line: 102 | print("EOF") 103 | c.close() 104 | break 105 | resp = broker.handle_request(peer, json.loads(line)) 106 | c.send((json.dumps(resp, cls = JSONEncoder) + "\n").encode()) 107 | c.close() 108 | 109 | now = time.time() 110 | 111 | if not r or now - stats_stamp >= args.stats_update_interval: 112 | stats_stamp = now 113 | broker.update_stats() 114 | -------------------------------------------------------------------------------- /control-plane/default.nix: -------------------------------------------------------------------------------- 1 | { sde_version, kernel_version }: 2 | 3 | let 4 | pkgs = import ; 5 | bf-sde = pkgs.bf-sde.${sde_version}.${kernel_version}; 6 | in with pkgs; python2Packages.buildPythonApplication rec { 7 | pname = "packet-broker-configd"; 8 | version = "0.1"; 9 | 10 | src = ./.; 11 | 12 | propagatedBuildInputs = [ 13 | bf-sde 14 | (python2.withPackages (ps: with ps; [ jsonschema ipaddress ])) 15 | ]; 16 | buildInputs = [ makeWrapper ]; 17 | 18 | postInstall = '' 19 | wrapProgram "$out/bin/configd.py" --set PYTHONPATH "${bf-sde}/install/lib/python2.7/site-packages/tofino" 20 | ''; 21 | } 22 | -------------------------------------------------------------------------------- /control-plane/mib.py: -------------------------------------------------------------------------------- 1 | import os, io, mmap, struct, time 2 | 3 | ## Apparently, this is needed to make super() work with python 2.7 4 | __metaclass__ = type 5 | 6 | class SMIv2: 7 | def __init__(self, mib, offset): 8 | self.mib = mib 9 | self.offset = offset 10 | 11 | class int_t(SMIv2): 12 | def __init__(self, mib, offset): 13 | super(int_t, self).__init__(mib, offset) 14 | 15 | def set(self, value): 16 | value = value % self.modulo 17 | struct.pack_into(self.fmt, self.mib.map, self.offset, value) 18 | 19 | def get(self): 20 | res = struct.unpack_from(self.fmt, self.mib.map, self.offset) 21 | return res[0] 22 | 23 | class int32_t(int_t): 24 | def __init__(self, mib, offset): 25 | self.size = 4 26 | self.fmt = "@i" 27 | self.modulo = 2**31 28 | super(int32_t, self).__init__(mib, offset) 29 | 30 | class uint32_t(int_t): 31 | def __init__(self, mib, offset): 32 | self.size = 4 33 | self.fmt = "@I" 34 | self.modulo = 2**32 35 | super(uint32_t, self).__init__(mib, offset) 36 | 37 | class uint64_t(int_t): 38 | def __init__(self, mib, offset): 39 | self.size = 8 40 | self.fmt = "@Q" 41 | self.modulo = 2**64 42 | super(uint64_t, self).__init__(mib, offset) 43 | 44 | class octetstr_t(SMIv2): 45 | def __init__(self, mib, offset, length): 46 | ## Account for length bytes 47 | self.size = 2 + length 48 | super(octetstr_t, self).__init__(mib, offset) 49 | 50 | def set(self, value): 51 | assert(isinstance(value, bytes)) 52 | length = min(len(value), self.size-2) 53 | struct.pack_into("@H", self.mib.map, self.offset, length) 54 | struct.pack_into("{0:d}s".format(length), 55 | self.mib.map, self.offset+2, value) 56 | 57 | def get(self): 58 | len = struct.unpack_from("@H", self.mib.map, self.offset)[0] 59 | res = struct.unpack_from("{0:d}s".format(len), 60 | self.mib.map, self.offset+2) 61 | return res[0] 62 | 63 | types = { 64 | 'Integer32': int32_t, 65 | 'Unsigned32': uint32_t, 66 | 'OctetStr' : octetstr_t, 67 | 'Counter32': uint32_t, 68 | 'Counter64': uint64_t, 69 | 'Gauge32': uint32_t, 70 | 'TimeTicks': uint32_t, 71 | ## Not yet implemented 72 | # 'Bits': octetstr_t 73 | } 74 | 75 | class MIB: 76 | def __init__(self, file): 77 | index_file = file + ".index" 78 | self.file = file 79 | self.data_f = io.open(file, "w+b", buffering=0) 80 | self.index_f = io.open(index_file, "w") 81 | self.index_f.write(u'MIB:1\n') 82 | self.index_f.flush() 83 | 84 | self.map = None 85 | ## Current offset in the data segment where the next object 86 | ## will be allocated by register() 87 | self.offset = 0 88 | self.objects = {} 89 | 90 | def register(self, name, type, value = None, octet_str_len = None): 91 | assert(type in types.keys()) 92 | assert(name not in self.objects.keys()) 93 | 94 | if type == 'OctetStr': 95 | assert(octet_str_len is not None) 96 | obj = types[type](self, self.offset, octet_str_len) 97 | else: 98 | obj = types[type](self, self.offset) 99 | 100 | self.objects[name] = obj 101 | self.offset = self.offset + obj.size 102 | self.index_f.write(u"{0:s}: {1:d}\n".format(name, obj.size)) 103 | self.index_f.flush() 104 | 105 | ## Grow the shared memory region to the new size 106 | fileno = self.data_f.fileno() 107 | os.ftruncate(fileno, self.offset) 108 | if self.map is not None: 109 | self.map.close() 110 | self.map = mmap.mmap(fileno, 0, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE) 111 | 112 | if value is not None: 113 | obj.set(value) 114 | 115 | def set(self, name, value): 116 | assert(name in self.objects.keys()) 117 | self.objects[name].set(value) 118 | 119 | def get(self, name): 120 | assert(name in self.objects.keys()) 121 | return self.objects[name].get() 122 | 123 | class ifmib(MIB): 124 | def __init__(self, file): 125 | super(ifmib, self).__init__(file) 126 | self.register('ifDescr', 'OctetStr', octet_str_len = 255) 127 | self.register('ifName', 'OctetStr', octet_str_len = 255) 128 | self.register('ifAlias', 'OctetStr', octet_str_len = 64 ) # interface description 129 | self.register('ifType', 'Integer32', 6 ) # ethernetCsmacd 130 | self.register('ifMtu', 'Integer32') 131 | self.register('ifSpeed', 'Gauge32') 132 | self.register('ifHighSpeed', 'Gauge32') 133 | self.register('ifPhysAddress', 'OctetStr', octet_str_len = 0) 134 | self.register('ifAdminStatus', 'Integer32', 2) # down 135 | self.register('ifOperStatus', 'Integer32', 2) # down 136 | self.register('ifLastChange', 'TimeTicks', 0) 137 | self.register('_X_ifLastChange_TicksBase', 'Counter64', 138 | int(time.time())) 139 | self.register('ifInOctets', 'Counter32', 0) 140 | self.register('ifInUcastPkts', 'Counter32', 0) 141 | self.register('ifInDiscards', 'Counter32', 0) 142 | self.register('ifInErrors', 'Counter32', 0) 143 | self.register('ifInUnknownProtos', 'Counter32', 0) 144 | self.register('ifOutOctets', 'Counter32', 0) 145 | self.register('ifOutUcastPkts', 'Counter32', 0) 146 | self.register('ifOutDiscards', 'Counter32', 0) 147 | self.register('ifOutErrors', 'Counter32', 0) 148 | 149 | self.register('ifInMulticastPkts', 'Counter32', 0) 150 | self.register('ifInBroadcastPkts', 'Counter32', 0) 151 | self.register('ifOutMulticastPkts', 'Counter32', 0) 152 | self.register('ifOutBroadcastPkts', 'Counter32', 0) 153 | self.register('ifHCInOctets', 'Counter64', 0) 154 | self.register('ifHCInUcastPkts', 'Counter64', 0) 155 | self.register('ifHCInMulticastPkts', 'Counter64', 0) 156 | self.register('ifHCInBroadcastPkts', 'Counter64', 0) 157 | self.register('ifHCOutOctets', 'Counter64', 0) 158 | self.register('ifHCOutUcastPkts', 'Counter64', 0) 159 | self.register('ifHCOutMulticastPkts', 'Counter64', 0) 160 | self.register('ifHCOutBroadcastPkts', 'Counter64', 0) 161 | self.register('ifLinkUpDownTrapEnable', 'Integer32', 2) # disabled 162 | self.register('ifPromiscuousMode', 'Integer32', 2) # false 163 | ## This information is not available from the $PORT table 164 | self.register('ifConnectorPresent', 'Integer32', 1) # true 165 | self.register('ifCounterDiscontinuityTime', 'TimeTicks', 0) 166 | ## Discontinuities per RFC 2233 are not possible for 167 | ## this platform (interfaces indices are fixed) 168 | self.register('_X_ifCounterDiscontinuityTime', 'Counter64', 0) 169 | 170 | def set_properties(self, properties): 171 | p = properties 172 | self.set('ifDescr', p['ifDescr']) 173 | self.set('ifName', p['ifName']) 174 | self.set('ifAlias', p['ifAlias']) 175 | self.set('ifMtu', p['ifMtu']) 176 | if p['speed'] > 1000000000: 177 | self.set('ifSpeed', 4294967295) # RFC3635 sec. 3.2.8 178 | else: 179 | self.set('ifSpeed', p['speed']) 180 | self.set('ifHighSpeed', int(p['speed'] / 1000000)) 181 | 182 | def delete(self): 183 | self.data_f.close() 184 | self.index_f.close() 185 | os.unlink(self.file) 186 | os.unlink(self.file + ".index") 187 | 188 | def update(self, port, stat): 189 | if port['$PORT_ENABLE']: 190 | self.set('ifAdminStatus', 1) # up 191 | else: 192 | self.set('ifAdminStatus', 2) # down 193 | old_oper_status = self.get('ifOperStatus') 194 | if port['$PORT_UP']: 195 | self.set('ifOperStatus', 1) # up 196 | else: 197 | self.set('ifOperStatus', 2) # down 198 | if old_oper_status != self.get('ifOperStatus'): 199 | self.set('ifLastChange', 0) 200 | self.set('_X_ifLastChange_TicksBase', int(time.time())) 201 | 202 | self.set('ifInOctets', stat['$OctetsReceivedinGoodFrames']) 203 | self.set('ifInUcastPkts', stat['$FramesReceivedOK']) 204 | self.set('ifInDiscards', stat['$FramesDroppedBufferFull']) 205 | self.set('ifInErrors', stat['$FrameswithanyError']) 206 | self.set('ifOutOctets', stat['$OctetsTransmittedwithouterror']) 207 | self.set('ifOutUcastPkts', stat['$FramesTransmittedOK']) 208 | self.set('ifOutErrors', stat['$FramesTransmittedwithError']) 209 | self.set('ifInMulticastPkts', stat['$FramesReceivedwithMulticastAddresses']) 210 | self.set('ifInBroadcastPkts', stat['$FramesReceivedwithBroadcastAddresses']) 211 | self.set('ifOutMulticastPkts', stat['$FramesTransmittedMulticast']) 212 | self.set('ifOutBroadcastPkts', stat['$FramesTransmittedBroadcast']) 213 | 214 | self.set('ifHCInOctets', stat['$OctetsReceivedinGoodFrames']) 215 | self.set('ifHCInUcastPkts', stat['$FramesReceivedOK']) 216 | self.set('ifHCInMulticastPkts', stat['$FramesReceivedwithMulticastAddresses']) 217 | self.set('ifHCInBroadcastPkts', stat['$FramesReceivedwithBroadcastAddresses']) 218 | self.set('ifHCOutOctets', stat['$OctetsTransmittedwithouterror']) 219 | self.set('ifHCOutUcastPkts', stat['$FramesTransmittedOK']) 220 | self.set('ifHCOutMulticastPkts', stat['$FramesTransmittedMulticast']) 221 | self.set('ifHCOutBroadcastPkts', stat['$FramesTransmittedBroadcast']) 222 | 223 | ### Not available from stats 224 | #self.set('ifInUnknownProtos', 0) 225 | #self.set('ifOutDiscards', 0) 226 | 227 | return old_oper_status, self.get('ifOperStatus') 228 | -------------------------------------------------------------------------------- /control-plane/packet_broker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import re 4 | import json as JSON 5 | import jsonschema 6 | import ipaddress 7 | import mib 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | MIRROR_SESSION_ID = 1 12 | 13 | ctls = { 14 | 'vlan' : 'pipe.ig_ctl.ctl_push_or_rewrite_vlan', 15 | 'forward' : 'pipe.ig_ctl.ctl_forward_packet', 16 | 'filter_ipv4' : 'pipe.ig_ctl.ctl_filter_source_ipv4', 17 | 'filter_ipv6' : 'pipe.ig_ctl.ctl_filter_source_ipv6', 18 | 'mirror_ipv4' : 'pipe.ig_ctl.ctl_mirror_flows_ipv4', 19 | 'mirror_ipv6' : 'pipe.ig_ctl.ctl_mirror_flows_ipv6', 20 | 'mirror_non_ip' : 'pipe.ig_ctl.ctl_mirror_flows_non_ip', 21 | 'maybe_exclude_l4_from_hash' : 'pipe.ig_ctl.ctl_maybe_exclude_l4_from_hash', 22 | 'maybe_drop_fragment' : 'pipe.ig_ctl.ctl_maybe_drop_fragment', 23 | 'maybe_drop_non_ip' : 'pipe.ig_ctl.ctl_maybe_drop_non_ip', 24 | 'drop' : 'pipe.ig_ctl.ctl_drop_packet' 25 | } 26 | tables = { 27 | ### Internal tables 28 | ## Keys: $DEV_PORT' 29 | 'port': '$PORT', 30 | ## Keys: $DEV_PORT' 31 | 'port_stat': '$PORT_STAT', 32 | ## Keys: $PORT_NAME 33 | 'port_str_info': '$PORT_STR_INFO', 34 | ## Keys: $CONN_ID, $CHNL_ID 35 | 'port_hdl_info': '$PORT_HDL_INFO', 36 | ## Keys: $sid 37 | 'mirror_cfg': '$mirror.cfg', 38 | 39 | ### Program tables 40 | ## Keys: ingress_port 41 | 'ingress_untagged': ctls['vlan'] + '.tbl_ingress_untagged', 42 | ## Keys: ingress_port, ingress_vid 43 | 'ingress_tagged': ctls['vlan'] + '.tbl_ingress_tagged', 44 | ## Keys: ingress_port, ingress_vid, src_mac_addr 45 | 'ingress_src_mac_rewrite': ctls['vlan'] + '.tbl_ingress_src_mac_rewrite', 46 | ## Keys: ingress_port, ingress_vid, src_mac_addr 47 | 'ingress_dst_mac_rewrite': ctls['vlan'] + '.tbl_ingress_dst_mac_rewrite', 48 | ## Keys: src_addr 49 | 'filter_ipv4': ctls['filter_ipv4'] + '.tbl_filter_source_ipv4', 50 | ## Keys: src_addr 51 | 'filter_ipv6': ctls['filter_ipv6'] + '.tbl_filter_source_ipv6', 52 | ## Keys: src_addr, dst_addr, src_port, dst_port 53 | 'mirror_ipv4': ctls['mirror_ipv4'] + '.tbl_mirror_flows_ipv4', 54 | ## Keys: src_addr, dst_addr, src_port, dst_port 55 | 'mirror_ipv6': ctls['mirror_ipv6'] + '.tbl_mirror_flows_ipv6', 56 | ## Keys: ingress_port 57 | 'mirror_non_ip': ctls['mirror_non_ip'] + '.tbl_mirror_flows_non_ip', 58 | ## Keys: ingress_port 59 | 'select_output': ctls['forward'] + '.tbl_select_output', 60 | ## Keys: egress_group 61 | 'forward': ctls['forward'] + '.tbl_forward', 62 | ## Keys: $ACTION_MEMBER_ID 63 | 'port_groups': ctls['forward'] + '.port_groups', 64 | ## Keys: $SELECTOR_GROUP_ID 65 | 'port_groups_sel': ctls['forward'] + '.port_groups_sel', 66 | ## Keys: None 67 | 'maybe_exclude_l4': ctls['maybe_exclude_l4_from_hash'] + '.tbl_maybe_exclude_l4', 68 | ## Keys: None 69 | 'drop': ctls['drop'] + '.tbl_drop', 70 | ## Keys: None 71 | 'maybe_drop_fragment': ctls['maybe_drop_fragment'] + '.tbl_maybe_drop_fragment', 72 | ## Keys: None 73 | 'maybe_drop_non_ip': ctls['maybe_drop_non_ip'] + '.tbl_maybe_drop_non_ip' 74 | } 75 | 76 | ## Mappings of values of the $SPEED field in the 77 | ## $PORT table to bps. Used in the ifMIB to set 78 | ## the ifSpeed/ifHighSpeed elements. 79 | if_speed = { 80 | 'BF_SPEED_NONE': 0, 81 | 'BF_SPEED_1G': 1000000000, 82 | 'BF_SPEED_10G': 10000000000, 83 | 'BF_SPEED_25G': 25000000000, 84 | 'BF_SPEED_40G': 40000000000, 85 | 'BF_SPEED_40G_NB': 40000000000, 86 | 'BF_SPEED_40G_NON_BREAKABLE': 40000000000, 87 | 'BF_SPEED_50G': 50000000000, 88 | 'BF_SPEED_100G': 100000000000, 89 | 'BF_SPEED_200G': 200000000000, 90 | 'BF_SPEED_400G': 400000000000, 91 | ## On Tofino2, some speeds can be realized in distinct 92 | ## configurations that use different numbers of lanes, e.g. 100G 93 | ## as 4x25 or 2x50. The following names are not legal values for 94 | ## $SPEED but appear with similar names in the bfshell port 95 | ## manager. 96 | 'BF_SPEED_50G_R1': 50000000000, ## BF_SPEED_50G defaults to 2 lanes 97 | 'BF_SPEED_100G_R2': 100000000000, ## BF_SPEED_100G defaults to 4 lanes 98 | 'BF_SPEED_200G_R8': 200000000000, ## BF_SPEED_200G defaults to 4 lanes 99 | } 100 | 101 | class semantic_error(Exception): 102 | pass 103 | 104 | def json_load(name): 105 | with open(name) as file: 106 | parsed = JSON.load(file) 107 | file.close() 108 | return parsed 109 | 110 | class Config: 111 | def __init__(self): 112 | self.ports = {} 113 | self.groups = {} 114 | self.groups_ref = {} 115 | self.ingress = {} 116 | self.source_filter = [] 117 | self.source_filter_d = [] 118 | self.flow_mirror = [] 119 | self.features = { 120 | 'drop-non-initial-fragments': False, 121 | 'exclude-ports-from-hash': False, 122 | 'drop-non-ip': False 123 | } 124 | 125 | class PacketBroker: 126 | ## Pseudo class to let us refer to tables via 127 | ## attributes 128 | class t: 129 | pass 130 | 131 | def __init__(self, bfrt, config_dir, ifmibs_dir): 132 | self.bfrt = bfrt 133 | self.config_dir = config_dir 134 | self.ifmibs_dir = ifmibs_dir 135 | 136 | for name, loc in tables.items(): 137 | setattr(self.t, name, bfrt.table(name, loc)) 138 | 139 | ## Remove all shared memory segments to get rid of left-overs 140 | ## from previous runs 141 | for root, dirs, files in os.walk(self.ifmibs_dir): 142 | for file in files: 143 | os.unlink(self.ifmibs_dir+'/'+file) 144 | self.ifmibs = {} 145 | 146 | ## Whenever a new configuration is pushed to the device, all 147 | ## tables are cleared and re-programmed, except for the 148 | ## interfaces to avoid links going down during 149 | ## reconfiguration. This is done by using the port-specific 150 | ## part of the current configuration in the configure() method 151 | ## to perform a smooth transition to the new port 152 | ## configuration. When the config daemon starts, we create an 153 | ## initial pseudo-configuration here that only contains the 154 | ## state of the ports as read from the device for 155 | ## bootstrapping. 156 | logger.info("Detecting active ports") 157 | config = Config() 158 | for _port, _key in self.t.port.entry_get_iterator(None): 159 | port = _port.to_dict() 160 | key = _key.to_dict() 161 | name = port['$PORT_NAME'] 162 | logger.info("Port {0}({1}), Enable {2}, Up {3}".format( 163 | name, 164 | key['$DEV_PORT']['value'], 165 | port['$PORT_ENABLE'], 166 | port['$PORT_UP'] 167 | )) 168 | config.ports[name] = { 169 | 'description': '', 170 | 'speed': port['$SPEED'], 171 | 'mtu': port['$RX_MTU'], 172 | 'fec': port['$FEC'], 173 | 'shutdown': not port['$PORT_ENABLE'] 174 | } 175 | self.config = config 176 | 177 | def _get_dev_port(self, port): 178 | info = self.t.port_str_info.entry_get( 179 | [{ 'name': '$PORT_NAME', 'value': port }]) 180 | if info is None: 181 | raise semantic_error("invalid port {0:s}".format(port)) 182 | return info['$DEV_PORT'] 183 | 184 | def _msgs_clear(self): 185 | self.msgs = [] 186 | 187 | def _msg_add(self, msg, level = logging.INFO): 188 | self.msgs.append({ 189 | 'level': level, 190 | 'msg': msg 191 | }) 192 | 193 | def _info(self, msg): 194 | self._msg_add(msg) 195 | 196 | def _warning(self, msg): 197 | self._msg_add(msg, level = logging.WARNING) 198 | 199 | def _error(self, msg): 200 | self._msg_add(msg, level = logging.ERROR) 201 | 202 | def _dump_dynamic_source_filters(self): 203 | file = self.config_dir + "/source_filter_dynamic" 204 | try: 205 | f = open(file, "w") 206 | except Exception as e: 207 | raise Exception("Error opening {} for writing: {}". 208 | format(file, e)) 209 | f.write("## Automatically generated file. DO NOT EDIT.\n") 210 | for prefix in self.config.source_filter_d: 211 | f.write(prefix.compressed + "\n") 212 | try: 213 | f.close() 214 | except Exception as e: 215 | raise Exception("Error saving dynamic filters to {}: {}". 216 | format(file, e)) 217 | 218 | def _read_dynamic_source_filters(self, config): 219 | file = self.config_dir + "/source_filter_dynamic" 220 | if not os.path.exists(file): 221 | return 222 | try: 223 | f = open(file, "r") 224 | except Exception as e: 225 | raise Exception("Error opening {} for reading: {}". 226 | format(file, e)) 227 | for line in f: 228 | if re.match("^#", line): 229 | continue 230 | prefix = ipaddress.ip_network(line.rstrip().decode()) 231 | if prefix in config.source_filter: 232 | ## Can only happen if the prefix has been added 233 | ## manually to the file 234 | self._warning("Igonring dynamic filter colliding with persistent " + 235 | "filter: {} ".format(prefix.compressed)) 236 | elif prefix in config.source_filter_d: 237 | self._warning("Igonring duplicate dynamic filter: {}". 238 | format(prefix.compressed)) 239 | else: 240 | config.source_filter_d.append(prefix) 241 | f.close() 242 | 243 | def _read(self): 244 | json_file = self.config_dir + '/config.json' 245 | try: 246 | json = json_load(json_file) 247 | except Exception as e: 248 | raise Exception("JSON parse error on {0:s}: {1:s}". 249 | format(json_file, e)) 250 | try: 251 | schema = json_load(self.config_dir + '/schema.json') 252 | except Exception as e: 253 | raise Exception("BUG: JSON parse error on schema: {}".format(e)) 254 | try: 255 | jsonschema.validate(json, schema) 256 | except Exception as e: 257 | raise Exception("JSON validation error: {}".format(e)) 258 | return json 259 | 260 | def _parse(self, json): 261 | config = Config() 262 | 263 | def add_port(port, port_config): 264 | if port in config.ports.keys(): 265 | raise semantic_error("port {0:s} already defined".format(port)) 266 | full_config = { 267 | 'description': '', 268 | 'fec': 'BF_FEC_TYP_NONE', 269 | 'shutdown': False 270 | } 271 | full_config.update(port_config) 272 | config.ports[port] = full_config 273 | 274 | for group in json['ports']['egress']: 275 | id = group['group-id'] 276 | if id in config.groups.keys(): 277 | raise semantic_error("group id {0:d} already defined".format(id)) 278 | 279 | config.groups[id] = {} 280 | for port, dict in sorted(group['members'].items()): 281 | add_port(port, dict['config']) 282 | config.groups[id][port] = {} 283 | 284 | for port, dict in sorted(json['ports']['ingress'].items()): 285 | add_port(port, dict['config']) 286 | egress_group = dict['egress-group'] 287 | config.ingress[port] = { 288 | 'vlans' : dict['vlans'], 289 | 'egress_group' : egress_group 290 | } 291 | if not egress_group in config.groups.keys(): 292 | raise semantic_error("Undefined egress group {0:d}".format(egress_group)) 293 | 294 | for port, dict in sorted(json['ports']['other'].items()): 295 | add_port(port, dict['config']) 296 | 297 | if 'source-filter' in json.keys(): 298 | for str in json['source-filter']: 299 | prefix = ipaddress.ip_network(str) 300 | if prefix in config.source_filter: 301 | self._warning("Ignoring duplicate source filter: {0:s}" 302 | .format(prefix)) 303 | else: 304 | config.source_filter.append(prefix) 305 | 306 | try: 307 | self._read_dynamic_source_filters(config) 308 | except Exception as e: 309 | self._warning("Ignoring dynamic source filters: {}".format(e)) 310 | config.source_filter_d = [] 311 | 312 | if 'flow-mirror' in json.keys(): 313 | def add_flow(flow_in): 314 | flow = flow_in.copy() 315 | flow.pop('bidir', None) 316 | flow.pop('enable', None) 317 | flow['src'] = ipaddress.ip_network(flow['src']) 318 | flow['dst'] = ipaddress.ip_network(flow['dst']) 319 | 320 | if flow['src'].version != flow['dst'].version: 321 | raise semantic_error("Address family mismatch " + 322 | "in flow mirror rule: {}". 323 | format(JSON.dumps(flow_in))) 324 | 325 | flow['ingress-ports'] = [ self._get_dev_port(port) for port in sorted(flow.pop('ingress-ports', [])) ] 326 | if flow in config.flow_mirror: 327 | self._warning("Ignoring duplicate flow mirror rule: {}". 328 | format(JSON.dumps(flow_in))) 329 | else: 330 | config.flow_mirror.append(flow) 331 | 332 | for flow in json['flow-mirror']: 333 | if not flow.get('enable', True): 334 | continue 335 | add_flow(flow) 336 | if flow.get('bidir', False) and not flow.get('non-ip', False): 337 | add_flow({ 'ingress-ports': flow.get('ingress-ports', []), 338 | 'src': flow['dst'], 339 | 'dst': flow['src'], 340 | 'src_port': flow['dst_port'], 341 | 'dst_port': flow['src_port'] }) 342 | 343 | features = json.get('features', {}) 344 | for feature, value in features.items(): 345 | if feature == 'deflect-on-drop': 346 | if not re.match("^[0-9]+$", value): 347 | value = self._get_dev_port(value) 348 | config.features['deflect-on-drop'] = int(value) 349 | 350 | if feature == 'flow-mirror': 351 | cfg = value 352 | 353 | port = cfg['port'] 354 | if not re.match("^[0-9]+$", port): 355 | port = self._get_dev_port(port) 356 | 357 | if 'max-packet-length' in cfg.keys(): 358 | max_pkt_len = cfg['max-packet-length'] 359 | else: 360 | max_pkt_len = 16384 361 | config.features['flow-mirror'] = { 362 | 'port': int(port), 363 | 'max_pkt_len': max_pkt_len 364 | } 365 | 366 | if feature == 'drop-non-initial-fragments' and value: 367 | config.features['drop-non-initial-fragments'] = True 368 | 369 | if feature == 'exclude-ports-from-hash' and value: 370 | config.features['exclude-ports-from-hash'] = True 371 | 372 | if feature == 'drop-non-ip' and value: 373 | config.features['drop-non-ip'] = True 374 | 375 | if len(config.flow_mirror) > 0 and 'flow-mirror' not in features.keys(): 376 | raise semantic_error("Flow mirror feature configuration required " + 377 | "if enabled flow mirror rules are present") 378 | 379 | return config 380 | 381 | def _set_action_selector(self, method, group, members): 382 | id = [ member['id'] for member in members.values() ] 383 | status = [ member['status'] for member in members.values() ] 384 | method( 385 | [ { 'name': '$SELECTOR_GROUP_ID', 'value': group } ], 386 | None, 387 | [ { 'name': '$MAX_GROUP_SIZE', 'val': 8 }, 388 | ## References action_member_ids from profile 389 | { 'name': '$ACTION_MEMBER_ID', 'int_arr_val': id }, 390 | { 'name': '$ACTION_MEMBER_STATUS', 391 | 'bool_arr_val': status } ]) 392 | 393 | def _push(self, config): 394 | get_dev_port = self._get_dev_port 395 | 396 | ### Action profile and selector 397 | ## Order matters here 398 | self.t.forward.clear() 399 | self.t.port_groups_sel.clear() 400 | self.t.port_groups.clear() 401 | 402 | member_id = 1 403 | for group, members in config.groups.items(): 404 | ## Group is not referenced from 405 | ## the forwarding table 406 | config.groups_ref[group] = False 407 | for port, member in members.items(): 408 | dev_port = get_dev_port(port) 409 | member['id'] = member_id 410 | member['status'] = False 411 | self.t.port_groups.entry_add( 412 | [ { 'name': '$ACTION_MEMBER_ID', 'value': member_id } ], 413 | 'act_send', 414 | [ { 'name': 'egress_port', 'val': dev_port } ]) 415 | member_id += 1 416 | self._set_action_selector(self.t.port_groups_sel.entry_add, group, members) 417 | 418 | self.t.select_output.clear() 419 | self.t.ingress_untagged.clear() 420 | self.t.ingress_src_mac_rewrite.clear() 421 | self.t.ingress_dst_mac_rewrite.clear() 422 | self.t.ingress_tagged.clear() 423 | for port, dict in sorted(config.ingress.items()): 424 | dev_port = get_dev_port(port) 425 | vlans = dict['vlans'] 426 | egress_group = dict['egress_group'] 427 | self.t.select_output.entry_add( 428 | [ { 'name': 'ingress_port', 'value': dev_port } ], 429 | 'act_output_group', 430 | [ { 'name': 'group', 'val': egress_group } ]) 431 | 432 | if 'push' in vlans: 433 | self.t.ingress_untagged.entry_add( 434 | [ { 'name': 'ingress_port', 'value': dev_port } ], 435 | 'act_push_vlan', 436 | [ { 'name': 'vid', 'val': vlans['push'] } ]) 437 | 438 | if 'rewrite' in vlans: 439 | for rule in vlans['rewrite']: 440 | self.t.ingress_tagged.entry_add( 441 | [ { 'name': 'ingress_port', 'value': dev_port }, 442 | { 'name': 'ingress_vid', 'value': rule['in'] } ], 443 | 'act_rewrite_vlan', 444 | [ { 'name': 'vid', 'val': rule['out'] } ]) 445 | rewrite = rule.get("mac-rewrite", {}) 446 | for dir, spec in rewrite.items(): 447 | ## Note: the rewrite table is applied before 448 | ## the ingress_tagged table, hence the value 449 | ## of the VLAN tag must be the one of the 450 | ## original packet. 451 | if dir == "src": 452 | tbl = self.t.ingress_src_mac_rewrite 453 | else: 454 | tbl = self.t.ingress_dst_mac_rewrite 455 | field = dir + "_mac_addr" 456 | action = "act_rewrite_" + dir + "_mac" 457 | for addr, new_addr in spec.items(): 458 | tbl.table.info.key_field_annotation_add(field, "mac") 459 | tbl.table.info.data_field_annotation_add("mac_addr", 460 | action, 461 | "mac") 462 | tbl.entry_add( 463 | [ { 'name': 'ingress_port', 'value': dev_port }, 464 | { 'name': 'ingress_vid', 'value': rule['in'] }, 465 | { 'name': field, 'value': addr } ], 466 | action, 467 | [ { 'name': 'mac_addr', 'val': new_addr } ]) 468 | 469 | self.t.filter_ipv4.clear() 470 | self.t.filter_ipv6.clear() 471 | for prefix in (config.source_filter + config.source_filter_d): 472 | if prefix.version == 4: 473 | tbl = self.t.filter_ipv4 474 | ## Makes entry_add() accept "src_addr" as a string rather than 475 | ## a byte array 476 | tbl.table.info.key_field_annotation_add("src_addr", "ipv4") 477 | else: 478 | tbl = self.t.filter_ipv6 479 | tbl.table.info.key_field_annotation_add("src_addr", "ipv6") 480 | tbl.entry_add( 481 | [ { 'name': 'src_addr', 'value': prefix.network_address.exploded, 482 | 'prefix_len': prefix.prefixlen } ], 483 | 'act_drop', []) 484 | 485 | self.t.mirror_ipv4.clear() 486 | self.t.mirror_ipv6.clear() 487 | self.t.mirror_non_ip.clear() 488 | if 'flow-mirror' in config.features.keys(): 489 | for flow in config.flow_mirror: 490 | ports = flow['ingress-ports'] 491 | port_mask = 0x1ff; 492 | if len(ports) == 0: 493 | ports = [ 0 ] 494 | port_mask = 0 495 | if flow['src'].version == 4: 496 | tbl = self.t.mirror_ipv4 497 | tbl.table.info.key_field_annotation_add("src_addr", "ipv4") 498 | tbl.table.info.key_field_annotation_add("dst_addr", "ipv4") 499 | else: 500 | tbl = self.t.mirror_ipv6 501 | tbl.table.info.key_field_annotation_add("src_addr", "ipv6") 502 | tbl.table.info.key_field_annotation_add("dst_addr", "ipv6") 503 | for port in ports: 504 | if flow.get('non-ip', False): 505 | self.t.mirror_non_ip.entry_add( 506 | [ { 'name': 'ingress_port', 507 | 'value': port, 508 | 'mask': port_mask } ], 509 | 'act_mirror', 510 | [ { 'name': 'mirror_session', 'val': MIRROR_SESSION_ID } ]) 511 | else: 512 | tbl.entry_add( 513 | [ { 'name': 'ingress_port', 514 | 'value': port, 515 | 'mask': port_mask }, 516 | { 'name': 'src_addr', 517 | 'value': flow['src'].network_address.exploded, 518 | 'mask': int(flow['src'].netmask) }, 519 | { 'name': 'dst_addr', 520 | 'value': flow['dst'].network_address.exploded, 521 | 'mask': int(flow['dst'].netmask) }, 522 | { 'name': 'src_port', 523 | 'value': flow['src_port']['port'], 524 | 'mask': flow['src_port']['mask'] }, 525 | { 'name': 'dst_port', 526 | 'value': flow['dst_port']['port'], 527 | 'mask': flow['dst_port']['mask'] } ], 528 | 'act_mirror', 529 | [ { 'name': 'mirror_session', 'val': MIRROR_SESSION_ID } ]) 530 | 531 | flow_mirror = config.features['flow-mirror'] 532 | method = self.t.mirror_cfg.entry_add 533 | if self.t.mirror_cfg.entry_get([ { 'name': '$sid', 'value': MIRROR_SESSION_ID } ]): 534 | method = self.t.mirror_cfg.entry_mod 535 | method( 536 | [ { 'name': '$sid', 'value': MIRROR_SESSION_ID } ], 537 | '$normal', 538 | [ { 'name': '$session_enable', 'bool_val': True }, 539 | { 'name': '$direction', 'str_val': 'INGRESS' }, 540 | { 'name': '$ucast_egress_port', 'val': flow_mirror['port'] }, 541 | { 'name': '$ucast_egress_port_valid', 'bool_val': True }, 542 | { 'name': '$max_pkt_len', 'val': flow_mirror['max_pkt_len'] } ]) 543 | 544 | self.t.drop.default_entry_reset() 545 | self.t.maybe_drop_fragment.default_entry_reset() 546 | self.t.maybe_exclude_l4.default_entry_reset() 547 | self.t.maybe_drop_non_ip.default_entry_reset() 548 | 549 | if 'deflect-on-drop' in config.features.keys(): 550 | self.t.drop.default_entry_set( 551 | 'ig_ctl.ctl_drop_packet.send_to_port', 552 | [ { 'name': 'port', 'val': config.features['deflect-on-drop'] } ]) 553 | 554 | if config.features['drop-non-initial-fragments']: 555 | self.t.maybe_drop_fragment.default_entry_set('act_mark_to_drop') 556 | 557 | if config.features['exclude-ports-from-hash']: 558 | self.t.maybe_exclude_l4.default_entry_set('act_exclude_l4') 559 | 560 | if config.features['drop-non-ip']: 561 | self.t.maybe_drop_non_ip.default_entry_set('act_mark_to_drop') 562 | 563 | for port, pconfig in sorted(config.ports.items()): 564 | dev_port = get_dev_port(port) 565 | 566 | if port in self.config.ports.keys(): 567 | if self.config.ports[port] == pconfig: 568 | method = None 569 | else: 570 | method = self.t.port.entry_mod 571 | if self.config.ports[port]['shutdown'] != pconfig['shutdown']: 572 | self._info("port {0} administrative status changed to {1}". 573 | format(port, 'down' if pconfig['shutdown'] else 'up')) 574 | del self.config.ports[port] 575 | else: 576 | method = self.t.port.entry_add 577 | if method is not None: 578 | port_config = [ { 'name': '$FEC', 'str_val': str(pconfig['fec']) }, 579 | { 'name': '$PORT_ENABLE', 'bool_val': not pconfig['shutdown'] }, 580 | { 'name': '$RX_MTU', 'val': pconfig['mtu'] }, 581 | { 'name': '$TX_MTU', 'val': pconfig['mtu'] } ] 582 | speed = str(pconfig['speed']) 583 | lanes_match = re.match('.*_R[1-8]$', speed) 584 | if lanes_match: 585 | port_config.append({ 'name': '$N_LANES', 'val': lanes_match.group(1) }) 586 | speed = re.sub('_R[0-9]$', '', speed) 587 | port_config.append({ 'name': '$SPEED', 'str_val': speed }), 588 | method([ { 'name': '$DEV_PORT', 'value': dev_port } ], None, port_config) 589 | 590 | if dev_port not in self.ifmibs.keys(): 591 | self.ifmibs[dev_port] = mib.ifmib(self.ifmibs_dir+'/'+re.sub('/', '_', port)) 592 | self.ifmibs[dev_port].set_properties( 593 | { 'ifDescr': port.encode('ascii'), 594 | 'ifName': port.encode('ascii'), 595 | 'ifAlias': pconfig['description'].encode('ascii'), 596 | 'ifMtu': pconfig['mtu'], 597 | 'speed': if_speed[pconfig['speed']] } 598 | ) 599 | 600 | for port in sorted(self.config.ports.keys()): 601 | dev_port = get_dev_port(port) 602 | self.t.port.entry_del([ { 'name': '$DEV_PORT', 'value': dev_port } ]) 603 | ## It is possible that a port exists but was not added by 604 | ## us (e.g. port added via bfshell, Tofino model starts up 605 | ## with all ports active). In that case, the port is not 606 | ## registered in the ifmib. 607 | if dev_port in self.ifmibs: 608 | self.ifmibs[dev_port].delete() 609 | self.ifmibs.pop(dev_port, None) 610 | self.config = config 611 | 612 | def update_stats(self): 613 | status = {} 614 | for dev_port, ifTable in self.ifmibs.items(): 615 | port_t = self.t.port.entry_get( 616 | [ { 'name': '$DEV_PORT', 'value': dev_port } ]) 617 | stat_t = self.t.port_stat.entry_get( 618 | [ { 'name': '$DEV_PORT', 'value': dev_port } ]) 619 | old_oper_status, new_oper_status = ifTable.update(port_t, stat_t) 620 | port = port_t['$PORT_NAME'] 621 | status[port] = port_t['$PORT_UP'] 622 | if old_oper_status != new_oper_status: 623 | logger.info("port {0} operational status changed to {1}". 624 | format(port, 'up' if new_oper_status == 1 else 'down')) 625 | 626 | for group, members in self.config.groups.items(): 627 | update = False 628 | at_least_one_valid = False 629 | for port, member in members.items(): 630 | at_least_one_valid = at_least_one_valid or status[port] 631 | if member['status'] != status[port]: 632 | member['status'] = status[port] 633 | logger.info("egress group {0} status of member port {1} changed to {2}". 634 | format(group, port, 'up' if status[port] else 'down')) 635 | update = True 636 | if update: 637 | if not at_least_one_valid: 638 | ## All members are now invalid. We need to remove 639 | ## the reference to the group from the forwarding 640 | ## table before we can set this status for the 641 | ## action selector 642 | logger.warning("egress group {0} all member ports are down".format(group)) 643 | self.t.forward.entry_del([ { 'name': 'egress_group', 'value': group } ]) 644 | self.config.groups_ref[group] = False 645 | 646 | self._set_action_selector(self.t.port_groups_sel.entry_mod, 647 | group, members) 648 | 649 | if at_least_one_valid and not self.config.groups_ref[group]: 650 | self.t.forward.entry_add([ { 'name': 'egress_group', 'value': group } ], 651 | None, 652 | [ { 'name': '$SELECTOR_GROUP_ID', 'val': group } ]) 653 | self.config.groups_ref[group] = True 654 | 655 | def handle_request(self, peer, req): 656 | self._msgs_clear() 657 | result = None 658 | 659 | command = req['command'] 660 | handler = getattr(self, '_cmd_' + command, None) 661 | if handler is None: 662 | self._error("Invalid command '{}' from {}".format(command, peer[0])) 663 | success = False 664 | else: 665 | try: 666 | result = handler(req, peer[0]) 667 | success = True 668 | except Exception as e: 669 | self._error("Command '{}' failed: {}".format(command, e)) 670 | success = False 671 | 672 | for msg in self.msgs: 673 | logger.log(msg['level'], msg['msg']) 674 | 675 | return { 676 | 'success': success, 677 | 'msgs': self.msgs, 678 | 'result': result 679 | } 680 | 681 | def _cmd_reload(self, req, peer): 682 | 683 | logger.info("Reload requested by {}".format(peer)) 684 | try: 685 | json = self._read() 686 | except Exception as e: 687 | raise Exception("Error reading configuration: {}".format(e)) 688 | 689 | if json is not None: 690 | try: 691 | config = self._parse(json) 692 | except Exception as e: 693 | raise Exception("Error parsing configuration: {}".format(e)) 694 | 695 | try: 696 | self._push(config) 697 | except Exception as e: 698 | self._error("This is unexpected and may leave the hardware " 699 | + "in an undefined state") 700 | raise Exception("Error pushing configuration: {}".format(e)) 701 | return None 702 | 703 | def _cmd_show(self, req, peer): 704 | c = self.config 705 | items = { 706 | 'ports': 'ports', 707 | 'groups': 'groups', 708 | 'ingress': 'ingress', 709 | 'source-filter': 'source_filter', 710 | 'source-filter-dynamic': 'source_filter_d', 711 | 'flow-mirror': 'flow_mirror', 712 | 'features': 'features' 713 | } 714 | result = {} 715 | 716 | for item in req['args']: 717 | result[item] = getattr(c, items[item]) 718 | return result 719 | 720 | def _cmd_dump(self, req, peer): 721 | 722 | def filter(key, data, result): 723 | addr = key['src_addr'] 724 | result.append({ 725 | 'prefix': addr['value']+'/'+str(addr['prefix_len']), 726 | 'counters': { 727 | 'packets': data[u'$COUNTER_SPEC_PKTS'], 728 | 'bytes': data[u'$COUNTER_SPEC_BYTES'] 729 | } 730 | }) 731 | 732 | def mirror(key, data, result): 733 | result.append(key) 734 | 735 | def default(key, data, result): 736 | result.append({'key': key, 'data': data}) 737 | 738 | funcs = { 739 | 'filter_ipv4': filter, 740 | 'filter_ipv6': filter, 741 | 'mirror_ipv4': mirror, 742 | 'mirror_ipv6': mirror 743 | } 744 | 745 | result = [] 746 | for name in req['args']: 747 | for data, key in (getattr(self.t, name). 748 | entry_get_iterator([], from_hw = True)): 749 | func = funcs.get(name, default) 750 | func(key.to_dict(), data.to_dict(), result) 751 | return result 752 | 753 | def _add_remove(self, mode, req, peer): 754 | config = self.config 755 | 756 | def source_filter(prefixes): 757 | tables = { 758 | 4: self.t.filter_ipv4, 759 | 6: self.t.filter_ipv6 760 | } 761 | for str in prefixes: 762 | prefix = ipaddress.ip_network(str) 763 | if mode == 'add': 764 | if prefix in config.source_filter + config.source_filter_d: 765 | raise Exception("Duplicate source filter: {}". 766 | format(prefix)) 767 | ## Makes entry_add() accept "src_addr" as a string rather than 768 | ## a byte array 769 | tables[prefix.version].table.info.key_field_annotation_add("src_addr", "ipv4") 770 | tables[prefix.version].entry_add( 771 | [ { 'name': 'src_addr', 772 | 'value': prefix.network_address.exploded, 773 | 'prefix_len': prefix.prefixlen } ], 774 | 'act_drop', []) 775 | self._info("Added source filter {}".format(prefix)) 776 | config.source_filter_d.append(prefix) 777 | else: 778 | if prefix in config.source_filter: 779 | raise Exception("Cannot remove persistent source " + 780 | "filter: {}".format(prefix)) 781 | if not prefix in config.source_filter_d: 782 | raise Exception("Source filter does not exist: {}". 783 | format(prefix)) 784 | tables[prefix.version].entry_del( 785 | [ { 'name': 'src_addr', 786 | 'value': prefix.network_address.exploded, 787 | 'prefix_len': prefix.prefixlen } ]) 788 | self._info("Removed source filter {}".format(prefix)) 789 | config.source_filter_d.remove(prefix) 790 | self._dump_dynamic_source_filters() 791 | 792 | items = { 793 | 'source-filter': { 794 | 'func': source_filter 795 | } 796 | } 797 | 798 | for item, data in req['args'].items(): 799 | logger.info("{} {} requested by {}".format(mode, item, peer)) 800 | items[item]['func'](data) 801 | return None 802 | 803 | def _cmd_add(self, req, peer): 804 | return self._add_remove('add', req, peer) 805 | 806 | def _cmd_remove(self, req, peer): 807 | return self._add_remove('remove', req, peer) 808 | -------------------------------------------------------------------------------- /control-plane/sample-config-tofino-model.json: -------------------------------------------------------------------------------- 1 | { 2 | "ports": { 3 | "ingress": { 4 | "1/0": { 5 | "config": { 6 | "speed": "BF_SPEED_10G", 7 | "mtu": 9014 8 | }, 9 | "vlans": { 10 | "push": 10 11 | }, 12 | "egress-group": 1 13 | }, 14 | "1/1": { 15 | "config": { 16 | "speed": "BF_SPEED_10G", 17 | "mtu": 9014 18 | }, 19 | "vlans": { 20 | "rewrite": [ 21 | { 22 | "in": 100, 23 | "out": 200 24 | } 25 | ] 26 | }, 27 | "egress-group": 1 28 | }, 29 | "1/2": { 30 | "config": { 31 | "speed": "BF_SPEED_10G", 32 | "mtu": 9014 33 | }, 34 | "vlans": { 35 | "push": 20 36 | }, 37 | "egress-group": 1 38 | } 39 | }, 40 | "egress": [ 41 | { 42 | "group-id": 1, 43 | "members": { 44 | "4/0": { 45 | "config": { 46 | "speed": "BF_SPEED_10G", 47 | "mtu": 9014 48 | } 49 | }, 50 | "4/1": { 51 | "config": { 52 | "speed": "BF_SPEED_10G", 53 | "mtu": 9014 54 | } 55 | }, 56 | "4/2": { 57 | "config": { 58 | "speed": "BF_SPEED_10G", 59 | "mtu": 9014 60 | } 61 | } 62 | } 63 | } 64 | ], 65 | "other": { 66 | "65/0": { 67 | "config": { 68 | "speed": "BF_SPEED_10G", 69 | "mtu": 10240 70 | } 71 | } 72 | } 73 | }, 74 | "source-filter": [ 75 | "192.168.0.1/32", 76 | "2001:db8:0:0::1/128" 77 | ], 78 | "features": { 79 | "deflect-on-drop": "65/0", 80 | "drop-non-initial-fragments": false, 81 | "exclude-ports-from-hash": false 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /control-plane/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/schema#", 3 | "$id": "http://switch.ch/schemas/packet_broker.json", 4 | 5 | "definitions": { 6 | "interface-config": { 7 | "type": "object", 8 | "required": [ "speed", "mtu" ], 9 | 10 | "properties": { 11 | "description": { 12 | "type": "string" 13 | }, 14 | 15 | "speed": { 16 | "type": "string", 17 | "enum": [ 18 | "BF_SPEED_1G", 19 | "BF_SPEED_10G", 20 | "BF_SPEED_25G", 21 | "BF_SPEED_40G", 22 | "BF_SPEED_40G_NB", 23 | "BF_SPEED_40G_NON_BREAKABLE", 24 | "BF_SPEED_50G", 25 | "BF_SPEED_50G_R1", 26 | "BF_SPEED_100G", 27 | "BF_SPEED_100G_R2", 28 | "BF_SPEED_200G", 29 | "BF_SPEED_200G_R8", 30 | "BF_SPEED_400G" 31 | ] 32 | }, 33 | 34 | "fec": { 35 | "type": "string", 36 | "enum": [ 37 | "BF_FEC_TYP_NONE", 38 | "BF_FEC_TYP_FIRECODE", 39 | "BF_FEC_TYP_REED_SOLOMON", 40 | "BF_FEC_TYP_FC", 41 | "BF_FEC_TYP_RS" 42 | ] 43 | }, 44 | 45 | "mtu": { 46 | "type": "integer", 47 | "minimum": 1200, 48 | "maximum": 10240 49 | }, 50 | 51 | "shutdown": { 52 | "type": "boolean" 53 | } 54 | }, 55 | "additionalProperties": false 56 | }, 57 | 58 | "port": { 59 | "type": "object", 60 | "patternProperties": { 61 | "^[0-9]+/[0-9]+$": { 62 | "type": "object", 63 | "required": [ "config" ], 64 | 65 | "properties": { 66 | "config": { 67 | "$ref": "#/definitions/interface-config" 68 | } 69 | }, 70 | "additionalProperties": false 71 | } 72 | } 73 | }, 74 | 75 | "logical-port-pattern": { 76 | "type": "string", 77 | "pattern": "^[0-9]+/[0-9]+$" 78 | }, 79 | 80 | "logical-or-physical-port-pattern": { 81 | "type": "string", 82 | "pattern": "^([0-9]+/[0-9]+|[0-9]+)$" 83 | }, 84 | 85 | "vlan": { 86 | "type": "integer", 87 | "minimum": 1, 88 | "maximum": 4095 89 | }, 90 | 91 | "port-spec": { 92 | "type": "object", 93 | "properties": { 94 | "port": { 95 | "type": "integer", 96 | "minimum": 0, 97 | "maximum": 65535 98 | }, 99 | "mask": { 100 | "type": "integer", 101 | "minimum": 0, 102 | "maximum": 65535 103 | } 104 | } 105 | }, 106 | 107 | "mac-rewrite": { 108 | "type": "object", 109 | "patternProperties": { 110 | "^([a-f0-9]{2}:){5}[a-f0-9]{2}$": { 111 | "type": "string", 112 | "pattern": "^([a-f0-9]{2}:){5}[a-f0-9]{2}$" 113 | } 114 | }, 115 | "additionalProperties": false 116 | } 117 | }, 118 | 119 | "type": "object", 120 | "required": [ "ports" ], 121 | 122 | "properties": { 123 | "ports": { 124 | "type": "object", 125 | "required": [ "ingress", "egress" ], 126 | 127 | "properties": { 128 | "ingress": { 129 | "type": "object", 130 | "patternProperties": { 131 | "^[0-9]+/[0-9]+$": { 132 | "type": "object", 133 | "required": [ "config", "egress-group", "vlans" ], 134 | 135 | "properties": { 136 | "config": { 137 | "$ref": "#/definitions/interface-config" 138 | }, 139 | 140 | "egress-group": { 141 | "type": "integer", 142 | "minimum": 1 143 | }, 144 | 145 | "vlans": { 146 | "type": "object", 147 | 148 | "properties": { 149 | "push": { 150 | "$ref": "#/definitions/vlan" 151 | }, 152 | 153 | "rewrite": { 154 | "type": "array", 155 | 156 | "items": { 157 | "type": "object", 158 | "required": [ "in", "out" ], 159 | "properties": { 160 | "in": { 161 | "$ref": "#/definitions/vlan" 162 | }, 163 | "out": { 164 | "$ref": "#/definitions/vlan" 165 | }, 166 | "mac-rewrite": { 167 | "type": "object", 168 | "properties": { 169 | "src": { 170 | "$ref": "#/definitions/mac-rewrite" 171 | }, 172 | "dst": { 173 | "$ref": "#/definitions/mac-rewrite" 174 | } 175 | }, 176 | "additionalProperties": false 177 | } 178 | }, 179 | "additionalProperties": false 180 | } 181 | } 182 | } 183 | } 184 | } 185 | } 186 | }, 187 | "additionalProperties": false 188 | }, 189 | 190 | "egress": { 191 | "type": "array", 192 | 193 | "items": { 194 | "type": "object", 195 | "required": [ "group-id", "members" ], 196 | 197 | "properties": { 198 | "group-id": { 199 | "type": "integer", 200 | "minimum": 1 201 | }, 202 | 203 | "members": { 204 | "$ref": "#/definitions/port" 205 | } 206 | }, 207 | "additionalProperties": false 208 | } 209 | }, 210 | 211 | "other": { 212 | "$ref": "#/definitions/port" 213 | } 214 | 215 | } 216 | }, 217 | "source-filter": { 218 | "type": "array", 219 | 220 | "items": { 221 | "type": "string" 222 | } 223 | }, 224 | 225 | "flow-mirror": { 226 | "type": "array", 227 | 228 | "items": { 229 | "type": "object", 230 | "required": [ "src", "dst", "src_port", "dst_port" ], 231 | 232 | "properties": { 233 | "ingress-ports": { 234 | "type": "array", 235 | 236 | "items": { 237 | "$ref": "#/definitions/logical-port-pattern" 238 | } 239 | }, 240 | "non-ip": { 241 | "type": "boolean" 242 | }, 243 | "src": { 244 | "type": "string" 245 | }, 246 | "dst": { 247 | "type": "string" 248 | }, 249 | "src_port": { 250 | "$ref": "#/definitions/port-spec" 251 | }, 252 | "dst_port": { 253 | "$ref": "#/definitions/port-spec" 254 | }, 255 | "bidir": { 256 | "type": "boolean" 257 | }, 258 | "enable": { 259 | "type": "boolean" 260 | } 261 | }, 262 | "additionalProperties": false 263 | } 264 | }, 265 | 266 | "features": { 267 | "type": "object", 268 | 269 | "properties": { 270 | "deflect-on-drop": { 271 | "$ref": "#/definitions/logical-or-physical-port-pattern" 272 | }, 273 | 274 | "flow-mirror": { 275 | "type": "object", 276 | "required": [ "port" ], 277 | "properties": { 278 | 279 | "port": { 280 | "$ref": "#/definitions/logical-or-physical-port-pattern" 281 | }, 282 | "max-packet-length": { 283 | "type": "integer", 284 | "minimum": 0, 285 | "maximum": 16384 286 | } 287 | }, 288 | "additionalProperties": false 289 | }, 290 | 291 | "drop-non-initial-fragments": { 292 | "type": "boolean" 293 | }, 294 | 295 | "exclude-ports-from-hash": { 296 | "type": "boolean" 297 | }, 298 | 299 | "drop-non-ip": { 300 | "type": "boolean" 301 | } 302 | }, 303 | "additionalProperties": false 304 | } 305 | }, 306 | "additionalProperties": false 307 | } 308 | -------------------------------------------------------------------------------- /control-plane/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import sys 3 | 4 | setuptools.setup( 5 | name="packet-broker-configd", 6 | version="0.0.1", 7 | scripts = [ "configd.py", "brokerctl" ], 8 | py_modules = [ "packet_broker", 9 | "bfrt", "mib" ], 10 | install_requires = [ 11 | "jsonschema" 12 | ] + [ module for module in [ "ipaddress" ] 13 | if sys.version_info < (3, 0) ] 14 | ) 15 | -------------------------------------------------------------------------------- /include/drop.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _DROP_P4_ 4 | #define _DROP_P4_ 5 | 6 | #include "metadata.p4" 7 | 8 | action act_mark_to_drop(inout ingress_metadata_t ig_md) { 9 | ig_md.drop = 1; 10 | } 11 | 12 | control ctl_drop_packet( 13 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md, 14 | inout ingress_intrinsic_metadata_for_tm_t ig_tm_md) 15 | { 16 | action real_drop() { 17 | ig_dprsr_md.drop_ctl = 1; 18 | } 19 | 20 | action send_to_port(PortId_t port) { 21 | ig_tm_md.ucast_egress_port = port; 22 | } 23 | 24 | table tbl_drop { 25 | actions = { 26 | real_drop; 27 | send_to_port; 28 | } 29 | default_action = real_drop; 30 | } 31 | 32 | apply { 33 | tbl_drop.apply(); 34 | } 35 | } 36 | 37 | control ctl_maybe_drop_fragment(inout ingress_metadata_t ig_md) 38 | { 39 | table tbl_maybe_drop_fragment { 40 | actions = { 41 | act_mark_to_drop(ig_md); 42 | NoAction; 43 | } 44 | size = 1; 45 | default_action = NoAction; 46 | } 47 | 48 | apply { 49 | if (ig_md.non_first_fragment == 1) { 50 | tbl_maybe_drop_fragment.apply(); 51 | } 52 | } 53 | } 54 | 55 | control ctl_maybe_drop_non_ip(inout ingress_metadata_t ig_md) 56 | { 57 | table tbl_maybe_drop_non_ip { 58 | actions = { 59 | act_mark_to_drop(ig_md); 60 | NoAction; 61 | } 62 | size = 1; 63 | default_action = NoAction; 64 | } 65 | 66 | apply { 67 | tbl_maybe_drop_non_ip.apply(); 68 | } 69 | } 70 | 71 | #endif // _DROP_P4_ 72 | -------------------------------------------------------------------------------- /include/egress.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _EGRESS_P4_ 4 | #define _EGRESS_P4_ 5 | 6 | #include 7 | 8 | // We don't use any egress processing, but these declarations 9 | // are required by the TNA. 10 | 11 | struct egress_headers_t { 12 | } 13 | 14 | struct egress_metadata_t { 15 | } 16 | 17 | parser eg_prs(packet_in pkt, 18 | /* User */ 19 | out egress_headers_t eg_hdr, out egress_metadata_t eg_md, 20 | /* Intrinsic */ 21 | out egress_intrinsic_metadata_t eg_intr_md) 22 | { 23 | /* This is a mandatory state, required by Tofino Architecture */ 24 | state start { 25 | pkt.extract(eg_intr_md); 26 | transition accept; 27 | } 28 | } 29 | 30 | control eg_ctl( 31 | /* User */ 32 | inout egress_headers_t eg_hdr, inout egress_metadata_t eg_md, 33 | /* Intrinsic */ 34 | in egress_intrinsic_metadata_t eg_intr_md, 35 | in egress_intrinsic_metadata_from_parser_t eg_prsr_md, 36 | inout egress_intrinsic_metadata_for_deparser_t eg_dprsr_md, 37 | inout egress_intrinsic_metadata_for_output_port_t eg_oport_md) 38 | { 39 | apply { 40 | } 41 | } 42 | 43 | 44 | control eg_ctl_dprs(packet_out pkt, 45 | /* User */ 46 | inout egress_headers_t eg_hdr, in egress_metadata_t eg_md, 47 | /* Intrinsic */ 48 | in egress_intrinsic_metadata_for_deparser_t eg_dprsr_md) 49 | { 50 | apply { 51 | pkt.emit(eg_hdr); 52 | } 53 | } 54 | 55 | #endif // _EGRESS_P4_ 56 | -------------------------------------------------------------------------------- /include/filter.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _FILTER_P4_ 4 | #define _FILTER_P4_ 5 | 6 | #include "metadata.p4" 7 | #include "headers.p4" 8 | #include "drop.p4" 9 | 10 | DirectCounter>(CounterType_t.PACKETS_AND_BYTES) filter_ipv4_stats; 11 | 12 | control ctl_filter_source_ipv4( 13 | in headers hdr, 14 | inout ingress_metadata_t ig_md) 15 | { 16 | action act_drop() { 17 | filter_ipv4_stats.count(); 18 | act_mark_to_drop(ig_md); 19 | } 20 | 21 | table tbl_filter_source_ipv4 { 22 | key = { 23 | hdr.ipv4.src_addr : lpm @name("src_addr"); 24 | } 25 | actions = { 26 | act_drop(); 27 | @defaultonly NoAction; 28 | } 29 | counters = filter_ipv4_stats; 30 | const default_action = NoAction; 31 | } 32 | 33 | apply { 34 | tbl_filter_source_ipv4.apply(); 35 | } 36 | } 37 | 38 | DirectCounter>(CounterType_t.PACKETS_AND_BYTES) filter_ipv6_stats; 39 | 40 | control ctl_filter_source_ipv6( 41 | in headers hdr, 42 | inout ingress_metadata_t ig_md) 43 | { 44 | action act_drop() { 45 | filter_ipv6_stats.count(); 46 | act_mark_to_drop(ig_md); 47 | } 48 | 49 | table tbl_filter_source_ipv6 { 50 | key = { 51 | hdr.ipv6.src_addr : lpm @name("src_addr"); 52 | } 53 | actions = { 54 | act_drop(); 55 | @defaultonly NoAction; 56 | } 57 | counters = filter_ipv6_stats; 58 | const default_action = NoAction; 59 | } 60 | 61 | apply { 62 | tbl_filter_source_ipv6.apply(); 63 | } 64 | } 65 | 66 | #endif // _FILTER_P4_ 67 | -------------------------------------------------------------------------------- /include/forward.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _FORWARD_P4_ 4 | #define _FORWARD_P4_ 5 | 6 | #include "metadata.p4" 7 | #include "drop.p4" 8 | 9 | #define MAX_OUTPUT_PORTS 16 10 | #define MAX_PORTS_PER_OUTPUT_GROUP 16 11 | #define MAX_OUTPUT_GROUPS 8 12 | 13 | control ctl_forward_packet( 14 | in ingress_intrinsic_metadata_t ig_intr_md, 15 | in bit<32> sel_hash, 16 | inout ingress_metadata_t ig_md, 17 | inout ingress_intrinsic_metadata_for_tm_t ig_tm_md) 18 | { 19 | port_group_t egress_group = 0; 20 | 21 | action act_output_group(port_group_t group) { 22 | egress_group = group; 23 | } 24 | 25 | table tbl_select_output { 26 | key = { 27 | ig_intr_md.ingress_port : exact @name("ingress_port"); 28 | } 29 | actions = { 30 | act_output_group; 31 | @defaultonly act_mark_to_drop(ig_md); 32 | } 33 | const default_action = act_mark_to_drop(ig_md); 34 | } 35 | 36 | action act_send(PortId_t egress_port) { 37 | ig_tm_md.ucast_egress_port = egress_port; 38 | ig_tm_md.bypass_egress = 1; 39 | } 40 | 41 | // 14 hash bits are required by Tofino for fair-hashing with at 42 | // most 120 ports per group. Use IDENTITY hash here because 43 | // the hash has been pre-computed (passed in sel_hash) 44 | Hash> (HashAlgorithm_t.IDENTITY) final_hash; 45 | 46 | ActionProfile(size = MAX_OUTPUT_PORTS) port_groups; 47 | ActionSelector(action_profile = port_groups, 48 | hash = final_hash, 49 | mode = SelectorMode_t.FAIR, 50 | max_group_size = MAX_PORTS_PER_OUTPUT_GROUP, 51 | num_groups = MAX_OUTPUT_GROUPS) port_groups_sel; 52 | 53 | table tbl_forward { 54 | key = { 55 | egress_group : exact; 56 | sel_hash : selector; 57 | } 58 | actions = { 59 | act_send; 60 | @defaultonly act_mark_to_drop(ig_md); 61 | } 62 | size = 256; 63 | implementation = port_groups_sel; 64 | const default_action = act_mark_to_drop(ig_md); 65 | } 66 | 67 | apply { 68 | tbl_select_output.apply(); 69 | tbl_forward.apply(); 70 | } 71 | } 72 | 73 | #endif // _FORWARD_P4_ 74 | -------------------------------------------------------------------------------- /include/hash.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _HASH_P4_ 4 | #define _HASH_P4_ 5 | 6 | #include "metadata.p4" 7 | #include "headers.p4" 8 | 9 | CRCPolynomial>( 10 | coeff = 0x04C11DB7, 11 | reversed = true, 12 | msb = false, 13 | extended = false, 14 | init = 0xFFFFFFFF, 15 | xor = 0xFFFFFFFF) poly; 16 | 17 | control ctl_maybe_exclude_l4_from_hash(inout ingress_metadata_t ig_md) 18 | { 19 | action act_exclude_l4() { 20 | ig_md.l4_lookup = { 0, 0}; 21 | } 22 | 23 | table tbl_maybe_exclude_l4 { 24 | actions = { 25 | act_exclude_l4; 26 | NoAction; 27 | } 28 | size = 1; 29 | default_action = NoAction; 30 | } 31 | 32 | apply { 33 | tbl_maybe_exclude_l4.apply(); 34 | } 35 | } 36 | 37 | control ctl_calc_ipv4_hash( 38 | in headers hdr, 39 | in ingress_metadata_t ig_md, 40 | inout bit<32> sel_hash) 41 | { 42 | Hash>(HashAlgorithm_t.CRC32, poly) hash; 43 | 44 | apply { 45 | sel_hash = hash.get( 46 | { 47 | hdr.ipv4.src_addr, 48 | hdr.ipv4.dst_addr, 49 | hdr.ipv4.protocol, 50 | ig_md.l4_lookup.word_1, 51 | ig_md.l4_lookup.word_2 52 | } 53 | ); 54 | } 55 | } 56 | 57 | control ctl_calc_ipv6_hash( 58 | in headers hdr, 59 | in ingress_metadata_t ig_md, 60 | inout bit<32> sel_hash) 61 | { 62 | Hash>(HashAlgorithm_t.CRC32, poly) hash; 63 | 64 | apply { 65 | ip_proto_t ulp = hdr.ipv6.next_hdr; 66 | 67 | if (hdr.ipv6_frag.isValid()) { 68 | ulp = hdr.ipv6_frag.next_hdr; 69 | } 70 | sel_hash = hash.get( 71 | { 72 | hdr.ipv6.src_addr, 73 | hdr.ipv6.dst_addr, 74 | ulp, 75 | ig_md.l4_lookup.word_1, 76 | ig_md.l4_lookup.word_2 77 | } 78 | ); 79 | } 80 | } 81 | 82 | control ctl_calc_ethernet_hash(in headers hdr, out bit<32> sel_hash) 83 | { 84 | Hash>(HashAlgorithm_t.CRC32) hash; 85 | 86 | apply { 87 | sel_hash = hash.get( 88 | { 89 | hdr.ethernet.dst_mac_addr, 90 | hdr.ethernet.src_mac_addr, 91 | hdr.ethernet.ethertype 92 | } 93 | ); 94 | } 95 | } 96 | 97 | #endif // _HASH_P4_ 98 | -------------------------------------------------------------------------------- /include/headers.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _HEADERS_P4_ 4 | #define _HEADERS_P4_ 5 | 6 | #include "protocol_headers.p4" 7 | 8 | struct headers { 9 | ethernet_t ethernet; 10 | vlan_t vlan; 11 | ipv4_t ipv4; 12 | ipv4_options_t ipv4_options; 13 | ipv6_t ipv6; 14 | ipv6_frag_t ipv6_frag; 15 | } 16 | 17 | #endif // _HEADERS_P4_ 18 | -------------------------------------------------------------------------------- /include/metadata.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _METADATA_P4_ 4 | #define _METADATA_P4_ 5 | 6 | #include "types.p4" 7 | 8 | struct ingress_metadata_t { 9 | l4_lookup_t l4_lookup; 10 | MirrorId_t mirror_session; 11 | bit<1> non_first_fragment; 12 | bit<1> drop; 13 | } 14 | 15 | #endif // _METADATA_P4 16 | -------------------------------------------------------------------------------- /include/mirror.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _MIRROR_P4_ 4 | #define _MIRROR_P4_ 5 | 6 | #include "types.p4" 7 | #include "headers.p4" 8 | #include "metadata.p4" 9 | 10 | action act_mirror( 11 | inout ingress_metadata_t ig_md, 12 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md, 13 | MirrorId_t mirror_session) { 14 | ig_dprsr_md.mirror_type = (MirrorType_t)mirror_session_t.FLOW; 15 | ig_md.mirror_session = mirror_session; 16 | } 17 | 18 | control ctl_mirror_flows_ipv4( 19 | in headers hdr, 20 | in ingress_intrinsic_metadata_t ig_intr_md, 21 | inout ingress_metadata_t ig_md, 22 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md) 23 | { 24 | table tbl_mirror_flows_ipv4 { 25 | key = { 26 | ig_intr_md.ingress_port : ternary @name("ingress_port"); 27 | hdr.ipv4.src_addr : ternary @name("src_addr"); 28 | hdr.ipv4.dst_addr : ternary @name("dst_addr"); 29 | ig_md.l4_lookup.word_1 : ternary @name("src_port"); 30 | ig_md.l4_lookup.word_2 : ternary @name("dst_port"); 31 | } 32 | actions = { 33 | act_mirror(ig_md, ig_dprsr_md); 34 | @defaultonly NoAction; 35 | } 36 | const default_action = NoAction; 37 | } 38 | 39 | apply { 40 | tbl_mirror_flows_ipv4.apply(); 41 | } 42 | } 43 | 44 | control ctl_mirror_flows_ipv6( 45 | in headers hdr, 46 | in ingress_intrinsic_metadata_t ig_intr_md, 47 | inout ingress_metadata_t ig_md, 48 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md) 49 | { 50 | table tbl_mirror_flows_ipv6 { 51 | key = { 52 | ig_intr_md.ingress_port : ternary @name("ingress_port"); 53 | hdr.ipv6.src_addr : ternary @name("src_addr"); 54 | hdr.ipv6.dst_addr : ternary @name("dst_addr"); 55 | ig_md.l4_lookup.word_1 : ternary @name("src_port"); 56 | ig_md.l4_lookup.word_2 : ternary @name("dst_port"); 57 | } 58 | actions = { 59 | act_mirror(ig_md, ig_dprsr_md); 60 | @defaultonly NoAction; 61 | } 62 | const default_action = NoAction; 63 | } 64 | 65 | apply { 66 | tbl_mirror_flows_ipv6.apply(); 67 | } 68 | } 69 | 70 | control ctl_mirror_flows_non_ip( 71 | in headers hdr, 72 | in ingress_intrinsic_metadata_t ig_intr_md, 73 | inout ingress_metadata_t ig_md, 74 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md) 75 | { 76 | table tbl_mirror_flows_non_ip { 77 | key = { 78 | ig_intr_md.ingress_port : ternary @name("ingress_port"); 79 | } 80 | actions = { 81 | act_mirror(ig_md, ig_dprsr_md); 82 | @defaultonly NoAction; 83 | } 84 | const default_action = NoAction; 85 | } 86 | 87 | apply { 88 | tbl_mirror_flows_non_ip.apply(); 89 | } 90 | } 91 | 92 | #endif // _MIRROR_P4_ 93 | -------------------------------------------------------------------------------- /include/parser.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _PARSER_P4_ 4 | #define _PARSER_P4_ 5 | 6 | #include "headers.p4" 7 | #include "metadata.p4" 8 | 9 | parser ig_prs( 10 | packet_in pkt, 11 | out headers hdr, 12 | out ingress_metadata_t ig_md, 13 | out ingress_intrinsic_metadata_t ig_intr_md) 14 | { 15 | /* This is a mandatory state, required by the Tofino Architecture */ 16 | state start { 17 | pkt.extract(ig_intr_md); 18 | pkt.advance(PORT_METADATA_SIZE); 19 | 20 | transition meta_init; 21 | } 22 | 23 | state meta_init { 24 | ig_md.l4_lookup = { 0, 0 }; 25 | ig_md.non_first_fragment = 0; 26 | ig_md.drop = 0; 27 | ig_md.mirror_session = 0; 28 | 29 | transition prs_ethernet; 30 | } 31 | 32 | state prs_ethernet { 33 | pkt.extract(hdr.ethernet); 34 | 35 | transition select(hdr.ethernet.ethertype) { 36 | ethertype_t.VLAN: prs_vlan; 37 | ethertype_t.IPV4: prs_ipv4; 38 | ethertype_t.IPV6: prs_ipv6; 39 | default: accept; 40 | } 41 | } 42 | 43 | state prs_vlan { 44 | pkt.extract(hdr.vlan); 45 | 46 | transition select(hdr.vlan.ethertype) { 47 | ethertype_t.IPV4: prs_ipv4; 48 | ethertype_t.IPV6: prs_ipv6; 49 | default: accept; 50 | } 51 | } 52 | 53 | state prs_ipv4 { 54 | pkt.extract(hdr.ipv4); 55 | 56 | transition select(hdr.ipv4.ihl) { 57 | 5 : prs_ipv4_no_options; 58 | 6 &&& 0xE : prs_ipv4_options; 59 | 8 &&& 0x8 : prs_ipv4_options; 60 | default : reject; 61 | } 62 | } 63 | 64 | state prs_ipv4_options { 65 | pkt.extract(hdr.ipv4_options, ((bit<32>)hdr.ipv4.ihl - 5) * 32); 66 | 67 | transition prs_ipv4_no_options; 68 | } 69 | 70 | state prs_ipv4_no_options { 71 | transition select(hdr.ipv4.frag_offset, hdr.ipv4.protocol) { 72 | ( 0, ip_proto_t.TCP ) : prs_l4; 73 | ( 0, ip_proto_t.UDP ) : prs_l4; 74 | ( 0, _ ) : accept; 75 | default: non_first_fragment; 76 | } 77 | } 78 | 79 | state prs_ipv6 { 80 | pkt.extract(hdr.ipv6); 81 | 82 | transition select(hdr.ipv6.next_hdr) { 83 | ip_proto_t.TCP: prs_l4; 84 | ip_proto_t.UDP: prs_l4; 85 | ip_proto_t.IPV6_FRAG : prs_ipv6_frag; 86 | default: accept; 87 | } 88 | } 89 | 90 | state prs_ipv6_frag { 91 | pkt.extract(hdr.ipv6_frag); 92 | 93 | transition select(hdr.ipv6_frag.offset, hdr.ipv6_frag.next_hdr) { 94 | ( 0, ip_proto_t.TCP ) : prs_l4; 95 | ( 0, ip_proto_t.UDP ) : prs_l4; 96 | ( 0, _ ) : accept; 97 | default: non_first_fragment; 98 | } 99 | } 100 | 101 | state non_first_fragment { 102 | ig_md.non_first_fragment = 1; 103 | 104 | transition accept; 105 | } 106 | 107 | state prs_l4 { 108 | ig_md.l4_lookup = pkt.lookahead(); 109 | 110 | transition accept; 111 | } 112 | 113 | } 114 | 115 | #endif // _PARSER_P4_ 116 | -------------------------------------------------------------------------------- /include/protocol_headers.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _PROTOCOL_HEADERS_P4_ 4 | #define _PROTOCOL_HEADERS_P4_ 5 | 6 | #include "types.p4" 7 | 8 | header ethernet_t { 9 | mac_addr_t dst_mac_addr; 10 | mac_addr_t src_mac_addr; 11 | ethertype_t ethertype; 12 | } 13 | 14 | header vlan_t { 15 | bit <3> pcp; 16 | bit <1> cfi; 17 | vlan_id_t vid; 18 | ethertype_t ethertype; 19 | } 20 | 21 | header ipv4_t { 22 | bit<4> version; 23 | bit<4> ihl; 24 | bit<8> diffserv; 25 | bit<16> total_len; 26 | bit<16> identification; 27 | bit<3> flags; 28 | bit<13> frag_offset; 29 | bit<8> ttl; 30 | ip_proto_t protocol; 31 | bit<16> hdr_checksum; 32 | ipv4_addr_t src_addr; 33 | ipv4_addr_t dst_addr; 34 | } 35 | 36 | header ipv4_options_t { 37 | varbit<320> data; 38 | } 39 | 40 | header ipv6_t { 41 | bit<4> version; 42 | bit<8> traffic_class; 43 | bit<20> flow_label; 44 | bit<16> payload_len; 45 | ip_proto_t next_hdr; 46 | bit<8> hop_limit; 47 | bit<128> src_addr; 48 | bit<128> dst_addr; 49 | } 50 | 51 | header ipv6_frag_t { 52 | ip_proto_t next_hdr; 53 | bit<8> reserved; 54 | bit<13> offset; 55 | bit<2> reserved_2; 56 | bit<1> more_fragments; 57 | bit<32> id; 58 | } 59 | 60 | #endif // _PROTOCOL_HEADERS_P4_ 61 | -------------------------------------------------------------------------------- /include/types.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _TYPES_P4_ 4 | #define _TYPES_P4_ 5 | 6 | typedef bit<8> port_group_t; 7 | typedef bit<48> mac_addr_t; 8 | typedef bit<12> vlan_id_t; 9 | typedef bit<32> ipv4_addr_t; 10 | typedef bit<128> ipv6_addr_t; 11 | 12 | // The first two 16-bit words of the L4 header for TCP and UDP. 13 | struct l4_lookup_t { 14 | bit<16> word_1; 15 | bit<16> word_2; 16 | } 17 | 18 | enum bit<8> ip_proto_t { 19 | TCP = 6, 20 | UDP = 17, 21 | IPV6_FRAG = 44 22 | } 23 | 24 | enum bit<16> ethertype_t { 25 | VLAN = 0x8100, 26 | IPV4 = 0x0800, 27 | IPV6 = 0x86dd 28 | } 29 | 30 | /* NOTE: mirror type 0 must not be used in ingress-to-egress 31 | mirroring. It is used to cancel a mirror operation that was 32 | requiested earlier in the ingress pipeline */ 33 | enum MirrorType_t mirror_session_t { 34 | FLOW = 1 35 | } 36 | 37 | #endif // _TYPES_P4_ 38 | -------------------------------------------------------------------------------- /include/vlan.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #ifndef _VLAN_P4_ 4 | #define _VLAN_P4_ 5 | 6 | #include "metadata.p4" 7 | #include "drop.p4" 8 | 9 | control ctl_push_or_rewrite_vlan( 10 | inout headers hdr, 11 | in ingress_intrinsic_metadata_t ig_intr_md, 12 | inout ingress_metadata_t ig_md) 13 | { 14 | action act_push_vlan(vlan_id_t vid) { 15 | hdr.vlan.ethertype = hdr.ethernet.ethertype; 16 | hdr.vlan.vid = vid; 17 | hdr.ethernet.ethertype = ethertype_t.VLAN; 18 | hdr.vlan.setValid(); 19 | } 20 | 21 | table tbl_ingress_untagged { 22 | key = { 23 | ig_intr_md.ingress_port : exact @name("ingress_port"); 24 | } 25 | actions = { 26 | act_push_vlan; 27 | @defaultonly act_mark_to_drop(ig_md); 28 | } 29 | const default_action = act_mark_to_drop(ig_md); 30 | } 31 | 32 | action act_rewrite_vlan(vlan_id_t vid) { 33 | hdr.vlan.vid = vid; 34 | } 35 | 36 | table tbl_ingress_tagged { 37 | key = { 38 | ig_intr_md.ingress_port : exact @name("ingress_port"); 39 | hdr.vlan.vid : exact @name("ingress_vid"); 40 | } 41 | actions = { 42 | act_rewrite_vlan; 43 | @defaultonly act_mark_to_drop(ig_md); 44 | } 45 | const default_action = act_mark_to_drop(ig_md); 46 | } 47 | 48 | action act_rewrite_src_mac(mac_addr_t mac_addr) { 49 | hdr.ethernet.src_mac_addr = mac_addr; 50 | } 51 | 52 | action act_rewrite_dst_mac(mac_addr_t mac_addr) { 53 | hdr.ethernet.dst_mac_addr = mac_addr; 54 | } 55 | 56 | table tbl_ingress_src_mac_rewrite { 57 | key = { 58 | ig_intr_md.ingress_port : exact @name("ingress_port"); 59 | hdr.vlan.vid : exact @name("ingress_vid"); 60 | hdr.ethernet.src_mac_addr : exact @name("src_mac_addr"); 61 | } 62 | actions = { 63 | act_rewrite_src_mac; 64 | @defaultonly NoAction; 65 | } 66 | const default_action = NoAction; 67 | } 68 | 69 | table tbl_ingress_dst_mac_rewrite { 70 | key = { 71 | ig_intr_md.ingress_port : exact @name("ingress_port"); 72 | hdr.vlan.vid : exact @name("ingress_vid"); 73 | hdr.ethernet.dst_mac_addr : exact @name("dst_mac_addr"); 74 | } 75 | actions = { 76 | act_rewrite_dst_mac; 77 | @defaultonly NoAction; 78 | } 79 | const default_action = NoAction; 80 | } 81 | 82 | apply { 83 | if (hdr.vlan.isValid()) { 84 | tbl_ingress_src_mac_rewrite.apply(); 85 | tbl_ingress_dst_mac_rewrite.apply(); 86 | tbl_ingress_tagged.apply(); 87 | } else { 88 | tbl_ingress_untagged.apply(); 89 | } 90 | } 91 | } 92 | 93 | #endif // _VLAN_P4_ 94 | -------------------------------------------------------------------------------- /packet_broker.p4: -------------------------------------------------------------------------------- 1 | /* -*- mode: P4_16 -*- */ 2 | 3 | #include 4 | #if __TARGET_TOFINO__ == 3 5 | #include 6 | #elif __TARGET_TOFINO__ == 2 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | #include "include/types.p4" 13 | #include "include/protocol_headers.p4" 14 | #include "include/metadata.p4" 15 | #include "include/headers.p4" 16 | 17 | #include "include/parser.p4" 18 | #include "include/drop.p4" 19 | #include "include/vlan.p4" 20 | #include "include/filter.p4" 21 | #include "include/hash.p4" 22 | #include "include/forward.p4" 23 | #include "include/egress.p4" 24 | #include "include/mirror.p4" 25 | 26 | control ig_ctl( 27 | inout headers hdr, inout ingress_metadata_t ig_md, 28 | in ingress_intrinsic_metadata_t ig_intr_md, 29 | in ingress_intrinsic_metadata_from_parser_t ig_prsr_md, 30 | inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md, 31 | inout ingress_intrinsic_metadata_for_tm_t ig_tm_md) 32 | { 33 | bit<32> sel_hash = 0; 34 | 35 | apply { 36 | if (ig_prsr_md.parser_err != PARSER_ERROR_OK) { 37 | // Fail hard if the parser terminated with an error 38 | ig_dprsr_md.drop_ctl = 1; 39 | exit; 40 | } 41 | 42 | ctl_maybe_drop_fragment.apply(ig_md); 43 | ctl_maybe_exclude_l4_from_hash.apply(ig_md); 44 | ctl_push_or_rewrite_vlan.apply(hdr, ig_intr_md, ig_md); 45 | 46 | if (hdr.ipv4.isValid()) { 47 | ctl_filter_source_ipv4.apply(hdr, ig_md); 48 | ctl_calc_ipv4_hash.apply(hdr, ig_md, sel_hash); 49 | ctl_mirror_flows_ipv4.apply(hdr, ig_intr_md, ig_md, ig_dprsr_md); 50 | } else if (hdr.ipv6.isValid()) { 51 | ctl_filter_source_ipv6.apply(hdr, ig_md); 52 | ctl_calc_ipv6_hash.apply(hdr, ig_md, sel_hash); 53 | ctl_mirror_flows_ipv6.apply(hdr, ig_intr_md, ig_md, ig_dprsr_md); 54 | } else { 55 | ctl_calc_ethernet_hash.apply(hdr, sel_hash); 56 | ctl_mirror_flows_non_ip.apply(hdr, ig_intr_md, ig_md, ig_dprsr_md); 57 | ctl_maybe_drop_non_ip.apply(ig_md); 58 | } 59 | ctl_forward_packet.apply(ig_intr_md, sel_hash, ig_md, ig_tm_md); 60 | 61 | // Some of the controls above can request the packet to 62 | // be dropped (or sent to a port for inspection). The 63 | // drop is enforced in the traffic manager. 64 | if (ig_md.drop == 1) { 65 | ctl_drop_packet.apply(ig_dprsr_md, ig_tm_md); 66 | } 67 | } 68 | 69 | } 70 | 71 | control ig_ctl_dprs( 72 | packet_out pkt, 73 | inout headers hdr, 74 | in ingress_metadata_t ig_md, 75 | in ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md) 76 | { 77 | Mirror() mirror; 78 | 79 | apply { 80 | if (ig_dprsr_md.mirror_type == (MirrorType_t)mirror_session_t.FLOW) { 81 | mirror.emit(ig_md.mirror_session); 82 | } 83 | pkt.emit(hdr); 84 | } 85 | } 86 | 87 | Pipeline( 88 | ig_prs(), ig_ctl(), ig_ctl_dprs(), 89 | eg_prs(), eg_ctl(), eg_ctl_dprs()) pipe; 90 | 91 | Switch(pipe) main; 92 | --------------------------------------------------------------------------------