├── .github ├── CODEOWNERS └── workflows │ ├── publish-docs.yaml │ └── validate-docs.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.rst ├── VERSION ├── _extra └── robots.txt ├── _static ├── SystemsApproachLogoURL.png ├── bridge.ico ├── cover.jpg ├── css │ └── rtd_theme_mods.css └── fonts │ ├── Inconsolata-Bold.ttf │ └── Inconsolata-Regular.ttf ├── access.rst ├── arch.rst ├── authors.rst ├── bridge.jpg ├── code ├── README ├── actions.p4 ├── build.sh ├── checksum.p4 ├── clone.sh ├── deparser.p4 ├── egress.p4 ├── eth.yang ├── fabric.p4 ├── flowobj.java ├── headers.p4 ├── iface.yang ├── ingress.p4 ├── make.sh ├── onos1.txt ├── onos2.txt ├── onos3.txt ├── onos4.txt ├── p4rt.py ├── parser.p4 ├── pull.sh ├── reboot.proto ├── service.proto ├── switch.p4 ├── system.proto ├── table.p4 └── v1model.p4 ├── conf.py ├── dict.txt ├── exercises.rst ├── figures.pptx ├── figures ├── Slide01.png ├── Slide02.png ├── Slide03.png ├── Slide04.png ├── Slide05.png ├── Slide06.png ├── Slide07.png ├── Slide08.png ├── Slide09.png ├── Slide10.png ├── Slide11.png ├── Slide12.png ├── Slide13.png ├── Slide14.png ├── Slide15.png ├── Slide16.png ├── Slide17.png ├── Slide18.png ├── Slide19.png ├── Slide20.png ├── Slide21.png ├── Slide22.png ├── Slide23.png ├── Slide24.png ├── Slide25.png ├── Slide26.png ├── Slide27.png ├── Slide28.png ├── Slide29.png ├── Slide30.png ├── Slide31.png ├── Slide32.png ├── Slide33.png ├── Slide34.png ├── Slide35.png ├── Slide36.png ├── Slide37.png ├── Slide38.png ├── Slide39.png ├── Slide40.png ├── Slide42.png ├── Slide43.png ├── Slide44.png ├── Slide45.png ├── Slide46.png ├── Slide47.png ├── Slide48.png ├── Slide49.png ├── Slide50.png ├── Slide51.png ├── Slide52.png ├── Slide53.png ├── Slide54.png ├── Slide55.png ├── Slide56.png ├── Slide57.png ├── Slide58.png ├── Slide59.png ├── Slide60.png ├── Slide61.png └── Slide62.png ├── foreword.rst ├── future.rst ├── index.rst ├── intro.rst ├── latest.rst ├── netvirt.rst ├── onos.rst ├── preface.rst ├── print.rst ├── 
requirements.txt ├── stratum.rst ├── switch.rst ├── trellis.rst └── uses.rst /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | #require review 2 | * @llpeterson @drbruced12 3 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Docs Workflow 2 | run-name: ${{ github.actor }} is publishing document artifacts 🚀 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 15 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 16 | concurrency: 17 | group: "pages" 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | # Single deploy job since we're just deploying 22 | deploy: 23 | environment: 24 | name: github-pages 25 | url: ${{ steps.deployment.outputs.page_url }} 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v4 30 | - name: Setup Pages 31 | uses: actions/configure-pages@v4 32 | - name: Build html 33 | run: make html 34 | - name: Upload artifact 35 | uses: actions/upload-pages-artifact@v3 36 | with: 37 | # Upload build repository 38 | path: './_build/html' 39 | - name: Deploy to GitHub Pages 40 | id: deployment 41 | uses: actions/deploy-pages@v4 42 | 43 | 44 | - run: echo "🍏 This job's status is ${{ job.status }}." 
45 | -------------------------------------------------------------------------------- /.github/workflows/validate-docs.yaml: -------------------------------------------------------------------------------- 1 | name: Validate Docs Workflow 2 | run-name: ${{ github.actor }} is validating document source 3 | on: [pull_request, workflow_dispatch] 4 | jobs: 5 | Validate_Docs: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." 9 | - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!" 10 | - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." 11 | - name: Check out repo 12 | uses: actions/checkout@v4 13 | - name: Validate source 14 | run: make test 15 | - name: Build html 16 | run: make html 17 | - name: List built files 18 | run: | 19 | ls ${{ github.workspace }}/_build/html 20 | 21 | - run: echo "🍏 This job's status is ${{ job.status }}." 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | *.tex 3 | venv-docs 4 | .DS_Store 5 | */.DS_Store 6 | figures-pdf/ 7 | figures-epub/ 8 | private/ 9 | local/ 10 | scripts/ 11 | _build/ 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License 2 | 3 | By exercising the Licensed Rights (defined below), You accept and 4 | agree to be bound by the terms and conditions of this Creative Commons 5 | Attribution-NonCommercial-NoDerivatives 4.0 International Public 6 | License ("Public License"). 
To the extent this Public License may be 7 | interpreted as a contract, You are granted the Licensed Rights in 8 | consideration of Your acceptance of these terms and conditions, and 9 | the Licensor grants You such rights in consideration of benefits the 10 | Licensor receives from making the Licensed Material available under 11 | these terms and conditions. 12 | 13 | Section 1 – Definitions. 14 | 15 | (a) Adapted Material means material subject to Copyright and Similar 16 | Rights that is derived from or based upon the Licensed Material and 17 | in which the Licensed Material is translated, altered, arranged, 18 | transformed, or otherwise modified in a manner requiring permission 19 | under the Copyright and Similar Rights held by the Licensor. For 20 | purposes of this Public License, where the Licensed Material is a 21 | musical work, performance, or sound recording, Adapted Material is 22 | always produced where the Licensed Material is synched in timed 23 | relation with a moving image. 24 | 25 | (b) Copyright and Similar Rights means copyright and/or similar 26 | rights closely related to copyright including, without limitation, 27 | performance, broadcast, sound recording, and Sui Generis Database 28 | Rights, without regard to how the rights are labeled or 29 | categorized. For purposes of this Public License, the rights 30 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 31 | Rights. 32 | 33 | (c) Effective Technological Measures means those measures that, in 34 | the absence of proper authority, may not be circumvented under laws 35 | fulfilling obligations under Article 11 of the WIPO Copyright Treaty 36 | adopted on December 20, 1996, and/or similar international 37 | agreements. 38 | 39 | (d) Exceptions and Limitations means fair use, fair dealing, and/or 40 | any other exception or limitation to Copyright and Similar Rights 41 | that applies to Your use of the Licensed Material. 
42 | 43 | (e) Licensed Material means the artistic or literary work, database, 44 | or other material to which the Licensor applied this Public License. 45 | 46 | (f) Licensed Rights means the rights granted to You subject to the 47 | terms and conditions of this Public License, which are limited to 48 | all Copyright and Similar Rights that apply to Your use of the 49 | Licensed Material and that the Licensor has authority to license. 50 | 51 | (g) Licensor means the individual(s) or entity(ies) granting rights 52 | under this Public License. 53 | 54 | (h) NonCommercial means not primarily intended for or directed 55 | towards commercial advantage or monetary compensation. For purposes 56 | of this Public License, the exchange of the Licensed Material for 57 | other material subject to Copyright and Similar Rights by digital 58 | file-sharing or similar means is NonCommercial provided there is no 59 | payment of monetary compensation in connection with the exchange. 60 | 61 | (i) Share means to provide material to the public by any means or 62 | process that requires permission under the Licensed Rights, such as 63 | reproduction, public display, public performance, distribution, 64 | dissemination, communication, or importation, and to make material 65 | available to the public including in ways that members of the public 66 | may access the material from a place and at a time individually 67 | chosen by them. 68 | 69 | (j) Sui Generis Database Rights means rights other than copyright 70 | resulting from Directive 96/9/EC of the European Parliament and of 71 | the Council of 11 March 1996 on the legal protection of databases, 72 | as amended and/or succeeded, as well as other essentially equivalent 73 | rights anywhere in the world. 74 | 75 | (k) You means the individual or entity exercising the Licensed 76 | Rights under this Public License. Your has a corresponding meaning. 77 | 78 | Section 2 – Scope. 79 | 80 | (a) License grant. 
81 | 82 | (1) Subject to the terms and conditions of this Public License, 83 | the Licensor hereby grants You a worldwide, royalty-free, 84 | non-sublicensable, non-exclusive, irrevocable license to exercise 85 | the Licensed Rights in the Licensed Material to: 86 | 87 | (A) reproduce and Share the Licensed Material, in whole or in 88 | part, for NonCommercial purposes only; and 89 | 90 | (B) produce and reproduce, but not Share, Adapted Material for 91 | NonCommercial purposes only. 92 | 93 | (2) Exceptions and Limitations. For the avoidance of doubt, where 94 | Exceptions and Limitations apply to Your use, this Public License 95 | does not apply, and You do not need to comply with its terms and 96 | conditions. 97 | 98 | (3) Term. The term of this Public License is specified in Section 99 | 6(a). 100 | 101 | (4) Media and formats; technical modifications allowed. The 102 | Licensor authorizes You to exercise the Licensed Rights in all 103 | media and formats whether now known or hereafter created, and to 104 | make technical modifications necessary to do so. The Licensor 105 | waives and/or agrees not to assert any right or authority to 106 | forbid You from making technical modifications necessary to 107 | exercise the Licensed Rights, including technical modifications 108 | necessary to circumvent Effective Technological Measures. For 109 | purposes of this Public License, simply making modifications 110 | authorized by this Section 2(a)(4) never produces Adapted 111 | Material. 112 | 113 | (5) Downstream recipients. 114 | 115 | (A) Offer from the Licensor – Licensed Material. Every recipient 116 | of the Licensed Material automatically receives an offer from 117 | the Licensor to exercise the Licensed Rights under the terms and 118 | conditions of this Public License. 119 | 120 | (B) No downstream restrictions. 
You may not offer or impose any 121 | additional or different terms or conditions on, or apply any 122 | Effective Technological Measures to, the Licensed Material if 123 | doing so restricts exercise of the Licensed Rights by any 124 | recipient of the Licensed Material. 125 | 126 | (6) No endorsement. Nothing in this Public License constitutes or 127 | may be construed as permission to assert or imply that You are, or 128 | that Your use of the Licensed Material is, connected with, or 129 | sponsored, endorsed, or granted official status by, the Licensor 130 | or others designated to receive attribution as provided in Section 131 | 3(a)(1)(A)(i). 132 | 133 | (b) Other rights. 134 | 135 | (1) Moral rights, such as the right of integrity, are not licensed 136 | under this Public License, nor are publicity, privacy, and/or 137 | other similar personality rights; however, to the extent possible, 138 | the Licensor waives and/or agrees not to assert any such rights 139 | held by the Licensor to the limited extent necessary to allow You 140 | to exercise the Licensed Rights, but not otherwise. 141 | 142 | (2) Patent and trademark rights are not licensed under this Public 143 | License. 144 | 145 | (3) To the extent possible, the Licensor waives any right to 146 | collect royalties from You for the exercise of the Licensed 147 | Rights, whether directly or through a collecting society under any 148 | voluntary or waivable statutory or compulsory licensing scheme. In 149 | all other cases the Licensor expressly reserves any right to 150 | collect such royalties, including when the Licensed Material is 151 | used other than for NonCommercial purposes. 152 | 153 | Section 3 – License Conditions. 154 | 155 | Your exercise of the Licensed Rights is expressly made subject to the 156 | following conditions. 157 | 158 | (a) Attribution. 
159 | 160 | (1) If You Share the Licensed Material, You must: 161 | 162 | (A) retain the following if it is supplied by the Licensor with 163 | the Licensed Material: 164 | 165 | (i) identification of the creator(s) of the Licensed Material 166 | and any others designated to receive attribution, in any 167 | reasonable manner requested by the Licensor (including by 168 | pseudonym if designated); 169 | 170 | (ii) a copyright notice; 171 | 172 | (iii) a notice that refers to this Public License; 173 | 174 | (iv) a notice that refers to the disclaimer of warranties; 175 | 176 | (v) a URI or hyperlink to the Licensed Material to the extent 177 | reasonably practicable; 178 | 179 | (B) indicate if You modified the Licensed Material and retain an 180 | indication of any previous modifications; and 181 | 182 | (C) indicate the Licensed Material is licensed under this Public 183 | License, and include the text of, or the URI or hyperlink to, 184 | this Public License. 185 | 186 | For the avoidance of doubt, You do not have permission under this 187 | Public License to Share Adapted Material. 188 | 189 | (2) You may satisfy the conditions in Section 3(a)(1) in any 190 | reasonable manner based on the medium, means, and context in which 191 | You Share the Licensed Material. For example, it may be reasonable 192 | to satisfy the conditions by providing a URI or hyperlink to a 193 | resource that includes the required information. 194 | 195 | (3) If requested by the Licensor, You must remove any of the 196 | information required by Section 3(a)(1)(A) to the extent 197 | reasonably practicable. 198 | 199 | Section 4 – Sui Generis Database Rights. 
200 | 201 | Where the Licensed Rights include Sui Generis Database Rights that 202 | apply to Your use of the Licensed Material: 203 | 204 | (a) for the avoidance of doubt, Section 2(a)(1) grants You the right 205 | to extract, reuse, reproduce, and Share all or a substantial portion 206 | of the contents of the database for NonCommercial purposes only and 207 | provided You do not Share Adapted Material; 208 | 209 | (b) if You include all or a substantial portion of the database 210 | contents in a database in which You have Sui Generis Database 211 | Rights, then the database in which You have Sui Generis Database 212 | Rights (but not its individual contents) is Adapted Material; and 213 | 214 | (c) You must comply with the conditions in Section 3(a) if You Share 215 | all or a substantial portion of the contents of the database. 216 | 217 | For the avoidance of doubt, this Section 4 supplements and does not 218 | replace Your obligations under this Public License where the Licensed 219 | Rights include other Copyright and Similar Rights. 220 | 221 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 222 | 223 | (a) Unless otherwise separately undertaken by the Licensor, to the 224 | extent possible, the Licensor offers the Licensed Material as-is and 225 | as-available, and makes no representations or warranties of any kind 226 | concerning the Licensed Material, whether express, implied, 227 | statutory, or other. This includes, without limitation, warranties 228 | of title, merchantability, fitness for a particular purpose, 229 | non-infringement, absence of latent or other defects, accuracy, or 230 | the presence or absence of errors, whether or not known or 231 | discoverable. Where disclaimers of warranties are not allowed in 232 | full or in part, this disclaimer may not apply to You. 
233 | 234 | (b) To the extent possible, in no event will the Licensor be liable 235 | to You on any legal theory (including, without limitation, 236 | negligence) or otherwise for any direct, special, indirect, 237 | incidental, consequential, punitive, exemplary, or other losses, 238 | costs, expenses, or damages arising out of this Public License or 239 | use of the Licensed Material, even if the Licensor has been advised 240 | of the possibility of such losses, costs, expenses, or 241 | damages. Where a limitation of liability is not allowed in full or 242 | in part, this limitation may not apply to You. 243 | 244 | (c) The disclaimer of warranties and limitation of liability 245 | provided above shall be interpreted in a manner that, to the extent 246 | possible, most closely approximates an absolute disclaimer and 247 | waiver of all liability. 248 | 249 | Section 6 – Term and Termination. 250 | 251 | (a) This Public License applies for the term of the Copyright and 252 | Similar Rights licensed here. However, if You fail to comply with 253 | this Public License, then Your rights under this Public License 254 | terminate automatically. 255 | 256 | (b) Where Your right to use the Licensed Material has terminated 257 | under Section 6(a), it reinstates: 258 | 259 | (1) automatically as of the date the violation is cured, provided 260 | it is cured within 30 days of Your discovery of the violation; or 261 | 262 | (2) upon express reinstatement by the Licensor. 263 | 264 | For the avoidance of doubt, this Section 6(b) does not affect 265 | any right the Licensor may have to seek remedies for Your violations 266 | of this Public License. 267 | 268 | (c) For the avoidance of doubt, the Licensor may also offer the 269 | Licensed Material under separate terms or conditions or stop 270 | distributing the Licensed Material at any time; however, doing so 271 | will not terminate this Public License. 
272 | 273 | (d) Sections 1, 5, 6, 7, and 8 survive termination of this Public 274 | License. 275 | 276 | Section 7 – Other Terms and Conditions. 277 | 278 | (a) The Licensor shall not be bound by any additional or different 279 | terms or conditions communicated by You unless expressly agreed. 280 | 281 | (b) Any arrangements, understandings, or agreements regarding the 282 | Licensed Material not stated herein are separate from and 283 | independent of the terms and conditions of this Public License. 284 | 285 | Section 8 – Interpretation. 286 | 287 | (a) For the avoidance of doubt, this Public License does not, and 288 | shall not be interpreted to, reduce, limit, restrict, or impose 289 | conditions on any use of the Licensed Material that could lawfully 290 | be made without permission under this Public License. 291 | 292 | (b) To the extent possible, if any provision of this Public License 293 | is deemed unenforceable, it shall be automatically reformed to the 294 | minimum extent necessary to make it enforceable. If the provision 295 | cannot be reformed, it shall be severed from this Public License 296 | without affecting the enforceability of the remaining terms and 297 | conditions. 298 | 299 | (c) No term or condition of this Public License will be waived and 300 | no failure to comply consented to unless expressly agreed to by the 301 | Licensor. 302 | 303 | (d) Nothing in this Public License constitutes or may be interpreted 304 | as a limitation upon, or waiver of, any privileges and immunities 305 | that apply to the Licensor or You, including from the legal 306 | processes of any jurisdiction or authority. 307 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for ONF documentation created with Sphinx 2 | 3 | # use bash for pushd/popd, and to fail quickly. 
virtualenv's activate 4 | # has undefined variables, so no -u 5 | SHELL = bash -e -o pipefail 6 | 7 | # You can set these variables from the command line. 8 | SPHINXOPTS ?= 9 | SPHINXBUILD ?= sphinx-build 10 | SOURCEDIR ?= . 11 | BUILDDIR ?= _build 12 | 13 | # Create the virtualenv with all the tools installed 14 | VIRTUALENV = venv-docs 15 | 16 | # Put it first so that "make" without argument is like "make help". 17 | help: $(VIRTUALENV) 18 | source ./$(VIRTUALENV)/bin/activate ;\ 19 | $(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | 21 | .PHONY: help lint reload Makefile test 22 | 23 | # Create the virtualenv with all the tools installed 24 | $(VIRTUALENV): 25 | python3 -m venv $@ ;\ 26 | source ./$@/bin/activate ;\ 27 | pip install -r requirements.txt 28 | 29 | # lint and link verification. linkcheck is built into sphinx (disabled for now) 30 | test: lint spelling 31 | 32 | # lint all .rst files 33 | lint: $(VIRTUALENV) 34 | source ./$`__ license. The 15 | community is invited to contribute corrections, improvements, updates, 16 | and new material under the same terms. While this license does not 17 | automatically grant the right to make derivative works, we are keen to 18 | discuss derivative works (such as translations) with interested 19 | parties. Please reach out to 20 | `discuss@systemsapproach.org `__. 21 | 22 | If you make use of this work, the attribution should include the 23 | following information: 24 | 25 | | *Title: Software-Defined Networks: A Systems Approach* 26 | | *Authors: Larry Peterson, Carmelo Cascone, Brian O'Connor, Thomas Vachuska, and Bruce Davie* 27 | | *Source:* https://github.com/SystemsApproach/SDN 28 | | *License:* \ `CC BY-NC-ND 4.0 `__ 29 | 30 | Read the Book 31 | ------------- 32 | 33 | This book is part of the `Systems Approach Series 34 | `__, with an online version published 35 | at `https://sdn.systemsapproach.org 36 | `__. 
37 | 38 | 39 | To track progress and receive notices about new versions, you can follow 40 | the project on 41 | `Mastodon `__. To read a running 42 | commentary on how the Internet is evolving, and for updates on our writing projects, you can sign up for the 43 | `Systems Approach newsletter `__. 44 | 45 | Releases and Editions 46 | --------------------- 47 | 48 | We continually release open source content in GitHub, with `print and 49 | ebook editions `__ 50 | published from time to time. The latest print and ebook (2nd Printing) 51 | corresponds to the ``v2.0`` tag. 52 | 53 | In general, ``master`` contains a coherent and internally consistent 54 | version of the material. (If it were code, the book would build and 55 | run.) Minor fixes are checked directly into ``master``, but new 56 | content under development is checked into branches until it can be 57 | merged into ``master`` without breaking self-consistency. The web 58 | version of the book available at https://sdn.systemsapproach.org is 59 | then continuously generated from ``master``. 60 | 61 | Minor releases (e.g., ``v1.1``) are tagged whenever there are 62 | sufficient improvements or new content to warrant the effort. This is 63 | done primarily to create a snapshot so that everyone in a course can 64 | know they are using the same version. 65 | 66 | 67 | Build the Book 68 | -------------- 69 | 70 | To build a web-viewable version, you first need to download the 71 | source: 72 | 73 | .. literalinclude:: code/build.sh 74 | 75 | The build process is stored in the Makefile and requires Python be 76 | installed. The Makefile will create a virtualenv (``venv-docs``) which 77 | installs the documentation generation toolset. You may also need to 78 | install the ``enchant`` C library using your system’s package manager 79 | for the spelling checker to function properly. 80 | 81 | To generate HTML in ``_build/html``, run ``make html``. 82 | 83 | To check the formatting of the book, run ``make lint``. 
84 | 85 | To check spelling, run ``make spelling``. If there are additional 86 | words, names, or acronyms that are correctly spelled but not in the dictionary, 87 | please add them to the ``dict.txt`` file. 88 | 89 | To see the other available output formats, run ``make``. 90 | 91 | 92 | Contribute to the Book 93 | ---------------------- 94 | 95 | We hope that if you use this material, you are also willing to 96 | contribute back to it. If you are new to open source, you might check 97 | out this `How to Contribute to Open 98 | Source `__ guide. Among 99 | other things, you’ll learn about posting *Issues* that you’d like to see 100 | addressed, and issuing *Pull Requests* to merge your improvements back 101 | into GitHub. 102 | 103 | If you’d like to contribute and are looking for something that needs 104 | attention, see the `wiki `__ 105 | for the current TODO list. 106 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | Version 2.1-dev -------------------------------------------------------------------------------- /_extra/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: AI2Bot 2 | User-agent: Ai2Bot-Dolma 3 | User-agent: aiHitBot 4 | User-agent: Amazonbot 5 | User-agent: anthropic-ai 6 | User-agent: Applebot 7 | User-agent: Applebot-Extended 8 | User-agent: Brightbot 1.0 9 | User-agent: Bytespider 10 | User-agent: CCBot 11 | User-agent: ChatGPT-User 12 | User-agent: Claude-Web 13 | User-agent: ClaudeBot 14 | User-agent: cohere-ai 15 | User-agent: cohere-training-data-crawler 16 | User-agent: Cotoyogi 17 | User-agent: Crawlspace 18 | User-agent: Diffbot 19 | User-agent: DuckAssistBot 20 | User-agent: FacebookBot 21 | User-agent: Factset_spyderbot 22 | User-agent: FirecrawlAgent 23 | User-agent: FriendlyCrawler 24 | User-agent: Google-Extended 25 | User-agent: GoogleOther 26 | 
User-agent: GoogleOther-Image 27 | User-agent: GoogleOther-Video 28 | User-agent: GPTBot 29 | User-agent: iaskspider/2.0 30 | User-agent: ICC-Crawler 31 | User-agent: ImagesiftBot 32 | User-agent: img2dataset 33 | User-agent: imgproxy 34 | User-agent: ISSCyberRiskCrawler 35 | User-agent: Kangaroo Bot 36 | User-agent: meta-externalagent 37 | User-agent: Meta-ExternalAgent 38 | User-agent: meta-externalfetcher 39 | User-agent: Meta-ExternalFetcher 40 | User-agent: NovaAct 41 | User-agent: OAI-SearchBot 42 | User-agent: omgili 43 | User-agent: omgilibot 44 | User-agent: Operator 45 | User-agent: PanguBot 46 | User-agent: Perplexity-User 47 | User-agent: PerplexityBot 48 | User-agent: PetalBot 49 | User-agent: Scrapy 50 | User-agent: SemrushBot-OCOB 51 | User-agent: SemrushBot-SWA 52 | User-agent: Sidetrade indexer bot 53 | User-agent: TikTokSpider 54 | User-agent: Timpibot 55 | User-agent: VelenPublicWebCrawler 56 | User-agent: Webzio-Extended 57 | User-agent: YouBot 58 | Disallow: / 59 | -------------------------------------------------------------------------------- /_static/SystemsApproachLogoURL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/_static/SystemsApproachLogoURL.png -------------------------------------------------------------------------------- /_static/bridge.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/_static/bridge.ico -------------------------------------------------------------------------------- /_static/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/_static/cover.jpg 
-------------------------------------------------------------------------------- /_static/css/rtd_theme_mods.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019-present Open Networking Foundation 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | 16 | /* Don't restrict content width on the RTD theme 17 | * from: https://stackoverflow.com/a/32898444 */ 18 | 19 | .wy-nav-content { 20 | max-width: none; 21 | } 22 | 23 | .wy-table-responsive table td, .wy-table-responsive table th { 24 | white-space: normal; 25 | } 26 | 27 | /* Colors for navigation */ 28 | 29 | .wy-side-nav-search, .wy-nav-top { 30 | background: #2F5597; 31 | } 32 | 33 | /* .wy-menu-vertical header,.wy-menu-vertical p.caption{color:#2F5597} */ 34 | 35 | .wy-menu-vertical header,.wy-menu-vertical p.caption{color:#6AB0DE} 36 | 37 | /* Headings */ 38 | h1, h2 { 39 | font-weight: bold; 40 | line-height: 1.25; 41 | color: #3279a8; 42 | text-rendering: optimizeLegibility; 43 | } 44 | 45 | h3, h4, h5, h6 { 46 | margin-bottom: .5rem; 47 | font-style: italic; 48 | line-height: 1.25; 49 | color: #313131; 50 | text-rendering: optimizeLegibility; 51 | } 52 | 53 | h1 { 54 | margin-bottom: 2rem; 55 | font-size: 2rem; 56 | } 57 | 58 | h2 { 59 | margin-bottom: .5rem; 60 | margin-top: 1rem; 61 | font-size: 1.5rem; 62 | } 63 | 64 | h3 { 65 | margin-top: 1.5rem; 66 | font-size: 1.25rem; 67 | } 68 | 69 | .pop { 70 | 
color: #6AB0DE; 71 | font-style: italic; 72 | font-weight: bold; 73 | } 74 | aside.sidebar { 75 | margin: 0 0 0.5em 1em; 76 | border: 1px solid #ddb; 77 | padding: 7px 7px 0 7px; 78 | background-color: #ffe; 79 | width: 40%; 80 | float: right; 81 | } 82 | -------------------------------------------------------------------------------- /_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /arch.rst: -------------------------------------------------------------------------------- 1 | Chapter 3: Basic Architecture 2 | =============================== 3 | 4 | SDN is an approach to building networks that favors programmable 5 | commodity hardware, with the intelligence that controls packet 6 | forwarding and other network operations implemented in software. 7 | Realizing 8 | such a design is independent of any particular protocol stack, but 9 | instead requires a set of open APIs and a new collection of software 10 | components that support those APIs. This chapter introduces the basic 11 | architecture of such an *SDN software stack*. 12 | 13 | This chapter defines the general architecture of such a software 14 | stack, and while there are multiple options for the specific 15 | components and tools that can be plugged into this architecture, it 16 | also introduces an example set. 
We do this to make the discussion more 17 | concrete, but the particular components we describe have two important 18 | attributes. One, they are open source and freely available on GitHub. 19 | Two, they are designed to work together, providing a comprehensive 20 | solution; there are no gaps in our story. Both attributes make it 21 | possible for *anyone* to build the same end-to-end system that is 22 | running today in production networks. 23 | 24 | 3.1 Software Stack 25 | ------------------- 26 | 27 | An overview of the software stack is given in :numref:`Figure %s 28 | <fig-stack>`, which includes a *Bare-Metal Switch* running a local 29 | *Switch OS*, controlled by a global *Network OS* hosting a collection of 30 | *Control Applications*. :numref:`Figure %s <fig-stack>` also calls out 31 | a corresponding set of exemplar open source components (*SD-Fabric*, 32 | *ONOS*, and *Stratum*) on the right, as well as a related *P4 33 | Toolchain* on the left. This chapter introduces these components, with 34 | later chapters giving more detail. 35 | 36 | Note the similarity between this diagram and :numref:`Figure %s 37 | ` in Chapter 1. Both figures include two open 38 | interfaces: one between the Control Apps and the Network OS, and a 39 | second between the Network OS and the underlying programmable 40 | switches. These two interfaces are depicted as “API shims” in 41 | :numref:`Figure %s <fig-stack>`, and in the context of the exemplar 42 | components, correspond to a combination of *gNMI*, *gNOI* and 43 | *FlowObjective* in the first case, and a combination of *gNMI*, *gNOI* 44 | and either *P4Runtime* or *OpenFlow* in the second case. gRPC, an 45 | open source remote procedure call framework, is 46 | shown as the transport protocol for these APIs—an implementation 47 | choice, but one that we will generally assume from here 48 | on. (Note that OpenFlow, unlike the other protocols, does not run over 49 | gRPC.) We discuss all of these acronyms and interfaces in further 50 | detail below. 
51 | 52 | 53 | .. _fig-stack: 54 | .. figure:: figures/Slide08.png 55 | :width: 550px 56 | :align: center 57 | 58 | Overall architecture of the SDN software stack. 59 | 60 | It is important to keep in mind that the software components listed in 61 | :numref:`Figure %s <fig-stack>` correspond to active open source 62 | projects, and as a consequence, they continue to evolve (as do their 63 | APIs). Specific versions of each component—and their associated 64 | APIs—have been integrated and deployed into both trial and production 65 | environments. For example, while the figure shows P4Runtime as a 66 | candidate control interface exported by the Switch OS, there are 67 | deployed solutions that use OpenFlow instead. (This includes the 68 | Comcast deployment.) Similarly, while the figure shows gNMI/gNOI as 69 | the config/ops interface to each switch, there are solutions that use 70 | NETCONF instead. 71 | 72 | For the purpose of this book, we do not attempt to track all possible 73 | combinations of component versions and APIs, but opt instead to focus 74 | on the single consistent stack enumerated in :numref:`Figure %s 75 | <fig-stack>`, since it represents our best judgement as to the “right” 76 | approach based on experience (so far) with earlier versions up and down 77 | the stack. 78 | 79 | 3.1.1 Switch vs Host Implementation 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 81 | 82 | The top-to-bottom view of the software stack shown in :numref:`Figure 83 | %s <fig-stack>` is from the perspective of a single switch, but it is 84 | important to also keep the network perspective in 85 | mind. :numref:`Figure %s <fig-e2e>` gives such a perspective by 86 | focusing on an end-to-end path through the network, connecting 87 | Virtual Machines (VMs). 88 | 89 | .. _fig-e2e: 90 | .. figure:: figures/Slide19.png 91 | :width: 500px 92 | :align: center 93 | 94 | End-to-End Perspective of a Software-Defined Network, including 95 | the end hosts and the Virtual Machines (VMs) they host. 
96 | 97 | This perspective highlights two important aspects of the system. The 98 | first reinforces the point we've been making: that the Network OS 99 | (e.g., ONOS) is network-wide, while the Switch OS (e.g., Stratum) is 100 | per-switch. 101 | 102 | The second is that part of the SDN software stack runs on the end 103 | hosts. In particular, there is a *Virtual Switch (vSwitch)*—typically 104 | implemented in software as part of the hypervisor 105 | running on the server—that is responsible for forwarding packets to 106 | and from the VMs. (Of course, not every end-host runs VMs, but a 107 | similar architecture applies to container hosts or bare-metal servers.) 108 | Just like a physical switch, 109 | the vSwitch forwards packets from input port to output port, but these 110 | are virtual ports connected to VMs (or containers) rather than physical ports 111 | connected to physical machines. 112 | 113 | .. sidebar:: Host-Centric Perspective 114 | 115 | *This book adopts a network-oriented perspective of SDN, one 116 | that treats the end-host (both the virtual switch running in 117 | the host OS and the NIC connecting the host to the network) as 118 | an extension of the network, running under the control of a 119 | Network OS. A more host-centric perspective is equally valid, 120 | and perhaps more importantly, comes with a robust ecosystem of 121 | open source software that runs as part of the host OS.* 122 | 123 | *DPDK is one example, but another gaining traction is the 124 | combination of eBPF (extended Berkeley Packet Filter) and XDP 125 | (eXpress Data Path). When used together, they provide a way to 126 | program generalized Match-Action rules in the OS kernel, or 127 | potentially even on a SmartNIC. This is similar in spirit to 128 | OpenFlow and P4, except they allow for the Action part to be 129 | an arbitrary program. 
In contrast, OpenFlow defines a fixed 130 | set of Actions, and P4 is a restricted language for expressing 131 | Actions (e.g., it does not include loops). Such restrictions are necessary 132 | when the Action must execute within a fixed cycle budget, as 133 | is the case for a switch-based forwarding pipeline. They also 134 | enable formal verification of the data plane, a promising 135 | opportunity discussed in Chapter 10.* 136 | 137 | Fortunately, we can view a vSwitch as behaving just like a physical 138 | switch, including the APIs it supports. That a vSwitch is implemented 139 | in software on a general-purpose processor rather than in an ASIC is 140 | an implementation detail. While this is a true statement, being a 141 | software switch dramatically lowers the barrier to introducing 142 | additional features, so the feature set is both richer and more 143 | dynamic. For example, *Open vSwitch (OVS)* is a widely used open 144 | source vSwitch that supports OpenFlow as a northbound API. It formed 145 | the data plane for the original Nicira network virtualization 146 | platform. OVS has been integrated with an assortment of 147 | complementary tools, such as DPDK (Data Plane Development Kit), another open source 148 | component that optimizes packet forwarding operations on x86 processors. Although it’s an 149 | important topic, this book does not explore the full range of 150 | possibilities for a vSwitch like OVS or other end-host optimizations, 151 | but instead treats vSwitches just like any other switch along the end-to-end 152 | path. 153 | 154 | Another implementation detail shown in :numref:`Figure %s <fig-e2e>` 155 | is that the host may have a *Smart Network Interface Card (SmartNIC)* 156 | that assists (or possibly even replaces) the vSwitch. 
Vendors have a 157 | long history of off-loading kernel functionality onto NICs (e.g., 158 | everything from computing TCP/IP checksums to supporting VMs), but in 159 | the SDN context, the interesting possibility is to replicate the 160 | forwarding pipeline found on the network switches. Again, there is a 161 | range of possible implementation choices, including both FPGA and 162 | ASIC, as well as whether the NIC is fixed-function or programmable 163 | (using P4). For our purposes, we will treat such SmartNICs as yet 164 | another switching element along the end-to-end path. 165 | 166 | 3.2 Bare-Metal Switch 167 | ------------------------- 168 | 169 | Starting at the bottom and working our way up the stack shown in 170 | :numref:`Figures %s <fig-stack>` and :numref:`%s <fig-e2e>`, the 171 | network data plane is implemented by an interconnected set of 172 | bare-metal switches. Our focus for now is on a single switch, where the 173 | overall network topology is dictated by the Control Applications 174 | running at the top of the software stack. For example, we describe a 175 | Control Application that manages a leaf-spine topology in a later 176 | section. 177 | 178 | The architecture is agnostic as to the switch vendor, but the full 179 | software stack outlined in this chapter runs on switches built using 180 | Tofino and Tomahawk switching chips manufactured by Barefoot Networks 181 | (now an Intel company) and Broadcom, respectively. The Tofino chip 182 | implements a programmable forwarding pipeline based on PISA, while the 183 | Tomahawk chip implements a fixed-function pipeline. 184 | 185 | In the case of both chips, a pair of P4 programs defines the 186 | forwarding pipeline. The first (``forward.p4``) specifies the 187 | forwarding behavior. The second (``arch.p4``) specifies the logical 188 | architecture of the target forwarding chip. The P4 compiler 189 | generates target files that are loaded into both the Network OS and 190 | the switch. 
These target files are not named in :numref:`Figure %s 191 | <fig-stack>` (we will return to the details in Chapters 4 and 5), but 192 | both components need to know about the output because one *implements* 193 | the forwarding behavior (the switch), and the other *controls* the 194 | forwarding behavior (the Network OS). 195 | 196 | We return to the details of the compiler toolchain in Chapter 4. For 197 | now, we will just address the question of why we need a P4 program in 198 | the case of a fixed-function switching chip (since we are not using P4 199 | to modify its fixed behavior). The quick summary is that a formal 200 | specification of the forwarding pipeline is required to generate the 201 | API to the data plane. P4 programs are written to an abstract model 202 | of the forwarding pipeline, and whether the chip’s actual hardware 203 | pipeline is fixed or programmable, we still need to know how to map 204 | the abstract pipeline onto the physical pipeline. This is where 205 | ``arch.p4`` plays a role. As for the role of ``forward.p4``, this 206 | program actually *prescribes* the pipeline in the case of a 207 | programmable chip, whereas for the fixed-function chip, ``forward.p4`` 208 | merely *describes* the pipeline. But we still need ``forward.p4`` in 209 | both cases because the toolchain uses it, along with ``arch.p4``, to 210 | generate the API that sits between the control and data planes. 211 | 212 | 3.3 Switch OS 213 | ------------------- 214 | 215 | Moving up from the base hardware, each switch runs a local Switch 216 | OS. Not to be confused with the Network OS that manages a network of 217 | switches, this Switch OS runs on a commodity processor internal to 218 | the switch (not shown in :numref:`Figure %s <fig-stack>`). It is 219 | responsible for handling API calls issued to the switch, for example 220 | from the Network OS. This includes taking the appropriate action on 221 | the switch’s internal resources, which sometimes affects the switching 222 | chip. 
223 | 224 | Multiple open source Switch OSes are available (including SONiC, 225 | originally developed at Microsoft Azure), but we use a combination of 226 | Stratum and *Open Network Linux (ONL)* as our primary example. ONL is 227 | a switch-ready distribution of Linux (originally prepared by 228 | Big Switch Networks), while Stratum (originally developed at Google) is 229 | primarily responsible for translating between the external-facing API 230 | and the internal switch resources. For this reason, we sometimes refer 231 | to Stratum as a *Thin Switch OS*. 232 | 233 | Stratum mediates all interactions between the switch and the outside 234 | world. This includes loading the target files generated by the P4 235 | compiler, which define a contract between the data plane and the 236 | control plane. This contract effectively replaces OpenFlow’s flow rule 237 | abstraction with an auto-generated specification. The rest of the 238 | Stratum-managed API is defined as follows: 239 | 240 | * **P4Runtime:** An interface for controlling forwarding behavior at 241 | runtime. It is the key for populating forwarding tables and 242 | manipulating forwarding state. The P4Runtime is independent of any 243 | particular P4 program and agnostic to the underlying hardware. This 244 | contrasts with OpenFlow, which is rather prescriptive about the 245 | forwarding model and how the control plane interacts with it. 246 | (For completeness, :numref:`Figure %s 247 | <fig-stack>` also lists OpenFlow as an alternative control interface.) 248 | 249 | * **gNMI (gRPC Network Management Interface):** Used to set and 250 | retrieve configuration state. gNMI is usually paired with OpenConfig 251 | YANG models that define the structure of the configuration and state 252 | tree. 
253 | 254 | * **gNOI (gRPC Network Operations Interface):** Used to set and 255 | retrieve operational state, for example supporting certificate 256 | management, device testing, software upgrades, and network 257 | troubleshooting. 258 | 259 | If you recall the distinction between Control and Configuration 260 | introduced in Chapter 1, then you will recognize P4Runtime as the 261 | Control API and the gNMI/gNOI combination as a modern version of a 262 | switch’s traditional Configuration API. This latter API has 263 | historically been called the OAM interface (for “Operations, 264 | Administration, and Maintenance”), and it has most often been 265 | implemented as a command-line interface (which is of course not really 266 | an API). 267 | 268 | 3.4 Network OS 269 | ------------------- 270 | 271 | The Network OS is a platform for configuring and controlling a network 272 | of switches. It runs off-switch as a logically centralized 273 | SDN controller, and manages a set of switches on a network-wide 274 | basis. Central to this role is responsibility for monitoring the state 275 | of those switches (e.g., detecting port and link failures), 276 | maintaining a global view of the topology that reflects the current 277 | state of the network, and making that view available to any interested 278 | Control Apps. Those Control Apps, in turn, “instruct” the Network OS 279 | to control packet flows through the underlying switches according to 280 | whatever service they are providing. The way these “control 281 | instructions” are expressed is a key aspect of the Network OS’s API. 282 | 283 | Going beyond this conceptual description requires a specific Network 284 | OS, and we use *ONOS (Open Network Operating System)* as our 285 | exemplar. ONOS is best-of-breed in terms of performance, scalability, 286 | and availability. 
At a high-level, ONOS takes responsibility for three 287 | things: 288 | 289 | * **Managing Topology:** Tracks inventory of network infrastructure 290 | devices and their interconnection to provide a shared view of the 291 | network environment for the rest of the platform and applications. 292 | 293 | * **Managing Configuration:** Facilitates issuing, tracking, rolling 294 | back, and validating atomic configuration operations on multiple 295 | network devices. This effectively mirrors the per-switch 296 | configuration and operation interfaces (also using gNMI and gNOI), 297 | but does so at the network level rather than the device level. 298 | 299 | * **Controlling Switches:** Controls the data plane packet 300 | processing pipelines of the network switches and provides subsequent control 301 | of flow rules, groups, meters and other building blocks within those 302 | pipelines. 303 | 304 | With respect to this last role, ONOS exports a northbound 305 | *FlowObjectives* abstraction, which generalizes Flow Rules in a 306 | pipeline-independent way.\ [#]_ This interface, which Chapter 6 307 | describes in more detail, is not standardized in the same way as the 308 | control interface exported by individual switches. As with a 309 | conventional server OS, applications written to the ONOS 310 | API do not easily port to another Network OS. The requirement is that 311 | this interface be open and well-defined; not that there be just one 312 | such interface. If over time there is consensus about the Network OS 313 | interface, then applications will be more easily portable. But just as 314 | with server operating systems, the higher one goes up the software 315 | stack, the more difficult it becomes to reach such a consensus. 316 | 317 | .. [#] We make no claim that FlowObjectives are an ideal interface for 318 | controlling a switch. The API evolved out of necessity, allowing 319 | developers to deal with different pipelines. 
Defining a general 320 | interface is the subject of ongoing research. 321 | 322 | Finally, although :numref:`Figure %s <fig-stack>` does not show any 323 | details about the internals of ONOS, to better appreciate the role it 324 | plays in the larger scheme of things, we note that the most critical 325 | subsystem in any Network OS is a *Scalable Key/Value Store*. Because 326 | ONOS provides a logically centralized view of the network, the key to 327 | its performance, scalability, and availability is how it stores that 328 | state. In the case of ONOS, this store is provided by a companion open 329 | source project, called Atomix, which implements the Raft consensus 330 | algorithm. Storage services like Atomix are the cornerstone of nearly 331 | all horizontally scalable cloud services today, as Chapter 6 describes 332 | in more detail. 333 | 334 | 3.5 Leaf-Spine Fabric 335 | --------------------------- 336 | 337 | Because we use ONOS as the Network OS, we are limited to ONOS-hosted 338 | SDN Control Applications. For illustrative purposes, we use SD-Fabric 339 | as that Control App—it implements a *leaf-spine* fabric on a network 340 | of programmable switches. This means SD-Fabric dictates a particular 341 | network topology, namely the leaf-spine topology common to 342 | datacenter clusters. As outlined in Section 2.3, this topology 343 | includes a set of leaf switches, each of which serves as a Top-of-Rack 344 | switch (i.e., it connects all the servers in a single rack), where the 345 | leaf switches are, in turn, interconnected by a set of spine switches. 346 | 347 | At a high level, SD-Fabric plays three roles. First, it provides a 348 | switching fabric that interconnects servers—and the VMs running on 349 | those servers—in a multi-rack cluster. Second, it connects the cluster 350 | as a whole upstream to peer networks, including the Internet, using 351 | BGP (i.e., it behaves much like a router). 
Third, it connects the 352 | cluster as a whole to downstream access networks (i.e., it terminates 353 | access network technologies like PON and RAN). In other words, 354 | instead of thinking about SD-Fabric as a conventional leaf-spine fabric 355 | that’s locked away in some datacenter, SD-Fabric is best viewed as an 356 | interconnect running at the network edge, helping to bridge 357 | access-specific edge clouds to IP-based datacenter clouds. 358 | 359 | In terms of implementation, SD-Fabric actually corresponds to a suite of 360 | Control Apps running on ONOS, as opposed to a single app. This suite 361 | supports several control plane features, including: 362 | 363 | * VLANs and L2 bridging 364 | * IPv4 and IPv6 unicast and multicast routing 365 | * DHCP L3 relay 366 | * Dual-homing of servers and upstream routers 367 | * QinQ forwarding/termination 368 | * MPLS-based pseudowires. 369 | 370 | For each of these features, the corresponding Control App interacts 371 | with ONOS—by observing changes in the network topology and issuing 372 | Flow Objectives—rather than by using any of the standard protocol 373 | implementations found in legacy routers and switches. The only time a 374 | legacy protocol is involved is when SD-Fabric needs to communicate with 375 | the outside world (e.g., upstream metro/core routers), in which case 376 | it uses standard BGP (as implemented by the open source Quagga 377 | server). This is actually a common feature of SDN environments: they 378 | avoid traditional routing protocols internally, or in a greenfield deployment, 379 | but interaction with the outside world still requires them. 380 | 381 | .. _fig-trellis: 382 | .. figure:: figures/Slide09.png 383 | :width: 600px 384 | :align: center 385 | 386 | SD-Fabric suite of control apps managing a (potentially distributed) 387 | leaf-spine fabric. 388 | 389 | Finally, SD-Fabric is sometimes deployed at a single site with 390 | multiple RAN base stations connected via SD-Fabric leaf switches. 
But 391 | SD-Fabric can also be extended to multiple sites deeper into the 392 | network using multiple stages of spines, as shown in :numref:`Figure 393 | %s <fig-trellis>`. Chapter 7 describes all of this in more detail. 394 | -------------------------------------------------------------------------------- /authors.rst: -------------------------------------------------------------------------------- 1 | About The Authors 2 | ================== 3 | 4 | **Larry Peterson** is the Robert E. Kahn Professor of Computer 5 | Science, Emeritus at Princeton University, where he served as Chair 6 | from 2003 to 2009. His research focuses on the design, implementation, 7 | and operation of Internet-scale distributed systems, including the 8 | widely used PlanetLab and MeasurementLab platforms. He is currently 9 | contributing to the Aether access-edge cloud project at the Open 10 | Networking Foundation (ONF), where he advises as Chief Scientist. 11 | Peterson is a member of the National Academy of Engineering, a Fellow 12 | of the ACM and the IEEE, the 2010 recipient of the IEEE Kobayashi 13 | Computer and Communication Award, and the 2013 recipient of the ACM 14 | SIGCOMM Award. He received his Ph.D. degree from Purdue University 15 | in 1985. 16 | 17 | **Carmelo Cascone** is a Member of the Technical Staff at the Open 18 | Networking Foundation (ONF), where he currently leads technical 19 | activities around the adoption of programmable switches, P4, and 20 | P4Runtime in ONF projects such as ONOS, CORD, and Aether. Cascone 21 | received a Ph.D. from Politecnico di Milano in 2017, in a joint 22 | program with École Polytechnique de Montréal. He is broadly interested 23 | in computer networks and systems, with a focus on data plane 24 | programmability and Software-Defined Networking (SDN). 
25 | 26 | **Brian O'Connor** is a Member of the Technical Staff at the Open 27 | Networking Foundation (ONF), where he currently leads technical 28 | activities around the adoption of switch operating systems. O'Connor 29 | received a BS and MS in Computer Science from Stanford University in 30 | 2012 and 2013, respectively. 31 | 32 | **Thomas Vachuska** is Chief Architect at the Open Networking 33 | Foundation (ONF), where he leads the ONOS project. Before joining ONF, 34 | Vachuska was a Software Architect at Hewlett-Packard. Vachuska 35 | received a BA in Mathematics from California State 36 | University-Sacramento in 1994. 37 | 38 | **Bruce Davie** is a computer scientist noted for his contributions to 39 | the field of networking. He is a former VP and CTO for the Asia 40 | Pacific region at VMware. He joined VMware during the acquisition of 41 | Software-Defined Networking (SDN) startup Nicira. Prior to that, he 42 | was a Fellow at Cisco Systems, leading a team of architects 43 | responsible for Multiprotocol Label Switching (MPLS). Davie has over 44 | 30 years of networking industry experience and has co-authored 17 45 | RFCs. He was recognized as an ACM Fellow in 2009 and chaired ACM 46 | SIGCOMM from 2009 to 2013. He was also a visiting lecturer at the 47 | Massachusetts Institute of Technology for five years. Davie is the 48 | author of multiple books and the holder of more than 40 U.S. Patents. 
49 | 50 | -------------------------------------------------------------------------------- /bridge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/bridge.jpg -------------------------------------------------------------------------------- /code/README: -------------------------------------------------------------------------------- 1 | Order code fragments are used (substituted in book.tex) 2 | 3 | switch.rst: code/v1model.p4 4 | switch.rst: code/headers.p4 5 | switch.rst: code/parser.p4 6 | switch.rst: code/checksum.p4 7 | switch.rst: code/ingress.p4 8 | switch.rst: code/egress.p4 9 | switch.rst: code/deparser.p4 10 | switch.rst: code/switch.p4 11 | 12 | stratum.rst: code/table.p4 13 | stratum.rst: code/actions.p4 14 | stratum.rst: code/p4rt.py 15 | stratum.rst: code/iface.yang 16 | stratum.rst: code/eth.yang 17 | stratum.rst: code/service.proto 18 | stratum.rst: code/system.proto 19 | stratum.rst: code/reboot.proto 20 | 21 | onos.rst: code/flowobj.java 22 | 23 | trellis.rst: code/onos1.txt 24 | trellis.rst: code/onos2.txt 25 | trellis.rst: code/onos3.txt 26 | trellis.rst: code/onos4.txt 27 | trellis.rst: code/fabric.p4 28 | 29 | exercises.rst: code/clone.sh 30 | exercises.rst: code/pull.sh 31 | exercises.rst: code/make.sh 32 | 33 | README.rst: code/build.sh -------------------------------------------------------------------------------- /code/actions.p4: -------------------------------------------------------------------------------- 1 | actions { 2 | preamble { 3 | id: 16800567 4 | name: "NoAction" 5 | alias: "NoAction" 6 | } 7 | } 8 | actions { 9 | preamble { 10 | id: 16805608 11 | name: "MyIngress.drop" 12 | alias: "drop" 13 | } 14 | } 15 | actions { 16 | preamble { 17 | id: 16799317 18 | name: "MyIngress.ipv4_forward" 19 | alias: "ipv4_forward" 20 | } 21 | params { 22 | id: 1 23 | name: "dstAddr" 24 | bitwidth: 48 25 | } 26 | 
params { 27 | id: 2 28 | name: "port" 29 | bitwidth: 9 30 | } 31 | } 32 | tables { 33 | preamble { 34 | id: 33574068 35 | name: "MyIngress.ipv4_lpm" 36 | alias: "ipv4_lpm" 37 | } 38 | match_fields { 39 | id: 1 40 | name: "hdr.ipv4.dstAddr" 41 | bitwidth: 32 42 | match_type: LPM 43 | } 44 | action_refs { 45 | id: 16799317 46 | } 47 | action_refs { 48 | id: 16805608 49 | } 50 | action_refs { 51 | id: 16800567 52 | } 53 | size: 1024 54 | } 55 | -------------------------------------------------------------------------------- /code/build.sh: -------------------------------------------------------------------------------- 1 | $ mkdir ~/systemsapproach 2 | $ cd ~/systemsapproach 3 | $ git clone https://github.com/SystemsApproach/SDN.git 4 | $ cd SDN 5 | -------------------------------------------------------------------------------- /code/checksum.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | *** C H E C K S U M V E R I F I C A T I O N *** 3 | ****************************************************/ 4 | 5 | control MyVerifyChecksum( 6 | inout headers hdr, 7 | inout metadata meta) { 8 | apply { } 9 | } 10 | -------------------------------------------------------------------------------- /code/clone.sh: -------------------------------------------------------------------------------- 1 | $ cd ~ 2 | $ git clone -b advanced https://github.com/opennetworkinglab/ngsdn-tutorial --depth 1 3 | -------------------------------------------------------------------------------- /code/deparser.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | ************* D E P A R S E R ********************* 3 | ****************************************************/ 4 | 5 | control MyDeparser( 6 | packet_out packet, 7 | in headers hdr) { 8 | 9 | apply { 10 | packet.emit(hdr.ethernet); 11 | packet.emit(hdr.ipv4); 
12 | } 13 | } 14 | -------------------------------------------------------------------------------- /code/egress.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | ******* E G R E S S P R O C E S S I N G ******** 3 | ****************************************************/ 4 | 5 | control MyEgress( 6 | inout headers hdr, 7 | inout metadata meta, 8 | inout standard_metadata_t standard_metadata) { 9 | 10 | apply { } 11 | } 12 | 13 | /**************************************************** 14 | *** C H E C K S U M C O M P U T A T I O N **** 15 | ****************************************************/ 16 | 17 | control MyComputeChecksum( 18 | inout headers hdr, 19 | inout metadata meta) { 20 | 21 | apply { 22 | update_checksum( 23 | hdr.ipv4.isValid(), 24 | { hdr.ipv4.version, 25 | hdr.ipv4.ihl, 26 | hdr.ipv4.diffserv, 27 | hdr.ipv4.totalLen, 28 | hdr.ipv4.identification, 29 | hdr.ipv4.flags, 30 | hdr.ipv4.fragOffset, 31 | hdr.ipv4.ttl, 32 | hdr.ipv4.protocol, 33 | hdr.ipv4.srcAddr, 34 | hdr.ipv4.dstAddr }, 35 | hdr.ipv4.hdrChecksum, 36 | HashAlgorithm.csum16); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /code/eth.yang: -------------------------------------------------------------------------------- 1 | Module: openconfig-if-ethernet 2 | augment /ocif:interfaces/ocif:interface: 3 | +--rw ethernet 4 | +--rw config 5 | | +--rw mac-address? 6 | | +--rw auto-negotiate? 7 | | +--rw duplex-mode? 8 | | +--rw port-speed? 9 | | +--rw enable-flow-control? 10 | +--ro state 11 | +--ro mac-address? 12 | +--ro auto-negotiate? 13 | +--ro duplex-mode? 14 | +--ro port-speed? 15 | +--ro enable-flow-control? 16 | +--ro hw-mac-address? 17 | +--ro counters 18 | ... 
19 | -------------------------------------------------------------------------------- /code/fabric.p4: -------------------------------------------------------------------------------- 1 | apply { 2 | #ifdef UPF 3 | upf_normalizer.apply(hdr.gtpu.isValid(), hdr.gtpu_ipv4, 4 | hdr.gtpu_udp, hdr.ipv4, hdr.udp, hdr.inner_ipv4, 5 | hdr.inner_udp); 6 | #endif // UPF 7 | 8 | // Filtering Objective 9 | pkt_io_ingress.apply(hdr, fabric_metadata, standard_metadata); 10 | filtering.apply(hdr, fabric_metadata, standard_metadata); 11 | #ifdef UPF 12 | upf_ingress.apply(hdr.gtpu_ipv4, hdr.gtpu_udp, hdr.gtpu, 13 | hdr.ipv4, hdr.udp, fabric_metadata, standard_metadata); 14 | #endif // UPF 15 | 16 | // Forwarding Objective 17 | if (fabric_metadata.skip_forwarding == _FALSE) { 18 | forwarding.apply(hdr, fabric_metadata, standard_metadata); 19 | } 20 | acl.apply(hdr, fabric_metadata, standard_metadata); 21 | 22 | // Next Objective 23 | if (fabric_metadata.skip_next == _FALSE) { 24 | next.apply(hdr, fabric_metadata, standard_metadata); 25 | #if defined INT 26 | process_set_source_sink.apply(hdr, fabric_metadata, 27 | standard_metadata); 28 | #endif // INT 29 | } 30 | #ifdef BNG 31 | bng_ingress.apply(hdr, fabric_metadata, standard_metadata); 32 | #endif // BNG 33 | } 34 | -------------------------------------------------------------------------------- /code/flowobj.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Builds the next and forwarding objectives for a flow from ingress to egress. 3 | * Each objective appended to objectives is paired with the ingress device id appended to devices. 4 | */ 5 | public void createFlow( 6 | TrafficSelector originalSelector, 7 | TrafficTreatment originalTreatment, 8 | ConnectPoint ingress, ConnectPoint egress, 9 | int priority, boolean applyTreatment, 10 | List<Objective> objectives, 11 | List<DeviceId> devices) { 12 | TrafficSelector selector = DefaultTrafficSelector.builder(originalSelector) 13 | .matchInPort(ingress.port()) 14 | .build(); 15 | 16 | // Optionally apply the specified treatment 17 | TrafficTreatment.Builder treatmentBuilder; 18 | if (applyTreatment) { 19 | treatmentBuilder = 
DefaultTrafficTreatment.builder(originalTreatment); 20 | } else { 21 | treatmentBuilder = 22 | DefaultTrafficTreatment.builder(); 23 | } 24 | 25 | // Keep a handle on the next objective: the forwarding objective refers to its id 26 | NextObjective nextObjective = DefaultNextObjective.builder() 27 | .withId(flowObjectiveService.allocateNextId()) 28 | .addTreatment(treatmentBuilder.setOutput( 29 | egress.port()).build()) 30 | .withType(NextObjective.Type.SIMPLE) 31 | .fromApp(appId) 32 | .makePermanent() 33 | .add(); 34 | objectives.add(nextObjective); 35 | devices.add(ingress.deviceId()); 36 | 37 | objectives.add(DefaultForwardingObjective.builder() 38 | .withSelector(selector) 39 | .nextStep(nextObjective.id()) 40 | .withPriority(priority) 41 | .fromApp(appId) 42 | .makePermanent() 43 | .withFlag(ForwardingObjective.Flag.SPECIFIC) 44 | .add()); 45 | devices.add(ingress.deviceId()); 46 | } 47 | -------------------------------------------------------------------------------- /code/headers.p4: -------------------------------------------------------------------------------- 1 | /***** P4_16 *****/ 2 | #include <core.p4> 3 | #include <v1model.p4> 4 | 5 | const bit<16> TYPE_IPV4 = 0x800; 6 | 7 | /**************************************************** 8 | ************* H E A D E R S ************************ 9 | ****************************************************/ 10 | 11 | typedef bit<9> egressSpec_t; 12 | typedef bit<48> macAddr_t; 13 | typedef bit<32> ip4Addr_t; 14 | 15 | header ethernet_t { 16 | macAddr_t dstAddr; 17 | macAddr_t srcAddr; 18 | bit<16> etherType; 19 | } 20 | 21 | header ipv4_t { 22 | bit<4> version; 23 | bit<4> ihl; 24 | bit<8> diffserv; 25 | bit<16> totalLen; 26 | bit<16> identification; 27 | bit<3> flags; 28 | bit<13> fragOffset; 29 | bit<8> ttl; 30 | bit<8> protocol; 31 | bit<16> hdrChecksum; 32 | ip4Addr_t srcAddr; 33 | ip4Addr_t dstAddr; 34 | } 35 | 36 | struct metadata { 37 | /* empty */ 38 | } 39 | 40 | struct headers { 41 | ethernet_t ethernet; 42 | ipv4_t ipv4; 43 | } 44 | -------------------------------------------------------------------------------- /code/iface.yang: 
-------------------------------------------------------------------------------- 1 | Module: openconfig-interfaces 2 | +--rw interfaces 3 | +--rw interface* [name] 4 | +--rw name 5 | +--rw config 6 | | ... 7 | +--ro state 8 | | ... 9 | +--rw hold-time 10 | | ... 11 | +--rw subinterfaces 12 | | ... 13 | -------------------------------------------------------------------------------- /code/ingress.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | ****** I N G R E S S P R O C E S S I N G ******* 3 | ****************************************************/ 4 | 5 | control MyIngress( 6 | inout headers hdr, 7 | inout metadata meta, 8 | inout standard_metadata_t standard_metadata) { 9 | 10 | action drop() { 11 | mark_to_drop(standard_metadata); 12 | } 13 | 14 | action ipv4_forward(macAddr_t dstAddr, 15 | egressSpec_t port) { 16 | standard_metadata.egress_spec = port; 17 | hdr.ethernet.srcAddr = hdr.ethernet.dstAddr; 18 | hdr.ethernet.dstAddr = dstAddr; 19 | hdr.ipv4.ttl = hdr.ipv4.ttl - 1; 20 | } 21 | 22 | table ipv4_lpm { 23 | key = { 24 | hdr.ipv4.dstAddr: lpm; 25 | } 26 | actions = { 27 | ipv4_forward; 28 | drop; 29 | NoAction; 30 | } 31 | size = 1024; 32 | default_action = drop(); 33 | } 34 | 35 | apply { 36 | if (hdr.ipv4.isValid()) { 37 | ipv4_lpm.apply(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /code/make.sh: -------------------------------------------------------------------------------- 1 | $ cd ~/ngsdn-tutorial 2 | $ make deps 3 | -------------------------------------------------------------------------------- /code/onos1.txt: -------------------------------------------------------------------------------- 1 | onos> routes 2 | 3 | B: Best route, R: Resolved route 4 | 5 | Table: ipv4 6 | B R Network Next Hop Source (Node) 7 | 0.0.0.0/0 172.16.0.1 FPM (127.0.0.1) 8 | > * 1.1.0.0/18 10.0.1.20 STATIC 9 | > * 
10.0.99.0/24 10.0.1.1 FPM (127.0.0.1) 10 | * 10.0.99.0/24 10.0.6.1 FPM (127.0.0.1) 11 | Total: 2 12 | 13 | Table: ipv6 14 | B R Network Next Hop Source (Node) 15 | > * 2000::7700/120 fe80::288:ff:fe00:1 FPM (127.0.0.1) 16 | > * 2000::8800/120 fe80::288:ff:fe00:2 FPM (127.0.0.1) 17 | > * 2000::9900/120 fe80::288:ff:fe00:1 FPM (127.0.0.1) 18 | * 2000::9900/120 fe80::288:ff:fe00:2 FPM (127.0.0.1) 19 | Total: 3 20 | 21 | -------------------------------------------------------------------------------- /code/onos2.txt: -------------------------------------------------------------------------------- 1 | onos> route-add 2 | onos> route-add 1.1.0.0/18 10.0.1.20 3 | onos> route-add 2020::101/120 2000::1 4 | -------------------------------------------------------------------------------- /code/onos3.txt: -------------------------------------------------------------------------------- 1 | onos> mcast-host-join -sAddr * 2 | -gAddr 224.0.0.1 3 | -srcs 00:AA:00:00:00:01/None 4 | -srcs 00:AA:00:00:00:05/None 5 | -sinks 00:AA:00:00:00:03/None 6 | -sinks 00:CC:00:00:00:01/None 7 | -------------------------------------------------------------------------------- /code/onos4.txt: -------------------------------------------------------------------------------- 1 | onos> mcast-sink-delete -sAddr * 2 | -gAddr 224.0.0.1 3 | -h 00:AA:00:00:00:03/None 4 | -------------------------------------------------------------------------------- /code/p4rt.py: -------------------------------------------------------------------------------- 1 | import p4runtime_lib.helper 2 | ... 
3 | table_entry = p4info_helper.buildTableEntry( 4 | table_name="MyIngress.ipv4_lpm", 5 | match_fields={ 6 | "hdr.ipv4.dstAddr": (dst_ip_addr, 32) 7 | }, 8 | action_name="MyIngress.ipv4_forward", 9 | action_params={ 10 | "dstAddr": next_hop_mac_addr, 11 | "port": outport, 12 | }) 13 | ingress_sw.WriteTableEntry(table_entry) 14 | -------------------------------------------------------------------------------- /code/parser.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | ************* P A R S E R ************************** 3 | ****************************************************/ 4 | 5 | parser MyParser( 6 | packet_in packet, 7 | out headers hdr, 8 | inout metadata meta, 9 | inout standard_metadata_t standard_metadata) { 10 | 11 | state start { 12 | transition parse_ethernet; 13 | } 14 | 15 | state parse_ethernet { 16 | packet.extract(hdr.ethernet); 17 | transition select(hdr.ethernet.etherType) { 18 | TYPE_IPV4: parse_ipv4; 19 | default: accept; 20 | } 21 | } 22 | 23 | state parse_ipv4 { 24 | packet.extract(hdr.ipv4); 25 | transition accept; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /code/pull.sh: -------------------------------------------------------------------------------- 1 | $ cd ~/ngsdn-tutorial 2 | $ git pull origin advanced 3 | -------------------------------------------------------------------------------- /code/reboot.proto: -------------------------------------------------------------------------------- 1 | message RebootRequest { 2 | // COLD, POWERDOWN, HALT, WARM, NSF, ... 3 | RebootMethod method = 1; 4 | // Delay in nanoseconds before issuing reboot. 5 | uint64 delay = 2; 6 | // Informational reason for the reboot. 7 | string message = 3; 8 | // Optional sub-components to reboot. 9 | repeated types.Path subcomponents = 4; 10 | // Force reboot if sanity checks fail. 
11 | bool force = 5; 12 | } 13 | -------------------------------------------------------------------------------- /code/service.proto: -------------------------------------------------------------------------------- 1 | service gNMI { 2 | rpc Capabilities(CapabilityRequest) 3 | returns (CapabilityResponse); 4 | rpc Get(GetRequest) returns (GetResponse); 5 | rpc Set(SetRequest) returns (SetResponse); 6 | rpc Subscribe(stream SubscribeRequest) 7 | returns (stream SubscribeResponse); 8 | } 9 | -------------------------------------------------------------------------------- /code/switch.p4: -------------------------------------------------------------------------------- 1 | /**************************************************** 2 | ************* S W I T C H ************************* 3 | ****************************************************/ 4 | 5 | V1Switch( 6 | MyParser(), 7 | MyVerifyChecksum(), 8 | MyIngress(), 9 | MyEgress(), 10 | MyComputeChecksum(), 11 | MyDeparser() 12 | ) main; 13 | -------------------------------------------------------------------------------- /code/system.proto: -------------------------------------------------------------------------------- 1 | service System { 2 | rpc Ping(PingRequest) 3 | returns (stream PingResponse) {} 4 | rpc Traceroute(TracerouteRequest) 5 | returns (stream TracerouteResponse) {} 6 | rpc Time(TimeRequest) 7 | returns (TimeResponse) {} 8 | rpc SetPackage(stream SetPackageRequest) 9 | returns (SetPackageResponse) {} 10 | rpc Reboot(RebootRequest) 11 | returns (RebootResponse) {} 12 | // ...
13 | } 14 | -------------------------------------------------------------------------------- /code/table.p4: -------------------------------------------------------------------------------- 1 | table ipv4_lpm { 2 | key = { 3 | hdr.ipv4.dstAddr: lpm; 4 | } 5 | actions = { 6 | ipv4_forward; 7 | drop; 8 | NoAction; 9 | } 10 | size = 1024; 11 | default_action = drop(); 12 | } 13 | -------------------------------------------------------------------------------- /code/v1model.p4: -------------------------------------------------------------------------------- 1 | #include <core.p4> 2 | #include <v1model.p4> 3 | 4 | /* Headers */ 5 | struct metadata { ... } 6 | struct headers { 7 | ethernet_t ethernet; 8 | ipv4_t ipv4; 9 | } 10 | 11 | /* Parser */ 12 | parser MyParser( 13 | packet_in packet, 14 | out headers hdr, 15 | inout metadata meta, 16 | inout standard_metadata_t smeta) { 17 | ... 18 | } 19 | 20 | /* Checksum Verification */ 21 | control MyVerifyChecksum( 22 | inout headers hdr, 23 | inout metadata meta) { 24 | ... 25 | } 26 | 27 | /* Ingress Processing */ 28 | control MyIngress( 29 | inout headers hdr, 30 | inout metadata meta, 31 | inout standard_metadata_t smeta) { 32 | ... 33 | } 34 | 35 | /* Egress Processing */ 36 | control MyEgress( 37 | inout headers hdr, 38 | inout metadata meta, 39 | inout standard_metadata_t smeta) { 40 | ... 41 | } 42 | 43 | /* Checksum Update */ 44 | control MyComputeChecksum( 45 | inout headers hdr, 46 | inout metadata meta) { 47 | ... 48 | } 49 | 50 | /* Deparser (a control block that emits headers onto packet_out) */ 51 | control MyDeparser( 52 | packet_out packet, 53 | in headers hdr) { 54 | ...
55 | } 56 | 57 | /* Switch */ 58 | V1Switch( 59 | MyParser(), 60 | MyVerifyChecksum(), 61 | MyIngress(), 62 | MyEgress(), 63 | MyComputeChecksum(), 64 | MyDeparser() 65 | ) main; 66 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | import os 20 | 21 | from subprocess import check_output, CalledProcessError 22 | 23 | def get_version(): 24 | """Return the contents of the VERSION file, or 'unknown version' if it cannot be read.""" 25 | try: 26 | with open('VERSION') as version_file:  # read directly: no dependency on an external `cat` binary 27 | version = version_file.read() 28 | except OSError: 29 | return 'unknown version' 30 | 31 | return version.rstrip() 32 | 33 | # "version" is used for html build 34 | version = get_version() 35 | # "release" is used for LaTeX build 36 | release = version 37 | 38 | 39 | # -- Project information ----------------------------------------------------- 40 | 41 | project = u'Software-Defined Networks: A Systems Approach' 42 | copyright = u'2022, Systems Approach LLC (Publisher)' 43 | author = u'Peterson, Cascone, OConnor, Vachuska, Davie' 44 | 45 | # -- General configuration --------------------------------------------------- 46 | 47 | # If your documentation needs a minimal Sphinx version, state it here.
48 | # 49 | # needs_sphinx = '1.0' 50 | 51 | # make all warnings errors 52 | warning_is_error = False 53 | 54 | # Add any Sphinx extension module names here, as strings. They can be 55 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 56 | # ones. ***Replace "mathjax" with "imgmath" for epub output.*** 57 | extensions = [ 58 | 'sphinx.ext.autosectionlabel', 59 | 'sphinx.ext.coverage', 60 | 'sphinx.ext.ifconfig', 61 | 'sphinx.ext.mathjax', 62 | 'sphinx.ext.todo', 63 | 'sphinxcontrib.spelling' 64 | ] 65 | 66 | # Text files with lists of words that shouldn't fail the spellchecker: 67 | spelling_word_list_filename=['dict.txt', ] 68 | 69 | # Add any paths that contain templates here, relative to this directory. 70 | templates_path = ['_templates'] 71 | 72 | # The suffix(es) of source filenames. 73 | # You can specify multiple suffix as a list of string: 74 | # 75 | # source_suffix = ['.rst', '.md'] 76 | source_suffix = '.rst' 77 | 78 | # The master toctree document. 79 | master_doc = 'index' 80 | 81 | # The language for content autogenerated by Sphinx. Refer to documentation 82 | # for a list of supported languages. 83 | # 84 | # This is also used if you do content translation via gettext catalogs. 85 | # Usually you set "language" from the command line for these cases. 86 | language = 'en' 87 | 88 | # List of patterns, relative to source directory, that match files and 89 | # directories to ignore when looking for source files. 90 | # This pattern also affects html_static_path and html_extra_path. 91 | exclude_patterns = [u'_build', 'venv-docs', 'requirements.txt', 'Thumbs.db', 'private', '.DS_Store', '*/README.rst'] 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = None 95 | 96 | # Enable numbered figures 97 | numfig = True 98 | numfig_format = { 99 | 'figure': 'Figure %s.', 100 | 'table': 'Table %s.' 
101 | } 102 | 103 | # Ignore link check for the following websites 104 | linkcheck_ignore = [ 105 | 'https://amzn.to/', 'https://amazon.com', 'https://dl.acm.org', 'https://developers.redhat.com', 'https://www.adlittle.com' 106 | ] 107 | 108 | # -- Options for HTML output ------------------------------------------------- 109 | 110 | # The theme to use for HTML and HTML Help pages. See the documentation for 111 | # a list of builtin themes. 112 | # 113 | html_theme = 'sphinx_rtd_theme' 114 | 115 | # Theme options are theme-specific and customize the look and feel of a theme 116 | # further. For a list of options available for each theme, see the 117 | # documentation. 118 | # 119 | html_theme_options = { 120 | 'prev_next_buttons_location': 'both' 121 | } 122 | 123 | # Add any paths that contain custom static files (such as style sheets) here, 124 | # relative to this directory. They are copied after the builtin static files, 125 | # so a file named "default.css" will overwrite the builtin "default.css". 126 | html_static_path = ['_static'] 127 | 128 | html_css_files = [ 129 | 'css/rtd_theme_mods.css', 130 | ] 131 | 132 | # HTML Favicon 133 | html_favicon = '_static/bridge.ico' 134 | 135 | # HTML Index 136 | html_use_index = False 137 | 138 | # Custom sidebar templates, must be a dictionary that maps document names 139 | # to template names. 140 | # 141 | # The default sidebars (for documents that don't match any pattern) are 142 | # defined by theme itself. Builtin themes are using these templates by 143 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 144 | # 'searchbox.html']``. 145 | # 146 | # html_sidebars = {} 147 | 148 | #extra HTML files 149 | html_extra_path = ['_extra'] 150 | 151 | # -- Options for HTMLHelp output --------------------------------------------- 152 | 153 | # Output file base name for HTML help builder. 
154 | htmlhelp_basename = 'SystemsApproach' 155 | 156 | 157 | # -- Options for LaTeX output ------------------------------------------------ 158 | #latex_engine = 'xelatex' 159 | 160 | latex_elements = { 161 | # The paper size ('letterpaper' or 'a4paper'). 162 | # 163 | 'papersize': 'letterpaper', 164 | 165 | # The font size ('10pt', '11pt' or '12pt'). 166 | # 167 | 'pointsize': '11pt', 168 | 169 | # Get unicode to work 170 | # 171 | 'fontenc': '\\usepackage[LGR,T1]{fontenc}', 172 | 173 | # Latex figure (float) alignment 174 | # 175 | 'figure_align': 'ht', 176 | } 177 | 178 | # Grouping the document tree into LaTeX files. List of tuples 179 | # (source start file, target name, title, 180 | # author, documentclass [howto, manual, or own class]). 181 | latex_documents = [ 182 | (master_doc, 'book.tex', u'Software-Defined Networks: A Systems Approach', 183 | u'Peterson, Cascone, OConnor, Vachuska and Davie ', 'manual', True), 184 | ] 185 | 186 | latex_toplevel_sectioning = 'chapter' 187 | 188 | 189 | # -- Options for manual page output ------------------------------------------ 190 | 191 | # One entry per manual page. List of tuples 192 | # (source start file, name, description, authors, manual section). 193 | man_pages = [ 194 | (master_doc, 'Systems Approach', u'Systems Approach', 195 | [author], 1) 196 | ] 197 | 198 | 199 | # -- Options for Texinfo output ---------------------------------------------- 200 | 201 | # Grouping the document tree into Texinfo files. 
List of tuples 202 | # (source start file, target name, title, author, 203 | # dir menu entry, description, category) 204 | texinfo_documents = [ 205 | (master_doc, 'Software-Defined Networks', u'Software-Defined Networks', 206 | author, 'Peterson, Cascone, OConnor, Vachuska, and Davie', 'A Systems Approach', 207 | 'Miscellaneous'), 208 | ] 209 | 210 | 211 | # -- Options for Epub output ------------------------------------------------- 212 | epub_title = project 213 | epub_description = 'Building a Cloud Management Platform' 214 | epub_cover = ('_static/cover.jpg', '') 215 | epub_show_urls = 'False' 216 | epub_use_index = False 217 | 218 | # The unique identifier of the text. This can be a ISBN number 219 | # or the project homepage. 220 | # 221 | # epub_identifier = '' 222 | 223 | # A unique identification for the text. 224 | # 225 | # epub_uid = '' 226 | 227 | # A list of files that should not be packed into the epub file. 228 | epub_exclude_files = ['search.html','robots.txt'] 229 | 230 | 231 | # -- Extension configuration ------------------------------------------------- 232 | 233 | # -- options for Intersphinx extension --------------------------------------- 234 | 235 | intersphinx_mapping = { 236 | 'sphinx': ('https://www.sphinx-doc.org/en/master', None), 237 | 'aether': ('https://docs.aetherproject.org/master', None), 238 | 'sdcore': ('https://docs.sd-core.opennetworking.org/master', None), 239 | 'sdran': ('https://docs.sd-ran.org/master', None), 240 | 'sdran': ('https://docs.sd-fabric.org/master', None), 241 | 'sysapproach5g': ('https://5g.systemsapproach.org/', None), 242 | 'sysapproachnet': ('https://book.systemsapproach.org/', None), 243 | 'sysapproachsdn': ('https://sdn.systemsapproach.org/', None), 244 | } 245 | 246 | # -- Options for todo extension ---------------------------------------------- 247 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
248 | todo_include_todos = True 249 | 250 | 251 | # -- Set up Google Analytics 252 | # -- using approach at https://stackoverflow.com/questions/9444342/adding-a-javascript-script-tag-some-place-so-that-it-works-for-every-file-in-sph/41885884#41885884 253 | 254 | 255 | GA_INVOKE_JS = """ 256 | window.dataLayer = window.dataLayer || []; 257 | function gtag(){dataLayer.push(arguments);} 258 | gtag('js', new Date()); 259 | 260 | gtag('config', 'G-QLSP3FJWGT'); 261 | """ 262 | 263 | def setup(app): 264 | 265 | app.add_js_file('https://www.googletagmanager.com/gtag/js?id=G-QLSP3FJWGT', loading_method="async") 266 | app.add_js_file(None, body=GA_INVOKE_JS) 267 | -------------------------------------------------------------------------------- /dict.txt: -------------------------------------------------------------------------------- 1 | ASIC 2 | Acknowledgements 3 | Aether 4 | Amin 5 | Antrea 6 | Arpanet 7 | Atomix 8 | BNG 9 | Broadcom 10 | Cascone 11 | Centric 12 | Comcast 13 | Config 14 | Connor 15 | Ctrl 16 | Davie 17 | Deparser 18 | Disaggregating 19 | Ebisawa 20 | École 21 | Feamster 22 | Ganga 23 | Gbps 24 | GTP 25 | GPP 26 | Geneve 27 | Greenberg 28 | IPv 29 | Inband 30 | IntelliJ 31 | IoT 32 | Ipsilon 33 | Kahn 34 | Karaf 35 | Kentaro 36 | Kobayashi 37 | Liu 38 | Makefile 39 | Mcast 40 | Microsegmentation 41 | Milano 42 | Mininet 43 | Montréal 44 | Motonori 45 | Mpps 46 | Multicast 47 | Multiprotocol 48 | OCP 49 | Ongaro 50 | Onix 51 | Ousterhout 52 | Pfaff 53 | PON 54 | Ph 55 | Pipeconf 56 | Pipeliner 57 | Pipeliners 58 | Pipeliners 59 | Politecnico 60 | Polytechnique 61 | Pudelko 62 | QinQ 63 | QoS 64 | Quagga 65 | Repo 66 | Rexford 67 | Runtime 68 | SAI 69 | Shindo 70 | Sridhar 71 | SNMP 72 | SRv 73 | Shenker 74 | Stratum 75 | Sunay 76 | Tbps 77 | Telco 78 | Telcos 79 | Todo 80 | Tofino 81 | Trellis 82 | Tseng 83 | UDP 84 | Ueno 85 | Univesity 86 | VMware 87 | Vachuska 88 | Vahdat 89 | Veriflow 90 | xApp 91 | xApps 92 | Xiaoqi 93 | Yi 94 | Zegura 95 | adaptor 96 
| adaptors 97 | al 98 | analytics 99 | architected 100 | aspirational 101 | backend 102 | backends 103 | backhaul 104 | backplane 105 | balancers 106 | basestation 107 | bmv 108 | centric 109 | checksums 110 | chipset 111 | commoditized 112 | compositional 113 | config 114 | cyber 115 | cyberattacks 116 | datapath 117 | de 118 | di 119 | decapsulates 120 | decapsulation 121 | decrementing 122 | decrypt 123 | demarking 124 | demultiplexing 125 | deparser 126 | directionality 127 | disaggregate 128 | disaggregated 129 | disaggregating 130 | ebook 131 | et 132 | ethernet 133 | extern 134 | externs 135 | extensibility 136 | failover 137 | firewalling 138 | gNMI 139 | gNOI 140 | gNXI 141 | gRPC 142 | hairpinned 143 | hairpinning 144 | handoffs 145 | hyperscale 146 | impactful 147 | incrementing 148 | interoperable 149 | intra 150 | intradomain 151 | interdomain 152 | judgement 153 | microsegments 154 | microsegmentation 155 | microservice 156 | misconfigurations 157 | mmWave 158 | modularity 159 | multicast 160 | multipoint 161 | nexthop 162 | orchestrator 163 | ovsdb 164 | parameterizing 165 | pluggable 166 | pre 167 | programmability 168 | programme 169 | protobuf 170 | protobufs 171 | pseudowires 172 | repo 173 | retargeting 174 | roadmap 175 | runtime 176 | sFlow 177 | scalability 178 | splitters 179 | subdirectory 180 | subgraph 181 | subnet 182 | subnets 183 | syscall 184 | tized 185 | toolchain 186 | toolchains 187 | toolset 188 | transformative 189 | ttl 190 | unicast 191 | unvirtualized 192 | userspace 193 | utilizations 194 | vSwitch 195 | vSwitches 196 | verifiability 197 | verifiably 198 | virtualized 199 | virtualenv 200 | µONOS 201 | 202 | -------------------------------------------------------------------------------- /exercises.rst: -------------------------------------------------------------------------------- 1 | Hands-on Programming 2 | ====================== 3 | 4 | A collection of programming exercises provides hands-on experience with 5 | the 
software described in this book. They include: 6 | 7 | * Using Stratum's P4Runtime, gNMI, OpenConfig, and gNOI interfaces 8 | * Using ONOS to control P4-programmed switches 9 | * Writing ONOS applications to implement control plane logic 10 | * Testing a software stack using bmv2 in Mininet 11 | * Using PTF to test P4-based forwarding planes 12 | 13 | The exercises assume familiarity with Java and Python, although each 14 | exercise comes with starter code, so a high level of proficiency is 15 | not required. The exercises also use the *Mininet* network emulator, 16 | the *bmv2* P4-based switch emulator, the *PTF* Packet Testing 17 | Framework, and the *Wireshark* protocol analyzer. Additional 18 | information about each of these software tools is provided in the 19 | individual exercises. 20 | 21 | The exercises originated with a *Next Generation SDN Tutorial* 22 | produced by ONF, and so they come with a collection of on-line 23 | tutorial slides that introduce the topics covered in the exercises: 24 | 25 | * http://bit.ly/adv-ngsdn-tutorial-slides 26 | 27 | These slides have significant overlap with the material covered in 28 | this book, so it is not essential that you start with the slides, but 29 | they can be a good supplemental resource. 30 | 31 | Environment 32 | ---------------------------- 33 | 34 | You will be doing the exercises in a virtualized Linux environment 35 | running on your laptop. This section describes how to install and 36 | prepare that environment. 37 | 38 | System Requirements 39 | ~~~~~~~~~~~~~~~~~~~~~~ 40 | 41 | The current configuration of the VM is 4 GB of RAM and a 4-core CPU. 42 | These are the recommended minimum system requirements to complete the 43 | exercises. The VM also takes approximately 8 GB of HDD space. For a 44 | smooth experience, we recommend running the VM on a host system that 45 | has at least double these resources. 
46 | 47 | Download VM 48 | ~~~~~~~~~~~~~~~~~ 49 | 50 | Click the following link to download the VM (4 GB): 51 | 52 | * http://bit.ly/ngsdn-tutorial-ova 53 | 54 | The VM is in ``.ova`` format and has been created using VirtualBox 55 | v5.2.32. You can use any modern virtualization system to run the VM, 56 | although we recommend using VirtualBox. The following links provide 57 | instructions on how to get VirtualBox and import the VM: 58 | 59 | * https://www.virtualbox.org/wiki/Downloads 60 | * https://docs.oracle.com/cd/E26217_01/E26796/html/qs-import-vm.html 61 | 62 | Alternatively, you can use these 63 | `scripts <https://github.com/opennetworkinglab/ngsdn-tutorial/tree/advanced/util/vm>`__ 64 | to build a VM on your machine using Vagrant. 65 | 66 | .. _warning-windows: 67 | .. admonition:: Windows Users 68 | 69 | All scripts have been tested on macOS and Ubuntu. Although they 70 | should work on Windows, they have not been tested. We therefore 71 | recommend that Windows users download the provided VM. 72 | 73 | At this point you can start the virtual machine (an Ubuntu system), 74 | and log in using the credentials ``sdn`` / ``rocks``. The instructions 75 | given throughout the remainder of this section (as well as the 76 | exercises themselves) are to be executed within the running VM. 77 | 78 | 79 | Clone Repository 80 | ~~~~~~~~~~~~~~~~~~ 81 | 82 | To work on the exercises you will need to clone the following repo: 83 | 84 | .. literalinclude:: code/clone.sh 85 | 86 | If the ``ngsdn-tutorial`` directory is already present in the VM, make 87 | sure to update its content: 88 | 89 | .. literalinclude:: code/pull.sh 90 | 91 | Note that there are multiple branches of the repo, each with a 92 | different configuration of the exercises. Always make sure you are in 93 | the ``advanced`` branch. 94 | 95 | Upgrade Dependencies 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | 98 | The VM may have shipped with an older version of the dependencies than 99 | you need for the exercises.
You can upgrade to the latest version 100 | using the following command: 101 | 102 | .. literalinclude:: code/make.sh 103 | 104 | This command downloads all necessary Docker images (~1.5 GB), which 105 | allows you to work through the exercises off-line. 106 | 107 | Using an IDE 108 | ~~~~~~~~~~~~~~~~~~~ 109 | 110 | During the exercises you will need to write code in multiple languages 111 | (e.g., P4, Java, Python). While the exercises do not require the use 112 | of any specific IDE or code editor, one option is the Java IDE 113 | `IntelliJ IDEA Community Edition <https://www.jetbrains.com/idea/>`__, 114 | which comes pre-loaded with plugins for P4 syntax highlighting and 115 | Python development. We suggest using IntelliJ IDEA especially when 116 | working on the ONOS app, as it provides code completion for all ONOS 117 | APIs. 118 | 119 | Repo Structure 120 | ~~~~~~~~~~~~~~~~~~~~~ 121 | 122 | The repo you cloned is structured as follows: 123 | 124 | * ``p4src/`` → Data Plane Implementation (P4) 125 | * ``yang/`` → Config Models (YANG) 126 | * ``app/`` → Custom ONOS app (Java) 127 | * ``mininet/`` → 2x2 leaf-spine (Mininet) 128 | * ``util/`` → Utility Scripts (Bash) 129 | * ``ptf/`` → Data plane unit tests (PTF) 130 | 131 | Note that the exercises include links to various files on GitHub, but 132 | don't forget you have those same files cloned on your laptop. 133 | 134 | Commands 135 | ~~~~~~~~~~~~~~~~ 136 | 137 | To facilitate working on the exercises, the repo provides a set of 138 | ``make`` targets to control the different aspects of the process.
The 139 | specific commands are introduced in the individual exercises, but the 140 | following is a quick reference: 141 | 142 | * ``make deps`` → Pull and build all required dependencies 143 | * ``make p4-build`` → Build P4 program 144 | * ``make p4-test`` → Run PTF tests 145 | * ``make start`` → Start Mininet and ONOS containers 146 | * ``make stop`` → Stop all containers 147 | * ``make restart`` → Restart containers clearing any previous state 148 | * ``make onos-cli`` → Access the ONOS CLI (password: ``rocks``, Ctrl-D to exit) 149 | * ``make onos-log`` → Show the ONOS log 150 | * ``make mn-cli`` → Access the Mininet CLI (Ctrl-D to exit) 151 | * ``make mn-log`` → Show the Mininet log (i.e., the CLI output) 152 | * ``make app-build`` → Build custom ONOS app 153 | * ``make app-reload`` → Install and activate the ONOS app 154 | * ``make netcfg`` → Push ``netcfg.json`` file (network config) to 155 | ONOS 156 | 157 | .. _warning-cmds: 158 | .. admonition:: Executing Commands 159 | 160 | As a reminder, these commands will be executed in a terminal window 161 | you open within the VM you just created. Be sure you are in the 162 | root directory of the repo you cloned (where the main ``Makefile`` 163 | lives). 164 | 165 | Exercises 166 | ------------------ 167 | 168 | The following lists (and links) the individual exercises. 169 | Exercises 1 and 2 170 | focus on Stratum, and are best attempted after reading through Chapter 171 | 5. Exercises 3 through 6 focus on ONOS and are best attempted after 172 | reading through Chapter 6. Exercises 7 and 8 focus on SD-Fabric and 173 | are best attempted after reading through Chapter 7.\ [#]_ Note that the 174 | exercises build on each other, so it is best to work through them in 175 | order. 176 | 177 | .. [#] SD-Fabric was previously known as Trellis, and still is in the 178 | code. UPF was previously known as SPGW, and still is in the 179 | code. 180 | 181 | 182 | 1. `P4Runtime Basics <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-1.md>`__ 183 | 2.
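`YANG, OpenConfig, gNMI Basics <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-2.md>`__ 184 | 3. `Using ONOS as the Control Plane <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-3.md>`__ 185 | 4. `Enabling ONOS Built-in Services <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-4.md>`__ 186 | 5. `Implementing IPv6 Routing with ECMP <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-5.md>`__ 187 | 6. `Implementing SRv6 <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-6.md>`__ 188 | 7. `SD-Fabric (Trellis) Basics 189 | <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-7.md>`__ 190 | 8. `GTP Termination with fabric.p4 <https://github.com/opennetworkinglab/ngsdn-tutorial/blob/advanced/EXERCISE-8.md>`__ 191 | 192 | You can find solutions for each exercise in the ``solution`` 193 | subdirectory of the repo you cloned. Feel free to compare your 194 | solution to the reference solution should you get stuck. 195 | 196 | .. _warning-tutorial: 197 | .. admonition:: Graphical Interfaces 198 | 199 | When exercises call for viewing graphical output, you will see 200 | references to the *ONF Cloud Tutorial Portal*. This is for 201 | cloud-hosted VMs used during ONF-run tutorials, and so does not apply 202 | here. In its place, the exercises also describe how to access the 203 | GUI running locally on your laptop. 204 | 205 | If you have suggestions for how we can improve these exercises, please 206 | send email to ng-sdn-exercises@opennetworking.org or post an issue to 207 | `GitHub <https://github.com/opennetworkinglab/ngsdn-tutorial/issues>`__.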
208 | -------------------------------------------------------------------------------- /figures.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures.pptx -------------------------------------------------------------------------------- /figures/Slide01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide01.png -------------------------------------------------------------------------------- /figures/Slide02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide02.png -------------------------------------------------------------------------------- /figures/Slide03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide03.png -------------------------------------------------------------------------------- /figures/Slide04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide04.png -------------------------------------------------------------------------------- /figures/Slide05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide05.png -------------------------------------------------------------------------------- /figures/Slide06.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide06.png -------------------------------------------------------------------------------- /figures/Slide07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide07.png -------------------------------------------------------------------------------- /figures/Slide08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide08.png -------------------------------------------------------------------------------- /figures/Slide09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide09.png -------------------------------------------------------------------------------- /figures/Slide10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide10.png -------------------------------------------------------------------------------- /figures/Slide11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide11.png -------------------------------------------------------------------------------- /figures/Slide12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide12.png -------------------------------------------------------------------------------- 
/figures/Slide13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide13.png -------------------------------------------------------------------------------- /figures/Slide14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide14.png -------------------------------------------------------------------------------- /figures/Slide15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide15.png -------------------------------------------------------------------------------- /figures/Slide16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide16.png -------------------------------------------------------------------------------- /figures/Slide17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide17.png -------------------------------------------------------------------------------- /figures/Slide18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide18.png -------------------------------------------------------------------------------- /figures/Slide19.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide19.png -------------------------------------------------------------------------------- /figures/Slide20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide20.png -------------------------------------------------------------------------------- /figures/Slide21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide21.png -------------------------------------------------------------------------------- /figures/Slide22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide22.png -------------------------------------------------------------------------------- /figures/Slide23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide23.png -------------------------------------------------------------------------------- /figures/Slide24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide24.png -------------------------------------------------------------------------------- /figures/Slide25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide25.png -------------------------------------------------------------------------------- 
/figures/Slide26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide26.png -------------------------------------------------------------------------------- /figures/Slide27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide27.png -------------------------------------------------------------------------------- /figures/Slide28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide28.png -------------------------------------------------------------------------------- /figures/Slide29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide29.png -------------------------------------------------------------------------------- /figures/Slide30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide30.png -------------------------------------------------------------------------------- /figures/Slide31.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide31.png -------------------------------------------------------------------------------- /figures/Slide32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide32.png -------------------------------------------------------------------------------- /figures/Slide33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide33.png -------------------------------------------------------------------------------- /figures/Slide34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide34.png -------------------------------------------------------------------------------- /figures/Slide35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide35.png -------------------------------------------------------------------------------- /figures/Slide36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide36.png -------------------------------------------------------------------------------- /figures/Slide37.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide37.png -------------------------------------------------------------------------------- /figures/Slide38.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide38.png -------------------------------------------------------------------------------- 
/figures/Slide39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide39.png -------------------------------------------------------------------------------- /figures/Slide40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide40.png -------------------------------------------------------------------------------- /figures/Slide42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide42.png -------------------------------------------------------------------------------- /figures/Slide43.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide43.png -------------------------------------------------------------------------------- /figures/Slide44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide44.png -------------------------------------------------------------------------------- /figures/Slide45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide45.png -------------------------------------------------------------------------------- /figures/Slide46.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide46.png -------------------------------------------------------------------------------- /figures/Slide47.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide47.png -------------------------------------------------------------------------------- /figures/Slide48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide48.png -------------------------------------------------------------------------------- /figures/Slide49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide49.png -------------------------------------------------------------------------------- /figures/Slide50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide50.png -------------------------------------------------------------------------------- /figures/Slide51.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide51.png -------------------------------------------------------------------------------- /figures/Slide52.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide52.png -------------------------------------------------------------------------------- 
/figures/Slide53.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide53.png -------------------------------------------------------------------------------- /figures/Slide54.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide54.png -------------------------------------------------------------------------------- /figures/Slide55.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide55.png -------------------------------------------------------------------------------- /figures/Slide56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide56.png -------------------------------------------------------------------------------- /figures/Slide57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide57.png -------------------------------------------------------------------------------- /figures/Slide58.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide58.png -------------------------------------------------------------------------------- /figures/Slide59.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide59.png -------------------------------------------------------------------------------- /figures/Slide60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide60.png -------------------------------------------------------------------------------- /figures/Slide61.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide61.png -------------------------------------------------------------------------------- /figures/Slide62.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SystemsApproach/sdn/f2ff110ec82ff3ca89590118b3f4662502463473/figures/Slide62.png -------------------------------------------------------------------------------- /foreword.rst: -------------------------------------------------------------------------------- 1 | Foreword 2 | ========== 3 | 4 | I got goosebumps when I saw the first Mosaic web browser 5 | in 1993. Something big was clearly about to happen; I had no idea how 6 | big. The Internet immediately exploded in scale, with thousands of new 7 | ISPs (Internet Service Providers) popping up everywhere, each grafting 8 | on a new piece of the Internet. All they needed to do was plug 9 | interoperable pieces together—off-the-shelf commercial switches, 10 | routers, base-stations, and access points sold by traditional 11 | networking equipment vendors—with no need to ask permission from a 12 | central controlling authority. The early routers were simple and 13 | streamlined—they just needed to support the Internet 14 | protocol. Decentralized control let the Internet grow rapidly. 
15 | 16 | The router manufacturers faced a dilemma: It’s hard to maintain a 17 | thriving profitable business selling devices that are simple and 18 | streamlined. What's more, if a big network of simple devices is easy to 19 | manage remotely, all the intelligence (and value) is provided by the 20 | network operator, not the router manufacturer. So the external API was 21 | kept minimal (“network management” was considered a joke) and the 22 | routers were jam-packed with new features to keep all the value 23 | inside. By the mid 2000s, routers used by ISPs were so complicated 24 | that they supported hundreds of protocols and were based on more than 25 | 100 million lines of source code—ironically, more than ten times the 26 | complexity of the largest telephone exchange ever built. The Internet 27 | paid a hefty price for this complexity: routers were bloated, power 28 | hungry, unreliable, hard to secure, and crazy expensive. Worst of all, 29 | they were hard to improve (ISPs needed to beg equipment vendors to add 30 | new capabilities) and it was impossible for an ISP to add their own 31 | new features. Network owners complained of a “stranglehold” by the 32 | router vendors, and the research community warned that the Internet 33 | was “ossified.” 34 | 35 | This book is the story of what happened next, and it’s an exciting 36 | one. Larry, Carmelo, Brian, Thomas and Bruce capture clearly, through 37 | concrete examples and open-source code: How those who own and operate 38 | big networks started to write their own code and build their own 39 | switches and routers. Some chose to replace routers with homegrown 40 | devices that were simpler and easier to maintain; others chose to move 41 | the software off the router to a remote, centralized control plane. 42 | Whichever path they chose, open-source became a bigger and bigger 43 | part. 
Once open-source had proved itself in Linux, Apache, Mozilla and 44 | Kubernetes, it was ready to be trusted to run our networks too. 45 | 46 | This book explains why the SDN movement happened. It was essentially 47 | about a change in control: the owners and operators of big networks 48 | took control of how their networks work, grabbing the keys to 49 | innovation from the equipment vendors. It started with data center 50 | companies because they couldn't build big-enough scale-out networks 51 | using off-the-shelf networking equipment. So they bought switching 52 | chips and wrote the software themselves. Yes, it saved them money 53 | (often reducing the cost by a factor of five or more), but it was 54 | control they were after. They employed armies of software engineers to 55 | ignite a Cambrian explosion of new ideas in networking, making their 56 | networks more reliable, quicker to fix, and with better control over 57 | their traffic. Today, in 2021, all of the large data center companies 58 | build their own networking equipment: they download and modify 59 | open-source control software, or they write or commission software to 60 | control their networks. They have taken control. The ISPs and 5G 61 | operators are next. Within a decade, expect enterprise and campus 62 | networks to run on open-source control software, managed from the 63 | cloud. This is a good change, because only those who own and operate 64 | networks at scale know how to do it best. 65 | 66 | This change—a revolution in how networks are built, towards homegrown 67 | software developed and maintained by the network operator—is called 68 | Software Defined Networking (SDN). The authors have been part of this 69 | revolution since the very beginning, and have captured how and why it 70 | came about. 71 | 72 | They also help us see what future networks will be like. 
Rather than 73 | being built by plugging together a bunch of boxes running standardized 74 | interoperability protocols, a network system will be a platform we can 75 | program ourselves. The network owner will decide how the network works 76 | by programming whatever behavior they wish. Students of networking 77 | will learn how to program a distributed system, rather than study 78 | the arcane details of legacy protocols. 79 | 80 | For anyone interested in programming, networks just got interesting 81 | again. And this book is an excellent place to start. 82 | 83 | | Nick McKeown 84 | | Stanford, California 85 | 86 | 87 | -------------------------------------------------------------------------------- /future.rst: -------------------------------------------------------------------------------- 1 | Chapter 10: Future of SDN 2 | =========================== 3 | 4 | It is still early days for SDN. Cloud-hosted control planes are being 5 | deployed in production networks, but we are only just starting to see 6 | SDN being applied to access networks and programmable pipelines being 7 | used to introduce new data plane functionality. Enterprises have 8 | adopted network virtualization and SD-WAN to varying degrees, but 9 | there are still a lot more traditional networks than software-defined 10 | ones. 11 | 12 | As the technology matures and the APIs stabilize we expect to 13 | see increased adoption of the use cases discussed earlier, but it may 14 | be new use cases still on the horizon that have the biggest impact on 15 | the role SDN eventually plays. Indeed, the ability to support 16 | capabilities that were impossible in traditional networks is a great 17 | part of the promise of SDN. 18 | 19 | This chapter looks at a promising opportunity, revolving around the 20 | verification of correctness. Networks are notoriously difficult to make 21 | verifiably robust and secure against failures, attacks, and 22 | configuration mistakes.
While network verification has been a field of 23 | interest for several years, the lack of clear abstractions in 24 | networking has limited the progress that can be made. Most networks are still built 25 | using closed/proprietary software and complex/fixed-function hardware, 26 | whose correctness is hard to prove and whose design has unknown 27 | provenance. The distributed algorithms that determine how networks 28 | operate are notoriously difficult to reason about, with BGP being a 29 | classic example of a protocol whose failure modes have kept 30 | researchers and practitioners occupied for decades. 31 | 32 | The emergence of 5G networks and applications will only exacerbate the 33 | situation. 5G networks will connect not only smart phones and people, 34 | but also everything from doorbells, to lights, refrigerators, 35 | self-driving cars, and drones. If we cannot correctly configure and 36 | secure these networks, the risk of cyber disasters is much worse than 37 | anything experienced to date. 38 | 39 | A critical capability for a reliable and secure Internet is verifiability: 40 | the ability to ensure that every packet in the network follows an 41 | operator-specified path and encounters only a set of forwarding rules 42 | within every device that the operator intended. Nothing more and 43 | nothing less. 44 | 45 | Experience has shown that verification works best in settings where 46 | the overall system is constructed in a compositional (i.e., 47 | disaggregated) manner. Being able to reason about small pieces makes 48 | verification tractable, and the reasoning needed to stitch the 49 | components together into the composite system can also lead to 50 | insights. With disaggregation as the foundation, verifiability follows 51 | from (a) the ability to state intent *at the network level* rather 52 | than at the box level, and (b) the ability to observe 53 | behavior at fine granularity and in real-time. 
This is exactly the 54 | value SDN brings to the table, which leads to optimism that 55 | *verifiable closed-loop control* is now within reach. 56 | 57 | 58 | 59 | .. _reading_pronto: 60 | .. admonition:: Further Reading 61 | 62 | N. Foster, et al. `Using Deep Programmability to Put Network 63 | Owners in Control 64 | `__. 65 | ACM SIGCOMM Computer Communication Review, October 2020. 66 | 67 | :numref:`Figure %s ` illustrates the basic idea. The 68 | software stack described in this book is augmented with the 69 | measurement, code generation, and verification elements needed for 70 | verifiable closed-loop control. Fine-grained measurements can be 71 | implemented using INT (Inband Network Telemetry), which allows every 72 | packet to be stamped by the forwarding elements to indicate the path 73 | it took, the queuing delay it experienced, and the rules it matched. 74 | These measurements can then be analyzed and fed back into code 75 | generation and formal verification tools. This closed loop complements 76 | the intrinsic value of disaggregation, which makes it possible to 77 | reason about correctness-by-construction. 78 | 79 | .. _fig-closed-loop: 80 | .. figure:: figures/Slide30.png 81 | :width: 600px 82 | :align: center 83 | 84 | INT generates fine-grained measurements, which in turn feed a closed 85 | control loop that verifies the network’s behavior. 86 | 87 | .. sidebar:: Top-Down Verification 88 | 89 | *The approach to verifying networks described in this chapter is 90 | similar to the one used in chip design. At the top is a behavioral 91 | model; then at the register-transfer level is a Verilog or VHDL 92 | model; and eventually at the bottom are transistors, polygons and 93 | metal. Tools are used to formally verify correctness across each 94 | boundary and abstraction level.* 95 | 96 | *This is a model for what we are talking about here: Verifying 97 | across boundaries in a top-down design approach.
This is made 98 | possible by the new SDN interfaces and abstractions defined by the 99 | software stack, which extends all the way to the programmable 100 | forwarding pipelines provided by the switching chip.* 101 | 102 | *As experience with hardware verification demonstrates, this 103 | approach works best in composed systems, where each minimal 104 | component can be verified or reliably tested on its own. Formal 105 | tools are then applied as components are composed at layer 106 | boundaries.* 107 | 108 | The goal is to enable network operators to specify a network’s 109 | behavior top-down, and then verify the correctness across each 110 | interface. At the lowest level, P4 programs specify how packets are 111 | processed; these programs are compiled to run on the forwarding plane 112 | elements. Such an approach represents a fundamental new capability 113 | that has not been possible in conventional designs, based on two key 114 | insights. 115 | 116 | First, while network control planes are inherently complicated, a P4 117 | data plane captures *ground truth* for the network—i.e., how it 118 | forwards packets—and is therefore an attractive platform for deploying 119 | verification technologies. By observing and then validating behavior 120 | at the data plane level, it is possible to reduce the trusted 121 | computing base: the switch operating system, driver, and other 122 | low-level components do not need to be trusted. Moreover, whereas the 123 | control plane tends to be written in a general-purpose language and is 124 | correspondingly complex, the data plane is necessarily simple: it is 125 | ultimately compiled to an efficient, feed-forward pipeline 126 | architecture with simple data types and limited state. While verifying 127 | general-purpose software is impossible in the general case, data plane 128 | verification is both powerful and practical. 129 | 130 | This claim of practicality is grounded in the current 131 | state-of-the-art.
Once the forwarding behavior is defined and known, 132 | then forwarding table state defines forwarding behavior. For example, 133 | if everything is known to be IPv4-forwarded, then the forwarding table 134 | state in all routers is enough to define the network behavior. This 135 | idea has been reduced to practice by techniques like Veriflow and 136 | Header Space Analysis (HSA), and is now available commercially. 137 | Knowing that this state is enough to verify networks with fixed 138 | forwarding behavior means that we are "merely" adding one new 139 | degree-of-freedom: allowing the network operator to program the 140 | forwarding behavior (and evolve it over time) using P4. The use of P4 141 | to program the data plane is key: the language carefully excludes 142 | features such as loops and pointer-based data structures, which 143 | typically make analysis impractical. To read more about the 144 | opportunity, we recommend a paper by Jed Liu and colleagues. 145 | 146 | .. _reading_p4: 147 | .. admonition:: Further Reading 148 | 149 | J. Liu, et al. `p4v: Practical Verification for Programmable Data Planes 150 | `__. ACM 151 | SIGCOMM 2018. 152 | 153 | The second insight is that, in addition to building tools for 154 | analyzing network programs, it is important also to develop 155 | technologies that map the high-level intent of the network operator to 156 | code that implements that intent. One of the challenges of current 157 | approaches to network verification is that they take existing network 158 | equipment, with their complex distributed control planes, as their 159 | starting point, and build mathematical models of how those 160 | control planes behave. If the reality doesn't precisely match the 161 | model, then verification won't ensure that the network behaves as 162 | required.
But with the centralized control model of SDN, 163 | the control plane is designed to map a centrally specified request 164 | into a set of control directives that can be implemented in the data 165 | plane. And we are starting to see systems in which the SDN control 166 | plane itself is compiled from a high-level specification of its 167 | desired properties. Thus we can hope to see control planes that are 168 | correct by construction, rather than trying to build models that 169 | accurately capture the behavior of historically hard-to-analyze 170 | systems like BGP.\ [#]_ 171 | 172 | .. [#] It's hard to imagine BGP ever going away entirely for 173 | interdomain routing, but at least for the large set of intradomain use 174 | cases the chance to design for verifiability seems possible. 175 | 176 | .. _fig-phase3: 177 | .. figure:: figures/Slide37.png 178 | :width: 600px 179 | :align: center 180 | 181 | Projecting into the future, with Phase 3 of SDN focusing on 182 | verifiable, top-down control of network behavior. 183 | 184 | To put this all in an historical context, :numref:`Figure %s 185 | ` illustrates a view of three phases of SDN. It is fair to 186 | say that we are in the early stages of phase 2, where the most 187 | advanced operators have been able to take control of their software, 188 | via disaggregated control planes, and of their packet processing, via 189 | P4-programmable data planes. We see an emerging third phase, during which 190 | verifiable closed loop control will empower network operators to take 191 | full ownership of the software that defines their networks. Not only 192 | will they be able to determine the behavior of their networks through 193 | software, but they will be able to prove that the network is 194 | implementing their intent.
Just as the hardware industry has developed 195 | high confidence that chips will work as intended before they go into 196 | manufacturing, network operators will have confidence that their 197 | networks are reliable, secure, and meeting their specified 198 | objectives. 199 | 200 | 201 | -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | .. image:: _static/SystemsApproachLogoURL.png 2 | :width: 300px 3 | :align: center 4 | :target: https://systemsapproach.org 5 | 6 | | 7 | 8 | Software-Defined Networks: A Systems Approach 9 | ============================================= 10 | 11 | Peterson, Cascone, O’Connor, Vachuska, and Davie 12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | | 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: Table of Contents 19 | 20 | foreword.rst 21 | preface.rst 22 | intro.rst 23 | uses.rst 24 | arch.rst 25 | switch.rst 26 | stratum.rst 27 | onos.rst 28 | trellis.rst 29 | netvirt.rst 30 | access.rst 31 | future.rst 32 | exercises.rst 33 | README.rst 34 | authors.rst 35 | latest.rst 36 | print.rst 37 | -------------------------------------------------------------------------------- /latest.rst: -------------------------------------------------------------------------------- 1 | .. role:: pop 2 | 3 | :pop:`Read the Latest!` 4 | ======================== 5 | 6 | `Systems Approach Newsletter: 7 | `__ Stay up to date with the 8 | latest developments by subscribing to the `Systems Approach Newsletter 9 | `__, where the authors 10 | connect the concepts and lessons in this book to what’s happening in 11 | the Internet today. 12 | 13 | `Book Series: `__ Also check out 14 | our companion books that cover emerging topics in more depth. 
15 | 16 | * `What We Talk About When We Talk About Systems: Essays on the 17 | Systems Approach `__ 18 | 19 | * `Private 5G: A Systems Approach `__ 20 | 21 | * `Edge Cloud Operations: A Systems Approach 22 | `__ 23 | 24 | * `TCP Congestion Control: A Systems Approach `__ 25 | 26 | -------------------------------------------------------------------------------- /preface.rst: -------------------------------------------------------------------------------- 1 | Preface 2 | ======= 3 | 4 | The Internet is in the midst of a transformation, one that moves away 5 | from bundled proprietary devices, and instead embraces disaggregating 6 | network hardware (which becomes commodity) from the software that 7 | controls it (which scales in the cloud). The transformation is 8 | generally known as *Software-Defined Networking (SDN)*, but because it 9 | is disrupting the marketplace, it is challenging to untangle business 10 | positioning from technical fundamentals, from short-term engineering 11 | decisions. This book provides such an untangling, where the most 12 | important thing we hope readers take away is an understanding of an 13 | SDN-based network as a scalable distributed system running on 14 | commodity hardware. 15 | 16 | Anyone who has taken an introductory networking class recognizes the 17 | protocol stack as the canonical framework for describing the 18 | network. Whether that stack has seven layers or just three, it shapes 19 | and constrains the way we think about computer networks. Textbooks are 20 | organized accordingly. SDN suggests an alternative world-view, one 21 | that comes with a new software stack. This book is organized around 22 | that new stack, with the goal of presenting a top-to-bottom tour of 23 | SDN without leaving any significant gaps that the reader might suspect 24 | can only be filled with magic or proprietary code. 
*We invite you to do 25 | the hands-on programming exercises included at the end of the book to 26 | prove to yourself that the software stack is both real and complete.* 27 | 28 | An important aspect of meeting this goal is to use open source. We do 29 | this in large part by taking advantage of two community-based 30 | organizations that are leading the way. One is the *Open Compute 31 | Project (OCP)*, which is actively specifying and certifying commodity 32 | hardware (e.g., bare-metal switches) upon which the SDN software stack 33 | runs. The second is the *Open Networking Foundation (ONF)*, which is 34 | actively implementing a suite of software components that can be 35 | integrated into an end-to-end solution. There are many other players 36 | in this space—from incumbent vendors to network operators, startups, 37 | standards bodies, and other open source projects—each offering varied 38 | interpretations of what SDN *is* and *is not*. We discuss these other 39 | perspectives and explain how they fit into the larger scheme of 40 | things, but we do not let them deter us from describing the full 41 | breadth of SDN. Only time will tell where the SDN journey takes us, 42 | but we believe it is important to understand the scope of the 43 | opportunity. 44 | 45 | This book assumes a general understanding of the Internet, although a 46 | deeper appreciation for the role switches and routers play in forwarding 47 | Ethernet frames and IP packets is helpful. Links to related background 48 | information are included to help bridge any gaps. 49 | 50 | This book is a work-in-progress, with updates, clarifications, and new 51 | material added continuously. For example, the latest version (v2.0) 52 | includes new chapters on *Network Virtualization* (Chapter 8) and 53 | *Access Networks* (Chapter 9). We are eager to hear your feedback and 54 | suggestions on how we can continue to improve this book.
55 | 56 | Acknowledgements 57 | ---------------- 58 | 59 | The software described in this book is due to the hard work of the ONF 60 | engineering team and the open source community that works with 61 | them. We acknowledge their contributions, with a special thank-you to 62 | Yi Tseng, Max Pudelko, and Charles Chan for their contributions to the 63 | tutorials that this book includes as hands-on exercises. We also thank 64 | Charles Chan, Jennifer Rexford, Nick McKeown, Kentaro Ebisawa, 65 | Motonori Shindo, and Sina Ebrahimi for their comments and feedback. 66 | 67 | .. To include in epub and printed versions 68 | 69 | The cover photo of the Ueno Station (Tokyo) is 70 | by `Athena Lam `__ 71 | on `Unsplash `__. 72 | 73 | | Larry Peterson, Carmelo Cascone, Brian O'Connor, Thomas Vachuska, and Bruce Davie 74 | | November 2021 75 | 76 | -------------------------------------------------------------------------------- /print.rst: -------------------------------------------------------------------------------- 1 | .. role:: pop 2 | 3 | :pop:`Print Copies` 4 | =========================== 5 | 6 | We make all books in the *Systems Approach* series available as both 7 | print and e-books. This book is available from various online 8 | booksellers: `Software-Defined Networks: A Systems Approach `__ 9 | 10 | `Book Series: `__ Also check out 11 | our companion books that cover networking and emerging topics in more 12 | depth. 
13 | 14 | * `What We Talk About When We Talk About Systems: Essays on the 15 | Systems Approach `__ 16 | 17 | * `Private 5G: A Systems Approach `__ 18 | 19 | * `Edge Cloud Operations: A Systems Approach 20 | `__ 21 | 22 | * `TCP Congestion Control: A Systems Approach `__ 23 | 24 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx~=6.2.1 2 | sphinxcontrib-spelling~=7.3.2 3 | sphinx-rtd-theme~=3.0.2 4 | doc8~=1.1.2 5 | -------------------------------------------------------------------------------- /stratum.rst: -------------------------------------------------------------------------------- 1 | Chapter 5: Switch OS 2 | ====================== 3 | 4 | This chapter describes the operating system running on every bare-metal 5 | switch. 6 | A good mental model is to think of this as analogous to a server OS: 7 | there is a general-purpose processor running a 8 | Linux-based OS, plus a “packet forwarding accelerator” similar in 9 | spirit to a GPU. 10 | 11 | The most common foundation for this Switch OS is *Open Network Linux 12 | (ONL)*, an open source project of the Open Compute Project. ONL starts 13 | with the Debian distribution of Linux, and augments it with support 14 | for hardware that is unique to switches, including the *Small 15 | Form-factor Pluggable (SFP)* interface module shown in :numref:`Figure 16 | %s `. 17 | 18 | This chapter does not go into these low-level device driver details, 19 | but instead focuses on the *Northbound Interface (NBI)* exported by 20 | the Switch OS to the control plane, whether that control plane runs 21 | on-switch (as a program running in user space on top of the Switch OS) 22 | or off-switch (as an SDN controller like ONOS). And as introduced in 23 | Chapter 3, we will use Stratum as our concrete example of the layer of 24 | software that implements this NBI on top of ONL. 
Stratum is sometimes 25 | called a *Thin Switch OS*, where the operative word is “thin” because 26 | it essentially implements an API shim. What’s interesting about the 27 | shim is the set of APIs it supports, and correspondingly, the vast 28 | majority of this chapter focuses on those APIs. 29 | 30 | 5.1 Thin Switch OS 31 | --------------------- 32 | 33 | This section describes the set of components that implement an 34 | SDN-ready Northbound Interface for the Switch OS running on a 35 | bare-metal switch. The details are drawn from Stratum, an open source 36 | project at the ONF that started with production-quality code made 37 | available by Google. :numref:`Figure %s ` gives a 38 | high-level overview of Stratum, and to re-emphasize, it’s the exposed 39 | interfaces—P4Runtime, gNMI, and gNOI—that are the important takeaways 40 | of this chapter. We show these few implementation details in this 41 | section only as a way of grounding the description of an end-to-end 42 | workflow for developers implementing SDN-based solutions. 43 | 44 | .. _fig-stratum: 45 | .. figure:: figures/Slide17.png 46 | :width: 500px 47 | :align: center 48 | 49 | High-level schematic of Stratum, a Thin Switch OS running on top 50 | of Open Network Linux. 51 | 52 | Stratum exports three primary Northbound Interfaces: (1) P4Runtime is 53 | used to control the switch’s forwarding behavior; (2) gNMI is used to 54 | configure the switch; and (3) gNOI is used to access other operational 55 | variables on the switch. All three interfaces are gRPC services (not 56 | shown), which implies there is a corresponding set of *Protocol 57 | Buffers (protobufs)* that specify the API methods and supported 58 | parameters of each. A tutorial on gRPC and protobufs is beyond the 59 | scope of this book, but a brief introduction to both can be found 60 | online. 61 | 62 | .. _reading_grpc: 63 | .. admonition:: Further Reading 64 | 65 | `gRPC 66 | `__. 67 | *Computer Networks: A Systems Approach*, 2020.
68 | 69 | `Protocol Buffers 70 | `__. 71 | *Computer Networks: A Systems Approach*, 2020. 72 | 73 | The important take-away is that by using protobufs and gRPC, Stratum 74 | need not be concerned with the long list of formatting, reliability, 75 | backward compatibility, and security challenges that protocols 76 | (including OpenFlow) have historically spent a great deal of time 77 | worrying about. In addition, protobufs serve as a well-defined target 78 | for the code generated by the P4 compiler, which is to say, the P4 79 | toolchain outputs protobufs that specify the types and arguments for 80 | the P4Runtime interface. This API, along with the client- and 81 | server-side stubs that implement it, are (mostly) auto-generated. 82 | Section 5.2 describes the toolchain for creating this 83 | runtime contract in more detail. 84 | 85 | Below Stratum, the architecture takes advantage of two components. The 86 | first is a *Software Development Kit (SDK)* for the on-board switching 87 | chip(s). These are provided by the switch vendor, and in the case of 88 | Broadcom, it roughly corresponds to the OF-DPA layer described in 89 | Section 4.5. Barefoot provides a similar SDK for their Tofino 90 | chip. You can think of these SDKs as similar to device drivers in a 91 | traditional OS: they are used to indirectly read and write memory 92 | locations on the corresponding chip. The second is the *ONL Platform 93 | (ONLP)*, which exports the Platform API shown in :numref:`Figure %s 94 | `. This API provides access to hardware counters, monitors, 95 | status variables, and so on. 96 | 97 | As a simple example, which helps illustrate the fundamental difference 98 | between fixed-function and programmable pipelines, Broadcom's SDK 99 | defines a ``bcm_l3_route_create`` method to update the L3 forwarding 100 | table, whereas Barefoot's corresponding pipeline-independent method is 101 | ``bf_table_write``. 
102 | 103 | Internal to Stratum, the rest of the components shown in 104 | :numref:`Figure %s ` are primarily designed to make 105 | Stratum vendor-agnostic. In the case of a programmable chip like 106 | Tofino, Stratum is largely pass-through: P4Runtime calls that come 107 | from above are directly passed through to the Barefoot SDK. In the 108 | case of a fixed-function chip like Tomahawk, Stratum maintains the 109 | runtime state it needs to translate the P4Runtime calls into their 110 | Broadcom SDK counterpart. To a first approximation, this implies 111 | mapping P4Runtime calls in 112 | ``switch.p4`` (Section 4.5.1) into Broadcom SDK calls. For example, a 113 | P4Runtime call to update table entries in a program like ``switch.p4`` 114 | (Section 4.5.1) would be mapped into a Broadcom SDK call to update 115 | entries in one of the ASIC tables. 116 | 117 | 5.2 P4Runtime 118 | ----------------- 119 | 120 | You can think of the P4Runtime interface shown in :numref:`Figure %s 121 | ` as the server-side RPC stub for controlling the 122 | switch. There is a corresponding client-side stub, which is similarly 123 | included in the SDN Controller. Together, they implement the 124 | *P4Runtime Contract* between the controller and the switch. The 125 | toolchain for generating this contract is shown in :numref:`Figure %s 126 | `, where as in earlier figures, we represent the 127 | original P4 forwarding program as an abstract graph rather than with 128 | actual P4 source code. 129 | 130 | .. _fig-toolchain: 131 | .. figure:: figures/Slide18.png 132 | :width: 600px 133 | :align: center 134 | 135 | P4 toolchain achieves ASIC-independence and auto-generates 136 | P4Runtime Contract (represented as a Protocol Buffer 137 | specification). 
138 | 139 | One key takeaway from :numref:`Figure %s ` is that the 140 | P4 compiler generates both the binary that is loaded into each 141 | switching chip, and the *runtime interface* used to control the 142 | switching chip (indirectly via the Switch OS).\ [#]_ The compiler does 143 | this with the help of a vendor-specific backend, where :numref:`Figure 144 | %s ` shows two possible examples. Note that these 145 | vendor-specific backends have to be written for a specific 146 | architecture model (as defined by ``arch.p4`` in this example). In 147 | other words, today, it is a combination of the P4 language, the 148 | ASIC-specific backend, and the architecture model that defines the 149 | programming environment for injecting functionality into the data 150 | plane. 151 | 152 | .. [#] When we say the binary is loaded into the switching chip, we 153 | are adopting familiar terminology from general-purpose 154 | processors. The exact process is ASIC-specific, and might 155 | include initializing various on-chip tables via the SDK. 156 | 157 | The final piece of the end-to-end story is the connection between the 158 | runtime contract and the original program loaded into the data 159 | plane. Using the simple forwarding program presented in Section 4.4 as 160 | an example, we see that ``forward.p4`` defines a lookup table, which 161 | we restate here: 162 | 163 | .. literalinclude:: code/table.p4 164 | 165 | Correspondingly, the file ``forward.p4info`` output by the compiler 166 | *specifies* the P4Runtime Contract. As shown in the following example, 167 | it contains enough information to fully inform both the controller and 168 | switch on how to format and interpret the set of gRPC methods needed 169 | to insert, read, modify, and delete entries in this table. For 170 | example, the ``table`` definition identifies the field to match 171 | (``hdr.ipv4.dstAddr``) and the type of match (``LPM``), along with the 172 | three possible ``actions``. 173 | 174 | .. 
literalinclude:: code/actions.p4 175 | 176 | The gRPC toolchain takes over from there. For this to work, the 177 | toolchain must be aware of which P4 language elements are 178 | controllable, and hence, available to be “exposed” by 179 | ``p4runtime.proto``. Such information is contained in 180 | ``forward.p4info``, which specifies exactly the set of controllable 181 | elements and their attributes as defined in the source P4 program.\ [#]_ 182 | The table element is one obvious example, but there are 183 | others, including ``counters`` and ``meters``, which are used to 184 | report status information up to the controller and to allow the 185 | controller to specify a QoS rate, respectively. However, neither of 186 | these is included in our example program. 187 | 188 | .. [#] In principle, this P4Info file is not strictly required, as the 189 | controller and switch could use the source P4 program to derive all 190 | the information they need to handle P4Runtime methods. However, P4Info 191 | makes that much easier by extracting the relevant information from the 192 | P4 program and providing it in a more structured protobuf-defined 193 | format, which is straightforward to parse using a protobuf library. 194 | 195 | Finally, a controller actually writes an entry to this table. While in 196 | general this controller would run on top of ONOS, and so indirectly 197 | interact with the switch, we can look at a simpler example in which a 198 | Python program implements the controller, and writes an entry directly 199 | into the table (assisted by a P4Runtime library). 200 | 201 | ..
literalinclude:: code/p4rt.py 202 | 203 | 5.3 gNMI and gNOI 204 | -------------------- 205 | 206 | A core challenge of configuring and operating any network device is to 207 | define the set of variables available for operators to ``GET`` and 208 | ``SET`` on the device, with the additional requirement that this 209 | dictionary of variables should be uniform across devices (i.e., be 210 | vendor-agnostic). The Internet has already gone through one 211 | decades-long exercise defining such a dictionary, resulting in the 212 | *Management Information Base (MIB)* used in conjunction with SNMP. But 213 | the MIB was more focused on *reading* device status variables than 214 | *writing* device configuration variables, where the latter has 215 | historically been done using the device’s *Command Line Interface 216 | (CLI)*. One consequence of the SDN transformation is to nudge the 217 | industry towards support for programmatic configuration APIs. This 218 | means revisiting the information model for network devices. 219 | 220 | The main technical advance that was not prevalent in the early days of 221 | SNMP and MIB is the availability of pragmatic modeling languages, 222 | where YANG is the leading choice to have emerged over the last few 223 | years. YANG—which stands for *Yet Another Next Generation*, a name 224 | chosen to poke fun at how often a do-over proves necessary—can be 225 | viewed as a restricted version of XSD, which is a language for 226 | defining a schema for XML. YANG defines the structure of the data, but 227 | unlike XSD, it is not XML-specific. Instead, YANG can be used in 228 | conjunction with different over-the-wire message formats, including 229 | XML, but also protobufs and JSON. If these acronyms are unfamiliar, or 230 | the distinction between a markup language and a schema for a markup 231 | language is fuzzy, a gentle introduction is available online. 232 | 233 | .. _reading_xml: 234 | .. 
admonition:: Further Reading 235 | 236 | `Markup Languages (XML) 237 | `__. 238 | *Computer Networks: A Systems Approach*, 2020. 239 | 240 | What’s important about going in this direction is that the data model 241 | that defines the semantics of the variables available to be read and 242 | written is available in a programmatic form; it’s not just text in a 243 | standards document. Moreover, while it is true that all hardware 244 | vendors promote the unique capabilities of their products, it is not a 245 | free-for-all, with each vendor defining a unique model. This is because 246 | the network operators that buy network hardware have a strong 247 | incentive to drive the models for similar devices towards convergence, 248 | and vendors have an equally strong incentive to adhere to those 249 | models. YANG makes the process of creating, using, and modifying 250 | models programmable and hence, adaptable to this iterative process. 251 | 252 | .. sidebar:: Cloud Best Practices 253 | 254 | *Our commentary on OpenConfig vs. NETCONF is grounded in a 255 | fundamental tenet of SDN, which is about bringing best 256 | practices in cloud computing to the network. It involves big 257 | ideas like implementing the network control plane as a 258 | scalable cloud service, but it also includes more narrow 259 | benefits, such as using modern messaging frameworks like 260 | gRPC and protobufs.* 261 | 262 | *The advantages in this particular case are tangible: (1) 263 | improved and optimized transport using HTTP/2 and 264 | protobuf-based marshalling instead of SSH plus hand-coded 265 | marshalling; (2) binary data encodings instead of text-based 266 | encoding; (3) diff-oriented data exchange instead of 267 | snapshot-based responses; and (4) native support for server 268 | push and client streaming.* 269 | 270 | This is where an industry-wide standardization effort, called 271 | *OpenConfig*, comes into play. 
OpenConfig is a group of network 272 | operators trying to drive the industry towards a common set of 273 | configuration models using YANG as its modeling language. OpenConfig 274 | is officially agnostic as to the over-the-wire protocol used to access 275 | on-device configuration and status variables, but gNMI (gRPC Network 276 | Management Interface) is one approach it is actively pursuing. And as 277 | you might guess from its name, gNMI uses gRPC (which in turn runs on 278 | top of HTTP/2). This means gNMI also adopts protobufs as the way it 279 | specifies the data actually communicated over the HTTP 280 | connection. Thus, gNMI is intended as a standard management interface 281 | for network devices. 282 | 283 | For completeness, note that NETCONF is another of the post-SNMP 284 | protocols for communicating configuration information to network 285 | devices. OpenConfig also works with NETCONF, but our current 286 | assessment is that gNMI has the weight of industry behind it as the 287 | future management protocol. For this reason, it 288 | is the one we highlight in our description of the full SDN software 289 | stack. 290 | 291 | OpenConfig defines a hierarchy of object types. For example, the YANG 292 | model for network interfaces looks like this: 293 | 294 | .. literalinclude:: code/iface.yang 295 | 296 | This is a base model that can be augmented, for example, to model an Ethernet interface: 297 | 298 | .. literalinclude:: code/eth.yang 299 | 300 | Other similar augmentations might be defined to support link 301 | aggregation, IP address assignment, VLAN tags, and so on. 302 | 303 | Each model in the OpenConfig hierarchy defines a combination of a 304 | configuration state that can be both read and written by the client 305 | (denoted ``rw`` in the examples) and an operational state that reports 306 | device status (denoted ``ro`` in the examples, indicating it is 307 | read-only from the client-side). 
This distinction between declarative 308 | configuration state and runtime feedback state is a fundamental aspect 309 | of any network device interface, where OpenConfig is explicitly 310 | focused on generalizing the latter to include network telemetry data 311 | the operator needs to track. 312 | 313 | Having a meaningful set of models is necessary, but a full 314 | configuration system includes other elements as well. In our case, 315 | there are three important points to make about the relationship 316 | between Stratum and the OpenConfig models. 317 | 318 | The first is that Stratum depends on a YANG toolchain. :numref:`Figure 319 | %s ` shows the steps involved in translating a set of 320 | YANG-based OpenConfig models into the client-side and server-side gRPC 321 | stubs used by gNMI. The gNMI Server shown in the figure is the same as 322 | the gNMI interface portal shown in :numref:`Figure %s 323 | `. The toolchain supports multiple target programming 324 | languages (Stratum happens to use C++), where the client and server 325 | sides of the gRPC need not be written in the same language. 326 | 327 | .. _fig-yang: 328 | .. figure:: figures/Slide25.png 329 | :width: 550px 330 | :align: center 331 | 332 | YANG toolchain used to generate gRPC-based runtime for gNMI. 333 | 334 | Keep in mind that YANG is not tied to either gRPC or gNMI. The 335 | toolchain is able to start with the very same OpenConfig models but 336 | instead produce XML or JSON representations for the data being 337 | read from or written to network devices using, for example, NETCONF or 338 | RESTCONF, respectively. But in our context, the target is protobufs, 339 | which Stratum uses to support gNMI running over gRPC. 340 | 341 | The second point is that gNMI defines a specific set of gRPC methods to 342 | operate on these models. The set is defined collectively as a Service 343 | in the protobuf specification: 344 | 345 | .. 
literalinclude:: code/service.proto 346 | 347 | The ``Capabilities`` method is used to retrieve the set of model 348 | definitions supported by the device. The ``Get`` and ``Set`` methods 349 | are used to read and write the corresponding variable defined in some 350 | models. The ``Subscribe`` method is used to set up a stream of 351 | telemetry updates from the device. The corresponding arguments and 352 | return values (e.g., ``GetRequest``, ``GetResponse``) are defined 353 | by a protobuf ``Message`` and include various fields from the YANG 354 | models. A given field is specified by giving its fully qualified 355 | path name in the data model tree. 356 | 357 | The third point is that Stratum does not necessarily care about the 358 | full range of OpenConfig models. This is because—as a Switch OS 359 | designed to support a centralized Controller—Stratum cares about 360 | configuring various aspects of the data plane but is not typically 361 | involved in configuring control plane protocols like BGP. Such control 362 | plane protocols are no longer implemented on the switch in an 363 | SDN-based solution (although they remain in scope for the Network OS, 364 | which implements their centralized counterpart). To be specific, 365 | Stratum tracks the following OpenConfig models: Interfaces, VLANs, 366 | QoS, and LACP (link aggregation), in addition to a set of system and 367 | platform variables (of which the switch’s fan speed is everyone’s 368 | favorite example). 369 | 370 | We conclude this section by briefly turning our attention to gNOI, but 371 | there isn't a lot to say. This is because the underlying mechanism 372 | used by gNOI is exactly the same as for gNMI, and in the larger scheme 373 | of things, there is little difference between a switch’s configuration 374 | interface and its operations interface. 
Generally speaking, persistent 375 | state is handled by gNMI (and a corresponding YANG model is defined), 376 | whereas clearing or setting ephemeral state is handled by gNOI. It is 377 | also the case that non-idempotent actions like reboot and ping tend to 378 | fall under gNOI's domain. In any case, the two are closely enough 379 | aligned to collectively be referred to as gNXI. 380 | 381 | As an illustrative example of what gNOI is used for, the following is 382 | the protobuf specification for the ``System`` service: 383 | 384 | .. literalinclude:: code/system.proto 385 | 386 | where, for example, the following protobuf message defines the 387 | ``RebootRequest`` parameter: 388 | 389 | .. literalinclude:: code/reboot.proto 390 | 391 | As a reminder, if you are unfamiliar with protobufs, a brief overview is available online. 392 | 393 | .. _reading_protobuf: 394 | .. admonition:: Further Reading 395 | 396 | `Protocol Buffers 397 | `__. 398 | *Computer Networks: A Systems Approach*, 2020. 399 | 400 | 5.4 SONiC 401 | ---------------- 402 | 403 | In the same way that SAI is an industry-wide switch abstraction (see 404 | Section 4.5), SONiC is a vendor-agnostic Switch OS that is gaining a 405 | lot of momentum in the industry. It was originally open-sourced by 406 | Microsoft and continues to serve as the Switch OS for the Azure Cloud. 407 | SONiC leverages SAI as a vendor-agnostic SDK and includes a 408 | switch-customized Linux distribution, which is to say, Stratum and 409 | SONiC try to fill the same need. Today their respective approaches are 410 | largely complementary, with the two open source communities working 411 | towards a “best of both worlds” solution. This effort is known as *PINS*, 412 | which stands for *P4 Integrated Network Stack.* 413 | 414 | .. _reading_pins: 415 | .. admonition:: Further Reading 416 | 417 | `PINS: P4 Integrated Network Stack 418 | `__. 
419 | 420 | Both SONiC and Stratum support a configuration interface, so unifying 421 | those will be a matter of reconciling their respective data models and 422 | toolchains. The main distinction is Stratum’s support for programmable 423 | forwarding pipelines (including both P4 and P4Runtime), versus the 424 | least common denominator approach to forwarding taken by SAI. 425 | Developers on the two open-source projects are working together 426 | to define a roadmap that will make it possible for interested networks 427 | to take advantage of programmable pipelines in an incremental and 428 | low-risk way. 429 | 430 | The goal of this effort is both (1) to enable remote SDN 431 | Controllers/Apps to interact with SAI using P4Runtime and gNMI, 432 | and (2) to enable SAI extensions using P4 so as to improve feature 433 | velocity in the data plane. Both goals rely on a new representation of 434 | the SAI behavioral model and pipeline based on a P4 program (the so 435 | called ``sai.p4`` program shown in :numref:`Figure %s ` 436 | of Section 4.6). If you take one thing away from this reconciliation 437 | effort, it should be that embracing a programmable pipeline (and 438 | corresponding toolchain) is what facilitates doing something like 439 | this. 440 | -------------------------------------------------------------------------------- /trellis.rst: -------------------------------------------------------------------------------- 1 | Chapter 7: Leaf-Spine Fabric 2 | ======================================= 3 | 4 | This chapter describes a leaf-spine switching fabric implemented by a 5 | collection of control applications. We use SD-Fabric, running on ONOS, 6 | as our exemplar implementation. Various aspects of SD-Fabric were 7 | introduced in earlier chapters, so we summarize those highlights 8 | before getting into the details. 
9 | 10 | * SD-Fabric supports the leaf-spine fabric topology that is commonly 11 | used to interconnect multiple racks of servers in a datacenter (see 12 | :numref:`Figure %s `), but it also supports 13 | multi-site deployments (see :numref:`Figure %s `). 14 | SD-Fabric uses only bare-metal switches, equipped with the software 15 | described in the previous chapters, to build out the fabric. It can 16 | run on a mix of fixed-function and programmable pipelines but is 17 | running in production with the former. 18 | 19 | * SD-Fabric supports a wide range of L2/L3 features, all re-implemented 20 | as SDN control apps (with the exception of a DHCP server used to 21 | relay DHCP requests and a Quagga BGP server used to exchange BGP 22 | routes with external peers). SD-Fabric implements L2 connectivity 23 | within each server rack and L3 connectivity between racks. 24 | 25 | * SD-Fabric supports access/edge networking technologies, such as PON 26 | (see :numref:`Figure %s `) and RAN (see :numref:`Figure %s 27 | `), including support for (a) routing IP traffic 28 | to/from devices connected to those access networks and (b) 29 | off-loading access network functionality into the fabric switches. 30 | 31 | This chapter does not give a comprehensive description of all of these 32 | features, but it does focus on the datacenter fabric use case, which 33 | is sufficient to illustrate the approach to building a 34 | production-grade network using SDN principles. More information about 35 | the full range of SD-Fabric design decisions is available on the SD-Fabric 36 | website. 37 | 38 | .. _reading_trellis: 39 | .. admonition:: Further Reading 40 | 41 | `SD-Fabric `__. Open 42 | Networking Foundation, 2021. 
43 | 44 | 7.1 Feature Set 45 | --------------- 46 | 47 | SDN provides an opportunity to customize the network, but for 48 | pragmatic reasons, the first requirement for adoption is to reproduce 49 | functionality that already exists and do so in a way that reproduces 50 | (or improves upon) the resilience and scalability of legacy 51 | solutions. SD-Fabric has satisfied this requirement, which we 52 | summarize here. 53 | 54 | First, with respect to L2 connectivity, SD-Fabric supports VLANs, 55 | including native support for forwarding traffic based on VLAN id, 56 | along with Q-in-Q support based on an outer/inner VLAN id 57 | pair. Support for Q-in-Q is particularly relevant to access networks, 58 | where double tagging is used to isolate traffic belonging to different 59 | service classes. In addition, SD-Fabric supports L2 tunnels across the 60 | L3 fabric (both single- and double-tagged). 61 | 62 | Second, with respect to L3 connectivity, SD-Fabric supports IPv4 and 63 | IPv6 routing for both unicast and multicast addresses. For the latter, 64 | SD-Fabric implements centralized multicast tree construction (as opposed 65 | to running a protocol like PIM) but does include IGMP support for end 66 | hosts wishing to join/leave multicast groups. SD-Fabric also supports 67 | both ARP (for IPv4 address translation) and NDP (for IPv6 neighbor 68 | discovery), along with support for both DHCPv4 and DHCPv6. 69 | 70 | Third, SD-Fabric provides high availability in the face of link or 71 | switch failures. It does this through a combination of well-known 72 | techniques: dual-homing, link bonding, and ECMP link groups. As 73 | illustrated in :numref:`Figure %s `, each server in an 74 | SD-Fabric cluster is connected to a pair of Top-of-Rack (ToR, or leaf) switches, where 75 | the OS running on each compute server implements active-active link 76 | bonding.
Each leaf switch is then connected by a pair of links to two 77 | or more spine switches, with an ECMP group defined for the pair of 78 | links connecting each leaf to a given spine and for the set of links 79 | connecting each leaf to a set of spines. The cluster as a whole then 80 | has multiple connections to external routes, shown via leaf switches 3 81 | and 4 in the Figure. Not shown in :numref:`Figure %s ` 82 | is the fact that SD-Fabric runs on top of ONOS, which is itself 83 | replicated for the sake of availability. In a configuration like the 84 | one shown here, ONOS (and hence the SD-Fabric control applications) is 85 | replicated on three to five servers. 86 | 87 | .. _fig-netconfig: 88 | .. figure:: figures/Slide31.png 89 | :width: 450px 90 | :align: center 91 | 92 | High availability through a combination of dual-homing, link 93 | bonding, and ECMP groups. 94 | 95 | The use of link aggregation and ECMP is straightforward: the packet 96 | forwarding mechanism is augmented to load-balance outgoing packets 97 | among a group (e.g., a pair) of links (egress ports) rather than 98 | having just a single “best” output link (egress port). This both 99 | improves bandwidth and results in an automatic recovery mechanism 100 | should any single link fail. It is also the case that switch 101 | forwarding pipelines have explicit support for port groups, so once 102 | equivalences are established, they can be pushed all the way into the 103 | data plane. 104 | 105 | To be clear, ECMP is a forwarding strategy that SD-Fabric applies 106 | uniformly across all the switches in the fabric. The SD-Fabric control 107 | application knows the topology and pushes the port groups into each 108 | of the fabric switches accordingly. Each switch then applies these 109 | port groups to its forwarding pipeline, which then forwards packets 110 | across the set of ports in each group without additional control plane 111 | involvement.
112 | 113 | Fourth, with respect to scalability, SD-Fabric has demonstrated the 114 | ability to support up to 120k routes and 250k flows. This is in a 115 | configuration that includes two spine switches and eight leaf 116 | switches, the latter implying up to four racks of servers. As with 117 | availability, SD-Fabric’s ability to scale performance is directly due 118 | to ONOS’s ability to scale. 119 | 120 | 7.2 Segment Routing 121 | ------------------- 122 | 123 | The previous section focused on *what* SD-Fabric does. This section 124 | focuses on *how*. The core strategy in SD-Fabric is based on *Segment 125 | Routing (SR)*. The term “segment routing” comes from the idea that the 126 | end-to-end path between any pair of hosts can be constructed from a 127 | sequence of segments, where label-switching is used to traverse a 128 | sequence of segments along an end-to-end path. Segment routing is a 129 | general approach to source routing which can be implemented in a 130 | number of ways. In the case of SD-Fabric, segment routing leverages the 131 | forwarding plane of *Multi-Protocol Label Switching (MPLS)*, which you 132 | can read more about online. 133 | 134 | .. _reading_mpls: 135 | .. admonition:: Further Reading 136 | 137 | `Multi-Protocol Label Switching 138 | `__. *Computer 139 | Networks: A Systems Approach*, 2020. 140 | 141 | When applied to a leaf-spine fabric, there are always two segments 142 | involved: leaf-to-spine and spine-to-leaf. SD-Fabric programs the 143 | switches to match labeled or unlabeled packets and push or pop 144 | MPLS labels as needed. :numref:`Figure %s 145 | ` illustrates how SR works in SD-Fabric using a simple 146 | configuration that forwards traffic between a pair of hosts: 10.0.1.1 147 | and 10.0.2.1. In this example, the servers connected to Leaf 1 are on 148 | subnet 10.0.1/24, the servers connected to Leaf 2 are on subnet 149 | 10.0.2/24, and each of the switches has an assigned MPLS id: 101, 150 | 103, 102, and 104. 
151 | 152 | .. _fig-sr: 153 | .. figure:: figures/Slide32.png 154 | :width: 550px 155 | :align: center 156 | 157 | Example of Segment Routing being used to forward traffic between a 158 | pair of hosts. 159 | 160 | When Host 1 sends a packet with destination address 10.0.2.1, it is by 161 | default forwarded to the server’s ToR/leaf switch. Leaf 1 matches the 162 | destination IP address, learns this packet needs to cross the fabric 163 | and emerge at Leaf 2 to reach subnet 10.0.2/24, and so pushes the MPLS 164 | label 102 onto the packet. Because of ECMP, Leaf 1 can forward the 165 | resulting packet to either spine, at which point that switch matches 166 | the MPLS label 102, pops the label off the header, and forwards it to 167 | Leaf 2. Finally, Leaf 2 matches the destination IP address and 168 | forwards the packet along to Host 2. 169 | 170 | What you should take away from this example is that SR is highly 171 | stylized. For a given combination of leaf and spine switches, SD-Fabric 172 | first assigns all identifiers, with each rack configured to share an 173 | IP prefix and be on the same VLAN. SD-Fabric then pre-computes the 174 | possible paths and installs the corresponding match/action rules in 175 | the underlying switches. The complexity having to do with balancing 176 | load across multiple paths is delegated to ECMP, which is similarly 177 | unaware of any end-to-end paths. From an implementation perspective, 178 | the SD-Fabric control application that implements SR passes these 179 | match/action rules to ONOS, which in turn installs them on the 180 | underlying switches. SD-Fabric also maintains its own Atomix map to 181 | manage the set of ECMP groups connecting leaf and spine switches. 
182 | 183 | 7.3 Routes and Multicast 184 | ------------------------ 185 | 186 | In addition to Segment Routing, which establishes data paths between 187 | leaf switches, SD-Fabric also takes advantage of the Route and Mcast 188 | services introduced in Chapter 6. They determine which of the 189 | leaf-spine switches serve each IP prefix and where to find all the 190 | hosts connected to each multicast group, respectively. 191 | 192 | SD-Fabric does not run distributed protocols like OSPF to learn about 193 | routes or PIM to construct multicast trees. Instead, it computes the 194 | right answers based on global information and then pushes these 195 | mappings to the Route and Mcast services. This is straightforward to 196 | do because SD-Fabric imposes the simplifying constraint that each rack 197 | corresponds to exactly one IP subnet. 198 | 199 | To make this discussion more concrete, consider that all the ONOS 200 | Services described in Chapter 6 can be invoked via a RESTful API, or 201 | alternatively, through a CLI that is a thin wrapper around REST's 202 | ``GET``, ``POST``, and ``DELETE`` calls. Using the CLI to illustrate 203 | (because it is easier to read), one can query the Route service to 204 | learn the existing routes as follows: 205 | 206 | .. literalinclude:: code/onos1.txt 207 | 208 | Similarly, one can add a static route to the Route Service: 209 | 210 | .. literalinclude:: code/onos2.txt 211 | 212 | One thing to note about these examples is that there are two possible 213 | sources for routes. One is that the route is ``STATIC``, which usually 214 | means that SD-Fabric inserted it with full knowledge of what prefix 215 | it has assigned to each rack in the cluster. (Human operators could 216 | also add a ``STATIC`` route using the CLI, but this would be an 217 | exception rather than the rule.) 218 | 219 | The second possibility is that ``FPM`` was the source.
FPM (Forwarding 220 | Plane Manager) is yet 221 | another ONOS Service–one of the SD-Fabric suite of services. Its 222 | job is to learn routes from external sources, which it does by tapping 223 | into a locally running Quagga process that is configured to peer with 224 | BGP neighbors. Whenever FPM learns about an external route, it adds 225 | the corresponding prefix-to-nexthop mapping to the Route service, 226 | indicating that the destination prefix is reachable via the leaf 227 | switches that connect the fabric to upstream networks (e.g., Switches 3 228 | and 4 in :numref:`Figure %s `). 229 | 230 | The story with multicast is similar. Again using the ONOS CLI, it is 231 | possible to create a new multicast route and add a sink to it. For example: 232 | 233 | .. literalinclude:: code/onos3.txt 234 | 235 | specifies *Any-Source Multicast (ASM)* (``sAddr *``), a multicast group address 236 | (``gAddr``), the group source addresses (``srcs``), and the group sink 237 | addresses (``sinks``). A sink can then be removed as follows: 238 | 239 | .. literalinclude:: code/onos4.txt 240 | 241 | Again, there is no PIM running, but instead, SD-Fabric offers a 242 | programmatic interface for network operators to define a multicast tree 243 | through a sequence of such calls. For example, when SD-Fabric runs as 244 | part of an access network that delivers IPTV to subscribers, one 245 | option is for software running on the operator's set-top boxes to 246 | issue calls similar to those shown above (except, of course, using 247 | the RESTful API rather than the CLI). Another option is to have 248 | set-top boxes send IGMP messages, which SD-Fabric intercepts using the 249 | Packet Service (similar to how the Host service intercepts ARP and 250 | DHCP packets). So the next time you use your TV remote to change 251 | channels, you may be triggering procedure invocations up 252 | and down the SDN software stack described throughout this book! 
253 | 254 | 7.4 Customized Forwarding 255 | -------------------------- 256 | 257 | SD-Fabric is an example use case for SDN. It is a set of control 258 | applications running on top of a Network OS, which in turn runs on top of 259 | a collection of programmable switches arranged in a leaf-spine topology, 260 | where each switch runs a local Switch OS. In this way, SD-Fabric serves 261 | as a capstone for our bottom-up tour of the SDN software stack. 262 | 263 | But if we knew from the outset that a leaf-spine fabric supporting the 264 | SD-Fabric feature-set was exactly what we wanted, we might go back to 265 | lower layers and tailor them for that purpose. This is what has 266 | happened over time with SD-Fabric, resulting in a customized forwarding 267 | plane implemented by a P4 program called ``fabric.p4``. We conclude 268 | this chapter by giving a high-level summary of ``fabric.p4``, 269 | highlighting how its design meshes with the rest of the software 270 | stack. 271 | 272 | Before doing that, it is important to acknowledge that knowing exactly 273 | what you want from a network at the outset is an impossibly high 274 | bar. Networks evolve based on experience using and operating them. No 275 | one knew how to write ``fabric.p4`` on day one, but after iterating 276 | through a series of implementations of the other layers up-and-down 277 | the stack (including the introduction of Tofino as a programmable 278 | forwarding pipeline), ``fabric.p4`` emerged.
*The point is that 279 | treating the network as a programmable platform frees you to 280 | continually and rapidly evolve it.* 281 | 282 | Said another way, we introduced ``forward.p4`` as our canonical 283 | example of "a forwarding plane customized to do exactly what we want" 284 | in Chapter 4, but then spent the rest of the chapter describing all 285 | the machinery that makes something like ``forward.p4`` possible, 286 | without ever revisiting what network-specific functionality it might 287 | actually implement. In short, ``fabric.p4`` is a specific example of 288 | ``forward.p4``, which we are only now able to describe because of how 289 | it relates to the control plane. 290 | 291 | There are three things of note about ``fabric.p4``. First, it is 292 | loosely based on the Broadcom OF-DPA pipeline, which makes sense 293 | because SD-Fabric was originally implemented on top of a set of 294 | Tomahawk-based switches. The ``fabric.p4`` pipeline is simpler than 295 | OF-DPA, as it eliminates tables that SD-Fabric does not need. This makes 296 | ``fabric.p4`` easier to control. 297 | 298 | Second, ``fabric.p4`` is designed to mimic ONOS's FlowObjective API, 299 | thereby simplifying the process of mapping FlowObjectives onto 300 | P4Runtime operations. This is best illustrated by :numref:`Figure %s 301 | `, which shows ``fabric.p4``\'s ingress pipeline. The 302 | egress pipeline is not shown, but it is a straightforward rewriting of 303 | the header fields in the common case. 304 | 305 | .. _fig-fabric: 306 | .. figure:: figures/Slide40.png 307 | :width: 500px 308 | :align: center 309 | 310 | Logical pipeline supported by ``fabric.p4``, designed to parallel 311 | the Filtering, Forwarding, and Next stages of the FlowObjective 312 | API. 313 | 314 | Third, ``fabric.p4`` is designed to be configurable, making it 315 | possible to selectively include additional functionality. 
This is not 316 | easy when writing code that is optimized for an ASIC-based forwarding 317 | pipeline; in practice, it makes heavy use of pre-processor 318 | conditionals (i.e., ``#ifdefs``). The code fragment shown below is the 319 | main control block of ``fabric.p4``\'s ingress function. Chapter 9 320 | discusses these optional extensions in more depth but at a high 321 | level: 322 | 323 | * **UPF (User Plane Function):** Augments IP functionality in 324 | support of 4G/5G Mobile Networks. 325 | 326 | * **BNG (Broadband Network Gateway):** Augments IP functionality in 327 | support of Fiber-to-the-Home. 328 | 329 | * **INT (Inband Network Telemetry):** Adds metric collection and 330 | telemetry output directives. 331 | 332 | .. literalinclude:: code/fabric.p4 333 | 334 | .. sidebar:: VNF Off-loading 335 | 336 | *The UPF and BNG extensions are examples of an optimization 337 | technique sometimes called VNF off-loading. VNF is an acronym 338 | for Virtual Network Function, which refers to functionality that 339 | sometimes runs as software in virtual machines. Off-loading refers 340 | to the idea of re-implementing this functionality to run in the switch 341 | forwarding pipeline rather than on a general-purpose server. This 342 | generally leads to better performance because packets can be 343 | forwarded from source to destination without having to be diverted 344 | to a server.* 345 | 346 | *Calling out functions like UPF and BNG as being an off-load 347 | "optimization" is arguably an example of selective memory. It's 348 | just as accurate to say that we've off-loaded IP to the switch 349 | since IP forwarding also sometimes runs in software on 350 | general-purpose processors. To a first approximation, UPF and BNG 351 | are just specialized IP routers, augmented with additional 352 | features unique to cellular and wireline access networks, 353 | respectively. 
In the grand scheme of things, networks are built 354 | from a combination of forwarding functions, and we now have more 355 | options as to what hardware chip is the most appropriate target 356 | for implementing each such function.* 357 | 358 | For example, a companion file, ``upf.p4`` (not shown), implements the 359 | forwarding plane for the UPF extension, which includes the GTP tunnel 360 | encapsulation/decapsulation required by the 3GPP cellular standard to 361 | connect the SD-Fabric fabric to the base stations of the Radio Access 362 | Network. Similarly, ``bng.p4`` (not shown) implements PPPoE 363 | termination, which is used by some Passive Optical Network 364 | deployments to connect the SD-Fabric fabric to home routers. Finally, 365 | it is worth noting that the code fragment illustrates the basic 366 | structure of ``fabric.p4``\'s core functionality, which first applies 367 | the *filtering objective* (``filtering.apply``), then applies the 368 | *forwarding objective* (``forwarding.apply`` and ``acl.apply``), and 369 | finally applies the *next objective* (``next.apply``). 370 | 371 | In addition to selecting which extensions to include, the pre-processor 372 | also defines several constants, including the size of each logical 373 | table. Clearly, this implementation is a low-level approach to 374 | building configurable forwarding pipelines. Designing higher-level 375 | language constructs for composition, including the ability to 376 | dynamically add functions to the pipeline at runtime, is a subject of 377 | ongoing research. 378 | -------------------------------------------------------------------------------- /uses.rst: -------------------------------------------------------------------------------- 1 | Chapter 2: Use Cases 2 | ====================== 3 | 4 | A good way to understand the value of SDN is to look at how it is used 5 | in practice.
Doing so also helps explain the different perspectives 6 | on what SDN means, corresponding to what we refer to as “pure play” 7 | versus “hybrid/lite” Software-Defined Networking in the previous 8 | chapter. But before getting into *how* SDN is used, we start by first 9 | summarizing *who* is using it. 10 | 11 | First, SDN has been embraced and widely deployed by cloud providers, 12 | with Google, Facebook, and Microsoft being the most public about 13 | adoption. While their platforms and solutions are still mostly 14 | proprietary, they have open sourced individual components in an effort 15 | to catalyze wider adoption. We discuss these individual components in 16 | later chapters. 17 | 18 | Second, large network operators like AT&T, DT, NTT, and Comcast 19 | publicly talk about their plans to deploy SDN-based 20 | solutions—especially in their access networks—but they are proceeding 21 | cautiously, with most of their initiatives either using hybrid 22 | approaches, or in the case of pure play SDN, just starting to go into 23 | production. Of particular note is Comcast, which has deployed the open 24 | source components described in this book throughout their production 25 | network. 26 | 27 | Finally, enterprises have begun to adopt SDN, but there are two things 28 | to note about this situation. One is that while pure play SDN is 29 | deployed in some Universities, with the goal of supporting research 30 | and innovation, adoption is slower for enterprises in general. The 31 | most likely path-to-adoption for pure play SDN by enterprises is via 32 | managed edge services offered by cloud providers. The idea is to 33 | connect on-premise clusters running edge workloads with public clouds 34 | running scalable datacenter workloads. The second is that many 35 | enterprise vendors offer SDN products, where the focus has been more 36 | on the benefits of logical control plane centralization rather than 37 | open interfaces to the data plane. 
Network virtualization and SD-WAN 38 | (software-defined wide area networks) have both had considerable 39 | success in the enterprise, as discussed below. 40 | 41 | 2.1 Network Virtualization 42 | --------------------------- 43 | 44 | The first widely-adopted use case for SDN was to virtualize the 45 | network. Virtual networks, including both *Virtual Private Networks 46 | (VPNs)* and *Virtual Local Area Networks (VLANs)*, have been a part of 47 | the Internet for years. VLANs have historically proven useful within 48 | enterprises, where they are used to isolate different organizational 49 | groups, such as departments or labs, giving each of them the 50 | appearance of having their own private LAN. However, these early forms 51 | of virtualization were quite limited in scope and lacked many of the 52 | advantages of SDN. You could think of them as virtualizing the address 53 | space of a network but not all its other properties, such as firewall 54 | policies or higher-level network services like load balancing. 55 | 56 | The original idea behind using SDN to create virtual networks is 57 | widely credited to the team at Nicira, whose approach is described 58 | in an NSDI paper by Teemu Koponen and colleagues. The key insight was 59 | that modern clouds required networks that could be programmatically 60 | created, managed, and torn down, without a sysadmin having to manually 61 | configure, say, VLAN tags on some number of network switches. By 62 | separating the control plane from the data plane, and logically 63 | centralizing the control plane, it became possible to expose a single 64 | API entry point for the creation, modification, and deletion of 65 | virtual networks. This meant that the same automation systems that 66 | were being used to provision compute and storage capacity in a cloud 67 | (such as OpenStack at the time) could now programmatically provision a 68 | virtual network with appropriate policies to interconnect those other 69 | resources. 
70 | 71 | .. admonition:: Further Reading 72 | 73 | T. Koponen et al. `Network Virtualization in Multi-tenant 74 | Datacenters 75 | `__. 76 | NSDI, April, 2014. 77 | 78 | The rise of network virtualization followed by several years the rise 79 | of compute virtualization, and was very much enabled by it. Compute 80 | virtualization made manual server provisioning a thing of the past, 81 | and exposed the manual and time-consuming processes of network 82 | configuration as the "long pole" in delivering a cloud 83 | service. Virtual machine migration, which enabled running VMs to move from one 84 | network location to another (taking their IP addresses with them), 85 | further exposed the limitations of manual network configuration. This 86 | need to automate network provisioning was first recognized by large 87 | cloud providers but eventually became mainstream in enterprise 88 | datacenters. 89 | 90 | As microservices and container-based systems such as Kubernetes have 91 | gained in popularity, network virtualization has continued to evolve 92 | to meet the needs of these environments. There are a range of open 93 | source network "plugins" (Calico, Flannel, Antrea, 94 | etc.) that provide network virtualization services for Kubernetes. 95 | 96 | Because network virtualization set out to deliver a full set of 97 | network services in a programmatic way, its impact went beyond the 98 | simplification and automation of network provisioning. As virtual 99 | networks became lightweight objects, created and destroyed as needed, 100 | with a full set of services (such as stateful firewalling, deep-packet 101 | inspection, and so on), a new approach to network security was 102 | enabled. Rather than adding security features after the network was 103 | created, security features could be created as an inherent part of the 104 | network itself. 
Furthermore, with no limit on how many virtual 105 | networks could be created, an approach known as *microsegmentation* 106 | took hold. This entails the creation of fine-grained, isolated 107 | networks (microsegments) specific to the needs of, say, a group of 108 | processes implementing a single distributed application. 109 | Microsegmentation offers clear benefits over prior approaches to 110 | network security, dramatically reducing the attack surface and the 111 | impact of attacks spreading throughout an enterprise or data center. 112 | 113 | .. sidebar:: Bringing SDN to Life 114 | 115 | *As we saw in Chapter 1, the ideas behind SDN had been in the 116 | works for years, but there were two related events 117 | that, looking back, had a significant impact in bringing the 118 | concept of programmable networks from theory to practice. First 119 | was the 2007 founding of the commercial startup Nicira 120 | Networks. Nicira was founded by three of the acknowledged 121 | pioneers of SDN: Martin Casado, Scott Shenker, and Nick 122 | McKeown. While Nicira was founded to make commercial use of 123 | SDN, as with many startups, it took a while to find the ideal 124 | product for the marketplace. In the end, it was Network 125 | Virtualization that became the industry's first successful 126 | application of SDN.
Nicira's network virtualization platform 127 | first shipped in 2011, establishing the category and 128 | ultimately paving the way for VMware's acquisition of the 129 | company and subsequent development of VMware NSX.* 130 | 131 | *At around the same time, McKeown and Shenker also created 132 | three non-profit organizations to catalyze the SDN 133 | transformation across the networking industry: the Open 134 | Networking Foundation (ONF) took on responsibility for 135 | advancing the cause of network disaggregation, including 136 | development of the OpenFlow standard; the Open Networking 137 | Laboratory (ON.Lab) was created to produce open source 138 | SDN-based solutions and platforms; and the Open Networking 139 | Summit (ONS) was created as a conference platform to bring 140 | together academics and practitioners interested in SDN. In 141 | 2018, ONF and ON.Lab merged, and the combined organization has 142 | focused on building the open source software that is 143 | highlighted throughout this book.* 144 | 145 | *Of course there have been many other startups, conferences, 146 | and consortia that have driven the development of SDN to where 147 | it is today, and the effects of their work can be seen 148 | throughout this chapter.* 149 | 150 | 151 | It's worth noting that to create virtual networks as we have 152 | described, it is necessary to encapsulate packets from the virtual 153 | networks in a way that lets them traverse the underlying physical 154 | network. As a simple example, a virtual network can have its own 155 | private address space which is decoupled from the underlying physical 156 | address space. For this reason, virtual networks have used a range of 157 | encapsulation techniques, of which VXLAN (briefly discussed in 158 | Chapter 1) is probably the most well 159 | known. In recent years, a more flexible encapsulation called GENEVE 160 | (Generic Network Virtualization Encapsulation) has emerged. 161 | 162 | .. _fig-nv-over: 163 | .. 
figure:: figures/Slide44.png 164 | :width: 450px 165 | :align: center 166 | 167 | An example network virtualization system 168 | 169 | A typical network virtualization system looks something like 170 | :numref:`Figure %s `. The Network Virtualization 171 | Controller is an SDN controller that exposes a northbound API by which 172 | networks can be created, monitored and modified. It connects to 173 | virtual switches running on hosts–in this case, hypervisors supporting 174 | virtual machines. Virtual networks are created by programming the 175 | virtual switches to forward packets, with appropriate encapsulation, 176 | from host to host across the underlay network. 177 | 178 | 179 | There have been reasonable debates about whether network 180 | virtualization is really SDN. Certainly it displays many of the 181 | properties we discussed in the previous chapter—the original Nicira 182 | network virtualization platform even used OpenFlow to communicate 183 | between its central controller and the data plane elements. And the 184 | centralization benefits of SDN are at the core of what made network 185 | virtualization possible, particularly as an enabler of network 186 | automation. On the other hand, network virtualization has not really 187 | enabled the disaggregation of networks envisioned by SDN: the 188 | controllers and the switches in a network virtualization system are 189 | typically quite tightly integrated using proprietary signalling methods 190 | rather than an open interface. And because the focus of network 191 | virtualization has been on connecting virtual machines and containers, 192 | it is usually implemented as an overlay among the servers on which 193 | those computing abstractions are implemented. Sitting underneath that 194 | overlay is a physical network, which network virtualization just takes 195 | as given (and that physical network need not implement SDN at 196 | all). 
197 | 198 | This observation about different aspects of SDN being implemented in 199 | switches versus end hosts is an important one that we return to in 200 | Section 3.1 (where we outline the overall SDN architecture), and again 201 | in Chapter 8 (where we describe Network Virtualization in more detail). 202 | 203 | 204 | 2.2 Switching Fabrics 205 | ---------------------------- 206 | 207 | The predominant use case for pure play SDN is within cloud 208 | datacenters, where for reasons of both lowering costs and improving 209 | feature velocity, cloud providers have moved away from proprietary 210 | switches (i.e., those traditionally sold by network vendors), in favor 211 | of bare-metal switches built using merchant silicon switching 212 | chips. These cloud providers then control the *switching fabric* that 213 | interconnects their servers entirely in software. This is the use case 214 | we explore in-depth throughout this book, so for now we give only a 215 | brief introduction. 216 | 217 | A datacenter switching fabric is a network often designed according to 218 | a *leaf-spine* topology. The basic idea is illustrated by the small 219 | 4-rack example shown in :numref:`Figure %s `. 220 | Each rack has a *Top-of-Rack 221 | (ToR)* switch that interconnects the servers in that rack; these are 222 | referred to as the *leaf* switches of the fabric. (There are typically 223 | two such ToR switches per rack for resilience, but the figure shows 224 | only one for simplicity.) Each leaf switch then connects to a subset 225 | of available *spine* switches, with two requirements: (1) that there 226 | be multiple paths between any pair of racks, and (2) that each 227 | rack-to-rack path is two-hops (i.e., via a single intermediate spine 228 | switch). 
Note that this means in leaf-spine designs like the one shown in 229 | :numref:`Figure %s <fig-leaf-spine>`, every server-to-server path is 230 | either two hops (server-leaf-server in the intra-rack case) or four 231 | hops (server-leaf-spine-leaf-server in the inter-rack case). 232 | 233 | .. _fig-leaf-spine: 234 | .. figure:: figures/Slide20.png 235 | :width: 450px 236 | :align: center 237 | 238 | Example of a leaf-spine switching fabric common to cloud 239 | datacenters and other clusters, such as on-premises edge clouds. 240 | 241 | The main fabric-control software sets up L2 forwarding (bridging) 242 | within a server-rack, and L3 forwarding (routing) across racks. The 243 | use of L3 down to the ToR switches is a well-known concept in 244 | leaf-spine fabrics, mainly due to L3 scaling better than L2. In 245 | such cases, the ToRs (leaves) route traffic by hashing IP flows to 246 | different spines using *Equal-Cost Multipath (ECMP)* forwarding. 247 | Because every ToR is two hops away from every other ToR, there are 248 | multiple such equal-cost paths. (Internally, the control software 249 | takes advantage of label switching concepts similar to those used by 250 | MPLS.) Having the fabric control software also provide L2-bridging 251 | comes from the need to support legacy workloads that often expect to 252 | communicate over an L2 network. There is much more to implementing a 253 | leaf-spine fabric, but we postpone a more complete description until 254 | Chapter 7, where we describe the specifics of the SD-Fabric 255 | implementation. 256 | 257 | 258 | 2.3 Traffic Engineering for WANs 259 | -------------------------------- 260 | 261 | Another cloud-inspired use case is traffic engineering applied to the 262 | wide-area links between datacenters. For example, Google has publicly 263 | described their private backbone, called B4, which is built entirely 264 | using bare-metal switches and SDN. 
Similarly, Microsoft has described 265 | an approach to interconnecting their data centers called SWAN. A 266 | central component of both B4 and SWAN is a 267 | *Traffic Engineering (TE)* control program that provisions the network 268 | according to the needs of various classes of applications. 269 | 270 | The idea of traffic engineering for packet-switched networks is almost 271 | as old as packet switching itself, with some ideas of traffic-aware 272 | routing having been tried in the Arpanet. However, traffic engineering 273 | only really became mainstream for the Internet backbone with the 274 | advent of MPLS, which provides a set of tools to steer traffic to 275 | balance load across different paths. However, a notable shortcoming of 276 | MPLS-based TE is that path calculation, like traditional routing, is a 277 | fully distributed process. Central planning tools are common but the 278 | real-time management of MPLS paths remains fully distributed. This 279 | means that it is nearly impossible to achieve any sort of global 280 | optimization, as the path calculation algorithms—which kick in any 281 | time a link changes status, or as traffic loads change—are making 282 | local choices about what seems best. 283 | 284 | Consider the example in :numref:`Figure %s <fig-te-example>`. Assume 285 | that all links are of unit capacity and we are trying to find paths 286 | for three unit flows of traffic. In the figure on the left, Flow A is 287 | placed first and picks one of the two shortest paths available. Flow B 288 | is placed next and takes the shortest remaining path, as the 289 | single-hop path is already filled by Flow A. When placing Flow C last, 290 | there is no choice but the long path. But a central algorithm that 291 | looked at all three flows at once and tried to place them optimally 292 | would end up with the much less wasteful set of paths shown on the 293 | right-hand side of the figure. 
While this is a contrived example, 294 | sub-optimal outcomes as shown on the left are unavoidable when there 295 | is no central view of traffic. 296 | 297 | .. _fig-te-example: 298 | .. figure:: figures/Slide53.png 299 | :width: 600px 300 | :align: center 301 | 302 | Example of non-optimal traffic engineering (left) and optimal 303 | placement (right). 304 | 305 | B4 and SWAN recognize this shortcoming and move the path calculation to a 306 | logically centralized SDN controller. When a link fails, for example, 307 | the controller calculates a new mapping of traffic demands onto 308 | available links, and programs the switches to forward traffic flows in 309 | such a way that no link is overloaded. 310 | 311 | Over many years of operation, these approaches have become more sophisticated. For 312 | example, B4 evolved from treating all traffic equally to supporting a 313 | range of traffic classes with different levels of tolerance to delay 314 | and availability requirements. Examples of traffic classes 315 | included: (1) copying user data (e.g., email, documents, audio/video) 316 | to remote datacenters for availability; (2) accessing remote storage 317 | by computations that run over distributed data sources; and (3) 318 | pushing large-scale data to synchronize state across multiple 319 | datacenters. In this example, user-data represents the lowest volume 320 | on B4, is the most latency sensitive, and is of the highest 321 | priority. By breaking traffic up into these classes with different 322 | properties, and running a path calculation algorithm for each one, the 323 | team was able to considerably improve the efficiency of the network, 324 | while still meeting the requirements of the most demanding 325 | applications. 
326 | 327 | Through a combination of centralizing the decision-making process, 328 | programmatically rate-limiting traffic at the senders, and 329 | differentiating classes of traffic, Google has been able to 330 | drive their link utilizations to nearly 100%. This is two to three 331 | times better than the 30-40% average utilization that WAN links are 332 | typically provisioned for, which is necessary to allow those networks 333 | to deal with both traffic bursts and link/switch failures. Microsoft's 334 | reported experience with SWAN was similar. These hyperscale 335 | experiences with SDN show both the value of 336 | being able to customize the network and the power of centralized 337 | control to change networking abstractions. A conversation with 338 | Amin Vahdat, Jennifer Rexford, and David Clark is especially 339 | insightful about the thought process in adopting SDN. 340 | 341 | .. _reading_b4: 342 | .. admonition:: Further Reading 343 | 344 | A. Vahdat, D. Clark, and J. Rexford. `A Purpose-built Global Network: 345 | Google's Move to SDN 346 | `__. 347 | ACM Queue, December 2015. 348 | 349 | 350 | 2.4 Software-Defined WANs 351 | ------------------------- 352 | 353 | Another use-case for SDN that has taken off for enterprise users is 354 | *Software-Defined Wide-Area Networks (SD-WAN)*. Enterprises have for 355 | many years been buying WAN services from telecommunications companies, 356 | mostly to obtain reliable and private network services to interconnect 357 | their many locations–main offices, branch offices, and corporate data 358 | centers. For most of the 21st century the most common technical 359 | approach to building these networks has been MPLS, using a technique 360 | known as MPLS-BGP VPNs (virtual private networks). The rapid rise of 361 | SD-WAN as an alternative to MPLS is another example of the power of 362 | centralized control. 
363 | 364 | Provisioning a VPN using MPLS, while less complex than most earlier 365 | options, still requires some significant local configuration of both 366 | the Customer Edge (CE) router located at each customer site, and the 367 | Provider Edge (PE) router to which that site would be connected. In 368 | addition, it would typically require the provisioning of a circuit 369 | from the customer site to the nearest point of presence for the 370 | appropriate Telco. 371 | 372 | With SD-WAN, there was a realization that VPNs lend themselves to 373 | centralized configuration. An enterprise wants its sites—and only its 374 | authorized sites—to be interconnected, and it typically wants to apply 375 | a set of policies regarding security, traffic prioritization, access 376 | to shared services and so on. These can be input to a central 377 | controller, which can then push out all the necessary configuration to 378 | a switch located at the appropriate office. Rather than manually 379 | configuring a CE and a PE every time a new site is added, it is 380 | possible to achieve "zero-touch" provisioning: an appliance is shipped 381 | to the new site with nothing more than a certificate and an address to 382 | contact, which it then uses to contact the central controller and 383 | obtain all the configuration it needs. Changes to policy, which might 384 | affect many sites, can be input centrally and pushed out to all 385 | affected sites. An example policy would be *"put YouTube traffic into 386 | the lowest priority traffic class"* or *"allow direct access to a given 387 | cloud service from all branch offices"*. The idea is illustrated in 388 | :numref:`Figure %s <fig-sd-wan>`. 389 | 390 | .. _fig-sd-wan: 391 | .. figure:: figures/Slide43.png 392 | :width: 600px 393 | :align: center 394 | 395 | An SD-WAN controller receives policies centrally and pushes them 396 | out to edge switches at various sites. 
The switches build an 397 | overlay of tunnels over the Internet or other physical networks, 398 | and implement policies including allowing direct access to cloud 399 | services. 400 | 401 | 402 | Note that the "private" part of the VPN is generally achieved by the 403 | creation of encrypted tunnels between locations. This is another 404 | example of a task that is painful to set up using traditional 405 | box-by-box configuration but easy to achieve when all switches are 406 | receiving their configuration from a central controller. 407 | 408 | Many factors that are external to SDN came into play to make SD-WAN a 409 | compelling option. One of these was the ubiquity of broadband Internet 410 | access, meaning that there is no longer a reason to provision a 411 | dedicated circuit to connect a remote site, with the corresponding 412 | time and cost to install. But the privacy issue had to be solved 413 | before that could happen–as it was, using centrally managed, encrypted tunnels. Another was the increasing 414 | reliance on cloud services such as Office365 or Salesforce.com, which 415 | have tended to replace on-premises applications in corporate data centers. It 416 | seems natural that you would choose to access those services directly 417 | from an Internet-connected branch, but traditional VPNs would 418 | *backhaul* traffic to a central site before sending it out to the 419 | Internet, precisely so that security could be controlled 420 | centrally. With SD-WAN, the central control over security policy is achieved, while the data 421 | plane remains fully distributed–meaning that remote sites can directly 422 | connect to the cloud services without backhaul. This is yet another 423 | example of how separating the control and data planes leads to a new 424 | network architecture. 425 | 426 | As with some of the other use cases, SD-WAN is not necessarily doing 427 | everything that SDN promised. 
The control plane to data plane 428 | communication channel tends to be proprietary, and, like network 429 | virtualization, the SD-WAN solutions are overlay networks running on 430 | top of traditional networks. Nevertheless, SD-WAN has opened up a path 431 | for innovation because both the edge devices and the control planes 432 | are implemented in software, and centralization has offered new ways 433 | of tackling an old problem. Furthermore, there is plenty of competition among 434 | the players in the SD-WAN marketplace. 435 | 436 | 2.5 Access Networks 437 | ------------------------- 438 | 439 | Access networks that implement the *last mile* connecting homes, 440 | businesses, and mobile devices to the Internet are another opportunity 441 | to apply SDN principles. Example access network technologies include 442 | *Passive Optical Networks (PON)*, colloquially known as 443 | fiber-to-the-home, and the *Radio Access Network (RAN)* at the heart 444 | of the 4G/5G cellular network. 445 | 446 | What’s interesting about these use cases is that unlike all the 447 | others—which effectively open general-purpose switches to programmable 448 | control—access networks are typically built from special-purpose 449 | hardware devices. The challenge is to transform these purpose-built 450 | devices into their merchant silicon/bare-metal counterparts, so they 451 | can be controlled by software. In the case of wired networks like PON, 452 | there are two such devices: *Optical Line Terminals (OLT)* and 453 | *Broadband Network Gateways (BNG)*. In the case of the cellular 454 | network, there are also two relevant legacy components: *eNodeB* (the 455 | RAN base station) and the *Enhanced Packet Core (EPC)*. A brief 456 | introduction is available online if you are not familiar with these 457 | acronyms. 458 | 459 | .. _reading_access: 460 | .. admonition:: Further Reading 461 | 462 | `Access Networks 463 | `__. 464 | *Computer Networks: A Systems Approach*, 2020. 
465 | 466 | Because these devices are purpose-built, not to mention closed and 467 | proprietary, they would seem to be worst-case examples for applying 468 | SDN principles. But that also means they represent an opportunity for 469 | the biggest payoff, and it is for precisely this reason that large 470 | network operators are actively pursuing software-defined PON and RAN 471 | networks. This initiative is sometimes referred to as *CORD (Central 472 | Office Re-architected as a Datacenter)* and has been the subject of 473 | much business analysis, including a comprehensive report by 474 | A.D. Little. 475 | 476 | .. _reading_cord: 477 | .. admonition:: Further Reading 478 | 479 | `Who Dares Wins! How Access Transformation Can Fast-Track Evolution 480 | of Operator Production Platforms 481 | `__. *A.D. Little 482 | Report*, September 2019. 483 | 484 | The central challenge of initiatives like CORD is to disaggregate the 485 | existing legacy devices, so as to isolate the underlying packet 486 | forwarding engine (the central element of the data plane) from the 487 | control plane. Doing so makes it possible to package the former as 488 | commodity hardware and to implement the latter in software. 489 | 490 | Progress disaggregating PON-based access networks is quite far along, 491 | with a solution known as *SEBA (SDN-Enabled Broadband Access)* 492 | currently being deployed in production. Full details are beyond the 493 | scope of this book, but the general idea is to add bare-metal OLT 494 | devices to a cluster similar to the one presented in :numref:`Figure 495 | %s <fig-leaf-spine>`, resulting in a configuration like the one depicted 496 | in :numref:`Figure %s <fig-seba>`. In other words, the cluster 497 | includes a mix of compute servers and access devices, interconnected 498 | by a switching fabric. And just as the *Open Compute Project (OCP)* 499 | has certified bare-metal Ethernet switches, it now also certifies 500 | bare-metal OLT devices. 
Both the fabric switches and access devices 501 | are controlled by a software-defined control plane, with the code that 502 | implements that control plane running on servers in the cluster. 503 | 504 | Moreover, when the fabric is constructed using switches with 505 | programmable pipelines, certain functionality originally provided by 506 | the legacy hardware can be programmed into the switches that comprise 507 | the fabric. For example, BNG-equivalent functionality, which could be 508 | packaged as a *Virtual Network Function (VNF)* running on a 509 | general-purpose processor, is instead programmed directly into a 510 | programmable switch. This practice is sometimes called *VNF 511 | off-loading* because the packet processing is moved from the compute 512 | servers into the switches. This is a great example of what happens 513 | when switch data planes become programmable: developers write software 514 | that is able to take advantage of the hardware in new and unanticipated 515 | ways. 516 | 517 | .. _fig-seba: 518 | .. figure:: figures/Slide21.png 519 | :width: 500px 520 | :align: center 521 | 522 | General hardware architecture of SEBA: SDN-Enabled Broadband 523 | Access. 524 | 525 | Progress on *Software-Defined Radio Access Networks (SD-RAN)* lags 526 | software-defined broadband, with early-stage systems starting to run 527 | in trial deployments. Disaggregating the RAN is a bigger challenge, 528 | but the payoff will likely be even larger, as it leads to a 529 | 5G-empowered edge cloud. We revisit SD-RAN in Chapter 9, but for a 530 | broad introduction to how 5G is being implemented according to SDN 531 | principles, we recommend a companion book. 532 | 533 | .. _reading_5g: 534 | .. admonition:: Further Reading 535 | 536 | L. Peterson and O. Sunay. `5G Mobile Networks: A Systems Approach 537 | `__. June 2020. 
538 | 539 | The bottom line is that the effort to apply SDN principles to both 540 | fiber and mobile access networks starts with the same building block 541 | components described throughout this book. We will highlight where 542 | such software-defined access networks “plug into” the SDN software 543 | stack as we work our way through the details, with a full account 544 | presented in Chapter 9. 545 | 546 | 2.6 Network Telemetry 547 | --------------------- 548 | 549 | We conclude this overview of SDN use cases by looking at a recent 550 | example made possible by the introduction of programmable forwarding 551 | pipelines: *In-Band Network Telemetry (INT)*. The idea of INT is to 552 | program the forwarding pipeline to collect network state as packets 553 | are being processed (i.e., “in-band”). This is in contrast to the 554 | conventional monitoring done by the control plane by reading various 555 | fixed counters (e.g., packets received/transmitted) or sampling 556 | subsets of packets (e.g., sFlow). 557 | 558 | In the INT approach, telemetry “instructions” are encoded into packet 559 | header fields, and then processed by network switches as they flow 560 | through the forwarding pipeline. These instructions tell an 561 | INT-capable device what state to collect, and then how to write 562 | that state into the packet as it transits the network. INT traffic 563 | sources (e.g., applications, end-host networking stacks, 564 | hypervisors) can embed the instructions either in normal data packets 565 | or in special probe packets. Similarly, INT traffic sinks retrieve and 566 | report the collected results of these instructions, allowing the 567 | traffic sinks to monitor the exact data plane state that the packets 568 | observed (experienced) while being forwarded. 569 | 570 | The idea is illustrated in :numref:`Figure %s <fig-int>`, which shows 571 | an example packet traversing a path from source switch *S1* to sink 572 | switch *S5* via transit switch *S2*. 
The INT metadata added by each 573 | switch along the path both indicates what data is to be collected for the 574 | packet, and records the corresponding data for each switch. 575 | 576 | .. _fig-int: 577 | .. figure:: figures/Slide38.png 578 | :width: 700px 579 | :align: center 580 | 581 | Illustration of In-Band Network Telemetry (INT), with each packet 582 | collecting measurement data as it traverses the network. 583 | 584 | INT is still early-stage, but it has the potential to provide 585 | qualitatively deeper insights into traffic patterns and the root 586 | causes of network failures. For example, INT can be used to measure 587 | and record the queuing delay individual packets experience while 588 | traversing a sequence of switches along an end-to-end path, with a 589 | packet like the one shown in the figure reporting: *"I visited Switch 590 | 1 @780ns, Switch 2 @1.3µs, Switch 5 @2.4µs."* This information can be 591 | used, for example, to detect *microbursts*—queuing delays measured 592 | over millisecond or even sub-millisecond time scales—as reported by 593 | Xiaoqi Chen and colleagues. It is even possible to correlate this 594 | information across packet flows that followed different routes, so as 595 | to determine which flows shared buffer capacity at each switch. 596 | 597 | .. _reading_int: 598 | .. admonition:: Further Reading 599 | 600 | X. Chen, et al. `Fine-grained queue measurement in the data plane 601 | `__. ACM CoNEXT'19, December 2019. 602 | 603 | Similarly, packets can report the decision-making process that 604 | directed their delivery, for example, with something like: *"In Switch 605 | 1, I followed rules 75 and 250; in Switch 2, I followed rules 3 and 606 | 80."* This opens the door to using INT to verify that the data plane 607 | is faithfully executing the forwarding behavior the network operator 608 | intended. We return to the potential of INT to impact how we build and 609 | operate networks in the concluding chapter of this book. 
610 | 611 | This use case illustrates once again a potential benefit of SDN: the 612 | ability to try out new ideas that would have in the past been 613 | infeasible. With traditional fixed-function ASICs doing the packet 614 | forwarding, you could never get the chance to try an idea like INT to 615 | see if the benefits justify the cost. It is this freedom to experiment 616 | and innovate that will lead to lasting benefits from SDN in the long 617 | run. 618 | --------------------------------------------------------------------------------