├── .gitignore ├── LICENSE ├── README.md ├── examples ├── README.md ├── gonlmon │ ├── README.md │ ├── go.mod │ ├── go.sum │ └── gonlmon.go ├── gonlsub │ ├── README.md │ ├── go.mod │ ├── go.sum │ └── gonlsub.go └── netlink-c │ ├── a.out │ └── nl-subsc.c ├── golang └── README.md ├── ietf └── README.md ├── linux ├── README.md ├── figures │ ├── netlink-nexthop-nos.png │ ├── netlink-nexthop-route-nexthop.png │ └── netlink-nexthop-route-object.png ├── iprouting.md ├── linux-initial-setup.md ├── logs │ ├── strace-ip-route-add-nexthop-group.log │ ├── strace-ip-route-add-nexthop.log │ ├── strace-ip-route-add-no-nexthop-multipath.log │ └── strace-ip-route-add-no-nexthop.log ├── netlink-nexthop.md ├── netlink.md └── rtnetlink.md ├── network ├── frrouting │ └── memo-frr.md ├── nic-modules.md ├── perf-local │ └── README.md └── ribfib │ └── frrouting.md ├── nlmon0-ssh.trc ├── p4 └── README.md ├── paper ├── README.md └── sigcomm24-alibaba-hpn.md ├── protobuf ├── README.md └── example │ └── varint.py ├── python ├── README.md └── pytools │ └── sendpacket │ ├── README.md │ ├── pkt_srv6_ipv4_01.yaml │ ├── pkt_srv6_ipv6_01.yaml │ ├── pkt_template.yaml │ └── sendpacket.py ├── sonic └── README.md └── tmp-notes.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 94 | __pypackages__/ 95 | 96 | # Celery stuff 97 | celerybeat-schedule 98 | celerybeat.pid 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nsdevnotes: Network and Software Dev Notes 2 | 3 | - A place to store information and sample code for Network & Software Development. 4 | - Many notes are in Japanese. Please use your favorite translation tools to read them. 5 | - Submit an Issue or Pull Request for discussion and suggestions. 
6 | 7 | Table of Contents 8 | 9 | - [Networking Software](#networking-software) 10 | - [Programming Language, Framework, and Tools](#programming-language-framework-and-tools) 11 | - [Organizations, Technology, and Protocols](#organizations-technology-and-protocols) 12 | 13 | 14 | ## Networking Software 15 | 16 | - [SONiC](sonic) 17 | 18 | ## Programming Language, Framework, and Tools 19 | 20 | - [P4](p4) 21 | - [Python](python) 22 | - [Golang](golang) 23 | 24 | ## Organizations, Technology, and Protocols 25 | 26 | - [IETF](ietf) 27 | - Mobile: 3GPP, ETSI, etc. 28 | 29 | 30 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory stores example code in any language. 4 | This is under the root and not under a specific category (e.g. linux, sonic, golang, etc.) since one might write code in both golang and python about linux netlink. 5 | If your example is specific to one of the categories, then feel free to store it under that category. 6 | -------------------------------------------------------------------------------- /examples/gonlmon/README.md: -------------------------------------------------------------------------------- 1 | # gonlmon 2 | 3 | Golang Netlink Monitoring Example Code. 4 | 5 | Most code was taken from: [gwind/go-netlink-socket-monitor.go](https://gist.github.com/gwind/05f5f649d93e6015cf47ffa2b2fd9713) 6 | 7 | ## How to use 8 | 9 | ``` 10 | sudo go get github.com/vishvananda/netlink/nl 11 | sudo go get github.com/sirupsen/logrus 12 | 13 | nsdevnotes/examples/gonlmon$ go run . 
14 | Starting gonlmon.go 15 | Family, State, InetDiagSockId 16 | -------------------------------------------------------- 17 | tcp, listen, 127.0.0.1:37987 -> 0.0.0.0:0 18 | tcp, listen, 127.0.0.1:33939 -> 0.0.0.0:0 19 | tcp, listen, 10.28.65.1:53 -> 0.0.0.0:0 20 | tcp, listen, 192.168.122.1:53 -> 0.0.0.0:0 21 | tcp, listen, 127.0.0.53:53 -> 0.0.0.0:0 22 | tcp, listen, 0.0.0.0:22 -> 0.0.0.0:0 23 | tcp, established, 127.0.0.1:33939 -> 127.0.0.1:38144 24 | tcp, established, 172.20.105.171:22 -> 192.168.120.35:53048 25 | tcp, established, 127.0.0.1:38148 -> 127.0.0.1:33939 26 | tcp, established, 127.0.0.1:33939 -> 127.0.0.1:38148 27 | tcp, established, 127.0.0.1:38144 -> 127.0.0.1:33939 28 | tcp, established, 172.20.105.171:22 -> 192.168.120.35:53067 29 | tcp, established, 172.20.105.171:22 -> 192.168.120.35:53545 30 | tcp, established, 172.20.105.171:22 -> 192.168.120.35:53033 31 | ``` 32 | 33 | 34 | ## Memo 35 | 36 | ### How to init go module 37 | 38 | ``` 39 | ~/sandbox/nsdevnotes/examples/gonlmon$ 40 | 41 | go mod init example/gonlmon 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/gonlmon/go.mod: -------------------------------------------------------------------------------- 1 | module example/gonlmon 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/sirupsen/logrus v1.9.0 // indirect 7 | github.com/vishvananda/netlink v1.1.0 // indirect 8 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect 9 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /examples/gonlmon/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 4 | github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= 5 | github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 8 | github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= 9 | github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= 10 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= 11 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= 12 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 h1:/d2cWp6PSamH4jDPFLyO150psQdqvtoNX8Zjg3AQ31g= 13 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 14 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= 15 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /examples/gonlmon/gonlmon.go: -------------------------------------------------------------------------------- 1 | // Based on gwind/go-netlink-socket-monitor.go 2 | // https://gist.github.com/gwind/05f5f649d93e6015cf47ffa2b2fd9713 3 | 4 | package main 5 | 6 | import ( 7 | "fmt" 8 | "net" 9 | "syscall" 10 | "unsafe" 11 | 12 | "github.com/sirupsen/logrus" 13 | "github.com/vishvananda/netlink/nl" 14 
| ) 15 | 16 | const TCPF_ALL = 0xFFF 17 | 18 | // Extensions: include/uapi/linux/inet_diag.h 19 | const ( 20 | INET_DIAG_NONE = iota 21 | INET_DIAG_MEMINFO 22 | INET_DIAG_INFO 23 | INET_DIAG_VEGASINFO 24 | INET_DIAG_CONG 25 | INET_DIAG_TOS 26 | INET_DIAG_TCLASS 27 | INET_DIAG_SKMEMINFO 28 | INET_DIAG_SHUTDOWN 29 | // Next extenstions cannot be requested in struct inet_diag_req_v2: 30 | // its field idiag_ext has only 8 bits. 31 | INET_DIAG_DCTCPINFO // request as INET_DIAG_VEGASINFO 32 | INET_DIAG_PROTOCOL // response attribute only 33 | INET_DIAG_SKV6ONLY 34 | INET_DIAG_LOCALS 35 | INET_DIAG_PEERS 36 | INET_DIAG_PAD 37 | INET_DIAG_MARK // only with CAP_NET_ADMIN 38 | INET_DIAG_BBRINFO // request as INET_DIAG_VEGASINFO 39 | INET_DIAG_CLASS_ID // request as INET_DIAG_TCLASS 40 | INET_DIAG_MD5SIG 41 | INET_DIAG_ULP_INFO 42 | INET_DIAG_SK_BPF_STORAGES 43 | INET_DIAG_CGROUP_ID 44 | __INET_DIAG_MAX 45 | ) 46 | 47 | // some comments added from: https://man7.org/linux/man-pages/man7/sock_diag.7.html 48 | func main() { 49 | fmt.Println("Starting gonlmon.go") 50 | 51 | // The request starts with a struct nlmsghdr header described in 52 | // netlink(7) with nlmsg_type field set to SOCK_DIAG_BY_FAMILY. 53 | // 54 | // If the nlmsg_flags field of the struct nlmsghdr header has the 55 | // NLM_F_DUMP flag set, it means that a list of sockets is being 56 | // requested; otherwise it is a query about an individual socket. 
57 | req := nl.NewNetlinkRequest(SOCK_DIAG_BY_FAMILY, syscall.NLM_F_DUMP) 58 | 59 | // It is followed by a header specific to the address family that 60 | // starts with a common part shared by all address families: 61 | // struct sock_diag_req { 62 | // __u8 sdiag_family; 63 | // __u8 sdiag_protocol; 64 | // }; 65 | msg := NewInetDiagReqV2( 66 | // For IPv4 and IPv6 sockets, the request is represented in the 67 | // following structure: 68 | // struct inet_diag_req_v2 { 69 | // __u8 sdiag_family; 70 | // __u8 sdiag_protocol; 71 | // __u8 idiag_ext; 72 | // __u8 pad; 73 | // __u32 idiag_states; 74 | // struct inet_diag_sockid id; 75 | // }; 76 | 77 | // sdiag_family: This should be set to either AF_INET or AF_INET6 78 | syscall.AF_INET, // SDiagFamily 79 | // sdiag_protocol: This should be set to one of IPPROTO_TCP, IPPROTO_UDP, or IPPROTO_UDPLITE. 80 | syscall.IPPROTO_TCP, // SDiagProtocol 81 | 82 | // idiag_states (IDiagStates) 83 | // This is a bit mask that defines a filter of socket states. 84 | // Only those sockets whose states are in this mask will be 85 | // reported. Ignored when querying for an individual socket. 86 | // See TcpStatesMap later in this code for list of flags. 
87 | //// Everything 88 | TCPF_ALL, 89 | //// Ignore TCP_SYN_RECV, TCP_TIME_WAIT, TCP_CLOSE, TCP_CLOSE_WAIT 90 | //TCPF_ALL & ^((1< %15s:%5d", id.SrcIP().String(), id.IDiagSPort.Int(), id.DstIP().String(), id.IDiagDPort.Int()) 256 | } 257 | 258 | type InetDiagReqV2 struct { 259 | SDiagFamily uint8 260 | SDiagProtocol uint8 261 | IDiagExt uint8 262 | Pad uint8 263 | IDiagStates uint32 264 | Id InetDiagSockId 265 | } 266 | 267 | func (req *InetDiagReqV2) Serialize() []byte { 268 | return (*(*[SizeofInetDiagReqV2]byte)(unsafe.Pointer(req)))[:] 269 | } 270 | 271 | func (req *InetDiagReqV2) Len() int { 272 | return SizeofInetDiagReqV2 273 | } 274 | 275 | func NewInetDiagReqV2(family, protocol uint8, states uint32) *InetDiagReqV2 { 276 | return &InetDiagReqV2{ 277 | SDiagFamily: family, 278 | SDiagProtocol: protocol, 279 | IDiagStates: states, 280 | } 281 | } 282 | 283 | type InetDiagMsg struct { 284 | IDiagFamily uint8 285 | IDiagState uint8 286 | IDiagTimer uint8 287 | IDiagRetrans uint8 288 | Id InetDiagSockId 289 | IDiagExpires uint32 290 | IDiagRqueue uint32 291 | IDiagWqueue uint32 292 | IDiagUid uint32 293 | IDiagInode uint32 294 | } 295 | 296 | func IndexInetDiagMsg() string { 297 | s := "Family, State, InetDiagSockId\n" 298 | s += "--------------------------------------------------------" 299 | return s 300 | } 301 | 302 | func (msg *InetDiagMsg) String() string { 303 | return fmt.Sprintf("%6s, %11s, %s", 304 | DiagFamilyMap[msg.IDiagFamily], 305 | TcpStatesMap[msg.IDiagState], 306 | msg.Id.String(), 307 | ) 308 | } 309 | 310 | func ParseInetDiagMsg(data []byte) *InetDiagMsg { 311 | return (*InetDiagMsg)(unsafe.Pointer(&data[0])) 312 | } 313 | -------------------------------------------------------------------------------- /examples/gonlsub/README.md: -------------------------------------------------------------------------------- 1 | # gonlsub 2 | 3 | An elementry code written just to practice netlink in Golang which does below: 4 | 5 | - subscribe to group 
`RTNLGRP_IPV4_ROUTE` 6 | - receive broadcast messages 7 | - parse and print netlink / rtnetlink messages 8 | - parse array of RTA_NEXTHOP/RTA_GATEWAY inside RTA_MULTIPATH 9 | 10 | ## Next Hop and Next Hop Group 11 | 12 | - rtmsg type (in NlMsghdr) for Next Hop is `RTM_NEWNEXTHOP` (104). 13 | - So far it looks like this is NOT included in `RTNLGRP_*` thus cannot receive update. 14 | - You can still receive `RTM_NEWROUTE` and `RTM_DELROUTE` using nexthop id 15 | - This message will include Attributes `RTA_NH_ID` and `RTA_MULTIPATH` 16 | 17 | ### route using Next Hop Object 18 | 19 | ``` 20 | >> below messages will not show up even if you Subscribe to `NETLINK_ROUTE`. 21 | > ip nexthop add id 11 via 172.20.105.173 dev eno1 22 | 23 | >> below messages will be received via Subscribe. 24 | > ip route add 10.11.12.13/32 nhid 11 25 | 26 | ----------------------------------- 27 | NlMsghdr | Len:68, Type:RTM_NEWROUTE, Flags:600, Seq:1669595081, Pid:153947 28 | rtmsg: {2 32 0 0 254 3 0 1 0} 29 | rtmsg: RtMsg | 30 | Family: AF_INET (2) 31 | Dst_len: 32 32 | Src_len: 0 33 | Tos: 0 34 | Table: 254 35 | Protocol: RTPROT_BOOT (3) 36 | Scope: RT_SCOPE_UNIVERSE (0) 37 | Type: RTN_UNICAST (1) 38 | Flags: 0 39 | RtAttr | Len:8, Type:RTA_TABLE, Value:[254 0 0 0] 40 | RtAttr | Len:8, Type:RTA_DST, Value:[10 11 12 13] 41 | RtAttr | Len:8, Type:RTA_NH_ID, Value:[11 0 0 0] 42 | RtAttr | Len:8, Type:RTA_GATEWAY, Value:[172 20 105 173] 43 | RtAttr | Len:8, Type:RTA_OIF, Value:[5 0 0 0] 44 | ``` 45 | 46 | ### route using Next Hop Group 47 | 48 | `RTA_MULTIPATH` will be included when `RTA_NH_ID` is pointing to Next Hop Group. (nexthop id 3 in below example) 49 | 50 | Note that you do NOT need to set `RTA_MULTIPATH` in sendmsg to configure route using nexthop group. 51 | But when the route update is announced, it will include `RTA_MULTIPATH`. (For backward compatibility?) 52 | 53 | ``` 54 | >> below messages will not show up even if you Subscribe to `NETLINK_ROUTE`. 
55 | > ip nexthop add id 1 via 172.20.105.172 dev eno1 56 | > ip nexthop add id 2 via 172.20.105.173 dev eno1 57 | > ip nexthop add id 3 group 1/2 58 | 59 | >> below messages will be received via Subscribe. 60 | > ip route add 10.11.12.13/32 nhid 3 61 | > ip route del 10.11.12.13/32 62 | 63 | ----------------------------------- 64 | NlMsghdr | Len:88, Type:RTM_NEWROUTE, Flags:600, Seq:1669611617, Pid:167576 65 | rtmsg: {2 32 0 0 254 3 0 1 0} 66 | rtmsg: RtMsg | 67 | Family: AF_INET (2) 68 | Dst_len: 32 69 | Src_len: 0 70 | Tos: 0 71 | Table: 254 72 | Protocol: RTPROT_BOOT (3) 73 | Scope: RT_SCOPE_UNIVERSE (0) 74 | Type: RTN_UNICAST (1) 75 | Flags: 0 76 | RtAttr | Len:8, Type:RTA_TABLE, Value:254 77 | RtAttr | Len:8, Type:RTA_DST, IPv4:10.11.12.13 78 | RtAttr | Len:8, Type:RTA_NH_ID, Value:3 79 | RtAttr | Len:36, Type:RTA_MULTIPATH 80 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 81 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.172 82 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 83 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.173 84 | ----------------------------------- 85 | NlMsghdr | Len:88, Type:RTM_DELROUTE, Flags:0, Seq:1669611617, Pid:167577 86 | rtmsg: {2 32 0 0 254 3 0 1 0} 87 | rtmsg: RtMsg | 88 | Family: AF_INET (2) 89 | Dst_len: 32 90 | Src_len: 0 91 | Tos: 0 92 | Table: 254 93 | Protocol: RTPROT_BOOT (3) 94 | Scope: RT_SCOPE_UNIVERSE (0) 95 | Type: RTN_UNICAST (1) 96 | Flags: 0 97 | RtAttr | Len:8, Type:RTA_TABLE, Value:254 98 | RtAttr | Len:8, Type:RTA_DST, IPv4:10.11.12.13 99 | RtAttr | Len:8, Type:RTA_NH_ID, Value:3 100 | RtAttr | Len:36, Type:RTA_MULTIPATH 101 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 102 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.172 103 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 104 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.173 105 | 106 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=52, 
nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669711532, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 3], [{nla_len=20, nla_type=NHA_GROUP}, [{id=1, weight=0}, {id=2, weight=0}]]]], iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 107 | ``` 108 | 109 | ## IPv4 ROUTE (no next hop object) 110 | 111 | ``` 112 | > $ sudo ip route del 10.11.11.99/32 113 | > $ sudo ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1 114 | 115 | nsdevnotes/examples/gonlsub$ go run gonlsub.go 116 | Starting gonlsub.go 117 | ----------------------------------- 118 | msg.Header {60 24 1536 1669553033 149079} 119 | NlMsghdr | Len:60, Type:RTM_NEWROUTE, Flags:600, Seq:1669553033, Pid:149079 120 | rtmsg: {2 32 0 0 254 3 0 1 0} 121 | rtmsg: RtMsg | 122 | Family: AF_INET (2) 123 | Dst_len: 32 124 | Src_len: 0 125 | Tos: 0 126 | Table: 254 127 | Protocol: RTPROT_BOOT (3) 128 | Scope: RT_SCOPE_UNIVERSE (0) 129 | Type: RTN_UNICAST (1) 130 | Flags: 0 131 | RtAttr | Len:8, Type:RTA_TABLE, Value:[254 0 0 0] 132 | RtAttr | Len:8, Type:RTA_DST, Value:[10 11 11 99] 133 | RtAttr | Len:8, Type:RTA_GATEWAY, Value:[172 20 104 1] 134 | RtAttr | Len:8, Type:RTA_OIF, Value:[5 0 0 0] 135 | ----------------------------------- 136 | msg.Header {60 25 0 1669553060 149086} 137 | NlMsghdr | Len:60, Type:RTM_DELROUTE, Flags:0, Seq:1669553060, Pid:149086 138 | rtmsg: {2 32 0 0 254 3 0 1 0} 139 | rtmsg: RtMsg | 140 | Family: AF_INET (2) 141 | Dst_len: 32 142 | Src_len: 0 143 | Tos: 0 144 | Table: 254 145 | Protocol: RTPROT_BOOT (3) 146 | Scope: RT_SCOPE_UNIVERSE (0) 147 | Type: RTN_UNICAST (1) 148 | Flags: 0 149 | RtAttr | Len:8, Type:RTA_TABLE, Value:[254 0 0 0] 150 | RtAttr | Len:8, Type:RTA_DST, Value:[10 11 11 99] 151 | RtAttr | Len:8, Type:RTA_GATEWAY, Value:[172 20 104 1] 152 | RtAttr | Len:8, Type:RTA_OIF, Value:[5 0 0 0] 153 | ``` 154 | 155 | ## IPv4 
MULTIPATH ROUTE (no next hop object) 156 | 157 | 158 | ``` 159 | > $ ip route add 10.11.11.11/32 \ 160 | nexthop via 172.20.105.174 dev eno1 \ 161 | nexthop via 172.20.105.175 dev eno1 162 | 163 | nsdevnotes/examples/gonlsub$ go run gonlsub.go 164 | Starting gonlsub.go 165 | ----------------------------------- 166 | NlMsghdr | Len:80, Type:RTM_NEWROUTE, Flags:600, Seq:1669864316, Pid:224911 167 | rtmsg: {2 32 0 0 254 3 0 1 0} 168 | rtmsg: RtMsg | 169 | Family: AF_INET (2) 170 | Dst_len: 32 171 | Src_len: 0 172 | Tos: 0 173 | Table: 254 174 | Protocol: RTPROT_BOOT (3) 175 | Scope: RT_SCOPE_UNIVERSE (0) 176 | Type: RTN_UNICAST (1) 177 | Flags: 0 178 | RtAttr | Len:8, Type:RTA_TABLE, Value:254 179 | RtAttr | Len:8, Type:RTA_DST, IPv4:10.11.11.11 180 | RtAttr | Len:36, Type:RTA_MULTIPATH 181 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 182 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.174 183 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 184 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.175 185 | 186 | # strace ip route add 10.11.11.11/32 nexthop via 172.20.105.174 dev eno1 nexthop via 172.20.105.175 dev eno1 187 | 188 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=72, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669864316, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.11")], [{nla_len=36, nla_type=RTA_MULTIPATH}, [[{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174")]], [{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.175")]]]]]], iov_len=72}], 
msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 72 189 | 190 | Netlink Message Type: RTM_NEWROUTE 191 | RT Message: 192 | rtm_family=AF_INET 193 | rtm_dst_len=32 194 | rtm_src_len=0 195 | rtm_tos=0 196 | rtm_table=RT_TABLE_MAIN 197 | rtm_protocol=RTPROT_BOOT 198 | rtm_scope=RT_SCOPE_UNIVERSE 199 | rtm_type=RTN_UNICAST 200 | rtm_flags=0 201 | Netlink Attribute: 202 | {nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.11") 203 | {nla_len=36, nla_type=RTA_MULTIPATH} 204 | {rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")} 205 | {nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174")] 206 | {rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")} 207 | {nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.175") 208 | ``` 209 | 210 | 211 | ## Prefix Len of RTA_DST (rtm_dst_len) 212 | 213 | prefix len will be set in rtmsg: `rtm_dst_len`. 214 | 215 | For example, to set route to `10.11.12.0/24` set 24 to `rtm_dst_len`. 216 | 217 | ``` 218 | > ip route add 10.11.12.0/24 nhid 11 219 | 220 | ----------------------------------- 221 | NlMsghdr | Len:68, Type:RTM_NEWROUTE, Flags:600, Seq:1669596915, Pid:154076 222 | rtmsg: {2 24 0 0 254 3 0 1 0} 223 | rtmsg: RtMsg | 224 | Family: AF_INET (2) 225 | Dst_len: 24 226 | Src_len: 0 227 | Tos: 0 228 | Table: 254 229 | Protocol: RTPROT_BOOT (3) 230 | Scope: RT_SCOPE_UNIVERSE (0) 231 | Type: RTN_UNICAST (1) 232 | Flags: 0 233 | RtAttr | Len:8, Type:RTA_TABLE, Value:[254 0 0 0] 234 | RtAttr | Len:8, Type:RTA_DST, Value:[10 11 12 0] 235 | RtAttr | Len:8, Type:RTA_NH_ID, Value:[11 0 0 0] 236 | RtAttr | Len:8, Type:RTA_GATEWAY, Value:[172 20 105 173] 237 | RtAttr | Len:8, Type:RTA_OIF, Value:[5 0 0 0] 238 | ``` 239 | 240 | ## tshark decode output of RTA_MULTIPATH 241 | 242 | Looks like it doesn't support decoding data part of `RTA_MULTIPATH`. 
243 | 244 | 245 | ``` 246 | $ tshark -v 247 | TShark (Wireshark) 3.2.3 (Git v3.2.3 packaged as 3.2.3-1) 248 | 249 | 250 | Frame 102: 104 bytes on wire (832 bits), 104 bytes captured (832 bits) on interface nlmon0, id 0 251 | Interface id: 0 (nlmon0) 252 | Interface name: nlmon0 253 | Encapsulation type: Linux Netlink (158) 254 | Arrival Time: Nov 28, 2022 02:02:32.821352526 UTC 255 | [Time shift for this packet: 0.000000000 seconds] 256 | Epoch Time: 1669600952.821352526 seconds 257 | [Time delta from previous captured frame: 0.000019221 seconds] 258 | [Time delta from previous displayed frame: 0.000019221 seconds] 259 | [Time since reference or first frame: 46.296049573 seconds] 260 | Frame Number: 102 261 | Frame Length: 104 bytes (832 bits) 262 | Capture Length: 104 bytes (832 bits) 263 | [Frame is marked: False] 264 | [Frame is ignored: False] 265 | [Protocols in frame: netlink:netlink-route] 266 | Linux netlink (cooked header) 267 | Link-layer address type: Netlink (824) 268 | Family: Route (0x0000) 269 | Linux rtnetlink (route netlink) protocol 270 | Netlink message header (type: Add network route) 271 | Length: 88 272 | Message type: Add network route (24) 273 | Flags: 0x0600 274 | .... .... .... ...0 = Request: 0 275 | .... .... .... ..0. = Multipart message: 0 276 | .... .... .... .0.. = Ack: 0 277 | .... .... .... 0... = Echo: 0 278 | .... .... ...0 .... = Dump inconsistent: 0 279 | .... .... ..0. .... = Dump filtered: 0 280 | Sequence: 1669600953 281 | Port ID: 155117 282 | Address family: AF_INET (2) 283 | Length of destination: 24 284 | Length of source: 0 285 | TOS filter: 0x00 286 | Routing table ID: 254 287 | Routing protocol: boot (0x03) 288 | Route origin: global route (0x00) 289 | Route type: Gateway or direct route (0x01) 290 | Route flags: 0x00000000 291 | Attribute: RTA_TABLE 292 | Len: 8 293 | Type: 0x000f, RTA_TABLE (15) 294 | 0... .... .... .... = Nested: 0 295 | .0.. .... .... .... 
= Network byte order: 0 296 | Attribute type: RTA_TABLE (15) 297 | Data: fe000000 298 | Attribute: Route destination address 299 | Len: 8 300 | Type: 0x0001, Route destination address (1) 301 | 0... .... .... .... = Nested: 0 302 | .0.. .... .... .... = Network byte order: 0 303 | Attribute type: Route destination address (1) 304 | Data: 0a0b0c00 305 | Attribute 306 | Len: 8 307 | Type: 0x001e 308 | 0... .... .... .... = Nested: 0 309 | .0.. .... .... .... = Network byte order: 0 310 | Attribute type: Unknown (30) 311 | Data: 03000000 312 | Attribute: RTA_MULTIPATH 313 | Len: 36 314 | Type: 0x0009, RTA_MULTIPATH (9) 315 | 0... .... .... .... = Nested: 0 316 | .0.. .... .... .... = Network byte order: 0 317 | Attribute type: RTA_MULTIPATH (9) 318 | Data: 100000000500000008000500ac1469ac1000000005000000… 319 | ``` -------------------------------------------------------------------------------- /examples/gonlsub/go.mod: -------------------------------------------------------------------------------- 1 | module example/gonlsub 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.9 // indirect 7 | github.com/josharian/native v1.0.0 // indirect 8 | github.com/mdlayher/netlink v1.7.0 // indirect 9 | github.com/mdlayher/socket v0.4.0 // indirect 10 | github.com/sirupsen/logrus v1.9.0 // indirect 11 | github.com/vishvananda/netlink v1.1.0 // indirect 12 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df // indirect 13 | golang.org/x/net v0.2.0 // indirect 14 | golang.org/x/sync v0.1.0 // indirect 15 | golang.org/x/sys v0.2.0 // indirect 16 | ) 17 | -------------------------------------------------------------------------------- /examples/gonlsub/go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/google/go-cmp 
v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 4 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 5 | github.com/josharian/native v1.0.0 h1:Ts/E8zCSEsG17dUqv7joXJFybuMLjQfWE04tsBODTxk= 6 | github.com/josharian/native v1.0.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= 7 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 8 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 9 | github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= 10 | github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= 11 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= 12 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= 13 | golang.org/x/net v0.2.0 h1:sZfSu1wtKLGlWI4ZZayP0ck9Y73K1ynO6gqzTdBVdPU= 14 | golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= 15 | golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= 16 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 17 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 18 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ= 19 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= 21 | golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 22 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 23 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 24 | 
-------------------------------------------------------------------------------- /examples/gonlsub/gonlsub.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "syscall" 7 | 8 | "github.com/vishvananda/netlink/nl" 9 | "golang.org/x/sys/unix" 10 | ) 11 | 12 | // Use syscall package to parse Netlink Messages 13 | // https://pkg.go.dev/syscall#NlMsghdr 14 | // type NlMsghdr struct { 15 | // Len uint32 16 | // Type uint16 17 | // Flags uint16 18 | // Seq uint32 19 | // Pid uint32 20 | // } 21 | // https://pkg.go.dev/syscall#NetlinkMessage 22 | // type NetlinkMessage struct { 23 | // Header NlMsghdr 24 | // Data []byte 25 | // } 26 | // 27 | // type RtMsg struct { 28 | // Family uint8 29 | // Dst_len uint8 30 | // Src_len uint8 31 | // Tos uint8 32 | // Table uint8 33 | // Protocol uint8 34 | // Scope uint8 35 | // Type uint8 36 | // Flags uint32 37 | // } 38 | // 39 | // type NetlinkRouteAttr struct { 40 | // Attr RtAttr 41 | // Value []byte 42 | // } 43 | // 44 | // type RtAttr struct { 45 | // Len uint16 46 | // Type uint16 47 | // } 48 | // 49 | //// functions 50 | // func ParseNetlinkMessage(b []byte) ([]NetlinkMessage, error) 51 | // func ParseNetlinkRouteAttr(m *NetlinkMessage) ([]NetlinkRouteAttr, error) 52 | // 53 | //// RTNLGRP_* used to Subscribe 54 | // RTNLGRP_NONE = 0x0 55 | // RTNLGRP_LINK = 0x1 56 | // RTNLGRP_NOTIFY = 0x2 57 | // RTNLGRP_NEIGH = 0x3 58 | // RTNLGRP_TC = 0x4 59 | // RTNLGRP_IPV4_IFADDR = 0x5 60 | // RTNLGRP_IPV4_MROUTE = 0x6 61 | // RTNLGRP_IPV4_ROUTE = 0x7 62 | // RTNLGRP_IPV4_RULE = 0x8 63 | // RTNLGRP_IPV6_IFADDR = 0x9 64 | // RTNLGRP_IPV6_MROUTE = 0xa 65 | // RTNLGRP_IPV6_ROUTE = 0xb 66 | // RTNLGRP_IPV6_IFINFO = 0xc 67 | // RTNLGRP_IPV6_PREFIX = 0x12 68 | // RTNLGRP_IPV6_RULE = 0x13 69 | // RTNLGRP_ND_USEROPT = 0x14 70 | 71 | func main() { 72 | fmt.Println("Starting gonlsub.go") 73 | 74 | // nl_linux.go: func Subscribe(protocol int, 
groups ...uint) (*NetlinkSocket, error) { 75 | // List of unix consts: https://pkg.go.dev/golang.org/x/sys/unix#pkg-constants 76 | 77 | //nlSock, err := nl.Subscribe(unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE) 78 | nlSock, err := nl.Subscribe( 79 | syscall.NETLINK_ROUTE, 80 | syscall.RTNLGRP_NONE, // 0x0 81 | syscall.RTNLGRP_LINK, // 0x1 82 | syscall.RTNLGRP_NOTIFY, // 0x2 83 | syscall.RTNLGRP_NEIGH, // 0x3 84 | syscall.RTNLGRP_TC, // 0x4 85 | syscall.RTNLGRP_IPV4_IFADDR, // 0x5 86 | syscall.RTNLGRP_IPV4_MROUTE, // 0x6 87 | syscall.RTNLGRP_IPV4_ROUTE, // 0x7 88 | syscall.RTNLGRP_IPV4_RULE, // 0x8 89 | syscall.RTNLGRP_IPV6_IFADDR, // 0x9 90 | syscall.RTNLGRP_IPV6_MROUTE, // 0xa 91 | syscall.RTNLGRP_IPV6_ROUTE, // 0xb 92 | syscall.RTNLGRP_IPV6_IFINFO, // 0xc 93 | syscall.RTNLGRP_IPV6_PREFIX, // 0x12 94 | syscall.RTNLGRP_IPV6_RULE, // 0x13 95 | syscall.RTNLGRP_ND_USEROPT, // 0x14 96 | ) 97 | if err != nil { 98 | fmt.Println("Error on creating the socket: %v", err) 99 | } 100 | 101 | nlSock.SetReceiveTimeout(&unix.Timeval{Sec: 1, Usec: 0}) 102 | for { 103 | //msgs, from, err := nlSock.Receive() 104 | msgs, _, _ := nlSock.Receive() 105 | if msgs != nil { 106 | fmt.Println("-----------------------------------") 107 | 108 | for _, msg := range msgs { // msg => NetlinkMessage 109 | //fmt.Printf("msg.Header %v\n", msg.Header) 110 | fmt.Printf("NlMsghdr | Len:%d, Type:%s, Flags:%x, Seq:%v, Pid:%v\n", 111 | msg.Header.Len, 112 | RtmMap[msg.Header.Type], 113 | msg.Header.Flags, 114 | msg.Header.Seq, 115 | msg.Header.Pid, 116 | ) 117 | 118 | myrtm := getMyRtMsg(msg.Data[0:syscall.SizeofRtMsg]) 119 | fmt.Printf("rtmsg: %v\n", myrtm) 120 | fmt.Printf("rtmsg: %s\n", myrtm.String()) 121 | 122 | nras, _ := syscall.ParseNetlinkRouteAttr(&msg) 123 | for _, nra := range nras { 124 | //fmt.Printf("nra: %v\n", nra) 125 | s := fmt.Sprintf("RtAttr | Len:%v, Type:%s", nra.Attr.Len, RtaMap[nra.Attr.Type]) 126 | switch nra.Attr.Type { 127 | case syscall.RTA_MULTIPATH: 128 | // RTA_MULTIPATH 
is array of [RTA_NEXTHOP + RTA_GATEWAY] 129 | parseRtNexthop := func(v []byte) ([]byte) { 130 | l := binary.LittleEndian.Uint16(v[0:2]) 131 | i := array2int32(v[4:8]) 132 | // https://pkg.go.dev/syscall#RtNexthop 133 | rtnexthop := syscall.RtNexthop { 134 | Len: l, // uint16 135 | Flags: uint8(v[2]), // uint8 136 | Hops: uint8(v[3]), // uint8 137 | Ifindex: i, // int32 138 | } 139 | s += fmt.Sprintf("\n | rtnexthop: Len:%v, Flags:%v, Hops:%v, Ifindex:%v", 140 | rtnexthop.Len, 141 | rtnexthop.Flags, 142 | rtnexthop.Hops, 143 | rtnexthop.Ifindex, 144 | ) 145 | // Parse RTAs 146 | v = v[unix.SizeofRtNexthop:] // unix.SizeofRtNexthop(8) 147 | //rtalen := rtnexthop.Len - unix.SizeofRtNexthop 148 | rtalen := binary.LittleEndian.Uint16(v[0:2]) 149 | rtatype := binary.LittleEndian.Uint16(v[2:4]) 150 | s += fmt.Sprintf("\n | RTA: Len:%v, Type:%s", 151 | rtalen, 152 | RtaMap[rtatype], 153 | ) 154 | if rtatype == syscall.RTA_GATEWAY { 155 | if (rtalen-4) == 4 { 156 | s += fmt.Sprintf(", IPv4:%d.%d.%d.%d", v[4], v[5], v[6], v[7]) 157 | } else if (rtalen-4) == 16 { 158 | s += fmt.Sprintf(", IPv6:%x", v[4:rtalen]) 159 | } else { 160 | s += fmt.Sprintf(", UNKOWN:%x", v[4:rtalen]) 161 | } 162 | } else { 163 | s += fmt.Sprintf(", Value:%x", v[4:rtalen]) 164 | } 165 | v = v[rtalen:] 166 | return v 167 | } 168 | rest := nra.Value 169 | for len(rest) > 0 { 170 | rest = parseRtNexthop(rest) 171 | } 172 | case syscall.RTA_DST: 173 | v := nra.Value 174 | l := len(v) 175 | if l == 4 { 176 | s += fmt.Sprintf(", IPv4:%d.%d.%d.%d", v[0], v[1], v[2], v[3]) 177 | } else if l == 16 { 178 | s += fmt.Sprintf(", IPv6:%x", v) 179 | } else { 180 | s += fmt.Sprintf(", UNKOWN:%x", v) 181 | } 182 | case syscall.RTA_TABLE: 183 | i := array2int32(nra.Value[0:4]) 184 | s += fmt.Sprintf(", Value:%v", i) 185 | case 30: // RTA_NH_ID 186 | i := array2int32(nra.Value[0:4]) 187 | s += fmt.Sprintf(", Value:%v", i) 188 | default: 189 | s += fmt.Sprintf(", Value:%v", nra.Value) 190 | } 191 | s += "\n" 192 | 
fmt.Print(s) 193 | } 194 | } 195 | } 196 | } 197 | 198 | //time.Sleep(4 * time.Second) 199 | // Close the socket 200 | nlSock.Close() 201 | } 202 | 203 | var RtmMap = map[uint16]string{ 204 | syscall.RTM_NEWLINK: "RTM_NEWLINK", 205 | syscall.RTM_DELLINK: "RTM_DELLINK", 206 | syscall.RTM_GETLINK: "RTM_GETLINK", 207 | syscall.RTM_SETLINK: "RTM_SETLINK", 208 | syscall.RTM_NEWADDR: "RTM_NEWADDR", 209 | syscall.RTM_DELADDR: "RTM_DELADDR", 210 | syscall.RTM_GETADDR: "RTM_GETADDR", 211 | syscall.RTM_NEWROUTE: "RTM_NEWROUTE", 212 | syscall.RTM_DELROUTE: "RTM_DELROUTE", 213 | syscall.RTM_GETROUTE: "RTM_GETROUTE", 214 | // RTM_*NEXTHOP below are not defined in syscall 215 | 104: "RTM_NEWNEXTHOP", 216 | 105: "RTM_DELNEXTHOP", 217 | 106: "RTM_GETNEXTHOP", 218 | } 219 | 220 | // Routing message attributes (enum rtattr_type_t) 221 | // include/uapi/linux/rtnetlink.h 222 | 223 | var RtaMap = map[uint16]string{ 224 | syscall.RTA_UNSPEC: "RTA_UNSPEC", 225 | syscall.RTA_DST: "RTA_DST", 226 | syscall.RTA_SRC: "RTA_SRC", 227 | syscall.RTA_IIF: "RTA_IIF", 228 | syscall.RTA_OIF: "RTA_OIF", 229 | syscall.RTA_GATEWAY: "RTA_GATEWAY", 230 | syscall.RTA_PRIORITY: "RTA_PRIORITY", 231 | syscall.RTA_PREFSRC: "RTA_PREFSRC", 232 | syscall.RTA_METRICS: "RTA_METRICS", 233 | syscall.RTA_MULTIPATH: "RTA_MULTIPATH", 234 | //RTA_PROTOINFO, /* no longer used */ 235 | syscall.RTA_FLOW: "RTA_FLOW", 236 | syscall.RTA_CACHEINFO: "RTA_CACHEINFO", 237 | //RTA_SESSION, /* no longer used */ 238 | //RTA_MP_ALGO, /* no longer used */ 239 | syscall.RTA_TABLE: "RTA_TABLE", 240 | // RTA_* below are not defined in syscall 241 | 16: "RTA_MARK", 242 | 17: "RTA_MFC_STATS", 243 | 18: "RTA_VIA", 244 | 19: "RTA_NEWDST", 245 | 20: "RTA_PREF", 246 | 21: "RTA_ENCAP_TYPE", 247 | 22: "RTA_ENCAP", 248 | 23: "RTA_EXPIRES", 249 | 24: "RTA_PAD", 250 | 25: "RTA_UID", 251 | 26: "RTA_TTL_PROPAGATE", 252 | 27: "RTA_IP_PROTO", 253 | 28: "RTA_SPORT", 254 | 29: "RTA_DPORT", 255 | 30: "RTA_NH_ID", 256 | //__RTA_MAX 257 | } 258 | 259 | var 
RtMsgFamilyMap = map[uint8]string{ 260 | syscall.AF_INET: "AF_INET", // 0x2 261 | syscall.AF_INET6: "AF_INET6", // 0xa 262 | syscall.AF_PACKET: "AF_PACKET", // 0x11 263 | syscall.AF_ROUTE: "AF_ROUTE", // 0x10 264 | syscall.AF_UNIX: "AF_UNIX", // 0x1 265 | syscall.AF_UNSPEC: "AF_UNSPEC", // 0x0 266 | } 267 | 268 | var RtMsgProtoMap = map[uint8]string{ 269 | syscall.RTPROT_UNSPEC: "RTPROT_UNSPEC", // 0x0 270 | syscall.RTPROT_REDIRECT: "RTPROT_REDIRECT", // 0x1 271 | syscall.RTPROT_KERNEL: "RTPROT_KERNEL", // 0x2 272 | syscall.RTPROT_BOOT: "RTPROT_BOOT", // 0x3 273 | syscall.RTPROT_STATIC: "RTPROT_STATIC", // 0x4 274 | syscall.RTPROT_GATED: "RTPROT_GATED", // 0x8 275 | syscall.RTPROT_RA: "RTPROT_RA", // 0x9 276 | syscall.RTPROT_MRT: "RTPROT_MRT", // 0xa 277 | syscall.RTPROT_ZEBRA: "RTPROT_ZEBRA", // 0xb 278 | syscall.RTPROT_BIRD: "RTPROT_BIRD", // 0xc 279 | syscall.RTPROT_DNROUTED: "RTPROT_DNROUTED", // 0xd 280 | syscall.RTPROT_XORP: "RTPROT_XORP", // 0xe 281 | syscall.RTPROT_NTK: "RTPROT_NTK", // 0xf 282 | syscall.RTPROT_DHCP: "RTPROT_DHCP", // 0x10 283 | } 284 | 285 | var RtMsgScopeMap = map[uint8]string{ 286 | syscall.RT_SCOPE_UNIVERSE: "RT_SCOPE_UNIVERSE", // 0x0 287 | syscall.RT_SCOPE_SITE: "RT_SCOPE_SITE", // 0xc8 288 | syscall.RT_SCOPE_LINK: "RT_SCOPE_LINK", // 0xfd 289 | syscall.RT_SCOPE_HOST: "RT_SCOPE_HOST", // 0xfe 290 | syscall.RT_SCOPE_NOWHERE: "RT_SCOPE_NOWHERE", // 0xff 291 | } 292 | 293 | var RtMsgTypeMap = map[uint8]string{ 294 | syscall.RTN_UNSPEC: "RTN_UNSPEC", // 0x0 295 | syscall.RTN_UNICAST: "RTN_UNICAST", // 0x1 296 | syscall.RTN_LOCAL: "RTN_LOCAL", // 0x2 297 | syscall.RTN_BROADCAST: "RTN_BROADCAST", // 0x3 298 | syscall.RTN_ANYCAST: "RTN_ANYCAST", // 0x4 299 | syscall.RTN_MULTICAST: "RTN_MULTICAST", // 0x5 300 | syscall.RTN_BLACKHOLE: "RTN_BLACKHOLE", // 0x6 301 | syscall.RTN_UNREACHABLE: "RTN_UNREACHABLE", // 0x7 302 | syscall.RTN_PROHIBIT: "RTN_PROHIBIT", // 0x8 303 | syscall.RTN_THROW: "RTN_THROW", // 0x9 304 | syscall.RTN_NAT: "RTN_NAT", 
// 0xa 305 | syscall.RTN_XRESOLVE: "RTN_XRESOLVE", // 0xb 306 | } 307 | 308 | type myRtMsg struct { 309 | Family uint8 310 | Dst_len uint8 311 | Src_len uint8 312 | Tos uint8 313 | Table uint8 314 | Protocol uint8 315 | Scope uint8 316 | Type uint8 317 | Flags uint32 318 | } 319 | 320 | // func getMyRtMsg(rtm syscall.RtMsg) myRtMsg { 321 | func getMyRtMsg(data []byte) myRtMsg { 322 | var r myRtMsg 323 | r.Family = data[0] 324 | r.Dst_len = data[1] 325 | r.Src_len = data[2] 326 | r.Tos = data[3] 327 | r.Table = data[4] 328 | r.Protocol = data[5] 329 | r.Scope = data[6] 330 | r.Type = data[7] 331 | r.Flags = binary.LittleEndian.Uint32(data[8:12]) 332 | 333 | return r 334 | } 335 | 336 | func (m *myRtMsg) String() string { 337 | s := "RtMsg |\n" 338 | s += fmt.Sprintf(" Family: %s (%d)\n", RtMsgFamilyMap[m.Family], m.Family) 339 | s += fmt.Sprintf(" Dst_len: %v\n", m.Dst_len) 340 | s += fmt.Sprintf(" Src_len: %v\n", m.Src_len) 341 | s += fmt.Sprintf(" Tos: %v\n", m.Tos) 342 | s += fmt.Sprintf(" Table: %v\n", m.Table) 343 | s += fmt.Sprintf(" Protocol: %s (%d)\n", RtMsgProtoMap[m.Protocol], m.Protocol) 344 | s += fmt.Sprintf(" Scope: %s (%d)\n", RtMsgScopeMap[m.Scope], m.Scope) 345 | s += fmt.Sprintf(" Type: %s (%d)\n", RtMsgTypeMap[m.Type], m.Type) 346 | s += fmt.Sprintf(" Flags: %v", m.Flags) 347 | return s 348 | } 349 | 350 | func array2int32(v []byte) int32 { 351 | var r int32 352 | r |= int32(v[0]) 353 | r |= int32(v[1]) << 8 354 | r |= int32(v[2]) << 16 355 | r |= int32(v[3]) << 24 356 | return r 357 | } 358 | -------------------------------------------------------------------------------- /examples/netlink-c/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebiken/nsdevnotes/1cd3ee56a75bfce3e826457b4c7bd69742abbc6b/examples/netlink-c/a.out -------------------------------------------------------------------------------- /examples/netlink-c/nl-subsc.c: 
--------------------------------------------------------------------------------
/* NOTE(review): the header names were lost when this file was dumped (only
 * bare "#include" tokens survived). The list below is reconstructed from the
 * symbols the code uses -- confirm against the original file. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/socket.h>
#include <sys/types.h>

#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/*
 * Open a NETLINK_ROUTE socket and bind it to the IPv4 route multicast group
 * (RTMGRP_IPV4_ROUTE). The program only demonstrates the subscription
 * set-up: it does not read any notification and closes the socket again.
 *
 * Returns 0 on success and 1 if the socket cannot be created or bound.
 */
int main(int argc, char **argv) {
    (void)argc; /* unused */
    (void)argv; /* unused */

    int fd; // file descriptor for netlink socket
    struct sockaddr_nl local;

    pid_t pid = getpid();

    fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (fd < 0) {
        // cannot create socket
        perror("socket");
        return 1;
    }

    memset(&local, 0, sizeof(local)); /* fill-in local address information */
    local.nl_family = AF_NETLINK;
    local.nl_pid = pid;
    local.nl_groups = RTMGRP_IPV4_ROUTE;

    if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
        // cannot bind socket
        perror("bind");
        close(fd); /* do not leak the descriptor on the error path */
        return 1;
    }

    close(fd);
    return 0;
}
--------------------------------------------------------------------------------
/golang/README.md:
--------------------------------------------------------------------------------
1 | # Golang
2 |
--------------------------------------------------------------------------------
/ietf/README.md:
--------------------------------------------------------------------------------
1 | # IETF
2 |
3 | > note only summary of current topics of each WG/RG/BoF on this page.
4 | 5 | - [Working Group](#working-group) 6 | - [DMM (Distributed Mobility Management)](#dmm-distributed-mobility-management) 7 | - [SPRING (Source Packet Routing in NetworkinG)](#spring-source-packet-routing-in-networking) 8 | - [Research Group and BoF](#research-group-and-bof) 9 | - [COIN (Computing in the Network)](#coin-computing-in-the-network) 10 | - [CAN (Computing-Aware Networking)](#can-computing-aware-networking) 11 | - [APN (Application-aware Networking)](#apn-application-aware-networking) 12 | 13 | ## Working Group 14 | 15 | ### DMM (Distributed Mobility Management) 16 | 17 | https://www.ietf.org/mailman/listinfo/dmm 18 | 19 | ### SPRING (Source Packet Routing in NetworkinG) 20 | 21 | https://www.ietf.org/mailman/listinfo/spring 22 | 23 | 24 | ## Research Group and BoF 25 | 26 | ### COIN (Computing in the Network) 27 | 28 | https://www.irtf.org/mailman/listinfo/coin 29 | 30 | ### CAN (Computing-Aware Networking) 31 | 32 | Discussion is ongoing on the Dyncast ML: https://www.ietf.org/mailman/listinfo/dyncast 33 | 34 | ### APN (Application-aware Networking) 35 | 36 | https://www.ietf.org/mailman/listinfo/apn 37 | -------------------------------------------------------------------------------- /linux/README.md: -------------------------------------------------------------------------------- 1 | # Linux related notes 2 | 3 | - [Linux IP Routing](iprouting.md) 4 | - [netlink / rtnetlink](netlink.md) 5 | - [netlink/rtnetlink - Next Hop Object & Next Hop Group](netlink-nexthop.md) 6 | - ebtables 7 | -------------------------------------------------------------------------------- /linux/figures/netlink-nexthop-nos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebiken/nsdevnotes/1cd3ee56a75bfce3e826457b4c7bd69742abbc6b/linux/figures/netlink-nexthop-nos.png -------------------------------------------------------------------------------- /linux/figures/netlink-nexthop-route-nexthop.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebiken/nsdevnotes/1cd3ee56a75bfce3e826457b4c7bd69742abbc6b/linux/figures/netlink-nexthop-route-nexthop.png -------------------------------------------------------------------------------- /linux/figures/netlink-nexthop-route-object.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebiken/nsdevnotes/1cd3ee56a75bfce3e826457b4c7bd69742abbc6b/linux/figures/netlink-nexthop-route-object.png -------------------------------------------------------------------------------- /linux/iprouting.md: -------------------------------------------------------------------------------- 1 | # Linux IP Routing 2 | 3 | > 特に言及がない場合は Linux v6.0 をベースに解説しています 4 | 5 | Linux では IP packet を送信する際に Fib (Forwarding Information Base) と呼ばれるテーブルを参照し、宛先アドレスから送信先を検索します。 6 | FibやFibに関連するデータ構造はバージョン毎に変化を続けていますが、中でも Linux v5.3 から導入された Next Hop Object による変更は大きなものです。 7 | 本ページでは、Linux における Fib についての解説を、大きな変更点などを含めて解説します。 8 | 9 | - [TODO](#todo) 10 | - [Route Table](#route-table) 11 | - [Route Entry \& Next Hop のデータ構造](#route-entry--next-hop-のデータ構造) 12 | - [fib\_info \& Next Hop Object (Linux Source Code)](#fib_info--next-hop-object-linux-source-code) 13 | - [Next Hop Group (nh\_group)](#next-hop-group-nh_group) 14 | - [Next Hop Group の設定方法](#next-hop-group-の設定方法) 15 | - [memo: Linux Kernel Source Code snippet](#memo-linux-kernel-source-code-snippet) 16 | 17 | ## TODO 18 | 19 | - IPv6 に関しても調査。特に Linux v5.1/v5.2/v5.3 での fib6_info を中心とした変更(リファクタリング)など 20 | 21 | ## Route Table 22 | 23 | - Reference 24 | - [Guide to IP Layer Network Administration with Linux | 4.8. Routing Tables](http://linux-ip.net/html/routing-tables.html) 25 | 26 | Linux では複数の Route Table が存在し、 `cat /etc/iproute2/rt_tables` で一覧を参照可能です。 27 | 以下 ID は予約されています。 28 | 29 | - 255 local ... Kernelが管理し、通常は見えなくなっている 30 | - 254 main ... 
Table ID を指定せず route を追加した場合追加されるテーブル 31 | - 253 default 32 | - 0 unspec 33 | 34 | 35 | ## Route Entry & Next Hop のデータ構造 36 | 37 | Route Entry とは宛先に到達するために必要な情報を持ち、一般的に宛先(prefix/len)に対応した転送先情報 gateway (gw) と device (dev) を保持します。 38 | 39 | 実装により様々な保持の仕方(データ構造)が存在しますが、ここでは Linux ではどのようなデータ構造となっているのかを見ていきましょう。 40 | 41 | > - この場合の prefix は Longest Prefix Match に利用されマスク(len)を含みます 42 | > - Linuxを含め、gw/dev 以外にも様々な情報を保持する実装が多いですが、ここでは省略します。 43 | 44 | 図:Route Entry の概念図 45 | ![netlink-nexthop-route-object.png](figures/netlink-nexthop-route-object.png) 46 | 47 | Linux 5.2 以前と、Linux 5.3 以降でデータ構造に変更がありました。 48 | 具体的には、以下図の右側のように、Linux 5.3 で Next Hop に関する情報が Route Entry から分離されました。 49 | 50 | これにより、以下のようなメリットが得られました。 51 | 52 | - 追加・更新に必要な時間の短縮 53 | - Next Hop が無い場合、Route追加時に以下操作が毎回必要となる 54 | - gateway address + dev が正しいかの確認(Lookup) 55 | - トンネルインターフェースの場合、状態の確認 56 | - Next Hop の比較・検索(既に存在するか?新規か?) 57 | - Next Hop Group が無い場合、Next Hop 追加・変更・削除時に、全ての route エントリの更新が必要 58 | - リソース(メモリ・SRAM/TCAM)の節約 59 | - 共通の Next Hop を持つ複数の Route Entry が Next Hop を共有可能 60 | 61 | 図:Linux の Route Entry 及び Next Hop Object の概念図 62 | ![netlink-nexthop-route-nexthop.png](figures/netlink-nexthop-route-nexthop.png) 63 | 64 | ## fib_info & Next Hop Object (Linux Source Code) 65 | 66 | Linux Kernel の Source Code ではどのように定義されているか確認しましょう。 67 | 68 | Linux ではルーティングエントリは `ip_fib.h` に定義された `fib_info` に保持されます。 69 | (IPv6の場合は `ip6_fib.h` に定義された `fib6_info`) 70 | 71 | Linux 5.2 までは `fib_info` の `fib_nh` という構造体に nexthop (dev/gw) に関する情報は保持されていますが、Linux 5.3 では `nexthop` が追加されているのが確認できます。 72 | 73 | Linux 5.3 で `nexthop` があれば `fib_nh` は不要ですが、`fib_nh` と `nexthop` の両方がある事により、Linux 5.2 までのデータ構造を想定したコードと新しいコードが並存する事が可能となっています。 74 | 75 | > linux-5.2/include/net/ip_fib.h 76 | ```c 77 | struct fib_info { 78 | ...
79 | int fib_nhs; 80 | bool fib_nh_is_v6; 81 | struct rcu_head rcu; 82 | struct fib_nh fib_nh[0]; 83 | #define fib_dev fib_nh[0].fib_nh_dev 84 | }; 85 | ``` 86 | 87 | 88 | > linux-5.3/include/net/ip_fib.h 89 | ```c 90 | struct fib_info { 91 | ... 92 | int fib_nhs; 93 | bool fib_nh_is_v6; 94 | bool nh_updated; 95 | struct nexthop *nh; 96 | struct rcu_head rcu; 97 | struct fib_nh fib_nh[0]; 98 | }; 99 | ``` 100 | 101 | `nexthop` 構造体は以下のように `nexthop` -> `nh_info` -> `fib_nh_common` 構造体で dev/gw の情報を保持しています。 102 | 103 | > linux-5.3/include/net/nexthop.h 104 | ```c 105 | struct nexthop { 106 | ... 107 | union { 108 | struct nh_info __rcu *nh_info; 109 | struct nh_group __rcu *nh_grp; 110 | }; 111 | }; 112 | 113 | struct nh_info { 114 | ... 115 | u8 family; 116 | ... 117 | union { 118 | struct fib_nh_common fib_nhc; 119 | struct fib_nh fib_nh; 120 | struct fib6_nh fib6_nh; 121 | }; 122 | }; 123 | ``` 124 | 125 | > linux-5.3/include/net/ip_fib.h 126 | ```c 127 | struct fib_nh_common { 128 | ... 129 | struct net_device *nhc_dev; 130 | int nhc_oif; 131 | unsigned char nhc_scope; 132 | u8 nhc_family; 133 | u8 nhc_gw_family; 134 | unsigned char nhc_flags; 135 | struct lwtunnel_state *nhc_lwtstate; 136 | 137 | union { 138 | __be32 ipv4; 139 | struct in6_addr ipv6; 140 | } nhc_gw; 141 | ... 142 | }; 143 | ``` 144 | 145 | ## Next Hop Group (nh_group) 146 | 147 | `nexthop` 構造体には、`nh_info` と `nh_group` が `union` で定義されていました。 148 | `nh_info` ではなく `nh_group` を用いる事により Next Hop が複数ある状態である Multi Path を定義できます。 149 | 150 | 具体的には、`nexthop` -> `nh_group` -> `nh_group_entry` -> `nexthop` -> `nh_info` と定義します。 151 | 通常 `nh_group_entry` は2つ以上となります。 152 | 153 | > linux-5.3/include/net/nexthop.h 154 | ```c 155 | struct nexthop { 156 | ... 
157 | union { 158 | struct nh_info __rcu *nh_info; 159 | struct nh_group __rcu *nh_grp; 160 | }; 161 | }; 162 | ``` 163 | 164 | > linux-5.3/include/net/nexthop.h 165 | ```c 166 | struct nh_group { 167 | u16 num_nh; 168 | bool mpath; 169 | bool has_v4; 170 | struct nh_grp_entry nh_entries[0]; 171 | }; 172 | 173 | struct nh_grp_entry { 174 | struct nexthop *nh; 175 | u8 weight; 176 | atomic_t upper_bound; 177 | 178 | struct list_head nh_list; 179 | struct nexthop *nh_parent; /* nexthop of group with this entry */ 180 | }; 181 | ``` 182 | 183 | ## Next Hop Group の設定方法 184 | 185 | > iproute2 のバージョンに注意 186 | 187 | ip コマンド(iproute2)を用いる事により、nexthop group を用いた (ECMP含む) Multi Path を設定できます。 188 | 具体的には、以下手順となります。 189 | 190 | - nexthop を追加 191 | - nexthop group を追加 192 | - route を追加(nexthop group の id を Next Hop として指定) 193 | 194 | `ip nexthop list` コマンドにより、nexthop が作成されていることが確認できます。 195 | 196 | ``` 197 | > more on `man ip nexthop` 198 | > make sure your iproute2 supports nexthop 199 | $ ip -V 200 | ip utility, iproute2-ss200127 201 | 202 | $ ip ne [tab] 203 | neigh    netconf  netns    nexthop 204 | 205 | > Adds a nexthop group with id 3 using nexthops 206 | > with ids 1 and 2 at equal weight. 
207 | 208 | ip nexthop add id 1 via 172.20.105.172 dev eno1 209 | ip nexthop add id 2 via 172.20.105.173 dev eno1 210 | ip nexthop add id 3 group 1/2 211 | 212 | $ ip nexthop list 213 | id 1 via 172.20.105.172 dev eno1 scope link 214 | id 2 via 172.20.105.173 dev eno1 scope link 215 | id 3 group 1/2 216 | 217 | $ ip route add 10.99.99.99/32 nhid 3 218 | 219 | $ ip route 220 | 10.99.99.99 nhid 3 221 |         nexthop via 172.20.105.172 dev eno1 weight 1 222 |         nexthop via 172.20.105.173 dev eno1 weight 1 223 | ``` 224 | 225 | なお、従来の方法である Next Hop Group を利用せず Multi Path を設定する事も可能です。 226 | (Linux v5.2 以前の設定方法) 227 | 228 | この場合、`fib_info` では `struct nexthop *nh;` ではなく `struct fib_nh fib_nh[0];` が利用されます。 229 | `ip nexthop list` コマンドにより、nexthop が作成されて**いない**ことが確認できます。 230 | 231 | ``` 232 | > If you do not use nexthop id, then it will be 233 | > configured in legacy way (non-nexthop object) 234 | 235 | $ ip route add 10.11.11.11/32 \ 236 |     nexthop via 172.20.105.174 dev eno1 \ 237 |     nexthop via 172.20.105.175 dev eno1 238 | 239 | $ ip route 240 | default via 172.20.104.1 dev eno1 proto static 241 | 10.11.11.11 242 |         nexthop via 172.20.105.174 dev eno1 weight 1 243 |         nexthop via 172.20.105.175 dev eno1 weight 1 244 | 10.99.99.99 nhid 3 245 |         nexthop via 172.20.105.172 dev eno1 weight 1 246 |         nexthop via 172.20.105.173 dev eno1 weight 1 247 | 248 | > Make sure gateways defined using nexthop are only shown 249 | 250 | $ ip nexthop list 251 | id 1 via 172.20.105.172 dev eno1 scope link 252 | id 2 via 172.20.105.173 dev eno1 scope link 253 | id 3 group 1/2 254 | ``` 255 | 256 | ## memo: Linux Kernel Source Code snippet 257 | 258 | > linux-5.2/include/net/ip_fib.h 259 | ```c 260 | struct fib_info { 261 | struct hlist_node fib_hash; 262 | struct hlist_node fib_lhash; 263 | struct net *fib_net; 264 | int fib_treeref; 265 | refcount_t fib_clntref; 266 | unsigned int fib_flags; 267 | unsigned char fib_dead; 268 | unsigned char 
fib_protocol; 269 | unsigned char fib_scope; 270 | unsigned char fib_type; 271 | __be32 fib_prefsrc; 272 | u32 fib_tb_id; 273 | u32 fib_priority; 274 | struct dst_metrics *fib_metrics; 275 | #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] 276 | #define fib_window fib_metrics->metrics[RTAX_WINDOW-1] 277 | #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] 278 | #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] 279 | int fib_nhs; 280 | bool fib_nh_is_v6; 281 | struct rcu_head rcu; 282 | struct fib_nh fib_nh[0]; 283 | #define fib_dev fib_nh[0].fib_nh_dev 284 | }; 285 | ``` 286 | 287 | > linux-5.3/include/net/ip_fib.h 288 | ```c 289 | struct fib_info { 290 | struct hlist_node fib_hash; 291 | struct hlist_node fib_lhash; 292 | struct list_head nh_list; 293 | struct net *fib_net; 294 | int fib_treeref; 295 | refcount_t fib_clntref; 296 | unsigned int fib_flags; 297 | unsigned char fib_dead; 298 | unsigned char fib_protocol; 299 | unsigned char fib_scope; 300 | unsigned char fib_type; 301 | __be32 fib_prefsrc; 302 | u32 fib_tb_id; 303 | u32 fib_priority; 304 | struct dst_metrics *fib_metrics; 305 | #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] 306 | #define fib_window fib_metrics->metrics[RTAX_WINDOW-1] 307 | #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] 308 | #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] 309 | int fib_nhs; 310 | bool fib_nh_is_v6; 311 | bool nh_updated; 312 | struct nexthop *nh; 313 | struct rcu_head rcu; 314 | struct fib_nh fib_nh[0]; 315 | }; 316 | ``` 317 | 318 | > linux-5.3/include/net/nexthop.h 319 | ```c 320 | struct nexthop { 321 | struct rb_node rb_node; /* entry on netns rbtree */ 322 | struct list_head fi_list; /* v4 entries using nh */ 323 | struct list_head f6i_list; /* v6 entries using nh */ 324 | struct list_head grp_list; /* nh group entries using this nh */ 325 | struct net *net; 326 | 327 | u32 id; 328 | 329 | u8 protocol; /* app managing this nh */ 330 | u8 nh_flags; 331 | bool is_group; 332 | 333 | refcount_t refcnt; 
334 | struct rcu_head rcu; 335 | 336 | union { 337 | struct nh_info __rcu *nh_info; 338 | struct nh_group __rcu *nh_grp; 339 | }; 340 | }; 341 | 342 | struct nh_info { 343 | struct hlist_node dev_hash; /* entry on netns devhash */ 344 | struct nexthop *nh_parent; 345 | 346 | u8 family; 347 | bool reject_nh; 348 | 349 | union { 350 | struct fib_nh_common fib_nhc; 351 | struct fib_nh fib_nh; 352 | struct fib6_nh fib6_nh; 353 | }; 354 | }; 355 | ``` 356 | 357 | > linux-5.3/include/net/ip_fib.h 358 | ```c 359 | struct fib_nh_common { 360 | struct net_device *nhc_dev; 361 | int nhc_oif; 362 | unsigned char nhc_scope; 363 | u8 nhc_family; 364 | u8 nhc_gw_family; 365 | unsigned char nhc_flags; 366 | struct lwtunnel_state *nhc_lwtstate; 367 | 368 | union { 369 | __be32 ipv4; 370 | struct in6_addr ipv6; 371 | } nhc_gw; 372 | 373 | int nhc_weight; 374 | atomic_t nhc_upper_bound; 375 | 376 | /* v4 specific, but allows fib6_nh with v4 routes */ 377 | struct rtable __rcu * __percpu *nhc_pcpu_rth_output; 378 | struct rtable __rcu *nhc_rth_input; 379 | struct fnhe_hash_bucket __rcu *nhc_exceptions; 380 | }; 381 | ``` 382 | -------------------------------------------------------------------------------- /linux/linux-initial-setup.md: -------------------------------------------------------------------------------- 1 | # Linux Initial Setup 2 | 3 | > Note to remember initial setup when setting up new server (VM) 4 | 5 | ## vim 6 | 7 | Create `~/.vimrc` 8 | 9 | ``` 10 | source $VIMRUNTIME/defaults.vim 11 | 12 | " disable visual mode 13 | set mouse-=a 14 | ``` 15 | -------------------------------------------------------------------------------- /linux/logs/strace-ip-route-add-nexthop.log: -------------------------------------------------------------------------------- 1 | # strace --version 2 | strace -- version 6.0 3 | Copyright (c) 1991-2022 The strace developers . 4 | This is free software; see the source for copying conditions. 
There is NO 5 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 6 | 7 | Optional features enabled: stack-trace=libunwind no-m32-mpers no-mx32-mpers 8 | 9 | 10 | # strace ip nexthop add id 11 via 172.20.105.173 dev eno1 11 | execve("/usr/sbin/ip", ["ip", "nexthop", "add", "id", "11", "via", "172.20.105.173", "dev", "eno1"], 0x7ffe7f4aa510 /* 18 vars */) = 0 12 | brk(NULL) = 0x55cc7881b000 13 | arch_prctl(0x3001 /* ARCH_??? */, 0x7ffd36761a80) = -1 EINVAL (Invalid argument) 14 | access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) 15 | openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 16 | fstat(3, {st_mode=S_IFREG|0644, st_size=90170, ...}) = 0 17 | mmap(NULL, 90170, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f31b6722000 18 | close(3) = 0 19 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 20 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2005\0\0\0\0\0\0"..., 832) = 832 21 | fstat(3, {st_mode=S_IFREG|0644, st_size=109200, ...}) = 0 22 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b6720000 23 | mmap(NULL, 110976, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b6704000 24 | mmap(0x7f31b6707000, 73728, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f31b6707000 25 | mmap(0x7f31b6719000, 20480, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f31b6719000 26 | mmap(0x7f31b671e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f31b671e000 27 | close(3) = 0 28 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libmnl.so.0", O_RDONLY|O_CLOEXEC) = 3 29 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\31\0\0\0\0\0\0"..., 832) = 832 30 | fstat(3, {st_mode=S_IFREG|0644, st_size=22520, ...}) = 0 31 | mmap(NULL, 2117648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b64fe000 32 | mprotect(0x7f31b6503000, 2093056, PROT_NONE) = 0 33 | 
mmap(0x7f31b6702000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f31b6702000 34 | close(3) = 0 35 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libbsd.so.0", O_RDONLY|O_CLOEXEC) = 3 36 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@N\0\0\0\0\0\0"..., 832) = 832 37 | fstat(3, {st_mode=S_IFREG|0644, st_size=96728, ...}) = 0 38 | mmap(NULL, 102768, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b64e4000 39 | mprotect(0x7f31b64e8000, 77824, PROT_NONE) = 0 40 | mmap(0x7f31b64e8000, 61440, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f31b64e8000 41 | mmap(0x7f31b64f7000, 12288, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f31b64f7000 42 | mmap(0x7f31b64fb000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f31b64fb000 43 | mmap(0x7f31b64fd000, 368, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f31b64fd000 44 | close(3) = 0 45 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3 46 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0\0\0\0\0"..., 832) = 832 47 | fstat(3, {st_mode=S_IFREG|0644, st_size=31120, ...}) = 0 48 | mmap(NULL, 33112, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b64db000 49 | mprotect(0x7f31b64dd000, 20480, PROT_NONE) = 0 50 | mmap(0x7f31b64dd000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f31b64dd000 51 | mmap(0x7f31b64e0000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5000) = 0x7f31b64e0000 52 | mmap(0x7f31b64e2000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f31b64e2000 53 | close(3) = 0 54 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3 55 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \22\0\0\0\0\0\0"..., 832) = 832 56 | fstat(3, {st_mode=S_IFREG|0644, st_size=18848, ...}) = 0 57 | mmap(NULL, 20752, PROT_READ, 
MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b64d5000 58 | mmap(0x7f31b64d6000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f31b64d6000 59 | mmap(0x7f31b64d8000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f31b64d8000 60 | mmap(0x7f31b64d9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f31b64d9000 61 | close(3) = 0 62 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 63 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300A\2\0\0\0\0\0"..., 832) = 832 64 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 65 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 66 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 67 | fstat(3, {st_mode=S_IFREG|0755, st_size=2029592, ...}) = 0 68 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 69 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 70 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 71 | mmap(NULL, 2037344, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b62e3000 72 | mmap(0x7f31b6305000, 1540096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x22000) = 0x7f31b6305000 73 | mmap(0x7f31b647d000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19a000) = 0x7f31b647d000 74 | mmap(0x7f31b64cb000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f31b64cb000 75 | mmap(0x7f31b64d1000, 13920, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f31b64d1000 76 | close(3) = 0 77 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libz.so.1", O_RDONLY|O_CLOEXEC) = 3 78 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\"\0\0\0\0\0\0"..., 832) = 832 
79 | fstat(3, {st_mode=S_IFREG|0644, st_size=108936, ...}) = 0 80 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b62e1000 81 | mmap(NULL, 110776, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f31b62c5000 82 | mprotect(0x7f31b62c7000, 98304, PROT_NONE) = 0 83 | mmap(0x7f31b62c7000, 69632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f31b62c7000 84 | mmap(0x7f31b62d8000, 24576, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f31b62d8000 85 | mmap(0x7f31b62df000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f31b62df000 86 | close(3) = 0 87 | mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f31b62c2000 88 | arch_prctl(ARCH_SET_FS, 0x7f31b62c2740) = 0 89 | mprotect(0x7f31b64cb000, 16384, PROT_READ) = 0 90 | mprotect(0x7f31b62df000, 4096, PROT_READ) = 0 91 | mprotect(0x7f31b64d9000, 4096, PROT_READ) = 0 92 | mprotect(0x7f31b64e2000, 4096, PROT_READ) = 0 93 | mprotect(0x7f31b64fb000, 4096, PROT_READ) = 0 94 | mprotect(0x7f31b6702000, 4096, PROT_READ) = 0 95 | mprotect(0x7f31b671e000, 4096, PROT_READ) = 0 96 | mprotect(0x55cc76884000, 8192, PROT_READ) = 0 97 | mprotect(0x7f31b6766000, 4096, PROT_READ) = 0 98 | munmap(0x7f31b6722000, 90170) = 0 99 | prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 100 | prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? 
*/) = -1 EINVAL (Invalid argument) 101 | prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = -1 EINVAL (Invalid argument) 102 | prctl(PR_CAPBSET_READ, CAP_BLOCK_SUSPEND) = 1 103 | prctl(PR_CAPBSET_READ, CAP_PERFMON) = -1 EINVAL (Invalid argument) 104 | prctl(PR_CAPBSET_READ, CAP_AUDIT_READ) = 1 105 | getuid() = 0 106 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 107 | setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 108 | setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 109 | setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 110 | bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 111 | getsockname(3, {sa_family=AF_NETLINK, nl_pid=210649, nl_groups=00000000}, [12]) = 0 112 | setsockopt(3, SOL_NETLINK, NETLINK_GET_STRICT_CHK, [1], 4) = 0 113 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 4 114 | setsockopt(4, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 115 | setsockopt(4, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 116 | setsockopt(4, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 117 | bind(4, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 118 | getsockname(4, {sa_family=AF_NETLINK, nl_pid=-394067181, nl_groups=00000000}, [12]) = 0 119 | sendmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1669695871, nlmsg_pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [[{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF|RTEXT_FILTER_SKIP_STATS], [{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"]]], iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 120 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 948 121 | brk(NULL) = 0x55cc7881b000 122 | brk(0x55cc7883c000) = 
0x55cc7883c000 123 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=948, nlmsg_type=RTM_NEWLINK, nlmsg_flags=0, nlmsg_seq=1669695871, nlmsg_pid=-394067181}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("eno1"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, [[{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"], [{nla_len=8, nla_type=IFLA_TXQLEN}, 1000], [{nla_len=5, nla_type=IFLA_OPERSTATE}, 6], [{nla_len=5, nla_type=IFLA_LINKMODE}, 0], [{nla_len=8, nla_type=IFLA_MTU}, 1500], [{nla_len=8, nla_type=IFLA_MIN_MTU}, 68], [{nla_len=8, nla_type=IFLA_MAX_MTU}, 9710], [{nla_len=8, nla_type=IFLA_GROUP}, 0], [{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0], [{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 64], [{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535], [{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536], [{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 64], [{nla_len=5, nla_type=IFLA_CARRIER}, 1], [{nla_len=7, nla_type=IFLA_QDISC}, "mq"], [{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2], [{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0], [{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1], [{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1], [{nla_len=36, nla_type=IFLA_MAP}, {mem_start=0, mem_end=0, base_addr=0, irq=0, dma=0, port=0}], [{nla_len=10, nla_type=IFLA_ADDRESS}, e4:43:4b:c3:b4:3c], [{nla_len=10, nla_type=IFLA_BROADCAST}, ff:ff:ff:ff:ff:ff], [{nla_len=196, nla_type=IFLA_STATS64}, {rx_packets=11723425, tx_packets=1150802, rx_bytes=4016376284, tx_bytes=153303601, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1202078, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}], [{nla_len=100, nla_type=IFLA_STATS}, 
{rx_packets=11723425, tx_packets=1150802, rx_bytes=4016376284, tx_bytes=153303601, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1202078, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}], [{nla_len=8, nla_type=IFLA_NUM_VF}, 0], {nla_len=4, nla_type=IFLA_VFINFO_LIST}, [{nla_len=12, nla_type=IFLA_XDP}, [{nla_len=5, nla_type=IFLA_XDP_ATTACHED}, XDP_ATTACHED_NONE]], [{nla_len=380, nla_type=IFLA_AF_SPEC}, [[{nla_len=136, nla_type=AF_INET}, [{nla_len=132, nla_type=IFLA_INET_CONF}, [[IPV4_DEVCONF_FORWARDING-1] = 1, [IPV4_DEVCONF_MC_FORWARDING-1] = 0, [IPV4_DEVCONF_PROXY_ARP-1] = 0, [IPV4_DEVCONF_ACCEPT_REDIRECTS-1] = 0, [IPV4_DEVCONF_SECURE_REDIRECTS-1] = 1, [IPV4_DEVCONF_SEND_REDIRECTS-1] = 1, [IPV4_DEVCONF_SHARED_MEDIA-1] = 1, [IPV4_DEVCONF_RP_FILTER-1] = 2, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE-1] = 1, [IPV4_DEVCONF_BOOTP_RELAY-1] = 0, [IPV4_DEVCONF_LOG_MARTIANS-1] = 0, [IPV4_DEVCONF_TAG-1] = 0, [IPV4_DEVCONF_ARPFILTER-1] = 0, [IPV4_DEVCONF_MEDIUM_ID-1] = 0, [IPV4_DEVCONF_NOXFRM-1] = 0, [IPV4_DEVCONF_NOPOLICY-1] = 0, [IPV4_DEVCONF_FORCE_IGMP_VERSION-1] = 0, [IPV4_DEVCONF_ARP_ANNOUNCE-1] = 0, [IPV4_DEVCONF_ARP_IGNORE-1] = 0, [IPV4_DEVCONF_PROMOTE_SECONDARIES-1] = 1, [IPV4_DEVCONF_ARP_ACCEPT-1] = 0, [IPV4_DEVCONF_ARP_NOTIFY-1] = 0, [IPV4_DEVCONF_ACCEPT_LOCAL-1] = 0, [IPV4_DEVCONF_SRC_VMARK-1] = 0, [IPV4_DEVCONF_PROXY_ARP_PVLAN-1] = 0, [IPV4_DEVCONF_ROUTE_LOCALNET-1] = 0, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL-1] = 10000, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL-1] = 1000, [IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN-1] = 0, [IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST-1] = 0, [IPV4_DEVCONF_DROP_GRATUITOUS_ARP-1] = 0, [IPV4_DEVCONF_BC_FORWARDING-1] = 0]]], [{nla_len=240, nla_type=AF_INET6}, [[{nla_len=8, 
nla_type=IFLA_INET6_FLAGS}, IF_READY], [{nla_len=20, nla_type=IFLA_INET6_CACHEINFO}, {max_reasm_len=65535, tstamp=1614, reachable_time=15096, retrans_time=1000}], [{nla_len=208, nla_type=IFLA_INET6_CONF}, [[DEVCONF_FORWARDING] = 0, [DEVCONF_HOPLIMIT] = 64, [DEVCONF_MTU6] = 1500, [DEVCONF_ACCEPT_RA] = 0, [DEVCONF_ACCEPT_REDIRECTS] = 1, [DEVCONF_AUTOCONF] = 1, [DEVCONF_DAD_TRANSMITS] = 1, [DEVCONF_RTR_SOLICITS] = -1, [DEVCONF_RTR_SOLICIT_INTERVAL] = 4000, [DEVCONF_RTR_SOLICIT_DELAY] = 1000, [DEVCONF_USE_TEMPADDR] = 0, [DEVCONF_TEMP_VALID_LFT] = 604800, [DEVCONF_TEMP_PREFERED_LFT] = 86400, [DEVCONF_REGEN_MAX_RETRY] = 3, [DEVCONF_MAX_DESYNC_FACTOR] = 600, [DEVCONF_MAX_ADDRESSES] = 16, [DEVCONF_FORCE_MLD_VERSION] = 0, [DEVCONF_ACCEPT_RA_DEFRTR] = 1, [DEVCONF_ACCEPT_RA_PINFO] = 1, [DEVCONF_ACCEPT_RA_RTR_PREF] = 1, [DEVCONF_RTR_PROBE_INTERVAL] = 60000, [DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = 0, [DEVCONF_PROXY_NDP] = 0, [DEVCONF_OPTIMISTIC_DAD] = 0, [DEVCONF_ACCEPT_SOURCE_ROUTE] = 0, [DEVCONF_MC_FORWARDING] = 0, [DEVCONF_DISABLE_IPV6] = 0, [DEVCONF_ACCEPT_DAD] = 1, [DEVCONF_FORCE_TLLAO] = 0, [DEVCONF_NDISC_NOTIFY] = 0, [DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = 10000, [DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = 1000, ...]]]]]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 948 124 | close(4) = 0 125 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669695871, nlmsg_pid=0}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 11], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1")]]], iov_len=48}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48 126 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 
msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 68 127 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=68, nlmsg_type=NLMSG_ERROR, nlmsg_flags=0, nlmsg_seq=1669695871, nlmsg_pid=210649}, {error=-EEXIST, msg=[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669695871, nlmsg_pid=0}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 11], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1")]]]}], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 68 128 | write(2, "RTNETLINK answers: File exists\n", 31RTNETLINK answers: File exists 129 | ) = 31 130 | exit_group(2) = ? 131 | +++ exited with 2 +++ 132 | 133 | 134 | # strace ip route add 10.11.12.13/32 nhid 11 135 | execve("/usr/sbin/ip", ["ip", "route", "add", "10.11.12.13/32", "nhid", "11"], 0x7ffca9584048 /* 18 vars */) = 0 136 | brk(NULL) = 0x55cda9ef2000 137 | arch_prctl(0x3001 /* ARCH_??? 
*/, 0x7fff50c294c0) = -1 EINVAL (Invalid argument) 138 | access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) 139 | openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 140 | fstat(3, {st_mode=S_IFREG|0644, st_size=90170, ...}) = 0 141 | mmap(NULL, 90170, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7eff3c5ac000 142 | close(3) = 0 143 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 144 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2005\0\0\0\0\0\0"..., 832) = 832 145 | fstat(3, {st_mode=S_IFREG|0644, st_size=109200, ...}) = 0 146 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7eff3c5aa000 147 | mmap(NULL, 110976, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c58e000 148 | mmap(0x7eff3c591000, 73728, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7eff3c591000 149 | mmap(0x7eff3c5a3000, 20480, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7eff3c5a3000 150 | mmap(0x7eff3c5a8000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7eff3c5a8000 151 | close(3) = 0 152 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libmnl.so.0", O_RDONLY|O_CLOEXEC) = 3 153 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\31\0\0\0\0\0\0"..., 832) = 832 154 | fstat(3, {st_mode=S_IFREG|0644, st_size=22520, ...}) = 0 155 | mmap(NULL, 2117648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c388000 156 | mprotect(0x7eff3c38d000, 2093056, PROT_NONE) = 0 157 | mmap(0x7eff3c58c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7eff3c58c000 158 | close(3) = 0 159 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libbsd.so.0", O_RDONLY|O_CLOEXEC) = 3 160 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@N\0\0\0\0\0\0"..., 832) = 832 161 | fstat(3, {st_mode=S_IFREG|0644, st_size=96728, ...}) = 0 162 | mmap(NULL, 102768, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 
0x7eff3c36e000 163 | mprotect(0x7eff3c372000, 77824, PROT_NONE) = 0 164 | mmap(0x7eff3c372000, 61440, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7eff3c372000 165 | mmap(0x7eff3c381000, 12288, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7eff3c381000 166 | mmap(0x7eff3c385000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7eff3c385000 167 | mmap(0x7eff3c387000, 368, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7eff3c387000 168 | close(3) = 0 169 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3 170 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0\0\0\0\0"..., 832) = 832 171 | fstat(3, {st_mode=S_IFREG|0644, st_size=31120, ...}) = 0 172 | mmap(NULL, 33112, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c365000 173 | mprotect(0x7eff3c367000, 20480, PROT_NONE) = 0 174 | mmap(0x7eff3c367000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7eff3c367000 175 | mmap(0x7eff3c36a000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5000) = 0x7eff3c36a000 176 | mmap(0x7eff3c36c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7eff3c36c000 177 | close(3) = 0 178 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3 179 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \22\0\0\0\0\0\0"..., 832) = 832 180 | fstat(3, {st_mode=S_IFREG|0644, st_size=18848, ...}) = 0 181 | mmap(NULL, 20752, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c35f000 182 | mmap(0x7eff3c360000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7eff3c360000 183 | mmap(0x7eff3c362000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7eff3c362000 184 | mmap(0x7eff3c363000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7eff3c363000 185 | close(3) = 0 186 | 
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 187 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300A\2\0\0\0\0\0"..., 832) = 832 188 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 189 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 190 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 191 | fstat(3, {st_mode=S_IFREG|0755, st_size=2029592, ...}) = 0 192 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 193 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 194 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 195 | mmap(NULL, 2037344, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c16d000 196 | mmap(0x7eff3c18f000, 1540096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x22000) = 0x7eff3c18f000 197 | mmap(0x7eff3c307000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19a000) = 0x7eff3c307000 198 | mmap(0x7eff3c355000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7eff3c355000 199 | mmap(0x7eff3c35b000, 13920, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7eff3c35b000 200 | close(3) = 0 201 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libz.so.1", O_RDONLY|O_CLOEXEC) = 3 202 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\"\0\0\0\0\0\0"..., 832) = 832 203 | fstat(3, {st_mode=S_IFREG|0644, st_size=108936, ...}) = 0 204 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7eff3c16b000 205 | mmap(NULL, 110776, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7eff3c14f000 206 | mprotect(0x7eff3c151000, 98304, PROT_NONE) = 0 207 | mmap(0x7eff3c151000, 69632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) 
= 0x7eff3c151000 208 | mmap(0x7eff3c162000, 24576, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7eff3c162000 209 | mmap(0x7eff3c169000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7eff3c169000 210 | close(3) = 0 211 | mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7eff3c14c000 212 | arch_prctl(ARCH_SET_FS, 0x7eff3c14c740) = 0 213 | mprotect(0x7eff3c355000, 16384, PROT_READ) = 0 214 | mprotect(0x7eff3c169000, 4096, PROT_READ) = 0 215 | mprotect(0x7eff3c363000, 4096, PROT_READ) = 0 216 | mprotect(0x7eff3c36c000, 4096, PROT_READ) = 0 217 | mprotect(0x7eff3c385000, 4096, PROT_READ) = 0 218 | mprotect(0x7eff3c58c000, 4096, PROT_READ) = 0 219 | mprotect(0x7eff3c5a8000, 4096, PROT_READ) = 0 220 | mprotect(0x55cda98ae000, 8192, PROT_READ) = 0 221 | mprotect(0x7eff3c5f0000, 4096, PROT_READ) = 0 222 | munmap(0x7eff3c5ac000, 90170) = 0 223 | prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 224 | prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? 
*/) = -1 EINVAL (Invalid argument) 225 | prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = -1 EINVAL (Invalid argument) 226 | prctl(PR_CAPBSET_READ, CAP_BLOCK_SUSPEND) = 1 227 | prctl(PR_CAPBSET_READ, CAP_PERFMON) = -1 EINVAL (Invalid argument) 228 | prctl(PR_CAPBSET_READ, CAP_AUDIT_READ) = 1 229 | getuid() = 0 230 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 231 | setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 232 | setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 233 | setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 234 | bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 235 | getsockname(3, {sa_family=AF_NETLINK, nl_pid=214121, nl_groups=00000000}, [12]) = 0 236 | setsockopt(3, SOL_NETLINK, NETLINK_GET_STRICT_CHK, [1], 4) = 0 237 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=44, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669710575, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13")], [{nla_len=8, nla_type=RTA_NH_ID}, "\x0b\x00\x00\x00"]]], iov_len=44}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44 238 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 64 239 | brk(NULL) = 0x55cda9ef2000 240 | brk(0x55cda9f13000) = 0x55cda9f13000 241 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=64, nlmsg_type=NLMSG_ERROR, nlmsg_flags=0, nlmsg_seq=1669710575, nlmsg_pid=214121}, {error=-EEXIST, msg=[{nlmsg_len=44, nlmsg_type=RTM_NEWROUTE, 
nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669710575, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13")], [{nla_len=8, nla_type=RTA_NH_ID}, "\x0b\x00\x00\x00"]]]}], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 64 242 | write(2, "RTNETLINK answers: File exists\n", 31RTNETLINK answers: File exists 243 | ) = 31 244 | exit_group(2) = ? 245 | +++ exited with 2 +++ 246 | -------------------------------------------------------------------------------- /linux/logs/strace-ip-route-add-no-nexthop-multipath.log: -------------------------------------------------------------------------------- 1 | # strace ip route add 10.11.11.11/32 \ 2 | nexthop via 172.20.105.174 dev eno1 \ 3 | nexthop via 172.20.105.175 dev eno1 4 | 5 | nsdevnotes/examples/gonlsub$ go run gonlsub.go 6 | Starting gonlsub.go 7 | ----------------------------------- 8 | NlMsghdr | Len:80, Type:RTM_NEWROUTE, Flags:600, Seq:1669864316, Pid:224911 9 | rtmsg: {2 32 0 0 254 3 0 1 0} 10 | rtmsg: RtMsg | 11 | Family: AF_INET (2) 12 | Dst_len: 32 13 | Src_len: 0 14 | Tos: 0 15 | Table: 254 16 | Protocol: RTPROT_BOOT (3) 17 | Scope: RT_SCOPE_UNIVERSE (0) 18 | Type: RTN_UNICAST (1) 19 | Flags: 0 20 | RtAttr | Len:8, Type:RTA_TABLE, Value:254 21 | RtAttr | Len:8, Type:RTA_DST, IPv4:10.11.11.11 22 | RtAttr | Len:36, Type:RTA_MULTIPATH 23 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 24 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.174 25 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 26 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.175 27 | 28 | 29 | # strace ip route add 10.11.11.11/32 nexthop via 172.20.105.174 dev eno1 nexthop via 172.20.105.175 dev en 30 | o1 31 | execve("/usr/sbin/ip", ["ip", "route", "add", "10.11.11.11/32", "nexthop", "via", 
"172.20.105.174", "dev", "eno1", "nexthop", "via", "172.20.105.175", "dev", "eno1"], 0x7fff8cfad838 /* 18 vars */) = 0 32 | brk(NULL) = 0x55a8b7222000 33 | arch_prctl(0x3001 /* ARCH_??? */, 0x7ffc37751b70) = -1 EINVAL (Invalid argument) 34 | access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) 35 | openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 36 | fstat(3, {st_mode=S_IFREG|0644, st_size=90170, ...}) = 0 37 | mmap(NULL, 90170, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f95f2480000 38 | close(3) = 0 39 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 40 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2005\0\0\0\0\0\0"..., 832) = 832 41 | fstat(3, {st_mode=S_IFREG|0644, st_size=109200, ...}) = 0 42 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f95f247e000 43 | mmap(NULL, 110976, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2462000 44 | mmap(0x7f95f2465000, 73728, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f95f2465000 45 | mmap(0x7f95f2477000, 20480, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f95f2477000 46 | mmap(0x7f95f247c000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f95f247c000 47 | close(3) = 0 48 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libmnl.so.0", O_RDONLY|O_CLOEXEC) = 3 49 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\31\0\0\0\0\0\0"..., 832) = 832 50 | fstat(3, {st_mode=S_IFREG|0644, st_size=22520, ...}) = 0 51 | mmap(NULL, 2117648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f225c000 52 | mprotect(0x7f95f2261000, 2093056, PROT_NONE) = 0 53 | mmap(0x7f95f2460000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f95f2460000 54 | close(3) = 0 55 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libbsd.so.0", O_RDONLY|O_CLOEXEC) = 3 56 | read(3, 
"\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@N\0\0\0\0\0\0"..., 832) = 832 57 | fstat(3, {st_mode=S_IFREG|0644, st_size=96728, ...}) = 0 58 | mmap(NULL, 102768, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2242000 59 | mprotect(0x7f95f2246000, 77824, PROT_NONE) = 0 60 | mmap(0x7f95f2246000, 61440, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f95f2246000 61 | mmap(0x7f95f2255000, 12288, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f95f2255000 62 | mmap(0x7f95f2259000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f95f2259000 63 | mmap(0x7f95f225b000, 368, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f95f225b000 64 | close(3) = 0 65 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3 66 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0\0\0\0\0"..., 832) = 832 67 | fstat(3, {st_mode=S_IFREG|0644, st_size=31120, ...}) = 0 68 | mmap(NULL, 33112, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2239000 69 | mprotect(0x7f95f223b000, 20480, PROT_NONE) = 0 70 | mmap(0x7f95f223b000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f95f223b000 71 | mmap(0x7f95f223e000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5000) = 0x7f95f223e000 72 | mmap(0x7f95f2240000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f95f2240000 73 | close(3) = 0 74 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3 75 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \22\0\0\0\0\0\0"..., 832) = 832 76 | fstat(3, {st_mode=S_IFREG|0644, st_size=18848, ...}) = 0 77 | mmap(NULL, 20752, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2233000 78 | mmap(0x7f95f2234000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f95f2234000 79 | mmap(0x7f95f2236000, 4096, PROT_READ, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f95f2236000 80 | mmap(0x7f95f2237000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f95f2237000 81 | close(3) = 0 82 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 83 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300A\2\0\0\0\0\0"..., 832) = 832 84 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 85 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 86 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 87 | fstat(3, {st_mode=S_IFREG|0755, st_size=2029592, ...}) = 0 88 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 89 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 90 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 91 | mmap(NULL, 2037344, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2041000 92 | mmap(0x7f95f2063000, 1540096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x22000) = 0x7f95f2063000 93 | mmap(0x7f95f21db000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19a000) = 0x7f95f21db000 94 | mmap(0x7f95f2229000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f95f2229000 95 | mmap(0x7f95f222f000, 13920, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f95f222f000 96 | close(3) = 0 97 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libz.so.1", O_RDONLY|O_CLOEXEC) = 3 98 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\"\0\0\0\0\0\0"..., 832) = 832 99 | fstat(3, {st_mode=S_IFREG|0644, st_size=108936, ...}) = 0 100 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f95f203f000 101 | mmap(NULL, 110776, PROT_READ, 
MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f95f2023000 102 | mprotect(0x7f95f2025000, 98304, PROT_NONE) = 0 103 | mmap(0x7f95f2025000, 69632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f95f2025000 104 | mmap(0x7f95f2036000, 24576, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f95f2036000 105 | mmap(0x7f95f203d000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f95f203d000 106 | close(3) = 0 107 | mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f95f2020000 108 | arch_prctl(ARCH_SET_FS, 0x7f95f2020740) = 0 109 | mprotect(0x7f95f2229000, 16384, PROT_READ) = 0 110 | mprotect(0x7f95f203d000, 4096, PROT_READ) = 0 111 | mprotect(0x7f95f2237000, 4096, PROT_READ) = 0 112 | mprotect(0x7f95f2240000, 4096, PROT_READ) = 0 113 | mprotect(0x7f95f2259000, 4096, PROT_READ) = 0 114 | mprotect(0x7f95f2460000, 4096, PROT_READ) = 0 115 | mprotect(0x7f95f247c000, 4096, PROT_READ) = 0 116 | mprotect(0x55a8b6320000, 8192, PROT_READ) = 0 117 | mprotect(0x7f95f24c4000, 4096, PROT_READ) = 0 118 | munmap(0x7f95f2480000, 90170) = 0 119 | prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 120 | prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? 
*/) = -1 EINVAL (Invalid argument) 121 | prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = -1 EINVAL (Invalid argument) 122 | prctl(PR_CAPBSET_READ, CAP_BLOCK_SUSPEND) = 1 123 | prctl(PR_CAPBSET_READ, CAP_PERFMON) = -1 EINVAL (Invalid argument) 124 | prctl(PR_CAPBSET_READ, CAP_AUDIT_READ) = 1 125 | getuid() = 0 126 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 127 | setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 128 | setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 129 | setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 130 | bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 131 | getsockname(3, {sa_family=AF_NETLINK, nl_pid=224911, nl_groups=00000000}, [12]) = 0 132 | setsockopt(3, SOL_NETLINK, NETLINK_GET_STRICT_CHK, [1], 4) = 0 133 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 4 134 | setsockopt(4, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 135 | setsockopt(4, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 136 | setsockopt(4, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 137 | bind(4, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 138 | getsockname(4, {sa_family=AF_NETLINK, nl_pid=-1302585526, nl_groups=00000000}, [12]) = 0 139 | sendmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1669864316, nlmsg_pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [[{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF|RTEXT_FILTER_SKIP_STATS], [{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"]]], iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 140 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 948 141 | brk(NULL) = 0x55a8b7222000 142 | brk(0x55a8b7243000) = 
0x55a8b7243000 143 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=948, nlmsg_type=RTM_NEWLINK, nlmsg_flags=0, nlmsg_seq=1669864316, nlmsg_pid=-1302585526}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("eno1"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, [[{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"], [{nla_len=8, nla_type=IFLA_TXQLEN}, 1000], [{nla_len=5, nla_type=IFLA_OPERSTATE}, 6], [{nla_len=5, nla_type=IFLA_LINKMODE}, 0], [{nla_len=8, nla_type=IFLA_MTU}, 1500], [{nla_len=8, nla_type=IFLA_MIN_MTU}, 68], [{nla_len=8, nla_type=IFLA_MAX_MTU}, 9710], [{nla_len=8, nla_type=IFLA_GROUP}, 0], [{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0], [{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 64], [{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535], [{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536], [{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 64], [{nla_len=5, nla_type=IFLA_CARRIER}, 1], [{nla_len=7, nla_type=IFLA_QDISC}, "mq"], [{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2], [{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0], [{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1], [{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1], [{nla_len=36, nla_type=IFLA_MAP}, {mem_start=0, mem_end=0, base_addr=0, irq=0, dma=0, port=0}], [{nla_len=10, nla_type=IFLA_ADDRESS}, e4:43:4b:c3:b4:3c], [{nla_len=10, nla_type=IFLA_BROADCAST}, ff:ff:ff:ff:ff:ff], [{nla_len=196, nla_type=IFLA_STATS64}, {rx_packets=12664742, tx_packets=1400879, rx_bytes=4164658157, tx_bytes=231111396, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1288752, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}], [{nla_len=100, nla_type=IFLA_STATS}, 
{rx_packets=12664742, tx_packets=1400879, rx_bytes=4164658157, tx_bytes=231111396, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1288752, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}], [{nla_len=8, nla_type=IFLA_NUM_VF}, 0], {nla_len=4, nla_type=IFLA_VFINFO_LIST}, [{nla_len=12, nla_type=IFLA_XDP}, [{nla_len=5, nla_type=IFLA_XDP_ATTACHED}, XDP_ATTACHED_NONE]], [{nla_len=380, nla_type=IFLA_AF_SPEC}, [[{nla_len=136, nla_type=AF_INET}, [{nla_len=132, nla_type=IFLA_INET_CONF}, [[IPV4_DEVCONF_FORWARDING-1] = 1, [IPV4_DEVCONF_MC_FORWARDING-1] = 0, [IPV4_DEVCONF_PROXY_ARP-1] = 0, [IPV4_DEVCONF_ACCEPT_REDIRECTS-1] = 0, [IPV4_DEVCONF_SECURE_REDIRECTS-1] = 1, [IPV4_DEVCONF_SEND_REDIRECTS-1] = 1, [IPV4_DEVCONF_SHARED_MEDIA-1] = 1, [IPV4_DEVCONF_RP_FILTER-1] = 2, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE-1] = 1, [IPV4_DEVCONF_BOOTP_RELAY-1] = 0, [IPV4_DEVCONF_LOG_MARTIANS-1] = 0, [IPV4_DEVCONF_TAG-1] = 0, [IPV4_DEVCONF_ARPFILTER-1] = 0, [IPV4_DEVCONF_MEDIUM_ID-1] = 0, [IPV4_DEVCONF_NOXFRM-1] = 0, [IPV4_DEVCONF_NOPOLICY-1] = 0, [IPV4_DEVCONF_FORCE_IGMP_VERSION-1] = 0, [IPV4_DEVCONF_ARP_ANNOUNCE-1] = 0, [IPV4_DEVCONF_ARP_IGNORE-1] = 0, [IPV4_DEVCONF_PROMOTE_SECONDARIES-1] = 1, [IPV4_DEVCONF_ARP_ACCEPT-1] = 0, [IPV4_DEVCONF_ARP_NOTIFY-1] = 0, [IPV4_DEVCONF_ACCEPT_LOCAL-1] = 0, [IPV4_DEVCONF_SRC_VMARK-1] = 0, [IPV4_DEVCONF_PROXY_ARP_PVLAN-1] = 0, [IPV4_DEVCONF_ROUTE_LOCALNET-1] = 0, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL-1] = 10000, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL-1] = 1000, [IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN-1] = 0, [IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST-1] = 0, [IPV4_DEVCONF_DROP_GRATUITOUS_ARP-1] = 0, [IPV4_DEVCONF_BC_FORWARDING-1] = 0]]], [{nla_len=240, nla_type=AF_INET6}, [[{nla_len=8, 
nla_type=IFLA_INET6_FLAGS}, IF_READY], [{nla_len=20, nla_type=IFLA_INET6_CACHEINFO}, {max_reasm_len=65535, tstamp=1614, reachable_time=18304, retrans_time=1000}], [{nla_len=208, nla_type=IFLA_INET6_CONF}, [[DEVCONF_FORWARDING] = 0, [DEVCONF_HOPLIMIT] = 64, [DEVCONF_MTU6] = 1500, [DEVCONF_ACCEPT_RA] = 0, [DEVCONF_ACCEPT_REDIRECTS] = 1, [DEVCONF_AUTOCONF] = 1, [DEVCONF_DAD_TRANSMITS] = 1, [DEVCONF_RTR_SOLICITS] = -1, [DEVCONF_RTR_SOLICIT_INTERVAL] = 4000, [DEVCONF_RTR_SOLICIT_DELAY] = 1000, [DEVCONF_USE_TEMPADDR] = 0, [DEVCONF_TEMP_VALID_LFT] = 604800, [DEVCONF_TEMP_PREFERED_LFT] = 86400, [DEVCONF_REGEN_MAX_RETRY] = 3, [DEVCONF_MAX_DESYNC_FACTOR] = 600, [DEVCONF_MAX_ADDRESSES] = 16, [DEVCONF_FORCE_MLD_VERSION] = 0, [DEVCONF_ACCEPT_RA_DEFRTR] = 1, [DEVCONF_ACCEPT_RA_PINFO] = 1, [DEVCONF_ACCEPT_RA_RTR_PREF] = 1, [DEVCONF_RTR_PROBE_INTERVAL] = 60000, [DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = 0, [DEVCONF_PROXY_NDP] = 0, [DEVCONF_OPTIMISTIC_DAD] = 0, [DEVCONF_ACCEPT_SOURCE_ROUTE] = 0, [DEVCONF_MC_FORWARDING] = 0, [DEVCONF_DISABLE_IPV6] = 0, [DEVCONF_ACCEPT_DAD] = 1, [DEVCONF_FORCE_TLLAO] = 0, [DEVCONF_NDISC_NOTIFY] = 0, [DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = 10000, [DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = 1000, ...]]]]]]]], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 948 144 | close(4) = 0 145 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=72, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669864316, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.11")], [{nla_len=36, nla_type=RTA_MULTIPATH}, [[{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, 
inet_addr("172.20.105.174")]], [{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.175")]]]]]], iov_len=72}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 72 146 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36 147 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=36, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED, nlmsg_seq=1669864316, nlmsg_pid=224911}, {error=0, msg={nlmsg_len=72, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669864316, nlmsg_pid=0}}], iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36 148 | exit_group(0) = ? 149 | +++ exited with 0 +++ 150 | -------------------------------------------------------------------------------- /linux/logs/strace-ip-route-add-no-nexthop.log: -------------------------------------------------------------------------------- 1 | # strace ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1 2 | execve("/usr/sbin/ip", ["ip", "route", "add", "10.11.11.99/32", "via", "172.20.104.1", "dev", "eno1"], 0x7ffce6833e78 /* 18 vars */) = 0 3 | brk(NULL) = 0x56186635c000 4 | arch_prctl(0x3001 /* ARCH_??? 
*/, 0x7ffd072ee760) = -1 EINVAL (Invalid argument) 5 | access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) 6 | openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 7 | fstat(3, {st_mode=S_IFREG|0644, st_size=90030, ...}) = 0 8 | mmap(NULL, 90030, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4d86fb8000 9 | close(3) = 0 10 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3 11 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\2005\0\0\0\0\0\0"..., 832) = 832 12 | fstat(3, {st_mode=S_IFREG|0644, st_size=109200, ...}) = 0 13 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4d86fb6000 14 | mmap(NULL, 110976, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86f9a000 15 | mmap(0x7f4d86f9d000, 73728, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f4d86f9d000 16 | mmap(0x7f4d86faf000, 20480, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15000) = 0x7f4d86faf000 17 | mmap(0x7f4d86fb4000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f4d86fb4000 18 | close(3) = 0 19 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libmnl.so.0", O_RDONLY|O_CLOEXEC) = 3 20 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@\31\0\0\0\0\0\0"..., 832) = 832 21 | fstat(3, {st_mode=S_IFREG|0644, st_size=22520, ...}) = 0 22 | mmap(NULL, 2117648, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86d94000 23 | mprotect(0x7f4d86d99000, 2093056, PROT_NONE) = 0 24 | mmap(0x7f4d86f98000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f4d86f98000 25 | close(3) = 0 26 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libbsd.so.0", O_RDONLY|O_CLOEXEC) = 3 27 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0@N\0\0\0\0\0\0"..., 832) = 832 28 | fstat(3, {st_mode=S_IFREG|0644, st_size=96728, ...}) = 0 29 | mmap(NULL, 102768, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86d7a000 30 | 
mprotect(0x7f4d86d7e000, 77824, PROT_NONE) = 0 31 | mmap(0x7f4d86d7e000, 61440, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f4d86d7e000 32 | mmap(0x7f4d86d8d000, 12288, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f4d86d8d000 33 | mmap(0x7f4d86d91000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7f4d86d91000 34 | mmap(0x7f4d86d93000, 368, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4d86d93000 35 | close(3) = 0 36 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3 37 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300#\0\0\0\0\0\0"..., 832) = 832 38 | fstat(3, {st_mode=S_IFREG|0644, st_size=31120, ...}) = 0 39 | mmap(NULL, 33112, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86d71000 40 | mprotect(0x7f4d86d73000, 20480, PROT_NONE) = 0 41 | mmap(0x7f4d86d73000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f4d86d73000 42 | mmap(0x7f4d86d76000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5000) = 0x7f4d86d76000 43 | mmap(0x7f4d86d78000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f4d86d78000 44 | close(3) = 0 45 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3 46 | read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \22\0\0\0\0\0\0"..., 832) = 832 47 | fstat(3, {st_mode=S_IFREG|0644, st_size=18848, ...}) = 0 48 | mmap(NULL, 20752, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86d6b000 49 | mmap(0x7f4d86d6c000, 8192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x7f4d86d6c000 50 | mmap(0x7f4d86d6e000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f4d86d6e000 51 | mmap(0x7f4d86d6f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f4d86d6f000 52 | close(3) = 0 53 | openat(AT_FDCWD, 
"/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 54 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300A\2\0\0\0\0\0"..., 832) = 832 55 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 56 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 57 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 58 | fstat(3, {st_mode=S_IFREG|0755, st_size=2029592, ...}) = 0 59 | pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 60 | pread64(3, "\4\0\0\0\20\0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0", 32, 848) = 32 61 | pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\30x\346\264ur\f|Q\226\236i\253-'o"..., 68, 880) = 68 62 | mmap(NULL, 2037344, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86b79000 63 | mmap(0x7f4d86b9b000, 1540096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x22000) = 0x7f4d86b9b000 64 | mmap(0x7f4d86d13000, 319488, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19a000) = 0x7f4d86d13000 65 | mmap(0x7f4d86d61000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f4d86d61000 66 | mmap(0x7f4d86d67000, 13920, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4d86d67000 67 | close(3) = 0 68 | openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libz.so.1", O_RDONLY|O_CLOEXEC) = 3 69 | read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\"\0\0\0\0\0\0"..., 832) = 832 70 | fstat(3, {st_mode=S_IFREG|0644, st_size=108936, ...}) = 0 71 | mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4d86b77000 72 | mmap(NULL, 110776, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4d86b5b000 73 | mprotect(0x7f4d86b5d000, 98304, PROT_NONE) = 0 74 | mmap(0x7f4d86b5d000, 69632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f4d86b5d000 75 | 
mmap(0x7f4d86b6e000, 24576, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f4d86b6e000 76 | mmap(0x7f4d86b75000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19000) = 0x7f4d86b75000 77 | close(3) = 0 78 | mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4d86b58000 79 | arch_prctl(ARCH_SET_FS, 0x7f4d86b58740) = 0 80 | mprotect(0x7f4d86d61000, 16384, PROT_READ) = 0 81 | mprotect(0x7f4d86b75000, 4096, PROT_READ) = 0 82 | mprotect(0x7f4d86d6f000, 4096, PROT_READ) = 0 83 | mprotect(0x7f4d86d78000, 4096, PROT_READ) = 0 84 | mprotect(0x7f4d86d91000, 4096, PROT_READ) = 0 85 | mprotect(0x7f4d86f98000, 4096, PROT_READ) = 0 86 | mprotect(0x7f4d86fb4000, 4096, PROT_READ) = 0 87 | mprotect(0x561865b3c000, 8192, PROT_READ) = 0 88 | mprotect(0x7f4d86ffb000, 4096, PROT_READ) = 0 89 | munmap(0x7f4d86fb8000, 90030) = 0 90 | prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 91 | prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument) 92 | prctl(PR_CAPBSET_READ, 0x28 /* CAP_??? */) = -1 EINVAL (Invalid argument) 93 | prctl(PR_CAPBSET_READ, CAP_BLOCK_SUSPEND) = 1 94 | prctl(PR_CAPBSET_READ, 0x26 /* CAP_??? 
*/) = -1 EINVAL (Invalid argument) 95 | prctl(PR_CAPBSET_READ, CAP_AUDIT_READ) = 1 96 | getuid() = 0 97 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 98 | setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 99 | setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 100 | setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 101 | bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 102 | getsockname(3, {sa_family=AF_NETLINK, nl_pid=175155, nl_groups=00000000}, [12]) = 0 103 | setsockopt(3, SOL_NETLINK, NETLINK_DUMP_STRICT_CHK, [1], 4) = 0 104 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 4 105 | setsockopt(4, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 106 | setsockopt(4, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 107 | setsockopt(4, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0 108 | bind(4, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 109 | getsockname(4, {sa_family=AF_NETLINK, nl_pid=-501398374, nl_groups=00000000}, [12]) = 0 110 | sendmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=RTM_GETLINK, flags=NLM_F_REQUEST, seq=1669690078, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [{{nla_len=8, nla_type=IFLA_EXT_MASK}, 9}, {{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"}]}, iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 111 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 948 112 | brk(NULL) = 0x56186635c000 113 | brk(0x56186637d000) = 0x56186637d000 114 | recvmsg(4, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=948, type=RTM_NEWLINK, flags=0, seq=1669690078, pid=3793568922}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("eno1"), 
ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, [{{nla_len=9, nla_type=IFLA_IFNAME}, "eno1"}, {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000}, {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6}, {{nla_len=5, nla_type=IFLA_LINKMODE}, 0}, {{nla_len=8, nla_type=IFLA_MTU}, 1500}, {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68}, {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9710}, {{nla_len=8, nla_type=IFLA_GROUP}, 0}, {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0}, {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 64}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536}, {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 64}, {{nla_len=5, nla_type=IFLA_CARRIER}, 1}, {{nla_len=7, nla_type=IFLA_QDISC}, "mq"}, {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2}, {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0}, {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1}, {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1}, {{nla_len=36, nla_type=IFLA_MAP}, {mem_start=0, mem_end=0, base_addr=0, irq=0, dma=0, port=0}}, {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xe4\x43\x4b\xc3\xb4\x3c"}, {{nla_len=10, nla_type=IFLA_BROADCAST}, "\xff\xff\xff\xff\xff\xff"}, {{nla_len=196, nla_type=IFLA_STATS64}, {rx_packets=11582941, tx_packets=1100555, rx_bytes=3882666456, tx_bytes=142551477, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1199098, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}}, {{nla_len=100, nla_type=IFLA_STATS}, {rx_packets=11582941, tx_packets=1100555, rx_bytes=3882666456, tx_bytes=142551477, rx_errors=0, tx_errors=0, rx_dropped=234, tx_dropped=0, multicast=1199098, collisions=0, rx_length_errors=0, rx_over_errors=0, rx_crc_errors=0, rx_frame_errors=0, rx_fifo_errors=0, rx_missed_errors=0, tx_aborted_errors=0, 
tx_carrier_errors=0, tx_fifo_errors=0, tx_heartbeat_errors=0, tx_window_errors=0, rx_compressed=0, tx_compressed=0, rx_nohandler=0}}, {{nla_len=8, nla_type=IFLA_NUM_VF}, 0}, {nla_len=4, nla_type=IFLA_VFINFO_LIST}, {{nla_len=12, nla_type=IFLA_XDP}, {{nla_len=5, nla_type=IFLA_XDP_ATTACHED}, XDP_ATTACHED_NONE}}, {{nla_len=380, nla_type=IFLA_AF_SPEC}, [{{nla_len=136, nla_type=AF_INET}, {{nla_len=132, nla_type=IFLA_INET_CONF}, [[IPV4_DEVCONF_FORWARDING-1] = 1, [IPV4_DEVCONF_MC_FORWARDING-1] = 0, [IPV4_DEVCONF_PROXY_ARP-1] = 0, [IPV4_DEVCONF_ACCEPT_REDIRECTS-1] = 0, [IPV4_DEVCONF_SECURE_REDIRECTS-1] = 1, [IPV4_DEVCONF_SEND_REDIRECTS-1] = 1, [IPV4_DEVCONF_SHARED_MEDIA-1] = 1, [IPV4_DEVCONF_RP_FILTER-1] = 2, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE-1] = 1, [IPV4_DEVCONF_BOOTP_RELAY-1] = 0, [IPV4_DEVCONF_LOG_MARTIANS-1] = 0, [IPV4_DEVCONF_TAG-1] = 0, [IPV4_DEVCONF_ARPFILTER-1] = 0, [IPV4_DEVCONF_MEDIUM_ID-1] = 0, [IPV4_DEVCONF_NOXFRM-1] = 0, [IPV4_DEVCONF_NOPOLICY-1] = 0, [IPV4_DEVCONF_FORCE_IGMP_VERSION-1] = 0, [IPV4_DEVCONF_ARP_ANNOUNCE-1] = 0, [IPV4_DEVCONF_ARP_IGNORE-1] = 0, [IPV4_DEVCONF_PROMOTE_SECONDARIES-1] = 1, [IPV4_DEVCONF_ARP_ACCEPT-1] = 0, [IPV4_DEVCONF_ARP_NOTIFY-1] = 0, [IPV4_DEVCONF_ACCEPT_LOCAL-1] = 0, [IPV4_DEVCONF_SRC_VMARK-1] = 0, [IPV4_DEVCONF_PROXY_ARP_PVLAN-1] = 0, [IPV4_DEVCONF_ROUTE_LOCALNET-1] = 0, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL-1] = 10000, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL-1] = 1000, [IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN-1] = 0, [IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST-1] = 0, [IPV4_DEVCONF_DROP_GRATUITOUS_ARP-1] = 0, [IPV4_DEVCONF_BC_FORWARDING-1] = 0]}}, {{nla_len=240, nla_type=AF_INET6}, [{{nla_len=8, nla_type=IFLA_INET6_FLAGS}, IF_READY}, {{nla_len=20, nla_type=IFLA_INET6_CACHEINFO}, {max_reasm_len=65535, tstamp=1614, reachable_time=44200, retrans_time=1000}}, {{nla_len=208, nla_type=IFLA_INET6_CONF}, [[DEVCONF_FORWARDING] = 0, [DEVCONF_HOPLIMIT] = 64, [DEVCONF_MTU6] = 1500, [DEVCONF_ACCEPT_RA] = 0, 
[DEVCONF_ACCEPT_REDIRECTS] = 1, [DEVCONF_AUTOCONF] = 1, [DEVCONF_DAD_TRANSMITS] = 1, [DEVCONF_RTR_SOLICITS] = -1, [DEVCONF_RTR_SOLICIT_INTERVAL] = 4000, [DEVCONF_RTR_SOLICIT_DELAY] = 1000, [DEVCONF_USE_TEMPADDR] = 0, [DEVCONF_TEMP_VALID_LFT] = 604800, [DEVCONF_TEMP_PREFERED_LFT] = 86400, [DEVCONF_REGEN_MAX_RETRY] = 3, [DEVCONF_MAX_DESYNC_FACTOR] = 600, [DEVCONF_MAX_ADDRESSES] = 16, [DEVCONF_FORCE_MLD_VERSION] = 0, [DEVCONF_ACCEPT_RA_DEFRTR] = 1, [DEVCONF_ACCEPT_RA_PINFO] = 1, [DEVCONF_ACCEPT_RA_RTR_PREF] = 1, [DEVCONF_RTR_PROBE_INTERVAL] = 60000, [DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = 0, [DEVCONF_PROXY_NDP] = 0, [DEVCONF_OPTIMISTIC_DAD] = 0, [DEVCONF_ACCEPT_SOURCE_ROUTE] = 0, [DEVCONF_MC_FORWARDING] = 0, [DEVCONF_DISABLE_IPV6] = 0, [DEVCONF_ACCEPT_DAD] = 1, [DEVCONF_FORCE_TLLAO] = 0, [DEVCONF_NDISC_NOTIFY] = 0, [DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] = 10000, [DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] = 1000, ...]}]}]}]}, iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 948 115 | close(4) = 0 116 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669690078, pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [{{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.99")}, {{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.104.1")}, {{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("eno1")}]}, iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 117 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36 118 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, 
msg_iov=[{iov_base={{len=36, type=NLMSG_ERROR, flags=NLM_F_CAPPED, seq=1669690078, pid=175155}, {error=0, msg={len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669690078, pid=0}}}, iov_len=32768}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36 119 | exit_group(0) = ? 120 | +++ exited with 0 +++ -------------------------------------------------------------------------------- /linux/netlink-nexthop.md: -------------------------------------------------------------------------------- 1 | # netlink/rtnetlink - Next Hop Object & Next Hop Group 2 | 3 | > 特に言及がない場合は Linux v6.0 をベースに解説しています 4 | 5 | 本ページでは ip route を設定する際の netlink (rtnetlink) の動作を、 Linux v5.3 で導入された Next Hop Object を利用しない従来の方法と、利用した場合の比較を中心に解説しています。 6 | 7 | - netlink に関する基本的な説明は [Linux Netlink](./netlink.md) を参照 8 | - Linux における IP Routing (Fib や nexthop を含む)に関しては [Linux IP Routing](./iprouting.md) を参照 9 | 10 | 目次 11 | 12 | - [strace と RTM\_NEWNEXTHOP](#strace-と-rtm_newnexthop) 13 | - [ip route 追加時の netlink/rtnetlink の具体例](#ip-route-追加時の-netlinkrtnetlink-の具体例) 14 | - [nexthop 利用無し(従来)](#nexthop-利用無し従来) 15 | - [nexthop 利用無し(従来) Multipath](#nexthop-利用無し従来-multipath) 16 | - [nexthop を利用](#nexthop-を利用) 17 | - [nexthop group を利用 Multipath](#nexthop-group-を利用-multipath) 18 | - [route add ipv6](#route-add-ipv6) 19 | - [reference](#reference) 20 | 21 | ## strace と RTM_NEWNEXTHOP 22 | 23 | strace は、コマンドと Kernel 間でやり取りされる netlink message をモニタ可能な便利なツールです。 24 | 25 | 以降の解説では、ip コマンドの前に `strace` を付けて実行しています。 26 | 27 | Next Hop Object に関するメッセージである `RTM_NEWNEXTHOP` には [strace v5.15 (2021-10-14) から対応](https://fossies.org/linux/strace/ChangeLog) していますので、もしそれ以前のバージョンの場合は以下のように v5.15 以上にアップデートが必要です。 28 | 29 | yum/apt コマンドによるアップデートができればベストですが、もし yum/apt で v5.15 以降にアップデートされない場合は以下のように Source Code からのビルド&インストールが必要になります。 30 | 31 | Ubuntu 20.04.4 で strace v6.0 をビルド&インストールした際の手順は以下の通りです。 32 | 33 | ``` 34 | > https://github.com/strace/strace/releases/tag/v6.0 35 | > download 
strace-6.0.tar.xz 36 | 37 | $ tar xf strace-6.0.tar.xz 38 | $ cd strace-6.0 39 | $ ./configure --disable-mpers 40 | $ make 41 | $ sudo make install 42 | 43 | $ which strace 44 | /usr/local/bin/strace 45 | 46 | $ strace --version 47 | strace -- version 6.0 48 | Copyright (c) 1991-2022 The strace developers . 49 | This is free software; see the source for copying conditions. There is NO 50 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 51 | 52 | Optional features enabled: stack-trace=libunwind no-m32-mpers no-mx32-mpers 53 | ``` 54 | 55 | ## ip route 追加時の netlink/rtnetlink の具体例 56 | 57 | ip route を設定する際の netlink/rtnetlink の動作を確認しましょう。 58 | 59 | 具体例として、以下4パターンを比較します。 60 | "Multipath" は2つ(以上)の nexthop が存在する route を意味します。 61 | 62 | - nexthop 利用無し(従来) 63 | - `ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1` 64 | - nexthop 利用無し(従来) Multipath 65 | - `ip route add 10.11.11.11/32 nexthop via 172.20.105.174 dev eno1 nexthop via 172.20.105.175 dev eno1` 66 | - nexthop を利用 67 | - `ip nexthop add id 11 via 172.20.105.173 dev eno1` 68 | - `ip route add 10.11.12.13/32 nhid 11` 69 | - nexthop group を利用 Multipath 70 | - `ip nexthop add id 1 via 172.20.105.172 dev eno1` 71 | - `ip nexthop add id 2 via 172.20.105.173 dev eno1` 72 | - `ip nexthop add id 3 group 1/2` 73 | - `ip route add 10.11.12.13/32 nhid 3` 74 | 75 | それぞれの解説では設定のための rtnetlink message である sendmsg だけを抜粋しています。 76 | デバイス名の解決など、その前後でもメッセージがやりとりされる場合がありますので、詳細は以下 strace のログを参照してください。(ご自身の環境で strace コマンドを入力してみる事をお勧めします) 77 | 78 | - strace logs 79 | - [nexthop 利用無し(従来)](logs/strace-ip-route-add-no-nexthop.log) 80 | - [nexthop 利用無し(従来)Multipath](logs/strace-ip-route-add-no-nexthop-multipath.log) 81 | - [nexthop を利用](logs/strace-ip-route-add-nexthop.log) 82 | - [nexthop group を利用 Multipath](logs/strace-ip-route-add-nexthop-group.log) 83 | 84 | 共通の解説 85 | 86 | - `RTA_OIF` の値である `if_nametoindex()` は、デバイス名から dev index を求める関数です。 87 | - 実際の netlink message ではデバイス名(e.g. 
`eno1`)ではなく、数字(ID)が送信される事に留意してください。 88 | - `rtmsg rtm_table` や `RTA_TABLE` で利用される route table の一覧は `cat /etc/iproute2/rt_tables` から取得可能 89 | - `RT_TABLE_MAIN` (Table ID: 254) は Table ID を指定せず route を追加した場合追加されるテーブル 90 | - 宛先アドレス(PREFIX)について 91 | - route entry の宛先アドレス情報は `RTA_DST` に保持されますが、PREFIX Length は `rtm_dst_len` に保持されます。 92 | - 宛先 prefix/length に関する情報が RT Message と Attribute の異なる場所に格納されるので注意が必要です。 93 | 94 | ### nexthop 利用無し(従来) 95 | 96 | nexthop object を用いない従来の方法では、 `RTM_NEWROUTE` に nexthop に関する情報が含まれます。 97 | 98 | ``` 99 | # ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1 100 | 101 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669690078, pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [{{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.99")}, {{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.104.1")}, {{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("eno1")}]}, iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 102 | 103 | Netlink Message Type: RTM_NEWROUTE 104 | RT Message: 105 | rtm_family=AF_INET 106 | rtm_dst_len=32 107 | rtm_src_len=0 108 | rtm_tos=0 109 | rtm_table=RT_TABLE_MAIN 110 | rtm_protocol=RTPROT_BOOT 111 | rtm_scope=RT_SCOPE_UNIVERSE 112 | rtm_type=RTN_UNICAST 113 | rtm_flags=0 114 | Netlink Attribute: 115 | {nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.99") 116 | {nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.104.1") 117 | {nla_len=8, nla_type=RTA_OIF}, if_nametoindex("eno1") 118 | ``` 119 | 120 | ### nexthop 利用無し(従来) Multipath 121 | 122 | nexthop object を用いない従来の方法でも、複数の nexthop 設定(Multipath)は可能です。 123 | `RTA_MULTIPATH` の値として、 `rtnexthop` 構造体を nexthop の数だけ利用し、 `rtnexthop` の中の `rtnh_ifindex` に Output Interface 
ID(`RTA_OIF` 相当)を、`rtnexthop` の値に `RTA_GATEWAY` として gateway アドレスをセットします。 124 | 125 | 126 | ```c 127 | // include/uapi/linux/rtnetlink.h 128 | struct rtnexthop { 129 | unsigned short rtnh_len; 130 | unsigned char rtnh_flags; 131 | unsigned char rtnh_hops; 132 | int rtnh_ifindex; 133 | }; 134 | ``` 135 | 136 | ``` 137 | # ip route add 10.11.11.11/32 nexthop via 172.20.105.174 dev eno1 nexthop via 172.20.105.175 dev eno1 138 | 139 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=72, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669864316, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.11")], [{nla_len=36, nla_type=RTA_MULTIPATH}, [[{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174")]], [{rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")}, [{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.175")]]]]]], iov_len=72}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 72 140 | 141 | Netlink Message Type: RTM_NEWROUTE 142 | RT Message: 143 | rtm_family=AF_INET 144 | rtm_dst_len=32 145 | rtm_src_len=0 146 | rtm_tos=0 147 | rtm_table=RT_TABLE_MAIN 148 | rtm_protocol=RTPROT_BOOT 149 | rtm_scope=RT_SCOPE_UNIVERSE 150 | rtm_type=RTN_UNICAST 151 | rtm_flags=0 152 | Netlink Attribute: 153 | {nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.11") 154 | {nla_len=36, nla_type=RTA_MULTIPATH} 155 | {rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")} 156 | {nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174") 157 | {rtnh_len=16, rtnh_flags=0, rtnh_hops=0, rtnh_ifindex=if_nametoindex("eno1")} 158 | {nla_len=8, 
nla_type=RTA_GATEWAY}, inet_addr("172.20.105.175") 159 | ``` 160 | 161 | 162 | ### nexthop を利用 163 | 164 | Next Hop Object を利用する場合は、まず `RTM_NEWNEXTHOP` メッセージを送信して nexthop を作成し、そのIDを(`RTA_GATEWAY` や `RTA_OIF` の代わりに) `RTM_NEWROUTE` の Attribute である `RTA_NH_ID` に指定して送信します。 165 | 166 | 167 | ``` 168 | > ip nexthop add id 11 via 172.20.105.173 dev eno1 169 | 170 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669695871, nlmsg_pid=0}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 11], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1")]]], iov_len=48}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48 171 | 172 | Netlink Message Type: RTM_NEWNEXTHOP 173 | Next Hop Message: 174 | nh_family=AF_INET, 175 | nh_scope=RT_SCOPE_UNIVERSE, 176 | nh_protocol=RTPROT_UNSPEC, 177 | nh_flags=0 178 | Netlink Attribute: 179 | {nla_len=8, nla_type=NHA_ID}, 11 180 | {nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173") 181 | {nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1") 182 | ``` 183 | 184 | ``` 185 | > ip route add 10.11.12.13/32 nhid 11 186 | 187 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=44, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669710575, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13")], [{nla_len=8, nla_type=RTA_NH_ID}, "\x0b\x00\x00\x00"]]], iov_len=44}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44 
188 | 189 | Netlink Message Type: RTM_NEWROUTE 190 | RT Message: 191 | rtm_family=AF_INET 192 | rtm_dst_len=32 193 | rtm_src_len=0 194 | rtm_tos=0 195 | rtm_table=RT_TABLE_MAIN 196 | rtm_protocol=RTPROT_BOOT 197 | rtm_scope=RT_SCOPE_UNIVERSE 198 | rtm_type=RTN_UNICAST 199 | rtm_flags=0 200 | Netlink Attribute: 201 | {nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13") 202 | {nla_len=8, nla_type=RTA_NH_ID}, "\x0b\x00\x00\x00" 203 | ``` 204 | 205 | ### nexthop group を利用 Multipath 206 | 207 | 208 | Next Hop Object を利用して Multipath を設定する場合は、以下3ステップを辿ります。 209 | 210 | 1. `RTM_NEWNEXTHOP` メッセージを送信し nexthop を作成(2個以上) 211 | 2. `RTM_NEWNEXTHOP` メッセージを送信し `NHA_GROUP` に "1." で作成した nexthop の ID を指定し nexthop group を作成 212 | 3. nexthop group の ID を `RTA_NH_ID` に指定して送信 213 | 214 | 215 | ``` 216 | > ip nexthop add id 1 via 172.20.105.172 dev eno1 217 | > ip nexthop add id 2 via 172.20.105.173 dev eno1 218 | 219 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669711458, nlmsg_pid=0}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 1], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.172")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1")]]], iov_len=48}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48 220 | 221 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=48, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669711492, nlmsg_pid=0}, {nh_family=AF_INET, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 2], [{nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173")], [{nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1")]]],
iov_len=48}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48 222 | 223 | Netlink Message Type: RTM_NEWNEXTHOP 224 | Next Hop Message: 225 | nh_family=AF_INET, 226 | nh_scope=RT_SCOPE_UNIVERSE, 227 | nh_protocol=RTPROT_UNSPEC, 228 | nh_flags=0 229 | Netlink Attribute: 230 | {nla_len=8, nla_type=NHA_ID}, 1 231 | {nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.172") 232 | {nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1") 233 | 234 | Netlink Message Type: RTM_NEWNEXTHOP 235 | Next Hop Message: 236 | nh_family=AF_INET, 237 | nh_scope=RT_SCOPE_UNIVERSE, 238 | nh_protocol=RTPROT_UNSPEC, 239 | nh_flags=0 240 | Netlink Attribute: 241 | {nla_len=8, nla_type=NHA_ID}, 2 242 | {nla_len=8, nla_type=NHA_GATEWAY}, inet_addr("172.20.105.173") 243 | {nla_len=8, nla_type=NHA_OIF}, if_nametoindex("eno1") 244 | 245 | > ip nexthop add id 3 group 1/2 246 | 247 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=52, nlmsg_type=RTM_NEWNEXTHOP, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1669711532, nlmsg_pid=0}, {nh_family=AF_UNSPEC, nh_scope=RT_SCOPE_UNIVERSE, nh_protocol=RTPROT_UNSPEC, nh_flags=0}, [[{nla_len=8, nla_type=NHA_ID}, 3], [{nla_len=20, nla_type=NHA_GROUP}, [{id=1, weight=0}, {id=2, weight=0}]]]], iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 248 | 249 | Netlink Message Type: RTM_NEWNEXTHOP 250 | Next Hop Message: 251 | nh_family=AF_UNSPEC, 252 | nh_scope=RT_SCOPE_UNIVERSE, 253 | nh_protocol=RTPROT_UNSPEC, 254 | nh_flags=0 255 | Netlink Attribute: 256 | {nla_len=8, nla_type=NHA_ID}, 3 257 | {nla_len=20, nla_type=NHA_GROUP}, [ {id=1, weight=0}, {id=2, weight=0} ] 258 | 259 | 260 | > ip route add 10.11.12.13/32 nhid 3 261 | 262 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{nlmsg_len=44, nlmsg_type=RTM_NEWROUTE, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, 
nlmsg_seq=1669711569, nlmsg_pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [[{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13")], [{nla_len=8, nla_type=RTA_NH_ID}, "\x03\x00\x00\x00"]]], iov_len=44}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44 263 | 264 | Netlink Message Type: RTM_NEWROUTE 265 | RT Message: 266 | rtm_family=AF_INET 267 | rtm_dst_len=32 268 | rtm_src_len=0 269 | rtm_tos=0 270 | rtm_table=RT_TABLE_MAIN 271 | rtm_protocol=RTPROT_BOOT 272 | rtm_scope=RT_SCOPE_UNIVERSE 273 | rtm_type=RTN_UNICAST 274 | rtm_flags=0 275 | Netlink Attribute: 276 | {nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.12.13") 277 | {nla_len=8, nla_type=RTA_NH_ID}, "\x03\x00\x00\x00" 278 | ``` 279 | 280 | ## route add ipv6 281 | 282 | IPv6 を設定する際も同様となります。 283 | なお、 `rtm_family=AF_INET6` の場合は `RTA_DST` の長さが `AF_INET` の場合と異なる事に注意してください。 284 | 285 | ``` 286 | > ip route add 2001:db8:ffff::/64 dev veth103 287 | 288 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=56, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669814439, pid=0}, {rtm_family=AF_INET6, rtm_dst_len=64, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [{{nla_len=20, nla_type=RTA_DST}, 2001:db8:ffff::}, {{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("veth103")}]}, iov_len=56}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 56 289 | 290 | Netlink Message Type: RTM_NEWROUTE 291 | RT Message: 292 | rtm_family=AF_INET6 293 | rtm_dst_len=64 294 | rtm_src_len=0 295 | rtm_tos=0 296 | rtm_table=RT_TABLE_MAIN 297 | rtm_protocol=RTPROT_BOOT 298 | rtm_scope=RT_SCOPE_UNIVERSE 299 | rtm_type=RTN_UNICAST 300 | rtm_flags=0 301 | Netlink Attribute: 302 | {nla_len=20, nla_type=RTA_DST}, 
2001:db8:ffff:: 303 | {nla_len=8, nla_type=RTA_OIF}, if_nametoindex("veth103") 304 | ``` 305 | 306 | 307 | ## reference 308 | 309 | - https://wiki.slank.dev/book/types.html 310 | - NHA_* (Next Hop Attribute) 含む netlink attribute が1ページにまとまってる 311 | - 2022-11-23 [Netlinkと友達になろう](https://eniyo0.hatenablog.com/entry/2022/11/23/180135) 312 | - 日本語の平易な解説BLOG 313 | - 314 | -------------------------------------------------------------------------------- /linux/netlink.md: -------------------------------------------------------------------------------- 1 | # Linux Netlink 2 | 3 | > 特に言及がない場合は Linux v6.0 をベースに解説しています 4 | 5 | Linux の netlink について解説します。 6 | netlink を利用した ip route 設定の具体例は、別ページ [netlink/rtnetlink - Next Hop Object & Next Hop Group](./netlink-nexthop.md) にて解説しています。 7 | 8 | - [netlink とは?](#netlink-とは) 9 | - [netlink/rtnetlink を学習するモチベーション](#netlinkrtnetlink-を学習するモチベーション) 10 | - [netlink message のフォーマット](#netlink-message-のフォーマット) 11 | - [Routing message attributes `RTA_*`](#routing-message-attributes-rta_) 12 | - [Next Hop Attribute `NHA_*`](#next-hop-attribute-nha_) 13 | - [iproute2](#iproute2) 14 | - [References](#references) 15 | 16 | 17 | ## netlink とは? 
18 | 19 | netlink とは、 Linux Kernel と情報のやり取りをするために利用されるインタフェース(API)、もしくはそれを提供するサブシステムです。 20 | netlink は Socket を利用するため、TCP/UDP等の Socket プログラミングに馴染みある技術者であれば学習コストが少ないというメリットもあります。 21 | 22 | netlink はプロトコル `protocol` と呼ばれる機能毎にグルーピングされます。 23 | 今回解説する IP Routing / Neighbor に関連した機能は `rtnetlink` と呼ばれ `protocol == NETLINK_ROUTE` という分類に所属します。 24 | 25 | netlink を利用するためには、socket システムコール `int socket(int domain, int type, int protocol)` の第一引数に `AF_NETLINK` 、第二引数に `SOCK_RAW`、第三引数に `protocol` を指定します。 26 | 27 | ``` 28 | fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 29 | ``` 30 | 31 | ## netlink/rtnetlink を学習するモチベーション 32 | 33 | White Box Switch 等の上で動作する Network OS は、 netlink を利用してホストOSや routing application (OSPF/ISIS/BGPの機能を提供するプロセスやコンテナ) から様々な情報を受け取る場合も多く、Linux Server だけでなくスイッチやルータの実装を知るためにも重要な機能です。 34 | 例えば SONiC では、netlink を利用して Linux Kernel からポートやネイバーの情報を取得する他にも、FRR が動作する bgp container 内では FPM (Forwarding Plane Manager) との通信にも利用されています。 35 | 36 | そのため、ネットワークに関連した幅広い技術者にとって学ぶべき基本的な技術と考えられます。 37 | 38 | 39 | 図:ネットリンクを活用している NOS の例 40 | (引用:Linux Plumbers Conf 2019: David Ahern: [nexthop-objects-talk.pdf](https://lpc.events/event/4/contributions/434/attachments/251/436/nexthop-objects-talk.pdf)) 41 | 42 | ![netlink-nexthop-nos.png](figures/netlink-nexthop-nos.png) 43 | 44 | 45 | ## netlink message のフォーマット 46 | 47 | netlink では `nlmsg_type` (netlink message type) でメッセージの種類を指定し、その後にメッセージタイプに応じたメッセージ(`rtmsg` `nhmsg` 等)、Netlink Attribute と続きます。 48 | 49 | 図:netlink message 概要 TODO 50 | 51 | 例えば、ip route の設定(追加)では `nlmsg_type` として `RTM_NEWROUTE` や `RTM_NEWNEXTHOP` を利用します。 52 | それぞれのメッセージは以下 `struct rtmsg` `struct nhmsg` 構造体で規定されたフォーマットを取り、それに続く Attribute として Routing Message Attributes `RTA_*` や Next Hop Attributes `NHA_*` を利用します。 53 | 54 | | nlmsg_type | message 構造体 | netlink attr | 55 | | -------------- | -------------- | ------------ | 56 | | RTM_NEWROUTE | struct rtmsg | RTA_* | 57 | | RTM_NEWNEXTHOP | struct nhmsg | NHA_* | 58 | 59 | 60 | ```c 61 | //
include/uapi/linux/rtnetlink.h 62 | struct rtmsg { 63 | unsigned char rtm_family; 64 | unsigned char rtm_dst_len; 65 | unsigned char rtm_src_len; 66 | unsigned char rtm_tos; 67 | 68 | unsigned char rtm_table; /* Routing table id */ 69 | unsigned char rtm_protocol; /* Routing protocol; see below */ 70 | unsigned char rtm_scope; /* See below */ 71 | unsigned char rtm_type; /* See below */ 72 | 73 | unsigned rtm_flags; 74 | }; 75 | ``` 76 | 77 | ```c 78 | // include/uapi/linux/nexthop.h 79 | struct nhmsg { 80 | unsigned char nh_family; 81 | unsigned char nh_scope; /* return only */ 82 | unsigned char nh_protocol; /* Routing protocol that installed nh */ 83 | unsigned char resvd; 84 | unsigned int nh_flags; /* RTNH_F flags */ 85 | }; 86 | ``` 87 | 88 | ### Routing message attributes `RTA_*` 89 | 90 | Netlink Message `RTM_NEWROUTE` 等に含まれる Routing Message Attributes `RTA_*` の主なものについて、その意味を以下に記載します。 91 | 92 | 93 | | RTA_* | Value Type | 値 | 説明 | 94 | | :------------: | :-----------: | :--------------: | :-------------------------------------------------- | 95 | | RTA_DST | | | 宛先アドレス。 rtm_family に応じて Type が変わる | 96 | | RTA_OIF | NLA_U32 | ID (device) | 送信先インターフェース | 97 | | RTA_GATEWAY | NLA_U32, etc. | | Gateway のアドレス:IPv4(NLA_U32), IPv6(???)
| 98 | | RTA_MULTIPATH | NLA_NESTED | rtnexthop の配列 | (rtnexthop + RTA_GATEWAY) の配列 | 99 | | RTA_TABLE | NLA_U32 | ID (table) | route が属するテーブル | 100 | | RTA_ENCAP_TYPE | NLA_U16 | | lwt encap type | 101 | | RTA_ENCAP | NLA_NESTED | | lwt encap data | 102 | | RTA_NH_ID | NLA_U32 | ID (nexthop) | RTM_NEWNEXTHOP 等で作成された Next Hop Object の ID | 103 | 104 | - `RTA_*` の完全な一覧は `enum rtattr_type_t` の定義を参照 105 | - https://elixir.bootlin.com/linux/v6.0/source/include/uapi/linux/rtnetlink.h#L360 106 | - `RTA_*` は `rtm_family` などのコンテキスト毎に Type が変化するものが存在 107 | - `NHA_*` と異なり、`nla_policy` で Type 等が規定されていないものが見受けられるため、どのような型を取るかはコンテキスト毎に要確認 108 | - `rtmsg rtm_table` や `RTA_TABLE` で利用される route table の一覧は `cat /etc/iproute2/rt_tables` から取得可能 109 | 110 | RTA_MULTIPATH に含まれる rtnexthop 構造体。 111 | この Attribute として RTA_GATEWAY が続く。 112 | 113 | ```c 114 | // include/uapi/linux/rtnetlink.h 115 | struct rtnexthop { 116 | unsigned short rtnh_len; 117 | unsigned char rtnh_flags; 118 | unsigned char rtnh_hops; 119 | int rtnh_ifindex; 120 | }; 121 | ``` 122 | 123 | 参考:RTA_MULTIPATH の例。RTA_NH_ID が 124 | 125 | ``` 126 | NlMsghdr | Len:88, Type:RTM_NEWROUTE, Flags:600, Seq:1669611617, Pid:167576 127 | rtmsg: {2 32 0 0 254 3 0 1 0} 128 | rtmsg: RtMsg | 129 | Family: AF_INET (2) 130 | Dst_len: 32 131 | Src_len: 0 132 | Tos: 0 133 | Table: 254 134 | Protocol: RTPROT_BOOT (3) 135 | Scope: RT_SCOPE_UNIVERSE (0) 136 | Type: RTN_UNICAST (1) 137 | Flags: 0 138 | RtAttr | Len:8, Type:RTA_TABLE, Value:254 139 | RtAttr | Len:8, Type:RTA_DST, IPv4:10.11.12.13 140 | RtAttr | Len:8, Type:RTA_NH_ID, Value:3 141 | RtAttr | Len:36, Type:RTA_MULTIPATH 142 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 143 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.172 144 | | rtnexthop: Len:16, Flags:0, Hops:0, Ifindex:5 145 | | RTA: Len:8, Type:RTA_GATEWAY, IPv4:172.20.105.173 146 | ``` 147 | 148 | ### Next Hop Attribute `NHA_*` 149 | 150 | Netlink Message `RTM_NEWNEXTHOP` 等に含まれる Next Hop Attribute `NHA_*` 
とその意味を以下に記載します。 151 | 152 | 153 | | NHA_* | Value Type | 値 | 説明 | 154 | | :------------: | :--------: | :----------------: | :------------------------------------------------------------------------------- | 155 | | NHA_ID | NLA_U32 | ID (nexthop) | id == 0 の場合は自動採番(auto-assign) | 156 | | NHA_GROUP | NLA_BINARY | nexthop_grp の配列 | nexthop_grp には nexthop id, weight が含まれる [1] | 157 | | NHA_GROUP_TYPE | NLA_U16 | NEXTHOP_GRP_TYPE | NHA_GROUP が存在する場合、この attribute のみ利用可能 | 158 | | NHA_BLACKHOLE | NLA_FLAG | - | OIF, GATEWAY, ENCAP と同時利用は不可 | 159 | | NHA_OIF | NLA_U32 | ID (device) | dump で特定の device を使用する nexthop のみを返信するよう指定可能 | 160 | | NHA_GATEWAY | NLA_BINARY | be32, in6_addr | Gateway のアドレス: IPv4(be32) もしくは IPv6(in6_addr) | 161 | | NHA_ENCAP_TYPE | NLA_U16 | TODO | lwt encap type | 162 | | NHA_ENCAP | NLA_NESTED | TODO | lwt encap data | 163 | | NHA_GROUPS | NLA_FLAG | - | dump で nexthop group のみを返信するよう指定 | 164 | | NHA_MASTER | NLA_U32 | ID (master dev) | dump で master dev に所属する nexthop のみ返信するよう指定 | 165 | | NHA_FDB | NLA_FLAG | - | nexthop が bridge fdb に所属する事を示す。OIF, BLACKHOLE, ENCAP と同時利用は不可 | 166 | | NHA_RES_GROUP | NLA_NESTED | TODO | resilient nexthop group attributes | 167 | | NHA_RES_BUCKET | NLA_NESTED | TODO | nexthop bucket attributes | 168 | 169 | 170 | - "説明" は `include/uapi/linux/nexthop.h` のコメントに記載された内容の和訳サマリ 171 | - "Value Type" は値の型に関する指定 172 | - 値以外にも、長さや最初に存在すべき Attribute など、満たすべき条件に関する情報は `rtm_nh_policy_new` に定義されている 173 | - `rtm_nh_policy_new` 等、 `static struct nla_policy xxx[]`の型を持つ配列は netlink attribute 毎に定義されており validation に利用される 174 | - `NLA_U32` 等、各条件の意味に関しては `include/net/netlink.h` の `struct nla_policy` に関するコメントに詳細が記載されている 175 | - https://elixir.bootlin.com/linux/v6.0/source/include/net/netlink.h#L208 176 | - NEXTHOP_GRP_TYPE は以下2種類 177 | - NEXTHOP_GRP_TYPE_MPATH ... Hash/Threshold タイプのNHG(指定無い場合のデフォルト値) 178 | - NEXTHOP_GRP_TYPE_RES ...
Resilient Nexthop Group(別途解説予定) 179 | - NHA_RES_GROUP, NHA_RES_BUCKET は Resilient Nexthop Group に関連した Attribute 180 | 181 | [1] 参考: `NHA_GROUP` の Value の定義 182 | 183 | ```c 184 | // include/uapi/linux/nexthop.h 185 | /* entry in a nexthop group */ 186 | struct nexthop_grp { 187 | __u32 id; /* nexthop id - must exist */ 188 | __u8 weight; /* weight of this nexthop */ 189 | __u8 resvd1; 190 | __u16 resvd2; 191 | }; 192 | ``` 193 | 194 | ## iproute2 195 | 196 | rtnetlink を多用するツールとして iproute2 が挙げられます。 197 | iproute2 の中の ip コマンドを用いて、netlink がどのように利用されているか見る事ができます。 198 | 199 | 例えば以下例では、netlink を用いて static route を追加しています。 200 | 201 | - `sendmsg` 202 | - type=RTM_NEWROUTE 203 | - rtm_family=AF_INET, rtm_dst_len=32 204 | - {nla_len=8, nla_type=RTA_DST}, inet_addr("10.10.10.10")} 205 | - {nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174") 206 | 207 | ``` 208 | # strace ip route add 10.10.10.10/32 via 172.20.105.174 dev eno1 209 | ... 210 | socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 211 | ... 212 | bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0 213 | ... 
214 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669630667, pid=0}, {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, [{{nla_len=8, nla_type=RTA_DST}, inet_addr("10.10.10.10")}, {{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.105.174")}, {{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("eno1")}]}, iov_len=52}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 215 | ``` 216 | 217 | ## References 218 | 219 | - [Kernel Korner - Why and How to Use Netlink Socket](https://www.linuxjournal.com/article/7356) 220 | - [Blog: Debugging netlink requests](https://jvns.ca/blog/2017/09/03/debugging-netlink-requests/) 221 | - [sock_diag(7) — Linux manual page](https://man7.org/linux/man-pages/man7/sock_diag.7.html) 222 | - [Qiita: netlinkファンのためのnlmon](https://qiita.com/kwi/items/991c3bd01889db45307e) 223 | - [Blog: Taming Netlink](https://dtucker.co.uk/blog/taming-netlink/) 224 | - How to debug netlink using iproute2, strace, nltrace 225 | - [https://github.com/ebiken/doc-network/wiki/](https://github.com/ebiken/doc-network/wiki/Linux-Basics-:-NETLINK) 226 | - old and incomplete personal memo about netlink 227 | - tcptrack 228 | - [github source (C++)](https://github.com/bchretien/tcptrack) 229 | - [ubuntu manpage](https://manpages.ubuntu.com/manpages/focal/man1/tcptrack.1.html) 230 | - [Blog: 2022-01-11 Netlinkプログラミングの書き方](https://hana-shin.hatenablog.com/entry/2022/01/11/212403) 231 | - [gwind/go-netlink-socket-monitor.go](https://gist.github.com/gwind/05f5f649d93e6015cf47ffa2b2fd9713) 232 | - [Parsing the RTA_MULTIPATH attribute from Rtnetlink Jan 25, 2018](https://ederlf.website/post/netlink-multipath/) 233 | - [gist: cl4u2: Linux route monitoring 
example](https://gist.github.com/cl4u2/5204374) 234 | - Example code (including `route_monitor.c`) 235 | 236 | 237 | 238 | -------------------------------------------------------------------------------- /linux/rtnetlink.md: -------------------------------------------------------------------------------- 1 | # rtnetlink: NETLINK_ROUTE 2 | 3 | This is a WIP draft. 4 | 5 | Refer to [/examples/gonlsub/](/examples/gonlsub/) for the RTNETLINK explanation & sample code. 6 | 7 | 8 | ## memo 9 | 10 | - NETLINK TYPE for NETLINK_ROUTE is 0 11 | - https://sites.uclouvain.be/SystInfo/usr/include/linux/netlink.h 12 | - `#define NETLINK_ROUTE 0 /* Routing/device hook */` 13 | - RTM_NEWROUTE: 24 (0x18) 14 | - tshark: `Message type: Add network route (24)` 15 | - xxx 16 | 17 | ### RTM_NEWROUTE 18 | 19 | dump using gonlsub.go 20 | 21 | ``` 22 | > msgs, from, err := nlSock.Receive() 23 | 24 | ebiken@dcig171:~/sandbox/nsdevnotes/examples/gonlsub$ go run gonlsub.go 25 | Starting gonlsub.go 26 | msgs: [{{60 24 1536 1669519338 130693} [2 32 0 0 254 3 0 1 0 0 0 0 8 0 15 0 254 0 0 0 8 0 1 0 10 11 11 99 8 0 5 0 172 20 104 1 8 0 4 0 5 0 0 0]}] 27 | from: &{16 0 0 64 {0 0 0 0}} 28 | err: 29 | ``` 30 | 31 | strace 32 | 33 | ``` 34 | >>> root@dcig171:/home/ebiken# strace ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1 35 | ``` 36 | 37 | ```json 38 | sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, 39 | msg_iov=[{iov_base={ 40 | {len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669520597, pid=0}, 41 | {rtm_family=AF_INET, rtm_dst_len=32, rtm_src_len=0, rtm_tos=0, rtm_table=RT_TABLE_MAIN, rtm_protocol=RTPROT_BOOT, rtm_scope=RT_SCOPE_UNIVERSE, rtm_type=RTN_UNICAST, rtm_flags=0}, 42 | [ 43 | {{nla_len=8, nla_type=RTA_DST}, inet_addr("10.11.11.99")}, 44 | {{nla_len=8, nla_type=RTA_GATEWAY}, inet_addr("172.20.104.1")}, 45 | {{nla_len=8, nla_type=RTA_OIF}, if_nametoindex("eno1")} 46 | ] 47 | }, iov_len=52 48 | }], 49 |
msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52 50 | 51 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, 52 | msg_iov=[{iov_base=NULL, iov_len=0}], 53 | msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36 54 | 55 | recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, 56 | msg_iov=[{iov_base={ 57 | {len=36, type=NLMSG_ERROR, flags=NLM_F_CAPPED, seq=1669520597, pid=132894}, 58 | {error=0, msg={len=52, type=RTM_NEWROUTE, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1669520597, pid=0}} 59 | }, iov_len=32768 60 | }], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36 61 | ``` 62 | 63 | tshark example of RTM_NEWROUTE 64 | 65 | ``` 66 | > ebiken@dcig171:~$ sudo ip route add 10.11.11.99/32 via 172.20.104.1 dev eno1 67 | > 10.11.11.99 => hex: 0a0b0b63 68 | 69 | Frame 15: 68 bytes on wire (544 bits), 68 bytes captured (544 bits) on interface nlmon0, id 0 70 | Linux netlink (cooked header) 71 | Link-layer address type: Netlink (824) 72 | Family: Route (0x0000) 73 | Linux rtnetlink (route netlink) protocol 74 | Netlink message header (type: Add network route) 75 | Length: 52 76 | Message type: Add network route (24) 77 | Flags: 0x0605 78 | .... .... .... ...1 = Request: 1 79 | .... .... .... ..0. = Multipart message: 0 80 | .... .... .... .1.. = Ack: 1 81 | .... .... .... 0... = Echo: 0 82 | .... .... ...0 .... = Dump inconsistent: 0 83 | .... .... ..0. .... = Dump filtered: 0 84 | .... ...0 .... .... = Specify tree root: 0 85 | .... ..1. .... .... = Return all matching: 1 86 | .... .1.. .... .... = Atomic: 1 87 | Flags: 0x0605 88 | .... .... .... ...1 = Request: 1 89 | .... .... .... ..0. = Multipart message: 0 90 | .... .... .... .1.. = Ack: 1 91 | .... .... .... 0... = Echo: 0 92 | .... .... ...0 .... = Dump inconsistent: 0 93 | .... .... ..0. .... = Dump filtered: 0 94 | .... ...0 .... .... = Replace: 0 95 | .... ..1. .... .... 
= Excl: 1 96 | .... .1.. .... .... = Create: 1 97 | .... 0... .... .... = Append: 0 98 | Sequence: 1669518030 99 | Port ID: 0 100 | Address family: AF_INET (2) 101 | Length of destination: 32 102 | Length of source: 0 103 | TOS filter: 0x00 104 | Routing table ID: 254 105 | Routing protocol: boot (0x03) 106 | Route origin: global route (0x00) 107 | Route type: Gateway or direct route (0x01) 108 | Route flags: 0x00000000 109 | Attribute: Route destination address 110 | Len: 8 111 | Type: 0x0001, Route destination address (1) 112 | 0... .... .... .... = Nested: 0 113 | .0.. .... .... .... = Network byte order: 0 114 | Attribute type: Route destination address (1) 115 | Data: 0a0b0b63 116 | Attribute: Gateway of the route 117 | Len: 8 118 | Type: 0x0005, Gateway of the route (5) 119 | 0... .... .... .... = Nested: 0 120 | .0.. .... .... .... = Network byte order: 0 121 | Attribute type: Gateway of the route (5) 122 | Data: ac146801 123 | Attribute: Output interface index: 5 124 | Len: 8 125 | Type: 0x0004, Output interface index (4) 126 | 0... .... .... .... = Nested: 0 127 | .0.. .... .... .... 
= Network byte order: 0 128 | Attribute type: Output interface index (4) 129 | Output interface index: 5 130 | ``` 131 | 132 | ## Const definitions 133 | 134 | https://sites.uclouvain.be/SystInfo/usr/include/linux/rtnetlink.h.html 135 | 136 | ```c 137 | enum { 138 | RTM_BASE = 16, 139 | #define RTM_BASE RTM_BASE 140 | 141 | RTM_NEWLINK = 16, 142 | #define RTM_NEWLINK RTM_NEWLINK 143 | RTM_DELLINK, 144 | #define RTM_DELLINK RTM_DELLINK 145 | RTM_GETLINK, 146 | #define RTM_GETLINK RTM_GETLINK 147 | RTM_SETLINK, 148 | #define RTM_SETLINK RTM_SETLINK 149 | 150 | RTM_NEWADDR = 20, 151 | #define RTM_NEWADDR RTM_NEWADDR 152 | RTM_DELADDR, 153 | #define RTM_DELADDR RTM_DELADDR 154 | RTM_GETADDR, 155 | #define RTM_GETADDR RTM_GETADDR 156 | 157 | RTM_NEWROUTE = 24, 158 | #define RTM_NEWROUTE RTM_NEWROUTE 159 | RTM_DELROUTE, 160 | #define RTM_DELROUTE RTM_DELROUTE 161 | RTM_GETROUTE, 162 | #define RTM_GETROUTE RTM_GETROUTE 163 | ... snip ... 164 | } 165 | ``` -------------------------------------------------------------------------------- /network/frrouting/memo-frr.md: -------------------------------------------------------------------------------- 1 | # Memo: FRRouting 2 | 3 | ## dataplane context objects 4 | 5 | "dataplane context objects" is something you should understand when first working on FRRouting dataplane. 6 | 7 | > [FRR latest documentation >> Zebra >> Design](https://docs.frrouting.org/projects/dev-guide/en/latest/zebra.html#design) 8 | > With our dataplane abstraction, we create a queue of dataplane context objects for the messages we want to send to the kernel. In a separate pthread, we loop over this queue and send the context objects to the appropriate dataplane. A batching enhancement tightly integrates with the dataplane context objects so they are able to be batch sent to dataplanes that support it. 
9 | 10 | -------------------------------------------------------------------------------- /network/nic-modules.md: -------------------------------------------------------------------------------- 1 | # NIC and Optical Modules 2 | 3 | - TODO: SFP, QSFP28, QSFP28-DD 等の早見表(チートシート) 4 | - どんなケーブルやモジュールで、どれとどれが接続可能か? 5 | - QSFP-DD (28G/50G x8) と QSFP56 (50G x4) や QSFP28 (25G x4) は接続可能? 6 | 7 | Notes 8 | 9 | - QSFP56 MT28908 Family [ConnectX-6] 10 | - https://community.fs.com/blog/differences-between-qsfp-dd-and-qsfp-qsfp28-qsfp56-osfp-cfp8-cobo.html -------------------------------------------------------------------------------- /network/perf-local/README.md: -------------------------------------------------------------------------------- 1 | # Performance measurement 2 | 3 | - [L2 bridge between two VM (Ubuntu Multipass)](#l2-bridge-between-two-vm-ubuntu-multipass) 4 | - [Result](#result) 5 | - [VM Config](#vm-config) 6 | - [Host Interface (bridge)](#host-interface-bridge) 7 | - [WIP: L3 Routing among 3 VMs (Ubuntu Multipass)](#wip-l3-routing-among-3-vms-ubuntu-multipass) 8 | - [L3 routing between netns](#l3-routing-between-netns) 9 | - [Specs](#specs) 10 | - [Host 171](#host-171) 11 | 12 | 13 | ## L2 bridge between two VM (Ubuntu Multipass) 14 | 15 | Create two VMs 16 | 17 | ``` 18 | multipass launch 22.04 --name vm1 --cpus 16 --disk 20G --memory 16G 19 | multipass launch 22.04 --name vm2 --cpus 16 --disk 20G --memory 16G 20 | ``` 21 | 22 | Login and install iperf3 23 | 24 | ``` 25 | sudo apt update 26 | sudo apt install -y iperf3 27 | ``` 28 | 29 | Run iperf 30 | 31 | ``` 32 | > vm1: server 33 | 34 | iperf3 -s 35 | 36 | > vm2: client 37 | 38 | iperf3 -c 10.28.65.177 -p 5201 39 | ``` 40 | 41 | ### Result 42 | 43 | ``` 44 | ubuntu@vm2:~$ iperf3 -c 10.28.65.177 -p 5201 45 | Connecting to host 10.28.65.177, port 5201 46 | [ 5] local 10.28.65.38 port 42840 connected to 10.28.65.177 port 5201 47 | [ ID] Interval Transfer Bitrate Retr Cwnd 48 | [ 5] 0.00-1.00 sec 1.48
GBytes 12.7 Gbits/sec 0 3.13 MBytes 49 | [ 5] 1.00-2.00 sec 1.61 GBytes 13.8 Gbits/sec 0 3.13 MBytes 50 | [ 5] 2.00-3.00 sec 1.71 GBytes 14.6 Gbits/sec 0 3.13 MBytes 51 | [ 5] 3.00-4.00 sec 1.57 GBytes 13.5 Gbits/sec 0 3.13 MBytes 52 | [ 5] 4.00-5.00 sec 1.46 GBytes 12.5 Gbits/sec 0 3.13 MBytes 53 | [ 5] 5.00-6.00 sec 1.50 GBytes 12.9 Gbits/sec 0 3.13 MBytes 54 | [ 5] 6.00-7.00 sec 1.55 GBytes 13.3 Gbits/sec 0 3.13 MBytes 55 | [ 5] 7.00-8.00 sec 1.51 GBytes 12.9 Gbits/sec 0 3.13 MBytes 56 | [ 5] 8.00-9.00 sec 1.42 GBytes 12.2 Gbits/sec 0 3.13 MBytes 57 | [ 5] 9.00-10.00 sec 1.33 GBytes 11.4 Gbits/sec 0 3.13 MBytes 58 | - - - - - - - - - - - - - - - - - - - - - - - - - 59 | [ ID] Interval Transfer Bitrate Retr 60 | [ 5] 0.00-10.00 sec 15.1 GBytes 13.0 Gbits/sec 0 sender 61 | [ 5] 0.00-10.04 sec 15.1 GBytes 12.9 Gbits/sec receiver 62 | ``` 63 | 64 | ### VM Config 65 | 66 | ``` 67 | ubuntu@vm1:~$ ip a show ens3 68 | 2: ens3: mtu 1500 qdisc fq_codel state UP group default qlen 1000 69 | link/ether 52:54:00:70:c4:c6 brd ff:ff:ff:ff:ff:ff 70 | altname enp0s3 71 | inet 10.28.65.177/24 metric 100 brd 10.28.65.255 scope global ens3 72 | valid_lft forever preferred_lft forever 73 | inet6 fe80::5054:ff:fe70:c4c6/64 scope link 74 | valid_lft forever preferred_lft forever 75 | 76 | ubuntu@vm1:~$ ethtool -i ens3 77 | driver: virtio_net 78 | version: 1.0.0 79 | firmware-version: 80 | expansion-rom-version: 81 | bus-info: 0000:00:03.0 82 | supports-statistics: yes 83 | supports-test: no 84 | supports-eeprom-access: no 85 | supports-register-dump: no 86 | supports-priv-flags: no 87 | ``` 88 | 89 | ``` 90 | ubuntu@vm2:~$ ip a show ens3 91 | 2: ens3: mtu 1500 qdisc fq_codel state UP group default qlen 1000 92 | link/ether 52:54:00:4d:4f:79 brd ff:ff:ff:ff:ff:ff 93 | altname enp0s3 94 | inet 10.28.65.38/24 metric 100 brd 10.28.65.255 scope global ens3 95 | valid_lft forever preferred_lft forever 96 | inet6 fe80::5054:ff:fe4d:4f79/64 scope link 97 | valid_lft forever preferred_lft
forever 98 | 99 | ubuntu@vm2:~$ ethtool -i ens3 100 | driver: virtio_net 101 | version: 1.0.0 102 | firmware-version: 103 | expansion-rom-version: 104 | bus-info: 0000:00:03.0 105 | supports-statistics: yes 106 | supports-test: no 107 | supports-eeprom-access: no 108 | supports-register-dump: no 109 | supports-priv-flags: no 110 | ``` 111 | 112 | ### Host Interface (bridge) 113 | 114 | ``` 115 | 12: mpqemubr0: mtu 1500 qdisc noqueue state UP group default qlen 1000 116 | link/ether 52:54:00:2e:d7:c8 brd ff:ff:ff:ff:ff:ff 117 | inet 10.28.65.1/24 brd 10.28.65.255 scope global mpqemubr0 118 | valid_lft forever preferred_lft forever 119 | inet6 fe80::5054:ff:fe2e:d7c8/64 scope link 120 | valid_lft forever preferred_lft forever 121 | ``` 122 | 123 | ## WIP: L3 Routing among 3 VMs (Ubuntu Multipass) 124 | 125 | > how to change multipass backend to lxd https://qiita.com/ynott/items/01e2913539c664b6559d 126 | > TODO: virsh でやった方が楽? 127 | 128 | Create bridge 129 | 130 | ``` 131 | sudo ip link add br0 type bridge 132 | sudo ip link add br1 type bridge 133 | ``` 134 | Create two VMs 135 | 136 | ``` 137 | multipass launch 22.04 --name vm13 --cpus 16 --disk 20G --memory 16G --network br0 138 | multipass launch 22.04 --name vm2 --cpus 16 --disk 20G --memory 16G 139 | ``` 140 | 141 | 142 | ## L3 routing between netns 143 | 144 | Create two netns.
145 | Run iperf 146 | 147 | ``` 148 | TBD 149 | ``` 150 | 151 | 152 | ## Specs 153 | 154 | ### Host 171 155 | 156 | ``` 157 | $ lscpu 158 | Architecture: x86_64 159 | CPU op-mode(s): 32-bit, 64-bit 160 | Byte Order: Little Endian 161 | Address sizes: 46 bits physical, 48 bits virtual 162 | CPU(s): 64 163 | On-line CPU(s) list: 0-63 164 | Thread(s) per core: 2 165 | Core(s) per socket: 16 166 | Socket(s): 2 167 | NUMA node(s): 2 168 | Vendor ID: GenuineIntel 169 | CPU family: 6 170 | Model: 85 171 | Model name: Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz 172 | Stepping: 7 173 | CPU MHz: 1000.420 174 | CPU max MHz: 3900.0000 175 | CPU min MHz: 1000.0000 176 | BogoMIPS: 4600.00 177 | ``` 178 | -------------------------------------------------------------------------------- /network/ribfib/frrouting.md: -------------------------------------------------------------------------------- 1 | # Rib/Fib notes: FRRouting 2 | 3 | 4 | ``` 5 | [[Routingd]] -> ((ROUTEs)) -> [RIB](ZEBRA)[FIB] -> [dataplane] 6 | ``` 7 | 8 | > In FRR, the Routing Information Base (RIB) resides inside zebra. Routing protocols communicate their best routes to zebra, and zebra computes the best route across protocols for each prefix. This latter information makes up the Forwarding Information Base (FIB). Zebra feeds the FIB to the kernel. 
9 | 10 | 11 | ## TODO 12 | 13 | - Rib/Fib それぞれの構造体を抜き出す 14 | - Rib -> Fib の流れを調べる 15 | - 16 | 17 | ## Reference 18 | 19 | - [An abstract workflow for BGP implementations October 7, 2020](https://pluginized-protocols.org/xbgp/2020/10/07/xbgp.html) 20 | 21 | ## Related Source Code 22 | 23 | - https://github.com/FRRouting/frr/blob/master/zebra/rib.h 24 | 25 | ## memo 26 | 27 | ### Cisco RIB, CEF and FIB 28 | 29 | [RIBs and FIBs (aka IP Routing Table and CEF Table)](https://blog.ipspace.net/2010/09/ribs-and-fibs.html), updated on Dec 26, 2020 (original posted on Sep 2010) 30 | 31 | - Ideally, we would use RIB to forward IP packets, but we can’t as some entries in it (static routes and BGP routes) could have next hops that are not directly connected. 32 | - BGP route has no outgoing interface and its next hop is not directly connected; the router has to perform recursive lookups to find the outgoing interface 33 | - Forwarding Information Base (FIB) and Cisco Express Forwarding (CEF) were introduced to make layer-3 switching performance consistent. When IP routes are copied from RIB to FIB, their next hops are resolved, outgoing interfaces are computed and multiple entries are created when the next-hop resolution results in multiple paths to the same destination. 34 | - For example, when the BGP route from the previous printout is inserted into FIB, its next-hop is changed to point to the actual next-hop router. The information about the recursive next-hop is retained, as it allows the router to update the FIB (CEF table) without rescanning and recomputing the whole RIB if the path toward the BGP next-hop changes. 35 | 36 | ``` 37 | RR#show ip cef 10.0.11.11 detail 38 | 10.0.11.11/32, epoch 0, flags rib only nolabel, rib defined all labels 39 | recursive via 10.0.1.1 40 | nexthop 10.0.2.1 FastEthernet0/0 label 19 41 | ``` 42 | 43 | ### Code reading 44 | 45 | 46 | ```c 47 | /* Nexthop structure. 
*/ 48 | struct rnh { 49 | uint8_t flags; 50 | #define ZEBRA_NHT_CONNECTED 0x1 51 | #define ZEBRA_NHT_DELETED 0x2 52 | #define ZEBRA_NHT_RESOLVE_VIA_DEFAULT 0x4 53 | /* VRF identifier. */ 54 | vrf_id_t vrf_id; 55 | afi_t afi; 56 | safi_t safi; 57 | uint32_t seqno; 58 | struct route_entry *state; 59 | struct prefix resolved_route; 60 | struct list *client_list; 61 | /* pseudowires dependent on this nh */ 62 | struct list *zebra_pseudowire_list; 63 | struct route_node *node; 64 | /* 65 | * if this has been filtered for the client 66 | */ 67 | int filtered[ZEBRA_ROUTE_MAX]; 68 | struct rnh_list_item rnh_list_item; 69 | } 70 | 71 | struct route_entry {} 72 | 73 | /* 74 | * Structure that represents a single destination (prefix). 75 | */ 76 | typedef struct rib_dest_t_ { } 77 | 78 | /* 79 | * rib_table_info_t 80 | * 81 | * Structure that is hung off of a route_table that holds information about 82 | * the table. 83 | */ 84 | struct rib_table_info { 85 | struct zebra_vrf *zvrf; 86 | afi_t afi; 87 | safi_t safi; 88 | uint32_t table_id; 89 | } 90 | 91 | 92 | 93 | ``` -------------------------------------------------------------------------------- /nlmon0-ssh.trc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebiken/nsdevnotes/1cd3ee56a75bfce3e826457b4c7bd69742abbc6b/nlmon0-ssh.trc -------------------------------------------------------------------------------- /p4/README.md: -------------------------------------------------------------------------------- 1 | # P4 2 | -------------------------------------------------------------------------------- /paper/README.md: -------------------------------------------------------------------------------- 1 | List of notes reading papers and related videos/slides. 
2 | 3 | -------------------------------------------------------------------------------- /paper/sigcomm24-alibaba-hpn.md: -------------------------------------------------------------------------------- 1 | # SIGCOMM'24 Alibaba HPN: A Data Center Network for Large Language Model Training 2 | 3 | - link: https://dl.acm.org/doi/10.1145/3651890.3672265 4 | 5 | ## Summary 6 | 7 | TODO 8 | 9 | ## SONiC AI Working Group - 2024/08/20 10 | 11 | - Topology 12 | - leaf spine (2 layer) 13 | - 8GPUs per server 14 | - 1024 GPUs per segment 15 | - 15 segments per Pod 16 | - 15K GPUs per pod. This was from available power in the building. 17 | - ACCL: Alibaba xCCL 18 | - Add entropy per queue pair. Can do with customized NCCL. 19 | - Using BF3 20 | - CX7 does not support Adaptive Routing (AR) 21 | - Price difference was not much so picked BF3 22 | - (not sure if this is regular BF3 or SuperNIC) 23 | - 2 NIC per GPU. 24 | - Custom Linux Kernel to send ARP req/response to both ToRs. 25 | - Alternative Marking DSCP (A.M.D) 26 | 27 | -------------------------------------------------------------------------------- /protobuf/README.md: -------------------------------------------------------------------------------- 1 | # protobuf 2 | 3 | TODO 4 | 5 | - create sample code to decode/encode varint with python: [example/varint.py](example/varint.py) 6 | -------------------------------------------------------------------------------- /protobuf/example/varint.py: -------------------------------------------------------------------------------- 1 | ### encodeVarint(), decodeVarint(), dropMSB() are explained in the BLOG below: 2 | ### https://engineering.mercari.com/blog/entry/20210921-ca19c9f371/ 3 | 4 | ### TODO: port this golang code to python.
5 | 6 | import sys, getopt 7 | 8 | def encodeVarint(val uint64) (int, []byte) { 9 | length := 1 10 | for v := val; v >= 1<<7; v >>= 7 { 11 | length++ 12 | } 13 | 14 | b := make([]byte, 0, length) 15 | for i := 0; i < length; i++ { 16 | v := val >> (7 * i) & 0x7f 17 | if i+1 != length { 18 | v |= 0x80 19 | } 20 | 21 | b = append(b, byte(v)) 22 | } 23 | 24 | return len(b), b 25 | } 26 | 27 | def decodeVarint(in io.ByteReader) (length int, n uint64, _ error) { 28 | for i := 0; ; i++ { 29 | b, err := in.ReadByte() 30 | if err != nil { 31 | return 0, 0, err 32 | } 33 | 34 | length++ 35 | 36 | v, hasNext := dropMSB(b) 37 | n |= uint64(v) << (7 * i) 38 | 39 | if !hasNext { 40 | return length, n, nil 41 | } 42 | } 43 | } 44 | 45 | def dropMSB(b byte) (_ byte, hasNext bool) { 46 | hasNext = b>>7 == 1 47 | return b & 0x7f, hasNext 48 | } 49 | 50 | if __name__ == "__main__": 51 | result = 0 52 | try: 53 | opts, args = getopt.getopt(sys.argv[1:], "e:d:") 54 | except getopt.GetoptError: 55 | print("varint.py -e or -d ") 56 | sys.exit(2) 57 | print(opts) 58 | for opt, arg in opts: 59 | if opt == "-e": 60 | result = encodeVarint(arg) 61 | elif opt == "-d": 62 | result = decodeVarint(arg) 63 | print(result) 64 | 65 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # Python 2 | 3 | ## Python Libs 4 | 5 | - https://pypi.org/project/cidr-man/ 6 | - CIDR-Man is a high-performance ipaddress subnetting library designed to replace the built-in ipaddress library. 
7 | 8 | -------------------------------------------------------------------------------- /python/pytools/sendpacket/README.md: -------------------------------------------------------------------------------- 1 | # pytools: SendPacket 2 | 3 | - read YAML file with packet information 4 | - use scapy to send packet based on the information 5 | 6 | Table of Contents 7 | - [How to Use](#how-to-use) 8 | - [Prerequisites](#prerequisites) 9 | - [YAML format example](#yaml-format-example) 10 | - [Reference](#reference) 11 | - [Example tshark output](#example-tshark-output) 12 | 13 | ## How to Use 14 | 15 | ``` 16 | $ sudo ./sendpacket.py --help 17 | Usage: sendpacket.py [OPTIONS] FILE_YAML 18 | 19 | Options: 20 | -c, --count INTEGER number of packets to send 21 | -i, --send_iface TEXT interface name to send packet 22 | -s, --show show packet details 23 | --debug show debug messages 24 | --help Show this message and exit. 25 | ``` 26 | > Note: You can also specify send_iface in yaml file. 27 | 28 | Examples 29 | 30 | ``` 31 | > send 3 packets to interface ens1f0 32 | 33 | sudo ./sendpacket.py pkt_srv6_ipv4_01.yaml -i ens1f0 -c 3 34 | 35 | > send 1 packet to interface ens1f0 or lo 36 | > show packet details (-s) 37 | > show debug messages like parsed YAML (--debug) 38 | 39 | sudo ./sendpacket.py pkt_srv6_ipv4_01.yaml -i ens1f0 -s --debug 40 | sudo ./sendpacket.py pkt_srv6_ipv6_01.yaml -i lo -s --debug 41 | ``` 42 | 43 | ## Prerequisites 44 | 45 | ``` 46 | pip3 install pyyaml scapy click 47 | ``` 48 | 49 | ## YAML format example 50 | 51 | - Field names are ones listed with `ls()` like `ls(TCP)`. 52 | - Header names (e.g. `ether`, `ipv6`) are lower case of the scapy header name with some exceptions for long ones. (e.g. `srh` for `IPv6ExtHdrSegmentRouting`) 53 | - Copy [pkt_template.yaml](pkt_template.yaml) and remove unnecessary headers/fields.
54 | 55 | ``` 56 | packet: 57 | send_iface: "lo" 58 | ether: 59 | src: "02:03:04:05:06:01" 60 | dst: "02:03:04:05:06:02" 61 | ipv6: 62 | src: "2001:db8::100" 63 | dst: "2001:db8::1" 64 | srh: #IPv6ExtHdrSegmentRouting 65 | segleft: 1 66 | addresses: 67 | - "2001:db8:4::100" 68 | - "2001:db8::1" 69 | inner_packet: 70 | ip: 71 | src: "10.0.0.100" 72 | dst: "10.10.0.100" 73 | udp: 74 | sport: 1234 75 | dport: 4321 76 | ``` 77 | 78 | ## Reference 79 | 80 | - SRH: https://datatracker.ietf.org/doc/rfc8754/ 81 | 82 | ## Example tshark output 83 | 84 | ``` 85 | $ sudo ./sendpacket.py pkt_srv6_ipv6_01.yaml -i lo -s --debug 86 | 87 | # tshark -i lo -f "not tcp" -O ipv6 88 | Running as user "root" and group "root". This could be dangerous. 89 | Capturing on 'Loopback: lo' 90 | Frame 1: 142 bytes on wire (1136 bits), 142 bytes captured (1136 bits) on interface lo, id 0 91 | Ethernet II, Src: MS-NLB-PhysServer-03_04:05:06:01 (02:03:04:05:06:01), Dst: MS-NLB-PhysServer-03_04:05:06:02 (02:03:04:05:06:02) 92 | Internet Protocol Version 6, Src: 2001:db8::100, Dst: 2001:db8::1 93 | 0110 .... = Version: 6 94 | .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) 95 | .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) 96 | .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) 97 | .... .... .... 0000 0000 0000 0000 0000 = Flow Label: 0x00000 98 | Payload Length: 88 99 | Next Header: Routing Header for IPv6 (43) 100 | Hop Limit: 64 101 | Source: 2001:db8::100 102 | Destination: 2001:db8::1 103 | Routing Header for IPv6 (Segment Routing) 104 | Next Header: IPv6 (41) 105 | Length: 4 106 | [Length: 40 bytes] 107 | Type: Segment Routing (4) 108 | Segments Left: 1 109 | First segment: 1 110 | Flags: 0x00 111 | 0... .... = Unused: 0x0 112 | .0.. .... = Protected: False 113 | ..0. .... = OAM: False 114 | ...0 .... = Alert: Not Present 115 | .... 0... 
= HMAC: Not Present 116 | .... .000 = Unused: 0x0 117 | [Expert Info (Note/Undecoded): Dissection for SRH TLVs not yet implemented] 118 | [Dissection for SRH TLVs not yet implemented] 119 | [Severity level: Note] 120 | [Group: Undecoded] 121 | Reserved: 0000 122 | Address[0]: 2001:db8:4::100 [next segment] 123 | Address[1]: 2001:db8::1 124 | [Segments in Traversal Order] 125 | Address[1]: 2001:db8::1 126 | Address[0]: 2001:db8:4::100 [next segment] 127 | Internet Protocol Version 6, Src: 2001:db8:aa::100, Dst: 2001:db8:aa::1 128 | 0110 .... = Version: 6 129 | .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT) 130 | .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0) 131 | .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0) 132 | .... .... .... 0000 0000 0000 0000 0000 = Flow Label: 0x00000 133 | Payload Length: 8 134 | Next Header: UDP (17) 135 | Hop Limit: 64 136 | Source: 2001:db8:aa::100 137 | Destination: 2001:db8:aa::1 138 | User Datagram Protocol, Src Port: 1234, Dst Port: 4321 139 | ``` -------------------------------------------------------------------------------- /python/pytools/sendpacket/pkt_srv6_ipv4_01.yaml: -------------------------------------------------------------------------------- 1 | packet: 2 | send_iface: "lo" 3 | ether: 4 | src: "02:03:04:05:06:01" 5 | dst: "02:03:04:05:06:02" 6 | ipv6: 7 | src: "2001:db8::100" 8 | dst: "2001:db8::1" 9 | srh: #IPv6ExtHdrSegmentRouting 10 | segleft: 1 11 | addresses: 12 | - "2001:db8:4::100" 13 | - "2001:db8::1" 14 | inner_packet: 15 | ip: 16 | src: "10.0.0.100" 17 | dst: "10.10.0.100" 18 | udp: 19 | sport: 1234 20 | dport: 4321 -------------------------------------------------------------------------------- /python/pytools/sendpacket/pkt_srv6_ipv6_01.yaml: -------------------------------------------------------------------------------- 1 | packet: 2 | send_iface: "lo" 3 | 
send_iface: "lo" 4 | ether: 5 | src: "02:03:04:05:06:01" 6 | dst: "02:03:04:05:06:02" 7 | ipv6: 8 | src: "2001:db8::100" 9 | dst: "2001:db8::1" 10 | srh: #IPv6ExtHdrSegmentRouting 11 | segleft: 1 12 | addresses: 13 | - "2001:db8:4::100" 14 | - "2001:db8::1" 15 | inner_packet: 16 | ipv6: 17 | src: "2001:db8:aa::100" 18 | dst: "2001:db8:aa::1" 19 | udp: 20 | sport: 1234 21 | dport: 4321 22 | -------------------------------------------------------------------------------- /python/pytools/sendpacket/pkt_template.yaml: -------------------------------------------------------------------------------- 1 | packet: 2 | send_iface: "lo" 3 | ether: 4 | dst: 5 | src: 6 | type: 7 | ipv6: 8 | version: 9 | tc: 10 | fl: 11 | plen: 12 | nh: 13 | hlim: 14 | src: 15 | dst: 16 | srh: # IPv6ExtHdrSegmentRouting 17 | nh: 18 | len: 19 | type: 20 | segleft: 21 | lastentry: 22 | unused1: 23 | protected: 24 | oam: 25 | alert: 26 | hmac: 27 | unused2: 28 | tag: 29 | addresses: 30 | tlv_objects: 31 | ip: 32 | version: 33 | ihl: 34 | tos: 35 | len: 36 | id: 37 | flags: 38 | frag: 39 | ttl: 40 | proto: 41 | chksum: 42 | src: 43 | dst: 44 | options: 45 | icmp: 46 | type: 47 | code: 48 | chksum: 49 | id: 50 | seq: 51 | ts_ori: 52 | ts_rx: 53 | ts_tx: 54 | gw: 55 | ptr: 56 | reserved: 57 | length: 58 | addr_mask: 59 | nexthopmtu: 60 | udp: 61 | sport: 62 | dport: 63 | len: 64 | chksum: 65 | tcp: 66 | sport: 67 | dport: 68 | seq: 69 | ack: 70 | dataofs: 71 | reserved: 72 | flags: 73 | window: 74 | chksum: 75 | urgptr: 76 | options: -------------------------------------------------------------------------------- /python/pytools/sendpacket/sendpacket.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import yaml 4 | from scapy.all import * 5 | import click 6 | 7 | def yaml_read(yaml_file): 8 | with open(yaml_file, 'r') as f: 9 | data = yaml.load(f, Loader=yaml.FullLoader) 10 | packet = data['packet'] 11 | return packet 12 | 13 | def 
create_ether(eth): 14 | p = Ether() 15 | if 'dst' in eth: p.dst = eth['dst'] 16 | if 'src' in eth: p.src = eth['src'] 17 | if 'type' in eth: p.type = eth['type'] 18 | return p 19 | 20 | def create_ipv6(ipv6): 21 | p = IPv6() 22 | if 'version' in ipv6: p.version = ipv6['version'] 23 | if 'tc' in ipv6: p.tc = ipv6['tc'] 24 | if 'fl' in ipv6: p.fl = ipv6['fl'] 25 | if 'plen' in ipv6: p.plen = ipv6['plen'] 26 | if 'nh' in ipv6: p.nh = ipv6['nh'] 27 | if 'hlim' in ipv6: p.hlim = ipv6['hlim'] 28 | if 'src' in ipv6: p.src = ipv6['src'] 29 | if 'dst' in ipv6: p.dst = ipv6['dst'] 30 | return p 31 | 32 | def create_srh(srh): 33 | p = IPv6ExtHdrSegmentRouting() 34 | if 'nh' in srh: p.nh = srh['nh'] 35 | if 'len' in srh: p.len = srh['len'] 36 | if 'type' in srh: p.type = srh['type'] 37 | if 'segleft' in srh: p.segleft = srh['segleft'] 38 | if 'lastentry' in srh: p.lastentry = srh['lastentry'] 39 | if 'unused1' in srh: p.unused1 = srh['unused1'] 40 | if 'protected' in srh: p.protected = srh['protected'] 41 | if 'oam' in srh: p.oam = srh['oam'] 42 | if 'alert' in srh: p.alert = srh['alert'] 43 | if 'hmac' in srh: p.hmac = srh['hmac'] 44 | if 'unused2' in srh: p.unused2 = srh['unused2'] 45 | if 'tag' in srh: p.tag = srh['tag'] 46 | if 'addresses' in srh: p.addresses = srh['addresses'] 47 | if 'tlv_objects' in srh: p.tlv_objects= srh['tlv_objects'] 48 | return p 49 | 50 | def create_ip(ip): 51 | p = IP() 52 | if 'version' in ip: p.version = ip['version'] 53 | if 'ihl' in ip: p.ihl = ip['ihl'] 54 | if 'tos' in ip: p.tos = ip['tos'] 55 | if 'len' in ip: p.len = ip['len'] 56 | if 'id' in ip: p.id = ip['id'] 57 | if 'flags' in ip: p.flags = ip['flags'] 58 | if 'frag' in ip: p.frag = ip['frag'] 59 | if 'ttl' in ip: p.ttl = ip['ttl'] 60 | if 'proto' in ip: p.proto = ip['proto'] 61 | if 'chksum' in ip: p.chksum = ip['chksum'] 62 | if 'src' in ip: p.src = ip['src'] 63 | if 'dst' in ip: p.dst = ip['dst'] 64 | if 'options' in ip: p.options = ip['options'] 65 | return p 66 | 67 | def 
create_icmp(icmp): 68 | p = ICMP() 69 | if 'type' in icmp: p.type = icmp['type'] 70 | if 'code' in icmp: p.code = icmp['code'] 71 | if 'chksum' in icmp: p.chksum = icmp['chksum'] 72 | if 'id' in icmp: p.id = icmp['id'] 73 | if 'seq' in icmp: p.seq = icmp['seq'] 74 | if 'ts_ori' in icmp: p.ts_ori = icmp['ts_ori'] 75 | if 'ts_rx' in icmp: p.ts_rx = icmp['ts_rx'] 76 | if 'ts_tx' in icmp: p.ts_tx = icmp['ts_tx'] 77 | if 'gw' in icmp: p.gw = icmp['gw'] 78 | if 'ptr' in icmp: p.ptr = icmp['ptr'] 79 | if 'reserved' in icmp: p.reserved = icmp['reserved'] 80 | if 'length' in icmp: p.length = icmp['length'] 81 | if 'addr_mask' in icmp: p.addr_mask = icmp['addr_mask'] 82 | if 'nexthopmtu' in icmp: p.nexthopmtu = icmp['nexthopmtu'] 83 | return p 84 | 85 | def create_udp(udp): 86 | p = UDP() 87 | if 'sport' in udp: p.sport = udp['sport'] 88 | if 'dport' in udp: p.dport = udp['dport'] 89 | if 'len' in udp: p.len = udp['len'] 90 | if 'chksum' in udp: p.chksum = udp['chksum'] 91 | return p 92 | 93 | def create_tcp(tcp): 94 | p = TCP() 95 | if 'sport' in tcp: p.sport = tcp['sport'] 96 | if 'dport' in tcp: p.dport = tcp['dport'] 97 | if 'seq' in tcp: p.seq = tcp['seq'] 98 | if 'ack' in tcp: p.ack = tcp['ack'] 99 | if 'dataofs' in tcp: p.dataofs = tcp['dataofs'] 100 | if 'reserved' in tcp: p.reserved = tcp['reserved'] 101 | if 'flags' in tcp: p.flags = tcp['flags'] 102 | if 'window' in tcp: p.window = tcp['window'] 103 | if 'chksum' in tcp: p.chksum = tcp['chksum'] 104 | if 'urgptr' in tcp: p.urgptr = tcp['urgptr'] 105 | if 'options' in tcp: p.options = tcp['options'] 106 | return p 107 | 108 | def create_packet(p): 109 | packet = "" 110 | ### outer packet 111 | if 'ether' in p: 112 | packet /= create_ether(p['ether']) 113 | if 'ipv6' in p: 114 | packet /= create_ipv6(p['ipv6']) 115 | if 'srh' in p: # IPv6ExtHdrSegmentRouting 116 | packet /= create_srh(p['srh']) 117 | if 'ip' in p: 118 | packet /= create_ip(p['ip']) 119 | if 'icmp' in p: 120 | packet /= create_icmp(p['icmp']) 121 | 
if 'udp' in p: 122 | packet /= create_udp(p['udp']) 123 | if 'tcp' in p: 124 | packet /= create_tcp(p['tcp']) 125 | ### inner packet 126 | # We could call recursively by defining 'packet' and 'inner_packet' on same level. 127 | # However, keeping as below just in case we want to change order or available 128 | # header between outer and inner packet. 129 | if 'inner_packet' in p: 130 | i = p['inner_packet'] 131 | if 'ether' in i: 132 | packet /= create_ether(i['ether']) 133 | if 'ipv6' in i: 134 | packet /= create_ipv6(i['ipv6']) 135 | if 'ip' in i: 136 | packet /= create_ip(i['ip']) 137 | if 'icmp' in i: 138 | packet /= create_icmp(i['icmp']) 139 | if 'udp' in i: 140 | packet /= create_udp(i['udp']) 141 | if 'tcp' in i: 142 | packet /= create_tcp(i['tcp']) 143 | 144 | return packet 145 | 146 | class sendpacket(object): 147 | def __init__(self, yaml_file='packet.yaml'): 148 | self.send_iface = 'lo' 149 | self.yaml_file = yaml_file 150 | self.packet = yaml_read(self.yaml_file) 151 | self.send_iface = 'lo' 152 | if 'send_iface' in self.packet: 153 | self.send_iface = self.packet['send_iface'] 154 | 155 | def send(self, p, c=0): 156 | sendp(p, iface=self.send_iface, count=c) 157 | 158 | @click.command() 159 | @click.argument('file_yaml') 160 | @click.option('-c', '--count', default=1, help='number of packets to send') 161 | @click.option('-i', '--send_iface', default='', help='interface name to send packet') 162 | @click.option('-s', '--show', is_flag=True, help='show packet details') 163 | @click.option('--debug', is_flag=True, help='show debug messages') 164 | def cmd(file_yaml, count, send_iface, show, debug): 165 | sp = sendpacket(file_yaml) 166 | p = create_packet(sp.packet) #scapy packet 167 | 168 | if send_iface: sp.send_iface = send_iface 169 | if debug: 170 | print("DEBUG: sp.send_iface: {}".format(sp.send_iface)) 171 | print("DEBUG: ", sp.packet) 172 | if show: p.show() 173 | 174 | try: 175 | socket.if_nametoindex(sp.send_iface) 176 | except OSError as e: 
177 | print(e) 178 | else: 179 | sp.send(p, count) 180 | 181 | if __name__ == '__main__': 182 | cmd() 183 | 184 | -------------------------------------------------------------------------------- /sonic/README.md: -------------------------------------------------------------------------------- 1 | # SONiC 2 | 3 | SONiC 関連情報は [ebiken/sonic-book](https://github.com/ebiken/sonic-book) レポジトリに集約しました 4 | -------------------------------------------------------------------------------- /tmp-notes.md: -------------------------------------------------------------------------------- 1 | # MISC NOTES 2 | 3 | > This is temporary notes which should be sorted out in future. 4 | 5 | - [Switch ASIC Buffer Size !! Un-official information !!](https://people.ucsc.edu/~warner/buffer.html) 6 | - "Azure is using Arista Switch with Tofino running VxLAN" mentioned on public website: [https://mnkcg.com/products/p4-ansible/](https://mnkcg.com/products/p4-ansible/) 7 | - An Arista network switch has shipped to Microsoft Azure cloud with support for 256K VTEP. This switch has a Tofino asic using all stages. There is no room left on the asic for any incremental P4 program merging. This switch requires a forklift upgrade to remove some existing features and add new ones. P4-Ansible has automated the forklift upgrade once a user provides what feature(s) to remove and add. 8 | 9 | ## P4 use case 10 | 11 | - P4 on SmartNIC の利用されかた。VPPをSmartNIC上で動作させ、P4をVPPにコンパイルする事で利用している人もいる。 12 | - https://groups.google.com/a/lists.p4.org/g/p4-dev/c/53IDM35BWTM/m/Of-SF6X6BgAJ 13 | - In the meeting P4 NICs were asked of. For one P4-programmable NIC, contact Marvell for an Octeon which uses vpp in software. Then, my company’s P4toVPP compiler makes the Octeon P4 programmable. Cavium/Marvell developed the first smartNIC. -- Hemant 14 | --------------------------------------------------------------------------------