├── .github └── workflows │ └── test.yml ├── .gitignore ├── .golangci.yml ├── LICENSE ├── README.md ├── constants_linux.go ├── doc.go ├── go.mod ├── go.sum ├── ipvs_linux.go ├── ipvs_linux_test.go ├── netlink_linux.go ├── netlink_linux_test.go └── ns ├── doc.go └── init_linux.go /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - master 8 | - main 9 | pull_request: 10 | branches: 11 | - master 12 | - main 13 | 14 | jobs: 15 | test: 16 | permissions: 17 | contents: read # for actions/checkout to fetch code 18 | timeout-minutes: 10 19 | 20 | strategy: 21 | matrix: 22 | # test against the "oldest" supported version and the current version 23 | # of go. Go 1.17 is kept in this matrix as it is the minimum version 24 | # specified in go.mod, and maintaining compatibility with go 1.17 is 25 | # currently not much of a burden. Most projects using this module are 26 | # using newer versions than that, so we can drop the old version if 27 | # it becomes too much of a burden. 28 | go-version: [1.17.x, 1.19.x] 29 | os: [ubuntu-latest] 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - name: Install Go 33 | uses: actions/setup-go@v3 34 | with: 35 | go-version: ${{ matrix.go-version }} 36 | - name: Checkout code 37 | uses: actions/checkout@v3 38 | - name: go mod tidy 39 | run: | 40 | go mod tidy 41 | git diff --exit-code 42 | - name: Ensure IPVS module 43 | run: | 44 | sudo modprobe ip_vs 45 | - name: Test 46 | run: | 47 | go test -exec "sudo -n" -v ./... 48 | lint: 49 | permissions: 50 | contents: read # for actions/checkout to fetch code 51 | pull-requests: read # for golangci/golangci-lint-action to fetch pull requests 52 | timeout-minutes: 10 53 | runs-on: ubuntu-latest 54 | steps: 55 | - name: Install Go 56 | uses: actions/setup-go@v3 57 | with: 58 | # We only run on the latest version of go, as some linters may be 59 | # version-dependent (for example gofmt can change between releases). 60 | go-version: 1.19.x 61 | - name: Checkout code 62 | uses: actions/checkout@v3 63 | - name: Lint 64 | uses: golangci/golangci-lint-action@v3 65 | with: 66 | version: "v1.50.1" 67 | skip-cache: true 68 | args: --print-resources-usage --timeout=5m --verbose 69 | 70 | # Optional: show only new issues if it's a pull request. The default value is `false`. 71 | # only-new-issues: true 72 | 73 | # Optional: if set to true then the action don't cache or restore ~/go/pkg. 74 | # skip-pkg-cache: true 75 | 76 | # Optional: if set to true then the action don't cache or restore ~/.cache/go-build. 77 | # skip-build-cache: true 78 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | *~ 6 | .gtm 7 | tags 8 | .DS_Store 9 | 10 | # Folders 11 | _obj 12 | _test 13 | 14 | 15 | # Architecture specific extensions/prefixes 16 | *.[568vq] 17 | [568vq].out 18 | 19 | *.cgo1.go 20 | *.cgo2.c 21 | _cgo_defun.c 22 | _cgo_gotypes.go 23 | _cgo_export.* 24 | 25 | _testmain.go 26 | 27 | *.exe 28 | *.test 29 | *.prof 30 | 31 | # Coverage 32 | *.tmp 33 | *.coverprofile 34 | 35 | # IDE files and folders 36 | .project 37 | .settings/ 38 | .idea/ 39 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | disable-all: true 3 | enable: 4 | - gofmt 5 | - govet 6 | - ineffassign 7 | - misspell 8 | - revive 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ipvs - networking for containers 2 | 3 | ![Test](https://github.com/moby/ipvs/workflows/Test/badge.svg) [![GoDoc](https://godoc.org/github.com/moby/ipvs?status.svg)](https://godoc.org/github.com/moby/ipvs) [![Go Report Card](https://goreportcard.com/badge/github.com/moby/ipvs)](https://goreportcard.com/report/github.com/moby/ipvs) 4 | 5 | ipvs provides a native Go implementation for communicating with IPVS kernel module using a netlink socket. 6 | 7 | 8 | #### Using ipvs 9 | 10 | ```go 11 | import ( 12 | "log" 13 | 14 | "github.com/moby/ipvs" 15 | ) 16 | 17 | func main() { 18 | handle, err := ipvs.New("") 19 | if err != nil { 20 | log.Fatalf("ipvs.New: %s", err) 21 | } 22 | svcs, err := handle.GetServices() 23 | if err != nil { 24 | log.Fatalf("handle.GetServices: %s", err) 25 | } 26 | } 27 | ``` 28 | 29 | ## Contributing 30 | 31 | Want to hack on ipvs? [Docker's contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md) apply. 32 | 33 | ## Copyright and license 34 | 35 | Copyright 2015 Docker, inc. Code released under the [Apache 2.0 license](LICENSE). 36 | -------------------------------------------------------------------------------- /constants_linux.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | 3 | const ( 4 | genlCtrlID = 0x10 5 | ) 6 | 7 | // GENL control commands 8 | const ( 9 | genlCtrlCmdUnspec uint8 = iota 10 | genlCtrlCmdNewFamily 11 | genlCtrlCmdDelFamily 12 | genlCtrlCmdGetFamily 13 | ) 14 | 15 | // GENL family attributes 16 | const ( 17 | genlCtrlAttrUnspec int = iota 18 | genlCtrlAttrFamilyID 19 | genlCtrlAttrFamilyName 20 | ) 21 | 22 | // IPVS genl commands 23 | const ( 24 | ipvsCmdUnspec uint8 = iota 25 | ipvsCmdNewService 26 | ipvsCmdSetService 27 | ipvsCmdDelService 28 | ipvsCmdGetService 29 | ipvsCmdNewDest 30 | ipvsCmdSetDest 31 | ipvsCmdDelDest 32 | ipvsCmdGetDest 33 | ipvsCmdNewDaemon 34 | ipvsCmdDelDaemon 35 | ipvsCmdGetDaemon 36 | ipvsCmdSetConfig 37 | ipvsCmdGetConfig 38 | ipvsCmdSetInfo 39 | ipvsCmdGetInfo 40 | ipvsCmdZero 41 | ipvsCmdFlush 42 | ) 43 | 44 | // Attributes used in the first level of commands 45 | const ( 46 | ipvsCmdAttrUnspec int = iota 47 | ipvsCmdAttrService 48 | ipvsCmdAttrDest 49 | ipvsCmdAttrDaemon 50 | ipvsCmdAttrTimeoutTCP 51 | ipvsCmdAttrTimeoutTCPFin 52 | ipvsCmdAttrTimeoutUDP 53 | ) 54 | 55 | // Attributes used to describe a service. Used inside nested attribute 56 | // ipvsCmdAttrService 57 | const ( 58 | ipvsSvcAttrUnspec int = iota 59 | ipvsSvcAttrAddressFamily 60 | ipvsSvcAttrProtocol 61 | ipvsSvcAttrAddress 62 | ipvsSvcAttrPort 63 | ipvsSvcAttrFWMark 64 | ipvsSvcAttrSchedName 65 | ipvsSvcAttrFlags 66 | ipvsSvcAttrTimeout 67 | ipvsSvcAttrNetmask 68 | ipvsSvcAttrStats 69 | ipvsSvcAttrPEName 70 | ) 71 | 72 | // Attributes used to describe a destination (real server). Used 73 | // inside nested attribute ipvsCmdAttrDest. 74 | const ( 75 | ipvsDestAttrUnspec int = iota 76 | ipvsDestAttrAddress 77 | ipvsDestAttrPort 78 | ipvsDestAttrForwardingMethod 79 | ipvsDestAttrWeight 80 | ipvsDestAttrUpperThreshold 81 | ipvsDestAttrLowerThreshold 82 | ipvsDestAttrActiveConnections 83 | ipvsDestAttrInactiveConnections 84 | ipvsDestAttrPersistentConnections 85 | ipvsDestAttrStats 86 | ipvsDestAttrAddressFamily 87 | ) 88 | 89 | // IPVS Statistics constants 90 | 91 | const ( 92 | ipvsStatsUnspec int = iota 93 | ipvsStatsConns 94 | ipvsStatsPktsIn 95 | ipvsStatsPktsOut 96 | ipvsStatsBytesIn 97 | ipvsStatsBytesOut 98 | ipvsStatsCPS 99 | ipvsStatsPPSIn 100 | ipvsStatsPPSOut 101 | ipvsStatsBPSIn 102 | ipvsStatsBPSOut 103 | ) 104 | 105 | // Destination forwarding methods 106 | const ( 107 | // ConnectionFlagFwdmask indicates the mask in the connection 108 | // flags which is used by forwarding method bits. 109 | ConnectionFlagFwdMask = 0x0007 110 | 111 | // ConnectionFlagMasq is used for masquerade forwarding method. 112 | ConnectionFlagMasq = 0x0000 113 | 114 | // ConnectionFlagLocalNode is used for local node forwarding 115 | // method. 116 | ConnectionFlagLocalNode = 0x0001 117 | 118 | // ConnectionFlagTunnel is used for tunnel mode forwarding 119 | // method. 120 | ConnectionFlagTunnel = 0x0002 121 | 122 | // ConnectionFlagDirectRoute is used for direct routing 123 | // forwarding method. 124 | ConnectionFlagDirectRoute = 0x0003 125 | ) 126 | 127 | const ( 128 | // RoundRobin distributes jobs equally amongst the available 129 | // real servers. 130 | RoundRobin = "rr" 131 | 132 | // LeastConnection assigns more jobs to real servers with 133 | // fewer active jobs. 134 | LeastConnection = "lc" 135 | 136 | // DestinationHashing assigns jobs to servers through looking 137 | // up a statically assigned hash table by their destination IP 138 | // addresses. 139 | DestinationHashing = "dh" 140 | 141 | // SourceHashing assigns jobs to servers through looking up 142 | // a statically assigned hash table by their source IP 143 | // addresses. 144 | SourceHashing = "sh" 145 | 146 | // WeightedRoundRobin assigns jobs to real servers proportionally 147 | // to there real servers' weight. Servers with higher weights 148 | // receive new jobs first and get more jobs than servers 149 | // with lower weights. Servers with equal weights get 150 | // an equal distribution of new jobs 151 | WeightedRoundRobin = "wrr" 152 | 153 | // WeightedLeastConnection assigns more jobs to servers 154 | // with fewer jobs and relative to the real servers' weight 155 | WeightedLeastConnection = "wlc" 156 | ) 157 | 158 | const ( 159 | // ConnFwdMask is a mask for the fwd methods 160 | ConnFwdMask = 0x0007 161 | 162 | // ConnFwdMasq denotes forwarding via masquerading/NAT 163 | ConnFwdMasq = 0x0000 164 | 165 | // ConnFwdLocalNode denotes forwarding to a local node 166 | ConnFwdLocalNode = 0x0001 167 | 168 | // ConnFwdTunnel denotes forwarding via a tunnel 169 | ConnFwdTunnel = 0x0002 170 | 171 | // ConnFwdDirectRoute denotes forwarding via direct routing 172 | ConnFwdDirectRoute = 0x0003 173 | 174 | // ConnFwdBypass denotes forwarding while bypassing the cache 175 | ConnFwdBypass = 0x0004 176 | ) 177 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/moby/ipvs 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/sirupsen/logrus v1.9.0 7 | github.com/vishvananda/netlink v1.1.0 8 | github.com/vishvananda/netns v0.0.2 9 | golang.org/x/sys v0.2.0 10 | ) 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= 7 | github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 8 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 9 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 10 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 11 | github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= 12 | github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= 13 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= 14 | github.com/vishvananda/netns v0.0.2 h1:Cn05BRLm+iRP/DZxyVSsfVyrzgjDbwHwkVt38qvXnNI= 15 | github.com/vishvananda/netns v0.0.2/go.mod h1:yitZXdAVI+yPFSb4QUe+VW3vOVl4PZPNcBgbPxAtJxw= 16 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 17 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 18 | golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= 19 | golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 21 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 22 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 23 | -------------------------------------------------------------------------------- /ipvs_linux.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "time" 7 | 8 | "github.com/vishvananda/netlink/nl" 9 | "github.com/vishvananda/netns" 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | const ( 14 | netlinkRecvSocketsTimeout = 3 * time.Second 15 | netlinkSendSocketTimeout = 30 * time.Second 16 | ) 17 | 18 | // Service defines an IPVS service in its entirety. 19 | type Service struct { 20 | // Virtual service address. 21 | Address net.IP 22 | Protocol uint16 23 | Port uint16 24 | FWMark uint32 // Firewall mark of the service. 25 | 26 | // Virtual service options. 27 | SchedName string 28 | Flags uint32 29 | Timeout uint32 30 | Netmask uint32 31 | AddressFamily uint16 32 | PEName string 33 | Stats SvcStats 34 | } 35 | 36 | // SvcStats defines an IPVS service statistics 37 | type SvcStats struct { 38 | Connections uint32 39 | PacketsIn uint32 40 | PacketsOut uint32 41 | BytesIn uint64 42 | BytesOut uint64 43 | CPS uint32 44 | BPSOut uint32 45 | PPSIn uint32 46 | PPSOut uint32 47 | BPSIn uint32 48 | } 49 | 50 | // Destination defines an IPVS destination (real server) in its 51 | // entirety. 52 | type Destination struct { 53 | Address net.IP 54 | Port uint16 55 | Weight int 56 | ConnectionFlags uint32 57 | AddressFamily uint16 58 | UpperThreshold uint32 59 | LowerThreshold uint32 60 | ActiveConnections int 61 | InactiveConnections int 62 | Stats DstStats 63 | } 64 | 65 | // DstStats defines IPVS destination (real server) statistics 66 | type DstStats SvcStats 67 | 68 | // Config defines IPVS timeout configuration 69 | type Config struct { 70 | TimeoutTCP time.Duration 71 | TimeoutTCPFin time.Duration 72 | TimeoutUDP time.Duration 73 | } 74 | 75 | // Handle provides a namespace specific ipvs handle to program ipvs 76 | // rules. 77 | type Handle struct { 78 | seq uint32 79 | sock *nl.NetlinkSocket 80 | } 81 | 82 | // New provides a new ipvs handle in the namespace pointed to by the 83 | // passed path. It will return a valid handle or an error in case an 84 | // error occurred while creating the handle. 85 | func New(path string) (*Handle, error) { 86 | setup() 87 | 88 | n := netns.None() 89 | if path != "" { 90 | var err error 91 | n, err = netns.GetFromPath(path) 92 | if err != nil { 93 | return nil, err 94 | } 95 | } 96 | defer n.Close() 97 | 98 | sock, err := nl.GetNetlinkSocketAt(n, netns.None(), unix.NETLINK_GENERIC) 99 | if err != nil { 100 | return nil, err 101 | } 102 | // Add operation timeout to avoid deadlocks 103 | tv := unix.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds()) 104 | if err := sock.SetSendTimeout(&tv); err != nil { 105 | return nil, err 106 | } 107 | tv = unix.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds()) 108 | if err := sock.SetReceiveTimeout(&tv); err != nil { 109 | return nil, err 110 | } 111 | 112 | return &Handle{sock: sock}, nil 113 | } 114 | 115 | // Close closes the ipvs handle. The handle is invalid after Close 116 | // returns. 117 | func (i *Handle) Close() { 118 | if i.sock != nil { 119 | i.sock.Close() 120 | } 121 | } 122 | 123 | // NewService creates a new ipvs service in the passed handle. 124 | func (i *Handle) NewService(s *Service) error { 125 | return i.doCmd(s, nil, ipvsCmdNewService) 126 | } 127 | 128 | // IsServicePresent queries for the ipvs service in the passed handle. 129 | func (i *Handle) IsServicePresent(s *Service) bool { 130 | return nil == i.doCmd(s, nil, ipvsCmdGetService) 131 | } 132 | 133 | // UpdateService updates an already existing service in the passed 134 | // handle. 135 | func (i *Handle) UpdateService(s *Service) error { 136 | return i.doCmd(s, nil, ipvsCmdSetService) 137 | } 138 | 139 | // DelService deletes an already existing service in the passed 140 | // handle. 141 | func (i *Handle) DelService(s *Service) error { 142 | return i.doCmd(s, nil, ipvsCmdDelService) 143 | } 144 | 145 | // Flush deletes all existing services in the passed 146 | // handle. 147 | func (i *Handle) Flush() error { 148 | _, err := i.doCmdWithoutAttr(ipvsCmdFlush) 149 | return err 150 | } 151 | 152 | // NewDestination creates a new real server in the passed ipvs 153 | // service which should already be existing in the passed handle. 154 | func (i *Handle) NewDestination(s *Service, d *Destination) error { 155 | return i.doCmd(s, d, ipvsCmdNewDest) 156 | } 157 | 158 | // UpdateDestination updates an already existing real server in the 159 | // passed ipvs service in the passed handle. 160 | func (i *Handle) UpdateDestination(s *Service, d *Destination) error { 161 | return i.doCmd(s, d, ipvsCmdSetDest) 162 | } 163 | 164 | // DelDestination deletes an already existing real server in the 165 | // passed ipvs service in the passed handle. 166 | func (i *Handle) DelDestination(s *Service, d *Destination) error { 167 | return i.doCmd(s, d, ipvsCmdDelDest) 168 | } 169 | 170 | // GetServices returns an array of services configured on the Node 171 | func (i *Handle) GetServices() ([]*Service, error) { 172 | return i.doGetServicesCmd(nil) 173 | } 174 | 175 | // GetDestinations returns an array of Destinations configured for this Service 176 | func (i *Handle) GetDestinations(s *Service) ([]*Destination, error) { 177 | return i.doGetDestinationsCmd(s, nil) 178 | } 179 | 180 | // GetService gets details of a specific IPVS services, useful in updating statisics etc., 181 | func (i *Handle) GetService(s *Service) (*Service, error) { 182 | res, err := i.doGetServicesCmd(s) 183 | if err != nil { 184 | return nil, err 185 | } 186 | 187 | // We are looking for exactly one service otherwise error out 188 | if len(res) != 1 { 189 | return nil, fmt.Errorf("Expected only one service obtained=%d", len(res)) 190 | } 191 | 192 | return res[0], nil 193 | } 194 | 195 | // GetConfig returns the current timeout configuration 196 | func (i *Handle) GetConfig() (*Config, error) { 197 | return i.doGetConfigCmd() 198 | } 199 | 200 | // SetConfig set the current timeout configuration. 0: no change 201 | func (i *Handle) SetConfig(c *Config) error { 202 | return i.doSetConfigCmd(c) 203 | } 204 | -------------------------------------------------------------------------------- /ipvs_linux_test.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | 3 | import ( 4 | "net" 5 | "reflect" 6 | "runtime" 7 | "syscall" 8 | "testing" 9 | "time" 10 | 11 | "github.com/moby/ipvs/ns" 12 | "github.com/vishvananda/netlink" 13 | "github.com/vishvananda/netlink/nl" 14 | "golang.org/x/sys/unix" 15 | ) 16 | 17 | var ( 18 | schedMethods = []string{ 19 | RoundRobin, 20 | LeastConnection, 21 | DestinationHashing, 22 | SourceHashing, 23 | WeightedLeastConnection, 24 | WeightedRoundRobin, 25 | } 26 | 27 | protocols = []string{ 28 | "TCP", 29 | "UDP", 30 | "FWM", 31 | } 32 | 33 | fwdMethods = []uint32{ 34 | ConnectionFlagMasq, 35 | ConnectionFlagTunnel, 36 | ConnectionFlagDirectRoute, 37 | } 38 | 39 | fwdMethodStrings = []string{ 40 | "Masq", 41 | "Tunnel", 42 | "Route", 43 | } 44 | ) 45 | 46 | func lookupFwMethod(fwMethod uint32) string { 47 | switch fwMethod { 48 | case ConnectionFlagMasq: 49 | return fwdMethodStrings[0] 50 | case ConnectionFlagTunnel: 51 | return fwdMethodStrings[1] 52 | case ConnectionFlagDirectRoute: 53 | return fwdMethodStrings[2] 54 | } 55 | return "" 56 | } 57 | 58 | func checkDestination(t *testing.T, i *Handle, s *Service, d *Destination, checkPresent bool) { 59 | var dstFound bool 60 | 61 | dstArray, err := i.GetDestinations(s) 62 | if err != nil { 63 | t.Fatalf("Failed to get destination; %v", err) 64 | } 65 | 66 | for _, dst := range dstArray { 67 | if dst.Address.Equal(d.Address) && dst.Port == d.Port && 68 | lookupFwMethod(dst.ConnectionFlags) == lookupFwMethod(d.ConnectionFlags) && 69 | dst.AddressFamily == d.AddressFamily { 70 | dstFound = true 71 | break 72 | } 73 | } 74 | 75 | switch checkPresent { 76 | case true: // The test expects the service to be present 77 | if !dstFound { 78 | t.Fatalf("Did not find the service %s in ipvs output", d.Address.String()) 79 | } 80 | case false: // The test expects that the service should not be present 81 | if dstFound { 82 | t.Fatalf("Did not find the destination %s fwdMethod %s in ipvs output", d.Address.String(), lookupFwMethod(d.ConnectionFlags)) 83 | } 84 | } 85 | } 86 | 87 | func checkService(t *testing.T, i *Handle, s *Service, checkPresent bool) { 88 | svcArray, err := i.GetServices() 89 | if err != nil { 90 | t.Fatalf("Failed to get service; %v", err) 91 | } 92 | 93 | var svcFound bool 94 | 95 | for _, svc := range svcArray { 96 | if svc.Protocol == s.Protocol && svc.Address.String() == s.Address.String() && svc.Port == s.Port { 97 | svcFound = true 98 | break 99 | } 100 | } 101 | 102 | switch checkPresent { 103 | case true: // The test expects the service to be present 104 | if !svcFound { 105 | t.Fatalf("Did not find the service %s in ipvs output", s.Address.String()) 106 | } 107 | case false: // The test expects that the service should not be present 108 | if svcFound { 109 | t.Fatalf("Did not expect the service %s in ipvs output", s.Address.String()) 110 | } 111 | } 112 | } 113 | 114 | func TestGetFamily(t *testing.T) { 115 | id, err := getIPVSFamily() 116 | if err != nil { 117 | t.Fatal("Failed to get IPVS family:", err) 118 | } 119 | if id == 0 { 120 | t.Error("IPVS family was 0") 121 | } 122 | } 123 | 124 | func TestService(t *testing.T) { 125 | defer setupTestOSContext(t)() 126 | 127 | i, err := New("") 128 | if err != nil { 129 | t.Fatal("Failed to create IPVS handle:", err) 130 | } 131 | 132 | for _, protocol := range protocols { 133 | for _, schedMethod := range schedMethods { 134 | testDatas := []struct { 135 | AddressFamily uint16 136 | IP string 137 | Netmask uint32 138 | }{ 139 | { 140 | AddressFamily: nl.FAMILY_V4, 141 | IP: "1.2.3.4", 142 | Netmask: 0xFFFFFFFF, 143 | }, { 144 | AddressFamily: nl.FAMILY_V6, 145 | IP: "2001:db8:3c4d:15::1a00", 146 | Netmask: 128, 147 | }, 148 | } 149 | for _, td := range testDatas { 150 | s := Service{ 151 | AddressFamily: td.AddressFamily, 152 | SchedName: schedMethod, 153 | } 154 | 155 | switch protocol { 156 | case "FWM": 157 | s.FWMark = 1234 158 | s.Netmask = td.Netmask 159 | case "TCP": 160 | s.Protocol = unix.IPPROTO_TCP 161 | s.Port = 80 162 | s.Address = net.ParseIP(td.IP) 163 | s.Netmask = td.Netmask 164 | case "UDP": 165 | s.Protocol = unix.IPPROTO_UDP 166 | s.Port = 53 167 | s.Address = net.ParseIP(td.IP) 168 | s.Netmask = td.Netmask 169 | } 170 | 171 | err := i.NewService(&s) 172 | if err != nil { 173 | t.Fatal("Failed to create service:", err) 174 | } 175 | checkService(t, i, &s, true) 176 | for _, updateSchedMethod := range schedMethods { 177 | if updateSchedMethod == schedMethod { 178 | continue 179 | } 180 | 181 | s.SchedName = updateSchedMethod 182 | err = i.UpdateService(&s) 183 | if err != nil { 184 | t.Fatal("Failed to update service:", err) 185 | } 186 | checkService(t, i, &s, true) 187 | 188 | scopy, err := i.GetService(&s) 189 | if err != nil { 190 | t.Fatal("Failed to get service:", err) 191 | } 192 | if expected := (*scopy).Address.String(); expected != s.Address.String() { 193 | t.Errorf("expected: %v, got: %v", expected, s.Address.String()) 194 | } 195 | if expected := (*scopy).Port; expected != s.Port { 196 | t.Errorf("expected: %v, got: %v", expected, s.Port) 197 | } 198 | if expected := (*scopy).Protocol; expected != s.Protocol { 199 | t.Errorf("expected: %v, got: %v", expected, s.Protocol) 200 | } 201 | } 202 | 203 | err = i.DelService(&s) 204 | if err != nil { 205 | t.Fatal("Failed to delete service:", err) 206 | } 207 | checkService(t, i, &s, false) 208 | } 209 | } 210 | } 211 | 212 | svcs := []Service{ 213 | { 214 | AddressFamily: nl.FAMILY_V4, 215 | SchedName: RoundRobin, 216 | Protocol: unix.IPPROTO_TCP, 217 | Port: 80, 218 | Address: net.ParseIP("10.20.30.40"), 219 | Netmask: 0xFFFFFFFF, 220 | }, 221 | { 222 | AddressFamily: nl.FAMILY_V4, 223 | SchedName: LeastConnection, 224 | Protocol: unix.IPPROTO_UDP, 225 | Port: 8080, 226 | Address: net.ParseIP("10.20.30.41"), 227 | Netmask: 0xFFFFFFFF, 228 | }, 229 | } 230 | // Create services for testing flush 231 | for _, svc := range svcs { 232 | if !i.IsServicePresent(&svc) { 233 | err = i.NewService(&svc) 234 | if err != nil { 235 | t.Fatal("Failed to create service:", err) 236 | } 237 | checkService(t, i, &svc, true) 238 | } else { 239 | t.Errorf("svc: %v exists", svc) 240 | } 241 | } 242 | err = i.Flush() 243 | if err != nil { 244 | t.Fatal("Failed to flush:", err) 245 | } 246 | got, err := i.GetServices() 247 | if err != nil { 248 | t.Fatal("Failed to get service:", err) 249 | } 250 | if len(got) != 0 { 251 | t.Errorf("Unexpected services after flush") 252 | } 253 | } 254 | 255 | func createDummyInterface(t *testing.T) { 256 | dummy := &netlink.Dummy{ 257 | LinkAttrs: netlink.LinkAttrs{ 258 | Name: "dummy", 259 | }, 260 | } 261 | 262 | err := netlink.LinkAdd(dummy) 263 | if err != nil { 264 | t.Fatal("Failed to add link:", err) 265 | } 266 | 267 | dummyLink, err := netlink.LinkByName("dummy") 268 | if err != nil { 269 | t.Fatal("Failed to get dummy link:", err) 270 | } 271 | 272 | ip, ipNet, err := net.ParseCIDR("10.1.1.1/24") 273 | if err != nil { 274 | t.Fatal("Failed to parse CIDR:", err) 275 | } 276 | 277 | ipNet.IP = ip 278 | 279 | ipAddr := &netlink.Addr{IPNet: ipNet, Label: ""} 280 | err = netlink.AddrAdd(dummyLink, ipAddr) 281 | if err != nil { 282 | t.Fatal("Failed to add IP address:", err) 283 | } 284 | } 285 | 286 | func TestDestination(t *testing.T) { 287 | defer setupTestOSContext(t)() 288 | 289 | createDummyInterface(t) 290 | i, err := New("") 291 | if err != nil { 292 | t.Fatal("Failed to create IPVS handle:", err) 293 | } 294 | 295 | for _, protocol := range protocols { 296 | testDatas := []struct { 297 | AddressFamily uint16 298 | IP string 299 | Netmask uint32 300 | Destinations []string 301 | }{ 302 | { 303 | AddressFamily: nl.FAMILY_V4, 304 | IP: "1.2.3.4", 305 | Netmask: 0xFFFFFFFF, 306 | Destinations: []string{"10.1.1.2", "10.1.1.3", "10.1.1.4"}, 307 | }, { 308 | AddressFamily: nl.FAMILY_V6, 309 | IP: "2001:db8:3c4d:15::1a00", 310 | Netmask: 128, 311 | Destinations: []string{"2001:db8:3c4d:15::1a2b", "2001:db8:3c4d:15::1a2c", "2001:db8:3c4d:15::1a2d"}, 312 | }, 313 | } 314 | for _, td := range testDatas { 315 | s := Service{ 316 | AddressFamily: td.AddressFamily, 317 | SchedName: RoundRobin, 318 | } 319 | 320 | switch protocol { 321 | case "FWM": 322 | s.FWMark = 1234 323 | s.Netmask = td.Netmask 324 | case "TCP": 325 | s.Protocol = unix.IPPROTO_TCP 326 | s.Port = 80 327 | s.Address = net.ParseIP(td.IP) 328 | s.Netmask = td.Netmask 329 | case "UDP": 330 | s.Protocol = unix.IPPROTO_UDP 331 | s.Port = 53 332 | s.Address = net.ParseIP(td.IP) 333 | s.Netmask = td.Netmask 334 | } 335 | 336 | err := i.NewService(&s) 337 | if err != nil { 338 | t.Fatal("Failed to create service:", err) 339 | } 340 | checkService(t, i, &s, true) 341 | 342 | s.SchedName = "" 343 | for _, fwdMethod := range fwdMethods { 344 | destinations := make([]Destination, 0) 345 | for _, ip := range td.Destinations { 346 | d := Destination{ 347 | AddressFamily: td.AddressFamily, 348 | Address: net.ParseIP(ip), 349 | Port: 5000, 350 | Weight: 1, 351 | ConnectionFlags: fwdMethod, 352 | } 353 | destinations = append(destinations, d) 354 | err := i.NewDestination(&s, &d) 355 | if err != nil { 356 | t.Fatal("Failed to create destination:", err) 357 | } 358 | checkDestination(t, i, &s, &d, true) 359 | } 360 | 361 | for _, updateFwdMethod := range fwdMethods { 362 | if updateFwdMethod == fwdMethod { 363 | continue 364 | } 365 | for _, d := range destinations { 366 | d.ConnectionFlags = updateFwdMethod 367 | err = i.UpdateDestination(&s, &d) 368 | if err != nil { 369 | t.Fatal("Failed to update destination:", err) 370 | } 371 | checkDestination(t, i, &s, &d, true) 372 | } 373 | } 374 | for _, d := range destinations { 375 | err = i.DelDestination(&s, &d) 376 | if err != nil { 377 | t.Fatal("Failed to delete destination:", err) 378 | } 379 | checkDestination(t, i, &s, &d, false) 380 | } 381 | } 382 | 383 | } 384 | } 385 | } 386 | 387 | func TestTimeouts(t *testing.T) { 388 | defer setupTestOSContext(t)() 389 | 390 | i, err := New("") 391 | if err != nil { 392 | t.Fatal("Failed to create IPVS handle:", err) 393 | } 394 | 395 | _, err = i.GetConfig() 396 | if err != nil { 397 | t.Fatal("Failed to get config:", err) 398 | } 399 | 400 | cfg := Config{66 * time.Second, 66 * time.Second, 66 * time.Second} 401 | err = i.SetConfig(&cfg) 402 | if err != nil { 403 | t.Fatal("Failed to set config:", err) 404 | } 405 | 406 | c2, err := i.GetConfig() 407 | if err != nil { 408 | t.Fatal("Failed to get config:", err) 409 | } 410 | if !reflect.DeepEqual(*c2, cfg) { 411 | t.Fatalf("expected: %+v, got: %+v", cfg, *c2) 412 | } 413 | 414 | // A timeout value 0 means that the current timeout value of the corresponding entry is preserved 415 | cfg = Config{77 * time.Second, 0 * time.Second, 77 * time.Second} 416 | err = i.SetConfig(&cfg) 417 | if err != nil { 418 | t.Fatal("Failed to set config:", err) 419 | } 420 | 421 | c3, err := i.GetConfig() 422 | if err != nil { 423 | t.Fatal("Failed to get config:", err) 424 | } 425 | expected := Config{77 * time.Second, 66 * time.Second, 77 * time.Second} 426 | if !reflect.DeepEqual(*c3, expected) { 427 | t.Fatalf("expected: %+v, got: %+v", expected, *c3) 428 | } 429 | } 430 | 431 | // setupTestOSContext joins a new network namespace, and returns its associated 432 | // teardown function. 433 | // 434 | // Example usage: 435 | // 436 | // defer setupTestOSContext(t)() 437 | func setupTestOSContext(t *testing.T) func() { 438 | t.Helper() 439 | runtime.LockOSThread() 440 | if err := syscall.Unshare(syscall.CLONE_NEWNET); err != nil { 441 | t.Fatalf("Failed to enter netns: %v", err) 442 | } 443 | 444 | fd, err := syscall.Open("/proc/self/ns/net", syscall.O_RDONLY, 0) 445 | if err != nil { 446 | t.Fatal("Failed to open netns file:", err) 447 | } 448 | 449 | // Since we are switching to a new test namespace make 450 | // sure to re-initialize initNs context 451 | ns.Init() 452 | 453 | runtime.LockOSThread() 454 | 455 | return func() { 456 | if err := syscall.Close(fd); err != nil { 457 | t.Logf("Warning: netns closing failed (%v)", err) 458 | } 459 | runtime.UnlockOSThread() 460 | } 461 | } 462 | -------------------------------------------------------------------------------- /netlink_linux.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "errors" 7 | "fmt" 8 | "net" 9 | "os/exec" 10 | "strings" 11 | "sync" 12 | "sync/atomic" 13 | "syscall" 14 | "time" 15 | "unsafe" 16 | 17 | "github.com/sirupsen/logrus" 18 | "github.com/vishvananda/netlink/nl" 19 | "github.com/vishvananda/netns" 20 | ) 21 | 22 | // For Quick Reference IPVS related netlink message is described at the end of this file. 23 | var ( 24 | native = nl.NativeEndian() 25 | ipvsFamily int 26 | ipvsOnce sync.Once 27 | ) 28 | 29 | type genlMsgHdr struct { 30 | cmd uint8 31 | version uint8 32 | reserved uint16 33 | } 34 | 35 | type ipvsFlags struct { 36 | flags uint32 37 | mask uint32 38 | } 39 | 40 | func deserializeGenlMsg(b []byte) (hdr *genlMsgHdr) { 41 | return (*genlMsgHdr)(unsafe.Pointer(&b[0:unsafe.Sizeof(*hdr)][0])) 42 | } 43 | 44 | func (hdr *genlMsgHdr) Serialize() []byte { 45 | return (*(*[unsafe.Sizeof(*hdr)]byte)(unsafe.Pointer(hdr)))[:] 46 | } 47 | 48 | func (hdr *genlMsgHdr) Len() int { 49 | return int(unsafe.Sizeof(*hdr)) 50 | } 51 | 52 | func (f *ipvsFlags) Serialize() []byte { 53 | return (*(*[unsafe.Sizeof(*f)]byte)(unsafe.Pointer(f)))[:] 54 | } 55 | 56 | func (f *ipvsFlags) Len() int { 57 | return int(unsafe.Sizeof(*f)) 58 | } 59 | 60 | func setup() { 61 | ipvsOnce.Do(func() { 62 | var err error 63 | if out, err := exec.Command("modprobe", "-va", "ip_vs").CombinedOutput(); err != nil { 64 | logrus.Warnf("Running modprobe ip_vs failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err) 65 | } 66 | 67 | ipvsFamily, err = getIPVSFamily() 68 | if err != nil { 69 | logrus.Error("Could not get ipvs family information from the kernel. It is possible that ipvs is not enabled in your kernel. Native loadbalancing will not work until this is fixed.") 70 | } 71 | }) 72 | } 73 | 74 | func fillService(s *Service) nl.NetlinkRequestData { 75 | cmdAttr := nl.NewRtAttr(ipvsCmdAttrService, nil) 76 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddressFamily, nl.Uint16Attr(s.AddressFamily)) 77 | if s.FWMark != 0 { 78 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFWMark, nl.Uint32Attr(s.FWMark)) 79 | } else { 80 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrProtocol, nl.Uint16Attr(s.Protocol)) 81 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrAddress, rawIPData(s.Address)) 82 | 83 | // Port needs to be in network byte order. 84 | portBuf := new(bytes.Buffer) 85 | binary.Write(portBuf, binary.BigEndian, s.Port) 86 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPort, portBuf.Bytes()) 87 | } 88 | 89 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrSchedName, nl.ZeroTerminated(s.SchedName)) 90 | if s.PEName != "" { 91 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrPEName, nl.ZeroTerminated(s.PEName)) 92 | } 93 | f := &ipvsFlags{ 94 | flags: s.Flags, 95 | mask: 0xFFFFFFFF, 96 | } 97 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrFlags, f.Serialize()) 98 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrTimeout, nl.Uint32Attr(s.Timeout)) 99 | nl.NewRtAttrChild(cmdAttr, ipvsSvcAttrNetmask, nl.Uint32Attr(s.Netmask)) 100 | return cmdAttr 101 | } 102 | 103 | func fillDestination(d *Destination) nl.NetlinkRequestData { 104 | cmdAttr := nl.NewRtAttr(ipvsCmdAttrDest, nil) 105 | 106 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrAddress, rawIPData(d.Address)) 107 | // Port needs to be in network byte order. 108 | portBuf := new(bytes.Buffer) 109 | binary.Write(portBuf, binary.BigEndian, d.Port) 110 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrPort, portBuf.Bytes()) 111 | 112 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrForwardingMethod, nl.Uint32Attr(d.ConnectionFlags&ConnectionFlagFwdMask)) 113 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrWeight, nl.Uint32Attr(uint32(d.Weight))) 114 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrUpperThreshold, nl.Uint32Attr(d.UpperThreshold)) 115 | nl.NewRtAttrChild(cmdAttr, ipvsDestAttrLowerThreshold, nl.Uint32Attr(d.LowerThreshold)) 116 | 117 | return cmdAttr 118 | } 119 | 120 | func (i *Handle) doCmdwithResponse(s *Service, d *Destination, cmd uint8) ([][]byte, error) { 121 | req := newIPVSRequest(cmd) 122 | req.Seq = atomic.AddUint32(&i.seq, 1) 123 | 124 | if s == nil { 125 | req.Flags |= syscall.NLM_F_DUMP // Flag to dump all messages 126 | req.AddData(nl.NewRtAttr(ipvsCmdAttrService, nil)) // Add a dummy attribute 127 | } else { 128 | req.AddData(fillService(s)) 129 | } 130 | 131 | if d == nil { 132 | if cmd == ipvsCmdGetDest { 133 | req.Flags |= syscall.NLM_F_DUMP 134 | } 135 | } else { 136 | req.AddData(fillDestination(d)) 137 | } 138 | 139 | res, err := execute(i.sock, req, 0) 140 | if err != nil { 141 | return [][]byte{}, err 142 | } 143 | 144 | return res, nil 145 | } 146 | 147 | func (i *Handle) doCmd(s *Service, d *Destination, cmd uint8) error { 148 | _, err := i.doCmdwithResponse(s, d, cmd) 149 | 150 | return err 151 | } 152 | 153 | func getIPVSFamily() (int, error) { 154 | sock, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), syscall.NETLINK_GENERIC) 155 | if err != nil { 156 | return 0, err 157 | } 158 | defer sock.Close() 159 | 160 | req := newGenlRequest(genlCtrlID, genlCtrlCmdGetFamily) 161 | req.AddData(nl.NewRtAttr(genlCtrlAttrFamilyName, nl.ZeroTerminated("IPVS"))) 162 | 163 | msgs, err := execute(sock, req, 0) 164 | if err != nil { 165 | return 0, err 166 | } 167 | 168 | for _, m := range msgs { 169 | hdr := deserializeGenlMsg(m) 170 | attrs, err := nl.ParseRouteAttr(m[hdr.Len():]) 171 | if err != nil { 172 | return 0, err 173 | } 174 | 175 | for _, attr := range attrs { 176 | switch int(attr.Attr.Type) { 177 | case genlCtrlAttrFamilyID: 178 | return int(native.Uint16(attr.Value[0:2])), nil 179 | } 180 | } 181 | } 182 | 183 | return 0, fmt.Errorf("no family id in the netlink response") 184 | } 185 | 186 | func rawIPData(ip net.IP) []byte { 187 | family := nl.GetIPFamily(ip) 188 | if family == nl.FAMILY_V4 { 189 | return ip.To4() 190 | } 191 | return ip 192 | } 193 | 194 | func newIPVSRequest(cmd uint8) *nl.NetlinkRequest { 195 | return newGenlRequest(ipvsFamily, cmd) 196 | } 197 | 198 | func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest { 199 | req := nl.NewNetlinkRequest(familyID, syscall.NLM_F_ACK) 200 | req.AddData(&genlMsgHdr{cmd: cmd, version: 1}) 201 | return req 202 | } 203 | 204 | func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) { 205 | if err := s.Send(req); err != nil { 206 | return nil, err 207 | } 208 | 209 | pid, err := s.GetPid() 210 | if err != nil { 211 | return nil, err 212 | } 213 | 214 | var res [][]byte 215 | 216 | done: 217 | for { 218 | msgs, _, err := s.Receive() 219 | if err != nil { 220 | if s.GetFd() == -1 { 221 | return nil, fmt.Errorf("Socket got closed on receive") 222 | } 223 | if err == syscall.EAGAIN { 224 | // timeout fired 225 | continue 226 | } 227 | return nil, err 228 | } 229 | for _, m := range msgs { 230 | if m.Header.Seq != req.Seq { 231 | continue 232 | } 233 | if m.Header.Pid != pid { 234 | return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid) 235 | } 236 | if m.Header.Type == syscall.NLMSG_DONE { 237 | break done 238 | } 239 | if m.Header.Type == syscall.NLMSG_ERROR { 240 | error := int32(native.Uint32(m.Data[0:4])) 241 | if error == 0 { 242 | break done 243 | } 244 | return nil, syscall.Errno(-error) 245 | } 246 | if resType != 0 && m.Header.Type != resType { 247 | continue 248 | } 249 | res = append(res, m.Data) 250 | if m.Header.Flags&syscall.NLM_F_MULTI == 0 { 251 | break done 252 | } 253 | } 254 | } 255 | return res, nil 256 | } 257 | 258 | func parseIP(ip []byte, family uint16) (net.IP, error) { 259 | var resIP net.IP 260 | 261 | switch family { 262 | case syscall.AF_INET: 263 | resIP = (net.IP)(ip[:4]) 264 | case syscall.AF_INET6: 265 | resIP = (net.IP)(ip[:16]) 266 | default: 267 | return nil, fmt.Errorf("parseIP Error ip=%v", ip) 268 | 269 | } 270 | return resIP, nil 271 | } 272 | 273 | // parseStats 274 | func assembleStats(msg []byte) (SvcStats, error) { 275 | var s SvcStats 276 | 277 | attrs, err := nl.ParseRouteAttr(msg) 278 | if err != nil { 279 | return s, err 280 | } 281 | 282 | for _, attr := range attrs { 283 | attrType := int(attr.Attr.Type) 284 | switch attrType { 285 | case ipvsStatsConns: 286 | s.Connections = native.Uint32(attr.Value) 287 | case ipvsStatsPktsIn: 288 | s.PacketsIn = native.Uint32(attr.Value) 289 | case ipvsStatsPktsOut: 290 | s.PacketsOut = native.Uint32(attr.Value) 291 | case ipvsStatsBytesIn: 292 | s.BytesIn = native.Uint64(attr.Value) 293 | case ipvsStatsBytesOut: 294 | s.BytesOut = native.Uint64(attr.Value) 295 | case ipvsStatsCPS: 296 | s.CPS = native.Uint32(attr.Value) 297 | case ipvsStatsPPSIn: 298 | s.PPSIn = native.Uint32(attr.Value) 299 | case ipvsStatsPPSOut: 300 | s.PPSOut = native.Uint32(attr.Value) 301 | case ipvsStatsBPSIn: 302 | s.BPSIn = native.Uint32(attr.Value) 303 | case ipvsStatsBPSOut: 304 | s.BPSOut = native.Uint32(attr.Value) 305 | } 306 | } 307 | return s, nil 308 | } 309 | 310 | // assembleService assembles a services back from a hain of netlink attributes 311 | func assembleService(attrs []syscall.NetlinkRouteAttr) (*Service, error) { 312 | var s Service 313 | var addressBytes []byte 314 | 315 | for _, attr := range attrs { 316 | 317 | attrType := int(attr.Attr.Type) 318 | 319 | switch attrType { 320 | 321 | case ipvsSvcAttrAddressFamily: 322 | s.AddressFamily = native.Uint16(attr.Value) 323 | case ipvsSvcAttrProtocol: 324 | s.Protocol = native.Uint16(attr.Value) 325 | case ipvsSvcAttrAddress: 326 | addressBytes = attr.Value 327 | case ipvsSvcAttrPort: 328 | s.Port = binary.BigEndian.Uint16(attr.Value) 329 | case ipvsSvcAttrFWMark: 330 | s.FWMark = native.Uint32(attr.Value) 331 | case ipvsSvcAttrSchedName: 332 | s.SchedName = nl.BytesToString(attr.Value) 333 | case ipvsSvcAttrFlags: 334 | s.Flags = native.Uint32(attr.Value) 335 | case ipvsSvcAttrTimeout: 336 | s.Timeout = native.Uint32(attr.Value) 337 | case ipvsSvcAttrNetmask: 338 | s.Netmask = native.Uint32(attr.Value) 339 | case ipvsSvcAttrStats: 340 | stats, err := assembleStats(attr.Value) 341 | if err != nil { 342 | return nil, err 343 | } 344 | s.Stats = stats 345 | } 346 | 347 | } 348 | 349 | // parse Address after parse AddressFamily incase of parseIP error 350 | if addressBytes != nil { 351 | ip, err := parseIP(addressBytes, s.AddressFamily) 352 | if err != nil { 353 | return nil, err 354 | } 355 | s.Address = ip 356 | } 357 | 358 | return &s, nil 359 | } 360 | 361 | // parseService given a ipvs netlink response this function will respond with a valid service entry, an error otherwise 362 | func (i *Handle) parseService(msg []byte) (*Service, error) { 363 | var s *Service 364 | 365 | // Remove General header for this message and parse the NetLink message 366 | hdr := deserializeGenlMsg(msg) 367 | NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():]) 368 | if err != nil { 369 | return nil, err 370 | } 371 | if len(NetLinkAttrs) == 0 { 372 | return nil, fmt.Errorf("error no valid netlink message found while parsing service record") 373 | } 374 | 375 | // Now Parse and get IPVS related attributes messages packed in this message. 376 | ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value) 377 | if err != nil { 378 | return nil, err 379 | } 380 | 381 | // Assemble all the IPVS related attribute messages and create a service record 382 | s, err = assembleService(ipvsAttrs) 383 | if err != nil { 384 | return nil, err 385 | } 386 | 387 | return s, nil 388 | } 389 | 390 | // doGetServicesCmd a wrapper which could be used commonly for both GetServices() and GetService(*Service) 391 | func (i *Handle) doGetServicesCmd(svc *Service) ([]*Service, error) { 392 | var res []*Service 393 | 394 | msgs, err := i.doCmdwithResponse(svc, nil, ipvsCmdGetService) 395 | if err != nil { 396 | return nil, err 397 | } 398 | 399 | for _, msg := range msgs { 400 | srv, err := i.parseService(msg) 401 | if err != nil { 402 | return nil, err 403 | } 404 | res = append(res, srv) 405 | } 406 | 407 | return res, nil 408 | } 409 | 410 | // doCmdWithoutAttr a simple wrapper of netlink socket execute command 411 | func (i *Handle) doCmdWithoutAttr(cmd uint8) ([][]byte, error) { 412 | req := newIPVSRequest(cmd) 413 | req.Seq = atomic.AddUint32(&i.seq, 1) 414 | return execute(i.sock, req, 0) 415 | } 416 | 417 | func assembleDestination(attrs []syscall.NetlinkRouteAttr) (*Destination, error) { 418 | var d Destination 419 | var addressBytes []byte 420 | 421 | for _, attr := range attrs { 422 | 423 | attrType := int(attr.Attr.Type) 424 | 425 | switch attrType { 426 | 427 | case ipvsDestAttrAddressFamily: 428 | d.AddressFamily = native.Uint16(attr.Value) 429 | case ipvsDestAttrAddress: 430 | addressBytes = attr.Value 431 | case ipvsDestAttrPort: 432 | d.Port = binary.BigEndian.Uint16(attr.Value) 433 | case ipvsDestAttrForwardingMethod: 434 | d.ConnectionFlags = native.Uint32(attr.Value) 435 | case ipvsDestAttrWeight: 436 | d.Weight = int(native.Uint16(attr.Value)) 437 | case ipvsDestAttrUpperThreshold: 438 | d.UpperThreshold = native.Uint32(attr.Value) 439 | case ipvsDestAttrLowerThreshold: 440 | d.LowerThreshold = native.Uint32(attr.Value) 441 | case ipvsDestAttrActiveConnections: 442 | d.ActiveConnections = int(native.Uint32(attr.Value)) 443 | case ipvsDestAttrInactiveConnections: 444 | d.InactiveConnections = int(native.Uint32(attr.Value)) 445 | case ipvsDestAttrStats: 446 | stats, err := assembleStats(attr.Value) 447 | if err != nil { 448 | return nil, err 449 | } 450 | d.Stats = DstStats(stats) 451 | } 452 | } 453 | 454 | // in older kernels (< 3.18), the destination address family attribute doesn't exist so we must 455 | // assume it based on the destination address provided. 456 | if d.AddressFamily == 0 { 457 | // we can't check the address family using net stdlib because netlink returns 458 | // IPv4 addresses as the first 4 bytes in a []byte of length 16 where as 459 | // stdlib expects it as the last 4 bytes. 460 | addressFamily, err := getIPFamily(addressBytes) 461 | if err != nil { 462 | return nil, err 463 | } 464 | d.AddressFamily = addressFamily 465 | } 466 | 467 | // parse Address after parse AddressFamily incase of parseIP error 468 | if addressBytes != nil { 469 | ip, err := parseIP(addressBytes, d.AddressFamily) 470 | if err != nil { 471 | return nil, err 472 | } 473 | d.Address = ip 474 | } 475 | 476 | return &d, nil 477 | } 478 | 479 | // getIPFamily parses the IP family based on raw data from netlink. 480 | // For AF_INET, netlink will set the first 4 bytes with trailing zeros 481 | // 482 | // 10.0.0.1 -> [10 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0] 483 | // 484 | // For AF_INET6, the full 16 byte array is used: 485 | // 486 | // 2001:db8:3c4d:15::1a00 -> [32 1 13 184 60 77 0 21 0 0 0 0 0 0 26 0] 487 | func getIPFamily(address []byte) (uint16, error) { 488 | if len(address) == 4 { 489 | return syscall.AF_INET, nil 490 | } 491 | 492 | if isZeros(address) { 493 | return 0, errors.New("could not parse IP family from address data") 494 | } 495 | 496 | // assume IPv4 if first 4 bytes are non-zero but rest of the data is trailing zeros 497 | if !isZeros(address[:4]) && isZeros(address[4:]) { 498 | return syscall.AF_INET, nil 499 | } 500 | 501 | return syscall.AF_INET6, nil 502 | } 503 | 504 | func isZeros(b []byte) bool { 505 | for i := 0; i < len(b); i++ { 506 | if b[i] != 0 { 507 | return false 508 | } 509 | } 510 | return true 511 | } 512 | 513 | // parseDestination given a ipvs netlink response this function will respond with a valid destination entry, an error otherwise 514 | func (i *Handle) parseDestination(msg []byte) (*Destination, error) { 515 | var dst *Destination 516 | 517 | // Remove General header for this message 518 | hdr := deserializeGenlMsg(msg) 519 | NetLinkAttrs, err := nl.ParseRouteAttr(msg[hdr.Len():]) 520 | if err != nil { 521 | return nil, err 522 | } 523 | if len(NetLinkAttrs) == 0 { 524 | return nil, fmt.Errorf("error no valid netlink message found while parsing destination record") 525 | } 526 | 527 | // Now Parse and get IPVS related attributes messages packed in this message. 528 | ipvsAttrs, err := nl.ParseRouteAttr(NetLinkAttrs[0].Value) 529 | if err != nil { 530 | return nil, err 531 | } 532 | 533 | // Assemble netlink attributes and create a Destination record 534 | dst, err = assembleDestination(ipvsAttrs) 535 | if err != nil { 536 | return nil, err 537 | } 538 | 539 | return dst, nil 540 | } 541 | 542 | // doGetDestinationsCmd a wrapper function to be used by GetDestinations and GetDestination(d) apis 543 | func (i *Handle) doGetDestinationsCmd(s *Service, d *Destination) ([]*Destination, error) { 544 | var res []*Destination 545 | 546 | msgs, err := i.doCmdwithResponse(s, d, ipvsCmdGetDest) 547 | if err != nil { 548 | return nil, err 549 | } 550 | 551 | for _, msg := range msgs { 552 | dest, err := i.parseDestination(msg) 553 | if err != nil { 554 | return res, err 555 | } 556 | res = append(res, dest) 557 | } 558 | return res, nil 559 | } 560 | 561 | // parseConfig given a ipvs netlink response this function will respond with a valid config entry, an error otherwise 562 | func (i *Handle) parseConfig(msg []byte) (*Config, error) { 563 | var c Config 564 | 565 | // Remove General header for this message 566 | hdr := deserializeGenlMsg(msg) 567 | attrs, err := nl.ParseRouteAttr(msg[hdr.Len():]) 568 | if err != nil { 569 | return nil, err 570 | } 571 | 572 | for _, attr := range attrs { 573 | attrType := int(attr.Attr.Type) 574 | switch attrType { 575 | case ipvsCmdAttrTimeoutTCP: 576 | c.TimeoutTCP = time.Duration(native.Uint32(attr.Value)) * time.Second 577 | case ipvsCmdAttrTimeoutTCPFin: 578 | c.TimeoutTCPFin = time.Duration(native.Uint32(attr.Value)) * time.Second 579 | case ipvsCmdAttrTimeoutUDP: 580 | c.TimeoutUDP = time.Duration(native.Uint32(attr.Value)) * time.Second 581 | } 582 | } 583 | 584 | return &c, nil 585 | } 586 | 587 | // doGetConfigCmd a wrapper function to be used by GetConfig 588 | func (i *Handle) doGetConfigCmd() (*Config, error) { 589 | msg, err := i.doCmdWithoutAttr(ipvsCmdGetConfig) 590 | if err != nil { 591 | return nil, err 592 | } 593 | 594 | res, err := i.parseConfig(msg[0]) 595 | if err != nil { 596 | return res, err 597 | } 598 | return res, nil 599 | } 600 | 601 | // doSetConfigCmd a wrapper function to be used by SetConfig 602 | func (i *Handle) doSetConfigCmd(c *Config) error { 603 | req := newIPVSRequest(ipvsCmdSetConfig) 604 | req.Seq = atomic.AddUint32(&i.seq, 1) 605 | 606 | req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCP, nl.Uint32Attr(uint32(c.TimeoutTCP.Seconds())))) 607 | req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutTCPFin, nl.Uint32Attr(uint32(c.TimeoutTCPFin.Seconds())))) 608 | req.AddData(nl.NewRtAttr(ipvsCmdAttrTimeoutUDP, nl.Uint32Attr(uint32(c.TimeoutUDP.Seconds())))) 609 | 610 | _, err := execute(i.sock, req, 0) 611 | 612 | return err 613 | } 614 | 615 | // IPVS related netlink message format explained 616 | 617 | /* EACH NETLINK MSG is of the below format, this is what we will receive from execute() api. 618 | If we have multiple netlink objects to process like GetServices() etc., execute() will 619 | supply an array of this below object 620 | 621 | NETLINK MSG 622 | |-----------------------------------| 623 | 0 1 2 3 624 | |--------|--------|--------|--------| - 625 | | CMD ID | VER | RESERVED | |==> General Message Header represented by genlMsgHdr 626 | |-----------------------------------| - 627 | | ATTR LEN | ATTR TYPE | | 628 | |-----------------------------------| | 629 | | | | 630 | | VALUE | | 631 | | []byte Array of IPVS MSG | |==> Attribute Message represented by syscall.NetlinkRouteAttr 632 | | PADDED BY 4 BYTES | | 633 | | | | 634 | |-----------------------------------| - 635 | 636 | 637 | Once We strip genlMsgHdr from above NETLINK MSG, we should parse the VALUE. 638 | VALUE will have an array of netlink attributes (syscall.NetlinkRouteAttr) such that each attribute will 639 | represent a "Service" or "Destination" object's field. If we assemble these attributes we can construct 640 | Service or Destination. 641 | 642 | IPVS MSG 643 | |-----------------------------------| 644 | 0 1 2 3 645 | |--------|--------|--------|--------| 646 | | ATTR LEN | ATTR TYPE | 647 | |-----------------------------------| 648 | | | 649 | | | 650 | | []byte IPVS ATTRIBUTE BY 4 BYTES | 651 | | | 652 | | | 653 | |-----------------------------------| 654 | NEXT ATTRIBUTE 655 | |-----------------------------------| 656 | | ATTR LEN | ATTR TYPE | 657 | |-----------------------------------| 658 | | | 659 | | | 660 | | []byte IPVS ATTRIBUTE BY 4 BYTES | 661 | | | 662 | | | 663 | |-----------------------------------| 664 | NEXT ATTRIBUTE 665 | |-----------------------------------| 666 | | ATTR LEN | ATTR TYPE | 667 | |-----------------------------------| 668 | | | 669 | | | 670 | | []byte IPVS ATTRIBUTE BY 4 BYTES | 671 | | | 672 | | | 673 | |-----------------------------------| 674 | 675 | */ 676 | -------------------------------------------------------------------------------- /netlink_linux_test.go: -------------------------------------------------------------------------------- 1 | package ipvs 2 | 3 | import ( 4 | "errors" 5 | "reflect" 6 | "syscall" 7 | "testing" 8 | ) 9 | 10 | func Test_getIPFamily(t *testing.T) { 11 | testcases := []struct { 12 | name string 13 | address []byte 14 | expectedFamily uint16 15 | expectedErr error 16 | }{ 17 | { 18 | name: "16 byte IPv4 10.0.0.1", 19 | address: []byte{10, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 20 | expectedFamily: syscall.AF_INET, 21 | expectedErr: nil, 22 | }, 23 | { 24 | name: "16 byte IPv6 2001:db8:3c4d:15::1a00", 25 | address: []byte{32, 1, 13, 184, 60, 77, 0, 21, 0, 0, 0, 0, 0, 0, 26, 0}, 26 | expectedFamily: syscall.AF_INET6, 27 | expectedErr: nil, 28 | }, 29 | { 30 | name: "zero address", 31 | address: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 32 | expectedFamily: 0, 33 | expectedErr: errors.New("could not parse IP family from address data"), 34 | }, 35 | } 36 | 37 | for _, testcase := range testcases { 38 | testcase := testcase 39 | t.Run(testcase.name, func(t *testing.T) { 40 | family, err := getIPFamily(testcase.address) 41 | if !reflect.DeepEqual(err, testcase.expectedErr) { 42 | t.Logf("got err: %v", err) 43 | t.Logf("expected err: %v", testcase.expectedErr) 44 | t.Errorf("unexpected error") 45 | } 46 | 47 | if family != testcase.expectedFamily { 48 | t.Logf("got IP family: %v", family) 49 | t.Logf("expected IP family: %v", testcase.expectedFamily) 50 | t.Errorf("unexpected IP family") 51 | } 52 | }) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /ns/doc.go: -------------------------------------------------------------------------------- 1 | package ns 2 | -------------------------------------------------------------------------------- /ns/init_linux.go: -------------------------------------------------------------------------------- 1 | package ns 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | "github.com/sirupsen/logrus" 12 | "github.com/vishvananda/netlink" 13 | "github.com/vishvananda/netns" 14 | "golang.org/x/sys/unix" 15 | ) 16 | 17 | var ( 18 | initNs netns.NsHandle 19 | initNl *netlink.Handle 20 | initOnce sync.Once 21 | ) 22 | 23 | // NetlinkSocketsTimeout represents the default timeout duration for the sockets 24 | const NetlinkSocketsTimeout = 3 * time.Second 25 | 26 | // Init initializes a new network namespace 27 | func Init() { 28 | var err error 29 | initNs, err = netns.Get() 30 | if err != nil { 31 | logrus.Errorf("could not get initial namespace: %v", err) 32 | } 33 | initNl, err = netlink.NewHandle(getSupportedNlFamilies()...) 34 | if err != nil { 35 | logrus.Errorf("could not create netlink handle on initial namespace: %v", err) 36 | } 37 | err = initNl.SetSocketTimeout(NetlinkSocketsTimeout) 38 | if err != nil { 39 | logrus.Warnf("Failed to set the timeout on the default netlink handle sockets: %v", err) 40 | } 41 | } 42 | 43 | // SetNamespace sets the initial namespace handler 44 | func SetNamespace() error { 45 | initOnce.Do(Init) 46 | if err := netns.Set(initNs); err != nil { 47 | linkInfo, linkErr := getLink() 48 | if linkErr != nil { 49 | linkInfo = linkErr.Error() 50 | } 51 | return fmt.Errorf("failed to set to initial namespace, %v, initns fd %d: %v", linkInfo, initNs, err) 52 | } 53 | return nil 54 | } 55 | 56 | // ParseHandlerInt transforms the namespace handler into an integer 57 | func ParseHandlerInt() int { 58 | return int(getHandler()) 59 | } 60 | 61 | // GetHandler returns the namespace handler 62 | func getHandler() netns.NsHandle { 63 | initOnce.Do(Init) 64 | return initNs 65 | } 66 | 67 | func getLink() (string, error) { 68 | return os.Readlink(fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())) 69 | } 70 | 71 | // NlHandle returns the netlink handler 72 | func NlHandle() *netlink.Handle { 73 | initOnce.Do(Init) 74 | return initNl 75 | } 76 | 77 | func getSupportedNlFamilies() []int { 78 | fams := []int{unix.NETLINK_ROUTE} 79 | // NETLINK_XFRM test 80 | if err := checkXfrmSocket(); err != nil { 81 | logrus.Warnf("Could not load necessary modules for IPSEC rules: %v", err) 82 | } else { 83 | fams = append(fams, unix.NETLINK_XFRM) 84 | } 85 | // NETLINK_NETFILTER test 86 | if err := loadNfConntrackModules(); err != nil { 87 | if checkNfSocket() != nil { 88 | logrus.Warnf("Could not load necessary modules for Conntrack: %v", err) 89 | } else { 90 | fams = append(fams, unix.NETLINK_NETFILTER) 91 | } 92 | } else { 93 | fams = append(fams, unix.NETLINK_NETFILTER) 94 | } 95 | 96 | return fams 97 | } 98 | 99 | // API check on required xfrm modules (xfrm_user, xfrm_algo) 100 | func checkXfrmSocket() error { 101 | fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, unix.NETLINK_XFRM) 102 | if err != nil { 103 | return err 104 | } 105 | unix.Close(fd) 106 | return nil 107 | } 108 | 109 | func loadNfConntrackModules() error { 110 | if out, err := exec.Command("modprobe", "-va", "nf_conntrack").CombinedOutput(); err != nil { 111 | return fmt.Errorf("Running modprobe nf_conntrack failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err) 112 | } 113 | if out, err := exec.Command("modprobe", "-va", "nf_conntrack_netlink").CombinedOutput(); err != nil { 114 | return fmt.Errorf("Running modprobe nf_conntrack_netlink failed with message: `%s`, error: %v", strings.TrimSpace(string(out)), err) 115 | } 116 | return nil 117 | } 118 | 119 | // API check on required nf_conntrack* modules (nf_conntrack, nf_conntrack_netlink) 120 | func checkNfSocket() error { 121 | fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, unix.NETLINK_NETFILTER) 122 | if err != nil { 123 | return err 124 | } 125 | unix.Close(fd) 126 | return nil 127 | } 128 | --------------------------------------------------------------------------------