├── docs ├── troubleshoot.md ├── img │ ├── ep.jpg │ ├── svc.jpg │ ├── forward.png │ ├── ipset.jpg │ ├── ipvs1.jpg │ ├── podfw.png │ ├── logo-full.png │ ├── policyfw.png │ ├── logo-icon-only.png │ └── kube-router-arch.png ├── kops.md ├── observability.md ├── index.md ├── architecture.md ├── health.md ├── see-it-in-action.md ├── kubeadm.md ├── load-balancer-allocator.md ├── introduction.md ├── tunnels.md ├── pod-toolbox.md ├── how-it-works.md ├── metrics.md ├── developing.md └── generic.md ├── pkg ├── controllers │ ├── controllers.go │ ├── controllers_suite_test.go │ ├── netpol │ │ ├── namespace.go │ │ ├── policy_test.go │ │ └── ipset_fixture_test.go │ ├── routing │ │ ├── utils_test.go │ │ ├── ipset_fixture_test.go │ │ ├── aws.go │ │ └── pod_egress.go │ └── proxy │ │ ├── hairpin_controller.go │ │ ├── nodeport_healthcheck.go │ │ └── metrics.go ├── cri │ ├── interface.go │ └── remote_runtime.go ├── utils │ ├── base64.go │ ├── ipset_fixture_helpers.go │ ├── base64_test.go │ ├── iptables_test.go │ ├── linux_routing.go │ ├── linux_routingtest.go │ ├── pod_cidr.go │ ├── utils.go │ ├── ip.go │ ├── sysctl.go │ └── service.go ├── k8s │ └── indexers │ │ └── endpointslices.go ├── routes │ ├── linux_routes.go │ ├── route_sync.go │ └── pbr.go ├── tunnels │ └── linux_tunnels_test.go ├── bgp │ ├── id_test.go │ ├── id.go │ ├── parse.go │ └── peer_config.go └── version │ └── version.go ├── dashboard └── dashboard.png ├── .markdownlint.yaml ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.md │ └── bug_report.md ├── dependabot.yml └── workflows │ ├── close_stale.yml │ └── codeql-analysis.yml ├── cni └── 10-kuberouter.conf ├── internal └── testutils │ └── pointers.go ├── MAINTAINER.md ├── testdata └── ipset_test_1 │ ├── nodes.yaml │ ├── networkpolicy.yaml │ ├── pods.yaml │ ├── services.yaml │ └── ipset_save.txt ├── .gitignore ├── .goreleaser.yml ├── cmd └── kube-router │ ├── kube-router_test.go │ └── kube-router.go ├── USERS.md ├── CONTRIBUTING.md ├── SECURITY.md ├── .golangci.yml ├── Dockerfile ├── RELEASE.md └── daemonset ├── generic-kuberouter-only-advertise-routes.yaml ├── kube-router-proxy-daemonset.yaml ├── kube-router-firewall-daemonset.yaml ├── kube-router-all-service-daemonset.yaml ├── kube-router-all-service-daemonset-advertise-routes.yaml ├── generic-kuberouter.yaml └── kubeadm-kuberouter.yaml /docs/troubleshoot.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pkg/controllers/controllers.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | -------------------------------------------------------------------------------- /docs/img/ep.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/ep.jpg -------------------------------------------------------------------------------- /docs/img/svc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/svc.jpg -------------------------------------------------------------------------------- /docs/img/forward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/forward.png 
-------------------------------------------------------------------------------- /docs/img/ipset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/ipset.jpg -------------------------------------------------------------------------------- /docs/img/ipvs1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/ipvs1.jpg -------------------------------------------------------------------------------- /docs/img/podfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/podfw.png -------------------------------------------------------------------------------- /docs/img/logo-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/logo-full.png -------------------------------------------------------------------------------- /docs/img/policyfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/policyfw.png -------------------------------------------------------------------------------- /dashboard/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/dashboard/dashboard.png -------------------------------------------------------------------------------- /docs/img/logo-icon-only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/logo-icon-only.png -------------------------------------------------------------------------------- /docs/img/kube-router-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/HEAD/docs/img/kube-router-arch.png -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MD046: 3 | style: fenced 4 | MD013: 5 | line_length: 120 6 | code_block_line_length: 200 7 | MD045: false 8 | -------------------------------------------------------------------------------- /pkg/cri/interface.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | // RuntimeService is the client API for RuntimeService service. 4 | type RuntimeService interface { 5 | ContainerInfo(id string) (*containerInfo, error) 6 | Close() error 7 | } 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Kube-Router Channel on Kubernetes Slack 4 | url: https://kubernetes.slack.com/messages/C8DCQGTSB 5 | about: Please ask and answer questions here. 
6 | -------------------------------------------------------------------------------- /cni/10-kuberouter.conf: -------------------------------------------------------------------------------- 1 | { 2 | "cniVersion": "0.3.0", 3 | "name":"mynet", 4 | "type":"bridge", 5 | "bridge":"kube-bridge", 6 | "isDefaultGateway":true, 7 | "ipam": { 8 | "type":"host-local" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /pkg/controllers/controllers_suite_test.go: -------------------------------------------------------------------------------- 1 | package controllers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/onsi/ginkgo" 7 | . "github.com/onsi/gomega" 8 | ) 9 | 10 | func TestControllers(t *testing.T) { 11 | RegisterFailHandler(Fail) 12 | RunSpecs(t, "Controllers Suite") 13 | } 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | groups: 6 | k8s-dependencies: 7 | patterns: 8 | - "k8s.io*" 9 | schedule: 10 | interval: "monthly" 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | interval: "monthly" 15 | -------------------------------------------------------------------------------- /internal/testutils/pointers.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "net" 5 | 6 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 7 | ) 8 | 9 | type TestValue interface { 10 | string | uint32 | net.IP | utils.Base64String 11 | } 12 | 13 | func ValToPtr[V TestValue](v V) *V { 14 | return &v 15 | } 16 | 17 | func PtrToVal[V TestValue](v *V) V { 18 | if v == nil { 19 | return *new(V) 20 | } 21 | return *v 22 | } 23 | -------------------------------------------------------------------------------- /docs/kops.md: -------------------------------------------------------------------------------- 1 | # Kops Integration 2 | 3 | Kops version 1.6.2 and above officially includes an integration with kube-router. 4 | 5 | Please follow the instructions in the 6 | [official documentation](https://github.com/kubernetes/kops/blob/master/docs/networking/kube-router.md) to provision a 7 | Kubernetes cluster with Kube-router. 8 | 9 | Use the [latest version](https://github.com/kubernetes/kops/releases) of the kops binaries, which include kube-router support.
10 | -------------------------------------------------------------------------------- /MAINTAINER.md: -------------------------------------------------------------------------------- 1 | # Maintainers 2 | 3 | ## maintainers 4 | 5 | * Aaron U'Ren [@aauren](https://github.com/aauren) 6 | * Manuel Rüger [@mrueg](https://github.com/mrueg) 7 | * Murali Reddy [@murali-reddy](https://github.com/murali-reddy) 8 | 9 | ## emeritus maintainers 10 | 11 | * Andrew Sy Kim [@andrewsykim](https://github.com/andrewsykim) 12 | * Bryan Zubrod [@bzub](https://github.com/bzub) 13 | * Joakim Karlsson [@roffe](https://github.com/roffe) 14 | * Jimmy Zhang [@jimmy-zh](https://github.com/jimmy-zh) 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for kube-router 4 | title: '' 5 | labels: feature 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Is your feature request related to a problem? Please describe 11 | 12 | A clear and concise description of what the problem is and what the feature provides. 13 | 14 | ## Describe the solution you'd like 15 | 16 | A clear and concise description of what you want to happen. 17 | 18 | ## Describe alternatives you've considered 19 | 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | ## Additional context 23 | 24 | Add any other context or screenshots about the feature request here. 25 | -------------------------------------------------------------------------------- /pkg/utils/base64.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/base64" 5 | "fmt" 6 | 7 | "github.com/goccy/go-yaml" 8 | ) 9 | 10 | // Base64String is a wrapper type that handles automatic b64 decoding of a 11 | // string when unmarshalling 12 | type Base64String string 13 | 14 | func (b *Base64String) UnmarshalYAML(raw []byte) error { 15 | var tmp string 16 | if err := yaml.Unmarshal(raw, &tmp); err != nil { 17 | return fmt.Errorf("failed to unmarshal string into base64string type: %w", err) 18 | } 19 | decoded, err := base64.StdEncoding.DecodeString(tmp) 20 | if err != nil { 21 | return fmt.Errorf("failed to base64 decode field: %w", err) 22 | } 23 | *b = Base64String(string(decoded)) 24 | return nil 25 | } 26 | -------------------------------------------------------------------------------- /docs/observability.md: -------------------------------------------------------------------------------- 1 | # Observability 2 | 3 | ## Observing kube-router with Metrics 4 | 5 | See the [metrics documentation](metrics.md) for more information. 6 | 7 | ## Observing dropped traffic due to network policy enforcement 8 | 9 | Traffic that is rejected due to network policy enforcement is logged by kube-router using the iptables NFLOG target 10 | under group 100. The simplest way to observe the packets dropped by kube-router is to run tcpdump on the `nflog:100` 11 | interface, e.g. `tcpdump -i nflog:100 -n`. You can also configure ulogd to monitor dropped packets in the desired output 12 | format. Please see [this ulogd how-to](https://kb.gtkc.net/iptables-with-ulogd-quick-howto/) for an 13 | example configuration to set up a packet logging stack.
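The same NFLOG group can also be consumed programmatically. The following is a minimal sketch, assuming the third-party `github.com/google/gopacket` bindings and a libpcap build with nflog support (kube-router itself does not ship this helper):

```go
package main

import (
	"fmt"

	"github.com/google/gopacket"
	"github.com/google/gopacket/pcap"
)

func main() {
	// Attach to NFLOG group 100, where kube-router logs packets dropped by network policy.
	handle, err := pcap.OpenLive("nflog:100", 65535, true, pcap.BlockForever)
	if err != nil {
		panic(err)
	}
	defer handle.Close()

	// Decode and print each dropped packet as it arrives.
	source := gopacket.NewPacketSource(handle, handle.LinkType())
	for packet := range source.Packets() {
		fmt.Println(packet)
	}
}
```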
14 | -------------------------------------------------------------------------------- /testdata/ipset_test_1/nodes.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | items: 3 | - apiVersion: v1 4 | kind: Node 5 | metadata: 6 | name: kube-router-vm1 7 | spec: 8 | podCIDR: 10.242.0.0/24 9 | podCIDRs: 10 | - 10.242.0.0/24 11 | - 2001:db8:42:1000::/64 12 | status: 13 | addresses: 14 | - type: InternalIP 15 | address: 10.241.0.20 16 | - type: InternalIP 17 | address: 2001:db8:ca2:2::2ca1 18 | - apiVersion: v1 19 | kind: Node 20 | metadata: 21 | name: kube-router-vm2 22 | spec: 23 | podCIDR: 10.242.1.0/24 24 | podCIDRs: 25 | - 10.242.1.0/24 26 | - 2001:db8:42:1001::/64 27 | status: 28 | addresses: 29 | - type: InternalIP 30 | address: 10.241.0.21 31 | - type: InternalIP 32 | address: 2001:db8:ca2:2::e7e5 33 | kind: List 34 | metadata: 35 | resourceVersion: "" 36 | -------------------------------------------------------------------------------- /pkg/k8s/indexers/endpointslices.go: -------------------------------------------------------------------------------- 1 | package indexers 2 | 3 | import ( 4 | "fmt" 5 | 6 | discoveryv1 "k8s.io/api/discovery/v1" 7 | ) 8 | 9 | // ServiceNameIndex is the name for our custom index. 10 | const ServiceNameIndex = "service-name" 11 | 12 | // ServiceNameIndexFunc creates an index key based on an EndpointSlice's parent Service. 13 | // The key is in the format "<namespace>/<service-name>". 14 | func ServiceNameIndexFunc(obj interface{}) ([]string, error) { 15 | slice, ok := obj.(*discoveryv1.EndpointSlice) 16 | if !ok { 17 | return []string{}, nil 18 | } 19 | 20 | serviceName, ok := slice.Labels[discoveryv1.LabelServiceName] 21 | if !ok || serviceName == "" { 22 | // This slice is not associated with a Service, so we can't index it. 23 | return []string{}, nil 24 | } 25 | 26 | return []string{fmt.Sprintf("%s/%s", slice.Namespace, serviceName)}, nil 27 | } 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #### Project Specific Ignores #### 2 | # Built binaries 3 | /kube-router 4 | /gobgp 5 | /cni-download 6 | 7 | # Go directories 8 | _output 9 | _cache 10 | vendor 11 | .*.sw? 12 | 13 | # Ignore worktree directory 14 | worktrees 15 | 16 | # Ignore common IDE directories 17 | /.vscode 18 | /.idea 19 | 20 | 21 | #### Go Lang Ignores #### 22 | # If you prefer the allow list template instead of the deny list, see community template: 23 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 24 | # 25 | # Binaries for programs and plugins 26 | *.exe 27 | *.exe~ 28 | *.dll 29 | *.so 30 | *.dylib 31 | 32 | # Test binary, built with `go test -c` 33 | *.test 34 | 35 | # Output of the go coverage tool, specifically when used with LiteIDE 36 | *.out 37 | 38 | # Dependency directories (remove the comment below to include it) 39 | # vendor/ 40 | 41 | # Go workspace file 42 | go.work 43 | go.work.sum 44 | 45 | # env file 46 | .env 47 | -------------------------------------------------------------------------------- /pkg/utils/ipset_fixture_helpers.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // BuildIPSetRestoreFromSets returns the restore script for a provided map of sets and an optional filter list.
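// The input sets, including their options and entries, are deep-copied into a scratch IPSet so the
// caller's data is never mutated; a non-nil setNames slice is likewise copied before being passed to
// BuildIPSetRestore as the filter list.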
4 | func BuildIPSetRestoreFromSets(sets map[string]*Set, isIPv6 bool, setNames []string) string { 5 | ipset := &IPSet{ 6 | sets: make(map[string]*Set, len(sets)), 7 | isIpv6: isIPv6, 8 | } 9 | for name, set := range sets { 10 | clone := &Set{ 11 | Parent: ipset, 12 | Name: set.Name, 13 | Options: append([]string(nil), set.Options...), 14 | } 15 | clone.Entries = make([]*Entry, len(set.Entries)) 16 | for i, entry := range set.Entries { 17 | clone.Entries[i] = &Entry{ 18 | Set: clone, 19 | Options: append([]string(nil), entry.Options...), 20 | } 21 | } 22 | ipset.sets[name] = clone 23 | } 24 | var include []string 25 | if setNames != nil { 26 | include = append([]string(nil), setNames...) 27 | } 28 | return BuildIPSetRestore(ipset, include) 29 | } 30 | -------------------------------------------------------------------------------- /testdata/ipset_test_1/networkpolicy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | items: 3 | - apiVersion: networking.k8s.io/v1 4 | kind: NetworkPolicy 5 | metadata: 6 | name: debug 7 | namespace: default 8 | spec: 9 | egress: 10 | - to: 11 | - podSelector: 12 | matchLabels: 13 | name: whoami 14 | podSelector: 15 | matchLabels: 16 | name: debug-toolbox 17 | policyTypes: 18 | - Egress 19 | - apiVersion: networking.k8s.io/v1 20 | kind: NetworkPolicy 21 | metadata: 22 | name: whoami 23 | namespace: default 24 | spec: 25 | ingress: 26 | - from: 27 | - podSelector: 28 | matchLabels: 29 | name: debug-toolbox 30 | - ipBlock: 31 | cidr: 10.95.0.239/32 32 | ports: 33 | - port: 5000 34 | protocol: TCP 35 | podSelector: 36 | matchLabels: 37 | name: whoami 38 | policyTypes: 39 | - Ingress 40 | kind: List 41 | metadata: 42 | resourceVersion: "" 43 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | release: 4 | draft: true 5 | prerelease: auto 6 | header: | 7 | ## Summary 8 | 9 | ## Contributions 10 | 11 | ## Changelog 12 | 13 | builds: 14 | - main: ./cmd/kube-router 15 | goos: 16 | - linux 17 | goarch: 18 | - amd64 19 | - arm 20 | - arm64 21 | - ppc64le 22 | - s390x 23 | - riscv64 24 | goarm: 25 | - 6 26 | - 7 27 | env: 28 | - CGO_ENABLED=0 29 | ldflags: 30 | - "-X github.com/cloudnativelabs/kube-router/v2/pkg/version.Version={{.Version}}" 31 | - "-X github.com/cloudnativelabs/kube-router/v2/pkg/version.BuildDate={{.Date}}" 32 | 33 | archives: 34 | - format: tar.gz 35 | name_template: '{{ .Binary }}_{{.Version}}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ 36 | .Arm }}{{ end }}' 37 | files: 38 | - LICENSE* 39 | - README* 40 | - CHANGELOG* 41 | - Documentation* 42 | 43 | snapshot: 44 | version_template: SNAPSHOT-{{ .Commit }} 45 | -------------------------------------------------------------------------------- /testdata/ipset_test_1/pods.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | items: 3 | - apiVersion: v1 4 | kind: Pod 5 | metadata: 6 | labels: 7 | name: whoami 8 | name: whoami-nflzk 9 | namespace: default 10 | spec: 11 | containers: 12 | - image: example/whoami:latest 13 | name: whoami 14 | hostNetwork: false 15 | nodeName: kube-router-vm2 16 | status: 17 | hostIP: 10.241.0.21 18 | phase: Running 19 | podIP: 10.242.1.4 20 | podIPs: 21 | - ip: 10.242.1.4 22 | - ip: 2001:db8:42:1001::4 23 | - apiVersion: v1 24 | kind: Pod 25 | metadata: 26 | labels: 27 | name: whoami 28 | name: whoami-s72mp 29 | 
namespace: default 30 | spec: 31 | containers: 32 | - image: example/whoami:latest 33 | name: whoami 34 | hostNetwork: false 35 | nodeName: kube-router-vm1 36 | status: 37 | hostIP: 10.241.0.20 38 | phase: Running 39 | podIP: 10.242.0.5 40 | podIPs: 41 | - ip: 10.242.0.5 42 | - ip: 2001:db8:42:1000::5 43 | kind: List 44 | metadata: 45 | resourceVersion: "" 46 | -------------------------------------------------------------------------------- /pkg/routes/linux_routes.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | "github.com/vishvananda/netlink" 8 | "github.com/vishvananda/netlink/nl" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | const ( 13 | // Taken from: https://github.com/torvalds/linux/blob/master/include/uapi/linux/rtnetlink.h#L284 14 | ZebraOriginator = 0x11 15 | ) 16 | 17 | // DeleteByDestination attempts to safely find all routes matching the given destination subnet and delete them 18 | func DeleteByDestination(destinationSubnet *net.IPNet) error { 19 | routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{ 20 | Dst: destinationSubnet, Protocol: ZebraOriginator, 21 | }, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL) 22 | if err != nil { 23 | return fmt.Errorf("failed to get routes from netlink: %v", err) 24 | } 25 | for i, r := range routes { 26 | klog.V(2).Infof("Found route to remove: %s", r.String()) 27 | if err = netlink.RouteDel(&routes[i]); err != nil { 28 | return fmt.Errorf("failed to remove route due to %v", err) 29 | } 30 | } 31 | return nil 32 | } 33 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Kube-router Documentation 2 | 3 | The documentation is divided into the following sections: 4 | 5 | ## Introduction 6 | 7 | - [Introduction](introduction.md) 8 | - [What is Kube-router?](introduction.md#what-is-kube-router) 9 | - [Why Kube-router?](introduction.md#why-kube-router) 10 | 11 | ## Concepts 12 | 13 | - [See it in action](see-it-in-action.md) 14 | - [How it works?](how-it-works.md) 15 | - [Architecture](architecture.md) 16 | 17 | ## User Guide 18 | 19 | - [User Guide](user-guide.md) 20 | - [Installation](user-guide.md#try-kube-router-with-cluster-installers) 21 | - [Requirements](user-guide.md#requirements) 22 | 23 | ## Operations Guide 24 | 25 | - [Health](health.md) 26 | - [Observability](observability.md) 27 | - [Troubleshooting](troubleshoot.md) 28 | - [Pod toolbox](pod-toolbox.md) 29 | - [Upgrades](upgrading.md) 30 | - [IPv6 / Dual-Stack](ipv6.md) 31 | - [Load Balancer Support](load-balancer-allocator.md) 32 | 33 | ## Developer and Contributor Guide 34 | 35 | - [Developer Guide](developing.md) 36 | - [Contributor Guideline](/CONTRIBUTING.md) 37 | -------------------------------------------------------------------------------- /pkg/tunnels/linux_tunnels_test.go: -------------------------------------------------------------------------------- 1 | package tunnels 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_GenerateTunnelName(t *testing.T) { 10 | testcases := []struct { 11 | name string 12 | nodeIP string 13 | tunnelName string 14 | }{ 15 | { 16 | "IP less than 12 characters after removing '.'", 17 | "10.0.0.1", 18 | "tun-e443169117a", 19 | }, 20 | { 21 | "IP has 12 characters after removing '.'", 22 | "100.200.300.400", 23 | "tun-9033d7906c7", 24 | }, 25 |
{ 26 | "IPv6 tunnel names are properly handled and consistent", 27 | "2001:db8:42:2::/64", 28 | "tun-ba56986ef05", 29 | }, 30 | } 31 | 32 | for _, testcase := range testcases { 33 | t.Run(testcase.name, func(t *testing.T) { 34 | tunnelName := GenerateTunnelName(testcase.nodeIP) 35 | assert.Lessf(t, len(tunnelName), 16, "the maximum length of the tunnel name should never exceed "+ 36 | "15 characters as 16 characters is the maximum length of a Unix interface name") 37 | assert.Equal(t, testcase.tunnelName, tunnelName, "did not get expected tunnel interface name") 38 | }) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pkg/utils/base64_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/goccy/go-yaml" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestBase64String(t *testing.T) { 12 | type testStruct struct { 13 | Password Base64String `yaml:"password"` 14 | } 15 | 16 | tcs := []struct { 17 | name string 18 | input []byte 19 | shouldError bool 20 | errorContains string 21 | }{ 22 | { 23 | name: "happy path", 24 | // b64: hello world 25 | input: []byte(`password: "aGVsbG8gd29ybGQ="`), 26 | }, 27 | { 28 | name: "invalid base64 encoding", 29 | input: []byte(`password: "notbase64"`), 30 | shouldError: true, 31 | errorContains: "failed to base64 decode", 32 | }, 33 | } 34 | 35 | for _, tc := range tcs { 36 | t.Run(tc.name, func(tt *testing.T) { 37 | var ts testStruct 38 | err := yaml.Unmarshal(tc.input, &ts) 39 | fmt.Printf("TS: %+v\n", ts) 40 | if tc.shouldError { 41 | assert.ErrorContains(tt, err, tc.errorContains) 42 | } else { 43 | assert.NoError(tt, err) 44 | assert.NotEmpty(tt, ts.Password) 45 | } 46 | }) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/close_stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | permissions: 7 | issues: write 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/stale@v10 15 | with: 16 | stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' 17 | stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 10 days.' 18 | close-issue-message: 'This issue was closed because it has been stale for 5 days with no activity.' 19 | close-pr-message: 'This PR was closed because it has been stale for 10 days with no activity.' 20 | days-before-issue-stale: 30 21 | days-before-pr-stale: 60 22 | days-before-issue-close: 5 23 | days-before-pr-close: 10 24 | exempt-issue-labels: override-stale 25 | exempt-pr-labels: override-stale,dependencies 26 | enable-statistics: true 27 | operations-per-run: 100 28 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | Kube-router is built around the concept of watchers and controllers. Watchers use the Kubernetes watch API to get notified 4 | of create, update, and delete events for Kubernetes objects.
Each watcher gets notifications related to a 5 | particular API object. On receiving an event from the API server, the watcher broadcasts it. Controllers register to get 6 | event updates from the watchers and act upon the events. 7 | 8 | Kube-router consists of 3 core controllers and multiple watchers, as depicted in the diagram below. 9 | 10 | ![Arch](./img/kube-router-arch.png) 11 | 12 | Each [controller](https://github.com/cloudnativelabs/kube-router/tree/master/app/controllers) follows the 13 | structure below: 14 | 15 | ```go 16 | func Run() { 17 | for { 18 | Sync() // control loop that runs forever and performs a sync at a periodic interval 19 | } 20 | } 21 | 22 | func OnUpdate() { 23 | Sync() // on receiving an update of a watched API object (namespace, node, pod, network policy, etc.) 24 | } 25 | 26 | func Sync() { 27 | // reconcile any state changes 28 | } 29 | 30 | func Cleanup() { 31 | // clean up any changes (to iptables, ipvs, network, etc.) made to the system 32 | } 33 | ``` -------------------------------------------------------------------------------- /docs/health.md: -------------------------------------------------------------------------------- 1 | # Health checking kube-router 2 | 3 | kube-router currently has basic health checking in the form of heartbeats sent from each controller to the health controller 4 | each time the main loop completes successfully. 5 | 6 | The health port is 20244 by default but can be changed with the startup option below. 7 | The health path is `/healthz`. 8 | 9 | ```sh 10 | --health-port=<port> 11 | ``` 12 | 13 | If the port is set to 0 (zero), no HTTP endpoint will be made available, but the health controller will still run and print 14 | out any missed heartbeats to kube-router's STDERR. 15 | 16 | If a controller does not send a heartbeat within controllersynctime + 5 seconds, the component will be flagged as 17 | unhealthy. 18 | 19 | If any of the running components is failing, the whole kube-router state will be marked as failed in the /healthz 20 | endpoint. 21 | 22 | For example, if kube-router is started with 23 | 24 | ```sh 25 | --run-router=true 26 | --run-firewall=true 27 | --run-service-proxy=true 28 | --run-loadbalancer=true 29 | ``` 30 | 31 | If the route controller, policy controller, or service controller exits its main loop and does not publish a heartbeat, 32 | the `/healthz` endpoint will return an error 500, signaling that kube-router is not healthy. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Report a bug 3 | about: Create a bug report to help us improve kube-router 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## What happened? 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | ## What did you expect to happen? 15 | 16 | A clear and concise description of what you expected to happen. 17 | 18 | ## How can we reproduce the behavior you experienced? 19 | 20 | Steps to reproduce the behavior: 21 | 22 | 1. Step 1 23 | 2. Step 2 24 | 3. Step 3 25 | 4. Step 4 26 | 27 | ## Screenshots / Architecture Diagrams / Network Topologies 28 | 29 | If applicable, add those here to help explain your problem. 30 | 31 | ## System Information (please complete the following information) 32 | 33 | - Kube-Router Version (`kube-router --version`): [e.g. 1.0.1] 34 | - Kube-Router Parameters: [e.g. --run-router --run-service-proxy --enable-overlay --overlay-type=full etc.]
35 | - Kubernetes Version (`kubectl version`) : [e.g. 1.18.3] 36 | - Cloud Type: [e.g. AWS, GCP, Azure, on premise] 37 | - Kubernetes Deployment Type: [e.g. EKS, GKE, Kops, Kubeadm, etc.] 38 | - Kube-Router Deployment Type: [e.g. DaemonSet, System Service] 39 | - Cluster Size: [e.g. 200 Nodes] 40 | 41 | ## Logs, other output, metrics 42 | 43 | Please provide logs, other kind of output or observed metrics here. 44 | 45 | ## Additional context 46 | 47 | Add any other context about the problem here. 48 | -------------------------------------------------------------------------------- /cmd/kube-router/kube-router_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "sync" 8 | "testing" 9 | ) 10 | 11 | func TestMainHelp(t *testing.T) { 12 | origStderr := os.Stderr 13 | stderrR, stderrW, _ := os.Pipe() 14 | os.Stderr = stderrW 15 | defer func() { os.Stderr = origStderr }() 16 | 17 | stderrBuf := bytes.NewBuffer(nil) 18 | wg := &sync.WaitGroup{} 19 | wg.Add(1) 20 | go func() { 21 | defer wg.Done() 22 | _, err := io.Copy(stderrBuf, stderrR) 23 | if err != nil { 24 | panic(err) 25 | } 26 | }() 27 | 28 | origArgs := os.Args 29 | os.Args = []string{"kube-router", "--help"} 30 | defer func() { os.Args = origArgs }() 31 | 32 | if err := Main(); err != nil { 33 | t.Fatalf("kube-router exited with error: %s\n", err) 34 | } 35 | stderrW.Close() 36 | wg.Wait() 37 | 38 | docF, err := os.Open("../../docs/user-guide.md") 39 | if err != nil { 40 | t.Fatalf("could not open docs/user-guide.md: %s\n", err) 41 | } 42 | docBuf := bytes.NewBuffer(nil) 43 | _, err = docBuf.ReadFrom(docF) 44 | if err != nil { 45 | t.Fatalf("could not read from buffer: %s\n", err) 46 | } 47 | docF.Close() 48 | 49 | exp := append([]byte("```sh\n"), stderrBuf.Bytes()...) 50 | exp = append(exp, []byte("```\n")...) 
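// The user guide must contain this exact help output, wrapped verbatim in a fenced sh code block.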
51 | 52 | if !bytes.Contains(docBuf.Bytes(), exp) { 53 | t.Errorf("docs/user-guide.md 'command line options' section does not match `kube-router --help`.\n"+ 54 | "Expected:\n%s", exp) 55 | t.Errorf("\nGot:\n%s", docBuf.Bytes()) 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /pkg/utils/iptables_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | v1core "k8s.io/api/core/v1" 9 | ) 10 | 11 | func TestCommonICMPRules(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | family v1core.IPFamily 15 | expected []ICMPRule 16 | }{ 17 | { 18 | name: "IPv4", 19 | family: v1core.IPv4Protocol, 20 | expected: []ICMPRule{ 21 | {"icmp", "--icmp-type", "echo-request", "allow icmp echo requests"}, 22 | {"icmp", "--icmp-type", "destination-unreachable", "allow icmp destination unreachable messages"}, 23 | {"icmp", "--icmp-type", "time-exceeded", "allow icmp time exceeded messages"}, 24 | }, 25 | }, 26 | { 27 | name: "IPv6", 28 | family: v1core.IPv6Protocol, 29 | expected: []ICMPRule{ 30 | {"ipv6-icmp", "--icmpv6-type", "echo-request", "allow icmp echo requests"}, 31 | {"ipv6-icmp", "--icmpv6-type", "destination-unreachable", "allow icmp destination unreachable messages"}, 32 | {"ipv6-icmp", "--icmpv6-type", "time-exceeded", "allow icmp time exceeded messages"}, 33 | {"ipv6-icmp", "--icmpv6-type", "neighbor-solicitation", "allow icmp neighbor solicitation messages"}, 34 | {"ipv6-icmp", "--icmpv6-type", "neighbor-advertisement", "allow icmp neighbor advertisement messages"}, 35 | {"ipv6-icmp", "--icmpv6-type", "echo-reply", "allow icmp echo reply messages"}, 36 | }, 37 | }, 38 | } 39 | 40 | for _, tt := range tests { 41 | t.Run(tt.name, func(t *testing.T) { 42 | result := CommonICMPRules(tt.family) 43 | assert.Equal(t, tt.expected, result, "CommonICMPRules(%v) = %v, want %v", tt.family, result, tt.expected) 44 | }) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /pkg/bgp/id_test.go: -------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_ValidateCommunity(t *testing.T) { 10 | t.Run("BGP community specified as a 32-bit integer should pass validation", func(t *testing.T) { 11 | assert.Nil(t, ValidateCommunity("4294967041")) 12 | assert.Nil(t, ValidateCommunity("4294967295")) 13 | }) 14 | t.Run("BGP community specified as 2 16-bit integers should pass validation", func(t *testing.T) { 15 | assert.Nil(t, ValidateCommunity("65535:65281")) 16 | assert.Nil(t, ValidateCommunity("65535:65535")) 17 | }) 18 | t.Run("Well known BGP communities passed as a string should pass validation", func(t *testing.T) { 19 | assert.Nil(t, ValidateCommunity("no-export")) 20 | assert.Nil(t, ValidateCommunity("internet")) 21 | assert.Nil(t, ValidateCommunity("planned-shut")) 22 | assert.Nil(t, ValidateCommunity("accept-own")) 23 | assert.Nil(t, ValidateCommunity("blackhole")) 24 | assert.Nil(t, ValidateCommunity("no-advertise")) 25 | assert.Nil(t, ValidateCommunity("no-peer")) 26 | }) 27 | t.Run("BGP community that is greater than 32-bit integer should fail validation", func(t *testing.T) { 28 | assert.Error(t, ValidateCommunity("4294967296")) 29 | }) 30 | t.Run("BGP community that is greater than 2 16-bit integers should fail validation", func(t 
*testing.T) { 31 | assert.Error(t, ValidateCommunity("65536:65535")) 32 | assert.Error(t, ValidateCommunity("65535:65536")) 33 | assert.Error(t, ValidateCommunity("65536:65536")) 34 | }) 35 | t.Run("BGP community that is not a number should fail validation", func(t *testing.T) { 36 | assert.Error(t, ValidateCommunity("0xFFFFFFFF")) 37 | assert.Error(t, ValidateCommunity("community")) 38 | }) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/namespace.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "reflect" 5 | 6 | api "k8s.io/api/core/v1" 7 | "k8s.io/client-go/tools/cache" 8 | "k8s.io/klog/v2" 9 | ) 10 | 11 | func (npc *NetworkPolicyController) newNamespaceEventHandler() cache.ResourceEventHandler { 12 | return cache.ResourceEventHandlerFuncs{ 13 | AddFunc: func(obj interface{}) { 14 | npc.handleNamespaceAdd(obj.(*api.Namespace)) 15 | }, 16 | UpdateFunc: func(oldObj, newObj interface{}) { 17 | npc.handleNamespaceUpdate(oldObj.(*api.Namespace), newObj.(*api.Namespace)) 18 | }, 19 | DeleteFunc: func(obj interface{}) { 20 | switch obj := obj.(type) { 21 | case *api.Namespace: 22 | npc.handleNamespaceDelete(obj) 23 | return 24 | case cache.DeletedFinalStateUnknown: 25 | if namespace, ok := obj.Obj.(*api.Namespace); ok { 26 | npc.handleNamespaceDelete(namespace) 27 | return 28 | } 29 | default: 30 | klog.Errorf("unexpected object type: %v", obj) 31 | } 32 | }, 33 | } 34 | } 35 | 36 | func (npc *NetworkPolicyController) handleNamespaceAdd(obj *api.Namespace) { 37 | if obj.Labels == nil { 38 | return 39 | } 40 | klog.V(2).Infof("Received update for namespace: %s", obj.Name) 41 | 42 | npc.RequestFullSync() 43 | } 44 | 45 | func (npc *NetworkPolicyController) handleNamespaceUpdate(oldObj, newObj *api.Namespace) { 46 | if reflect.DeepEqual(oldObj.Labels, newObj.Labels) { 47 | return 48 | } 49 | klog.V(2).Infof("Received update for namespace: %s", newObj.Name) 50 | 51 | npc.RequestFullSync() 52 | } 53 | 54 | func (npc *NetworkPolicyController) handleNamespaceDelete(obj *api.Namespace) { 55 | if obj.Labels == nil { 56 | return 57 | } 58 | klog.V(2).Infof("Received namespace: %s delete event", obj.Name) 59 | 60 | npc.RequestFullSync() 61 | } 62 | -------------------------------------------------------------------------------- /pkg/utils/linux_routing.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/vishvananda/netlink" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | const ( 13 | rtTablesFileName = "rt_tables" 14 | iproutePkg = "iproute2" 15 | ) 16 | 17 | var ( 18 | rtTablesPosLoc = []string{ 19 | fmt.Sprintf("/etc/%s/%s", iproutePkg, rtTablesFileName), 20 | fmt.Sprintf("/usr/lib/%s/%s", iproutePkg, rtTablesFileName), 21 | fmt.Sprintf("/usr/share/%s/%s", iproutePkg, rtTablesFileName), 22 | } 23 | ) 24 | 25 | type LocalLinkQuerier interface { 26 | LinkList() ([]netlink.Link, error) 27 | AddrList(link netlink.Link, family int) ([]netlink.Addr, error) 28 | } 29 | 30 | // RouteTableAdd adds a new named table to iproute's rt_tables configuration file 31 | func RouteTableAdd(tableNumber int, tableName string) error { 32 | var rtTablesLoc string 33 | for _, possibleLoc := range rtTablesPosLoc { 34 | _, err := os.Stat(possibleLoc) 35 | if err != nil { 36 | klog.V(2).Infof("Did not find iproute2's rt_tables in location %s", possibleLoc) 37 | continue 38 | } 
39 | rtTablesLoc = possibleLoc 40 | break 41 | } 42 | if rtTablesLoc == "" { 43 | return fmt.Errorf("did not find rt_tables in any of the expected locations: %s", rtTablesFileName) 44 | } 45 | 46 | b, err := os.ReadFile(rtTablesLoc) 47 | if err != nil { 48 | return fmt.Errorf("failed to read: %s", err.Error()) 49 | } 50 | 51 | if !strings.Contains(string(b), tableName) { 52 | f, err := os.OpenFile(rtTablesLoc, os.O_APPEND|os.O_WRONLY, 0600) 53 | if err != nil { 54 | return fmt.Errorf("failed to open: %s", err.Error()) 55 | } 56 | defer CloseCloserDisregardError(f) 57 | if _, err = f.WriteString(fmt.Sprint(tableNumber) + " " + tableName + "\n"); err != nil { 58 | return fmt.Errorf("failed to write: %s", err.Error()) 59 | } 60 | } 61 | 62 | return nil 63 | } 64 | -------------------------------------------------------------------------------- /USERS.md: -------------------------------------------------------------------------------- 1 | Who is using kube-router? 2 | ========================= 3 | 4 | The following is a directory of users who are using kube-router in production. 5 | 6 | Users (Alphabetically) 7 | ---------------------- 8 | 9 | * Name: DigitalOcean 10 | Description: DigitalOcean is using kube-router for production bare-metal Kubernetes clusters globally. 11 | Usage: Pod Networking, IPVS Service Proxy, BGP 12 | * Name: EEN (Eagle Eye Networks, Inc.) 13 | Description: Eagle Eye Networks is using kube-router for production bare-metal Kubernetes clusters globally. 14 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller 15 | Contact: @DandyDeveloper 16 | * Name: enix.io 17 | Description: Simplicity, IPVS (including good support for long-lived incoming UDP video streams) & BGP export of service IPs to upstream BGP routers (allowing easy inbound HA with a pair of top-of-rack Arista switches) are some of the killer features for us. 18 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller 19 | * Name: Globo.com 20 | Description: Globo is using kube-router for production hybrid (bare-metal and VMs) Kubernetes clusters across multiple datacenters in Brazil. 21 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller, BGP 22 | * Name: Numberly 23 | Description: Numberly is using kube-router for production bare-metal Kubernetes clusters globally. 24 | Usage: Pod Networking, BGP, Network Policy Controller 25 | Contact: @ramnes @Lujeni 26 | * Name: PubMatic 27 | Description: PubMatic is using kube-router for production Kubernetes clusters located in datacenters worldwide. 28 | Usage: Pod Networking, BGP 29 | 30 | If you are using kube-router, please consider adding yourself as a user by opening a pull request to this file and adding a section describing your usage of kube-router, or let us know on Slack.
31 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "Code Scanning - Action" 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - v* 8 | pull_request: 9 | schedule: 10 | # ┌───────────── minute (0 - 59) 11 | # │ ┌───────────── hour (0 - 23) 12 | # │ │ ┌───────────── day of the month (1 - 31) 13 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 14 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 15 | # │ │ │ │ │ 16 | # │ │ │ │ │ 17 | # │ │ │ │ │ 18 | # * * * * * 19 | - cron: '30 1 * * 0' 20 | 21 | jobs: 22 | CodeQL-Build: 23 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 24 | runs-on: ubuntu-latest 25 | 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v6 29 | 30 | # Initializes the CodeQL tools for scanning. 31 | - name: Initialize CodeQL 32 | uses: github/codeql-action/init@v4 33 | # Override language selection by uncommenting this and choosing your languages 34 | # with: 35 | # languages: go, javascript, csharp, python, cpp, java 36 | 37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 38 | # If this step fails, then you should remove it and run the build manually (see below). 39 | - name: Autobuild 40 | uses: github/codeql-action/autobuild@v4 41 | 42 | # ℹ️ Command-line programs to run using the OS shell. 43 | # 📚 https://git.io/JvXDl 44 | 45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 46 | # three lines and modify them (or add more) to build your code if your 47 | # project uses a compiled language 48 | 49 | # - run: | 50 | # make bootstrap 51 | # make release 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v4 55 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime" 7 | 8 | "github.com/hashicorp/go-version" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | type versionMessage struct { 13 | minVersionInclusive string 14 | maxVersionExclusive string 15 | message string 16 | } 17 | 18 | // Version and BuildDate are injected at build time via ldflags 19 | var ( 20 | BuildDate string 21 | Version string 22 | 23 | msgVersionArr = []versionMessage{ 24 | { 25 | minVersionInclusive: "v2.0.0", 26 | maxVersionExclusive: "v2.1.0", 27 | message: "Version v2.X introduces backward compatibility breaking changes, the kube-router project " + 28 | "recommends that you read the release notes carefully before deploying: " + 29 | "https://github.com/cloudnativelabs/kube-router/releases/tag/v2.0.0", 30 | }, 31 | } 32 | ) 33 | 34 | func (ver versionMessage) versionApplicable(testVerStr string) bool { 35 | minVer, err1 := version.NewVersion(ver.minVersionInclusive) 36 | maxVer, err2 := version.NewVersion(ver.maxVersionExclusive) 37 | testVer, err3 := version.NewVersion(testVerStr) 38 | 39 | // When in doubt return false 40 | if err1 != nil || err2 != nil || err3 != nil { 41 | klog.Warningf("encountered an error while trying to parse version numbers: %v - %v - %v", err1, err2, err3) 42 | return false 43 | } 44 | 45 | return testVer.GreaterThanOrEqual(minVer) && testVer.LessThan(maxVer) 46 | } 47 | 48 | func PrintVersion(logOutput bool) { 49 | output := fmt.Sprintf("Running %v version %s, built on %s, %s\n", 
os.Args[0], Version, BuildDate, runtime.Version()) 51 | 52 | outputToStream(output, logOutput) 53 | } 54 | 55 | func PrintVersionMessages(logOutput bool) { 56 | for _, verMsg := range msgVersionArr { 57 | if verMsg.versionApplicable(Version) { 58 | outputToStream(verMsg.message, logOutput) 59 | } 60 | } 61 | } 62 | 63 | func outputToStream(output string, logOutput bool) { 64 | if !logOutput { 65 | _, _ = fmt.Fprintf(os.Stderr, "%s", output) 66 | } else { 67 | klog.Info(output) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing to Kube-router 3 | 4 | ## Summary 5 | 6 | This document covers how to contribute to the kube-router project. Kube-router uses GitHub PRs to manage contributions, which can be anything from documentation and bug fixes to manifests. 7 | 8 | Please read the [user guide](/docs/user-guide.md) and [developer guide](/docs/developing.md) for the functionality and internals of kube-router. 9 | 10 | ## Filing issues 11 | 12 | If you have a question about Kube-router or have a problem using it, please start by contacting us on the [community forum](https://kubernetes.slack.com/messages/C8DCQGTSB/) for quick help. If that doesn't answer your questions, or if you think you found a bug, please [file an issue](https://github.com/cloudnativelabs/kube-router/issues). 13 | 14 | ## Contributing Changes 15 | 16 | ### Fork the code 17 | 18 | Navigate to: 19 | [https://github.com/cloudnativelabs/kube-router](https://github.com/cloudnativelabs/kube-router) 20 | and fork the repository. 21 | 22 | Follow these steps to set up a local repository for working on Kube-router: 23 | 24 | ``` bash 25 | $ git clone https://github.com/YOUR_ACCOUNT/kube-router.git 26 | $ cd kube-router 27 | $ git remote add upstream https://github.com/cloudnativelabs/kube-router 28 | $ git checkout master 29 | $ git fetch upstream 30 | $ git rebase upstream/master 31 | ``` 32 | 33 | ### Creating A Feature Branch 34 | 35 | Create a new branch to make your changes on, then check out that branch. 36 | 37 | ``` bash 38 | $ git checkout -b feature_x 39 | (make your changes) 40 | $ git status 41 | $ git add . 42 | $ git commit -a -m "descriptive commit message for your changes" 43 | ``` 44 | Get updates from upstream: 45 | 46 | ``` bash 47 | $ git checkout master 48 | $ git fetch upstream 49 | $ git rebase upstream/master 50 | $ git checkout feature_x 51 | $ git rebase master 52 | ``` 53 | 54 | Now your `feature_x` branch is up-to-date with all the code in `upstream/master`, so push it to your fork. 55 | 56 | ### Performing A Pull Request 57 | 58 | ``` bash 59 | $ git push origin master 60 | $ git push origin feature_x 61 | ``` 62 | 63 | Now that the `feature_x` branch has been pushed to your GitHub repository, you can initiate the pull request.
64 | -------------------------------------------------------------------------------- /pkg/bgp/id.go: -------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "hash/fnv" 8 | "net" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 13 | gobgp "github.com/osrg/gobgp/v3/pkg/packet/bgp" 14 | ) 15 | 16 | const ( 17 | CommunityMaxSize = 32 18 | CommunityMaxPartSize = 16 19 | ) 20 | 21 | // GenerateRouterID will generate a router ID based upon the user's configuration (or lack there of) and the node's 22 | // primary IP address if the user has not specified. If the user has configured the router ID as "generate" then we 23 | // will generate a router ID based upon fnv hashing the node's primary IP address. 24 | func GenerateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (string, error) { 25 | switch { 26 | case configRouterID == "generate": 27 | h := fnv.New32a() 28 | h.Write(nodeIPAware.GetPrimaryNodeIP()) 29 | hs := h.Sum32() 30 | gip := make(net.IP, 4) 31 | binary.BigEndian.PutUint32(gip, hs) 32 | return gip.String(), nil 33 | case configRouterID != "": 34 | return configRouterID, nil 35 | } 36 | 37 | if nodeIPAware.GetPrimaryNodeIP().To4() == nil { 38 | return "", errors.New("router-id must be specified when primary node IP is an IPv6 address") 39 | } 40 | return nodeIPAware.GetPrimaryNodeIP().String(), nil 41 | } 42 | 43 | // ValidateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to 44 | // gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it 45 | // returns an error. 46 | func ValidateCommunity(arg string) error { 47 | _, err := strconv.ParseUint(arg, 10, CommunityMaxSize) 48 | if err == nil { 49 | return nil 50 | } 51 | 52 | elem1, elem2, found := strings.Cut(arg, ":") 53 | if found { 54 | if _, err := strconv.ParseUint(elem1, 10, CommunityMaxPartSize); err == nil { 55 | if _, err = strconv.ParseUint(elem2, 10, CommunityMaxPartSize); err == nil { 56 | return nil 57 | } 58 | } 59 | } 60 | for _, v := range gobgp.WellKnownCommunityNameMap { 61 | if arg == v { 62 | return nil 63 | } 64 | } 65 | return fmt.Errorf("failed to parse %s as community", arg) 66 | } 67 | -------------------------------------------------------------------------------- /pkg/utils/linux_routingtest.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | "github.com/stretchr/testify/mock" 8 | "github.com/vishvananda/netlink" 9 | ) 10 | 11 | type FakeLocalLinkQuerier struct { 12 | links []netlink.Link 13 | addrs []*net.IPNet 14 | } 15 | 16 | func NewFakeLocalLinkQuerier(addrStrings []string, mtus []int) *FakeLocalLinkQuerier { 17 | links := make([]netlink.Link, len(addrStrings)) 18 | for idx := range addrStrings { 19 | mtu := 1 20 | if idx < len(mtus) { 21 | mtu = mtus[idx] 22 | } 23 | linkAttrs := netlink.LinkAttrs{ 24 | Index: idx, 25 | MTU: mtu, 26 | } 27 | linkDevice := netlink.Device{LinkAttrs: linkAttrs} 28 | links[idx] = &linkDevice 29 | } 30 | addrs := make([]*net.IPNet, len(addrStrings)) 31 | for idx, addr := range addrStrings { 32 | ip := net.ParseIP(addr) 33 | var netMask net.IPMask 34 | if ip.To4() != nil { 35 | //nolint:mnd // Hardcoded value is used for testing purposes 36 | netMask = net.CIDRMask(24, 32) 37 | } else { 38 | //nolint:mnd // 
Hardcoded value is used for testing purposes 39 | netMask = net.CIDRMask(64, 128) 40 | } 41 | ipNet := &net.IPNet{ 42 | IP: ip, 43 | Mask: netMask, 44 | } 45 | addrs[idx] = ipNet 46 | } 47 | return &FakeLocalLinkQuerier{ 48 | links: links, 49 | addrs: addrs, 50 | } 51 | } 52 | 53 | func (f *FakeLocalLinkQuerier) LinkList() ([]netlink.Link, error) { 54 | return f.links, nil 55 | } 56 | 57 | func (f *FakeLocalLinkQuerier) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) { 58 | addrs := make([]netlink.Addr, 1) 59 | addrs[0] = netlink.Addr{IPNet: f.addrs[link.Attrs().Index]} 60 | if link.Attrs().MTU == 0 { 61 | return nil, fmt.Errorf("MTU was set to 0 to simulate an error") 62 | } 63 | return addrs, nil 64 | } 65 | 66 | type MockLocalLinkQuerier struct { 67 | mock.Mock 68 | } 69 | 70 | func (m *MockLocalLinkQuerier) LinkList() ([]netlink.Link, error) { 71 | args := m.Called() 72 | return args.Get(0).([]netlink.Link), args.Error(1) 73 | } 74 | 75 | func (m *MockLocalLinkQuerier) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) { 76 | args := m.Called(link, family) 77 | return args.Get(0).([]netlink.Addr), args.Error(1) 78 | } 79 | -------------------------------------------------------------------------------- /pkg/controllers/routing/utils_test.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "net" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_stringSliceToIPs(t *testing.T) { 11 | t.Run("When receive an empty slice it returns an empty ip slice", func(t *testing.T) { 12 | ips, err := stringSliceToIPs([]string{}) 13 | assert.Nil(t, err) 14 | assert.Equal(t, []net.IP{}, ips) 15 | }) 16 | t.Run("When receive an ip string slice it returns an ip slice", func(t *testing.T) { 17 | ips, err := stringSliceToIPs([]string{"192.168.0.1", "10.0.0.1"}) 18 | assert.Nil(t, err) 19 | assert.Equal(t, []net.IP{net.ParseIP("192.168.0.1"), net.ParseIP("10.0.0.1")}, ips) 20 | }) 21 | t.Run("When receive an invalid ip string slice it returns an error", func(t *testing.T) { 22 | ips, err := stringSliceToIPs([]string{"500.168.0.1"}) 23 | assert.Equal(t, "could not parse \"500.168.0.1\" as an IP", err.Error()) 24 | assert.Nil(t, ips) 25 | ips, err = stringSliceToIPs([]string{"invalid"}) 26 | assert.Equal(t, "could not parse \"invalid\" as an IP", err.Error()) 27 | assert.Nil(t, ips) 28 | }) 29 | } 30 | 31 | func Test_stringSliceToIPNets(t *testing.T) { 32 | t.Run("When receive an empty slice it returns an empty ip slice", func(t *testing.T) { 33 | ips, err := stringSliceToIPNets([]string{}) 34 | assert.Nil(t, err) 35 | assert.Equal(t, []net.IPNet{}, ips) 36 | }) 37 | t.Run("When receive an ip string slice it returns an ip slice ignoring trailing spaces", func(t *testing.T) { 38 | ips, err := stringSliceToIPNets([]string{" 192.168.0.1/24", "10.0.0.1/16 "}) 39 | assert.Nil(t, err) 40 | _, firstIPNet, _ := net.ParseCIDR("192.168.0.1/24") 41 | _, secondIPNet, _ := net.ParseCIDR("10.0.0.1/16") 42 | assert.Equal(t, []net.IPNet{*firstIPNet, *secondIPNet}, ips) 43 | }) 44 | t.Run("When receive an invalid ip string slice it returns an error", func(t *testing.T) { 45 | ips, err := stringSliceToIPNets([]string{"500.168.0.1/24"}) 46 | assert.Equal(t, "could not parse \"500.168.0.1/24\" as an CIDR", err.Error()) 47 | assert.Nil(t, ips) 48 | ips, err = stringSliceToIPNets([]string{"10.0.0.1/80"}) 49 | assert.Equal(t, "could not parse \"10.0.0.1/80\" as an CIDR", err.Error()) 50 | 
assert.Nil(t, ips) 51 | }) 52 | } 53 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | The kube-router maintainers take security issues seriously. We appreciate responsible disclosure and will work with you 4 | to understand and address valid reports. 5 | 6 | ## Reporting a vulnerability 7 | 8 | Please **do not** open a public GitHub issue, pull request, or discuss a potential vulnerability in public forums/Slack 9 | before the maintainers have had a chance to review and respond. 10 | 11 | Use one of the following private channels: 12 | 13 | - Email: `admin@kube-router.io` 14 | - GitHub (preferred when available): https://github.com/cloudnativelabs/kube-router/security/advisories/new 15 | 16 | If you are reporting via email, include `[SECURITY]` in the subject line. 17 | 18 | ### What to include 19 | 20 | To help us triage quickly, please include: 21 | 22 | - A clear description of the issue and its potential impact 23 | - Affected versions/branches (and any configuration details that matter) 24 | - Reproduction steps or a minimal proof of concept (as appropriate) 25 | - Any known mitigations or workarounds 26 | - Your preferred contact information for follow-up 27 | 28 | ## Response process 29 | 30 | When we receive a report, we aim to: 31 | 32 | - Acknowledge receipt within **2 business days** 33 | - Provide an initial assessment and request additional details (if needed) 34 | - Develop and validate a fix (and backport on a best-effort basis when feasible) 35 | - Coordinate release timing and public disclosure with the reporter 36 | 37 | ## Coordinated disclosure 38 | 39 | We follow responsible/coordinated disclosure practices. Please give us a reasonable amount of time to investigate and 40 | prepare a fix before publishing details. If a CVE is warranted, the maintainers will coordinate with the appropriate 41 | CVE numbering authority. 42 | 43 | ## Supported versions 44 | 45 | Security fixes are provided on a best-effort basis for: 46 | 47 | - The latest released minor version 48 | - The previously released minor version 49 | 50 | Older versions may not receive security updates; upgrading is strongly recommended. 51 | 52 | ## Security updates 53 | 54 | Security advisories and releases are published via GitHub. We recommend watching the repository and staying current with 55 | upstream releases. 
56 | -------------------------------------------------------------------------------- /cmd/kube-router/kube-router.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "net/http" 7 | "os" 8 | "time" 9 | 10 | //nolint:gosec // we want to unconditionally expose pprof here for advanced troubleshooting scenarios 11 | _ "net/http/pprof" 12 | 13 | "github.com/cloudnativelabs/kube-router/v2/pkg/cmd" 14 | "github.com/cloudnativelabs/kube-router/v2/pkg/options" 15 | "github.com/cloudnativelabs/kube-router/v2/pkg/version" 16 | "github.com/spf13/pflag" 17 | "k8s.io/klog/v2" 18 | ) 19 | 20 | func main() { 21 | if err := Main(); err != nil { 22 | fmt.Fprintf(os.Stderr, "%s\n", err) 23 | os.Exit(1) 24 | } 25 | os.Exit(0) 26 | } 27 | 28 | func Main() error { 29 | klog.InitFlags(nil) 30 | 31 | config := options.NewKubeRouterConfig() 32 | config.AddFlags(pflag.CommandLine) 33 | pflag.Parse() 34 | 35 | // Workaround for this issue: 36 | // https://github.com/kubernetes/kubernetes/issues/17162 37 | err := flag.CommandLine.Parse([]string{}) 38 | if err != nil { 39 | return fmt.Errorf("failed to parse flags: %s", err) 40 | } 41 | err = flag.Set("logtostderr", "true") 42 | if err != nil { 43 | return fmt.Errorf("failed to set flag: %s", err) 44 | } 45 | err = flag.Set("v", config.VLevel) 46 | if err != nil { 47 | return fmt.Errorf("failed to set flag: %s", err) 48 | } 49 | 50 | if config.HelpRequested { 51 | pflag.Usage() 52 | return nil 53 | } 54 | 55 | if config.Version { 56 | version.PrintVersion(false) 57 | return nil 58 | } 59 | 60 | if os.Geteuid() != 0 { 61 | return fmt.Errorf("kube-router needs to be run with privileges to execute iptables, ipset and configure ipvs") 62 | } 63 | 64 | if config.CleanupConfig { 65 | cmd.CleanupConfigAndExit() 66 | return nil 67 | } 68 | 69 | kubeRouter, err := cmd.NewKubeRouterDefault(config) 70 | if err != nil { 71 | return fmt.Errorf("failed to parse kube-router config: %v", err) 72 | } 73 | 74 | if config.EnablePprof { 75 | go func() { 76 | server := http.Server{ 77 | Addr: "0.0.0.0:6060", 78 | ReadHeaderTimeout: 5 * time.Second, 79 | Handler: nil, 80 | } 81 | fmt.Fprintf(os.Stdout, "%s\n", server.ListenAndServe().Error()) 82 | }() 83 | } 84 | 85 | err = kubeRouter.Run() 86 | if err != nil { 87 | return fmt.Errorf("failed to run kube-router: %v", err) 88 | } 89 | 90 | return nil 91 | } 92 | -------------------------------------------------------------------------------- /pkg/utils/pod_cidr.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "strings" 7 | 8 | v1core "k8s.io/api/core/v1" 9 | netutils "k8s.io/utils/net" 10 | ) 11 | 12 | const ( 13 | // deprecated - we now use multiple CIDRs, so it is better for users to use kube-router.io/pod-cidrs which allows 14 | // you to express all of the cidrs you want to advertise from a given node 15 | podCIDRAnnotation = "kube-router.io/pod-cidr" 16 | podCIDRsAnnotation = "kube-router.io/pod-cidrs" 17 | ) 18 | 19 | // GetPodCidrFromNodeSpec reads the pod CIDR allocated to the node from API node object and returns it 20 | func GetPodCidrFromNodeSpec(node *v1core.Node) (string, error) { 21 | if cidr, ok := node.Annotations[podCIDRAnnotation]; ok { 22 | _, _, err := net.ParseCIDR(cidr) 23 | if err != nil { 24 | return "", fmt.Errorf("error parsing pod CIDR in node annotation: %v", err) 25 | } 26 | 27 | return cidr, nil 28 | } 29 | 30 | if 
node.Spec.PodCIDR == "" { 31 | return "", fmt.Errorf("node.Spec.PodCIDR not set for node: %v", node.Name) 32 | } 33 | 34 | return node.Spec.PodCIDR, nil 35 | } 36 | 37 | // GetPodCIDRsFromNodeSpecDualStack reads the IPv4 and IPv6 pod CIDR allocated 38 | // to the node from API node object and returns them 39 | func GetPodCIDRsFromNodeSpecDualStack(node *v1core.Node) ([]string, []string, error) { 40 | var podIPv4CIDRs, podIPv6CIDRs []string 41 | 42 | if podCIDRs, ok := node.Annotations[podCIDRsAnnotation]; ok { 43 | for _, cidr := range strings.Split(podCIDRs, ",") { 44 | if _, _, err := net.ParseCIDR(cidr); err != nil { 45 | return podIPv4CIDRs, podIPv6CIDRs, fmt.Errorf("error parsing pod CIDR in node annotation: %v", err) 46 | } 47 | if netutils.IsIPv4CIDRString(cidr) { 48 | podIPv4CIDRs = append(podIPv4CIDRs, cidr) 49 | } 50 | if netutils.IsIPv6CIDRString(cidr) { 51 | podIPv6CIDRs = append(podIPv6CIDRs, cidr) 52 | } 53 | } 54 | return podIPv4CIDRs, podIPv6CIDRs, nil 55 | } 56 | 57 | if len(node.Spec.PodCIDRs) == 0 { 58 | return nil, nil, fmt.Errorf("node.Spec.PodCIDRs empty for node: %v", node.Name) 59 | } 60 | 61 | for _, podCIDR := range node.Spec.PodCIDRs { 62 | if netutils.IsIPv4CIDRString(podCIDR) { 63 | podIPv4CIDRs = append(podIPv4CIDRs, podCIDR) 64 | } 65 | if netutils.IsIPv6CIDRString(podCIDR) { 66 | podIPv6CIDRs = append(podIPv6CIDRs, podCIDR) 67 | } 68 | } 69 | 70 | return podIPv4CIDRs, podIPv6CIDRs, nil 71 | } 72 | -------------------------------------------------------------------------------- /docs/see-it-in-action.md: -------------------------------------------------------------------------------- 1 | # See Kube-router in action 2 | 3 | ## Network Services Controller 4 | 5 | Network services controller is responsible for reading the services and endpoints information from Kubernetes API server 6 | and configure IPVS on each cluster node accordingly. 7 | 8 | Please [read our blog](https://cloudnativelabs.github.io/post/2017-05-10-kube-network-service-proxy/) for design details 9 | and pros and cons compared to iptables based Kube-proxy 10 | 11 | Demo of Kube-router's IPVS based Kubernetes network service proxy 12 | 13 | [![asciicast](https://asciinema.org/a/120312.png)](https://asciinema.org/a/120312) 14 | 15 | Features: 16 | 17 | - round robin load balancing 18 | - client IP based session persistence 19 | - source IP is preserved if service controller is used in conjuction with network routes controller (kube-router with 20 | --run-router flag) 21 | - option to explicitly masquerade (SNAT) with --masquerade-all flag 22 | 23 | ## Network Policy Controller 24 | 25 | Network policy controller is responsible for reading the namespace, network policy and pods information from Kubernetes 26 | API server and configure iptables accordingly to provide ingress filter to the pods. 27 | 28 | Kube-router supports the networking.k8s.io/NetworkPolicy API or network policy V1/GA 29 | [semantics](https://github.com/kubernetes/kubernetes/pull/39164#issue-197243974) and also network policy beta semantics. 
30 | 31 | Please [read our blog](https://cloudnativelabs.github.io/post/2017-05-1-kube-network-policies/) for the design details of 32 | the Network Policy controller. 33 | 34 | Demo of Kube-router's iptables-based implementation of network policies: 35 | 36 | [![asciicast](https://asciinema.org/a/120735.png)](https://asciinema.org/a/120735) 37 | 38 | ## Network Routes Controller 39 | 40 | The network routes controller is responsible for reading the pod CIDR allocated to the node by the controller manager, and 41 | advertising the routes to the rest of the nodes in the cluster (BGP peers). The use of BGP is transparent to the user for basic 42 | pod-to-pod networking. 43 | 44 | [![asciicast](https://asciinema.org/a/120885.png)](https://asciinema.org/a/120885) 45 | 46 | However, BGP can be leveraged for other use cases, such as advertising the cluster IP or routable pod IPs. Only in such 47 | use cases is an understanding of BGP and its configuration required. Please see the demo below of how kube-router advertises 48 | cluster IPs and pod CIDRs to an external BGP router. 49 | 50 | [![asciicast](https://asciinema.org/a/121635.png)](https://asciinema.org/a/121635) 51 | -------------------------------------------------------------------------------- /docs/kubeadm.md: -------------------------------------------------------------------------------- 1 | # Deploying kube-router with kubeadm 2 | 3 | Please follow the [steps](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) to install a Kubernetes 4 | cluster with kubeadm; however, you must specify `--pod-network-cidr` when you run `kubeadm init`. 5 | 6 | kube-router relies on kube-controller-manager to allocate pod CIDRs for the nodes. 7 | 8 | kube-router provides pod networking, network policy and a high-performing IPVS/LVS based service proxy. Depending on 9 | whether you choose to use kube-router as the service proxy, you have two options. 10 | 11 | ## kube-router Providing Pod Networking and Network Policy 12 | 13 | For step #3, **Installing a Pod network add-on**, install the kube-router pod network and network policy add-on with the 14 | following command: 15 | 16 | ```sh 17 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml 18 | ``` 19 | 20 | ## kube-router Providing Service Proxy, Firewall and Pod Networking 21 | 22 | For step #3, **Installing a Pod network add-on**, install the kube-router pod network and network policy add-on with the 23 | following command: 24 | 25 | ```sh 26 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter-all-features.yaml 27 | ``` 28 | 29 | Now, since kube-router provides the service proxy as well, run the commands below to remove kube-proxy and clean up any 30 | iptables configuration it may have done.
31 | 32 | ```sh 33 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n kube-system delete ds kube-proxy 34 | ``` 35 | 36 | To clean up kube-proxy, we can do this with docker, containerd, or cri-o: 37 | 38 | ### docker 39 | 40 | ```sh 41 | docker run --privileged -v /lib/modules:/lib/modules --net=host registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy --cleanup 42 | ``` 43 | 44 | ### containerd 45 | 46 | ```sh 47 | ctr images pull registry.k8s.io/kube-proxy-amd64:v1.28.2 48 | ctr run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \ 49 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup 50 | ``` 51 | 52 | ### cri-o 53 | 54 | ```sh 55 | crictl pull registry.k8s.io/kube-proxy-amd64:v1.28.2 56 | crictl run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \ 57 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup 58 | ``` 59 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | output: 3 | formats: 4 | tab: 5 | path: stdout 6 | print-linter-name: true 7 | colors: false 8 | linters: 9 | enable: 10 | - bodyclose 11 | - copyloopvar 12 | - dogsled 13 | - dupl 14 | - durationcheck 15 | - exhaustive 16 | - gochecknoinits 17 | - goconst 18 | - gocritic 19 | - gosec 20 | - lll 21 | - misspell 22 | - mnd 23 | - nakedret 24 | - noctx 25 | - nolintlint 26 | - staticcheck 27 | - unconvert 28 | - unparam 29 | settings: 30 | goconst: 31 | min-len: 20 32 | exclusions: 33 | generated: lax 34 | presets: 35 | - comments 36 | - common-false-positives 37 | - legacy 38 | - std-error-handling 39 | rules: 40 | - linters: 41 | - mnd 42 | # Excluding single digits from magic number detector because it produces too many obvious results (like klog) 43 | text: 'Magic number: [0-9]{1},' 44 | - linters: 45 | - mnd 46 | # Exclude file masks from magic number detector because these numbers are obvious 47 | text: 'Magic number: 0[0-7]{3},' 48 | - linters: 49 | - mnd 50 | path: pkg/controllers/proxy/network_services_controller.go 51 | # Exclude IP netmasks as substituting them for constants only makes these less obvious 52 | text: 'Magic number: 255,' 53 | - linters: 54 | - mnd 55 | # Exclude IP netmasks from magic number detector because these numbers are obvious 56 | text: 'Magic number: 32,' 57 | - linters: 58 | - mnd 59 | # Exclude decimal bases from magic number detector because these numbers are obvious 60 | text: 'Magic number: 10,' 61 | - linters: 62 | - gosec 63 | # Exclude file mask security findings as we are always intentional about the file masks we use 64 | text: 'G306:' 65 | - linters: 66 | - lll 67 | # Exclude tests from long line linter 68 | path: _test\.go 69 | - linters: 70 | - dupl 71 | # Exclude tests from duplicate linter 72 | path: _test\.go 73 | - linters: 74 | - goconst 75 | path: (.+)_test\.go 76 | paths: 77 | - third_party$ 78 | - builtin$ 79 | - examples$ 80 | issues: 81 | max-issues-per-linter: 0 82 | max-same-issues: 0 83 | formatters: 84 | enable: 85 | - gofmt 86 | - goimports 87 | exclusions: 88 | generated: lax 89 | paths: 90 | - third_party$ 91 | - builtin$ 92 | - examples$ 93 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import
( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net" 8 | "strconv" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | const maxListenTestTimeout = 5 * time.Second 14 | 15 | type Listener interface { 16 | OnUpdate(instance interface{}) 17 | } 18 | 19 | type ListenerFunc func(instance interface{}) 20 | 21 | func (f ListenerFunc) OnUpdate(instance interface{}) { 22 | f(instance) 23 | } 24 | 25 | // Broadcaster holds the details of registered listeners 26 | type Broadcaster struct { 27 | listenerLock sync.RWMutex 28 | listeners []Listener 29 | } 30 | 31 | // Add lets to register a listener 32 | func (b *Broadcaster) Add(listener Listener) { 33 | b.listenerLock.Lock() 34 | defer b.listenerLock.Unlock() 35 | b.listeners = append(b.listeners, listener) 36 | } 37 | 38 | // Notify notifies an update to registered listeners 39 | func (b *Broadcaster) Notify(instance interface{}) { 40 | b.listenerLock.RLock() 41 | listeners := b.listeners 42 | b.listenerLock.RUnlock() 43 | for _, listener := range listeners { 44 | go listener.OnUpdate(instance) 45 | } 46 | } 47 | 48 | // CloseCloserDisregardError it is a common need throughout kube-router's code base to need close a closer in defer 49 | // statements, this allows an action like that to pass a linter as well as describe its intention well 50 | func CloseCloserDisregardError(handler io.Closer) { 51 | _ = handler.Close() 52 | } 53 | 54 | // SliceContainsString checks to see if needle is contained within haystack, returns true if found, otherwise 55 | // returns false 56 | func SliceContainsString(needle string, haystack []string) bool { 57 | for _, hay := range haystack { 58 | if needle == hay { 59 | return true 60 | } 61 | } 62 | return false 63 | } 64 | 65 | // TCPAddressBindable checks to see if an IP/port is bindable by attempting to open a listener then closing it 66 | // returns nil if successful 67 | func TCPAddressBindable(addr string, port uint16) error { 68 | ctx, cancel := context.WithTimeout(context.Background(), maxListenTestTimeout) 69 | defer cancel() 70 | endpoint := addr + ":" + strconv.Itoa(int(port)) 71 | lc := net.ListenConfig{} 72 | ln, err := lc.Listen(ctx, "tcp", endpoint) 73 | if err != nil { 74 | return fmt.Errorf("unable to open %s: %w", endpoint, err) 75 | } 76 | return ln.Close() 77 | } 78 | 79 | // ConvertMapKeysToSlice takes a map with a set of keys and then extracts the keys into a slice of the same length 80 | func ConvertMapKeysToSlice[K comparable, V any](mapContainingKeys map[K]V) []K { 81 | keys := make([]K, 0, len(mapContainingKeys)) 82 | 83 | for k := range mapContainingKeys { 84 | keys = append(keys, k) 85 | } 86 | 87 | return keys 88 | } 89 | -------------------------------------------------------------------------------- /pkg/bgp/parse.go: -------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | gobgpapi "github.com/osrg/gobgp/v3/api" 8 | "github.com/vishvananda/netlink" 9 | ) 10 | 11 | // ParseNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it 12 | // can't parse a next hop IP from the GoBGP Path, it returns an error. 
13 | func ParseNextHop(path *gobgpapi.Path) (net.IP, error) { 14 | for _, pAttr := range path.GetPattrs() { 15 | unmarshalNew, err := pAttr.UnmarshalNew() 16 | if err != nil { 17 | return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err) 18 | } 19 | switch t := unmarshalNew.(type) { 20 | case *gobgpapi.NextHopAttribute: 21 | // This is the primary way that we receive NextHops and happens when both the client and the server exchange 22 | // next hops on the same IP family that they negotiated BGP on 23 | nextHopIP := net.ParseIP(t.NextHop) 24 | if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { 25 | return nextHopIP, nil 26 | } 27 | return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop) 28 | case *gobgpapi.MpReachNLRIAttribute: 29 | // in the case where the server and the client are exchanging next-hops that don't relate to their primary 30 | // IP family, we get MpReachNLRIAttribute instead of NextHopAttributes 31 | // TODO: here we only take the first next hop, at some point in the future it would probably be best to 32 | // consider multiple next hops 33 | nextHopIP := net.ParseIP(t.NextHops[0]) 34 | if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { 35 | return nextHopIP, nil 36 | } 37 | return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0]) 38 | } 39 | } 40 | return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path) 41 | } 42 | 43 | // ParsePath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes. 44 | // If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it 45 | // can't parse the destination or the next hop IP, it returns an error. 
46 | func ParsePath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { 47 | nextHop, err := ParseNextHop(path) 48 | if err != nil { 49 | return nil, nil, err 50 | } 51 | 52 | nlri := path.GetNlri() 53 | var prefix gobgpapi.IPAddressPrefix 54 | err = nlri.UnmarshalTo(&prefix) 55 | if err != nil { 56 | return nil, nil, fmt.Errorf("invalid nlri in advertised path") 57 | } 58 | dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen)) 59 | if err != nil { 60 | return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path") 61 | } 62 | return dstSubnet, nextHop, nil 63 | } 64 | -------------------------------------------------------------------------------- /pkg/controllers/routing/ipset_fixture_test.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "context" 5 | "path/filepath" 6 | "strings" 7 | "sync" 8 | "testing" 9 | 10 | "github.com/cloudnativelabs/kube-router/v2/pkg/controllers/testhelpers" 11 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 12 | "github.com/stretchr/testify/require" 13 | 14 | v1 "k8s.io/api/core/v1" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | "k8s.io/client-go/informers" 17 | "k8s.io/client-go/kubernetes/fake" 18 | ) 19 | 20 | func TestNetworkRoutingFixtureIPSets(t *testing.T) { 21 | fixtureDir := filepath.Join("..", "..", "..", "testdata", "ipset_test_1") 22 | 23 | nodes := testhelpers.LoadNodeList(t, filepath.Join(fixtureDir, "nodes.yaml")) 24 | 25 | client := fake.NewSimpleClientset() 26 | for i := range nodes.Items { 27 | _, err := client.CoreV1().Nodes().Create(context.Background(), nodes.Items[i].DeepCopy(), metav1.CreateOptions{}) 28 | require.NoError(t, err) 29 | } 30 | 31 | informerFactory := informers.NewSharedInformerFactory(client, 0) 32 | nodeInformer := informerFactory.Core().V1().Nodes().Informer() 33 | indexer := nodeInformer.GetIndexer() 34 | for i := range nodes.Items { 35 | node := nodes.Items[i].DeepCopy() 36 | node.SetResourceVersion("1") 37 | require.NoError(t, indexer.Add(node)) 38 | } 39 | 40 | ipv4Handler := testhelpers.NewFakeIPSetHandler(false) 41 | ipv6Handler := testhelpers.NewFakeIPSetHandler(true) 42 | 43 | controller := &NetworkRoutingController{ 44 | ipSetHandlers: map[v1.IPFamily]utils.IPSetHandler{ 45 | v1.IPv4Protocol: ipv4Handler, 46 | v1.IPv6Protocol: ipv6Handler, 47 | }, 48 | ipsetMutex: &sync.Mutex{}, 49 | nodeLister: indexer, 50 | } 51 | 52 | err := controller.syncNodeIPSets() 53 | require.NoError(t, err) 54 | 55 | actual := testhelpers.MergeExpectations( 56 | testhelpers.ParseRestoreScript(ipv4Handler.Restored()), 57 | testhelpers.ParseRestoreScript(ipv6Handler.Restored()), 58 | ) 59 | 60 | include := func(name string) bool { 61 | // Exclude netpol ipsets 62 | if strings.Contains(name, "KUBE-DST") || strings.Contains(name, "KUBE-SRC") { 63 | return false 64 | } 65 | // Exclude proxy ipsets 66 | if strings.Contains(name, "svip") || strings.Contains(name, "local-ips") { 67 | return false 68 | } 69 | return true 70 | } 71 | 72 | expected := testhelpers.ParseSnapshotWithFilter( 73 | t, 74 | filepath.Join(fixtureDir, "ipset_save.txt"), 75 | include, 76 | ) 77 | 78 | require.NotEmpty(t, expected, "expected snapshot should not be empty") 79 | require.Equal(t, testhelpers.ExpectedKeys(expected), testhelpers.ExpectedKeys(actual)) 80 | 81 | for name, exp := range expected { 82 | act := actual[name] 83 | require.Equal(t, exp.SetType, act.SetType, "set type mismatch for %s", name) 84 | 
require.Equal(t, exp.Entries, act.Entries, "entries mismatch for %s", name) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /docs/load-balancer-allocator.md: -------------------------------------------------------------------------------- 1 | # Load Balancer allocator 2 | 3 | ## What does it do 4 | 5 | The load balancer allocator controller looks for services with the type LoadBalancer and tries to allocate addresses for 6 | them if needed. The controller doesn't enable any announcement of the addresses by default, so 7 | `--advertise-loadbalancer-ip` should be set to true and BGP peers configured. 8 | 9 | ## Load balancer classes 10 | 11 | By default the controller allocates addresses for all LoadBalancer services where `loadBalancerClass` is empty 12 | or set to either "default" or "kube-router". If `--loadbalancer-default-class` is set to false, the controller will only 13 | handle services with the class set to "kube-router". 14 | 15 | ## RBAC permissions 16 | 17 | The controller needs some extra permissions to get, create and update leases for leader election and to update services 18 | with allocated addresses. 19 | 20 | Example permissions: 21 | 22 | ```yaml 23 | kind: ClusterRole 24 | apiVersion: rbac.authorization.k8s.io/v1 25 | metadata: 26 | name: kube-router 27 | namespace: kube-system 28 | rules: 29 | - apiGroups: 30 | - "coordination.k8s.io" 31 | resources: 32 | - leases 33 | verbs: 34 | - get 35 | - create 36 | - update 37 | - apiGroups: 38 | - "" 39 | resources: 40 | - services/status 41 | verbs: 42 | - update 43 | ``` 44 | 45 | ## Environment variables 46 | 47 | The controller uses the environment variable `POD_NAME` as the identity for the lease used for leader election. 48 | By using the Kubernetes downward API to set `POD_NAME` to the pod name, the lease identity will match the current leader. 49 | 50 | ```yaml 51 | --- 52 | apiVersion: apps/v1 53 | kind: DaemonSet 54 | metadata: 55 | labels: 56 | k8s-app: kube-router 57 | tier: node 58 | name: kube-router 59 | namespace: kube-system 60 | spec: 61 | ... 62 | template: 63 | metadata: 64 | .... 65 | spec: 66 | ... 67 | env: 68 | - name: POD_NAME 69 | valueFrom: 70 | fieldRef: 71 | fieldPath: metadata.name 72 | ... 73 | ``` 74 | 75 | The environment variable `POD_NAMESPACE` can also be specified to set the namespace used for the lease. 76 | By default the namespace is looked up from within the pod using `/var/run/secrets/kubernetes.io/serviceaccount/namespace`. 77 | 78 | ## Running outside kubernetes 79 | 80 | When running the controller outside a pod, both `POD_NAME` and `POD_NAMESPACE` must be set for the controller to work. 81 | `POD_NAME` should be unique per instance, so using for example the hostname of the machine might be a good idea. 82 | `POD_NAMESPACE` must be the same across all instances running in the same cluster. 83 | 84 | ## Notes 85 | 86 | It's not possible to specify the addresses for the load balancer services. An externalIP service can be used instead.
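For reference, here is a minimal sketch of a Service the allocator would handle, given the class handling described above; the name, namespace, selector, and ports are placeholder values:

```yaml
apiVersion: v1
kind: Service
metadata:
  name: my-lb-service   # placeholder name
  namespace: default
spec:
  type: LoadBalancer
  # Explicitly claim the service for kube-router's allocator; with
  # --loadbalancer-default-class=true (the default) this line could be omitted.
  loadBalancerClass: kube-router
  selector:
    app: web
  ports:
    - port: 80
      targetPort: 8080
      protocol: TCP
```

With `--advertise-loadbalancer-ip=true` and BGP peers configured, the address the allocator writes to the service's `status.loadBalancer` field is then advertised to the peers.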
87 | -------------------------------------------------------------------------------- /pkg/controllers/routing/aws.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net/url" 7 | "strings" 8 | "time" 9 | 10 | "github.com/aws/aws-sdk-go-v2/aws" 11 | "github.com/aws/aws-sdk-go-v2/config" 12 | "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" 13 | "github.com/aws/aws-sdk-go-v2/service/ec2" 14 | "github.com/aws/aws-sdk-go-v2/service/ec2/types" 15 | "github.com/aws/smithy-go" 16 | "k8s.io/klog/v2" 17 | 18 | v1core "k8s.io/api/core/v1" 19 | ) 20 | 21 | const ( 22 | awsThrottlingRequestDelay = 1000 * time.Millisecond 23 | awsMaxRetries = 5 24 | ) 25 | 26 | // disableSourceDestinationCheck disables the src-dst check on all the VMs when the cluster 27 | // is provisioned on AWS. EC2 by default drops any packets originating from or destined 28 | // to a VM with an IP other than the VM's own IP. This check needs to be disabled so that 29 | // cross-node pod-to-pod traffic can be sent and received by a VM. 30 | func (nrc *NetworkRoutingController) disableSourceDestinationCheck() { 31 | nodes := nrc.nodeLister.List() 32 | 33 | for _, obj := range nodes { 34 | node := obj.(*v1core.Node) 35 | if node.Spec.ProviderID == "" || !strings.HasPrefix(node.Spec.ProviderID, "aws") { 36 | return 37 | } 38 | providerID := strings.Replace(node.Spec.ProviderID, "///", "//", 1) 39 | URL, err := url.Parse(providerID) 40 | if err != nil { 41 | klog.Errorf("failed to parse URL for providerID %s: %v", providerID, err) 42 | return 43 | } 44 | instanceID := URL.Path 45 | instanceID = strings.Trim(instanceID, "/") 46 | 47 | cfg, _ := config.LoadDefaultConfig(context.TODO(), 48 | config.WithRetryMaxAttempts(awsMaxRetries)) 49 | metadataClient := imds.NewFromConfig(cfg) 50 | region, err := metadataClient.GetRegion(context.TODO(), &imds.GetRegionInput{}) 51 | if err != nil { 52 | klog.Errorf("failed to disable source destination check due to: %v", err) 53 | return 54 | } 55 | cfg.Region = region.Region 56 | ec2Client := ec2.NewFromConfig(cfg) 57 | _, err = ec2Client.ModifyInstanceAttribute(context.TODO(), 58 | &ec2.ModifyInstanceAttributeInput{ 59 | InstanceId: aws.String(instanceID), 60 | SourceDestCheck: &types.AttributeBooleanValue{ 61 | Value: aws.Bool(false), 62 | }, 63 | }, 64 | ) 65 | if err != nil { 66 | var apiErr smithy.APIError 67 | if errors.As(err, &apiErr) { 68 | if apiErr.ErrorCode() == "UnauthorizedOperation" { 69 | nrc.ec2IamAuthorized = false 70 | klog.Errorf("Node does not have necessary IAM creds to modify instance attribute. So skipping "+ 71 | "disabling src-dst check. %v", apiErr.ErrorMessage()) 72 | return 73 | 74 | } 75 | } 76 | klog.Errorf("failed to disable source destination check due to: %v", err) 77 | } else { 78 | klog.Infof("disabled source destination check for the instance: %s", instanceID) 79 | } 80 | 81 | // to prevent EC2 from rejecting the API call due to throttling, add a delay between the calls 82 | time.Sleep(awsThrottlingRequestDelay) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BUILDTIME_BASE=golang:1-alpine 2 | ARG RUNTIME_BASE=alpine:latest 3 | ARG TARGETPLATFORM 4 | ARG CNI_VERSION 5 | FROM ${BUILDTIME_BASE} AS builder 6 | ENV BUILD_IN_DOCKER=false 7 | 8 | WORKDIR /build 9 | COPY .
/build 10 | RUN apk add --no-cache make git tar curl \ 11 | && make kube-router \ 12 | && make gobgp \ 13 | && make cni-download 14 | 15 | WORKDIR /iptables-wrappers 16 | # This is the latest commit on the master branch. 17 | ENV IPTABLES_WRAPPERS_VERSION=f6ef44b2c449cca8f005b32dea9a4b497202dbef 18 | RUN git clone https://github.com/kubernetes-sigs/iptables-wrappers.git . \ 19 | && git checkout "${IPTABLES_WRAPPERS_VERSION}" \ 20 | && make build \ 21 | && test -x bin/iptables-wrapper \ 22 | && test -x iptables-wrapper-installer.sh 23 | 24 | FROM ${RUNTIME_BASE} 25 | 26 | RUN apk add --no-cache \ 27 | iptables \ 28 | iptables-legacy \ 29 | ipset \ 30 | iproute2 \ 31 | ipvsadm \ 32 | conntrack-tools \ 33 | curl \ 34 | bash && \ 35 | mkdir -p /var/lib/gobgp && \ 36 | mkdir -p /usr/local/share/bash-completion && \ 37 | curl -L -o /usr/local/share/bash-completion/bash-completion \ 38 | https://raw.githubusercontent.com/scop/bash-completion/master/bash_completion 39 | 40 | COPY build/image-assets/bashrc /root/.bashrc 41 | COPY build/image-assets/profile /root/.profile 42 | COPY build/image-assets/vimrc /root/.vimrc 43 | COPY build/image-assets/motd-kube-router.sh /etc/motd-kube-router.sh 44 | COPY build/image-assets/cni-install /usr/local/bin/cni-install 45 | COPY --from=builder /build/kube-router /build/gobgp /usr/local/bin/ 46 | COPY --from=builder /build/cni-download /usr/libexec/cni 47 | 48 | # Use iptables-wrappers so that correct version of iptables-legacy or iptables-nft gets used. Alpine contains both, but 49 | # which version is used should be based on the host system as well as where rules that may have been added before 50 | # kube-router are being placed. For more information see: https://github.com/kubernetes-sigs/iptables-wrappers 51 | COPY --from=builder /iptables-wrappers/bin/iptables-wrapper / 52 | COPY --from=builder /iptables-wrappers/iptables-wrapper-installer.sh / 53 | # This is necessary because of the bug reported here: https://github.com/flannel-io/flannel/pull/1340/files 54 | # Basically even under QEMU emulation, it still doesn't have an ARM kernel in-play which means that calls to 55 | # iptables-nft will fail in the build process. The sanity check here only makes sure that iptables-nft and iptables-legacy 56 | # are installed and that we are not using iptables-1.8.0-1.8.3. For now we'll manage that on our own. 57 | RUN if ! command -v iptables-nft > /dev/null; then \ 58 | echo "ERROR: iptables-nft is not installed" 1>&2; \ 59 | exit 1; \ 60 | fi && \ 61 | if ! command -v iptables-legacy > /dev/null; then \ 62 | echo "ERROR: iptables-legacy is not installed" 1>&2; \ 63 | exit 1; \ 64 | fi && \ 65 | if ! command -v ip6tables > /dev/null; then \ 66 | echo "ERROR: ip6tables is not installed" 1>&2; \ 67 | exit 1; \ 68 | fi && \ 69 | /iptables-wrapper-installer.sh --no-sanity-check 70 | 71 | WORKDIR /root 72 | ENTRYPOINT ["/usr/local/bin/kube-router"] 73 | -------------------------------------------------------------------------------- /docs/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Welcome to the introduction guide to Kube-router! This guide is the best place to start with Kube-router. We cover what 4 | kube-router is, what problems it can solve, how it compares to existing software, and how you can get started using it. 5 | If you are familiar with the basics of Kube-router, head over to the next sections that provide a more detailed 6 | reference of available features. 
7 | 8 | ## What is Kube-router 9 | 10 | If you are not familiar with the Kubernetes networking model, it is recommended that you familiarize yourself with the 11 | Kubernetes [networking model](https://kubernetes.io/docs/concepts/cluster-administration/networking/#kubernetes-model). 12 | Essentially, Kubernetes expects: 13 | 14 | - all containers can communicate with all other containers without NAT 15 | - all nodes can communicate with all containers (and vice-versa) without NAT 16 | - the IP that a container sees itself as is the same IP that others see it as 17 | 18 | Kubernetes only prescribes the requirements for the networking model but does not provide any default implementation. 19 | For a functional Kubernetes cluster, one has to deploy what is called a CNI or pod networking solution that provides 20 | the above functionality. 21 | 22 | Any non-trivial containerized application will end up running multiple pods and exposing different services. The 23 | [Service](https://kubernetes.io/docs/concepts/services-networking/service/) abstraction in Kubernetes is an essential 24 | building block that helps with service discovery and load balancing. A layer-4 service proxy must be deployed to 25 | the Kubernetes cluster to provide load-balancing for the services exposed by the pods. 26 | 27 | Once you have pod-to-pod networking established and have a service proxy that provides load-balancing, you need a way to 28 | secure your pods. Kubernetes 29 | [Network Policies](https://kubernetes.io/docs/concepts/services-networking/network-policies/) provide a specification to 30 | secure pods. You need to deploy a solution that implements the network policy specification and provides security for your 31 | pods. 32 | 33 | If you utilize [LoadBalancer](https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer) services in 34 | your cluster, then you need to deploy a solution that will allocate and manage your LoadBalancer IP address space. 35 | 36 | Kube-router is a turnkey solution for Kubernetes networking that provides all of the above essential functionality in one 37 | single elegant package. 38 | 39 | ## Why Kube-router 40 | 41 | Networking is hard. There are multiple Kubernetes networking solutions that provide pod networking, network policy, etc. 42 | But when you deploy an individual solution for each piece of functionality, you end up with a lot of moving parts, making 43 | it difficult to operate and troubleshoot. 44 | 45 | Kube-router is a lean yet powerful all-in-one alternative to several network components used in typical Kubernetes 46 | clusters. All this from a single DaemonSet/Binary. It doesn't get any easier. 47 | 48 | Kube-router also uses best-of-breed low-level kernel solutions for maximum performance. Kube-router uses IPVS/LVS for 49 | service proxying and provides direct routing between the nodes. 50 | 51 | Kube-router also provides unique and advanced functionality such as DSR (Direct Server Return) and ECMP-based network 52 | load balancing. 53 | -------------------------------------------------------------------------------- /docs/tunnels.md: -------------------------------------------------------------------------------- 1 | # Tunnels in kube-router 2 | 3 | There are several situations in which kube-router will use tunnels in order to perform certain forms of overlay / 4 | underlay routing within the cluster. To accomplish this, kube-router makes use of 5 | [IPIP](https://en.wikipedia.org/wiki/IP_in_IP) overlay tunnels that are built into the Linux kernel and instrumented 6 | with iproute2.
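To get a feel for what such a tunnel looks like at the iproute2 level, here is an illustrative sketch; the interface name, node IPs, and pod CIDR are placeholders, and these are not the exact commands kube-router runs internally:

```sh
# Create an IPIP tunnel interface pointing at a peer node
ip tunnel add tun-node2 mode ipip local 10.0.1.10 remote 10.0.2.20
ip link set tun-node2 up

# Route the peer node's pod CIDR over the tunnel
ip route add 10.244.2.0/24 dev tun-node2
```

Conceptually, kube-router maintains one such tunnel and route per peer node that needs overlay transport.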
7 | 8 | ## Scenarios for Tunnelling 9 | 10 | By default, kube-router enables the option `--enable-overlay`, which will perform overlay networking based upon the 11 | `--overlay-type` setting (by default set to `subnet`). So out of the box, kube-router will create a tunnel for 12 | pod-to-pod traffic any time it comes across a kube-router-enabled node that is not within the subnet of its primary 13 | interface. 14 | 15 | Additionally, if `--overlay-type` is set to `full`, kube-router will create a tunnel for all pod-to-pod traffic and 16 | attempt to transit any pod traffic in the cluster via an IPIP overlay network between nodes. 17 | 18 | Finally, kube-router also uses tunnels for DSR ([Direct Server Return](dsr.md)). In this case, the inbound traffic is 19 | encapsulated in an IPIP packet by IPVS after it reaches the node and before it is sent to the pod for processing. This 20 | allows the return IP address of the sender to be preserved at the pod level so that it can be sent directly back to the 21 | requestor (rather than being routed in a synchronous fashion). 22 | 23 | ## Encapsulation Types 24 | 25 | * IPIP (IP in IP) - This is the default method of encapsulation that kube-router uses 26 | * FoU (Foo over UDP) - This is an optional type of IPIP encapsulation that kube-router uses if the user enables it 27 | 28 | ### FoU Details 29 | 30 | Specifically, kube-router uses GUE 31 | ([Generic UDP Encapsulation](https://developers.redhat.com/blog/2019/05/17/an-introduction-to-linux-virtual-interfaces-tunnels#gue)) 32 | in order to support both IPv4 and IPv6 FoU tunnels. This option can be enabled via the kube-router parameter 33 | `--overlay-encap=fou`. Optionally, the user can also specify a desired port for this traffic via the 34 | `--overlay-encap-port` parameter (by default set to `5555`). 35 | 36 | ## IPIP with Azure 37 | 38 | Unfortunately, Azure doesn't allow IPIP encapsulation on its network. So users that want to use an overlay network 39 | will need to enable `fou` support in order to deploy kube-router in an Azure environment. 40 | 41 | ## Changing Between Tunnel Types in a Live Cluster 42 | 43 | While it is possible to change a running cluster between `ipip` and `fou` type tunnels, administrators should be aware 44 | that the rollout will cause pod-to-pod traffic to be dropped between some nodes. Since, in almost all rollout 45 | scenarios, kube-router is rolled out gracefully from one pod or host to the next, during this rollout there will be 46 | mismatches in encapsulation support between nodes, as invariably one node will have an upgraded 47 | kube-router while another node may still have the previous deployment. 48 | 49 | When this happens, the two nodes will have conflicting encapsulation setups on their tunnels, and traffic will not be 50 | able to be sent between them until they are using a consistent encapsulation protocol. 51 | 52 | Once all nodes have been upgraded to the destination configuration, pod-to-pod traffic patterns should return to normal. 53 | -------------------------------------------------------------------------------- /docs/pod-toolbox.md: -------------------------------------------------------------------------------- 1 | # Pod Toolbox 2 | 3 | When kube-router is run as a Pod within your Kubernetes cluster, it also ships 4 | with a number of tools automatically configured for your cluster. These can be 5 | used to troubleshoot issues and learn more about how cluster networking is 6 | performed.
7 | 8 | ## Logging In 9 | 10 | Here's a quick way to get going on a random node in your cluster: 11 | 12 | ```sh 13 | KR_POD=$(basename $(kubectl -n kube-system get pods -l k8s-app=kube-router --output name|head -n1)) 14 | kubectl -n kube-system exec -it ${KR_POD} bash 15 | ``` 16 | 17 | Use `kubectl -n kube-system get pods -l k8s-app=kube-router -o wide` to see what 18 | nodes are running which pods. This will help if you want to investigate a 19 | particular node. 20 | 21 | ## Tools And Usage 22 | 23 | Once logged in you will see some help on using the tools in the container. 24 | 25 | For example: 26 | 27 | ```console 28 | Welcome to kube-router on "node1.zbrbdl"! 29 | 30 | For debugging, the following tools are available: 31 | - ipvsadm | Gather info about Virtual Services and Real Servers via IPVS. 32 | | Examples: 33 | | ## Show all options 34 | | ipvsadm --help 35 | | ## List Services and Endpoints handled by IPVS 36 | | ipvsadm -ln 37 | | ## Show traffic rate information 38 | | ipvsadm -ln --rate 39 | | ## Show cumulative traffic 40 | | ipvsadm -ln --stats 41 | 42 | - gobgp | Get BGP related information from your nodes. 43 | | 44 | | Tab-completion is ready to use, just type "gobgp " 45 | | to see the subcommands available. 46 | | 47 | | By default gobgp will query the Node this Pod is running 48 | | on, i.e. "node1.zbrbdl". To query a different node use 49 | | "gobgp --host node02.mydomain" as an example. 50 | | 51 | | For more examples see: https://github.com/osrg/gobgp/blob/master/docs/sources/cli-command-syntax.md 52 | 53 | Here's a quick look at what's happening on this Node 54 | --- BGP Server Configuration --- 55 | AS: 64512 56 | Router-ID: 10.10.3.2 57 | Listening Port: 179, Addresses: 0.0.0.0, :: 58 | 59 | --- BGP Neighbors --- 60 | Peer AS Up/Down State |#Received Accepted 61 | 64512 2d 01:05:07 Establ | 1 1 62 | 63 | --- BGP Route Info --- 64 | Network Next Hop AS_PATH Age Attrs 65 | *> 10.2.0.0/24 10.10.3.3 4000 400000 300000 40001 2d 01:05:20 [{Origin: i} {LocalPref: 100}] 66 | *> 10.2.1.0/24 10.10.3.2 4000 400000 300000 40001 00:00:36 [{Origin: i}] 67 | 68 | --- IPVS Services --- 69 | IP Virtual Server version 1.2.1 (size=4096) 70 | Prot LocalAddress:Port Scheduler Flags 71 | -> RemoteAddress:Port Forward Weight ActiveConn InActConn 72 | TCP 10.3.0.1:443 rr persistent 10800 mask 0.0.0.0 73 | -> 10.10.3.2:443 Masq 1 0 0 74 | TCP 10.3.0.10:53 rr 75 | -> 10.2.0.2:53 Masq 1 0 0 76 | TCP 10.3.0.15:2379 rr 77 | -> 10.10.3.3:2379 Masq 1 45 0 78 | TCP 10.3.0.155:2379 rr 79 | -> 10.10.3.3:2379 Masq 1 0 0 80 | UDP 10.3.0.10:53 rr 81 | -> 10.2.0.2:53 Masq 1 0 0 82 | ``` 83 | -------------------------------------------------------------------------------- /pkg/utils/ip.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net" 7 | ) 8 | 9 | const ( 10 | IPv4DefaultRoute = "0.0.0.0/0" 11 | IPv6DefaultRoute = "::/0" 12 | 13 | ipv4NetMaskBits = 32 14 | ipv6NetMaskBits = 128 15 | ) 16 | 17 | // GetSingleIPNet returns an IPNet object that represents a subnet containing a single IP address for a given IP address 18 | // with proper handling for IPv4 and IPv6 addresses. 
19 | func GetSingleIPNet(ip net.IP) *net.IPNet { 20 | if ip.To4() != nil { 21 | return &net.IPNet{ 22 | IP: ip, 23 | Mask: net.CIDRMask(ipv4NetMaskBits, ipv4NetMaskBits), 24 | } 25 | } else { 26 | return &net.IPNet{ 27 | IP: ip, 28 | Mask: net.CIDRMask(ipv6NetMaskBits, ipv6NetMaskBits), 29 | } 30 | } 31 | } 32 | 33 | // GetIPv4NetMaxMaskBits returns the maximum mask bits for an IPv4 address 34 | func GetIPv4NetMaxMaskBits() uint32 { 35 | return ipv4NetMaskBits 36 | } 37 | 38 | // GetIPv6NetMaxMaskBits returns the maximum mask bits for an IPv6 address 39 | func GetIPv6NetMaxMaskBits() uint32 { 40 | return ipv6NetMaskBits 41 | } 42 | 43 | // ContainsIPv4Address checks a given string array to see if it contains a valid IPv4 address within it 44 | func ContainsIPv4Address(addrs []string) bool { 45 | for _, addr := range addrs { 46 | ip := net.ParseIP(addr) 47 | if ip == nil { 48 | continue 49 | } 50 | if ip.To4() != nil { 51 | return true 52 | } 53 | } 54 | return false 55 | } 56 | 57 | // ContainsIPv6Address checks a given string array to see if it contains a valid IPv6 address within it 58 | func ContainsIPv6Address(addrs []string) bool { 59 | for _, addr := range addrs { 60 | ip := net.ParseIP(addr) 61 | if ip == nil { 62 | continue 63 | } 64 | if ip.To4() != nil { 65 | continue 66 | } 67 | if ip.To16() != nil { 68 | return true 69 | } 70 | } 71 | return false 72 | } 73 | 74 | // GetDefaultIPv4Route returns the default IPv4 route 75 | func GetDefaultIPv4Route() *net.IPNet { 76 | _, defaultPrefixCIDR, err := net.ParseCIDR(IPv4DefaultRoute) 77 | if err != nil { 78 | return nil 79 | } 80 | return defaultPrefixCIDR 81 | } 82 | 83 | // GetDefaultIPv6Route returns the default IPv6 route 84 | func GetDefaultIPv6Route() *net.IPNet { 85 | _, defaultPrefixCIDR, err := net.ParseCIDR(IPv6DefaultRoute) 86 | if err != nil { 87 | return nil 88 | } 89 | return defaultPrefixCIDR 90 | } 91 | 92 | // IPNetEqual checks if two IPNet objects are equal by comparing the IP and Mask 93 | func IPNetEqual(a, b *net.IPNet) bool { 94 | if a == nil || b == nil { 95 | return a == b 96 | } 97 | return a.IP.Equal(b.IP) && bytes.Equal(a.Mask, b.Mask) 98 | } 99 | 100 | // IsDefaultRoute checks if a given CIDR is a default route by comparing it to the default routes for IPv4 and IPv6 101 | func IsDefaultRoute(cidr *net.IPNet) (bool, error) { 102 | var defaultPrefixCIDR *net.IPNet 103 | var err error 104 | 105 | if cidr.IP.To4() != nil { 106 | _, defaultPrefixCIDR, err = net.ParseCIDR(IPv4DefaultRoute) 107 | if err != nil { 108 | return false, fmt.Errorf("failed to parse default route: %s", err.Error()) 109 | } 110 | } else { 111 | _, defaultPrefixCIDR, err = net.ParseCIDR(IPv6DefaultRoute) 112 | if err != nil { 113 | return false, fmt.Errorf("failed to parse default route: %s", err.Error()) 114 | } 115 | } 116 | return IPNetEqual(defaultPrefixCIDR, cidr), nil 117 | } 118 | -------------------------------------------------------------------------------- /pkg/cri/remote_runtime.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | "net" 8 | "strings" 9 | "time" 10 | 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/credentials/insecure" 13 | 14 | "k8s.io/klog/v2" 15 | 16 | runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" 17 | ) 18 | 19 | const ( 20 | DefaultConnectionTimeout = 15 * time.Second 21 | maxMsgSize = 1024 * 1024 * 16 // 16 MB 22 | ) 23 | 24 | // remoteRuntimeService is a gRPC implementation of 
RuntimeService. 25 | type remoteRuntimeService struct { 26 | timeout time.Duration 27 | runtimeClient runtimeapi.RuntimeServiceClient 28 | conn *grpc.ClientConn 29 | } 30 | 31 | type containerInfo struct { 32 | Pid int `json:"pid"` 33 | } 34 | 35 | // NewRemoteRuntimeService creates a new RuntimeService. 36 | func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration) (RuntimeService, error) { 37 | proto, addr, err := EndpointParser(endpoint) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | klog.V(4).Infof("[RuntimeService] got endpoint %s (proto=%s, path=%s)", endpoint, proto, addr) 43 | 44 | if proto == "unix" { 45 | // Ever since grpc.DialContext was deprecated, we no longer get the passthrough resolver for free, so we need 46 | // to add it manually. See: https://github.com/grpc/grpc-go/issues/1846 for more context 47 | addr = "passthrough:///" + addr 48 | } else { 49 | return nil, errors.New("[RuntimeService] only unix socket is currently supported") 50 | } 51 | 52 | conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), 53 | grpc.WithContextDialer(dialer), 54 | grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize))) 55 | if err != nil { 56 | klog.Errorf("Connect remote runtime %s failed: %v", addr, err) 57 | return nil, err 58 | } 59 | 60 | return &remoteRuntimeService{ 61 | timeout: connectionTimeout, 62 | runtimeClient: runtimeapi.NewRuntimeServiceClient(conn), 63 | conn: conn, 64 | }, nil 65 | } 66 | 67 | // ContainerInfo returns verbose info of the provided container. 68 | func (r *remoteRuntimeService) ContainerInfo(id string) (*containerInfo, error) { 69 | ctx, cancel := context.WithTimeout(context.Background(), r.timeout) 70 | defer cancel() 71 | 72 | // Verbose should be set, otherwise we'll get an empty result back. 73 | resp, err := r.runtimeClient.ContainerStatus(ctx, &runtimeapi.ContainerStatusRequest{ 74 | ContainerId: id, 75 | Verbose: true, 76 | }) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | info := containerInfo{} 82 | 83 | if err := json.Unmarshal([]byte(resp.Info["info"]), &info); err != nil { 84 | return nil, err 85 | } 86 | return &info, nil 87 | } 88 | 89 | // Close tears down the *grpc.ClientConn and all underlying connections. 90 | func (r *remoteRuntimeService) Close() error { 91 | if err := r.conn.Close(); err != nil { 92 | return err 93 | } 94 | return nil 95 | } 96 | 97 | func dialer(ctx context.Context, addr string) (net.Conn, error) { 98 | return (&net.Dialer{}).DialContext(ctx, "unix", addr) 99 | } 100 | 101 | // EndpointParser returns the protocol and path of the provided endpoint 102 | func EndpointParser(endpoint string) (proto string, path string, err error) { 103 | 104 | result := strings.Split(endpoint, "://") 105 | 106 | if len(result) < 2 { 107 | return "", "", errors.New("bad endpoint format.
should be 'protocol://path'") 108 | } 109 | return result[0], result[1], nil 110 | } 111 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/policy_test.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | v1 "k8s.io/api/core/v1" 9 | ) 10 | 11 | func testNamePrefix(t *testing.T, testString string, isIPv6 bool) { 12 | if isIPv6 { 13 | assert.Truef(t, strings.HasPrefix(testString, "inet6:"), "%s is IPv6 and should begin with inet6:", testString) 14 | } 15 | } 16 | 17 | func Test_policySourcePodIPSetName(t *testing.T) { 18 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 19 | setName := policySourcePodIPSetName("foo", "bar", v1.IPv4Protocol) 20 | testNamePrefix(t, setName, false) 21 | setName = policySourcePodIPSetName("foo", "bar", v1.IPv6Protocol) 22 | testNamePrefix(t, setName, true) 23 | }) 24 | } 25 | 26 | func Test_policyDestinationPodIPSetName(t *testing.T) { 27 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 28 | setName := policyDestinationPodIPSetName("foo", "bar", v1.IPv4Protocol) 29 | testNamePrefix(t, setName, false) 30 | setName = policyDestinationPodIPSetName("foo", "bar", v1.IPv6Protocol) 31 | testNamePrefix(t, setName, true) 32 | }) 33 | } 34 | 35 | func Test_policyIndexedSourcePodIPSetName(t *testing.T) { 36 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 37 | setName := policyIndexedSourcePodIPSetName("foo", "bar", 1, v1.IPv4Protocol) 38 | testNamePrefix(t, setName, false) 39 | setName = policyIndexedSourcePodIPSetName("foo", "bar", 1, v1.IPv6Protocol) 40 | testNamePrefix(t, setName, true) 41 | }) 42 | } 43 | 44 | func Test_policyIndexedDestinationPodIPSetName(t *testing.T) { 45 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 46 | setName := policyIndexedDestinationPodIPSetName("foo", "bar", 1, v1.IPv4Protocol) 47 | testNamePrefix(t, setName, false) 48 | setName = policyIndexedDestinationPodIPSetName("foo", "bar", 1, v1.IPv6Protocol) 49 | testNamePrefix(t, setName, true) 50 | }) 51 | } 52 | 53 | func Test_policyIndexedSourceIPBlockIPSetName(t *testing.T) { 54 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 55 | setName := policyIndexedSourceIPBlockIPSetName("foo", "bar", 1, v1.IPv4Protocol) 56 | testNamePrefix(t, setName, false) 57 | setName = policyIndexedSourceIPBlockIPSetName("foo", "bar", 1, v1.IPv6Protocol) 58 | testNamePrefix(t, setName, true) 59 | }) 60 | } 61 | 62 | func Test_policyIndexedDestinationIPBlockIPSetName(t *testing.T) { 63 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 64 | setName := policyIndexedDestinationIPBlockIPSetName("foo", "bar", 1, v1.IPv4Protocol) 65 | testNamePrefix(t, setName, false) 66 | setName = policyIndexedDestinationIPBlockIPSetName("foo", "bar", 1, v1.IPv6Protocol) 67 | testNamePrefix(t, setName, true) 68 | }) 69 | } 70 | 71 | func Test_policyIndexedIngressNamedPortIPSetName(t *testing.T) { 72 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 73 | setName := policyIndexedIngressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv4Protocol) 74 | testNamePrefix(t, setName, false) 75 | setName = policyIndexedIngressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv6Protocol) 76 | testNamePrefix(t, setName, true) 77 | }) 78 | } 79 | 80 | func Test_policyIndexedEgressNamedPortIPSetName(t *testing.T) { 81 | 
t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 82 | setName := policyIndexedEgressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv4Protocol) 83 | testNamePrefix(t, setName, false) 84 | setName = policyIndexedEgressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv6Protocol) 85 | testNamePrefix(t, setName, true) 86 | }) 87 | } 88 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Process for creating a kube-router release 2 | 3 | ## Preparing for the release 4 | 5 | * Ensure that the Golang release used is still supported. Definition happens currently in 6 | [Github Workflow](.github/workflow/ci.yml) and [Makefile](Makefile). 7 | * Ensure that the Alpine version used in container builds is still supported. Definition happens currently in 8 | [Github Workflow](.github/workflow/ci.yml) and [Makefile](Makefile). 9 | * Ensure that Golang dependencies are updated. 10 | `go list -mod=mod -u -m -f '{{.}}{{if .Indirect}} IAMINDIRECT{{end}}' all | grep -v IAMINDIRECT` lists possible 11 | updates. 12 | * Ensure that the GoBGP version is updated. See [upstream](https://github.com/osrg/gobgp/releases) and GoBGP definition 13 | in [Makefile](Makefile) and [go.mod](go.mod). 14 | * Ensure that the Kubernetes object definitions do not contain deprecated object types. Definition currently is in 15 | kube-router's [Daemonset](daemonset) folder. 16 | * Ensure GitHub actions are updated: 17 | ```sh 18 | dependabot update github_actions cloudnativelabs/kube-router 19 | ``` 20 | 21 | ## New major/minor release 22 | 23 | * Create a branch named v$MAJOR.$MINOR from the default branch (currently: master) 24 | * Create a new tag with the release tag v$MAJOR.$MINOR.0 25 | 26 | ```sh 27 | git tag 28 | git push origin 29 | ``` 30 | 31 | Note: your remote for the main kube-router repo may not be origin, please correct it to whatever you have called the 32 | official kube-router remote. 33 | 34 | ## New patch release 35 | 36 | * Change to the `master` branch 37 | * Use `git log` to identify which commits you want to bring to the new patch release 38 | * Change to the major/minor release branch that was created for this release 39 | * Cherry-Pick the changes from the `master` branch into the release branch 40 | * Create a new tag from the v$MAJOR.$MINOR release branch with the release tag v$MAJOR.$MINOR.$PATCH 41 | 42 | Example: 43 | 44 | ```sh 45 | git checkout master 46 | git log --color --pretty=format:'%h - %s (%cr) <%an>' --abbrev-commit --decorate 47 | git checkout 48 | git cherry-pick 49 | git tag 50 | git push origin 51 | ``` 52 | 53 | Note: your remote for the main kube-router repo may not be origin, please correct it to whatever you have called the 54 | official kube-router remote. 55 | 56 | ## Release Candidates 57 | 58 | * Follow above instructions and ensure that the tag contains `-rc`. Don't mark the pre-release as a proper release. 
59 | 60 | ## Release Build Process 61 | 62 | Once the tag is pushed to GitHub, GitHub Actions will be triggered and several things will happen: 63 | 64 | * kube-router will be linted 65 | * kube-router will be tested 66 | * The actions will run a test build of the kube-router binary 67 | * Containers for [defined architectures](https://github.com/cloudnativelabs/kube-router/blob/master/.github/workflows/ci.yml) 68 | (see `platforms` section in yaml) will be built and pushed to 69 | [DockerHub](https://hub.docker.com/r/cloudnativelabs/kube-router) via the `docker buildx` command 70 | * [goreleaser](https://goreleaser.com) will be run and will: 71 | * Generate a draft release on GitHub where maintainers can later choose to update it and release it 72 | * Brief release notes will be added to the draft release 73 | * Build all of the binary releases for [defined architectures](https://github.com/cloudnativelabs/kube-router/blob/master/.goreleaser.yml) 74 | and attach them to the draft release on GitHub 75 | 76 | ## After the release 77 | 78 | * Go to the [GitHub releases page for the kube-router project](https://github.com/cloudnativelabs/kube-router/releases) 79 | * Find the draft release 80 | * Consistent changelog syntax can be retrieved by running the following Git command: 81 | 82 | ```sh 83 | git log --format='* %h - %s `<%an>`' <previous-tag>..<current-tag> 84 | ``` 85 | 86 | * Announce the release in [#kube-router](https://app.slack.com/client/T09NY5SBT/C8DCQGTSB) on Kubernetes Slack. 87 | -------------------------------------------------------------------------------- /daemonset/generic-kuberouter-only-advertise-routes.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: DaemonSet 4 | metadata: 5 | labels: 6 | k8s-app: kube-router 7 | tier: node 8 | name: kube-router 9 | namespace: kube-system 10 | spec: 11 | selector: 12 | matchLabels: 13 | k8s-app: kube-router 14 | tier: node 15 | template: 16 | metadata: 17 | labels: 18 | k8s-app: kube-router 19 | tier: node 20 | spec: 21 | priorityClassName: system-node-critical 22 | serviceAccountName: kube-router 23 | containers: 24 | - name: kube-router 25 | image: docker.io/cloudnativelabs/kube-router 26 | imagePullPolicy: Always 27 | args: 28 | - "--run-router=true" 29 | - "--run-firewall=false" 30 | - "--run-service-proxy=false" 31 | - "--bgp-graceful-restart=true" 32 | - "--enable-cni=false" 33 | - "--enable-ibgp=false" 34 | - "--enable-overlay=false" 35 | - "--peer-router-ips=" 36 | - "--peer-router-asns=" 37 | - "--cluster-asn=" 38 | - "--advertise-cluster-ip=true" 39 | - "--advertise-external-ip=true" 40 | - "--advertise-loadbalancer-ip=true" 41 | env: 42 | - name: NODE_NAME 43 | valueFrom: 44 | fieldRef: 45 | fieldPath: spec.nodeName 46 | - name: POD_NAME 47 | valueFrom: 48 | fieldRef: 49 | fieldPath: metadata.name 50 | livenessProbe: 51 | httpGet: 52 | path: /healthz 53 | port: 20244 54 | initialDelaySeconds: 10 55 | periodSeconds: 3 56 | resources: 57 | requests: 58 | cpu: 250m 59 | memory: 250Mi 60 | securityContext: 61 | privileged: true 62 | volumeMounts: 63 | - name: xtables-lock 64 | mountPath: /run/xtables.lock 65 | readOnly: false 66 | hostNetwork: true 67 | hostPID: true 68 | tolerations: 69 | - effect: NoSchedule 70 | operator: Exists 71 | - key: CriticalAddonsOnly 72 | operator: Exists 73 | - effect: NoExecute 74 | operator: Exists 75 | volumes: 76 | - name: xtables-lock 77 | hostPath: 78 | path: /run/xtables.lock 79 | type: FileOrCreate 80 | --- 81 | apiVersion: v1 82 | kind:
ServiceAccount 83 | metadata: 84 | name: kube-router 85 | namespace: kube-system 86 | 87 | --- 88 | kind: ClusterRole 89 | apiVersion: rbac.authorization.k8s.io/v1 90 | metadata: 91 | name: kube-router 92 | namespace: kube-system 93 | rules: 94 | - apiGroups: 95 | - "" 96 | resources: 97 | - namespaces 98 | - pods 99 | - services 100 | - nodes 101 | - endpoints 102 | verbs: 103 | - list 104 | - get 105 | - watch 106 | - apiGroups: 107 | - "networking.k8s.io" 108 | resources: 109 | - networkpolicies 110 | verbs: 111 | - list 112 | - get 113 | - watch 114 | - apiGroups: 115 | - extensions 116 | resources: 117 | - networkpolicies 118 | verbs: 119 | - get 120 | - list 121 | - watch 122 | - apiGroups: 123 | - "coordination.k8s.io" 124 | resources: 125 | - leases 126 | verbs: 127 | - get 128 | - create 129 | - update 130 | - apiGroups: 131 | - "" 132 | resources: 133 | - services/status 134 | verbs: 135 | - update 136 | - apiGroups: 137 | - "discovery.k8s.io" 138 | resources: 139 | - endpointslices 140 | verbs: 141 | - get 142 | - list 143 | - watch 144 | 145 | --- 146 | kind: ClusterRoleBinding 147 | apiVersion: rbac.authorization.k8s.io/v1 148 | metadata: 149 | name: kube-router 150 | roleRef: 151 | apiGroup: rbac.authorization.k8s.io 152 | kind: ClusterRole 153 | name: kube-router 154 | subjects: 155 | - kind: ServiceAccount 156 | name: kube-router 157 | namespace: kube-system 158 | -------------------------------------------------------------------------------- /testdata/ipset_test_1/services.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | items: 3 | - apiVersion: v1 4 | kind: Service 5 | metadata: 6 | annotations: 7 | kube-router.io/service.hairpin: "true" 8 | labels: 9 | name: blank-service 10 | name: blank-service 11 | namespace: default 12 | spec: 13 | clusterIP: 10.96.156.243 14 | clusterIPs: 15 | - 10.96.156.243 16 | - 2001:db8:42:1::8e7d 17 | externalIPs: 18 | - 10.243.0.10 19 | externalTrafficPolicy: Cluster 20 | internalTrafficPolicy: Cluster 21 | ipFamilies: 22 | - IPv4 23 | - IPv6 24 | ipFamilyPolicy: PreferDualStack 25 | ports: 26 | - name: flask 27 | port: 5000 28 | protocol: TCP 29 | targetPort: 5000 30 | selector: 31 | name: foobarbaz12345 32 | sessionAffinity: None 33 | type: ClusterIP 34 | - apiVersion: v1 35 | kind: Service 36 | metadata: 37 | labels: 38 | component: apiserver 39 | provider: kubernetes 40 | name: kubernetes 41 | namespace: default 42 | spec: 43 | clusterIP: 10.96.0.1 44 | clusterIPs: 45 | - 10.96.0.1 46 | internalTrafficPolicy: Cluster 47 | ipFamilies: 48 | - IPv4 49 | ipFamilyPolicy: SingleStack 50 | ports: 51 | - name: https 52 | port: 443 53 | protocol: TCP 54 | targetPort: 6443 55 | sessionAffinity: None 56 | type: ClusterIP 57 | - apiVersion: v1 58 | kind: Service 59 | metadata: 60 | name: my-service 61 | namespace: default 62 | spec: 63 | clusterIP: 10.96.255.137 64 | clusterIPs: 65 | - 10.96.255.137 66 | internalTrafficPolicy: Cluster 67 | ipFamilies: 68 | - IPv4 69 | ipFamilyPolicy: SingleStack 70 | ports: 71 | - name: http 72 | port: 80 73 | protocol: TCP 74 | targetPort: 9376 75 | sessionAffinity: None 76 | type: ClusterIP 77 | - apiVersion: v1 78 | kind: Service 79 | metadata: 80 | labels: 81 | name: netcat-server 82 | name: netcat-server 83 | namespace: default 84 | spec: 85 | clusterIP: 10.96.115.202 86 | clusterIPs: 87 | - 10.96.115.202 88 | internalTrafficPolicy: Cluster 89 | ipFamilies: 90 | - IPv4 91 | ipFamilyPolicy: SingleStack 92 | ports: 93 | - name: nc-tcp 94 | port: 5000 95 
| protocol: TCP 96 | targetPort: 5000 97 | - name: nc-udp 98 | port: 5001 99 | protocol: UDP 100 | targetPort: 5001 101 | selector: 102 | name: netcat-server 103 | sessionAffinity: None 104 | type: ClusterIP 105 | - apiVersion: v1 106 | kind: Service 107 | metadata: 108 | annotations: 109 | kube-router.io/service.hairpin: "true" 110 | kube-router.io/service.local: "true" 111 | labels: 112 | name: whoami 113 | name: whoami 114 | namespace: default 115 | spec: 116 | clusterIP: 10.96.243.193 117 | clusterIPs: 118 | - 10.96.243.193 119 | - 2001:db8:42:1::bd90 120 | externalIPs: 121 | - 10.243.0.1 122 | externalTrafficPolicy: Cluster 123 | internalTrafficPolicy: Cluster 124 | ipFamilies: 125 | - IPv4 126 | - IPv6 127 | ipFamilyPolicy: PreferDualStack 128 | ports: 129 | - name: flask 130 | port: 5000 131 | protocol: TCP 132 | targetPort: 5000 133 | selector: 134 | name: whoami 135 | sessionAffinity: None 136 | type: ClusterIP 137 | - apiVersion: v1 138 | kind: Service 139 | metadata: 140 | labels: 141 | k8s-app: kube-dns 142 | kubernetes.io/name: CoreDNS 143 | name: kube-dns 144 | namespace: kube-system 145 | spec: 146 | clusterIP: 10.96.0.10 147 | clusterIPs: 148 | - 10.96.0.10 149 | internalTrafficPolicy: Cluster 150 | ipFamilies: 151 | - IPv4 152 | ipFamilyPolicy: SingleStack 153 | ports: 154 | - name: dns 155 | port: 53 156 | protocol: UDP 157 | targetPort: 53 158 | - name: dns-tcp 159 | port: 53 160 | protocol: TCP 161 | targetPort: 53 162 | sessionAffinity: None 163 | type: ClusterIP 164 | kind: List 165 | metadata: 166 | resourceVersion: "" 167 | -------------------------------------------------------------------------------- /daemonset/kube-router-proxy-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=false" 49 | - "--run-firewall=false" 50 | - "--run-service-proxy=true" 51 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 52 | securityContext: 53 | privileged: true 54 | imagePullPolicy: Always 55 | env: 56 | - name: NODE_NAME 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: spec.nodeName 60 | - name: KUBE_ROUTER_CNI_CONF_FILE 61 | value: /etc/cni/net.d/10-kuberouter.conflist 62 | livenessProbe: 63 | httpGet: 64 | path: /healthz 65 | port: 20244 66 | initialDelaySeconds: 10 67 | periodSeconds: 3 68 | volumeMounts: 69 | - name: lib-modules 70 | mountPath: /lib/modules 71 | readOnly: true 72 | - name: cni-conf-dir 73 | mountPath: /etc/cni/net.d 74 | - name: kubeconfig 75 | mountPath: /var/lib/kube-router/kubeconfig 76 | readOnly: true 77 | - name: xtables-lock 78 | mountPath: /run/xtables.lock 79 | readOnly: false 80 | initContainers: 81 | 
- name: install-cni 82 | image: docker.io/cloudnativelabs/kube-router 83 | imagePullPolicy: Always 84 | command: 85 | - /bin/sh 86 | - -c 87 | - set -e -x; 88 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 89 | if [ -f /etc/cni/net.d/*.conf ]; then 90 | rm -f /etc/cni/net.d/*.conf; 91 | fi; 92 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 93 | cp /etc/kube-router/cni-conf.json ${TMP}; 94 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 95 | fi; 96 | if [ -x /usr/local/bin/cni-install ]; then 97 | /usr/local/bin/cni-install; 98 | fi; 99 | volumeMounts: 100 | - name: cni-conf-dir 101 | mountPath: /etc/cni/net.d 102 | - name: kube-router-cfg 103 | mountPath: /etc/kube-router 104 | - name: host-opt 105 | mountPath: /opt 106 | hostNetwork: true 107 | hostPID: true 108 | tolerations: 109 | - effect: NoSchedule 110 | operator: Exists 111 | - key: CriticalAddonsOnly 112 | operator: Exists 113 | - effect: NoExecute 114 | operator: Exists 115 | volumes: 116 | - name: lib-modules 117 | hostPath: 118 | path: /lib/modules 119 | - name: cni-conf-dir 120 | hostPath: 121 | path: /etc/cni/net.d 122 | - name: kube-router-cfg 123 | configMap: 124 | name: kube-router-cfg 125 | - name: kubeconfig 126 | hostPath: 127 | path: /var/lib/kube-router/kubeconfig 128 | - name: xtables-lock 129 | hostPath: 130 | path: /run/xtables.lock 131 | type: FileOrCreate 132 | - name: host-opt 133 | hostPath: 134 | path: /opt 135 | -------------------------------------------------------------------------------- /daemonset/kube-router-firewall-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=false" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=false" 51 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 52 | securityContext: 53 | privileged: true 54 | imagePullPolicy: Always 55 | env: 56 | - name: NODE_NAME 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: spec.nodeName 60 | - name: KUBE_ROUTER_CNI_CONF_FILE 61 | value: /etc/cni/net.d/10-kuberouter.conflist 62 | livenessProbe: 63 | httpGet: 64 | path: /healthz 65 | port: 20244 66 | initialDelaySeconds: 10 67 | periodSeconds: 3 68 | volumeMounts: 69 | - name: lib-modules 70 | mountPath: /lib/modules 71 | readOnly: true 72 | - name: cni-conf-dir 73 | mountPath: /etc/cni/net.d 74 | - name: kubeconfig 75 | mountPath: /var/lib/kube-router/kubeconfig 76 | readOnly: true 77 | - name: xtables-lock 78 | mountPath: /run/xtables.lock 79 | readOnly: false 80 | initContainers: 81 | - name: install-cni 82 | image: docker.io/cloudnativelabs/kube-router 83 | imagePullPolicy: Always 84 | command: 85 | - /bin/sh 86 | - -c 87 | - 
set -e -x; 88 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 89 | if [ -f /etc/cni/net.d/*.conf ]; then 90 | rm -f /etc/cni/net.d/*.conf; 91 | fi; 92 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 93 | cp /etc/kube-router/cni-conf.json ${TMP}; 94 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 95 | fi; 96 | if [ -x /usr/local/bin/cni-install ]; then 97 | /usr/local/bin/cni-install; 98 | fi; 99 | volumeMounts: 100 | - name: cni-conf-dir 101 | mountPath: /etc/cni/net.d 102 | - name: kube-router-cfg 103 | mountPath: /etc/kube-router 104 | - name: host-opt 105 | mountPath: /opt 106 | hostNetwork: true 107 | hostPID: true 108 | tolerations: 109 | - effect: NoSchedule 110 | operator: Exists 111 | - key: CriticalAddonsOnly 112 | operator: Exists 113 | - effect: NoExecute 114 | operator: Exists 115 | volumes: 116 | - name: lib-modules 117 | hostPath: 118 | path: /lib/modules 119 | - name: cni-conf-dir 120 | hostPath: 121 | path: /etc/cni/net.d 122 | - name: kube-router-cfg 123 | configMap: 124 | name: kube-router-cfg 125 | - name: kubeconfig 126 | hostPath: 127 | path: /var/lib/kube-router/kubeconfig 128 | - name: xtables-lock 129 | hostPath: 130 | path: /run/xtables.lock 131 | type: FileOrCreate 132 | - name: host-opt 133 | hostPath: 134 | path: /opt 135 | -------------------------------------------------------------------------------- /daemonset/kube-router-all-service-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=true" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=true" 51 | - "--bgp-graceful-restart=true" 52 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 53 | securityContext: 54 | privileged: true 55 | imagePullPolicy: Always 56 | env: 57 | - name: NODE_NAME 58 | valueFrom: 59 | fieldRef: 60 | fieldPath: spec.nodeName 61 | - name: KUBE_ROUTER_CNI_CONF_FILE 62 | value: /etc/cni/net.d/10-kuberouter.conflist 63 | livenessProbe: 64 | httpGet: 65 | path: /healthz 66 | port: 20244 67 | initialDelaySeconds: 10 68 | periodSeconds: 3 69 | volumeMounts: 70 | - name: lib-modules 71 | mountPath: /lib/modules 72 | readOnly: true 73 | - name: cni-conf-dir 74 | mountPath: /etc/cni/net.d 75 | - name: kubeconfig 76 | mountPath: /var/lib/kube-router/kubeconfig 77 | readOnly: true 78 | - name: xtables-lock 79 | mountPath: /run/xtables.lock 80 | readOnly: false 81 | initContainers: 82 | - name: install-cni 83 | image: docker.io/cloudnativelabs/kube-router 84 | imagePullPolicy: Always 85 | command: 86 | - /bin/sh 87 | - -c 88 | - set -e -x; 89 | if [ ! 
-f /etc/cni/net.d/10-kuberouter.conflist ]; then 90 | if [ -f /etc/cni/net.d/*.conf ]; then 91 | rm -f /etc/cni/net.d/*.conf; 92 | fi; 93 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 94 | cp /etc/kube-router/cni-conf.json ${TMP}; 95 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 96 | fi; 97 | if [ -x /usr/local/bin/cni-install ]; then 98 | /usr/local/bin/cni-install; 99 | fi; 100 | volumeMounts: 101 | - name: cni-conf-dir 102 | mountPath: /etc/cni/net.d 103 | - name: kube-router-cfg 104 | mountPath: /etc/kube-router 105 | - name: host-opt 106 | mountPath: /opt 107 | hostNetwork: true 108 | hostPID: true 109 | tolerations: 110 | - effect: NoSchedule 111 | operator: Exists 112 | - key: CriticalAddonsOnly 113 | operator: Exists 114 | - effect: NoExecute 115 | operator: Exists 116 | volumes: 117 | - name: lib-modules 118 | hostPath: 119 | path: /lib/modules 120 | - name: cni-conf-dir 121 | hostPath: 122 | path: /etc/cni/net.d 123 | - name: kube-router-cfg 124 | configMap: 125 | name: kube-router-cfg 126 | - name: kubeconfig 127 | hostPath: 128 | path: /var/lib/kube-router/kubeconfig 129 | - name: xtables-lock 130 | hostPath: 131 | path: /run/xtables.lock 132 | type: FileOrCreate 133 | - name: host-opt 134 | hostPath: 135 | path: /opt 136 | -------------------------------------------------------------------------------- /pkg/utils/sysctl.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | ) 8 | 9 | const ( 10 | // From what I can see there are no IPv6 equivalents for the below options, so we only consider IPv4 here 11 | // Network Services Configuration Paths 12 | IPv4IPVSConntrack = "net/ipv4/vs/conntrack" 13 | IPv4IPVSExpireNodestConn = "net/ipv4/vs/expire_nodest_conn" 14 | IPv4IPVSExpireQuiescent = "net/ipv4/vs/expire_quiescent_template" 15 | IPv4IPVSConnReuseMode = "net/ipv4/vs/conn_reuse_mode" 16 | IPv4IPVSSloppyTCP = "net/ipv4/vs/sloppy_tcp" 17 | IPv4ConfAllArpIgnore = "net/ipv4/conf/all/arp_ignore" 18 | IPv4ConfAllArpAnnounce = "net/ipv4/conf/all/arp_announce" 19 | IPv6ConfAllDisableIPv6 = "net/ipv6/conf/all/disable_ipv6" 20 | 21 | // Network Routes Configuration Paths 22 | BridgeNFCallIPTables = "net/bridge/bridge-nf-call-iptables" 23 | BridgeNFCallIP6Tables = "net/bridge/bridge-nf-call-ip6tables" 24 | 25 | // Template Configuration Paths 26 | IPv4ConfRPFilterTemplate = "net/ipv4/conf/%s/rp_filter" 27 | ) 28 | 29 | type SysctlError struct { 30 | additionalInfo string 31 | err error 32 | option string 33 | hasValue bool 34 | value int 35 | fatal bool 36 | } 37 | 38 | // Error returns the error as a string 39 | func (e *SysctlError) Error() string { 40 | value := "" 41 | if e.hasValue { 42 | value = fmt.Sprintf("=%d", e.value) 43 | } 44 | return fmt.Sprintf("Sysctl %s%s : %s (%s)", e.option, value, e.err, e.additionalInfo) 45 | } 46 | 47 | // IsFatal reports whether the error was fatal and a reason to exit kube-router 48 | func (e *SysctlError) IsFatal() bool { 49 | return e.fatal 50 | } 51 | 52 | // Unwrap allows us to unwrap an error showing the original error 53 | func (e *SysctlError) Unwrap() error { 54 | return e.err 55 | } 56 | 57 | type SysctlConfig struct { 58 | name string 59 | value int8 60 | } 61 | 62 | func (n *SysctlConfig) CachedVal() int8 { 63 | return n.value 64 | } 65 | 66 | func (n *SysctlConfig) WriteVal(val int8) *SysctlError { 67 | err := SetSysctl(n.name, int(val)) 68 | if err != nil { 69 | return err 70 | } 71 | n.value = val 72 | return nil 73 | } 74 | 75 | func sysctlStat(path
string, hasValue bool, value int) (string, *SysctlError) { 76 | sysctlPath := fmt.Sprintf("/proc/sys/%s", path) 77 | if _, err := os.Stat(sysctlPath); err != nil { 78 | if os.IsNotExist(err) { 79 | return sysctlPath, &SysctlError{ 80 | "option not found, does your kernel version support this feature?", 81 | err, path, hasValue, value, false} 82 | } 83 | return sysctlPath, &SysctlError{"path existed, but could not be stat'd", err, path, hasValue, value, true} 84 | } 85 | return sysctlPath, nil 86 | } 87 | 88 | // GetSysctlSingleTemplate gets a sysctl value by first formatting the PathTemplate parameter with the substitute string 89 | // and then getting the sysctl value and converting it into a string 90 | func GetSysctlSingleTemplate(pathTemplate string, substitute string) (string, *SysctlError) { 91 | actualPath := fmt.Sprintf(pathTemplate, substitute) 92 | return GetSysctl(actualPath) 93 | } 94 | 95 | // GetSysctl gets a sysctl value 96 | func GetSysctl(path string) (string, *SysctlError) { 97 | sysctlPath, err := sysctlStat(path, false, 0) 98 | if err != nil { 99 | return "", err 100 | } 101 | buf, readErr := os.ReadFile(sysctlPath) 102 | if readErr != nil { 103 | return "", &SysctlError{"path could not be read", readErr, path, false, 0, true} 104 | } 105 | return string(buf), nil 106 | } 107 | 108 | // SetSysctlSingleTemplate sets a sysctl value by first formatting the PathTemplate parameter with the substitute string 109 | // and then setting the sysctl to the value parameter 110 | func SetSysctlSingleTemplate(pathTemplate string, substitute string, value int) *SysctlError { 111 | actualPath := fmt.Sprintf(pathTemplate, substitute) 112 | return SetSysctl(actualPath, value) 113 | } 114 | 115 | // SetSysctl sets a sysctl value 116 | func SetSysctl(path string, value int) *SysctlError { 117 | sysctlPath, err := sysctlStat(path, true, value) 118 | if err != nil { 119 | return err 120 | } 121 | writeErr := os.WriteFile(sysctlPath, []byte(strconv.Itoa(value)), 0640) 122 | if writeErr != nil { 123 | return &SysctlError{"path could not be set", writeErr, path, true, value, true} 124 | } 125 | return nil 126 | } 127 | -------------------------------------------------------------------------------- /daemonset/kube-router-all-service-daemonset-advertise-routes.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=true" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=true" 51 | - "--bgp-graceful-restart=true" 52 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 53 | - "--advertise-cluster-ip=true" 54 | - "--cluster-asn=64512" 55 | - 
"--peer-router-ips=192.168.1.99" 56 | - "--peer-router-asns=64513" 57 | securityContext: 58 | privileged: true 59 | imagePullPolicy: Always 60 | env: 61 | - name: NODE_NAME 62 | valueFrom: 63 | fieldRef: 64 | fieldPath: spec.nodeName 65 | - name: KUBE_ROUTER_CNI_CONF_FILE 66 | value: /etc/cni/net.d/10-kuberouter.conflist 67 | livenessProbe: 68 | httpGet: 69 | path: /healthz 70 | port: 20244 71 | initialDelaySeconds: 10 72 | periodSeconds: 3 73 | volumeMounts: 74 | - name: lib-modules 75 | mountPath: /lib/modules 76 | readOnly: true 77 | - name: cni-conf-dir 78 | mountPath: /etc/cni/net.d 79 | - name: kubeconfig 80 | mountPath: /var/lib/kube-router/kubeconfig 81 | readOnly: true 82 | - name: xtables-lock 83 | mountPath: /run/xtables.lock 84 | readOnly: false 85 | initContainers: 86 | - name: install-cni 87 | image: docker.io/cloudnativelabs/kube-router 88 | imagePullPolicy: Always 89 | command: 90 | - /bin/sh 91 | - -c 92 | - set -e -x; 93 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 94 | if [ -f /etc/cni/net.d/*.conf ]; then 95 | rm -f /etc/cni/net.d/*.conf; 96 | fi; 97 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 98 | cp /etc/kube-router/cni-conf.json ${TMP}; 99 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 100 | fi; 101 | if [ -x /usr/local/bin/cni-install ]; then 102 | /usr/local/bin/cni-install; 103 | fi; 104 | volumeMounts: 105 | - name: cni-conf-dir 106 | mountPath: /etc/cni/net.d 107 | - name: kube-router-cfg 108 | mountPath: /etc/kube-router 109 | - name: host-opt 110 | mountPath: /opt 111 | hostNetwork: true 112 | hostPID: true 113 | tolerations: 114 | - effect: NoSchedule 115 | operator: Exists 116 | - key: CriticalAddonsOnly 117 | operator: Exists 118 | - effect: NoExecute 119 | operator: Exists 120 | volumes: 121 | - name: lib-modules 122 | hostPath: 123 | path: /lib/modules 124 | - name: cni-conf-dir 125 | hostPath: 126 | path: /etc/cni/net.d 127 | - name: kube-router-cfg 128 | configMap: 129 | name: kube-router-cfg 130 | - name: kubeconfig 131 | hostPath: 132 | path: /var/lib/kube-router/kubeconfig 133 | - name: xtables-lock 134 | hostPath: 135 | path: /run/xtables.lock 136 | type: FileOrCreate 137 | - name: host-opt 138 | hostPath: 139 | path: /opt 140 | -------------------------------------------------------------------------------- /pkg/controllers/routing/pod_egress.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | v1core "k8s.io/api/core/v1" 8 | "k8s.io/klog/v2" 9 | ) 10 | 11 | // set up MASQUERADE rule so that egress traffic from the pods gets masqueraded to node's IP 12 | 13 | var ( 14 | podEgressArgs4 = []string{"-m", "set", "--match-set", podSubnetsIPSetName, "src", 15 | "-m", "set", "!", "--match-set", podSubnetsIPSetName, "dst", 16 | "-m", "set", "!", "--match-set", nodeAddrsIPSetName, "dst", 17 | "-j", "MASQUERADE"} 18 | podEgressArgs6 = []string{"-m", "set", "--match-set", "inet6:" + podSubnetsIPSetName, "src", 19 | "-m", "set", "!", "--match-set", "inet6:" + podSubnetsIPSetName, "dst", 20 | "-m", "set", "!", "--match-set", "inet6:" + nodeAddrsIPSetName, "dst", 21 | "-j", "MASQUERADE"} 22 | podEgressArgsBad4 = [][]string{{"-m", "set", "--match-set", podSubnetsIPSetName, "src", 23 | "-m", "set", "!", "--match-set", podSubnetsIPSetName, "dst", 24 | "-j", "MASQUERADE"}} 25 | podEgressArgsBad6 = [][]string{{"-m", "set", "--match-set", "inet6:" + podSubnetsIPSetName, "src", 26 | "-m", "set", "!", "--match-set", "inet6:" + podSubnetsIPSetName, "dst", 27 
| "-j", "MASQUERADE"}} 28 | ) 29 | 30 | func (nrc *NetworkRoutingController) createPodEgressRule() error { 31 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 32 | podEgressArgs := podEgressArgs4 33 | if family == v1core.IPv6Protocol { 34 | podEgressArgs = podEgressArgs6 35 | } 36 | if iptablesCmdHandler.HasRandomFully() { 37 | podEgressArgs = append(podEgressArgs, "--random-fully") 38 | } 39 | 40 | err := iptablesCmdHandler.AppendUnique("nat", "POSTROUTING", podEgressArgs...) 41 | if err != nil { 42 | return errors.New("Failed to add iptables rule to masquerade outbound traffic from pods: " + 43 | err.Error() + "External connectivity will not work.") 44 | 45 | } 46 | } 47 | 48 | klog.V(1).Infof("Added iptables rule to masquerade outbound traffic from pods.") 49 | return nil 50 | } 51 | 52 | func (nrc *NetworkRoutingController) deletePodEgressRule() error { 53 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 54 | podEgressArgs := podEgressArgs4 55 | if family == v1core.IPv6Protocol { 56 | podEgressArgs = podEgressArgs6 57 | } 58 | if iptablesCmdHandler.HasRandomFully() { 59 | podEgressArgs = append(podEgressArgs, "--random-fully") 60 | } 61 | 62 | exists, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", podEgressArgs...) 63 | if err != nil { 64 | return errors.New("Failed to lookup iptables rule to masquerade outbound traffic from pods: " + err.Error()) 65 | } 66 | 67 | if exists { 68 | err = iptablesCmdHandler.Delete("nat", "POSTROUTING", podEgressArgs...) 69 | if err != nil { 70 | return errors.New("Failed to delete iptables rule to masquerade outbound traffic from pods: " + 71 | err.Error() + ". Pod egress might still work...") 72 | } 73 | klog.Infof("Deleted iptables rule to masquerade outbound traffic from pods.") 74 | } 75 | } 76 | 77 | return nil 78 | } 79 | 80 | func (nrc *NetworkRoutingController) deleteBadPodEgressRules() error { 81 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 82 | podEgressArgsBad := podEgressArgsBad4 83 | if family == v1core.IPv6Protocol { 84 | podEgressArgsBad = podEgressArgsBad6 85 | } 86 | 87 | // If random fully is supported remove the original rule as well 88 | if iptablesCmdHandler.HasRandomFully() { 89 | if family == v1core.IPv4Protocol { 90 | podEgressArgsBad = append(podEgressArgsBad, podEgressArgs4) 91 | } else { 92 | podEgressArgsBad = append(podEgressArgsBad, podEgressArgs6) 93 | } 94 | } 95 | 96 | for _, args := range podEgressArgsBad { 97 | exists, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", args...) 98 | if err != nil { 99 | return fmt.Errorf("failed to lookup iptables rule: %s", err.Error()) 100 | } 101 | 102 | if exists { 103 | err = iptablesCmdHandler.Delete("nat", "POSTROUTING", args...) 104 | if err != nil { 105 | return fmt.Errorf("failed to delete old/bad iptables rule to masquerade outbound traffic "+ 106 | "from pods: %s. 
Pod egress might still work, or bugs may persist after upgrade", err) 107 | } 108 | klog.Infof("Deleted old/bad iptables rule to masquerade outbound traffic from pods.") 109 | } 110 | } 111 | } 112 | 113 | return nil 114 | } 115 | -------------------------------------------------------------------------------- /docs/how-it-works.md: -------------------------------------------------------------------------------- 1 | 2 | # Theory of Operation 3 | 4 | Kube-router can be run as an agent or a Pod (via DaemonSet) on each node and 5 | leverages the standard Linux technologies **iptables, ipvs/lvs, ipset, and iproute2**. 6 | 7 | ## Service Proxy And Load Balancing 8 | 9 | Blog: [Kubernetes network services proxy with IPVS/LVS](https://cloudnativelabs.github.io/post/2017-05-10-kube-network-service-proxy/) 10 | 11 | Kube-router uses the IPVS/LVS technology built into Linux to provide L4 load 12 | balancing. Each **ClusterIP**, **NodePort**, and **LoadBalancer** Kubernetes 13 | Service type is configured as an IPVS virtual service. Each Service Endpoint is 14 | configured as a real server for the virtual service. The standard **ipvsadm** tool 15 | can be used to verify the configuration and monitor the active connections. 16 | 17 | Below is an example set of Services on Kubernetes: 18 | 19 | ![Kube services](./img/svc.jpg) 20 | 21 | and the Endpoints for the Services: 22 | 23 | ![Kube services](./img/ep.jpg) 24 | 25 | and how they are mapped to IPVS by kube-router: 26 | 27 | ![IPVS configuration](./img/ipvs1.jpg) 28 | 29 | Kube-router watches the Kubernetes API server to get updates on the 30 | Services/Endpoints and automatically syncs the IPVS configuration to reflect the 31 | desired state of Services. Kube-router currently uses IPVS masquerading mode and 32 | round robin scheduling. The source pod IP is preserved so that appropriate 33 | network policies can be applied. 34 | 35 | ## Pod Ingress Firewall 36 | 37 | Blog: [Enforcing Kubernetes network policies with iptables](https://cloudnativelabs.github.io/post/2017-05-1-kube-network-policies/) 38 | 39 | Kube-router provides an implementation of Kubernetes Network Policies through 40 | the use of iptables, ipset and conntrack. All the Pods in a Namespace with a 41 | 'DefaultDeny' ingress isolation policy have ingress blocked. Only traffic that 42 | matches the whitelist rules specified in the network policies is permitted to reach 43 | those Pods. The following set of iptables rules and chains in the 'filter' table 44 | is used to achieve the Network Policy semantics. 45 | 46 | Each Pod running on the Node that needs ingress blocked by default is matched 47 | in the FORWARD and OUTPUT chains of the filter table and sent to a pod-specific 48 | firewall chain. Rules are added to match the following cases: 49 | 50 | - Traffic getting switched between the Pods on the same Node through the local 51 | bridge 52 | - Traffic getting routed between the Pods on different Nodes 53 | - Traffic originating from a Pod and going through the Service proxy and getting 54 | routed to a Pod on the same Node 55 | 56 | ![FORWARD/OUTPUT chain](./img/forward.png) 57 | 58 | Each pod-specific firewall chain has a default rule to block the traffic. Rules 59 | are added to jump traffic to the Network Policy specific policy chains. Rules 60 | cover only policies that apply to the destination pod IP. A rule is added to 61 | accept established traffic to permit the return traffic.
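These chains and ipsets can be inspected directly on a node with standard tools. A minimal sketch; the exact pod- and policy-specific chain names are generated by kube-router at runtime, so the `grep` below is just a convenient filter:

```sh
# Show the filter table; kube-router's pod- and policy-specific chains
# carry generated names, so filter for them rather than guessing
iptables -t filter -L -n --line-numbers | grep -i kube

# List the names of the ipsets backing the network policy rules
ipset -n list
```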
62 | 63 | ![Pod firewall chain](./img/podfw.png) 64 | 65 | Each policy chain has rules expressed through source and destination ipsets. The set 66 | of Pods matching the ingress rule in the network policy spec forms the source Pod IP 67 | ipset, and the set of Pods matching the pod selector (for destination Pods) in the Network 68 | Policy forms the destination Pod IP ipset. 69 | 70 | ![Policy chain](./img/policyfw.png) 71 | 72 | Finally, ipsets are created and used to form the rules in the Network 73 | Policy specific chain. 74 | 75 | ![ipset](./img/ipset.jpg) 76 | 77 | At runtime, kube-router watches the Kubernetes API server for changes to 78 | namespaces, network policies and pods, and dynamically updates the iptables and ipset 79 | configuration to reflect the desired state of the ingress firewall for the pods. 80 | 81 | ## Pod Networking 82 | 83 | Blog: [Kubernetes pod networking and beyond with BGP](https://cloudnativelabs.github.io/post/2017-05-22-kube-pod-networking) 84 | 85 | Kube-router is expected to run on each Node. The subnet of the Node is obtained 86 | from the CNI configuration file on the Node or through the Node.PodCidr. Each 87 | kube-router instance on the Node acts as a BGP router and advertises the Pod 88 | CIDR assigned to the Node. Each Node peers with the rest of the Nodes in the cluster, 89 | forming a full mesh. Routes to the Pod CIDRs learned from the other Nodes (BGP 90 | peers) are injected into the local Node's routing table. On the data path, inter-Node 91 | Pod-to-Pod communication is handled by the routing stack on the Node. 92 | -------------------------------------------------------------------------------- /docs/metrics.md: -------------------------------------------------------------------------------- 1 | # Metrics 2 | 3 | ## Scraping kube-router metrics with Prometheus 4 | 5 | The scope of this document is to describe how to set up the 6 | [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) needed for 7 | [Prometheus](https://prometheus.io/) to use 8 | [Kubernetes SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#) to 9 | discover & scrape kube-router [pods](https://kubernetes.io/docs/concepts/workloads/pods/pod/). 10 | 11 | For help with installing Prometheus please see their [docs](https://prometheus.io/docs/introduction/overview/) 12 | 13 | Metrics options: 14 | 15 | ```sh 16 | --metrics-path string Path to serve Prometheus metrics on ( default: /metrics ) 17 | --metrics-port uint16 <0-65535> Prometheus metrics port to use ( default: 0, disabled ) 18 | ``` 19 | 20 | To enable kube-router metrics, start kube-router with `--metrics-port` and provide a port greater than 0 21 | 22 | Metrics are generally exported at the same rate as the sync period for each service; service metrics are exported in real time. 23 | 24 | The default values, unless otherwise specified, are 25 | 26 | * iptables-sync-period - `1 min` 27 | * routes-sync-period - `1 min` 28 | 29 | By enabling 30 | [Kubernetes SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#) in 31 | the Prometheus configuration & adding the required annotations, Prometheus can automatically discover & scrape kube-router metrics 32 | 33 | ## Version notes 34 | 35 | kube-router v0.2.4 received a metrics overhaul where some metrics were changed into histograms; additional metrics were 36 | also added.
Please make sure you are using the latest dashboard version with kube-router versions >= v0.2.4 37 | 38 | kube-router 0.1.0-rc2 and upwards supports runtime configuration for controlling where to expose the metrics. If 39 | you are using an older version, the metrics path & port are locked to `/metrics` & `8080` 40 | 41 | ## Available metrics 42 | 43 | If metrics are enabled, only the services that are running have their metrics exposed 44 | 45 | The following metrics are exposed by kube-router, prefixed by `kube_router_` 46 | 47 | ### run-router = true 48 | 49 | * controller_bgp_peers 50 | Number of BGP peers of the instance 51 | * controller_bgp_advertisements_received 52 | Total number of BGP advertisements received since kube-router started 53 | * controller_bgp_advertisements_sent 54 | Total number of BGP advertisements sent since kube-router started 55 | * controller_bgp_internal_peers_sync_time 56 | Time it took for the BGP internal peer sync loop to complete 57 | * controller_routes_sync_time 58 | Time it took for the controller to sync routes 59 | 60 | ### run-firewall=true 61 | 62 | * controller_iptables_sync_time 63 | Time it took for the iptables sync loop to complete 64 | * controller_policy_chains_sync_time 65 | Time it took for the controller to sync policy chains 66 | 67 | ### run-service-proxy = true 68 | 69 | * controller_ipvs_services_sync_time 70 | Time it took for the ipvs sync loop to complete 71 | * controller_ipvs_services 72 | The number of ipvs services in the instance 73 | * controller_ipvs_metrics_export_time 74 | The time it took to run the metrics export for IPVS services 75 | * service_total_connections 76 | Total connections made to the service since creation 77 | * service_packets_in 78 | Total number of packets received by the service 79 | * service_packets_out 80 | Total number of packets sent by the service 81 | * service_bytes_in 82 | Total bytes received by the service 83 | * service_bytes_out 84 | Total bytes sent by the service 85 | * service_pps_in 86 | Incoming packets per second 87 | * service_pps_out 88 | Outgoing packets per second 89 | * service_cps 90 | Connections per second 91 | * service_bps_in 92 | Incoming bytes per second 93 | * service_bps_out 94 | Outgoing bytes per second 95 | 96 | To get a grouped list of CPS for each service, a Prometheus query could look like this: 97 | `sum(kube_router_service_cps) by (svc_namespace, service_name)` 98 | 99 | ## Grafana Dashboard 100 | 101 | This repo contains an example 102 | [Grafana dashboard](https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/dashboard/kube-router.json) 103 | utilizing all the above exposed metrics from kube-router. 104 | ![dashboard](https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/dashboard/dashboard.png) 105 |
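To quickly verify what is being exposed before wiring up Prometheus or Grafana, the metrics endpoint can be queried directly from a node. A minimal sketch, where the address and port are placeholders for your node's IP and whatever value you passed to `--metrics-port`:

```sh
# NODE_IP and METRICS_PORT are placeholders; substitute your node's address
# and the port you passed via --metrics-port
NODE_IP=10.0.0.10
METRICS_PORT=8080
curl -s "http://${NODE_IP}:${METRICS_PORT}/metrics" | grep '^kube_router_'
```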
-------------------------------------------------------------------------------- /pkg/routes/route_sync.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | "time" 8 | 9 | "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" 10 | "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" 11 | "github.com/prometheus/client_golang/prometheus" 12 | "github.com/vishvananda/netlink" 13 | "k8s.io/klog/v2" 14 | ) 15 | 16 | type RouteSyncErr struct { 17 | route *netlink.Route 18 | err error 19 | } 20 | 21 | func (rse RouteSyncErr) Error() string { 22 | return fmt.Sprintf("route (%s) encountered the following error while being acted upon: %v", rse.route, rse.err) 23 | } 24 | 25 | // RouteSync is a struct that holds all of the information needed for syncing routes to the kernel's routing table 26 | type RouteSync struct { 27 | routeTableStateMap map[string]*netlink.Route 28 | injectedRoutesSyncPeriod time.Duration 29 | mutex sync.Mutex 30 | routeReplacer func(route *netlink.Route) error 31 | metricsEnabled bool 32 | } 33 | 34 | // AddInjectedRoute adds a route to the route map that is regularly synced to the kernel's routing table 35 | func (rs *RouteSync) AddInjectedRoute(dst *net.IPNet, route *netlink.Route) { 36 | rs.mutex.Lock() 37 | defer rs.mutex.Unlock() 38 | klog.V(3).Infof("Adding route for destination: %s", dst) 39 | rs.routeTableStateMap[dst.String()] = route 40 | if rs.metricsEnabled { 41 | metrics.ControllerHostRoutesAdded.Inc() 42 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 43 | } 44 | } 45 | 46 | // DelInjectedRoute deletes a route from the route map that is regularly synced to the kernel's routing table 47 | func (rs *RouteSync) DelInjectedRoute(dst *net.IPNet) { 48 | rs.mutex.Lock() 49 | defer rs.mutex.Unlock() 50 | if _, ok := rs.routeTableStateMap[dst.String()]; ok { 51 | klog.V(3).Infof("Removing route for destination: %s", dst) 52 | delete(rs.routeTableStateMap, dst.String()) 53 | } 54 | if rs.metricsEnabled { 55 | metrics.ControllerHostRoutesRemoved.Inc() 56 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 57 | } 58 | } 59 | 60 | // SyncLocalRouteTable iterates over the local route state map and syncs all routes to the kernel's routing table 61 | func (rs *RouteSync) SyncLocalRouteTable() error { 62 | if rs.metricsEnabled { 63 | startSyncTime := time.Now() 64 | defer func(startTime time.Time) { 65 | runTime := time.Since(startTime) 66 | metrics.ControllerHostRoutesSyncTime.Observe(runTime.Seconds()) 67 | }(startSyncTime) 68 | } 69 | rs.mutex.Lock() 70 | defer rs.mutex.Unlock() 71 | klog.V(2).Infof("Running local route table synchronization") 72 | for _, route := range rs.routeTableStateMap { 73 | klog.V(3).Infof("Syncing route: %s -> %s via %s", route.Src, route.Dst, route.Gw) 74 | err := rs.routeReplacer(route) 75 | if err != nil { 76 | return RouteSyncErr{ 77 | route: route, 78 | err: err, 79 | } 80 | } 81 | } 82 | if rs.metricsEnabled { 83 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 84 | } 85 | return nil 86 | } 87 | 88 | // Run starts a goroutine that calls SyncLocalRouteTable on the interval injectedRoutesSyncPeriod 89 | func (rs *RouteSync) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, 90 | wg *sync.WaitGroup) { 91 | // Start
route synchronization routine 92 | wg.Add(1) 93 | go func(stopCh <-chan struct{}, wg *sync.WaitGroup) { 94 | defer wg.Done() 95 | t := time.NewTicker(rs.injectedRoutesSyncPeriod) 96 | defer t.Stop() 97 | for { 98 | select { 99 | case <-t.C: 100 | err := rs.SyncLocalRouteTable() 101 | if err != nil { 102 | klog.Errorf("route could not be replaced due to: %v", err) 103 | } 104 | // Some of our unit tests send a nil health channel 105 | if nil != healthChan && err == nil { 106 | healthcheck.SendHeartBeat(healthChan, healthcheck.RouteSyncController) 107 | } 108 | case <-stopCh: 109 | klog.Infof("Shutting down local route synchronization") 110 | return 111 | } 112 | } 113 | }(stopCh, wg) 114 | } 115 | 116 | // NewRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every 117 | // syncPeriod 118 | func NewRouteSyncer(syncPeriod time.Duration, registerMetrics bool) *RouteSync { 119 | rs := RouteSync{} 120 | rs.routeTableStateMap = make(map[string]*netlink.Route) 121 | rs.injectedRoutesSyncPeriod = syncPeriod 122 | rs.mutex = sync.Mutex{} 123 | // We substitute the RouteReplace function here so that we can easily monkey patch it in our unit tests 124 | rs.routeReplacer = netlink.RouteReplace 125 | rs.metricsEnabled = registerMetrics 126 | 127 | // Register Metrics 128 | if registerMetrics { 129 | prometheus.MustRegister(metrics.ControllerHostRoutesSynced, metrics.ControllerHostRoutesSyncTime, 130 | metrics.ControllerHostRoutesAdded, metrics.ControllerHostRoutesRemoved) 131 | } 132 | 133 | return &rs 134 | } 135 | -------------------------------------------------------------------------------- /testdata/ipset_test_1/ipset_save.txt: -------------------------------------------------------------------------------- 1 | create inet6:kube-router-pod-subnets hash:net family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x8c48e8f6 2 | add inet6:kube-router-pod-subnets 2001:db8:42:1001::/64 timeout 0 3 | add inet6:kube-router-pod-subnets 2001:db8:42:1000::/64 timeout 0 4 | create inet6:kube-router-node-ips hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xc1c0ab78 5 | add inet6:kube-router-node-ips 2001:db8:ca2:2::2ca1 timeout 0 6 | add inet6:kube-router-node-ips 2001:db8:ca2:2::e7e5 timeout 0 7 | create kube-router-pod-subnets hash:net family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x8c2211f1 8 | add kube-router-pod-subnets 10.242.0.0/24 timeout 0 9 | add kube-router-pod-subnets 10.242.1.0/24 timeout 0 10 | create kube-router-node-ips hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xceda0b01 11 | add kube-router-node-ips 10.241.0.20 timeout 0 12 | add kube-router-node-ips 10.241.0.21 timeout 0 13 | create kube-router-local-ips hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x03a01482 14 | add kube-router-local-ips 127.0.0.1 timeout 0 15 | add kube-router-local-ips 10.241.0.21 timeout 0 16 | create kube-router-svip hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xd1a8e751 17 | add kube-router-svip 10.243.0.1 timeout 0 18 | add kube-router-svip 10.96.156.243 timeout 0 19 | add kube-router-svip 10.96.255.137 timeout 0 20 | add kube-router-svip 10.243.0.10 timeout 0 21 | add kube-router-svip 10.96.0.1 timeout 0 22 | add kube-router-svip 10.96.115.202 timeout 0 23 | add kube-router-svip 10.96.243.193 timeout 0 24 | add kube-router-svip 10.96.0.10 timeout 0 25 | create kube-router-svip-prt 
hash:ip,port family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xc5567ea9 26 | add kube-router-svip-prt 10.96.115.202,udp:5001 timeout 0 27 | add kube-router-svip-prt 10.96.0.1,tcp:443 timeout 0 28 | add kube-router-svip-prt 10.96.0.10,tcp:9153 timeout 0 29 | add kube-router-svip-prt 10.96.243.193,tcp:5000 timeout 0 30 | add kube-router-svip-prt 10.96.115.202,tcp:5000 timeout 0 31 | add kube-router-svip-prt 10.96.156.243,tcp:5000 timeout 0 32 | add kube-router-svip-prt 10.243.0.10,tcp:5000 timeout 0 33 | add kube-router-svip-prt 10.243.0.1,tcp:5000 timeout 0 34 | add kube-router-svip-prt 10.96.0.10,tcp:53 timeout 0 35 | add kube-router-svip-prt 10.96.0.10,udp:53 timeout 0 36 | add kube-router-svip-prt 10.96.255.137,tcp:80 timeout 0 37 | create inet6:kube-router-local-ips hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x1ac0b76e 38 | add inet6:kube-router-local-ips fe80::e0c7:dbff:fe6b:2d6 timeout 0 39 | add inet6:kube-router-local-ips fe80::5054:ff:fe66:d30b timeout 0 40 | add inet6:kube-router-local-ips fe80::f0eb:4cff:fefc:6cce timeout 0 41 | add inet6:kube-router-local-ips fe80::3013:9fff:fe8f:892c timeout 0 42 | add inet6:kube-router-local-ips 2001:db8:ca2:2::e7e5 timeout 0 43 | add inet6:kube-router-local-ips ::1 timeout 0 44 | add inet6:kube-router-local-ips fe80::8403:21ff:fee4:5935 timeout 0 45 | create inet6:kube-router-svip hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x62f79196 46 | add inet6:kube-router-svip 2001:db8:42:1::bd90 timeout 0 47 | add inet6:kube-router-svip 2001:db8:42:1::8e7d timeout 0 48 | create inet6:kube-router-svip-prt hash:ip,port family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xb27296db 49 | add inet6:kube-router-svip-prt 2001:db8:42:1::bd90,tcp:5000 timeout 0 50 | add inet6:kube-router-svip-prt 2001:db8:42:1::8e7d,tcp:5000 timeout 0 51 | create KUBE-DST-IZ5JGF4W6BKI7CEV hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x44a1a05a 52 | add KUBE-DST-IZ5JGF4W6BKI7CEV 10.242.0.5 timeout 0 53 | add KUBE-DST-IZ5JGF4W6BKI7CEV 10.242.1.4 timeout 0 54 | create KUBE-DST-P226T6HGLTLDGDLC hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x9c28f0c3 55 | add KUBE-DST-P226T6HGLTLDGDLC 10.242.0.5 timeout 0 56 | add KUBE-DST-P226T6HGLTLDGDLC 10.242.1.4 timeout 0 57 | create KUBE-SRC-C2EUPKL23CCP4COF hash:ip family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x889d1bd6 58 | create inet6:KUBE-DST-GZIEGRFLGG6BN3N7 hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x9748e0b2 59 | add inet6:KUBE-DST-GZIEGRFLGG6BN3N7 2001:db8:42:1001::4 timeout 0 60 | add inet6:KUBE-DST-GZIEGRFLGG6BN3N7 2001:db8:42:1000::5 timeout 0 61 | create KUBE-SRC-KLLOHJLFVAV654Z2 hash:net family inet hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0xc50a569f 62 | add KUBE-SRC-KLLOHJLFVAV654Z2 10.95.0.239 timeout 0 63 | create inet6:KUBE-DST-KSECZGZUJPU4SKA3 hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x3194fa68 64 | add inet6:KUBE-DST-KSECZGZUJPU4SKA3 2001:db8:42:1001::4 timeout 0 65 | add inet6:KUBE-DST-KSECZGZUJPU4SKA3 2001:db8:42:1000::5 timeout 0 66 | create inet6:KUBE-SRC-VY6KNKQ4BF6Y4J2K hash:ip family inet6 hashsize 1024 maxelem 65536 timeout 0 bucketsize 12 initval 0x0d5ebe1d 67 | -------------------------------------------------------------------------------- /pkg/controllers/proxy/hairpin_controller.go: 
-------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "path" 8 | "runtime" 9 | "sync" 10 | "time" 11 | 12 | "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" 13 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 14 | "github.com/vishvananda/netns" 15 | "k8s.io/klog/v2" 16 | ) 17 | 18 | // !!!! IMPORTANT !!!! - This code is not currently used 19 | // Not creating the hairpin controller for now because this should be handled at the CNI level. The CNI bridge 20 | // plugin ensures that hairpin mode is set much more reliably than we do. However, as a lot of work was put into 21 | // the hairpin controller, and so that it is around to reference in the future if needed, the code is being left 22 | // in place for now. 23 | 24 | type hairpinController struct { 25 | epC <-chan string 26 | nsc *NetworkServicesController 27 | } 28 | 29 | func (hpc *hairpinController) Run(stopCh <-chan struct{}, wg *sync.WaitGroup, 30 | healthChan chan<- *healthcheck.ControllerHeartbeat) { 31 | defer wg.Done() 32 | klog.Infof("Starting hairpin controller (handles setting hairpin_mode for veth interfaces)") 33 | 34 | t := time.NewTicker(healthcheck.HPCSyncPeriod) 35 | defer t.Stop() 36 | for { 37 | // Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first 38 | select { 39 | case <-stopCh: 40 | klog.Info("Shutting down Hairpin Controller goroutine") 41 | return 42 | default: 43 | } 44 | select { 45 | case <-stopCh: 46 | klog.Info("Shutting down Hairpin Controller goroutine") 47 | return 48 | case endpointIP := <-hpc.epC: 49 | klog.V(1).Infof("Received request for hairpin setup of endpoint %s, processing", endpointIP) 50 | err := hpc.ensureHairpinEnabledForPodInterface(endpointIP) 51 | if err != nil { 52 | klog.Errorf("unable to set hairpin mode for endpoint %s, it's possible that hairpinning will not "+ 53 | "work as expected. Error was: %v", 54 | endpointIP, err) 55 | } 56 | case <-t.C: 57 | healthcheck.SendHeartBeat(healthChan, healthcheck.HairpinController) 58 | } 59 | } 60 | } 61 | 62 | func (hpc *hairpinController) ensureHairpinEnabledForPodInterface(endpointIP string) error { 63 | klog.V(2).Infof("Attempting to enable hairpin mode for endpoint IP %s", endpointIP) 64 | crRuntime, containerID, err := hpc.nsc.findContainerRuntimeReferences(endpointIP) 65 | if err != nil { 66 | return err 67 | } 68 | klog.V(2).Infof("Detected runtime %s and container ID %s for endpoint IP %s", crRuntime, containerID, endpointIP) 69 | 70 | runtime.LockOSThread() 71 | defer runtime.UnlockOSThread() 72 | 73 | hostNetworkNSHandle, err := netns.Get() 74 | if err != nil { 75 | return fmt.Errorf("failed to get namespace due to %v", err) 76 | } 77 | defer utils.CloseCloserDisregardError(&hostNetworkNSHandle) 78 | 79 | var pid int 80 | if crRuntime == "docker" { 81 | // WARN: This method is deprecated and will be removed once docker-shim is removed from kubelet.
82 | pid, err = hpc.nsc.ln.getContainerPidWithDocker(containerID) 83 | if err != nil { 84 | return fmt.Errorf("failed to get pod's (%s) pid for hairpinning due to %v", endpointIP, err) 85 | } 86 | } else { 87 | // We expect CRI compliant runtimes here 88 | // ugly workaround, refactoring of pkg/Proxy is required 89 | pid, err = hpc.nsc.ln.getContainerPidWithCRI(hpc.nsc.dsr.runtimeEndpoint, containerID) 90 | if err != nil { 91 | return fmt.Errorf("failed to get pod's (%s) pid for hairpinning due to %v", endpointIP, err) 92 | } 93 | } 94 | klog.V(2).Infof("Found PID %d for endpoint IP %s", pid, endpointIP) 95 | 96 | // Get the interface link ID from inside the container so that we can link it to the veth on the host namespace 97 | ifaceID, err := hpc.nsc.ln.findIfaceLinkForPid(pid) 98 | if err != nil { 99 | return fmt.Errorf("failed to find the interface ID inside the container NS for endpoint IP: %s, due to: %v", 100 | endpointIP, err) 101 | } 102 | klog.V(2).Infof("Found Interface Link ID %d for endpoint IP %s", ifaceID, endpointIP) 103 | 104 | ifaceName, err := net.InterfaceByIndex(ifaceID) 105 | if err != nil { 106 | return fmt.Errorf("failed to get the interface name from the link ID inside the container for endpoint IP: "+ 107 | "%s and Interface ID: %d due to: %v", endpointIP, ifaceID, err) 108 | } 109 | 110 | klog.V(1).Infof("Enabling hairpin for interface %s for endpoint IP %s", ifaceName.Name, endpointIP) 111 | hpPath := path.Join(sysFSVirtualNetPath, ifaceName.Name, sysFSHairpinRelPath) 112 | if _, err := os.Stat(hpPath); err != nil { 113 | return fmt.Errorf("hairpin path %s doesn't appear to exist for us to set", hpPath) 114 | } 115 | 116 | return os.WriteFile(hpPath, []byte(hairpinEnable), 0644) 117 | } 118 | 119 | func NewHairpinController(nsc *NetworkServicesController, endpointCh <-chan string) *hairpinController { 120 | hpc := hairpinController{ 121 | nsc: nsc, 122 | epC: endpointCh, 123 | } 124 | 125 | return &hpc 126 | } 127 | -------------------------------------------------------------------------------- /pkg/utils/service.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | v1core "k8s.io/api/core/v1" 8 | discoveryv1 "k8s.io/api/discovery/v1" 9 | "k8s.io/client-go/tools/cache" 10 | "k8s.io/klog/v2" 11 | ) 12 | 13 | const ( 14 | IPInIPHeaderLength = 20 15 | ) 16 | 17 | // ServiceNameforEndpointSlice returns the name of the service that created the EndpointSlice for a given EndpointSlice 18 | // 19 | // With endpoints, the name of the endpoint object always matches the service object, however when it comes to 20 | // EndpointSlices, things work a bit differently, as k8s' controller will autogenerate it (something like: foo-kl29b) 21 | // 22 | // We can get service information from a number of spots: 23 | // * From the ownerReferences in the metadata EndpointSlice -> metadata -> ownerReferences[0] -> name 24 | // * We can also get this from the label: kubernetes.io/service-name 25 | // * generateName will also contain the prefix for the autogenerated name which should align with our service name 26 | // 27 | // We'll walk through all of these and do our best to identify the service's name; if we aren't able to find any of these, 28 | // or they disagree with each other, we'll throw an error 29 | func ServiceNameforEndpointSlice(es *discoveryv1.EndpointSlice) (string, error) { 30 | const serviceNameLabel = "kubernetes.io/service-name" 31 | var ownerRefName, labelSvcName,
generateName, finalSvcName string 32 | 33 | ownerRef := es.GetObjectMeta().GetOwnerReferences() 34 | if len(ownerRef) == 1 { 35 | ownerRefName = ownerRef[0].Name 36 | } 37 | 38 | labels := es.GetObjectMeta().GetLabels() 39 | if svcLabel, ok := labels[serviceNameLabel]; ok { 40 | labelSvcName = svcLabel 41 | } 42 | 43 | if es.GetObjectMeta().GetGenerateName() != "" { 44 | generateName = strings.TrimRight(es.GetObjectMeta().GetGenerateName(), "-") 45 | } 46 | 47 | if ownerRefName == "" && labelSvcName == "" && generateName == "" { 48 | return "", fmt.Errorf("all identifiers for service are empty on this EndpointSlice, unable to determine "+ 49 | "owning service for: %s/%s", es.Namespace, es.Name) 50 | } 51 | 52 | // Take things in an order of precedence here: generateName < ownerRefName < labelSvcName 53 | finalSvcName = generateName 54 | if ownerRefName != "" { 55 | finalSvcName = ownerRefName 56 | } 57 | if labelSvcName != "" { 58 | finalSvcName = labelSvcName 59 | } 60 | 61 | // At this point we do some checks to ensure that the final owning service name is sane. Specifically, we want to 62 | // check it against labelSvcName and ownerRefName if they were not blank and log a debug log if they don't agree. We 63 | // don't worry about generateName as that is less conclusive. 64 | // 65 | // See: https://github.com/cloudnativelabs/kube-router/issues/1957 for more information 66 | if ownerRefName != "" && finalSvcName != ownerRefName { 67 | klog.V(1).Infof("The metadata ownerReference name %s and the label service name (%s) %s don't appear to "+ 68 | "match for EndpointSlice %s/%s. In this case we prefer the label service name.", 69 | ownerRefName, serviceNameLabel, labelSvcName, es.Namespace, es.Name) 70 | } 71 | 72 | return finalSvcName, nil 73 | } 74 | 75 | // ServiceForEndpointSlice, given an EndpointSlice object, returns the Service API object if it exists 76 | func ServiceForEndpointSlice(ci *cache.Indexer, es *discoveryv1.EndpointSlice) (any, bool, error) { 77 | svcName, err := ServiceNameforEndpointSlice(es) 78 | if err != nil { 79 | return nil, false, err 80 | } 81 | 82 | // The key that we're looking for here is just namespace/serviceName 83 | key := fmt.Sprintf("%s/%s", es.Namespace, svcName) 84 | klog.V(2).Infof("key for looking up service from EndpointSlice is: %s", key) 85 | 86 | item, exists, err := (*ci).GetByKey(key) 87 | if err != nil { 88 | return nil, false, err 89 | } 90 | 91 | if !exists { 92 | return nil, false, nil 93 | } 94 | 95 | return item, true, nil 96 | } 97 | 98 | // ServiceHasNoClusterIP decides whether or not this service is a headless service, which is often useful to 99 | // kube-router as there is no need to execute logic on most headless changes. The function takes a generic interface as its 100 | // input parameter so that it can be used more easily in early processing if needed. If a non-service object is given, 101 | // the function will return false.
102 | func ServiceHasNoClusterIP(obj any) bool {
103 | if svc, _ := obj.(*v1core.Service); svc != nil {
104 | if svc.Spec.Type == v1core.ServiceTypeClusterIP {
105 | if ClusterIPIsNone(svc.Spec.ClusterIP) && containsOnlyNone(svc.Spec.ClusterIPs) {
106 | return true
107 | }
108 | }
109 | }
110 | return false
111 | }
112 | 
113 | // ClusterIPIsNone checks to see whether the ClusterIP contains "None" which would indicate that it is headless
114 | func ClusterIPIsNone(clusterIP string) bool {
115 | return strings.ToLower(clusterIP) == "none"
116 | }
117 | 
118 | // ClusterIPIsNoneOrBlank checks to see whether the ClusterIP contains "None" or is blank
119 | func ClusterIPIsNoneOrBlank(clusterIP string) bool {
120 | return ClusterIPIsNone(clusterIP) || clusterIP == ""
121 | }
122 | 
123 | func containsOnlyNone(clusterIPs []string) bool {
124 | for _, clusterIP := range clusterIPs {
125 | if !ClusterIPIsNone(clusterIP) {
126 | return false
127 | }
128 | }
129 | return true
130 | }
131 | 
-------------------------------------------------------------------------------- /pkg/routes/pbr.go:
--------------------------------------------------------------------------------
1 | package routes
2 | 
3 | import (
4 | "fmt"
5 | "net"
6 | 
7 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils"
8 | "github.com/vishvananda/netlink"
9 | )
10 | 
11 | const (
12 | PBRRuleAdd = iota
13 | PBRRuleDel
14 | )
15 | 
16 | const (
17 | // CustomTableID is the ID of the custom, iproute2 routing table that will be used for policy based routing
18 | CustomTableID = 77
19 | // CustomTableName is the name of the custom, iproute2 routing table that will be used for policy based routing
20 | CustomTableName = "kube-router"
21 | )
22 | 
23 | // PolicyBasedRules is a struct that holds all of the information needed for manipulating policy based routing rules
24 | type PolicyBasedRules struct {
25 | nfa utils.NodeFamilyAware
26 | podIPv4CIDRs []string
27 | podIPv6CIDRs []string
28 | }
29 | 
30 | // NewPolicyBasedRules creates a new PBR object which will be used to manipulate policy based routing rules
31 | func NewPolicyBasedRules(nfa utils.NodeFamilyAware, podIPv4CIDRs, podIPv6CIDRs []string) *PolicyBasedRules {
32 | return &PolicyBasedRules{
33 | nfa: nfa,
34 | podIPv4CIDRs: podIPv4CIDRs,
35 | podIPv6CIDRs: podIPv6CIDRs,
36 | }
37 | }
38 | 
39 | // ipRuleAbstraction abstracts iproute2 rule additions between IPv4 and IPv6 for both add and del operations.
40 | // ipFamily is the netlink address family (netlink.FAMILY_V4 or netlink.FAMILY_V6). ipOp is the rule operation
41 | // (PBRRuleAdd or PBRRuleDel). The cidr is the IPv4 / IPv6 source CIDR string that will be used to look up
42 | // routes in a custom table.
43 | func ipRuleAbstraction(ipFamily int, ipOp int, cidr string) error {
44 | _, nSrc, err := net.ParseCIDR(cidr)
45 | if err != nil {
46 | return fmt.Errorf("failed to parse CIDR: %s", err.Error())
47 | }
48 | 
49 | nRule := netlink.NewRule()
50 | nRule.Family = ipFamily
51 | nRule.Src = nSrc
52 | nRule.Table = CustomTableID
53 | 
54 | // If the rule that we are abstracting has either the src or dst set to a default route, then we need to handle it
55 | // differently.
For more information, see: https://github.com/vishvananda/netlink/issues/1080
56 | // TODO: If the above issue is resolved, some of the below logic can be removed
57 | rules := make([]netlink.Rule, 0)
58 | isDefaultRoute, err := utils.IsDefaultRoute(nSrc)
59 | if err != nil {
60 | return fmt.Errorf("failed to check if CIDR is a default route: %v", err)
61 | }
62 | 
63 | if isDefaultRoute {
64 | var tmpRules []netlink.Rule
65 | tmpRules, err = netlink.RuleListFiltered(ipFamily, nRule, netlink.RT_FILTER_TABLE)
66 | if err != nil {
67 | return fmt.Errorf("failed to list rules: %s", err.Error())
68 | }
69 | 
70 | // Check if one or more of the rules returned are a default route rule
71 | for _, rule := range tmpRules {
72 | // If the rule has no src, then it is a default route rule, which is what we are matching on
73 | if rule.Src == nil {
74 | rules = append(rules, rule)
75 | }
76 | }
77 | } else {
78 | rules, err = netlink.RuleListFiltered(ipFamily, nRule, netlink.RT_FILTER_SRC|netlink.RT_FILTER_TABLE)
79 | if err != nil {
80 | return fmt.Errorf("failed to list rules: %s", err.Error())
81 | }
82 | }
83 | 
84 | if ipOp == PBRRuleDel && len(rules) > 0 {
85 | if err := netlink.RuleDel(nRule); err != nil {
86 | return fmt.Errorf("failed to delete rule: %s", err.Error())
87 | }
88 | } else if ipOp == PBRRuleAdd && len(rules) < 1 {
89 | if err := netlink.RuleAdd(nRule); err != nil {
90 | return fmt.Errorf("failed to add rule: %s", err.Error())
91 | }
92 | }
93 | 
94 | return nil
95 | }
96 | 
97 | // Enable sets up a custom routing table that will be used for policy based routing to ensure that traffic
98 | // originating on the tunnel interface only leaves through the tunnel interface, irrespective of whether rp_filter is enabled or disabled
99 | func (pbr *PolicyBasedRules) Enable() error {
100 | err := utils.RouteTableAdd(CustomTableID, CustomTableName)
101 | if err != nil {
102 | return fmt.Errorf("failed to update rt_tables file: %s", err)
103 | }
104 | 
105 | if pbr.nfa.IsIPv4Capable() {
106 | for _, ipv4CIDR := range pbr.podIPv4CIDRs {
107 | if err := ipRuleAbstraction(netlink.FAMILY_V4, PBRRuleAdd, ipv4CIDR); err != nil {
108 | return err
109 | }
110 | }
111 | }
112 | if pbr.nfa.IsIPv6Capable() {
113 | for _, ipv6CIDR := range pbr.podIPv6CIDRs {
114 | if err := ipRuleAbstraction(netlink.FAMILY_V6, PBRRuleAdd, ipv6CIDR); err != nil {
115 | return err
116 | }
117 | }
118 | }
119 | 
120 | return nil
121 | }
122 | 
123 | // Disable removes the policy based routing rules that send traffic through the custom routing table
124 | func (pbr *PolicyBasedRules) Disable() error {
125 | err := utils.RouteTableAdd(CustomTableID, CustomTableName)
126 | if err != nil {
127 | return fmt.Errorf("failed to update rt_tables file: %s", err)
128 | }
129 | 
130 | if pbr.nfa.IsIPv4Capable() {
131 | for _, ipv4CIDR := range pbr.podIPv4CIDRs {
132 | if err := ipRuleAbstraction(netlink.FAMILY_V4, PBRRuleDel, ipv4CIDR); err != nil {
133 | return err
134 | }
135 | }
136 | }
137 | if pbr.nfa.IsIPv6Capable() {
138 | for _, ipv6CIDR := range pbr.podIPv6CIDRs {
139 | if err := ipRuleAbstraction(netlink.FAMILY_V6, PBRRuleDel, ipv6CIDR); err != nil {
140 | return err
141 | }
142 | }
143 | }
144 | 
145 | return nil
146 | }
147 | 
-------------------------------------------------------------------------------- /docs/developing.md:
--------------------------------------------------------------------------------
1 | # Developer's Guide
2 | 
3 | We aim to make local development and testing as straightforward as possible.
For
4 | basic guidelines around contributing, see the [CONTRIBUTING](/CONTRIBUTING.md) document.
5 | 
6 | There are a number of automation tools available to help with testing and
7 | building your changes, detailed below.
8 | 
9 | ## Building kube-router
10 | 
11 | ### Go version 1.19 or above is required to build kube-router
12 | 
13 | All the dependencies are specified as Go modules and will be fetched into your cache, so just run `make kube-router` or
14 | `go build cmd/kube-router/kube-router.go` to build.
15 | 
16 | ### Building A Docker Image
17 | 
18 | Running `make container` will compile kube-router (if needed) and build a Docker
19 | image. By default the image is tagged with the target architecture and the
20 | current branch name, as shown in the example below.
21 | 
22 | For example:
23 | 
24 | ```sh
25 | $ make container
26 | Building for GOARCH=amd64
27 | Verifying kube-router gobgp for ARCH=x86-64 ...
28 | Starting kube-router container image build for amd64 on amd64
29 | docker build -t "cloudnativelabs/kube-router-git:amd64-bug_fixes_for_v2.0.0" -f Dockerfile --build-arg ARCH="" \
30 | --build-arg BUILDTIME_BASE="golang:1.20.9-alpine3.18" --build-arg RUNTIME_BASE="alpine:3.18" .
31 | Sending build context to Docker daemon 198.6MB
32 | Step 1/19 : ARG BUILDTIME_BASE=golang:1-alpine
33 | Step 2/19 : ARG RUNTIME_BASE=alpine:latest
34 | Step 3/19 : FROM ${BUILDTIME_BASE} as builder
35 | ---> 6cbc3ac54aa3
36 | Step 4/19 : ENV BUILD_IN_DOCKER=false
37 | ---> Using cache
38 | ---> aec11cc4a0cd
39 | 
40 | ...
41 | 
42 | Removing intermediate container 371a162930f5
43 | ---> 1d3f742d559e
44 | Step 19/19 : ENTRYPOINT ["/usr/local/bin/kube-router"]
45 | ---> Running in d5ea6fda9fe4
46 | Removing intermediate container d5ea6fda9fe4
47 | ---> 17cfbc77e293
48 | [Warning] One or more build-args [ARCH] were not consumed
49 | Successfully built 17cfbc77e293
50 | Successfully tagged cloudnativelabs/kube-router-git:amd64-bug_fixes_for_v2.0.0
51 | Finished kube-router container image build.
52 | ```
53 | 
54 | The remaining portions of the container naming convention are described below:
55 | 
56 | * `kube-router-git` indicates that the container was built from git and not from a tag.
57 | * `amd64` indicates that it was built for the `amd64` architecture.
58 | * `bug_fixes_for_v2.0.0` indicates the branch that the user was on when it was built.
59 | 
60 | ### Pushing A Docker Image
61 | 
62 | Running `make push` will push your container image to a Docker registry. The default configuration will use the
63 | Docker Hub repository for the official kube-router images, cloudnativelabs/kube-router. You can push to a different
64 | repository by changing a couple of settings, as described in [Image Options](#image-options)
65 | below.
66 | 
67 | ### Makefile Options
68 | 
69 | There are several variables which can be modified in the Makefile to customize your builds. They are specified after
70 | your make command like this: `make OPTION=VALUE`. These options can also be set as environment variables.
71 | 
72 | For more details beyond the scope of this document, see the [Makefile](/Makefile) and run `make help`.
73 | 
74 | #### Image Options
75 | 
76 | You can configure the name and tag of the Docker image with a few variables
77 | passed to `make container` and `make push`.
78 | 
79 | Example:
80 | 
81 | ```sh
82 | $ make container IMG_FQDN=quay.io IMG_NAMESPACE=bzub IMG_TAG=custom
83 | docker build -t "quay.io/bzub/kube-router-git:custom" .
84 | Sending build context to Docker daemon 151.5MB
85 | Step 1/4 : FROM alpine
86 | ---> a41a7446062d
87 | Step 2/4 : RUN apk add --no-cache iptables ipset
88 | ---> Using cache
89 | ---> 30e25a7640de
90 | Step 3/4 : COPY kube-router /
91 | ---> Using cache
92 | ---> c06f78fd02e8
93 | Step 4/4 : ENTRYPOINT /kube-router
94 | ---> Using cache
95 | ---> 5cfcfe54623e
96 | Successfully built 5cfcfe54623e
97 | Successfully tagged quay.io/bzub/kube-router-git:custom
98 | ```
99 | 
100 | * `REGISTRY` is derived from other options. Set this to something else to
101 | quickly override the Docker image registry used to tag and push images.
102 | * Note: This will override other variables below that make up the image
103 | name/tag.
104 | * `IMG_FQDN` should be set if you are not using Docker Hub for images. In
105 | the examples above `IMG_FQDN` is set to `quay.io`.
106 | * `IMG_NAMESPACE` is the Docker registry user or organization. It is used in
107 | URLs.
108 | * Example: quay.io/IMG_NAMESPACE/kube-router
109 | * `NAME` goes onto the end of the Docker registry URL that will be used.
110 | * Example: quay.io/cloudnativelabs/NAME
111 | * `IMG_TAG` is used to override the tag of the Docker image being built.
112 | * `DEV_SUFFIX` is appended to Docker image names that are not for release. By
113 | default these images get a name ending with `-git` to signify that they are
114 | for testing purposes.
115 | Example (DEV_SUFFIX=master-latest): quay.io/cloudnativelabs/kube-router-git:master-latest
116 | 
117 | ## Release Workflow
118 | 
119 | See the [Release Documentation](/RELEASE.md) for more information.
120 | 
121 | ## Dependency Management
122 | 
123 | kube-router uses Go modules for managing dependencies; see the [upstream documentation](https://go.dev/blog/using-go-modules)
124 | for more information.
125 | 
-------------------------------------------------------------------------------- /daemonset/generic-kuberouter.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kube-router-cfg
5 | namespace: kube-system
6 | labels:
7 | tier: node
8 | k8s-app: kube-router
9 | data:
10 | cni-conf.json: |
11 | {
12 | "cniVersion":"0.3.0",
13 | "name":"mynet",
14 | "plugins":[
15 | {
16 | "name":"kubernetes",
17 | "type":"bridge",
18 | "bridge":"kube-bridge",
19 | "isDefaultGateway":true,
20 | "ipam":{
21 | "type":"host-local"
22 | }
23 | }
24 | ]
25 | }
26 | 
27 | ---
28 | apiVersion: apps/v1
29 | kind: DaemonSet
30 | metadata:
31 | labels:
32 | k8s-app: kube-router
33 | tier: node
34 | name: kube-router
35 | namespace: kube-system
36 | spec:
37 | selector:
38 | matchLabels:
39 | k8s-app: kube-router
40 | tier: node
41 | template:
42 | metadata:
43 | labels:
44 | k8s-app: kube-router
45 | tier: node
46 | spec:
47 | priorityClassName: system-node-critical
48 | serviceAccountName: kube-router
49 | containers:
50 | - name: kube-router
51 | image: docker.io/cloudnativelabs/kube-router
52 | imagePullPolicy: Always
53 | args:
54 | - "--run-router=true"
55 | - "--run-firewall=true"
56 | - "--run-service-proxy=false"
57 | - "--bgp-graceful-restart=true"
58 | env:
59 | - name: NODE_NAME
60 | valueFrom:
61 | fieldRef:
62 | fieldPath: spec.nodeName
63 | - name: POD_NAME
64 | valueFrom:
65 | fieldRef:
66 | fieldPath: metadata.name
67 | - name: KUBE_ROUTER_CNI_CONF_FILE
68 | value: /etc/cni/net.d/10-kuberouter.conflist
69 | livenessProbe:
70 | httpGet:
71 | path: /healthz
72 | port: 20244
73 | initialDelaySeconds: 10
74 | periodSeconds: 3 75
| resources: 76 | requests: 77 | cpu: 250m 78 | memory: 250Mi 79 | securityContext: 80 | privileged: true 81 | volumeMounts: 82 | - name: lib-modules 83 | mountPath: /lib/modules 84 | readOnly: true 85 | - name: cni-conf-dir 86 | mountPath: /etc/cni/net.d 87 | - name: xtables-lock 88 | mountPath: /run/xtables.lock 89 | readOnly: false 90 | initContainers: 91 | - name: install-cni 92 | image: docker.io/cloudnativelabs/kube-router 93 | imagePullPolicy: Always 94 | command: 95 | - /bin/sh 96 | - -c 97 | - set -e -x; 98 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 99 | if [ -f /etc/cni/net.d/*.conf ]; then 100 | rm -f /etc/cni/net.d/*.conf; 101 | fi; 102 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 103 | cp /etc/kube-router/cni-conf.json ${TMP}; 104 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 105 | fi; 106 | if [ -x /usr/local/bin/cni-install ]; then 107 | /usr/local/bin/cni-install; 108 | fi; 109 | volumeMounts: 110 | - mountPath: /etc/cni/net.d 111 | name: cni-conf-dir 112 | - mountPath: /etc/kube-router 113 | name: kube-router-cfg 114 | - name: host-opt 115 | mountPath: /opt 116 | hostNetwork: true 117 | hostPID: true 118 | tolerations: 119 | - effect: NoSchedule 120 | operator: Exists 121 | - key: CriticalAddonsOnly 122 | operator: Exists 123 | - effect: NoExecute 124 | operator: Exists 125 | volumes: 126 | - name: lib-modules 127 | hostPath: 128 | path: /lib/modules 129 | - name: cni-conf-dir 130 | hostPath: 131 | path: /etc/cni/net.d 132 | - name: kube-router-cfg 133 | configMap: 134 | name: kube-router-cfg 135 | - name: xtables-lock 136 | hostPath: 137 | path: /run/xtables.lock 138 | type: FileOrCreate 139 | - name: host-opt 140 | hostPath: 141 | path: /opt 142 | 143 | --- 144 | apiVersion: v1 145 | kind: ServiceAccount 146 | metadata: 147 | name: kube-router 148 | namespace: kube-system 149 | 150 | --- 151 | kind: ClusterRole 152 | apiVersion: rbac.authorization.k8s.io/v1 153 | metadata: 154 | name: kube-router 155 | namespace: kube-system 156 | rules: 157 | - apiGroups: 158 | - "" 159 | resources: 160 | - namespaces 161 | - pods 162 | - services 163 | - nodes 164 | - endpoints 165 | verbs: 166 | - list 167 | - get 168 | - watch 169 | - apiGroups: 170 | - "networking.k8s.io" 171 | resources: 172 | - networkpolicies 173 | verbs: 174 | - list 175 | - get 176 | - watch 177 | - apiGroups: 178 | - extensions 179 | resources: 180 | - networkpolicies 181 | verbs: 182 | - get 183 | - list 184 | - watch 185 | - apiGroups: 186 | - "coordination.k8s.io" 187 | resources: 188 | - leases 189 | verbs: 190 | - get 191 | - create 192 | - update 193 | - apiGroups: 194 | - "" 195 | resources: 196 | - services/status 197 | verbs: 198 | - update 199 | - apiGroups: 200 | - "discovery.k8s.io" 201 | resources: 202 | - endpointslices 203 | verbs: 204 | - get 205 | - list 206 | - watch 207 | 208 | --- 209 | kind: ClusterRoleBinding 210 | apiVersion: rbac.authorization.k8s.io/v1 211 | metadata: 212 | name: kube-router 213 | roleRef: 214 | apiGroup: rbac.authorization.k8s.io 215 | kind: ClusterRole 216 | name: kube-router 217 | subjects: 218 | - kind: ServiceAccount 219 | name: kube-router 220 | namespace: kube-system 221 | -------------------------------------------------------------------------------- /pkg/controllers/proxy/nodeport_healthcheck.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "strconv" 8 | "sync" 9 | "time" 10 | 11 | "k8s.io/klog/v2" 12 | ) 13 | 14 | type 
nodePortHealthCheckController struct { 15 | nphcServicesInfo 16 | activeNPHC map[int](chan<- struct{}) 17 | wg *sync.WaitGroup 18 | stopCh chan struct{} 19 | } 20 | 21 | type serviceHealthCheck struct { 22 | serviceID string 23 | nodePort int 24 | } 25 | 26 | type nphcServicesInfo struct { 27 | serviceInfoMap serviceInfoMap 28 | endpointsInfoMap endpointSliceInfoMap 29 | } 30 | 31 | type nphcHandler struct { 32 | svcHC *serviceHealthCheck 33 | nphc *nodePortHealthCheckController 34 | } 35 | 36 | func (nphc *nodePortHealthCheckController) UpdateServicesInfo(serviceInfoMap serviceInfoMap, 37 | endpointsInfoMap endpointSliceInfoMap) error { 38 | klog.V(1).Info("Running UpdateServicesInfo for NodePort health check") 39 | nphc.serviceInfoMap = serviceInfoMap 40 | nphc.endpointsInfoMap = endpointsInfoMap 41 | 42 | newActiveServices := make(map[int]bool) 43 | 44 | for svcID, svc := range serviceInfoMap { 45 | if svc.healthCheckNodePort != 0 { 46 | newActiveServices[svc.healthCheckNodePort] = true 47 | svcHC := serviceHealthCheck{ 48 | serviceID: svcID, 49 | nodePort: svc.healthCheckNodePort, 50 | } 51 | if nphc.healthCheckExists(svcHC) { 52 | continue 53 | } 54 | err := nphc.addHealthCheck(svcHC) 55 | if err != nil { 56 | return err 57 | } 58 | } 59 | } 60 | 61 | for np := range nphc.activeNPHC { 62 | if !newActiveServices[np] { 63 | err := nphc.stopHealthCheck(np) 64 | if err != nil { 65 | klog.Errorf("error stopping the NodePort healthcheck on NodePort %d: %v", np, err) 66 | } 67 | } 68 | } 69 | 70 | klog.V(1).Info("Finished UpdateServicesInfo for NodePort health check") 71 | return nil 72 | } 73 | 74 | func (nphc *nodePortHealthCheckController) healthCheckExists(svcHC serviceHealthCheck) bool { 75 | if _, ok := nphc.activeNPHC[svcHC.nodePort]; ok { 76 | return true 77 | } 78 | return false 79 | } 80 | 81 | func (nphc *nodePortHealthCheckController) addHealthCheck(svcHC serviceHealthCheck) error { 82 | klog.V(1).Infof("Adding NodePort health check for port: %d with svcid: %s", svcHC.nodePort, svcHC.serviceID) 83 | if nphc.healthCheckExists(svcHC) { 84 | return fmt.Errorf("unable to add healthcheck for NodePort %d as it is already taken", svcHC.nodePort) 85 | } 86 | closingChan := make(chan struct{}) 87 | nphc.activeNPHC[svcHC.nodePort] = closingChan 88 | 89 | nphc.wg.Add(1) 90 | go func(nphc *nodePortHealthCheckController, svcHC serviceHealthCheck, closingChan <-chan struct{}) { 91 | defer nphc.wg.Done() 92 | mux := http.NewServeMux() 93 | srv := &http.Server{ 94 | Addr: ":" + strconv.Itoa(svcHC.nodePort), 95 | Handler: mux, 96 | ReadHeaderTimeout: 5 * time.Second, 97 | } 98 | 99 | npHandler := nphcHandler{ 100 | svcHC: &svcHC, 101 | nphc: nphc, 102 | } 103 | mux.HandleFunc("/healthz", npHandler.Handler) 104 | 105 | nphc.wg.Add(1) 106 | go func(svcHC serviceHealthCheck) { 107 | defer nphc.wg.Done() 108 | klog.Infof("starting NodePort health controller on NodePort: %d", svcHC.nodePort) 109 | if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { 110 | // cannot panic, because this probably is an intentional close 111 | klog.Errorf("could not start NodePort health controller on NodePort %d: %s", svcHC.nodePort, err) 112 | } 113 | }(svcHC) 114 | 115 | // block until we receive a shut down signal on either our private channel or the global channel 116 | select { 117 | case <-closingChan: 118 | case <-nphc.stopCh: 119 | } 120 | klog.Infof("shutting down NodePort health controller on NodePort: %d", svcHC.nodePort) 121 | if err := srv.Shutdown(context.Background()); err != nil { 
122 | klog.Errorf("could not shutdown NodePort health controller on NodePort %d: %v", svcHC.nodePort, err)
123 | }
124 | 
125 | }(nphc, svcHC, closingChan)
126 | 
127 | return nil
128 | }
129 | 
130 | func (nphc *nodePortHealthCheckController) stopHealthCheck(nodePort int) error {
131 | if _, ok := nphc.activeNPHC[nodePort]; !ok {
132 | return fmt.Errorf("no NodePort health check currently exists for NodePort: %d", nodePort)
133 | }
134 | 
135 | svcStopCh := nphc.activeNPHC[nodePort]
136 | close(svcStopCh)
137 | 
138 | delete(nphc.activeNPHC, nodePort)
139 | 
140 | return nil
141 | }
142 | 
143 | func (npHandler *nphcHandler) Handler(w http.ResponseWriter, r *http.Request) {
144 | eps := npHandler.nphc.endpointsInfoMap[npHandler.svcHC.serviceID]
145 | endpointsOnNode := hasActiveEndpoints(eps)
146 | 
147 | var numActiveEndpoints int8
148 | for _, endpoint := range eps {
149 | if endpoint.isLocal && !endpoint.isTerminating {
150 | numActiveEndpoints++
151 | }
152 | }
153 | 
154 | if endpointsOnNode && numActiveEndpoints > 0 {
155 | w.WriteHeader(http.StatusOK)
156 | _, err := fmt.Fprintf(w, "%d Service Endpoints found\n", numActiveEndpoints)
157 | if err != nil {
158 | klog.Errorf("failed to write body: %s", err)
159 | }
160 | } else {
161 | w.WriteHeader(http.StatusServiceUnavailable)
162 | _, err := w.Write([]byte("No Service Endpoints Found\n"))
163 | if err != nil {
164 | klog.Errorf("failed to write body: %s", err)
165 | }
166 | }
167 | }
168 | 
169 | func (nphc *nodePortHealthCheckController) StopAll() {
170 | klog.Info("Stopping all NodePort health checks")
171 | close(nphc.stopCh)
172 | klog.Info("Waiting for all NodePort health checks to finish shutting down")
173 | nphc.wg.Wait()
174 | klog.Info("All NodePort health checks are completely shut down, all done!")
175 | }
176 | 
177 | func NewNodePortHealthCheck() *nodePortHealthCheckController {
178 | nphc := nodePortHealthCheckController{
179 | activeNPHC: make(map[int]chan<- struct{}),
180 | wg: &sync.WaitGroup{},
181 | stopCh: make(chan struct{}),
182 | }
183 | 
184 | return &nphc
185 | }
186 | 
-------------------------------------------------------------------------------- /pkg/controllers/proxy/metrics.go:
--------------------------------------------------------------------------------
1 | package proxy
2 | 
3 | import (
4 | "strconv"
5 | "time"
6 | 
7 | "github.com/ccoveille/go-safecast/v2"
8 | "github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
9 | "github.com/moby/ipvs"
10 | "github.com/prometheus/client_golang/prometheus"
11 | "k8s.io/klog/v2"
12 | )
13 | 
14 | type metricsServiceMapKey struct {
15 | ip string
16 | uPort uint16
17 | protocol uint16
18 | }
19 | 
20 | type metricsServiceMap map[metricsServiceMapKey]*serviceInfo
21 | 
22 | // getMetricsServiceMap builds a structure suitable for quickly matching services
23 | func (nsc *NetworkServicesController) getMetricsServiceMap() metricsServiceMap {
24 | if serviceMapPtr := nsc.metricsServiceMap.Load(); serviceMapPtr != nil {
25 | return *serviceMapPtr
26 | }
27 | 
28 | var err error
29 | serviceMap := metricsServiceMap{}
30 | 
31 | for _, svc := range nsc.getServiceMap() {
32 | key := metricsServiceMapKey{}
33 | key.uPort, err = safecast.Convert[uint16](svc.port)
34 | if err != nil {
35 | klog.Errorf("failed to convert port %d to uint16: %v", svc.port, err)
36 | continue
37 | }
38 | key.protocol = convertSvcProtoToSysCallProto(svc.protocol)
39 | 
40 | for _, ip := range svc.clusterIPs {
41 | key.ip = ip
42 | serviceMap[key] = svc
43 | }
44 | for _, ip := range svc.externalIPs {
45 | key.ip
= ip 46 | serviceMap[key] = svc 47 | } 48 | for _, ip := range svc.loadBalancerIPs { 49 | key.ip = ip 50 | serviceMap[key] = svc 51 | } 52 | if svc.nodePort != 0 { 53 | key.ip = nsc.krNode.GetPrimaryNodeIP().String() 54 | key.uPort, err = safecast.Convert[uint16](svc.nodePort) 55 | if err != nil { 56 | klog.Errorf("failed to convert nodePort %d to uint16: %v", svc.nodePort, err) 57 | continue 58 | } 59 | serviceMap[key] = svc 60 | } 61 | } 62 | 63 | nsc.metricsServiceMap.Store(&serviceMap) 64 | 65 | return serviceMap 66 | } 67 | 68 | func (m metricsServiceMap) lookupService(ip string, uPort uint16, protocol uint16) *serviceInfo { 69 | key := metricsServiceMapKey{ 70 | ip: ip, 71 | uPort: uPort, 72 | protocol: protocol, 73 | } 74 | 75 | return m[key] 76 | } 77 | 78 | func (*NetworkServicesController) Describe(ch chan<- *prometheus.Desc) { 79 | ch <- metrics.ServiceBpsIn 80 | ch <- metrics.ServiceBpsOut 81 | ch <- metrics.ServiceBytesIn 82 | ch <- metrics.ServiceBytesOut 83 | ch <- metrics.ServiceCPS 84 | ch <- metrics.ServicePacketsIn 85 | ch <- metrics.ServicePacketsOut 86 | ch <- metrics.ServicePpsIn 87 | ch <- metrics.ServicePpsOut 88 | ch <- metrics.ServiceTotalConn 89 | ch <- metrics.ControllerIpvsServices 90 | } 91 | 92 | func (nsc *NetworkServicesController) Collect(ch chan<- prometheus.Metric) { 93 | start := time.Now() 94 | defer func() { 95 | endTime := time.Since(start) 96 | klog.V(2).Infof("Publishing IPVS metrics took %v", endTime) 97 | if nsc.MetricsEnabled { 98 | metrics.ControllerIpvsMetricsExportTime.Observe(endTime.Seconds()) 99 | } 100 | }() 101 | 102 | serviceMap := nsc.getMetricsServiceMap() 103 | 104 | ipvsHandle, err := ipvs.New("") 105 | if err != nil { 106 | klog.Errorf("failed to initialize ipvs handle: %v", err) 107 | return 108 | } 109 | defer ipvsHandle.Close() 110 | 111 | ipvsSvcs, err := ipvsHandle.GetServices() 112 | if err != nil { 113 | klog.Errorf("failed to list IPVS services: %v", err) 114 | return 115 | } 116 | 117 | klog.V(1).Info("Publishing IPVS metrics") 118 | for _, ipvsSvc := range ipvsSvcs { 119 | ip := ipvsSvc.Address.String() 120 | svc := serviceMap.lookupService(ip, ipvsSvc.Port, ipvsSvc.Protocol) 121 | 122 | if svc == nil { 123 | continue 124 | } 125 | 126 | klog.V(3).Infof("Publishing metrics for %s/%s (%s:%d/%s)", 127 | svc.namespace, svc.name, ip, ipvsSvc.Port, svc.protocol) 128 | 129 | labelValues := []string{ 130 | svc.namespace, 131 | svc.name, 132 | ip, 133 | svc.protocol, 134 | strconv.Itoa(int(ipvsSvc.Port)), 135 | } 136 | 137 | ch <- prometheus.MustNewConstMetric( 138 | metrics.ServiceBpsIn, 139 | prometheus.GaugeValue, 140 | float64(ipvsSvc.Stats.BPSIn), 141 | labelValues..., 142 | ) 143 | 144 | ch <- prometheus.MustNewConstMetric( 145 | metrics.ServiceBpsOut, 146 | prometheus.GaugeValue, 147 | float64(ipvsSvc.Stats.BPSOut), 148 | labelValues..., 149 | ) 150 | 151 | ch <- prometheus.MustNewConstMetric( 152 | metrics.ServiceBytesIn, 153 | prometheus.CounterValue, 154 | float64(ipvsSvc.Stats.BytesIn), 155 | labelValues..., 156 | ) 157 | 158 | ch <- prometheus.MustNewConstMetric( 159 | metrics.ServiceBytesOut, 160 | prometheus.CounterValue, 161 | float64(ipvsSvc.Stats.BytesOut), 162 | labelValues..., 163 | ) 164 | 165 | ch <- prometheus.MustNewConstMetric( 166 | metrics.ServiceCPS, 167 | prometheus.GaugeValue, 168 | float64(ipvsSvc.Stats.CPS), 169 | labelValues..., 170 | ) 171 | 172 | ch <- prometheus.MustNewConstMetric( 173 | metrics.ServicePacketsIn, 174 | prometheus.CounterValue, 175 | float64(ipvsSvc.Stats.PacketsIn), 176 | 
labelValues..., 177 | ) 178 | 179 | ch <- prometheus.MustNewConstMetric( 180 | metrics.ServicePacketsOut, 181 | prometheus.CounterValue, 182 | float64(ipvsSvc.Stats.PacketsOut), 183 | labelValues..., 184 | ) 185 | 186 | ch <- prometheus.MustNewConstMetric( 187 | metrics.ServicePpsIn, 188 | prometheus.GaugeValue, 189 | float64(ipvsSvc.Stats.PPSIn), 190 | labelValues..., 191 | ) 192 | 193 | ch <- prometheus.MustNewConstMetric( 194 | metrics.ServicePpsOut, 195 | prometheus.GaugeValue, 196 | float64(ipvsSvc.Stats.PPSOut), 197 | labelValues..., 198 | ) 199 | 200 | ch <- prometheus.MustNewConstMetric( 201 | metrics.ServiceTotalConn, 202 | prometheus.CounterValue, 203 | float64(ipvsSvc.Stats.Connections), 204 | labelValues..., 205 | ) 206 | } 207 | 208 | ch <- prometheus.MustNewConstMetric( 209 | metrics.ControllerIpvsServices, 210 | prometheus.GaugeValue, 211 | float64(len(ipvsSvcs)), 212 | ) 213 | } 214 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/ipset_fixture_test.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "context" 5 | "path/filepath" 6 | "sync" 7 | "testing" 8 | 9 | "github.com/cloudnativelabs/kube-router/v2/pkg/controllers/testhelpers" 10 | "github.com/cloudnativelabs/kube-router/v2/pkg/options" 11 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 12 | 13 | "github.com/stretchr/testify/require" 14 | 15 | v1 "k8s.io/api/core/v1" 16 | networkingv1 "k8s.io/api/networking/v1" 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | "k8s.io/apimachinery/pkg/util/intstr" 19 | "k8s.io/client-go/informers" 20 | "k8s.io/client-go/kubernetes/fake" 21 | "k8s.io/client-go/tools/cache" 22 | ) 23 | 24 | func TestNetworkPolicyFixtureIPSets(t *testing.T) { 25 | fixtureDir := filepath.Join("..", "..", "..", "testdata", "ipset_test_1") 26 | 27 | pods := testhelpers.LoadPodList(t, filepath.Join(fixtureDir, "pods.yaml")) 28 | networkPolicies := testhelpers.LoadNetworkPolicyList(t, filepath.Join(fixtureDir, "networkpolicy.yaml")) 29 | nodes := testhelpers.LoadNodeList(t, filepath.Join(fixtureDir, "nodes.yaml")) 30 | namespaces := deriveNamespaces(pods, networkPolicies) 31 | 32 | client := fake.NewSimpleClientset() 33 | for i := range nodes.Items { 34 | _, err := client.CoreV1().Nodes().Create(context.Background(), nodes.Items[i].DeepCopy(), metav1.CreateOptions{}) 35 | require.NoError(t, err) 36 | } 37 | 38 | config := &options.KubeRouterConfig{ 39 | EnableIPv4: true, 40 | EnableIPv6: true, 41 | ClusterIPCIDRs: []string{"10.96.0.0/16", "2001:db8:42:1::/112"}, 42 | HostnameOverride: nodes.Items[0].Name, 43 | NodePortRange: "30000-32767", 44 | } 45 | 46 | informerFactory := informers.NewSharedInformerFactory(client, 0) 47 | podInformer := informerFactory.Core().V1().Pods().Informer() 48 | npInformer := informerFactory.Networking().V1().NetworkPolicies().Informer() 49 | nsInformer := informerFactory.Core().V1().Namespaces().Informer() 50 | 51 | ipv4Handler := testhelpers.NewFakeIPSetHandler(false) 52 | ipv6Handler := testhelpers.NewFakeIPSetHandler(true) 53 | t.Cleanup(func() { 54 | if t.Failed() { 55 | t.Logf("ipv4 restore script:\n%s", ipv4Handler.Restored()) 56 | t.Logf("ipv6 restore script:\n%s", ipv6Handler.Restored()) 57 | } 58 | }) 59 | 60 | linkQ := utils.NewFakeLocalLinkQuerier(collectNodeIPs(nodes), nil) 61 | 62 | controller, err := NewNetworkPolicyController( 63 | client, 64 | config, 65 | podInformer, 66 | npInformer, 67 | nsInformer, 68 | &sync.Mutex{}, 
69 | linkQ, 70 | map[v1.IPFamily]utils.IPTablesHandler{ 71 | v1.IPv4Protocol: &fakeIPTables{}, 72 | v1.IPv6Protocol: &fakeIPTables{}, 73 | }, 74 | map[v1.IPFamily]utils.IPSetHandler{ 75 | v1.IPv4Protocol: ipv4Handler, 76 | v1.IPv6Protocol: ipv6Handler, 77 | }, 78 | ) 79 | require.NoError(t, err) 80 | 81 | addPodsToInformer(t, podInformer.GetStore(), pods) 82 | addNetworkPoliciesToInformer(t, npInformer.GetStore(), networkPolicies) 83 | addNamespacesToInformer(nsInformer.GetStore(), namespaces) 84 | 85 | netpolInfo, err := controller.buildNetworkPoliciesInfo() 86 | require.NoError(t, err) 87 | 88 | _, _, err = controller.syncNetworkPolicyChains(netpolInfo, "fixture") 89 | require.NoError(t, err) 90 | 91 | actual := testhelpers.MergeExpectations( 92 | testhelpers.ParseRestoreScript(ipv4Handler.Restored()), 93 | testhelpers.ParseRestoreScript(ipv6Handler.Restored()), 94 | ) 95 | expected := testhelpers.ParseSnapshot(t, filepath.Join(fixtureDir, "ipset_save.txt")) 96 | 97 | require.NotEmpty(t, expected, "expected snapshot should not be empty") 98 | require.Equal(t, testhelpers.ExpectedKeys(expected), testhelpers.ExpectedKeys(actual)) 99 | 100 | for name, exp := range expected { 101 | act := actual[name] 102 | require.Equal(t, exp.SetType, act.SetType, "set type mismatch for %s", name) 103 | require.Equal(t, exp.Entries, act.Entries, "entries mismatch for %s", name) 104 | } 105 | } 106 | 107 | func addPodsToInformer(t *testing.T, store cache.Store, pods *v1.PodList) { 108 | for i := range pods.Items { 109 | pod := pods.Items[i].DeepCopy() 110 | pod.SetResourceVersion("1") 111 | if len(pod.Status.PodIPs) > 0 { 112 | pod.Status.PodIP = pod.Status.PodIPs[0].IP 113 | } 114 | require.NoError(t, store.Add(pod)) 115 | } 116 | } 117 | 118 | func addNetworkPoliciesToInformer(t *testing.T, store cache.Store, policies *networkingv1.NetworkPolicyList) { 119 | for i := range policies.Items { 120 | pol := policies.Items[i].DeepCopy() 121 | pol.SetResourceVersion("1") 122 | for j := range pol.Spec.Ingress { 123 | for k := range pol.Spec.Ingress[j].Ports { 124 | if pol.Spec.Ingress[j].Ports[k].Protocol == nil { 125 | proto := v1.ProtocolTCP 126 | pol.Spec.Ingress[j].Ports[k].Protocol = &proto 127 | } 128 | if pol.Spec.Ingress[j].Ports[k].Port == nil { 129 | port := intstr.FromInt(0) 130 | pol.Spec.Ingress[j].Ports[k].Port = &port 131 | } 132 | } 133 | } 134 | require.NoError(t, store.Add(pol)) 135 | } 136 | } 137 | 138 | func addNamespacesToInformer(store cache.Store, namespaces *v1.NamespaceList) { 139 | for i := range namespaces.Items { 140 | _ = store.Add(namespaces.Items[i].DeepCopy()) 141 | } 142 | } 143 | 144 | func deriveNamespaces(pods *v1.PodList, policies *networkingv1.NetworkPolicyList) *v1.NamespaceList { 145 | nsSet := map[string]struct{}{} 146 | for _, pod := range pods.Items { 147 | nsSet[pod.Namespace] = struct{}{} 148 | } 149 | for _, pol := range policies.Items { 150 | nsSet[pol.Namespace] = struct{}{} 151 | } 152 | list := &v1.NamespaceList{} 153 | for ns := range nsSet { 154 | list.Items = append(list.Items, v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}) 155 | } 156 | return list 157 | } 158 | 159 | func collectNodeIPs(nodes *v1.NodeList) []string { 160 | ipSet := map[string]struct{}{} 161 | for _, node := range nodes.Items { 162 | for _, addr := range node.Status.Addresses { 163 | ipSet[addr.Address] = struct{}{} 164 | } 165 | } 166 | ips := make([]string, 0, len(ipSet)) 167 | for ip := range ipSet { 168 | ips = append(ips, ip) 169 | } 170 | return ips 171 | } 172 | 
-------------------------------------------------------------------------------- /daemonset/kubeadm-kuberouter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | labels: 31 | k8s-app: kube-router 32 | tier: node 33 | name: kube-router 34 | namespace: kube-system 35 | spec: 36 | selector: 37 | matchLabels: 38 | k8s-app: kube-router 39 | tier: node 40 | template: 41 | metadata: 42 | labels: 43 | k8s-app: kube-router 44 | tier: node 45 | spec: 46 | priorityClassName: system-node-critical 47 | serviceAccountName: kube-router 48 | serviceAccount: kube-router 49 | containers: 50 | - name: kube-router 51 | image: docker.io/cloudnativelabs/kube-router 52 | imagePullPolicy: Always 53 | args: 54 | - --run-router=true 55 | - --run-firewall=true 56 | - --run-service-proxy=false 57 | - --bgp-graceful-restart=true 58 | env: 59 | - name: NODE_NAME 60 | valueFrom: 61 | fieldRef: 62 | fieldPath: spec.nodeName 63 | - name: POD_NAME 64 | valueFrom: 65 | fieldRef: 66 | fieldPath: metadata.name 67 | - name: KUBE_ROUTER_CNI_CONF_FILE 68 | value: /etc/cni/net.d/10-kuberouter.conflist 69 | livenessProbe: 70 | httpGet: 71 | path: /healthz 72 | port: 20244 73 | initialDelaySeconds: 10 74 | periodSeconds: 3 75 | resources: 76 | requests: 77 | cpu: 250m 78 | memory: 250Mi 79 | securityContext: 80 | privileged: true 81 | volumeMounts: 82 | - name: lib-modules 83 | mountPath: /lib/modules 84 | readOnly: true 85 | - name: cni-conf-dir 86 | mountPath: /etc/cni/net.d 87 | - name: kubeconfig 88 | mountPath: /var/lib/kube-router/kubeconfig 89 | readOnly: true 90 | - name: xtables-lock 91 | mountPath: /run/xtables.lock 92 | readOnly: false 93 | initContainers: 94 | - name: install-cni 95 | image: docker.io/cloudnativelabs/kube-router 96 | imagePullPolicy: Always 97 | command: 98 | - /bin/sh 99 | - -c 100 | - set -e -x; 101 | if [ ! 
-f /etc/cni/net.d/10-kuberouter.conflist ]; then 102 | if [ -f /etc/cni/net.d/*.conf ]; then 103 | rm -f /etc/cni/net.d/*.conf; 104 | fi; 105 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 106 | cp /etc/kube-router/cni-conf.json ${TMP}; 107 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 108 | fi; 109 | if [ -x /usr/local/bin/cni-install ]; then 110 | /usr/local/bin/cni-install; 111 | fi; 112 | volumeMounts: 113 | - mountPath: /etc/cni/net.d 114 | name: cni-conf-dir 115 | - mountPath: /etc/kube-router 116 | name: kube-router-cfg 117 | - name: host-opt 118 | mountPath: /opt 119 | hostNetwork: true 120 | hostPID: true 121 | tolerations: 122 | - effect: NoSchedule 123 | operator: Exists 124 | - key: CriticalAddonsOnly 125 | operator: Exists 126 | - effect: NoExecute 127 | operator: Exists 128 | volumes: 129 | - name: lib-modules 130 | hostPath: 131 | path: /lib/modules 132 | - name: cni-conf-dir 133 | hostPath: 134 | path: /etc/cni/net.d 135 | - name: kube-router-cfg 136 | configMap: 137 | name: kube-router-cfg 138 | - name: kubeconfig 139 | hostPath: 140 | path: /var/lib/kube-router/kubeconfig 141 | - name: xtables-lock 142 | hostPath: 143 | path: /run/xtables.lock 144 | type: FileOrCreate 145 | - name: host-opt 146 | hostPath: 147 | path: /opt 148 | --- 149 | apiVersion: v1 150 | kind: ServiceAccount 151 | metadata: 152 | name: kube-router 153 | namespace: kube-system 154 | --- 155 | kind: ClusterRole 156 | apiVersion: rbac.authorization.k8s.io/v1 157 | metadata: 158 | name: kube-router 159 | namespace: kube-system 160 | rules: 161 | - apiGroups: 162 | - "" 163 | resources: 164 | - namespaces 165 | - pods 166 | - services 167 | - nodes 168 | - endpoints 169 | verbs: 170 | - list 171 | - get 172 | - watch 173 | - apiGroups: 174 | - "networking.k8s.io" 175 | resources: 176 | - networkpolicies 177 | verbs: 178 | - list 179 | - get 180 | - watch 181 | - apiGroups: 182 | - extensions 183 | resources: 184 | - networkpolicies 185 | verbs: 186 | - get 187 | - list 188 | - watch 189 | - apiGroups: 190 | - "coordination.k8s.io" 191 | resources: 192 | - leases 193 | verbs: 194 | - get 195 | - create 196 | - update 197 | - apiGroups: 198 | - "" 199 | resources: 200 | - services/status 201 | verbs: 202 | - update 203 | - apiGroups: 204 | - "discovery.k8s.io" 205 | resources: 206 | - endpointslices 207 | verbs: 208 | - get 209 | - list 210 | - watch 211 | 212 | --- 213 | kind: ClusterRoleBinding 214 | apiVersion: rbac.authorization.k8s.io/v1 215 | metadata: 216 | name: kube-router 217 | roleRef: 218 | apiGroup: rbac.authorization.k8s.io 219 | kind: ClusterRole 220 | name: kube-router 221 | subjects: 222 | - kind: ServiceAccount 223 | name: kube-router 224 | namespace: kube-system 225 | -------------------------------------------------------------------------------- /docs/generic.md: -------------------------------------------------------------------------------- 1 | # Kube-router on generic clusters 2 | 3 | This guide is for running kube-router as the [CNI](https://github.com/containernetworking) network provider for on 4 | premise and/or bare metal clusters outside of a cloud provider's environment. It assumes the initial cluster is 5 | bootstrapped and a networking provider needs configuration. 6 | 7 | All pod networking [CIDRs](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) are allocated by 8 | kube-controller-manager. 
Kube-router provides service/pod networking, a network policy firewall, and a high performance
9 | [IPVS/LVS](http://www.linuxvirtualserver.org/software/ipvs.html) based service proxy. The network policy firewall and
10 | service proxy are both optional but recommended.
11 | 
12 | ## Configuring the Worker Nodes
13 | 
14 | If you choose to run kube-router as a daemonset, then both kube-apiserver and kubelet must be run with the
15 | `--allow-privileged=true` option (see our
16 | [example daemonsets](https://github.com/cloudnativelabs/kube-router/tree/master/daemonset) for more information).
17 | 
18 | Ensure your [Container Runtime](https://kubernetes.io/docs/setup/production-environment/container-runtimes/) is
19 | configured to point its CNI configuration directory to `/etc/cni/net.d`.
20 | 
21 | This is the default location for both `containerd` and `cri-o`, but can be set specifically if needed:
22 | 
23 | ### containerd CRI Configuration
24 | 
25 | Here is what the default containerd CNI plugin configuration looks like as of the writing of this document. The default
26 | containerd configuration can be retrieved using:
27 | 
28 | ```sh
29 | containerd config default
30 | ```
31 | 
32 | ```toml
33 | [plugins]
34 | [plugins."io.containerd.grpc.v1.cri".cni]
35 | bin_dir = "/opt/cni/bin"
36 | conf_dir = "/etc/cni/net.d"
37 | conf_template = ""
38 | ip_pref = ""
39 | max_conf_num = 1
40 | ```
41 | 
42 | ### cri-o CRI Configuration
43 | 
44 | cri-o CRI configuration can be referenced via their
45 | [documentation](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionetwork-table)
46 | 
47 | If a previous CNI provider (e.g. weave-net, calico, or flannel) was used, remove old configurations from
48 | `/etc/cni/net.d` on each node.
49 | 
50 | ### Note: Switching CNI providers on a running cluster requires re-creating all pods to pick up new pod IPs
51 | 
52 | ## Configuring kube-controller-manager
53 | 
54 | If you choose to use kube-router for pod-to-pod network connectivity then
55 | [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/)
56 | needs to be configured to allocate pod CIDRs by passing the `--allocate-node-cidrs=true` flag and providing a
57 | `cluster-cidr` (e.g. by passing `--cluster-cidr=10.32.0.0/12`).
58 | 
59 | For example:
60 | 
61 | ```sh
62 | --allocate-node-cidrs=true
63 | --cluster-cidr=10.32.0.0/12
64 | --service-cluster-ip-range=10.50.0.0/22
65 | ```
66 | 
67 | ## Running kube-router with Everything
68 | 
69 | This runs kube-router with pod/service networking, the network policy firewall, and service proxy to replace kube-proxy.
70 | The example command uses `10.32.0.0/12` as the pod CIDR address range and `https://cluster01.int.domain.com:6443` as the
71 | [apiserver](https://kubernetes.io/docs/reference/generated/kube-apiserver/) address. Please change these to suit your
72 | cluster.
73 | 
74 | ```sh
75 | CLUSTERCIDR=10.32.0.0/12 \
76 | APISERVER=https://cluster01.int.domain.com:6443 \
77 | sh -c 'curl -s https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/generic-kuberouter-all-features.yaml | \
78 | sed -e "s;%APISERVER%;$APISERVER;g" -e "s;%CLUSTERCIDR%;$CLUSTERCIDR;g"' | \
79 | kubectl apply -f -
80 | ```
81 | 
82 | ### Removing a Previous kube-proxy
83 | 
84 | If [kube-proxy](https://kubernetes.io/docs/reference/generated/kube-proxy/) was ever deployed to the cluster, then you
85 | need to remove it when running kube-router in this capacity or they will conflict with each other.
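Before deleting anything, it can help to confirm that a kube-proxy daemonset is actually present. A minimal check, assuming the conventional `kube-proxy` daemonset name used in the next step:

```sh
kubectl -n kube-system get ds kube-proxy
```

If this returns `NotFound`, kube-proxy was deployed by some other means (or not at all) and you will need to find out how it runs on your nodes before cleaning it up.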
86 | 
87 | Remove any previously running kube-proxy and all iptables rules it created. Start by deleting the kube-proxy daemonset:
88 | 
89 | ```sh
90 | kubectl -n kube-system delete ds kube-proxy
91 | ```
92 | 
93 | Any iptables rules kube-proxy left behind will also need to be cleaned up. The exact command might differ based on how
94 | kube-proxy was set up or configured.
95 | 
96 | To clean up kube-proxy, we can do this with docker, containerd, or cri-o:
97 | 
98 | #### docker
99 | 
100 | ```sh
101 | docker run --privileged -v /lib/modules:/lib/modules --net=host registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy --cleanup
102 | ```
103 | 
104 | #### containerd
105 | 
106 | ```sh
107 | ctr images pull registry.k8s.io/kube-proxy-amd64:v1.28.2
108 | ctr run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
109 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
110 | ```
111 | 
112 | #### cri-o
113 | 
114 | ```sh
115 | crictl pull registry.k8s.io/kube-proxy-amd64:v1.28.2
116 | crictl run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
117 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
118 | ```
119 | 
120 | ## Running kube-router without the service proxy
121 | 
122 | This runs kube-router with pod/service networking and the network policy firewall. The service proxy is disabled.
123 | 
124 | ```sh
125 | kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/generic-kuberouter.yaml
126 | ```
127 | 
128 | In this mode kube-router relies on [kube-proxy](https://kubernetes.io/docs/reference/generated/kube-proxy/) (or some
129 | other network service provider) to provide service networking.
130 | 
131 | When the service proxy is disabled, kube-router will use
132 | [in-cluster configuration](https://github.com/kubernetes/client-go/tree/master/examples/in-cluster-client-configuration)
133 | to access the API server through its cluster IP. Service networking must therefore be set up before deploying kube-router.
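As a quick sanity check of this precondition (a sketch only; adjust for your cluster), verify that the `kubernetes` service has a cluster IP and that the API server answers on it from a node. The `10.96.0.1` address below is an example and should be replaced with the `CLUSTER-IP` that kubectl reports; any HTTP response, including 401/403, demonstrates that service networking works:

```sh
kubectl -n default get svc kubernetes
# Run from a node; -k skips certificate verification for this connectivity test
curl -k https://10.96.0.1:443/healthz
```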
134 | 
135 | ## Debugging
136 | 
137 | kube-router supports setting the log level via the command line flags `-v` or `--v`. To get maximal debug output from
138 | kube-router, please start with `--v=3`.
139 | 
-------------------------------------------------------------------------------- /pkg/bgp/peer_config.go:
--------------------------------------------------------------------------------
1 | package bgp
2 | 
3 | import (
4 | "errors"
5 | "fmt"
6 | "net"
7 | "strconv"
8 | "strings"
9 | 
10 | "github.com/cloudnativelabs/kube-router/v2/pkg/options"
11 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils"
12 | "github.com/goccy/go-yaml"
13 | )
14 | 
15 | type PeerConfig struct {
16 | remoteASN uint32 `yaml:"remoteasn"`
17 | remoteIP net.IP `yaml:"remoteip"`
18 | localIP string `yaml:"localip"`
19 | password utils.Base64String `yaml:"password"`
20 | port *uint32 `yaml:"port"`
21 | }
22 | 
23 | func NewPeerConfig(remoteIPStr string, remoteASN uint32, port *uint32, b64EncodedPassword utils.Base64String,
24 | localIP string,
25 | ) (PeerConfig, error) {
26 | remoteIP := net.ParseIP(remoteIPStr)
27 | if remoteIP == nil {
28 | return PeerConfig{}, fmt.Errorf("invalid IP address: %s", remoteIPStr)
29 | }
30 | if err := validateASN(remoteASN); err != nil {
31 | return PeerConfig{}, err
32 | }
33 | 
34 | return PeerConfig{
35 | remoteIP: remoteIP,
36 | remoteASN: remoteASN,
37 | localIP: localIP,
38 | password: b64EncodedPassword,
39 | port: port,
40 | }, nil
41 | }
42 | 
43 | func (p PeerConfig) RemoteASN() uint32 {
44 | return p.remoteASN
45 | }
46 | 
47 | func (p PeerConfig) RemoteIP() net.IP {
48 | return p.remoteIP
49 | }
50 | 
51 | func (p PeerConfig) LocalIP() string {
52 | return p.localIP
53 | }
54 | 
55 | func (p PeerConfig) Password() string {
56 | return string(p.password)
57 | }
58 | 
59 | func (p PeerConfig) Port() *uint32 {
60 | return p.port
61 | }
62 | 
63 | // Custom Stringer to prevent leaking passwords when printed
64 | func (p PeerConfig) String() string {
65 | var fields []string
66 | if p.localIP != "" {
67 | fields = append(fields, fmt.Sprintf("LocalIP: %s", p.localIP))
68 | }
69 | if p.port != nil {
70 | fields = append(fields, fmt.Sprintf("Port: %d", *p.port))
71 | }
72 | if p.remoteASN != uint32(0) {
73 | fields = append(fields, fmt.Sprintf("RemoteASN: %d", p.remoteASN))
74 | }
75 | if p.remoteIP != nil {
76 | fields = append(fields, fmt.Sprintf("RemoteIP: %v", p.remoteIP))
77 | }
78 | return fmt.Sprintf("PeerConfig{%s}", strings.Join(fields, ", "))
79 | }
80 | 
81 | func (p *PeerConfig) UnmarshalYAML(raw []byte) error {
82 | tmp := struct {
83 | LocalIP *string `yaml:"localip"`
84 | Password *utils.Base64String `yaml:"password"`
85 | Port *uint32 `yaml:"port"`
86 | RemoteASN *uint32 `yaml:"remoteasn"`
87 | RemoteIP *string `yaml:"remoteip"`
88 | }{}
89 | 
90 | if err := yaml.Unmarshal(raw, &tmp); err != nil {
91 | return fmt.Errorf("failed to unmarshal peer config: %w", err)
92 | }
93 | 
94 | if tmp.RemoteIP == nil {
95 | return errors.New("remoteip cannot be empty")
96 | }
97 | if tmp.RemoteASN == nil {
98 | return errors.New("remoteasn cannot be empty")
99 | }
100 | if err := validateASN(*tmp.RemoteASN); err != nil {
101 | return err
102 | }
103 | if tmp.LocalIP != nil {
104 | p.localIP = *tmp.LocalIP
105 | }
106 | if tmp.Password != nil {
107 | p.password = *tmp.Password
108 | }
109 | p.port = tmp.Port
110 | p.remoteASN = *tmp.RemoteASN
111 | ip := net.ParseIP(*tmp.RemoteIP)
112 | if ip == nil {
113 | return fmt.Errorf("%s is not a valid IP address", *tmp.RemoteIP)
114 | }
115 | p.remoteIP = ip
116 | return nil 117 | } 118 | 119 | type PeerConfigs []PeerConfig 120 | 121 | func (p PeerConfigs) RemoteIPStrings() []string { 122 | remoteIPs := make([]string, 0) 123 | for _, cfg := range p { 124 | remoteIPs = append(remoteIPs, cfg.RemoteIP().String()) 125 | } 126 | return remoteIPs 127 | } 128 | 129 | // Prints the PeerConfigs without the passwords leaking 130 | func (p PeerConfigs) String() string { 131 | pcs := make([]string, len(p)) 132 | for i, pc := range p { 133 | pcs[i] = pc.String() 134 | } 135 | return fmt.Sprintf("PeerConfigs[%s]", strings.Join(pcs, ",")) 136 | } 137 | 138 | func NewPeerConfigs( 139 | remoteIPs []string, 140 | remoteASNs []uint32, 141 | ports []uint32, 142 | b64EncodedPasswords []string, 143 | localIPs []string, 144 | localAddress string, 145 | ) (PeerConfigs, error) { 146 | if len(remoteIPs) != len(remoteASNs) { 147 | return nil, errors.New("invalid peer router config, the number of IPs and ASN numbers must be equal") 148 | } 149 | if len(remoteIPs) != len(b64EncodedPasswords) && len(b64EncodedPasswords) != 0 { 150 | return nil, errors.New("invalid peer router config. The number of passwords should either be zero, or " + 151 | "one per peer router. Use blank items if a router doesn't expect a password. Example: \"pass,,pass\" " + 152 | "OR [\"pass\",\"\",\"pass\"]") 153 | } 154 | if len(remoteIPs) != len(ports) && len(ports) != 0 { 155 | return nil, fmt.Errorf("invalid peer router config. The number of ports should either be zero, or "+ 156 | "one per peer router. If blank items are used, it will default to standard BGP port, %s. ", 157 | strconv.Itoa(options.DefaultBgpPort)) 158 | } 159 | if len(remoteIPs) != len(localIPs) && len(localIPs) != 0 { 160 | return nil, fmt.Errorf("invalid peer router config. The number of localIPs should either be zero, or "+ 161 | "one per peer router. If blank items are used, it will default to nodeIP, %s. ", localAddress) 162 | } 163 | 164 | peerCfgs := make(PeerConfigs, len(remoteIPs)) 165 | for i, remoteIP := range remoteIPs { 166 | var localIP string 167 | var pw utils.Base64String 168 | var port *uint32 169 | if len(ports) != 0 { 170 | port = &ports[i] 171 | } 172 | if len(b64EncodedPasswords) != 0 { 173 | pw = utils.Base64String(b64EncodedPasswords[i]) 174 | } 175 | if len(localIPs) != 0 { 176 | localIP = localIPs[i] 177 | } 178 | peerCfg, err := NewPeerConfig(remoteIP, remoteASNs[i], port, pw, localIP) 179 | if err != nil { 180 | return nil, err 181 | } 182 | peerCfgs[i] = peerCfg 183 | } 184 | 185 | return peerCfgs, nil 186 | } 187 | 188 | func validateASN(asn uint32) error { 189 | if (asn < 1 || asn > 23455) && 190 | (asn < 23457 || asn > 63999) && 191 | (asn < 64512 || asn > 65534) && 192 | (asn < 131072 || asn > 4199999999) && 193 | (asn < 4200000000 || asn > 4294967294) { 194 | return fmt.Errorf("reserved ASN number \"%d\" for global BGP peer", asn) 195 | } 196 | return nil 197 | } 198 | --------------------------------------------------------------------------------