├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── ci.yml │ ├── close_stale.yml │ └── codeql-analysis.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yml ├── .markdownlint.yaml ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MAINTAINER.md ├── Makefile ├── README.md ├── RELEASE.md ├── USERS.md ├── build ├── image-assets │ ├── bashrc │ ├── cni-install │ ├── motd-kube-router.sh │ ├── profile │ └── vimrc └── test-scripts │ └── unit_test_timing.py ├── cmd └── kube-router │ ├── kube-router.go │ └── kube-router_test.go ├── cni └── 10-kuberouter.conf ├── daemonset ├── generic-kuberouter-all-features-advertise-routes.yaml ├── generic-kuberouter-all-features.yaml ├── generic-kuberouter-only-advertise-routes.yaml ├── generic-kuberouter.yaml ├── kube-router-all-service-daemonset-advertise-routes.yaml ├── kube-router-all-service-daemonset.yaml ├── kube-router-firewall-daemonset.yaml ├── kube-router-proxy-daemonset.yaml ├── kubeadm-kuberouter-all-features-dsr.yaml ├── kubeadm-kuberouter-all-features-hostport.yaml ├── kubeadm-kuberouter-all-features.yaml └── kubeadm-kuberouter.yaml ├── dashboard ├── dashboard.png └── kube-router.json ├── docs ├── architecture.md ├── bgp.md ├── developing.md ├── dsr.md ├── generic.md ├── health.md ├── how-it-works.md ├── img │ ├── ep.jpg │ ├── forward.png │ ├── ipset.jpg │ ├── ipvs1.jpg │ ├── kube-router-arch.png │ ├── logo-full.png │ ├── logo-full.svg │ ├── logo-icon-only.png │ ├── logo-icon-only.svg │ ├── podfw.png │ ├── policyfw.png │ └── svc.jpg ├── index.md ├── introduction.md ├── ipv6.md ├── kops.md ├── kubeadm.md ├── load-balancer-allocator.md ├── metrics.md ├── observability.md ├── pod-toolbox.md ├── see-it-in-action.md ├── troubleshoot.md ├── tunnels.md ├── upgrading.md └── user-guide.md ├── go.mod ├── go.sum └── pkg ├── bgp ├── id.go ├── id_test.go └── parse.go ├── cmd └── kube-router.go ├── controllers ├── controllers.go ├── controllers_suite_test.go ├── lballoc │ ├── lballoc.go │ └── lballoc_test.go ├── netpol │ ├── namespace.go │ ├── network_policy_controller.go │ ├── network_policy_controller_test.go │ ├── pod.go │ ├── policy.go │ ├── policy_test.go │ ├── utils.go │ └── utils_test.go ├── proxy │ ├── hairpin_controller.go │ ├── linux_networking.go │ ├── linux_networking_moq.go │ ├── network_service_graceful.go │ ├── network_services_controller.go │ ├── network_services_controller_test.go │ ├── nodeport_healthcheck.go │ ├── service_endpoints_sync.go │ ├── utils.go │ └── utils_test.go └── routing │ ├── aws.go │ ├── bgp_peers.go │ ├── bgp_policies.go │ ├── bgp_policies_test.go │ ├── ecmp_vip.go │ ├── ecmp_vip_test.go │ ├── network_routes_controller.go │ ├── network_routes_controller_test.go │ ├── pod_egress.go │ ├── utils.go │ └── utils_test.go ├── cri ├── interface.go └── remote_runtime.go ├── healthcheck └── health_controller.go ├── metrics └── metrics_controller.go ├── options └── options.go ├── routes ├── linux_routes.go ├── pbr.go ├── route_sync.go └── route_sync_test.go ├── tunnels ├── linux_tunnels.go └── linux_tunnels_test.go ├── utils ├── cni.go ├── cni_test.go ├── ipset.go ├── ipset_test.go ├── iptables.go ├── iptables_test.go ├── linux_routing.go ├── linux_routingtest.go ├── node.go ├── node_test.go ├── pod_cidr.go ├── pod_cidr_test.go ├── service.go ├── sysctl.go └── utils.go └── version └── version.go /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Report a bug 3 | about: 
Create a bug report to help us improve kube-router 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## What happened? 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | ## What did you expect to happen? 15 | 16 | A clear and concise description of what you expected to happen. 17 | 18 | ## How can we reproduce the behavior you experienced? 19 | 20 | Steps to reproduce the behavior: 21 | 22 | 1. Step 1 23 | 2. Step 2 24 | 3. Step 3 25 | 4. Step 4 26 | 27 | ## Screenshots / Architecture Diagrams / Network Topologies 28 | 29 | If applicable, add those here to help explain your problem. 30 | 31 | ## System Information (please complete the following information) 32 | 33 | - Kube-Router Version (`kube-router --version`): [e.g. 1.0.1] 34 | - Kube-Router Parameters: [e.g. --run-router --run-service-proxy --enable-overlay --overlay-type=full etc.] 35 | - Kubernetes Version (`kubectl version`) : [e.g. 1.18.3] 36 | - Cloud Type: [e.g. AWS, GCP, Azure, on premise] 37 | - Kubernetes Deployment Type: [e.g. EKS, GKE, Kops, Kubeadm, etc.] 38 | - Kube-Router Deployment Type: [e.g. DaemonSet, System Service] 39 | - Cluster Size: [e.g. 200 Nodes] 40 | 41 | ## Logs, other output, metrics 42 | 43 | Please provide logs, other kind of output or observed metrics here. 44 | 45 | ## Additional context 46 | 47 | Add any other context about the problem here. 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Kube-Router Channel on Kubernetes Slack 4 | url: https://kubernetes.slack.com/messages/C8DCQGTSB 5 | about: Please ask and answer questions here. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for kube-router 4 | title: '' 5 | labels: feature 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Is your feature request related to a problem? Please describe 11 | 12 | A clear and concise description of what the problem is and what the feature provides. 13 | 14 | ## Describe the solution you'd like 15 | 16 | A clear and concise description of what you want to happen. 17 | 18 | ## Describe alternatives you've considered 19 | 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | ## Additional context 23 | 24 | Add any other context or screenshots about the feature request here. 
25 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | groups: 6 | k8s-dependencies: 7 | patterns: 8 | - "k8s.io*" 9 | schedule: 10 | interval: "monthly" 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | interval: "monthly" 15 | -------------------------------------------------------------------------------- /.github/workflows/close_stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | permissions: 7 | issues: write 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/stale@v9 15 | with: 16 | stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' 17 | stale-pr-message: 'This PR is stale because it has been open 60 days with no activity. Remove stale label or comment or this will be closed in 10 days.' 18 | close-issue-message: 'This issue was closed because it has been stale for 5 days with no activity.' 19 | close-pr-message: 'This PR was closed because it has been stale for 10 days with no activity.' 20 | days-before-issue-stale: 30 21 | days-before-pr-stale: 60 22 | days-before-issue-close: 5 23 | days-before-pr-close: 10 24 | exempt-issue-labels: override-stale 25 | exempt-pr-labels: override-stale,dependencies 26 | enable-statistics: true 27 | operations-per-run: 100 28 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "Code Scanning - Action" 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - v* 8 | pull_request: 9 | schedule: 10 | # ┌───────────── minute (0 - 59) 11 | # │ ┌───────────── hour (0 - 23) 12 | # │ │ ┌───────────── day of the month (1 - 31) 13 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 14 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 15 | # │ │ │ │ │ 16 | # │ │ │ │ │ 17 | # │ │ │ │ │ 18 | # * * * * * 19 | - cron: '30 1 * * 0' 20 | 21 | jobs: 22 | CodeQL-Build: 23 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 24 | runs-on: ubuntu-latest 25 | 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v4 29 | 30 | # Initializes the CodeQL tools for scanning. 31 | - name: Initialize CodeQL 32 | uses: github/codeql-action/init@v3 33 | # Override language selection by uncommenting this and choosing your languages 34 | # with: 35 | # languages: go, javascript, csharp, python, cpp, java 36 | 37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 38 | # If this step fails, then you should remove it and run the build manually (see below). 39 | - name: Autobuild 40 | uses: github/codeql-action/autobuild@v3 41 | 42 | # ℹ️ Command-line programs to run using the OS shell. 
43 | # 📚 https://git.io/JvXDl 44 | 45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 46 | # three lines and modify them (or add more) to build your code if your 47 | # project uses a compiled language 48 | 49 | # - run: | 50 | # make bootstrap 51 | # make release 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v3 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #### Project Specific Ignores #### 2 | # Built binaries 3 | /kube-router 4 | /gobgp 5 | /cni-download 6 | 7 | # Go directories 8 | _output 9 | _cache 10 | vendor 11 | .*.sw? 12 | 13 | # Ignore worktree directory 14 | worktrees 15 | 16 | # Ignore common IDE directories 17 | /.vscode 18 | /.idea 19 | 20 | 21 | #### Go Lang Ignores #### 22 | # If you prefer the allow list template instead of the deny list, see community template: 23 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 24 | # 25 | # Binaries for programs and plugins 26 | *.exe 27 | *.exe~ 28 | *.dll 29 | *.so 30 | *.dylib 31 | 32 | # Test binary, built with `go test -c` 33 | *.test 34 | 35 | # Output of the go coverage tool, specifically when used with LiteIDE 36 | *.out 37 | 38 | # Dependency directories (remove the comment below to include it) 39 | # vendor/ 40 | 41 | # Go workspace file 42 | go.work 43 | go.work.sum 44 | 45 | # env file 46 | .env 47 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | output: 3 | formats: 4 | tab: 5 | path: stdout 6 | print-linter-name: true 7 | colors: false 8 | linters: 9 | enable: 10 | - bodyclose 11 | - copyloopvar 12 | - dogsled 13 | - dupl 14 | - durationcheck 15 | - exhaustive 16 | - gochecknoinits 17 | - goconst 18 | - gocritic 19 | - gosec 20 | - lll 21 | - misspell 22 | - mnd 23 | - nakedret 24 | - noctx 25 | - nolintlint 26 | - staticcheck 27 | - unconvert 28 | - unparam 29 | settings: 30 | goconst: 31 | min-len: 20 32 | exclusions: 33 | generated: lax 34 | presets: 35 | - comments 36 | - common-false-positives 37 | - legacy 38 | - std-error-handling 39 | rules: 40 | - linters: 41 | - mnd 42 | # Excluding single digits from magic number detector because it produces too many obvious results (like klog) 43 | text: 'Magic number: [0-9]{1},' 44 | - linters: 45 | - mnd 46 | # Exclude file masks from magic number detector because these numbers are obvious 47 | text: 'Magic number: 0[0-7]{3},' 48 | - linters: 49 | - mnd 50 | path: pkg/controllers/proxy/network_services_controller.go 51 | # Exclude IP masks netmasks as substituting them for constants only makes these less obvious 52 | text: 'Magic number: 255,' 53 | - linters: 54 | - mnd 55 | # Exclude IP netmasks from magic number detector because these numbers are obvious 56 | text: 'Magic number: 32,' 57 | - linters: 58 | - mnd 59 | # Exclude decimal bases from magic number detector because these numbers are obvious 60 | text: 'Magic number: 10,' 61 | - linters: 62 | - gosec 63 | # Exclude file mask security findings as we are always intentional about the file masks we use 64 | text: 'G306:' 65 | - linters: 66 | - lll 67 | # Exclude tests from long line linter 68 | path: _test\.go 69 | - linters: 70 | - dupl 71 | # Exclude tests from duplicate linter 72 | path: _test\.go 73 | - linters: 74 | - goconst 
75 | path: (.+)_test\.go 76 | paths: 77 | - third_party$ 78 | - builtin$ 79 | - examples$ 80 | issues: 81 | max-issues-per-linter: 0 82 | max-same-issues: 0 83 | formatters: 84 | enable: 85 | - gofmt 86 | - goimports 87 | exclusions: 88 | generated: lax 89 | paths: 90 | - third_party$ 91 | - builtin$ 92 | - examples$ 93 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | release: 4 | draft: true 5 | prerelease: auto 6 | header: | 7 | ## Summary 8 | 9 | ## Contributions 10 | 11 | ## Changelog 12 | 13 | builds: 14 | - main: ./cmd/kube-router 15 | goos: 16 | - linux 17 | goarch: 18 | - amd64 19 | - arm 20 | - arm64 21 | - ppc64le 22 | - s390x 23 | - riscv64 24 | goarm: 25 | - 6 26 | - 7 27 | env: 28 | - CGO_ENABLED=0 29 | ldflags: 30 | - "-X github.com/cloudnativelabs/kube-router/v2/pkg/version.Version={{.Version}}" 31 | - "-X github.com/cloudnativelabs/kube-router/v2/pkg/version.BuildDate={{.Date}}" 32 | 33 | archives: 34 | - format: tar.gz 35 | name_template: '{{ .Binary }}_{{.Version}}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ 36 | .Arm }}{{ end }}' 37 | files: 38 | - LICENSE* 39 | - README* 40 | - CHANGELOG* 41 | - Documentation* 42 | 43 | snapshot: 44 | version_template: SNAPSHOT-{{ .Commit }} 45 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MD046: 3 | style: fenced 4 | MD013: 5 | line_length: 120 6 | code_blocks: false 7 | MD045: false 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributing to Kube-router 3 | 4 | ## Summary 5 | 6 | This document covers how to contribute to the kube-router project. Kube-router uses github PRs to manage contributions (could be anything from documentation, bug fixes, manifests etc.). 7 | 8 | Please read [users guide](/docs/user-guide.md) and [developers guide](/docs/developing.md) for the functionality and internals of kube-router. 9 | 10 | ## Filing issues 11 | 12 | If you have a question about Kube-router or have a problem using it, please start with contacting us on [community forum](https://kubernetes.slack.com/messages/C8DCQGTSB/) for quick help. If that doesn't answer your questions, or if you think you found a bug, please [file an issue](https://github.com/cloudnativelabs/kube-router/issues). 13 | 14 | ## Contributing Changes 15 | 16 | ### Fork the code 17 | 18 | Navigate to: 19 | [https://github.com/cloudnativelabs/kube-router](https://github.com/cloudnativelabs/kube-router) 20 | and fork the repository. 21 | 22 | Follow these steps to setup a local repository for working on Kube-router: 23 | 24 | ``` bash 25 | $ git clone https://github.com/YOUR_ACCOUNT/kube-router.git 26 | $ cd kube-router 27 | $ git remote add upstream https://github.com/cloudnativelabs/kube-router 28 | $ git checkout master 29 | $ git fetch upstream 30 | $ git rebase upstream/master 31 | ``` 32 | 33 | ### Creating A Feature Branch 34 | 35 | Create a new branch to make changes on and that branch. 36 | 37 | ``` bash 38 | $ git checkout -b feature_x 39 | (make your changes) 40 | $ git status 41 | $ git add . 
42 | $ git commit -a -m "descriptive commit message for your changes" 43 | ``` 44 | get update from upstream 45 | 46 | ``` bash 47 | $ git checkout master 48 | $ git fetch upstream 49 | $ git rebase upstream/master 50 | $ git checkout feature_x 51 | $ git rebase master 52 | ``` 53 | 54 | Now your `feature_x` branch is up-to-date with all the code in `upstream/master`, so push to your fork 55 | 56 | ### Performing A Pull Request 57 | 58 | ``` bash 59 | $ git push origin master 60 | $ git push origin feature_x 61 | ``` 62 | 63 | Now that the `feature_x` branch has been pushed to your GitHub repository, you can initiate the pull request. 64 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BUILDTIME_BASE=golang:1-alpine 2 | ARG RUNTIME_BASE=alpine:latest 3 | ARG TARGETPLATFORM 4 | ARG CNI_VERSION 5 | FROM ${BUILDTIME_BASE} AS builder 6 | ENV BUILD_IN_DOCKER=false 7 | 8 | WORKDIR /build 9 | COPY . /build 10 | RUN apk add --no-cache make git tar curl \ 11 | && make kube-router \ 12 | && make gobgp \ 13 | && make cni-download 14 | 15 | WORKDIR /iptables-wrappers 16 | # This is the latest commit on the master branch. 17 | ENV IPTABLES_WRAPPERS_VERSION=f6ef44b2c449cca8f005b32dea9a4b497202dbef 18 | RUN git clone https://github.com/kubernetes-sigs/iptables-wrappers.git . \ 19 | && git checkout "${IPTABLES_WRAPPERS_VERSION}" \ 20 | && make build \ 21 | && test -x bin/iptables-wrapper \ 22 | && test -x iptables-wrapper-installer.sh 23 | 24 | FROM ${RUNTIME_BASE} 25 | 26 | RUN apk add --no-cache \ 27 | iptables \ 28 | iptables-legacy \ 29 | ipset \ 30 | iproute2 \ 31 | ipvsadm \ 32 | conntrack-tools \ 33 | curl \ 34 | bash && \ 35 | mkdir -p /var/lib/gobgp && \ 36 | mkdir -p /usr/local/share/bash-completion && \ 37 | curl -L -o /usr/local/share/bash-completion/bash-completion \ 38 | https://raw.githubusercontent.com/scop/bash-completion/master/bash_completion 39 | 40 | COPY build/image-assets/bashrc /root/.bashrc 41 | COPY build/image-assets/profile /root/.profile 42 | COPY build/image-assets/vimrc /root/.vimrc 43 | COPY build/image-assets/motd-kube-router.sh /etc/motd-kube-router.sh 44 | COPY build/image-assets/cni-install /usr/local/bin/cni-install 45 | COPY --from=builder /build/kube-router /build/gobgp /usr/local/bin/ 46 | COPY --from=builder /build/cni-download /usr/libexec/cni 47 | 48 | # Use iptables-wrappers so that correct version of iptables-legacy or iptables-nft gets used. Alpine contains both, but 49 | # which version is used should be based on the host system as well as where rules that may have been added before 50 | # kube-router are being placed. For more information see: https://github.com/kubernetes-sigs/iptables-wrappers 51 | COPY --from=builder /iptables-wrappers/bin/iptables-wrapper / 52 | COPY --from=builder /iptables-wrappers/iptables-wrapper-installer.sh / 53 | # This is necessary because of the bug reported here: https://github.com/flannel-io/flannel/pull/1340/files 54 | # Basically even under QEMU emulation, it still doesn't have an ARM kernel in-play which means that calls to 55 | # iptables-nft will fail in the build process. The sanity check here only makes sure that iptables-nft and iptables-legacy 56 | # are installed and that we are not using iptables-1.8.0-1.8.3. For now we'll manage that on our own. 57 | RUN if ! 
command -v iptables-nft > /dev/null; then \ 58 | echo "ERROR: iptables-nft is not installed" 1>&2; \ 59 | exit 1; \ 60 | fi && \ 61 | if ! command -v iptables-legacy > /dev/null; then \ 62 | echo "ERROR: iptables-legacy is not installed" 1>&2; \ 63 | exit 1; \ 64 | fi && \ 65 | if ! command -v ip6tables > /dev/null; then \ 66 | echo "ERROR: ip6tables is not installed" 1>&2; \ 67 | exit 1; \ 68 | fi && \ 69 | /iptables-wrapper-installer.sh --no-sanity-check 70 | 71 | WORKDIR /root 72 | ENTRYPOINT ["/usr/local/bin/kube-router"] 73 | -------------------------------------------------------------------------------- /MAINTAINER.md: -------------------------------------------------------------------------------- 1 | # Maintainers 2 | 3 | ## maintainers 4 | 5 | * Aaron U'Ren [@aauren](https://github.com/aauren) 6 | * Manuel Rüger [@mrueg](https://github.com/mrueg) 7 | * Murali Reddy [@murali-reddy](https://github.com/murali-reddy) 8 | 9 | ## emeritus maintainers 10 | 11 | * Andrew Sy Kim [@andrewsykim](https://github.com/andrewsykim) 12 | * Bryan Zubrod [@bzub](https://github.com/bzub) 13 | * Joakim Karlsson [@roffe](https://github.com/roffe) 14 | * Jimmy Zhang [@jimmy-zh](https://github.com/jimmy-zh) 15 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Process for creating a kube-router release 2 | 3 | ## Preparing for the release 4 | 5 | * Ensure that the Golang release used is still supported. Definition happens currently in 6 | [Github Workflow](.github/workflow/ci.yml) and [Makefile](Makefile). 7 | * Ensure that the Alpine version used in container builds is still supported. Definition happens currently in 8 | [Github Workflow](.github/workflow/ci.yml) and [Makefile](Makefile). 9 | * Ensure that Golang dependencies are updated. 10 | `go list -mod=mod -u -m -f '{{.}}{{if .Indirect}} IAMINDIRECT{{end}}' all | grep -v IAMINDIRECT` lists possible 11 | updates. 12 | * Ensure that the GoBGP version is updated. See [upstream](https://github.com/osrg/gobgp/releases) and GoBGP definition 13 | in [Makefile](Makefile) and [go.mod](go.mod). 14 | * Ensure that the Kubernetes object definitions do not contain deprecated object types. Definition currently is in 15 | kube-router's [Daemonset](daemonset) folder. 16 | * Ensure GitHub actions are updated: 17 | ```sh 18 | dependabot update github_actions cloudnativelabs/kube-router 19 | ``` 20 | 21 | ## New major/minor release 22 | 23 | * Create a branch named v$MAJOR.$MINOR from the default branch (currently: master) 24 | * Create a new tag with the release tag v$MAJOR.$MINOR.0 25 | 26 | ```sh 27 | git tag 28 | git push origin 29 | ``` 30 | 31 | Note: your remote for the main kube-router repo may not be origin, please correct it to whatever you have called the 32 | official kube-router remote. 
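For example, cutting a hypothetical v2.3 minor release (using a remote named `upstream` for the official repository) could look like the following; the branch name and version numbers here are purely illustrative:

```sh
# Create the release branch from the default branch and publish it
git checkout master
git checkout -b v2.3
git push upstream v2.3

# Tag the first release on that branch and push the tag so the release build is triggered
git tag v2.3.0
git push upstream v2.3.0
```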
33 | 34 | ## New patch release 35 | 36 | * Change to the `master` branch 37 | * Use `git log` to identify which commits you want to bring to the new patch release 38 | * Change to the major/minor release branch that was created for this release 39 | * Cherry-Pick the changes from the `master` branch into the release branch 40 | * Create a new tag from the v$MAJOR.$MINOR release branch with the release tag v$MAJOR.$MINOR.$PATCH 41 | 42 | Example: 43 | 44 | ```sh 45 | git checkout master 46 | git log --color --pretty=format:'%h - %s (%cr) <%an>' --abbrev-commit --decorate 47 | git checkout 48 | git cherry-pick 49 | git tag 50 | git push origin 51 | ``` 52 | 53 | Note: your remote for the main kube-router repo may not be origin, please correct it to whatever you have called the 54 | official kube-router remote. 55 | 56 | ## Release Candidates 57 | 58 | * Follow above instructions and ensure that the tag contains `-rc`. Don't mark the pre-release as a proper release. 59 | 60 | ## Release Build Process 61 | 62 | Once the tag is pushed to GitHub GitHub Actions will be triggered and several things will happen: 63 | 64 | * kube-router will be linted 65 | * kube-router will be tested 66 | * The actions will run a test build of the kube-router binary 67 | * Containers for [defined architectures](https://github.com/cloudnativelabs/kube-router/blob/master/.github/workflows/ci.yml) 68 | (see `platforms` section in yaml) will be built and pushed to 69 | [DockerHub](https://hub.docker.com/r/cloudnativelabs/kube-router) via the `docker buildx` command 70 | * [goreleaser](https://goreleaser.com) will be run and will: 71 | * Generate a draft release on GitHub where maintainers can later choose to update it and release it 72 | * Brief release notes will be added to the draft release 73 | * Build all of the binary releases for [defined architectures](https://github.com/cloudnativelabs/kube-router/blob/master/.goreleaser.yml) 74 | and attach them to the draft release on GitHub 75 | 76 | ## After the release 77 | 78 | * Go to the [GitHub releases page for the kube-router project](https://github.com/cloudnativelabs/kube-router/releases) 79 | * Find the draft release 80 | * Consistent Changelog Syntax can be retrieved by running the following Git command: 81 | 82 | ```sh 83 | git log --format='* %h - %s `<%an>`' .. 84 | ``` 85 | 86 | * Announce the release in [#kube-router](https://app.slack.com/client/T09NY5SBT/C8DCQGTSB) on Kubernetes Slack. 87 | -------------------------------------------------------------------------------- /USERS.md: -------------------------------------------------------------------------------- 1 | Who is using kube-router? 2 | ========================= 3 | 4 | The following is a directory of users who are using kube-router in production. 5 | 6 | Users (Alphabetically) 7 | ---------------------- 8 | 9 | * Name: DigitalOcean 10 | Description: DigitalOcean is using kube-router for production bare-metal Kubernetes clusters globally. 11 | Usage: Pod Networking, IPVS Service Proxy, BGP 12 | * Name: EEN (Eagle Eye Networks, Inc.) 13 | Description: Eagle Eye Networks is using kube-router for production bare-metal Kubernetes clusters globally. 
14 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller 15 | Contact: @DandyDeveloper 16 | * Name: enix.io 17 | Description: Simplicity, IPVS (including good support of incoming UDP long-living video streams) & BGP export of services IP to upstream BGP routers (allowing easy inbound HA with a pair of top-of-rack arista switches) are some of the killer features for us. 18 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller 19 | * Name: Globo.com 20 | Description: Globo is using kube-router for production hybrid (bare-metal and VMs) Kubernetes clusters across multiple datacenters on Brazil. 21 | Usage: Pod Networking, IPVS Service Proxy, Network Policy Controller, BGP 22 | * Name: Numberly 23 | Description: Numberly is using kube-router for production bare-metal Kubernetes clusters globally. 24 | Usage: Pod Networking, BGP, Network Policy Controller 25 | Contact: @ramnes @Lujeni 26 | * Name: PubMatic 27 | Description: PubMatic is using kube-router for production kubernetes clusters located in world wide Datacenters. 28 | Usage: Pod Networking, BGP 29 | 30 | If you are using kube-router, please consider adding yourself as a user by opening a pull request to this file and adding a section describing your usage of kube-router or let us know on Slack. 31 | -------------------------------------------------------------------------------- /build/image-assets/bashrc: -------------------------------------------------------------------------------- 1 | # print motd 2 | /etc/motd-kube-router.sh 3 | 4 | # append to the history file, don't overwrite it 5 | shopt -s histappend 6 | 7 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1) 8 | HISTSIZE=1000 9 | HISTFILESIZE=2000 10 | 11 | # check the window size after each command and, if necessary, 12 | # update the values of LINES and COLUMNS. 13 | shopt -s checkwinsize 14 | 15 | # colour Definitions for .bashrc 16 | COL_YEL="\[\e[1;33m\]" 17 | COL_GRA="\[\e[0;37m\]" 18 | COL_WHI="\[\e[1;37m\]" 19 | COL_GRE="\[\e[1;32m\]" 20 | COL_RED="\[\e[1;31m\]" 21 | COL_BLU="\[\e[1;34m\]" 22 | 23 | # Bash Prompt 24 | if test "$UID" -eq 0 ; then 25 | _COL_USER=$COL_RED 26 | _p=" #" 27 | else 28 | _COL_USER=$COL_GRE 29 | _p=">" 30 | fi 31 | 32 | # Bash Prompt 33 | if test "$UID" -eq 0 ; then 34 | _COL_USER=$COL_RED 35 | _p=" #" 36 | else 37 | _COL_USER=$COL_GRE 38 | _p=">" 39 | fi 40 | 41 | COLORIZED_PROMPT="${_COL_USER}\u${COL_WHI}@${COL_YEL}\h${COL_WHI}:${COL_BLU}\w${_p}\[\e[m\]" 42 | 43 | case $TERM in 44 | *term | rxvt | screen | linux ) 45 | PS1="${COLORIZED_PROMPT}" ;; 46 | * ) 47 | PS1="\u@\h:\w${_p} " ;; 48 | esac 49 | 50 | source /usr/local/share/bash-completion/bash-completion 51 | source /var/lib/gobgp/gobgp-completion.bash 52 | -------------------------------------------------------------------------------- /build/image-assets/cni-install: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail -x 4 | 5 | # CNI binaries that kube-router uses 6 | KUBE_ROUTER_CNI_BINS=("bridge" "portmap" "host-local" "loopback") 7 | # Local path of the CNI binaries within the kube-router container image 8 | LOCAL_BIN_PATH="${LOCAL_BIN_PATH:-/usr/libexec/cni}" 9 | # Path on the host where the CRI will look for the CNI binaries. This should be mounted into the initContainer so that 10 | # the CRI can reference the binaries and this script has the intended effect. 
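# As a purely illustrative example, both locations can be overridden through the
# environment when invoking this script, e.g.:
#   HOST_BIN_PATH=/usr/local/cni/bin LOCAL_BIN_PATH=/usr/libexec/cni cni-install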
11 | HOST_BIN_PATH="${HOST_BIN_PATH:-/opt/cni/bin}" 12 | 13 | setup_cni() { 14 | local cni_bin cni_dst_path cni_loc_path 15 | 16 | # If the host path for the binaries doesn't exist, create it 17 | if [[ ! -d "${HOST_BIN_PATH}" ]]; then 18 | printf "Host CNI bin path %s doesn't exist on node host, creating it\n" "${HOST_BIN_PATH}" 19 | if mkdir -p "${HOST_BIN_PATH}" >/dev/null; then 20 | printf "Successfully created CNI bin path\n" 21 | else 22 | printf "Failed to create missing CNI bin path, exiting\n" 23 | return 1 24 | fi 25 | fi 26 | 27 | # Loop over CNI binaries 28 | for cni_bin in "${KUBE_ROUTER_CNI_BINS[@]}"; do 29 | cni_dst_path="${HOST_BIN_PATH}/${cni_bin}" 30 | cni_loc_path="${LOCAL_BIN_PATH}/${cni_bin}" 31 | 32 | # Check to see if the binary already exists on the host node 33 | if [[ -x "${cni_dst_path}" ]]; then 34 | # If it did, then output a message and skip this loop 35 | printf "CNI binary %s already exists and is executable, skipping\n" "${cni_dst_path}" 36 | continue 37 | fi 38 | 39 | # If it didn't then try to install it 40 | printf "CNI binary %s was missing or wasn't executable, installing it\n" "${cni_dst_path}" 41 | if install -m 755 "${cni_loc_path}" "${cni_dst_path}" >/dev/null; then 42 | printf "CNI install successfull\n" 43 | else 44 | printf "Failed to install CNI binary, exiting\n" 45 | return 2 46 | fi 47 | done 48 | 49 | printf "CNI setup completed successfully!" 50 | return 0 51 | } 52 | 53 | setup_cni "${@}" 54 | exit $? 55 | -------------------------------------------------------------------------------- /build/image-assets/motd-kube-router.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | echo "Welcome to kube-router on \"${NODE_NAME}\"!" 4 | echo 5 | echo "For debugging, the following tools are available:" 6 | echo "- ipvsadm | Gather info about Virtual Services and Real Servers via IPVS." 7 | echo " | Examples:" 8 | echo " | ## Show all options" 9 | echo " | ipvsadm --help" 10 | echo " | ## List Services and Endpoints handled by IPVS" 11 | echo " | ipvsadm -ln" 12 | echo " | ## Show traffic rate information" 13 | echo " | ipvsadm -ln --rate" 14 | echo " | ## Show cumulative traffic statistics" 15 | echo " | ipvsadm -ln --stats" 16 | echo 17 | echo "- gobgp | Get BGP related information from your nodes." 18 | echo " | " 19 | echo " | Tab-completion is ready to use, just type \"gobgp \"" 20 | echo " | to see the subcommands available." 21 | echo " | " 22 | echo " | By default gobgp will query the Node this Pod is running" 23 | echo " | on, i.e. \"${NODE_NAME}\". To query a different node use" 24 | echo " | \"gobgp --host node02.mydomain\" for example." 
25 | echo " | " 26 | echo " | Examples: See https://github.com/osrg/gobgp/blob/master/docs/sources/cli-command-syntax.md" 27 | echo 28 | echo "Here's a quick look at what's happening on this Node" 29 | echo "--- BGP Server Configuration ---" 30 | gobgp global 31 | echo 32 | echo "--- BGP Neighbors ---" 33 | gobgp neighbor 34 | echo 35 | echo "--- BGP Route Info ---" 36 | gobgp global rib 37 | echo 38 | echo "--- IPVS Services ---" 39 | ipvsadm -ln 40 | echo 41 | -------------------------------------------------------------------------------- /build/image-assets/profile: -------------------------------------------------------------------------------- 1 | export CHARSET=UTF-8 2 | export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 3 | export PAGER=less 4 | export PS1='\h:\w\$ ' 5 | umask 022 6 | 7 | for script in /etc/profile.d/*.sh ; do 8 | if [ -r $script ] ; then 9 | . $script 10 | fi 11 | done 12 | 13 | if [ -f /etc/bash.bashrc ]; then 14 | . /etc/bash.bashrc 15 | fi 16 | -------------------------------------------------------------------------------- /build/image-assets/vimrc: -------------------------------------------------------------------------------- 1 | syntax on 2 | set expandtab 3 | set tabstop=2 4 | -------------------------------------------------------------------------------- /build/test-scripts/unit_test_timing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Taken from: https://rotational.io/blog/speeding-up-go-tests/ this python script assists in parsing Golang JSON unit 4 | # tests and sorting them by the amount of time taken 5 | # 6 | # To use, run Go unit tests via the following: 7 | # go test -v -json -count 1 github.com/cloudnativelabs/kube-router/v2/cmd/kube-router/ github.com/cloudnativelabs/kube-router/v2/pkg/... 
>testing_output.json 8 | # 9 | # Then run this script via: 10 | # build/test-scripts/unit_test_timing.py testing_output.json 11 | 12 | import json 13 | import sys 14 | 15 | if __name__ == "__main__": 16 | tests = [] 17 | 18 | with open(sys.argv[1], 'r') as f: 19 | for line in f: 20 | data = json.loads(line) 21 | if data['Action'] != 'pass': 22 | continue 23 | 24 | if 'Test' not in data: 25 | continue 26 | 27 | if data['Elapsed'] < 0.1: 28 | continue 29 | 30 | tests.append(data) 31 | 32 | tests.sort(key=lambda d: d['Elapsed'], reverse=True) 33 | for t in tests: 34 | print(f"{t['Elapsed']:0.3f}s\t{t['Package']} {t['Test']}") 35 | -------------------------------------------------------------------------------- /cmd/kube-router/kube-router.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "net/http" 7 | "os" 8 | "time" 9 | 10 | //nolint:gosec // we want to unconditionally expose pprof here for advanced troubleshooting scenarios 11 | _ "net/http/pprof" 12 | 13 | "github.com/cloudnativelabs/kube-router/v2/pkg/cmd" 14 | "github.com/cloudnativelabs/kube-router/v2/pkg/options" 15 | "github.com/cloudnativelabs/kube-router/v2/pkg/version" 16 | "github.com/spf13/pflag" 17 | "k8s.io/klog/v2" 18 | ) 19 | 20 | func main() { 21 | if err := Main(); err != nil { 22 | fmt.Fprintf(os.Stderr, "%s\n", err) 23 | os.Exit(1) 24 | } 25 | os.Exit(0) 26 | } 27 | 28 | func Main() error { 29 | klog.InitFlags(nil) 30 | 31 | config := options.NewKubeRouterConfig() 32 | config.AddFlags(pflag.CommandLine) 33 | pflag.Parse() 34 | 35 | // Workaround for this issue: 36 | // https://github.com/kubernetes/kubernetes/issues/17162 37 | err := flag.CommandLine.Parse([]string{}) 38 | if err != nil { 39 | return fmt.Errorf("failed to parse flags: %s", err) 40 | } 41 | err = flag.Set("logtostderr", "true") 42 | if err != nil { 43 | return fmt.Errorf("failed to set flag: %s", err) 44 | } 45 | err = flag.Set("v", config.VLevel) 46 | if err != nil { 47 | return fmt.Errorf("failed to set flag: %s", err) 48 | } 49 | 50 | if config.HelpRequested { 51 | pflag.Usage() 52 | return nil 53 | } 54 | 55 | if config.Version { 56 | version.PrintVersion(false) 57 | return nil 58 | } 59 | 60 | if os.Geteuid() != 0 { 61 | return fmt.Errorf("kube-router needs to be run with privileges to execute iptables, ipset and configure ipvs") 62 | } 63 | 64 | if config.CleanupConfig { 65 | cmd.CleanupConfigAndExit() 66 | return nil 67 | } 68 | 69 | kubeRouter, err := cmd.NewKubeRouterDefault(config) 70 | if err != nil { 71 | return fmt.Errorf("failed to parse kube-router config: %v", err) 72 | } 73 | 74 | if config.EnablePprof { 75 | go func() { 76 | server := http.Server{ 77 | Addr: "0.0.0.0:6060", 78 | ReadHeaderTimeout: 5 * time.Second, 79 | Handler: nil, 80 | } 81 | fmt.Fprintf(os.Stdout, "%s\n", server.ListenAndServe().Error()) 82 | }() 83 | } 84 | 85 | err = kubeRouter.Run() 86 | if err != nil { 87 | return fmt.Errorf("failed to run kube-router: %v", err) 88 | } 89 | 90 | return nil 91 | } 92 | -------------------------------------------------------------------------------- /cmd/kube-router/kube-router_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "sync" 8 | "testing" 9 | ) 10 | 11 | func TestMainHelp(t *testing.T) { 12 | origStderr := os.Stderr 13 | stderrR, stderrW, _ := os.Pipe() 14 | os.Stderr = stderrW 15 | defer func() { os.Stderr = origStderr }() 
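// From this point on, anything written to os.Stderr (such as the pflag usage
// text emitted for --help) goes into the pipe; the goroutine below drains the
// read end into stderrBuf so the captured output can later be compared against
// the command line options section of docs/user-guide.md.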
16 | 17 | stderrBuf := bytes.NewBuffer(nil) 18 | wg := &sync.WaitGroup{} 19 | wg.Add(1) 20 | go func() { 21 | defer wg.Done() 22 | _, err := io.Copy(stderrBuf, stderrR) 23 | if err != nil { 24 | panic(err) 25 | } 26 | }() 27 | 28 | origArgs := os.Args 29 | os.Args = []string{"kube-router", "--help"} 30 | defer func() { os.Args = origArgs }() 31 | 32 | if err := Main(); err != nil { 33 | t.Fatalf("kube-router exited with error: %s\n", err) 34 | } 35 | stderrW.Close() 36 | wg.Wait() 37 | 38 | docF, err := os.Open("../../docs/user-guide.md") 39 | if err != nil { 40 | t.Fatalf("could not open docs/user-guide.md: %s\n", err) 41 | } 42 | docBuf := bytes.NewBuffer(nil) 43 | _, err = docBuf.ReadFrom(docF) 44 | if err != nil { 45 | t.Fatalf("could not read from buffer: %s\n", err) 46 | } 47 | docF.Close() 48 | 49 | exp := append([]byte("```sh\n"), stderrBuf.Bytes()...) 50 | exp = append(exp, []byte("```\n")...) 51 | 52 | if !bytes.Contains(docBuf.Bytes(), exp) { 53 | t.Errorf("docs/user-guide.md 'command line options' section does not match `kube-router --help`.\n"+ 54 | "Expected:\n%s", exp) 55 | t.Errorf("\nGot:\n%s", docBuf.Bytes()) 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /cni/10-kuberouter.conf: -------------------------------------------------------------------------------- 1 | { 2 | "cniVersion": "0.3.0", 3 | "name":"mynet", 4 | "type":"bridge", 5 | "bridge":"kube-bridge", 6 | "isDefaultGateway":true, 7 | "ipam": { 8 | "type":"host-local" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /daemonset/generic-kuberouter-only-advertise-routes.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: DaemonSet 4 | metadata: 5 | labels: 6 | k8s-app: kube-router 7 | tier: node 8 | name: kube-router 9 | namespace: kube-system 10 | spec: 11 | selector: 12 | matchLabels: 13 | k8s-app: kube-router 14 | tier: node 15 | template: 16 | metadata: 17 | labels: 18 | k8s-app: kube-router 19 | tier: node 20 | spec: 21 | priorityClassName: system-node-critical 22 | serviceAccountName: kube-router 23 | containers: 24 | - name: kube-router 25 | image: docker.io/cloudnativelabs/kube-router 26 | imagePullPolicy: Always 27 | args: 28 | - "--run-router=true" 29 | - "--run-firewall=false" 30 | - "--run-service-proxy=false" 31 | - "--bgp-graceful-restart=true" 32 | - "--enable-cni=false" 33 | - "--enable-ibgp=false" 34 | - "--enable-overlay=false" 35 | - "--peer-router-ips=" 36 | - "--peer-router-asns=" 37 | - "--cluster-asn=" 38 | - "--advertise-cluster-ip=true" 39 | - "--advertise-external-ip=true" 40 | - "--advertise-loadbalancer-ip=true" 41 | env: 42 | - name: NODE_NAME 43 | valueFrom: 44 | fieldRef: 45 | fieldPath: spec.nodeName 46 | - name: POD_NAME 47 | valueFrom: 48 | fieldRef: 49 | fieldPath: metadata.name 50 | livenessProbe: 51 | httpGet: 52 | path: /healthz 53 | port: 20244 54 | initialDelaySeconds: 10 55 | periodSeconds: 3 56 | resources: 57 | requests: 58 | cpu: 250m 59 | memory: 250Mi 60 | securityContext: 61 | privileged: true 62 | volumeMounts: 63 | - name: xtables-lock 64 | mountPath: /run/xtables.lock 65 | readOnly: false 66 | hostNetwork: true 67 | hostPID: true 68 | tolerations: 69 | - effect: NoSchedule 70 | operator: Exists 71 | - key: CriticalAddonsOnly 72 | operator: Exists 73 | - effect: NoExecute 74 | operator: Exists 75 | volumes: 76 | - name: xtables-lock 77 | hostPath: 78 | path: /run/xtables.lock 79 | type: 
FileOrCreate 80 | --- 81 | apiVersion: v1 82 | kind: ServiceAccount 83 | metadata: 84 | name: kube-router 85 | namespace: kube-system 86 | 87 | --- 88 | kind: ClusterRole 89 | apiVersion: rbac.authorization.k8s.io/v1 90 | metadata: 91 | name: kube-router 92 | namespace: kube-system 93 | rules: 94 | - apiGroups: 95 | - "" 96 | resources: 97 | - namespaces 98 | - pods 99 | - services 100 | - nodes 101 | - endpoints 102 | verbs: 103 | - list 104 | - get 105 | - watch 106 | - apiGroups: 107 | - "networking.k8s.io" 108 | resources: 109 | - networkpolicies 110 | verbs: 111 | - list 112 | - get 113 | - watch 114 | - apiGroups: 115 | - extensions 116 | resources: 117 | - networkpolicies 118 | verbs: 119 | - get 120 | - list 121 | - watch 122 | - apiGroups: 123 | - "coordination.k8s.io" 124 | resources: 125 | - leases 126 | verbs: 127 | - get 128 | - create 129 | - update 130 | - apiGroups: 131 | - "" 132 | resources: 133 | - services/status 134 | verbs: 135 | - update 136 | - apiGroups: 137 | - "discovery.k8s.io" 138 | resources: 139 | - endpointslices 140 | verbs: 141 | - get 142 | - list 143 | - watch 144 | 145 | --- 146 | kind: ClusterRoleBinding 147 | apiVersion: rbac.authorization.k8s.io/v1 148 | metadata: 149 | name: kube-router 150 | roleRef: 151 | apiGroup: rbac.authorization.k8s.io 152 | kind: ClusterRole 153 | name: kube-router 154 | subjects: 155 | - kind: ServiceAccount 156 | name: kube-router 157 | namespace: kube-system 158 | -------------------------------------------------------------------------------- /daemonset/generic-kuberouter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | 27 | --- 28 | apiVersion: apps/v1 29 | kind: DaemonSet 30 | metadata: 31 | labels: 32 | k8s-app: kube-router 33 | tier: node 34 | name: kube-router 35 | namespace: kube-system 36 | spec: 37 | selector: 38 | matchLabels: 39 | k8s-app: kube-router 40 | tier: node 41 | template: 42 | metadata: 43 | labels: 44 | k8s-app: kube-router 45 | tier: node 46 | spec: 47 | priorityClassName: system-node-critical 48 | serviceAccountName: kube-router 49 | containers: 50 | - name: kube-router 51 | image: docker.io/cloudnativelabs/kube-router 52 | imagePullPolicy: Always 53 | args: 54 | - "--run-router=true" 55 | - "--run-firewall=true" 56 | - "--run-service-proxy=false" 57 | - "--bgp-graceful-restart=true" 58 | env: 59 | - name: NODE_NAME 60 | valueFrom: 61 | fieldRef: 62 | fieldPath: spec.nodeName 63 | - name: POD_NAME 64 | valueFrom: 65 | fieldRef: 66 | fieldPath: metadata.name 67 | - name: KUBE_ROUTER_CNI_CONF_FILE 68 | value: /etc/cni/net.d/10-kuberouter.conflist 69 | livenessProbe: 70 | httpGet: 71 | path: /healthz 72 | port: 20244 73 | initialDelaySeconds: 10 74 | periodSeconds: 3 75 | resources: 76 | requests: 77 | cpu: 250m 78 | memory: 250Mi 79 | securityContext: 80 | privileged: true 81 | volumeMounts: 82 | - name: lib-modules 83 | mountPath: /lib/modules 84 | readOnly: true 85 | - name: cni-conf-dir 86 | mountPath: /etc/cni/net.d 87 | - name: xtables-lock 88 | mountPath: /run/xtables.lock 89 | readOnly: false 90 | initContainers: 91 
| - name: install-cni 92 | image: docker.io/cloudnativelabs/kube-router 93 | imagePullPolicy: Always 94 | command: 95 | - /bin/sh 96 | - -c 97 | - set -e -x; 98 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 99 | if [ -f /etc/cni/net.d/*.conf ]; then 100 | rm -f /etc/cni/net.d/*.conf; 101 | fi; 102 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 103 | cp /etc/kube-router/cni-conf.json ${TMP}; 104 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 105 | fi; 106 | if [ -x /usr/local/bin/cni-install ]; then 107 | /usr/local/bin/cni-install; 108 | fi; 109 | volumeMounts: 110 | - mountPath: /etc/cni/net.d 111 | name: cni-conf-dir 112 | - mountPath: /etc/kube-router 113 | name: kube-router-cfg 114 | - name: host-opt 115 | mountPath: /opt 116 | hostNetwork: true 117 | hostPID: true 118 | tolerations: 119 | - effect: NoSchedule 120 | operator: Exists 121 | - key: CriticalAddonsOnly 122 | operator: Exists 123 | - effect: NoExecute 124 | operator: Exists 125 | volumes: 126 | - name: lib-modules 127 | hostPath: 128 | path: /lib/modules 129 | - name: cni-conf-dir 130 | hostPath: 131 | path: /etc/cni/net.d 132 | - name: kube-router-cfg 133 | configMap: 134 | name: kube-router-cfg 135 | - name: xtables-lock 136 | hostPath: 137 | path: /run/xtables.lock 138 | type: FileOrCreate 139 | - name: host-opt 140 | hostPath: 141 | path: /opt 142 | 143 | --- 144 | apiVersion: v1 145 | kind: ServiceAccount 146 | metadata: 147 | name: kube-router 148 | namespace: kube-system 149 | 150 | --- 151 | kind: ClusterRole 152 | apiVersion: rbac.authorization.k8s.io/v1 153 | metadata: 154 | name: kube-router 155 | namespace: kube-system 156 | rules: 157 | - apiGroups: 158 | - "" 159 | resources: 160 | - namespaces 161 | - pods 162 | - services 163 | - nodes 164 | - endpoints 165 | verbs: 166 | - list 167 | - get 168 | - watch 169 | - apiGroups: 170 | - "networking.k8s.io" 171 | resources: 172 | - networkpolicies 173 | verbs: 174 | - list 175 | - get 176 | - watch 177 | - apiGroups: 178 | - extensions 179 | resources: 180 | - networkpolicies 181 | verbs: 182 | - get 183 | - list 184 | - watch 185 | - apiGroups: 186 | - "coordination.k8s.io" 187 | resources: 188 | - leases 189 | verbs: 190 | - get 191 | - create 192 | - update 193 | - apiGroups: 194 | - "" 195 | resources: 196 | - services/status 197 | verbs: 198 | - update 199 | - apiGroups: 200 | - "discovery.k8s.io" 201 | resources: 202 | - endpointslices 203 | verbs: 204 | - get 205 | - list 206 | - watch 207 | 208 | --- 209 | kind: ClusterRoleBinding 210 | apiVersion: rbac.authorization.k8s.io/v1 211 | metadata: 212 | name: kube-router 213 | roleRef: 214 | apiGroup: rbac.authorization.k8s.io 215 | kind: ClusterRole 216 | name: kube-router 217 | subjects: 218 | - kind: ServiceAccount 219 | name: kube-router 220 | namespace: kube-system 221 | -------------------------------------------------------------------------------- /daemonset/kube-router-all-service-daemonset-advertise-routes.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | 
metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=true" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=true" 51 | - "--bgp-graceful-restart=true" 52 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 53 | - "--advertise-cluster-ip=true" 54 | - "--cluster-asn=64512" 55 | - "--peer-router-ips=192.168.1.99" 56 | - "--peer-router-asns=64513" 57 | securityContext: 58 | privileged: true 59 | imagePullPolicy: Always 60 | env: 61 | - name: NODE_NAME 62 | valueFrom: 63 | fieldRef: 64 | fieldPath: spec.nodeName 65 | - name: KUBE_ROUTER_CNI_CONF_FILE 66 | value: /etc/cni/net.d/10-kuberouter.conflist 67 | livenessProbe: 68 | httpGet: 69 | path: /healthz 70 | port: 20244 71 | initialDelaySeconds: 10 72 | periodSeconds: 3 73 | volumeMounts: 74 | - name: lib-modules 75 | mountPath: /lib/modules 76 | readOnly: true 77 | - name: cni-conf-dir 78 | mountPath: /etc/cni/net.d 79 | - name: kubeconfig 80 | mountPath: /var/lib/kube-router/kubeconfig 81 | readOnly: true 82 | - name: xtables-lock 83 | mountPath: /run/xtables.lock 84 | readOnly: false 85 | initContainers: 86 | - name: install-cni 87 | image: docker.io/cloudnativelabs/kube-router 88 | imagePullPolicy: Always 89 | command: 90 | - /bin/sh 91 | - -c 92 | - set -e -x; 93 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 94 | if [ -f /etc/cni/net.d/*.conf ]; then 95 | rm -f /etc/cni/net.d/*.conf; 96 | fi; 97 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 98 | cp /etc/kube-router/cni-conf.json ${TMP}; 99 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 100 | fi; 101 | if [ -x /usr/local/bin/cni-install ]; then 102 | /usr/local/bin/cni-install; 103 | fi; 104 | volumeMounts: 105 | - name: cni-conf-dir 106 | mountPath: /etc/cni/net.d 107 | - name: kube-router-cfg 108 | mountPath: /etc/kube-router 109 | - name: host-opt 110 | mountPath: /opt 111 | hostNetwork: true 112 | hostPID: true 113 | tolerations: 114 | - effect: NoSchedule 115 | operator: Exists 116 | - key: CriticalAddonsOnly 117 | operator: Exists 118 | - effect: NoExecute 119 | operator: Exists 120 | volumes: 121 | - name: lib-modules 122 | hostPath: 123 | path: /lib/modules 124 | - name: cni-conf-dir 125 | hostPath: 126 | path: /etc/cni/net.d 127 | - name: kube-router-cfg 128 | configMap: 129 | name: kube-router-cfg 130 | - name: kubeconfig 131 | hostPath: 132 | path: /var/lib/kube-router/kubeconfig 133 | - name: xtables-lock 134 | hostPath: 135 | path: /run/xtables.lock 136 | type: FileOrCreate 137 | - name: host-opt 138 | hostPath: 139 | path: /opt 140 | -------------------------------------------------------------------------------- /daemonset/kube-router-all-service-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | 
apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=true" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=true" 51 | - "--bgp-graceful-restart=true" 52 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 53 | securityContext: 54 | privileged: true 55 | imagePullPolicy: Always 56 | env: 57 | - name: NODE_NAME 58 | valueFrom: 59 | fieldRef: 60 | fieldPath: spec.nodeName 61 | - name: KUBE_ROUTER_CNI_CONF_FILE 62 | value: /etc/cni/net.d/10-kuberouter.conflist 63 | livenessProbe: 64 | httpGet: 65 | path: /healthz 66 | port: 20244 67 | initialDelaySeconds: 10 68 | periodSeconds: 3 69 | volumeMounts: 70 | - name: lib-modules 71 | mountPath: /lib/modules 72 | readOnly: true 73 | - name: cni-conf-dir 74 | mountPath: /etc/cni/net.d 75 | - name: kubeconfig 76 | mountPath: /var/lib/kube-router/kubeconfig 77 | readOnly: true 78 | - name: xtables-lock 79 | mountPath: /run/xtables.lock 80 | readOnly: false 81 | initContainers: 82 | - name: install-cni 83 | image: docker.io/cloudnativelabs/kube-router 84 | imagePullPolicy: Always 85 | command: 86 | - /bin/sh 87 | - -c 88 | - set -e -x; 89 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 90 | if [ -f /etc/cni/net.d/*.conf ]; then 91 | rm -f /etc/cni/net.d/*.conf; 92 | fi; 93 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 94 | cp /etc/kube-router/cni-conf.json ${TMP}; 95 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 96 | fi; 97 | if [ -x /usr/local/bin/cni-install ]; then 98 | /usr/local/bin/cni-install; 99 | fi; 100 | volumeMounts: 101 | - name: cni-conf-dir 102 | mountPath: /etc/cni/net.d 103 | - name: kube-router-cfg 104 | mountPath: /etc/kube-router 105 | - name: host-opt 106 | mountPath: /opt 107 | hostNetwork: true 108 | hostPID: true 109 | tolerations: 110 | - effect: NoSchedule 111 | operator: Exists 112 | - key: CriticalAddonsOnly 113 | operator: Exists 114 | - effect: NoExecute 115 | operator: Exists 116 | volumes: 117 | - name: lib-modules 118 | hostPath: 119 | path: /lib/modules 120 | - name: cni-conf-dir 121 | hostPath: 122 | path: /etc/cni/net.d 123 | - name: kube-router-cfg 124 | configMap: 125 | name: kube-router-cfg 126 | - name: kubeconfig 127 | hostPath: 128 | path: /var/lib/kube-router/kubeconfig 129 | - name: xtables-lock 130 | hostPath: 131 | path: /run/xtables.lock 132 | type: FileOrCreate 133 | - name: host-opt 134 | hostPath: 135 | path: /opt 136 | -------------------------------------------------------------------------------- /daemonset/kube-router-firewall-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 
| labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=false" 49 | - "--run-firewall=true" 50 | - "--run-service-proxy=false" 51 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 52 | securityContext: 53 | privileged: true 54 | imagePullPolicy: Always 55 | env: 56 | - name: NODE_NAME 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: spec.nodeName 60 | - name: KUBE_ROUTER_CNI_CONF_FILE 61 | value: /etc/cni/net.d/10-kuberouter.conflist 62 | livenessProbe: 63 | httpGet: 64 | path: /healthz 65 | port: 20244 66 | initialDelaySeconds: 10 67 | periodSeconds: 3 68 | volumeMounts: 69 | - name: lib-modules 70 | mountPath: /lib/modules 71 | readOnly: true 72 | - name: cni-conf-dir 73 | mountPath: /etc/cni/net.d 74 | - name: kubeconfig 75 | mountPath: /var/lib/kube-router/kubeconfig 76 | readOnly: true 77 | - name: xtables-lock 78 | mountPath: /run/xtables.lock 79 | readOnly: false 80 | initContainers: 81 | - name: install-cni 82 | image: docker.io/cloudnativelabs/kube-router 83 | imagePullPolicy: Always 84 | command: 85 | - /bin/sh 86 | - -c 87 | - set -e -x; 88 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 89 | if [ -f /etc/cni/net.d/*.conf ]; then 90 | rm -f /etc/cni/net.d/*.conf; 91 | fi; 92 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 93 | cp /etc/kube-router/cni-conf.json ${TMP}; 94 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 95 | fi; 96 | if [ -x /usr/local/bin/cni-install ]; then 97 | /usr/local/bin/cni-install; 98 | fi; 99 | volumeMounts: 100 | - name: cni-conf-dir 101 | mountPath: /etc/cni/net.d 102 | - name: kube-router-cfg 103 | mountPath: /etc/kube-router 104 | - name: host-opt 105 | mountPath: /opt 106 | hostNetwork: true 107 | hostPID: true 108 | tolerations: 109 | - effect: NoSchedule 110 | operator: Exists 111 | - key: CriticalAddonsOnly 112 | operator: Exists 113 | - effect: NoExecute 114 | operator: Exists 115 | volumes: 116 | - name: lib-modules 117 | hostPath: 118 | path: /lib/modules 119 | - name: cni-conf-dir 120 | hostPath: 121 | path: /etc/cni/net.d 122 | - name: kube-router-cfg 123 | configMap: 124 | name: kube-router-cfg 125 | - name: kubeconfig 126 | hostPath: 127 | path: /var/lib/kube-router/kubeconfig 128 | - name: xtables-lock 129 | hostPath: 130 | path: /run/xtables.lock 131 | type: FileOrCreate 132 | - name: host-opt 133 | hostPath: 134 | path: /opt 135 | -------------------------------------------------------------------------------- /daemonset/kube-router-proxy-daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | name: kube-router 31 | namespace: kube-system 32 | labels: 33 | k8s-app: kube-router 34 | spec: 35 | selector: 36 | matchLabels: 37 | k8s-app: kube-router 38 | template: 39 | metadata: 40 | labels: 
41 | k8s-app: kube-router 42 | spec: 43 | priorityClassName: system-node-critical 44 | containers: 45 | - name: kube-router 46 | image: docker.io/cloudnativelabs/kube-router 47 | args: 48 | - "--run-router=false" 49 | - "--run-firewall=false" 50 | - "--run-service-proxy=true" 51 | - "--kubeconfig=/var/lib/kube-router/kubeconfig" 52 | securityContext: 53 | privileged: true 54 | imagePullPolicy: Always 55 | env: 56 | - name: NODE_NAME 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: spec.nodeName 60 | - name: KUBE_ROUTER_CNI_CONF_FILE 61 | value: /etc/cni/net.d/10-kuberouter.conflist 62 | livenessProbe: 63 | httpGet: 64 | path: /healthz 65 | port: 20244 66 | initialDelaySeconds: 10 67 | periodSeconds: 3 68 | volumeMounts: 69 | - name: lib-modules 70 | mountPath: /lib/modules 71 | readOnly: true 72 | - name: cni-conf-dir 73 | mountPath: /etc/cni/net.d 74 | - name: kubeconfig 75 | mountPath: /var/lib/kube-router/kubeconfig 76 | readOnly: true 77 | - name: xtables-lock 78 | mountPath: /run/xtables.lock 79 | readOnly: false 80 | initContainers: 81 | - name: install-cni 82 | image: docker.io/cloudnativelabs/kube-router 83 | imagePullPolicy: Always 84 | command: 85 | - /bin/sh 86 | - -c 87 | - set -e -x; 88 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 89 | if [ -f /etc/cni/net.d/*.conf ]; then 90 | rm -f /etc/cni/net.d/*.conf; 91 | fi; 92 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 93 | cp /etc/kube-router/cni-conf.json ${TMP}; 94 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 95 | fi; 96 | if [ -x /usr/local/bin/cni-install ]; then 97 | /usr/local/bin/cni-install; 98 | fi; 99 | volumeMounts: 100 | - name: cni-conf-dir 101 | mountPath: /etc/cni/net.d 102 | - name: kube-router-cfg 103 | mountPath: /etc/kube-router 104 | - name: host-opt 105 | mountPath: /opt 106 | hostNetwork: true 107 | hostPID: true 108 | tolerations: 109 | - effect: NoSchedule 110 | operator: Exists 111 | - key: CriticalAddonsOnly 112 | operator: Exists 113 | - effect: NoExecute 114 | operator: Exists 115 | volumes: 116 | - name: lib-modules 117 | hostPath: 118 | path: /lib/modules 119 | - name: cni-conf-dir 120 | hostPath: 121 | path: /etc/cni/net.d 122 | - name: kube-router-cfg 123 | configMap: 124 | name: kube-router-cfg 125 | - name: kubeconfig 126 | hostPath: 127 | path: /var/lib/kube-router/kubeconfig 128 | - name: xtables-lock 129 | hostPath: 130 | path: /run/xtables.lock 131 | type: FileOrCreate 132 | - name: host-opt 133 | hostPath: 134 | path: /opt 135 | -------------------------------------------------------------------------------- /daemonset/kubeadm-kuberouter-all-features-hostport.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "hairpinMode":true, 21 | "ipam":{ 22 | "type":"host-local" 23 | } 24 | }, 25 | { 26 | "type":"portmap", 27 | "capabilities":{ 28 | "snat":true, 29 | "portMappings":true 30 | } 31 | } 32 | ] 33 | } 34 | --- 35 | apiVersion: apps/v1 36 | kind: DaemonSet 37 | metadata: 38 | labels: 39 | k8s-app: kube-router 40 | tier: node 41 | name: kube-router 42 | namespace: kube-system 43 | spec: 44 | selector: 45 | matchLabels: 46 | k8s-app: kube-router 47 | tier: node 48 | 
template: 49 | metadata: 50 | labels: 51 | k8s-app: kube-router 52 | tier: node 53 | spec: 54 | priorityClassName: system-node-critical 55 | serviceAccountName: kube-router 56 | serviceAccount: kube-router 57 | containers: 58 | - name: kube-router 59 | image: docker.io/cloudnativelabs/kube-router 60 | imagePullPolicy: Always 61 | args: 62 | - --run-router=true 63 | - --run-firewall=true 64 | - --run-service-proxy=true 65 | - --bgp-graceful-restart=true 66 | - --kubeconfig=/var/lib/kube-router/kubeconfig 67 | env: 68 | - name: NODE_NAME 69 | valueFrom: 70 | fieldRef: 71 | fieldPath: spec.nodeName 72 | - name: POD_NAME 73 | valueFrom: 74 | fieldRef: 75 | fieldPath: metadata.name 76 | - name: KUBE_ROUTER_CNI_CONF_FILE 77 | value: /etc/cni/net.d/10-kuberouter.conflist 78 | livenessProbe: 79 | httpGet: 80 | path: /healthz 81 | port: 20244 82 | initialDelaySeconds: 10 83 | periodSeconds: 3 84 | resources: 85 | requests: 86 | cpu: 250m 87 | memory: 250Mi 88 | securityContext: 89 | privileged: true 90 | volumeMounts: 91 | - name: lib-modules 92 | mountPath: /lib/modules 93 | readOnly: true 94 | - name: cni-conf-dir 95 | mountPath: /etc/cni/net.d 96 | - name: kubeconfig 97 | mountPath: /var/lib/kube-router 98 | readOnly: true 99 | - name: xtables-lock 100 | mountPath: /run/xtables.lock 101 | readOnly: false 102 | initContainers: 103 | - name: install-cni 104 | image: docker.io/cloudnativelabs/kube-router 105 | imagePullPolicy: Always 106 | command: 107 | - /bin/sh 108 | - -c 109 | - set -e -x; 110 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 111 | if [ -f /etc/cni/net.d/*.conf ]; then 112 | rm -f /etc/cni/net.d/*.conf; 113 | fi; 114 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 115 | cp /etc/kube-router/cni-conf.json ${TMP}; 116 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 117 | fi; 118 | if [ -x /usr/local/bin/cni-install ]; then 119 | /usr/local/bin/cni-install; 120 | fi; 121 | volumeMounts: 122 | - name: cni-conf-dir 123 | mountPath: /etc/cni/net.d 124 | - name: kube-router-cfg 125 | mountPath: /etc/kube-router 126 | - name: host-opt 127 | mountPath: /opt 128 | hostNetwork: true 129 | hostPID: true 130 | tolerations: 131 | - effect: NoSchedule 132 | operator: Exists 133 | - key: CriticalAddonsOnly 134 | operator: Exists 135 | - effect: NoExecute 136 | operator: Exists 137 | volumes: 138 | - name: lib-modules 139 | hostPath: 140 | path: /lib/modules 141 | - name: cni-conf-dir 142 | hostPath: 143 | path: /etc/cni/net.d 144 | - name: kube-router-cfg 145 | configMap: 146 | name: kube-router-cfg 147 | - name: kubeconfig 148 | configMap: 149 | name: kube-proxy 150 | items: 151 | - key: kubeconfig.conf 152 | path: kubeconfig 153 | - name: xtables-lock 154 | hostPath: 155 | path: /run/xtables.lock 156 | type: FileOrCreate 157 | - name: host-opt 158 | hostPath: 159 | path: /opt 160 | --- 161 | apiVersion: v1 162 | kind: ServiceAccount 163 | metadata: 164 | name: kube-router 165 | namespace: kube-system 166 | --- 167 | kind: ClusterRole 168 | apiVersion: rbac.authorization.k8s.io/v1 169 | metadata: 170 | name: kube-router 171 | namespace: kube-system 172 | rules: 173 | - apiGroups: 174 | - "" 175 | resources: 176 | - namespaces 177 | - pods 178 | - services 179 | - nodes 180 | - endpoints 181 | verbs: 182 | - list 183 | - get 184 | - watch 185 | - apiGroups: 186 | - "networking.k8s.io" 187 | resources: 188 | - networkpolicies 189 | verbs: 190 | - list 191 | - get 192 | - watch 193 | - apiGroups: 194 | - extensions 195 | resources: 196 | - networkpolicies 197 | verbs: 198 | - get 199 | - 
list 200 | - watch 201 | - apiGroups: 202 | - "coordination.k8s.io" 203 | resources: 204 | - leases 205 | verbs: 206 | - get 207 | - create 208 | - update 209 | - apiGroups: 210 | - "" 211 | resources: 212 | - services/status 213 | verbs: 214 | - update 215 | - apiGroups: 216 | - "discovery.k8s.io" 217 | resources: 218 | - endpointslices 219 | verbs: 220 | - get 221 | - list 222 | - watch 223 | 224 | --- 225 | kind: ClusterRoleBinding 226 | apiVersion: rbac.authorization.k8s.io/v1 227 | metadata: 228 | name: kube-router 229 | roleRef: 230 | apiGroup: rbac.authorization.k8s.io 231 | kind: ClusterRole 232 | name: kube-router 233 | subjects: 234 | - kind: ServiceAccount 235 | name: kube-router 236 | namespace: kube-system 237 | -------------------------------------------------------------------------------- /daemonset/kubeadm-kuberouter-all-features.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "hairpinMode":true, 21 | "ipam":{ 22 | "type":"host-local" 23 | } 24 | } 25 | ] 26 | } 27 | --- 28 | apiVersion: apps/v1 29 | kind: DaemonSet 30 | metadata: 31 | labels: 32 | k8s-app: kube-router 33 | tier: node 34 | name: kube-router 35 | namespace: kube-system 36 | spec: 37 | selector: 38 | matchLabels: 39 | k8s-app: kube-router 40 | tier: node 41 | template: 42 | metadata: 43 | labels: 44 | k8s-app: kube-router 45 | tier: node 46 | spec: 47 | priorityClassName: system-node-critical 48 | serviceAccountName: kube-router 49 | serviceAccount: kube-router 50 | containers: 51 | - name: kube-router 52 | image: docker.io/cloudnativelabs/kube-router 53 | imagePullPolicy: Always 54 | args: 55 | - --run-router=true 56 | - --run-firewall=true 57 | - --run-service-proxy=true 58 | - --bgp-graceful-restart=true 59 | - --kubeconfig=/var/lib/kube-router/kubeconfig 60 | env: 61 | - name: NODE_NAME 62 | valueFrom: 63 | fieldRef: 64 | fieldPath: spec.nodeName 65 | - name: POD_NAME 66 | valueFrom: 67 | fieldRef: 68 | fieldPath: metadata.name 69 | - name: KUBE_ROUTER_CNI_CONF_FILE 70 | value: /etc/cni/net.d/10-kuberouter.conflist 71 | livenessProbe: 72 | httpGet: 73 | path: /healthz 74 | port: 20244 75 | initialDelaySeconds: 10 76 | periodSeconds: 3 77 | resources: 78 | requests: 79 | cpu: 250m 80 | memory: 250Mi 81 | securityContext: 82 | privileged: true 83 | volumeMounts: 84 | - name: lib-modules 85 | mountPath: /lib/modules 86 | readOnly: true 87 | - name: cni-conf-dir 88 | mountPath: /etc/cni/net.d 89 | - name: kubeconfig 90 | mountPath: /var/lib/kube-router 91 | readOnly: true 92 | - name: xtables-lock 93 | mountPath: /run/xtables.lock 94 | readOnly: false 95 | initContainers: 96 | - name: install-cni 97 | image: docker.io/cloudnativelabs/kube-router 98 | imagePullPolicy: Always 99 | command: 100 | - /bin/sh 101 | - -c 102 | - set -e -x; 103 | if [ ! 
-f /etc/cni/net.d/10-kuberouter.conflist ]; then 104 | if [ -f /etc/cni/net.d/*.conf ]; then 105 | rm -f /etc/cni/net.d/*.conf; 106 | fi; 107 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 108 | cp /etc/kube-router/cni-conf.json ${TMP}; 109 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 110 | fi; 111 | if [ -x /usr/local/bin/cni-install ]; then 112 | /usr/local/bin/cni-install; 113 | fi; 114 | volumeMounts: 115 | - name: cni-conf-dir 116 | mountPath: /etc/cni/net.d 117 | - name: kube-router-cfg 118 | mountPath: /etc/kube-router 119 | - name: host-opt 120 | mountPath: /opt 121 | hostNetwork: true 122 | hostPID: true 123 | tolerations: 124 | - effect: NoSchedule 125 | operator: Exists 126 | - key: CriticalAddonsOnly 127 | operator: Exists 128 | - effect: NoExecute 129 | operator: Exists 130 | volumes: 131 | - name: lib-modules 132 | hostPath: 133 | path: /lib/modules 134 | - name: cni-conf-dir 135 | hostPath: 136 | path: /etc/cni/net.d 137 | - name: kube-router-cfg 138 | configMap: 139 | name: kube-router-cfg 140 | - name: kubeconfig 141 | configMap: 142 | name: kube-proxy 143 | items: 144 | - key: kubeconfig.conf 145 | path: kubeconfig 146 | - name: xtables-lock 147 | hostPath: 148 | path: /run/xtables.lock 149 | type: FileOrCreate 150 | - name: host-opt 151 | hostPath: 152 | path: /opt 153 | --- 154 | apiVersion: v1 155 | kind: ServiceAccount 156 | metadata: 157 | name: kube-router 158 | namespace: kube-system 159 | --- 160 | kind: ClusterRole 161 | apiVersion: rbac.authorization.k8s.io/v1 162 | metadata: 163 | name: kube-router 164 | namespace: kube-system 165 | rules: 166 | - apiGroups: 167 | - "" 168 | resources: 169 | - namespaces 170 | - pods 171 | - services 172 | - nodes 173 | - endpoints 174 | verbs: 175 | - list 176 | - get 177 | - watch 178 | - apiGroups: 179 | - "networking.k8s.io" 180 | resources: 181 | - networkpolicies 182 | verbs: 183 | - list 184 | - get 185 | - watch 186 | - apiGroups: 187 | - extensions 188 | resources: 189 | - networkpolicies 190 | verbs: 191 | - get 192 | - list 193 | - watch 194 | - apiGroups: 195 | - "coordination.k8s.io" 196 | resources: 197 | - leases 198 | verbs: 199 | - get 200 | - create 201 | - update 202 | - apiGroups: 203 | - "" 204 | resources: 205 | - services/status 206 | verbs: 207 | - update 208 | - apiGroups: 209 | - "discovery.k8s.io" 210 | resources: 211 | - endpointslices 212 | verbs: 213 | - get 214 | - list 215 | - watch 216 | 217 | --- 218 | kind: ClusterRoleBinding 219 | apiVersion: rbac.authorization.k8s.io/v1 220 | metadata: 221 | name: kube-router 222 | roleRef: 223 | apiGroup: rbac.authorization.k8s.io 224 | kind: ClusterRole 225 | name: kube-router 226 | subjects: 227 | - kind: ServiceAccount 228 | name: kube-router 229 | namespace: kube-system 230 | -------------------------------------------------------------------------------- /daemonset/kubeadm-kuberouter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: kube-router-cfg 5 | namespace: kube-system 6 | labels: 7 | tier: node 8 | k8s-app: kube-router 9 | data: 10 | cni-conf.json: | 11 | { 12 | "cniVersion":"0.3.0", 13 | "name":"mynet", 14 | "plugins":[ 15 | { 16 | "name":"kubernetes", 17 | "type":"bridge", 18 | "bridge":"kube-bridge", 19 | "isDefaultGateway":true, 20 | "ipam":{ 21 | "type":"host-local" 22 | } 23 | } 24 | ] 25 | } 26 | --- 27 | apiVersion: apps/v1 28 | kind: DaemonSet 29 | metadata: 30 | labels: 31 | k8s-app: kube-router 32 | tier: node 33 | name: kube-router 34 
| namespace: kube-system 35 | spec: 36 | selector: 37 | matchLabels: 38 | k8s-app: kube-router 39 | tier: node 40 | template: 41 | metadata: 42 | labels: 43 | k8s-app: kube-router 44 | tier: node 45 | spec: 46 | priorityClassName: system-node-critical 47 | serviceAccountName: kube-router 48 | serviceAccount: kube-router 49 | containers: 50 | - name: kube-router 51 | image: docker.io/cloudnativelabs/kube-router 52 | imagePullPolicy: Always 53 | args: 54 | - --run-router=true 55 | - --run-firewall=true 56 | - --run-service-proxy=false 57 | - --bgp-graceful-restart=true 58 | env: 59 | - name: NODE_NAME 60 | valueFrom: 61 | fieldRef: 62 | fieldPath: spec.nodeName 63 | - name: POD_NAME 64 | valueFrom: 65 | fieldRef: 66 | fieldPath: metadata.name 67 | - name: KUBE_ROUTER_CNI_CONF_FILE 68 | value: /etc/cni/net.d/10-kuberouter.conflist 69 | livenessProbe: 70 | httpGet: 71 | path: /healthz 72 | port: 20244 73 | initialDelaySeconds: 10 74 | periodSeconds: 3 75 | resources: 76 | requests: 77 | cpu: 250m 78 | memory: 250Mi 79 | securityContext: 80 | privileged: true 81 | volumeMounts: 82 | - name: lib-modules 83 | mountPath: /lib/modules 84 | readOnly: true 85 | - name: cni-conf-dir 86 | mountPath: /etc/cni/net.d 87 | - name: kubeconfig 88 | mountPath: /var/lib/kube-router/kubeconfig 89 | readOnly: true 90 | - name: xtables-lock 91 | mountPath: /run/xtables.lock 92 | readOnly: false 93 | initContainers: 94 | - name: install-cni 95 | image: docker.io/cloudnativelabs/kube-router 96 | imagePullPolicy: Always 97 | command: 98 | - /bin/sh 99 | - -c 100 | - set -e -x; 101 | if [ ! -f /etc/cni/net.d/10-kuberouter.conflist ]; then 102 | if [ -f /etc/cni/net.d/*.conf ]; then 103 | rm -f /etc/cni/net.d/*.conf; 104 | fi; 105 | TMP=/etc/cni/net.d/.tmp-kuberouter-cfg; 106 | cp /etc/kube-router/cni-conf.json ${TMP}; 107 | mv ${TMP} /etc/cni/net.d/10-kuberouter.conflist; 108 | fi; 109 | if [ -x /usr/local/bin/cni-install ]; then 110 | /usr/local/bin/cni-install; 111 | fi; 112 | volumeMounts: 113 | - mountPath: /etc/cni/net.d 114 | name: cni-conf-dir 115 | - mountPath: /etc/kube-router 116 | name: kube-router-cfg 117 | - name: host-opt 118 | mountPath: /opt 119 | hostNetwork: true 120 | hostPID: true 121 | tolerations: 122 | - effect: NoSchedule 123 | operator: Exists 124 | - key: CriticalAddonsOnly 125 | operator: Exists 126 | - effect: NoExecute 127 | operator: Exists 128 | volumes: 129 | - name: lib-modules 130 | hostPath: 131 | path: /lib/modules 132 | - name: cni-conf-dir 133 | hostPath: 134 | path: /etc/cni/net.d 135 | - name: kube-router-cfg 136 | configMap: 137 | name: kube-router-cfg 138 | - name: kubeconfig 139 | hostPath: 140 | path: /var/lib/kube-router/kubeconfig 141 | - name: xtables-lock 142 | hostPath: 143 | path: /run/xtables.lock 144 | type: FileOrCreate 145 | - name: host-opt 146 | hostPath: 147 | path: /opt 148 | --- 149 | apiVersion: v1 150 | kind: ServiceAccount 151 | metadata: 152 | name: kube-router 153 | namespace: kube-system 154 | --- 155 | kind: ClusterRole 156 | apiVersion: rbac.authorization.k8s.io/v1 157 | metadata: 158 | name: kube-router 159 | namespace: kube-system 160 | rules: 161 | - apiGroups: 162 | - "" 163 | resources: 164 | - namespaces 165 | - pods 166 | - services 167 | - nodes 168 | - endpoints 169 | verbs: 170 | - list 171 | - get 172 | - watch 173 | - apiGroups: 174 | - "networking.k8s.io" 175 | resources: 176 | - networkpolicies 177 | verbs: 178 | - list 179 | - get 180 | - watch 181 | - apiGroups: 182 | - extensions 183 | resources: 184 | - networkpolicies 185 | verbs: 
186 | - get
187 | - list
188 | - watch
189 | - apiGroups:
190 | - "coordination.k8s.io"
191 | resources:
192 | - leases
193 | verbs:
194 | - get
195 | - create
196 | - update
197 | - apiGroups:
198 | - ""
199 | resources:
200 | - services/status
201 | verbs:
202 | - update
203 | - apiGroups:
204 | - "discovery.k8s.io"
205 | resources:
206 | - endpointslices
207 | verbs:
208 | - get
209 | - list
210 | - watch
211 | 
212 | ---
213 | kind: ClusterRoleBinding
214 | apiVersion: rbac.authorization.k8s.io/v1
215 | metadata:
216 | name: kube-router
217 | roleRef:
218 | apiGroup: rbac.authorization.k8s.io
219 | kind: ClusterRole
220 | name: kube-router
221 | subjects:
222 | - kind: ServiceAccount
223 | name: kube-router
224 | namespace: kube-system
225 | 
--------------------------------------------------------------------------------
/dashboard/dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/dashboard/dashboard.png
--------------------------------------------------------------------------------
/docs/architecture.md:
--------------------------------------------------------------------------------
1 | # Architecture
2 | 
3 | Kube-router is built around the concept of watchers and controllers. Watchers use the Kubernetes watch API to get
4 | notifications of events related to the creation, update, and deletion of Kubernetes objects. Each watcher gets
5 | notifications for a particular API object. On receiving an event from the API server, the watcher broadcasts it.
6 | Controllers register for event updates from the watchers and act upon the events.
7 | 
8 | Kube-router consists of 3 core controllers and multiple watchers, as depicted in the diagram below.
9 | 
10 | ![Arch](./img/kube-router-arch.png)
11 | 
12 | Each [controller](https://github.com/cloudnativelabs/kube-router/tree/master/app/controllers) follows the structure
13 | below:
14 | 
15 | ```go
16 | func Run() {
17 | for {
18 | Sync() // control loop that runs forever and performs a sync at a periodic interval
19 | }
20 | }
21 | 
22 | func OnUpdate() {
23 | Sync() // on receiving an update of a watched API object (namespace, node, pod, network policy etc.)
24 | }
25 | 
26 | Sync() {
27 | // reconcile any state changes
28 | }
29 | 
30 | Cleanup() {
31 | // clean up any changes (to iptables, ipvs, network etc.) done to the system
32 | }
33 | ```
34 | 
--------------------------------------------------------------------------------
/docs/developing.md:
--------------------------------------------------------------------------------
1 | # Developer's Guide
2 | 
3 | We aim to make local development and testing as straightforward as possible. For
4 | basic guidelines around contributing, see the [CONTRIBUTING](/CONTRIBUTING.md) document.
5 | 
6 | There are a number of automation tools available to help with testing and
7 | building your changes, detailed below.
8 | 
9 | ## Building kube-router
10 | 
11 | ### Go version 1.19 or above is required to build kube-router
12 | 
13 | All the dependencies are specified as Go modules and will be fetched into your cache, so just run `make kube-router` or
14 | `go build pkg/cmd/kube-router.go` to build.
15 | 
16 | ### Building A Docker Image
17 | 
18 | Running `make container` will compile kube-router (if needed) and build a Docker
19 | image. By default the container will be tagged with the last release version
20 | and current commit ID.
21 | 22 | For example: 23 | 24 | ```sh 25 | $ make container 26 | Building for GOARCH=amd64 27 | Verifying kube-router gobgp for ARCH=x86-64 ... 28 | Starting kube-router container image build for amd64 on amd64 29 | docker build -t "cloudnativelabs/kube-router-git:amd64-bug_fixes_for_v2.0.0" -f Dockerfile --build-arg ARCH="" \ 30 | --build-arg BUILDTIME_BASE="golang:1.20.9-alpine3.18" --build-arg RUNTIME_BASE="alpine:3.18" . 31 | Sending build context to Docker daemon 198.6MB 32 | Step 1/19 : ARG BUILDTIME_BASE=golang:1-alpine 33 | Step 2/19 : ARG RUNTIME_BASE=alpine:latest 34 | Step 3/19 : FROM ${BUILDTIME_BASE} as builder 35 | ---> 6cbc3ac54aa3 36 | Step 4/19 : ENV BUILD_IN_DOCKER=false 37 | ---> Using cache 38 | ---> aec11cc4a0cd 39 | 40 | ... 41 | 42 | Removing intermediate container 371a162930f5 43 | ---> 1d3f742d559e 44 | Step 19/19 : ENTRYPOINT ["/usr/local/bin/kube-router"] 45 | ---> Running in d5ea6fda9fe4 46 | Removing intermediate container d5ea6fda9fe4 47 | ---> 17cfbc77e293 48 | [Warning] One or more build-args [ARCH] were not consumed 49 | Successfully built 17cfbc77e293 50 | Successfully tagged cloudnativelabs/kube-router-git:amd64-bug_fixes_for_v2.0.0 51 | Finished kube-router container image build. 52 | ``` 53 | 54 | The following describes the rest of the portions of the container naming convention 55 | 56 | * `kube-router-git` indicates that the container was built from git and not from a tag. 57 | * `amd64` indicates that it was built for the `amd64` architecture 58 | * `bug_fixes_for_v2.0.0` indicates the branch that the user was on when it was built 59 | 60 | ### Pushing A Docker Image 61 | 62 | Running `make push` will push your container image to a Docker registry. The default configuration will use the 63 | Docker Hub repository for the official kube-router images, cloudnativelabs/kube-router. You can push to a different 64 | repository by changing a couple settings, as described in [Image Options](#image-options) 65 | below. 66 | 67 | ### Makefile Options 68 | 69 | There are several variables which can be modified in the Makefile to customize your builds. They are specified after 70 | your make command like this: `make OPTION=VALUE`. These options can also be set in your environment variables. 71 | 72 | For more details beyond the scope of this document, see the [Makefile](/Makefile) and run `make help`. 73 | 74 | #### Image Options 75 | 76 | You can configure the name and tag of the Docker image with a few variables 77 | passed to `make container` and `make push`. 78 | 79 | Example: 80 | 81 | ```sh 82 | $ make container IMG_FQDN=quay.io IMG_NAMESPACE=bzub IMG_TAG=custom 83 | docker build -t "quay.io/bzub/kube-router-git:custom" . 84 | Sending build context to Docker daemon 151.5MB 85 | Step 1/4 : FROM alpine 86 | ---> a41a7446062d 87 | Step 2/4 : RUN apk add --no-cache iptables ipset 88 | ---> Using cache 89 | ---> 30e25a7640de 90 | Step 3/4 : COPY kube-router / 91 | ---> Using cache 92 | ---> c06f78fd02e8 93 | Step 4/4 : ENTRYPOINT /kube-router 94 | ---> Using cache 95 | ---> 5cfcfe54623e 96 | Successfully built 5cfcfe54623e 97 | Successfully tagged quay.io/bzub/kube-router-git:custom 98 | ``` 99 | 100 | * `REGISTRY` is derived from other options. Set this to something else to 101 | quickly override the Docker image registry used to tag and push images. 102 | * Note: This will override other variables below that make up the image 103 | name/tag. 104 | * `IMG_FQDN` should be set if you are not using Docker Hub for images. 
In 105 | the examples above `IMG_FQDN` is set to `quay.io`. 106 | * `IMG_NAMESPACE` is the Docker registry user or organization. It is used in 107 | URLs. 108 | * Example: quay.io/IMG_NAMESPACE/kube-router 109 | * `NAME` goes onto the end of the Docker registry URL that will be used. 110 | * Example: quay.io/cloudnativelabs/NAME 111 | * `IMG_TAG` is used to override the tag of the Docker image being built. 112 | * `DEV_SUFFIX` is appended to Docker image names that are not for release. By 113 | default these images get a name ending with `-git` to signify that they are 114 | for testing purposes. 115 | Example (DEV-SUFFIX=master-latest): quay.io/cloudnativelabs/kube-router-git:master-latest 116 | 117 | ## Release Workflow 118 | 119 | See [Release Documentation](/RELEASE.md) for more information 120 | 121 | ## Dependency Management 122 | 123 | kube-router uses go modules for managing dependencies see [upstream documentation](https://go.dev/blog/using-go-modules) 124 | for more information 125 | -------------------------------------------------------------------------------- /docs/generic.md: -------------------------------------------------------------------------------- 1 | # Kube-router on generic clusters 2 | 3 | This guide is for running kube-router as the [CNI](https://github.com/containernetworking) network provider for on 4 | premise and/or bare metal clusters outside of a cloud provider's environment. It assumes the initial cluster is 5 | bootstrapped and a networking provider needs configuration. 6 | 7 | All pod networking [CIDRs](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) are allocated by 8 | kube-controller-manager. Kube-router provides service/pod networking, a network policy firewall, and a high performance 9 | [IPVS/LVS](http://www.linuxvirtualserver.org/software/ipvs.html) based service proxy. The network policy firewall and 10 | service proxy are both optional but recommended. 11 | 12 | ## Configuring the Worker Nodes 13 | 14 | If you choose to run kube-router as daemonset, then both kube-apiserver and kubelet must be run with 15 | `--allow-privileged=true` option (see our 16 | [example daemonsets for more information](https://github.com/cloudnativelabs/kube-router/tree/master/daemonset)) 17 | 18 | Ensure your [Container Runtime](https://kubernetes.io/docs/setup/production-environment/container-runtimes/) is 19 | configured to point its CNI configuration directory to `/etc/cni/net.d`. 20 | 21 | This is the default location for both `containerd` and `cri-o`, but can be set specifically if needed: 22 | 23 | ### containerd CRI Configuration 24 | 25 | Here is what the default containerd CNI plugin configuration looks like as of the writing of this document. The default 26 | containerd configuration can be retrieved using: 27 | 28 | ```sh 29 | containerd config default 30 | ``` 31 | 32 | ```toml 33 | [plugins] 34 | [plugins."io.containerd.grpc.v1.cri".cni] 35 | bin_dir = "/opt/cni/bin" 36 | conf_dir = "/etc/cni/net.d" 37 | conf_template = "" 38 | ip_pref = "" 39 | max_conf_num = 1 40 | ``` 41 | 42 | ### cri-o CRI Configuration 43 | 44 | cri-o CRI configuration can be referenced via their 45 | [documentation](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionetwork-table) 46 | 47 | If a previous CNI provider (e.g. weave-net, calico, or flannel) was used, remove old configurations from 48 | `/etc/cni/net.d` on each kubelet. 
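As a rough illustration of that cleanup, something like the following could be run on each node before deploying kube-router (the calico/weave file names are only examples of what a previous provider might have left behind):

```sh
# Example only: inspect what an earlier CNI provider left in the CNI config dir
ls -l /etc/cni/net.d/

# Move any leftover config files aside (back them up rather than deleting outright)
sudo mkdir -p /root/cni-backup
sudo mv /etc/cni/net.d/*calico* /etc/cni/net.d/*weave* /root/cni-backup/ 2>/dev/null || true
```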
49 | 
50 | ### Note: Switching CNI providers on a running cluster requires re-creating all pods to pick up new pod IPs
51 | 
52 | ## Configuring kube-controller-manager
53 | 
54 | If you choose to use kube-router for pod-to-pod network connectivity then
55 | [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/)
56 | needs to be configured to allocate pod CIDRs by passing the `--allocate-node-cidrs=true` flag and providing a
57 | `cluster-cidr` (e.g. by passing `--cluster-cidr=10.32.0.0/12`)
58 | 
59 | For example:
60 | 
61 | ```sh
62 | --allocate-node-cidrs=true
63 | --cluster-cidr=10.32.0.0/12
64 | --service-cluster-ip-range=10.50.0.0/22
65 | ```
66 | 
67 | ## Running kube-router with Everything
68 | 
69 | This runs kube-router with pod/service networking, the network policy firewall, and the service proxy to replace kube-proxy.
70 | The example command uses `10.32.0.0/12` as the pod CIDR address range and `https://cluster01.int.domain.com:6443` as the
71 | [apiserver](https://kubernetes.io/docs/reference/generated/kube-apiserver/) address. Please change these to suit your
72 | cluster.
73 | 
74 | ```sh
75 | CLUSTERCIDR=10.32.0.0/12 \
76 | APISERVER=https://cluster01.int.domain.com:6443 \
77 | sh -c 'curl -s https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/generic-kuberouter-all-features.yaml | \
78 | sed -e "s;%APISERVER%;$APISERVER;g" -e "s;%CLUSTERCIDR%;$CLUSTERCIDR;g"' | \
79 | kubectl apply -f -
80 | ```
81 | 
82 | ### Removing a Previous kube-proxy
83 | 
84 | If [kube-proxy](https://kubernetes.io/docs/reference/generated/kube-proxy/) was ever deployed to the cluster, then you
85 | need to remove it when running kube-router in this capacity or they will conflict with each other.
86 | 
87 | Remove any previously running kube-proxy and all iptables rules it created. Start by deleting the kube-proxy daemonset:
88 | 
89 | ```sh
90 | kubectl -n kube-system delete ds kube-proxy
91 | ```
92 | 
93 | Any iptables rules kube-proxy left around will also need to be cleaned up. This command might differ based on how
94 | kube-proxy was set up or configured:
95 | 
96 | To clean up kube-proxy we can do this with docker, containerd, or cri-o:
97 | 
98 | #### docker
99 | 
100 | ```sh
101 | docker run --privileged -v /lib/modules:/lib/modules --net=host registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy --cleanup
102 | ```
103 | 
104 | #### containerd
105 | 
106 | ```sh
107 | ctr images pull registry.k8s.io/kube-proxy-amd64:v1.28.2
108 | ctr run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
109 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
110 | ```
111 | 
112 | #### cri-o
113 | 
114 | ```sh
115 | crictl pull registry.k8s.io/kube-proxy-amd64:v1.28.2
116 | crictl run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
117 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
118 | ```
119 | 
120 | ## Running kube-router without the service proxy
121 | 
122 | This runs kube-router with pod/service networking and the network policy firewall. The service proxy is disabled.
123 | 
124 | ```sh
125 | kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/generic-kuberouter.yaml
126 | ```
127 | 
128 | In this mode kube-router relies on [kube-proxy](https://kubernetes.io/docs/reference/generated/kube-proxy/) (or some
129 | other network service provider) to provide service networking.
130 | 
131 | When the service proxy is disabled kube-router will use
132 | [in-cluster configuration](https://github.com/kubernetes/client-go/tree/master/examples/in-cluster-client-configuration)
133 | to access the API server through its cluster IP. Service networking must therefore be set up before deploying kube-router.
134 | 
135 | ## Debugging
136 | 
137 | kube-router supports setting the log level via the command line flag -v or --v. To get maximal debug output from
138 | kube-router please start with `--v=3`
139 | 
--------------------------------------------------------------------------------
/docs/health.md:
--------------------------------------------------------------------------------
1 | # Health checking kube-router
2 | 
3 | kube-router currently has basic health checking in the form of heartbeats sent from each controller to the health
4 | controller each time the main loop completes successfully.
5 | 
6 | The health port is 20244 by default but can be changed with the startup option below.
7 | The health path is `/healthz`
8 | 
9 | ```sh
10 | --health-port=
11 | ```
12 | 
13 | If the port is set to 0 (zero) no HTTP endpoint will be made available, but the health controller will still run and
14 | print out any missed heartbeats to STDERR of kube-router
15 | 
16 | If a controller does not send a heartbeat within controllersynctime + 5 seconds, the component will be flagged as
17 | unhealthy.
18 | 
19 | If any of the running components is failing, the whole kube-router state will be marked as failed in the /healthz
20 | endpoint
21 | 
22 | For example, if kube-router is started with
23 | 
24 | ```sh
25 | --run-router=true
26 | --run-firewall=true
27 | --run-service-proxy=true
28 | --run-loadbalancer=true
29 | ```
30 | 
31 | If the route controller, policy controller or service controller exits its main loop and does not publish a heartbeat,
32 | the `/healthz` endpoint will return an error 500 signaling that kube-router is not healthy.
33 | 
--------------------------------------------------------------------------------
/docs/how-it-works.md:
--------------------------------------------------------------------------------
1 | 
2 | # Theory of Operation
3 | 
4 | Kube-router can be run as an agent or a Pod (via DaemonSet) on each node and
5 | leverages standard Linux technologies: **iptables, ipvs/lvs, ipset, iproute2**
6 | 
7 | ## Service Proxy And Load Balancing
8 | 
9 | Blog: [Kubernetes network services proxy with IPVS/LVS](https://cloudnativelabs.github.io/post/2017-05-10-kube-network-service-proxy/)
10 | 
11 | Kube-router uses the IPVS/LVS technology built into Linux to provide L4 load
12 | balancing. Each **ClusterIP**, **NodePort**, and **LoadBalancer** Kubernetes
13 | Service type is configured as an IPVS virtual service. Each Service Endpoint is
14 | configured as a real server for the virtual service. The standard **ipvsadm** tool
15 | can be used to verify the configuration and monitor the active connections.
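For instance, a few commonly used ipvsadm invocations for this are shown below (illustrative only; run them on a node where kube-router is managing services):

```sh
# List all IPVS virtual services and their real servers (the Service endpoints)
sudo ipvsadm -Ln

# Show cumulative connection, packet and byte counters per service
sudo ipvsadm -Ln --stats

# Show current connection/packet/byte rates per service
sudo ipvsadm -Ln --rate
```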
16 | 
17 | Below is an example set of Services on Kubernetes:
18 | 
19 | ![Kube services](./img/svc.jpg)
20 | 
21 | and the Endpoints for the Services:
22 | 
23 | ![Kube services](./img/ep.jpg)
24 | 
25 | and how they get mapped to IPVS by kube-router:
26 | 
27 | ![IPVS configuration](./img/ipvs1.jpg)
28 | 
29 | Kube-router watches the Kubernetes API server to get updates on the
30 | Services/Endpoints and automatically syncs the IPVS configuration to reflect the
31 | desired state of Services. Kube-router uses IPVS masquerading mode and uses
32 | round robin scheduling currently. The source pod IP is preserved so that appropriate
33 | network policies can be applied.
34 | 
35 | ## Pod Ingress Firewall
36 | 
37 | Blog: [Enforcing Kubernetes network policies with iptables](https://cloudnativelabs.github.io/post/2017-05-1-kube-network-policies/)
38 | 
39 | Kube-router provides an implementation of Kubernetes Network Policies through
40 | the use of iptables, ipset and conntrack. All the Pods in a Namespace with a
41 | 'DefaultDeny' ingress isolation policy have ingress blocked. Only traffic that
42 | matches the whitelist rules specified in the network policies is permitted to reach
43 | those Pods. The following set of iptables rules and chains in the 'filter' table
44 | are used to achieve the Network Policies semantics.
45 | 
46 | Each Pod running on the Node which needs ingress blocked by default is matched
47 | in the FORWARD and OUTPUT chains of the filter table and sent to a pod-specific
48 | firewall chain. The below rules are added to match various cases:
49 | 
50 | - Traffic getting switched between the Pods on the same Node through the local
51 | bridge
52 | - Traffic getting routed between the Pods on different Nodes
53 | - Traffic originating from a Pod and going through the Service proxy and getting
54 | routed to a Pod on the same Node
55 | 
56 | ![FORWARD/OUTPUT chain](./img/forward.png)
57 | 
58 | Each Pod-specific firewall chain has a default rule to block the traffic. Rules
59 | are added to jump traffic to the Network Policy specific policy chains. Rules
60 | cover only policies that apply to the destination pod IP. A rule is added to
61 | accept the established traffic to permit the return traffic.
62 | 
63 | ![Pod firewall chain](./img/podfw.png)
64 | 
65 | Each policy chain has rules expressed through source and destination ipsets. The set
66 | of Pods matching the ingress rule in the network policy spec forms the source Pod IP
67 | ipset. The set of Pods matching the pod selector (for destination Pods) in the Network
68 | Policy forms the destination Pod IP ipset.
69 | 
70 | ![Policy chain](./img/policyfw.png)
71 | 
72 | Finally, ipsets are created that are used in forming the rules in the Network
73 | Policy specific chain.
74 | 
75 | ![ipset](./img/ipset.jpg)
76 | 
77 | At runtime, kube-router watches the Kubernetes API server for changes in
78 | namespaces, network policies and pods and dynamically updates the iptables and ipset
79 | configuration to reflect the desired state of the ingress firewall for the pods.
80 | 
81 | ## Pod Networking
82 | 
83 | Blog: [Kubernetes pod networking and beyond with BGP](https://cloudnativelabs.github.io/post/2017-05-22-kube-pod-networking)
84 | 
85 | Kube-router is expected to run on each Node. The subnet of the Node is obtained
86 | from the CNI configuration file on the Node or through the Node.PodCidr. Each
87 | kube-router instance on the Node acts as a BGP router and advertises the Pod
88 | CIDR assigned to the Node. Each Node peers with the rest of the Nodes in the cluster,
89 | forming a full mesh.
Learned routes about the Pod CIDR from the other Nodes (BGP 90 | peers) are injected into local Node routing table. On the data path, inter Node 91 | Pod-to-Pod communication is done by the routing stack on the Node. 92 | -------------------------------------------------------------------------------- /docs/img/ep.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/ep.jpg -------------------------------------------------------------------------------- /docs/img/forward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/forward.png -------------------------------------------------------------------------------- /docs/img/ipset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/ipset.jpg -------------------------------------------------------------------------------- /docs/img/ipvs1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/ipvs1.jpg -------------------------------------------------------------------------------- /docs/img/kube-router-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/kube-router-arch.png -------------------------------------------------------------------------------- /docs/img/logo-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/logo-full.png -------------------------------------------------------------------------------- /docs/img/logo-icon-only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/logo-icon-only.png -------------------------------------------------------------------------------- /docs/img/podfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/podfw.png -------------------------------------------------------------------------------- /docs/img/policyfw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/policyfw.png -------------------------------------------------------------------------------- /docs/img/svc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/img/svc.jpg -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Kube-router Documentation 2 | 3 | The 
documentation is divided into the following sections:
4 | 
5 | ## Introduction
6 | 
7 | - [Introduction](introduction.md)
8 | - [What is Kube-router?](introduction.md#what-is-kube-router)
9 | - [Why Kube-router?](introduction.md#why-kube-router)
10 | 
11 | ## Concepts
12 | 
13 | - [See it in action](see-it-in-action.md)
14 | - [How it works?](how-it-works.md)
15 | - [Architecture](architecture.md)
16 | 
17 | ## User Guide
18 | 
19 | - [User Guide](user-guide.md)
20 | - [Installation](user-guide.md#try-kube-router-with-cluster-installers)
21 | - [Requirements](user-guide.md#requirements)
22 | 
23 | ## Operations guide
24 | 
25 | - [Health](health.md)
26 | - [Observability](observability.md)
27 | - [Troubleshooting](troubleshoot.md)
28 | - [Pod toolbox](pod-toolbox.md)
29 | - [Upgrades](upgrading.md)
30 | - [IPv6 / Dual-Stack](ipv6.md)
31 | - [Load Balancer Support](load-balancer-allocator.md)
32 | 
33 | ## Developer and Contributor Guide
34 | 
35 | - [Developer Guide](developing.md)
36 | - [Contributor Guideline](/CONTRIBUTING.md)
37 | 
--------------------------------------------------------------------------------
/docs/introduction.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | 
3 | Welcome to the introduction guide to Kube-router! This guide is the best place to start with Kube-router. We cover what
4 | kube-router is, what problems it can solve, how it compares to existing software, and how you can get started using it.
5 | If you are familiar with the basics of Kube-router, head over to the next sections that provide a more detailed
6 | reference of available features.
7 | 
8 | ## What is Kube-router
9 | 
10 | If you are not familiar with the Kubernetes networking model, it is recommended that you first familiarize yourself with
11 | the Kubernetes [networking model](https://kubernetes.io/docs/concepts/cluster-administration/networking/#kubernetes-model).
12 | Essentially, Kubernetes expects:
13 | 
14 | - all containers can communicate with all other containers without NAT
15 | - all nodes can communicate with all containers (and vice-versa) without NAT
16 | - the IP that a container sees itself as is the same IP that others see it as
17 | 
18 | Kubernetes only prescribes the requirements for the networking model but does not provide any default implementation.
19 | For a functional Kubernetes cluster one has to deploy what is called a CNI or pod networking solution that provides
20 | the above functionality.
21 | 
22 | Any non-trivial containerized application will end up running multiple pods and exposing different services.
23 | The [Service](https://kubernetes.io/docs/concepts/services-networking/service/) abstraction in Kubernetes is an essential
24 | building block that helps in service discovery and load balancing. A layer-4 service proxy must be deployed to
25 | the Kubernetes cluster that provides the load-balancing for the services exposed by the pods.
26 | 
27 | Once you have pod-to-pod networking established and have a service proxy that provides load-balancing, you need a way to
28 | secure your pods. Kubernetes
29 | [Network Policies](https://kubernetes.io/docs/concepts/services-networking/network-policies/) provide a specification to
30 | secure pods. You need to deploy a solution that implements the network policy specification and provides security for your
31 | pods.
32 | 
33 | If you utilize [LoadBalancer](https://kubernetes.io/docs/concepts/services-networking/service/#loadbalancer) services in
34 | your cluster, then you need to deploy a solution that will allocate and manage your LoadBalancer IP address space.
35 | 
36 | Kube-router is a turnkey solution for Kubernetes networking that provides all the above essential functionality in one
37 | single elegant package.
38 | 
39 | ## Why Kube-router
40 | 
41 | Networking is hard. There are multiple Kubernetes networking solutions that provide pod networking, network policy, etc.
42 | But when you deploy an individual solution for each piece of functionality you end up with a lot of moving parts, making
43 | it difficult to operate and troubleshoot.
44 | 
45 | Kube-router is a lean yet powerful all-in-one alternative to several network components used in typical Kubernetes
46 | clusters. All this from a single DaemonSet/Binary. It doesn't get any easier.
47 | 
48 | Kube-router also uses best-of-breed low-level kernel solutions for maximum performance. Kube-router uses IPVS/LVS for
49 | service proxying and provides direct routing between the nodes.
50 | 
51 | Kube-router also provides unique and advanced functionality such as DSR (Direct Server Return) and ECMP based network
52 | load balancing.
53 | 
--------------------------------------------------------------------------------
/docs/kops.md:
--------------------------------------------------------------------------------
1 | # Kops Integration
2 | 
3 | Kops version 1.6.2 and above now officially includes an integration with kube-router.
4 | 
5 | Please follow the instructions at their
6 | [official documentation](https://github.com/kubernetes/kops/blob/master/docs/networking/kube-router.md) to provision a
7 | Kubernetes cluster with Kube-router.
8 | 
9 | Use the [latest version](https://github.com/kubernetes/kops/releases) of the kops binaries, which have kube-router support.
10 | 
--------------------------------------------------------------------------------
/docs/kubeadm.md:
--------------------------------------------------------------------------------
1 | # Deploying kube-router with kubeadm
2 | 
3 | Please follow the [steps](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) to install a Kubernetes
4 | cluster with kubeadm; however, you must specify `--pod-network-cidr` when you run `kubeadm init`.
5 | 
6 | kube-router relies on kube-controller-manager to allocate pod CIDRs for the nodes.
7 | 
8 | kube-router provides pod networking, network policy and a high performing IPVS/LVS based service proxy. Depending on your
9 | choice to use kube-router for the service proxy you have two options.
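For reference, a control-plane bootstrap that satisfies the pod CIDR requirement mentioned above might look like the following sketch (the CIDR value is only an example; choose one that does not overlap your infrastructure):

```sh
# Example only: give kubeadm a pod network CIDR so kube-controller-manager
# can allocate per-node pod CIDRs that kube-router will use
sudo kubeadm init --pod-network-cidr=10.244.0.0/16
```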
10 | 
11 | ## kube-router Providing Pod Networking and Network Policy
12 | 
13 | For the step #3 **Installing a Pod network add-on** install a kube-router pod network and network policy add-on with the
14 | following command:
15 | 
16 | ```sh
17 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml
18 | ```
19 | 
20 | ## kube-router Providing Service Proxy, Firewall and Pod Networking
21 | 
22 | For the step #3 **Installing a Pod network add-on** install a kube-router pod network and network policy add-on with the
23 | following command:
24 | 
25 | ```sh
26 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter-all-features.yaml
27 | ```
28 | 
29 | Since kube-router now provides the service proxy as well, run the below commands to remove kube-proxy and clean up any
30 | iptables configuration it may have done.
31 | 
32 | ```sh
33 | KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n kube-system delete ds kube-proxy
34 | ```
35 | 
36 | To clean up kube-proxy we can do this with docker, containerd, or cri-o:
37 | 
38 | ### docker
39 | 
40 | ```sh
41 | docker run --privileged -v /lib/modules:/lib/modules --net=host registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy --cleanup
42 | ```
43 | 
44 | ### containerd
45 | 
46 | ```sh
47 | ctr images pull registry.k8s.io/kube-proxy-amd64:v1.28.2
48 | ctr run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
49 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
50 | ```
51 | 
52 | ### cri-o
53 | 
54 | ```sh
55 | crictl pull registry.k8s.io/kube-proxy-amd64:v1.28.2
56 | crictl run --rm --privileged --net-host --mount type=bind,src=/lib/modules,dst=/lib/modules,options=rbind:ro \
57 | registry.k8s.io/kube-proxy-amd64:v1.28.2 kube-proxy-cleanup kube-proxy --cleanup
58 | ```
59 | 
--------------------------------------------------------------------------------
/docs/load-balancer-allocator.md:
--------------------------------------------------------------------------------
1 | # Load Balancer allocator
2 | 
3 | ## What does it do
4 | 
5 | The load balancer allocator controller looks for services with the type LoadBalancer and tries to allocate addresses for
6 | them if needed. The controller doesn't enable any announcement of the addresses by default, so
7 | `--advertise-loadbalancer-ip` should be set to true and BGP peers configured.
8 | 
9 | ## Load balancer classes
10 | 
11 | By default the controller allocates addresses for all LoadBalancer services where `loadBalancerClass` is empty
12 | or set to one of "default" or "kube-router". If `--loadbalancer-default-class` is set to false, the controller will only
13 | handle services with the class set to "kube-router".
14 | 
15 | ## RBAC permissions
16 | 
17 | The controller needs some extra permissions to get, create and update leases for leader election and to update services
18 | with allocated addresses.
19 | 
20 | Example permissions:
21 | 
22 | ```yaml
23 | kind: ClusterRole
24 | apiVersion: rbac.authorization.k8s.io/v1
25 | metadata:
26 | name: kube-router
27 | namespace: kube-system
28 | rules:
29 | - apiGroups:
30 | - "coordination.k8s.io"
31 | resources:
32 | - leases
33 | verbs:
34 | - get
35 | - create
36 | - update
37 | - apiGroups:
38 | - ""
39 | resources:
40 | - services/status
41 | verbs:
42 | - update
43 | ```
44 | 
45 | ## Environment variables
46 | 
47 | The controller uses the environment variable `POD_NAME` as the identity for the lease used for leader election.
48 | By using the Kubernetes downward API to set `POD_NAME` to the pod name, the lease identity will match the current leader.
49 | 
50 | ```yaml
51 | ---
52 | apiVersion: apps/v1
53 | kind: DaemonSet
54 | metadata:
55 | labels:
56 | k8s-app: kube-router
57 | tier: node
58 | name: kube-router
59 | namespace: kube-system
60 | spec:
61 | ...
62 | template:
63 | metadata:
64 | ....
65 | spec:
66 | ...
67 | env:
68 | - name: POD_NAME
69 | valueFrom:
70 | fieldRef:
71 | fieldPath: metadata.name
72 | ...
73 | ```
74 | 
75 | The environment variable `POD_NAMESPACE` can also be specified to set the namespace used for the lease.
76 | By default the namespace is looked up from within the pod using `/var/run/secrets/kubernetes.io/serviceaccount/namespace`.
77 | 
78 | ## Running outside kubernetes
79 | 
80 | When running the controller outside a pod, both `POD_NAME` and `POD_NAMESPACE` must be set for the controller to work.
81 | `POD_NAME` should be unique per instance, so using for example the hostname of the machine might be a good idea.
82 | `POD_NAMESPACE` must be the same across all instances running in the same cluster.
83 | 
84 | ## Notes
85 | 
86 | It's not possible to specify the addresses for the load balancer services. An externalIP service can be used instead.
87 | 
--------------------------------------------------------------------------------
/docs/metrics.md:
--------------------------------------------------------------------------------
1 | # Metrics
2 | 
3 | ## Scraping kube-router metrics with Prometheus
4 | 
5 | The scope of this document is to describe how to set up the
6 | [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) needed for
7 | [Prometheus](https://prometheus.io/) to use
8 | [Kubernetes SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#) to
9 | discover & scrape kube-router [pods](https://kubernetes.io/docs/concepts/workloads/pods/pod/).
10 | 
11 | For help with installing Prometheus please see their [docs](https://prometheus.io/docs/introduction/overview/)
12 | 
13 | Metrics options:
14 | 
15 | ```sh
16 | --metrics-path string Path to serve Prometheus metrics on ( default: /metrics )
17 | --metrics-port uint16 <0-65535> Prometheus metrics port to use ( default: 0, disabled )
18 | ```
19 | 
20 | To enable kube-router metrics, start kube-router with `--metrics-port` and provide a port greater than 0
21 | 
22 | Metrics are generally exported at the same rate as the sync period for each service.
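As a quick sanity check once metrics are enabled, the endpoint can be queried directly from a node running kube-router; this sketch assumes `--metrics-port=8080` and the default `/metrics` path:

```sh
# Assumed port/path: fetch the Prometheus exposition output and keep only
# the kube-router specific series
curl -s http://localhost:8080/metrics | grep '^kube_router_'
```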
23 | 
24 | The default values unless otherwise specified are
25 | 
26 | * iptables-sync-period - `1 min`
27 | * ipvs-sync-period - `1 min`
28 | * routes-sync-period - `1 min`
29 | 
30 | By enabling
31 | [Kubernetes SD](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#) in the
32 | Prometheus configuration & adding the required annotations, Prometheus can automatically discover & scrape kube-router metrics
33 | 
34 | ## Version notes
35 | 
36 | kube-router v0.2.4 received a metrics overhaul where some metrics were changed into histograms; additional metrics were
37 | also added. Please make sure you are using the latest dashboard version with versions >= v0.2.4
38 | 
39 | kube-router 0.1.0-rc2 and upwards supports the runtime configuration for controlling where to expose the metrics. If
40 | you are using an older version, the metrics path & port are locked to `/metrics` & `8080`
41 | 
42 | ## Available metrics
43 | 
44 | If metrics are enabled, only services that are running have their metrics exposed
45 | 
46 | The following metrics are exposed by kube-router, prefixed by `kube_router_`
47 | 
48 | ### run-router = true
49 | 
50 | * controller_bgp_peers
51 | Number of BGP peers of the instance
52 | * controller_bgp_advertisements_received
53 | Total number of BGP advertisements received since kube-router started
54 | * controller_bgp_advertisements_sent
55 | Total number of BGP advertisements sent since kube-router started
56 | * controller_bgp_internal_peers_sync_time
57 | Time it took for the BGP internal peer sync loop to complete
58 | * controller_routes_sync_time
59 | Time it took for controller to sync routes
60 | 
61 | ### run-firewall=true
62 | 
63 | * controller_iptables_sync_time
64 | Time it took for the iptables sync loop to complete
65 | * controller_policy_chains_sync_time
66 | Time it took for controller to sync policy chains
67 | 
68 | ### run-service-proxy = true
69 | 
70 | * controller_ipvs_services_sync_time
71 | Time it took for the ipvs sync loop to complete
72 | * controller_ipvs_services
73 | The number of ipvs services in the instance
74 | * controller_ipvs_metrics_export_time
75 | The time it took to run the metrics export for IPVS services
76 | * service_total_connections
77 | Total connections made to the service since creation
78 | * service_packets_in
79 | Total number of packets received by the service
80 | * service_packets_out
81 | Total number of packets sent by the service
82 | * service_bytes_in
83 | Total bytes received by the service
84 | * service_bytes_out
85 | Total bytes sent by the service
86 | * service_pps_in
87 | Incoming packets per second
88 | * service_pps_out
89 | Outgoing packets per second
90 | * service_cps
91 | Connections per second
92 | * service_bps_in
93 | Incoming bytes per second
94 | * service_bps_out
95 | Outgoing bytes per second
96 | 
97 | To get a grouped list of CPS for each service a Prometheus query could look like this e.g:
98 | `sum(kube_router_service_cps) by (svc_namespace, service_name)`
99 | 
100 | ## Grafana Dashboard
101 | 
102 | This repo contains an example
103 | [Grafana dashboard](https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/dashboard/kube-router.json)
104 | utilizing all the above exposed metrics from kube-router.
105 | ![dashboard](https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/dashboard/dashboard.png) 106 | -------------------------------------------------------------------------------- /docs/observability.md: -------------------------------------------------------------------------------- 1 | # Observability 2 | 3 | ## Observing kube-router with Metrics 4 | 5 | See [metrics documentation](metrics.md) for more information 6 | 7 | ## Observing dropped traffic due to network policy enforcements 8 | 9 | Traffic that gets rejected due to network policy enforcements gets logged by kube-route using iptables NFLOG target 10 | under the group 100. Simplest way to observe the dropped packets by kube-router is by running tcpdump on `nflog:100` 11 | interface for e.g. `tcpdump -i nflog:100 -n`. You can also configure ulogd to monitor dropped packets in desired output 12 | format. Please see [the official ulogd documentation](https://kb.gtkc.net/iptables-with-ulogd-quick-howto/) for an 13 | example configuration to setup a stack to log packets. 14 | -------------------------------------------------------------------------------- /docs/pod-toolbox.md: -------------------------------------------------------------------------------- 1 | # Pod Toolbox 2 | 3 | When kube-router is ran as a Pod within your Kubernetes cluster, it also ships 4 | with a number of tools automatically configured for your cluster. These can be 5 | used to troubleshoot issues and learn more about how cluster networking is 6 | performed. 7 | 8 | ## Logging In 9 | 10 | Here's a quick way to get going on a random node in your cluster: 11 | 12 | ```sh 13 | KR_POD=$(basename $(kubectl -n kube-system get pods -l k8s-app=kube-router --output name|head -n1)) 14 | kubectl -n kube-system exec -it ${KR_POD} bash 15 | ``` 16 | 17 | Use `kubectl -n kube-system get pods -l k8s-app=kube-router -o wide` to see what 18 | nodes are running which pods. This will help if you want to investigate a 19 | particular node. 20 | 21 | ## Tools And Usage 22 | 23 | Once logged in you will see some help on using the tools in the container. 24 | 25 | For example: 26 | 27 | ```console 28 | Welcome to kube-router on "node1.zbrbdl"! 29 | 30 | For debugging, the following tools are available: 31 | - ipvsadm | Gather info about Virtual Services and Real Servers via IPVS. 32 | | Examples: 33 | | ## Show all options 34 | | ipvsadm --help 35 | | ## List Services and Endpoints handled by IPVS 36 | | ipvsadm -ln 37 | | ## Show traffic rate information 38 | | ipvsadm -ln --rate 39 | | ## Show cumulative traffic 40 | | ipvsadm -ln --stats 41 | 42 | - gobgp | Get BGP related information from your nodes. 43 | | 44 | | Tab-completion is ready to use, just type "gobgp " 45 | | to see the subcommands available. 46 | | 47 | | By default gobgp will query the Node this Pod is running 48 | | on, i.e. "node1.zbrbdl". To query a different node use 49 | | "gobgp --host node02.mydomain" as an example. 
50 | | 51 | | For more examples see: https://github.com/osrg/gobgp/blob/master/docs/sources/cli-command-syntax.md 52 | 53 | Here's a quick look at what's happening on this Node 54 | --- BGP Server Configuration --- 55 | AS: 64512 56 | Router-ID: 10.10.3.2 57 | Listening Port: 179, Addresses: 0.0.0.0, :: 58 | 59 | --- BGP Neighbors --- 60 | Peer AS Up/Down State |#Received Accepted 61 | 64512 2d 01:05:07 Establ | 1 1 62 | 63 | --- BGP Route Info --- 64 | Network Next Hop AS_PATH Age Attrs 65 | *> 10.2.0.0/24 10.10.3.3 4000 400000 300000 40001 2d 01:05:20 [{Origin: i} {LocalPref: 100}] 66 | *> 10.2.1.0/24 10.10.3.2 4000 400000 300000 40001 00:00:36 [{Origin: i}] 67 | 68 | --- IPVS Services --- 69 | IP Virtual Server version 1.2.1 (size=4096) 70 | Prot LocalAddress:Port Scheduler Flags 71 | -> RemoteAddress:Port Forward Weight ActiveConn InActConn 72 | TCP 10.3.0.1:443 rr persistent 10800 mask 0.0.0.0 73 | -> 10.10.3.2:443 Masq 1 0 0 74 | TCP 10.3.0.10:53 rr 75 | -> 10.2.0.2:53 Masq 1 0 0 76 | TCP 10.3.0.15:2379 rr 77 | -> 10.10.3.3:2379 Masq 1 45 0 78 | TCP 10.3.0.155:2379 rr 79 | -> 10.10.3.3:2379 Masq 1 0 0 80 | UDP 10.3.0.10:53 rr 81 | -> 10.2.0.2:53 Masq 1 0 0 82 | ``` 83 | -------------------------------------------------------------------------------- /docs/see-it-in-action.md: -------------------------------------------------------------------------------- 1 | # See Kube-router in action 2 | 3 | ## Network Services Controller 4 | 5 | Network services controller is responsible for reading the services and endpoints information from Kubernetes API server 6 | and configure IPVS on each cluster node accordingly. 7 | 8 | Please [read our blog](https://cloudnativelabs.github.io/post/2017-05-10-kube-network-service-proxy/) for design details 9 | and pros and cons compared to iptables based Kube-proxy 10 | 11 | Demo of Kube-router's IPVS based Kubernetes network service proxy 12 | 13 | [![asciicast](https://asciinema.org/a/120312.png)](https://asciinema.org/a/120312) 14 | 15 | Features: 16 | 17 | - round robin load balancing 18 | - client IP based session persistence 19 | - source IP is preserved if service controller is used in conjuction with network routes controller (kube-router with 20 | --run-router flag) 21 | - option to explicitly masquerade (SNAT) with --masquerade-all flag 22 | 23 | ## Network Policy Controller 24 | 25 | Network policy controller is responsible for reading the namespace, network policy and pods information from Kubernetes 26 | API server and configure iptables accordingly to provide ingress filter to the pods. 27 | 28 | Kube-router supports the networking.k8s.io/NetworkPolicy API or network policy V1/GA 29 | [semantics](https://github.com/kubernetes/kubernetes/pull/39164#issue-197243974) and also network policy beta semantics. 30 | 31 | Please [read our blog](https://cloudnativelabs.github.io/post/2017-05-1-kube-network-policies/) for design details of 32 | Network Policy controller 33 | 34 | Demo of Kube-router's iptables based implementaton of network policies 35 | 36 | [![asciicast](https://asciinema.org/a/120735.png)](https://asciinema.org/a/120735) 37 | 38 | ## Network Routes Controller 39 | 40 | Network routes controller is responsible for reading pod CIDR allocated by controller manager to the node, and 41 | advertises the routes to the rest of the nodes in the cluster (BGP peers). Use of BGP is transparent to user for basic 42 | pod-to-pod networking. 
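Once the routes controller is running (kube-router started with `--run-router`), a quick way to see this in practice is to use the bundled `gobgp` CLI from inside any kube-router pod (see the Pod Toolbox docs); the commands below are just a sketch:

```sh
# List the BGP neighbors this node peers with (the other kube-router nodes by default)
gobgp neighbor
# Show the pod CIDR routes currently held in the global RIB
gobgp global rib
```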
43 | 44 | [![asciicast](https://asciinema.org/a/120885.png)](https://asciinema.org/a/120885) 45 | 46 | However, BGP can be leveraged for other use cases like advertising the cluster IP, routable pod IPs, etc. Only in such 47 | use cases is an understanding of BGP and its configuration required. Please see the demo below of how kube-router advertises cluster 48 | IPs and pod CIDRs to an external BGP router 49 | 50 | [![asciicast](https://asciinema.org/a/121635.png)](https://asciinema.org/a/121635) 51 | -------------------------------------------------------------------------------- /docs/troubleshoot.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudnativelabs/kube-router/b9d5cb5a932345ed2247ea454a5b4c39beb6a6be/docs/troubleshoot.md -------------------------------------------------------------------------------- /docs/tunnels.md: -------------------------------------------------------------------------------- 1 | # Tunnels in kube-router 2 | 3 | There are several situations in which kube-router will use tunnels in order to perform certain forms of overlay / 4 | underlay routing within the cluster. To accomplish this, kube-router makes use of 5 | [IPIP](https://en.wikipedia.org/wiki/IP_in_IP) overlay tunnels that are built into the Linux kernel and instrumented 6 | with iproute2. 7 | 8 | ## Scenarios for Tunnelling 9 | 10 | By default, kube-router enables the option `--enable-overlay`, which will perform overlay networking based upon the 11 | `--overlay-type` setting (by default set to `subnet`). So out of the box, kube-router will create a tunnel for 12 | pod-to-pod traffic any time it comes across a kube-router enabled node that is not within the subnet of its primary 13 | interface. 14 | 15 | Additionally, if `--overlay-type` is set to `full`, kube-router will create a tunnel for all pod-to-pod traffic and 16 | attempt to transit any pod traffic in the cluster via an IPIP overlay network between nodes. 17 | 18 | Finally, kube-router also uses tunnels for DSR ([Direct Server Return](dsr.md)). In this case, the inbound traffic is 19 | encapsulated in an IPIP packet by IPVS after it reaches the node and before it is sent to the pod for processing. This 20 | allows the return IP address of the sender to be preserved at the pod level so that it can be sent directly back to the 21 | requestor (rather than being routed in a synchronous fashion). 22 | 23 | ## Encapsulation Types 24 | 25 | * IPIP (IP in IP) - This is the default method of encapsulation that kube-router uses 26 | * FoU (Foo over UDP) - This is an optional type of IPIP encapsulation that kube-router uses if the user enables it 27 | 28 | ### FoU Details 29 | 30 | Specifically, kube-router uses GUE 31 | ([Generic UDP Encapsulation](https://developers.redhat.com/blog/2019/05/17/an-introduction-to-linux-virtual-interfaces-tunnels#gue)) 32 | in order to support both IPv4 and IPv6 FoU tunnels. This option can be enabled via the kube-router parameter 33 | `--overlay-encap=fou`. Optionally, the user can also specify a desired port for this traffic via the 34 | `--overlay-encap-port` parameter (by default set to `5555`). 35 | 36 | ## IPIP with Azure 37 | 38 | Unfortunately, Azure doesn't allow IPIP encapsulation on their network. So users that want to use an overlay network 39 | will need to enable `fou` support in order to deploy kube-router in an Azure environment.
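As a sketch, the relevant arguments for such a deployment (using the flags documented above; the `full` overlay type is just one possible choice) might be:

```sh
kube-router --run-router \
  --enable-overlay \
  --overlay-type=full \
  --overlay-encap=fou \
  --overlay-encap-port=5555
```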
40 | 41 | ## Changing Between Tunnel Types in a Live Cluster 42 | 43 | While it is possible to change a running cluster between `ipip` and `fou` type tunnels, administrators should be aware 44 | that during the rollout pod-to-pod traffic will be dropped between nodes. Since, in almost all rollout 45 | scenarios, kube-router is rolled out gracefully one pod or host at a time, during the rollout there will be 46 | mismatches in encapsulation support between nodes, as invariably one node will have an upgraded 47 | kube-router while another node may still be running a previous deployment. 48 | 49 | When this happens, the two nodes will have conflicting encapsulation setups on their tunnels and traffic will not be able to be 50 | sent between them until they are using a consistent encapsulation protocol. 51 | 52 | Once all nodes have been upgraded to the destination configuration, pod-to-pod traffic patterns should return to normal. 53 | -------------------------------------------------------------------------------- /docs/upgrading.md: -------------------------------------------------------------------------------- 1 | # Upgrading kube-router 2 | 3 | ## Breaking Changes 4 | 5 | We follow semantic versioning and try to the best of our abilities to maintain a 6 | stable interface between patch versions. For example, `v0.1.1` -> `v0.1.2` 7 | should be a perfectly safe upgrade path, without data or service interruption. 8 | However, major (`vX.0.0`) and minor (`v0.Y.0`) version upgrades may contain 9 | breaking changes, which will be detailed here and in the release notes. 10 | 11 | First check if you are upgrading across one of the 12 | [breaking change versions](#breaking-change-version-history). If so, read the 13 | relevant section(s) first before proceeding with the general guidelines below. 14 | 15 | ## General Guidelines 16 | 17 | ### Image Pull Policy 18 | 19 | Here we will assume that you have the following in your kube-router DaemonSet: 20 | 21 | ```yaml 22 | imagePullPolicy: Always 23 | ``` 24 | 25 | If that's not the case, you will need to manually pull the desired image version 26 | on each of your nodes with a command like: `docker pull 27 | cloudnativelabs/kube-router:VERSION` 28 | 29 | ### Without Rolling Updates 30 | 31 | This is the default situation with our DaemonSet manifests. We will soon be 32 | switching these manifests to use Rolling Updates though. 33 | 34 | The following example(s) show an upgrade from `v0.0.15` to `v0.0.16`. 35 | 36 | First we will modify the kube-router DaemonSet resource's image field: 37 | 38 | ```sh 39 | kubectl -n kube-system set image ds/kube-router kube-router=cloudnativelabs/kube-router:v0.0.16 40 | ``` 41 | 42 | This does not actually trigger any version changes yet. It is recommended that 43 | you upgrade only one node and perform any tests you see fit to ensure nothing 44 | goes wrong.
45 | 46 | For example, we'll test upgrading kube-router on worker-01: 47 | 48 | ```sh 49 | TEST_NODE="worker-01" 50 | TEST_POD="$(kubectl -n kube-system get pods -o wide|grep -E "^kube-router.*${TEST_NODE}"|awk '{ print $1 }')" 51 | 52 | kubectl -n kube-system delete pod "${TEST_POD}" 53 | ``` 54 | 55 | You can watch to make sure the new kube-router pod comes up and stays running 56 | with: 57 | 58 | ```sh 59 | kubectl -n kube-system get pods -o wide -w 60 | ``` 61 | 62 | Check the logs with: 63 | 64 | ```sh 65 | TEST_NODE="worker-01" 66 | TEST_POD="$(kubectl -n kube-system get pods -o wide|grep -E "^kube-router.*${TEST_NODE}"|awk '{ print $1 }')" 67 | 68 | kubectl -n kube-system logs "${TEST_POD}" 69 | ``` 70 | 71 | If it all looks good, go ahead and upgrade kube-router on all nodes: 72 | 73 | ```sh 74 | kubectl -n kube-system delete pods -l k8s-app=kube-router 75 | ``` 76 | 77 | ### With Rolling Updates 78 | 79 | After updating a DaemonSet template, old DaemonSet pods will be killed, and new DaemonSet pods will be created 80 | automatically, in a controlled fashion. 81 | 82 | If your global BGP peers support graceful restarts and have it enabled, 83 | [rolling updates](https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/) can be used to upgrade your 84 | kube-router DaemonSet without network downtime. 85 | 86 | To enable graceful BGP restart, kube-router must be started with `--bgp-graceful-restart` 87 | 88 | To enable rolling updates on your kube-router DaemonSet, modify it and add an updateStrategy 89 | 90 | ```yaml 91 | updateStrategy: 92 | type: RollingUpdate 93 | rollingUpdate: 94 | maxUnavailable: 1 95 | ``` 96 | 97 | maxUnavailable controls the maximum number of pods to upgrade simultaneously 98 | 99 | Starting from the top of the DaemonSet, it should look like this after you are done editing: 100 | 101 | ```yaml 102 | apiVersion: extensions/v1beta1 103 | kind: DaemonSet 104 | metadata: 105 | labels: 106 | k8s-app: kube-router 107 | tier: node 108 | name: kube-router 109 | namespace: kube-system 110 | spec: 111 | updateStrategy: 112 | type: RollingUpdate 113 | rollingUpdate: 114 | maxUnavailable: 1 115 | ... 116 | ``` 117 | 118 | ## Breaking Change Version History 119 | 120 | This section covers version-specific upgrade instructions. 121 | 122 | ### v0.0.X alpha versions 123 | 124 | While kube-router is in its alpha stage, changes can be expected to be rapid. 125 | Therefore we cannot guarantee that a new alpha release will not break previously 126 | expected behavior. 127 | 128 | ### v0.0.17 (aka v0.1.0-rc1) 129 | 130 | This version brings changes to hairpin and BGP peering CLI/annotation 131 | configuration flags/keys.
132 | 133 | CLI flag changes: 134 | 135 | - OLD: `--peer-router` -> NEW: `--peer-router-ips` 136 | - OLD: `--peer-asn` -> NEW: `--peer-router-asns` 137 | 138 | CLI flag additions: 139 | 140 | - NEW: `--peer-router-passwords` 141 | 142 | Annotation key changes: 143 | 144 | - OLD: `kube-router.io/hairpin-mode=` -> NEW: 145 | `kube-router.io/service.hairpin=` 146 | - OLD: `net.kuberouter.nodeasn=` -> NEW: `kube-router.io/node.asn=` 147 | - OLD: `net.kuberouter.node.bgppeer.address=` -> NEW: `kube-router.io/peer.ips` 148 | - OLD: `net.kuberouter.node.bgppeer.asn` -> NEW: `kube-router.io/peer.asns` 149 | 150 | Annotation key additions: 151 | 152 | - NEW: `kube-router.io/peer.passwords` 153 | 154 | #### v0.0.17 Upgrade Procedure 155 | 156 | For CLI flag changes, all that is required is to change the flag names you use 157 | above to their new names at the same time that you change the image version. 158 | 159 | ```sh 160 | kubectl -n kube-system edit ds kube-router 161 | ``` 162 | 163 | For Annotations, the recommended approach is to copy all the values of 164 | your current annotations into new annotations with the updated keys. 165 | 166 | You can get a quick look at all your service and node annotations with these 167 | commands: 168 | 169 | ```sh 170 | kubectl describe services --all-namespaces |grep -E '^(Name:|Annotations:)' 171 | kubectl describe nodes |grep -E '^(Name:|Annotations:)' 172 | ``` 173 | 174 | For example if you have a service annotation to enable Hairpin mode like: 175 | 176 | ```sh 177 | Name: hairpin-service 178 | Annotations: kube-router.io/hairpin-mode= 179 | ``` 180 | 181 | You will then want to make a new annotation with the new key: 182 | 183 | ```sh 184 | kubectl annotate service hairpin-service "kube-router.io/service.hairpin=" 185 | ``` 186 | 187 | Once all new annotations are created, proceed with the 188 | [General Guidelines](#general-guidelines). After the upgrades tested and 189 | complete, you can delete the old annotations. 190 | 191 | ```sh 192 | kubectl annotate service hairpin-service "kube-router.io/hairpin-mode-" 193 | ``` 194 | -------------------------------------------------------------------------------- /pkg/bgp/id.go: -------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "hash/fnv" 8 | "net" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 13 | gobgp "github.com/osrg/gobgp/v3/pkg/packet/bgp" 14 | ) 15 | 16 | const ( 17 | CommunityMaxSize = 32 18 | CommunityMaxPartSize = 16 19 | ) 20 | 21 | // GenerateRouterID will generate a router ID based upon the user's configuration (or lack there of) and the node's 22 | // primary IP address if the user has not specified. If the user has configured the router ID as "generate" then we 23 | // will generate a router ID based upon fnv hashing the node's primary IP address. 
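// For example (illustrative only): with configRouterID set to "generate" and a
// primary node IP of 10.10.3.2, the router ID becomes the 32-bit FNV-1a hash of
// that IP's bytes rendered as a dotted quad rather than the IP itself; any other
// non-empty configRouterID is returned verbatim, and an empty value falls back to
// the node's primary IPv4 address (IPv6-only nodes must set a router ID).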
24 | func GenerateRouterID(nodeIPAware utils.NodeIPAware, configRouterID string) (string, error) { 25 | switch { 26 | case configRouterID == "generate": 27 | h := fnv.New32a() 28 | h.Write(nodeIPAware.GetPrimaryNodeIP()) 29 | hs := h.Sum32() 30 | gip := make(net.IP, 4) 31 | binary.BigEndian.PutUint32(gip, hs) 32 | return gip.String(), nil 33 | case configRouterID != "": 34 | return configRouterID, nil 35 | } 36 | 37 | if nodeIPAware.GetPrimaryNodeIP().To4() == nil { 38 | return "", errors.New("router-id must be specified when primary node IP is an IPv6 address") 39 | } 40 | return nodeIPAware.GetPrimaryNodeIP().String(), nil 41 | } 42 | 43 | // ValidateCommunity takes in a string and attempts to parse a BGP community out of it in a way that is similar to 44 | // gobgp (internal/pkg/table/policy.go:ParseCommunity()). If it is not able to parse the community information it 45 | // returns an error. 46 | func ValidateCommunity(arg string) error { 47 | _, err := strconv.ParseUint(arg, 10, CommunityMaxSize) 48 | if err == nil { 49 | return nil 50 | } 51 | 52 | elem1, elem2, found := strings.Cut(arg, ":") 53 | if found { 54 | if _, err := strconv.ParseUint(elem1, 10, CommunityMaxPartSize); err == nil { 55 | if _, err = strconv.ParseUint(elem2, 10, CommunityMaxPartSize); err == nil { 56 | return nil 57 | } 58 | } 59 | } 60 | for _, v := range gobgp.WellKnownCommunityNameMap { 61 | if arg == v { 62 | return nil 63 | } 64 | } 65 | return fmt.Errorf("failed to parse %s as community", arg) 66 | } 67 | -------------------------------------------------------------------------------- /pkg/bgp/id_test.go: -------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_ValidateCommunity(t *testing.T) { 10 | t.Run("BGP community specified as a 32-bit integer should pass validation", func(t *testing.T) { 11 | assert.Nil(t, ValidateCommunity("4294967041")) 12 | assert.Nil(t, ValidateCommunity("4294967295")) 13 | }) 14 | t.Run("BGP community specified as 2 16-bit integers should pass validation", func(t *testing.T) { 15 | assert.Nil(t, ValidateCommunity("65535:65281")) 16 | assert.Nil(t, ValidateCommunity("65535:65535")) 17 | }) 18 | t.Run("Well known BGP communities passed as a string should pass validation", func(t *testing.T) { 19 | assert.Nil(t, ValidateCommunity("no-export")) 20 | assert.Nil(t, ValidateCommunity("internet")) 21 | assert.Nil(t, ValidateCommunity("planned-shut")) 22 | assert.Nil(t, ValidateCommunity("accept-own")) 23 | assert.Nil(t, ValidateCommunity("blackhole")) 24 | assert.Nil(t, ValidateCommunity("no-advertise")) 25 | assert.Nil(t, ValidateCommunity("no-peer")) 26 | }) 27 | t.Run("BGP community that is greater than 32-bit integer should fail validation", func(t *testing.T) { 28 | assert.Error(t, ValidateCommunity("4294967296")) 29 | }) 30 | t.Run("BGP community that is greater than 2 16-bit integers should fail validation", func(t *testing.T) { 31 | assert.Error(t, ValidateCommunity("65536:65535")) 32 | assert.Error(t, ValidateCommunity("65535:65536")) 33 | assert.Error(t, ValidateCommunity("65536:65536")) 34 | }) 35 | t.Run("BGP community that is not a number should fail validation", func(t *testing.T) { 36 | assert.Error(t, ValidateCommunity("0xFFFFFFFF")) 37 | assert.Error(t, ValidateCommunity("community")) 38 | }) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/bgp/parse.go: 
-------------------------------------------------------------------------------- 1 | package bgp 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | gobgpapi "github.com/osrg/gobgp/v3/api" 8 | "github.com/vishvananda/netlink" 9 | ) 10 | 11 | // ParseNextHop takes in a GoBGP Path and parses out the destination's next hop from its attributes. If it 12 | // can't parse a next hop IP from the GoBGP Path, it returns an error. 13 | func ParseNextHop(path *gobgpapi.Path) (net.IP, error) { 14 | for _, pAttr := range path.GetPattrs() { 15 | unmarshalNew, err := pAttr.UnmarshalNew() 16 | if err != nil { 17 | return nil, fmt.Errorf("failed to unmarshal path attribute: %s", err) 18 | } 19 | switch t := unmarshalNew.(type) { 20 | case *gobgpapi.NextHopAttribute: 21 | // This is the primary way that we receive NextHops and happens when both the client and the server exchange 22 | // next hops on the same IP family that they negotiated BGP on 23 | nextHopIP := net.ParseIP(t.NextHop) 24 | if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { 25 | return nextHopIP, nil 26 | } 27 | return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHop) 28 | case *gobgpapi.MpReachNLRIAttribute: 29 | // in the case where the server and the client are exchanging next-hops that don't relate to their primary 30 | // IP family, we get MpReachNLRIAttribute instead of NextHopAttributes 31 | // TODO: here we only take the first next hop, at some point in the future it would probably be best to 32 | // consider multiple next hops 33 | nextHopIP := net.ParseIP(t.NextHops[0]) 34 | if nextHopIP != nil && (nextHopIP.To4() != nil || nextHopIP.To16() != nil) { 35 | return nextHopIP, nil 36 | } 37 | return nil, fmt.Errorf("invalid nextHop address: %s", t.NextHops[0]) 38 | } 39 | } 40 | return nil, fmt.Errorf("could not parse next hop received from GoBGP for path: %s", path) 41 | } 42 | 43 | // ParsePath takes in a GoBGP Path and parses out the destination subnet and the next hop from its attributes. 44 | // If successful, it will return the destination of the BGP path as a subnet form and the next hop. If it 45 | // can't parse the destination or the next hop IP, it returns an error. 46 | func ParsePath(path *gobgpapi.Path) (*net.IPNet, net.IP, error) { 47 | nextHop, err := ParseNextHop(path) 48 | if err != nil { 49 | return nil, nil, err 50 | } 51 | 52 | nlri := path.GetNlri() 53 | var prefix gobgpapi.IPAddressPrefix 54 | err = nlri.UnmarshalTo(&prefix) 55 | if err != nil { 56 | return nil, nil, fmt.Errorf("invalid nlri in advertised path") 57 | } 58 | dstSubnet, err := netlink.ParseIPNet(prefix.Prefix + "/" + fmt.Sprint(prefix.PrefixLen)) 59 | if err != nil { 60 | return nil, nil, fmt.Errorf("couldn't parse IP subnet from nlri advertised path") 61 | } 62 | return dstSubnet, nextHop, nil 63 | } 64 | -------------------------------------------------------------------------------- /pkg/controllers/controllers.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | -------------------------------------------------------------------------------- /pkg/controllers/controllers_suite_test.go: -------------------------------------------------------------------------------- 1 | package controllers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/onsi/ginkgo" 7 | . 
"github.com/onsi/gomega" 8 | ) 9 | 10 | func TestControllers(t *testing.T) { 11 | RegisterFailHandler(Fail) 12 | RunSpecs(t, "Controllers Suite") 13 | } 14 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/namespace.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "reflect" 5 | 6 | api "k8s.io/api/core/v1" 7 | "k8s.io/client-go/tools/cache" 8 | "k8s.io/klog/v2" 9 | ) 10 | 11 | func (npc *NetworkPolicyController) newNamespaceEventHandler() cache.ResourceEventHandler { 12 | return cache.ResourceEventHandlerFuncs{ 13 | AddFunc: func(obj interface{}) { 14 | npc.handleNamespaceAdd(obj.(*api.Namespace)) 15 | }, 16 | UpdateFunc: func(oldObj, newObj interface{}) { 17 | npc.handleNamespaceUpdate(oldObj.(*api.Namespace), newObj.(*api.Namespace)) 18 | }, 19 | DeleteFunc: func(obj interface{}) { 20 | switch obj := obj.(type) { 21 | case *api.Namespace: 22 | npc.handleNamespaceDelete(obj) 23 | return 24 | case cache.DeletedFinalStateUnknown: 25 | if namespace, ok := obj.Obj.(*api.Namespace); ok { 26 | npc.handleNamespaceDelete(namespace) 27 | return 28 | } 29 | default: 30 | klog.Errorf("unexpected object type: %v", obj) 31 | } 32 | }, 33 | } 34 | } 35 | 36 | func (npc *NetworkPolicyController) handleNamespaceAdd(obj *api.Namespace) { 37 | if obj.Labels == nil { 38 | return 39 | } 40 | klog.V(2).Infof("Received update for namespace: %s", obj.Name) 41 | 42 | npc.RequestFullSync() 43 | } 44 | 45 | func (npc *NetworkPolicyController) handleNamespaceUpdate(oldObj, newObj *api.Namespace) { 46 | if reflect.DeepEqual(oldObj.Labels, newObj.Labels) { 47 | return 48 | } 49 | klog.V(2).Infof("Received update for namespace: %s", newObj.Name) 50 | 51 | npc.RequestFullSync() 52 | } 53 | 54 | func (npc *NetworkPolicyController) handleNamespaceDelete(obj *api.Namespace) { 55 | if obj.Labels == nil { 56 | return 57 | } 58 | klog.V(2).Infof("Received namespace: %s delete event", obj.Name) 59 | 60 | npc.RequestFullSync() 61 | } 62 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/policy_test.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | v1 "k8s.io/api/core/v1" 9 | ) 10 | 11 | func testNamePrefix(t *testing.T, testString string, isIPv6 bool) { 12 | if isIPv6 { 13 | assert.Truef(t, strings.HasPrefix(testString, "inet6:"), "%s is IPv6 and should begin with inet6:", testString) 14 | } 15 | } 16 | 17 | func Test_policySourcePodIPSetName(t *testing.T) { 18 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 19 | setName := policySourcePodIPSetName("foo", "bar", v1.IPv4Protocol) 20 | testNamePrefix(t, setName, false) 21 | setName = policySourcePodIPSetName("foo", "bar", v1.IPv6Protocol) 22 | testNamePrefix(t, setName, true) 23 | }) 24 | } 25 | 26 | func Test_policyDestinationPodIPSetName(t *testing.T) { 27 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 28 | setName := policyDestinationPodIPSetName("foo", "bar", v1.IPv4Protocol) 29 | testNamePrefix(t, setName, false) 30 | setName = policyDestinationPodIPSetName("foo", "bar", v1.IPv6Protocol) 31 | testNamePrefix(t, setName, true) 32 | }) 33 | } 34 | 35 | func Test_policyIndexedSourcePodIPSetName(t *testing.T) { 36 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 37 | setName := 
policyIndexedSourcePodIPSetName("foo", "bar", 1, v1.IPv4Protocol) 38 | testNamePrefix(t, setName, false) 39 | setName = policyIndexedSourcePodIPSetName("foo", "bar", 1, v1.IPv6Protocol) 40 | testNamePrefix(t, setName, true) 41 | }) 42 | } 43 | 44 | func Test_policyIndexedDestinationPodIPSetName(t *testing.T) { 45 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 46 | setName := policyIndexedDestinationPodIPSetName("foo", "bar", 1, v1.IPv4Protocol) 47 | testNamePrefix(t, setName, false) 48 | setName = policyIndexedDestinationPodIPSetName("foo", "bar", 1, v1.IPv6Protocol) 49 | testNamePrefix(t, setName, true) 50 | }) 51 | } 52 | 53 | func Test_policyIndexedSourceIPBlockIPSetName(t *testing.T) { 54 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 55 | setName := policyIndexedSourceIPBlockIPSetName("foo", "bar", 1, v1.IPv4Protocol) 56 | testNamePrefix(t, setName, false) 57 | setName = policyIndexedSourceIPBlockIPSetName("foo", "bar", 1, v1.IPv6Protocol) 58 | testNamePrefix(t, setName, true) 59 | }) 60 | } 61 | 62 | func Test_policyIndexedDestinationIPBlockIPSetName(t *testing.T) { 63 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 64 | setName := policyIndexedDestinationIPBlockIPSetName("foo", "bar", 1, v1.IPv4Protocol) 65 | testNamePrefix(t, setName, false) 66 | setName = policyIndexedDestinationIPBlockIPSetName("foo", "bar", 1, v1.IPv6Protocol) 67 | testNamePrefix(t, setName, true) 68 | }) 69 | } 70 | 71 | func Test_policyIndexedIngressNamedPortIPSetName(t *testing.T) { 72 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 73 | setName := policyIndexedIngressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv4Protocol) 74 | testNamePrefix(t, setName, false) 75 | setName = policyIndexedIngressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv6Protocol) 76 | testNamePrefix(t, setName, true) 77 | }) 78 | } 79 | 80 | func Test_policyIndexedEgressNamedPortIPSetName(t *testing.T) { 81 | t.Run("Check IPv4 and IPv6 names are correct", func(t *testing.T) { 82 | setName := policyIndexedEgressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv4Protocol) 83 | testNamePrefix(t, setName, false) 84 | setName = policyIndexedEgressNamedPortIPSetName("foo", "bar", 1, 1, v1.IPv6Protocol) 85 | testNamePrefix(t, setName, true) 86 | }) 87 | } 88 | -------------------------------------------------------------------------------- /pkg/controllers/netpol/utils_test.go: -------------------------------------------------------------------------------- 1 | package netpol 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | api "k8s.io/api/core/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | ) 10 | 11 | var ( 12 | fakePod = api.Pod{ 13 | TypeMeta: metav1.TypeMeta{ 14 | Kind: "Pod", 15 | APIVersion: "v1", 16 | }, 17 | ObjectMeta: metav1.ObjectMeta{ 18 | Name: "testpod", 19 | Namespace: "testnamespace", 20 | Labels: map[string]string{"foo": "bar"}}, 21 | Spec: api.PodSpec{ 22 | Containers: []api.Container{ 23 | { 24 | Image: "k8s.gcr.io/busybox", 25 | }, 26 | }, 27 | }, 28 | Status: api.PodStatus{ 29 | PodIP: "172.16.0.1", 30 | PodIPs: []api.PodIP{ 31 | { 32 | IP: "172.16.0.1", 33 | }, 34 | }, 35 | HostIP: "10.0.0.1", 36 | Phase: api.PodRunning, 37 | }, 38 | } 39 | ) 40 | 41 | func Test_isPodUpdateNetPolRelevant(t *testing.T) { 42 | t.Run("Pod phase change should be detected as NetworkPolicy relevant", func(t *testing.T) { 43 | newPod := fakePod.DeepCopy() 44 | newPod.Status.Phase = api.PodFailed 45 | assert.True(t, 
isPodUpdateNetPolRelevant(&fakePod, newPod)) 46 | }) 47 | t.Run("Pod IP change should be detected as NetworkPolicy relevant", func(t *testing.T) { 48 | newPod := fakePod.DeepCopy() 49 | newPod.Status.PodIP = "172.16.0.2" 50 | assert.True(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 51 | }) 52 | t.Run("Pod IPs change should be detected as NetworkPolicy relevant", func(t *testing.T) { 53 | newPod := fakePod.DeepCopy() 54 | newPod.Status.PodIPs = []api.PodIP{{IP: "172.16.0.2"}} 55 | assert.True(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 56 | }) 57 | t.Run("Pod Label change should be detected as NetworkPolicy relevant", func(t *testing.T) { 58 | newPod := fakePod.DeepCopy() 59 | newPod.Labels = map[string]string{"bar": "foo"} 60 | assert.True(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 61 | }) 62 | t.Run("Pod Host IP change should be detected as NetworkPolicy relevant", func(t *testing.T) { 63 | newPod := fakePod.DeepCopy() 64 | newPod.Status.HostIP = "10.0.0.2" 65 | assert.True(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 66 | }) 67 | t.Run("Pod Image change should NOT be detected as NetworkPolicy relevant", func(t *testing.T) { 68 | newPod := fakePod.DeepCopy() 69 | newPod.Spec.Containers[0].Image = "k8s.gcr.io/otherimage" 70 | assert.False(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 71 | }) 72 | t.Run("Pod Name change should NOT be detected as NetworkPolicy relevant", func(t *testing.T) { 73 | newPod := fakePod.DeepCopy() 74 | newPod.Name = "otherpod" 75 | assert.False(t, isPodUpdateNetPolRelevant(&fakePod, newPod)) 76 | }) 77 | } 78 | 79 | func Test_isPodFinished(t *testing.T) { 80 | t.Run("Failed pod should be detected as finished", func(t *testing.T) { 81 | fakePod.Status.Phase = api.PodFailed 82 | assert.True(t, isFinished(&fakePod)) 83 | }) 84 | t.Run("Succeeded pod should be detected as finished", func(t *testing.T) { 85 | fakePod.Status.Phase = api.PodSucceeded 86 | assert.True(t, isFinished(&fakePod)) 87 | }) 88 | t.Run("Completed pod should be detected as finished", func(t *testing.T) { 89 | fakePod.Status.Phase = PodCompleted 90 | assert.True(t, isFinished(&fakePod)) 91 | }) 92 | t.Run("Running pod should NOT be detected as finished", func(t *testing.T) { 93 | fakePod.Status.Phase = api.PodRunning 94 | assert.False(t, isFinished(&fakePod)) 95 | }) 96 | t.Run("Pending pod should NOT be detected as finished", func(t *testing.T) { 97 | fakePod.Status.Phase = api.PodPending 98 | assert.False(t, isFinished(&fakePod)) 99 | }) 100 | t.Run("Unknown pod should NOT be detected as finished", func(t *testing.T) { 101 | fakePod.Status.Phase = api.PodUnknown 102 | assert.False(t, isFinished(&fakePod)) 103 | }) 104 | } 105 | 106 | func Test_isNetPolActionable(t *testing.T) { 107 | t.Run("Normal pod should be actionable", func(t *testing.T) { 108 | assert.True(t, isNetPolActionable(&fakePod)) 109 | }) 110 | t.Run("Pod without Pod IP should not be actionable", func(t *testing.T) { 111 | fakePod.Status.PodIP = "" 112 | assert.False(t, isNetPolActionable(&fakePod)) 113 | }) 114 | t.Run("Finished Pod should not be actionable", func(t *testing.T) { 115 | fakePod.Status.Phase = api.PodFailed 116 | assert.False(t, isNetPolActionable(&fakePod)) 117 | fakePod.Status.Phase = api.PodSucceeded 118 | assert.False(t, isNetPolActionable(&fakePod)) 119 | fakePod.Status.Phase = PodCompleted 120 | assert.False(t, isNetPolActionable(&fakePod)) 121 | }) 122 | t.Run("Host Networked Pod should not be actionable", func(t *testing.T) { 123 | fakePod.Spec.HostNetwork = true 124 | assert.False(t, 
isNetPolActionable(&fakePod)) 125 | }) 126 | } 127 | 128 | func Test_NewNetworkPolicyController(t *testing.T) { 129 | t.Run("Node Port range specified with a hyphen should pass validation", func(t *testing.T) { 130 | portRange, err := validateNodePortRange("1000-2000") 131 | assert.Nil(t, err) 132 | assert.NotEmpty(t, portRange) 133 | }) 134 | t.Run("Node Port range specified with a colon should pass validation", func(t *testing.T) { 135 | portRange, err := validateNodePortRange("1000:2000") 136 | assert.Nil(t, err) 137 | assert.NotEmpty(t, portRange) 138 | }) 139 | t.Run("Node Port range specified with a high port range should work", func(t *testing.T) { 140 | portRange, err := validateNodePortRange("40000:42767") 141 | assert.Nil(t, err) 142 | assert.NotEmpty(t, portRange) 143 | portRange, err = validateNodePortRange("50000:65535") 144 | assert.Nil(t, err) 145 | assert.NotEmpty(t, portRange) 146 | }) 147 | t.Run("Node Port range specified with a higher start number should fail validation", func(t *testing.T) { 148 | portRange, err := validateNodePortRange("2000:1000") 149 | assert.Error(t, err) 150 | assert.Empty(t, portRange) 151 | }) 152 | t.Run("Node Port range specified with same start and end port should fail validation", func(t *testing.T) { 153 | portRange, err := validateNodePortRange("2000:2000") 154 | assert.Error(t, err) 155 | assert.Empty(t, portRange) 156 | }) 157 | t.Run("Node Port range specified with a port number higher than 16-bits unsigned should fail validation", func(t *testing.T) { 158 | portRange, err := validateNodePortRange("65535:65537") 159 | assert.Error(t, err) 160 | assert.Empty(t, portRange) 161 | }) 162 | } 163 | -------------------------------------------------------------------------------- /pkg/controllers/proxy/hairpin_controller.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "path" 8 | "runtime" 9 | "sync" 10 | "time" 11 | 12 | "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" 13 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 14 | "github.com/vishvananda/netns" 15 | "k8s.io/klog/v2" 16 | ) 17 | 18 | // !!!! IMPORTANT !!!! - This code is not currently used 19 | // Not creating the hairpin controller for now because this should be handled at the CNI level. The CNI bridge 20 | // plugin ensures that hairpin mode is set much more reliably than we do. However, as a lot of work was put into 21 | // the hairpin controller, and so that it is around to reference in the future if needed, I'm leaving the code 22 | // for now. 
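// For reference (an assumption about external configuration, not something this
// file sets up): the CNI bridge plugin can enable the same behaviour declaratively
// via "hairpinMode": true in the bridge entry of the CNI conflist, which is the
// reason this controller is left unused.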
23 | 24 | type hairpinController struct { 25 | epC <-chan string 26 | nsc *NetworkServicesController 27 | } 28 | 29 | func (hpc *hairpinController) Run(stopCh <-chan struct{}, wg *sync.WaitGroup, 30 | healthChan chan<- *healthcheck.ControllerHeartbeat) { 31 | defer wg.Done() 32 | klog.Infof("Starting hairping controller (handles setting hairpin_mode for veth interfaces)") 33 | 34 | t := time.NewTicker(healthcheck.HPCSyncPeriod) 35 | defer t.Stop() 36 | for { 37 | // Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first 38 | select { 39 | case <-stopCh: 40 | klog.Info("Shutting down Hairpin Controller goroutine") 41 | return 42 | default: 43 | } 44 | select { 45 | case <-stopCh: 46 | klog.Info("Shutting down Hairpin Controller goroutine") 47 | return 48 | case endpointIP := <-hpc.epC: 49 | klog.V(1).Infof("Received request for hairpin setup of endpoint %s, processing", endpointIP) 50 | err := hpc.ensureHairpinEnabledForPodInterface(endpointIP) 51 | if err != nil { 52 | klog.Errorf("unable to set hairpin mode for endpoint %s, its possible that hairpinning will not "+ 53 | "work as expected. Error was: %v", 54 | endpointIP, err) 55 | } 56 | case <-t.C: 57 | healthcheck.SendHeartBeat(healthChan, healthcheck.HairpinController) 58 | } 59 | } 60 | } 61 | 62 | func (hpc *hairpinController) ensureHairpinEnabledForPodInterface(endpointIP string) error { 63 | klog.V(2).Infof("Attempting to enable hairpin mode for endpoint IP %s", endpointIP) 64 | crRuntime, containerID, err := hpc.nsc.findContainerRuntimeReferences(endpointIP) 65 | if err != nil { 66 | return err 67 | } 68 | klog.V(2).Infof("Detected runtime %s and container ID %s for endpoint IP %s", crRuntime, containerID, endpointIP) 69 | 70 | runtime.LockOSThread() 71 | defer runtime.UnlockOSThread() 72 | 73 | hostNetworkNSHandle, err := netns.Get() 74 | if err != nil { 75 | return fmt.Errorf("failed to get namespace due to %v", err) 76 | } 77 | defer utils.CloseCloserDisregardError(&hostNetworkNSHandle) 78 | 79 | var pid int 80 | if crRuntime == "docker" { 81 | // WARN: This method is deprecated and will be removed once docker-shim is removed from kubelet. 
82 | pid, err = hpc.nsc.ln.getContainerPidWithDocker(containerID) 83 | if err != nil { 84 | return fmt.Errorf("failed to get pod's (%s) pid for hairpinning due to %v", endpointIP, err) 85 | } 86 | } else { 87 | // We expect CRI compliant runtimes here 88 | // ugly workaround, refactoring of pkg/Proxy is required 89 | pid, err = hpc.nsc.ln.getContainerPidWithCRI(hpc.nsc.dsr.runtimeEndpoint, containerID) 90 | if err != nil { 91 | return fmt.Errorf("failed to get pod's (%s) pid for hairpinning due to %v", endpointIP, err) 92 | } 93 | } 94 | klog.V(2).Infof("Found PID %d for endpoint IP %s", pid, endpointIP) 95 | 96 | // Get the interface link ID from inside the container so that we can link it to the veth on the host namespace 97 | ifaceID, err := hpc.nsc.ln.findIfaceLinkForPid(pid) 98 | if err != nil { 99 | return fmt.Errorf("failed to find the interface ID inside the container NS for endpoint IP: %s, due to: %v", 100 | endpointIP, err) 101 | } 102 | klog.V(2).Infof("Found Interface Link ID %d for endpoint IP %s", ifaceID, endpointIP) 103 | 104 | ifaceName, err := net.InterfaceByIndex(ifaceID) 105 | if err != nil { 106 | return fmt.Errorf("failed to get the interface name from the link ID inside the container for endpoint IP: "+ 107 | "%s and Interface ID: %d due to: %v", endpointIP, ifaceID, err) 108 | } 109 | 110 | klog.V(1).Infof("Enabling hairpin for interface %s for endpoint IP %s", ifaceName.Name, endpointIP) 111 | hpPath := path.Join(sysFSVirtualNetPath, ifaceName.Name, sysFSHairpinRelPath) 112 | if _, err := os.Stat(hpPath); err != nil { 113 | return fmt.Errorf("hairpin path %s doesn't appear to exist for us to set", hpPath) 114 | } 115 | 116 | return os.WriteFile(hpPath, []byte(hairpinEnable), 0644) 117 | } 118 | 119 | func NewHairpinController(nsc *NetworkServicesController, endpointCh <-chan string) *hairpinController { 120 | hpc := hairpinController{ 121 | nsc: nsc, 122 | epC: endpointCh, 123 | } 124 | 125 | return &hpc 126 | } 127 | -------------------------------------------------------------------------------- /pkg/controllers/proxy/nodeport_healthcheck.go: -------------------------------------------------------------------------------- 1 | package proxy 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "strconv" 8 | "sync" 9 | "time" 10 | 11 | "k8s.io/klog/v2" 12 | ) 13 | 14 | type nodePortHealthCheckController struct { 15 | nphcServicesInfo 16 | activeNPHC map[int](chan<- struct{}) 17 | wg *sync.WaitGroup 18 | stopCh chan struct{} 19 | } 20 | 21 | type serviceHealthCheck struct { 22 | serviceID string 23 | nodePort int 24 | } 25 | 26 | type nphcServicesInfo struct { 27 | serviceInfoMap serviceInfoMap 28 | endpointsInfoMap endpointSliceInfoMap 29 | } 30 | 31 | type nphcHandler struct { 32 | svcHC *serviceHealthCheck 33 | nphc *nodePortHealthCheckController 34 | } 35 | 36 | func (nphc *nodePortHealthCheckController) UpdateServicesInfo(serviceInfoMap serviceInfoMap, 37 | endpointsInfoMap endpointSliceInfoMap) error { 38 | klog.V(1).Info("Running UpdateServicesInfo for NodePort health check") 39 | nphc.serviceInfoMap = serviceInfoMap 40 | nphc.endpointsInfoMap = endpointsInfoMap 41 | 42 | newActiveServices := make(map[int]bool) 43 | 44 | for svcID, svc := range serviceInfoMap { 45 | if svc.healthCheckNodePort != 0 { 46 | newActiveServices[svc.healthCheckNodePort] = true 47 | svcHC := serviceHealthCheck{ 48 | serviceID: svcID, 49 | nodePort: svc.healthCheckNodePort, 50 | } 51 | if nphc.healthCheckExists(svcHC) { 52 | continue 53 | } 54 | err := 
nphc.addHealthCheck(svcHC) 55 | if err != nil { 56 | return err 57 | } 58 | } 59 | } 60 | 61 | for np := range nphc.activeNPHC { 62 | if !newActiveServices[np] { 63 | err := nphc.stopHealthCheck(np) 64 | if err != nil { 65 | klog.Errorf("error stopping the NodePort healthcheck on NodePort %d: %v", np, err) 66 | } 67 | } 68 | } 69 | 70 | klog.V(1).Info("Finished UpdateServicesInfo for NodePort health check") 71 | return nil 72 | } 73 | 74 | func (nphc *nodePortHealthCheckController) healthCheckExists(svcHC serviceHealthCheck) bool { 75 | if _, ok := nphc.activeNPHC[svcHC.nodePort]; ok { 76 | return true 77 | } 78 | return false 79 | } 80 | 81 | func (nphc *nodePortHealthCheckController) addHealthCheck(svcHC serviceHealthCheck) error { 82 | klog.V(1).Infof("Adding NodePort health check for port: %d with svcid: %s", svcHC.nodePort, svcHC.serviceID) 83 | if nphc.healthCheckExists(svcHC) { 84 | return fmt.Errorf("unable to add healthcheck for NodePort %d as it is already taken", svcHC.nodePort) 85 | } 86 | closingChan := make(chan struct{}) 87 | nphc.activeNPHC[svcHC.nodePort] = closingChan 88 | 89 | nphc.wg.Add(1) 90 | go func(nphc *nodePortHealthCheckController, svcHC serviceHealthCheck, closingChan <-chan struct{}) { 91 | defer nphc.wg.Done() 92 | mux := http.NewServeMux() 93 | srv := &http.Server{ 94 | Addr: ":" + strconv.Itoa(svcHC.nodePort), 95 | Handler: mux, 96 | ReadHeaderTimeout: 5 * time.Second, 97 | } 98 | 99 | npHandler := nphcHandler{ 100 | svcHC: &svcHC, 101 | nphc: nphc, 102 | } 103 | mux.HandleFunc("/healthz", npHandler.Handler) 104 | 105 | nphc.wg.Add(1) 106 | go func(svcHC serviceHealthCheck) { 107 | defer nphc.wg.Done() 108 | klog.Infof("starting NodePort health controller on NodePort: %d", svcHC.nodePort) 109 | if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { 110 | // cannot panic, because this probably is an intentional close 111 | klog.Errorf("could not start NodePort health controller on NodePort %d: %s", svcHC.nodePort, err) 112 | } 113 | }(svcHC) 114 | 115 | // block until we receive a shut down signal on either our private channel or the global channel 116 | select { 117 | case <-closingChan: 118 | case <-nphc.stopCh: 119 | } 120 | klog.Infof("shutting down NodePort health controller on NodePort: %d", svcHC.nodePort) 121 | if err := srv.Shutdown(context.Background()); err != nil { 122 | klog.Errorf("could not shutdown NodePort health controller on NodePort %d: %v", svcHC.nodePort, err) 123 | } 124 | 125 | }(nphc, svcHC, closingChan) 126 | 127 | return nil 128 | } 129 | 130 | func (nphc *nodePortHealthCheckController) stopHealthCheck(nodePort int) error { 131 | if _, ok := nphc.activeNPHC[nodePort]; !ok { 132 | return fmt.Errorf("no NodePort health check currently exists for NodePort: %d", nodePort) 133 | } 134 | 135 | svcStopCh := nphc.activeNPHC[nodePort] 136 | close(svcStopCh) 137 | 138 | delete(nphc.activeNPHC, nodePort) 139 | 140 | return nil 141 | } 142 | 143 | func (npHandler *nphcHandler) Handler(w http.ResponseWriter, r *http.Request) { 144 | eps := npHandler.nphc.endpointsInfoMap[npHandler.svcHC.serviceID] 145 | endpointsOnNode := hasActiveEndpoints(eps) 146 | 147 | var numActiveEndpoints int8 148 | for _, endpoint := range eps { 149 | if endpoint.isLocal && !endpoint.isTerminating { 150 | numActiveEndpoints++ 151 | } 152 | } 153 | 154 | if endpointsOnNode && numActiveEndpoints > 0 { 155 | w.WriteHeader(http.StatusOK) 156 | _, err := fmt.Fprintf(w, "%d Service Endpoints found\n", numActiveEndpoints) 157 | if err != nil { 158 | 
klog.Errorf("failed to write body: %s", err) 159 | } 160 | } else { 161 | w.WriteHeader(http.StatusServiceUnavailable) 162 | _, err := w.Write([]byte("No Service Endpoints Found\n")) 163 | if err != nil { 164 | klog.Errorf("Failed to write body: %s", err) 165 | } 166 | } 167 | } 168 | 169 | func (nphc *nodePortHealthCheckController) StopAll() { 170 | klog.Info("Stopping all NodePort health checks") 171 | close(nphc.stopCh) 172 | klog.Info("Waiting for all NodePort health checks to finish shutting down") 173 | nphc.wg.Wait() 174 | klog.Info("All NodePort health checks are completely shut down, all done!") 175 | } 176 | 177 | func NewNodePortHealthCheck() *nodePortHealthCheckController { 178 | nphc := nodePortHealthCheckController{ 179 | activeNPHC: make(map[int]chan<- struct{}), 180 | wg: &sync.WaitGroup{}, 181 | stopCh: make(chan struct{}), 182 | } 183 | 184 | return &nphc 185 | } 186 | -------------------------------------------------------------------------------- /pkg/controllers/routing/aws.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net/url" 7 | "strings" 8 | "time" 9 | 10 | "github.com/aws/aws-sdk-go-v2/aws" 11 | "github.com/aws/aws-sdk-go-v2/config" 12 | "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" 13 | "github.com/aws/aws-sdk-go-v2/service/ec2" 14 | "github.com/aws/aws-sdk-go-v2/service/ec2/types" 15 | "github.com/aws/smithy-go" 16 | "k8s.io/klog/v2" 17 | 18 | v1core "k8s.io/api/core/v1" 19 | ) 20 | 21 | const ( 22 | awsThrottlingRequestDelay = 1000 * time.Millisecond 23 | awsMaxRetries = 5 24 | ) 25 | 26 | // disableSourceDestinationCheck disables src-dst check of all the VM's when cluster 27 | // is provisioned on AWS. EC2 by default drops any packets originating or destination 28 | // to a VM with IP other than that of VM's ip. This check needs to be disabled so that 29 | // cross node pod-to-pod traffic can be sent and received by a VM. 30 | func (nrc *NetworkRoutingController) disableSourceDestinationCheck() { 31 | nodes := nrc.nodeLister.List() 32 | 33 | for _, obj := range nodes { 34 | node := obj.(*v1core.Node) 35 | if node.Spec.ProviderID == "" || !strings.HasPrefix(node.Spec.ProviderID, "aws") { 36 | return 37 | } 38 | providerID := strings.Replace(node.Spec.ProviderID, "///", "//", 1) 39 | URL, err := url.Parse(providerID) 40 | if err != nil { 41 | klog.Errorf("failed to parse URL for providerID %s: %v", providerID, err) 42 | return 43 | } 44 | instanceID := URL.Path 45 | instanceID = strings.Trim(instanceID, "/") 46 | 47 | cfg, _ := config.LoadDefaultConfig(context.TODO(), 48 | config.WithRetryMaxAttempts(awsMaxRetries)) 49 | metadataClient := imds.NewFromConfig(cfg) 50 | region, err := metadataClient.GetRegion(context.TODO(), &imds.GetRegionInput{}) 51 | if err != nil { 52 | klog.Errorf("failed to disable source destination check due to: %v", err) 53 | return 54 | } 55 | cfg.Region = region.Region 56 | ec2Client := ec2.NewFromConfig(cfg) 57 | _, err = ec2Client.ModifyInstanceAttribute(context.TODO(), 58 | &ec2.ModifyInstanceAttributeInput{ 59 | InstanceId: aws.String(instanceID), 60 | SourceDestCheck: &types.AttributeBooleanValue{ 61 | Value: aws.Bool(false), 62 | }, 63 | }, 64 | ) 65 | if err != nil { 66 | var apiErr smithy.APIError 67 | if errors.As(err, &apiErr) { 68 | if apiErr.ErrorCode() == "UnauthorizedOperation" { 69 | nrc.ec2IamAuthorized = false 70 | klog.Errorf("Node does not have necessary IAM creds to modify instance attribute. 
So skipping "+ 71 | "disabling src-dst check. %v", apiErr.ErrorMessage()) 72 | return 73 | 74 | } 75 | } 76 | klog.Errorf("failed to disable source destination check due to: %v", err) 77 | } else { 78 | klog.Infof("disabled source destination check for the instance: %s", instanceID) 79 | } 80 | 81 | // to prevent EC2 rejecting API call due to API throttling give a delay between the calls 82 | time.Sleep(awsThrottlingRequestDelay) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /pkg/controllers/routing/pod_egress.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | v1core "k8s.io/api/core/v1" 8 | "k8s.io/klog/v2" 9 | ) 10 | 11 | // set up MASQUERADE rule so that egress traffic from the pods gets masqueraded to node's IP 12 | 13 | var ( 14 | podEgressArgs4 = []string{"-m", "set", "--match-set", podSubnetsIPSetName, "src", 15 | "-m", "set", "!", "--match-set", podSubnetsIPSetName, "dst", 16 | "-m", "set", "!", "--match-set", nodeAddrsIPSetName, "dst", 17 | "-j", "MASQUERADE"} 18 | podEgressArgs6 = []string{"-m", "set", "--match-set", "inet6:" + podSubnetsIPSetName, "src", 19 | "-m", "set", "!", "--match-set", "inet6:" + podSubnetsIPSetName, "dst", 20 | "-m", "set", "!", "--match-set", "inet6:" + nodeAddrsIPSetName, "dst", 21 | "-j", "MASQUERADE"} 22 | podEgressArgsBad4 = [][]string{{"-m", "set", "--match-set", podSubnetsIPSetName, "src", 23 | "-m", "set", "!", "--match-set", podSubnetsIPSetName, "dst", 24 | "-j", "MASQUERADE"}} 25 | podEgressArgsBad6 = [][]string{{"-m", "set", "--match-set", "inet6:" + podSubnetsIPSetName, "src", 26 | "-m", "set", "!", "--match-set", "inet6:" + podSubnetsIPSetName, "dst", 27 | "-j", "MASQUERADE"}} 28 | ) 29 | 30 | func (nrc *NetworkRoutingController) createPodEgressRule() error { 31 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 32 | podEgressArgs := podEgressArgs4 33 | if family == v1core.IPv6Protocol { 34 | podEgressArgs = podEgressArgs6 35 | } 36 | if iptablesCmdHandler.HasRandomFully() { 37 | podEgressArgs = append(podEgressArgs, "--random-fully") 38 | } 39 | 40 | err := iptablesCmdHandler.AppendUnique("nat", "POSTROUTING", podEgressArgs...) 41 | if err != nil { 42 | return errors.New("Failed to add iptables rule to masquerade outbound traffic from pods: " + 43 | err.Error() + "External connectivity will not work.") 44 | 45 | } 46 | } 47 | 48 | klog.V(1).Infof("Added iptables rule to masquerade outbound traffic from pods.") 49 | return nil 50 | } 51 | 52 | func (nrc *NetworkRoutingController) deletePodEgressRule() error { 53 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 54 | podEgressArgs := podEgressArgs4 55 | if family == v1core.IPv6Protocol { 56 | podEgressArgs = podEgressArgs6 57 | } 58 | if iptablesCmdHandler.HasRandomFully() { 59 | podEgressArgs = append(podEgressArgs, "--random-fully") 60 | } 61 | 62 | exists, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", podEgressArgs...) 63 | if err != nil { 64 | return errors.New("Failed to lookup iptables rule to masquerade outbound traffic from pods: " + err.Error()) 65 | } 66 | 67 | if exists { 68 | err = iptablesCmdHandler.Delete("nat", "POSTROUTING", podEgressArgs...) 69 | if err != nil { 70 | return errors.New("Failed to delete iptables rule to masquerade outbound traffic from pods: " + 71 | err.Error() + ". 
Pod egress might still work...") 72 | } 73 | klog.Infof("Deleted iptables rule to masquerade outbound traffic from pods.") 74 | } 75 | } 76 | 77 | return nil 78 | } 79 | 80 | func (nrc *NetworkRoutingController) deleteBadPodEgressRules() error { 81 | for family, iptablesCmdHandler := range nrc.iptablesCmdHandlers { 82 | podEgressArgsBad := podEgressArgsBad4 83 | if family == v1core.IPv6Protocol { 84 | podEgressArgsBad = podEgressArgsBad6 85 | } 86 | 87 | // If random fully is supported remove the original rule as well 88 | if iptablesCmdHandler.HasRandomFully() { 89 | if family == v1core.IPv4Protocol { 90 | podEgressArgsBad = append(podEgressArgsBad, podEgressArgs4) 91 | } else { 92 | podEgressArgsBad = append(podEgressArgsBad, podEgressArgs6) 93 | } 94 | } 95 | 96 | for _, args := range podEgressArgsBad { 97 | exists, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", args...) 98 | if err != nil { 99 | return fmt.Errorf("failed to lookup iptables rule: %s", err.Error()) 100 | } 101 | 102 | if exists { 103 | err = iptablesCmdHandler.Delete("nat", "POSTROUTING", args...) 104 | if err != nil { 105 | return fmt.Errorf("failed to delete old/bad iptables rule to masquerade outbound traffic "+ 106 | "from pods: %s. Pod egress might still work, or bugs may persist after upgrade", err) 107 | } 108 | klog.Infof("Deleted old/bad iptables rule to masquerade outbound traffic from pods.") 109 | } 110 | } 111 | } 112 | 113 | return nil 114 | } 115 | -------------------------------------------------------------------------------- /pkg/controllers/routing/utils.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "encoding/base64" 5 | "fmt" 6 | "net" 7 | "strconv" 8 | "strings" 9 | 10 | gobgpapi "github.com/osrg/gobgp/v3/api" 11 | 12 | v1core "k8s.io/api/core/v1" 13 | "k8s.io/klog/v2" 14 | ) 15 | 16 | // Used for processing Annotations that may contain multiple items 17 | // Pass this the string and the delimiter 18 | // 19 | //nolint:unparam // while delimiter is always "," for now it provides flexibility to leave the function this way 20 | func stringToSlice(s, d string) []string { 21 | ss := make([]string, 0) 22 | if strings.Contains(s, d) { 23 | ss = strings.Split(s, d) 24 | } else { 25 | ss = append(ss, s) 26 | } 27 | return ss 28 | } 29 | 30 | func stringSliceToIPs(s []string) ([]net.IP, error) { 31 | ips := make([]net.IP, 0) 32 | for _, ipString := range s { 33 | ip := net.ParseIP(ipString) 34 | if ip == nil { 35 | return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipString) 36 | } 37 | ips = append(ips, ip) 38 | } 39 | return ips, nil 40 | } 41 | 42 | func stringSliceToIPNets(s []string) ([]net.IPNet, error) { 43 | ipNets := make([]net.IPNet, 0) 44 | for _, ipNetString := range s { 45 | ip, ipNet, err := net.ParseCIDR(strings.TrimSpace(ipNetString)) 46 | if err != nil { 47 | return nil, fmt.Errorf("could not parse \"%s\" as an CIDR", ipNetString) 48 | } 49 | if ip == nil { 50 | return nil, fmt.Errorf("could not parse \"%s\" as an IP", ipNetString) 51 | } 52 | ipNets = append(ipNets, *ipNet) 53 | } 54 | return ipNets, nil 55 | } 56 | 57 | func stringSliceToUInt32(s []string) ([]uint32, error) { 58 | ints := make([]uint32, 0) 59 | for _, intString := range s { 60 | newInt, err := strconv.ParseUint(intString, 0, 32) 61 | if err != nil { 62 | return nil, fmt.Errorf("could not parse \"%s\" as an integer", intString) 63 | } 64 | ints = append(ints, uint32(newInt)) 65 | } 66 | return ints, nil 67 | } 68 | 69 | func 
stringSliceB64Decode(s []string) ([]string, error) { 70 | ss := make([]string, 0) 71 | for _, b64String := range s { 72 | decoded, err := base64.StdEncoding.DecodeString(b64String) 73 | if err != nil { 74 | return nil, fmt.Errorf("could not parse \"%s\" as a base64 encoded string", 75 | b64String) 76 | } 77 | ss = append(ss, string(decoded)) 78 | } 79 | return ss, nil 80 | } 81 | 82 | func statementsEqualByName(a, b []*gobgpapi.Statement) bool { 83 | // First a is in the outer loop ensuring that all members of a are in b 84 | for _, st1 := range a { 85 | st1Found := false 86 | for _, st2 := range b { 87 | if st1.Name == st2.Name { 88 | st1Found = true 89 | } 90 | } 91 | if !st1Found { 92 | return false 93 | } 94 | } 95 | 96 | // Second b is in the outer loop ensuring that all members of b are in a 97 | for _, st1 := range b { 98 | st1Found := false 99 | for _, st2 := range a { 100 | if st1.Name == st2.Name { 101 | st1Found = true 102 | } 103 | } 104 | if !st1Found { 105 | return false 106 | } 107 | } 108 | 109 | // If we've made it through both loops then we know that the statements arrays are equal 110 | return true 111 | } 112 | 113 | // getPodCIDRsFromAllNodeSources gets the pod CIDRs for all available sources on a given node in a specific order. The 114 | // order of preference is: 115 | // 1. From the kube-router.io/pod-cidr annotation (preserves backwards compatibility) 116 | // 2. From the kube-router.io/pod-cidrs annotation (allows the user to specify multiple CIDRs for a given node which 117 | // seems to be closer aligned to how upstream is moving) 118 | // 3. From the node's spec definition in node.Spec.PodCIDRs 119 | func getPodCIDRsFromAllNodeSources(node *v1core.Node) (podCIDRs []string) { 120 | // Prefer kube-router.io/pod-cidr as a matter of keeping backwards compatibility with previous functionality 121 | podCIDR := node.GetAnnotations()["kube-router.io/pod-cidr"] 122 | if podCIDR != "" { 123 | _, _, err := net.ParseCIDR(podCIDR) 124 | if err != nil { 125 | klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidr annotation, skipping...", podCIDR) 126 | } else { 127 | podCIDRs = append(podCIDRs, podCIDR) 128 | return podCIDRs 129 | } 130 | } 131 | 132 | // Then attempt to find the annotation kube-router.io/pod-cidrs and prefer those second 133 | cidrsAnnotation := node.GetAnnotations()["kube-router.io/pod-cidrs"] 134 | if cidrsAnnotation != "" { 135 | // this should contain comma separated CIDRs, any CIDRs which fail to parse we will emit a warning log for 136 | // and skip it 137 | cidrsAnnotArray := strings.Split(cidrsAnnotation, ",") 138 | for _, cidr := range cidrsAnnotArray { 139 | _, _, err := net.ParseCIDR(cidr) 140 | if err != nil { 141 | klog.Warningf("couldn't parse CIDR %s from kube-router.io/pod-cidrs annotation, skipping...", 142 | cidr) 143 | continue 144 | } 145 | podCIDRs = append(podCIDRs, cidr) 146 | } 147 | return podCIDRs 148 | } 149 | 150 | // Finally, if all else fails, use the PodCIDRs on the node spec 151 | return node.Spec.PodCIDRs 152 | } 153 | 154 | // getBGPRouteInfoForVIP attempt to automatically find the subnet, BGP AFI/SAFI Family, and nexthop for a given VIP 155 | // based upon whether it is an IPv4 address or an IPv6 address. 
Returns slash notation subnet as uint32 suitable for 156 | // sending to GoBGP and an error if it is unable to determine the subnet automatically 157 | func (nrc *NetworkRoutingController) getBGPRouteInfoForVIP(vip string) (subnet uint32, nh string, 158 | afiFamily gobgpapi.Family_Afi, err error) { 159 | ip := net.ParseIP(vip) 160 | if ip == nil { 161 | err = fmt.Errorf("could not parse VIP: %s", vip) 162 | return 163 | } 164 | if ip.To4() != nil { 165 | subnet = 32 166 | afiFamily = gobgpapi.Family_AFI_IP 167 | nhIP := nrc.krNode.FindBestIPv4NodeAddress() 168 | if nhIP == nil { 169 | err = fmt.Errorf("could not find an IPv4 address on node to set as nexthop for vip: %s", vip) 170 | } 171 | nh = nhIP.String() 172 | return 173 | } 174 | if ip.To16() != nil { 175 | subnet = 128 176 | afiFamily = gobgpapi.Family_AFI_IP6 177 | nhIP := nrc.krNode.FindBestIPv6NodeAddress() 178 | if nhIP == nil { 179 | err = fmt.Errorf("could not find an IPv6 address on node to set as nexthop for vip: %s", vip) 180 | } 181 | nh = nhIP.String() 182 | return 183 | } 184 | err = fmt.Errorf("could not convert IP to IPv4 or IPv6, unable to find subnet for: %s", vip) 185 | return 186 | } 187 | -------------------------------------------------------------------------------- /pkg/controllers/routing/utils_test.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "net" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_stringSliceToIPs(t *testing.T) { 11 | t.Run("When receive an empty slice it returns an empty ip slice", func(t *testing.T) { 12 | ips, err := stringSliceToIPs([]string{}) 13 | assert.Nil(t, err) 14 | assert.Equal(t, []net.IP{}, ips) 15 | }) 16 | t.Run("When receive an ip string slice it returns an ip slice", func(t *testing.T) { 17 | ips, err := stringSliceToIPs([]string{"192.168.0.1", "10.0.0.1"}) 18 | assert.Nil(t, err) 19 | assert.Equal(t, []net.IP{net.ParseIP("192.168.0.1"), net.ParseIP("10.0.0.1")}, ips) 20 | }) 21 | t.Run("When receive an invalid ip string slice it returns an error", func(t *testing.T) { 22 | ips, err := stringSliceToIPs([]string{"500.168.0.1"}) 23 | assert.Equal(t, "could not parse \"500.168.0.1\" as an IP", err.Error()) 24 | assert.Nil(t, ips) 25 | ips, err = stringSliceToIPs([]string{"invalid"}) 26 | assert.Equal(t, "could not parse \"invalid\" as an IP", err.Error()) 27 | assert.Nil(t, ips) 28 | }) 29 | } 30 | 31 | func Test_stringSliceToIPNets(t *testing.T) { 32 | t.Run("When receive an empty slice it returns an empty ip slice", func(t *testing.T) { 33 | ips, err := stringSliceToIPNets([]string{}) 34 | assert.Nil(t, err) 35 | assert.Equal(t, []net.IPNet{}, ips) 36 | }) 37 | t.Run("When receive an ip string slice it returns an ip slice ignoring trailing spaces", func(t *testing.T) { 38 | ips, err := stringSliceToIPNets([]string{" 192.168.0.1/24", "10.0.0.1/16 "}) 39 | assert.Nil(t, err) 40 | _, firstIPNet, _ := net.ParseCIDR("192.168.0.1/24") 41 | _, secondIPNet, _ := net.ParseCIDR("10.0.0.1/16") 42 | assert.Equal(t, []net.IPNet{*firstIPNet, *secondIPNet}, ips) 43 | }) 44 | t.Run("When receive an invalid ip string slice it returns an error", func(t *testing.T) { 45 | ips, err := stringSliceToIPNets([]string{"500.168.0.1/24"}) 46 | assert.Equal(t, "could not parse \"500.168.0.1/24\" as an CIDR", err.Error()) 47 | assert.Nil(t, ips) 48 | ips, err = stringSliceToIPNets([]string{"10.0.0.1/80"}) 49 | assert.Equal(t, "could not parse \"10.0.0.1/80\" as an CIDR", err.Error()) 50 | 
assert.Nil(t, ips) 51 | }) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/cri/interface.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | // RuntimeService is the client API for RuntimeService service. 4 | type RuntimeService interface { 5 | ContainerInfo(id string) (*containerInfo, error) 6 | Close() error 7 | } 8 | -------------------------------------------------------------------------------- /pkg/cri/remote_runtime.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | "net" 8 | "strings" 9 | "time" 10 | 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/credentials/insecure" 13 | 14 | "k8s.io/klog/v2" 15 | 16 | runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" 17 | ) 18 | 19 | const ( 20 | DefaultConnectionTimeout = 15 * time.Second 21 | maxMsgSize = 1024 * 1024 * 16 // 16 MB 22 | ) 23 | 24 | // remoteRuntimeService is a gRPC implementation of RuntimeService. 25 | type remoteRuntimeService struct { 26 | timeout time.Duration 27 | runtimeClient runtimeapi.RuntimeServiceClient 28 | conn *grpc.ClientConn 29 | } 30 | 31 | type containerInfo struct { 32 | Pid int `json:"pid"` 33 | } 34 | 35 | // NewRemoteRuntimeService creates a new RuntimeService. 36 | func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration) (RuntimeService, error) { 37 | proto, addr, err := EndpointParser(endpoint) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | klog.V(4).Infof("[RuntimeService] got endpoint %s (proto=%s, path=%s)", endpoint, proto, addr) 43 | 44 | if proto == "unix" { 45 | // Ever since grpc.DialContext was deprecated, we no longer get the passthrough resolver for free, so we need 46 | // to add it manually. See: https://github.com/grpc/grpc-go/issues/1846 for more context 47 | addr = "passthrough:///" + addr 48 | } else { 49 | return nil, errors.New("[RuntimeService] only unix socket is currently supported") 50 | } 51 | 52 | conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()), 53 | grpc.WithContextDialer(dialer), 54 | grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize))) 55 | if err != nil { 56 | klog.Errorf("Connect remote runtime %s failed: %v", addr, err) 57 | return nil, err 58 | } 59 | 60 | return &remoteRuntimeService{ 61 | timeout: connectionTimeout, 62 | runtimeClient: runtimeapi.NewRuntimeServiceClient(conn), 63 | conn: conn, 64 | }, nil 65 | } 66 | 67 | // ContainerInfo returns verbose info of the provided container. 68 | func (r *remoteRuntimeService) ContainerInfo(id string) (*containerInfo, error) { 69 | ctx, cancel := context.WithTimeout(context.Background(), r.timeout) 70 | defer cancel() 71 | 72 | // Verbose should be set, otherwise we'll get an empty slice. 73 | resp, err := r.runtimeClient.ContainerStatus(ctx, &runtimeapi.ContainerStatusRequest{ 74 | ContainerId: id, 75 | Verbose: true, 76 | }) 77 | if err != nil { 78 | return nil, err 79 | } 80 | 81 | info := containerInfo{} 82 | 83 | if err := json.Unmarshal([]byte(resp.Info["info"]), &info); err != nil { 84 | return nil, err 85 | } 86 | return &info, nil 87 | } 88 | 89 | // Close tears down the *grpc.ClientConn and all underlying connections.
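// A minimal usage sketch, assuming a containerd socket at a hypothetical path and a placeholder
// container ID: build the RuntimeService, look up a container's PID via ContainerInfo, and close
// the connection when done.
//
//	rs, err := NewRemoteRuntimeService("unix:///run/containerd/containerd.sock", DefaultConnectionTimeout)
//	if err != nil {
//		klog.Fatalf("could not connect to CRI socket: %v", err)
//	}
//	defer func() { _ = rs.Close() }()
//	info, err := rs.ContainerInfo("0123456789abcdef")
//	if err != nil {
//		klog.Fatalf("could not get container info: %v", err)
//	}
//	klog.Infof("container is running as PID %d", info.Pid)
//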
90 | func (r *remoteRuntimeService) Close() error { 91 | if err := r.conn.Close(); err != nil { 92 | return err 93 | } 94 | return nil 95 | } 96 | 97 | func dialer(ctx context.Context, addr string) (net.Conn, error) { 98 | return (&net.Dialer{}).DialContext(ctx, "unix", addr) 99 | } 100 | 101 | // EndpointParser returns protocol and path of provided endpoint 102 | func EndpointParser(endpoint string) (proto string, path string, err error) { 103 | 104 | result := strings.Split(endpoint, "://") 105 | 106 | if len(result) < 2 { 107 | return "", "", errors.New("bad endpoint format. should be 'protocol://path'") 108 | } 109 | return result[0], result[1], nil 110 | } 111 | -------------------------------------------------------------------------------- /pkg/routes/linux_routes.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | "github.com/vishvananda/netlink" 8 | "github.com/vishvananda/netlink/nl" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | const ( 13 | // Taken from: https://github.com/torvalds/linux/blob/master/include/uapi/linux/rtnetlink.h#L284 14 | ZebraOriginator = 0x11 15 | ) 16 | 17 | // DeleteByDestination attempts to safely find all routes based upon its destination subnet and delete them 18 | func DeleteByDestination(destinationSubnet *net.IPNet) error { 19 | routes, err := netlink.RouteListFiltered(nl.FAMILY_ALL, &netlink.Route{ 20 | Dst: destinationSubnet, Protocol: ZebraOriginator, 21 | }, netlink.RT_FILTER_DST|netlink.RT_FILTER_PROTOCOL) 22 | if err != nil { 23 | return fmt.Errorf("failed to get routes from netlink: %v", err) 24 | } 25 | for i, r := range routes { 26 | klog.V(2).Infof("Found route to remove: %s", r.String()) 27 | if err = netlink.RouteDel(&routes[i]); err != nil { 28 | return fmt.Errorf("failed to remove route due to %v", err) 29 | } 30 | } 31 | return nil 32 | } 33 | -------------------------------------------------------------------------------- /pkg/routes/pbr.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "os/exec" 6 | "strings" 7 | 8 | "github.com/cloudnativelabs/kube-router/v2/pkg/utils" 9 | ) 10 | 11 | const ( 12 | // CustomTableID is the ID of the custom, iproute2 routing table that will be used for policy based routing 13 | CustomTableID = "77" 14 | // CustomTableName is the name of the custom, iproute2 routing table that will be used for policy based routing 15 | CustomTableName = "kube-router" 16 | ) 17 | 18 | // PolicyBasedRules is a struct that holds all of the information needed for manipulating policy based routing rules 19 | type PolicyBasedRules struct { 20 | nfa utils.NodeFamilyAware 21 | podIPv4CIDRs []string 22 | podIPv6CIDRs []string 23 | } 24 | 25 | // NewPolicyBasedRules creates a new PBR object which will be used to manipulate policy based routing rules 26 | func NewPolicyBasedRules(nfa utils.NodeFamilyAware, podIPv4CIDRs, podIPv6CIDRs []string) *PolicyBasedRules { 27 | return &PolicyBasedRules{ 28 | nfa: nfa, 29 | podIPv4CIDRs: podIPv4CIDRs, 30 | podIPv6CIDRs: podIPv6CIDRs, 31 | } 32 | } 33 | 34 | // ipRuleAbstraction used for abstracting iproute2 rule additions between IPv4 and IPv6 for both add and del operations. 35 | // ipProtocol is the iproute2 protocol specified as a string ("-4" or "-6"). ipOp is the rule operation specified as a 36 | // string ("add" or "del). 
The cidr is the IPv4 / IPv6 source CIDR string that when received will be used to lookup 37 | // routes in a custom table. 38 | func ipRuleAbstraction(ipProtocol, ipOp, cidr string) error { 39 | out, err := exec.Command("ip", ipProtocol, "rule", "list").Output() 40 | if err != nil { 41 | return fmt.Errorf("failed to verify if `ip rule` exists: %s", err.Error()) 42 | } 43 | 44 | if strings.Contains(string(out), cidr) && ipOp == "del" { 45 | err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", CustomTableID).Run() 46 | if err != nil { 47 | return fmt.Errorf("failed to add ip rule due to: %s", err.Error()) 48 | } 49 | } else if !strings.Contains(string(out), cidr) && ipOp == "add" { 50 | err = exec.Command("ip", ipProtocol, "rule", ipOp, "from", cidr, "lookup", CustomTableID).Run() 51 | if err != nil { 52 | return fmt.Errorf("failed to add ip rule due to: %s", err.Error()) 53 | } 54 | } 55 | 56 | return nil 57 | } 58 | 59 | // Enable setup a custom routing table that will be used for policy based routing to ensure traffic 60 | // originating on tunnel interface only leaves through tunnel interface irrespective rp_filter enabled/disabled 61 | func (pbr *PolicyBasedRules) Enable() error { 62 | err := utils.RouteTableAdd(CustomTableID, CustomTableName) 63 | if err != nil { 64 | return fmt.Errorf("failed to update rt_tables file: %s", err) 65 | } 66 | 67 | if pbr.nfa.IsIPv4Capable() { 68 | for _, ipv4CIDR := range pbr.podIPv4CIDRs { 69 | if err := ipRuleAbstraction("-4", "add", ipv4CIDR); err != nil { 70 | return err 71 | } 72 | } 73 | } 74 | if pbr.nfa.IsIPv6Capable() { 75 | for _, ipv6CIDR := range pbr.podIPv6CIDRs { 76 | if err := ipRuleAbstraction("-6", "add", ipv6CIDR); err != nil { 77 | return err 78 | } 79 | } 80 | } 81 | 82 | return nil 83 | } 84 | 85 | // Disable removes the custom routing table that was used for policy based routing 86 | func (pbr *PolicyBasedRules) Disable() error { 87 | err := utils.RouteTableAdd(CustomTableID, CustomTableName) 88 | if err != nil { 89 | return fmt.Errorf("failed to update rt_tables file: %s", err) 90 | } 91 | 92 | if pbr.nfa.IsIPv4Capable() { 93 | for _, ipv4CIDR := range pbr.podIPv4CIDRs { 94 | if err := ipRuleAbstraction("-4", "del", ipv4CIDR); err != nil { 95 | return err 96 | } 97 | } 98 | } 99 | if pbr.nfa.IsIPv6Capable() { 100 | for _, ipv6CIDR := range pbr.podIPv6CIDRs { 101 | if err := ipRuleAbstraction("-6", "del", ipv6CIDR); err != nil { 102 | return err 103 | } 104 | } 105 | } 106 | 107 | return nil 108 | } 109 | -------------------------------------------------------------------------------- /pkg/routes/route_sync.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "sync" 7 | "time" 8 | 9 | "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" 10 | "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" 11 | "github.com/prometheus/client_golang/prometheus" 12 | "github.com/vishvananda/netlink" 13 | "k8s.io/klog/v2" 14 | ) 15 | 16 | type RouteSyncErr struct { 17 | route *netlink.Route 18 | err error 19 | } 20 | 21 | func (rse RouteSyncErr) Error() string { 22 | return fmt.Sprintf("route (%s) encountered the following error while being acted upon: %v", rse.route, rse.err) 23 | } 24 | 25 | // RouteSync is a struct that holds all of the information needed for syncing routes to the kernel's routing table 26 | type RouteSync struct { 27 | routeTableStateMap map[string]*netlink.Route 28 | injectedRoutesSyncPeriod time.Duration 
29 | mutex sync.Mutex 30 | routeReplacer func(route *netlink.Route) error 31 | metricsEnabled bool 32 | } 33 | 34 | // addInjectedRoute adds a route to the route map that is regularly synced to the kernel's routing table 35 | func (rs *RouteSync) AddInjectedRoute(dst *net.IPNet, route *netlink.Route) { 36 | rs.mutex.Lock() 37 | defer rs.mutex.Unlock() 38 | klog.V(3).Infof("Adding route for destination: %s", dst) 39 | rs.routeTableStateMap[dst.String()] = route 40 | if rs.metricsEnabled { 41 | metrics.ControllerHostRoutesAdded.Inc() 42 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 43 | } 44 | } 45 | 46 | // delInjectedRoute delete a route from the route map that is regularly synced to the kernel's routing table 47 | func (rs *RouteSync) DelInjectedRoute(dst *net.IPNet) { 48 | rs.mutex.Lock() 49 | defer rs.mutex.Unlock() 50 | if _, ok := rs.routeTableStateMap[dst.String()]; ok { 51 | klog.V(3).Infof("Removing route for destination: %s", dst) 52 | delete(rs.routeTableStateMap, dst.String()) 53 | } 54 | if rs.metricsEnabled { 55 | metrics.ControllerHostRoutesRemoved.Inc() 56 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 57 | } 58 | } 59 | 60 | // syncLocalRouteTable iterates over the local route state map and syncs all routes to the kernel's routing table 61 | func (rs *RouteSync) SyncLocalRouteTable() error { 62 | if rs.metricsEnabled { 63 | startSyncTime := time.Now() 64 | defer func(startTime time.Time) { 65 | runTime := time.Since(startTime) 66 | metrics.ControllerHostRoutesSyncTime.Observe(runTime.Seconds()) 67 | }(startSyncTime) 68 | } 69 | rs.mutex.Lock() 70 | defer rs.mutex.Unlock() 71 | klog.V(2).Infof("Running local route table synchronization") 72 | for _, route := range rs.routeTableStateMap { 73 | klog.V(3).Infof("Syncing route: %s -> %s via %s", route.Src, route.Dst, route.Gw) 74 | err := rs.routeReplacer(route) 75 | if err != nil { 76 | return RouteSyncErr{ 77 | route: route, 78 | err: err, 79 | } 80 | } 81 | } 82 | if rs.metricsEnabled { 83 | metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) 84 | } 85 | return nil 86 | } 87 | 88 | // run starts a goroutine that calls syncLocalRouteTable on interval injectedRoutesSyncPeriod 89 | func (rs *RouteSync) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, 90 | wg *sync.WaitGroup) { 91 | // Start route synchronization routine 92 | wg.Add(1) 93 | go func(stopCh <-chan struct{}, wg *sync.WaitGroup) { 94 | defer wg.Done() 95 | t := time.NewTicker(rs.injectedRoutesSyncPeriod) 96 | defer t.Stop() 97 | for { 98 | select { 99 | case <-t.C: 100 | err := rs.SyncLocalRouteTable() 101 | if err != nil { 102 | klog.Errorf("route could not be replaced due to: %v", err) 103 | } 104 | // Some of our unit tests send a nil health channel 105 | if nil != healthChan && err == nil { 106 | healthcheck.SendHeartBeat(healthChan, healthcheck.RouteSyncController) 107 | } 108 | case <-stopCh: 109 | klog.Infof("Shutting down local route synchronization") 110 | return 111 | } 112 | } 113 | }(stopCh, wg) 114 | } 115 | 116 | // NewRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every 117 | // syncPeriod 118 | func NewRouteSyncer(syncPeriod time.Duration, registerMetrics bool) *RouteSync { 119 | rs := RouteSync{} 120 | rs.routeTableStateMap = make(map[string]*netlink.Route) 121 | rs.injectedRoutesSyncPeriod = syncPeriod 122 | rs.mutex = sync.Mutex{} 123 | // We substitute the RouteReplace function 
here so that we can easily monkey patch it in our unit tests 124 | rs.routeReplacer = netlink.RouteReplace 125 | rs.metricsEnabled = registerMetrics 126 | 127 | // Register Metrics 128 | if registerMetrics { 129 | prometheus.MustRegister(metrics.ControllerHostRoutesSynced, metrics.ControllerHostRoutesSyncTime, 130 | metrics.ControllerHostRoutesAdded, metrics.ControllerHostRoutesRemoved) 131 | } 132 | 133 | return &rs 134 | } 135 | -------------------------------------------------------------------------------- /pkg/routes/route_sync_test.go: -------------------------------------------------------------------------------- 1 | package routes 2 | 3 | import ( 4 | "net" 5 | "sync" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/vishvananda/netlink" 11 | ) 12 | 13 | var ( 14 | testRoutes = map[string]string{ 15 | "192.168.0.1": "192.168.0.0/24", 16 | "10.255.0.1": "10.255.0.0/16", 17 | } 18 | _, testAddRouteIPNet, _ = net.ParseCIDR("192.168.1.0/24") 19 | testAddRouteRoute = generateTestRoute("192.168.1.0/24", "192.168.1.1") 20 | ) 21 | 22 | func generateTestRoute(dstCIDR string, dstGateway string) *netlink.Route { 23 | ip, ipNet, _ := net.ParseCIDR(dstCIDR) 24 | gwIP := net.ParseIP(dstGateway) 25 | return &netlink.Route{ 26 | Dst: &net.IPNet{ 27 | IP: ip, 28 | Mask: ipNet.Mask, 29 | }, 30 | Gw: gwIP, 31 | } 32 | } 33 | 34 | func generateTestRouteMap(inputRouteMap map[string]string) map[string]*netlink.Route { 35 | testRoutes := make(map[string]*netlink.Route) 36 | for gw, dst := range inputRouteMap { 37 | testRoutes[dst] = generateTestRoute(dst, gw) 38 | } 39 | return testRoutes 40 | } 41 | 42 | type mockNetlink struct { 43 | currentRoute *netlink.Route 44 | pause time.Duration 45 | wg *sync.WaitGroup 46 | } 47 | 48 | func (mnl *mockNetlink) mockRouteReplace(route *netlink.Route) error { 49 | mnl.currentRoute = route 50 | if mnl.wg != nil { 51 | mnl.wg.Done() 52 | time.Sleep(mnl.pause) 53 | } 54 | return nil 55 | } 56 | 57 | func Test_syncLocalRouteTable(t *testing.T) { 58 | prepSyncLocalTest := func() (*mockNetlink, *RouteSync) { 59 | // Create myNetlink so that it will wait 200 milliseconds on routeReplace and artificially hold its lock 60 | myNetlink := mockNetlink{} 61 | myNetlink.pause = time.Millisecond * 200 62 | 63 | // Create a route replacer and seed it with some routes to iterate over 64 | syncer := NewRouteSyncer(15*time.Second, false) 65 | syncer.routeTableStateMap = generateTestRouteMap(testRoutes) 66 | 67 | // Replace the netlink.RouteReplace function with our own mock function that includes a WaitGroup for syncing 68 | // and an artificial pause and won't interact with the OS 69 | syncer.routeReplacer = myNetlink.mockRouteReplace 70 | 71 | return &myNetlink, syncer 72 | } 73 | 74 | waitForSyncLocalRouteToAcquireLock := func(myNetlink *mockNetlink, syncer *RouteSync) { 75 | // Launch syncLocalRouteTable in a separate goroutine so that we can try to inject a route into the map while it 76 | // is syncing. 
Then wait on the wait group so that we know that syncLocalRouteTable has a hold on the lock when 77 | // we try to use it in addInjectedRoute() below 78 | myNetlink.wg = &sync.WaitGroup{} 79 | myNetlink.wg.Add(1) 80 | go func() { 81 | _ = syncer.SyncLocalRouteTable() 82 | }() 83 | 84 | // Now we know that the syncLocalRouteTable() is paused on our artificial wait we added above 85 | myNetlink.wg.Wait() 86 | // We no longer need the wait group, so we change it to a nil reference so that it won't come into play in the 87 | // next iteration of the route map 88 | myNetlink.wg = nil 89 | } 90 | 91 | t.Run("Ensure addInjectedRoute is goroutine safe", func(t *testing.T) { 92 | myNetlink, syncer := prepSyncLocalTest() 93 | 94 | waitForSyncLocalRouteToAcquireLock(myNetlink, syncer) 95 | 96 | // By measuring how much time it takes to inject the route we can understand whether addInjectedRoute waited 97 | // for the lock to be returned or not 98 | start := time.Now() 99 | syncer.AddInjectedRoute(testAddRouteIPNet, testAddRouteRoute) 100 | duration := time.Since(start) 101 | 102 | // We give ourselves a bit of leeway here, and say that if we were forced to wait for at least 190 milliseconds 103 | // then that is evidence that execution was stalled while trying to acquire a lock from syncLocalRouteTable() 104 | assert.Greater(t, duration, time.Millisecond*190, 105 | "Expected addInjectedRoute to take longer than 190 milliseconds to prove locking works") 106 | }) 107 | 108 | t.Run("Ensure delInjectedRoute is goroutine safe", func(t *testing.T) { 109 | myNetlink, syncer := prepSyncLocalTest() 110 | 111 | waitForSyncLocalRouteToAcquireLock(myNetlink, syncer) 112 | 113 | // By measuring how much time it takes to inject the route we can understand whether addInjectedRoute waited 114 | // for the lock to be returned or not 115 | start := time.Now() 116 | syncer.DelInjectedRoute(testAddRouteIPNet) 117 | duration := time.Since(start) 118 | 119 | // We give ourselves a bit of leeway here, and say that if we were forced to wait for at least 190 milliseconds 120 | // then that is evidence that execution was stalled while trying to acquire a lock from syncLocalRouteTable() 121 | assert.Greater(t, duration, time.Millisecond*190, 122 | "Expected addInjectedRoute to take longer than 190 milliseconds to prove locking works") 123 | }) 124 | } 125 | 126 | func Test_routeSyncer_run(t *testing.T) { 127 | // Taken from:https://stackoverflow.com/questions/32840687/timeout-for-waitgroup-wait 128 | // waitTimeout waits for the waitgroup for the specified max timeout. 129 | // Returns true if waiting timed out. 
130 | waitTimeout := func(wg *sync.WaitGroup, timeout time.Duration) bool { 131 | c := make(chan struct{}) 132 | go func() { 133 | defer close(c) 134 | wg.Wait() 135 | }() 136 | select { 137 | case <-c: 138 | return false // completed normally 139 | case <-time.After(timeout): 140 | return true // timed out 141 | } 142 | } 143 | 144 | t.Run("Ensure that run goroutine shuts down correctly on stop", func(t *testing.T) { 145 | // Setup routeSyncer to run 10 times a second 146 | syncer := NewRouteSyncer(100*time.Millisecond, false) 147 | myNetLink := mockNetlink{} 148 | syncer.routeReplacer = myNetLink.mockRouteReplace 149 | syncer.routeTableStateMap = generateTestRouteMap(testRoutes) 150 | stopCh := make(chan struct{}) 151 | wg := sync.WaitGroup{} 152 | 153 | // For a sanity check that the currentRoute on the mock object is nil to start with as we'll rely on this later 154 | assert.Nil(t, myNetLink.currentRoute, "currentRoute should be nil when the syncer hasn't run") 155 | 156 | syncer.Run(nil, stopCh, &wg) 157 | 158 | time.Sleep(110 * time.Millisecond) 159 | 160 | assert.NotNil(t, myNetLink.currentRoute, 161 | "the syncer should have run by now and populated currentRoute") 162 | 163 | // Simulate a shutdown 164 | close(stopCh) 165 | // WaitGroup should close out before our timeout 166 | timedOut := waitTimeout(&wg, 110*time.Millisecond) 167 | 168 | assert.False(t, timedOut, "WaitGroup should have marked itself as done instead of timing out") 169 | }) 170 | } 171 | -------------------------------------------------------------------------------- /pkg/tunnels/linux_tunnels_test.go: -------------------------------------------------------------------------------- 1 | package tunnels 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_GenerateTunnelName(t *testing.T) { 10 | testcases := []struct { 11 | name string 12 | nodeIP string 13 | tunnelName string 14 | }{ 15 | { 16 | "IP less than 12 characters after removing '.'", 17 | "10.0.0.1", 18 | "tun-e443169117a", 19 | }, 20 | { 21 | "IP has 12 characters after removing '.'", 22 | "100.200.300.400", 23 | "tun-9033d7906c7", 24 | }, 25 | { 26 | "IPv6 tunnel names are properly handled and consistent", 27 | "2001:db8:42:2::/64", 28 | "tun-ba56986ef05", 29 | }, 30 | } 31 | 32 | for _, testcase := range testcases { 33 | t.Run(testcase.name, func(t *testing.T) { 34 | tunnelName := GenerateTunnelName(testcase.nodeIP) 35 | assert.Lessf(t, len(tunnelName), 16, "the maximum length of the tunnel name should never exceed"+ 36 | "15 characters as 16 characters is the maximum length of a Unix interface name") 37 | assert.Equal(t, testcase.tunnelName, tunnelName, "did not get expected tunnel interface name") 38 | }) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pkg/utils/iptables_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | v1core "k8s.io/api/core/v1" 9 | ) 10 | 11 | func TestCommonICMPRules(t *testing.T) { 12 | tests := []struct { 13 | name string 14 | family v1core.IPFamily 15 | expected []ICMPRule 16 | }{ 17 | { 18 | name: "IPv4", 19 | family: v1core.IPv4Protocol, 20 | expected: []ICMPRule{ 21 | {"icmp", "--icmp-type", "echo-request", "allow icmp echo requests"}, 22 | {"icmp", "--icmp-type", "destination-unreachable", "allow icmp destination unreachable messages"}, 23 | {"icmp", "--icmp-type", "time-exceeded", 
"allow icmp time exceeded messages"}, 24 | }, 25 | }, 26 | { 27 | name: "IPv6", 28 | family: v1core.IPv6Protocol, 29 | expected: []ICMPRule{ 30 | {"ipv6-icmp", "--icmpv6-type", "echo-request", "allow icmp echo requests"}, 31 | {"ipv6-icmp", "--icmpv6-type", "destination-unreachable", "allow icmp destination unreachable messages"}, 32 | {"ipv6-icmp", "--icmpv6-type", "time-exceeded", "allow icmp time exceeded messages"}, 33 | {"ipv6-icmp", "--icmpv6-type", "neighbor-solicitation", "allow icmp neighbor solicitation messages"}, 34 | {"ipv6-icmp", "--icmpv6-type", "neighbor-advertisement", "allow icmp neighbor advertisement messages"}, 35 | {"ipv6-icmp", "--icmpv6-type", "echo-reply", "allow icmp echo reply messages"}, 36 | }, 37 | }, 38 | } 39 | 40 | for _, tt := range tests { 41 | t.Run(tt.name, func(t *testing.T) { 42 | result := CommonICMPRules(tt.family) 43 | assert.Equal(t, tt.expected, result, "CommonICMPRules(%v) = %v, want %v", tt.family, result, tt.expected) 44 | }) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /pkg/utils/linux_routing.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/vishvananda/netlink" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | const ( 13 | rtTablesFileName = "rt_tables" 14 | iproutePkg = "iproute2" 15 | ) 16 | 17 | var ( 18 | rtTablesPosLoc = []string{ 19 | fmt.Sprintf("/etc/%s/%s", iproutePkg, rtTablesFileName), 20 | fmt.Sprintf("/usr/lib/%s/%s", iproutePkg, rtTablesFileName), 21 | fmt.Sprintf("/usr/share/%s/%s", iproutePkg, rtTablesFileName), 22 | } 23 | ) 24 | 25 | type LocalLinkQuerier interface { 26 | LinkList() ([]netlink.Link, error) 27 | AddrList(link netlink.Link, family int) ([]netlink.Addr, error) 28 | } 29 | 30 | // RouteTableAdd adds a new named table to iproute's rt_tables configuration file 31 | func RouteTableAdd(tableNumber, tableName string) error { 32 | var rtTablesLoc string 33 | for _, possibleLoc := range rtTablesPosLoc { 34 | _, err := os.Stat(possibleLoc) 35 | if err != nil { 36 | klog.V(2).Infof("Did not find iproute2's rt_tables in location %s", possibleLoc) 37 | continue 38 | } 39 | rtTablesLoc = possibleLoc 40 | } 41 | if rtTablesLoc == "" { 42 | return fmt.Errorf("did not find rt_tables in any of the expected locations: %s", rtTablesFileName) 43 | } 44 | 45 | b, err := os.ReadFile(rtTablesLoc) 46 | if err != nil { 47 | return fmt.Errorf("failed to read: %s", err.Error()) 48 | } 49 | 50 | if !strings.Contains(string(b), tableName) { 51 | f, err := os.OpenFile(rtTablesLoc, os.O_APPEND|os.O_WRONLY, 0600) 52 | if err != nil { 53 | return fmt.Errorf("failed to open: %s", err.Error()) 54 | } 55 | defer CloseCloserDisregardError(f) 56 | if _, err = f.WriteString(tableNumber + " " + tableName + "\n"); err != nil { 57 | return fmt.Errorf("failed to write: %s", err.Error()) 58 | } 59 | } 60 | 61 | return nil 62 | } 63 | -------------------------------------------------------------------------------- /pkg/utils/linux_routingtest.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | 7 | "github.com/stretchr/testify/mock" 8 | "github.com/vishvananda/netlink" 9 | ) 10 | 11 | type FakeLocalLinkQuerier struct { 12 | links []netlink.Link 13 | addrs []*net.IPNet 14 | } 15 | 16 | func NewFakeLocalLinkQuerier(addrStrings []string, mtus []int) *FakeLocalLinkQuerier { 17 | links := 
make([]netlink.Link, len(addrStrings)) 18 | for idx := range addrStrings { 19 | mtu := 1 20 | if idx < len(mtus) { 21 | mtu = mtus[idx] 22 | } 23 | linkAttrs := netlink.LinkAttrs{ 24 | Index: idx, 25 | MTU: mtu, 26 | } 27 | linkDevice := netlink.Device{LinkAttrs: linkAttrs} 28 | links[idx] = &linkDevice 29 | } 30 | addrs := make([]*net.IPNet, len(addrStrings)) 31 | for idx, addr := range addrStrings { 32 | ip := net.ParseIP(addr) 33 | var netMask net.IPMask 34 | if ip.To4() != nil { 35 | //nolint:mnd // Hardcoded value is used for testing purposes 36 | netMask = net.CIDRMask(24, 32) 37 | } else { 38 | //nolint:mnd // Hardcoded value is used for testing purposes 39 | netMask = net.CIDRMask(64, 128) 40 | } 41 | ipNet := &net.IPNet{ 42 | IP: ip, 43 | Mask: netMask, 44 | } 45 | addrs[idx] = ipNet 46 | } 47 | return &FakeLocalLinkQuerier{ 48 | links: links, 49 | addrs: addrs, 50 | } 51 | } 52 | 53 | func (f *FakeLocalLinkQuerier) LinkList() ([]netlink.Link, error) { 54 | return f.links, nil 55 | } 56 | 57 | func (f *FakeLocalLinkQuerier) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) { 58 | addrs := make([]netlink.Addr, 1) 59 | addrs[0] = netlink.Addr{IPNet: f.addrs[link.Attrs().Index]} 60 | if link.Attrs().MTU == 0 { 61 | return nil, fmt.Errorf("MTU was set to 0 to simulate an error") 62 | } 63 | return addrs, nil 64 | } 65 | 66 | type MockLocalLinkQuerier struct { 67 | mock.Mock 68 | } 69 | 70 | func (m *MockLocalLinkQuerier) LinkList() ([]netlink.Link, error) { 71 | args := m.Called() 72 | return args.Get(0).([]netlink.Link), args.Error(1) 73 | } 74 | 75 | func (m *MockLocalLinkQuerier) AddrList(link netlink.Link, family int) ([]netlink.Addr, error) { 76 | args := m.Called(link, family) 77 | return args.Get(0).([]netlink.Addr), args.Error(1) 78 | } 79 | -------------------------------------------------------------------------------- /pkg/utils/pod_cidr.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "strings" 7 | 8 | v1core "k8s.io/api/core/v1" 9 | netutils "k8s.io/utils/net" 10 | ) 11 | 12 | const ( 13 | // deprecated - we now use multiple CIDRs, so it is better for users to use kube-router.io/pod-cidrs which allows 14 | // you to express all of the cidrs you want to advertise from a given node 15 | podCIDRAnnotation = "kube-router.io/pod-cidr" 16 | podCIDRsAnnotation = "kube-router.io/pod-cidrs" 17 | ) 18 | 19 | // GetPodCidrFromNodeSpec reads the pod CIDR allocated to the node from API node object and returns it 20 | func GetPodCidrFromNodeSpec(node *v1core.Node) (string, error) { 21 | if cidr, ok := node.Annotations[podCIDRAnnotation]; ok { 22 | _, _, err := net.ParseCIDR(cidr) 23 | if err != nil { 24 | return "", fmt.Errorf("error parsing pod CIDR in node annotation: %v", err) 25 | } 26 | 27 | return cidr, nil 28 | } 29 | 30 | if node.Spec.PodCIDR == "" { 31 | return "", fmt.Errorf("node.Spec.PodCIDR not set for node: %v", node.Name) 32 | } 33 | 34 | return node.Spec.PodCIDR, nil 35 | } 36 | 37 | // GetPodCIDRsFromNodeSpecDualStack reads the IPv4 and IPv6 pod CIDR allocated 38 | // to the node from API node object and returns them 39 | func GetPodCIDRsFromNodeSpecDualStack(node *v1core.Node) ([]string, []string, error) { 40 | var podIPv4CIDRs, podIPv6CIDRs []string 41 | 42 | if podCIDRs, ok := node.Annotations[podCIDRsAnnotation]; ok { 43 | for _, cidr := range strings.Split(podCIDRs, ",") { 44 | if _, _, err := net.ParseCIDR(cidr); err != nil { 45 | return podIPv4CIDRs, 
podIPv6CIDRs, fmt.Errorf("error parsing pod CIDR in node annotation: %v", err) 46 | } 47 | if netutils.IsIPv4CIDRString(cidr) { 48 | podIPv4CIDRs = append(podIPv4CIDRs, cidr) 49 | } 50 | if netutils.IsIPv6CIDRString(cidr) { 51 | podIPv6CIDRs = append(podIPv6CIDRs, cidr) 52 | } 53 | } 54 | return podIPv4CIDRs, podIPv6CIDRs, nil 55 | } 56 | 57 | if len(node.Spec.PodCIDRs) == 0 { 58 | return nil, nil, fmt.Errorf("node.Spec.PodCIDRs empty for node: %v", node.Name) 59 | } 60 | 61 | for _, podCIDR := range node.Spec.PodCIDRs { 62 | if netutils.IsIPv4CIDRString(podCIDR) { 63 | podIPv4CIDRs = append(podIPv4CIDRs, podCIDR) 64 | } 65 | if netutils.IsIPv6CIDRString(podCIDR) { 66 | podIPv6CIDRs = append(podIPv6CIDRs, podCIDR) 67 | } 68 | } 69 | 70 | return podIPv4CIDRs, podIPv6CIDRs, nil 71 | } 72 | -------------------------------------------------------------------------------- /pkg/utils/service.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | v1core "k8s.io/api/core/v1" 8 | discovery "k8s.io/api/discovery/v1" 9 | "k8s.io/client-go/tools/cache" 10 | "k8s.io/klog/v2" 11 | ) 12 | 13 | const ( 14 | IPInIPHeaderLength = 20 15 | ) 16 | 17 | // ServiceForEndpoints given an Endpoints object returns the Service API object if it exists 18 | func ServiceForEndpoints(ci *cache.Indexer, ep *v1core.Endpoints) (interface{}, bool, error) { 19 | key, err := cache.MetaNamespaceKeyFunc(ep) 20 | if err != nil { 21 | return nil, false, err 22 | } 23 | klog.V(2).Infof("key for looking up service from Endpoint is: %s", key) 24 | 25 | item, exists, err := (*ci).GetByKey(key) 26 | if err != nil { 27 | return nil, false, err 28 | } 29 | 30 | if !exists { 31 | return nil, false, nil 32 | } 33 | 34 | return item, true, nil 35 | } 36 | 37 | // ServiceNameforEndpointSlice returns the name of the service that created the EndpointSlice for a given EndpointSlice 38 | // 39 | // With endpoints, the name of the endpoint object always matches the service object, however when it comes to 40 | // EndpointSlices, things work a bit differently as k8s' controller will autogenerate it (something like: foo-kl29b) 41 | // 42 | // We can get service information from a number of spots: 43 | // * From the ownerReferences in the metadata EndpointSlice -> metadata -> ownerReferences[0] -> name 44 | // * We can also get this from the label: kubernetes.io/service-name 45 | // * generateName will also contain the prefix for the autogenerated name which should align with our service name 46 | // 47 | // We'll walk through all of these and do our best to identify the service's name; if we aren't able to find any of these 48 | // or they disagree with each other we'll throw an error 49 | func ServiceNameforEndpointSlice(es *discovery.EndpointSlice) (string, error) { 50 | const serviceNameLabel = "kubernetes.io/service-name" 51 | var ownerRefName, labelSvcName, generateName, finalSvcName string 52 | 53 | ownerRef := es.GetObjectMeta().GetOwnerReferences() 54 | if len(ownerRef) == 1 { 55 | ownerRefName = ownerRef[0].Name 56 | } 57 | 58 | labels := es.GetObjectMeta().GetLabels() 59 | if svcLabel, ok := labels[serviceNameLabel]; ok { 60 | labelSvcName = svcLabel 61 | } 62 | 63 | if es.GetObjectMeta().GetGenerateName() != "" { 64 | generateName = strings.TrimRight(es.GetObjectMeta().GetGenerateName(), "-") 65 | } 66 | 67 | if ownerRefName == "" && labelSvcName == "" && generateName == "" { 68 | return "", fmt.Errorf("all identifiers for service are empty on this 
EndpointSlice, unable to determine "+ 69 | "owning service for: %s/%s", es.Namespace, es.Name) 70 | } 71 | 72 | // Take things in an order of precedence here: generateName < ownerRefName < labelSvcName 73 | finalSvcName = generateName 74 | if ownerRefName != "" { 75 | finalSvcName = ownerRefName 76 | } 77 | if labelSvcName != "" { 78 | finalSvcName = labelSvcName 79 | } 80 | 81 | // At this point we do some checks to ensure that the final owning service name is sane. Specifically, we want to 82 | // check it against labelSvcName and ownerRefName if they were not blank and return an error if they don't agree. We 83 | // don't worry about generateName as that is less conclusive. 84 | // 85 | // From above, we already know that if labelSvcName was not blank then it is equal to finalSvcName, so we only need 86 | // to worry about ownerRefName 87 | if ownerRefName != "" && finalSvcName != ownerRefName { 88 | return "", fmt.Errorf("the ownerReferences field on EndpointSlice (%s) doesn't agree with the %s label "+ 89 | "(%s) for %s/%s EndpointSlice", ownerRefName, serviceNameLabel, labelSvcName, es.Namespace, es.Name) 90 | } 91 | 92 | return finalSvcName, nil 93 | } 94 | 95 | // ServiceForEndpointSlice given an EndpointSlice object returns the Service API object if it exists 96 | func ServiceForEndpointSlice(ci *cache.Indexer, es *discovery.EndpointSlice) (interface{}, bool, error) { 97 | svcName, err := ServiceNameforEndpointSlice(es) 98 | if err != nil { 99 | return nil, false, err 100 | } 101 | 102 | // The key that we're looking for here is just namespace/svcName 103 | key := fmt.Sprintf("%s/%s", es.Namespace, svcName) 104 | klog.V(2).Infof("key for looking up service from EndpointSlice is: %s", key) 105 | 106 | item, exists, err := (*ci).GetByKey(key) 107 | if err != nil { 108 | return nil, false, err 109 | } 110 | 111 | if !exists { 112 | return nil, false, nil 113 | } 114 | 115 | return item, true, nil 116 | } 117 | 118 | // ServiceHasNoClusterIP decides whether or not this service is a headless service which is often useful to 119 | // kube-router as there is no need to execute logic on most headless changes. Function takes a generic interface as its 120 | // input parameter so that it can be used more easily in early processing if needed. If a non-service object is given, 121 | // function will return false.
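// A brief sketch of the headless case this helper targets (the values below are hypothetical): a
// Service whose ClusterIP and every entry of ClusterIPs are the literal string "None" has no cluster
// IP, and most kube-router logic can safely skip it.
//
//	svc := &v1core.Service{
//		Spec: v1core.ServiceSpec{
//			Type:       v1core.ServiceTypeClusterIP,
//			ClusterIP:  "None",
//			ClusterIPs: []string{"None"},
//		},
//	}
//	headless := ServiceHasNoClusterIP(svc) // true
//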
122 | func ServiceHasNoClusterIP(obj interface{}) bool { 123 | if svc, _ := obj.(*v1core.Service); svc != nil { 124 | if svc.Spec.Type == v1core.ServiceTypeClusterIP { 125 | if ClusterIPIsNone(svc.Spec.ClusterIP) && containsOnlyNone(svc.Spec.ClusterIPs) { 126 | return true 127 | } 128 | } 129 | } 130 | return false 131 | } 132 | 133 | // ClusterIPIsNone checks to see whether the ClusterIP contains "None" which would indicate that it is headless 134 | func ClusterIPIsNone(clusterIP string) bool { 135 | return strings.ToLower(clusterIP) == "none" 136 | } 137 | 138 | // ClusterIPIsNoneOrBlank checks to see whether the ClusterIP contains "None" or is blank 139 | func ClusterIPIsNoneOrBlank(clusterIP string) bool { 140 | return ClusterIPIsNone(clusterIP) || clusterIP == "" 141 | } 142 | 143 | func containsOnlyNone(clusterIPs []string) bool { 144 | for _, clusterIP := range clusterIPs { 145 | if !ClusterIPIsNone(clusterIP) { 146 | return false 147 | } 148 | } 149 | return true 150 | } 151 | -------------------------------------------------------------------------------- /pkg/utils/sysctl.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | ) 8 | 9 | const ( 10 | // From what I can see there are no IPv6 equivalents for the below options, so we only consider IPv6 here 11 | // Network Services Configuration Paths 12 | IPv4IPVSConntrack = "net/ipv4/vs/conntrack" 13 | IPv4IPVSExpireNodestConn = "net/ipv4/vs/expire_nodest_conn" 14 | IPv4IPVSExpireQuiescent = "net/ipv4/vs/expire_quiescent_template" 15 | IPv4IPVSConnReuseMode = "net/ipv4/vs/conn_reuse_mode" 16 | IPv4ConfAllArpIgnore = "net/ipv4/conf/all/arp_ignore" 17 | IPv4ConfAllArpAnnounce = "net/ipv4/conf/all/arp_announce" 18 | IPv6ConfAllDisableIPv6 = "net/ipv6/conf/all/disable_ipv6" 19 | 20 | // Network Routes Configuration Paths 21 | BridgeNFCallIPTables = "net/bridge/bridge-nf-call-iptables" 22 | BridgeNFCallIP6Tables = "net/bridge/bridge-nf-call-ip6tables" 23 | 24 | // Template Configuration Paths 25 | IPv4ConfRPFilterTemplate = "net/ipv4/conf/%s/rp_filter" 26 | ) 27 | 28 | type SysctlError struct { 29 | additionalInfo string 30 | err error 31 | option string 32 | hasValue bool 33 | value int 34 | fatal bool 35 | } 36 | 37 | // Error return the error as string 38 | func (e *SysctlError) Error() string { 39 | value := "" 40 | if e.hasValue { 41 | value = fmt.Sprintf("=%d", e.value) 42 | } 43 | return fmt.Sprintf("Sysctl %s%s : %s (%s)", e.option, value, e.err, e.additionalInfo) 44 | } 45 | 46 | // IsFatal was the error fatal and reason to exit kube-router 47 | func (e *SysctlError) IsFatal() bool { 48 | return e.fatal 49 | } 50 | 51 | // Unwrap allows us to unwrap an error showing the original error 52 | func (e *SysctlError) Unwrap() error { 53 | return e.err 54 | } 55 | 56 | func sysctlStat(path string, hasValue bool, value int) (string, *SysctlError) { 57 | sysctlPath := fmt.Sprintf("/proc/sys/%s", path) 58 | if _, err := os.Stat(sysctlPath); err != nil { 59 | if os.IsNotExist(err) { 60 | return sysctlPath, &SysctlError{ 61 | "option not found, Does your kernel version support this feature?", 62 | err, path, hasValue, value, false} 63 | } 64 | return sysctlPath, &SysctlError{"path existed, but could not be stat'd", err, path, hasValue, value, true} 65 | } 66 | return sysctlPath, nil 67 | } 68 | 69 | // GetSysctlSingleTemplate gets a sysctl value by first formatting the PathTemplate parameter with the substitute string 70 | // and then 
getting the sysctl value and converting it into a string 71 | func GetSysctlSingleTemplate(pathTemplate string, substitute string) (string, *SysctlError) { 72 | actualPath := fmt.Sprintf(pathTemplate, substitute) 73 | return GetSysctl(actualPath) 74 | } 75 | 76 | // GetSysctl gets a sysctl value 77 | func GetSysctl(path string) (string, *SysctlError) { 78 | sysctlPath, err := sysctlStat(path, false, 0) 79 | if err != nil { 80 | return "", err 81 | } 82 | buf, readErr := os.ReadFile(sysctlPath) 83 | if readErr != nil { 84 | return "", &SysctlError{"path could not be read", readErr, path, false, 0, true} 85 | } 86 | return string(buf), nil 87 | } 88 | 89 | // SetSysctlSingleTemplate sets a sysctl value by first formatting the PathTemplate parameter with the substitute string 90 | // and then setting the sysctl to the value parameter 91 | func SetSysctlSingleTemplate(pathTemplate string, substitute string, value int) *SysctlError { 92 | actualPath := fmt.Sprintf(pathTemplate, substitute) 93 | return SetSysctl(actualPath, value) 94 | } 95 | 96 | // SetSysctl sets a sysctl value 97 | func SetSysctl(path string, value int) *SysctlError { 98 | sysctlPath, err := sysctlStat(path, true, value) 99 | if err != nil { 100 | return err 101 | } 102 | writeErr := os.WriteFile(sysctlPath, []byte(strconv.Itoa(value)), 0640) 103 | if writeErr != nil { 104 | return &SysctlError{"path could not be set", writeErr, path, true, value, true} 105 | } 106 | return nil 107 | } 108 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net" 7 | "strconv" 8 | "sync" 9 | ) 10 | 11 | type Listener interface { 12 | OnUpdate(instance interface{}) 13 | } 14 | 15 | type ListenerFunc func(instance interface{}) 16 | 17 | func (f ListenerFunc) OnUpdate(instance interface{}) { 18 | f(instance) 19 | } 20 | 21 | // Broadcaster holds the details of registered listeners 22 | type Broadcaster struct { 23 | listenerLock sync.RWMutex 24 | listeners []Listener 25 | } 26 | 27 | // Add lets you register a listener 28 | func (b *Broadcaster) Add(listener Listener) { 29 | b.listenerLock.Lock() 30 | defer b.listenerLock.Unlock() 31 | b.listeners = append(b.listeners, listener) 32 | } 33 | 34 | // Notify notifies an update to registered listeners 35 | func (b *Broadcaster) Notify(instance interface{}) { 36 | b.listenerLock.RLock() 37 | listeners := b.listeners 38 | b.listenerLock.RUnlock() 39 | for _, listener := range listeners { 40 | go listener.OnUpdate(instance) 41 | } 42 | } 43 | 44 | // CloseCloserDisregardError it is a common need throughout kube-router's code base to close a closer in defer 45 | // statements; this allows an action like that to pass a linter as well as describe its intention well 46 | func CloseCloserDisregardError(handler io.Closer) { 47 | _ = handler.Close() 48 | } 49 | 50 | // ContainsIPv4Address checks a given string array to see if it contains a valid IPv4 address within it 51 | func ContainsIPv4Address(addrs []string) bool { 52 | for _, addr := range addrs { 53 | ip := net.ParseIP(addr) 54 | if ip == nil { 55 | continue 56 | } 57 | if ip.To4() != nil { 58 | return true 59 | } 60 | } 61 | return false 62 | } 63 | 64 | // ContainsIPv6Address checks a given string array to see if it contains a valid IPv6 address within it 65 | func ContainsIPv6Address(addrs []string) bool { 66 | for _, addr := range addrs { 67 | ip := 
net.ParseIP(addr) 68 | if ip == nil { 69 | continue 70 | } 71 | if ip.To4() != nil { 72 | continue 73 | } 74 | if ip.To16() != nil { 75 | return true 76 | } 77 | } 78 | return false 79 | } 80 | 81 | // SliceContainsString checks to see if needle is contained within haystack, returns true if found, otherwise 82 | // returns false 83 | func SliceContainsString(needle string, haystack []string) bool { 84 | for _, hay := range haystack { 85 | if needle == hay { 86 | return true 87 | } 88 | } 89 | return false 90 | } 91 | 92 | // TCPAddressBindable checks to see if an IP/port is bindable by attempting to open a listener then closing it 93 | // returns nil if successful 94 | func TCPAddressBindable(addr string, port uint16) error { 95 | endpoint := addr + ":" + strconv.Itoa(int(port)) 96 | ln, err := net.Listen("tcp", endpoint) 97 | if err != nil { 98 | return fmt.Errorf("unable to open %s: %w", endpoint, err) 99 | } 100 | return ln.Close() 101 | } 102 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime" 7 | 8 | "github.com/hashicorp/go-version" 9 | "k8s.io/klog/v2" 10 | ) 11 | 12 | type versionMessage struct { 13 | minVersionInclusive string 14 | maxVersionExclusive string 15 | message string 16 | } 17 | 18 | // Version and BuildDate are injected at build time via ldflags 19 | var ( 20 | BuildDate string 21 | Version string 22 | 23 | msgVersionArr = []versionMessage{ 24 | { 25 | minVersionInclusive: "v2.0.0", 26 | maxVersionExclusive: "v2.1.0", 27 | message: "Version v2.X introduces backward compatibility breaking changes, the kube-router project " + 28 | "recommends that you read the release notes carefully before deploying: " + 29 | "https://github.com/cloudnativelabs/kube-router/releases/tag/v2.0.0", 30 | }, 31 | } 32 | ) 33 | 34 | func (ver versionMessage) versionApplicable(testVerStr string) bool { 35 | minVer, err1 := version.NewVersion(ver.minVersionInclusive) 36 | maxVer, err2 := version.NewVersion(ver.maxVersionExclusive) 37 | testVer, err3 := version.NewVersion(testVerStr) 38 | 39 | // When in doubt return false 40 | if err1 != nil || err2 != nil || err3 != nil { 41 | klog.Warningf("encountered an error while trying to parse version numbers: %v - %v - %v", err1, err2, err3) 42 | return false 43 | } 44 | 45 | return testVer.GreaterThanOrEqual(minVer) && testVer.LessThan(maxVer) 46 | } 47 | 48 | func PrintVersion(logOutput bool) { 49 | output := fmt.Sprintf("Running %v version %s, built on %s, %s\n", os.Args[0], Version, BuildDate, runtime.Version()) 50 | 51 | outputToStream(output, logOutput) 52 | } 53 | 54 | func PrintVersionMessages(logOutput bool) { 55 | for _, verMsg := range msgVersionArr { 56 | if verMsg.versionApplicable(Version) { 57 | outputToStream(verMsg.message, logOutput) 58 | } 59 | } 60 | } 61 | 62 | func outputToStream(output string, logOutput bool) { 63 | if !logOutput { 64 | _, _ = fmt.Fprintf(os.Stderr, "%s", output) 65 | } else { 66 | klog.Info(output) 67 | } 68 | } 69 | --------------------------------------------------------------------------------
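A rough sketch of how the version-gated advisory in pkg/version/version.go is matched (the version strings below are examples, not taken from the repository): versionApplicable treats its range as inclusive of minVersionInclusive and exclusive of maxVersionExclusive.

    msg := versionMessage{
        minVersionInclusive: "v2.0.0",
        maxVersionExclusive: "v2.1.0",
        message:             "example advisory",
    }
    fmt.Println(msg.versionApplicable("v2.0.4")) // true: within [v2.0.0, v2.1.0)
    fmt.Println(msg.versionApplicable("v2.1.0")) // false: the upper bound is exclusive
    fmt.Println(msg.versionApplicable("v1.6.0")) // false: below the minimum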