├── .clang-format ├── .github └── dependabot.yml ├── .gitignore ├── .golangci.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── NOTICE ├── README-zh_CN.md ├── README.md ├── bpf ├── headers │ ├── LICENSE.BSD-2-Clause │ ├── bpf_core_read.h │ ├── bpf_endian.h │ ├── bpf_helper_defs.h │ ├── bpf_helpers.h │ ├── bpf_tracing.h │ ├── common.h │ ├── compiler.h │ ├── linux │ │ ├── bpf.h │ │ ├── bpf_common.h │ │ ├── if_ether.h │ │ ├── in.h │ │ ├── in6.h │ │ ├── ip.h │ │ ├── ipv6.h │ │ ├── types.h │ │ └── types_mapper.h │ └── update.sh ├── monitor.bt ├── qos_tc.c └── qos_tc.h ├── charts └── terway-qos │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ ├── clusterrole.yaml │ ├── clusterrolebinding.yaml │ ├── configmap.yaml │ ├── daemonset.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── cmd ├── bpf_bandwidth.go ├── bpf_bandwidth_list.go ├── cgroup.go ├── cgroup_list.go ├── config.go ├── damon.go ├── pod.go ├── pod_list.go ├── pod_set.go └── root.go ├── docs ├── quick-start-zh_CN.md └── quick-start.md ├── go.mod ├── go.sum ├── hack └── init.sh ├── images ├── builder │ └── Dockerfile └── runtime │ └── Dockerfile ├── main.go └── pkg ├── bandwidth └── utils.go ├── bpf ├── compile.go ├── generate.go ├── manager.go ├── maps.go ├── maps_test.go ├── qos_tc_bpfeb.go ├── qos_tc_bpfeb.o ├── qos_tc_bpfel.go ├── qos_tc_bpfel.o └── types.go ├── byteorder ├── byteorder.go ├── byteorder_bigendian.go ├── byteorder_littleendian.go └── byteorder_test.go ├── config ├── config.go ├── config_test.go ├── record.go ├── syncer.go └── types.go ├── k8s ├── pods.go └── predicates.go ├── types └── config.go └── version └── version.go /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: LLVM 4 | AlignAfterOpenBracket: DontAlign 5 | AlignConsecutiveAssignments: true 6 | AlignEscapedNewlines: DontAlign 7 | AlwaysBreakBeforeMultilineStrings: true 8 | AlwaysBreakTemplateDeclarations: false 9 | 
AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortFunctionsOnASingleLine: false 11 | BreakBeforeBraces: Attach 12 | IndentWidth: 4 13 | KeepEmptyLinesAtTheStartOfBlocks: false 14 | TabWidth: 4 15 | UseTab: ForContinuationAndIndentation 16 | ColumnLimit: 1000 17 | # Go compiler comments need to stay unindented. 18 | CommentPragmas: '^go:.*' 19 | ... 20 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 2 8 | rebase-strategy: "disabled" 9 | labels: 10 | - kind/enhancement 11 | - release-note/misc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | concurrency: 4 3 | timeout: 5m 4 | tests: false 5 | build-tags: [] 6 | 7 | issues: 8 | exclude-rules: 9 | - path: _test\.go 10 | linters: 11 | - dupl 12 | - goconst 13 | linters: 14 | enable: 15 | - goconst 16 | - goimports 17 | - govet 18 | - errcheck 19 | - ineffassign 20 | - staticcheck 21 | - goconst 22 | - stylecheck 23 | - misspell 24 | linters-settings: 25 | errcheck: 26 | check-blank: false 27 | govet: 28 | check-shadowing: false 29 | maligned: 30 | suggest-new: true -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG GOLANG_IMAGE=docker.io/library/golang:1.21.3@sha256:24a09375a6216764a3eda6a25490a88ac178b5fcb9511d59d0da5ebf9e496474 2 | ARG 
UBUNTU_IMAGE=docker.io/library/ubuntu:22.04@sha256:2b7412e6465c3c7fc5bb21d3e6f1917c167358449fecac8176c6e496e5c1f05f 3 | ARG CILIUM_BPFTOOL_IMAGE=quay.io/cilium/cilium-bpftool:d3093f6aeefef8270306011109be623a7e80ad1b@sha256:2c28c64195dee20ab596d70a59a4597a11058333c6b35a99da32c339dcd7df56 4 | ARG RUNTIME_IMAGE=terway-qos-runtime 5 | 6 | FROM ${CILIUM_BPFTOOL_IMAGE} as bpftool-dist 7 | 8 | FROM ${GOLANG_IMAGE} as builder 9 | ARG GOPROXY 10 | ARG TARGETOS 11 | ARG TARGETARCH 12 | #ENV GOPROXY $GOPROXY 13 | ENV GOPROXY https://goproxy.cn 14 | WORKDIR /go/src/qos 15 | COPY go.sum go.mod ./ 16 | RUN go mod download 17 | COPY . . 18 | RUN CGO_ENABLED=0 go build \ 19 | -ldflags \ 20 | "-s -w -X \"github.com/AliyunContainerService/terway-qos/pkg/version.gitCommit=`git rev-parse HEAD 2>/dev/null`\" \ 21 | -X \"github.com/AliyunContainerService/terway-qos/pkg/version.buildDate=`date -u +'%Y-%m-%dT%H:%M:%SZ'`\" \ 22 | -X \"github.com/AliyunContainerService/terway-qos/pkg/version.gitVersion=`git describe --tags --match='v*' --abbrev=14 2>/dev/null`\"" -o /go/src/qos/qos . 23 | # Final stage: the base image comes from the RUNTIME_IMAGE build arg declared above (default: terway-qos-runtime). 24 | FROM ${RUNTIME_IMAGE} 25 | 26 | COPY bpf/headers /var/lib/terway/headers 27 | COPY bpf /var/lib/terway/src 28 | COPY hack/init.sh /bin/init.sh 29 | COPY --from=bpftool-dist /usr/local /usr/local 30 | COPY --from=builder /go/src/qos/qos /usr/bin/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] Alibaba Group 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | GO ?= go 3 | DOCKER ?= docker 4 | 5 | GOFLAGS ?= -ldflags "-s -w" 6 | 7 | CLANG ?= clang-15 8 | STRIP ?= llvm-strip-15 9 | OBJCOPY ?= llvm-objcopy-15 10 | CFLAGS ?= -g -O2 -target bpf -std=gnu99 -nostdinc -D__NR_CPUS__=4 -Werror -Wall -Wextra -Wshadow -Wno-address-of-packed-member -Wno-unknown-warning-option -Wno-gnu-variable-sized-type-not-at-end -Wimplicit-int-conversion -Wenum-conversion 11 | 12 | BPF_BUILD_IMAGE ?= terway-qos-builder:latest 13 | RUNTIME_IMAGE ?= terway-qos-runtime:latest 14 | GO_LINT_IMAGE ?= golangci/golangci-lint:v1.54.2-alpine 15 | DAEMON_IMAGE ?= terway-qos:latest 16 | # Default goal: run the linter, then the full build. 17 | .PHONY: all 18 | all: lint build 19 | 20 | .PHONY: lint 21 | lint: 22 | $(DOCKER) run --rm -it -v $(shell pwd):/go/src/qos \ 23 | -w /go/src/qos \ 24 | $(GO_LINT_IMAGE) golangci-lint -v run --timeout 5m 25 | 26 | .PHONY: build 27 | build: builder-image runtime-image generate daemon-image 28 | 29 | .PHONY: builder-image 30 | builder-image: 31 | @$(DOCKER) image inspect 
$(BPF_BUILD_IMAGE) >/dev/null 2>&1 || \ 32 | (echo "Docker image $(BPF_BUILD_IMAGE) not found, building..." && \ 33 | cd images/builder && \ 34 | $(DOCKER) build -t $(BPF_BUILD_IMAGE) .) 35 | 36 | .PHONY: runtime-image 37 | runtime-image: 38 | @$(DOCKER) image inspect $(RUNTIME_IMAGE) >/dev/null 2>&1 || \ 39 | (echo "Docker image $(RUNTIME_IMAGE) not found, building..." && \ 40 | cd images/runtime && \ 41 | $(DOCKER) build -t $(RUNTIME_IMAGE) .) 42 | 43 | .PHONY: daemon-image 44 | daemon-image: 45 | @$(DOCKER) build -t $(DAEMON_IMAGE) . 46 | # Runs `go generate` inside the builder image, forwarding BPF_CLANG, BPF_CFLAGS and BPF_STRIP into the container. 47 | .PHONY: generate 48 | generate: 49 | $(DOCKER) run --rm -it -v $(shell pwd):/go/src/qos \ 50 | -w /go/src/qos \ 51 | -e BPF_CLANG="$(CLANG)" \ 52 | -e BPF_CFLAGS="$(CFLAGS)" \ 53 | -e BPF_STRIP="$(STRIP)" \ 54 | $(BPF_BUILD_IMAGE) go generate ./... -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | terway-qos 3 | Copyright (c) 2023, Alibaba Group. 4 | Licensed under the Apache License, Version 2.0 5 | 6 | =================================================================== 7 | This product contains various third-party components under other open source licenses. 8 | This section summarizes those components and their licenses. 9 | 10 | 11 | Apache Software Foundation License 2.0 12 | -------------------------------------- 13 | pkg/bandwidth/utils.go 14 | Copyright 2015 The Kubernetes Authors. 15 | 16 | Licensed under the Apache License, Version 2.0 (the "License"); 17 | you may not use this file except in compliance with the License. 
18 | You may obtain a copy of the License at 19 | 20 | http://www.apache.org/licenses/LICENSE-2.0 21 | 22 | Unless required by applicable law or agreed to in writing, software 23 | distributed under the License is distributed on an "AS IS" BASIS, 24 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | See the License for the specific language governing permissions and 26 | limitations under the License. 27 | 28 | -------------------------------------- 29 | pkg/byteorder/* 30 | SPDX-License-Identifier: Apache-2.0 31 | Copyright Authors of Cilium 32 | 33 | -------------------------------------- 34 | bpf/* 35 | SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note 36 | 37 | -------------------------------------------------------------------------------- /README-zh_CN.md: -------------------------------------------------------------------------------- 1 | # terway-qos 2 | 3 | ## 介绍 4 | 5 | terway-qos 的诞生是为了解决混部场景下,容器网络带宽争抢问题。支持按单Pod、按业务类型限制带宽。 6 | 7 | 相比于其他方案,terway-qos 有以下优势: 8 | 9 | 1. 支持按业务类型限制带宽,支持多种业务类型混部 10 | 2. 支持 Pod 带宽限制动态调整 11 | 12 | ## 功能介绍 13 | 14 | 带宽限制分为 15 | 16 | - 整机带宽限制 17 | - Pod带宽限制 18 | 19 | ### 整机带宽限制 20 | 21 | 混部场景下,我们期望在线业务有最大带宽的保证,从而避免争抢。在空闲时,离线业务也能尽可能使用全部带宽资源。 22 | 由此用户可为业务流量定义三种优先级,L0,L1,L2。其优先级顺序依次递减。 23 | 24 | 争抢场景定义: 当 `L0 + L1 + L2` 总流量大于整机带宽 25 | 26 | 限制策略: 27 | 28 | - L0 最大带宽依据 L1, L2 实时流量而动态调整。最大为整机带宽,最小为 `整机带宽- L1 最小带宽- L2 最小带宽`。 29 | - 任何情况下,L1、L2 其带宽不超过各自带宽上限。 30 | - 争抢场景下, L1、L2 其带宽不会低于各自带宽下限。 31 | - 争抢场景下,将按照 L2 、L1 、L0 的顺序对带宽进行限制。 32 | 33 | #### Pod 优先级定义 34 | 35 | 通过为 Pod 配置下面 Annotation 36 | 37 | | key | 参数 | 38 | |----------------------------|------------------------------------------------------------------------| 39 | | `k8s.aliyun.com/qos-class` | `guaranteed` 在线业务 L0
`burstable` 离线业务 L1
`best-effort` 离线业务 L2 | 40 | 41 | #### 带宽限制配置 42 | 43 | 对需混部的节点,需配置宽限制,配置路径 `/var/lib/terway/qos/global_bps_config`。 44 | 45 | | 配置路径 | 参数 | 46 | |-----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 47 | | `/var/lib/terway/qos/global_bps_config` | `hw_tx_bps_max` 节点的最大tx带宽
`hw_rx_bps_max` 节点的最大rx带宽
`offline_l1_tx_bps_min` 出方向离线l1 业务的最小带宽保证
`offline_l1_tx_bps_max` 出方向离线l1 业务的最大带宽占用
`offline_l2_tx_bps_min` 出方向离线l2 业务的最小带宽保证
`offline_l2_tx_bps_max` 出方向离线l2 业务的最大带宽占用 | 48 | 49 | 示例如下 50 | 51 | ```yaml 52 | kind: ConfigMap 53 | apiVersion: v1 54 | metadata: 55 | name: terway-qos 56 | data: 57 | global_bps_config: | 58 | hw_tx_bps_max 900000000 59 | hw_rx_bps_max 900000000 60 | offline_l1_tx_bps_min 100000000 61 | offline_l1_tx_bps_max 200000000 62 | offline_l2_tx_bps_min 100000000 63 | offline_l2_tx_bps_max 300000000 64 | offline_l1_rx_bps_min 100000000 65 | offline_l1_rx_bps_max 200000000 66 | offline_l2_rx_bps_min 100000000 67 | offline_l2_rx_bps_max 300000000 68 | ``` 69 | 70 | > 带宽单位 Bytes/s , 带宽限制精度至少 1MB 以上 71 | 72 | ### Pod 带宽限制配置 73 | 74 | 支持 Kubernetes 标准的 Annotation 75 | 76 | - `kubernetes.io/egress-bandwidth` 77 | - `kubernetes.io/ingress-bandwidth` 78 | 79 | 支持热更新 Annotation 来调整 Pod 带宽限制 80 | 81 | 需注意,CNI 插件可能支持 Kubernetes 标准的 Annotation ,从而会影响热更新,这种情况下可以选择关闭 CNI 插件的带宽限制功能。 82 | 83 | ## 快速开始 84 | 85 | [快速开始](docs/quick-start-zh_CN.md) 86 | 87 | ## License 88 | 89 | terway-qos是由阿里巴巴开发的,采用Apache License(版本2.0)许可证。 90 | 本产品包含其他开源许可证下的各种第三方组件。 91 | 更多信息请参阅[NOTICE](NOTICE)文件。 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # qos 2 | 3 | ## Introduction 4 | 5 | terway-qos is developed to solve the problem of container network bandwidth contention in mixed deployment scenarios. It 6 | supports bandwidth limitation based on individual Pods and business types. 7 | 8 | Compared to other solutions, terway-qos has the following advantages: 9 | 10 | 1. Supports bandwidth limitation based on business types, allowing for mixed deployment of multiple business types. 11 | 2. Supports dynamic adjustment of Pod bandwidth limitation. 
12 | 13 | ## Functionality 14 | 15 | Bandwidth limitation can be divided into: 16 | 17 | - Host bandwidth limitation 18 | - Pod bandwidth limitation 19 | 20 | ### Host bandwidth limitation 21 | 22 | In mixed deployment scenarios, we expect to guarantee maximum bandwidth for online business to avoid contention. During 23 | idle periods, offline business should also be able to utilize the full bandwidth resources as much as possible. 24 | For this purpose, users can define three priority levels for business traffic: L0, L1, and L2. The priority order is 25 | L0 > L1 > L2. 26 | 27 | Definition of contention scenario: When the total traffic of L0, L1, and L2 exceeds the host bandwidth. 28 | 29 | Limitation strategy: 30 | 31 | - The maximum bandwidth of L0 is dynamically adjusted based on the real-time traffic of L1 and L2. The maximum value is 32 | the host bandwidth, and the minimum value is `host bandwidth - minimum L1 bandwidth - minimum L2 bandwidth`. 33 | - Under any circumstances, the bandwidth of L1 and L2 should not exceed their respective upper limits. 34 | - In a contention scenario, the bandwidth of L1 and L2 should not be lower than their respective lower limits. 35 | - In a contention scenario, the bandwidth is limited in the order of L2, L1, and L0. 36 | 37 | Supports hot update of annotations to adjust Pod bandwidth limitation. 38 | 39 | Please note that the CNI plugin may also support Kubernetes standard annotations, which may affect the hot update. In 40 | this case, you can choose to disable the bandwidth limitation feature of the CNI plugin. 41 | 42 | ### Pod priority definition 43 | 44 | Configure the following annotation for Pods: 45 | 46 | | key | Parameters | 47 | |----------------------------|---------------------------------------------------------------------------------------------------------------------------| 48 | | `k8s.aliyun.com/qos-class` | `guaranteed` for online business L0
`burstable` for offline business L1
`best-effort` for offline business L2
| 49 | 50 | ### Bandwidth limitation configuration 51 | 52 | For nodes requiring mixed deployment, configure the grace limits in the path `/var/lib/terway/qos/global_bps_config`. 53 | 54 | | Configuration Path | Parameters | 55 | |-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 56 | | `/var/lib/terway/qos/global_bps_config` | `hw_tx_bps_max` maximum tx bandwidth for the node
`hw_rx_bps_max` maximum rx bandwidth for the node
`offline_l1_tx_bps_min` minimum guaranteed bandwidth for outbound L1 offline business
`offline_l1_tx_bps_max` maximum bandwidth usage for outbound L1 offline business
`offline_l2_tx_bps_min` minimum guaranteed bandwidth for outbound L2 offline business
`offline_l2_tx_bps_max` maximum bandwidth usage for outbound L2 offline business | 57 | 58 | Here is an example: 59 | 60 | ```yaml 61 | kind: ConfigMap 62 | apiVersion: v1 63 | metadata: 64 | name: terway-qos 65 | data: 66 | global_bps_config: | 67 | hw_tx_bps_max 900000000 68 | hw_rx_bps_max 0 69 | offline_l1_tx_bps_min 100000000 70 | offline_l1_tx_bps_max 200000000 71 | offline_l2_tx_bps_min 100000000 72 | offline_l2_tx_bps_max 300000000 73 | offline_l1_rx_bps_min 0 74 | offline_l1_rx_bps_max 0 75 | offline_l2_rx_bps_min 0 76 | offline_l2_rx_bps_max 0 77 | ``` 78 | 79 | > The bandwidth unit is Bytes/s, and the bandwidth limitation precision is at least 1MB or higher. 80 | 81 | ### Pod bandwidth limitation configuration 82 | 83 | Supports Kubernetes standard annotations: 84 | 85 | - `kubernetes.io/egress-bandwidth` 86 | - `kubernetes.io/ingress-bandwidth` 87 | 88 | Supports hot update of annotations to adjust Pod bandwidth limitation. 89 | 90 | Please note that the CNI plugin may also support Kubernetes standard annotations, which may affect the hot update. In 91 | this case, you can choose to disable the bandwidth limitation feature of the CNI plugin. 92 | 93 | ## License 94 | 95 | terway-qos is developed by Alibaba Group and licensed under the Apache License (Version 2.0). 96 | This product contains various third-party components under other open source licenses. 97 | See the [NOTICE](NOTICE) file for more information. 
-------------------------------------------------------------------------------- /bpf/headers/LICENSE.BSD-2-Clause: -------------------------------------------------------------------------------- 1 | Valid-License-Identifier: BSD-2-Clause 2 | SPDX-URL: https://spdx.org/licenses/BSD-2-Clause.html 3 | Usage-Guide: 4 | To use the BSD 2-clause "Simplified" License put the following SPDX 5 | tag/value pair into a comment according to the placement guidelines in 6 | the licensing rules documentation: 7 | SPDX-License-Identifier: BSD-2-Clause 8 | License-Text: 9 | 10 | Copyright (c) . All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are met: 14 | 15 | 1. Redistributions of source code must retain the above copyright notice, 16 | this list of conditions and the following disclaimer. 17 | 18 | 2. Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 26 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 | POSSIBILITY OF SUCH DAMAGE. 
33 | -------------------------------------------------------------------------------- /bpf/headers/bpf_endian.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __BPF_ENDIAN__ 3 | #define __BPF_ENDIAN__ 4 | 5 | /* 6 | * Isolate byte #n and put it into byte #m, for __u##b type. 7 | * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: 8 | * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 9 | * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 10 | * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 11 | * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 12 | */ 13 | #define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) 14 | 15 | #define ___bpf_swab16(x) ((__u16)( \ 16 | ___bpf_mvb(x, 16, 0, 1) | \ 17 | ___bpf_mvb(x, 16, 1, 0))) 18 | 19 | #define ___bpf_swab32(x) ((__u32)( \ 20 | ___bpf_mvb(x, 32, 0, 3) | \ 21 | ___bpf_mvb(x, 32, 1, 2) | \ 22 | ___bpf_mvb(x, 32, 2, 1) | \ 23 | ___bpf_mvb(x, 32, 3, 0))) 24 | 25 | #define ___bpf_swab64(x) ((__u64)( \ 26 | ___bpf_mvb(x, 64, 0, 7) | \ 27 | ___bpf_mvb(x, 64, 1, 6) | \ 28 | ___bpf_mvb(x, 64, 2, 5) | \ 29 | ___bpf_mvb(x, 64, 3, 4) | \ 30 | ___bpf_mvb(x, 64, 4, 3) | \ 31 | ___bpf_mvb(x, 64, 5, 2) | \ 32 | ___bpf_mvb(x, 64, 6, 1) | \ 33 | ___bpf_mvb(x, 64, 7, 0))) 34 | 35 | /* LLVM's BPF target selects the endianness of the CPU 36 | * it compiles on, or the user specifies (bpfel/bpfeb), 37 | * respectively. The used __BYTE_ORDER__ is defined by 38 | * the compiler, we cannot rely on __BYTE_ORDER from 39 | * libc headers, since it doesn't reflect the actual 40 | * requested byte order. 41 | * 42 | * Note, LLVM's BPF target has different __builtin_bswapX() 43 | * semantics. 
It does map to BPF_ALU | BPF_END | BPF_TO_BE 44 | * in bpfel and bpfeb case, which means below, that we map 45 | * to cpu_to_be16(). We could use it unconditionally in BPF 46 | * case, but better not rely on it, so that this header here 47 | * can be used from application and BPF program side, which 48 | * use different targets. 49 | */ 50 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 51 | # define __bpf_ntohs(x) __builtin_bswap16(x) 52 | # define __bpf_htons(x) __builtin_bswap16(x) 53 | # define __bpf_constant_ntohs(x) ___bpf_swab16(x) 54 | # define __bpf_constant_htons(x) ___bpf_swab16(x) 55 | # define __bpf_ntohl(x) __builtin_bswap32(x) 56 | # define __bpf_htonl(x) __builtin_bswap32(x) 57 | # define __bpf_constant_ntohl(x) ___bpf_swab32(x) 58 | # define __bpf_constant_htonl(x) ___bpf_swab32(x) 59 | # define __bpf_be64_to_cpu(x) __builtin_bswap64(x) 60 | # define __bpf_cpu_to_be64(x) __builtin_bswap64(x) 61 | # define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) 62 | # define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) 63 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 64 | # define __bpf_ntohs(x) (x) 65 | # define __bpf_htons(x) (x) 66 | # define __bpf_constant_ntohs(x) (x) 67 | # define __bpf_constant_htons(x) (x) 68 | # define __bpf_ntohl(x) (x) 69 | # define __bpf_htonl(x) (x) 70 | # define __bpf_constant_ntohl(x) (x) 71 | # define __bpf_constant_htonl(x) (x) 72 | # define __bpf_be64_to_cpu(x) (x) 73 | # define __bpf_cpu_to_be64(x) (x) 74 | # define __bpf_constant_be64_to_cpu(x) (x) 75 | # define __bpf_constant_cpu_to_be64(x) (x) 76 | #else 77 | # error "Fix your compiler's __BYTE_ORDER__?!" 78 | #endif 79 | 80 | #define bpf_htons(x) \ 81 | (__builtin_constant_p(x) ? \ 82 | __bpf_constant_htons(x) : __bpf_htons(x)) 83 | #define bpf_ntohs(x) \ 84 | (__builtin_constant_p(x) ? \ 85 | __bpf_constant_ntohs(x) : __bpf_ntohs(x)) 86 | #define bpf_htonl(x) \ 87 | (__builtin_constant_p(x) ? 
\ 88 | __bpf_constant_htonl(x) : __bpf_htonl(x)) 89 | #define bpf_ntohl(x) \ 90 | (__builtin_constant_p(x) ? \ 91 | __bpf_constant_ntohl(x) : __bpf_ntohl(x)) 92 | #define bpf_cpu_to_be64(x) \ 93 | (__builtin_constant_p(x) ? \ 94 | __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) 95 | #define bpf_be64_to_cpu(x) \ 96 | (__builtin_constant_p(x) ? \ 97 | __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) 98 | 99 | #endif /* __BPF_ENDIAN__ */ 100 | -------------------------------------------------------------------------------- /bpf/headers/bpf_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __BPF_HELPERS__ 3 | #define __BPF_HELPERS__ 4 | 5 | /* 6 | * Note that bpf programs need to include either 7 | * vmlinux.h (auto-generated from BTF) or linux/types.h 8 | * in advance since bpf_helper_defs.h uses such types 9 | * as __u64. 10 | */ 11 | #include "bpf_helper_defs.h" 12 | 13 | #define __uint(name, val) int (*name)[val] 14 | #define __type(name, val) typeof(val) *name 15 | #define __array(name, val) typeof(val) *name[] 16 | 17 | /* 18 | * Helper macro to place programs, maps, license in 19 | * different sections in elf_bpf file. Section names 20 | * are interpreted by libbpf depending on the context (BPF programs, BPF maps, 21 | * extern variables, etc). 22 | * To allow use of SEC() with externs (e.g., for extern .maps declarations), 23 | * make sure __attribute__((unused)) doesn't trigger compilation warning. 24 | */ 25 | #define SEC(name) \ 26 | _Pragma("GCC diagnostic push") \ 27 | _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \ 28 | __attribute__((section(name), used)) \ 29 | _Pragma("GCC diagnostic pop") \ 30 | 31 | /* Avoid 'linux/stddef.h' definition of '__always_inline'. 
*/ 32 | #undef __always_inline 33 | #define __always_inline inline __attribute__((always_inline)) 34 | 35 | #ifndef __noinline 36 | #define __noinline __attribute__((noinline)) 37 | #endif 38 | #ifndef __weak 39 | #define __weak __attribute__((weak)) 40 | #endif 41 | 42 | /* 43 | * Use __hidden attribute to mark a non-static BPF subprogram effectively 44 | * static for BPF verifier's verification algorithm purposes, allowing more 45 | * extensive and permissive BPF verification process, taking into account 46 | * subprogram's caller context. 47 | */ 48 | #define __hidden __attribute__((visibility("hidden"))) 49 | 50 | /* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include 51 | * any system-level headers (such as stddef.h, linux/version.h, etc), and 52 | * commonly-used macros like NULL and KERNEL_VERSION aren't available through 53 | * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define 54 | * them on their own. So as a convenience, provide such definitions here. 55 | */ 56 | #ifndef NULL 57 | #define NULL ((void *)0) 58 | #endif 59 | 60 | #ifndef KERNEL_VERSION 61 | #define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) 62 | #endif 63 | 64 | /* 65 | * Helper macros to manipulate data structures 66 | */ 67 | #ifndef offsetof 68 | #define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) 69 | #endif 70 | #ifndef container_of 71 | #define container_of(ptr, type, member) \ 72 | ({ \ 73 | void *__mptr = (void *)(ptr); \ 74 | ((type *)(__mptr - offsetof(type, member))); \ 75 | }) 76 | #endif 77 | 78 | /* 79 | * Helper macro to throw a compilation error if __bpf_unreachable() gets 80 | * built into the resulting code. This works given BPF back end does not 81 | * implement __builtin_trap(). This is useful to assert that certain paths 82 | * of the program code are never used and hence eliminated by the compiler. 
83 | * 84 | * For example, consider a switch statement that covers known cases used by 85 | * the program. __bpf_unreachable() can then reside in the default case. If 86 | * the program gets extended such that a case is not covered in the switch 87 | * statement, then it will throw a build error due to the default case not 88 | * being compiled out. 89 | */ 90 | #ifndef __bpf_unreachable 91 | # define __bpf_unreachable() __builtin_trap() 92 | #endif 93 | 94 | /* 95 | * Helper function to perform a tail call with a constant/immediate map slot. 96 | */ 97 | #if __clang_major__ >= 8 && defined(__bpf__) 98 | static __always_inline void 99 | bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) 100 | { 101 | if (!__builtin_constant_p(slot)) 102 | __bpf_unreachable(); 103 | 104 | /* 105 | * Provide a hard guarantee that LLVM won't optimize setting r2 (map 106 | * pointer) and r3 (constant map index) from _different paths_ ending 107 | * up at the _same_ call insn as otherwise we won't be able to use the 108 | * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel 109 | * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key 110 | * tracking for prog array pokes") for details on verifier tracking. 111 | * 112 | * Note on clobber list: we need to stay in-line with BPF calling 113 | * convention, so even if we don't end up using r0, r4, r5, we need 114 | * to mark them as clobber so that LLVM doesn't end up using them 115 | * before / after the call. 
116 | */ 117 | asm volatile("r1 = %[ctx]\n\t" 118 | "r2 = %[map]\n\t" 119 | "r3 = %[slot]\n\t" 120 | "call 12" 121 | :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) 122 | : "r0", "r1", "r2", "r3", "r4", "r5"); 123 | } 124 | #endif 125 | 126 | /* 127 | * Helper structure used by eBPF C program 128 | * to describe BPF map attributes to libbpf loader 129 | */ 130 | struct bpf_map_def { 131 | unsigned int type; 132 | unsigned int key_size; 133 | unsigned int value_size; 134 | unsigned int max_entries; 135 | unsigned int map_flags; 136 | }; 137 | 138 | enum libbpf_pin_type { 139 | LIBBPF_PIN_NONE, 140 | /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 141 | LIBBPF_PIN_BY_NAME, 142 | }; 143 | 144 | enum libbpf_tristate { 145 | TRI_NO = 0, 146 | TRI_YES = 1, 147 | TRI_MODULE = 2, 148 | }; 149 | 150 | #define __kconfig __attribute__((section(".kconfig"))) 151 | #define __ksym __attribute__((section(".ksyms"))) 152 | 153 | #ifndef ___bpf_concat 154 | #define ___bpf_concat(a, b) a ## b 155 | #endif 156 | #ifndef ___bpf_apply 157 | #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) 158 | #endif 159 | #ifndef ___bpf_nth 160 | #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N 161 | #endif 162 | #ifndef ___bpf_narg 163 | #define ___bpf_narg(...) \ 164 | ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 165 | #endif 166 | 167 | #define ___bpf_fill0(arr, p, x) do {} while (0) 168 | #define ___bpf_fill1(arr, p, x) arr[p] = x 169 | #define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) 170 | #define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) 171 | #define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args) 172 | #define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) 173 | #define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) 174 | #define ___bpf_fill7(arr, p, x, args...) 
arr[p] = x; ___bpf_fill6(arr, p + 1, args) 175 | #define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) 176 | #define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args) 177 | #define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) 178 | #define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) 179 | #define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) 180 | #define ___bpf_fill(arr, args...) \ 181 | ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) 182 | 183 | /* 184 | * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values 185 | * in a structure. 186 | */ 187 | #define BPF_SEQ_PRINTF(seq, fmt, args...) \ 188 | ({ \ 189 | static const char ___fmt[] = fmt; \ 190 | unsigned long long ___param[___bpf_narg(args)]; \ 191 | \ 192 | _Pragma("GCC diagnostic push") \ 193 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 194 | ___bpf_fill(___param, args); \ 195 | _Pragma("GCC diagnostic pop") \ 196 | \ 197 | bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ 198 | ___param, sizeof(___param)); \ 199 | }) 200 | 201 | /* 202 | * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of 203 | * an array of u64. 204 | */ 205 | #define BPF_SNPRINTF(out, out_size, fmt, args...) \ 206 | ({ \ 207 | static const char ___fmt[] = fmt; \ 208 | unsigned long long ___param[___bpf_narg(args)]; \ 209 | \ 210 | _Pragma("GCC diagnostic push") \ 211 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 212 | ___bpf_fill(___param, args); \ 213 | _Pragma("GCC diagnostic pop") \ 214 | \ 215 | bpf_snprintf(out, out_size, ___fmt, \ 216 | ___param, sizeof(___param)); \ 217 | }) 218 | 219 | #ifdef BPF_NO_GLOBAL_DATA 220 | #define BPF_PRINTK_FMT_MOD 221 | #else 222 | #define BPF_PRINTK_FMT_MOD static const 223 | #endif 224 | 225 | #define __bpf_printk(fmt, ...) 
\ 226 | ({ \ 227 | BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ 228 | bpf_trace_printk(____fmt, sizeof(____fmt), \ 229 | ##__VA_ARGS__); \ 230 | }) 231 | 232 | /* 233 | * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments 234 | * instead of an array of u64. 235 | */ 236 | #define __bpf_vprintk(fmt, args...) \ 237 | ({ \ 238 | static const char ___fmt[] = fmt; \ 239 | unsigned long long ___param[___bpf_narg(args)]; \ 240 | \ 241 | _Pragma("GCC diagnostic push") \ 242 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 243 | ___bpf_fill(___param, args); \ 244 | _Pragma("GCC diagnostic pop") \ 245 | \ 246 | bpf_trace_vprintk(___fmt, sizeof(___fmt), \ 247 | ___param, sizeof(___param)); \ 248 | }) 249 | 250 | /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args 251 | * Otherwise use __bpf_vprintk 252 | */ 253 | #define ___bpf_pick_printk(...) \ 254 | ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ 255 | __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ 256 | __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ 257 | __bpf_printk /*1*/, __bpf_printk /*0*/) 258 | 259 | /* Helper macro to print out debug messages */ 260 | #define bpf_printk(fmt, args...) 
___bpf_pick_printk(args)(fmt, ##args) 261 | 262 | #endif 263 | -------------------------------------------------------------------------------- /bpf/headers/common.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __LITTLE_ENDIAN_BITFIELD 3 | #define __LITTLE_ENDIAN_BITFIELD 4 | #endif 5 | 6 | #include "compiler.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #ifndef TC_ACT_OK 13 | # define TC_ACT_OK 0 14 | #endif 15 | 16 | #ifndef TC_ACT_SHOT 17 | # define TC_ACT_SHOT 2 18 | #endif 19 | 20 | #ifndef TC_ACT_PIPE 21 | # define TC_ACT_PIPE 3 22 | #endif 23 | -------------------------------------------------------------------------------- /bpf/headers/compiler.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 | /* Copyright Authors of Cilium */ 3 | 4 | #ifndef __BPF_COMPILER_H_ 5 | #define __BPF_COMPILER_H_ 6 | 7 | #ifndef __section 8 | # define __section(X) __attribute__((section(X), used)) 9 | #endif 10 | 11 | #ifndef __maybe_unused 12 | # define __maybe_unused __attribute__((__unused__)) 13 | #endif 14 | 15 | #ifndef offsetof 16 | # define offsetof(T, M) __builtin_offsetof(T, M) 17 | #endif 18 | 19 | #ifndef field_sizeof 20 | # define field_sizeof(T, M) sizeof((((T *)NULL)->M)) 21 | #endif 22 | 23 | #ifndef __packed 24 | # define __packed __attribute__((packed)) 25 | #endif 26 | 27 | #ifndef __nobuiltin 28 | # if __clang_major__ >= 10 29 | # define __nobuiltin(X) __attribute__((no_builtin(X))) 30 | # else 31 | # define __nobuiltin(X) 32 | # endif 33 | #endif 34 | 35 | #ifndef likely 36 | # define likely(X) __builtin_expect(!!(X), 1) 37 | #endif 38 | 39 | #ifndef unlikely 40 | # define unlikely(X) __builtin_expect(!!(X), 0) 41 | #endif 42 | 43 | #ifndef always_succeeds /* Mainly for documentation purpose. 
*/ 44 | # define always_succeeds(X) likely(X) 45 | #endif 46 | 47 | #undef __always_inline /* stddef.h defines its own */ 48 | #define __always_inline inline __attribute__((always_inline)) 49 | 50 | #ifndef __stringify 51 | # define __stringify(X) #X 52 | #endif 53 | 54 | #ifndef __fetch 55 | # define __fetch(X) (__u32)(__u64)(&(X)) 56 | #endif 57 | 58 | #ifndef __aligned 59 | # define __aligned(X) __attribute__((aligned(X))) 60 | #endif 61 | 62 | #ifndef build_bug_on 63 | # define build_bug_on(E) ((void)sizeof(char[1 - 2*!!(E)])) 64 | #endif 65 | 66 | #ifndef __throw_build_bug 67 | # define __throw_build_bug() __builtin_trap() 68 | #endif 69 | 70 | #ifndef __printf 71 | # define __printf(X, Y) __attribute__((__format__(printf, X, Y))) 72 | #endif 73 | 74 | #ifndef barrier 75 | # define barrier() asm volatile("": : :"memory") 76 | #endif 77 | 78 | #ifndef barrier_data 79 | # define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory") 80 | #endif 81 | 82 | static __always_inline void bpf_barrier(void) 83 | { 84 | /* Workaround to avoid verifier complaint: 85 | * "dereference of modified ctx ptr R5 off=48+0, ctx+const is allowed, 86 | * ctx+const+const is not" 87 | */ 88 | barrier(); 89 | } 90 | 91 | #ifndef ARRAY_SIZE 92 | # define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0])) 93 | #endif 94 | 95 | #ifndef __READ_ONCE 96 | # define __READ_ONCE(X) (*(volatile typeof(X) *)&X) 97 | #endif 98 | 99 | #ifndef __WRITE_ONCE 100 | # define __WRITE_ONCE(X, V) (*(volatile typeof(X) *)&X) = (V) 101 | #endif 102 | 103 | /* {READ,WRITE}_ONCE() with verifier workaround via bpf_barrier(). 
*/ 104 | 105 | #ifndef READ_ONCE 106 | # define READ_ONCE(X) \ 107 | ({ typeof(X) __val = __READ_ONCE(X); \ 108 | bpf_barrier(); \ 109 | __val; }) 110 | #endif 111 | 112 | #ifndef WRITE_ONCE 113 | # define WRITE_ONCE(X, V) \ 114 | ({ typeof(X) __val = (V); \ 115 | __WRITE_ONCE(X, __val); \ 116 | bpf_barrier(); \ 117 | __val; }) 118 | #endif 119 | 120 | #endif /* __BPF_COMPILER_H_ */ 121 | -------------------------------------------------------------------------------- /bpf/headers/linux/bpf_common.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | #ifndef __LINUX_BPF_COMMON_H__ 4 | #define __LINUX_BPF_COMMON_H__ 5 | 6 | /* Instruction classes */ 7 | #define BPF_CLASS(code) ((code) & 0x07) 8 | #define BPF_LD 0x00 9 | #define BPF_LDX 0x01 10 | #define BPF_ST 0x02 11 | #define BPF_STX 0x03 12 | #define BPF_ALU 0x04 13 | #define BPF_JMP 0x05 14 | #define BPF_RET 0x06 15 | #define BPF_MISC 0x07 16 | 17 | /* ld/ldx fields */ 18 | #define BPF_SIZE(code) ((code) & 0x18) 19 | #define BPF_W 0x00 20 | #define BPF_H 0x08 21 | #define BPF_B 0x10 22 | #define BPF_MODE(code) ((code) & 0xe0) 23 | #define BPF_IMM 0x00 24 | #define BPF_ABS 0x20 25 | #define BPF_IND 0x40 26 | #define BPF_MEM 0x60 27 | #define BPF_LEN 0x80 28 | #define BPF_MSH 0xa0 29 | 30 | /* alu/jmp fields */ 31 | #define BPF_OP(code) ((code) & 0xf0) 32 | #define BPF_ADD 0x00 33 | #define BPF_SUB 0x10 34 | #define BPF_MUL 0x20 35 | #define BPF_DIV 0x30 36 | #define BPF_OR 0x40 37 | #define BPF_AND 0x50 38 | #define BPF_LSH 0x60 39 | #define BPF_RSH 0x70 40 | #define BPF_NEG 0x80 41 | #define BPF_MOD 0x90 42 | #define BPF_XOR 0xa0 43 | 44 | #define BPF_JA 0x00 45 | #define BPF_JEQ 0x10 46 | #define BPF_JGT 0x20 47 | #define BPF_JGE 0x30 48 | #define BPF_JSET 0x40 49 | #define BPF_SRC(code) ((code) & 0x08) 50 | #define BPF_K 0x00 51 | #define BPF_X 0x08 52 | 53 | 
#ifndef BPF_MAXINSNS 54 | #define BPF_MAXINSNS 4096 55 | #endif 56 | 57 | #endif /* __LINUX_BPF_COMMON_H__ */ 58 | -------------------------------------------------------------------------------- /bpf/headers/linux/if_ether.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | /* 4 | * INET An implementation of the TCP/IP protocol suite for the LINUX 5 | * operating system. INET is implemented using the BSD Socket 6 | * interface as the means of communication with the user level. 7 | * 8 | * Global definitions for the Ethernet IEEE 802.3 interface. 9 | * 10 | * Version: @(#)if_ether.h 1.0.1a 02/08/94 11 | * 12 | * Author: Fred N. van Kempen, 13 | * Donald Becker, 14 | * Alan Cox, 15 | * Steve Whitehouse, 16 | * 17 | * This program is free software; you can redistribute it and/or 18 | * modify it under the terms of the GNU General Public License 19 | * as published by the Free Software Foundation; either version 20 | * 2 of the License, or (at your option) any later version. 21 | */ 22 | 23 | #ifndef _LINUX_IF_ETHER_H 24 | #define _LINUX_IF_ETHER_H 25 | 26 | #include 27 | 28 | /* 29 | * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble 30 | * and FCS/CRC (frame check sequence). 31 | */ 32 | 33 | #define ETH_ALEN 6 /* Octets in one ethernet addr */ 34 | /* __ETH_HLEN is out of sync with the kernel's if_ether.h. In Cilium datapath 35 | * we use ETH_HLEN which can be loaded via static data, and for L2-less devs 36 | * it's 0. To avoid replacing every occurrence of ETH_HLEN in the datapath, 37 | * we prefixed the kernel's ETH_HLEN instead. 38 | */ 39 | #define __ETH_HLEN 14 /* Total octets in header. */ 40 | #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ 41 | #define ETH_DATA_LEN 1500 /* Max. octets in payload */ 42 | #define ETH_FRAME_LEN 1514 /* Max. 
octets in frame sans FCS */ 43 | #define ETH_FCS_LEN 4 /* Octets in the FCS */ 44 | 45 | /* 46 | * These are the defined Ethernet Protocol ID's. 47 | */ 48 | 49 | #define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ 50 | #define ETH_P_PUP 0x0200 /* Xerox PUP packet */ 51 | #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ 52 | #define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */ 53 | #define ETH_P_IP 0x0800 /* Internet Protocol packet */ 54 | #define ETH_P_X25 0x0805 /* CCITT X.25 */ 55 | #define ETH_P_ARP 0x0806 /* Address Resolution packet */ 56 | #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ 57 | #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ 58 | #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ 59 | #define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ 60 | #define ETH_P_DEC 0x6000 /* DEC Assigned proto */ 61 | #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ 62 | #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ 63 | #define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */ 64 | #define ETH_P_LAT 0x6004 /* DEC LAT */ 65 | #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ 66 | #define ETH_P_CUST 0x6006 /* DEC Customer use */ 67 | #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ 68 | #define ETH_P_TEB 0x6558 /* Trans Ether Bridging */ 69 | #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ 70 | #define ETH_P_ATALK 0x809B /* Appletalk DDP */ 71 | #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ 72 | #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ 73 | #define ETH_P_IPX 0x8137 /* IPX over DIX */ 74 | #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ 75 | #define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */ 76 | #define ETH_P_SLOW 0x8809 /* Slow Protocol. 
See 802.3ad 43B */ 77 | #define ETH_P_WCCP 0x883E /* Web-cache coordination protocol 78 | * defined in draft-wilson-wrec-wccp-v2-00.txt */ 79 | #define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */ 80 | #define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */ 81 | #define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */ 82 | #define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ 83 | #define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */ 84 | #define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */ 85 | #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport 86 | * over Ethernet 87 | */ 88 | #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ 89 | #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ 90 | #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ 91 | #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ 92 | #define ETH_P_TIPC 0x88CA /* TIPC */ 93 | #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ 94 | #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ 95 | #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ 96 | #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ 97 | #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ 98 | #define ETH_P_TDLS 0x890D /* TDLS */ 99 | #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ 100 | #define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */ 101 | #define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */ 102 | #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ 103 | #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ 104 | #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ 105 | #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ 106 | #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ 107 | 108 | #define ETH_P_802_3_MIN 0x0600 /* If 
the value in the ethernet type is at least this value 109 | * then the frame is Ethernet II. Else it is 802.3 */ 110 | 111 | /* 112 | * Non DIX types. Won't clash for 1500 types. 113 | */ 114 | 115 | #define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */ 116 | #define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */ 117 | #define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */ 118 | #define ETH_P_802_2 0x0004 /* 802.2 frames */ 119 | #define ETH_P_SNAP 0x0005 /* Internal only */ 120 | #define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */ 121 | #define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/ 122 | #define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */ 123 | #define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */ 124 | #define ETH_P_CAN 0x000C /* CAN: Controller Area Network */ 125 | #define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/ 126 | #define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/ 127 | #define ETH_P_TR_802_2 0x0011 /* 802.2 frames */ 128 | #define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */ 129 | #define ETH_P_CONTROL 0x0016 /* Card specific control frames */ 130 | #define ETH_P_IRDA 0x0017 /* Linux-IrDA */ 131 | #define ETH_P_ECONET 0x0018 /* Acorn Econet */ 132 | #define ETH_P_HDLC 0x0019 /* HDLC frames */ 133 | #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */ 134 | #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */ 135 | #define ETH_P_TRAILER 0x001C /* Trailer switch tagging */ 136 | #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ 137 | #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ 138 | #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ 139 | #define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ 140 | 141 | /* 142 | * This is an Ethernet frame header.
143 | */ 144 | 145 | struct ethhdr { 146 | unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ 147 | unsigned char h_source[ETH_ALEN]; /* source ether addr */ 148 | __be16 h_proto; /* packet type ID field */ 149 | } __attribute__((packed)); 150 | 151 | 152 | #endif /* _LINUX_IF_ETHER_H */ 153 | -------------------------------------------------------------------------------- /bpf/headers/linux/in.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | /* 4 | * INET An implementation of the TCP/IP protocol suite for the LINUX 5 | * operating system. INET is implemented using the BSD Socket 6 | * interface as the means of communication with the user level. 7 | * 8 | * Definitions of the Internet Protocol. 9 | * 10 | * Version: @(#)in.h 1.0.1 04/21/93 11 | * 12 | * Authors: Original taken from the GNU Project file. 13 | * Fred N. van Kempen, 14 | * 15 | * This program is free software; you can redistribute it and/or 16 | * modify it under the terms of the GNU General Public License 17 | * as published by the Free Software Foundation; either version 18 | * 2 of the License, or (at your option) any later version. 19 | */ 20 | #ifndef _LINUX_IN_H 21 | #define _LINUX_IN_H 22 | 23 | #include 24 | 25 | /* Standard well-defined IP protocols. 
*/ 26 | enum { 27 | IPPROTO_IP = 0, /* Dummy protocol for TCP */ 28 | #define IPPROTO_IP IPPROTO_IP 29 | IPPROTO_ICMP = 1, /* Internet Control Message Protocol */ 30 | #define IPPROTO_ICMP IPPROTO_ICMP 31 | IPPROTO_IGMP = 2, /* Internet Group Management Protocol */ 32 | #define IPPROTO_IGMP IPPROTO_IGMP 33 | IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */ 34 | #define IPPROTO_IPIP IPPROTO_IPIP 35 | IPPROTO_TCP = 6, /* Transmission Control Protocol */ 36 | #define IPPROTO_TCP IPPROTO_TCP 37 | IPPROTO_EGP = 8, /* Exterior Gateway Protocol */ 38 | #define IPPROTO_EGP IPPROTO_EGP 39 | IPPROTO_PUP = 12, /* PUP protocol */ 40 | #define IPPROTO_PUP IPPROTO_PUP 41 | IPPROTO_UDP = 17, /* User Datagram Protocol */ 42 | #define IPPROTO_UDP IPPROTO_UDP 43 | IPPROTO_IDP = 22, /* XNS IDP protocol */ 44 | #define IPPROTO_IDP IPPROTO_IDP 45 | IPPROTO_TP = 29, /* ISO Transport Protocol Class 4 */ 46 | #define IPPROTO_TP IPPROTO_TP 47 | IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ 48 | #define IPPROTO_DCCP IPPROTO_DCCP 49 | IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */ 50 | #define IPPROTO_IPV6 IPPROTO_IPV6 51 | IPPROTO_RSVP = 46, /* RSVP Protocol */ 52 | #define IPPROTO_RSVP IPPROTO_RSVP 53 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ 54 | #define IPPROTO_GRE IPPROTO_GRE 55 | IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */ 56 | #define IPPROTO_ESP IPPROTO_ESP 57 | IPPROTO_AH = 51, /* Authentication Header protocol */ 58 | #define IPPROTO_AH IPPROTO_AH 59 | IPPROTO_MTP = 92, /* Multicast Transport Protocol */ 60 | #define IPPROTO_MTP IPPROTO_MTP 61 | IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */ 62 | #define IPPROTO_BEETPH IPPROTO_BEETPH 63 | IPPROTO_ENCAP = 98, /* Encapsulation Header */ 64 | #define IPPROTO_ENCAP IPPROTO_ENCAP 65 | IPPROTO_PIM = 103, /* Protocol Independent Multicast */ 66 | #define IPPROTO_PIM IPPROTO_PIM 67 | IPPROTO_COMP = 108, /* Compression Header Protocol */ 68 | #define
IPPROTO_COMP IPPROTO_COMP 69 | IPPROTO_SCTP = 132, /* Stream Control Transmission Protocol */ 70 | #define IPPROTO_SCTP IPPROTO_SCTP 71 | IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ 72 | #define IPPROTO_UDPLITE IPPROTO_UDPLITE 73 | IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ 74 | #define IPPROTO_MPLS IPPROTO_MPLS 75 | IPPROTO_RAW = 255, /* Raw IP packets */ 76 | #define IPPROTO_RAW IPPROTO_RAW 77 | IPPROTO_MAX 78 | }; 79 | 80 | /* Internet address. */ 81 | struct in_addr { 82 | __be32 s_addr; 83 | }; 84 | 85 | #define IP_TOS 1 86 | #define IP_TTL 2 87 | #define IP_HDRINCL 3 88 | #define IP_OPTIONS 4 89 | #define IP_ROUTER_ALERT 5 90 | #define IP_RECVOPTS 6 91 | #define IP_RETOPTS 7 92 | #define IP_PKTINFO 8 93 | #define IP_PKTOPTIONS 9 94 | #define IP_MTU_DISCOVER 10 95 | #define IP_RECVERR 11 96 | #define IP_RECVTTL 12 97 | #define IP_RECVTOS 13 98 | #define IP_MTU 14 99 | #define IP_FREEBIND 15 100 | #define IP_IPSEC_POLICY 16 101 | #define IP_XFRM_POLICY 17 102 | #define IP_PASSSEC 18 103 | #define IP_TRANSPARENT 19 104 | 105 | /* BSD compatibility */ 106 | #define IP_RECVRETOPTS IP_RETOPTS 107 | 108 | /* TProxy original addresses */ 109 | #define IP_ORIGDSTADDR 20 110 | #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR 111 | 112 | #define IP_MINTTL 21 113 | #define IP_NODEFRAG 22 114 | #define IP_CHECKSUM 23 115 | #define IP_BIND_ADDRESS_NO_PORT 24 116 | 117 | /* IP_MTU_DISCOVER values */ 118 | #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ 119 | #define IP_PMTUDISC_WANT 1 /* Use per route hints */ 120 | #define IP_PMTUDISC_DO 2 /* Always DF */ 121 | #define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */ 122 | /* Always use interface mtu (ignores dst pmtu) but don't set DF flag. 123 | * Also incoming ICMP frag_needed notifications will be ignored on 124 | * this socket to prevent accepting spoofed ones.
125 | */ 126 | #define IP_PMTUDISC_INTERFACE 4 127 | /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get 128 | * fragmented if they exceed the interface mtu 129 | */ 130 | #define IP_PMTUDISC_OMIT 5 131 | 132 | #define IP_MULTICAST_IF 32 133 | #define IP_MULTICAST_TTL 33 134 | #define IP_MULTICAST_LOOP 34 135 | #define IP_ADD_MEMBERSHIP 35 136 | #define IP_DROP_MEMBERSHIP 36 137 | #define IP_UNBLOCK_SOURCE 37 138 | #define IP_BLOCK_SOURCE 38 139 | #define IP_ADD_SOURCE_MEMBERSHIP 39 140 | #define IP_DROP_SOURCE_MEMBERSHIP 40 141 | #define IP_MSFILTER 41 142 | #define MCAST_JOIN_GROUP 42 143 | #define MCAST_BLOCK_SOURCE 43 144 | #define MCAST_UNBLOCK_SOURCE 44 145 | #define MCAST_LEAVE_GROUP 45 146 | #define MCAST_JOIN_SOURCE_GROUP 46 147 | #define MCAST_LEAVE_SOURCE_GROUP 47 148 | #define MCAST_MSFILTER 48 149 | #define IP_MULTICAST_ALL 49 150 | #define IP_UNICAST_IF 50 151 | 152 | #define MCAST_EXCLUDE 0 153 | #define MCAST_INCLUDE 1 154 | 155 | /* These need to appear somewhere around here */ 156 | #define IP_DEFAULT_MULTICAST_TTL 1 157 | #define IP_DEFAULT_MULTICAST_LOOP 1 158 | 159 | /* Request struct for multicast socket ops */ 160 | 161 | /* 162 | * Definitions of the bits in an Internet address integer. 163 | * On subnets, host and network parts are found according 164 | * to the subnet mask, not these masks.
165 | */ 166 | #define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0) 167 | #define IN_CLASSA_NET 0xff000000 168 | #define IN_CLASSA_NSHIFT 24 169 | #define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET) 170 | #define IN_CLASSA_MAX 128 171 | 172 | #define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000) 173 | #define IN_CLASSB_NET 0xffff0000 174 | #define IN_CLASSB_NSHIFT 16 175 | #define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET) 176 | #define IN_CLASSB_MAX 65536 177 | 178 | #define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000) 179 | #define IN_CLASSC_NET 0xffffff00 180 | #define IN_CLASSC_NSHIFT 8 181 | #define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET) 182 | 183 | #define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000) 184 | #define IN_MULTICAST(a) IN_CLASSD(a) 185 | #define IN_MULTICAST_NET 0xF0000000 186 | 187 | #define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) 188 | #define IN_BADCLASS(a) IN_EXPERIMENTAL((a)) 189 | 190 | /* Address to accept any incoming messages. */ 191 | #define INADDR_ANY ((unsigned long int) 0x00000000) 192 | 193 | /* Address to send to all hosts. */ 194 | #define INADDR_BROADCAST ((unsigned long int) 0xffffffff) 195 | 196 | /* Address indicating an error return. */ 197 | #define INADDR_NONE ((unsigned long int) 0xffffffff) 198 | 199 | /* Network number for local host loopback. */ 200 | #define IN_LOOPBACKNET 127 201 | 202 | /* Address to loopback in software to local host. 
*/ 203 | #define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */ 204 | #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) 205 | 206 | /* Defines for Multicast INADDR */ 207 | #define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ 208 | #define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ 209 | #define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ 210 | #define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ 211 | 212 | #endif /* _LINUX_IN_H */ 213 | -------------------------------------------------------------------------------- /bpf/headers/linux/in6.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | /* 4 | * Types and definitions for AF_INET6 5 | * Linux INET6 implementation 6 | * 7 | * Authors: 8 | * Pedro Roque 9 | * 10 | * Sources: 11 | * IPv6 Program Interfaces for BSD Systems 12 | * 13 | * 14 | * Advanced Sockets API for IPv6 15 | * 16 | * 17 | * This program is free software; you can redistribute it and/or 18 | * modify it under the terms of the GNU General Public License 19 | * as published by the Free Software Foundation; either version 20 | * 2 of the License, or (at your option) any later version. 21 | */ 22 | 23 | #ifndef _LINUX_IN6_H 24 | #define _LINUX_IN6_H 25 | 26 | #include 27 | 28 | /* 29 | * IPv6 address structure 30 | */ 31 | 32 | struct in6_addr { 33 | union { 34 | __u8 u6_addr8[16]; 35 | __be16 u6_addr16[8]; 36 | __be32 u6_addr32[4]; 37 | } in6_u; 38 | #define s6_addr in6_u.u6_addr8 39 | #define s6_addr16 in6_u.u6_addr16 40 | #define s6_addr32 in6_u.u6_addr32 41 | }; 42 | 43 | /* 44 | * Bitmask constant declarations to help applications select out the 45 | * flow label and priority fields. 46 | * 47 | * Note that this are in host byte order while the flowinfo field of 48 | * sockaddr_in6 is in network byte order. 
49 | */ 50 | 51 | #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff 52 | #define IPV6_FLOWINFO_PRIORITY 0x0ff00000 53 | 54 | /* These definitions are obsolete */ 55 | #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000 56 | #define IPV6_PRIORITY_FILLER 0x0100 57 | #define IPV6_PRIORITY_UNATTENDED 0x0200 58 | #define IPV6_PRIORITY_RESERVED1 0x0300 59 | #define IPV6_PRIORITY_BULK 0x0400 60 | #define IPV6_PRIORITY_RESERVED2 0x0500 61 | #define IPV6_PRIORITY_INTERACTIVE 0x0600 62 | #define IPV6_PRIORITY_CONTROL 0x0700 63 | #define IPV6_PRIORITY_8 0x0800 64 | #define IPV6_PRIORITY_9 0x0900 65 | #define IPV6_PRIORITY_10 0x0a00 66 | #define IPV6_PRIORITY_11 0x0b00 67 | #define IPV6_PRIORITY_12 0x0c00 68 | #define IPV6_PRIORITY_13 0x0d00 69 | #define IPV6_PRIORITY_14 0x0e00 70 | #define IPV6_PRIORITY_15 0x0f00 71 | 72 | /* 73 | * IPV6 extension headers 74 | */ 75 | #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */ 76 | #define IPPROTO_ROUTING 43 /* IPv6 routing header */ 77 | #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */ 78 | #define IPPROTO_ICMPV6 58 /* ICMPv6 */ 79 | #define IPPROTO_NONE 59 /* IPv6 no next header */ 80 | #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ 81 | #define IPPROTO_MH 135 /* IPv6 mobility header */ 82 | 83 | /* 84 | * IPv6 TLV options. 
85 | */ 86 | #define IPV6_TLV_PAD1 0 87 | #define IPV6_TLV_PADN 1 88 | #define IPV6_TLV_ROUTERALERT 5 89 | #define IPV6_TLV_JUMBO 194 90 | #define IPV6_TLV_HAO 201 /* home address option */ 91 | 92 | /* 93 | * IPV6 socket options 94 | */ 95 | #define IPV6_ADDRFORM 1 96 | #define IPV6_2292PKTINFO 2 97 | #define IPV6_2292HOPOPTS 3 98 | #define IPV6_2292DSTOPTS 4 99 | #define IPV6_2292RTHDR 5 100 | #define IPV6_2292PKTOPTIONS 6 101 | #define IPV6_CHECKSUM 7 102 | #define IPV6_2292HOPLIMIT 8 103 | #define IPV6_NEXTHOP 9 104 | #define IPV6_AUTHHDR 10 /* obsolete */ 105 | #define IPV6_FLOWINFO 11 106 | 107 | #define IPV6_UNICAST_HOPS 16 108 | #define IPV6_MULTICAST_IF 17 109 | #define IPV6_MULTICAST_HOPS 18 110 | #define IPV6_MULTICAST_LOOP 19 111 | #define IPV6_ADD_MEMBERSHIP 20 112 | #define IPV6_DROP_MEMBERSHIP 21 113 | #define IPV6_ROUTER_ALERT 22 114 | #define IPV6_MTU_DISCOVER 23 115 | #define IPV6_MTU 24 116 | #define IPV6_RECVERR 25 117 | #define IPV6_V6ONLY 26 118 | #define IPV6_JOIN_ANYCAST 27 119 | #define IPV6_LEAVE_ANYCAST 28 120 | 121 | /* IPV6_MTU_DISCOVER values */ 122 | #define IPV6_PMTUDISC_DONT 0 123 | #define IPV6_PMTUDISC_WANT 1 124 | #define IPV6_PMTUDISC_DO 2 125 | #define IPV6_PMTUDISC_PROBE 3 126 | /* same as IPV6_PMTUDISC_PROBE, provided for symmetry with IPv4 127 | * also see comments on IP_PMTUDISC_INTERFACE 128 | */ 129 | #define IPV6_PMTUDISC_INTERFACE 4 130 | /* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to 131 | * get fragmented if they exceed the interface mtu 132 | */ 133 | #define IPV6_PMTUDISC_OMIT 5 134 | 135 | /* Flowlabel */ 136 | #define IPV6_FLOWLABEL_MGR 32 137 | #define IPV6_FLOWINFO_SEND 33 138 | 139 | #define IPV6_IPSEC_POLICY 34 140 | #define IPV6_XFRM_POLICY 35 141 | #define IPV6_HDRINCL 36 142 | 143 | /* 144 | * Multicast Routing: 145 | * see include/uapi/linux/mroute6.h. 146 | * 147 | * MRT6_BASE 200 148 | * ...
149 | * MRT6_MAX 150 | */ 151 | #endif /* _LINUX_IN6_H */ 152 | -------------------------------------------------------------------------------- /bpf/headers/linux/ip.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | /* 4 | * INET An implementation of the TCP/IP protocol suite for the LINUX 5 | * operating system. INET is implemented using the BSD Socket 6 | * interface as the means of communication with the user level. 7 | * 8 | * Definitions for the IP protocol. 9 | * 10 | * Version: @(#)ip.h 1.0.2 04/28/93 11 | * 12 | * Authors: Fred N. van Kempen, 13 | * 14 | * This program is free software; you can redistribute it and/or 15 | * modify it under the terms of the GNU General Public License 16 | * as published by the Free Software Foundation; either version 17 | * 2 of the License, or (at your option) any later version. 18 | */ 19 | #ifndef _LINUX_IP_H 20 | #define _LINUX_IP_H 21 | 22 | #include 23 | #include 24 | 25 | #define IPTOS_TOS_MASK 0x1E 26 | #define IPTOS_TOS(tos) ((tos)&IPTOS_TOS_MASK) 27 | #define IPTOS_LOWDELAY 0x10 28 | #define IPTOS_THROUGHPUT 0x08 29 | #define IPTOS_RELIABILITY 0x04 30 | #define IPTOS_MINCOST 0x02 31 | 32 | #define IPTOS_PREC_MASK 0xE0 33 | #define IPTOS_PREC(tos) ((tos)&IPTOS_PREC_MASK) 34 | #define IPTOS_PREC_NETCONTROL 0xe0 35 | #define IPTOS_PREC_INTERNETCONTROL 0xc0 36 | #define IPTOS_PREC_CRITIC_ECP 0xa0 37 | #define IPTOS_PREC_FLASHOVERRIDE 0x80 38 | #define IPTOS_PREC_FLASH 0x60 39 | #define IPTOS_PREC_IMMEDIATE 0x40 40 | #define IPTOS_PREC_PRIORITY 0x20 41 | #define IPTOS_PREC_ROUTINE 0x00 42 | 43 | 44 | /* IP options */ 45 | #define IPOPT_COPY 0x80 46 | #define IPOPT_CLASS_MASK 0x60 47 | #define IPOPT_NUMBER_MASK 0x1f 48 | 49 | #define IPOPT_COPIED(o) ((o)&IPOPT_COPY) 50 | #define IPOPT_CLASS(o) ((o)&IPOPT_CLASS_MASK) 51 | #define IPOPT_NUMBER(o) ((o)&IPOPT_NUMBER_MASK) 
52 | 53 | #define IPOPT_CONTROL 0x00 54 | #define IPOPT_RESERVED1 0x20 55 | #define IPOPT_MEASUREMENT 0x40 56 | #define IPOPT_RESERVED2 0x60 57 | 58 | #define IPOPT_END (0 |IPOPT_CONTROL) 59 | #define IPOPT_NOOP (1 |IPOPT_CONTROL) 60 | #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) 61 | #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) 62 | #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) 63 | #define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) 64 | #define IPOPT_RR (7 |IPOPT_CONTROL) 65 | #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) 66 | #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY) 67 | #define IPOPT_RA (20|IPOPT_CONTROL|IPOPT_COPY) 68 | 69 | #define IPVERSION 4 70 | #define MAXTTL 255 71 | #define IPDEFTTL 64 72 | 73 | #define IPOPT_OPTVAL 0 74 | #define IPOPT_OLEN 1 75 | #define IPOPT_OFFSET 2 76 | #define IPOPT_MINOFF 4 77 | #define MAX_IPOPTLEN 40 78 | #define IPOPT_NOP IPOPT_NOOP 79 | #define IPOPT_EOL IPOPT_END 80 | #define IPOPT_TS IPOPT_TIMESTAMP 81 | 82 | #define IPOPT_TS_TSONLY 0 /* timestamps only */ 83 | #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ 84 | #define IPOPT_TS_PRESPEC 3 /* specified modules only */ 85 | 86 | #define IPV4_BEET_PHMAXLEN 8 87 | 88 | struct iphdr { 89 | #if defined(__LITTLE_ENDIAN_BITFIELD) 90 | __u8 ihl:4, 91 | version:4; 92 | #elif defined (__BIG_ENDIAN_BITFIELD) 93 | __u8 version:4, 94 | ihl:4; 95 | #else 96 | #error "Please fix " 97 | #endif 98 | __u8 tos; 99 | __be16 tot_len; 100 | __be16 id; 101 | __be16 frag_off; 102 | __u8 ttl; 103 | __u8 protocol; 104 | __sum16 check; 105 | __be32 saddr; 106 | __be32 daddr; 107 | /*The options start here. */ 108 | }; 109 | 110 | 111 | struct ip_auth_hdr { 112 | __u8 nexthdr; 113 | __u8 hdrlen; /* This one is measured in 32 bit units! */ 114 | __be16 reserved; 115 | __be32 spi; 116 | __be32 seq_no; /* Sequence number */ 117 | __u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! 
*/ 118 | }; 119 | 120 | struct ip_esp_hdr { 121 | __be32 spi; 122 | __be32 seq_no; /* Sequence number */ 123 | __u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */ 124 | }; 125 | 126 | struct ip_comp_hdr { 127 | __u8 nexthdr; 128 | __u8 flags; 129 | __be16 cpi; 130 | }; 131 | 132 | struct ip_beet_phdr { 133 | __u8 nexthdr; 134 | __u8 hdrlen; 135 | __u8 padlen; 136 | __u8 reserved; 137 | }; 138 | 139 | #endif /* _LINUX_IP_H */ 140 | -------------------------------------------------------------------------------- /bpf/headers/linux/ipv6.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 | /* Copyright Authors of the Linux kernel */ 3 | #ifndef _IPV6_H 4 | #define _IPV6_H 5 | 6 | #include 7 | #include 8 | 9 | /* The latest drafts declared increase in minimal mtu up to 1280. */ 10 | 11 | #define IPV6_MIN_MTU 1280 12 | 13 | /* 14 | * Advanced API 15 | * source interface/address selection, source routing, etc... 
16 | * *under construction* 17 | */ 18 | 19 | #if __UAPI_DEF_IN6_PKTINFO 20 | struct in6_pktinfo { 21 | struct in6_addr ipi6_addr; 22 | int ipi6_ifindex; 23 | }; 24 | #endif 25 | 26 | #if __UAPI_DEF_IP6_MTUINFO 27 | struct ip6_mtuinfo { 28 | struct sockaddr_in6 ip6m_addr; 29 | __u32 ip6m_mtu; 30 | }; 31 | #endif 32 | 33 | struct in6_ifreq { 34 | struct in6_addr ifr6_addr; 35 | __u32 ifr6_prefixlen; 36 | int ifr6_ifindex; 37 | }; 38 | 39 | #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ 40 | #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ 41 | #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ 42 | #define IPV6_SRCRT_TYPE_3 3 /* RPL Segment Routing with IPv6 */ 43 | #define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ 44 | 45 | /* 46 | * routing header 47 | */ 48 | struct ipv6_rt_hdr { 49 | __u8 nexthdr; 50 | __u8 hdrlen; 51 | __u8 type; 52 | __u8 segments_left; 53 | 54 | /* 55 | * type specific data 56 | * variable length field 57 | */ 58 | }; 59 | 60 | 61 | struct ipv6_opt_hdr { 62 | __u8 nexthdr; 63 | __u8 hdrlen; 64 | /* 65 | * TLV encoded option data follows. 
66 | */ 67 | } __attribute__((packed)); /* required for some archs */ 68 | 69 | #define ipv6_destopt_hdr ipv6_opt_hdr 70 | #define ipv6_hopopt_hdr ipv6_opt_hdr 71 | 72 | /* Router Alert option values (RFC2711) */ 73 | #define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */ 74 | 75 | /* 76 | * routing header type 0 (used in cmsghdr struct) 77 | */ 78 | 79 | struct rt0_hdr { 80 | struct ipv6_rt_hdr rt_hdr; 81 | __u32 reserved; 82 | struct in6_addr addr[0]; 83 | 84 | #define rt0_type rt_hdr.type 85 | }; 86 | 87 | /* 88 | * routing header type 2 89 | */ 90 | 91 | struct rt2_hdr { 92 | struct ipv6_rt_hdr rt_hdr; 93 | __u32 reserved; 94 | struct in6_addr addr; 95 | 96 | #define rt2_type rt_hdr.type 97 | }; 98 | 99 | /* 100 | * home address option in destination options header 101 | */ 102 | 103 | struct ipv6_destopt_hao { 104 | __u8 type; 105 | __u8 length; 106 | struct in6_addr addr; 107 | } __attribute__((packed)); 108 | 109 | /* 110 | * IPv6 fixed header 111 | * 112 | * BEWARE, it is incorrect. The first 4 bits of flow_lbl 113 | * are glued to priority now, forming "class". 
114 | */ 115 | 116 | struct ipv6hdr { 117 | #if defined(__LITTLE_ENDIAN_BITFIELD) 118 | __u8 priority:4, 119 | version:4; 120 | #elif defined(__BIG_ENDIAN_BITFIELD) 121 | __u8 version:4, 122 | priority:4; 123 | #else 124 | #error "Please fix " 125 | #endif 126 | __u8 flow_lbl[3]; 127 | 128 | __be16 payload_len; 129 | __u8 nexthdr; 130 | __u8 hop_limit; 131 | 132 | struct in6_addr saddr; 133 | struct in6_addr daddr; 134 | }; 135 | 136 | 137 | /* index values for the variables in ipv6_devconf */ 138 | enum { 139 | DEVCONF_FORWARDING = 0, 140 | DEVCONF_HOPLIMIT, 141 | DEVCONF_MTU6, 142 | DEVCONF_ACCEPT_RA, 143 | DEVCONF_ACCEPT_REDIRECTS, 144 | DEVCONF_AUTOCONF, 145 | DEVCONF_DAD_TRANSMITS, 146 | DEVCONF_RTR_SOLICITS, 147 | DEVCONF_RTR_SOLICIT_INTERVAL, 148 | DEVCONF_RTR_SOLICIT_DELAY, 149 | DEVCONF_USE_TEMPADDR, 150 | DEVCONF_TEMP_VALID_LFT, 151 | DEVCONF_TEMP_PREFERED_LFT, 152 | DEVCONF_REGEN_MAX_RETRY, 153 | DEVCONF_MAX_DESYNC_FACTOR, 154 | DEVCONF_MAX_ADDRESSES, 155 | DEVCONF_FORCE_MLD_VERSION, 156 | DEVCONF_ACCEPT_RA_DEFRTR, 157 | DEVCONF_ACCEPT_RA_PINFO, 158 | DEVCONF_ACCEPT_RA_RTR_PREF, 159 | DEVCONF_RTR_PROBE_INTERVAL, 160 | DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, 161 | DEVCONF_PROXY_NDP, 162 | DEVCONF_OPTIMISTIC_DAD, 163 | DEVCONF_ACCEPT_SOURCE_ROUTE, 164 | DEVCONF_MC_FORWARDING, 165 | DEVCONF_DISABLE_IPV6, 166 | DEVCONF_ACCEPT_DAD, 167 | DEVCONF_FORCE_TLLAO, 168 | DEVCONF_NDISC_NOTIFY, 169 | DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL, 170 | DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL, 171 | DEVCONF_SUPPRESS_FRAG_NDISC, 172 | DEVCONF_ACCEPT_RA_FROM_LOCAL, 173 | DEVCONF_USE_OPTIMISTIC, 174 | DEVCONF_ACCEPT_RA_MTU, 175 | DEVCONF_STABLE_SECRET, 176 | DEVCONF_USE_OIF_ADDRS_ONLY, 177 | DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, 178 | DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, 179 | DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, 180 | DEVCONF_DROP_UNSOLICITED_NA, 181 | DEVCONF_KEEP_ADDR_ON_DOWN, 182 | DEVCONF_RTR_SOLICIT_MAX_INTERVAL, 183 | DEVCONF_SEG6_ENABLED, 184 | DEVCONF_SEG6_REQUIRE_HMAC, 
185 | DEVCONF_ENHANCED_DAD, 186 | DEVCONF_ADDR_GEN_MODE, 187 | DEVCONF_DISABLE_POLICY, 188 | DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, 189 | DEVCONF_NDISC_TCLASS, 190 | DEVCONF_RPL_SEG_ENABLED, 191 | DEVCONF_RA_DEFRTR_METRIC, 192 | DEVCONF_IOAM6_ENABLED, 193 | DEVCONF_IOAM6_ID, 194 | DEVCONF_IOAM6_ID_WIDE, 195 | DEVCONF_NDISC_EVICT_NOCARRIER, 196 | DEVCONF_ACCEPT_UNTRACKED_NA, 197 | DEVCONF_MAX 198 | }; 199 | 200 | 201 | #endif /* _IPV6_H */ 202 | -------------------------------------------------------------------------------- /bpf/headers/linux/types.h: -------------------------------------------------------------------------------- 1 | #ifndef _LINUX_TYPES_H 2 | #define _LINUX_TYPES_H 3 | 4 | #include "types_mapper.h" 5 | 6 | #endif /* _LINUX_TYPES_H */ 7 | -------------------------------------------------------------------------------- /bpf/headers/linux/types_mapper.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ 2 | /* Copyright Authors of Cilium */ 3 | 4 | #ifndef __BPF_TYPES_MAPPER__ 5 | #define __BPF_TYPES_MAPPER__ 6 | 7 | typedef __signed__ char __s8; 8 | typedef unsigned char __u8; 9 | 10 | typedef __signed__ short __s16; 11 | typedef unsigned short __u16; 12 | 13 | typedef __signed__ int __s32; 14 | typedef unsigned int __u32; 15 | 16 | typedef __signed__ long long __s64; 17 | typedef unsigned long long __u64; 18 | 19 | typedef __u16 __le16; 20 | typedef __u16 __be16; 21 | 22 | typedef __u32 __le32; 23 | typedef __u32 __be32; 24 | 25 | typedef __u64 __le64; 26 | typedef __u64 __be64; 27 | 28 | typedef __u16 __sum16; 29 | typedef __u32 __wsum; 30 | 31 | typedef __u64 __aligned_u64; 32 | 33 | typedef __u64 __net_cookie; 34 | typedef __u64 __sock_cookie; 35 | 36 | #endif /* __BPF_TYPES_MAPPER__ */ 37 | -------------------------------------------------------------------------------- /bpf/headers/update.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LIBBPF_VERSION=0.6.1 4 | CILIUM_VERSION=1.13.1 5 | 6 | # The headers we want 7 | LIBBPF_HEADERS=( 8 | libbpf-"$LIBBPF_VERSION"/LICENSE.BSD-2-Clause 9 | libbpf-"$LIBBPF_VERSION"/src/bpf_endian.h 10 | libbpf-"$LIBBPF_VERSION"/src/bpf_helper_defs.h 11 | libbpf-"$LIBBPF_VERSION"/src/bpf_helpers.h 12 | libbpf-"$LIBBPF_VERSION"/src/bpf_tracing.h 13 | ) 14 | 15 | LINUX_HEADERS=( 16 | cilium-"$CILIUM_VERSION"/bpf/include/linux/in.h 17 | cilium-"$CILIUM_VERSION"/bpf/include/linux/in6.h 18 | cilium-"$CILIUM_VERSION"/bpf/include/linux/ip.h 19 | cilium-"$CILIUM_VERSION"/bpf/include/linux/ipv6.h 20 | cilium-"$CILIUM_VERSION"/bpf/include/linux/if_ether.h 21 | cilium-"$CILIUM_VERSION"/bpf/include/linux/bpf.h 22 | cilium-"$CILIUM_VERSION"/bpf/include/linux/bpf_common.h 23 | cilium-"$CILIUM_VERSION"/bpf/include/bpf/types_mapper.h 24 | ) 25 | 26 | TMP_DIR=$(mktemp -d) 27 | 28 | PROJECT_HEADERS_DIR=$(dirname ${BASH_SOURCE[0]}) 29 | LIBBPF_TAR=libbpf-v${LIBBPF_VERSION}.tar.gz 30 | CILIUM_TAR=cilium-v${CILIUM_VERSION}.tar.gz 31 | 32 | curl -sL "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" -o "${TMP_DIR}/${LIBBPF_TAR}" 33 | tar -xvf "${TMP_DIR}/${LIBBPF_TAR}" -C "${TMP_DIR}" 2> /dev/null 34 | 35 | for file in "${LIBBPF_HEADERS[@]}"; do 36 | cp "${TMP_DIR}/$file" "$PROJECT_HEADERS_DIR/" 37 | done; 38 | 39 | curl -sL "https://github.com/cilium/cilium/archive/refs/tags/v${CILIUM_VERSION}.tar.gz" -o "${TMP_DIR}/${CILIUM_TAR}" 40 | tar -xvf "${TMP_DIR}/${CILIUM_TAR}" -C "${TMP_DIR}" 2> /dev/null 41 | 42 | for file in "${LINUX_HEADERS[@]}"; do 43 | cp "${TMP_DIR}/$file" "$PROJECT_HEADERS_DIR/linux/" 44 | done; 45 | 46 | rm -rf "$TMP_DIR" 47 | -------------------------------------------------------------------------------- /bpf/monitor.bt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bpftrace 2 | 3 | 
#include 4 | #include 5 | 6 | BEGIN 7 | { 8 | @start = nsecs; 9 | @start_monitor = nsecs; 10 | @l0_rate = (uint64)0; 11 | @l1_rate = (uint64)0; @l2_rate = (uint64)0; 12 | @l0_bytes = (uint64)0; 13 | @l1_bytes = (uint64)0; 14 | @l2_bytes = (uint64)0; 15 | @avg_cnt = (uint64)0; 16 | 17 | @sample_interval = (uint64)100; 18 | @monitor_interval = (uint64)1000; 19 | } 20 | 21 | //tracepoint:net:net_dev_start_xmit 22 | kprobe:start_xmit 23 | { 24 | //$skb = ((struct sk_buff *)args->skbaddr); 25 | $skb = ((struct sk_buff *)arg0); 26 | 27 | $interval = (nsecs - @start)/1000000; 28 | /* per-priority bytes/sec over the elapsed sample window; multiply before dividing because integer 1000/$interval truncates */ 29 | if ($interval >= @sample_interval) { 30 | @l0_rate = @l0_bytes*1000/$interval; 31 | @l1_rate = @l1_bytes*1000/$interval; 32 | @l2_rate = @l2_bytes*1000/$interval; 33 | 34 | 35 | @avg_l0 = @avg_l0 + @l0_rate; 36 | @avg_l1 = @avg_l1 + @l1_rate; 37 | @avg_l2 = @avg_l2 + @l2_rate; 38 | 39 | @avg_cnt = @avg_cnt + 1; 40 | 41 | /* reinit counters */ 42 | @l0_bytes = 0; 43 | @l1_bytes = 0; 44 | @l2_bytes = 0; 45 | @start = nsecs; 46 | } 47 | 48 | $interval = (nsecs - @start_monitor)/1000000; 49 | 50 | if ($interval >= @monitor_interval) { 51 | 52 | printf("l0/l1/l2{%d}(ms): %u/%u/%u(MBytes)\n", 53 | $interval, 54 | @avg_l0/@avg_cnt/1000/1000, 55 | @avg_l1/@avg_cnt/1000/1000, 56 | @avg_l2/@avg_cnt/1000/1000); 57 | @avg_l0 = 0; 58 | @avg_l1 = 0; 59 | @avg_l2 = 0; 60 | @start_monitor = nsecs; 61 | @avg_cnt = 0; 62 | } 63 | 64 | if ($skb->priority ==0) { 65 | @l0_bytes += $skb->len; 66 | } 67 | if ($skb->priority ==1) { 68 | @l1_bytes += $skb->len; 69 | } 70 | if ($skb->priority ==2) { 71 | @l2_bytes += $skb->len; 72 | } 73 | 74 | } 75 | 76 | END 77 | { 78 | clear(@start); 79 | clear(@start_monitor); 80 | clear(@sample_interval); 81 | clear(@monitor_interval); 82 | clear(@l0_bytes); 83 | clear(@l1_bytes); 84 | clear(@l2_bytes); 85 | clear(@avg_l0); 86 | clear(@avg_l1); clear(@avg_l2); 87 | clear(@l0_rate); 88 | clear(@l1_rate); clear(@l2_rate); 89 | clear(@avg_cnt); 90 | }
-------------------------------------------------------------------------------- /bpf/qos_tc.h: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | #include 4 | 5 | #ifndef __RATE_LIMIT_TC__ 6 | #define __RATE_LIMIT_TC__ 7 | 8 | #undef NSEC_PER_SEC 9 | #undef NSEC_PER_MSEC 10 | 11 | #define NSEC_PER_SEC (1000 * 1000 * 1000ULL) 12 | #define NSEC_PER_MSEC (1000 * 1000ULL) 13 | 14 | #define T_HORIZON_DROP (2000 * 1000 * 1000ULL) 15 | 16 | #define MEGABYTE (1000 * 1000ULL) 17 | 18 | #define MAX_PROG 30 19 | 20 | #define PRIO_ONLINE 0 21 | #define PRIO_OFFLINE_L1 1 22 | #define PRIO_OFFLINE_L2 2 23 | 24 | #define INGRESS_TRAFFIC 0 25 | #define EGRESS_TRAFFIC 1 26 | 27 | #define PROG_TC_CGROUP 0 28 | #define PROG_TC_GLOBAL 1 29 | 30 | #define DEFAULT_TC_ACT TC_ACT_PIPE 31 | 32 | struct rate_info { 33 | __u64 bps; 34 | __u64 t_last; 35 | __u64 slot3; 36 | }; 37 | 38 | struct global_rate_cfg { 39 | __u64 interval; // the interval to adjust rate 40 | __u64 hw_min_bps; 41 | __u64 hw_max_bps; 42 | 43 | __u64 l0_min_bps; 44 | __u64 l0_max_bps; 45 | 46 | __u64 l1_min_bps; 47 | __u64 l1_max_bps; 48 | __u64 l2_min_bps; 49 | __u64 l2_max_bps; 50 | }; 51 | 52 | struct global_rate_info { 53 | __u64 t_last; 54 | 55 | __u64 t_l0_last; 56 | __u64 l0_bps; 57 | __u64 l0_slot; 58 | 59 | __u64 t_l1_last; 60 | __u64 l1_bps; 61 | __u64 l1_slot; 62 | 63 | __u64 t_l2_last; 64 | __u64 l2_bps; 65 | __u64 l2_slot; 66 | }; 67 | 68 | struct ip_addr { 69 | __u32 d1; 70 | __u32 d2; 71 | __u32 d3; 72 | __u32 d4; 73 | }; 74 | 75 | struct cgroup_info { 76 | __u32 class_id; // cgroup classid 77 | __u32 pad1; 78 | __u64 inode; // cgroup inode id 79 | }; 80 | 81 | struct cgroup_rate_id { 82 | __u64 inode; 83 | __u32 direction; 84 | __u32 pad; 85 | }; 86 | 87 | struct net_stat { 88 | __u64 index; 89 | __u64 ts; 90 | __u64 val; 91 | }; 92 | 93 | /* Global map to jump into terway qos program */ 94 | struct { 95 | __uint(type, 
BPF_MAP_TYPE_PROG_ARRAY); 96 | __uint(max_entries, MAX_PROG); 97 | __uint(key_size, sizeof(__u32)); 98 | __uint(value_size, sizeof(__u32)); 99 | __uint(pinning, LIBBPF_PIN_BY_NAME); 100 | } qos_prog_map SEC(".maps"); 101 | 102 | /* per pod rate limit begin */ 103 | 104 | /* Global map for pod config, index by pod ip */ 105 | struct { 106 | __uint(type, BPF_MAP_TYPE_HASH); 107 | __uint(key_size, sizeof(struct ip_addr)); 108 | __uint(value_size, sizeof(struct cgroup_info)); 109 | __uint(max_entries, 65535); 110 | __uint(pinning, LIBBPF_PIN_BY_NAME); 111 | } pod_map SEC(".maps"); 112 | 113 | struct { 114 | __uint(type, BPF_MAP_TYPE_HASH); 115 | __uint(key_size, sizeof(struct cgroup_rate_id)); 116 | __uint(value_size, sizeof(struct rate_info)); 117 | __uint(max_entries, 65535); 118 | __uint(pinning, LIBBPF_PIN_BY_NAME); 119 | } cgroup_rate_map SEC(".maps"); 120 | /* per pod rate limit end */ 121 | 122 | /* global rate limit begin */ 123 | struct { 124 | __uint(type, BPF_MAP_TYPE_ARRAY); 125 | __uint(key_size, sizeof(__u32)); 126 | __uint(value_size, sizeof(struct global_rate_cfg)); 127 | __uint(max_entries, 2); 128 | __uint(pinning, LIBBPF_PIN_BY_NAME); 129 | } terway_global_cfg SEC(".maps"); 130 | 131 | struct { 132 | __uint(type, BPF_MAP_TYPE_ARRAY); 133 | __uint(key_size, sizeof(__u32)); 134 | __uint(value_size, sizeof(struct global_rate_info)); 135 | __uint(max_entries, 2); 136 | __uint(pinning, LIBBPF_PIN_BY_NAME); 137 | } global_rate_map SEC(".maps"); 138 | /* global rate limit end*/ 139 | 140 | struct { 141 | __uint(type, BPF_MAP_TYPE_ARRAY); 142 | __uint(key_size, sizeof(__u32)); 143 | __uint(value_size, sizeof(struct net_stat)); 144 | __uint(max_entries, 20); 145 | __uint(pinning, LIBBPF_PIN_BY_NAME); 146 | } terway_net_stat SEC(".maps"); 147 | 148 | #endif /* __RATE_LIMIT_TC__ */ -------------------------------------------------------------------------------- /charts/terway-qos/.helmignore: 
-------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /charts/terway-qos/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: terway-qos 3 | description: Network QoS 4 | 5 | type: application 6 | version: 0.3.2 7 | appVersion: "0.3.2" 8 | -------------------------------------------------------------------------------- /charts/terway-qos/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "terway-qos.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 
12 | */}} 13 | {{- define "terway-qos.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "terway-qos.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "terway-qos.labels" -}} 37 | helm.sh/chart: {{ include "terway-qos.chart" . }} 38 | {{ include "terway-qos.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "terway-qos.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "terway-qos.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "terway-qos.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "terway-qos.fullname" .) 
.Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /charts/terway-qos/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: terway-qos 5 | labels: 6 | {{- include "terway-qos.labels" . | nindent 4 }} 7 | rules: 8 | - apiGroups: 9 | - "" 10 | resources: 11 | - events 12 | verbs: 13 | - create 14 | - update 15 | - patch 16 | - apiGroups: 17 | - "" 18 | resources: 19 | - pods 20 | - pods/status 21 | verbs: 22 | - get 23 | - watch 24 | - list 25 | -------------------------------------------------------------------------------- /charts/terway-qos/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: terway-qos 5 | labels: 6 | {{- include "terway-qos.labels" . 
| nindent 4 }} 7 | roleRef: 8 | apiGroup: rbac.authorization.k8s.io 9 | kind: ClusterRole 10 | name: terway-qos 11 | subjects: 12 | - kind: ServiceAccount 13 | name: terway-qos 14 | namespace: {{ .Release.Namespace }} -------------------------------------------------------------------------------- /charts/terway-qos/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | kind: ConfigMap 2 | apiVersion: v1 3 | metadata: 4 | name: terway-qos 5 | data: 6 | global_bps_config: | 7 | hw_tx_bps_max 900000000 8 | hw_rx_bps_max 900000000 9 | offline_l1_tx_bps_min 100000000 10 | offline_l1_tx_bps_max 200000000 11 | offline_l2_tx_bps_min 100000000 12 | offline_l2_tx_bps_max 300000000 13 | offline_l1_rx_bps_min 100000000 14 | offline_l1_rx_bps_max 200000000 15 | offline_l2_rx_bps_min 100000000 16 | offline_l2_rx_bps_max 300000000 -------------------------------------------------------------------------------- /charts/terway-qos/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: terway-qos 5 | labels: 6 | {{- include "terway-qos.labels" . | nindent 4 }} 7 | spec: 8 | selector: 9 | matchLabels: 10 | {{- include "terway-qos.selectorLabels" . | nindent 6 }} 11 | template: 12 | metadata: 13 | {{- with .Values.podAnnotations }} 14 | annotations: 15 | {{- toYaml . | nindent 8 }} 16 | {{- end }} 17 | labels: 18 | {{- include "terway-qos.selectorLabels" . | nindent 8 }} 19 | spec: 20 | {{- with .Values.imagePullSecrets }} 21 | imagePullSecrets: 22 | {{- toYaml . 
| nindent 8 }} 23 | {{- end }} 24 | serviceAccountName: terway-qos 25 | securityContext: 26 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 27 | hostNetwork: true 28 | hostPID: true 29 | initContainers: 30 | - name: init 31 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 32 | imagePullPolicy: {{ .Values.image.pullPolicy }} 33 | securityContext: 34 | privileged: true 35 | command: 36 | - '/bin/init.sh' 37 | containers: 38 | - name: {{ .Chart.Name }} 39 | securityContext: 40 | privileged: true 41 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 42 | imagePullPolicy: {{ .Values.image.pullPolicy }} 43 | command: 44 | - 'qos' 45 | - 'd' 46 | {{- if .Values.qos.enableIngress }} 47 | - --enable-ingress 48 | {{- end }} 49 | {{- if .Values.qos.enableEgress }} 50 | - --enable-egress 51 | {{- end }} 52 | {{- if .Values.qos.enableCODR }} 53 | - --enable-bpf-core 54 | {{- end }} 55 | volumeMounts: 56 | - mountPath: /sys/fs/bpf 57 | name: bpffs 58 | - mountPath: /sys/fs/cgroup 59 | name: cgroupfs 60 | - mountPath: /var/lib/terway/qos 61 | name: config 62 | readOnly: true 63 | resources: 64 | {{- toYaml .Values.resources | nindent 12 }} 65 | env: 66 | - name: K8S_NODE_NAME 67 | valueFrom: 68 | fieldRef: 69 | apiVersion: v1 70 | fieldPath: spec.nodeName 71 | 72 | {{- with .Values.affinity }} 73 | affinity: 74 | {{- toYaml . 
| nindent 8 }} 75 | {{- end }} 76 | tolerations: 77 | - operator: "Exists" 78 | volumes: 79 | {{- if eq .Values.qos.qosConfigSource "k8s" }} 80 | - name: config 81 | configMap: 82 | name: terway-qos 83 | items: 84 | - key: global_bps_config 85 | path: global_bps_config 86 | {{- end }} 87 | - name: cgroupfs 88 | hostPath: 89 | path: /sys/fs/cgroup 90 | type: "Directory" 91 | - name: bpffs 92 | hostPath: 93 | path: /sys/fs/bpf 94 | type: "Directory" 95 | {{- if eq .Values.qos.qosConfigSource "file" }} 96 | - name: config 97 | hostPath: 98 | path: /var/lib/terway/qos 99 | type: "DirectoryOrCreate" 100 | {{- end }} -------------------------------------------------------------------------------- /charts/terway-qos/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: terway-qos 5 | labels: 6 | {{- include "terway-qos.labels" . | nindent 4 }} 7 | {{- with .Values.serviceAccount.annotations }} 8 | annotations: 9 | {{- toYaml . | nindent 4 }} 10 | {{- end }} 11 | 12 | -------------------------------------------------------------------------------- /charts/terway-qos/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: registry.cn-hangzhou.aliyuncs.com/acs/terway-qos 3 | pullPolicy: Always 4 | tag: "v0.3.2" 5 | 6 | imagePullSecrets: [] 7 | nameOverride: "" 8 | fullnameOverride: "" 9 | 10 | serviceAccount: 11 | # Specifies whether a service account should be created 12 | create: true 13 | # Annotations to add to the service account 14 | annotations: {} 15 | # The name of the service account to use. 
16 | # If not set and create is true, a name is generated using the fullname template 17 | name: "" 18 | 19 | podAnnotations: {} 20 | 21 | resources: 22 | limits: 23 | cpu: 100m 24 | memory: 128Mi 25 | requests: 26 | cpu: 100m 27 | memory: 128Mi 28 | 29 | affinity: {} 30 | 31 | qos: 32 | qosConfigSource: k8s 33 | enableIngress: true 34 | enableEgress: true 35 | enableCODR: false 36 | 37 | -------------------------------------------------------------------------------- /cmd/bpf_bandwidth.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "github.com/spf13/cobra" 20 | ) 21 | 22 | var bpfBandwidthCmd = &cobra.Command{ 23 | Use: "bandwidth", 24 | Aliases: []string{"bd"}, 25 | } 26 | 27 | func init() { 28 | rootCmd.AddCommand(bpfBandwidthCmd) 29 | } 30 | -------------------------------------------------------------------------------- /cmd/bpf_bandwidth_list.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "os" 21 | 22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 23 | 24 | "github.com/pterm/pterm" 25 | "github.com/spf13/cobra" 26 | ) 27 | 28 | var bpfBandwidthListCmd = &cobra.Command{ 29 | Use: "list", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | writer, err := bpf.NewMap() 32 | if err != nil { 33 | fmt.Fprintf(os.Stderr, "error init bpf map %v", err) 34 | os.Exit(1) 35 | } 36 | defer writer.Close() 37 | ing, eg, err := writer.GetGlobalConfig() 38 | if err != nil { 39 | fmt.Fprintf(os.Stderr, "error get global config %v", err) 40 | os.Exit(1) 41 | } 42 | err = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{ 43 | {"config", "l0", "l1", "l2"}, 44 | {"rx-max", fmt.Sprintf("%d", ing.HwGuaranteed), fmt.Sprintf("%d", ing.L1MaxBps), fmt.Sprintf("%d", ing.L2MaxBps)}, 45 | {"rx-min", fmt.Sprintf("%d", ing.L0MinBps), fmt.Sprintf("%d", ing.L1MinBps), fmt.Sprintf("%d", ing.L2MinBps)}, 46 | {"tx-max", fmt.Sprintf("%d", eg.HwGuaranteed), fmt.Sprintf("%d", eg.L1MaxBps), fmt.Sprintf("%d", eg.L2MaxBps)}, 47 | {"tx-min", fmt.Sprintf("%d", eg.L0MinBps), fmt.Sprintf("%d", eg.L1MinBps), fmt.Sprintf("%d", eg.L2MinBps)}, 48 | }).Render() 49 | if err != nil { 50 | fmt.Fprintf(os.Stderr, "error get global config %v", err) 51 | os.Exit(1) 52 | } 53 | 54 | ingRate, egressRate := writer.GetGlobalRateLimit() 55 | _ = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{ 56 | {"limit", "L0", "L1", "L2"}, 57 | {"tx-max", fmt.Sprintf("%d", 
egressRate.L0Bps), fmt.Sprintf("%d", egressRate.L1Bps), fmt.Sprintf("%d", egressRate.L2Bps)}, 58 | {"t_last", fmt.Sprintf("%d", egressRate.L0LastTimestamp), fmt.Sprintf("%d", egressRate.L1LastTimestamp), fmt.Sprintf("%d", egressRate.L2LastTimestamp)}, 59 | {"slot", fmt.Sprintf("%d", egressRate.L0Slot), fmt.Sprintf("%d", egressRate.L1Slot), fmt.Sprintf("%d", egressRate.L2Slot)}, 60 | }).Render() 61 | 62 | _ = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{ 63 | {"limit", "L0", "L1", "L2"}, 64 | {"rx-max", fmt.Sprintf("%d", ingRate.L0Bps), fmt.Sprintf("%d", ingRate.L1Bps), fmt.Sprintf("%d", ingRate.L2Bps)}, 65 | {"t_last", fmt.Sprintf("%d", ingRate.L0LastTimestamp), fmt.Sprintf("%d", ingRate.L1LastTimestamp), fmt.Sprintf("%d", ingRate.L2LastTimestamp)}, 66 | {"slot", fmt.Sprintf("%d", ingRate.L0Slot), fmt.Sprintf("%d", ingRate.L1Slot), fmt.Sprintf("%d", ingRate.L2Slot)}, 67 | }).Render() 68 | 69 | data := [][]string{ 70 | {"stat", "index", "ts", "val"}, 71 | } 72 | for _, v := range writer.GetNetStat() { 73 | data = append(data, []string{"", fmt.Sprintf("%d", v.Index), fmt.Sprintf("%d", v.TS), fmt.Sprintf("%d", v.Val)}) 74 | } 75 | _ = pterm.DefaultTable.WithHasHeader().WithData(data).Render() 76 | 77 | }, 78 | } 79 | 80 | func init() { 81 | bpfBandwidthCmd.AddCommand(bpfBandwidthListCmd) 82 | } 83 | -------------------------------------------------------------------------------- /cmd/cgroup.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "github.com/spf13/cobra" 20 | ) 21 | 22 | var cgroupCmd = &cobra.Command{ 23 | Use: "cgroup", 24 | } 25 | 26 | func init() { 27 | rootCmd.AddCommand(cgroupCmd) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/cgroup_list.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "os" 21 | 22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 23 | 24 | "github.com/pterm/pterm" 25 | "github.com/spf13/cobra" 26 | ) 27 | 28 | var cgroupListCmd = &cobra.Command{ 29 | Use: "list", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | err := cgroupList() 32 | if err != nil { 33 | fmt.Fprintf(os.Stderr, "error read bpf map %v", err) 34 | os.Exit(1) 35 | } 36 | }, 37 | } 38 | 39 | func cgroupList() error { 40 | var err error 41 | 42 | writer, err := bpf.NewMap() 43 | if err != nil { 44 | return err 45 | } 46 | defer writer.Close() 47 | 48 | tableData := pterm.TableData{ 49 | {"inode", "direction", "rate"}, 50 | } 51 | for k, v := range writer.ListCgroupRate() { 52 | tableData = append(tableData, []string{fmt.Sprintf("%d", k.Inode), fmt.Sprintf("%d", k.Direction), fmt.Sprintf("%d", v.LimitBps)}) 53 | } 54 | 55 | return pterm.DefaultTable.WithHasHeader().WithData(tableData).Render() 56 | } 57 | 58 | func init() { 59 | cgroupCmd.AddCommand(cgroupListCmd) 60 | } 61 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "time" 21 | 22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 23 | "github.com/AliyunContainerService/terway-qos/pkg/types" 24 | 25 | "github.com/pterm/pterm" 26 | "github.com/spf13/cobra" 27 | ) 28 | 29 | var direction string 30 | var watch bool 31 | 32 | var ( 33 | cgroupPath string 34 | ipv4 string 35 | ipv6 string 36 | rate uint64 // bytes/s 37 | priority int 38 | ) 39 | 40 | var ( 41 | hwRxGuaranteedRate uint64 42 | hwTxGuaranteedRate uint64 43 | 44 | adjustInterval uint64 45 | 46 | l1RxMaxRate uint64 47 | l1RxMinRate uint64 48 | 49 | l1TxMaxRate uint64 50 | l1TxMinRate uint64 51 | 52 | l2RxMaxRate uint64 53 | l2RxMinRate uint64 54 | 55 | l2TxMaxRate uint64 56 | l2TxMinRate uint64 57 | ) 58 | 59 | // configCmd represents the config command 60 | var configCmd = &cobra.Command{ 61 | Use: "config", 62 | Short: "config qos", 63 | } 64 | 65 | var globalCmd = &cobra.Command{ 66 | Use: "global", 67 | Short: "g", 68 | } 69 | 70 | var globalSetCmd = &cobra.Command{ 71 | Use: "set", 72 | RunE: func(cmd *cobra.Command, args []string) error { 73 | writer, err := bpf.NewMap() 74 | if err != nil { 75 | return err 76 | } 77 | defer writer.Close() 78 | 79 | egress := &types.GlobalConfig{ 80 | HwGuaranteed: hwTxGuaranteedRate, 81 | HwBurstableBps: hwTxGuaranteedRate, 82 | L1MaxBps: l1TxMaxRate, 83 | L1MinBps: l1TxMinRate, 84 | L2MaxBps: l2TxMaxRate, 85 | L2MinBps: l2TxMinRate, 86 | } 87 | ingress := &types.GlobalConfig{ 88 | HwGuaranteed: hwRxGuaranteedRate, 89 | HwBurstableBps: hwRxGuaranteedRate, 90 | L1MaxBps: l1RxMaxRate, 91 | L1MinBps: l1RxMinRate, 92 | L2MaxBps: l2RxMaxRate, 93 | L2MinBps: l2RxMinRate, 94 | } 95 | 96 | err = writer.WriteGlobalConfig(ingress, egress) 97 | if err != nil { 98 | return err 99 | } 100 | 101 | return nil 102 | }, 103 | } 104 | 105 | var globalGetCmd = &cobra.Command{ 106 | Use: "get", 107 | RunE: func(cmd *cobra.Command, args []string) error { 108 | writer, err := 
bpf.NewMap() 109 | if err != nil { 110 | return err 111 | } 112 | defer writer.Close() 113 | ing, eg, err := writer.GetGlobalConfig() 114 | if err != nil { 115 | return err 116 | } 117 | 118 | return pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{ 119 | {"", "L0", "L1", "L2"}, 120 | {"Rx-Max", fmt.Sprintf("%d", ing.HwGuaranteed), fmt.Sprintf("%d", ing.L1MaxBps), fmt.Sprintf("%d", ing.L2MaxBps)}, 121 | {"Rx-Min", fmt.Sprintf("%d", ing.L0MinBps), fmt.Sprintf("%d", ing.L1MinBps), fmt.Sprintf("%d", ing.L2MinBps)}, 122 | {"Tx-Max", fmt.Sprintf("%d", eg.HwGuaranteed), fmt.Sprintf("%d", eg.L1MaxBps), fmt.Sprintf("%d", eg.L2MaxBps)}, 123 | {"Tx-Min", fmt.Sprintf("%d", eg.L0MinBps), fmt.Sprintf("%d", eg.L1MinBps), fmt.Sprintf("%d", eg.L2MinBps)}, 124 | }).Render() 125 | }, 126 | } 127 | 128 | var globalRateCetCmd = &cobra.Command{ 129 | Use: "rate", 130 | RunE: func(cmd *cobra.Command, args []string) error { 131 | writer, err := bpf.NewMap() 132 | if err != nil { 133 | return err 134 | } 135 | defer writer.Close() 136 | _, eg := writer.GetGlobalRateLimit() 137 | if err != nil { 138 | return err 139 | } 140 | 141 | return pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{ 142 | {"", "L0", "L1", "L2"}, 143 | {"Tx-Max", fmt.Sprintf("%d", eg.L0Bps), fmt.Sprintf("%d", eg.L1Bps), fmt.Sprintf("%d", eg.L2Bps)}, 144 | {"last", fmt.Sprintf("%d", eg.L0LastTimestamp), fmt.Sprintf("%d", eg.L1LastTimestamp), fmt.Sprintf("%d", eg.L2LastTimestamp)}, 145 | {"start", fmt.Sprintf("%d", eg.LastTimestamp), fmt.Sprintf("%d", eg.LastTimestamp), fmt.Sprintf("%d", eg.LastTimestamp)}, 146 | }).Render() 147 | }, 148 | } 149 | 150 | func init() { 151 | rootCmd.AddCommand(configCmd) 152 | rootCmd.AddCommand(globalRateCetCmd) 153 | configCmd.AddCommand(podCmd, globalCmd) 154 | 155 | globalCmd.AddCommand(globalSetCmd, globalGetCmd) 156 | globalSetCmd.PersistentFlags().Uint64Var(&adjustInterval, "interval", uint64(1*time.Second), "interval to adjust bandwidth") 157 | 
globalSetCmd.PersistentFlags().Uint64Var(&hwRxGuaranteedRate, "hw-rx", 0, "") 158 | globalSetCmd.PersistentFlags().Uint64Var(&hwTxGuaranteedRate, "hw-tx", 0, "") 159 | globalSetCmd.PersistentFlags().Uint64Var(&l1TxMaxRate, "l1-tx-max", 0, "") 160 | globalSetCmd.PersistentFlags().Uint64Var(&l1TxMinRate, "l1-tx-min", 0, "") 161 | globalSetCmd.PersistentFlags().Uint64Var(&l2TxMaxRate, "l2-tx-max", 0, "") 162 | globalSetCmd.PersistentFlags().Uint64Var(&l2TxMinRate, "l2-tx-min", 0, "") 163 | 164 | globalSetCmd.PersistentFlags().Uint64Var(&l1RxMaxRate, "l1-rx-max", 0, "") 165 | globalSetCmd.PersistentFlags().Uint64Var(&l1RxMinRate, "l1-rx-min", 0, "") 166 | globalSetCmd.PersistentFlags().Uint64Var(&l2RxMaxRate, "l2-rx-max", 0, "") 167 | globalSetCmd.PersistentFlags().Uint64Var(&l2RxMinRate, "l2-rx-min", 0, "") 168 | 169 | _ = globalSetCmd.MarkPersistentFlagRequired("hw-rx") 170 | _ = globalSetCmd.MarkPersistentFlagRequired("hw-tx") 171 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-tx-max") 172 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-tx-min") 173 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-tx-max") 174 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-tx-min") 175 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-rx-max") 176 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-rx-min") 177 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-rx-max") 178 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-rx-min") 179 | 180 | globalGetCmd.PersistentFlags().BoolVar(&watch, "w", false, "watch") 181 | } 182 | -------------------------------------------------------------------------------- /cmd/damon.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "net" 21 | "os" 22 | 23 | "github.com/spf13/pflag" 24 | "github.com/spf13/viper" 25 | "github.com/vishvananda/netlink" 26 | "k8s.io/klog/v2" 27 | 28 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 29 | "github.com/AliyunContainerService/terway-qos/pkg/config" 30 | "github.com/AliyunContainerService/terway-qos/pkg/k8s" 31 | "github.com/AliyunContainerService/terway-qos/pkg/version" 32 | 33 | "github.com/spf13/cobra" 34 | "k8s.io/klog/v2/klogr" 35 | ctrl "sigs.k8s.io/controller-runtime" 36 | ) 37 | 38 | const ( 39 | enableBPFCORE = "enable-bpf-core" 40 | enableIngress = "enable-ingress" 41 | enableEgress = "enable-egress" 42 | excludeInterfaces = "exclude-interfaces" 43 | bpfPrio = "bpf-prio" 44 | ) 45 | 46 | func init() { 47 | fs := pflag.NewFlagSet("daemon", pflag.PanicOnError) 48 | fs.Bool(enableBPFCORE, false, "enable bpf CORE") 49 | fs.Bool(enableIngress, false, "enable ingress direction qos") 50 | fs.Bool(enableEgress, false, "enable egress direction qos") 51 | fs.StringSlice(excludeInterfaces, []string{}, "network interface names to exclude") 52 | fs.Int(bpfPrio, 90, "tc prio for the qos program") 53 | 54 | _ = viper.BindPFlags(fs) 55 | pflag.CommandLine.AddFlagSet(fs) 56 | 57 | rootCmd.AddCommand(daemonCmd) 58 | 59 | cobra.OnInitialize(initConfig) 60 | } 61 | 62 | var daemonCmd = &cobra.Command{ 63 | Use: "daemon", 64 | Aliases: []string{"d"}, 65 | Short: "start daemon", 66 | Run: func(cmd *cobra.Command, args []string) { 67 | klog.Infof("version: 
%s", version.Version) 68 | err := daemon() 69 | if err != nil { 70 | _, _ = fmt.Fprint(os.Stderr, err) 71 | os.Exit(1) 72 | } 73 | }, 74 | } 75 | 76 | func daemon() error { 77 | ctx := ctrl.SetupSignalHandler() 78 | ctrl.SetLogger(klogr.New()) 79 | 80 | mgr, err := bpf.NewBpfMgr(viper.GetBool(enableIngress), viper.GetBool(enableEgress), viper.GetBool(enableBPFCORE), validDevice, viper.GetInt(bpfPrio)) 81 | if err != nil { 82 | return err 83 | } 84 | err = mgr.Start(ctx) 85 | if err != nil { 86 | return err 87 | } 88 | m, err := bpf.NewMap() 89 | if err != nil { 90 | return err 91 | } 92 | defer m.Close() 93 | 94 | syncer := config.NewSyncer(m) 95 | err = syncer.Start(ctx) 96 | if err != nil { 97 | return err 98 | } 99 | return k8s.StartPodHandler(ctx, syncer) 100 | } 101 | 102 | func validDevice(link netlink.Link) bool { 103 | dev, ok := link.(*netlink.Device) 104 | if !ok { 105 | return false 106 | } 107 | if dev.Attrs().Flags&net.FlagUp == 0 { 108 | return false 109 | } 110 | 111 | for _, name := range viper.GetStringSlice(excludeInterfaces) { 112 | if dev.Name == name { 113 | return false 114 | } 115 | } 116 | 117 | return dev.EncapType != "loopback" 118 | } 119 | 120 | func initConfig() { 121 | if err := viper.ReadInConfig(); err == nil { 122 | fmt.Println("Using config file:", viper.ConfigFileUsed()) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /cmd/pod.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "github.com/spf13/cobra" 20 | ) 21 | 22 | var podCmd = &cobra.Command{ 23 | Use: "pod", 24 | } 25 | 26 | func init() { 27 | rootCmd.AddCommand(podCmd) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/pod_list.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "os" 21 | 22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 23 | 24 | "github.com/pterm/pterm" 25 | "github.com/spf13/cobra" 26 | ) 27 | 28 | var podListCmd = &cobra.Command{ 29 | Use: "list", 30 | Run: func(cmd *cobra.Command, args []string) { 31 | err := podList() 32 | if err != nil { 33 | fmt.Fprintf(os.Stderr, "error read bpf map %v", err) 34 | os.Exit(1) 35 | } 36 | }, 37 | } 38 | 39 | func podList() error { 40 | var err error 41 | 42 | writer, err := bpf.NewMap() 43 | if err != nil { 44 | return err 45 | } 46 | defer writer.Close() 47 | 48 | tableData := pterm.TableData{ 49 | {"ip", "class_id", "inode"}, 50 | } 51 | for k, v := range writer.ListPodInfo() { 52 | tableData = append(tableData, []string{k.String(), fmt.Sprintf("%d", v.ClassID), fmt.Sprintf("%d", v.Inode)}) 53 | } 54 | 55 | return pterm.DefaultTable.WithHasHeader().WithData(tableData).Render() 56 | } 57 | 58 | func init() { 59 | podCmd.AddCommand(podListCmd) 60 | } 61 | -------------------------------------------------------------------------------- /cmd/pod_set.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "fmt" 20 | "net/netip" 21 | "os" 22 | 23 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 24 | "github.com/AliyunContainerService/terway-qos/pkg/types" 25 | 26 | "github.com/spf13/cobra" 27 | ) 28 | 29 | var podSetCmd = &cobra.Command{ 30 | Use: "set", 31 | Run: func(cmd *cobra.Command, args []string) { 32 | err := podSet() 33 | if err != nil { 34 | fmt.Fprint(os.Stderr, err) 35 | os.Exit(1) 36 | } 37 | }, 38 | } 39 | 40 | func podSet() error { 41 | if ipv4 == "" && ipv6 == "" { 42 | return fmt.Errorf("ip must provided") 43 | } 44 | var err error 45 | var v4, v6 netip.Addr 46 | if ipv4 != "" { 47 | v4, err = netip.ParseAddr(ipv4) 48 | if err != nil { 49 | return err 50 | } 51 | } 52 | if ipv6 != "" { 53 | v6, err = netip.ParseAddr(ipv6) 54 | if err != nil { 55 | return err 56 | } 57 | } 58 | writer, err := bpf.NewMap() 59 | if err != nil { 60 | return err 61 | } 62 | defer writer.Close() 63 | 64 | unSet := uint64(0) 65 | return writer.WritePodInfo(&types.PodConfig{ 66 | PodID: "", 67 | PodUID: "", 68 | IPv4: v4, 69 | IPv6: v6, 70 | HostNetwork: false, 71 | CgroupInfo: nil, 72 | RxBps: &unSet, 73 | TxBps: &rate, 74 | }) 75 | } 76 | 77 | func init() { 78 | podCmd.AddCommand(podSetCmd) 79 | 80 | podCmd.PersistentFlags().StringVar(&direction, "direction", "egress", "ingress or egress") 81 | podCmd.PersistentFlags().StringVar(&cgroupPath, "cgroup", "", "cgroup path.") 82 | podCmd.PersistentFlags().StringVar(&ipv4, "ipv4", "", "ipv4 addr") 83 | podCmd.PersistentFlags().StringVar(&ipv6, "ipv6", "", "ipv6 addr") 84 | podCmd.PersistentFlags().Uint64Var(&rate, "rate", 0, "rate limit. bytes/s. At lease 1 MB/s, set 0 to disable rate limit") 85 | podCmd.PersistentFlags().IntVar(&priority, "prio", 0, "priority. 
0,1,2") 86 | 87 | _ = podSetCmd.MarkPersistentFlagRequired("cgroup") 88 | _ = podSetCmd.MarkPersistentFlagRequired("rate") 89 | } 90 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package cmd 17 | 18 | import ( 19 | "os" 20 | 21 | "github.com/spf13/cobra" 22 | 23 | "github.com/AliyunContainerService/terway-qos/pkg/version" 24 | ) 25 | 26 | // rootCmd represents the base command when called without any subcommands 27 | var rootCmd = &cobra.Command{ 28 | Use: "qos", 29 | Short: "Terway QoS", 30 | Long: `Terway QoS`, 31 | Version: version.Version, 32 | } 33 | 34 | func Execute() { 35 | err := rootCmd.Execute() 36 | if err != nil { 37 | os.Exit(1) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /docs/quick-start-zh_CN.md: -------------------------------------------------------------------------------- 1 | # quick-start 2 | 3 | 前提条件 4 | - Kubernetes 集群 5 | - helm 6 | - kubectl 7 | 8 | ## 安装 9 | 10 | 1. git clone 仓库 `git clone --depth=1 https://github.com/AliyunContainerService/terway-qos.git` 11 | 2. 打包 chart 并部署到集群 `helm package ./charts/terway-qos && helm install -nkube-system terway-qos .` 12 | 3. 
你可以在 ConfigMap 中检查 QoS 配置 `kubectl get cm terway-qos -nkube-system -oyaml` 13 | 14 | ## 测试 QoS 功能 15 | 16 | 部署下面的 `YAML` 模板,你将得到三个不同优先级的 Pod。 17 | 18 | ```shell 19 | priority=("server" "burstable" "guaranteed" "best-effort") 20 | 21 | for prio in "${priority[@]}" 22 | do 23 | echo "$prio" 24 | kubectl apply -f - < maxRsrc.Value() { 33 | return fmt.Errorf("resource is unreasonably large (> 1Pbit)") 34 | } 35 | return nil 36 | } 37 | 38 | // ExtractPodBandwidthResources extracts the ingress and egress from the given pod annotations 39 | func ExtractPodBandwidthResources(podAnnotations map[string]string) (ingress, egress *resource.Quantity, err error) { 40 | if podAnnotations == nil { 41 | return nil, nil, nil 42 | } 43 | str, found := podAnnotations["kubernetes.io/ingress-bandwidth"] 44 | if found { 45 | ingressValue, err := resource.ParseQuantity(str) 46 | if err != nil { 47 | return nil, nil, err 48 | } 49 | ingress = &ingressValue 50 | if err := validateBandwidthIsReasonable(ingress); err != nil { 51 | return nil, nil, err 52 | } 53 | } 54 | str, found = podAnnotations["kubernetes.io/egress-bandwidth"] 55 | if found { 56 | egressValue, err := resource.ParseQuantity(str) 57 | if err != nil { 58 | return nil, nil, err 59 | } 60 | egress = &egressValue 61 | if err := validateBandwidthIsReasonable(egress); err != nil { 62 | return nil, nil, err 63 | } 64 | } 65 | return ingress, egress, nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/bpf/compile.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package bpf 17 | 18 | import ( 19 | "fmt" 20 | "os/exec" 21 | "path/filepath" 22 | ) 23 | 24 | const ( 25 | progRoot = "/var/lib/terway" 26 | progPath = "/var/lib/terway/qos_tc.o" 27 | progName = "qos_tc" 28 | ) 29 | 30 | var standardCFlags = []string{"-O2", "-target", "bpf", "-std=gnu99"} 31 | 32 | func Compile(enableEDT bool) error { 33 | custom := map[string]string{} 34 | 35 | if enableEDT { 36 | custom["FEAT_EDT"] = "1" 37 | } 38 | 39 | return compile(progName, custom) 40 | } 41 | 42 | func compile(name string, custom map[string]string) error { 43 | args := make([]string, 0, 16) 44 | args = append(args, "-g") 45 | args = append(args, standardCFlags...) 46 | args = append(args, "-I/var/lib/terway/headers") 47 | for k, v := range custom { 48 | args = append(args, fmt.Sprintf("-D%s=%s", k, v)) 49 | } 50 | args = append(args, "-c") 51 | args = append(args, filepath.Join("/var/lib/terway/src", fmt.Sprintf("%s.c", name))) 52 | args = append(args, "-o") 53 | args = append(args, filepath.Join("/var/lib/terway", fmt.Sprintf("%s.o", name))) 54 | 55 | cmd := exec.Command("clang", args...) 
56 | log.Info("exec", "cmd", cmd.String()) 57 | out, err := cmd.CombinedOutput() 58 | if err != nil { 59 | if len(out) > 0 { 60 | log.Info(string(out)) 61 | } 62 | return err 63 | } 64 | if len(out) > 0 { 65 | log.Info(string(out)) 66 | } 67 | return nil 68 | } 69 | -------------------------------------------------------------------------------- /pkg/bpf/generate.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package bpf 17 | 18 | //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS -strip $BPF_STRIP qos_tc ../../bpf/qos_tc.c -- -I../../bpf/headers 19 | -------------------------------------------------------------------------------- /pkg/bpf/manager.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package bpf 17 | 18 | import ( 19 | "context" 20 | "errors" 21 | "io/fs" 22 | "os" 23 | "sync" 24 | 25 | "github.com/cilium/ebpf" 26 | "github.com/cilium/ebpf/asm" 27 | "github.com/cilium/ebpf/features" 28 | "github.com/cilium/ebpf/rlimit" 29 | "github.com/vishvananda/netlink" 30 | "golang.org/x/sys/unix" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | ) 33 | 34 | var log = ctrl.Log.WithName("bpf") 35 | 36 | const ( 37 | tcProgName = "terway_qos" 38 | pinPath = "/sys/fs/bpf/terway" 39 | ) 40 | 41 | var objs *qos_tcObjects 42 | var once sync.Once 43 | 44 | func getBpfObj(enableCORE bool) *qos_tcObjects { 45 | once.Do(func() { 46 | err := rlimit.RemoveMemlock() 47 | if err != nil { 48 | log.Error(err, "remove memlock failed") 49 | os.Exit(1) 50 | } 51 | err = os.MkdirAll(pinPath, os.ModeDir) 52 | if err != nil { 53 | log.Error(err, "mkdir failed") 54 | os.Exit(1) 55 | } 56 | 57 | featEDT := false 58 | err = features.HaveProgramHelper(ebpf.SchedCLS, asm.FnSkbEcnSetCe) 59 | if err != nil { 60 | if !errors.Is(err, ebpf.ErrNotSupported) { 61 | log.Error(err, "check kernel version failed") 62 | os.Exit(1) 63 | } 64 | } else { 65 | featEDT = true 66 | } 67 | 68 | objs = &qos_tcObjects{} 69 | 70 | opts := &ebpf.CollectionOptions{ 71 | Maps: ebpf.MapOptions{ 72 | PinPath: pinPath, 73 | LoadPinOptions: ebpf.LoadPinOptions{}, 74 | }, 75 | Programs: ebpf.ProgramOptions{}, 76 | MapReplacements: nil, 77 | } 78 | 79 | if enableCORE { 80 | err := loadQos_tcObjects(objs, opts) 81 | if err != nil { 82 | log.Error(err, "load bpf objects failed") 83 | os.Exit(1) 84 | } 85 | } else { 86 | err := Compile(featEDT) 87 | if err != nil { 88 | log.Error(err, "compile bpf failed") 89 | os.Exit(1) 90 | } 91 | 92 | spec, err := ebpf.LoadCollectionSpec(progPath) 93 | if err != nil { 94 | log.Error(err, "load bpf objects failed") 95 | os.Exit(1) 96 | } 97 | err = 
spec.LoadAndAssign(objs, opts) 98 | if err != nil { 99 | log.Error(err, "load bpf objects failed") 100 | os.Exit(1) 101 | } 102 | } 103 | 104 | }) 105 | return objs 106 | } 107 | 108 | type validateDeviceFunc = func(link netlink.Link) bool 109 | 110 | type Mgr struct { 111 | nlEvent chan netlink.LinkUpdate 112 | 113 | enableIngress, enableEgress bool 114 | 115 | obj *qos_tcObjects 116 | 117 | prio int 118 | 119 | validate validateDeviceFunc 120 | } 121 | 122 | func NewBpfMgr(enableIngress, enableEgress, enableCORE bool, validate validateDeviceFunc, prio int) (*Mgr, error) { 123 | return &Mgr{ 124 | nlEvent: make(chan netlink.LinkUpdate), 125 | obj: getBpfObj(enableCORE), 126 | enableEgress: enableEgress, 127 | enableIngress: enableIngress, 128 | validate: validate, 129 | prio: prio, 130 | }, nil 131 | } 132 | 133 | func (m *Mgr) Start(ctx context.Context) error { 134 | links, err := netlink.LinkList() 135 | if err != nil { 136 | return err 137 | } 138 | for _, link := range links { 139 | err = m.ensureBpfProg(link) 140 | if err != nil { 141 | log.Error(err, "attach bpf prog failed") 142 | return err 143 | } 144 | } 145 | 146 | err = netlink.LinkSubscribe(m.nlEvent, ctx.Done()) 147 | if err != nil { 148 | return err 149 | } 150 | 151 | go func() { 152 | for e := range m.nlEvent { 153 | err = m.ensureBpfProg(e.Link) 154 | if err != nil { 155 | log.Error(err, "attach bpf prog failed") 156 | } 157 | } 158 | }() 159 | 160 | return nil 161 | } 162 | 163 | func (m *Mgr) Close() { 164 | if m.obj != nil { 165 | m.obj.Close() 166 | } 167 | } 168 | 169 | func (m *Mgr) ensureBpfProg(link netlink.Link) error { 170 | if !m.validate(link) { 171 | return nil 172 | } 173 | 174 | err := ensureQdisc([]netlink.Link{link}) 175 | if err != nil { 176 | return err 177 | } 178 | 179 | ingressFilter := &netlink.BpfFilter{ 180 | FilterAttrs: netlink.FilterAttrs{ 181 | LinkIndex: link.Attrs().Index, 182 | Parent: netlink.HANDLE_MIN_INGRESS, 183 | Handle: netlink.MakeHandle(0, 1), 184 | 
Protocol: unix.ETH_P_ALL, 185 | Priority: uint16(m.prio), 186 | }, 187 | Fd: int(m.obj.qos_tcPrograms.QosProgIngress.FD()), 188 | Name: tcProgName, 189 | DirectAction: true, 190 | } 191 | if m.enableIngress { 192 | err = netlink.FilterReplace(ingressFilter) 193 | if err != nil { 194 | return err 195 | } 196 | 197 | log.Info("set bpf ingress", "dev", link.Attrs().Name) 198 | 199 | err = m.obj.QosProgMap.Put(uint32(0), uint32(m.obj.QosCgroup.FD())) 200 | if err != nil { 201 | return err 202 | } 203 | err = m.obj.QosProgMap.Put(uint32(1), uint32(m.obj.QosGlobal.FD())) 204 | if err != nil { 205 | return err 206 | } 207 | } else { 208 | err = netlink.FilterDel(ingressFilter) 209 | if err != nil { 210 | if !errors.Is(err, fs.ErrNotExist) { 211 | log.Error(err, "delete bpf prog failed") 212 | } 213 | } 214 | } 215 | 216 | egressFilter := &netlink.BpfFilter{ 217 | FilterAttrs: netlink.FilterAttrs{ 218 | LinkIndex: link.Attrs().Index, 219 | Parent: netlink.HANDLE_MIN_EGRESS, 220 | Handle: netlink.MakeHandle(0, 1), 221 | Protocol: unix.ETH_P_ALL, 222 | Priority: uint16(m.prio), 223 | }, 224 | Fd: int(m.obj.qos_tcPrograms.QosProgEgress.FD()), 225 | Name: tcProgName, 226 | DirectAction: true, 227 | } 228 | if m.enableEgress { 229 | err = netlink.FilterReplace(egressFilter) 230 | if err != nil { 231 | return err 232 | } 233 | 234 | log.Info("set bpf egress", "dev", link.Attrs().Name) 235 | 236 | err = m.obj.QosProgMap.Put(uint32(0), uint32(m.obj.QosCgroup.FD())) 237 | if err != nil { 238 | return err 239 | } 240 | err = m.obj.QosProgMap.Put(uint32(1), uint32(m.obj.QosGlobal.FD())) 241 | if err != nil { 242 | return err 243 | } 244 | } else { 245 | err = netlink.FilterDel(egressFilter) 246 | if err != nil { 247 | if !errors.Is(err, fs.ErrNotExist) { 248 | log.Error(err, "delete bpf prog failed") 249 | } 250 | } 251 | } 252 | 253 | return nil 254 | } 255 | 256 | func ensureQdisc(links []netlink.Link) error { 257 | for _, link := range links { 258 | qdisc := &netlink.GenericQdisc{ 
259 | QdiscAttrs: netlink.QdiscAttrs{ 260 | LinkIndex: link.Attrs().Index, 261 | Parent: netlink.HANDLE_CLSACT, 262 | Handle: netlink.HANDLE_CLSACT & 0xffff0000, 263 | }, 264 | QdiscType: "clsact", 265 | } 266 | err := netlink.QdiscReplace(qdisc) 267 | if err != nil { 268 | return err 269 | } 270 | } 271 | return nil 272 | } 273 | -------------------------------------------------------------------------------- /pkg/bpf/maps.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package bpf 17 | 18 | import ( 19 | "encoding/binary" 20 | "errors" 21 | "fmt" 22 | "net/netip" 23 | "reflect" 24 | "time" 25 | 26 | "k8s.io/klog/v2" 27 | 28 | "github.com/AliyunContainerService/terway-qos/pkg/byteorder" 29 | "github.com/AliyunContainerService/terway-qos/pkg/types" 30 | 31 | "github.com/cilium/ebpf" 32 | ) 33 | 34 | const ( 35 | // trafficDirection for config, MUST equal with bpf map index 36 | ingressIndex uint32 = 0 37 | egressIndex uint32 = 1 38 | ) 39 | 40 | var _ Interface = &Writer{} 41 | 42 | type Writer struct { 43 | obj *qos_tcObjects 44 | } 45 | 46 | func (w *Writer) Close() { 47 | _ = w.obj.Close() 48 | } 49 | 50 | func NewMap() (*Writer, error) { 51 | w := &Writer{ 52 | obj: getBpfObj(true), 53 | } 54 | 55 | return w, nil 56 | } 57 | 58 | func (w *Writer) GetGlobalConfig() (*types.GlobalConfig, *types.GlobalConfig, error) { 59 | ingress := &globalRateCfg{} 60 | egress := &globalRateCfg{} 61 | err := w.obj.TerwayGlobalCfg.Lookup(ingressIndex, ingress) 62 | if err != nil { 63 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 64 | return nil, nil, err 65 | } 66 | } 67 | err = w.obj.TerwayGlobalCfg.Lookup(egressIndex, egress) 68 | if err != nil { 69 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 70 | return nil, nil, err 71 | } 72 | } 73 | 74 | return &types.GlobalConfig{ 75 | HwGuaranteed: ingress.HwGuaranteed, 76 | HwBurstableBps: ingress.HwBurstable, 77 | L0MaxBps: 0, 78 | L0MinBps: ingress.L0MinBps, 79 | L1MaxBps: ingress.L1MaxBps, 80 | L1MinBps: ingress.L1MinBps, 81 | L2MaxBps: ingress.L2MaxBps, 82 | L2MinBps: ingress.L2MinBps, 83 | }, &types.GlobalConfig{ 84 | HwGuaranteed: egress.HwGuaranteed, 85 | HwBurstableBps: egress.HwBurstable, 86 | L0MaxBps: 0, 87 | L0MinBps: egress.L0MinBps, 88 | L1MaxBps: egress.L1MaxBps, 89 | L1MinBps: egress.L1MinBps, 90 | L2MaxBps: egress.L2MaxBps, 91 | L2MinBps: egress.L2MinBps, 92 | }, nil 93 | } 94 | 95 | func updateIfNotEqual(expect any, idx uint32, lookupo func(idx uint32) (any, error), 
update func(idx uint32, rateCfg any) error) error { 96 | prev, err := lookupo(idx) 97 | if err != nil { 98 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 99 | return err 100 | } 101 | } 102 | if reflect.DeepEqual(prev, expect) { 103 | return nil 104 | } 105 | 106 | return update(idx, expect) 107 | } 108 | 109 | func (w *Writer) WriteGlobalConfig(ingress *types.GlobalConfig, egress *types.GlobalConfig) error { 110 | ingress.Default() 111 | if !ingress.Validate() { 112 | return fmt.Errorf("ingress config is not valid, %#v", *ingress) 113 | } 114 | egress.Default() 115 | if !egress.Validate() { 116 | return fmt.Errorf("egress config is not valid, %#v", *egress) 117 | } 118 | 119 | ingressCfg := &globalRateCfg{ 120 | Interval: uint64(500 * time.Millisecond), 121 | HwGuaranteed: ingress.HwGuaranteed, 122 | HwBurstable: 0, 123 | L0MinBps: ingress.HwGuaranteed - ingress.L1MinBps - ingress.L2MinBps, 124 | L1MinBps: ingress.L1MinBps, 125 | L1MaxBps: ingress.L1MaxBps, 126 | L2MinBps: ingress.L2MinBps, 127 | L2MaxBps: ingress.L2MaxBps, 128 | } 129 | egressCfg := &globalRateCfg{ 130 | Interval: uint64(500 * time.Millisecond), 131 | HwGuaranteed: egress.HwGuaranteed, 132 | HwBurstable: 0, 133 | L0MinBps: egress.HwGuaranteed - egress.L1MinBps - egress.L2MinBps, 134 | L1MinBps: egress.L1MinBps, 135 | L1MaxBps: egress.L1MaxBps, 136 | L2MinBps: egress.L2MinBps, 137 | L2MaxBps: egress.L2MaxBps, 138 | } 139 | 140 | lookRateFunc := func(idx uint32) (any, error) { 141 | prev := &globalRateCfg{} 142 | err := w.obj.TerwayGlobalCfg.Lookup(idx, prev) 143 | return prev, err 144 | } 145 | 146 | updateRateFunc := func(idx uint32, rateCfg any) error { 147 | idxtostr := map[uint32]string{ 148 | ingressIndex: "ingress", 149 | egressIndex: "egress", 150 | } 151 | log.Info("write global config", idxtostr[idx], egress.String()) 152 | return w.obj.TerwayGlobalCfg.Put(idx, rateCfg) 153 | } 154 | 155 | if err := updateIfNotEqual(ingressCfg, ingressIndex, lookRateFunc, updateRateFunc); err != nil { 156 | 
return err 157 | } 158 | return updateIfNotEqual(egressCfg, egressIndex, lookRateFunc, updateRateFunc) 159 | } 160 | 161 | func (w *Writer) WritePodInfo(config *types.PodConfig) error { 162 | if config.HostNetwork { 163 | return nil 164 | } 165 | info := &cgroupInfo{ 166 | ClassID: config.CgroupInfo.ClassID, 167 | Pad1: uint32(0), 168 | Inode: config.CgroupInfo.Inode, 169 | } 170 | if config.IPv4.IsValid() { 171 | err := w.obj.PodMap.Put(ip2Addr(config.IPv4), info) 172 | if err != nil { 173 | return fmt.Errorf("error put pod_map map, %w", err) 174 | } 175 | } 176 | if config.IPv6.IsValid() { 177 | err := w.obj.PodMap.Put(ip2Addr(config.IPv6), info) 178 | if err != nil { 179 | return fmt.Errorf("error put pod_map map, %w", err) 180 | } 181 | } 182 | 183 | klog.Infof("write pod info, %v", config) 184 | rx := uint64(0) 185 | tx := uint64(0) 186 | if config.RxBps != nil { 187 | rx = *config.RxBps 188 | } 189 | if config.TxBps != nil { 190 | tx = *config.TxBps 191 | } 192 | 193 | return w.WriteCgroupRate(&types.CgroupRate{ 194 | Inode: config.CgroupInfo.Inode, 195 | RxBps: rx, 196 | TxBps: tx, 197 | }) 198 | } 199 | 200 | func (w *Writer) DeletePodInfo(config *types.PodConfig) error { 201 | if config.HostNetwork { 202 | return nil 203 | } 204 | 205 | ips := []netip.Addr{config.IPv4, config.IPv6} 206 | for _, ip := range ips { 207 | if !ip.IsValid() { 208 | continue 209 | } 210 | if err := w.obj.PodMap.Delete(ip2Addr(ip)); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) { 211 | return fmt.Errorf("error put pod_map map by key %s, %w", ip, err) 212 | } 213 | } 214 | 215 | return nil 216 | } 217 | 218 | func (w *Writer) ListPodInfo() map[netip.Addr]cgroupInfo { 219 | var result = map[netip.Addr]cgroupInfo{} 220 | var key addr 221 | var value cgroupInfo 222 | 223 | iter := w.obj.PodMap.Iterate() 224 | for iter.Next(&key, &value) { 225 | result[addr2ip(&key)] = value 226 | } 227 | return result 228 | } 229 | 230 | func (w *Writer) GetGlobalRateLimit() (*globalRateInfo, 
*globalRateInfo) { 231 | var ingress = &globalRateInfo{} 232 | var egress = &globalRateInfo{} 233 | _ = w.obj.GlobalRateMap.Lookup(ingressIndex, ingress) 234 | 235 | _ = w.obj.GlobalRateMap.Lookup(egressIndex, egress) 236 | return ingress, egress 237 | } 238 | 239 | func (w *Writer) ListCgroupRate() map[cgroupRateID]rateInfo { 240 | result := make(map[cgroupRateID]rateInfo) 241 | var key cgroupRateID 242 | var value rateInfo 243 | 244 | iter := w.obj.CgroupRateMap.Iterate() 245 | for iter.Next(&key, &value) { 246 | result[key] = value 247 | } 248 | return result 249 | } 250 | 251 | func (w *Writer) DeleteCgroupRate(inode uint64) error { 252 | direction := []uint32{egressIndex, ingressIndex} 253 | for _, cur := range direction { 254 | obj := &cgroupRateID{ 255 | Inode: inode, 256 | Direction: cur, 257 | } 258 | if err := w.obj.CgroupRateMap.Delete(obj); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) { 259 | return err 260 | } 261 | } 262 | 263 | return nil 264 | } 265 | 266 | func (w *Writer) WriteCgroupRate(r *types.CgroupRate) error { 267 | egressID := &cgroupRateID{ 268 | Inode: r.Inode, 269 | Direction: egressIndex, 270 | } 271 | ingressID := &cgroupRateID{ 272 | Inode: r.Inode, 273 | Direction: ingressIndex, 274 | } 275 | if r.RxBps == 0 { 276 | err := w.obj.CgroupRateMap.Delete(ingressID) 277 | if err != nil { 278 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 279 | return err 280 | } 281 | } else { 282 | log.Info("delete rate", "ingress", r.RxBps) 283 | } 284 | } else { 285 | prev := &rateInfo{} 286 | err := w.obj.CgroupRateMap.Lookup(ingressID, prev) 287 | if err != nil { 288 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 289 | return err 290 | } 291 | } 292 | if prev.LimitBps == r.RxBps { 293 | return nil 294 | } 295 | log.Info("update rate", "rxBps", r.RxBps) 296 | 297 | err = w.obj.CgroupRateMap.Put(ingressID, &rateInfo{ 298 | LimitBps: r.RxBps, 299 | LastTimeStamp: 0, 300 | }) 301 | if err != nil { 302 | return err 303 | } 304 | } 305 | if r.TxBps == 0 { 
306 | err := w.obj.CgroupRateMap.Delete(egressID) 307 | if err != nil { 308 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 309 | return err 310 | } 311 | } else { 312 | log.Info("delete rate", "txBps", r.TxBps) 313 | } 314 | } else { 315 | prev := &rateInfo{} 316 | err := w.obj.CgroupRateMap.Lookup(egressID, prev) 317 | if err != nil { 318 | if !errors.Is(err, ebpf.ErrKeyNotExist) { 319 | return err 320 | } 321 | } 322 | if prev.LimitBps == r.TxBps { 323 | return nil 324 | } 325 | log.Info("update rate", "txBps", r.TxBps) 326 | 327 | err = w.obj.CgroupRateMap.Put(egressID, &rateInfo{ 328 | LimitBps: r.TxBps, 329 | LastTimeStamp: 0, 330 | }) 331 | if err != nil { 332 | return err 333 | } 334 | } 335 | return nil 336 | } 337 | 338 | func (w *Writer) GetNetStat() []netStat { 339 | var result []netStat 340 | ite := w.obj.TerwayNetStat.Iterate() 341 | var key uint32 342 | var stat netStat 343 | for ite.Next(&key, &stat) { 344 | result = append(result, stat) 345 | } 346 | return result 347 | } 348 | 349 | func ip2Addr(ip netip.Addr) *addr { 350 | slice := ip.As16() 351 | return &addr{ 352 | D1: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[:4])), 353 | D2: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[4:8])), 354 | D3: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[8:12])), 355 | D4: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[12:])), 356 | } 357 | } 358 | 359 | func addr2ip(addr *addr) netip.Addr { 360 | slice := make([]byte, 0, 16) 361 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D1)) 362 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D2)) 363 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D3)) 364 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D4)) 365 | ip, _ := netip.AddrFromSlice(slice) 366 | return ip 367 | } 368 | -------------------------------------------------------------------------------- 
/pkg/bpf/maps_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package bpf 17 | 18 | import ( 19 | "net/netip" 20 | "reflect" 21 | "testing" 22 | ) 23 | 24 | func Test_ip2Addr(t *testing.T) { 25 | type args struct { 26 | ip netip.Addr 27 | } 28 | tests := []struct { 29 | name string 30 | args args 31 | want *addr 32 | }{ 33 | { 34 | name: "", 35 | args: args{ip: netip.MustParseAddr("172.16.1.237")}, 36 | want: &addr{ 37 | D1: 0x00000000, 38 | D2: 0x00000000, 39 | D3: 0xffff0000, 40 | D4: 0xed0110ac, 41 | }, 42 | }, 43 | { 44 | name: "", 45 | // net.IP{0x24, 0x8, 0x40, 0x5, 0x3, 0x9c, 0x78, 0x1, 0x10, 0x1, 0xe5, 0xd, 0xbc, 0x3f, 0xe1, 0x16} 46 | args: args{ip: netip.MustParseAddr("2408:4005:39c:7801:1001:e50d:bc3f:e116")}, 47 | want: &addr{ 48 | D1: 0x05400824, 49 | D2: 0x01789C03, 50 | D3: 0x0de50110, 51 | D4: 0x16e13fbc, 52 | }, 53 | }, 54 | } 55 | for _, tt := range tests { 56 | t.Run(tt.name, func(t *testing.T) { 57 | if got := ip2Addr(tt.args.ip); !reflect.DeepEqual(got, tt.want) { 58 | t.Errorf("ip2Addr() = %v, want %v", got, tt.want) 59 | } 60 | }) 61 | } 62 | } 63 | 64 | func Test_NewMap(t *testing.T) { 65 | 66 | } 67 | -------------------------------------------------------------------------------- /pkg/bpf/qos_tc_bpfeb.go: 
--------------------------------------------------------------------------------
// Code generated by bpf2go; DO NOT EDIT.
//go:build arm64be || armbe || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64

package bpf

import (
	"bytes"
	_ "embed"
	"fmt"
	"io"

	"github.com/cilium/ebpf"
)

// loadQos_tc returns the embedded CollectionSpec for qos_tc.
func loadQos_tc() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_Qos_tcBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load qos_tc: %w", err)
	}

	return spec, err
}

// loadQos_tcObjects loads qos_tc and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*qos_tcObjects
//	*qos_tcPrograms
//	*qos_tcMaps
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadQos_tcObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, err := loadQos_tc()
	if err != nil {
		return err
	}

	return spec.LoadAndAssign(obj, opts)
}

// qos_tcSpecs contains maps and programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcSpecs struct {
	qos_tcProgramSpecs
	qos_tcMapSpecs
}

// qos_tcProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcProgramSpecs struct {
	QosCgroup      *ebpf.ProgramSpec `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.ProgramSpec `ebpf:"qos_global"`
	QosProgEgress  *ebpf.ProgramSpec `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.ProgramSpec `ebpf:"qos_prog_ingress"`
}

// qos_tcMapSpecs contains maps before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcMapSpecs struct {
	CgroupRateMap   *ebpf.MapSpec `ebpf:"cgroup_rate_map"`
	GlobalRateMap   *ebpf.MapSpec `ebpf:"global_rate_map"`
	PodMap          *ebpf.MapSpec `ebpf:"pod_map"`
	QosProgMap      *ebpf.MapSpec `ebpf:"qos_prog_map"`
	TerwayGlobalCfg *ebpf.MapSpec `ebpf:"terway_global_cfg"`
	TerwayNetStat   *ebpf.MapSpec `ebpf:"terway_net_stat"`
}

// qos_tcObjects contains all objects after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcObjects struct {
	qos_tcPrograms
	qos_tcMaps
}

// Close closes the programs first and then the maps. If closing the programs
// fails, that error is returned and the maps are not closed.
func (o *qos_tcObjects) Close() error {
	return _Qos_tcClose(
		&o.qos_tcPrograms,
		&o.qos_tcMaps,
	)
}

// qos_tcMaps contains all maps after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcMaps struct {
	CgroupRateMap   *ebpf.Map `ebpf:"cgroup_rate_map"`
	GlobalRateMap   *ebpf.Map `ebpf:"global_rate_map"`
	PodMap          *ebpf.Map `ebpf:"pod_map"`
	QosProgMap      *ebpf.Map `ebpf:"qos_prog_map"`
	TerwayGlobalCfg *ebpf.Map `ebpf:"terway_global_cfg"`
	TerwayNetStat   *ebpf.Map `ebpf:"terway_net_stat"`
}

// Close closes the maps in field order and stops at the first error.
func (m *qos_tcMaps) Close() error {
	return _Qos_tcClose(
		m.CgroupRateMap,
		m.GlobalRateMap,
		m.PodMap,
		m.QosProgMap,
		m.TerwayGlobalCfg,
		m.TerwayNetStat,
	)
}

// qos_tcPrograms contains all programs after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcPrograms struct {
	QosCgroup      *ebpf.Program `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.Program `ebpf:"qos_global"`
	QosProgEgress  *ebpf.Program `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.Program `ebpf:"qos_prog_ingress"`
}

// Close closes the programs in field order and stops at the first error.
func (p *qos_tcPrograms) Close() error {
	return _Qos_tcClose(
		p.QosCgroup,
		p.QosGlobal,
		p.QosProgEgress,
		p.QosProgIngress,
	)
}

// _Qos_tcClose closes the given closers in order and returns the first error
// it encounters; closers after the failing one are left untouched.
func _Qos_tcClose(closers ...io.Closer) error {
	for _, closer := range closers {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	return nil
}

// Do not access this directly.
//
//go:embed qos_tc_bpfeb.o
var _Qos_tcBytes []byte

--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfeb.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/terway-qos/a625936435f7fcd8be5b25805d92c8ca80f0235a/pkg/bpf/qos_tc_bpfeb.o
--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfel.go:
--------------------------------------------------------------------------------
// Code generated by bpf2go; DO NOT EDIT.
//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mips64le || mips64p32le || mipsle || ppc64le || riscv64

package bpf

import (
	"bytes"
	_ "embed"
	"fmt"
	"io"

	"github.com/cilium/ebpf"
)

// loadQos_tc returns the embedded CollectionSpec for qos_tc.
func loadQos_tc() (*ebpf.CollectionSpec, error) {
	reader := bytes.NewReader(_Qos_tcBytes)
	spec, err := ebpf.LoadCollectionSpecFromReader(reader)
	if err != nil {
		return nil, fmt.Errorf("can't load qos_tc: %w", err)
	}

	return spec, err
}

// loadQos_tcObjects loads qos_tc and converts it into a struct.
//
// The following types are suitable as obj argument:
//
//	*qos_tcObjects
//	*qos_tcPrograms
//	*qos_tcMaps
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadQos_tcObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, err := loadQos_tc()
	if err != nil {
		return err
	}

	return spec.LoadAndAssign(obj, opts)
}

// qos_tcSpecs contains maps and programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcSpecs struct {
	qos_tcProgramSpecs
	qos_tcMapSpecs
}

// qos_tcProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcProgramSpecs struct {
	QosCgroup      *ebpf.ProgramSpec `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.ProgramSpec `ebpf:"qos_global"`
	QosProgEgress  *ebpf.ProgramSpec `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.ProgramSpec `ebpf:"qos_prog_ingress"`
}

// qos_tcMapSpecs contains maps before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcMapSpecs struct {
	CgroupRateMap   *ebpf.MapSpec `ebpf:"cgroup_rate_map"`
	GlobalRateMap   *ebpf.MapSpec `ebpf:"global_rate_map"`
	PodMap          *ebpf.MapSpec `ebpf:"pod_map"`
	QosProgMap      *ebpf.MapSpec `ebpf:"qos_prog_map"`
	TerwayGlobalCfg *ebpf.MapSpec `ebpf:"terway_global_cfg"`
	TerwayNetStat   *ebpf.MapSpec `ebpf:"terway_net_stat"`
}

// qos_tcObjects contains all objects after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcObjects struct {
	qos_tcPrograms
	qos_tcMaps
}

// Close closes the programs first and then the maps. If closing the programs
// fails, that error is returned and the maps are not closed.
func (o *qos_tcObjects) Close() error {
	return _Qos_tcClose(
		&o.qos_tcPrograms,
		&o.qos_tcMaps,
	)
}

// qos_tcMaps contains all maps after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcMaps struct {
	CgroupRateMap   *ebpf.Map `ebpf:"cgroup_rate_map"`
	GlobalRateMap   *ebpf.Map `ebpf:"global_rate_map"`
	PodMap          *ebpf.Map `ebpf:"pod_map"`
	QosProgMap      *ebpf.Map `ebpf:"qos_prog_map"`
	TerwayGlobalCfg *ebpf.Map `ebpf:"terway_global_cfg"`
	TerwayNetStat   *ebpf.Map `ebpf:"terway_net_stat"`
}

// Close closes the maps in field order and stops at the first error.
func (m *qos_tcMaps) Close() error {
	return _Qos_tcClose(
		m.CgroupRateMap,
		m.GlobalRateMap,
		m.PodMap,
		m.QosProgMap,
		m.TerwayGlobalCfg,
		m.TerwayNetStat,
	)
}

// qos_tcPrograms contains all programs after they have been loaded into the kernel.
//
// It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
type qos_tcPrograms struct {
	QosCgroup      *ebpf.Program `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.Program `ebpf:"qos_global"`
	QosProgEgress  *ebpf.Program `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.Program `ebpf:"qos_prog_ingress"`
}

// Close closes the programs in field order and stops at the first error.
func (p *qos_tcPrograms) Close() error {
	return _Qos_tcClose(
		p.QosCgroup,
		p.QosGlobal,
		p.QosProgEgress,
		p.QosProgIngress,
	)
}

// _Qos_tcClose closes the given closers in order and returns the first error
// it encounters; closers after the failing one are left untouched.
func _Qos_tcClose(closers ...io.Closer) error {
	for _, closer := range closers {
		if err := closer.Close(); err != nil {
			return err
		}
	}
	return nil
}

// Do not access this directly.
//
//go:embed qos_tc_bpfel.o
var _Qos_tcBytes []byte

--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfel.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/terway-qos/a625936435f7fcd8be5b25805d92c8ca80f0235a/pkg/bpf/qos_tc_bpfel.o
--------------------------------------------------------------------------------
/pkg/bpf/types.go:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2023, Alibaba Group;
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
14 | */ 15 | 16 | package bpf 17 | 18 | import ( 19 | "net/netip" 20 | 21 | "github.com/AliyunContainerService/terway-qos/pkg/types" 22 | ) 23 | 24 | type Interface interface { 25 | // WriteGlobalConfig write global limit 26 | WriteGlobalConfig(ingress *types.GlobalConfig, egress *types.GlobalConfig) error 27 | // WritePodInfo write class_id or rate limit for each pod 28 | WritePodInfo(config *types.PodConfig) error 29 | DeletePodInfo(config *types.PodConfig) error 30 | 31 | ListPodInfo() map[netip.Addr]cgroupInfo 32 | GetGlobalRateLimit() (*globalRateInfo, *globalRateInfo) 33 | 34 | ListCgroupRate() map[cgroupRateID]rateInfo 35 | WriteCgroupRate(config *types.CgroupRate) error 36 | DeleteCgroupRate(inode uint64) error 37 | } 38 | 39 | // rate for current rate and limit 40 | type rateInfo struct { 41 | LimitBps uint64 `ebpf:"bps"` 42 | LastTimeStamp uint64 `ebpf:"t_last"` 43 | Slot uint64 `ebpf:"slot3"` 44 | } 45 | 46 | // addr for both ipv4 and ipv6 47 | type addr struct { 48 | D1 uint32 `ebpf:"d1"` 49 | D2 uint32 `ebpf:"d2"` 50 | D3 uint32 `ebpf:"d3"` 51 | D4 uint32 `ebpf:"d4"` 52 | } 53 | 54 | // cgroupRateID 55 | // store rx and tx in single map 56 | type cgroupRateID struct { 57 | Inode uint64 `ebpf:"inode"` 58 | Direction uint32 `ebpf:"direction"` 59 | Pad uint32 `ebpf:"pad"` 60 | } 61 | 62 | type cgroupInfo struct { 63 | ClassID uint32 `ebpf:"class_id"` 64 | Pad1 uint32 `ebpf:"pad1"` 65 | Inode uint64 `ebpf:"inode"` 66 | } 67 | 68 | type globalRateCfg struct { 69 | Interval uint64 `ebpf:"interval"` 70 | HwGuaranteed uint64 `ebpf:"hw_min_bps"` 71 | HwBurstable uint64 `ebpf:"hw_max_bps"` 72 | 73 | L0MinBps uint64 `ebpf:"l0_min_bps"` 74 | L0MaxBps uint64 `ebpf:"l0_max_bps"` 75 | L1MinBps uint64 `ebpf:"l1_min_bps"` 76 | L1MaxBps uint64 `ebpf:"l1_max_bps"` 77 | L2MinBps uint64 `ebpf:"l2_min_bps"` 78 | L2MaxBps uint64 `ebpf:"l2_max_bps"` 79 | } 80 | 81 | type globalRateInfo struct { 82 | LastTimestamp uint64 `ebpf:"t_last"` 83 | 84 | L0LastTimestamp uint64 
`ebpf:"t_l0_last"` 85 | L0Bps uint64 `ebpf:"l0_bps"` 86 | L0Slot uint64 `ebpf:"l0_slot"` 87 | 88 | L1LastTimestamp uint64 `ebpf:"t_l1_last"` 89 | L1Bps uint64 `ebpf:"l1_bps"` 90 | L1Slot uint64 `ebpf:"l1_slot"` 91 | 92 | L2LastTimestamp uint64 `ebpf:"t_l2_last"` 93 | L2Bps uint64 `ebpf:"l2_bps"` 94 | L2Slot uint64 `ebpf:"l2_slot"` 95 | } 96 | 97 | type netStat struct { 98 | Index uint64 `ebpf:"index"` 99 | TS uint64 `ebpf:"ts"` 100 | Val uint64 `ebpf:"val"` 101 | } 102 | -------------------------------------------------------------------------------- /pkg/byteorder/byteorder.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: Apache-2.0 2 | // Copyright Authors of Cilium 3 | 4 | package byteorder 5 | 6 | import ( 7 | "net" 8 | ) 9 | 10 | // NetIPv4ToHost32 converts an net.IP to a uint32 in host byte order. ip 11 | // must be a IPv4 address, otherwise the function will panic. 12 | func NetIPv4ToHost32(ip net.IP) uint32 { 13 | ipv4 := ip.To4() 14 | _ = ipv4[3] // Assert length of ipv4. 
// HostToNetwork16 converts a 16-bit value from host byte order to network
// byte order. This is the little-endian build variant, so the two bytes are
// swapped.
func HostToNetwork16(u uint16) uint16 {
	return bits.ReverseBytes16(u)
}

// HostToNetwork32 converts a 32-bit value from host byte order to network
// byte order by reversing its four bytes.
func HostToNetwork32(u uint32) uint32 {
	return bits.ReverseBytes32(u)
}

// NetworkToHost16 converts a 16-bit value from network byte order to host
// byte order. A byte swap is its own inverse, so this is the same operation
// as HostToNetwork16.
func NetworkToHost16(u uint16) uint16 {
	return HostToNetwork16(u)
}

// NetworkToHost32 converts a 32-bit value from network byte order to host
// byte order. A byte reversal is its own inverse, so this is the same
// operation as HostToNetwork32.
func NetworkToHost32(u uint32) uint32 {
	return HostToNetwork32(u)
}
"gopkg.in/check.v1" 14 | ) 15 | 16 | // Hook up gocheck into the "go test" runner. 17 | func Test(t *testing.T) { 18 | TestingT(t) 19 | } 20 | 21 | type ByteorderSuite struct{} 22 | 23 | var _ = Suite(&ByteorderSuite{}) 24 | 25 | func (b *ByteorderSuite) TestNativeIsInitialized(c *C) { 26 | c.Assert(Native, NotNil) 27 | } 28 | 29 | func (b *ByteorderSuite) TestHostToNetwork(c *C) { 30 | switch Native { 31 | case binary.LittleEndian: 32 | c.Assert(HostToNetwork16(0xAABB), Equals, uint16(0xBBAA)) 33 | c.Assert(HostToNetwork32(0xAABBCCDD), Equals, uint32(0xDDCCBBAA)) 34 | case binary.BigEndian: 35 | c.Assert(HostToNetwork16(0xAABB), Equals, uint16(0xAABB)) 36 | c.Assert(HostToNetwork32(0xAABBCCDD), Equals, uint32(0xAABBCCDD)) 37 | } 38 | } 39 | 40 | func (b *ByteorderSuite) TestNetIPv4ToHost32(c *C) { 41 | switch Native { 42 | case binary.LittleEndian: 43 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.129.91")), Equals, uint32(0x5b810b0a)) 44 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.138.214")), Equals, uint32(0xd68a0b0a)) 45 | case binary.BigEndian: 46 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.129.91")), Equals, uint32(0x0a0b815b)) 47 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.138.214")), Equals, uint32(0x0a0b8ad6)) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /pkg/config/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package config 17 | 18 | import ( 19 | "fmt" 20 | "os" 21 | "path/filepath" 22 | "regexp" 23 | "strconv" 24 | "strings" 25 | "syscall" 26 | "time" 27 | 28 | "github.com/AliyunContainerService/terway-qos/pkg/types" 29 | 30 | "k8s.io/apimachinery/pkg/util/cache" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | ) 33 | 34 | const ( 35 | defaultTTL = 10 * time.Minute 36 | maxPodPerNode = 1024 37 | systemdCLSCgroupRootPath = "/sys/fs/cgroup/net_cls/kubepods.slice" 38 | defaultCLScgroupRootPath = "/sys/fs/cgroup/net_cls/kubepods" 39 | ) 40 | 41 | var ( 42 | log = ctrl.Log.WithName("config") 43 | systemdwalkPath = []string{"kubepods-burstable.slice", "kubepods-besteffort.slice", "kubepods-guaranteed.slice", ""} 44 | defaultwalkPath = []string{"burstable", "besteffort", "guaranteed", ""} 45 | podUIDRe = regexp.MustCompile("[0-9a-fA-F]{8}([-,_][0-9a-fA-F]{4}){3}[-,_][0-9a-fA-F]{12}") 46 | cgroupPathRe = regexp.MustCompile(`^\S+`) 47 | ) 48 | 49 | type Interface interface { 50 | GetCgroupByPodUID(string) (*types.CgroupInfo, error) 51 | SetCgroupClassID(prio uint32, path string) error 52 | } 53 | 54 | type Cgroup struct { 55 | cgroupPath string 56 | workPath []string 57 | 58 | cache *cache.LRUExpireCache 59 | } 60 | 61 | func NewCgroup() *Cgroup { 62 | fileExists := func(path string) bool { 63 | _, err := os.Stat(path) 64 | return !os.IsNotExist(err) 65 | } 66 | 67 | cg := Cgroup{ 68 | cache: cache.NewLRUExpireCache(maxPodPerNode), 69 | cgroupPath: defaultCLScgroupRootPath, 70 | workPath: defaultwalkPath, 71 | } 72 | if fileExists(systemdCLSCgroupRootPath) { 73 | cg.cgroupPath = systemdCLSCgroupRootPath 74 | cg.workPath = systemdwalkPath 75 | } 76 | 77 | return &cg 78 | } 79 | 80 | func (f *Cgroup) GetCgroupByPodUID(id string) (*types.CgroupInfo, error) { 81 | v, ok := f.cache.Get(id) 82 | if !ok { 83 | // update all cache 84 | result := 
// parseConfig extracts the unsigned integer that follows key in content.
//
// The value may follow the key directly, after a single "=", or after
// whitespace (e.g. "rx_bps=100" or "hw_tx_bps_max 100"). The first occurrence
// in content wins.
//
// It returns 0 when the key is not present, when no digits follow it, or when
// the digits do not fit into a uint64.
func parseConfig(key string, content string) uint64 {
	// The key is matched literally: escaping keeps characters such as "."
	// from being interpreted as regular-expression syntax.
	re, err := regexp.Compile(regexp.QuoteMeta(key) + `(?:=?|\s+)(\d+)`)
	if err != nil {
		return 0
	}

	group := re.FindStringSubmatch(content)
	if len(group) != 2 {
		return 0
	}

	// ParseUint yields MaxUint64 together with a range error on overflow;
	// treat that like any other unparsable value instead of returning a
	// bogus limit.
	result, err := strconv.ParseUint(group[1], 10, 64)
	if err != nil {
		return 0
	}
	return result
}
not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package config 17 | 18 | import ( 19 | "testing" 20 | ) 21 | 22 | func Test_parseConfig(t *testing.T) { 23 | contents := `hw_tx_bps_max 100 24 | hw_rx_bps_max 100 25 | offline_l1_tx_bps_min 10 26 | offline_l1_tx_bps_max 20 27 | offline_l2_tx_bps_min 10 28 | offline_l2_tx_bps_max 30 29 | offline_l1_rx_bps_min 10 30 | offline_l1_rx_bps_max 20 31 | offline_l2_rx_bps_min 10 32 | offline_l2_rx_bps_max 30` 33 | 34 | tests := []struct { 35 | key string 36 | want uint64 37 | }{ 38 | { 39 | key: "hw_tx_bps_max", 40 | want: 100, 41 | }, { 42 | key: "hw_rx_bps_max", 43 | want: 100, 44 | }, { 45 | key: "offline_l1_tx_bps_min", 46 | want: 10, 47 | }, { 48 | key: "offline_l1_tx_bps_max", 49 | want: 20, 50 | }, { 51 | key: "offline_l2_tx_bps_min", 52 | want: 10, 53 | }, { 54 | key: "offline_l2_tx_bps_max", 55 | want: 30, 56 | }, 57 | } 58 | for _, tt := range tests { 59 | t.Run(tt.key, func(t *testing.T) { 60 | if got := parseConfig(tt.key, contents); got != tt.want { 61 | t.Errorf("parseConfig() = %v, want %v", got, tt.want) 62 | } 63 | }) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /pkg/config/record.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package config 17 | 18 | import ( 19 | "fmt" 20 | 21 | "github.com/AliyunContainerService/terway-qos/pkg/types" 22 | 23 | "k8s.io/client-go/tools/cache" 24 | ) 25 | 26 | type PodCache struct { 27 | cache.Indexer 28 | } 29 | 30 | const ( 31 | indexPodID = "podID" 32 | indexPodUID = "podUID" 33 | indexCgroupPath = "cgroupPath" 34 | ) 35 | 36 | func NewPodCache() *PodCache { 37 | return &PodCache{ 38 | Indexer: cache.NewIndexer(func(obj interface{}) (string, error) { 39 | r, ok := obj.(*types.PodConfig) 40 | if !ok { 41 | return "", fmt.Errorf("not type *Record") 42 | } 43 | return r.PodID, nil 44 | }, cache.Indexers{ 45 | indexPodID: func(obj interface{}) ([]string, error) { 46 | r, ok := obj.(*types.PodConfig) 47 | if !ok { 48 | return nil, fmt.Errorf("not type *Record") 49 | } 50 | return []string{r.PodID}, nil 51 | }, 52 | indexPodUID: func(obj interface{}) ([]string, error) { 53 | r, ok := obj.(*types.PodConfig) 54 | if !ok { 55 | return nil, fmt.Errorf("not type *Record") 56 | } 57 | return []string{r.PodUID}, nil 58 | }, 59 | indexCgroupPath: func(obj interface{}) ([]string, error) { 60 | r, ok := obj.(*types.PodConfig) 61 | if !ok { 62 | return nil, fmt.Errorf("not type *Record") 63 | } 64 | return []string{r.CgroupInfo.Path}, nil 65 | }, 66 | }), 67 | } 68 | } 69 | 70 | func (r *PodCache) ByPodID(id string) *types.PodConfig { 71 | objs, err := r.ByIndex(indexPodID, id) 72 | if err != nil { 73 | panic(err) 74 | } 75 | if len(objs) == 0 { 76 | return nil 77 | } 78 | return objs[0].(*types.PodConfig) 79 | 
} 80 | 81 | func (r *PodCache) ByPodUID(id string) *types.PodConfig { 82 | objs, err := r.ByIndex(indexPodUID, id) 83 | if err != nil { 84 | panic(err) 85 | } 86 | if len(objs) == 0 { 87 | return nil 88 | } 89 | return objs[0].(*types.PodConfig) 90 | } 91 | 92 | func (r *PodCache) ByCgroupPath(id string) *types.PodConfig { 93 | objs, err := r.ByIndex(indexCgroupPath, id) 94 | if err != nil { 95 | panic(err) 96 | } 97 | if len(objs) == 0 { 98 | return nil 99 | } 100 | return objs[0].(*types.PodConfig) 101 | } 102 | 103 | func (r *PodCache) AddIfNotPresent(config *types.PodConfig) error { 104 | _, ok, err := r.Indexer.Get(config) 105 | if err != nil { 106 | return err 107 | } 108 | if ok { 109 | return nil 110 | } 111 | return r.Indexer.Add(config) 112 | } 113 | 114 | func (r *PodCache) Del(config *types.PodConfig) error { 115 | return r.Indexer.Delete(config) 116 | } 117 | 118 | func (r *PodCache) DelByPodID(id string) error { 119 | return r.Indexer.Delete(&types.PodConfig{PodID: id}) 120 | } 121 | -------------------------------------------------------------------------------- /pkg/config/syncer.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package config 17 | 18 | import ( 19 | "context" 20 | "encoding/json" 21 | "fmt" 22 | "os" 23 | "path/filepath" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | "time" 28 | 29 | "github.com/fsnotify/fsnotify" 30 | "k8s.io/apimachinery/pkg/util/sets" 31 | 32 | "github.com/AliyunContainerService/terway-qos/pkg/bpf" 33 | "github.com/AliyunContainerService/terway-qos/pkg/types" 34 | ) 35 | 36 | var _ types.SyncPod = &Syncer{} 37 | 38 | const ( 39 | rootFileConfig = "/var/lib/terway/qos" 40 | perCgroupConfig = "per_cgroup_bps_limit" 41 | globalConfig = "global_bps_config" 42 | podConfig = "pod.json" 43 | ) 44 | 45 | type Syncer struct { 46 | globalPath string 47 | perCgroupPath string 48 | podConfigPath string 49 | 50 | bpf bpf.Interface 51 | cgroup Interface 52 | 53 | podCache *PodCache 54 | 55 | lock sync.Mutex 56 | } 57 | 58 | func NewSyncer(bpfWriter bpf.Interface) *Syncer { 59 | return &Syncer{ 60 | globalPath: filepath.Join(rootFileConfig, globalConfig), 61 | perCgroupPath: filepath.Join(rootFileConfig, perCgroupConfig), 62 | podConfigPath: filepath.Join(rootFileConfig, podConfig), 63 | 64 | bpf: bpfWriter, 65 | cgroup: NewCgroup(), 66 | 67 | podCache: NewPodCache(), 68 | } 69 | } 70 | 71 | func (s *Syncer) Start(ctx context.Context) error { 72 | err := os.MkdirAll(rootFileConfig, os.ModeDir) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | watcher, err := fsnotify.NewWatcher() 78 | if err != nil { 79 | return err 80 | } 81 | log.Info("watching config change", "path", rootFileConfig) 82 | err = watcher.Add(rootFileConfig) 83 | if err != nil { 84 | return err 85 | } 86 | 87 | go func() { 88 | tick := time.NewTicker(5 * time.Second) 89 | 90 | defer watcher.Close() 91 | for { 92 | select { 93 | case <-ctx.Done(): 94 | tick.Stop() 95 | return 96 | case event, ok := <-watcher.Events: 97 | if !ok { 98 | return 99 | } 100 | switch event.Name { 101 | case rootFileConfig: 102 | if event.Has(fsnotify.Remove | fsnotify.Rename) { 103 | log.Info("config 
file gone, will restart", "event", event.String()) 104 | os.Exit(99) 105 | } 106 | case s.globalPath: 107 | log.Info("cfg change", "event", event.String()) 108 | 109 | err = s.syncGlobalConfig() 110 | case s.perCgroupPath: 111 | log.Info("cfg change", "event", event.String()) 112 | 113 | err = s.syncCgroupRate() 114 | case s.podConfigPath: 115 | log.Info("cfg change", "event", event.String()) 116 | 117 | err = s.syncPodConfig() 118 | default: 119 | continue 120 | } 121 | if err != nil { 122 | log.Error(err, "error sync config") 123 | } 124 | 125 | case err, ok := <-watcher.Errors: 126 | if !ok { 127 | return 128 | } 129 | log.Error(err, "file watch err") 130 | case <-tick.C: 131 | err = s.syncGlobalConfig() 132 | if err != nil { 133 | log.Error(err, "error sync config") 134 | } 135 | err = s.syncCgroupRate() 136 | if err != nil { 137 | log.Error(err, "error sync config") 138 | } 139 | err = s.syncPodConfig() 140 | if err != nil { 141 | log.Error(err, "error sync config") 142 | } 143 | } 144 | } 145 | }() 146 | 147 | return nil 148 | } 149 | 150 | func (s *Syncer) DeletePod(id string) error { 151 | s.lock.Lock() 152 | defer s.lock.Unlock() 153 | 154 | podConfig := s.podCache.ByPodID(id) 155 | if podConfig == nil { 156 | return nil 157 | } 158 | 159 | if err := s.podCache.DelByPodID(id); err != nil { 160 | return err 161 | } 162 | 163 | return s.bpf.DeletePodInfo(podConfig) 164 | } 165 | 166 | func (s *Syncer) UpdatePod(config *types.PodConfig) error { 167 | s.lock.Lock() 168 | defer s.lock.Unlock() 169 | 170 | prio := config.Prio 171 | 172 | v, ok, err := s.podCache.Get(config) 173 | if err != nil { 174 | return err 175 | } 176 | if ok { 177 | log.Info("update pod", "pod", config.PodID) 178 | 179 | prev := v.(*types.PodConfig) 180 | 181 | // keep previous cgroup info 182 | // take only single source 183 | config.CgroupInfo = prev.CgroupInfo 184 | 185 | // annotation has higher priority 186 | if config.TxBps != nil { 187 | config.TxBps = prev.TxBps 188 | } 189 | if 
config.RxBps != nil { 190 | config.RxBps = prev.RxBps 191 | } 192 | } else { 193 | // new pod 194 | log.Info("add new pod", "pod", config.PodID) 195 | cg, err := s.cgroup.GetCgroupByPodUID(config.PodUID) 196 | if err != nil { 197 | return err 198 | } 199 | config.CgroupInfo = cg 200 | } 201 | 202 | if prio != nil && *prio <= 2 { 203 | config.CgroupInfo.ClassID = *prio 204 | } 205 | 206 | err = s.podCache.Update(config) 207 | if err != nil { 208 | return err 209 | } 210 | 211 | if config.HostNetwork && config.Prio != nil { 212 | err = s.cgroup.SetCgroupClassID(*config.Prio, config.CgroupInfo.Path) 213 | if err != nil { 214 | return err 215 | } 216 | } 217 | 218 | return s.bpf.WritePodInfo(config) 219 | } 220 | 221 | func (s *Syncer) syncGlobalConfig() error { 222 | ingress, egress, err := GetGlobalConfig(s.globalPath) 223 | if err != nil { 224 | if os.IsNotExist(err) { 225 | return nil 226 | } 227 | return err 228 | } 229 | 230 | return s.bpf.WriteGlobalConfig(ingress, egress) 231 | } 232 | 233 | func (s *Syncer) syncCgroupRate() error { 234 | pods, err := s.parsePerCgroupConfig() 235 | if err != nil { 236 | return err 237 | } 238 | return s.podChanged(pods) 239 | } 240 | 241 | func (s *Syncer) syncPodConfig() error { 242 | pods, err := s.parsePodConfig() 243 | if err != nil { 244 | return err 245 | } 246 | return s.podChanged(pods) 247 | } 248 | 249 | func (s *Syncer) podChanged(pods []Pod) error { 250 | s.lock.Lock() 251 | defer s.lock.Unlock() 252 | 253 | current := sets.New[uint64]() 254 | 255 | for _, pod := range pods { 256 | info, err := readCgroupInfo(pod.CgroupDir) 257 | if err != nil { 258 | log.Error(err, "error get cgroup info", "path", pod.CgroupDir) 259 | continue 260 | } 261 | 262 | config := s.podCache.ByCgroupPath(info.Path) 263 | if config == nil { 264 | log.Info("ignore pod, cgroup not found", "cgroup", info.Path) 265 | continue 266 | } 267 | 268 | if pod.Prio >= 0 && pod.Prio <= 2 { 269 | prio := uint32(pod.Prio) 270 | config.Prio = &prio 271 | 
config.CgroupInfo.ClassID = prio 272 | } 273 | config.RxBps = &pod.QoSConfig.IngressBandwidth 274 | config.TxBps = &pod.QoSConfig.EgressBandwidth 275 | 276 | current.Insert(info.Inode) 277 | err = s.podChangeLocked(config) 278 | if err != nil { 279 | return err 280 | } 281 | } 282 | 283 | // clean up old cgroup rate 284 | cgroups := s.bpf.ListCgroupRate() 285 | olds := sets.New[uint64]() 286 | for key := range cgroups { 287 | olds.Insert(key.Inode) 288 | } 289 | for id := range olds.Difference(current) { 290 | err := s.bpf.DeleteCgroupRate(id) 291 | if err != nil { 292 | log.Error(err, "delete cgruop rate failed", "id", strconv.Itoa(int(id))) 293 | } 294 | } 295 | return nil 296 | } 297 | 298 | func (s *Syncer) parsePodConfig() ([]Pod, error) { 299 | content, err := os.ReadFile(s.podConfigPath) 300 | if err != nil { 301 | if os.IsNotExist(err) { 302 | return nil, nil 303 | } 304 | return nil, err 305 | } 306 | configs := make(map[string]*Pod) 307 | err = json.Unmarshal(content, &configs) 308 | if err != nil { 309 | return nil, err 310 | } 311 | 312 | var pods []Pod 313 | for _, pod := range configs { 314 | pods = append(pods, *pod) 315 | } 316 | 317 | return pods, nil 318 | } 319 | 320 | func (s *Syncer) parsePerCgroupConfig() ([]Pod, error) { 321 | content, err := os.ReadFile(s.perCgroupPath) 322 | if err != nil { 323 | if os.IsNotExist(err) { 324 | return nil, nil 325 | } 326 | return nil, err 327 | } 328 | configs := make([]Pod, 0) 329 | 330 | lines := strings.Split(string(content), "\n") 331 | if err != nil { 332 | return nil, err 333 | } 334 | for _, line := range lines { 335 | if len(line) == 0 { 336 | continue 337 | } 338 | cgroupPath := cgroupPathRe.FindString(line) 339 | rx := parseConfig("rx_bps", line) 340 | tx := parseConfig("tx_bps", line) 341 | 342 | configs = append(configs, Pod{ 343 | PodName: "", 344 | PodNamespace: "", 345 | PodUID: "", 346 | Prio: -1, 347 | CgroupDir: cgroupPath, 348 | QoSConfig: QoSConfig{ 349 | IngressBandwidth: rx, 350 | 
EgressBandwidth: tx, 351 | }, 352 | }) 353 | } 354 | return configs, nil 355 | } 356 | 357 | func (s *Syncer) podChangeLocked(config *types.PodConfig) error { 358 | log.Info("update pod", "pod", config.PodID, "detail", fmt.Sprintf("%+v", config), "prio", *config.Prio) 359 | err := s.podCache.Update(config) 360 | if err != nil { 361 | return err 362 | } 363 | 364 | if config.HostNetwork && config.Prio != nil { 365 | err = s.cgroup.SetCgroupClassID(*config.Prio, config.CgroupInfo.Path) 366 | if err != nil { 367 | return err 368 | } 369 | } 370 | 371 | return s.bpf.WritePodInfo(config) 372 | } 373 | -------------------------------------------------------------------------------- /pkg/config/types.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type Node struct { 4 | HwTxBpsMax uint64 `json:"hw_tx_bps_max" yaml:"hw_tx_bps_max"` 5 | HwRxBpsMax uint64 `json:"hw_rx_bps_max" yaml:"hw_rx_bps_max"` 6 | L0TxBpsMin uint64 `json:"l0_tx_bps_min" yaml:"l0_tx_bps_min"` 7 | L0TxBpsMax uint64 `json:"l0_tx_bps_max" yaml:"l0_tx_bps_max"` 8 | L0RxBpsMin uint64 `json:"l0_rx_bps_min" yaml:"l0_rx_bps_min"` 9 | L0RxBpsMax uint64 `json:"l0_rx_bps_max" yaml:"l0_rx_bps_max"` 10 | L1TxBpsMin uint64 `json:"l1_tx_bps_min" yaml:"l1_tx_bps_min"` 11 | L1TxBpsMax uint64 `json:"l1_tx_bps_max" yaml:"l1_tx_bps_max"` 12 | L1RxBpsMin uint64 `json:"l1_rx_bps_min" yaml:"l1_rx_bps_min"` 13 | L1RxBpsMax uint64 `json:"l1_rx_bps_max" yaml:"l1_rx_bps_max"` 14 | L2TxBpsMin uint64 `json:"l2_tx_bps_min" yaml:"l2_tx_bps_min"` 15 | L2TxBpsMax uint64 `json:"l2_tx_bps_max" yaml:"l2_tx_bps_max"` 16 | L2RxBpsMin uint64 `json:"l2_rx_bps_min" yaml:"l2_rx_bps_min"` 17 | L2RxBpsMax uint64 `json:"l2_rx_bps_max" yaml:"l2_rx_bps_max"` 18 | } 19 | 20 | type Pod struct { 21 | PodName string `json:"podName"` 22 | PodNamespace string `json:"podNamespace"` 23 | PodUID string `json:"podUID"` 24 | Prio int `json:"prio"` 25 | CgroupDir string `json:"cgroupDir"` 26 | 
// QoSConfig holds the two bandwidth values of a Pod entry. It is decoded from
// the "qosConfig" object of the pod configuration file, and it is also filled
// from the rx_bps / tx_bps values of the per-cgroup configuration file.
// NOTE(review): the unit is not visible here; the names of the consuming
// fields (RxBps/TxBps) suggest a per-second rate — confirm.
type QoSConfig struct {
	// IngressBandwidth is copied into PodConfig.RxBps when the entry is applied.
	IngressBandwidth uint64 `json:"ingressBandwidth"`
	// EgressBandwidth is copied into PodConfig.TxBps when the entry is applied.
	EgressBandwidth uint64 `json:"egressBandwidth"`
}
14 | */ 15 | 16 | package k8s 17 | 18 | import ( 19 | "context" 20 | "fmt" 21 | "net/netip" 22 | "os" 23 | "time" 24 | 25 | "github.com/AliyunContainerService/terway-qos/pkg/bandwidth" 26 | "github.com/AliyunContainerService/terway-qos/pkg/types" 27 | 28 | corev1 "k8s.io/api/core/v1" 29 | "k8s.io/apimachinery/pkg/api/errors" 30 | "k8s.io/apimachinery/pkg/fields" 31 | "k8s.io/apimachinery/pkg/runtime" 32 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 33 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 34 | "k8s.io/klog/v2" 35 | ctrl "sigs.k8s.io/controller-runtime" 36 | "sigs.k8s.io/controller-runtime/pkg/builder" 37 | "sigs.k8s.io/controller-runtime/pkg/cache" 38 | "sigs.k8s.io/controller-runtime/pkg/client" 39 | "sigs.k8s.io/controller-runtime/pkg/client/config" 40 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 41 | ) 42 | 43 | type Interface interface { 44 | PodByUID() *corev1.Pod 45 | } 46 | 47 | var scheme = runtime.NewScheme() 48 | 49 | func init() { 50 | utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 51 | } 52 | 53 | func StartPodHandler(ctx context.Context, syncer types.SyncPod) error { 54 | options := ctrl.Options{ 55 | Scheme: scheme, 56 | } 57 | 58 | options.NewCache = cache.BuilderWithOptions(cache.Options{ 59 | SelectorsByObject: cache.SelectorsByObject{ 60 | &corev1.Pod{}: { 61 | Field: fields.SelectorFromSet(fields.Set{"spec.nodeName": os.Getenv("K8S_NODE_NAME")}), 62 | }, 63 | }}, 64 | ) 65 | mgr, err := ctrl.NewManager(config.GetConfigOrDie(), options) 66 | if err != nil { 67 | return err 68 | } 69 | 70 | err = ctrl.NewControllerManagedBy(mgr). 71 | For(&corev1.Pod{}, builder.WithPredicates(&predicateForPod{})). 72 | Complete(&reconcilePod{ 73 | client: mgr.GetClient(), 74 | syncer: syncer, 75 | }) 76 | if err != nil { 77 | return err 78 | } 79 | return mgr.Start(ctx) 80 | } 81 | 82 | // reconcilePod reconciles ReplicaSets 83 | type reconcilePod struct { 84 | // client can be used to retrieve objects from the APIServer. 
85 | client client.Client 86 | 87 | syncer types.SyncPod 88 | } 89 | 90 | // Implement reconcile.Reconciler so the controller can reconcile objects 91 | var _ reconcile.Reconciler = &reconcilePod{} 92 | 93 | func (r *reconcilePod) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { 94 | pod := corev1.Pod{} 95 | err := r.client.Get(ctx, client.ObjectKey{ 96 | Namespace: request.Namespace, 97 | Name: request.Name, 98 | }, &pod) 99 | if err != nil { 100 | if errors.IsNotFound(err) { 101 | klog.Infof("pod %s/%s has been deleted", request.Namespace, request.Name) 102 | return reconcile.Result{}, r.syncer.DeletePod(request.String()) 103 | } 104 | return reconcile.Result{}, err 105 | } 106 | 107 | if !pod.DeletionTimestamp.IsZero() { 108 | t := time.Since(pod.DeletionTimestamp.Time) 109 | if t < 0 { 110 | // Reconciliation is level-based, meaning action isn't driven off changes in 111 | // individual Events. Requeue the result at least once to make sure the bpf map 112 | // will be deleted in time. Because the pod object may exist but its ip address 113 | // has been allocated to another pod. e.g. pod deletion is blocked by a 114 | // time-consuming finalizer. 
115 | klog.Infof("pod %s/%s requeue deletion at %s", 116 | pod.Namespace, pod.Name, pod.DeletionTimestamp.Time) 117 | return reconcile.Result{RequeueAfter: -t}, nil 118 | } else { 119 | // IP addresses are expected to have been reclaimed 120 | // See https://github.com/kubernetes/kubernetes/issues/109414#issuecomment-1125233538 121 | klog.Infof("pod %s/%s IP addresses are expected to have been reclaimed", 122 | pod.Namespace, pod.Name) 123 | return reconcile.Result{}, r.syncer.DeletePod(request.String()) 124 | } 125 | } 126 | 127 | v4, v6 := getIPs(&pod) 128 | if !v4.IsValid() && !v6.IsValid() { 129 | return reconcile.Result{}, fmt.Errorf("pod %s/%s has no ip", pod.Namespace, pod.Name) 130 | } 131 | 132 | ingress, egress, err := bandwidth.ExtractPodBandwidthResources(pod.Annotations) 133 | if err != nil { 134 | return reconcile.Result{}, fmt.Errorf("error extract bandwidth resources, %w", err) 135 | } 136 | 137 | update := &types.PodConfig{ 138 | PodID: fmt.Sprintf("%s/%s", pod.Namespace, pod.Name), 139 | PodUID: string(pod.UID), 140 | IPv4: v4, 141 | IPv6: v6, 142 | HostNetwork: pod.Spec.HostNetwork, 143 | } 144 | 145 | if ingress != nil { 146 | v := uint64(ingress.Value()) 147 | update.RxBps = &(v) 148 | } 149 | if egress != nil { 150 | v := uint64(egress.Value()) 151 | update.TxBps = &(v) 152 | } 153 | switch pod.Annotations["k8s.aliyun.com/qos-class"] { 154 | case "best-effort": 155 | update.Prio = func(a uint32) *uint32 { 156 | return &a 157 | }(2) 158 | case "burstable": 159 | update.Prio = func(a uint32) *uint32 { 160 | return &a 161 | }(1) 162 | case "guaranteed": 163 | update.Prio = func(a uint32) *uint32 { 164 | return &a 165 | }(0) 166 | } 167 | 168 | return reconcile.Result{}, r.syncer.UpdatePod(update) 169 | } 170 | 171 | func getIPs(pod *corev1.Pod) (v4 netip.Addr, v6 netip.Addr) { 172 | for _, ip := range pod.Status.PodIPs { 173 | addr, err := netip.ParseAddr(ip.IP) 174 | if err != nil { 175 | continue 176 | } 177 | if addr.Is4() { 178 | v4 = addr 
179 | } else { 180 | v6 = addr 181 | } 182 | } 183 | 184 | return 185 | } 186 | -------------------------------------------------------------------------------- /pkg/k8s/predicates.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023, Alibaba Group; 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package k8s 17 | 18 | import ( 19 | corev1 "k8s.io/api/core/v1" 20 | "sigs.k8s.io/controller-runtime/pkg/event" 21 | "sigs.k8s.io/controller-runtime/pkg/predicate" 22 | ) 23 | 24 | type predicateForPod struct { 25 | predicate.Funcs 26 | } 27 | 28 | func (p *predicateForPod) Create(e event.CreateEvent) bool { 29 | pod, ok := e.Object.(*corev1.Pod) 30 | if !ok { 31 | return false 32 | } 33 | 34 | v4, v6 := getIPs(pod) 35 | if !v4.IsValid() && !v6.IsValid() { 36 | return false 37 | } 38 | 39 | return true 40 | } 41 | 42 | func (p *predicateForPod) Update(e event.UpdateEvent) bool { 43 | pod, ok := e.ObjectNew.(*corev1.Pod) 44 | if !ok { 45 | return false 46 | } 47 | 48 | v4, v6 := getIPs(pod) 49 | if !v4.IsValid() && !v6.IsValid() { 50 | return false 51 | } 52 | 53 | return true 54 | } 55 | 56 | func (p *predicateForPod) Delete(e event.DeleteEvent) bool { 57 | _, ok := e.Object.(*corev1.Pod) 58 | return ok 59 | } 60 | -------------------------------------------------------------------------------- /pkg/types/config.go: 
// SyncPod declares the two operations a pod consumer must provide:
// removing a pod by its id and applying a pod's configuration.
type SyncPod interface {
	DeletePod(id string) error
	UpdatePod(config *PodConfig) error
}

// PodConfig contains the pod related resources.
//
// Prio, RxBps and TxBps are pointers, so an absent value (nil) can be told
// apart from an explicit zero.
type PodConfig struct {
	PodID  string
	PodUID string

	IPv4 netip.Addr
	IPv6 netip.Addr

	HostNetwork bool
	Prio        *uint32 // NOTE(review): priority semantics are not visible here — confirm with callers.

	CgroupInfo *CgroupInfo

	RxBps *uint64
	TxBps *uint64
}

// CgroupInfo identifies a cgroup by its path, class id and inode.
type CgroupInfo struct {
	Path    string
	ClassID uint32
	Inode   uint64
}

// CgroupRate carries the receive and transmit rates associated with the
// cgroup identified by Inode.
type CgroupRate struct {
	Inode uint64

	RxBps uint64
	TxBps uint64
}

// GlobalConfig holds the bandwidth settings: the hardware figures plus a
// min/max pair for each of the three levels. String labels L0 as "online"
// and L1/L2 as "offline-l1"/"offline-l2".
type GlobalConfig struct {
	HwGuaranteed   uint64
	HwBurstableBps uint64

	L0MaxBps uint64
	L0MinBps uint64

	L1MaxBps uint64
	L1MinBps uint64

	L2MaxBps uint64
	L2MinBps uint64
}

// subOrZero returns a-b, or 0 when b exceeds a, so that unsigned
// subtraction never wraps around.
func subOrZero(a, b uint64) uint64 {
	if b > a {
		return 0
	}
	return a - b
}

// Default fills in the fields that were left at zero:
//
//   - HwBurstableBps falls back to HwGuaranteed when the latter is set.
//   - L0MaxBps falls back to HwGuaranteed.
//   - L0MinBps falls back to what remains of HwGuaranteed after the L1 and
//     L2 minimums are taken out.
//
// The remainder is clamped at zero. A plain uint64 subtraction would wrap to
// a huge value when the L1/L2 minimums together exceed HwGuaranteed.
func (c *GlobalConfig) Default() {
	if c.HwGuaranteed != 0 && c.HwBurstableBps == 0 {
		c.HwBurstableBps = c.HwGuaranteed
	}
	if c.L0MaxBps == 0 {
		c.L0MaxBps = c.HwGuaranteed
	}
	if c.L0MinBps == 0 {
		c.L0MinBps = subOrZero(subOrZero(c.HwGuaranteed, c.L1MinBps), c.L2MinBps)
	}
}

// Validate reports whether the configuration is self-consistent.
//
// A configuration in which every field other than HwGuaranteed is zero is
// accepted as-is. Otherwise all of the following must hold:
//
//   - HwGuaranteed <= HwBurstableBps
//   - L1MaxBps and L2MaxBps are each <= HwGuaranteed
//   - each level's min is <= its max (L1 and L2)
//   - L1MaxBps + L2MaxBps <= HwGuaranteed
func (c *GlobalConfig) Validate() bool {
	// HwGuaranteed is deliberately not part of this "nothing set" check.
	if c.HwBurstableBps == 0 && c.L0MaxBps == 0 && c.L0MinBps == 0 &&
		c.L1MaxBps == 0 && c.L1MinBps == 0 &&
		c.L2MaxBps == 0 && c.L2MinBps == 0 {
		return true
	}

	switch {
	case c.HwGuaranteed > c.HwBurstableBps:
		return false
	case c.L1MaxBps > c.HwGuaranteed, c.L2MaxBps > c.HwGuaranteed:
		return false
	case c.L1MinBps > c.L1MaxBps, c.L2MinBps > c.L2MaxBps:
		return false
	case c.HwGuaranteed-c.L1MaxBps < c.L2MaxBps:
		// Same as HwGuaranteed < L1MaxBps+L2MaxBps, but written as a
		// subtraction so the sum cannot overflow. The earlier case
		// guarantees L1MaxBps <= HwGuaranteed, so this cannot underflow.
		return false
	}

	return true
}

// String renders the hardware guarantee and the min/max of every level on a
// single line.
func (c *GlobalConfig) String() string {
	return fmt.Sprintf("hw %d online-min %d online-max %d offline-l1-min %d offline-l1-max %d offline-l2-min %d offline-l2-max %d",
		c.HwGuaranteed, c.L0MinBps, c.L0MaxBps, c.L1MinBps, c.L1MaxBps, c.L2MinBps, c.L2MaxBps)
}
// unknown is the placeholder returned when a component cannot be determined.
const unknown = "unknown"

var (
	// Version is the full version string assembled in init from the command
	// name, the adjusted git version, the OS/architecture, the commit and
	// the build date.
	Version string
	// UA is the User-Agent style string assembled in init.
	UA string

	// NOTE(review): the values below look like placeholders that are
	// presumably overridden at build time — confirm against the build
	// scripts.
	gitVersion = "v0.0.0-master+$Format:%H$"
	gitCommit  = "$Format:%H$" // sha1 from git, output of $(git rev-parse HEAD)

	buildDate = "1970-01-01T00:00:00Z" // build date in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
)

// init populates Version and UA once at package load.
func init() {
	cmd := commandName(os.Args)
	ver := adjustVersion(gitVersion)

	Version = fmt.Sprintf("%s/%s (%s/%s) %s %s", cmd, ver, runtime.GOOS, runtime.GOARCH, gitCommit, buildDate)
	UA = fmt.Sprintf("%s/%s (%s/%s) poseidon/%s", cmd, ver, runtime.GOOS, runtime.GOARCH, adjustCommit(gitCommit))
}

// commandName returns the adjusted name of the running command taken from
// the first element of args, or unknown when args is empty. Guarding the
// length keeps package initialisation from panicking when the process was
// started with an empty argument vector.
func commandName(args []string) string {
	if len(args) == 0 {
		return unknown
	}
	return adjustCommand(args[0])
}

// adjustVersion strips "alpha", "beta", etc. from version in form
// major.minor.patch-[alpha|beta|etc]: everything from the first "-" onwards
// is dropped. An empty input yields unknown.
func adjustVersion(v string) string {
	if len(v) == 0 {
		return unknown
	}
	base, _, _ := strings.Cut(v, "-")
	return base
}

// adjustCommand returns the last component of the
// OS-specific command path for use in User-Agent.
func adjustCommand(p string) string {
	// Unlikely, but better than returning "".
	if len(p) == 0 {
		return unknown
	}
	return filepath.Base(p)
}

// adjustCommit returns sufficient significant figures of the commit's git
// hash: at most its first seven bytes. An empty input yields unknown.
func adjustCommit(c string) string {
	if len(c) == 0 {
		return unknown
	}
	if len(c) > 7 {
		return c[:7]
	}
	return c
}