├── .clang-format
├── .github
└── dependabot.yml
├── .gitignore
├── .golangci.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── NOTICE
├── README-zh_CN.md
├── README.md
├── bpf
├── headers
│ ├── LICENSE.BSD-2-Clause
│ ├── bpf_core_read.h
│ ├── bpf_endian.h
│ ├── bpf_helper_defs.h
│ ├── bpf_helpers.h
│ ├── bpf_tracing.h
│ ├── common.h
│ ├── compiler.h
│ ├── linux
│ │ ├── bpf.h
│ │ ├── bpf_common.h
│ │ ├── if_ether.h
│ │ ├── in.h
│ │ ├── in6.h
│ │ ├── ip.h
│ │ ├── ipv6.h
│ │ ├── types.h
│ │ └── types_mapper.h
│ └── update.sh
├── monitor.bt
├── qos_tc.c
└── qos_tc.h
├── charts
└── terway-qos
│ ├── .helmignore
│ ├── Chart.yaml
│ ├── templates
│ ├── _helpers.tpl
│ ├── clusterrole.yaml
│ ├── clusterrolebinding.yaml
│ ├── configmap.yaml
│ ├── daemonset.yaml
│ └── serviceaccount.yaml
│ └── values.yaml
├── cmd
├── bpf_bandwidth.go
├── bpf_bandwidth_list.go
├── cgroup.go
├── cgroup_list.go
├── config.go
├── damon.go
├── pod.go
├── pod_list.go
├── pod_set.go
└── root.go
├── docs
├── quick-start-zh_CN.md
└── quick-start.md
├── go.mod
├── go.sum
├── hack
└── init.sh
├── images
├── builder
│ └── Dockerfile
└── runtime
│ └── Dockerfile
├── main.go
└── pkg
├── bandwidth
└── utils.go
├── bpf
├── compile.go
├── generate.go
├── manager.go
├── maps.go
├── maps_test.go
├── qos_tc_bpfeb.go
├── qos_tc_bpfeb.o
├── qos_tc_bpfel.go
├── qos_tc_bpfel.o
└── types.go
├── byteorder
├── byteorder.go
├── byteorder_bigendian.go
├── byteorder_littleendian.go
└── byteorder_test.go
├── config
├── config.go
├── config_test.go
├── record.go
├── syncer.go
└── types.go
├── k8s
├── pods.go
└── predicates.go
├── types
└── config.go
└── version
└── version.go
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | BasedOnStyle: LLVM
4 | AlignAfterOpenBracket: DontAlign
5 | AlignConsecutiveAssignments: true
6 | AlignEscapedNewlines: DontAlign
7 | AlwaysBreakBeforeMultilineStrings: true
8 | AlwaysBreakTemplateDeclarations: false
9 | AllowAllParametersOfDeclarationOnNextLine: false
10 | AllowShortFunctionsOnASingleLine: false
11 | BreakBeforeBraces: Attach
12 | IndentWidth: 4
13 | KeepEmptyLinesAtTheStartOfBlocks: false
14 | TabWidth: 4
15 | UseTab: ForContinuationAndIndentation
16 | ColumnLimit: 1000
17 | # Go compiler comments need to stay unindented.
18 | CommentPragmas: '^go:.*'
19 | ...
20 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: gomod
4 | directory: /
5 | schedule:
6 | interval: weekly
7 | open-pull-requests-limit: 2
8 | rebase-strategy: "disabled"
9 | labels:
10 | - kind/enhancement
11 | - release-note/misc
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .vscode
3 |
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | run:
2 | concurrency: 4
3 | timeout: 5m
4 | tests: false
5 | build-tags: []
6 |
7 | issues:
8 | exclude-rules:
9 | - path: _test\.go
10 | linters:
11 | - dupl
12 | - goconst
13 | linters:
14 | enable:
15 | - goconst
16 | - goimports
17 | - govet
18 | - errcheck
19 | - ineffassign
20 | - staticcheck
21 | - goconst
22 | - stylecheck
23 | - misspell
24 | linters-settings:
25 | errcheck:
26 | check-blank: false
27 | govet:
28 | check-shadowing: false
29 | maligned:
30 | suggest-new: true
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG GOLANG_IMAGE=docker.io/library/golang:1.21.3@sha256:24a09375a6216764a3eda6a25490a88ac178b5fcb9511d59d0da5ebf9e496474
2 | ARG UBUNTU_IMAGE=docker.io/library/ubuntu:22.04@sha256:2b7412e6465c3c7fc5bb21d3e6f1917c167358449fecac8176c6e496e5c1f05f
3 | ARG CILIUM_BPFTOOL_IMAGE=quay.io/cilium/cilium-bpftool:d3093f6aeefef8270306011109be623a7e80ad1b@sha256:2c28c64195dee20ab596d70a59a4597a11058333c6b35a99da32c339dcd7df56
4 | ARG RUNTIME_IMAGE=terway-qos-runtime
5 |
6 | FROM ${CILIUM_BPFTOOL_IMAGE} as bpftool-dist
7 |
8 | FROM ${GOLANG_IMAGE} as builder
9 | ARG GOPROXY
10 | ARG TARGETOS
11 | ARG TARGETARCH
12 | # Honour the GOPROXY build arg when it is set; otherwise fall back to goproxy.cn.
13 | ENV GOPROXY=${GOPROXY:-https://goproxy.cn}
14 | WORKDIR /go/src/qos
15 | COPY go.sum go.mod ./
16 | RUN go mod download
17 | COPY . .
18 | RUN CGO_ENABLED=0 go build \
19 | -ldflags \
20 | "-s -w -X \"github.com/AliyunContainerService/terway-qos/pkg/version.gitCommit=`git rev-parse HEAD 2>/dev/null`\" \
21 | -X \"github.com/AliyunContainerService/terway-qos/pkg/version.buildDate=`date -u +'%Y-%m-%dT%H:%M:%SZ'`\" \
22 | -X \"github.com/AliyunContainerService/terway-qos/pkg/version.gitVersion=`git describe --tags --match='v*' --abbrev=14 2>/dev/null`\"" -o /go/src/qos/qos .
23 | # Final image: based on the image named by the RUNTIME_IMAGE build arg (default terway-qos-runtime).
24 | FROM ${RUNTIME_IMAGE}
25 |
26 | COPY bpf/headers /var/lib/terway/headers
27 | COPY bpf /var/lib/terway/src
28 | COPY hack/init.sh /bin/init.sh
29 | COPY --from=bpftool-dist /usr/local /usr/local
30 | COPY --from=builder /go/src/qos/qos /usr/bin/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] Alibaba Group
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | GO ?= go
3 | DOCKER ?= docker
4 |
5 | GOFLAGS ?= -ldflags "-s -w"
6 |
7 | CLANG ?= clang-15
8 | STRIP ?= llvm-strip-15
9 | OBJCOPY ?= llvm-objcopy-15
10 | CFLAGS ?= -g -O2 -target bpf -std=gnu99 -nostdinc -D__NR_CPUS__=4 -Werror -Wall -Wextra -Wshadow -Wno-address-of-packed-member -Wno-unknown-warning-option -Wno-gnu-variable-sized-type-not-at-end -Wimplicit-int-conversion -Wenum-conversion
11 |
12 | BPF_BUILD_IMAGE ?= terway-qos-builder:latest
13 | RUNTIME_IMAGE ?= terway-qos-runtime:latest
14 | GO_LINT_IMAGE ?= golangci/golangci-lint:v1.54.2-alpine
15 | DAEMON_IMAGE ?= terway-qos:latest
16 | # Default goal: run the lint target, then the build target.
17 | .PHONY: all
18 | all: lint build
19 |
20 | .PHONY: lint
21 | lint:
22 | $(DOCKER) run --rm -it -v $(shell pwd):/go/src/qos \
23 | -w /go/src/qos \
24 | $(GO_LINT_IMAGE) golangci-lint -v run --timeout 5m
25 |
26 | .PHONY: build
27 | build: builder-image runtime-image generate daemon-image
28 |
29 | .PHONY: builder-image
30 | builder-image:
31 | @$(DOCKER) image inspect $(BPF_BUILD_IMAGE) >/dev/null 2>&1 || \
32 | (echo "Docker image $(BPF_BUILD_IMAGE) not found, building..." && \
33 | cd images/builder && \
34 | $(DOCKER) build -t $(BPF_BUILD_IMAGE) .)
35 |
36 | .PHONY: runtime-image
37 | runtime-image:
38 | @$(DOCKER) image inspect $(RUNTIME_IMAGE) >/dev/null 2>&1 || \
39 | (echo "Docker image $(RUNTIME_IMAGE) not found, building..." && \
40 | cd images/runtime && \
41 | $(DOCKER) build -t $(RUNTIME_IMAGE) .)
42 |
43 | .PHONY: daemon-image
44 | daemon-image:
45 | @$(DOCKER) build -t $(DAEMON_IMAGE) .
46 | # Run `go generate ./...` inside the builder image; the BPF toolchain settings are passed in as BPF_* environment variables.
47 | .PHONY: generate
48 | generate:
49 | $(DOCKER) run --rm -it -v $(shell pwd):/go/src/qos \
50 | -w /go/src/qos \
51 | -e BPF_CLANG="$(CLANG)" \
52 | -e BPF_CFLAGS="$(CFLAGS)" \
53 | -e BPF_STRIP="$(STRIP)" \
54 | $(BPF_BUILD_IMAGE) go generate ./...
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | ========================================================
2 | terway-qos
3 | Copyright (c) 2023, Alibaba Group.
4 | Licensed under the Apache License, Version 2.0
5 |
6 | ===================================================================
7 | This product contains various third-party components under other open source licenses.
8 | This section summarizes those components and their licenses.
9 |
10 |
11 | Apache Software Foundation License 2.0
12 | --------------------------------------
13 | pkg/bandwidth/utils.go
14 | Copyright 2015 The Kubernetes Authors.
15 |
16 | Licensed under the Apache License, Version 2.0 (the "License");
17 | you may not use this file except in compliance with the License.
18 | You may obtain a copy of the License at
19 |
20 | http://www.apache.org/licenses/LICENSE-2.0
21 |
22 | Unless required by applicable law or agreed to in writing, software
23 | distributed under the License is distributed on an "AS IS" BASIS,
24 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | See the License for the specific language governing permissions and
26 | limitations under the License.
27 |
28 | --------------------------------------
29 | pkg/byteorder/*
30 | SPDX-License-Identifier: Apache-2.0
31 | Copyright Authors of Cilium
32 |
33 | --------------------------------------
34 | bpf/*
35 | SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
36 |
37 |
--------------------------------------------------------------------------------
/README-zh_CN.md:
--------------------------------------------------------------------------------
1 | # terway-qos
2 |
3 | ## 介绍
4 |
5 | terway-qos 的诞生是为了解决混部场景下,容器网络带宽争抢问题。支持按单Pod、按业务类型限制带宽。
6 |
7 | 相比于其他方案,terway-qos 有以下优势:
8 |
9 | 1. 支持按业务类型限制带宽,支持多种业务类型混部
10 | 2. 支持 Pod 带宽限制动态调整
11 |
12 | ## 功能介绍
13 |
14 | 带宽限制分为
15 |
16 | - 整机带宽限制
17 | - Pod带宽限制
18 |
19 | ### 整机带宽限制
20 |
21 | 混部场景下,我们期望在线业务有最大带宽的保证,从而避免争抢。在空闲时,离线业务也能尽可能使用全部带宽资源。
22 | 由此用户可为业务流量定义三种优先级,L0,L1,L2。其优先级顺序依次递减。
23 |
24 | 争抢场景定义: 当 `L0 + L1 + L2` 总流量大于整机带宽
25 |
26 | 限制策略:
27 |
28 | - L0 最大带宽依据 L1, L2 实时流量而动态调整。最大为整机带宽,最小为 `整机带宽- L1 最小带宽- L2 最小带宽`。
29 | - 任何情况下,L1、L2 其带宽不超过各自带宽上限。
30 | - 争抢场景下, L1、L2 其带宽不会低于各自带宽下限。
31 | - 争抢场景下,将按照 L2 、L1 、L0 的顺序对带宽进行限制。
32 |
33 | #### Pod 优先级定义
34 |
35 | 通过为 Pod 配置下面 Annotation
36 |
37 | | key | 参数 |
38 | |----------------------------|------------------------------------------------------------------------|
39 | | `k8s.aliyun.com/qos-class` | `guaranteed` 在线业务 L0
`burstable` 离线业务 L1
`best-effort` 离线业务 L2 |
40 |
41 | #### 带宽限制配置
42 |
43 | 对需混部的节点,需配置带宽限制,配置路径 `/var/lib/terway/qos/global_bps_config`。
44 |
45 | | 配置路径 | 参数 |
46 | |-----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
47 | | `/var/lib/terway/qos/global_bps_config` | `hw_tx_bps_max` 节点的最大tx带宽
`hw_rx_bps_max` 节点的最大rx带宽
`offline_l1_tx_bps_min` 入方向离线l1 业务的最小带宽保证
`offline_l1_tx_bps_max` 入方向离线l1 业务的最大带宽占用
`offline_l2_tx_bps_min` 入方向离线l2 业务的最小带宽保证
`offline_l2_tx_bps_max` 入方向离线l2 业务的最大带宽占用 |
48 |
49 | 示例如下
50 |
51 | ```yaml
52 | kind: ConfigMap
53 | apiVersion: v1
54 | metadata:
55 | name: terway-qos
56 | data:
57 | global_bps_config: |
58 | hw_tx_bps_max 900000000
59 | hw_rx_bps_max 900000000
60 | offline_l1_tx_bps_min 100000000
61 | offline_l1_tx_bps_max 200000000
62 | offline_l2_tx_bps_min 100000000
63 | offline_l2_tx_bps_max 300000000
64 | offline_l1_rx_bps_min 100000000
65 | offline_l1_rx_bps_max 200000000
66 | offline_l2_rx_bps_min 100000000
67 | offline_l2_rx_bps_max 300000000
68 | ```
69 |
70 | > 带宽单位 Bytes/s , 带宽限制精度至少 1MB 以上
71 |
72 | ### Pod 带宽限制配置
73 |
74 | 支持 Kubernetes 标准的 Annotation
75 |
76 | - `kubernetes.io/egress-bandwidth`
77 | - `kubernetes.io/ingress-bandwidth`
78 |
79 | 支持热更新 Annotation 来调整 Pod 带宽限制
80 |
81 | 需注意,CNI 插件可能支持 Kubernetes 标准的 Annotation ,从而会影响热更新,这种情况下可以选择关闭 CNI 插件的带宽限制功能。
82 |
83 | ## 快速开始
84 |
85 | [快速开始](docs/quick-start-zh_CN.md)
86 |
87 | ## License
88 |
89 | terway-qos是由阿里巴巴开发的,采用Apache License(版本2.0)许可证。
90 | 本产品包含其他开源许可证下的各种第三方组件。
91 | 更多信息请参阅[NOTICE](NOTICE)文件。
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # terway-qos
2 |
3 | ## Introduction
4 |
5 | terway-qos is developed to solve the problem of container network bandwidth contention in mixed deployment scenarios. It
6 | supports bandwidth limitation based on individual Pods and business types.
7 |
8 | Compared to other solutions, terway-qos has the following advantages:
9 |
10 | 1. Supports bandwidth limitation based on business types, allowing for mixed deployment of multiple business types.
11 | 2. Supports dynamic adjustment of Pod bandwidth limitation.
12 |
13 | ## Functionality
14 |
15 | Bandwidth limitation can be divided into:
16 |
17 | - Host bandwidth limitation
18 | - Pod bandwidth limitation
19 |
20 | ### Host bandwidth limitation
21 |
22 | In mixed deployment scenarios, we expect to guarantee maximum bandwidth for online business to avoid contention. During
23 | idle periods, offline business should also be able to utilize the full bandwidth resources as much as possible.
24 | For this purpose, users can define three priority levels for business traffic: L0, L1, and L2. The priority order is
25 | L0 > L1 > L2.
26 |
27 | Definition of contention scenario: When the total traffic of L0, L1, and L2 exceeds the host bandwidth.
28 |
29 | Limitation strategy:
30 |
31 | - The maximum bandwidth of L0 is dynamically adjusted based on the real-time traffic of L1 and L2. The maximum value is
32 | the host bandwidth, and the minimum value is `host bandwidth - minimum L1 bandwidth - minimum L2 bandwidth`.
33 | - Under any circumstances, the bandwidth of L1 and L2 should not exceed their respective upper limits.
34 | - In a contention scenario, the bandwidth of L1 and L2 should not be lower than their respective lower limits.
35 | - In a contention scenario, the bandwidth is limited in the order of L2, L1, and L0.
36 |
37 | Supports hot update of annotations to adjust Pod bandwidth limitation.
38 |
39 | Please note that the CNI plugin may also support Kubernetes standard annotations, which may affect the hot update. In
40 | this case, you can choose to disable the bandwidth limitation feature of the CNI plugin.
41 |
42 | ### Pod priority definition
43 |
44 | Configure the following annotation for Pods:
45 |
46 | | key | Parameters |
47 | |----------------------------|---------------------------------------------------------------------------------------------------------------------------|
48 | | `k8s.aliyun.com/qos-class` | `guaranteed` for online business L0
`burstable` for offline business L1
`best-effort` for offline business L2
|
49 |
50 | ### Bandwidth limitation configuration
51 |
52 | For nodes requiring mixed deployment, configure the bandwidth limits in the file `/var/lib/terway/qos/global_bps_config`.
53 |
54 | | Configuration Path | Parameters |
55 | |-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
56 | | `/var/lib/terway/qos/global_bps_config` | `hw_tx_bps_max` maximum tx bandwidth for the node
`hw_rx_bps_max` maximum rx bandwidth for the node
`offline_l1_tx_bps_min` minimum guaranteed bandwidth for inbound L1 offline business
`offline_l1_tx_bps_max` maximum bandwidth usage for inbound L1 offline business
`offline_l2_tx_bps_min` minimum guaranteed bandwidth for inbound L2 offline business
`offline_l2_tx_bps_max` maximum bandwidth usage for inbound L2 offline business |
57 |
58 | Here is an example:
59 |
60 | ```yaml
61 | kind: ConfigMap
62 | apiVersion: v1
63 | metadata:
64 | name: terway-qos
65 | data:
66 | global_bps_config: |
67 | hw_tx_bps_max 900000000
68 | hw_rx_bps_max 0
69 | offline_l1_tx_bps_min 100000000
70 | offline_l1_tx_bps_max 200000000
71 | offline_l2_tx_bps_min 100000000
72 | offline_l2_tx_bps_max 300000000
73 | offline_l1_rx_bps_min 0
74 | offline_l1_rx_bps_max 0
75 | offline_l2_rx_bps_min 0
76 | offline_l2_rx_bps_max 0
77 | ```
78 |
79 | > The bandwidth unit is Bytes/s, and the bandwidth limitation precision is at least 1MB or higher.
80 |
81 | ### Pod bandwidth limitation configuration
82 |
83 | Supports Kubernetes standard annotations:
84 |
85 | - `kubernetes.io/egress-bandwidth`
86 | - `kubernetes.io/ingress-bandwidth`
87 |
88 | Supports hot update of annotations to adjust Pod bandwidth limitation.
89 |
90 | Please note that the CNI plugin may also support Kubernetes standard annotations, which may affect the hot update. In
91 | this case, you can choose to disable the bandwidth limitation feature of the CNI plugin.
92 |
93 | ## License
94 |
95 | terway-qos is developed by Alibaba Group and licensed under the Apache License (Version 2.0).
96 | This product contains various third-party components under other open source licenses.
97 | See the [NOTICE](NOTICE) file for more information.
--------------------------------------------------------------------------------
/bpf/headers/LICENSE.BSD-2-Clause:
--------------------------------------------------------------------------------
1 | Valid-License-Identifier: BSD-2-Clause
2 | SPDX-URL: https://spdx.org/licenses/BSD-2-Clause.html
3 | Usage-Guide:
4 | To use the BSD 2-clause "Simplified" License put the following SPDX
5 | tag/value pair into a comment according to the placement guidelines in
6 | the licensing rules documentation:
7 | SPDX-License-Identifier: BSD-2-Clause
8 | License-Text:
9 |
10 | Copyright (c) . All rights reserved.
11 |
12 | Redistribution and use in source and binary forms, with or without
13 | modification, are permitted provided that the following conditions are met:
14 |
15 | 1. Redistributions of source code must retain the above copyright notice,
16 | this list of conditions and the following disclaimer.
17 |
18 | 2. Redistributions in binary form must reproduce the above copyright
19 | notice, this list of conditions and the following disclaimer in the
20 | documentation and/or other materials provided with the distribution.
21 |
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
26 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 | POSSIBILITY OF SUCH DAMAGE.
33 |
--------------------------------------------------------------------------------
/bpf/headers/bpf_endian.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2 | #ifndef __BPF_ENDIAN__
3 | #define __BPF_ENDIAN__
4 |
5 | /*
6 | * Isolate byte #n and put it into byte #m, for __u##b type.
7 | * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
8 | * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
9 | * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
10 | * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
11 | * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
12 | */
13 | #define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
14 | /* Byte-swap a 16-bit value using ___bpf_mvb shifts; backs the __bpf_constant_{ntohs,htons} macros on little-endian targets. */
15 | #define ___bpf_swab16(x) ((__u16)( \
16 | ___bpf_mvb(x, 16, 0, 1) | \
17 | ___bpf_mvb(x, 16, 1, 0)))
18 | /* Byte-swap a 32-bit value using ___bpf_mvb shifts; backs the __bpf_constant_{ntohl,htonl} macros on little-endian targets. */
19 | #define ___bpf_swab32(x) ((__u32)( \
20 | ___bpf_mvb(x, 32, 0, 3) | \
21 | ___bpf_mvb(x, 32, 1, 2) | \
22 | ___bpf_mvb(x, 32, 2, 1) | \
23 | ___bpf_mvb(x, 32, 3, 0)))
24 | /* Byte-swap a 64-bit value using ___bpf_mvb shifts; backs the __bpf_constant_{be64_to_cpu,cpu_to_be64} macros on little-endian targets. */
25 | #define ___bpf_swab64(x) ((__u64)( \
26 | ___bpf_mvb(x, 64, 0, 7) | \
27 | ___bpf_mvb(x, 64, 1, 6) | \
28 | ___bpf_mvb(x, 64, 2, 5) | \
29 | ___bpf_mvb(x, 64, 3, 4) | \
30 | ___bpf_mvb(x, 64, 4, 3) | \
31 | ___bpf_mvb(x, 64, 5, 2) | \
32 | ___bpf_mvb(x, 64, 6, 1) | \
33 | ___bpf_mvb(x, 64, 7, 0)))
34 |
35 | /* LLVM's BPF target selects the endianness of the CPU
36 | * it compiles on, or the user specifies (bpfel/bpfeb),
37 | * respectively. The used __BYTE_ORDER__ is defined by
38 | * the compiler, we cannot rely on __BYTE_ORDER from
39 | * libc headers, since it doesn't reflect the actual
40 | * requested byte order.
41 | *
42 | * Note, LLVM's BPF target has different __builtin_bswapX()
43 | * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
44 | * in bpfel and bpfeb case, which means below, that we map
45 | * to cpu_to_be16(). We could use it unconditionally in BPF
46 | * case, but better not rely on it, so that this header here
47 | * can be used from application and BPF program side, which
48 | * use different targets.
49 | */
50 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
51 | # define __bpf_ntohs(x) __builtin_bswap16(x)
52 | # define __bpf_htons(x) __builtin_bswap16(x)
53 | # define __bpf_constant_ntohs(x) ___bpf_swab16(x)
54 | # define __bpf_constant_htons(x) ___bpf_swab16(x)
55 | # define __bpf_ntohl(x) __builtin_bswap32(x)
56 | # define __bpf_htonl(x) __builtin_bswap32(x)
57 | # define __bpf_constant_ntohl(x) ___bpf_swab32(x)
58 | # define __bpf_constant_htonl(x) ___bpf_swab32(x)
59 | # define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
60 | # define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
61 | # define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x)
62 | # define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x)
63 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
64 | # define __bpf_ntohs(x) (x)
65 | # define __bpf_htons(x) (x)
66 | # define __bpf_constant_ntohs(x) (x)
67 | # define __bpf_constant_htons(x) (x)
68 | # define __bpf_ntohl(x) (x)
69 | # define __bpf_htonl(x) (x)
70 | # define __bpf_constant_ntohl(x) (x)
71 | # define __bpf_constant_htonl(x) (x)
72 | # define __bpf_be64_to_cpu(x) (x)
73 | # define __bpf_cpu_to_be64(x) (x)
74 | # define __bpf_constant_be64_to_cpu(x) (x)
75 | # define __bpf_constant_cpu_to_be64(x) (x)
76 | #else
77 | # error "Fix your compiler's __BYTE_ORDER__?!"
78 | #endif
79 |
80 | #define bpf_htons(x) \
81 | (__builtin_constant_p(x) ? \
82 | __bpf_constant_htons(x) : __bpf_htons(x))
83 | #define bpf_ntohs(x) \
84 | (__builtin_constant_p(x) ? \
85 | __bpf_constant_ntohs(x) : __bpf_ntohs(x))
86 | #define bpf_htonl(x) \
87 | (__builtin_constant_p(x) ? \
88 | __bpf_constant_htonl(x) : __bpf_htonl(x))
89 | #define bpf_ntohl(x) \
90 | (__builtin_constant_p(x) ? \
91 | __bpf_constant_ntohl(x) : __bpf_ntohl(x))
92 | #define bpf_cpu_to_be64(x) \
93 | (__builtin_constant_p(x) ? \
94 | __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
95 | #define bpf_be64_to_cpu(x) \
96 | (__builtin_constant_p(x) ? \
97 | __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
98 |
99 | #endif /* __BPF_ENDIAN__ */
100 |
--------------------------------------------------------------------------------
/bpf/headers/bpf_helpers.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2 | #ifndef __BPF_HELPERS__
3 | #define __BPF_HELPERS__
4 |
5 | /*
6 | * Note that bpf programs need to include either
7 | * vmlinux.h (auto-generated from BTF) or linux/types.h
8 | * in advance since bpf_helper_defs.h uses such types
9 | * as __u64.
10 | */
11 | #include "bpf_helper_defs.h"
12 |
13 | #define __uint(name, val) int (*name)[val]
14 | #define __type(name, val) typeof(val) *name
15 | #define __array(name, val) typeof(val) *name[]
16 |
17 | /*
18 | * Helper macro to place programs, maps, license in
19 | * different sections in elf_bpf file. Section names
20 | * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
21 | * extern variables, etc).
22 | * To allow use of SEC() with externs (e.g., for extern .maps declarations),
23 | * make sure __attribute__((unused)) doesn't trigger compilation warning.
24 | */
25 | #define SEC(name) \
26 | _Pragma("GCC diagnostic push") \
27 | _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
28 | __attribute__((section(name), used)) \
29 | _Pragma("GCC diagnostic pop") \
30 |
31 | /* Avoid 'linux/stddef.h' definition of '__always_inline'. */
32 | #undef __always_inline
33 | #define __always_inline inline __attribute__((always_inline))
34 |
35 | #ifndef __noinline
36 | #define __noinline __attribute__((noinline))
37 | #endif
38 | #ifndef __weak
39 | #define __weak __attribute__((weak))
40 | #endif
41 |
42 | /*
43 | * Use __hidden attribute to mark a non-static BPF subprogram effectively
44 | * static for BPF verifier's verification algorithm purposes, allowing more
45 | * extensive and permissive BPF verification process, taking into account
46 | * subprogram's caller context.
47 | */
48 | #define __hidden __attribute__((visibility("hidden")))
49 |
50 | /* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
51 | * any system-level headers (such as stddef.h, linux/version.h, etc), and
52 | * commonly-used macros like NULL and KERNEL_VERSION aren't available through
53 | * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
54 | * them on their own. So as a convenience, provide such definitions here.
55 | */
56 | #ifndef NULL
57 | #define NULL ((void *)0)
58 | #endif
59 |
60 | #ifndef KERNEL_VERSION
61 | #define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
62 | #endif
63 |
64 | /*
65 | * Helper macros to manipulate data structures
66 | */
67 | #ifndef offsetof
68 | #define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
69 | #endif
70 | #ifndef container_of
71 | #define container_of(ptr, type, member) \
72 | ({ \
73 | void *__mptr = (void *)(ptr); \
74 | ((type *)(__mptr - offsetof(type, member))); \
75 | })
76 | #endif
77 |
78 | /*
79 | * Helper macro to throw a compilation error if __bpf_unreachable() gets
80 | * built into the resulting code. This works given BPF back end does not
81 | * implement __builtin_trap(). This is useful to assert that certain paths
82 | * of the program code are never used and hence eliminated by the compiler.
83 | *
84 | * For example, consider a switch statement that covers known cases used by
85 | * the program. __bpf_unreachable() can then reside in the default case. If
86 | * the program gets extended such that a case is not covered in the switch
87 | * statement, then it will throw a build error due to the default case not
88 | * being compiled out.
89 | */
90 | #ifndef __bpf_unreachable
91 | # define __bpf_unreachable() __builtin_trap()
92 | #endif
93 |
94 | /*
95 | * Helper function to perform a tail call with a constant/immediate map slot.
96 | */
97 | #if __clang_major__ >= 8 && defined(__bpf__)
98 | static __always_inline void
99 | bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
100 | {
101 | if (!__builtin_constant_p(slot))
102 | __bpf_unreachable();
103 |
104 | /*
105 | * Provide a hard guarantee that LLVM won't optimize setting r2 (map
106 | * pointer) and r3 (constant map index) from _different paths_ ending
107 | * up at the _same_ call insn as otherwise we won't be able to use the
108 | * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
109 | * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
110 | * tracking for prog array pokes") for details on verifier tracking.
111 | *
112 | * Note on clobber list: we need to stay in-line with BPF calling
113 | * convention, so even if we don't end up using r0, r4, r5, we need
114 | * to mark them as clobber so that LLVM doesn't end up using them
115 | * before / after the call.
116 | */
117 | asm volatile("r1 = %[ctx]\n\t"
118 | "r2 = %[map]\n\t"
119 | "r3 = %[slot]\n\t"
120 | "call 12"
121 | :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
122 | : "r0", "r1", "r2", "r3", "r4", "r5");
123 | }
124 | #endif
125 |
126 | /*
127 | * Helper structure used by eBPF C program
128 | * to describe BPF map attributes to libbpf loader
129 | */
130 | struct bpf_map_def {
131 | unsigned int type;
132 | unsigned int key_size;
133 | unsigned int value_size;
134 | unsigned int max_entries;
135 | unsigned int map_flags;
136 | };
137 |
138 | enum libbpf_pin_type {
139 | LIBBPF_PIN_NONE,
140 | /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
141 | LIBBPF_PIN_BY_NAME,
142 | };
143 |
144 | enum libbpf_tristate {
145 | TRI_NO = 0,
146 | TRI_YES = 1,
147 | TRI_MODULE = 2,
148 | };
149 |
150 | #define __kconfig __attribute__((section(".kconfig")))
151 | #define __ksym __attribute__((section(".ksyms")))
152 |
153 | #ifndef ___bpf_concat
154 | #define ___bpf_concat(a, b) a ## b
155 | #endif
156 | #ifndef ___bpf_apply
157 | #define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
158 | #endif
159 | #ifndef ___bpf_nth
160 | #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
161 | #endif
162 | #ifndef ___bpf_narg
163 | #define ___bpf_narg(...) \
164 | ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
165 | #endif
166 |
167 | #define ___bpf_fill0(arr, p, x) do {} while (0)
168 | #define ___bpf_fill1(arr, p, x) arr[p] = x
169 | #define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
170 | #define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
171 | #define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
172 | #define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
173 | #define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
174 | #define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
175 | #define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
176 | #define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
177 | #define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
178 | #define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
179 | #define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
180 | #define ___bpf_fill(arr, args...) \
181 | ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
182 |
183 | /*
184 | * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
185 | * in a structure.
186 | */
187 | #define BPF_SEQ_PRINTF(seq, fmt, args...) \
188 | ({ \
189 | static const char ___fmt[] = fmt; \
190 | unsigned long long ___param[___bpf_narg(args)]; \
191 | \
192 | _Pragma("GCC diagnostic push") \
193 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
194 | ___bpf_fill(___param, args); \
195 | _Pragma("GCC diagnostic pop") \
196 | \
197 | bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
198 | ___param, sizeof(___param)); \
199 | })
200 |
201 | /*
202 | * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
203 | * an array of u64.
204 | */
205 | #define BPF_SNPRINTF(out, out_size, fmt, args...) \
206 | ({ \
207 | static const char ___fmt[] = fmt; \
208 | unsigned long long ___param[___bpf_narg(args)]; \
209 | \
210 | _Pragma("GCC diagnostic push") \
211 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
212 | ___bpf_fill(___param, args); \
213 | _Pragma("GCC diagnostic pop") \
214 | \
215 | bpf_snprintf(out, out_size, ___fmt, \
216 | ___param, sizeof(___param)); \
217 | })
218 |
219 | #ifdef BPF_NO_GLOBAL_DATA
220 | #define BPF_PRINTK_FMT_MOD
221 | #else
222 | #define BPF_PRINTK_FMT_MOD static const
223 | #endif
224 |
225 | #define __bpf_printk(fmt, ...) \
226 | ({ \
227 | BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \
228 | bpf_trace_printk(____fmt, sizeof(____fmt), \
229 | ##__VA_ARGS__); \
230 | })
231 |
232 | /*
233 | * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments
234 | * instead of an array of u64.
235 | */
236 | #define __bpf_vprintk(fmt, args...) \
237 | ({ \
238 | static const char ___fmt[] = fmt; \
239 | unsigned long long ___param[___bpf_narg(args)]; \
240 | \
241 | _Pragma("GCC diagnostic push") \
242 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
243 | ___bpf_fill(___param, args); \
244 | _Pragma("GCC diagnostic pop") \
245 | \
246 | bpf_trace_vprintk(___fmt, sizeof(___fmt), \
247 | ___param, sizeof(___param)); \
248 | })
249 |
250 | /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
251 | * Otherwise use __bpf_vprintk
252 | */
253 | #define ___bpf_pick_printk(...) \
254 | ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \
255 | __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \
256 | __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\
257 | __bpf_printk /*1*/, __bpf_printk /*0*/)
258 |
259 | /* Helper macro to print out debug messages */
260 | #define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)
261 |
262 | #endif
263 |
--------------------------------------------------------------------------------
/bpf/headers/common.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef __LITTLE_ENDIAN_BITFIELD
3 | #define __LITTLE_ENDIAN_BITFIELD
4 | #endif
5 |
6 | #include "compiler.h"
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 | #ifndef TC_ACT_OK
13 | # define TC_ACT_OK 0
14 | #endif
15 |
16 | #ifndef TC_ACT_SHOT
17 | # define TC_ACT_SHOT 2
18 | #endif
19 |
20 | #ifndef TC_ACT_PIPE
21 | # define TC_ACT_PIPE 3
22 | #endif
23 |
--------------------------------------------------------------------------------
/bpf/headers/compiler.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
2 | /* Copyright Authors of Cilium */
3 |
4 | #ifndef __BPF_COMPILER_H_
5 | #define __BPF_COMPILER_H_
6 |
7 | #ifndef __section
8 | # define __section(X) __attribute__((section(X), used))
9 | #endif
10 |
11 | #ifndef __maybe_unused
12 | # define __maybe_unused __attribute__((__unused__))
13 | #endif
14 |
15 | #ifndef offsetof
16 | # define offsetof(T, M) __builtin_offsetof(T, M)
17 | #endif
18 |
19 | #ifndef field_sizeof
20 | # define field_sizeof(T, M) sizeof((((T *)NULL)->M))
21 | #endif
22 |
23 | #ifndef __packed
24 | # define __packed __attribute__((packed))
25 | #endif
26 |
27 | #ifndef __nobuiltin
28 | # if __clang_major__ >= 10
29 | # define __nobuiltin(X) __attribute__((no_builtin(X)))
30 | # else
31 | # define __nobuiltin(X)
32 | # endif
33 | #endif
34 |
35 | #ifndef likely
36 | # define likely(X) __builtin_expect(!!(X), 1)
37 | #endif
38 |
39 | #ifndef unlikely
40 | # define unlikely(X) __builtin_expect(!!(X), 0)
41 | #endif
42 |
43 | #ifndef always_succeeds /* Mainly for documentation purpose. */
44 | # define always_succeeds(X) likely(X)
45 | #endif
46 |
47 | #undef __always_inline /* stddef.h defines its own */
48 | #define __always_inline inline __attribute__((always_inline))
49 |
50 | #ifndef __stringify
51 | # define __stringify(X) #X
52 | #endif
53 |
54 | #ifndef __fetch
55 | # define __fetch(X) (__u32)(__u64)(&(X))
56 | #endif
57 |
58 | #ifndef __aligned
59 | # define __aligned(X) __attribute__((aligned(X)))
60 | #endif
61 |
62 | #ifndef build_bug_on
63 | # define build_bug_on(E) ((void)sizeof(char[1 - 2*!!(E)]))
64 | #endif
65 |
66 | #ifndef __throw_build_bug
67 | # define __throw_build_bug() __builtin_trap()
68 | #endif
69 |
70 | #ifndef __printf
71 | # define __printf(X, Y) __attribute__((__format__(printf, X, Y)))
72 | #endif
73 |
74 | #ifndef barrier
75 | # define barrier() asm volatile("": : :"memory")
76 | #endif
77 |
78 | #ifndef barrier_data
79 | # define barrier_data(ptr) asm volatile("": :"r"(ptr) :"memory")
80 | #endif
81 |
82 | static __always_inline void bpf_barrier(void)
83 | {
84 | /* Workaround to avoid verifier complaint:
85 | * "dereference of modified ctx ptr R5 off=48+0, ctx+const is allowed,
86 | * ctx+const+const is not"
87 | */
88 | barrier();
89 | }
90 |
91 | #ifndef ARRAY_SIZE
92 | # define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
93 | #endif
94 |
95 | #ifndef __READ_ONCE
96 | # define __READ_ONCE(X) (*(volatile typeof(X) *)&X)
97 | #endif
98 |
99 | #ifndef __WRITE_ONCE
100 | # define __WRITE_ONCE(X, V) (*(volatile typeof(X) *)&X) = (V)
101 | #endif
102 |
103 | /* {READ,WRITE}_ONCE() with verifier workaround via bpf_barrier(). */
104 |
105 | #ifndef READ_ONCE
106 | # define READ_ONCE(X) \
107 | ({ typeof(X) __val = __READ_ONCE(X); \
108 | bpf_barrier(); \
109 | __val; })
110 | #endif
111 |
112 | #ifndef WRITE_ONCE
113 | # define WRITE_ONCE(X, V) \
114 | ({ typeof(X) __val = (V); \
115 | __WRITE_ONCE(X, __val); \
116 | bpf_barrier(); \
117 | __val; })
118 | #endif
119 |
120 | #endif /* __BPF_COMPILER_H_ */
121 |
--------------------------------------------------------------------------------
/bpf/headers/linux/bpf_common.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | #ifndef __LINUX_BPF_COMMON_H__
4 | #define __LINUX_BPF_COMMON_H__
5 |
6 | /* Instruction classes */
7 | #define BPF_CLASS(code) ((code) & 0x07)
8 | #define BPF_LD 0x00
9 | #define BPF_LDX 0x01
10 | #define BPF_ST 0x02
11 | #define BPF_STX 0x03
12 | #define BPF_ALU 0x04
13 | #define BPF_JMP 0x05
14 | #define BPF_RET 0x06
15 | #define BPF_MISC 0x07
16 |
17 | /* ld/ldx fields */
18 | #define BPF_SIZE(code) ((code) & 0x18)
19 | #define BPF_W 0x00
20 | #define BPF_H 0x08
21 | #define BPF_B 0x10
22 | #define BPF_MODE(code) ((code) & 0xe0)
23 | #define BPF_IMM 0x00
24 | #define BPF_ABS 0x20
25 | #define BPF_IND 0x40
26 | #define BPF_MEM 0x60
27 | #define BPF_LEN 0x80
28 | #define BPF_MSH 0xa0
29 |
30 | /* alu/jmp fields */
31 | #define BPF_OP(code) ((code) & 0xf0)
32 | #define BPF_ADD 0x00
33 | #define BPF_SUB 0x10
34 | #define BPF_MUL 0x20
35 | #define BPF_DIV 0x30
36 | #define BPF_OR 0x40
37 | #define BPF_AND 0x50
38 | #define BPF_LSH 0x60
39 | #define BPF_RSH 0x70
40 | #define BPF_NEG 0x80
41 | #define BPF_MOD 0x90
42 | #define BPF_XOR 0xa0
43 |
44 | #define BPF_JA 0x00
45 | #define BPF_JEQ 0x10
46 | #define BPF_JGT 0x20
47 | #define BPF_JGE 0x30
48 | #define BPF_JSET 0x40
49 | #define BPF_SRC(code) ((code) & 0x08)
50 | #define BPF_K 0x00
51 | #define BPF_X 0x08
52 |
53 | #ifndef BPF_MAXINSNS
54 | #define BPF_MAXINSNS 4096
55 | #endif
56 |
57 | #endif /* __LINUX_BPF_COMMON_H__ */
58 |
--------------------------------------------------------------------------------
/bpf/headers/linux/if_ether.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | /*
4 | * INET An implementation of the TCP/IP protocol suite for the LINUX
5 | * operating system. INET is implemented using the BSD Socket
6 | * interface as the means of communication with the user level.
7 | *
8 | * Global definitions for the Ethernet IEEE 802.3 interface.
9 | *
10 | * Version: @(#)if_ether.h 1.0.1a 02/08/94
11 | *
12 | * Author: Fred N. van Kempen,
13 | * Donald Becker,
14 | * Alan Cox,
15 | * Steve Whitehouse,
16 | *
17 | * This program is free software; you can redistribute it and/or
18 | * modify it under the terms of the GNU General Public License
19 | * as published by the Free Software Foundation; either version
20 | * 2 of the License, or (at your option) any later version.
21 | */
22 |
23 | #ifndef _LINUX_IF_ETHER_H
24 | #define _LINUX_IF_ETHER_H
25 |
26 | #include <linux/types.h>
27 |
28 | /*
29 | * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
30 | * and FCS/CRC (frame check sequence).
31 | */
32 |
33 | #define ETH_ALEN 6 /* Octets in one ethernet addr */
34 | /* __ETH_HLEN is out of sync with the kernel's if_ether.h. In Cilium datapath
35 | * we use ETH_HLEN which can be loaded via static data, and for L2-less devs
36 | * it's 0. To avoid replacing every occurrence of ETH_HLEN in the datapath,
37 | * we prefixed the kernel's ETH_HLEN instead.
38 | */
39 | #define __ETH_HLEN 14 /* Total octets in header. */
40 | #define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
41 | #define ETH_DATA_LEN 1500 /* Max. octets in payload */
42 | #define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
43 | #define ETH_FCS_LEN 4 /* Octets in the FCS */
44 |
45 | /*
46 | * These are the defined Ethernet Protocol ID's.
47 | */
48 |
49 | #define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */
50 | #define ETH_P_PUP 0x0200 /* Xerox PUP packet */
51 | #define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */
52 | #define ETH_P_TSN 0x22F0 /* TSN (IEEE 1722) packet */
53 | #define ETH_P_IP 0x0800 /* Internet Protocol packet */
54 | #define ETH_P_X25 0x0805 /* CCITT X.25 */
55 | #define ETH_P_ARP 0x0806 /* Address Resolution packet */
56 | #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */
57 | #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */
58 | #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */
59 | #define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */
60 | #define ETH_P_DEC 0x6000 /* DEC Assigned proto */
61 | #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */
62 | #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */
63 | #define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */
64 | #define ETH_P_LAT 0x6004 /* DEC LAT */
65 | #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
66 | #define ETH_P_CUST 0x6006 /* DEC Customer use */
67 | #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
68 | #define ETH_P_TEB 0x6558 /* Trans Ether Bridging */
69 | #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
70 | #define ETH_P_ATALK 0x809B /* Appletalk DDP */
71 | #define ETH_P_AARP 0x80F3 /* Appletalk AARP */
72 | #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
73 | #define ETH_P_IPX 0x8137 /* IPX over DIX */
74 | #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
75 | #define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */
76 | #define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */
77 | #define ETH_P_WCCP 0x883E /* Web-cache coordination protocol
78 | * defined in draft-wilson-wrec-wccp-v2-00.txt */
79 | #define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */
80 | #define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */
81 | #define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */
82 | #define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
83 | #define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */
84 | #define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */
85 | #define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport
86 | * over Ethernet
87 | */
88 | #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */
89 | #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */
90 | #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
91 | #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */
92 | #define ETH_P_TIPC 0x88CA /* TIPC */
93 | #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */
94 | #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */
95 | #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */
96 | #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */
97 | #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */
98 | #define ETH_P_TDLS 0x890D /* TDLS */
99 | #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */
100 | #define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */
101 | #define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */
102 | #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
103 | #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
104 | #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
105 | #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
106 | #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
107 |
108 | #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is at least this value
109 | * then the frame is Ethernet II. Else it is 802.3 */
110 |
111 | /*
112 | * Non DIX types. Won't clash for 1500 types.
113 | */
114 |
115 | #define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */
116 | #define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */
117 | #define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */
118 | #define ETH_P_802_2 0x0004 /* 802.2 frames */
119 | #define ETH_P_SNAP 0x0005 /* Internal only */
120 | #define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */
121 | #define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/
122 | #define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */
123 | #define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */
124 | #define ETH_P_CAN 0x000C /* CAN: Controller Area Network */
125 | #define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/
126 | #define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/
127 | #define ETH_P_TR_802_2 0x0011 /* 802.2 frames */
128 | #define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */
129 | #define ETH_P_CONTROL 0x0016 /* Card specific control frames */
130 | #define ETH_P_IRDA 0x0017 /* Linux-IrDA */
131 | #define ETH_P_ECONET 0x0018 /* Acorn Econet */
132 | #define ETH_P_HDLC 0x0019 /* HDLC frames */
133 | #define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */
134 | #define ETH_P_DSA 0x001B /* Distributed Switch Arch. */
135 | #define ETH_P_TRAILER 0x001C /* Trailer switch tagging */
136 | #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */
137 | #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */
138 | #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */
139 | #define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */
140 |
141 | /*
142 | * This is an Ethernet frame header.
143 | */
144 |
145 | struct ethhdr {
146 | unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
147 | unsigned char h_source[ETH_ALEN]; /* source ether addr */
148 | __be16 h_proto; /* packet type ID field */
149 | } __attribute__((packed));
150 |
151 |
152 | #endif /* _LINUX_IF_ETHER_H */
153 |
--------------------------------------------------------------------------------
/bpf/headers/linux/in.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | /*
4 | * INET An implementation of the TCP/IP protocol suite for the LINUX
5 | * operating system. INET is implemented using the BSD Socket
6 | * interface as the means of communication with the user level.
7 | *
8 | * Definitions of the Internet Protocol.
9 | *
10 | * Version: @(#)in.h 1.0.1 04/21/93
11 | *
12 | * Authors: Original taken from the GNU Project file.
13 | * Fred N. van Kempen,
14 | *
15 | * This program is free software; you can redistribute it and/or
16 | * modify it under the terms of the GNU General Public License
17 | * as published by the Free Software Foundation; either version
18 | * 2 of the License, or (at your option) any later version.
19 | */
20 | #ifndef _LINUX_IN_H
21 | #define _LINUX_IN_H
22 |
23 | #include <linux/types.h>
24 |
25 | /* Standard well-defined IP protocols. */
26 | enum {
27 | IPPROTO_IP = 0, /* Dummy protocol for TCP */
28 | #define IPPROTO_IP IPPROTO_IP
29 | IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
30 | #define IPPROTO_ICMP IPPROTO_ICMP
31 | IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
32 | #define IPPROTO_IGMP IPPROTO_IGMP
33 | IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
34 | #define IPPROTO_IPIP IPPROTO_IPIP
35 | IPPROTO_TCP = 6, /* Transmission Control Protocol */
36 | #define IPPROTO_TCP IPPROTO_TCP
37 | IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
38 | #define IPPROTO_EGP IPPROTO_EGP
39 | IPPROTO_PUP = 12, /* PUP protocol */
40 | #define IPPROTO_PUP IPPROTO_PUP
41 | IPPROTO_UDP = 17, /* User Datagram Protocol */
42 | #define IPPROTO_UDP IPPROTO_UDP
43 | IPPROTO_IDP = 22, /* XNS IDP protocol */
44 | #define IPPROTO_IDP IPPROTO_IDP
45 | IPPROTO_TP = 29, /* ISO Transport Protocol Class 4 */
46 | #define IPPROTO_TP IPPROTO_TP
47 | IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
48 | #define IPPROTO_DCCP IPPROTO_DCCP
49 | IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
50 | #define IPPROTO_IPV6 IPPROTO_IPV6
51 | IPPROTO_RSVP = 46, /* RSVP Protocol */
52 | #define IPPROTO_RSVP IPPROTO_RSVP
53 | IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
54 | #define IPPROTO_GRE IPPROTO_GRE
55 | IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
56 | #define IPPROTO_ESP IPPROTO_ESP
57 | IPPROTO_AH = 51, /* Authentication Header protocol */
58 | #define IPPROTO_AH IPPROTO_AH
59 | IPPROTO_MTP = 92, /* Multicast Transport Protocol */
60 | #define IPPROTO_MTP IPPROTO_MTP
61 | IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
62 | #define IPPROTO_BEETPH IPPROTO_BEETPH
63 | IPPROTO_ENCAP = 98, /* Encapsulation Header */
64 | #define IPPROTO_ENCAP IPPROTO_ENCAP
65 | IPPROTO_PIM = 103, /* Protocol Independent Multicast */
66 | #define IPPROTO_PIM IPPROTO_PIM
67 | IPPROTO_COMP = 108, /* Compression Header Protocol */
68 | #define IPPROTO_COMP IPPROTO_COMP
69 | IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
70 | #define IPPROTO_SCTP IPPROTO_SCTP
71 | IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
72 | #define IPPROTO_UDPLITE IPPROTO_UDPLITE
73 | IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
74 | #define IPPROTO_MPLS IPPROTO_MPLS
75 | IPPROTO_RAW = 255, /* Raw IP packets */
76 | #define IPPROTO_RAW IPPROTO_RAW
77 | IPPROTO_MAX
78 | };
79 |
80 | /* Internet address. */
81 | struct in_addr {
82 | __be32 s_addr;
83 | };
84 |
85 | #define IP_TOS 1
86 | #define IP_TTL 2
87 | #define IP_HDRINCL 3
88 | #define IP_OPTIONS 4
89 | #define IP_ROUTER_ALERT 5
90 | #define IP_RECVOPTS 6
91 | #define IP_RETOPTS 7
92 | #define IP_PKTINFO 8
93 | #define IP_PKTOPTIONS 9
94 | #define IP_MTU_DISCOVER 10
95 | #define IP_RECVERR 11
96 | #define IP_RECVTTL 12
97 | #define IP_RECVTOS 13
98 | #define IP_MTU 14
99 | #define IP_FREEBIND 15
100 | #define IP_IPSEC_POLICY 16
101 | #define IP_XFRM_POLICY 17
102 | #define IP_PASSSEC 18
103 | #define IP_TRANSPARENT 19
104 |
105 | /* BSD compatibility */
106 | #define IP_RECVRETOPTS IP_RETOPTS
107 |
108 | /* TProxy original addresses */
109 | #define IP_ORIGDSTADDR 20
110 | #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
111 |
112 | #define IP_MINTTL 21
113 | #define IP_NODEFRAG 22
114 | #define IP_CHECKSUM 23
115 | #define IP_BIND_ADDRESS_NO_PORT 24
116 |
117 | /* IP_MTU_DISCOVER values */
118 | #define IP_PMTUDISC_DONT 0 /* Never send DF frames */
119 | #define IP_PMTUDISC_WANT 1 /* Use per route hints */
120 | #define IP_PMTUDISC_DO 2 /* Always DF */
121 | #define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
122 | /* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
123 | * Also incoming ICMP frag_needed notifications will be ignored on
124 | * this socket to prevent accepting spoofed ones.
125 | */
126 | #define IP_PMTUDISC_INTERFACE 4
127 | /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
128 | * fragmented if they exceed the interface mtu
129 | */
130 | #define IP_PMTUDISC_OMIT 5
131 |
132 | #define IP_MULTICAST_IF 32
133 | #define IP_MULTICAST_TTL 33
134 | #define IP_MULTICAST_LOOP 34
135 | #define IP_ADD_MEMBERSHIP 35
136 | #define IP_DROP_MEMBERSHIP 36
137 | #define IP_UNBLOCK_SOURCE 37
138 | #define IP_BLOCK_SOURCE 38
139 | #define IP_ADD_SOURCE_MEMBERSHIP 39
140 | #define IP_DROP_SOURCE_MEMBERSHIP 40
141 | #define IP_MSFILTER 41
142 | #define MCAST_JOIN_GROUP 42
143 | #define MCAST_BLOCK_SOURCE 43
144 | #define MCAST_UNBLOCK_SOURCE 44
145 | #define MCAST_LEAVE_GROUP 45
146 | #define MCAST_JOIN_SOURCE_GROUP 46
147 | #define MCAST_LEAVE_SOURCE_GROUP 47
148 | #define MCAST_MSFILTER 48
149 | #define IP_MULTICAST_ALL 49
150 | #define IP_UNICAST_IF 50
151 |
152 | #define MCAST_EXCLUDE 0
153 | #define MCAST_INCLUDE 1
154 |
155 | /* These need to appear somewhere around here */
156 | #define IP_DEFAULT_MULTICAST_TTL 1
157 | #define IP_DEFAULT_MULTICAST_LOOP 1
158 |
159 | /* Request struct for multicast socket ops */
160 |
161 | /*
162 | * Definitions of the bits in an Internet address integer.
163 | * On subnets, host and network parts are found according
164 | * to the subnet mask, not these masks.
165 | */
166 | #define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
167 | #define IN_CLASSA_NET 0xff000000
168 | #define IN_CLASSA_NSHIFT 24
169 | #define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
170 | #define IN_CLASSA_MAX 128
171 |
172 | #define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
173 | #define IN_CLASSB_NET 0xffff0000
174 | #define IN_CLASSB_NSHIFT 16
175 | #define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
176 | #define IN_CLASSB_MAX 65536
177 |
178 | #define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
179 | #define IN_CLASSC_NET 0xffffff00
180 | #define IN_CLASSC_NSHIFT 8
181 | #define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
182 |
183 | #define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
184 | #define IN_MULTICAST(a) IN_CLASSD(a)
185 | #define IN_MULTICAST_NET 0xF0000000
186 |
187 | #define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
188 | #define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
189 |
190 | /* Address to accept any incoming messages. */
191 | #define INADDR_ANY ((unsigned long int) 0x00000000)
192 |
193 | /* Address to send to all hosts. */
194 | #define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
195 |
196 | /* Address indicating an error return. */
197 | #define INADDR_NONE ((unsigned long int) 0xffffffff)
198 |
199 | /* Network number for local host loopback. */
200 | #define IN_LOOPBACKNET 127
201 |
202 | /* Address to loopback in software to local host. */
203 | #define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
204 | #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
205 |
206 | /* Defines for Multicast INADDR */
207 | #define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
208 | #define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
209 | #define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
210 | #define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
211 |
212 | #endif /* _LINUX_IN_H */
213 |
--------------------------------------------------------------------------------
/bpf/headers/linux/in6.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | /*
4 | * Types and definitions for AF_INET6
5 | * Linux INET6 implementation
6 | *
7 | * Authors:
8 | * Pedro Roque
9 | *
10 | * Sources:
11 | * IPv6 Program Interfaces for BSD Systems
12 | *
13 | *
14 | * Advanced Sockets API for IPv6
15 | *
16 | *
17 | * This program is free software; you can redistribute it and/or
18 | * modify it under the terms of the GNU General Public License
19 | * as published by the Free Software Foundation; either version
20 | * 2 of the License, or (at your option) any later version.
21 | */
22 |
23 | #ifndef _LINUX_IN6_H
24 | #define _LINUX_IN6_H
25 |
26 | #include
27 |
28 | /*
29 | * IPv6 address structure
30 | */
31 |
32 | struct in6_addr {
33 | union {
34 | __u8 u6_addr8[16];
35 | __be16 u6_addr16[8];
36 | __be32 u6_addr32[4];
37 | } in6_u;
38 | #define s6_addr in6_u.u6_addr8
39 | #define s6_addr16 in6_u.u6_addr16
40 | #define s6_addr32 in6_u.u6_addr32
41 | };
42 |
43 | /*
44 | * Bitmask constant declarations to help applications select out the
45 | * flow label and priority fields.
46 | *
47 | * Note that these are in host byte order while the flowinfo field of
48 | * sockaddr_in6 is in network byte order.
49 | */
50 |
51 | #define IPV6_FLOWINFO_FLOWLABEL 0x000fffff
52 | #define IPV6_FLOWINFO_PRIORITY 0x0ff00000
53 |
54 | /* These definitions are obsolete */
55 | #define IPV6_PRIORITY_UNCHARACTERIZED 0x0000
56 | #define IPV6_PRIORITY_FILLER 0x0100
57 | #define IPV6_PRIORITY_UNATTENDED 0x0200
58 | #define IPV6_PRIORITY_RESERVED1 0x0300
59 | #define IPV6_PRIORITY_BULK 0x0400
60 | #define IPV6_PRIORITY_RESERVED2 0x0500
61 | #define IPV6_PRIORITY_INTERACTIVE 0x0600
62 | #define IPV6_PRIORITY_CONTROL 0x0700
63 | #define IPV6_PRIORITY_8 0x0800
64 | #define IPV6_PRIORITY_9 0x0900
65 | #define IPV6_PRIORITY_10 0x0a00
66 | #define IPV6_PRIORITY_11 0x0b00
67 | #define IPV6_PRIORITY_12 0x0c00
68 | #define IPV6_PRIORITY_13 0x0d00
69 | #define IPV6_PRIORITY_14 0x0e00
70 | #define IPV6_PRIORITY_15 0x0f00
71 |
72 | /*
73 | * IPV6 extension headers
74 | */
75 | #define IPPROTO_HOPOPTS 0 /* IPv6 hop-by-hop options */
76 | #define IPPROTO_ROUTING 43 /* IPv6 routing header */
77 | #define IPPROTO_FRAGMENT 44 /* IPv6 fragmentation header */
78 | #define IPPROTO_ICMPV6 58 /* ICMPv6 */
79 | #define IPPROTO_NONE 59 /* IPv6 no next header */
80 | #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */
81 | #define IPPROTO_MH 135 /* IPv6 mobility header */
82 |
83 | /*
84 | * IPv6 TLV options.
85 | */
86 | #define IPV6_TLV_PAD1 0
87 | #define IPV6_TLV_PADN 1
88 | #define IPV6_TLV_ROUTERALERT 5
89 | #define IPV6_TLV_JUMBO 194
90 | #define IPV6_TLV_HAO 201 /* home address option */
91 |
92 | /*
93 | * IPV6 socket options
94 | */
95 | #define IPV6_ADDRFORM 1
96 | #define IPV6_2292PKTINFO 2
97 | #define IPV6_2292HOPOPTS 3
98 | #define IPV6_2292DSTOPTS 4
99 | #define IPV6_2292RTHDR 5
100 | #define IPV6_2292PKTOPTIONS 6
101 | #define IPV6_CHECKSUM 7
102 | #define IPV6_2292HOPLIMIT 8
103 | #define IPV6_NEXTHOP 9
104 | #define IPV6_AUTHHDR 10 /* obsolete */
105 | #define IPV6_FLOWINFO 11
106 |
107 | #define IPV6_UNICAST_HOPS 16
108 | #define IPV6_MULTICAST_IF 17
109 | #define IPV6_MULTICAST_HOPS 18
110 | #define IPV6_MULTICAST_LOOP 19
111 | #define IPV6_ADD_MEMBERSHIP 20
112 | #define IPV6_DROP_MEMBERSHIP 21
113 | #define IPV6_ROUTER_ALERT 22
114 | #define IPV6_MTU_DISCOVER 23
115 | #define IPV6_MTU 24
116 | #define IPV6_RECVERR 25
117 | #define IPV6_V6ONLY 26
118 | #define IPV6_JOIN_ANYCAST 27
119 | #define IPV6_LEAVE_ANYCAST 28
120 |
121 | /* IPV6_MTU_DISCOVER values */
122 | #define IPV6_PMTUDISC_DONT 0
123 | #define IPV6_PMTUDISC_WANT 1
124 | #define IPV6_PMTUDISC_DO 2
125 | #define IPV6_PMTUDISC_PROBE 3
126 | /* same as IPV6_PMTUDISC_PROBE, provided for symmetry with IPv4
127 | * also see comments on IP_PMTUDISC_INTERFACE
128 | */
129 | #define IPV6_PMTUDISC_INTERFACE 4
130 | /* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to
131 | * get fragmented if they exceed the interface mtu
132 | */
133 | #define IPV6_PMTUDISC_OMIT 5
134 |
135 | /* Flowlabel */
136 | #define IPV6_FLOWLABEL_MGR 32
137 | #define IPV6_FLOWINFO_SEND 33
138 |
139 | #define IPV6_IPSEC_POLICY 34
140 | #define IPV6_XFRM_POLICY 35
141 | #define IPV6_HDRINCL 36
142 |
143 | /*
144 | * Multicast Routing:
145 | * see include/uapi/linux/mroute6.h.
146 | *
147 | * MRT6_BASE 200
148 | * ...
149 | * MRT6_MAX
150 | */
151 | #endif /* _LINUX_IN6_H */
152 |
--------------------------------------------------------------------------------
/bpf/headers/linux/ip.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | /*
4 | * INET An implementation of the TCP/IP protocol suite for the LINUX
5 | * operating system. INET is implemented using the BSD Socket
6 | * interface as the means of communication with the user level.
7 | *
8 | * Definitions for the IP protocol.
9 | *
10 | * Version: @(#)ip.h 1.0.2 04/28/93
11 | *
12 | * Authors: Fred N. van Kempen,
13 | *
14 | * This program is free software; you can redistribute it and/or
15 | * modify it under the terms of the GNU General Public License
16 | * as published by the Free Software Foundation; either version
17 | * 2 of the License, or (at your option) any later version.
18 | */
19 | #ifndef _LINUX_IP_H
20 | #define _LINUX_IP_H
21 |
22 | #include
23 | #include
24 |
25 | #define IPTOS_TOS_MASK 0x1E
26 | #define IPTOS_TOS(tos) ((tos)&IPTOS_TOS_MASK)
27 | #define IPTOS_LOWDELAY 0x10
28 | #define IPTOS_THROUGHPUT 0x08
29 | #define IPTOS_RELIABILITY 0x04
30 | #define IPTOS_MINCOST 0x02
31 |
32 | #define IPTOS_PREC_MASK 0xE0
33 | #define IPTOS_PREC(tos) ((tos)&IPTOS_PREC_MASK)
34 | #define IPTOS_PREC_NETCONTROL 0xe0
35 | #define IPTOS_PREC_INTERNETCONTROL 0xc0
36 | #define IPTOS_PREC_CRITIC_ECP 0xa0
37 | #define IPTOS_PREC_FLASHOVERRIDE 0x80
38 | #define IPTOS_PREC_FLASH 0x60
39 | #define IPTOS_PREC_IMMEDIATE 0x40
40 | #define IPTOS_PREC_PRIORITY 0x20
41 | #define IPTOS_PREC_ROUTINE 0x00
42 |
43 |
44 | /* IP options */
45 | #define IPOPT_COPY 0x80
46 | #define IPOPT_CLASS_MASK 0x60
47 | #define IPOPT_NUMBER_MASK 0x1f
48 |
49 | #define IPOPT_COPIED(o) ((o)&IPOPT_COPY)
50 | #define IPOPT_CLASS(o) ((o)&IPOPT_CLASS_MASK)
51 | #define IPOPT_NUMBER(o) ((o)&IPOPT_NUMBER_MASK)
52 |
53 | #define IPOPT_CONTROL 0x00
54 | #define IPOPT_RESERVED1 0x20
55 | #define IPOPT_MEASUREMENT 0x40
56 | #define IPOPT_RESERVED2 0x60
57 |
58 | #define IPOPT_END (0 |IPOPT_CONTROL)
59 | #define IPOPT_NOOP (1 |IPOPT_CONTROL)
60 | #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY)
61 | #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY)
62 | #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT)
63 | #define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY)
64 | #define IPOPT_RR (7 |IPOPT_CONTROL)
65 | #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY)
66 | #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY)
67 | #define IPOPT_RA (20|IPOPT_CONTROL|IPOPT_COPY)
68 |
69 | #define IPVERSION 4
70 | #define MAXTTL 255
71 | #define IPDEFTTL 64
72 |
73 | #define IPOPT_OPTVAL 0
74 | #define IPOPT_OLEN 1
75 | #define IPOPT_OFFSET 2
76 | #define IPOPT_MINOFF 4
77 | #define MAX_IPOPTLEN 40
78 | #define IPOPT_NOP IPOPT_NOOP
79 | #define IPOPT_EOL IPOPT_END
80 | #define IPOPT_TS IPOPT_TIMESTAMP
81 |
82 | #define IPOPT_TS_TSONLY 0 /* timestamps only */
83 | #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
84 | #define IPOPT_TS_PRESPEC 3 /* specified modules only */
85 |
86 | #define IPV4_BEET_PHMAXLEN 8
87 |
88 | struct iphdr {
89 | #if defined(__LITTLE_ENDIAN_BITFIELD)
90 | __u8 ihl:4,
91 | version:4;
92 | #elif defined (__BIG_ENDIAN_BITFIELD)
93 | __u8 version:4,
94 | ihl:4;
95 | #else
96 | #error "Please fix <asm/byteorder.h>"
97 | #endif
98 | __u8 tos;
99 | __be16 tot_len;
100 | __be16 id;
101 | __be16 frag_off;
102 | __u8 ttl;
103 | __u8 protocol;
104 | __sum16 check;
105 | __be32 saddr;
106 | __be32 daddr;
107 | /*The options start here. */
108 | };
109 |
110 |
111 | struct ip_auth_hdr {
112 | __u8 nexthdr;
113 | __u8 hdrlen; /* This one is measured in 32 bit units! */
114 | __be16 reserved;
115 | __be32 spi;
116 | __be32 seq_no; /* Sequence number */
117 | __u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! */
118 | };
119 |
120 | struct ip_esp_hdr {
121 | __be32 spi;
122 | __be32 seq_no; /* Sequence number */
123 | __u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */
124 | };
125 |
126 | struct ip_comp_hdr {
127 | __u8 nexthdr;
128 | __u8 flags;
129 | __be16 cpi;
130 | };
131 |
132 | struct ip_beet_phdr {
133 | __u8 nexthdr;
134 | __u8 hdrlen;
135 | __u8 padlen;
136 | __u8 reserved;
137 | };
138 |
139 | #endif /* _LINUX_IP_H */
140 |
--------------------------------------------------------------------------------
/bpf/headers/linux/ipv6.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 | /* Copyright Authors of the Linux kernel */
3 | #ifndef _IPV6_H
4 | #define _IPV6_H
5 |
6 | #include
7 | #include
8 |
9 | /* The latest drafts declared increase in minimal mtu up to 1280. */
10 |
11 | #define IPV6_MIN_MTU 1280
12 |
13 | /*
14 | * Advanced API
15 | * source interface/address selection, source routing, etc...
16 | * *under construction*
17 | */
18 |
19 | #if __UAPI_DEF_IN6_PKTINFO
20 | struct in6_pktinfo {
21 | struct in6_addr ipi6_addr;
22 | int ipi6_ifindex;
23 | };
24 | #endif
25 |
26 | #if __UAPI_DEF_IP6_MTUINFO
27 | struct ip6_mtuinfo {
28 | struct sockaddr_in6 ip6m_addr;
29 | __u32 ip6m_mtu;
30 | };
31 | #endif
32 |
33 | struct in6_ifreq {
34 | struct in6_addr ifr6_addr;
35 | __u32 ifr6_prefixlen;
36 | int ifr6_ifindex;
37 | };
38 |
39 | #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */
40 | #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */
41 | #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */
42 | #define IPV6_SRCRT_TYPE_3 3 /* RPL Segment Routing with IPv6 */
43 | #define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */
44 |
45 | /*
46 | * routing header
47 | */
48 | struct ipv6_rt_hdr {
49 | __u8 nexthdr;
50 | __u8 hdrlen;
51 | __u8 type;
52 | __u8 segments_left;
53 |
54 | /*
55 | * type specific data
56 | * variable length field
57 | */
58 | };
59 |
60 |
61 | struct ipv6_opt_hdr {
62 | __u8 nexthdr;
63 | __u8 hdrlen;
64 | /*
65 | * TLV encoded option data follows.
66 | */
67 | } __attribute__((packed)); /* required for some archs */
68 |
69 | #define ipv6_destopt_hdr ipv6_opt_hdr
70 | #define ipv6_hopopt_hdr ipv6_opt_hdr
71 |
72 | /* Router Alert option values (RFC2711) */
73 | #define IPV6_OPT_ROUTERALERT_MLD 0x0000 /* MLD(RFC2710) */
74 |
75 | /*
76 | * routing header type 0 (used in cmsghdr struct)
77 | */
78 |
79 | struct rt0_hdr {
80 | struct ipv6_rt_hdr rt_hdr;
81 | __u32 reserved;
82 | struct in6_addr addr[0];
83 |
84 | #define rt0_type rt_hdr.type
85 | };
86 |
87 | /*
88 | * routing header type 2
89 | */
90 |
91 | struct rt2_hdr {
92 | struct ipv6_rt_hdr rt_hdr;
93 | __u32 reserved;
94 | struct in6_addr addr;
95 |
96 | #define rt2_type rt_hdr.type
97 | };
98 |
99 | /*
100 | * home address option in destination options header
101 | */
102 |
103 | struct ipv6_destopt_hao {
104 | __u8 type;
105 | __u8 length;
106 | struct in6_addr addr;
107 | } __attribute__((packed));
108 |
109 | /*
110 | * IPv6 fixed header
111 | *
112 | * BEWARE, it is incorrect. The first 4 bits of flow_lbl
113 | * are glued to priority now, forming "class".
114 | */
115 |
116 | struct ipv6hdr {
117 | #if defined(__LITTLE_ENDIAN_BITFIELD)
118 | __u8 priority:4,
119 | version:4;
120 | #elif defined(__BIG_ENDIAN_BITFIELD)
121 | __u8 version:4,
122 | priority:4;
123 | #else
124 | #error "Please fix <asm/byteorder.h>"
125 | #endif
126 | __u8 flow_lbl[3];
127 |
128 | __be16 payload_len;
129 | __u8 nexthdr;
130 | __u8 hop_limit;
131 |
132 | struct in6_addr saddr;
133 | struct in6_addr daddr;
134 | };
135 |
136 |
137 | /* index values for the variables in ipv6_devconf */
138 | enum {
139 | DEVCONF_FORWARDING = 0,
140 | DEVCONF_HOPLIMIT,
141 | DEVCONF_MTU6,
142 | DEVCONF_ACCEPT_RA,
143 | DEVCONF_ACCEPT_REDIRECTS,
144 | DEVCONF_AUTOCONF,
145 | DEVCONF_DAD_TRANSMITS,
146 | DEVCONF_RTR_SOLICITS,
147 | DEVCONF_RTR_SOLICIT_INTERVAL,
148 | DEVCONF_RTR_SOLICIT_DELAY,
149 | DEVCONF_USE_TEMPADDR,
150 | DEVCONF_TEMP_VALID_LFT,
151 | DEVCONF_TEMP_PREFERED_LFT,
152 | DEVCONF_REGEN_MAX_RETRY,
153 | DEVCONF_MAX_DESYNC_FACTOR,
154 | DEVCONF_MAX_ADDRESSES,
155 | DEVCONF_FORCE_MLD_VERSION,
156 | DEVCONF_ACCEPT_RA_DEFRTR,
157 | DEVCONF_ACCEPT_RA_PINFO,
158 | DEVCONF_ACCEPT_RA_RTR_PREF,
159 | DEVCONF_RTR_PROBE_INTERVAL,
160 | DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN,
161 | DEVCONF_PROXY_NDP,
162 | DEVCONF_OPTIMISTIC_DAD,
163 | DEVCONF_ACCEPT_SOURCE_ROUTE,
164 | DEVCONF_MC_FORWARDING,
165 | DEVCONF_DISABLE_IPV6,
166 | DEVCONF_ACCEPT_DAD,
167 | DEVCONF_FORCE_TLLAO,
168 | DEVCONF_NDISC_NOTIFY,
169 | DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL,
170 | DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL,
171 | DEVCONF_SUPPRESS_FRAG_NDISC,
172 | DEVCONF_ACCEPT_RA_FROM_LOCAL,
173 | DEVCONF_USE_OPTIMISTIC,
174 | DEVCONF_ACCEPT_RA_MTU,
175 | DEVCONF_STABLE_SECRET,
176 | DEVCONF_USE_OIF_ADDRS_ONLY,
177 | DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT,
178 | DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
179 | DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
180 | DEVCONF_DROP_UNSOLICITED_NA,
181 | DEVCONF_KEEP_ADDR_ON_DOWN,
182 | DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
183 | DEVCONF_SEG6_ENABLED,
184 | DEVCONF_SEG6_REQUIRE_HMAC,
185 | DEVCONF_ENHANCED_DAD,
186 | DEVCONF_ADDR_GEN_MODE,
187 | DEVCONF_DISABLE_POLICY,
188 | DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN,
189 | DEVCONF_NDISC_TCLASS,
190 | DEVCONF_RPL_SEG_ENABLED,
191 | DEVCONF_RA_DEFRTR_METRIC,
192 | DEVCONF_IOAM6_ENABLED,
193 | DEVCONF_IOAM6_ID,
194 | DEVCONF_IOAM6_ID_WIDE,
195 | DEVCONF_NDISC_EVICT_NOCARRIER,
196 | DEVCONF_ACCEPT_UNTRACKED_NA,
197 | DEVCONF_MAX
198 | };
199 |
200 |
201 | #endif /* _IPV6_H */
202 |
--------------------------------------------------------------------------------
/bpf/headers/linux/types.h:
--------------------------------------------------------------------------------
1 | #ifndef _LINUX_TYPES_H
2 | #define _LINUX_TYPES_H
3 |
4 | #include "types_mapper.h"
5 |
6 | #endif /* _LINUX_TYPES_H */
7 |
--------------------------------------------------------------------------------
/bpf/headers/linux/types_mapper.h:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
2 | /* Copyright Authors of Cilium */
3 |
4 | #ifndef __BPF_TYPES_MAPPER__
5 | #define __BPF_TYPES_MAPPER__
6 |
7 | typedef __signed__ char __s8;
8 | typedef unsigned char __u8;
9 |
10 | typedef __signed__ short __s16;
11 | typedef unsigned short __u16;
12 |
13 | typedef __signed__ int __s32;
14 | typedef unsigned int __u32;
15 |
16 | typedef __signed__ long long __s64;
17 | typedef unsigned long long __u64;
18 |
19 | typedef __u16 __le16;
20 | typedef __u16 __be16;
21 |
22 | typedef __u32 __le32;
23 | typedef __u32 __be32;
24 |
25 | typedef __u64 __le64;
26 | typedef __u64 __be64;
27 |
28 | typedef __u16 __sum16;
29 | typedef __u32 __wsum;
30 |
31 | typedef __u64 __aligned_u64;
32 |
33 | typedef __u64 __net_cookie;
34 | typedef __u64 __sock_cookie;
35 |
36 | #endif /* __BPF_TYPES_MAPPER__ */
37 |
--------------------------------------------------------------------------------
/bpf/headers/update.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Refresh the vendored libbpf and Cilium/Linux headers that live next to
4 | # this script. The pinned release tarballs are downloaded from GitHub into a
5 | # temporary directory, the listed files are copied into place (Linux headers
6 | # go into ./linux/), and the temporary directory is removed on exit.
7 |
8 | # Stop at the first failing command, unset variable or failed pipeline
9 | # stage instead of carrying on with a missing or corrupt download.
10 | set -euo pipefail
11 |
12 | LIBBPF_VERSION=0.6.1
13 | CILIUM_VERSION=1.13.1
14 |
15 | # The headers we want
16 | LIBBPF_HEADERS=(
17 | libbpf-"$LIBBPF_VERSION"/LICENSE.BSD-2-Clause
18 | libbpf-"$LIBBPF_VERSION"/src/bpf_endian.h
19 | libbpf-"$LIBBPF_VERSION"/src/bpf_helper_defs.h
20 | libbpf-"$LIBBPF_VERSION"/src/bpf_helpers.h
21 | libbpf-"$LIBBPF_VERSION"/src/bpf_tracing.h
22 | )
23 |
24 | LINUX_HEADERS=(
25 | cilium-"$CILIUM_VERSION"/bpf/include/linux/in.h
26 | cilium-"$CILIUM_VERSION"/bpf/include/linux/in6.h
27 | cilium-"$CILIUM_VERSION"/bpf/include/linux/ip.h
28 | cilium-"$CILIUM_VERSION"/bpf/include/linux/ipv6.h
29 | cilium-"$CILIUM_VERSION"/bpf/include/linux/if_ether.h
30 | cilium-"$CILIUM_VERSION"/bpf/include/linux/bpf.h
31 | cilium-"$CILIUM_VERSION"/bpf/include/linux/bpf_common.h
32 | cilium-"$CILIUM_VERSION"/bpf/include/bpf/types_mapper.h
33 | )
34 |
35 | TMP_DIR=$(mktemp -d)
36 | # Remove the scratch directory however the script ends (success or error).
37 | trap 'rm -rf "$TMP_DIR"' EXIT
38 |
39 | PROJECT_HEADERS_DIR=$(dirname "${BASH_SOURCE[0]}")
40 | LIBBPF_TAR=libbpf-v${LIBBPF_VERSION}.tar.gz
41 | CILIUM_TAR=cilium-v${CILIUM_VERSION}.tar.gz
42 |
43 | # -f makes curl fail on HTTP errors rather than saving the error page as the tarball.
44 | curl -fsSL "https://github.com/libbpf/libbpf/archive/refs/tags/v${LIBBPF_VERSION}.tar.gz" -o "${TMP_DIR}/${LIBBPF_TAR}"
45 | tar -xvf "${TMP_DIR}/${LIBBPF_TAR}" -C "${TMP_DIR}" 2> /dev/null
46 |
47 | for file in "${LIBBPF_HEADERS[@]}"; do
48 | cp "${TMP_DIR}/$file" "$PROJECT_HEADERS_DIR/"
49 | done
50 |
51 | curl -fsSL "https://github.com/cilium/cilium/archive/refs/tags/v${CILIUM_VERSION}.tar.gz" -o "${TMP_DIR}/${CILIUM_TAR}"
52 | tar -xvf "${TMP_DIR}/${CILIUM_TAR}" -C "${TMP_DIR}" 2> /dev/null
53 |
54 | for file in "${LINUX_HEADERS[@]}"; do
55 | cp "${TMP_DIR}/$file" "$PROJECT_HEADERS_DIR/linux/"
56 | done
57 |
--------------------------------------------------------------------------------
/bpf/monitor.bt:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bpftrace
2 |
3 | #include
4 | #include
5 |
6 | BEGIN
7 | { // initialise the global maps used by the kprobe:start_xmit probe
8 | @start = nsecs; // start of the current rate-sampling window
9 | @start_monitor = nsecs; // start of the current reporting window
10 | @l0_rate = (uint64)0; // last sampled rate for skb priority 0
11 | @l1_rate = (uint64)0; // last sampled rate for skb priority 1
12 | @l0_bytes = (uint64)0; // bytes seen in the current sample window, priority 0
13 | @l1_bytes = (uint64)0; // bytes seen in the current sample window, priority 1
14 | @l2_bytes = (uint64)0; // bytes seen in the current sample window, priority 2
15 | @avg_cnt = (uint64)0; // number of samples accumulated since the last report
16 | // Both intervals are in milliseconds (they are compared against ns/1000000).
17 | @sample_interval = (uint64)100;
18 | @monitor_interval = (uint64)1000;
19 | }
20 |
21 | // Per-skb accounting on start_xmit (alternative: tracepoint:net:net_dev_start_xmit, skb = args->skbaddr).
22 | kprobe:start_xmit
23 | {
24 | // arg0 is treated as the sk_buff being transmitted.
25 | $skb = ((struct sk_buff *)arg0);
26 | // Milliseconds since the current sample window started.
27 | $interval = (nsecs - @start)/1000000;
28 |
29 | if ($interval >= @sample_interval) {
30 | // Bytes per second. Multiply before dividing: 1000/$interval on its own is
31 | // integer division and truncates (to 0 once the window exceeds 1000 ms).
32 | @l0_rate = @l0_bytes*1000/$interval;
33 | @l1_rate = @l1_bytes*1000/$interval;
34 | @l2_rate = @l2_bytes*1000/$interval;
35 | @avg_l0 = @avg_l0 + @l0_rate;
36 | @avg_l1 = @avg_l1 + @l1_rate;
37 | @avg_l2 = @avg_l2 + @l2_rate;
38 |
39 | @avg_cnt = @avg_cnt + 1;
40 |
41 | /* reinit counters */
42 | @l0_bytes = 0;
43 | @l1_bytes = 0;
44 | @l2_bytes = 0;
45 | @start = nsecs;
46 | }
47 | // Milliseconds since the last report was printed.
48 | $interval = (nsecs - @start_monitor)/1000000;
49 |
50 | if ($interval >= @monitor_interval) {
51 | // Report the mean of the sampled rates, scaled from bytes/s to MBytes/s.
52 | printf("l0/l1/l2{%d}(ms): %u/%u/%u(MBytes)\n",
53 | $interval,
54 | @avg_l0/@avg_cnt/1000/1000,
55 | @avg_l1/@avg_cnt/1000/1000,
56 | @avg_l2/@avg_cnt/1000/1000);
57 | @avg_l0 = 0;
58 | @avg_l1 = 0;
59 | @avg_l2 = 0;
60 | @start_monitor = nsecs;
61 | @avg_cnt = 0;
62 | }
63 | // Attribute this skb's length to its priority level (0, 1 or 2).
64 | if ($skb->priority ==0) {
65 | @l0_bytes += $skb->len;
66 | }
67 | if ($skb->priority ==1) {
68 | @l1_bytes += $skb->len;
69 | }
70 | if ($skb->priority ==2) {
71 | @l2_bytes += $skb->len;
72 | }
73 |
74 | }
75 |
76 | END
77 | {
78 | // Empty every global map so bpftrace does not dump them when it exits.
79 | clear(@start);
80 | clear(@start_monitor);
81 | clear(@sample_interval);
82 | clear(@monitor_interval);
83 | clear(@l0_bytes);
84 | clear(@l1_bytes);
85 | clear(@l2_bytes);
86 | clear(@avg_l0);
87 | clear(@avg_l1);
88 | clear(@avg_l2);
89 | clear(@l0_rate);
90 | clear(@l1_rate);
91 | clear(@l2_rate);
92 | clear(@avg_cnt);
93 | }
--------------------------------------------------------------------------------
/bpf/qos_tc.h:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 | #include
3 | #include
4 |
5 | #ifndef __RATE_LIMIT_TC__
6 | #define __RATE_LIMIT_TC__
7 |
8 | #undef NSEC_PER_SEC
9 | #undef NSEC_PER_MSEC
10 |
11 | #define NSEC_PER_SEC (1000 * 1000 * 1000ULL)
12 | #define NSEC_PER_MSEC (1000 * 1000ULL)
13 |
14 | #define T_HORIZON_DROP (2000 * 1000 * 1000ULL)
15 |
16 | #define MEGABYTE (1000 * 1000ULL)
17 |
18 | #define MAX_PROG 30
19 |
20 | #define PRIO_ONLINE 0
21 | #define PRIO_OFFLINE_L1 1
22 | #define PRIO_OFFLINE_L2 2
23 |
24 | #define INGRESS_TRAFFIC 0
25 | #define EGRESS_TRAFFIC 1
26 |
27 | #define PROG_TC_CGROUP 0
28 | #define PROG_TC_GLOBAL 1
29 |
30 | #define DEFAULT_TC_ACT TC_ACT_PIPE
31 |
32 | struct rate_info { // value type of cgroup_rate_map
33 | __u64 bps; // NOTE(review): presumably a per-second rate; unit (bits vs bytes) is not visible here
34 | __u64 t_last; // NOTE(review): presumably the timestamp of the last update - confirm in qos_tc.c
35 | __u64 slot3; // purpose not visible in this header - TODO document
36 | };
37 |
38 | struct global_rate_cfg { // value type of the terway_global_cfg array map
39 | __u64 interval; // the interval to adjust rate
40 | __u64 hw_min_bps; // NOTE(review): hw_* presumably bound the whole device - confirm
41 | __u64 hw_max_bps;
42 |
43 | __u64 l0_min_bps; // NOTE(review): l0/l1/l2 presumably map to PRIO_ONLINE / PRIO_OFFLINE_L1 / PRIO_OFFLINE_L2
44 | __u64 l0_max_bps;
45 |
46 | __u64 l1_min_bps;
47 | __u64 l1_max_bps;
48 | __u64 l2_min_bps;
49 | __u64 l2_max_bps;
50 | };
51 |
52 | struct global_rate_info { // value type of the global_rate_map array map
53 | __u64 t_last; // NOTE(review): presumably the time of the last global adjustment - confirm
54 |
55 | __u64 t_l0_last; // one (t_*_last, *_bps, *_slot) triple is kept per level l0, l1 and l2
56 | __u64 l0_bps;
57 | __u64 l0_slot;
58 |
59 | __u64 t_l1_last;
60 | __u64 l1_bps;
61 | __u64 l1_slot;
62 |
63 | __u64 t_l2_last;
64 | __u64 l2_bps;
65 | __u64 l2_slot;
66 | };
67 |
68 | struct ip_addr { // 128-bit address as four 32-bit words; key type of pod_map
69 | __u32 d1; // NOTE(review): how an IPv4 address is laid out across d1..d4 is not visible here
70 | __u32 d2;
71 | __u32 d3;
72 | __u32 d4;
73 | };
74 |
75 | struct cgroup_info { // value type of pod_map
76 | __u32 class_id; // cgroup classid
77 | __u32 pad1; // explicit padding so the 64-bit inode below starts at offset 8
78 | __u64 inode; // cgroup inode id
79 | };
80 |
81 | struct cgroup_rate_id { // key type of cgroup_rate_map
82 | __u64 inode; // NOTE(review): presumably the cgroup inode id, as in struct cgroup_info - confirm
83 | __u32 direction; // NOTE(review): presumably INGRESS_TRAFFIC or EGRESS_TRAFFIC - confirm
84 | __u32 pad; // explicit padding; brings the key to 16 bytes
85 | };
86 |
87 | struct net_stat { // value type of the terway_net_stat array map
88 | __u64 index;
89 | __u64 ts; // NOTE(review): presumably a timestamp - confirm unit in qos_tc.c
90 | __u64 val;
91 | };
92 |
93 | /* Global map to jump into terway qos program */
94 | struct { // program array with MAX_PROG slots, pinned by name
95 | __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
96 | __uint(max_entries, MAX_PROG);
97 | __uint(key_size, sizeof(__u32)); // NOTE(review): keys are presumably PROG_TC_CGROUP / PROG_TC_GLOBAL - confirm
98 | __uint(value_size, sizeof(__u32));
99 | __uint(pinning, LIBBPF_PIN_BY_NAME);
100 | } qos_prog_map SEC(".maps");
101 |
102 | /* per pod rate limit begin */
103 |
104 | /* Global map for pod config, index by pod ip */
105 | struct { // hash map: struct ip_addr -> struct cgroup_info, pinned by name
106 | __uint(type, BPF_MAP_TYPE_HASH);
107 | __uint(key_size, sizeof(struct ip_addr));
108 | __uint(value_size, sizeof(struct cgroup_info));
109 | __uint(max_entries, 65535);
110 | __uint(pinning, LIBBPF_PIN_BY_NAME);
111 | } pod_map SEC(".maps");
112 |
113 | struct { // hash map: struct cgroup_rate_id -> struct rate_info, pinned by name
114 | __uint(type, BPF_MAP_TYPE_HASH);
115 | __uint(key_size, sizeof(struct cgroup_rate_id));
116 | __uint(value_size, sizeof(struct rate_info));
117 | __uint(max_entries, 65535);
118 | __uint(pinning, LIBBPF_PIN_BY_NAME);
119 | } cgroup_rate_map SEC(".maps");
120 | /* per pod rate limit end */
121 |
122 | /* global rate limit begin */
123 | struct { // array map of struct global_rate_cfg, pinned by name
124 | __uint(type, BPF_MAP_TYPE_ARRAY);
125 | __uint(key_size, sizeof(__u32));
126 | __uint(value_size, sizeof(struct global_rate_cfg));
127 | __uint(max_entries, 2); // NOTE(review): the two slots presumably are INGRESS_TRAFFIC (0) and EGRESS_TRAFFIC (1) - confirm
128 | __uint(pinning, LIBBPF_PIN_BY_NAME);
129 | } terway_global_cfg SEC(".maps");
130 |
131 | struct { // array map of struct global_rate_info, pinned by name
132 | __uint(type, BPF_MAP_TYPE_ARRAY);
133 | __uint(key_size, sizeof(__u32));
134 | __uint(value_size, sizeof(struct global_rate_info));
135 | __uint(max_entries, 2); // NOTE(review): the two slots presumably are INGRESS_TRAFFIC (0) and EGRESS_TRAFFIC (1) - confirm
136 | __uint(pinning, LIBBPF_PIN_BY_NAME);
137 | } global_rate_map SEC(".maps");
138 | /* global rate limit end*/
139 |
140 | struct { // array map of struct net_stat, pinned by name
141 | __uint(type, BPF_MAP_TYPE_ARRAY);
142 | __uint(key_size, sizeof(__u32));
143 | __uint(value_size, sizeof(struct net_stat));
144 | __uint(max_entries, 20); // meaning of each of the 20 slots is not visible in this header - TODO document
145 | __uint(pinning, LIBBPF_PIN_BY_NAME);
146 | } terway_net_stat SEC(".maps");
147 |
148 | #endif /* __RATE_LIMIT_TC__ */
--------------------------------------------------------------------------------
/charts/terway-qos/.helmignore:
--------------------------------------------------------------------------------
1 | # Patterns to ignore when building packages.
2 | # This supports shell glob matching, relative path matching, and
3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store
5 | # Common VCS dirs
6 | .git/
7 | .gitignore
8 | .bzr/
9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 |
--------------------------------------------------------------------------------
/charts/terway-qos/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: terway-qos
3 | description: Network QoS
4 |
5 | type: application
6 | version: 0.3.2
7 | appVersion: "0.3.2"
8 |
--------------------------------------------------------------------------------
/charts/terway-qos/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | Expand the name of the chart.
3 | */}}
4 | {{- define "terway-qos.name" -}}
5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6 | {{- end }}
7 |
8 | {{/*
9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "terway-qos.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 |
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "terway-qos.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 |
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "terway-qos.labels" -}}
37 | helm.sh/chart: {{ include "terway-qos.chart" . }}
38 | {{ include "terway-qos.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 |
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "terway-qos.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "terway-qos.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 |
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "terway-qos.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "terway-qos.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 |
--------------------------------------------------------------------------------
/charts/terway-qos/templates/clusterrole.yaml:
--------------------------------------------------------------------------------
1 | kind: ClusterRole
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | metadata:
4 | name: terway-qos
5 | labels:
6 | {{- include "terway-qos.labels" . | nindent 4 }}
7 | rules:
8 | - apiGroups:
9 | - ""
10 | resources:
11 | - events
12 | verbs:
13 | - create
14 | - update
15 | - patch
16 | - apiGroups:
17 | - ""
18 | resources:
19 | - pods
20 | - pods/status
21 | verbs:
22 | - get
23 | - watch
24 | - list
25 |
--------------------------------------------------------------------------------
/charts/terway-qos/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: rbac.authorization.k8s.io/v1
2 | kind: ClusterRoleBinding
3 | metadata:
4 | name: terway-qos
5 | labels:
6 | {{- include "terway-qos.labels" . | nindent 4 }}
7 | roleRef:
8 | apiGroup: rbac.authorization.k8s.io
9 | kind: ClusterRole
10 | name: terway-qos
11 | subjects:
12 | - kind: ServiceAccount
13 | name: terway-qos
14 | namespace: {{ .Release.Namespace }}
--------------------------------------------------------------------------------
/charts/terway-qos/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | kind: ConfigMap
2 | apiVersion: v1
3 | metadata:
4 | name: terway-qos
5 | data:
6 | global_bps_config: |
7 | hw_tx_bps_max 900000000
8 | hw_rx_bps_max 900000000
9 | offline_l1_tx_bps_min 100000000
10 | offline_l1_tx_bps_max 200000000
11 | offline_l2_tx_bps_min 100000000
12 | offline_l2_tx_bps_max 300000000
13 | offline_l1_rx_bps_min 100000000
14 | offline_l1_rx_bps_max 200000000
15 | offline_l2_rx_bps_min 100000000
16 | offline_l2_rx_bps_max 300000000
--------------------------------------------------------------------------------
/charts/terway-qos/templates/daemonset.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 | name: terway-qos
5 | labels:
6 | {{- include "terway-qos.labels" . | nindent 4 }}
7 | spec:
8 | selector:
9 | matchLabels:
10 | {{- include "terway-qos.selectorLabels" . | nindent 6 }}
11 | template:
12 | metadata:
13 | {{- with .Values.podAnnotations }}
14 | annotations:
15 | {{- toYaml . | nindent 8 }}
16 | {{- end }}
17 | labels:
18 | {{- include "terway-qos.selectorLabels" . | nindent 8 }}
19 | spec:
20 | {{- with .Values.imagePullSecrets }}
21 | imagePullSecrets:
22 | {{- toYaml . | nindent 8 }}
23 | {{- end }}
24 | serviceAccountName: terway-qos
25 | securityContext:
26 | {{- toYaml .Values.podSecurityContext | nindent 8 }}
27 | hostNetwork: true
28 | hostPID: true
29 | initContainers:
30 | - name: init
31 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
32 | imagePullPolicy: {{ .Values.image.pullPolicy }}
33 | securityContext:
34 | privileged: true
35 | command:
36 | - '/bin/init.sh'
37 | containers:
38 | - name: {{ .Chart.Name }}
39 | securityContext:
40 | privileged: true
41 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
42 | imagePullPolicy: {{ .Values.image.pullPolicy }}
43 | command:
44 | - 'qos'
45 | - 'd'
46 | {{- if .Values.qos.enableIngress }}
47 | - --enable-ingress
48 | {{- end }}
49 | {{- if .Values.qos.enableEgress }}
50 | - --enable-egress
51 | {{- end }}
52 | {{- if .Values.qos.enableCODR }}
53 | - --enable-bpf-core
54 | {{- end }}
55 | volumeMounts:
56 | - mountPath: /sys/fs/bpf
57 | name: bpffs
58 | - mountPath: /sys/fs/cgroup
59 | name: cgroupfs
60 | - mountPath: /var/lib/terway/qos
61 | name: config
62 | readOnly: true
63 | resources:
64 | {{- toYaml .Values.resources | nindent 12 }}
65 | env:
66 | - name: K8S_NODE_NAME
67 | valueFrom:
68 | fieldRef:
69 | apiVersion: v1
70 | fieldPath: spec.nodeName
71 |
72 | {{- with .Values.affinity }}
73 | affinity:
74 | {{- toYaml . | nindent 8 }}
75 | {{- end }}
76 | tolerations:
77 | - operator: "Exists"
78 | volumes:
79 | {{- if eq .Values.qos.qosConfigSource "k8s" }}
80 | - name: config
81 | configMap:
82 | name: terway-qos
83 | items:
84 | - key: global_bps_config
85 | path: global_bps_config
86 | {{- end }}
87 | - name: cgroupfs
88 | hostPath:
89 | path: /sys/fs/cgroup
90 | type: "Directory"
91 | - name: bpffs
92 | hostPath:
93 | path: /sys/fs/bpf
94 | type: "Directory"
95 | {{- if eq .Values.qos.qosConfigSource "file" }}
96 | - name: config
97 | hostPath:
98 | path: /var/lib/terway/qos
99 | type: "DirectoryOrCreate"
100 | {{- end }}
--------------------------------------------------------------------------------
/charts/terway-qos/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: terway-qos
5 | labels:
6 | {{- include "terway-qos.labels" . | nindent 4 }}
7 | {{- with .Values.serviceAccount.annotations }}
8 | annotations:
9 | {{- toYaml . | nindent 4 }}
10 | {{- end }}
11 |
12 |
--------------------------------------------------------------------------------
/charts/terway-qos/values.yaml:
--------------------------------------------------------------------------------
1 | image:
2 | repository: registry.cn-hangzhou.aliyuncs.com/acs/terway-qos
3 | pullPolicy: Always
4 | tag: "v0.3.2"
5 |
6 | imagePullSecrets: []
7 | nameOverride: ""
8 | fullnameOverride: ""
9 |
10 | serviceAccount:
11 | # Specifies whether a service account should be created
12 | create: true
13 | # Annotations to add to the service account
14 | annotations: {}
15 | # The name of the service account to use.
16 | # If not set and create is true, a name is generated using the fullname template
17 | name: ""
18 |
19 | podAnnotations: {}
20 |
21 | resources:
22 | limits:
23 | cpu: 100m
24 | memory: 128Mi
25 | requests:
26 | cpu: 100m
27 | memory: 128Mi
28 |
29 | affinity: {}
30 |
31 | qos:
32 | qosConfigSource: k8s
33 | enableIngress: true
34 | enableEgress: true
35 | enableCODR: false
36 |
37 |
--------------------------------------------------------------------------------
/cmd/bpf_bandwidth.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "github.com/spf13/cobra"
20 | )
21 |
// bpfBandwidthCmd is the "bandwidth" command (alias "bd"). It defines no
// handler of its own and only serves as a parent for subcommands.
var bpfBandwidthCmd = &cobra.Command{
	Use:     "bandwidth",
	Aliases: []string{"bd"},
}

// init registers bpfBandwidthCmd on rootCmd.
func init() {
	rootCmd.AddCommand(bpfBandwidthCmd)
}
30 |
--------------------------------------------------------------------------------
/cmd/bpf_bandwidth_list.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "os"
21 |
22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
23 |
24 | "github.com/pterm/pterm"
25 | "github.com/spf13/cobra"
26 | )
27 |
28 | var bpfBandwidthListCmd = &cobra.Command{
29 | Use: "list",
30 | Run: func(cmd *cobra.Command, args []string) {
31 | writer, err := bpf.NewMap()
32 | if err != nil {
33 | fmt.Fprintf(os.Stderr, "error init bpf map %v", err)
34 | os.Exit(1)
35 | }
36 | defer writer.Close()
37 | ing, eg, err := writer.GetGlobalConfig()
38 | if err != nil {
39 | fmt.Fprintf(os.Stderr, "error get global config %v", err)
40 | os.Exit(1)
41 | }
42 | err = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
43 | {"config", "l0", "l1", "l2"},
44 | {"rx-max", fmt.Sprintf("%d", ing.HwGuaranteed), fmt.Sprintf("%d", ing.L1MaxBps), fmt.Sprintf("%d", ing.L2MaxBps)},
45 | {"rx-min", fmt.Sprintf("%d", ing.L0MinBps), fmt.Sprintf("%d", ing.L1MinBps), fmt.Sprintf("%d", ing.L2MinBps)},
46 | {"tx-max", fmt.Sprintf("%d", eg.HwGuaranteed), fmt.Sprintf("%d", eg.L1MaxBps), fmt.Sprintf("%d", eg.L2MaxBps)},
47 | {"tx-min", fmt.Sprintf("%d", eg.L0MinBps), fmt.Sprintf("%d", eg.L1MinBps), fmt.Sprintf("%d", eg.L2MinBps)},
48 | }).Render()
49 | if err != nil {
50 | fmt.Fprintf(os.Stderr, "error get global config %v", err)
51 | os.Exit(1)
52 | }
53 |
54 | ingRate, egressRate := writer.GetGlobalRateLimit()
55 | _ = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
56 | {"limit", "L0", "L1", "L2"},
57 | {"tx-max", fmt.Sprintf("%d", egressRate.L0Bps), fmt.Sprintf("%d", egressRate.L1Bps), fmt.Sprintf("%d", egressRate.L2Bps)},
58 | {"t_last", fmt.Sprintf("%d", egressRate.L0LastTimestamp), fmt.Sprintf("%d", egressRate.L1LastTimestamp), fmt.Sprintf("%d", egressRate.L2LastTimestamp)},
59 | {"slot", fmt.Sprintf("%d", egressRate.L0Slot), fmt.Sprintf("%d", egressRate.L1Slot), fmt.Sprintf("%d", egressRate.L2Slot)},
60 | }).Render()
61 |
62 | _ = pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
63 | {"limit", "L0", "L1", "L2"},
64 | {"rx-max", fmt.Sprintf("%d", ingRate.L0Bps), fmt.Sprintf("%d", ingRate.L1Bps), fmt.Sprintf("%d", ingRate.L2Bps)},
65 | {"t_last", fmt.Sprintf("%d", ingRate.L0LastTimestamp), fmt.Sprintf("%d", ingRate.L1LastTimestamp), fmt.Sprintf("%d", ingRate.L2LastTimestamp)},
66 | {"slot", fmt.Sprintf("%d", ingRate.L0Slot), fmt.Sprintf("%d", ingRate.L1Slot), fmt.Sprintf("%d", ingRate.L2Slot)},
67 | }).Render()
68 |
69 | data := [][]string{
70 | {"stat", "index", "ts", "val"},
71 | }
72 | for _, v := range writer.GetNetStat() {
73 | data = append(data, []string{"", fmt.Sprintf("%d", v.Index), fmt.Sprintf("%d", v.TS), fmt.Sprintf("%d", v.Val)})
74 | }
75 | _ = pterm.DefaultTable.WithHasHeader().WithData(data).Render()
76 |
77 | },
78 | }
79 |
80 | func init() {
81 | bpfBandwidthCmd.AddCommand(bpfBandwidthListCmd)
82 | }
83 |
--------------------------------------------------------------------------------
/cmd/cgroup.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "github.com/spf13/cobra"
20 | )
21 |
// cgroupCmd is the "cgroup" command. It defines no handler of its own and
// only serves as a parent for subcommands.
var cgroupCmd = &cobra.Command{
	Use: "cgroup",
}

// init registers cgroupCmd on rootCmd.
func init() {
	rootCmd.AddCommand(cgroupCmd)
}
--------------------------------------------------------------------------------
/cmd/cgroup_list.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "os"
21 |
22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
23 |
24 | "github.com/pterm/pterm"
25 | "github.com/spf13/cobra"
26 | )
27 |
28 | var cgroupListCmd = &cobra.Command{
29 | Use: "list",
30 | Run: func(cmd *cobra.Command, args []string) {
31 | err := cgroupList()
32 | if err != nil {
33 | fmt.Fprintf(os.Stderr, "error read bpf map %v", err)
34 | os.Exit(1)
35 | }
36 | },
37 | }
38 |
39 | func cgroupList() error {
40 | var err error
41 |
42 | writer, err := bpf.NewMap()
43 | if err != nil {
44 | return err
45 | }
46 | defer writer.Close()
47 |
48 | tableData := pterm.TableData{
49 | {"inode", "direction", "rate"},
50 | }
51 | for k, v := range writer.ListCgroupRate() {
52 | tableData = append(tableData, []string{fmt.Sprintf("%d", k.Inode), fmt.Sprintf("%d", k.Direction), fmt.Sprintf("%d", v.LimitBps)})
53 | }
54 |
55 | return pterm.DefaultTable.WithHasHeader().WithData(tableData).Render()
56 | }
57 |
// init registers cgroupListCmd as a subcommand of cgroupCmd.
func init() {
	cgroupCmd.AddCommand(cgroupListCmd)
}
61 |
--------------------------------------------------------------------------------
/cmd/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "time"
21 |
22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
23 | "github.com/AliyunContainerService/terway-qos/pkg/types"
24 |
25 | "github.com/pterm/pterm"
26 | "github.com/spf13/cobra"
27 | )
28 |
// direction is bound to the pod command's --direction flag.
var direction string

// watch is bound to the "global get" command's --w flag.
var watch bool

// Values bound to the persistent flags of the pod command.
var (
	cgroupPath string // --cgroup
	ipv4       string // --ipv4
	ipv6       string // --ipv6
	rate       uint64 // --rate, bytes/s
	priority   int    // --prio
)

// Values bound to the persistent flags of the "global set" command.
var (
	hwRxGuaranteedRate uint64 // --hw-rx
	hwTxGuaranteedRate uint64 // --hw-tx

	adjustInterval uint64 // --interval

	l1RxMaxRate uint64 // --l1-rx-max
	l1RxMinRate uint64 // --l1-rx-min

	l1TxMaxRate uint64 // --l1-tx-max
	l1TxMinRate uint64 // --l1-tx-min

	l2RxMaxRate uint64 // --l2-rx-max
	l2RxMinRate uint64 // --l2-rx-min

	l2TxMaxRate uint64 // --l2-tx-max
	l2TxMinRate uint64 // --l2-tx-min
)
58 |
// configCmd is the "config" command. It defines no handler of its own and
// only serves as a parent for subcommands.
var configCmd = &cobra.Command{
	Use:   "config",
	Short: "config qos",
}
64 |
65 | var globalCmd = &cobra.Command{
66 | Use: "global",
67 | Short: "g",
68 | }
69 |
70 | var globalSetCmd = &cobra.Command{
71 | Use: "set",
72 | RunE: func(cmd *cobra.Command, args []string) error {
73 | writer, err := bpf.NewMap()
74 | if err != nil {
75 | return err
76 | }
77 | defer writer.Close()
78 |
79 | egress := &types.GlobalConfig{
80 | HwGuaranteed: hwTxGuaranteedRate,
81 | HwBurstableBps: hwTxGuaranteedRate,
82 | L1MaxBps: l1TxMaxRate,
83 | L1MinBps: l1TxMinRate,
84 | L2MaxBps: l2TxMaxRate,
85 | L2MinBps: l2TxMinRate,
86 | }
87 | ingress := &types.GlobalConfig{
88 | HwGuaranteed: hwRxGuaranteedRate,
89 | HwBurstableBps: hwRxGuaranteedRate,
90 | L1MaxBps: l1RxMaxRate,
91 | L1MinBps: l1RxMinRate,
92 | L2MaxBps: l2RxMaxRate,
93 | L2MinBps: l2RxMinRate,
94 | }
95 |
96 | err = writer.WriteGlobalConfig(ingress, egress)
97 | if err != nil {
98 | return err
99 | }
100 |
101 | return nil
102 | },
103 | }
104 |
105 | var globalGetCmd = &cobra.Command{
106 | Use: "get",
107 | RunE: func(cmd *cobra.Command, args []string) error {
108 | writer, err := bpf.NewMap()
109 | if err != nil {
110 | return err
111 | }
112 | defer writer.Close()
113 | ing, eg, err := writer.GetGlobalConfig()
114 | if err != nil {
115 | return err
116 | }
117 |
118 | return pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
119 | {"", "L0", "L1", "L2"},
120 | {"Rx-Max", fmt.Sprintf("%d", ing.HwGuaranteed), fmt.Sprintf("%d", ing.L1MaxBps), fmt.Sprintf("%d", ing.L2MaxBps)},
121 | {"Rx-Min", fmt.Sprintf("%d", ing.L0MinBps), fmt.Sprintf("%d", ing.L1MinBps), fmt.Sprintf("%d", ing.L2MinBps)},
122 | {"Tx-Max", fmt.Sprintf("%d", eg.HwGuaranteed), fmt.Sprintf("%d", eg.L1MaxBps), fmt.Sprintf("%d", eg.L2MaxBps)},
123 | {"Tx-Min", fmt.Sprintf("%d", eg.L0MinBps), fmt.Sprintf("%d", eg.L1MinBps), fmt.Sprintf("%d", eg.L2MinBps)},
124 | }).Render()
125 | },
126 | }
127 |
128 | var globalRateCetCmd = &cobra.Command{
129 | Use: "rate",
130 | RunE: func(cmd *cobra.Command, args []string) error {
131 | writer, err := bpf.NewMap()
132 | if err != nil {
133 | return err
134 | }
135 | defer writer.Close()
136 | _, eg := writer.GetGlobalRateLimit()
137 | if err != nil {
138 | return err
139 | }
140 |
141 | return pterm.DefaultTable.WithHasHeader().WithData(pterm.TableData{
142 | {"", "L0", "L1", "L2"},
143 | {"Tx-Max", fmt.Sprintf("%d", eg.L0Bps), fmt.Sprintf("%d", eg.L1Bps), fmt.Sprintf("%d", eg.L2Bps)},
144 | {"last", fmt.Sprintf("%d", eg.L0LastTimestamp), fmt.Sprintf("%d", eg.L1LastTimestamp), fmt.Sprintf("%d", eg.L2LastTimestamp)},
145 | {"start", fmt.Sprintf("%d", eg.LastTimestamp), fmt.Sprintf("%d", eg.LastTimestamp), fmt.Sprintf("%d", eg.LastTimestamp)},
146 | }).Render()
147 | },
148 | }
149 |
150 | func init() {
151 | rootCmd.AddCommand(configCmd)
152 | rootCmd.AddCommand(globalRateCetCmd)
153 | configCmd.AddCommand(podCmd, globalCmd)
154 |
155 | globalCmd.AddCommand(globalSetCmd, globalGetCmd)
156 | globalSetCmd.PersistentFlags().Uint64Var(&adjustInterval, "interval", uint64(1*time.Second), "interval to adjust bandwidth")
157 | globalSetCmd.PersistentFlags().Uint64Var(&hwRxGuaranteedRate, "hw-rx", 0, "")
158 | globalSetCmd.PersistentFlags().Uint64Var(&hwTxGuaranteedRate, "hw-tx", 0, "")
159 | globalSetCmd.PersistentFlags().Uint64Var(&l1TxMaxRate, "l1-tx-max", 0, "")
160 | globalSetCmd.PersistentFlags().Uint64Var(&l1TxMinRate, "l1-tx-min", 0, "")
161 | globalSetCmd.PersistentFlags().Uint64Var(&l2TxMaxRate, "l2-tx-max", 0, "")
162 | globalSetCmd.PersistentFlags().Uint64Var(&l2TxMinRate, "l2-tx-min", 0, "")
163 |
164 | globalSetCmd.PersistentFlags().Uint64Var(&l1RxMaxRate, "l1-rx-max", 0, "")
165 | globalSetCmd.PersistentFlags().Uint64Var(&l1RxMinRate, "l1-rx-min", 0, "")
166 | globalSetCmd.PersistentFlags().Uint64Var(&l2RxMaxRate, "l2-rx-max", 0, "")
167 | globalSetCmd.PersistentFlags().Uint64Var(&l2RxMinRate, "l2-rx-min", 0, "")
168 |
169 | _ = globalSetCmd.MarkPersistentFlagRequired("hw-rx")
170 | _ = globalSetCmd.MarkPersistentFlagRequired("hw-tx")
171 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-tx-max")
172 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-tx-min")
173 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-tx-max")
174 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-tx-min")
175 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-rx-max")
176 | _ = globalSetCmd.MarkPersistentFlagRequired("l1-rx-min")
177 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-rx-max")
178 | _ = globalSetCmd.MarkPersistentFlagRequired("l2-rx-min")
179 |
180 | globalGetCmd.PersistentFlags().BoolVar(&watch, "w", false, "watch")
181 | }
182 |
--------------------------------------------------------------------------------
/cmd/damon.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "net"
21 | "os"
22 |
23 | "github.com/spf13/pflag"
24 | "github.com/spf13/viper"
25 | "github.com/vishvananda/netlink"
26 | "k8s.io/klog/v2"
27 |
28 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
29 | "github.com/AliyunContainerService/terway-qos/pkg/config"
30 | "github.com/AliyunContainerService/terway-qos/pkg/k8s"
31 | "github.com/AliyunContainerService/terway-qos/pkg/version"
32 |
33 | "github.com/spf13/cobra"
34 | "k8s.io/klog/v2/klogr"
35 | ctrl "sigs.k8s.io/controller-runtime"
36 | )
37 |
// Names of the daemon command-line flags. The same strings are used as viper
// keys when the values are read back.
const (
	enableBPFCORE     = "enable-bpf-core"    // bool: enable bpf CORE
	enableIngress     = "enable-ingress"     // bool: enable ingress direction qos
	enableEgress      = "enable-egress"      // bool: enable egress direction qos
	excludeInterfaces = "exclude-interfaces" // string slice: interface names to exclude
	bpfPrio           = "bpf-prio"           // int: tc prio for the qos program
)
45 |
// init declares the daemon flags on a dedicated flag set, binds that set to
// viper, merges it into pflag.CommandLine, registers daemonCmd on rootCmd and
// registers initConfig as a cobra initializer.
func init() {
	fs := pflag.NewFlagSet("daemon", pflag.PanicOnError)
	fs.Bool(enableBPFCORE, false, "enable bpf CORE")
	fs.Bool(enableIngress, false, "enable ingress direction qos")
	fs.Bool(enableEgress, false, "enable egress direction qos")
	fs.StringSlice(excludeInterfaces, []string{}, "network interface names to exclude")
	fs.Int(bpfPrio, 90, "tc prio for the qos program")

	// The flag values are read through viper (see daemon and validDevice), so
	// the set is bound there; the bind error is discarded.
	_ = viper.BindPFlags(fs)
	pflag.CommandLine.AddFlagSet(fs)

	rootCmd.AddCommand(daemonCmd)

	cobra.OnInitialize(initConfig)
}
61 |
62 | var daemonCmd = &cobra.Command{
63 | Use: "daemon",
64 | Aliases: []string{"d"},
65 | Short: "start daemon",
66 | Run: func(cmd *cobra.Command, args []string) {
67 | klog.Infof("version: %s", version.Version)
68 | err := daemon()
69 | if err != nil {
70 | _, _ = fmt.Fprint(os.Stderr, err)
71 | os.Exit(1)
72 | }
73 | },
74 | }
75 |
76 | func daemon() error {
77 | ctx := ctrl.SetupSignalHandler()
78 | ctrl.SetLogger(klogr.New())
79 |
80 | mgr, err := bpf.NewBpfMgr(viper.GetBool(enableIngress), viper.GetBool(enableEgress), viper.GetBool(enableBPFCORE), validDevice, viper.GetInt(bpfPrio))
81 | if err != nil {
82 | return err
83 | }
84 | err = mgr.Start(ctx)
85 | if err != nil {
86 | return err
87 | }
88 | m, err := bpf.NewMap()
89 | if err != nil {
90 | return err
91 | }
92 | defer m.Close()
93 |
94 | syncer := config.NewSyncer(m)
95 | err = syncer.Start(ctx)
96 | if err != nil {
97 | return err
98 | }
99 | return k8s.StartPodHandler(ctx, syncer)
100 | }
101 |
102 | func validDevice(link netlink.Link) bool {
103 | dev, ok := link.(*netlink.Device)
104 | if !ok {
105 | return false
106 | }
107 | if dev.Attrs().Flags&net.FlagUp == 0 {
108 | return false
109 | }
110 |
111 | for _, name := range viper.GetStringSlice(excludeInterfaces) {
112 | if dev.Name == name {
113 | return false
114 | }
115 | }
116 |
117 | return dev.EncapType != "loopback"
118 | }
119 |
120 | func initConfig() {
121 | if err := viper.ReadInConfig(); err == nil {
122 | fmt.Println("Using config file:", viper.ConfigFileUsed())
123 | }
124 | }
125 |
--------------------------------------------------------------------------------
/cmd/pod.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "github.com/spf13/cobra"
20 | )
21 |
// podCmd is the "pod" command. It defines no handler of its own and only
// serves as a parent for subcommands.
var podCmd = &cobra.Command{
	Use: "pod",
}

// init registers podCmd on rootCmd.
// NOTE(review): config.go's init also adds podCmd under configCmd, so the
// same command is attached to two parents; confirm this is intended.
func init() {
	rootCmd.AddCommand(podCmd)
}
29 |
--------------------------------------------------------------------------------
/cmd/pod_list.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "os"
21 |
22 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
23 |
24 | "github.com/pterm/pterm"
25 | "github.com/spf13/cobra"
26 | )
27 |
28 | var podListCmd = &cobra.Command{
29 | Use: "list",
30 | Run: func(cmd *cobra.Command, args []string) {
31 | err := podList()
32 | if err != nil {
33 | fmt.Fprintf(os.Stderr, "error read bpf map %v", err)
34 | os.Exit(1)
35 | }
36 | },
37 | }
38 |
39 | func podList() error {
40 | var err error
41 |
42 | writer, err := bpf.NewMap()
43 | if err != nil {
44 | return err
45 | }
46 | defer writer.Close()
47 |
48 | tableData := pterm.TableData{
49 | {"ip", "class_id", "inode"},
50 | }
51 | for k, v := range writer.ListPodInfo() {
52 | tableData = append(tableData, []string{k.String(), fmt.Sprintf("%d", v.ClassID), fmt.Sprintf("%d", v.Inode)})
53 | }
54 |
55 | return pterm.DefaultTable.WithHasHeader().WithData(tableData).Render()
56 | }
57 |
// init registers podListCmd as a subcommand of podCmd.
func init() {
	podCmd.AddCommand(podListCmd)
}
61 |
--------------------------------------------------------------------------------
/cmd/pod_set.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "fmt"
20 | "net/netip"
21 | "os"
22 |
23 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
24 | "github.com/AliyunContainerService/terway-qos/pkg/types"
25 |
26 | "github.com/spf13/cobra"
27 | )
28 |
29 | var podSetCmd = &cobra.Command{
30 | Use: "set",
31 | Run: func(cmd *cobra.Command, args []string) {
32 | err := podSet()
33 | if err != nil {
34 | fmt.Fprint(os.Stderr, err)
35 | os.Exit(1)
36 | }
37 | },
38 | }
39 |
40 | func podSet() error {
41 | if ipv4 == "" && ipv6 == "" {
42 | return fmt.Errorf("ip must provided")
43 | }
44 | var err error
45 | var v4, v6 netip.Addr
46 | if ipv4 != "" {
47 | v4, err = netip.ParseAddr(ipv4)
48 | if err != nil {
49 | return err
50 | }
51 | }
52 | if ipv6 != "" {
53 | v6, err = netip.ParseAddr(ipv6)
54 | if err != nil {
55 | return err
56 | }
57 | }
58 | writer, err := bpf.NewMap()
59 | if err != nil {
60 | return err
61 | }
62 | defer writer.Close()
63 |
64 | unSet := uint64(0)
65 | return writer.WritePodInfo(&types.PodConfig{
66 | PodID: "",
67 | PodUID: "",
68 | IPv4: v4,
69 | IPv6: v6,
70 | HostNetwork: false,
71 | CgroupInfo: nil,
72 | RxBps: &unSet,
73 | TxBps: &rate,
74 | })
75 | }
76 |
77 | func init() {
78 | podCmd.AddCommand(podSetCmd)
79 |
80 | podCmd.PersistentFlags().StringVar(&direction, "direction", "egress", "ingress or egress")
81 | podCmd.PersistentFlags().StringVar(&cgroupPath, "cgroup", "", "cgroup path.")
82 | podCmd.PersistentFlags().StringVar(&ipv4, "ipv4", "", "ipv4 addr")
83 | podCmd.PersistentFlags().StringVar(&ipv6, "ipv6", "", "ipv6 addr")
84 | podCmd.PersistentFlags().Uint64Var(&rate, "rate", 0, "rate limit. bytes/s. At lease 1 MB/s, set 0 to disable rate limit")
85 | podCmd.PersistentFlags().IntVar(&priority, "prio", 0, "priority. 0,1,2")
86 |
87 | _ = podSetCmd.MarkPersistentFlagRequired("cgroup")
88 | _ = podSetCmd.MarkPersistentFlagRequired("rate")
89 | }
90 |
--------------------------------------------------------------------------------
/cmd/root.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package cmd
17 |
18 | import (
19 | "os"
20 |
21 | "github.com/spf13/cobra"
22 |
23 | "github.com/AliyunContainerService/terway-qos/pkg/version"
24 | )
25 |
// rootCmd is the base "qos" command that every other command of this package
// is attached to. Its version is taken from version.Version.
var rootCmd = &cobra.Command{
	Use:     "qos",
	Short:   "Terway QoS",
	Long:    `Terway QoS`,
	Version: version.Version,
}
33 |
34 | func Execute() {
35 | err := rootCmd.Execute()
36 | if err != nil {
37 | os.Exit(1)
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/docs/quick-start-zh_CN.md:
--------------------------------------------------------------------------------
1 | # quick-start
2 |
3 | 前提条件
4 | - Kubernetes 集群
5 | - helm
6 | - kubectl
7 |
8 | ## 安装
9 |
10 | 1. git clone 仓库 `git clone --depth=1 https://github.com/AliyunContainerService/terway-qos.git`
11 | 2. 打包 chart 并部署到集群 `helm package ./charts/terway-qos && helm install -nkube-system terway-qos .`
12 | 3. 你可以在 ConfigMap 中检查 QoS 配置 `kubectl get cm terway-qos -nkube-system -oyaml`
13 |
14 | ## 测试 QoS 功能
15 |
16 | 部署下面的 `YAML` 模板,你将得到三个不同优先级的 Pod。
17 |
18 | ```shell
19 | priority=("server" "burstable" "guaranteed" "best-effort")
20 |
21 | for prio in "${priority[@]}"
22 | do
23 | echo "$prio"
24 | kubectl apply -f - < maxRsrc.Value() {
33 | return fmt.Errorf("resource is unreasonably large (> 1Pbit)")
34 | }
35 | return nil
36 | }
37 |
38 | // ExtractPodBandwidthResources extracts the ingress and egress from the given pod annotations
39 | func ExtractPodBandwidthResources(podAnnotations map[string]string) (ingress, egress *resource.Quantity, err error) {
40 | if podAnnotations == nil {
41 | return nil, nil, nil
42 | }
43 | str, found := podAnnotations["kubernetes.io/ingress-bandwidth"]
44 | if found {
45 | ingressValue, err := resource.ParseQuantity(str)
46 | if err != nil {
47 | return nil, nil, err
48 | }
49 | ingress = &ingressValue
50 | if err := validateBandwidthIsReasonable(ingress); err != nil {
51 | return nil, nil, err
52 | }
53 | }
54 | str, found = podAnnotations["kubernetes.io/egress-bandwidth"]
55 | if found {
56 | egressValue, err := resource.ParseQuantity(str)
57 | if err != nil {
58 | return nil, nil, err
59 | }
60 | egress = &egressValue
61 | if err := validateBandwidthIsReasonable(egress); err != nil {
62 | return nil, nil, err
63 | }
64 | }
65 | return ingress, egress, nil
66 | }
67 |
--------------------------------------------------------------------------------
/pkg/bpf/compile.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | import (
19 | "fmt"
20 | "os/exec"
21 | "path/filepath"
22 | )
23 |
const (
	// progRoot is the directory that holds the bpf headers, sources and the
	// compiled object used by compile.
	progRoot = "/var/lib/terway"
	// progPath is the object file path; it matches what compile produces for
	// progName.
	progPath = "/var/lib/terway/qos_tc.o"
	// progName is the base name (without extension) of the bpf program.
	progName = "qos_tc"
)

// standardCFlags are the clang flags passed on every compile invocation.
var standardCFlags = []string{"-O2", "-target", "bpf", "-std=gnu99"}
31 |
32 | func Compile(enableEDT bool) error {
33 | custom := map[string]string{}
34 |
35 | if enableEDT {
36 | custom["FEAT_EDT"] = "1"
37 | }
38 |
39 | return compile(progName, custom)
40 | }
41 |
42 | func compile(name string, custom map[string]string) error {
43 | args := make([]string, 0, 16)
44 | args = append(args, "-g")
45 | args = append(args, standardCFlags...)
46 | args = append(args, "-I/var/lib/terway/headers")
47 | for k, v := range custom {
48 | args = append(args, fmt.Sprintf("-D%s=%s", k, v))
49 | }
50 | args = append(args, "-c")
51 | args = append(args, filepath.Join("/var/lib/terway/src", fmt.Sprintf("%s.c", name)))
52 | args = append(args, "-o")
53 | args = append(args, filepath.Join("/var/lib/terway", fmt.Sprintf("%s.o", name)))
54 |
55 | cmd := exec.Command("clang", args...)
56 | log.Info("exec", "cmd", cmd.String())
57 | out, err := cmd.CombinedOutput()
58 | if err != nil {
59 | if len(out) > 0 {
60 | log.Info(string(out))
61 | }
62 | return err
63 | }
64 | if len(out) > 0 {
65 | log.Info(string(out))
66 | }
67 | return nil
68 | }
69 |
--------------------------------------------------------------------------------
/pkg/bpf/generate.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS -strip $BPF_STRIP qos_tc ../../bpf/qos_tc.c -- -I../../bpf/headers
19 |
--------------------------------------------------------------------------------
/pkg/bpf/manager.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | import (
19 | "context"
20 | "errors"
21 | "io/fs"
22 | "os"
23 | "sync"
24 |
25 | "github.com/cilium/ebpf"
26 | "github.com/cilium/ebpf/asm"
27 | "github.com/cilium/ebpf/features"
28 | "github.com/cilium/ebpf/rlimit"
29 | "github.com/vishvananda/netlink"
30 | "golang.org/x/sys/unix"
31 | ctrl "sigs.k8s.io/controller-runtime"
32 | )
33 |
// log is the package-wide logger, named "bpf".
var log = ctrl.Log.WithName("bpf")

const (
	// tcProgName is the name set on the tc BPF filters installed by Mgr.
	tcProgName = "terway_qos"
	// pinPath is the directory used as the map pin path when the collection is loaded.
	pinPath = "/sys/fs/bpf/terway"
)

// objs holds the loaded BPF programs and maps; getBpfObj populates it once.
var objs *qos_tcObjects

// once guards the one-time initialisation performed in getBpfObj.
var once sync.Once
43 |
44 | func getBpfObj(enableCORE bool) *qos_tcObjects {
45 | once.Do(func() {
46 | err := rlimit.RemoveMemlock()
47 | if err != nil {
48 | log.Error(err, "remove memlock failed")
49 | os.Exit(1)
50 | }
51 | err = os.MkdirAll(pinPath, os.ModeDir)
52 | if err != nil {
53 | log.Error(err, "mkdir failed")
54 | os.Exit(1)
55 | }
56 |
57 | featEDT := false
58 | err = features.HaveProgramHelper(ebpf.SchedCLS, asm.FnSkbEcnSetCe)
59 | if err != nil {
60 | if !errors.Is(err, ebpf.ErrNotSupported) {
61 | log.Error(err, "check kernel version failed")
62 | os.Exit(1)
63 | }
64 | } else {
65 | featEDT = true
66 | }
67 |
68 | objs = &qos_tcObjects{}
69 |
70 | opts := &ebpf.CollectionOptions{
71 | Maps: ebpf.MapOptions{
72 | PinPath: pinPath,
73 | LoadPinOptions: ebpf.LoadPinOptions{},
74 | },
75 | Programs: ebpf.ProgramOptions{},
76 | MapReplacements: nil,
77 | }
78 |
79 | if enableCORE {
80 | err := loadQos_tcObjects(objs, opts)
81 | if err != nil {
82 | log.Error(err, "load bpf objects failed")
83 | os.Exit(1)
84 | }
85 | } else {
86 | err := Compile(featEDT)
87 | if err != nil {
88 | log.Error(err, "compile bpf failed")
89 | os.Exit(1)
90 | }
91 |
92 | spec, err := ebpf.LoadCollectionSpec(progPath)
93 | if err != nil {
94 | log.Error(err, "load bpf objects failed")
95 | os.Exit(1)
96 | }
97 | err = spec.LoadAndAssign(objs, opts)
98 | if err != nil {
99 | log.Error(err, "load bpf objects failed")
100 | os.Exit(1)
101 | }
102 | }
103 |
104 | })
105 | return objs
106 | }
107 |
// validateDeviceFunc reports whether the BPF programs should be managed on the
// given link; ensureBpfProg skips every link for which it returns false.
type validateDeviceFunc = func(link netlink.Link) bool

// Mgr attaches the QoS tc programs to network links and keeps them attached
// as link updates arrive.
type Mgr struct {
	// nlEvent receives link updates from the netlink subscription made in Start.
	nlEvent chan netlink.LinkUpdate

	// enableIngress / enableEgress select, per direction, whether the filter
	// is installed (true) or removed (false) on each managed link.
	enableIngress, enableEgress bool

	// obj is the loaded BPF collection returned by getBpfObj.
	obj *qos_tcObjects

	// prio is the tc filter priority; it is converted to uint16 when used.
	prio int

	// validate decides which links are managed.
	validate validateDeviceFunc
}
121 |
122 | func NewBpfMgr(enableIngress, enableEgress, enableCORE bool, validate validateDeviceFunc, prio int) (*Mgr, error) {
123 | return &Mgr{
124 | nlEvent: make(chan netlink.LinkUpdate),
125 | obj: getBpfObj(enableCORE),
126 | enableEgress: enableEgress,
127 | enableIngress: enableIngress,
128 | validate: validate,
129 | prio: prio,
130 | }, nil
131 | }
132 |
133 | func (m *Mgr) Start(ctx context.Context) error {
134 | links, err := netlink.LinkList()
135 | if err != nil {
136 | return err
137 | }
138 | for _, link := range links {
139 | err = m.ensureBpfProg(link)
140 | if err != nil {
141 | log.Error(err, "attach bpf prog failed")
142 | return err
143 | }
144 | }
145 |
146 | err = netlink.LinkSubscribe(m.nlEvent, ctx.Done())
147 | if err != nil {
148 | return err
149 | }
150 |
151 | go func() {
152 | for e := range m.nlEvent {
153 | err = m.ensureBpfProg(e.Link)
154 | if err != nil {
155 | log.Error(err, "attach bpf prog failed")
156 | }
157 | }
158 | }()
159 |
160 | return nil
161 | }
162 |
163 | func (m *Mgr) Close() {
164 | if m.obj != nil {
165 | m.obj.Close()
166 | }
167 | }
168 |
169 | func (m *Mgr) ensureBpfProg(link netlink.Link) error {
170 | if !m.validate(link) {
171 | return nil
172 | }
173 |
174 | err := ensureQdisc([]netlink.Link{link})
175 | if err != nil {
176 | return err
177 | }
178 |
179 | ingressFilter := &netlink.BpfFilter{
180 | FilterAttrs: netlink.FilterAttrs{
181 | LinkIndex: link.Attrs().Index,
182 | Parent: netlink.HANDLE_MIN_INGRESS,
183 | Handle: netlink.MakeHandle(0, 1),
184 | Protocol: unix.ETH_P_ALL,
185 | Priority: uint16(m.prio),
186 | },
187 | Fd: int(m.obj.qos_tcPrograms.QosProgIngress.FD()),
188 | Name: tcProgName,
189 | DirectAction: true,
190 | }
191 | if m.enableIngress {
192 | err = netlink.FilterReplace(ingressFilter)
193 | if err != nil {
194 | return err
195 | }
196 |
197 | log.Info("set bpf ingress", "dev", link.Attrs().Name)
198 |
199 | err = m.obj.QosProgMap.Put(uint32(0), uint32(m.obj.QosCgroup.FD()))
200 | if err != nil {
201 | return err
202 | }
203 | err = m.obj.QosProgMap.Put(uint32(1), uint32(m.obj.QosGlobal.FD()))
204 | if err != nil {
205 | return err
206 | }
207 | } else {
208 | err = netlink.FilterDel(ingressFilter)
209 | if err != nil {
210 | if !errors.Is(err, fs.ErrNotExist) {
211 | log.Error(err, "delete bpf prog failed")
212 | }
213 | }
214 | }
215 |
216 | egressFilter := &netlink.BpfFilter{
217 | FilterAttrs: netlink.FilterAttrs{
218 | LinkIndex: link.Attrs().Index,
219 | Parent: netlink.HANDLE_MIN_EGRESS,
220 | Handle: netlink.MakeHandle(0, 1),
221 | Protocol: unix.ETH_P_ALL,
222 | Priority: uint16(m.prio),
223 | },
224 | Fd: int(m.obj.qos_tcPrograms.QosProgEgress.FD()),
225 | Name: tcProgName,
226 | DirectAction: true,
227 | }
228 | if m.enableEgress {
229 | err = netlink.FilterReplace(egressFilter)
230 | if err != nil {
231 | return err
232 | }
233 |
234 | log.Info("set bpf egress", "dev", link.Attrs().Name)
235 |
236 | err = m.obj.QosProgMap.Put(uint32(0), uint32(m.obj.QosCgroup.FD()))
237 | if err != nil {
238 | return err
239 | }
240 | err = m.obj.QosProgMap.Put(uint32(1), uint32(m.obj.QosGlobal.FD()))
241 | if err != nil {
242 | return err
243 | }
244 | } else {
245 | err = netlink.FilterDel(egressFilter)
246 | if err != nil {
247 | if !errors.Is(err, fs.ErrNotExist) {
248 | log.Error(err, "delete bpf prog failed")
249 | }
250 | }
251 | }
252 |
253 | return nil
254 | }
255 |
256 | func ensureQdisc(links []netlink.Link) error {
257 | for _, link := range links {
258 | qdisc := &netlink.GenericQdisc{
259 | QdiscAttrs: netlink.QdiscAttrs{
260 | LinkIndex: link.Attrs().Index,
261 | Parent: netlink.HANDLE_CLSACT,
262 | Handle: netlink.HANDLE_CLSACT & 0xffff0000,
263 | },
264 | QdiscType: "clsact",
265 | }
266 | err := netlink.QdiscReplace(qdisc)
267 | if err != nil {
268 | return err
269 | }
270 | }
271 | return nil
272 | }
273 |
--------------------------------------------------------------------------------
/pkg/bpf/maps.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | import (
19 | "encoding/binary"
20 | "errors"
21 | "fmt"
22 | "net/netip"
23 | "reflect"
24 | "time"
25 |
26 | "k8s.io/klog/v2"
27 |
28 | "github.com/AliyunContainerService/terway-qos/pkg/byteorder"
29 | "github.com/AliyunContainerService/terway-qos/pkg/types"
30 |
31 | "github.com/cilium/ebpf"
32 | )
33 |
const (
	// Traffic direction indexes. They are used as keys into the per-direction
	// maps (TerwayGlobalCfg, GlobalRateMap) and as cgroupRateID.Direction, and
	// MUST stay equal to the indexes used by the BPF program.
	ingressIndex uint32 = 0
	egressIndex  uint32 = 1
)
39 |
// Compile-time check that *Writer implements Interface.
var _ Interface = &Writer{}

// Writer reads and writes the QoS BPF maps.
type Writer struct {
	// obj is the loaded BPF collection obtained from getBpfObj.
	obj *qos_tcObjects
}
45 |
46 | func (w *Writer) Close() {
47 | _ = w.obj.Close()
48 | }
49 |
50 | func NewMap() (*Writer, error) {
51 | w := &Writer{
52 | obj: getBpfObj(true),
53 | }
54 |
55 | return w, nil
56 | }
57 |
58 | func (w *Writer) GetGlobalConfig() (*types.GlobalConfig, *types.GlobalConfig, error) {
59 | ingress := &globalRateCfg{}
60 | egress := &globalRateCfg{}
61 | err := w.obj.TerwayGlobalCfg.Lookup(ingressIndex, ingress)
62 | if err != nil {
63 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
64 | return nil, nil, err
65 | }
66 | }
67 | err = w.obj.TerwayGlobalCfg.Lookup(egressIndex, egress)
68 | if err != nil {
69 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
70 | return nil, nil, err
71 | }
72 | }
73 |
74 | return &types.GlobalConfig{
75 | HwGuaranteed: ingress.HwGuaranteed,
76 | HwBurstableBps: ingress.HwBurstable,
77 | L0MaxBps: 0,
78 | L0MinBps: ingress.L0MinBps,
79 | L1MaxBps: ingress.L1MaxBps,
80 | L1MinBps: ingress.L1MinBps,
81 | L2MaxBps: ingress.L2MaxBps,
82 | L2MinBps: ingress.L2MinBps,
83 | }, &types.GlobalConfig{
84 | HwGuaranteed: egress.HwGuaranteed,
85 | HwBurstableBps: egress.HwBurstable,
86 | L0MaxBps: 0,
87 | L0MinBps: egress.L0MinBps,
88 | L1MaxBps: egress.L1MaxBps,
89 | L1MinBps: egress.L1MinBps,
90 | L2MaxBps: egress.L2MaxBps,
91 | L2MinBps: egress.L2MinBps,
92 | }, nil
93 | }
94 |
95 | func updateIfNotEqual(expect any, idx uint32, lookupo func(idx uint32) (any, error), update func(idx uint32, rateCfg any) error) error {
96 | prev, err := lookupo(idx)
97 | if err != nil {
98 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
99 | return err
100 | }
101 | }
102 | if reflect.DeepEqual(prev, expect) {
103 | return nil
104 | }
105 |
106 | return update(idx, expect)
107 | }
108 |
109 | func (w *Writer) WriteGlobalConfig(ingress *types.GlobalConfig, egress *types.GlobalConfig) error {
110 | ingress.Default()
111 | if !ingress.Validate() {
112 | return fmt.Errorf("ingress config is not valid, %#v", *ingress)
113 | }
114 | egress.Default()
115 | if !egress.Validate() {
116 | return fmt.Errorf("egress config is not valid, %#v", *egress)
117 | }
118 |
119 | ingressCfg := &globalRateCfg{
120 | Interval: uint64(500 * time.Millisecond),
121 | HwGuaranteed: ingress.HwGuaranteed,
122 | HwBurstable: 0,
123 | L0MinBps: ingress.HwGuaranteed - ingress.L1MinBps - ingress.L2MinBps,
124 | L1MinBps: ingress.L1MinBps,
125 | L1MaxBps: ingress.L1MaxBps,
126 | L2MinBps: ingress.L2MinBps,
127 | L2MaxBps: ingress.L2MaxBps,
128 | }
129 | egressCfg := &globalRateCfg{
130 | Interval: uint64(500 * time.Millisecond),
131 | HwGuaranteed: egress.HwGuaranteed,
132 | HwBurstable: 0,
133 | L0MinBps: egress.HwGuaranteed - egress.L1MinBps - egress.L2MinBps,
134 | L1MinBps: egress.L1MinBps,
135 | L1MaxBps: egress.L1MaxBps,
136 | L2MinBps: egress.L2MinBps,
137 | L2MaxBps: egress.L2MaxBps,
138 | }
139 |
140 | lookRateFunc := func(idx uint32) (any, error) {
141 | prev := &globalRateCfg{}
142 | err := w.obj.TerwayGlobalCfg.Lookup(idx, prev)
143 | return prev, err
144 | }
145 |
146 | updateRateFunc := func(idx uint32, rateCfg any) error {
147 | idxtostr := map[uint32]string{
148 | ingressIndex: "ingress",
149 | egressIndex: "egress",
150 | }
151 | log.Info("write global config", idxtostr[idx], egress.String())
152 | return w.obj.TerwayGlobalCfg.Put(idx, rateCfg)
153 | }
154 |
155 | if err := updateIfNotEqual(ingressCfg, ingressIndex, lookRateFunc, updateRateFunc); err != nil {
156 | return err
157 | }
158 | return updateIfNotEqual(egressCfg, egressIndex, lookRateFunc, updateRateFunc)
159 | }
160 |
161 | func (w *Writer) WritePodInfo(config *types.PodConfig) error {
162 | if config.HostNetwork {
163 | return nil
164 | }
165 | info := &cgroupInfo{
166 | ClassID: config.CgroupInfo.ClassID,
167 | Pad1: uint32(0),
168 | Inode: config.CgroupInfo.Inode,
169 | }
170 | if config.IPv4.IsValid() {
171 | err := w.obj.PodMap.Put(ip2Addr(config.IPv4), info)
172 | if err != nil {
173 | return fmt.Errorf("error put pod_map map, %w", err)
174 | }
175 | }
176 | if config.IPv6.IsValid() {
177 | err := w.obj.PodMap.Put(ip2Addr(config.IPv6), info)
178 | if err != nil {
179 | return fmt.Errorf("error put pod_map map, %w", err)
180 | }
181 | }
182 |
183 | klog.Infof("write pod info, %v", config)
184 | rx := uint64(0)
185 | tx := uint64(0)
186 | if config.RxBps != nil {
187 | rx = *config.RxBps
188 | }
189 | if config.TxBps != nil {
190 | tx = *config.TxBps
191 | }
192 |
193 | return w.WriteCgroupRate(&types.CgroupRate{
194 | Inode: config.CgroupInfo.Inode,
195 | RxBps: rx,
196 | TxBps: tx,
197 | })
198 | }
199 |
200 | func (w *Writer) DeletePodInfo(config *types.PodConfig) error {
201 | if config.HostNetwork {
202 | return nil
203 | }
204 |
205 | ips := []netip.Addr{config.IPv4, config.IPv6}
206 | for _, ip := range ips {
207 | if !ip.IsValid() {
208 | continue
209 | }
210 | if err := w.obj.PodMap.Delete(ip2Addr(ip)); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) {
211 | return fmt.Errorf("error put pod_map map by key %s, %w", ip, err)
212 | }
213 | }
214 |
215 | return nil
216 | }
217 |
218 | func (w *Writer) ListPodInfo() map[netip.Addr]cgroupInfo {
219 | var result = map[netip.Addr]cgroupInfo{}
220 | var key addr
221 | var value cgroupInfo
222 |
223 | iter := w.obj.PodMap.Iterate()
224 | for iter.Next(&key, &value) {
225 | result[addr2ip(&key)] = value
226 | }
227 | return result
228 | }
229 |
230 | func (w *Writer) GetGlobalRateLimit() (*globalRateInfo, *globalRateInfo) {
231 | var ingress = &globalRateInfo{}
232 | var egress = &globalRateInfo{}
233 | _ = w.obj.GlobalRateMap.Lookup(ingressIndex, ingress)
234 |
235 | _ = w.obj.GlobalRateMap.Lookup(egressIndex, egress)
236 | return ingress, egress
237 | }
238 |
239 | func (w *Writer) ListCgroupRate() map[cgroupRateID]rateInfo {
240 | result := make(map[cgroupRateID]rateInfo)
241 | var key cgroupRateID
242 | var value rateInfo
243 |
244 | iter := w.obj.CgroupRateMap.Iterate()
245 | for iter.Next(&key, &value) {
246 | result[key] = value
247 | }
248 | return result
249 | }
250 |
251 | func (w *Writer) DeleteCgroupRate(inode uint64) error {
252 | direction := []uint32{egressIndex, ingressIndex}
253 | for _, cur := range direction {
254 | obj := &cgroupRateID{
255 | Inode: inode,
256 | Direction: cur,
257 | }
258 | if err := w.obj.CgroupRateMap.Delete(obj); err != nil && !errors.Is(err, ebpf.ErrKeyNotExist) {
259 | return err
260 | }
261 | }
262 |
263 | return nil
264 | }
265 |
266 | func (w *Writer) WriteCgroupRate(r *types.CgroupRate) error {
267 | egressID := &cgroupRateID{
268 | Inode: r.Inode,
269 | Direction: egressIndex,
270 | }
271 | ingressID := &cgroupRateID{
272 | Inode: r.Inode,
273 | Direction: ingressIndex,
274 | }
275 | if r.RxBps == 0 {
276 | err := w.obj.CgroupRateMap.Delete(ingressID)
277 | if err != nil {
278 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
279 | return err
280 | }
281 | } else {
282 | log.Info("delete rate", "ingress", r.RxBps)
283 | }
284 | } else {
285 | prev := &rateInfo{}
286 | err := w.obj.CgroupRateMap.Lookup(ingressID, prev)
287 | if err != nil {
288 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
289 | return err
290 | }
291 | }
292 | if prev.LimitBps == r.RxBps {
293 | return nil
294 | }
295 | log.Info("update rate", "rxBps", r.RxBps)
296 |
297 | err = w.obj.CgroupRateMap.Put(ingressID, &rateInfo{
298 | LimitBps: r.RxBps,
299 | LastTimeStamp: 0,
300 | })
301 | if err != nil {
302 | return err
303 | }
304 | }
305 | if r.TxBps == 0 {
306 | err := w.obj.CgroupRateMap.Delete(egressID)
307 | if err != nil {
308 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
309 | return err
310 | }
311 | } else {
312 | log.Info("delete rate", "txBps", r.TxBps)
313 | }
314 | } else {
315 | prev := &rateInfo{}
316 | err := w.obj.CgroupRateMap.Lookup(egressID, prev)
317 | if err != nil {
318 | if !errors.Is(err, ebpf.ErrKeyNotExist) {
319 | return err
320 | }
321 | }
322 | if prev.LimitBps == r.TxBps {
323 | return nil
324 | }
325 | log.Info("update rate", "txBps", r.TxBps)
326 |
327 | err = w.obj.CgroupRateMap.Put(egressID, &rateInfo{
328 | LimitBps: r.TxBps,
329 | LastTimeStamp: 0,
330 | })
331 | if err != nil {
332 | return err
333 | }
334 | }
335 | return nil
336 | }
337 |
338 | func (w *Writer) GetNetStat() []netStat {
339 | var result []netStat
340 | ite := w.obj.TerwayNetStat.Iterate()
341 | var key uint32
342 | var stat netStat
343 | for ite.Next(&key, &stat) {
344 | result = append(result, stat)
345 | }
346 | return result
347 | }
348 |
349 | func ip2Addr(ip netip.Addr) *addr {
350 | slice := ip.As16()
351 | return &addr{
352 | D1: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[:4])),
353 | D2: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[4:8])),
354 | D3: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[8:12])),
355 | D4: byteorder.HostToNetwork32(binary.BigEndian.Uint32(slice[12:])),
356 | }
357 | }
358 |
359 | func addr2ip(addr *addr) netip.Addr {
360 | slice := make([]byte, 0, 16)
361 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D1))
362 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D2))
363 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D3))
364 | slice = binary.BigEndian.AppendUint32(slice, byteorder.NetworkToHost32(addr.D4))
365 | ip, _ := netip.AddrFromSlice(slice)
366 | return ip
367 | }
368 |
--------------------------------------------------------------------------------
/pkg/bpf/maps_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | import (
19 | "net/netip"
20 | "reflect"
21 | "testing"
22 | )
23 |
24 | func Test_ip2Addr(t *testing.T) {
25 | type args struct {
26 | ip netip.Addr
27 | }
28 | tests := []struct {
29 | name string
30 | args args
31 | want *addr
32 | }{
33 | {
34 | name: "",
35 | args: args{ip: netip.MustParseAddr("172.16.1.237")},
36 | want: &addr{
37 | D1: 0x00000000,
38 | D2: 0x00000000,
39 | D3: 0xffff0000,
40 | D4: 0xed0110ac,
41 | },
42 | },
43 | {
44 | name: "",
45 | // net.IP{0x24, 0x8, 0x40, 0x5, 0x3, 0x9c, 0x78, 0x1, 0x10, 0x1, 0xe5, 0xd, 0xbc, 0x3f, 0xe1, 0x16}
46 | args: args{ip: netip.MustParseAddr("2408:4005:39c:7801:1001:e50d:bc3f:e116")},
47 | want: &addr{
48 | D1: 0x05400824,
49 | D2: 0x01789C03,
50 | D3: 0x0de50110,
51 | D4: 0x16e13fbc,
52 | },
53 | },
54 | }
55 | for _, tt := range tests {
56 | t.Run(tt.name, func(t *testing.T) {
57 | if got := ip2Addr(tt.args.ip); !reflect.DeepEqual(got, tt.want) {
58 | t.Errorf("ip2Addr() = %v, want %v", got, tt.want)
59 | }
60 | })
61 | }
62 | }
63 |
// Test_NewMap is an empty placeholder; it currently asserts nothing.
func Test_NewMap(t *testing.T) {

}
67 |
--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfeb.go:
--------------------------------------------------------------------------------
1 | // Code generated by bpf2go; DO NOT EDIT.
2 | //go:build arm64be || armbe || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
3 |
4 | package bpf
5 |
6 | import (
7 | "bytes"
8 | _ "embed"
9 | "fmt"
10 | "io"
11 |
12 | "github.com/cilium/ebpf"
13 | )
14 |
15 | // loadQos_tc returns the embedded CollectionSpec for qos_tc.
16 | func loadQos_tc() (*ebpf.CollectionSpec, error) {
17 | reader := bytes.NewReader(_Qos_tcBytes)
18 | spec, err := ebpf.LoadCollectionSpecFromReader(reader)
19 | if err != nil {
20 | return nil, fmt.Errorf("can't load qos_tc: %w", err)
21 | }
22 |
23 | return spec, err
24 | }
25 |
26 | // loadQos_tcObjects loads qos_tc and converts it into a struct.
27 | //
28 | // The following types are suitable as obj argument:
29 | //
30 | // *qos_tcObjects
31 | // *qos_tcPrograms
32 | // *qos_tcMaps
33 | //
34 | // See ebpf.CollectionSpec.LoadAndAssign documentation for details.
35 | func loadQos_tcObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
36 | spec, err := loadQos_tc()
37 | if err != nil {
38 | return err
39 | }
40 |
41 | return spec.LoadAndAssign(obj, opts)
42 | }
43 |
44 | // qos_tcSpecs contains maps and programs before they are loaded into the kernel.
45 | //
46 | // It can be passed ebpf.CollectionSpec.Assign.
47 | type qos_tcSpecs struct {
48 | qos_tcProgramSpecs
49 | qos_tcMapSpecs
50 | }
51 |
// qos_tcProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcProgramSpecs struct {
	QosCgroup      *ebpf.ProgramSpec `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.ProgramSpec `ebpf:"qos_global"`
	QosProgEgress  *ebpf.ProgramSpec `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.ProgramSpec `ebpf:"qos_prog_ingress"`
}
61 |
62 | // qos_tcMapSpecs contains maps before they are loaded into the kernel.
63 | //
64 | // It can be passed ebpf.CollectionSpec.Assign.
65 | type qos_tcMapSpecs struct {
66 | CgroupRateMap *ebpf.MapSpec `ebpf:"cgroup_rate_map"`
67 | GlobalRateMap *ebpf.MapSpec `ebpf:"global_rate_map"`
68 | PodMap *ebpf.MapSpec `ebpf:"pod_map"`
69 | QosProgMap *ebpf.MapSpec `ebpf:"qos_prog_map"`
70 | TerwayGlobalCfg *ebpf.MapSpec `ebpf:"terway_global_cfg"`
71 | TerwayNetStat *ebpf.MapSpec `ebpf:"terway_net_stat"`
72 | }
73 |
74 | // qos_tcObjects contains all objects after they have been loaded into the kernel.
75 | //
76 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
77 | type qos_tcObjects struct {
78 | qos_tcPrograms
79 | qos_tcMaps
80 | }
81 |
82 | func (o *qos_tcObjects) Close() error {
83 | return _Qos_tcClose(
84 | &o.qos_tcPrograms,
85 | &o.qos_tcMaps,
86 | )
87 | }
88 |
89 | // qos_tcMaps contains all maps after they have been loaded into the kernel.
90 | //
91 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
92 | type qos_tcMaps struct {
93 | CgroupRateMap *ebpf.Map `ebpf:"cgroup_rate_map"`
94 | GlobalRateMap *ebpf.Map `ebpf:"global_rate_map"`
95 | PodMap *ebpf.Map `ebpf:"pod_map"`
96 | QosProgMap *ebpf.Map `ebpf:"qos_prog_map"`
97 | TerwayGlobalCfg *ebpf.Map `ebpf:"terway_global_cfg"`
98 | TerwayNetStat *ebpf.Map `ebpf:"terway_net_stat"`
99 | }
100 |
101 | func (m *qos_tcMaps) Close() error {
102 | return _Qos_tcClose(
103 | m.CgroupRateMap,
104 | m.GlobalRateMap,
105 | m.PodMap,
106 | m.QosProgMap,
107 | m.TerwayGlobalCfg,
108 | m.TerwayNetStat,
109 | )
110 | }
111 |
112 | // qos_tcPrograms contains all programs after they have been loaded into the kernel.
113 | //
114 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
115 | type qos_tcPrograms struct {
116 | QosCgroup *ebpf.Program `ebpf:"qos_cgroup"`
117 | QosGlobal *ebpf.Program `ebpf:"qos_global"`
118 | QosProgEgress *ebpf.Program `ebpf:"qos_prog_egress"`
119 | QosProgIngress *ebpf.Program `ebpf:"qos_prog_ingress"`
120 | }
121 |
122 | func (p *qos_tcPrograms) Close() error {
123 | return _Qos_tcClose(
124 | p.QosCgroup,
125 | p.QosGlobal,
126 | p.QosProgEgress,
127 | p.QosProgIngress,
128 | )
129 | }
130 |
131 | func _Qos_tcClose(closers ...io.Closer) error {
132 | for _, closer := range closers {
133 | if err := closer.Close(); err != nil {
134 | return err
135 | }
136 | }
137 | return nil
138 | }
139 |
140 | // Do not access this directly.
141 | //
142 | //go:embed qos_tc_bpfeb.o
143 | var _Qos_tcBytes []byte
144 |
--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfeb.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/terway-qos/a625936435f7fcd8be5b25805d92c8ca80f0235a/pkg/bpf/qos_tc_bpfeb.o
--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfel.go:
--------------------------------------------------------------------------------
1 | // Code generated by bpf2go; DO NOT EDIT.
2 | //go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mips64le || mips64p32le || mipsle || ppc64le || riscv64
3 |
4 | package bpf
5 |
6 | import (
7 | "bytes"
8 | _ "embed"
9 | "fmt"
10 | "io"
11 |
12 | "github.com/cilium/ebpf"
13 | )
14 |
15 | // loadQos_tc returns the embedded CollectionSpec for qos_tc.
16 | func loadQos_tc() (*ebpf.CollectionSpec, error) {
17 | reader := bytes.NewReader(_Qos_tcBytes)
18 | spec, err := ebpf.LoadCollectionSpecFromReader(reader)
19 | if err != nil {
20 | return nil, fmt.Errorf("can't load qos_tc: %w", err)
21 | }
22 |
23 | return spec, err
24 | }
25 |
26 | // loadQos_tcObjects loads qos_tc and converts it into a struct.
27 | //
28 | // The following types are suitable as obj argument:
29 | //
30 | // *qos_tcObjects
31 | // *qos_tcPrograms
32 | // *qos_tcMaps
33 | //
34 | // See ebpf.CollectionSpec.LoadAndAssign documentation for details.
35 | func loadQos_tcObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
36 | spec, err := loadQos_tc()
37 | if err != nil {
38 | return err
39 | }
40 |
41 | return spec.LoadAndAssign(obj, opts)
42 | }
43 |
44 | // qos_tcSpecs contains maps and programs before they are loaded into the kernel.
45 | //
46 | // It can be passed ebpf.CollectionSpec.Assign.
47 | type qos_tcSpecs struct {
48 | qos_tcProgramSpecs
49 | qos_tcMapSpecs
50 | }
51 |
// qos_tcProgramSpecs contains programs before they are loaded into the kernel.
//
// It can be passed ebpf.CollectionSpec.Assign.
type qos_tcProgramSpecs struct {
	QosCgroup      *ebpf.ProgramSpec `ebpf:"qos_cgroup"`
	QosGlobal      *ebpf.ProgramSpec `ebpf:"qos_global"`
	QosProgEgress  *ebpf.ProgramSpec `ebpf:"qos_prog_egress"`
	QosProgIngress *ebpf.ProgramSpec `ebpf:"qos_prog_ingress"`
}
61 |
62 | // qos_tcMapSpecs contains maps before they are loaded into the kernel.
63 | //
64 | // It can be passed ebpf.CollectionSpec.Assign.
65 | type qos_tcMapSpecs struct {
66 | CgroupRateMap *ebpf.MapSpec `ebpf:"cgroup_rate_map"`
67 | GlobalRateMap *ebpf.MapSpec `ebpf:"global_rate_map"`
68 | PodMap *ebpf.MapSpec `ebpf:"pod_map"`
69 | QosProgMap *ebpf.MapSpec `ebpf:"qos_prog_map"`
70 | TerwayGlobalCfg *ebpf.MapSpec `ebpf:"terway_global_cfg"`
71 | TerwayNetStat *ebpf.MapSpec `ebpf:"terway_net_stat"`
72 | }
73 |
74 | // qos_tcObjects contains all objects after they have been loaded into the kernel.
75 | //
76 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
77 | type qos_tcObjects struct {
78 | qos_tcPrograms
79 | qos_tcMaps
80 | }
81 |
82 | func (o *qos_tcObjects) Close() error {
83 | return _Qos_tcClose(
84 | &o.qos_tcPrograms,
85 | &o.qos_tcMaps,
86 | )
87 | }
88 |
89 | // qos_tcMaps contains all maps after they have been loaded into the kernel.
90 | //
91 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
92 | type qos_tcMaps struct {
93 | CgroupRateMap *ebpf.Map `ebpf:"cgroup_rate_map"`
94 | GlobalRateMap *ebpf.Map `ebpf:"global_rate_map"`
95 | PodMap *ebpf.Map `ebpf:"pod_map"`
96 | QosProgMap *ebpf.Map `ebpf:"qos_prog_map"`
97 | TerwayGlobalCfg *ebpf.Map `ebpf:"terway_global_cfg"`
98 | TerwayNetStat *ebpf.Map `ebpf:"terway_net_stat"`
99 | }
100 |
101 | func (m *qos_tcMaps) Close() error {
102 | return _Qos_tcClose(
103 | m.CgroupRateMap,
104 | m.GlobalRateMap,
105 | m.PodMap,
106 | m.QosProgMap,
107 | m.TerwayGlobalCfg,
108 | m.TerwayNetStat,
109 | )
110 | }
111 |
112 | // qos_tcPrograms contains all programs after they have been loaded into the kernel.
113 | //
114 | // It can be passed to loadQos_tcObjects or ebpf.CollectionSpec.LoadAndAssign.
115 | type qos_tcPrograms struct {
116 | QosCgroup *ebpf.Program `ebpf:"qos_cgroup"`
117 | QosGlobal *ebpf.Program `ebpf:"qos_global"`
118 | QosProgEgress *ebpf.Program `ebpf:"qos_prog_egress"`
119 | QosProgIngress *ebpf.Program `ebpf:"qos_prog_ingress"`
120 | }
121 |
122 | func (p *qos_tcPrograms) Close() error {
123 | return _Qos_tcClose(
124 | p.QosCgroup,
125 | p.QosGlobal,
126 | p.QosProgEgress,
127 | p.QosProgIngress,
128 | )
129 | }
130 |
131 | func _Qos_tcClose(closers ...io.Closer) error {
132 | for _, closer := range closers {
133 | if err := closer.Close(); err != nil {
134 | return err
135 | }
136 | }
137 | return nil
138 | }
139 |
140 | // Do not access this directly.
141 | //
142 | //go:embed qos_tc_bpfel.o
143 | var _Qos_tcBytes []byte
144 |
--------------------------------------------------------------------------------
/pkg/bpf/qos_tc_bpfel.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AliyunContainerService/terway-qos/a625936435f7fcd8be5b25805d92c8ca80f0235a/pkg/bpf/qos_tc_bpfel.o
--------------------------------------------------------------------------------
/pkg/bpf/types.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package bpf
17 |
18 | import (
19 | "net/netip"
20 |
21 | "github.com/AliyunContainerService/terway-qos/pkg/types"
22 | )
23 |
// Interface is the set of BPF map operations used to configure and inspect QoS.
type Interface interface {
	// WriteGlobalConfig writes the global limits for ingress and egress.
	WriteGlobalConfig(ingress *types.GlobalConfig, egress *types.GlobalConfig) error
	// WritePodInfo writes the class ID and rate limit for a pod.
	WritePodInfo(config *types.PodConfig) error
	// DeletePodInfo removes the entries written for a pod.
	DeletePodInfo(config *types.PodConfig) error

	// ListPodInfo returns the stored cgroup info keyed by pod IP.
	ListPodInfo() map[netip.Addr]cgroupInfo
	// GetGlobalRateLimit returns the global rate records as (ingress, egress).
	GetGlobalRateLimit() (*globalRateInfo, *globalRateInfo)

	// ListCgroupRate returns all per-cgroup rate entries.
	ListCgroupRate() map[cgroupRateID]rateInfo
	// WriteCgroupRate sets or clears the rx/tx limits of one cgroup.
	WriteCgroupRate(config *types.CgroupRate) error
	// DeleteCgroupRate removes the rate entries of the cgroup with this inode.
	DeleteCgroupRate(inode uint64) error
}
38 |
39 | // rateInfo carries a rate limit together with a timestamp and a slot value.
40 | type rateInfo struct { // the ebpf tags presumably mirror the field names of the C-side struct — confirm against qos_tc.h
41 | LimitBps uint64 `ebpf:"bps"` // the limit; unit presumably bytes per second — TODO confirm
42 | LastTimeStamp uint64 `ebpf:"t_last"`
43 | Slot uint64 `ebpf:"slot3"` // NOTE(review): tag is "slot3", not "slot"
44 | }
45 |
46 | // addr holds an address as four 32-bit words, one layout for both ipv4 and ipv6.
47 | type addr struct {
48 | D1 uint32 `ebpf:"d1"`
49 | D2 uint32 `ebpf:"d2"`
50 | D3 uint32 `ebpf:"d3"`
51 | D4 uint32 `ebpf:"d4"` // NOTE(review): which word(s) carry an ipv4 address is not visible here — confirm in the map code
52 | }
53 |
54 | // cgroupRateID is the key type of the map returned by ListCgroupRate.
55 | // Rx and tx entries are stored in a single map; Direction tells them apart for one Inode.
56 | type cgroupRateID struct {
57 | Inode uint64 `ebpf:"inode"`
58 | Direction uint32 `ebpf:"direction"` // NOTE(review): the numeric values for rx and tx are not visible here
59 | Pad uint32 `ebpf:"pad"` // explicit padding; brings the struct to 16 bytes
60 | }
61 |
62 | type cgroupInfo struct { // cgroupInfo is the value type of the map returned by ListPodInfo: a class id plus a cgroup inode.
63 | ClassID uint32 `ebpf:"class_id"`
64 | Pad1 uint32 `ebpf:"pad1"` // explicit padding so Inode starts at an 8-byte offset
65 | Inode uint64 `ebpf:"inode"`
66 | }
67 |
68 | type globalRateCfg struct { // globalRateCfg holds an interval, two hardware-wide bounds and a min/max pair for each of the levels L0, L1 and L2.
69 | Interval uint64 `ebpf:"interval"` // NOTE(review): unit is not visible here — confirm
70 | HwGuaranteed uint64 `ebpf:"hw_min_bps"` // tagged as the hardware minimum
71 | HwBurstable uint64 `ebpf:"hw_max_bps"` // tagged as the hardware maximum
72 |
73 | L0MinBps uint64 `ebpf:"l0_min_bps"`
74 | L0MaxBps uint64 `ebpf:"l0_max_bps"`
75 | L1MinBps uint64 `ebpf:"l1_min_bps"`
76 | L1MaxBps uint64 `ebpf:"l1_max_bps"`
77 | L2MinBps uint64 `ebpf:"l2_min_bps"`
78 | L2MaxBps uint64 `ebpf:"l2_max_bps"`
79 | }
80 |
81 | type globalRateInfo struct { // globalRateInfo is the type returned by GetGlobalRateLimit: one overall timestamp plus a timestamp, bps and slot value for each of L0, L1 and L2.
82 | LastTimestamp uint64 `ebpf:"t_last"`
83 |
84 | L0LastTimestamp uint64 `ebpf:"t_l0_last"`
85 | L0Bps uint64 `ebpf:"l0_bps"`
86 | L0Slot uint64 `ebpf:"l0_slot"`
87 |
88 | L1LastTimestamp uint64 `ebpf:"t_l1_last"`
89 | L1Bps uint64 `ebpf:"l1_bps"`
90 | L1Slot uint64 `ebpf:"l1_slot"`
91 |
92 | L2LastTimestamp uint64 `ebpf:"t_l2_last"`
93 | L2Bps uint64 `ebpf:"l2_bps"`
94 | L2Slot uint64 `ebpf:"l2_slot"` // NOTE(review): presumably runtime state written by the BPF program rather than configuration — confirm
95 | }
96 |
97 | type netStat struct { // netStat is an (index, timestamp, value) triple; presumably one statistics sample — TODO confirm how Index is interpreted.
98 | Index uint64 `ebpf:"index"`
99 | TS uint64 `ebpf:"ts"`
100 | Val uint64 `ebpf:"val"`
101 | }
102 |
--------------------------------------------------------------------------------
/pkg/byteorder/byteorder.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | // Copyright Authors of Cilium
3 |
4 | package byteorder
5 |
6 | import (
7 | "net"
8 | )
9 |
10 | // NetIPv4ToHost32 converts an net.IP to a uint32 in host byte order. ip
11 | // must be a IPv4 address, otherwise the function will panic.
12 | func NetIPv4ToHost32(ip net.IP) uint32 {
13 | ipv4 := ip.To4()
14 | _ = ipv4[3] // Assert length of ipv4.
15 | return Native.Uint32(ipv4)
16 | }
17 |
--------------------------------------------------------------------------------
/pkg/byteorder/byteorder_bigendian.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | // Copyright Authors of Cilium
3 |
4 | //go:build armbe || arm64be || mips || mips64 || ppc64
5 |
6 | package byteorder
7 |
8 | import "encoding/binary"
9 |
// Native is the host byte order. This file is selected by its build
// constraint for big-endian targets, so Native is binary.BigEndian.
var Native binary.ByteOrder = binary.BigEndian

// HostToNetwork16 converts u from host to network byte order. Both are big
// endian here, so the value is returned unchanged.
func HostToNetwork16(u uint16) uint16 {
	return u
}

// HostToNetwork32 converts u from host to network byte order; a no-op on
// big-endian hosts.
func HostToNetwork32(u uint32) uint32 {
	return u
}

// NetworkToHost16 converts u from network to host byte order; a no-op on
// big-endian hosts.
func NetworkToHost16(u uint16) uint16 {
	return u
}

// NetworkToHost32 converts u from network to host byte order; a no-op on
// big-endian hosts.
func NetworkToHost32(u uint32) uint32 {
	return u
}
16 |
--------------------------------------------------------------------------------
/pkg/byteorder/byteorder_littleendian.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | // Copyright Authors of Cilium
3 |
4 | //go:build 386 || amd64 || arm || arm64 || mips64le || ppc64le || riscv64 || wasm
5 |
6 | package byteorder
7 |
8 | import (
9 | "encoding/binary"
10 | "math/bits"
11 | )
12 |
// Native is the host byte order. This file is selected by its build
// constraint for little-endian targets, so Native is binary.LittleEndian.
var Native binary.ByteOrder = binary.LittleEndian

// HostToNetwork16 converts u from host (little endian) to network (big
// endian) byte order by swapping its two bytes.
func HostToNetwork16(u uint16) uint16 {
	swapped := bits.ReverseBytes16(u)
	return swapped
}

// HostToNetwork32 converts u from host to network byte order by reversing its
// four bytes.
func HostToNetwork32(u uint32) uint32 {
	swapped := bits.ReverseBytes32(u)
	return swapped
}

// NetworkToHost16 converts u from network to host byte order; the byte swap
// is its own inverse.
func NetworkToHost16(u uint16) uint16 {
	swapped := bits.ReverseBytes16(u)
	return swapped
}

// NetworkToHost32 converts u from network to host byte order; the byte
// reversal is its own inverse.
func NetworkToHost32(u uint32) uint32 {
	swapped := bits.ReverseBytes32(u)
	return swapped
}
19 |
--------------------------------------------------------------------------------
/pkg/byteorder/byteorder_test.go:
--------------------------------------------------------------------------------
1 | // SPDX-License-Identifier: Apache-2.0
2 | // Copyright Authors of Cilium
3 |
4 | //go:build !privileged_tests
5 |
6 | package byteorder
7 |
8 | import (
9 | "encoding/binary"
10 | "net"
11 | "testing"
12 |
13 | . "gopkg.in/check.v1"
14 | )
15 |
16 | // Test hooks gocheck into the "go test" runner by handing t to TestingT.
17 | func Test(t *testing.T) {
18 | TestingT(t)
19 | }
20 |
21 | type ByteorderSuite struct{} // ByteorderSuite is the receiver for the test methods below; it has no fields.
22 |
23 | var _ = Suite(&ByteorderSuite{}) // registers the suite with gocheck when the package is initialised
24 |
25 | func (b *ByteorderSuite) TestNativeIsInitialized(c *C) { // TestNativeIsInitialized asserts that the package-level Native byte order is not nil.
26 | c.Assert(Native, NotNil)
27 | }
28 |
29 | func (b *ByteorderSuite) TestHostToNetwork(c *C) {
30 | switch Native {
31 | case binary.LittleEndian:
32 | c.Assert(HostToNetwork16(0xAABB), Equals, uint16(0xBBAA))
33 | c.Assert(HostToNetwork32(0xAABBCCDD), Equals, uint32(0xDDCCBBAA))
34 | case binary.BigEndian:
35 | c.Assert(HostToNetwork16(0xAABB), Equals, uint16(0xAABB))
36 | c.Assert(HostToNetwork32(0xAABBCCDD), Equals, uint32(0xAABBCCDD))
37 | }
38 | }
39 |
40 | func (b *ByteorderSuite) TestNetIPv4ToHost32(c *C) {
41 | switch Native {
42 | case binary.LittleEndian:
43 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.129.91")), Equals, uint32(0x5b810b0a))
44 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.138.214")), Equals, uint32(0xd68a0b0a))
45 | case binary.BigEndian:
46 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.129.91")), Equals, uint32(0x0a0b815b))
47 | c.Assert(NetIPv4ToHost32(net.ParseIP("10.11.138.214")), Equals, uint32(0x0a0b8ad6))
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/config/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package config
17 |
18 | import (
19 | "fmt"
20 | "os"
21 | "path/filepath"
22 | "regexp"
23 | "strconv"
24 | "strings"
25 | "syscall"
26 | "time"
27 |
28 | "github.com/AliyunContainerService/terway-qos/pkg/types"
29 |
30 | "k8s.io/apimachinery/pkg/util/cache"
31 | ctrl "sigs.k8s.io/controller-runtime"
32 | )
33 |
34 | const (
35 | defaultTTL = 10 * time.Minute // lifetime given to entries added to the Cgroup cache
36 | maxPodPerNode = 1024 // capacity passed to the LRU cache in NewCgroup
37 | systemdCLSCgroupRootPath = "/sys/fs/cgroup/net_cls/kubepods.slice" // NewCgroup uses this root when it exists
38 | defaultCLScgroupRootPath = "/sys/fs/cgroup/net_cls/kubepods" // root used otherwise
39 | )
40 |
41 | var (
42 | log = ctrl.Log.WithName("config") // package logger, named "config"
43 | systemdwalkPath = []string{"kubepods-burstable.slice", "kubepods-besteffort.slice", "kubepods-guaranteed.slice", ""} // sub-directories scanned under the systemd root; "" is the root itself
44 | defaultwalkPath = []string{"burstable", "besteffort", "guaranteed", ""} // sub-directories scanned under the default root; "" is the root itself
45 | podUIDRe = regexp.MustCompile("[0-9a-fA-F]{8}([-,_][0-9a-fA-F]{4}){3}[-,_][0-9a-fA-F]{12}") // 8-4-4-4-12 hex groups; NOTE(review): the class [-,_] also accepts a literal comma as separator
46 | cgroupPathRe = regexp.MustCompile(`^\S+`) // leading run of non-whitespace characters; used to take the cgroup path from a per-cgroup config line
47 | )
48 |
49 | type Interface interface { // Interface is the cgroup access Syncer depends on; Cgroup implements both methods.
50 | GetCgroupByPodUID(string) (*types.CgroupInfo, error) // the argument is the pod UID
51 | SetCgroupClassID(prio uint32, path string) error // Cgroup's implementation writes prio to net_cls.classid under path
52 | }
53 |
54 | type Cgroup struct { // Cgroup finds pod cgroup directories under a net_cls root and caches what it read, keyed by pod UID.
55 | cgroupPath string // root directory that is scanned
56 | workPath []string // sub-directories of cgroupPath to scan; "" means cgroupPath itself
57 |
58 | cache *cache.LRUExpireCache // pod UID -> types.CgroupInfo; entries are added with defaultTTL
59 | }
60 |
61 | func NewCgroup() *Cgroup {
62 | fileExists := func(path string) bool {
63 | _, err := os.Stat(path)
64 | return !os.IsNotExist(err)
65 | }
66 |
67 | cg := Cgroup{
68 | cache: cache.NewLRUExpireCache(maxPodPerNode),
69 | cgroupPath: defaultCLScgroupRootPath,
70 | workPath: defaultwalkPath,
71 | }
72 | if fileExists(systemdCLSCgroupRootPath) {
73 | cg.cgroupPath = systemdCLSCgroupRootPath
74 | cg.workPath = systemdwalkPath
75 | }
76 |
77 | return &cg
78 | }
79 |
80 | func (f *Cgroup) GetCgroupByPodUID(id string) (*types.CgroupInfo, error) {
81 | v, ok := f.cache.Get(id)
82 | if !ok {
83 | // update all cache
84 | result := f.getCgroupPath()
85 | for uid, info := range result {
86 | f.cache.Add(uid, info, defaultTTL)
87 | }
88 | v, ok = f.cache.Get(id)
89 | if !ok {
90 | return nil, fmt.Errorf("not found")
91 | }
92 | }
93 |
94 | info := v.(types.CgroupInfo)
95 | return &info, nil
96 | }
97 |
98 | func (f *Cgroup) SetCgroupClassID(prio uint32, path string) error {
99 | return os.WriteFile(filepath.Join(path, "net_cls.classid"), []byte(strconv.Itoa(int(prio))), 0644)
100 | }
101 |
102 | func GetGlobalConfig(path string) (*types.GlobalConfig, *types.GlobalConfig, error) {
103 | c, err := os.ReadFile(path)
104 | if err != nil {
105 | return nil, nil, err
106 | }
107 |
108 | ingress := &types.GlobalConfig{}
109 | egress := &types.GlobalConfig{}
110 |
111 | egress.HwGuaranteed = parseConfig("hw_tx_bps_max", string(c))
112 |
113 | egress.L0MinBps = parseConfig("online_tx_bps_min", string(c))
114 | egress.L0MaxBps = parseConfig("online_tx_bps_max", string(c))
115 |
116 | egress.L1MinBps = parseConfig("offline_l1_tx_bps_min", string(c))
117 | egress.L1MaxBps = parseConfig("offline_l1_tx_bps_max", string(c))
118 | egress.L2MinBps = parseConfig("offline_l2_tx_bps_min", string(c))
119 | egress.L2MaxBps = parseConfig("offline_l2_tx_bps_max", string(c))
120 |
121 | ingress.HwGuaranteed = parseConfig("hw_rx_bps_max", string(c))
122 |
123 | ingress.L0MinBps = parseConfig("online_rx_bps_min", string(c))
124 | ingress.L0MaxBps = parseConfig("online_rx_bps_max", string(c))
125 |
126 | ingress.L1MinBps = parseConfig("offline_l1_rx_bps_min", string(c))
127 | ingress.L1MaxBps = parseConfig("offline_l1_rx_bps_max", string(c))
128 | ingress.L2MinBps = parseConfig("offline_l2_rx_bps_min", string(c))
129 | ingress.L2MaxBps = parseConfig("offline_l2_rx_bps_max", string(c))
130 |
131 | return ingress, egress, nil
132 | }
133 |
// parseConfig finds the first occurrence of key in content that is followed
// by an optional "=" or by whitespace and then a run of decimal digits, and
// returns those digits as a uint64.
//
// It returns 0 when the key is absent, when no digits follow it, or when the
// digits do not fit in a uint64.
func parseConfig(key string, content string) uint64 {
	// Quote the key so regexp metacharacters in it are matched literally.
	re, err := regexp.Compile(regexp.QuoteMeta(key) + `(?:=?|\s+)(\d+)`)
	if err != nil {
		return 0
	}
	group := re.FindStringSubmatch(content)
	if len(group) != 2 {
		return 0
	}
	// ParseUint returns the maximum uint64 together with a range error on
	// overflow; treat that like every other unparsable value.
	value, err := strconv.ParseUint(group[1], 10, 64)
	if err != nil {
		return 0
	}
	return value
}
146 |
147 | func (f *Cgroup) getCgroupPath() map[string]types.CgroupInfo {
148 | result := map[string]types.CgroupInfo{}
149 |
150 | for _, p := range f.workPath {
151 | path := filepath.Join(f.cgroupPath, p)
152 | entries, err := os.ReadDir(path)
153 | if os.IsNotExist(err) {
154 | continue
155 | }
156 | for _, entry := range entries {
157 | if !entry.IsDir() {
158 | continue
159 | }
160 |
161 | uid := podUIDRe.FindString(entry.Name())
162 | if uid == "" {
163 | continue
164 | }
165 | info, err := readCgroupInfo(filepath.Join(path, entry.Name()))
166 | if err != nil {
167 | log.Error(err, "error read cgroup info")
168 | } else {
169 | result[strings.ReplaceAll(uid, "_", "-")] = info
170 | }
171 | }
172 | }
173 | return result
174 | }
175 |
176 | func readCgroupInfo(path string) (types.CgroupInfo, error) {
177 | var stat syscall.Stat_t
178 | err := syscall.Stat(path, &stat)
179 | if err != nil {
180 | return types.CgroupInfo{}, err
181 | }
182 | // cgroupv1
183 | classIDBytes, err := os.ReadFile(filepath.Join(path, "net_cls.classid"))
184 | if err != nil {
185 | return types.CgroupInfo{}, fmt.Errorf("error read cgroup id, %w", err)
186 | }
187 |
188 | classID, err := strconv.ParseUint(strings.TrimSpace(string(classIDBytes)), 10, 32)
189 | if err != nil {
190 | return types.CgroupInfo{}, fmt.Errorf("failed parse %s,%w", classIDBytes, err)
191 | }
192 |
193 | return types.CgroupInfo{
194 | Path: path,
195 | ClassID: uint32(classID),
196 | Inode: stat.Ino,
197 | }, nil
198 | }
199 |
--------------------------------------------------------------------------------
/pkg/config/config_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package config
17 |
18 | import (
19 | "testing"
20 | )
21 |
22 | func Test_parseConfig(t *testing.T) {
23 | contents := `hw_tx_bps_max 100
24 | hw_rx_bps_max 100
25 | offline_l1_tx_bps_min 10
26 | offline_l1_tx_bps_max 20
27 | offline_l2_tx_bps_min 10
28 | offline_l2_tx_bps_max 30
29 | offline_l1_rx_bps_min 10
30 | offline_l1_rx_bps_max 20
31 | offline_l2_rx_bps_min 10
32 | offline_l2_rx_bps_max 30`
33 |
34 | tests := []struct {
35 | key string
36 | want uint64
37 | }{
38 | {
39 | key: "hw_tx_bps_max",
40 | want: 100,
41 | }, {
42 | key: "hw_rx_bps_max",
43 | want: 100,
44 | }, {
45 | key: "offline_l1_tx_bps_min",
46 | want: 10,
47 | }, {
48 | key: "offline_l1_tx_bps_max",
49 | want: 20,
50 | }, {
51 | key: "offline_l2_tx_bps_min",
52 | want: 10,
53 | }, {
54 | key: "offline_l2_tx_bps_max",
55 | want: 30,
56 | },
57 | }
58 | for _, tt := range tests {
59 | t.Run(tt.key, func(t *testing.T) {
60 | if got := parseConfig(tt.key, contents); got != tt.want {
61 | t.Errorf("parseConfig() = %v, want %v", got, tt.want)
62 | }
63 | })
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/pkg/config/record.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package config
17 |
18 | import (
19 | "fmt"
20 |
21 | "github.com/AliyunContainerService/terway-qos/pkg/types"
22 |
23 | "k8s.io/client-go/tools/cache"
24 | )
25 |
26 | type PodCache struct { // PodCache is an indexed store of *types.PodConfig, keyed by PodID (see NewPodCache).
27 | cache.Indexer // embedded, so the Indexer methods (Get, Update, ByIndex, ...) are available on PodCache
28 | }
29 |
30 | const ( // names of the secondary indexes registered in NewPodCache
31 | indexPodID = "podID" // indexes by PodConfig.PodID
32 | indexPodUID = "podUID" // indexes by PodConfig.PodUID
33 | indexCgroupPath = "cgroupPath" // indexes by PodConfig.CgroupInfo.Path
34 | )
35 |
36 | func NewPodCache() *PodCache {
37 | return &PodCache{
38 | Indexer: cache.NewIndexer(func(obj interface{}) (string, error) {
39 | r, ok := obj.(*types.PodConfig)
40 | if !ok {
41 | return "", fmt.Errorf("not type *Record")
42 | }
43 | return r.PodID, nil
44 | }, cache.Indexers{
45 | indexPodID: func(obj interface{}) ([]string, error) {
46 | r, ok := obj.(*types.PodConfig)
47 | if !ok {
48 | return nil, fmt.Errorf("not type *Record")
49 | }
50 | return []string{r.PodID}, nil
51 | },
52 | indexPodUID: func(obj interface{}) ([]string, error) {
53 | r, ok := obj.(*types.PodConfig)
54 | if !ok {
55 | return nil, fmt.Errorf("not type *Record")
56 | }
57 | return []string{r.PodUID}, nil
58 | },
59 | indexCgroupPath: func(obj interface{}) ([]string, error) {
60 | r, ok := obj.(*types.PodConfig)
61 | if !ok {
62 | return nil, fmt.Errorf("not type *Record")
63 | }
64 | return []string{r.CgroupInfo.Path}, nil
65 | },
66 | }),
67 | }
68 | }
69 |
70 | func (r *PodCache) ByPodID(id string) *types.PodConfig {
71 | objs, err := r.ByIndex(indexPodID, id)
72 | if err != nil {
73 | panic(err)
74 | }
75 | if len(objs) == 0 {
76 | return nil
77 | }
78 | return objs[0].(*types.PodConfig)
79 | }
80 |
81 | func (r *PodCache) ByPodUID(id string) *types.PodConfig {
82 | objs, err := r.ByIndex(indexPodUID, id)
83 | if err != nil {
84 | panic(err)
85 | }
86 | if len(objs) == 0 {
87 | return nil
88 | }
89 | return objs[0].(*types.PodConfig)
90 | }
91 |
92 | func (r *PodCache) ByCgroupPath(id string) *types.PodConfig {
93 | objs, err := r.ByIndex(indexCgroupPath, id)
94 | if err != nil {
95 | panic(err)
96 | }
97 | if len(objs) == 0 {
98 | return nil
99 | }
100 | return objs[0].(*types.PodConfig)
101 | }
102 |
103 | func (r *PodCache) AddIfNotPresent(config *types.PodConfig) error {
104 | _, ok, err := r.Indexer.Get(config)
105 | if err != nil {
106 | return err
107 | }
108 | if ok {
109 | return nil
110 | }
111 | return r.Indexer.Add(config)
112 | }
113 |
114 | func (r *PodCache) Del(config *types.PodConfig) error {
115 | return r.Indexer.Delete(config)
116 | }
117 |
118 | func (r *PodCache) DelByPodID(id string) error {
119 | return r.Indexer.Delete(&types.PodConfig{PodID: id})
120 | }
121 |
--------------------------------------------------------------------------------
/pkg/config/syncer.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package config
17 |
18 | import (
19 | "context"
20 | "encoding/json"
21 | "fmt"
22 | "os"
23 | "path/filepath"
24 | "strconv"
25 | "strings"
26 | "sync"
27 | "time"
28 |
29 | "github.com/fsnotify/fsnotify"
30 | "k8s.io/apimachinery/pkg/util/sets"
31 |
32 | "github.com/AliyunContainerService/terway-qos/pkg/bpf"
33 | "github.com/AliyunContainerService/terway-qos/pkg/types"
34 | )
35 |
36 | var _ types.SyncPod = &Syncer{} // compile-time check that *Syncer implements types.SyncPod
37 |
38 | const (
39 | rootFileConfig = "/var/lib/terway/qos" // directory that holds the config files and is watched by Start
40 | perCgroupConfig = "per_cgroup_bps_limit" // file name, read by parsePerCgroupConfig
41 | globalConfig = "global_bps_config" // file name, read by syncGlobalConfig
42 | podConfig = "pod.json" // file name, read by parsePodConfig
43 | )
44 |
45 | type Syncer struct { // Syncer applies config-file contents and pod updates to the bpf writer and keeps a cache of known pods.
46 | globalPath string // full path of the global_bps_config file
47 | perCgroupPath string // full path of the per_cgroup_bps_limit file
48 | podConfigPath string // full path of the pod.json file
49 |
50 | bpf bpf.Interface // destination for global, pod and cgroup-rate writes
51 | cgroup Interface // cgroup lookup and class id writer
52 |
53 | podCache *PodCache // pods seen through UpdatePod
54 |
55 | lock sync.Mutex // held by DeletePod, UpdatePod and podChanged
56 | }
57 |
58 | func NewSyncer(bpfWriter bpf.Interface) *Syncer {
59 | return &Syncer{
60 | globalPath: filepath.Join(rootFileConfig, globalConfig),
61 | perCgroupPath: filepath.Join(rootFileConfig, perCgroupConfig),
62 | podConfigPath: filepath.Join(rootFileConfig, podConfig),
63 |
64 | bpf: bpfWriter,
65 | cgroup: NewCgroup(),
66 |
67 | podCache: NewPodCache(),
68 | }
69 | }
70 |
71 | func (s *Syncer) Start(ctx context.Context) error {
72 | err := os.MkdirAll(rootFileConfig, os.ModeDir)
73 | if err != nil {
74 | return err
75 | }
76 |
77 | watcher, err := fsnotify.NewWatcher()
78 | if err != nil {
79 | return err
80 | }
81 | log.Info("watching config change", "path", rootFileConfig)
82 | err = watcher.Add(rootFileConfig)
83 | if err != nil {
84 | return err
85 | }
86 |
87 | go func() {
88 | tick := time.NewTicker(5 * time.Second)
89 |
90 | defer watcher.Close()
91 | for {
92 | select {
93 | case <-ctx.Done():
94 | tick.Stop()
95 | return
96 | case event, ok := <-watcher.Events:
97 | if !ok {
98 | return
99 | }
100 | switch event.Name {
101 | case rootFileConfig:
102 | if event.Has(fsnotify.Remove | fsnotify.Rename) {
103 | log.Info("config file gone, will restart", "event", event.String())
104 | os.Exit(99)
105 | }
106 | case s.globalPath:
107 | log.Info("cfg change", "event", event.String())
108 |
109 | err = s.syncGlobalConfig()
110 | case s.perCgroupPath:
111 | log.Info("cfg change", "event", event.String())
112 |
113 | err = s.syncCgroupRate()
114 | case s.podConfigPath:
115 | log.Info("cfg change", "event", event.String())
116 |
117 | err = s.syncPodConfig()
118 | default:
119 | continue
120 | }
121 | if err != nil {
122 | log.Error(err, "error sync config")
123 | }
124 |
125 | case err, ok := <-watcher.Errors:
126 | if !ok {
127 | return
128 | }
129 | log.Error(err, "file watch err")
130 | case <-tick.C:
131 | err = s.syncGlobalConfig()
132 | if err != nil {
133 | log.Error(err, "error sync config")
134 | }
135 | err = s.syncCgroupRate()
136 | if err != nil {
137 | log.Error(err, "error sync config")
138 | }
139 | err = s.syncPodConfig()
140 | if err != nil {
141 | log.Error(err, "error sync config")
142 | }
143 | }
144 | }
145 | }()
146 |
147 | return nil
148 | }
149 |
150 | func (s *Syncer) DeletePod(id string) error {
151 | s.lock.Lock()
152 | defer s.lock.Unlock()
153 |
154 | podConfig := s.podCache.ByPodID(id)
155 | if podConfig == nil {
156 | return nil
157 | }
158 |
159 | if err := s.podCache.DelByPodID(id); err != nil {
160 | return err
161 | }
162 |
163 | return s.bpf.DeletePodInfo(podConfig)
164 | }
165 |
166 | func (s *Syncer) UpdatePod(config *types.PodConfig) error {
167 | s.lock.Lock()
168 | defer s.lock.Unlock()
169 |
170 | prio := config.Prio
171 |
172 | v, ok, err := s.podCache.Get(config)
173 | if err != nil {
174 | return err
175 | }
176 | if ok {
177 | log.Info("update pod", "pod", config.PodID)
178 |
179 | prev := v.(*types.PodConfig)
180 |
181 | // keep previous cgroup info
182 | // take only single source
183 | config.CgroupInfo = prev.CgroupInfo
184 |
185 | // annotation has higher priority
186 | if config.TxBps != nil {
187 | config.TxBps = prev.TxBps
188 | }
189 | if config.RxBps != nil {
190 | config.RxBps = prev.RxBps
191 | }
192 | } else {
193 | // new pod
194 | log.Info("add new pod", "pod", config.PodID)
195 | cg, err := s.cgroup.GetCgroupByPodUID(config.PodUID)
196 | if err != nil {
197 | return err
198 | }
199 | config.CgroupInfo = cg
200 | }
201 |
202 | if prio != nil && *prio <= 2 {
203 | config.CgroupInfo.ClassID = *prio
204 | }
205 |
206 | err = s.podCache.Update(config)
207 | if err != nil {
208 | return err
209 | }
210 |
211 | if config.HostNetwork && config.Prio != nil {
212 | err = s.cgroup.SetCgroupClassID(*config.Prio, config.CgroupInfo.Path)
213 | if err != nil {
214 | return err
215 | }
216 | }
217 |
218 | return s.bpf.WritePodInfo(config)
219 | }
220 |
221 | func (s *Syncer) syncGlobalConfig() error {
222 | ingress, egress, err := GetGlobalConfig(s.globalPath)
223 | if err != nil {
224 | if os.IsNotExist(err) {
225 | return nil
226 | }
227 | return err
228 | }
229 |
230 | return s.bpf.WriteGlobalConfig(ingress, egress)
231 | }
232 |
233 | func (s *Syncer) syncCgroupRate() error {
234 | pods, err := s.parsePerCgroupConfig()
235 | if err != nil {
236 | return err
237 | }
238 | return s.podChanged(pods)
239 | }
240 |
241 | func (s *Syncer) syncPodConfig() error {
242 | pods, err := s.parsePodConfig()
243 | if err != nil {
244 | return err
245 | }
246 | return s.podChanged(pods)
247 | }
248 |
249 | func (s *Syncer) podChanged(pods []Pod) error {
250 | s.lock.Lock()
251 | defer s.lock.Unlock()
252 |
253 | current := sets.New[uint64]()
254 |
255 | for _, pod := range pods {
256 | info, err := readCgroupInfo(pod.CgroupDir)
257 | if err != nil {
258 | log.Error(err, "error get cgroup info", "path", pod.CgroupDir)
259 | continue
260 | }
261 |
262 | config := s.podCache.ByCgroupPath(info.Path)
263 | if config == nil {
264 | log.Info("ignore pod, cgroup not found", "cgroup", info.Path)
265 | continue
266 | }
267 |
268 | if pod.Prio >= 0 && pod.Prio <= 2 {
269 | prio := uint32(pod.Prio)
270 | config.Prio = &prio
271 | config.CgroupInfo.ClassID = prio
272 | }
273 | config.RxBps = &pod.QoSConfig.IngressBandwidth
274 | config.TxBps = &pod.QoSConfig.EgressBandwidth
275 |
276 | current.Insert(info.Inode)
277 | err = s.podChangeLocked(config)
278 | if err != nil {
279 | return err
280 | }
281 | }
282 |
283 | // clean up old cgroup rate
284 | cgroups := s.bpf.ListCgroupRate()
285 | olds := sets.New[uint64]()
286 | for key := range cgroups {
287 | olds.Insert(key.Inode)
288 | }
289 | for id := range olds.Difference(current) {
290 | err := s.bpf.DeleteCgroupRate(id)
291 | if err != nil {
292 | log.Error(err, "delete cgruop rate failed", "id", strconv.Itoa(int(id)))
293 | }
294 | }
295 | return nil
296 | }
297 |
298 | func (s *Syncer) parsePodConfig() ([]Pod, error) {
299 | content, err := os.ReadFile(s.podConfigPath)
300 | if err != nil {
301 | if os.IsNotExist(err) {
302 | return nil, nil
303 | }
304 | return nil, err
305 | }
306 | configs := make(map[string]*Pod)
307 | err = json.Unmarshal(content, &configs)
308 | if err != nil {
309 | return nil, err
310 | }
311 |
312 | var pods []Pod
313 | for _, pod := range configs {
314 | pods = append(pods, *pod)
315 | }
316 |
317 | return pods, nil
318 | }
319 |
320 | func (s *Syncer) parsePerCgroupConfig() ([]Pod, error) {
321 | content, err := os.ReadFile(s.perCgroupPath)
322 | if err != nil {
323 | if os.IsNotExist(err) {
324 | return nil, nil
325 | }
326 | return nil, err
327 | }
328 | configs := make([]Pod, 0)
329 |
330 | lines := strings.Split(string(content), "\n")
331 | if err != nil {
332 | return nil, err
333 | }
334 | for _, line := range lines {
335 | if len(line) == 0 {
336 | continue
337 | }
338 | cgroupPath := cgroupPathRe.FindString(line)
339 | rx := parseConfig("rx_bps", line)
340 | tx := parseConfig("tx_bps", line)
341 |
342 | configs = append(configs, Pod{
343 | PodName: "",
344 | PodNamespace: "",
345 | PodUID: "",
346 | Prio: -1,
347 | CgroupDir: cgroupPath,
348 | QoSConfig: QoSConfig{
349 | IngressBandwidth: rx,
350 | EgressBandwidth: tx,
351 | },
352 | })
353 | }
354 | return configs, nil
355 | }
356 |
357 | func (s *Syncer) podChangeLocked(config *types.PodConfig) error {
358 | log.Info("update pod", "pod", config.PodID, "detail", fmt.Sprintf("%+v", config), "prio", *config.Prio)
359 | err := s.podCache.Update(config)
360 | if err != nil {
361 | return err
362 | }
363 |
364 | if config.HostNetwork && config.Prio != nil {
365 | err = s.cgroup.SetCgroupClassID(*config.Prio, config.CgroupInfo.Path)
366 | if err != nil {
367 | return err
368 | }
369 | }
370 |
371 | return s.bpf.WritePodInfo(config)
372 | }
373 |
--------------------------------------------------------------------------------
/pkg/config/types.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | type Node struct { // Node lists hardware-wide tx/rx maxima and min/max tx/rx values for levels L0, L1 and L2, with JSON and YAML keys.
4 | HwTxBpsMax uint64 `json:"hw_tx_bps_max" yaml:"hw_tx_bps_max"`
5 | HwRxBpsMax uint64 `json:"hw_rx_bps_max" yaml:"hw_rx_bps_max"`
6 | L0TxBpsMin uint64 `json:"l0_tx_bps_min" yaml:"l0_tx_bps_min"` // NOTE(review): GetGlobalConfig reads "online_*" / "offline_l1_*" keys, not these "l0_*" / "l1_*" names — confirm where Node is used
7 | L0TxBpsMax uint64 `json:"l0_tx_bps_max" yaml:"l0_tx_bps_max"`
8 | L0RxBpsMin uint64 `json:"l0_rx_bps_min" yaml:"l0_rx_bps_min"`
9 | L0RxBpsMax uint64 `json:"l0_rx_bps_max" yaml:"l0_rx_bps_max"`
10 | L1TxBpsMin uint64 `json:"l1_tx_bps_min" yaml:"l1_tx_bps_min"`
11 | L1TxBpsMax uint64 `json:"l1_tx_bps_max" yaml:"l1_tx_bps_max"`
12 | L1RxBpsMin uint64 `json:"l1_rx_bps_min" yaml:"l1_rx_bps_min"`
13 | L1RxBpsMax uint64 `json:"l1_rx_bps_max" yaml:"l1_rx_bps_max"`
14 | L2TxBpsMin uint64 `json:"l2_tx_bps_min" yaml:"l2_tx_bps_min"`
15 | L2TxBpsMax uint64 `json:"l2_tx_bps_max" yaml:"l2_tx_bps_max"`
16 | L2RxBpsMin uint64 `json:"l2_rx_bps_min" yaml:"l2_rx_bps_min"`
17 | L2RxBpsMax uint64 `json:"l2_rx_bps_max" yaml:"l2_rx_bps_max"`
18 | }
19 |
20 | type Pod struct { // Pod is one file-based pod entry: decoded from pod.json or built from a line of the per-cgroup limit file.
21 | PodName string `json:"podName"`
22 | PodNamespace string `json:"podNamespace"`
23 | PodUID string `json:"podUID"`
24 | Prio int `json:"prio"` // only values 0..2 are applied by podChanged; the per-cgroup parser sets -1
25 | CgroupDir string `json:"cgroupDir"` // cgroup directory; podChanged uses it to find the cached pod
26 | QoSConfig QoSConfig `json:"qosConfig"`
27 | }
28 |
29 | type QoSConfig struct { // QoSConfig holds the bandwidth values of a Pod entry.
30 | IngressBandwidth uint64 `json:"ingressBandwidth"` // copied to PodConfig.RxBps by podChanged
31 | EgressBandwidth uint64 `json:"egressBandwidth"` // copied to PodConfig.TxBps by podChanged
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/k8s/pods.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package k8s
17 |
18 | import (
19 | "context"
20 | "fmt"
21 | "net/netip"
22 | "os"
23 | "time"
24 |
25 | "github.com/AliyunContainerService/terway-qos/pkg/bandwidth"
26 | "github.com/AliyunContainerService/terway-qos/pkg/types"
27 |
28 | corev1 "k8s.io/api/core/v1"
29 | "k8s.io/apimachinery/pkg/api/errors"
30 | "k8s.io/apimachinery/pkg/fields"
31 | "k8s.io/apimachinery/pkg/runtime"
32 | utilruntime "k8s.io/apimachinery/pkg/util/runtime"
33 | clientgoscheme "k8s.io/client-go/kubernetes/scheme"
34 | "k8s.io/klog/v2"
35 | ctrl "sigs.k8s.io/controller-runtime"
36 | "sigs.k8s.io/controller-runtime/pkg/builder"
37 | "sigs.k8s.io/controller-runtime/pkg/cache"
38 | "sigs.k8s.io/controller-runtime/pkg/client"
39 | "sigs.k8s.io/controller-runtime/pkg/client/config"
40 | "sigs.k8s.io/controller-runtime/pkg/reconcile"
41 | )
42 |
43 | type Interface interface { // Interface declares a single pod lookup method.
44 | PodByUID() *corev1.Pod // NOTE(review): takes no UID argument despite the name, and no implementation is visible in this file — confirm whether it is still used
45 | }
46 |
47 | var scheme = runtime.NewScheme() // scheme handed to the controller manager in StartPodHandler
48 |
49 | func init() { // init registers the client-go types in scheme; utilruntime.Must panics if registration fails.
50 | utilruntime.Must(clientgoscheme.AddToScheme(scheme))
51 | }
52 |
53 | func StartPodHandler(ctx context.Context, syncer types.SyncPod) error {
54 | options := ctrl.Options{
55 | Scheme: scheme,
56 | }
57 |
58 | options.NewCache = cache.BuilderWithOptions(cache.Options{
59 | SelectorsByObject: cache.SelectorsByObject{
60 | &corev1.Pod{}: {
61 | Field: fields.SelectorFromSet(fields.Set{"spec.nodeName": os.Getenv("K8S_NODE_NAME")}),
62 | },
63 | }},
64 | )
65 | mgr, err := ctrl.NewManager(config.GetConfigOrDie(), options)
66 | if err != nil {
67 | return err
68 | }
69 |
70 | err = ctrl.NewControllerManagedBy(mgr).
71 | For(&corev1.Pod{}, builder.WithPredicates(&predicateForPod{})).
72 | Complete(&reconcilePod{
73 | client: mgr.GetClient(),
74 | syncer: syncer,
75 | })
76 | if err != nil {
77 | return err
78 | }
79 | return mgr.Start(ctx)
80 | }
81 |
82 | // reconcilePod reconciles Pods by forwarding their state to a types.SyncPod.
83 | type reconcilePod struct {
84 | // client can be used to retrieve objects from the APIServer.
85 | client client.Client
86 |
87 | syncer types.SyncPod // receives UpdatePod and DeletePod calls from Reconcile
88 | }
89 |
90 | // Compile-time check that reconcilePod implements reconcile.Reconciler.
91 | var _ reconcile.Reconciler = &reconcilePod{}
92 |
93 | func (r *reconcilePod) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
94 | pod := corev1.Pod{}
95 | err := r.client.Get(ctx, client.ObjectKey{
96 | Namespace: request.Namespace,
97 | Name: request.Name,
98 | }, &pod)
99 | if err != nil {
100 | if errors.IsNotFound(err) {
101 | klog.Infof("pod %s/%s has been deleted", request.Namespace, request.Name)
102 | return reconcile.Result{}, r.syncer.DeletePod(request.String())
103 | }
104 | return reconcile.Result{}, err
105 | }
106 |
107 | if !pod.DeletionTimestamp.IsZero() {
108 | t := time.Since(pod.DeletionTimestamp.Time)
109 | if t < 0 {
110 | // Reconciliation is level-based, meaning action isn't driven off changes in
111 | // individual Events. Requeue the result at least once to make sure the bpf map
112 | // will be deleted in time. Because the pod object may exist but its ip address
113 | // has been allocated to another pod. e.g. pod deletion is blocked by a
114 | // time-consuming finalizer.
115 | klog.Infof("pod %s/%s requeue deletion at %s",
116 | pod.Namespace, pod.Name, pod.DeletionTimestamp.Time)
117 | return reconcile.Result{RequeueAfter: -t}, nil
118 | } else {
119 | // IP addresses are expected to have been reclaimed
120 | // See https://github.com/kubernetes/kubernetes/issues/109414#issuecomment-1125233538
121 | klog.Infof("pod %s/%s IP addresses are expected to have been reclaimed",
122 | pod.Namespace, pod.Name)
123 | return reconcile.Result{}, r.syncer.DeletePod(request.String())
124 | }
125 | }
126 |
127 | v4, v6 := getIPs(&pod)
128 | if !v4.IsValid() && !v6.IsValid() {
129 | return reconcile.Result{}, fmt.Errorf("pod %s/%s has no ip", pod.Namespace, pod.Name)
130 | }
131 |
132 | ingress, egress, err := bandwidth.ExtractPodBandwidthResources(pod.Annotations)
133 | if err != nil {
134 | return reconcile.Result{}, fmt.Errorf("error extract bandwidth resources, %w", err)
135 | }
136 |
137 | update := &types.PodConfig{
138 | PodID: fmt.Sprintf("%s/%s", pod.Namespace, pod.Name),
139 | PodUID: string(pod.UID),
140 | IPv4: v4,
141 | IPv6: v6,
142 | HostNetwork: pod.Spec.HostNetwork,
143 | }
144 |
145 | if ingress != nil {
146 | v := uint64(ingress.Value())
147 | update.RxBps = &(v)
148 | }
149 | if egress != nil {
150 | v := uint64(egress.Value())
151 | update.TxBps = &(v)
152 | }
153 | switch pod.Annotations["k8s.aliyun.com/qos-class"] {
154 | case "best-effort":
155 | update.Prio = func(a uint32) *uint32 {
156 | return &a
157 | }(2)
158 | case "burstable":
159 | update.Prio = func(a uint32) *uint32 {
160 | return &a
161 | }(1)
162 | case "guaranteed":
163 | update.Prio = func(a uint32) *uint32 {
164 | return &a
165 | }(0)
166 | }
167 |
168 | return reconcile.Result{}, r.syncer.UpdatePod(update)
169 | }
170 |
171 | func getIPs(pod *corev1.Pod) (v4 netip.Addr, v6 netip.Addr) {
172 | for _, ip := range pod.Status.PodIPs {
173 | addr, err := netip.ParseAddr(ip.IP)
174 | if err != nil {
175 | continue
176 | }
177 | if addr.Is4() {
178 | v4 = addr
179 | } else {
180 | v6 = addr
181 | }
182 | }
183 |
184 | return
185 | }
186 |
--------------------------------------------------------------------------------
/pkg/k8s/predicates.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package k8s
17 |
18 | import (
19 | corev1 "k8s.io/api/core/v1"
20 | "sigs.k8s.io/controller-runtime/pkg/event"
21 | "sigs.k8s.io/controller-runtime/pkg/predicate"
22 | )
23 |
24 | type predicateForPod struct {
25 | predicate.Funcs
26 | }
27 |
28 | func (p *predicateForPod) Create(e event.CreateEvent) bool {
29 | pod, ok := e.Object.(*corev1.Pod)
30 | if !ok {
31 | return false
32 | }
33 |
34 | v4, v6 := getIPs(pod)
35 | if !v4.IsValid() && !v6.IsValid() {
36 | return false
37 | }
38 |
39 | return true
40 | }
41 |
42 | func (p *predicateForPod) Update(e event.UpdateEvent) bool {
43 | pod, ok := e.ObjectNew.(*corev1.Pod)
44 | if !ok {
45 | return false
46 | }
47 |
48 | v4, v6 := getIPs(pod)
49 | if !v4.IsValid() && !v6.IsValid() {
50 | return false
51 | }
52 |
53 | return true
54 | }
55 |
56 | func (p *predicateForPod) Delete(e event.DeleteEvent) bool {
57 | _, ok := e.Object.(*corev1.Pod)
58 | return ok
59 | }
60 |
--------------------------------------------------------------------------------
/pkg/types/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package types
17 |
18 | import (
19 | "fmt"
20 | "net/netip"
21 | )
22 |
23 | type SyncPod interface { // SyncPod is the sink to which pod state is pushed; the pod reconciler in pkg/k8s calls it.
24 | DeletePod(id string) error // drops the pod identified by id; the pod reconciler passes request.String() as id
25 | UpdatePod(config *PodConfig) error // applies the given pod configuration
26 | }
27 |
28 | // PodConfig contains the pod-related settings handed to SyncPod.UpdatePod.
29 | type PodConfig struct {
30 | PodID string // "<namespace>/<name>" as built by the pod reconciler
31 | PodUID string // the pod's UID
32 |
33 | IPv4 netip.Addr // pod IPv4 address; zero Addr (IsValid false) when the pod has none
34 | IPv6 netip.Addr // pod IPv6 address; zero Addr (IsValid false) when the pod has none
35 |
36 | HostNetwork bool // copied from the pod's spec.hostNetwork
37 | Prio *uint32 // set by the pod reconciler from the qos-class annotation: 0 guaranteed, 1 burstable, 2 best-effort; nil otherwise
38 |
39 | CgroupInfo *CgroupInfo // NOTE(review): not set by the pod reconciler — presumably filled in elsewhere; confirm
40 |
41 | RxBps *uint64 // ingress bandwidth value extracted from the pod annotations; nil when absent
42 | TxBps *uint64 // egress bandwidth value extracted from the pod annotations; nil when absent
43 | }
44 |
45 | type CgroupInfo struct { // CgroupInfo is referenced by PodConfig.CgroupInfo. NOTE(review): presumably describes the pod's cgroup — confirm against the code that fills it.
46 | Path string // NOTE(review): presumably the cgroup path — confirm
47 | ClassID uint32 // NOTE(review): presumably a net_cls class id — confirm
48 | Inode uint64 // NOTE(review): presumably the inode of the cgroup directory — confirm
49 | }
50 |
51 | type CgroupRate struct { // CgroupRate pairs an inode with a receive and a transmit rate. NOTE(review): presumably keyed by CgroupInfo.Inode — confirm.
52 | Inode uint64 // NOTE(review): presumably the cgroup inode the rates apply to — confirm
53 |
54 | RxBps uint64 // receive rate; unit not visible here — TODO confirm
55 | TxBps uint64 // transmit rate; unit not visible here — TODO confirm
56 | }
57 |
// GlobalConfig holds the node-wide bandwidth settings. String labels the
// three levels as "online" (L0), "offline-l1" (L1) and "offline-l2" (L2); each
// level has a minimum and a maximum rate.
type GlobalConfig struct {
	HwGuaranteed   uint64
	HwBurstableBps uint64

	L0MaxBps uint64
	L0MinBps uint64

	L1MaxBps uint64
	L1MinBps uint64

	L2MaxBps uint64
	L2MinBps uint64
}

// Default fills in the fields that were left at zero:
//
//   - HwBurstableBps becomes HwGuaranteed when HwGuaranteed is set;
//   - L0MaxBps becomes HwGuaranteed;
//   - L0MinBps becomes what is left of HwGuaranteed after the L1 and L2
//     minimums are subtracted.
//
// When the L1 and L2 minimums together exceed HwGuaranteed there is nothing
// left for L0, so L0MinBps stays 0 instead of wrapping around to a huge
// unsigned value.
func (c *GlobalConfig) Default() {
	if c.HwGuaranteed != 0 && c.HwBurstableBps == 0 {
		c.HwBurstableBps = c.HwGuaranteed
	}
	if c.L0MaxBps == 0 {
		c.L0MaxBps = c.HwGuaranteed
	}
	if c.L0MinBps == 0 {
		reserved := c.L1MinBps + c.L2MinBps
		// reserved < c.L1MinBps means the addition itself overflowed.
		if reserved >= c.L1MinBps && reserved <= c.HwGuaranteed {
			c.L0MinBps = c.HwGuaranteed - reserved
		}
	}
}

// Validate reports whether the configuration is consistent. A configuration
// in which every field other than HwGuaranteed is zero is accepted as is.
// Otherwise all of the following must hold:
//
//   - HwGuaranteed does not exceed HwBurstableBps;
//   - neither L1MaxBps nor L2MaxBps exceeds HwGuaranteed, nor does their sum;
//   - each of the L1 and L2 minimums does not exceed its maximum.
//
// The L0 fields are not checked.
func (c *GlobalConfig) Validate() bool {
	if c.HwBurstableBps == 0 && c.L0MaxBps == 0 && c.L0MinBps == 0 && c.L1MaxBps == 0 && c.L1MinBps == 0 && c.L2MaxBps == 0 && c.L2MinBps == 0 {
		return true
	}

	// The last operand is only evaluated when HwGuaranteed >= L1MaxBps, so the
	// subtraction cannot wrap; comparing against L1MaxBps+L2MaxBps directly
	// could overflow and accept an over-committed configuration.
	if c.HwGuaranteed > c.HwBurstableBps ||
		c.HwGuaranteed < c.L1MaxBps ||
		c.HwGuaranteed < c.L2MaxBps ||
		c.L1MinBps > c.L1MaxBps ||
		c.L2MinBps > c.L2MaxBps ||
		c.HwGuaranteed-c.L1MaxBps < c.L2MaxBps {
		return false
	}

	return true
}

// String renders the guaranteed rate and the min/max of every level on one
// line. HwBurstableBps is not included.
func (c *GlobalConfig) String() string {
	return fmt.Sprintf("hw %d online-min %d online-max %d offline-l1-min %d offline-l1-max %d offline-l2-min %d offline-l2-max %d",
		c.HwGuaranteed, c.L0MinBps, c.L0MaxBps, c.L1MinBps, c.L1MaxBps, c.L2MinBps, c.L2MaxBps)
}
105 |
--------------------------------------------------------------------------------
/pkg/version/version.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2023, Alibaba Group;
3 | * Licensed under the Apache License, Version 2.0 (the "License");
4 | * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software
10 | * distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package version
17 |
18 | import (
19 | "fmt"
20 | "os"
21 | "path/filepath"
22 | "runtime"
23 | "strings"
24 | )
25 |
26 | const unknown = "unknown" // placeholder returned by the adjust* helpers when their input is empty
27 |
28 | var (
29 | Version string // set in init: "<command>/<version> (<GOOS>/<GOARCH>) <gitCommit> <buildDate>"
30 | UA string // set in init: "<command>/<version> (<GOOS>/<GOARCH>) poseidon/<short commit>"
31 |
32 | gitVersion = "v0.0.0-master+$Format:%H$" // NOTE(review): presumably overridden at build time (e.g. -ldflags -X) — confirm against the build scripts
33 | gitCommit = "$Format:%H$" // sha1 from git, output of $(git rev-parse HEAD)
34 |
35 | buildDate = "1970-01-01T00:00:00Z" // build date in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
36 | )
37 |
38 | func init() {
39 | Version = fmt.Sprintf("%s/%s (%s/%s) %s %s", adjustCommand(os.Args[0]), adjustVersion(gitVersion), runtime.GOOS, runtime.GOARCH, gitCommit, buildDate)
40 | UA = fmt.Sprintf("%s/%s (%s/%s) poseidon/%s", adjustCommand(os.Args[0]), adjustVersion(gitVersion), runtime.GOOS, runtime.GOARCH, adjustCommit(gitCommit))
41 | }
42 |
43 | // adjustVersion strips "alpha", "beta", etc. from version in form
44 | // major.minor.patch-[alpha|beta|etc].
45 | func adjustVersion(v string) string {
46 | if len(v) == 0 {
47 | return unknown
48 | }
49 | seg := strings.SplitN(v, "-", 2)
50 | return seg[0]
51 | }
52 |
53 | // adjustCommand returns the last component of the
54 | // OS-specific command path for use in User-Agent.
55 | func adjustCommand(p string) string {
56 | // Unlikely, but better than returning "".
57 | if len(p) == 0 {
58 | return unknown
59 | }
60 | return filepath.Base(p)
61 | }
62 |
63 | // adjustCommit returns sufficient significant figures of the commit's git hash.
64 | func adjustCommit(c string) string {
65 | if len(c) == 0 {
66 | return unknown
67 | }
68 | if len(c) > 7 {
69 | return c[:7]
70 | }
71 | return c
72 | }
73 |
--------------------------------------------------------------------------------