├── .github └── workflows │ └── c-cpp.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── common.mk ├── component ├── arg_parse.c ├── arg_parse.h ├── list.h ├── net_utils.c ├── net_utils.h ├── parse_sym.c ├── parse_sym.h ├── sys_utils.c └── sys_utils.h ├── docs ├── develop.md ├── droptrace.md ├── images │ └── nettrace-start.svg ├── nettrace.md └── nodetrace.md ├── script ├── .gitignore ├── Dockerfile ├── arch.mk ├── bash-completion.sh ├── bpftool-arm ├── bpftool-loongarch ├── bpftool-x86 ├── dropreason.8 ├── nettrace.spec └── zh_CN │ ├── nettrace.8 │ └── nettrace.md ├── shared ├── bpf │ ├── skb_macro.h │ ├── skb_parse.h │ ├── skb_shared.h │ ├── vmlinux.h │ ├── vmlinux_arm64.h │ ├── vmlinux_loongarch64.h │ └── vmlinux_x86.h ├── bpf_utils.c ├── bpf_utils.h ├── pkt_utils.c └── pkt_utils.h └── src ├── .gitignore ├── Makefile ├── README.md ├── analysis.c ├── analysis.h ├── btf.raw ├── dropreason.c ├── dropreason.h ├── gen_trace.py ├── nettrace.c ├── nettrace.h ├── progs ├── core.c ├── core.h ├── feat_args_ext.c ├── kprobe.c ├── shared.h └── tracing.c ├── rstreason.c ├── rstreason.h ├── rule.yaml ├── trace.c ├── trace.h ├── trace.yaml ├── trace_probe.c ├── trace_tracing.c └── vmlinux_header.h /.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: Build CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: apt-update 17 | run: sudo apt update 18 | - name: prepare 19 | run: sudo apt install python3 python3-yaml libelf-dev libbpf-dev linux-headers-`uname -r` clang llvm gcc linux-tools-`uname -r` linux-tools-generic -y 20 | - name: make 21 | run: make all 22 | 23 | build-legacy: 24 | 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@v3 29 | - name: apt-update 30 | run: sudo apt update 31 | - name: prepare 32 | 
run: sudo apt install python3 python3-yaml libelf-dev libbpf-dev linux-headers-`uname -r` clang llvm gcc linux-tools-`uname -r` linux-tools-generic -y 33 | - name: make 34 | run: make COMPAT=1 all 35 | 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | __pycache__ 3 | *.pyc 4 | output 5 | *.skel.h 6 | *.o 7 | .* 8 | GPATH 9 | GTAGS 10 | GRTAGS 11 | kheaders.h 12 | build*.sh 13 | release.sh 14 | compile_commands.json 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2022 THL A29 Limited 2 | 3 | nettrace is licensed under Mulan PSL v2. You can use this software according to the terms and conditions of the 4 | Mulan PSL v2. You may obtain a copy of Mulan PSL v2 at: http://license.coscl.org.cn/MulanPSL2 5 | THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, 6 | INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 7 | See the Mulan PSL v2 for more details. 8 | 9 | January 2020 http://license.coscl.org.cn/MulanPSL2 10 | 11 | Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v2 (this License) with the following terms and conditions: 12 | 13 | 0. Definition 14 | 15 | Software means the program and related documents which are licensed under this License and comprise all Contribution(s). 16 | 17 | Contribution means the copyrightable work licensed by a particular Contributor under this License. 18 | 19 | Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License. 20 | 21 | Legal Entity means the entity making a Contribution and all its Affiliates. 
22 | 23 | Affiliates means entities that control, are controlled by, or are under common control with the acting entity under this License, ‘control’ means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity. 24 | 25 | 1. Grant of Copyright License 26 | 27 | Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not. 28 | 29 | 2. Grant of Patent License 30 | 31 | Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution, where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed. The patent license shall not apply to any modification of the Contribution, and any other combination which includes the Contribution. If you or your Affiliates directly or indirectly institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken. 32 | 33 | 3. 
No Trademark License 34 | 35 | No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, except as required to fulfill notice requirements in Section 4. 36 | 37 | 4. Distribution Restriction 38 | 39 | You may distribute the Software in any medium with or without modification, whether in source or executable forms, provided that you provide recipients with a copy of this License and retain copyright, patent, trademark and disclaimer statements in the Software. 40 | 41 | 5. Disclaimer of Warranty and Limitation of Liability 42 | 43 | THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED. IN NO EVENT SHALL ANY CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 44 | 45 | 6. Language 46 | 47 | THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION SHALL PREVAIL. 48 | 49 | END OF THE TERMS AND CONDITIONS 50 | 51 | 52 | 53 | Other dependencies and licenses: 54 | 55 | 1. bcc 56 | Copyright (c) bcc authors and contributors 57 | 58 | Apache License 59 | Version 2.0, January 2004 60 | http://www.apache.org/licenses/ 61 | 62 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 63 | 64 | 1. Definitions. 65 | 66 | "License" shall mean the terms and conditions for use, reproduction, 67 | and distribution as defined by Sections 1 through 9 of this document. 68 | 69 | "Licensor" shall mean the copyright owner or entity authorized by 70 | the copyright owner that is granting the License. 
71 | 72 | "Legal Entity" shall mean the union of the acting entity and all 73 | other entities that control, are controlled by, or are under common 74 | control with that entity. For the purposes of this definition, 75 | "control" means (i) the power, direct or indirect, to cause the 76 | direction or management of such entity, whether by contract or 77 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 78 | outstanding shares, or (iii) beneficial ownership of such entity. 79 | 80 | "You" (or "Your") shall mean an individual or Legal Entity 81 | exercising permissions granted by this License. 82 | 83 | "Source" form shall mean the preferred form for making modifications, 84 | including but not limited to software source code, documentation 85 | source, and configuration files. 86 | 87 | "Object" form shall mean any form resulting from mechanical 88 | transformation or translation of a Source form, including but 89 | not limited to compiled object code, generated documentation, 90 | and conversions to other media types. 91 | 92 | "Work" shall mean the work of authorship, whether in Source or 93 | Object form, made available under the License, as indicated by a 94 | copyright notice that is included in or attached to the work 95 | (an example is provided in the Appendix below). 96 | 97 | "Derivative Works" shall mean any work, whether in Source or Object 98 | form, that is based on (or derived from) the Work and for which the 99 | editorial revisions, annotations, elaborations, or other modifications 100 | represent, as a whole, an original work of authorship. For the purposes 101 | of this License, Derivative Works shall not include works that remain 102 | separable from, or merely link (or bind by name) to the interfaces of, 103 | the Work and Derivative Works thereof. 
104 | 105 | "Contribution" shall mean any work of authorship, including 106 | the original version of the Work and any modifications or additions 107 | to that Work or Derivative Works thereof, that is intentionally 108 | submitted to Licensor for inclusion in the Work by the copyright owner 109 | or by an individual or Legal Entity authorized to submit on behalf of 110 | the copyright owner. For the purposes of this definition, "submitted" 111 | means any form of electronic, verbal, or written communication sent 112 | to the Licensor or its representatives, including but not limited to 113 | communication on electronic mailing lists, source code control systems, 114 | and issue tracking systems that are managed by, or on behalf of, the 115 | Licensor for the purpose of discussing and improving the Work, but 116 | excluding communication that is conspicuously marked or otherwise 117 | designated in writing by the copyright owner as "Not a Contribution." 118 | 119 | "Contributor" shall mean Licensor and any individual or Legal Entity 120 | on behalf of whom a Contribution has been received by Licensor and 121 | subsequently incorporated within the Work. 122 | 123 | 2. Grant of Copyright License. Subject to the terms and conditions of 124 | this License, each Contributor hereby grants to You a perpetual, 125 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 126 | copyright license to reproduce, prepare Derivative Works of, 127 | publicly display, publicly perform, sublicense, and distribute the 128 | Work and such Derivative Works in Source or Object form. 129 | 130 | 3. Grant of Patent License. 
Subject to the terms and conditions of 131 | this License, each Contributor hereby grants to You a perpetual, 132 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 133 | (except as stated in this section) patent license to make, have made, 134 | use, offer to sell, sell, import, and otherwise transfer the Work, 135 | where such license applies only to those patent claims licensable 136 | by such Contributor that are necessarily infringed by their 137 | Contribution(s) alone or by combination of their Contribution(s) 138 | with the Work to which such Contribution(s) was submitted. If You 139 | institute patent litigation against any entity (including a 140 | cross-claim or counterclaim in a lawsuit) alleging that the Work 141 | or a Contribution incorporated within the Work constitutes direct 142 | or contributory patent infringement, then any patent licenses 143 | granted to You under this License for that Work shall terminate 144 | as of the date such litigation is filed. 145 | 146 | 4. Redistribution. 
You may reproduce and distribute copies of the 147 | Work or Derivative Works thereof in any medium, with or without 148 | modifications, and in Source or Object form, provided that You 149 | meet the following conditions: 150 | 151 | (a) You must give any other recipients of the Work or 152 | Derivative Works a copy of this License; and 153 | 154 | (b) You must cause any modified files to carry prominent notices 155 | stating that You changed the files; and 156 | 157 | (c) You must retain, in the Source form of any Derivative Works 158 | that You distribute, all copyright, patent, trademark, and 159 | attribution notices from the Source form of the Work, 160 | excluding those notices that do not pertain to any part of 161 | the Derivative Works; and 162 | 163 | (d) If the Work includes a "NOTICE" text file as part of its 164 | distribution, then any Derivative Works that You distribute must 165 | include a readable copy of the attribution notices contained 166 | within such NOTICE file, excluding those notices that do not 167 | pertain to any part of the Derivative Works, in at least one 168 | of the following places: within a NOTICE text file distributed 169 | as part of the Derivative Works; within the Source form or 170 | documentation, if provided along with the Derivative Works; or, 171 | within a display generated by the Derivative Works, if and 172 | wherever such third-party notices normally appear. The contents 173 | of the NOTICE file are for informational purposes only and 174 | do not modify the License. You may add Your own attribution 175 | notices within Derivative Works that You distribute, alongside 176 | or as an addendum to the NOTICE text from the Work, provided 177 | that such additional attribution notices cannot be construed 178 | as modifying the License. 
179 | 180 | You may add Your own copyright statement to Your modifications and 181 | may provide additional or different license terms and conditions 182 | for use, reproduction, or distribution of Your modifications, or 183 | for any such Derivative Works as a whole, provided Your use, 184 | reproduction, and distribution of the Work otherwise complies with 185 | the conditions stated in this License. 186 | 187 | 5. Submission of Contributions. Unless You explicitly state otherwise, 188 | any Contribution intentionally submitted for inclusion in the Work 189 | by You to the Licensor shall be under the terms and conditions of 190 | this License, without any additional terms or conditions. 191 | Notwithstanding the above, nothing herein shall supersede or modify 192 | the terms of any separate license agreement you may have executed 193 | with Licensor regarding such Contributions. 194 | 195 | 6. Trademarks. This License does not grant permission to use the trade 196 | names, trademarks, service marks, or product names of the Licensor, 197 | except as required for reasonable and customary use in describing the 198 | origin of the Work and reproducing the content of the NOTICE file. 199 | 200 | 7. Disclaimer of Warranty. Unless required by applicable law or 201 | agreed to in writing, Licensor provides the Work (and each 202 | Contributor provides its Contributions) on an "AS IS" BASIS, 203 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 204 | implied, including, without limitation, any warranties or conditions 205 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 206 | PARTICULAR PURPOSE. You are solely responsible for determining the 207 | appropriateness of using or redistributing the Work and assume any 208 | risks associated with Your exercise of permissions under this License. 209 | 210 | 8. Limitation of Liability. 
In no event and under no legal theory, 211 | whether in tort (including negligence), contract, or otherwise, 212 | unless required by applicable law (such as deliberate and grossly 213 | negligent acts) or agreed to in writing, shall any Contributor be 214 | liable to You for damages, including any direct, indirect, special, 215 | incidental, or consequential damages of any character arising as a 216 | result of this License or out of the use or inability to use the 217 | Work (including but not limited to damages for loss of goodwill, 218 | work stoppage, computer failure or malfunction, or any and all 219 | other commercial damages or losses), even if such Contributor 220 | has been advised of the possibility of such damages. 221 | 222 | 9. Accepting Warranty or Additional Liability. While redistributing 223 | the Work or Derivative Works thereof, You may choose to offer, 224 | and charge a fee for, acceptance of support, warranty, indemnity, 225 | or other liability obligations and/or rights consistent with this 226 | License. However, in accepting such obligations, You may act only 227 | on Your own behalf and on Your sole responsibility, not on behalf 228 | of any other Contributor, and only if You agree to indemnify, 229 | defend, and hold each Contributor harmless for any liability 230 | incurred by, or claims asserted against, such Contributor by reason 231 | of your accepting any such warranty or additional liability. 232 | 233 | END OF TERMS AND CONDITIONS 234 | 235 | APPENDIX: How to apply the Apache License to your work. 236 | 237 | To apply the Apache License to your work, attach the following 238 | boilerplate notice, with the fields enclosed by brackets "{}" 239 | replaced with your own identifying information. (Don't include 240 | the brackets!) The text should be enclosed in the appropriate 241 | comment syntax for the file format. 
We also recommend that a 242 | file or class name and description of purpose be included on the 243 | same "printed page" as the copyright notice for easier 244 | identification within third-party archives. 245 | 246 | Copyright {yyyy} {name of copyright owner} 247 | 248 | Licensed under the Apache License, Version 2.0 (the "License"); 249 | you may not use this file except in compliance with the License. 250 | You may obtain a copy of the License at 251 | 252 | http://www.apache.org/licenses/LICENSE-2.0 253 | 254 | Unless required by applicable law or agreed to in writing, software 255 | distributed under the License is distributed on an "AS IS" BASIS, 256 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 257 | See the License for the specific language governing permissions and 258 | limitations under the License. 259 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | export VERSION = 1.2.11 3 | RELEASE ?= .tl3 4 | export RELEASE 5 | 6 | man-target := script/zh_CN/nettrace.8 7 | 8 | ROOT := $(abspath .) 
9 | export ROOT 10 | PREFIX ?= ./output 11 | PREFIX := $(abspath $(PREFIX)) 12 | MAN_DIR := $(PREFIX)/usr/share/man 13 | BCOMP := ${PREFIX}/usr/share/bash-completion/completions/ 14 | export PREFIX 15 | SCRIPT = $(ROOT)/script 16 | export SCRIPT 17 | ARCH ?= $(shell uname -m) 18 | SOURCE_DIR := ~/rpmbuild/SOURCES/nettrace-${VERSION} 19 | PACK_TARGET := nettrace-$(VERSION)-1$(RELEASE).$(ARCH) 20 | PACK_PATH := $(abspath $(PREFIX)/$(PACK_TARGET)) 21 | PACK_NAME := $(PACK_TARGET).tar.bz2 22 | 23 | all clean: 24 | make -C src $@ 25 | 26 | %.8: %.md 27 | md2man-roff $< > $@ 28 | 29 | man: $(man-target) 30 | 31 | install: 32 | @mkdir -p $(PREFIX) 33 | make -C src install 34 | 35 | @mkdir -p ${MAN_DIR}/zh_CN/man8/; gzip -k $(SCRIPT)/zh_CN/*.8; \ 36 | mv $(SCRIPT)/zh_CN/*.8.gz ${MAN_DIR}/zh_CN/man8 37 | 38 | @mkdir -p ${MAN_DIR}/man8/; gzip -k $(SCRIPT)/*.8; mv \ 39 | $(SCRIPT)/*.8.gz ${MAN_DIR}/man8/; \ 40 | cd ${MAN_DIR}/man8/; for i in `ls ../zh_CN/man8/`; \ 41 | do \ 42 | if [ ! -f $$i ];then \ 43 | ln -s ../zh_CN/man8/$$i ./; \ 44 | fi; \ 45 | done 46 | 47 | @mkdir -p $(BCOMP); cd $(BCOMP); cp $(SCRIPT)/bash-completion.sh \ 48 | ./nettrace 49 | 50 | pack: 51 | @make clean 52 | @rm -rf $(PACK_PATH) && mkdir -p $(PACK_PATH) 53 | make PREFIX=$(PACK_PATH) -C src pack 54 | @cd $(PREFIX) && tar -cjf $(PACK_NAME) $(PACK_TARGET) && \ 55 | echo "$(PREFIX)/$(PACK_NAME) is generated" 56 | 57 | rpm: 58 | @make clean 59 | @rm -rf ${SOURCE_DIR} && mkdir -p ${SOURCE_DIR} 60 | @cp -r * ${SOURCE_DIR}/ 61 | @sed -i 's/%{VERSION}/$(VERSION)/' ${SOURCE_DIR}/script/nettrace.spec 62 | @cd ~/rpmbuild/SOURCES/ && tar -czf nettrace-${VERSION}.tar.gz \ 63 | nettrace-${VERSION} 64 | @rpmbuild -D 'dist $(RELEASE)' --target ${ARCH} \ 65 | -ba ${SOURCE_DIR}/script/nettrace.spec 66 | -------------------------------------------------------------------------------- /common.mk: -------------------------------------------------------------------------------- 1 | COMPONENT := $(ROOT)/component 2 | 
COMMON_SHARED := $(ROOT)/shared/pkt_utils.c $(COMPONENT)/net_utils.c \ 3 | $(COMPONENT)/arg_parse.c $(COMPONENT)/sys_utils.c \ 4 | $(ROOT)/shared/bpf_utils.c 5 | 6 | CFLAGS += -I./ -I$(ROOT)/shared/bpf/ -g 7 | BPF_CFLAGS = $(CFLAGS) -Wno-unused-function \ 8 | -Wno-compare-distinct-pointer-types -Wuninitialized \ 9 | -D__TARGET_ARCH_$(SRCARCH) -DBPF_NO_PRESERVE_ACCESS_INDEX 10 | 11 | ifeq ("$(shell pkg-config --print-requires-private libelf | grep libzstd)","libzstd") 12 | LIBELF_ZSTD_FLAGS = -lzstd 13 | endif 14 | 15 | HOST_CFLAGS = \ 16 | -lbpf -lelf -lz $(LIBELF_ZSTD_FLAGS) -O2 -static $(CFLAGS) -Wall \ 17 | -Wno-deprecated-declarations -DVERSION=$(VERSION) \ 18 | -DRELEASE=$(RELEASE) \ 19 | -I$(ROOT)/shared/ -I$(ROOT)/component 20 | 21 | CC := $(CROSS_COMPILE)gcc 22 | 23 | include $(ROOT)/script/arch.mk 24 | 25 | HEADERS := $(if $(KERNEL),$(KERNEL),/lib/modules/$(shell uname -r)/build/) 26 | NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) 27 | export HEADERS 28 | 29 | USERINCLUDE := \ 30 | -I$(HEADERS)/arch/$(SRCARCH)/include/uapi \ 31 | -I$(HEADERS)/arch/$(SRCARCH)/include/generated/uapi \ 32 | -I$(HEADERS)/include/uapi \ 33 | -I$(HEADERS)/include/generated/uapi \ 34 | -include $(HEADERS)/include/linux/kconfig.h \ 35 | -I/usr/include/ 36 | 37 | LINUXINCLUDE := \ 38 | -I$(HEADERS)/arch/$(SRCARCH)/include \ 39 | -I$(HEADERS)/arch/$(SRCARCH)/include/generated \ 40 | -I$(HEADERS)/include \ 41 | $(USERINCLUDE) 42 | 43 | KERNEL_CFLAGS += $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ 44 | -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ 45 | -Wno-compare-distinct-pointer-types \ 46 | -Wno-gnu-variable-sized-type-not-at-end \ 47 | -Wno-address-of-packed-member -Wno-tautological-compare \ 48 | -Wno-unknown-warning-option -Wno-frame-address 49 | 50 | ifdef KERN_VER 51 | CFLAGS += -DKERN_VER=$(KERN_VER) 52 | endif 53 | 54 | ifdef NO_GLOBAL_DATA 55 | CFLAGS += -DBPF_NO_GLOBAL_DATA 56 | endif 57 | 58 | ifdef DISABLE_IPV6 59 | CFLAGS += -DNT_DISABLE_IPV6 
60 | endif 61 | 62 | ifdef NO_BTF 63 | ifeq ($(wildcard $(HEADERS)),) 64 | $(error kernel headers not exist in COMPAT mode, please install it) 65 | endif 66 | kheaders_cmd := ln -s vmlinux_header.h kheaders.h 67 | CFLAGS += -DNO_BTF 68 | BPF_CFLAGS += $(KERNEL_CFLAGS) 69 | else 70 | kheaders_cmd := ln -s ../shared/bpf/vmlinux.h kheaders.h 71 | BPF_CFLAGS += -target bpf 72 | endif 73 | 74 | ifdef INLINE 75 | CFLAGS += -DINLINE_MODE 76 | endif 77 | 78 | ifdef INIT 79 | CFLAGS += -D__F_INIT_EVENT 80 | endif 81 | 82 | ifdef OUTPUT_WHOLE 83 | CFLAGS += -D__F_OUTPUT_WHOLE 84 | endif 85 | 86 | ifndef BPFTOOL 87 | ifneq ("$(shell bpftool gen help 2>&1 | grep skeleton)","") 88 | BPFTOOL := bpftool 89 | else 90 | ifeq ("$(shell uname -m)","x86_64") 91 | BPFTOOL := $(ROOT)/script/bpftool-x86 92 | endif 93 | 94 | ifeq ("$(shell uname -m)","aarch64") 95 | BPFTOOL := $(ROOT)/script/bpftool-arm 96 | endif 97 | ifeq ("$(shell uname -m)","loongarch64") 98 | BPFTOOL := $(ROOT)/script/bpftool-loongarch 99 | endif 100 | endif 101 | endif 102 | 103 | ifdef BPF_DEBUG 104 | CFLAGS += -DBPF_DEBUG 105 | endif 106 | 107 | kheaders.h: 108 | $(call kheaders_cmd) 109 | 110 | progs/%.o: progs/%.c $(BPF_EXTRA_DEP) 111 | clang -O2 -c -S -Wall -fno-asynchronous-unwind-tables \ 112 | -Wno-incompatible-pointer-types-discards-qualifiers \ 113 | $< -emit-llvm -Wno-unknown-attributes $(BPF_CFLAGS) -Xclang \ 114 | -disable-llvm-passes -o - | \ 115 | opt -O2 -mtriple=bpf-pc-linux | \ 116 | llvm-dis | \ 117 | llc -march=bpf -filetype=obj -o $@ 118 | @readelf -S $@ | grep BTF > /dev/null || (rm $@ && exit 1) 119 | 120 | %.skel.h: %.o 121 | $(BPFTOOL) gen skeleton $< > $@ || (rm -r $@ && exit 1) 122 | 123 | $(bpf_progs): %: %.skel.h 124 | @: 125 | 126 | bpf: $(bpf_progs) $(bpf_progs_ext) 127 | 128 | $(progs): %: %.c bpf 129 | @if [ -n "$(prog-$@)" ]; then \ 130 | echo $(CC) $(prog-$@) -o $@ $(HOST_CFLAGS); \ 131 | $(CC) $(prog-$@) -o $@ $(HOST_CFLAGS); \ 132 | else \ 133 | echo $(CC) $< -o $@ 
$(HOST_CFLAGS); \ 134 | $(CC) $< -o $@ $(HOST_CFLAGS); \ 135 | fi 136 | -------------------------------------------------------------------------------- /component/arg_parse.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #define _LINUX_IN_H 10 | #include 11 | #include 12 | 13 | #include "arg_parse.h" 14 | #include "net_utils.h" 15 | #include "sys_utils.h" 16 | 17 | #define KEY_START 1000 18 | 19 | #define for_each_opt(i, options, item, option_size) \ 20 | for (i = 0, item = options; i < option_size; \ 21 | i++, item = options + i) \ 22 | 23 | int parse_args(int argc, char *argv[], arg_config_t *config, 24 | option_item_t *options, 25 | int option_size) 26 | { 27 | struct option *long_opts; 28 | int cur_key = KEY_START; 29 | char sopts[128] = {}; 30 | option_item_t *item; 31 | struct option *opt; 32 | int i, cur_opt; 33 | 34 | opt = long_opts = calloc(option_size + 1, sizeof(struct option)); 35 | if (!long_opts) 36 | return -ENOMEM; 37 | 38 | for_each_opt(i, options, item, option_size) { 39 | int val = item->sname; 40 | bool has_s = val; 41 | 42 | if (item->type == OPTION_BLANK) 43 | continue; 44 | if (!has_s) 45 | val = cur_key++; 46 | item->key = val; 47 | 48 | switch (item->type) { 49 | case OPTION_BOOL_REV: 50 | case OPTION_BOOL: 51 | case OPTION_HELP: 52 | if (has_s) 53 | sprintf_end(sopts, "%c", item->sname); 54 | opt->has_arg = no_argument; 55 | break; 56 | default: 57 | if (has_s) 58 | sprintf_end(sopts, "%c:", item->sname); 59 | opt->has_arg = required_argument; 60 | break; 61 | } 62 | if (!item->lname) 63 | continue; 64 | opt->name = item->lname; 65 | opt->flag = NULL; 66 | opt->val = val; 67 | opt++; 68 | } 69 | 70 | #define S_DST(type, val) *((type *)item->dest) = val 71 | #define S_SET(type, val) \ 72 | do { \ 73 | if (item->set) \ 74 | *((type *)item->set) = val; \ 75 | item->__is_set = 
true; \ 76 | } while (0) 77 | 78 | while ((cur_opt = getopt_long(argc, argv, sopts, long_opts, 79 | NULL)) != -1) { 80 | for_each_opt(i, options, item, option_size) { 81 | if (item->key == cur_opt) 82 | goto found; 83 | } 84 | goto err; 85 | found: 86 | switch (item->type) { 87 | case OPTION_BOOL: 88 | S_DST(bool, true); 89 | S_SET(bool, true); 90 | break; 91 | case OPTION_BOOL_REV: 92 | S_DST(bool, false); 93 | S_SET(bool, true); 94 | break; 95 | case OPTION_STRING: 96 | S_DST(char *, optarg); 97 | S_SET(bool, true); 98 | break; 99 | case OPTION_INT: { 100 | char buf[32]; 101 | int val; 102 | 103 | if (sscanf(optarg, "%d%s", &val, buf) != 1) { 104 | printf("invalid arg value: %s\n", 105 | optarg); 106 | goto err; 107 | } 108 | S_DST(int, val); 109 | S_SET(bool, true); 110 | break; 111 | } 112 | case OPTION_U16BE: 113 | case OPTION_U16: { 114 | char buf[32]; 115 | u16 val; 116 | 117 | if (sscanf(optarg, "%hu%s", &val, buf) != 1) { 118 | printf("invalid arg value: %s\n", 119 | optarg); 120 | goto err; 121 | } 122 | if (item->type == OPTION_U16BE) 123 | val = htons(val); 124 | S_DST(u16, val); 125 | S_SET(bool, true); 126 | break; 127 | } 128 | case OPTION_U32: { 129 | char buf[32]; 130 | u32 val; 131 | 132 | if (sscanf(optarg, "%u%s", &val, buf) != 1) { 133 | printf("invalid arg value: %s\n", 134 | optarg); 135 | goto err; 136 | } 137 | S_DST(u32, val); 138 | S_SET(bool, true); 139 | break; 140 | } 141 | case OPTION_IPV4: 142 | if (!inet_pton(AF_INET, optarg, item->dest)) { 143 | printf("invalid ip address: %s\n", optarg); 144 | goto err; 145 | } 146 | S_SET(bool, true); 147 | break; 148 | case OPTION_IPV6: 149 | if (!inet_pton(AF_INET6, optarg, item->dest)) { 150 | printf("invalid ip address: %s\n", optarg); 151 | goto err; 152 | } 153 | S_SET(bool, true); 154 | break; 155 | case OPTION_IPV4ORIPV6: 156 | if (inet_pton(AF_INET, optarg, item->dest)) { 157 | S_SET(u16, ETH_P_IP); 158 | } else if (inet_pton(AF_INET6, optarg, item->dest)) { 159 | S_SET(u16, ETH_P_IPV6); 
160 | } else { 161 | printf("invalid ip address: %s\n", optarg); 162 | goto err; 163 | } 164 | break; 165 | case OPTION_HELP: 166 | goto help; 167 | case OPTION_PROTO: { 168 | /* convert string to number in host order */ 169 | int val, layer = proto2i(optarg, &val); 170 | if (!layer) { 171 | printf("protocol not found\n"); 172 | goto err; 173 | } 174 | S_SET(int, layer); 175 | S_DST(u16, val); 176 | break; 177 | } 178 | default: 179 | printf("invalid argument\n"); 180 | goto err; 181 | } 182 | } 183 | 184 | for_each_opt(i, options, item, option_size) { 185 | if (item->required && !item->__is_set) { 186 | if (item->sname) 187 | printf("-%c is necessary\n", item->sname); 188 | else 189 | printf("--%s is necessary\n", item->lname); 190 | goto err; 191 | } 192 | } 193 | 194 | free(long_opts); 195 | return 0; 196 | err: 197 | return -EINVAL; 198 | help: 199 | printf("%s: %s\n", config->name, config->summary); 200 | printf("\nUsage:\n"); 201 | for_each_opt(i, options, item, option_size) { 202 | char name[64]; 203 | if (item->type == OPTION_BLANK) { 204 | printf("\n"); 205 | continue; 206 | } 207 | if (item->sname && item->lname) 208 | sprintf(name, "-%c, --%s", item->sname, item->lname); 209 | else if (item->sname) 210 | sprintf(name, "-%c", item->sname); 211 | else 212 | sprintf(name, "--%s", item->lname); 213 | printf(" %-16s %s\n", name, item->desc); 214 | } 215 | free(long_opts); 216 | exit(0); 217 | } 218 | -------------------------------------------------------------------------------- /component/arg_parse.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_ARG_PARSE 4 | #define _H_ARG_PARSE 5 | #include 6 | 7 | enum option_type { 8 | OPTION_STRING, 9 | OPTION_BOOL, 10 | OPTION_BOOL_REV, 11 | OPTION_U16, 12 | OPTION_U16BE, 13 | OPTION_U32, 14 | OPTION_INT, 15 | OPTION_IPV4, 16 | OPTION_IPV6, 17 | OPTION_IPV4ORIPV6, 18 | OPTION_HELP, 19 | OPTION_BLANK, 20 | OPTION_PROTO, 
21 | }; 22 | 23 | typedef struct { 24 | char *lname; 25 | char sname; 26 | void *dest; 27 | enum option_type type; 28 | void *set; 29 | char *desc; 30 | bool required; 31 | int key; 32 | bool __is_set; 33 | } option_item_t; 34 | 35 | typedef struct { 36 | char *summary; 37 | char *name; 38 | char *desc; 39 | } arg_config_t; 40 | 41 | int parse_args(int argc, char *argv[], arg_config_t *config, 42 | option_item_t *options, 43 | int option_size); 44 | 45 | #endif -------------------------------------------------------------------------------- /component/list.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _LIST_H 4 | #define _LIST_H 5 | 6 | #include 7 | #include 8 | 9 | #define container_of(ptr, type, member) ({ \ 10 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 11 | (type *)( (char *)__mptr - offsetof(type,member) );}) 12 | 13 | #define LIST_POISON1 ((void *) 0x00100100) 14 | #define LIST_POISON2 ((void *) 0x00200200) 15 | 16 | struct list_head { 17 | struct list_head *next, *prev; 18 | }; 19 | 20 | struct hlist_head { 21 | struct hlist_node *first; 22 | }; 23 | 24 | struct hlist_node { 25 | struct hlist_node *next, **pprev; 26 | }; 27 | 28 | #define LIST_HEAD_INIT(name) { &(name), &(name) } 29 | 30 | #define LIST_HEAD(name) \ 31 | struct list_head name = LIST_HEAD_INIT(name) 32 | 33 | static inline void INIT_LIST_HEAD(struct list_head *list) 34 | { 35 | list->next = list; 36 | list->prev = list; 37 | } 38 | 39 | static inline void __list_add(struct list_head *new, 40 | struct list_head *prev, 41 | struct list_head *next) 42 | { 43 | next->prev = new; 44 | new->next = next; 45 | new->prev = prev; 46 | prev->next = new; 47 | } 48 | 49 | static inline void list_add(struct list_head *new, struct list_head *head) 50 | { 51 | __list_add(new, head, head->next); 52 | } 53 | 54 | 55 | static inline void list_add_tail(struct list_head *new, struct list_head *head) 56 | 
{ 57 | __list_add(new, head->prev, head); 58 | } 59 | 60 | /* Unlink whatever sits between prev and next by pointing the two at each other. */ static inline void __list_del(struct list_head * prev, struct list_head * next) 61 | { 62 | next->prev = prev; 63 | prev->next = next; 64 | } 65 | 66 | /* Unlink entry from its list; the links stored in entry itself are left untouched. */ static inline void __list_del_entry(struct list_head *entry) 67 | { 68 | __list_del(entry->prev, entry->next); 69 | } 70 | 71 | /* Unlink entry and overwrite its links with the LIST_POISON values. */ static inline void list_del(struct list_head *entry) 72 | { 73 | __list_del(entry->prev, entry->next); 74 | entry->next = LIST_POISON1; 75 | entry->prev = LIST_POISON2; 76 | } 77 | 78 | /* Unlink entry and reinitialise it as an empty list. */ static inline void list_del_init(struct list_head *entry) 79 | { 80 | __list_del_entry(entry); 81 | INIT_LIST_HEAD(entry); 82 | } 83 | 84 | /* Remove list from its current position and insert it right after head. */ static inline void list_move(struct list_head *list, struct list_head *head) 85 | { 86 | __list_del_entry(list); 87 | list_add(list, head); 88 | } 89 | 90 | /* Remove list from its current position and insert it right before head. */ static inline void list_move_tail(struct list_head *list, 91 | struct list_head *head) 92 | { 93 | __list_del_entry(list); 94 | list_add_tail(list, head); 95 | } 96 | 97 | /* Non-zero when head points at itself, i.e. the list has no entries. */ static inline int list_empty(const struct list_head *head) 98 | { 99 | return head->next == head; 100 | } 101 | 102 | /* Insert the nodes list->next through list->prev between prev and next; the list head itself is not modified. */ static inline void __list_splice(const struct list_head *list, 103 | struct list_head *prev, 104 | struct list_head *next) 105 | { 106 | struct list_head *first = list->next; 107 | struct list_head *last = list->prev; 108 | 109 | first->prev = prev; 110 | prev->next = first; 111 | 112 | last->next = next; 113 | next->prev = last; 114 | } 115 | 116 | /* Splice the entries of list in right after head. The list head still points at the moved nodes afterwards; list_splice_init() resets it. */ static inline void list_splice(const struct list_head *list, 117 | struct list_head *head) 118 | { 119 | if (!list_empty(list)) 120 | __list_splice(list, head, head->next); 121 | } 122 | 123 | /* Same as list_splice() but the entries go in right before head. */ static inline void list_splice_tail(struct list_head *list, 124 | struct list_head *head) 125 | { 126 | if (!list_empty(list)) 127 | __list_splice(list, head->prev, head); 128 | } 129 | 130 | /* Splice the entries of list in right after head, then reset list to empty. */ static inline void list_splice_init(struct list_head *list, 131 | struct list_head *head) 132 | { 133 | if (!list_empty(list)) { 134 | 
__list_splice(list, head, head->next); 135 | INIT_LIST_HEAD(list); 136 | } 137 | } 138 | 139 | /* Non-zero when the list holds exactly one entry. */ static inline int list_is_singular(const struct list_head *head) 140 | { 141 | return !list_empty(head) && (head->next == head->prev); 142 | } 143 | 144 | /* Non-zero when list is the entry directly after head. */ static inline int list_is_first(const struct list_head *list, const struct list_head *head) 145 | { 146 | return list->prev == head; 147 | } 148 | 149 | /* Non-zero when list is the entry directly before head. */ static inline int list_is_last(const struct list_head *list, const struct list_head *head) 150 | { 151 | return list->next == head; 152 | } 153 | 154 | /* Convert a struct list_head pointer back to the structure that embeds it as member. */ #define list_entry(ptr, type, member) \ 155 | container_of(ptr, type, member) 156 | 157 | /* First element of the list at ptr. On an empty list the result is computed from the head itself and does not refer to a real element. */ #define list_first_entry(ptr, type, member) \ 158 | list_entry((ptr)->next, type, member) 159 | 160 | /* Last element of the list at ptr; the same caveat for an empty list applies. */ #define list_last_entry(ptr, type, member) \ 161 | list_entry((ptr)->prev, type, member) 162 | 163 | /* Element preceding pos. */ #define list_prev_entry(pos, member) \ 164 | list_entry((pos)->member.prev, typeof(*(pos)), member) 165 | 166 | /* Forward iteration over the elements. The step reads pos->member.next, so pos must stay linked while the loop body runs. */ #define list_for_each_entry(pos, head, member) \ 167 | for (pos = list_entry((head)->next, typeof(*pos), member); \ 168 | &pos->member != (head); \ 169 | pos = list_entry(pos->member.next, typeof(*pos), member)) 170 | 171 | /* Forward iteration that keeps the following element in n before the body runs, so the body may unlink pos. */ #define list_for_each_entry_safe(pos, n, head, member) \ 172 | for (pos = list_entry((head)->next, typeof(*pos), member), \ 173 | n = list_entry(pos->member.next, typeof(*pos), member); \ 174 | &pos->member != (head); \ 175 | pos = n, n = list_entry(n->member.next, typeof(*n), member)) 176 | 177 | /* Recompute n as the element following pos. */ #define list_safe_reset_next(pos, n, member) \ 178 | n = list_entry(pos->member.next, typeof(*pos), member) 179 | 180 | /* True when pos is not a real element but the pseudo entry computed from head. */ #define list_entry_is_head(pos, head, member) \ 181 | (&pos->member == (head)) 182 | 183 | /* Backward iteration over the elements. */ #define list_for_each_entry_reverse(pos, head, member) \ 184 | for (pos = list_last_entry(head, typeof(*pos), member); \ 185 | !list_entry_is_head(pos, head, member); \ 186 | pos = list_prev_entry(pos, member)) 187 | 188 | /* Static initialiser and definition helper for an empty hlist head. */ #define HLIST_HEAD_INIT { .first = NULL } 189 | #define HLIST_HEAD(name) struct 
hlist_head name = { .first = NULL } 190 | #define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) 191 | /* Mark h as not being on any list. */ static inline void INIT_HLIST_NODE(struct hlist_node *h) 192 | { 193 | h->next = NULL; 194 | h->pprev = NULL; 195 | } 196 | 197 | /* Non-zero when pprev is NULL, which is the state INIT_HLIST_NODE() leaves a node in. */ static inline int hlist_unhashed(const struct hlist_node *h) 198 | { 199 | return !h->pprev; 200 | } 201 | 202 | /* Non-zero when the head has no first node. */ static inline int hlist_empty(const struct hlist_head *h) 203 | { 204 | return !h->first; 205 | } 206 | 207 | /* Unlink n by storing its successor through pprev; pprev is dereferenced unconditionally, so n must be on a list. */ static inline void __hlist_del(struct hlist_node *n) 208 | { 209 | struct hlist_node *next = n->next; 210 | struct hlist_node **pprev = n->pprev; 211 | *pprev = next; 212 | if (next) 213 | next->pprev = pprev; 214 | } 215 | 216 | /* Unlink n and overwrite its links with the LIST_POISON values. */ static inline void hlist_del(struct hlist_node *n) 217 | { 218 | __hlist_del(n); 219 | n->next = LIST_POISON1; 220 | n->pprev = LIST_POISON2; 221 | } 222 | 223 | /* Unlink n only when hlist_unhashed() says it is on a list, then reset it. */ static inline void hlist_del_init(struct hlist_node *n) 224 | { 225 | if (!hlist_unhashed(n)) { 226 | __hlist_del(n); 227 | INIT_HLIST_NODE(n); 228 | } 229 | } 230 | 231 | /* Insert n as the new first node of h. */ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) 232 | { 233 | struct hlist_node *first = h->first; 234 | n->next = first; 235 | if (first) 236 | first->pprev = &n->next; 237 | h->first = n; 238 | n->pprev = &h->first; 239 | } 240 | 241 | /* Insert n immediately before next; next must already be on a list. */ static inline void hlist_add_before(struct hlist_node *n, 242 | struct hlist_node *next) 243 | { 244 | n->pprev = next->pprev; 245 | n->next = next; 246 | next->pprev = &n->next; 247 | *(n->pprev) = n; 248 | } 249 | 250 | /* Insert next immediately after n. Mind the parameter roles: n is the node already on the list and next is the node being added. */ static inline void hlist_add_after(struct hlist_node *n, 251 | struct hlist_node *next) 252 | { 253 | next->next = n->next; 254 | n->next = next; 255 | next->pprev = &n->next; 256 | 257 | if(next->next) 258 | next->next->pprev = &next->next; 259 | } 260 | 261 | /* Point pprev at the node itself, so hlist_unhashed() returns zero although the node is on no list. */ static inline void hlist_add_fake(struct hlist_node *n) 262 | { 263 | n->pprev = &n->next; 264 | } 265 | 266 | /* Move the whole chain from old to new and leave old empty. */ static inline void hlist_move_list(struct hlist_head *old, 267 | struct hlist_head *new) 268 | { 269 | new->first = 
old->first; 270 | if (new->first) 271 | new->first->pprev = &new->first; 272 | old->first = NULL; 273 | } 274 | 275 | /* Convert a struct hlist_node pointer back to the structure that embeds it as member. */ #define hlist_entry(ptr, type, member) container_of(ptr,type,member) 276 | 277 | /* Iterate with pos as the raw node cursor and tpos as the containing structure. The step reads pos->next, so pos must stay linked while the body runs. */ #define hlist_for_each_entry(tpos, pos, head, member) \ 278 | for (pos = (head)->first; \ 279 | pos && \ 280 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 281 | pos = pos->next) 282 | 283 | /* Same iteration, but the successor is saved in n before the body runs, so the body may unlink pos. */ #define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ 284 | for (pos = (head)->first; \ 285 | pos && ({ n = pos->next; 1; }) && \ 286 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 287 | pos = n) 288 | 289 | #endif 290 | -------------------------------------------------------------------------------- /component/net_utils.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #include "net_utils.h" 4 | #include "sys_utils.h" 5 | 6 | /* NOTE(review): the target of the include below is missing in this copy. _LINUX_IN_H is defined first, presumably so that the guarded linux/in.h is skipped when that header is pulled in - confirm against the repository. */ #define _LINUX_IN_H 7 | #include 8 | 9 | /* One protocol name together with its numeric value. */ typedef struct { 10 | char *name; 11 | int val; 12 | } proto_item_t; 13 | 14 | /* Walk the size entries of protos, with i as index and item as cursor. */ #define for_each_protos(protos, size, i, item) \ 15 | for (i = 0, item = protos; i < size; \ 16 | i++, item = protos + i) 17 | 18 | /* Names accepted by l3proto2i(). The values look like EtherType numbers - TODO confirm against linux/if_ether.h. The names pup and ipv6 are also present in l4_protos; proto2i() consults this table first. */ static proto_item_t l3_protos[] = { 19 | { "loop", 0x0060 }, 20 | { "pup", 0x0200 }, 21 | { "pupat", 0x0201 }, 22 | { "tsn", 0x22F0 }, 23 | { "erspan2", 0x22EB }, 24 | { "ip", 0x0800 }, 25 | { "x25", 0x0805 }, 26 | { "arp", 0x0806 }, 27 | { "bpq", 0x08FF }, 28 | { "ieeepup", 0x0a00 }, 29 | { "ieeepupat", 0x0a01 }, 30 | { "batman", 0x4305 }, 31 | { "dec", 0x6000 }, 32 | { "dna_dl", 0x6001 }, 33 | { "dna_rc", 0x6002 }, 34 | { "dna_rt", 0x6003 }, 35 | { "lat", 0x6004 }, 36 | { "diag", 0x6005 }, 37 | { "cust", 0x6006 }, 38 | { "sca", 0x6007 }, 39 | { "teb", 0x6558 }, 40 | { "rarp", 0x8035 }, 41 | { "atalk", 0x809B }, 42 | { "aarp", 0x80F3 }, 43 | { "8021q", 0x8100 }, 44 | { "erspan", 0x88BE }, 45 | { "ipx", 0x8137 }, 46 | { "ipv6", 0x86DD }, 47 | { "pause", 0x8808 }, 48 | { "slow", 0x8809 }, 49
| { "wccp", 0x883E }, 50 | }; 51 | 52 | /* Names accepted by l4proto2i(), mapped to protocol numbers. */ static proto_item_t l4_protos[] = { 53 | { "icmp", 1 }, 54 | { "igmp", 2 }, 55 | { "ipip", 4 }, 56 | { "tcp", 6 }, 57 | { "egp", 8 }, 58 | { "pup", 12 }, 59 | { "udp", 17 }, 60 | { "idp", 22 }, 61 | { "tp", 29 }, 62 | { "dccp", 33 }, 63 | { "ipv6", 41 }, 64 | { "rsvp", 46 }, 65 | { "gre", 47 }, 66 | { "esp", 50 }, 67 | { "ah", 51 }, 68 | { "icmpv6", 58 }, 69 | { "mtp", 92 }, 70 | { "beetph", 94 }, 71 | { "encap", 98 }, 72 | { "pim", 103 }, 73 | { "comp", 108 }, 74 | { "sctp", 132 }, 75 | { "udplite", 136 }, 76 | { "mpls", 137 }, 77 | { "raw", 255 }, 78 | }; 79 | 80 | /* Reverse table indexed by protocol number. The highest designator is 255, so the array has 256 slots; slots without a designator are NULL. Slot 0 is labelled TCP although l4_protos has no entry with value 0 - NOTE(review): confirm that this is intended. */ char *l4_proto_names[] = { 81 | [0] = "TCP", 82 | [1] = "ICMP", 83 | [2] = "IGMP", 84 | [4] = "IPIP", 85 | [6] = "TCP", 86 | [8] = "EGP", 87 | [12] = "PUP", 88 | [17] = "UDP", 89 | [22] = "IDP", 90 | [29] = "TP", 91 | [33] = "DCCP", 92 | [41] = "IPV6", 93 | [46] = "RSVP", 94 | [47] = "GRE", 95 | [50] = "ESP", 96 | [58] = "ICMPV6", 97 | [51] = "AH", 98 | [92] = "MTP", 99 | [94] = "BEETPH", 100 | [98] = "ENCAP", 101 | [103] = "PIM", 102 | [108] = "COMP", 103 | [132] = "SCTP", 104 | [136] = "UDPLITE", 105 | [137] = "MPLS", 106 | [255] = "RAW", 107 | }; 108 | 109 | /* Linear search of protos for an entry whose name equals name exactly (strcmp); returns the entry or NULL. */ static proto_item_t *proto_search(proto_item_t *protos, int size, 110 | char *name) 111 | { 112 | proto_item_t *item; 113 | int i = 0; 114 | 115 | for_each_protos(protos, size, i, item) { 116 | if (strcmp(item->name, name) == 0) 117 | return item; 118 | } 119 | return NULL; 120 | } 121 | 122 | /* Look proto up in l3_protos. On a match the value is stored in *dest and 0 is returned; otherwise -1 is returned and *dest is left untouched. */ int l3proto2i(char *proto, int *dest) 123 | { 124 | proto_item_t *item = proto_search(l3_protos, ARRAY_SIZE(l3_protos), 125 | proto); 126 | if (item) { 127 | *dest = item->val; 128 | return 0; 129 | } 130 | return -1; 131 | } 132 | 133 | /* Same contract as l3proto2i(), using l4_protos. */ int l4proto2i(char *proto, int *dest) 134 | { 135 | proto_item_t *item = proto_search(l4_protos, ARRAY_SIZE(l4_protos), 136 | proto); 137 | if (item) { 138 | *dest = item->val; 139 | return 0; 140 | } 141 | return -1; 142 | } 143 | 144 | /* Resolve a protocol name of either layer; the return value tells which table matched. */ int proto2i(char *proto, int *dest) 145 | {
146 | /* Returns 3 when the name is in l3_protos, 4 when it is in l4_protos and 0 when it is unknown; the L3 table is tried first. */ if (!l3proto2i(proto, dest)) 147 | return 3; 148 | if (!l4proto2i(proto, dest)) 149 | return 4; 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /component/net_utils.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_NET_UTILS 4 | #define _H_NET_UTILS 5 | 6 | /* NOTE(review): the targets of the seven includes below are missing in this copy; restore them from the repository. */ #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /* Short aliases for the fixed-width __sN and __uN types. */ typedef __s8 s8; 15 | typedef __u8 u8; 16 | typedef __s16 s16; 17 | typedef __u16 u16; 18 | typedef __s32 s32; 19 | typedef __u32 u32; 20 | typedef __s64 s64; 21 | typedef __u64 u64; 22 | 23 | extern char *l4_proto_names[]; 24 | 25 | /* Name for an L4 protocol number, taken from l4_proto_names; the result is NULL for numbers that have no entry in that table. */ static inline char *i2l4(u8 num) 26 | { 27 | return l4_proto_names[num]; 28 | } 29 | 30 | int proto2i(char *proto, int *dest); 31 | 32 | #endif -------------------------------------------------------------------------------- /component/parse_sym.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | /* NOTE(review): the targets of the three includes below are missing in this copy. */ #include 4 | #include 5 | #include 6 | 7 | #include "parse_sym.h" 8 | #include "sys_utils.h" 9 | 10 | /* Exchange a and b through a temporary of the same type (GNU typeof). */ #define SWAP(a, b) { typeof(a) _tmp = (b); (b) = (a); (a) = _tmp; } 11 | 12 | /* proc_syms: heap copy of /proc/kallsyms filled by sym_init_data(). result_list: singly linked cache of resolved symbols, newest first. */ static char *proc_syms = NULL; 13 | struct sym_result *result_list; 14 | struct loc_result *loc_list; 15 | 16 | /* Read all of /proc/kallsyms into proc_syms on the first call; later calls return at once. The buffer starts at 4 MiB and doubles whenever a read does not reach end of file. NOTE(review): the malloc result is not checked, and the loop ends only when feof() is set, so a read error that does not set the end-of-file flag would keep it spinning. */ static int sym_init_data() 17 | { 18 | size_t size = 1024 * 1024 * 4; // begin with 4M 19 | char *cur, *tmp; 20 | int count; 21 | FILE *f; 22 | 23 | if (proc_syms) 24 | return 0; 25 | 26 | f = fopen("/proc/kallsyms", "r"); 27 | if (!f) { 28 | pr_err("/proc/kallsyms is not founded!\n"); 29 | exit(-1); 30 | } 31 | 32 | proc_syms = malloc(size); 33 | cur = proc_syms; 34 | while (true) { 35 | count = fread(cur, sizeof(char), size + proc_syms - cur, 36 | f); 37 | if (feof(f)) 38 | break; 39 | 40 | count += cur - proc_syms; 41 | size <<= 1; 42 | tmp = 
realloc(proc_syms, size); 43 | /* NOTE(review): tmp is used without a NULL check. */ cur = tmp + count; 44 | proc_syms = tmp; 45 | } 46 | 47 | /* NOTE(review): the function returns without closing f and without writing a terminating NUL after the data, although sym_search_pattern() runs strstr() over proc_syms. */ return 0; 48 | } 49 | 50 | /* Push result onto the front of result_list. */ static void sym_add_cache(struct sym_result *result) 51 | { 52 | if (!result_list) { 53 | result_list = result; 54 | result->next = NULL; 55 | return; 56 | } 57 | result->next = result_list; 58 | result_list = result; 59 | } 60 | 61 | /* Look pc up in the cache. Exact mode returns the entry whose start equals pc. Otherwise the first entry whose range contains pc is taken: it is returned directly when its pc matches, else a copy with pc and desc rewritten is pushed onto the cache and returned. NOTE(review): because the search stops at the first containing entry, alternating lookups of two addresses inside one symbol add a new copy every time. The sprintf into desc is unbounded. */ static struct sym_result *sym_lookup_cache(__u64 pc, bool exact) 62 | { 63 | struct sym_result *head = result_list, *sym = NULL; 64 | while (head) { 65 | if (!exact) { 66 | if (pc >= head->start && pc < head->end) { 67 | if (head->pc == pc) 68 | return head; 69 | sym = head; 70 | break; 71 | } 72 | } else { 73 | if (head->start == pc) 74 | return head; 75 | } 76 | head = head->next; 77 | } 78 | if (!sym) 79 | return NULL; 80 | head = malloc(sizeof(*head)); 81 | if (!head) 82 | return NULL; 83 | memcpy(head, sym, sizeof(*head)); 84 | head->pc = pc; 85 | sprintf(head->desc, "%s+0x%llx", head->name, pc - head->start); 86 | sym_add_cache(head); 87 | return head; 88 | } 89 | 90 | /* Resolve pc by scanning /proc/kallsyms line by line, remembering the previous address and name (ppc, pname) next to the current ones (cpc, cname). A hit is reported for the previous symbol: in exact mode when its address equals pc, otherwise when pc lies in the range from ppc up to but excluding cpc. The hit is cached and returned; NULL is returned when nothing matches. NOTE(review): the fscanf result is only tested for being negative, the name conversion has no field width, strcpy copies from a 1024-byte buffer into the name field of MAX_SYM_LENGTH bytes, and in non-exact mode a pc below the first listed address matches on the first iteration while _pname is still uninitialised. */ static struct sym_result *sym_lookup_proc(__u64 pc, bool exact) 91 | { 92 | char _cname[1024], _pname[1024], *pname = _pname, *cname = _cname; 93 | struct sym_result *result; 94 | __u64 cpc, ppc = 0; 95 | FILE *f; 96 | 97 | f = fopen("/proc/kallsyms", "r"); 98 | if (!f) 99 | return NULL; 100 | 101 | result = malloc(sizeof(*result)); 102 | if (!result) 103 | goto err_out; 104 | 105 | while (true) { 106 | if (fscanf(f, "%llx %*s %s [ %*[^]] ]", &cpc, cname) < 0) 107 | break; 108 | 109 | if (exact) { 110 | if (ppc != pc) { 111 | SWAP(cname, pname); 112 | ppc = cpc; 113 | continue; 114 | } 115 | } else { 116 | if (pc < ppc || pc >= cpc) { 117 | SWAP(cname, pname); 118 | ppc = cpc; 119 | continue; 120 | } 121 | } 122 | 123 | strcpy(result->name, pname); 124 | result->start = ppc; 125 | result->end = cpc; 126 | result->pc = pc; 127 | sprintf(result->desc, "%s+0x%llx", result->name, 128 | pc - result->start); 129 | sym_add_cache(result); 130 | fclose(f); 
131 | return result; 132 | } 133 | free(result); 134 | err_out: 135 | fclose(f); 136 | return NULL; 137 | } 138 | 139 | /* Resolve pc to the symbol whose address range contains it, trying the cache before /proc/kallsyms. Returns NULL for pc 0 or when nothing matches. The returned entry is owned by the cache. */ struct sym_result *sym_parse(__u64 pc) 140 | { 141 | if (!pc) 142 | return NULL; 143 | return sym_lookup_cache(pc, false) ?: sym_lookup_proc(pc, false); 144 | } 145 | 146 | /* Same as sym_parse(), but only a symbol that starts exactly at pc is accepted. */ struct sym_result *sym_parse_exact(__u64 pc) 147 | { 148 | if (!pc) 149 | return NULL; 150 | return sym_lookup_cache(pc, true) ?: sym_lookup_proc(pc, true); 151 | } 152 | 153 | /* Search the in-memory kallsyms text for a symbol called name, or one that merely starts with name when partial is set. Returns SYM_NOT_EXIST, or SYM_MODULE when the matching line carries a bracketed module name, or SYM_KERNEL otherwise; the matched symbol name is copied to result when result is non-NULL. NOTE(review): sprintf into the 128-byte search buffer is not bounded by the length of name, the two sscanf conversions into func and module have no field width, and the strcpy into result relies on the caller for space. */ int sym_search_pattern(const char *name, char *result, bool partial) 154 | { 155 | char func[128], module[128], search[128], *target; 156 | int count; 157 | 158 | sym_init_data(); 159 | 160 | sprintf(search, " %s", name); 161 | target = proc_syms; 162 | while (true) { 163 | target = strstr(target, search); 164 | if (!target) 165 | break; 166 | 167 | count = sscanf(target, " %s [%[^]]]", func, module); 168 | /* step past this hit so the next strstr() call finds the following one */ target++; 169 | 170 | if (count <= 0) 171 | continue; 172 | 173 | if (partial && strncmp(func, name, strlen(name)) == 0) 174 | goto found; 175 | if (!partial && strcmp(func, name) == 0) 176 | goto found; 177 | } 178 | 179 | return SYM_NOT_EXIST; 180 | found: 181 | if (result) 182 | strcpy(result, func); 183 | 184 | return count == 2 ? 
SYM_MODULE : SYM_KERNEL; 185 | } 186 | -------------------------------------------------------------------------------- /component/parse_sym.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | /* NOTE(review): the targets of the three includes below are missing in this copy. Unlike list.h, net_utils.h and sys_utils.h, this header has no include guard. */ #include 4 | #include 5 | #include 6 | 7 | #define MAX_SYM_LENGTH 256 8 | /* Eight bytes beyond the name: with a 255-character name that leaves room for the +0x prefix, at most 5 hex digits and the terminator. */ #define MAX_SYM_ADDR_LENGTH (MAX_SYM_LENGTH + 8) 9 | 10 | /* Result codes of sym_search_pattern(). */ enum { 11 | SYM_NOT_EXIST, 12 | SYM_MODULE, 13 | SYM_KERNEL 14 | }; 15 | 16 | /* One resolved symbol: name, address range from start up to but excluding end, the looked-up address pc, desc holding name+0xoffset as text, and the link to the next cache entry. */ struct sym_result { 17 | char name[MAX_SYM_LENGTH]; 18 | __u64 start; 19 | __u64 end; 20 | char desc[MAX_SYM_ADDR_LENGTH]; 21 | __u64 pc; 22 | struct sym_result *next; 23 | }; 24 | 25 | struct sym_result *sym_parse_exact(__u64 pc); 26 | struct sym_result *sym_parse(__u64 pc); 27 | int sym_search_pattern(const char *name, char *result, bool partial); 28 | 29 | /* Exact-name lookup that only reports the SYM_* result code. */ static inline int sym_get_type(const char *name) 30 | { 31 | return sym_search_pattern(name, NULL, false); 32 | } 33 | -------------------------------------------------------------------------------- /component/sys_utils.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | /* NOTE(review): the targets of the thirteen includes below are missing in this copy. */ #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "sys_utils.h" 18 | 19 | /* __hz caches the value returned by kernel_hz(); log_level is the threshold used by the pr_level() macro. */ static int __hz = -1; 20 | int log_level = 0; 21 | 22 | /* Run cmd through popen(), append everything it prints to output when output is non-NULL, and return the exit status of the command. NOTE(review): the popen() result is used without a NULL check, output is filled with strcat so the caller has to provide room for all of the text, and WEXITSTATUS is applied without checking WIFEXITED. */ int exec(char *cmd, char *output) 23 | { 24 | FILE *f = popen(cmd, "r"); 25 | char buf[128]; 26 | int status; 27 | 28 | if (output) 29 | output[0] = '\0'; 30 | 31 | while (fgets(buf, sizeof(buf) - 1, f) != NULL) { 32 | if (!output) 33 | continue; 34 | strcat(output + strlen(output), buf); 35 | } 36 | 37 | status = pclose(f); 38 | pr_debug("command: %s, status:%d\n", cmd, WEXITSTATUS(status)); 39 | return WEXITSTATUS(status); 40 | } 41 | 42 | /* printf-style front end to exec(): the command line is built from fmt and the variable arguments. */ int execf(char *output, char *fmt, ...) 
43 | { 44 | /* The command is formatted into a static 1024-byte buffer, so the function is not reentrant. NOTE(review): vsprintf does not bound the formatted length. */ static char cmd[1024]; 45 | va_list valist; 46 | 47 | va_start(valist, fmt); 48 | vsprintf(cmd, fmt, valist); 49 | va_end(valist); 50 | 51 | return exec(cmd, output); 52 | } 53 | 54 | /* Set both the soft and the hard RLIMIT_MEMLOCK limit to RLIM_INFINITY; returns the setrlimit() result. */ int liberate_l() 55 | { 56 | struct rlimit lim = {RLIM_INFINITY, RLIM_INFINITY}; 57 | return setrlimit(RLIMIT_MEMLOCK, &lim); 58 | } 59 | 60 | /* Read f one whitespace-separated token at a time and report whether any token contains target. NOTE(review): the conversion has no field width, so a token of 128 characters or more overruns tmp. */ bool fsearch(FILE *f, char *target) 61 | { 62 | char tmp[128]; 63 | 64 | while (fscanf(f, "%s", tmp) == 1) { 65 | if (strstr(tmp, target)) 66 | return true; 67 | } 68 | return false; 69 | } 70 | 71 | /* Version of the running kernel, parsed from the uname() release string and packed by kv_to_num(). NOTE(review): the uname and sscanf results are not checked, so major, minor or patch stay uninitialised when the release string has fewer than three numeric fields. */ int kernel_version() 72 | { 73 | int major, minor, patch; 74 | struct utsname buf; 75 | 76 | uname(&buf); 77 | sscanf(buf.release, "%d.%d.%d", &major, &minor, &patch); 78 | 79 | return kv_to_num(major, minor, patch); 80 | } 81 | 82 | /* Same parse as kernel_version(), returned as major.minor.patch text in a static 16-byte buffer that the next call overwrites. */ char *kernel_version_str() 83 | { 84 | static char version[16]; 85 | int major, minor, patch; 86 | struct utsname buf; 87 | 88 | uname(&buf); 89 | sscanf(buf.release, "%d.%d.%d", &major, &minor, &patch); 90 | sprintf(version, "%d.%d.%d", major, minor, patch); 91 | 92 | return version; 93 | } 94 | 95 | /* True when the shell pipeline that greps the mount output for debugfs exits with status 0. */ bool debugfs_mounted() 96 | { 97 | return simple_exec("mount | grep debugfs") == 0; 98 | } 99 | 100 | /* Look up CONFIG_<name> by shelling out to grep: in /proc/config.gz when that file exists, otherwise in the /boot/config file of the running release. Returns the exit status of the command; when it is 0 and output is non-NULL, the text after the first equals sign is copied to output. NOTE(review): name is pasted into the shell command unescaped, and neither the 128-byte tmp buffer nor output is protected against longer text. */ int kernel_get_config(char *name, char *output) 101 | { 102 | char tmp[128] = {}; 103 | int err; 104 | 105 | if (file_exist("/proc/config.gz")) 106 | err = execf(tmp, "zcat /proc/config.gz | grep 'CONFIG_%s=' 2>&1", 107 | name); 108 | else 109 | err = execf(tmp, "grep 'CONFIG_%s=' /boot/config-$(uname -r)" 110 | " 2>&1", name); 111 | 112 | if (!output || err) 113 | return err; 114 | 115 | sscanf(tmp, "%*[^=]=%s", output); 116 | return err; 117 | } 118 | 119 | /* True only when the option value begins with y; a value of m or a number yields false. */ bool kernel_has_config(char *name) 120 | { 121 | char type[32] = {}; 122 | return kernel_get_config(name, type) == 0 && type[0] == 'y'; 123 | } 124 | 125 | /* CONFIG_HZ of the running kernel, cached in __hz once a positive value has been obtained; -ENOTSUP when the configuration cannot be read. */ int kernel_hz() 126 | { 127 | char hz[32] = {}; 128 | int err; 129 | 130 | if (__hz > 0) 131 | return __hz; 132 | 133 | err = kernel_get_config("HZ", hz); 134 | if (err) 135 | return -ENOTSUP; 136 | 
137 | /* atoi yields 0 for non-numeric text; since only values above 0 count as cached, such a result is looked up again on the next call. */ __hz = atoi(hz); 138 | return __hz; 139 | } 140 | 141 | /* Inode number for path, or 0 when the path does not exist or cannot be examined. A string of the form text:[number] is answered directly from the number; otherwise st_ino from stat() is returned, narrowed to u32. NOTE(review): stat() follows symbolic links, so the S_ISLNK branch below cannot be taken as written; that branch also calls readlink on path rather than __path and uses the result without adding a terminating NUL. */ u32 file_inode(char *path) 142 | { 143 | char tmp1[128], tmp2[128]; 144 | struct stat file_stat; 145 | char *__path = path; 146 | u32 inode; 147 | 148 | if (!file_exist(path)) 149 | return 0; 150 | 151 | again: 152 | if (sscanf(__path, "%*[^:]:[%u]", &inode) == 1) 153 | return inode; 154 | 155 | if (stat(__path, &file_stat) == -1) 156 | return 0; 157 | 158 | if (S_ISLNK(file_stat.st_mode)) { 159 | if (readlink(path, tmp1, sizeof(tmp1)) == -1) 160 | return 0; 161 | memcpy(tmp2, tmp1, sizeof(tmp1)); 162 | __path = tmp2; 163 | goto again; 164 | } 165 | 166 | return file_stat.st_ino; 167 | } 168 | -------------------------------------------------------------------------------- /component/sys_utils.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_SYS_UTILS 4 | #define _H_SYS_UTILS 5 | 6 | /* NOTE(review): the targets of the seven includes below are missing in this copy. */ #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "net_utils.h" 15 | 16 | extern int log_level; 17 | 18 | /* NOTE(review): kernel_get_config() is defined without static in sys_utils.c but has no prototype in this list. */ int execf(char *output, char *fmt, ...); 19 | int exec(char *cmd, char *output); 20 | int liberate_l(); 21 | bool fsearch(FILE *f, char *target); 22 | int kernel_version(); 23 | char *kernel_version_str(); 24 | bool debugfs_mounted(); 25 | bool kernel_has_config(char *name); 26 | int kernel_hz(); 27 | u32 file_inode(char *path); 28 | 29 | /* Run cmd and discard whatever it prints; returns its exit status. */ static inline int simple_exec(char *cmd) 30 | { 31 | return exec(cmd, NULL); 32 | } 33 | 34 | /* True when access() with F_OK succeeds for path. */ static inline bool file_exist(const char *path) 35 | { 36 | return access(path, F_OK) == 0; 37 | } 38 | 39 | /* Pack a version as major * 65536 + minor * 256 + patch; a patch level above 255 spills into the minor field. */ static inline int kv_to_num(int major, int minor, int patch) 40 | { 41 | return (major << 16) + (minor << 8) + patch; 42 | } 43 | 44 | /* compare current kernel version with the provided one */ 45 | static inline int kv_compare(int major, int minor, int patch) 46 | { 47 | return kernel_version() - kv_to_num(major, minor, patch); 48
| } 49 | 50 | /* Print to target with fprintf and flush it, but only when level does not exceed the global log_level. */ #define pr_level(level, target, fmt, args...) \ 51 | do { \ 52 | if (level <= log_level) { \ 53 | fprintf(target, fmt, ##args); \ 54 | fflush(target); \ 55 | } \ 56 | } while (0) 57 | 58 | /* Convenience wrappers: info, warn and err use level 0, verb level 1 and debug level 2. warn and err go to stderr wrapped in terminal escape sequences; because the prefix is pasted onto fmt, fmt must be a string literal. */ #define pr_info(fmt, args...) pr_level(0, stdout, fmt, ##args) 59 | #define pr_verb(fmt, args...) pr_level(1, stdout, fmt, ##args) 60 | #define pr_warn(fmt, args...) pr_level(0, stderr, "\033[0;34mWARN: "fmt"\033[0m", ##args) 61 | #define pr_err(fmt, args...) pr_level(0, stderr, "\033[0;31mERROR: "fmt"\033[0m", ##args) 62 | #define pr_debug(fmt, args...) pr_level(2, stdout, "DEBUG: "fmt, ##args) 63 | 64 | /* Terminal escape sequences, and helpers that wrap a string literal between one of them and PFMT_END. */ #define PFMT_EMPH "\033[0;33m" 65 | #define PFMT_WARN "\033[0;32m" 66 | #define PFMT_ERROR "\033[0;31m" 67 | #define PFMT_END "\033[0m" 68 | 69 | #define PFMT_EMPH_STR(str) PFMT_EMPH str PFMT_END 70 | #define PFMT_WARN_STR(str) PFMT_WARN str PFMT_END 71 | #define PFMT_ERROR_STR(str) PFMT_ERROR str PFMT_END 72 | 73 | /* Assign the global log_level; the expansion is a bare assignment without parentheses. */ #define set_log_level(l) log_level = l 74 | 75 | /* Append formatted text at the current end of the NUL-terminated buf; nothing limits the length written. */ #define sprintf_end(buf, fmt, args...) \ 76 | sprintf(strlen(buf) + buf, fmt, ##args) 77 | /* Element count of a true array; not valid for pointers. */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) 78 | 79 | /* NOTE(review): the MIN and MAX arguments are not parenthesised in the expansion and one of them is evaluated twice. */ #define MIN(a, b) (a > b ? b : a) 80 | #define MAX(a, b) (a > b ? 
a : b) 81 | 82 | #define PTR2X(ptr) (__u64)(void *)ptr 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /docs/develop.md: -------------------------------------------------------------------------------- 1 | # 开发手册 2 | 3 | ## 一、项目结构介绍 4 | 5 | ### 1.1 文件夹 6 | 7 | - component:组件模块,封装了一些通用的C函数 8 | 9 | - docs:项目的文档目录 10 | 11 | - legacy:老的基于BCC的nettrace,已遗弃 12 | 13 | - nodetrace:节点报文跟踪模块 14 | 15 | - script:项目构建过程中需要用到的一些脚本等 16 | 17 | - shared:网络报文处理(用户态和BPF)用到的一些封装的相对较为通用的函数 18 | 19 | - bpf/skb_parse.h:封装的BPF程序相关的网络报文处理函数 20 | 21 | - bpf/vmlinux*:经过BTF生成的内核头文件,包含了内核中所使用到的所有的结构体 22 | 23 | - pkt_utils.c:封装的用户态使用的网络报文处理(解析、打印)函数 24 | 25 | - bpf_utils.c:封装的一些用于简化BPF程序处理的函数 26 | 27 | - src:nettrace的核心代码 28 | 29 | - progs:BPF代码目录,其中kprobe.c是基于kprobe-BPF实现的BPF程序,tracing是基于tracing-BPF技术(还未实现,等待开发中)。除了该目录下,其他的代码都是用户态的代码。 30 | 31 | - analysis.c:解析器代码,用来处理BPF程序采集到的数据 32 | 33 | - dropreason.c:用于支持内核特性`skb drop reason`的用户态代码 34 | 35 | - gen_trace.py:根据`trace.yaml`里定义的内核函数和tracepoint来生成`trace_group.c`和`kprobe_trace.h` 36 | 37 | - nettrace.c:nettrace主程序的入口函数,定义了命令行参数等 38 | 39 | - trace_probe.c:用于处理基于kprobe-BPF类型的BPF程序的加载和数据处理 40 | 41 | - trace_tracing.c:用于处理基于tracing-BPF类型的BPF程序的加载和数据处理(暂未实现) 42 | 43 | - trace.c:BPF程序的检查、加载等部分的功能函数 44 | 45 | - trace.yaml:定义了nettrace所有的支持跟踪的内核函数和tracepoint。同时,诊断模式的规则也定义在了这里面 46 | 47 | - vmlinux_header.h:对于不支持BTF(COMPAT模式)的情况,会使用这里的头文件 48 | 49 | ### 1.2 项目编译过程 50 | 51 | 项目的部分编译过程如下图所示,其中kprobe.o是经过CLANG编译出来的BPF的ELF文件,经过bpftool生成skel头文件。 52 | 53 | ``` 54 | nettrace.c ----------------- nettrace 55 | trace.c | 56 | xxxxx | 57 | | 58 | | 59 | trace_group.c | 60 | ╱ | 61 | trace.yaml -- gen_trace.py | 62 | ╲ | 63 | kprobe_trace.h | 64 | ╲ | 65 | kprobe.o → kprobe.skel.h 66 | ╱ 67 | kprobe.c 68 | ``` 69 | 70 | ## 二、项目加载及运行 71 | 72 | 整个nettrace在运行过程中的代码执行逻辑如下图所示: 73 | 74 | ![](images/nettrace-start.svg) 75 | 76 | 这里没有列出`poll`(BPF事件处理)的逻辑,这块比较复杂,后面再补上。 77 | 78 | ## 三、开发介绍 79 | 80 | ### 3.1 trace.yaml格式 81 | 82
| 这个配置文件是项目的核心配置,里面按照`yaml`格式保存了所有的支持的内核函数,按照树状图的结构来配置的。在树状图中,所有的叶节点表示的都是trace(跟踪点,内核函数或者tracepoint),非叶节点代表的都是网络模块,也可以理解为目录。 83 | 84 | **网络模块(目录)格式**: 85 | 86 | - name:名称 87 | - desc:一段描述 88 | - visual:是否对用户可见,默认true 89 | - children:子目录,或者当前目录下的traces 90 | 91 | **trace的格式**: 92 | 93 | - name:在未指定target的情况下,这个名称就是要跟踪的内核函数的名称。 94 | - target:当前trace针对的内核函数。在name和内核函数不同的情况下,可以通过target来指定需要跟踪的内核函数。 95 | - skb:skb参数在该内核函数参数中的位置,从0开始 96 | - sock:sk参数在该内核函数参数中的位置,从0开始(仅用作--sock模式) 97 | - tp:tracepoint类型需要写的,tracepoint的位置。格式:dir:tracepoint 98 | - analyzer:解析器。该参数用于指定诊断模式下分析当前函数采集到的数据的诊断器,默认不进行特殊的数据格式检查和处理。 99 | - rules:定义诊断模式下的规则。规则有三种级别,分别是: 100 | - `info`:提示性规则,只是给个信息提示 101 | - `warn`:警告性规则,命中当前规则可能意味着网络可能会发生问题 102 | - `error`:出错性规则,命中当前规则意味着发生了网络丢包、网络异常 103 | rules是一个数组,可以为每个trace指定多条规则。rules的格式如下: 104 | - exp:命中当前规则的表达式。目前支持的表达式包括any(一定会命中)和返回值表达式。返回值表达式支持的语句包括:eq(等于)、ne(不等于)、lt(小于)、gt(大于)、range(指定一个范围)。例如:`eq 0`代表返回值等于0就命中规则。 105 | - msg:当命中规则后给出的信息 106 | - adv:诊断建议,一般用于`error`级别的规则。 107 | 108 | **简写方式**: 109 | 110 | 可以在`name`中指定skb或者sk的索引,其格式为:内核函数名称:skb/sock,其中skb指的是skb参数在该内核函数参数中的位置,从0开始;sock代表sk在该内核函数中的位置(非必须,--sock模式下的跟踪点)。如果只跟踪skb,那只需要写成:function:skb的格式即可。如果当前定义的trace只包含`name`,那么可以进一步对其进行简化,只需要将trace定义为字符串即可,如下所示: 111 | 112 | ```yaml 113 | - name: ip_rcv:0 # 定义了一个trace对象,跟踪的是内核函数ip_rcv,其中skb在这个函数参数中的索引为0 114 | - ip_rcv:0 # 使用字符串来定义trace,作用和上面的一样 115 | - name: inet_listen/0 # 定义了一个trace对象,跟踪的是内核函数inet_listen,其中sock在这个函数参数中的索引为0,该trace仅在sock模式下有效。 116 | ``` 117 | 118 | ### 3.2 诊断器开发 119 | 120 | 常规场景下,如果我们有需要跟踪的内核函数或者场景,只需要在`trace.yaml`中增加对应的`trace`即可。如果需要增加额外的数据采集和分析能力,就需要增加自定义`诊断器`了。新增诊断器所要做的修改包括以下内容: 121 | 122 | **BPF代码编写** 123 | 124 | 在`progs/kprobe.c`中使用`DEFINE_KPROBE_SKB`来定义一个用来跟踪内核函数的trace,这里我们假设要跟踪的内核函数为`sch_direct_xmit`: 125 | 126 | ```c 127 | 128 | DEFINE_KPROBE_SKB(sch_direct_xmit, 1) { 129 | struct Qdisc *q = info_get_arg(info, 2); 130 | struct netdev_queue *txq; 131 | DECLARE_EVENT(qdisc_event_t, e) 132 | 133 | txq = _C(q, dev_queue); 134 | e->state = 
_C(txq, state); 135 | xxxxxx 136 | return handle_entry(info, e_size); 137 | ``` 138 | 139 | `DEFINE_KPROBE_SKB`第一个参数是内核函数名称,第二个是skb的索引。*注意*:这里的索引是从1开始的,和yaml里的不一样。`info_get_arg`用于获取内核函数的参数,第一个参数是固定的,第二个参数代表要获取内核函数参数的索引,也是从1开始的。在这个函数里面,我们就可以编写自己的BPF代码来获取数据。 140 | 141 | 如果当前已经定义好的事件的结构体没有能满足要求的,那还需要定义自己的用于传递给用户态的结构体。其定义在`progs/shared.h`中,定义的方式可参考其中的`qdisc_event_t`: 142 | 143 | ```c 144 | DEFINE_EVENT(qdisc_event_t, 145 | event_field(u64, last_update) 146 | event_field(u32, state) 147 | event_field(u32, qlen) 148 | event_field(u32, flags) 149 | ) 150 | ``` 151 | 152 | **诊断器定义** 153 | 154 | 在BPF代码中我们定义了自己的结构体,并采集了一些自定义的信息。这些信息目前`analysis.c`里的代码是不能处理的,会被忽略,因此我们需要定义特殊的诊断器用于处理这些信息。自定义的诊断器都定义在analysis.c中,可以采用两个宏定义: 155 | 156 | - DEFINE_ANALYZER_ENTRY:采用这个宏定义的诊断器会在函数入口(函数被执行的时候)触发的事件中被调用,针对的是kprobe阶段 157 | - DEFINE_ANALYZER_EXIT:采用这个宏定义的诊断器会在函数执行结束触发的事件中被调用,针对的是kretprobe阶段。如果要分析函数的返回值,需要使用这个宏。 158 | 159 | 宏定义的第一个参数为诊断器的名称,这里假设我们定义了`qdisc`诊断器。第二个是诊断器针对的模式,当前nettrace支持`basic/timeline/diag/drop/sock`五种模式。 160 | 161 | ```c 162 | DEFINE_ANALYZER_EXIT(qdisc, TRACE_MODE_DIAG_MASK) 163 | { 164 | /* e->event是基础类型的结构体,这里我们将其转为我们定义的结构体。这里的event 165 | * 变量就是qdisc_event_t类型的,我们就能获取到BPF中采集到的数据,并按照一定的 166 | * 格式显示出来。 167 | */ 168 | define_pure_event(qdisc_event_t, event, e->entry->event); 169 | char *msg = malloc(1024); 170 | int hz; 171 | 172 | msg[0] = '\0'; 173 | hz = kernel_hz(); 174 | hz = hz > 0 ? 
hz : 1; 175 | sprintf(msg, PFMT_EMPH_STR(" *queue state: %x, flags: %x, " 176 | "last update: %lums, len: %lu*"), event->state, 177 | event->flags, (1000 * event->last_update) / hz, 178 | event->qlen); 179 | entry_set_msg(e->entry, msg); 180 | 181 | rule_run(e->entry, trace, e->event.val); 182 | 183 | return RESULT_CONT; 184 | } 185 | ``` 186 | 187 | 除了定义诊断器,我们还需要在analysis.h中声明这个诊断器,格式为: 188 | 189 | ```c 190 | DECLARE_ANALYZER(qdisc); 191 | ``` 192 | 193 | **trace修改** 194 | 195 | 在`trace.yaml`中将我们要跟踪的内核函数(这里为`sch_direct_xmit`)加进来,这里我们需要将其`analyzer`字段指定为我们刚才创建的诊断器qdisc。*需要注意的是*:由于这是一个自定义的trace(不是自动生成的,是我们在kprobe.c中手动定义的),因此这里不能给其指定skb或者sk: 196 | 197 | ```yaml 198 | - name: sch_direct_xmit 199 | analyzer: qdisc 200 | ``` 201 | 202 | 203 | -------------------------------------------------------------------------------- /docs/droptrace.md: -------------------------------------------------------------------------------- 1 | # 丢包监控工具 - droptrace 2 | 3 | ## 一、背景 4 | 5 | ### 1.1 丢包的历史 6 | 7 | 网络丢包是网络故障排查中一个不朽的问题。很多网络故障的对外表现即为“丢包”,而所谓的“丢包”指的是报文没有按照预期到达报文的接收方。在Linux网络环境中,丢包发生的位置无非是在物理链路(即中间物理设备)和操作系统(即Linux内核)。其中我们遇到的大部分的丢包问题都是发生在内核中,因此理解内核为什么会做出“丢包”这一行为对我们解决网络故障尤为重要。 8 | 9 | 关于内核丢包这一方向的探索和实现,最早可以追溯到2009年的2.6.30版本内核。在该版本之前,内核还没有“丢包”的概念,即所有的报文释放都是通过统一的函数`kfree_skb()`来进行的,这种情况下用户(甚至是系统本身)是不知道报文是正常还是异常释放了的,即没有一个界限来区别“好的”和“坏的”报文。在2.6.30版本中,`Neil Horman`引入了`consume_skb()`接口,划清了丢包的界限:`kfree_skb()`用来丢包,`consume_skb()`用来正常释放报文。同时,他还定义了`kfree_skb`和`consume_skb`两个tracepoint,使得用户可以通过采集`kfree_skb`事件来监控系统上的丢包。为了向用户提供更丰富的丢包信息(如被丢弃的报文的内容),`Neil Horman`还引入了`drop monitor`模块,该功能可以通过`netlink`的方式将丢包事件传递给用户态程序。 10 | 11 | ### 1.2 dropwatch 12 | 13 | `drop monitor`本质上是基于`kfree_skb`事件的,因此它提供的信息我们可以从`kfree_skb`事件中看得出来:通过`skb`的地址(skbaddr),它可以将报文的内容传给用户;通过指令地址(location,即调用`kfree_skb()`函数的地方),它可以告诉用户丢包发生的函数。 14 | 15 | ```shell 16 | # cat /tracing/events/skb/kfree_skb/format 17 | name: kfree_skb 18 | ID: 1408 19 | format: 20 | field:unsigned short common_type; offset:0; size:2; signed:0; 21 | 
field:unsigned char common_flags; offset:2; size:1; signed:0; 22 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; 23 | field:int common_pid; offset:4; size:4; signed:1; 24 | 25 | field:void * skbaddr; offset:8; size:8; signed:0; 26 | field:void * location; offset:16; size:8; signed:0; 27 | field:unsigned short protocol; offset:24; size:2; signed:0; 28 | 29 | print fmt: "skbaddr=%p protocol=%u location=%p", REC->skbaddr, REC->protocol, REC->location 30 | ``` 31 | 32 | `drop monitor`所提供的的信息对于我们定位网络丢包问题很有用,[dropwatch](https://github.com/nhorman/dropwatch)就是基于该功能实现的丢包监控工具。事实上,这个工具的作者好像正是`Neil Horman`本人。使用该工具,我们可以获取到以下的监控信息: 33 | 34 | ```shell 35 | drop at: tcp_v4_rcv+0x86/0xf60 (0xffffffff81d5fbf6) 36 | origin: software 37 | input port ifindex: 2 38 | timestamp: Fri May 27 17:18:02 2022 712681502 nsec 39 | protocol: 0x800 40 | length: 54 41 | original length: 54 42 | ``` 43 | 44 | 这个工具告诉了我们丢包发生的函数为`tcp_v4_rcv()`,收包的网口的index为2等信息。通过使用该工具提供的另一个命令dwdump,还可以把报文信息dump到pcap文件,使用wireshark等工具打开分析。然而这里有个问题:虽然我们已经把丢包定位到了具体的函数,知道了报文是在TCP层收包阶段被丢弃的,缩小了问题的范围,但是仍然不知道具体的丢包原因,因为`tcp_v4_rcv()`函数无论是什么原因导致丢包,都会跳转(goto)到统一的地方释放报文。整个内核协议栈在设计方面,采用了类似于”集中释放“的方式,即某项检查失败时不会直接在当前位置释放报文,而是返回一个错误码,并在某个调用的地方统一释放。这就使得在很多使用,我们并不能通过报文释放的位置来准确找到丢包的原因。 45 | 46 | ### 1.3 dropreason 47 | 48 | 为了解决上文中提到的问题,内核需要提供一种更加直观的机制来告诉用户为什么发生了丢包。为此,笔者也做了多方面的探索,最初的思路是为`snmp`增加一个的tracepoint点。`snmp`大家应该比较清楚,即内核提供的报文(网络)统计功能,通过命令`cat /proc/net/snmp`可以获取到当前系统的统计信息: 49 | 50 | ``` 51 | Ip: Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates 52 | Ip: 1 64 1523807 0 1 0 0 0 1523802 1492568 0 40 0 0 0 0 0 0 0 53 | Icmp: InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds 
OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps 54 | Icmp: 117377 9 0 40 0 0 0 0 117336 0 1 0 0 0 117501 0 164 0 0 0 0 0 117336 0 1 0 0 55 | IcmpMsg: InType3 InType8 InType13 OutType0 OutType3 OutType14 56 | IcmpMsg: 40 117336 1 117336 164 1 57 | Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors 58 | Tcp: 1 200 120000 -1 12562 306 1061 172 137 1264809 1246746 296 84 12940 82 59 | Udp: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti MemErrors 60 | Udp: 138987 164 0 139190 0 0 0 0 0 61 | UdpLite: InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti MemErrors 62 | UdpLite: 0 0 0 0 0 0 0 0 0 63 | ``` 64 | 65 | 该信息也可以通过命令`netstat -s`来获取。相比于`dropwatch`,通过`snmp`我们可以获取到一些丢包问题产生的原因,如校验和错误(`InCsumErrors`)、缓冲区满了(`RcvbufErrors`)等。但是`snmp`毕竟是整个系统中的统计信息,无法定位、监控特定的报文丢包的原因(毕竟一个系统中可能存在着大量的丢包事件)。通过给`snmp`增加`tracepoint`点,我们就可以通过一些手段(如eBPF)来进行`trace`,获取到被丢弃的报文的内容和原因。笔者将该方案提交到了`kernel`社区,网络模块的`maintainer`之一的`David Ahern`给出的建议是,相比于增加`snmp`的`tracepoint`点,新增一个接口`kfree_skb_reason()`并扩展当前的`kfree_skb`事件可能的更好的选择,因为这样就可以无缝对接到`drop monitor`模块,一些第三方工具(如`dropwatch`、`wireshark`)也可以受益于该功能。虽然这将产生大量的修改(内核中使用`kfree_skb`的地方约有2000+处),经过深思熟虑,笔者还是采用了该方案,毕竟这种方案在扩展新的丢包原因方面有着更加灵活的优势。相关的讨论可见[net: snmp: tracepoint support for snmp](https://lore.kernel.org/netdev/20211118124812.106538-1-imagedong@tencent.com/) 66 | 67 | 经过修改后,当前的`kfree_skb`事件已经变成了这样: 68 | 69 | ```shell 70 | root@imagedong-LC1:/home/xm# cat /tracing/trace 71 | # tracer: nop 72 | # 73 | # entries-in-buffer/entries-written: 26/26 #P:64 74 | # 75 | # _-----=> irqs-off/BH-disabled 76 | # / _----=> need-resched 77 | # | / _---=> hardirq/softirq 78 | # || / _--=> preempt-depth 79 | # ||| / _-=> migrate-disable 80 | # |||| / delay 81 | # TASK-PID CPU# ||||| TIMESTAMP FUNCTION 82 | # | | | 
||||| | | 83 | -0 [013] .Ns2. 176394.894120: kfree_skb: skbaddr=00000000b09a26bc protocol=2048 location=00000000751c2330 reason: TCP_INVALID_SEQUENCE 84 | -0 [028] ..s2. 176398.260716: kfree_skb: skbaddr=00000000da3ef973 protocol=2048 location=00000000751c2330 reason: TCP_INVALID_SEQUENCE 85 | ``` 86 | 87 | 在丢包事件中,用户已经可以清晰地看出报文丢包的原因。该功能发布于Kernel5.18版本中,经过笔者的不懈努力,目前常用的网络协议,如`IP`、`TCP`、`UDP`、`ICMP`等已经完成了接口的替换,抽象出来的各个协议层的丢包原因也将近70个。 88 | 89 | 虽然`dropwatch`目前也已经支持了丢包原因,但是该工具目前还不支持报文过滤的功能,也不能直接显示报文内容,使用起来多有不便。综合考虑,笔者决定基于eBPF自己写一个轻量级的用于丢包监控的工具,并命名为`droptrace`,作为`nettrace`工具下的一个子工具(不排除后期将功能合并到nettrace中的可能)。 90 | 91 | ## 二、使用方法 92 | 93 | `droptrace`是采用C语言编写的基于`eBPF/libbpf`的命令行工具,在构建时采用的静态编译。因此运行时不依赖于第三方库,具有轻量化的特点,在嵌入式等场景下也可以适用。 94 | 95 | ### 2.1 构建 96 | 97 | 可以直接从`github`中下载编译好的二进制程序`droptrace`或者`nettrace`的rpm包,也可以自行进行编译构建: 98 | 99 | ```shell 100 | git clone https://github.com/OpenCloudOS/nettrace.git 101 | cd nettrace/droptrace 102 | make all 103 | ``` 104 | 105 | 由于构建过程中需要动态生成`vmlinux.h`头文件,因此需要当前构建环境开启了`CONFIG_DEBUG_INFO_BTF`配置。如果没有配置,则需要通过VMLINUX手动指定内核的`vmlinux`文件路径,如: 106 | 107 | ``` 108 | make VMLINUX=/home/test/linux/vmlinux all 109 | ``` 110 | 111 | ### 2.2 功能介绍 112 | 113 | 在使用和显示方面,该工具与tcpdump类似,可以通过指定过滤条件(如IP地址、端口、协议等)来筛选被丢弃的报文,基本用法如下: 114 | 115 | ```shell 116 | $ sudo droptrace --saddr 127.0.0.1 117 | [2553.203430] 127.0.0.1:51136 -> 127.0.0.1:56100 TCP seq:2144353195, ack:461890105, flags:A reason:TCP_INVALID_SEQUENCE tcp_validate_incoming+0x132 118 | [2573.683437] 127.0.0.1:56101 -> 127.0.0.1:52010 TCP seq:2111110047, ack:4270738862, flags:A reason:TCP_INVALID_SEQUENCE tcp_validate_incoming+0x132 119 | [2573.683455] 127.0.0.1:52010 -> 127.0.0.1:56101 TCP seq:4253961646, ack:2127887263, flags:A reason:TCP_INVALID_SEQUENCE tcp_validate_incoming+0x132 120 | ``` 121 | 122 | 该命令用来监控系统中所有源地址为`127.0.0.1`的丢包报文。输出中的**reason**即为**丢包原因**,如果对该原因和解决办法不太理解,还可以使用命令`man dropreason`命令来查看丢包原因用户手册中对其所做出的详细解释(该手册还是不断完善中): 123 | 124 | ```shell 125 | Constants 126 | 
SKB_NOT_DROPPED_YET 127 | skb is not dropped yet (used for no-drop case) 128 | 129 | SKB_DROP_REASON_NOT_SPECIFIED 130 | drop reason is not specified 131 | 132 | SKB_DROP_REASON_NO_SOCKET 133 | socket not found 134 | 135 | SKB_DROP_REASON_PKT_TOO_SMALL 136 | packet size is too small 137 | 138 | SKB_DROP_REASON_TCP_CSUM 139 | TCP checksum error 140 | 141 | SKB_DROP_REASON_SOCKET_FILTER 142 | dropped by socket filter 143 | 144 | SKB_DROP_REASON_UDP_CSUM 145 | UDP checksum error 146 | 147 | SKB_DROP_REASON_NETFILTER_DROP 148 | dropped by netfilter 149 | 150 | ...... 151 | ``` 152 | 153 | 输出信息中的最后一个字段为产生丢包的指令地址(内核函数),该信息和`dropwatch`中所提供的一致。 154 | 155 | 使用`-h`可以查看工具支持的所有功能: 156 | 157 | ```shell 158 | $ droptrace -h 159 | droptrace: a tool to monitor the packet dropped by kernel 160 | 161 | Usage: 162 | -s, --saddr filter source ip address 163 | -d, --daddr filter dest ip address 164 | --addr filter source or dest ip address 165 | -S, --sport filter source TCP/UDP port 166 | -D, --dport filter dest TCP/UDP port 167 | -P, --port filter source or dest TCP/UDP port 168 | -p, --proto filter L3/L4 protocol, such as 'tcp', 'arp' 169 | -r, --reason filter drop reason 170 | 171 | --raw-sym show kernel symbol address (default false) 172 | --stat show drop statistics 173 | --stat-stop stop drop statistics and remove the launched eBPF program 174 | -l, --limit set the max output pcaket per second, defaultunlimited 175 | --limit-budget set the budget depth of the token used to limitoutput rate 176 | -h, --help show help information 177 | ``` 178 | 179 | 前面的几个参数是用来进行报文过滤的,比较好理解,这里着重介绍一下剩余的几个参数的功能。 180 | 181 | - `raw-sym`:显示原始丢包指令地址。跟踪数据中的`tcp_validate_incoming+0x132`是工具将指令地址解析成的内核函数信息,如果想看到未经解析的地址,可以加上该参数。这在用户想要通过addr2line命令来定位到具体的代码行的时候比较有用。 182 | - `stat`:丢包统计模式。该模式下,会挂载eBPF程序到系统中来统计各个原因下所产生的的丢包数量(该模式暂不支持指定过滤条件)。 183 | - `stat-stop`:停止丢包统计。使用该参数,会将原先使用`stat`加载到内核中的eBPF卸载掉,停止丢包统计。 184 | - `limit`:限制输出频率。如果系统中存在大量的丢包,`droptrace`的输出就会很多。该参数可以限制每秒所跟踪的报文的数量,其采用令牌桶的方式来实现。 185 | - 
`limit-budget`:令牌桶的深度。 186 | 187 | 丢包统计模式下的输出信息如下所示: 188 | 189 | ```shell 190 | $ sudo droptrace --stat 191 | packet statistics: 192 | NOT_SPECIFIED: 0 193 | NO_SOCKET: 0 194 | PKT_TOO_SMALL: 0 195 | TCP_CSUM: 0 196 | SOCKET_FILTER: 49 197 | UDP_CSUM: 13 198 | NETFILTER_DROP: 0 199 | OTHERHOST: 0 200 | IP_CSUM: 0 201 | IP_INHDR: 0 202 | IP_RPFILTER: 0 203 | UNICAST_IN_L2_MULTICAST: 0 204 | ...... 205 | ``` 206 | 207 | ## 三、注意事项 208 | 209 | 目前所有的丢包原因都保存在内核中的枚举类型`enum skb_drop_reason`中。虽然笔者在开发过程中尽量将新增的丢包原因添加到到枚举的尾部来保持兼容,但是社区上的有些开发者认为丢包原因是通过字符串的方式由`ftrace`传递给用户的,所以将新增的原因加到枚举中间位置也是被接受的。为了保持最好的兼容性,不建议直接下载`release`中的二进制程序`droptrace`来使用,最好的方式是在目标机器上编译后使用。 -------------------------------------------------------------------------------- /docs/nettrace.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/nodetrace.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCloudOS/nettrace/9739591785293802ace8ff41cb6a4fb3bdd4d00d/docs/nodetrace.md -------------------------------------------------------------------------------- /script/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCloudOS/nettrace/9739591785293802ace8ff41cb6a4fb3bdd4d00d/script/.gitignore -------------------------------------------------------------------------------- /script/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:sid 2 | 3 | RUN rm -rf /etc/apt/sources.list.d/ 4 | RUN echo 'deb http://mirrors.tuna.tsinghua.edu.cn/debian/ sid main non-free contrib' > /etc/apt/sources.list 5 | 6 | RUN apt update && apt upgrade -y 7 | 8 | RUN apt install gcc clang llvm -y 9 | RUN apt install make file bpftool libbpf-dev -y 10 | RUN apt install rpm zip bzip2 -y 11 | 12 | RUN apt 
clean 13 | -------------------------------------------------------------------------------- /script/arch.mk: -------------------------------------------------------------------------------- 1 | ARCH ?= $(shell uname -m) 2 | SRCARCH := $(ARCH) 3 | 4 | # Additional ARCH settings for x86 5 | ifeq ($(ARCH),i386) 6 | SRCARCH := x86 7 | endif 8 | ifeq ($(ARCH),x86_64) 9 | SRCARCH := x86 10 | endif 11 | 12 | # Additional ARCH settings for arm64 13 | ifeq ($(ARCH),aarch64) 14 | SRCARCH := arm64 15 | endif 16 | 17 | # Additional ARCH settings for loongarch64 18 | ifeq ($(ARCH),loongarch64) 19 | SRCARCH := loongarch 20 | endif 21 | 22 | # Additional ARCH settings for sparc 23 | ifeq ($(ARCH),sparc32) 24 | SRCARCH := sparc 25 | endif 26 | ifeq ($(ARCH),sparc64) 27 | SRCARCH := sparc 28 | endif 29 | 30 | # Additional ARCH settings for sh 31 | ifeq ($(ARCH),sh64) 32 | SRCARCH := sh 33 | endif 34 | 35 | # Additional ARCH settings for tile 36 | ifeq ($(ARCH),tilepro) 37 | SRCARCH := tile 38 | endif 39 | ifeq ($(ARCH),tilegx) 40 | SRCARCH := tile 41 | endif 42 | -------------------------------------------------------------------------------- /script/bash-completion.sh: -------------------------------------------------------------------------------- 1 | complete -W '-s --saddr -d --daddr --addr -p -D --dport -S --sport -P --port 2 | --netns --netns-current --pid --min-latency --pkt-len 3 | --tcp-flags --pkt-len --tcp-rtt --tcp--srtt 4 | 5 | --basic --diag --diag-quiet --diag-keep --drop --drop-stack 6 | --sock --monitor --rtt --rtt-detail --filter-srtt 7 | --filter-minrtt --latency-show --latency-free --latency 8 | --latency-summary 9 | 10 | -t --trace --force --ret --detail --date -c --count --hooks 11 | --tiny-show --trace-stack --trace-matcher --trace-noclone 12 | --func-stats --rate-limit --btf-path 13 | 14 | -v --debug -h --help -V --version' nettrace 15 | -------------------------------------------------------------------------------- /script/bpftool-arm: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCloudOS/nettrace/9739591785293802ace8ff41cb6a4fb3bdd4d00d/script/bpftool-arm -------------------------------------------------------------------------------- /script/bpftool-loongarch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCloudOS/nettrace/9739591785293802ace8ff41cb6a4fb3bdd4d00d/script/bpftool-loongarch -------------------------------------------------------------------------------- /script/bpftool-x86: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenCloudOS/nettrace/9739591785293802ace8ff41cb6a4fb3bdd4d00d/script/bpftool-x86 -------------------------------------------------------------------------------- /script/dropreason.8: -------------------------------------------------------------------------------- 1 | .TH "Kernel API" 9 "enum skb_drop_reason" "May 2022" "API Manual" LINUX 2 | .SH NAME 3 | enum skb_drop_reason \- the reasons of skb drops 4 | .SH SYNOPSIS 5 | enum skb_drop_reason { 6 | .br 7 | .BI " SKB_NOT_DROPPED_YET" 8 | , 9 | .br 10 | .br 11 | .BI " SKB_DROP_REASON_NOT_SPECIFIED" 12 | , 13 | .br 14 | .br 15 | .BI " SKB_DROP_REASON_NO_SOCKET" 16 | , 17 | .br 18 | .br 19 | .BI " SKB_DROP_REASON_PKT_TOO_SMALL" 20 | , 21 | .br 22 | .br 23 | .BI " SKB_DROP_REASON_TCP_CSUM" 24 | , 25 | .br 26 | .br 27 | .BI " SKB_DROP_REASON_SOCKET_FILTER" 28 | , 29 | .br 30 | .br 31 | .BI " SKB_DROP_REASON_UDP_CSUM" 32 | , 33 | .br 34 | .br 35 | .BI " SKB_DROP_REASON_NETFILTER_DROP" 36 | , 37 | .br 38 | .br 39 | .BI " SKB_DROP_REASON_OTHERHOST" 40 | , 41 | .br 42 | .br 43 | .BI " SKB_DROP_REASON_IP_CSUM" 44 | , 45 | .br 46 | .br 47 | .BI " SKB_DROP_REASON_IP_INHDR" 48 | , 49 | .br 50 | .br 51 | .BI " SKB_DROP_REASON_IP_RPFILTER" 52 | , 53 | .br 54 | .br 55 | .BI " SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST" 56 | 
, 57 | .br 58 | .br 59 | .BI " SKB_DROP_REASON_XFRM_POLICY" 60 | , 61 | .br 62 | .br 63 | .BI " SKB_DROP_REASON_IP_NOPROTO" 64 | , 65 | .br 66 | .br 67 | .BI " SKB_DROP_REASON_SOCKET_RCVBUFF" 68 | , 69 | .br 70 | .br 71 | .BI " SKB_DROP_REASON_PROTO_MEM" 72 | , 73 | .br 74 | .br 75 | .BI " SKB_DROP_REASON_TCP_MD5NOTFOUND" 76 | , 77 | .br 78 | .br 79 | .BI " SKB_DROP_REASON_TCP_MD5UNEXPECTED" 80 | , 81 | .br 82 | .br 83 | .BI " SKB_DROP_REASON_TCP_MD5FAILURE" 84 | , 85 | .br 86 | .br 87 | .BI " SKB_DROP_REASON_SOCKET_BACKLOG" 88 | , 89 | .br 90 | .br 91 | .BI " SKB_DROP_REASON_TCP_FLAGS" 92 | , 93 | .br 94 | .br 95 | .BI " SKB_DROP_REASON_TCP_ZEROWINDOW" 96 | , 97 | .br 98 | .br 99 | .BI " SKB_DROP_REASON_TCP_OLD_DATA" 100 | , 101 | .br 102 | .br 103 | .BI " SKB_DROP_REASON_TCP_OVERWINDOW" 104 | , 105 | .br 106 | .br 107 | .BI " SKB_DROP_REASON_TCP_OFOMERGE" 108 | , 109 | .br 110 | .br 111 | .BI " SKB_DROP_REASON_TCP_RFC7323_PAWS" 112 | , 113 | .br 114 | .br 115 | .BI " SKB_DROP_REASON_TCP_INVALID_SEQUENCE" 116 | , 117 | .br 118 | .br 119 | .BI " SKB_DROP_REASON_TCP_RESET" 120 | , 121 | .br 122 | .br 123 | .BI " SKB_DROP_REASON_TCP_INVALID_SYN" 124 | , 125 | .br 126 | .br 127 | .BI " SKB_DROP_REASON_TCP_CLOSE" 128 | , 129 | .br 130 | .br 131 | .BI " SKB_DROP_REASON_TCP_FASTOPEN" 132 | , 133 | .br 134 | .br 135 | .BI " SKB_DROP_REASON_TCP_OLD_ACK" 136 | , 137 | .br 138 | .br 139 | .BI " SKB_DROP_REASON_TCP_TOO_OLD_ACK" 140 | , 141 | .br 142 | .br 143 | .BI " SKB_DROP_REASON_TCP_ACK_UNSENT_DATA" 144 | , 145 | .br 146 | .br 147 | .BI " SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE" 148 | , 149 | .br 150 | .br 151 | .BI " SKB_DROP_REASON_TCP_OFO_DROP" 152 | , 153 | .br 154 | .br 155 | .BI " SKB_DROP_REASON_IP_OUTNOROUTES" 156 | , 157 | .br 158 | .br 159 | .BI " SKB_DROP_REASON_BPF_CGROUP_EGRESS" 160 | , 161 | .br 162 | .br 163 | .BI " SKB_DROP_REASON_IPV6DISABLED" 164 | , 165 | .br 166 | .br 167 | .BI " SKB_DROP_REASON_NEIGH_CREATEFAIL" 168 | , 169 | .br 170 | .br 171 | .BI " 
SKB_DROP_REASON_NEIGH_FAILED" 172 | , 173 | .br 174 | .br 175 | .BI " SKB_DROP_REASON_NEIGH_QUEUEFULL" 176 | , 177 | .br 178 | .br 179 | .BI " SKB_DROP_REASON_NEIGH_DEAD" 180 | , 181 | .br 182 | .br 183 | .BI " SKB_DROP_REASON_TC_EGRESS" 184 | , 185 | .br 186 | .br 187 | .BI " SKB_DROP_REASON_QDISC_DROP" 188 | , 189 | .br 190 | .br 191 | .BI " SKB_DROP_REASON_CPU_BACKLOG" 192 | , 193 | .br 194 | .br 195 | .BI " SKB_DROP_REASON_XDP" 196 | , 197 | .br 198 | .br 199 | .BI " SKB_DROP_REASON_TC_INGRESS" 200 | , 201 | .br 202 | .br 203 | .BI " SKB_DROP_REASON_UNHANDLED_PROTO" 204 | , 205 | .br 206 | .br 207 | .BI " SKB_DROP_REASON_SKB_CSUM" 208 | , 209 | .br 210 | .br 211 | .BI " SKB_DROP_REASON_SKB_GSO_SEG" 212 | , 213 | .br 214 | .br 215 | .BI " SKB_DROP_REASON_SKB_UCOPY_FAULT" 216 | , 217 | .br 218 | .br 219 | .BI " SKB_DROP_REASON_DEV_HDR" 220 | , 221 | .br 222 | .br 223 | .BI " SKB_DROP_REASON_DEV_READY" 224 | , 225 | .br 226 | .br 227 | .BI " SKB_DROP_REASON_FULL_RING" 228 | , 229 | .br 230 | .br 231 | .BI " SKB_DROP_REASON_NOMEM" 232 | , 233 | .br 234 | .br 235 | .BI " SKB_DROP_REASON_HDR_TRUNC" 236 | , 237 | .br 238 | .br 239 | .BI " SKB_DROP_REASON_TAP_FILTER" 240 | , 241 | .br 242 | .br 243 | .BI " SKB_DROP_REASON_TAP_TXFILTER" 244 | , 245 | .br 246 | .br 247 | .BI " SKB_DROP_REASON_ICMP_CSUM" 248 | , 249 | .br 250 | .br 251 | .BI " SKB_DROP_REASON_INVALID_PROTO" 252 | , 253 | .br 254 | .br 255 | .BI " SKB_DROP_REASON_IP_INADDRERRORS" 256 | , 257 | .br 258 | .br 259 | .BI " SKB_DROP_REASON_IP_INNOROUTES" 260 | , 261 | .br 262 | .br 263 | .BI " SKB_DROP_REASON_PKT_TOO_BIG" 264 | , 265 | .br 266 | .br 267 | .BI " SKB_DROP_REASON_MAX" 268 | 269 | }; 270 | .SH Constants 271 | .IP "SKB_NOT_DROPPED_YET" 12 272 | skb is not dropped yet (used for no-drop case) 273 | .IP "SKB_DROP_REASON_NOT_SPECIFIED" 12 274 | drop reason is not specified 275 | .IP "SKB_DROP_REASON_NO_SOCKET" 12 276 | socket not found 277 | .IP "SKB_DROP_REASON_PKT_TOO_SMALL" 12 278 | packet size is 
too small 279 | .IP "SKB_DROP_REASON_TCP_CSUM" 12 280 | TCP checksum error 281 | .IP "SKB_DROP_REASON_SOCKET_FILTER" 12 282 | dropped by socket filter 283 | .IP "SKB_DROP_REASON_UDP_CSUM" 12 284 | UDP checksum error 285 | .IP "SKB_DROP_REASON_NETFILTER_DROP" 12 286 | dropped by netfilter 287 | .IP "SKB_DROP_REASON_OTHERHOST" 12 288 | packet don't belong to current host 289 | (interface is in promisc mode) 290 | .IP "SKB_DROP_REASON_IP_CSUM" 12 291 | IP checksum error 292 | .IP "SKB_DROP_REASON_IP_INHDR" 12 293 | there is something wrong with IP header (see 294 | IPSTATS_MIB_INHDRERRORS) 295 | .IP "SKB_DROP_REASON_IP_RPFILTER" 12 296 | IP rpfilter validate failed. see the 297 | document for rp_filter in ip-sysctl.rst for more information 298 | .IP "SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST" 12 299 | destination address of L2 is 300 | multicast, but L3 is unicast. 301 | .IP "SKB_DROP_REASON_XFRM_POLICY" 12 302 | xfrm policy check failed 303 | .IP "SKB_DROP_REASON_IP_NOPROTO" 12 304 | no support for IP protocol 305 | .IP "SKB_DROP_REASON_SOCKET_RCVBUFF" 12 306 | socket receive buff is full 307 | .IP "SKB_DROP_REASON_PROTO_MEM" 12 308 | proto memory limition, such as udp packet 309 | drop out of udp_memory_allocated. 
310 | .IP "SKB_DROP_REASON_TCP_MD5NOTFOUND" 12 311 | no MD5 hash and one expected, 312 | corresponding to LINUX_MIB_TCPMD5NOTFOUND 313 | .IP "SKB_DROP_REASON_TCP_MD5UNEXPECTED" 12 314 | MD5 hash and we're not expecting 315 | one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED 316 | .IP "SKB_DROP_REASON_TCP_MD5FAILURE" 12 317 | MD5 hash and its wrong, corresponding 318 | to LINUX_MIB_TCPMD5FAILURE 319 | .IP "SKB_DROP_REASON_SOCKET_BACKLOG" 12 320 | failed to add skb to socket backlog ( 321 | see LINUX_MIB_TCPBACKLOGDROP) 322 | .IP "SKB_DROP_REASON_TCP_FLAGS" 12 323 | TCP flags invalid 324 | .IP "SKB_DROP_REASON_TCP_ZEROWINDOW" 12 325 | TCP receive window size is zero, 326 | see LINUX_MIB_TCPZEROWINDOWDROP 327 | .IP "SKB_DROP_REASON_TCP_OLD_DATA" 12 328 | the TCP data reveived is already 329 | received before (spurious retrans may happened), see 330 | LINUX_MIB_DELAYEDACKLOST 331 | .IP "SKB_DROP_REASON_TCP_OVERWINDOW" 12 332 | the TCP data is out of window, 333 | the seq of the first byte exceed the right edges of receive 334 | window 335 | .IP "SKB_DROP_REASON_TCP_OFOMERGE" 12 336 | the data of skb is already in the ofo 337 | queue, corresponding to LINUX_MIB_TCPOFOMERGE 338 | .IP "SKB_DROP_REASON_TCP_RFC7323_PAWS" 12 339 | PAWS check, corresponding to 340 | LINUX_MIB_PAWSESTABREJECTED 341 | .IP "SKB_DROP_REASON_TCP_INVALID_SEQUENCE" 12 342 | Not acceptable SEQ field 343 | .IP "SKB_DROP_REASON_TCP_RESET" 12 344 | Invalid RST packet 345 | .IP "SKB_DROP_REASON_TCP_INVALID_SYN" 12 346 | Incoming packet has unexpected SYN flag 347 | .IP "SKB_DROP_REASON_TCP_CLOSE" 12 348 | TCP socket in CLOSE state 349 | .IP "SKB_DROP_REASON_TCP_FASTOPEN" 12 350 | dropped by FASTOPEN request socket 351 | .IP "SKB_DROP_REASON_TCP_OLD_ACK" 12 352 | TCP ACK is old, but in window 353 | .IP "SKB_DROP_REASON_TCP_TOO_OLD_ACK" 12 354 | TCP ACK is too old 355 | .IP "SKB_DROP_REASON_TCP_ACK_UNSENT_DATA" 12 356 | TCP ACK for data we haven't sent yet 357 | .IP "SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE" 12 
358 | pruned from TCP OFO queue 359 | .IP "SKB_DROP_REASON_TCP_OFO_DROP" 12 360 | data already in receive queue 361 | .IP "SKB_DROP_REASON_IP_OUTNOROUTES" 12 362 | route lookup failed 363 | .IP "SKB_DROP_REASON_BPF_CGROUP_EGRESS" 12 364 | dropped by BPF_PROG_TYPE_CGROUP_SKB 365 | eBPF program 366 | .IP "SKB_DROP_REASON_IPV6DISABLED" 12 367 | IPv6 is disabled on the device 368 | .IP "SKB_DROP_REASON_NEIGH_CREATEFAIL" 12 369 | failed to create neigh entry 370 | .IP "SKB_DROP_REASON_NEIGH_FAILED" 12 371 | neigh entry in failed state 372 | .IP "SKB_DROP_REASON_NEIGH_QUEUEFULL" 12 373 | arp_queue for neigh entry is full 374 | .IP "SKB_DROP_REASON_NEIGH_DEAD" 12 375 | neigh entry is dead 376 | .IP "SKB_DROP_REASON_TC_EGRESS" 12 377 | dropped in TC egress HOOK 378 | .IP "SKB_DROP_REASON_QDISC_DROP" 12 379 | dropped by qdisc when packet outputting ( 380 | failed to enqueue to current qdisc) 381 | .IP "SKB_DROP_REASON_CPU_BACKLOG" 12 382 | failed to enqueue the skb to the per CPU 383 | backlog queue. 
This can be caused by backlog queue full (see 384 | netdev_max_backlog in net.rst) or RPS flow limit 385 | .IP "SKB_DROP_REASON_XDP" 12 386 | dropped by XDP in input path 387 | .IP "SKB_DROP_REASON_TC_INGRESS" 12 388 | dropped in TC ingress HOOK 389 | .IP "SKB_DROP_REASON_UNHANDLED_PROTO" 12 390 | protocol not implemented or not supported 391 | .IP "SKB_DROP_REASON_SKB_CSUM" 12 392 | sk_buff checksum computation error 393 | .IP "SKB_DROP_REASON_SKB_GSO_SEG" 12 394 | gso segmentation error 395 | .IP "SKB_DROP_REASON_SKB_UCOPY_FAULT" 12 396 | failed to copy data from user space, 397 | e.g., via \fBzerocopy_sg_from_iter\fP or \fBskb_orphan_frags_rx\fP 398 | .IP "SKB_DROP_REASON_DEV_HDR" 12 399 | device driver specific header/metadata is invalid 400 | .IP "SKB_DROP_REASON_DEV_READY" 12 401 | the device is not ready to xmit/recv due to 402 | any of its data structure that is not up/ready/initialized, 403 | e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq] 404 | is not initialized 405 | .IP "SKB_DROP_REASON_FULL_RING" 12 406 | ring buffer is full 407 | .IP "SKB_DROP_REASON_NOMEM" 12 408 | error due to OOM 409 | .IP "SKB_DROP_REASON_HDR_TRUNC" 12 410 | failed to trunc/extract the header from 411 | networking data, e.g., failed to pull the protocol header from 412 | frags via \fBpskb_may_pull\fP 413 | .IP "SKB_DROP_REASON_TAP_FILTER" 12 414 | dropped by (ebpf) filter directly attached 415 | to tun/tap, e.g., via TUNSETFILTEREBPF 416 | .IP "SKB_DROP_REASON_TAP_TXFILTER" 12 417 | dropped by tx filter implemented at 418 | tun/tap, e.g., \fBcheck_filter\fP 419 | .IP "SKB_DROP_REASON_ICMP_CSUM" 12 420 | ICMP checksum error 421 | .IP "SKB_DROP_REASON_INVALID_PROTO" 12 422 | the packet doesn't follow RFC 2211, 423 | such as a broadcasts ICMP_TIMESTAMP 424 | .IP "SKB_DROP_REASON_IP_INADDRERRORS" 12 425 | host unreachable, corresponding to 426 | IPSTATS_MIB_INADDRERRORS 427 | .IP "SKB_DROP_REASON_IP_INNOROUTES" 12 428 | network unreachable, corresponding to 429 | 
IPSTATS_MIB_INADDRERRORS 430 | .IP "SKB_DROP_REASON_PKT_TOO_BIG" 12 431 | packet size is too big (maybe exceed the 432 | MTU) 433 | .IP "SKB_DROP_REASON_MAX" 12 434 | the maximum of drop reason, which shouldn't be 435 | used as a real 'reason' 436 | .SH "Description" 437 | The reason of skb drop, which is used in \fBkfree_skb_reason\fP. 438 | en...maybe they should be split by group? 439 | -------------------------------------------------------------------------------- /script/nettrace.spec: -------------------------------------------------------------------------------- 1 | Summary: An skb (network packet) trace tool for the kernel 2 | 3 | Name: nettrace 4 | 5 | Version: %{VERSION} 6 | 7 | Release: 1%{?dist} 8 | 9 | License: MulanPSL-2.0 10 | 11 | BuildRoot:%{_tmppath}/%{name}-%{version}-%{release}-root 12 | 13 | Group: Development/Tools 14 | 15 | Source0:%{name}-%{version}.tar.gz 16 | 17 | # URL: 18 | 19 | %define __strip ${CROSS_COMPILE}strip 20 | %define __objdump ${CROSS_COMPILE}objdump 21 | 22 | %description 23 | nettrace is a powerful tool to trace network packets and diagnose 24 | network problems inside the kernel on TencentOS. 25 | 26 | It makes use of eBPF. 27 | 28 | 'skb' is the struct that is used in the kernel to store network packets. 29 | By tracing kernel functions and tracepoints (with the help of kprobe 30 | based on eBPF) that handle skb, nettrace is able to show the path 31 | an skb takes through the kernel network stack. Therefore, some network 32 | issues (such as packet drops) can be solved easily. 
33 | 34 | %prep 35 | %setup -q 36 | 37 | %install 38 | rm -rf $RPM_BUILD_ROOT 39 | make PREFIX=$RPM_BUILD_ROOT install 40 | PREFIX=$RPM_BUILD_ROOT 41 | 42 | %files 43 | %defattr (-,root,root,0755) 44 | /usr/bin/nettrace 45 | /usr/share/man/zh_CN/man8/nettrace.8.gz 46 | /usr/share/man/man8/nettrace.8.gz 47 | /usr/share/man/man8/dropreason.8.gz 48 | /usr/share/bash-completion/completions/nettrace 49 | 50 | %doc 51 | 52 | %changelog 53 | -------------------------------------------------------------------------------- /script/zh_CN/nettrace.md: -------------------------------------------------------------------------------- 1 | # NETTRACE 8 "20 JULY 2022" Linux "User Manuals" 2 | 3 | ## NAME 4 | 5 | nettrace - Linux系统下的网络报文跟踪、网络问题诊断工具 6 | 7 | ## SYNOPSIS 8 | 9 | `nettrace` [选项] 10 | 11 | ## DESCRIPTION 12 | 13 | `nettrace`是基于eBPF的集网络报文跟踪(故障定位)、网络故障诊断、网络异常监控于一体的网 14 | 络工具集,旨在能够提供一种更加高效、易用的方法来解决复杂场景下的网络问题。 15 | 16 | ## OPTIONS 17 | 18 | ### 过滤类参数 19 | 20 | `-s,--saddr` *source_address* 21 | 根据IP源地址来进行报文筛选 22 | 23 | `-d,--daddr` *dest_address* 24 | 根据IP目的地址来进行报文筛选 25 | 26 | `--addr` *address* 27 | 根据IP源地址或者目的地址来进行报文筛选 28 | 29 | `-S,--sport` *source_port* 30 | 根据UDP/TCP源端口进行报文筛选 31 | 32 | `-D,--dport` *dest_port* 33 | 根据UDP/TCP目的端口进行报文筛选 34 | 35 | `--port` *port* 36 | 根据UDP/TCP源端口或者目的端口进行报文筛选 37 | 38 | `-p,--proto` *protocol* 39 | 根据报文的协议(三层或者四层)进行过滤,如*-p udp* 40 | 41 | `--netns` *netns_inode* 42 | 根据网络命名空间进行过滤。 43 | 44 | 该参数后面跟的是网络命名空间的inode,可以通过 45 | `ls -l /proc/<pid>/ns/net` 46 | 来查看对应进程的网络命名空间的inode号 47 | 48 | `--netns-current` 49 | 仅显示当前网络命名空间的报文,等价于`--netns <当前网络命名空间的inode>` 50 | 51 | `--pid` *pid* 52 | 根据进程号进行过滤 53 | 54 | `--min-latency` *latency in us* 55 | 根据报文的寿命进行过滤,仅打印处理时长超过该值的报文,单位为us。该参数在`basic`和 56 | `sock`模式下不可用。 57 | 58 | `--pkt-len` *pkt_len* 59 | 根据报文长度进行过滤,可以指定范围,如`--pkt-len 10-20`;也可以指定确切的值, 60 | 如`--pkt-len 64` 61 | 62 | `--tcp-flags` *flags* 63 | 根据TCP报文中的flag进行过滤,有效的flag包括`SARF`,可以指定多个,如:`--tcp-flags FA` 64 | 65 | ### 模式类参数 66 | 67 | `--basic` 68 | 
启用`basic`跟踪模式。默认情况下,启用的是生命周期跟踪模式。启用该模式后,会直接打印 69 | 出报文所经过的内核函数/tracepoint 70 | 71 | `--diag` 72 | 启用诊断模式 73 | 74 | `--diag-quiet` 75 | 只显示出现存在问题的报文,不显示正常的报文 76 | 77 | `--diag-keep` 78 | 持续跟踪。`diag`模式下,默认在跟踪到异常报文后会停止跟踪,使用该参数后,会持续跟踪下去。 79 | 80 | `--drop` 81 | 进行系统丢包监控,取代原先的`droptrace` 82 | 83 | `--drop-stack` 84 | 打印`kfree_skb`内核函数的调用堆栈,等价于`--trace-stack kfree_skb` 85 | 86 | `--sock` 87 | 启用套接口模式。这个模式下,不会再跟踪报文,而会跟踪套接口。 88 | 89 | `--monitor` 90 | 启用监控模式。一种轻量化的实时监控系统中网络异常的模式(对内核版本有一定要求)。 91 | 92 | `--rtt` 93 | 启用RTT统计模式,会统计TCP RTT的分布情况 94 | 95 | `--rtt-detail` 96 | 启用RTT详细模式,输出符合过滤条件的每个报文的RTT数据 97 | 98 | `--filter-srtt` *rtt* 99 | 根据srtt来进行过滤,`rtt/rtt-detail`模式下可用,单位ms 100 | 101 | `--filter-minrtt` *rtt* 102 | 根据minrtt来进行过滤,`rtt/rtt-detail`模式下可用,单位ms 103 | 104 | `--latency-show` 105 | 显示延迟(协议栈处理耗时)信息,`basic/sock`模式下不可用 106 | 107 | `--latency` 108 | 启用延迟分析模式,可以高效分析每个报文协议栈处理耗时 109 | 110 | `--latency-summary` 111 | 启用延迟分析统计模式,可以统计协议栈处理耗时的分布情况 112 | 113 | ### 功能类参数 114 | 115 | `-t,--trace` *traces* 116 | 要启用(跟踪)的内核函数、tracepoint。 117 | 118 | 这里将这些被跟踪的对象(内核函数、tracepoint等)简称为跟踪器, 119 | 所有的跟踪器以树状图的方式被组织了起来,使用命令: 120 | *nettrace -t ?* 121 | 可以查看所有的跟踪器。 122 | 123 | 默认情况下,大部分的跟踪器会被启用,一些设备相关的跟踪器(如ipvlan、bridge等)默认 124 | 不启用。使用参数*-t all*可启用所有的跟踪器。 125 | 126 | 可以同时指定多个跟踪器,以*,*分隔,比如*nettrace -t ip,link,kfree_skb*。 127 | 可以指定跟踪器的目录,也可以直接指定跟踪器。 128 | 129 | `--ret` 130 | 显示被跟踪的内核函数的返回值 131 | 132 | `--detail` 133 | 显示跟踪详细信息,包括当前的进程、网口和CPU等信息 134 | 135 | `--date` 136 | 以时间格式打印(以2022-10-24 xx:xx:xx.xxxxxx格式打印),而不是时间戳 137 | 138 | `-c,--count` 139 | 指定要跟踪的报文个数c,达到该个数后自动退出 140 | 141 | `--hooks` 142 | 打印netfilter上的钩子函数 143 | 144 | `--tiny-show` 145 | 精简显示,只显示第一个报文的内容,用于提升性能 146 | 147 | `--trace-stack` *traces* 148 | 指定需要进行堆栈打印的内核函数,可以指定多个,用“,”分隔。出于性能考虑,启用堆栈打印的 149 | 内核函数不能超过16个。用法和格式与`--trace`完全一致。 150 | 151 | `--trace-matcher` 152 | 指定进行报文匹配的内核函数,默认所有的函数,用于提升性能 153 | 154 | `--trace-exclude` 155 | 不进行跟踪的函数 156 | 157 | `--trace-noclone` 158 | 不跟踪报文的克隆时间,即不把克隆出来的报文和当前报文放到一块跟踪 159 | 160 | 
`--func-stats` 161 | 只统计内核函数被调用的次数,不打印具体的报文,可指定过滤条件 162 | 163 | `--rate-limit` 164 | 进行限速,限制每秒事件输出的数量 165 | 166 | `--btf-path` 167 | 手动指定BTF文件的路径 168 | 169 | `-v` 170 | 显示程序启动的日志信息 171 | 172 | `--debug` 173 | 显示调试信息 174 | 175 | ## EXAMPLES 176 | 177 | ### 生命周期跟踪 178 | 179 | 跟踪源地址为`192.168.1.8`的ping报文: 180 | *nettrace -p icmp -s 192.168.1.8* 181 | 182 | 跟踪源地址为`192.168.1.8`的ping报文在IP协议层和ICMP协议层的路径: 183 | *nettrace -p icmp -s 192.168.1.8 -t ip,icmp* 184 | 185 | 显示详细信息: 186 | *nettrace -p icmp -s 192.168.1.8 --detail* 187 | 188 | 打印堆栈: 189 | *nettrace -p icmp -s 192.168.1.8 --trace-stack consume_skb,icmp_rcv* 190 | 191 | ### 诊断模式 192 | 193 | 使用方式与上面的一致,加个`diag`参数即可使用诊断模式。上文的生命周期模式对于使用者的 194 | 要求比较高,需要了解内核协议栈各个函数的用法、返回值的意义等,易用性较差。诊断模式是在 195 | 生命周期模式的基础上,提供了更加丰富的信息,使得没有网络开发经验的人也可进行复杂 196 | 网络问题的定位和分析。 197 | 198 | 相比于普通模式,诊断模式提供了更多的可供参考的信息,包括当前报文经过了iptables的哪些表和 199 | 哪些链、报文发生了NAT、报文被克隆了等。诊断模式设置了三种提示级别: 200 | 201 | - `INFO`:正常的信息提示 202 | - `WARN`:警告信息,该报文可能存在一定的问题,需要关注 203 | - `ERROR`:异常信息,报文发生了问题(比如被丢弃)。 204 | 205 | 如果当前报文存在`ERROR`,那么工具会给出一定的诊断修复建议,并终止当前诊断操作。通过添 206 | 加`diag-keep`可以在发生`ERROR`事件时不退出,继续进行跟踪分析。下面是发生异常时的日志: 207 | 208 | ```shell 209 | ./nettrace -p icmp --diag --saddr 192.168.122.8 210 | begin trace... 
211 | ***************** ffff889fb3c64f00 *************** 212 | [4049.295546] [__netif_receive_skb_core] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 213 | [4049.295566] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *ipv4 in chain: PRE_ROUTING* 214 | [4049.295578] [nft_do_chain ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *iptables table:nat, chain:PREROUT* *packet is accepted* 215 | [4049.295594] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *bridge in chain: PRE_ROUTING* 216 | [4049.295612] [__netif_receive_skb_core] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 217 | [4049.295624] [ip_rcv ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 218 | [4049.295629] [ip_rcv_core ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 219 | [4049.295640] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *ipv4 in chain: PRE_ROUTING* 220 | [4049.295644] [ip_rcv_finish ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 221 | [4049.295655] [ip_route_input_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 222 | [4049.295664] [fib_validate_source ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 223 | [4049.295683] [ip_forward ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 224 | [4049.295687] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *ipv4 in chain: FORWARD* *packet is dropped by netfilter (NF_DROP)* 225 | [4049.295695] [nft_do_chain ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *iptables table:filter, chain:FORWARD* *packet is dropped by iptables/iptables-nft* 226 | [4049.295711] [kfree_skb ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 0 *packet is dropped by kernel* 227 | ---------------- ANALYSIS RESULT --------------------- 228 | [1] ERROR happens in nf_hook_slow(netfilter): 229 | packet is dropped by netfilter (NF_DROP) 230 | fix advice: 
231 | check your netfilter rule 232 | 233 | [2] ERROR happens in nft_do_chain(netfilter): 234 | packet is dropped by iptables/iptables-nft 235 | fix advice: 236 | check your iptables rule 237 | 238 | [3] ERROR happens in kfree_skb(life): 239 | packet is dropped by kernel 240 | location: 241 | nf_hook_slow+0x96 242 | drop reason: 243 | NETFILTER_DROP 244 | 245 | analysis finished! 246 | 247 | end trace... 248 | ``` 249 | 250 | 从这里的日志可以看出,在报文经过iptables的filter表的forward链的时候,发生了丢包。在 251 | 诊断结果里,会列出所有的异常事件,一个报文跟踪可能会命中多条诊断结果。这里的诊断建议是让 252 | 用户检查iptables中的规则是否存在问题。 253 | 254 | 其中,`kfree_skb`这个跟踪点是对`drop reason`内核特性(详见droptrace中的介绍)做了 255 | 适配的,可以理解为将droptrace的功能集成到了这里的诊断结果中,这里可以看出其给出的丢包 256 | 原因是`NETFILTER_DROP`。因此,可以通过以下命令来监控内核中所有的丢包事件以及丢包原因: 257 | 258 | *nettrace -t kfree_skb --diag --diag-keep* 259 | 260 | ### 丢包监控 261 | 262 | 使用命令`nettrace --drop`可以对系统中的丢包事件进行监控,对于支持内核特性 263 | `skb drop reason`的内核,这里还会打印出丢包原因。可以通过查看 264 | `/tracing/events/skb/kfree_skb/format`来判断当前系统是否支持该特性。 265 | 266 | 该模式下使用的效果与原先的`droptrace`完全相同,如下所示: 267 | 268 | ```shell 269 | nettrace --drop 270 | begin trace... 
271 | [142.097193] TCP: 162.241.189.135:57022 -> 172.27.0.6:22 seq:299038593, ack:3843597961, flags:AR, reason: NOT_SPECIFIED, tcp_v4_rcv+0x81 272 | [142.331798] TCP: 162.241.189.135:57022 -> 172.27.0.6:22 seq:299038593, ack:3843597961, flags:A, reason: NOT_SPECIFIED, tcp_v4_do_rcv+0x83 273 | [142.331857] TCP: 162.241.189.135:57022 -> 172.27.0.6:22 seq:299038593, ack:3843597961, flags:AP, reason: NOT_SPECIFIED, tcp_v4_do_rcv+0x83 274 | [146.136576] TCP: 127.0.0.1:43582 -> 127.0.0.1:9999 seq:3819454691, ack:0, flags:S, reason: NO_SOCKET, tcp_v4_rcv+0x81 275 | [146.220414] TCP: 169.254.0.138:8186 -> 172.27.0.6:40634 seq:8486084, ack:2608831141, flags:A, reason: TCP_INVALID_SEQUENCE, tcp_validate_incoming+0x126 276 | [146.533728] TCP: 127.0.0.1:36338 -> 127.0.0.1:56100 seq:1110580666, ack:1951926207, flags:A, reason: TCP_INVALID_SEQUENCE, tcp_validate_incoming+0x126 277 | [147.255946] TCP: 20.44.10.122:443 -> 192.168.255.10:42878 seq:2950381253, ack:211751623, flags:A, reason: NOT_SPECIFIED, tcp_rcv_state_process+0xe9 278 | ``` 279 | 280 | 同样可以使用`man dropreason`命令来查看对应的丢包原因的详细解释。对于不支持 281 | `skb drop reason`特性的内核,该模式下将不会打印丢包原因字段,效果如下所示: 282 | 283 | ```shell 284 | nettrace --drop 285 | begin trace... 
286 | [2016.965295] TCP: 162.241.189.135:45432 -> 172.27.0.6:22 seq:133152310, ack:2529234288, flags:AR, tcp_v4_rcv+0x50 287 | [2017.201315] TCP: 162.241.189.135:45432 -> 172.27.0.6:22 seq:133152310, ack:2529234288, flags:A, tcp_v4_do_rcv+0x70 288 | [2019.041344] TCP: 176.58.124.134:37441 -> 172.27.0.6:443 seq:1160140493, ack:0, flags:S, tcp_v4_rcv+0x50 289 | [2021.867340] TCP: 127.0.0.1:34936 -> 127.0.0.1:9999 seq:1309795878, ack:0, flags:S, tcp_v4_rcv+0x50 290 | [2024.997146] TCP: 162.241.189.135:46756 -> 172.27.0.6:22 seq:1304582308, ack:1354418612, flags:AR, tcp_v4_rcv+0x50 291 | [2025.235953] TCP: 162.241.189.135:46756 -> 172.27.0.6:22 seq:1304582308, ack:1354418612, flags:A, tcp_v4_do_rcv+0x70 292 | [2025.235967] TCP: 162.241.189.135:46756 -> 172.27.0.6:22 seq:1304582308, ack:1354418612, flags:AP, tcp_v4_do_rcv+0x70 293 | ``` 294 | 295 | ### netfilter支持 296 | 297 | 网络防火墙是网络故障、网络不通发生的重灾区,因此`nettrace`工具对`netfilter`提供了 298 | 完美适配,包括老版本的`iptables-legacy`和新版本的`iptables-nft`。诊断模式下, 299 | `nettrace`能够跟踪报文所经过的`iptables`表和`iptables`链,并在发生由于iptables 300 | 导致的丢包时给出一定的提示,上面的示例充分展现出了这部分。除了对iptables的支持, 301 | `nettrace`对整个netfilter大模块也提供了支持,能够显示在经过每个HOOK点时对应的协议族 302 | 和链的名称。除此之外,为了应对一些注册到netfilter中的第三方内核模块导致的丢包问题, 303 | `nettrace`还可以通过添加参数`hooks`来打印出当前`HOOK`上所有的钩子函数,从而深入 304 | 分析问题: 305 | 306 | ```shell 307 | ./nettrace -p icmp --diag --saddr 192.168.122.8 --hooks 308 | begin trace... 
309 | ***************** ffff889faa054500 *************** 310 | [5810.702473] [__netif_receive_skb_core] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 311 | [5810.702491] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *ipv4 in chain: PRE_ROUTING* 312 | [5810.702504] [nft_do_chain ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *iptables table:nat, chain:PREROUT* *packet is accepted* 313 | [5810.702519] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *bridge in chain: PRE_ROUTING* 314 | [5810.702527] [__netif_receive_skb_core] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 315 | [5810.702535] [ip_rcv ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 316 | [5810.702540] [ip_rcv_core ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 317 | [5810.702546] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *ipv4 in chain: PRE_ROUTING* 318 | [5810.702551] [ip_rcv_finish ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 319 | [5810.702556] [ip_route_input_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 320 | [5810.702565] [fib_validate_source ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 321 | [5810.702579] [ip_forward ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 322 | [5810.702583] [nf_hook_slow ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *ipv4 in chain: FORWARD* *packet is dropped by netfilter (NF_DROP)* 323 | [5810.702586] [nft_do_chain ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *iptables table:filter, chain:FORWARD* *packet is dropped by iptables/iptables-nft* 324 | [5810.702599] [kfree_skb ] ICMP: 192.168.122.8 -> 10.123.119.98 ping request, seq: 943 *packet is dropped by kernel* 325 | ---------------- ANALYSIS RESULT --------------------- 326 | [1] ERROR happens in nf_hook_slow(netfilter): 327 | packet is dropped by netfilter 
(NF_DROP) 328 | 329 | following hook functions are blamed: 330 | nft_do_chain_ipv4 331 | 332 | fix advice: 333 | check your netfilter rule 334 | 335 | [2] ERROR happens in nft_do_chain(netfilter): 336 | packet is dropped by iptables/iptables-nft 337 | fix advice: 338 | check your iptables rule 339 | 340 | [3] ERROR happens in kfree_skb(life): 341 | packet is dropped by kernel 342 | location: 343 | nf_hook_slow+0x96 344 | drop reason: 345 | NETFILTER_DROP 346 | 347 | analysis finished! 348 | 349 | end trace... 350 | ``` 351 | 352 | 可以看出,上面`following hook functions are blamed`中列出了导致当前`netfilter` 353 | 丢包的所有的钩子函数,这里只有`iptables`一个钩子函数。 354 | 355 | ### sock跟踪 356 | 357 | 套接口跟踪在原理上与skb的basic模式很类似,只不过跟踪对象从skb换成了sock。 358 | 常规的过滤参数,如ip、端口等,在该模式下都可以直接使用,基本用法如下所示: 359 | 360 | ```shell 361 | sudo ./nettrace -p tcp --port 9999 --sock 362 | begin trace... 363 | [2157947.050509] [inet_listen ] TCP: 0.0.0.0:9999 -> 0.0.0.0:0 info:(0 0) 364 | [2157958.364842] [__tcp_transmit_skb ] TCP: 127.0.0.1:36562 -> 127.0.0.1:9999 info:(1 0) 365 | [2157958.364875] [tcp_rcv_state_process] TCP: 0.0.0.0:9999 -> 0.0.0.0:0 info:(0 0) 366 | [2157958.364890] [tcp_rcv_state_process] TCP: 127.0.0.1:36562 -> 127.0.0.1:9999 info:(1 0) timer:(retrans, 1.000s) 367 | [2157958.364896] [tcp_ack ] TCP: 127.0.0.1:36562 -> 127.0.0.1:9999 info:(1 0) timer:(retrans, 1.000s) 368 | [2157958.364906] [__tcp_transmit_skb ] TCP: 127.0.0.1:36562 -> 127.0.0.1:9999 info:(0 0) 369 | [2157958.364917] [tcp_rcv_state_process] TCP: 127.0.0.1:9999 -> 127.0.0.1:36562 info:(0 0) 370 | [2157958.364921] [tcp_ack ] TCP: 127.0.0.1:9999 -> 127.0.0.1:36562 info:(0 0) 371 | [2157959.365240] [tcp_write_timer_handler] TCP: 127.0.0.1:36562 -> 127.0.0.1:9999 info:(0 0) 372 | ``` 373 | 374 | 其中,`info`里显示的内容分别是:报文在外数量、报文重传数量。`timer`显示的为当前套接口上的定时器和超时时间。目前,信息还在不断完善中。 375 | 376 | ### monitor模式 377 | 378 | 常规的网络定位手段,包括上面的报文跟踪、诊断等方式,由于开销过大,不适合在生产环境中 379 | 部署和常态化运行。监控模式能够提供一种更加轻量级别的网络异常、丢包监控。由于这种模式 380 | 
是基于`TRACING`类型的BPF,因此其对于内核版本有较高的要求。以下是内核版本要求: 381 | 382 | | TencentOS | 开源版本 | BPF特性 | monitor | 383 | |---|---|---|---| 384 | |5.4.119-19.0009 | 5.5 | TRACING | 可用,不可监控内核模块中的函数和参数个数超过6的内核函数 | 385 | | 开发中 | 5.11 | BTF_MODULES | 可用,不可监控参数个数超过6的内核函数 | 386 | | 开发中 | 开发中 | TRACING支持6+参数 | 完全可用 | 387 | 388 | 其中,“TRACING支持6+参数”目前正在开发中,具体进展可参见: 389 | 390 | [bpf, x86: allow function arguments up to 12 for TRACING](https://lore.kernel.org/bpf/20230607125911.145345-1-imagedong@tencent.com/) 391 | 392 | 基本用法(在内核特性完全支持的情况下): 393 | 394 | ```shell 395 | $ nettrace --monitor 396 | begin trace... 397 | [25.167980] [nft_do_chain ] ICMP: 192.168.122.1 -> 192.168.122.9 ping request, seq: 1, id: 1523 *iptables table:filter, chain:INPUT* *packet is dropped by iptables/iptables-nft* 398 | [25.167996] [kfree_skb ] ICMP: 192.168.122.1 -> 192.168.122.9 ping request, seq: 1, id: 1523, reason: NETFILTER_DROP, nf_hook_slow+0xa8 399 | [25.168000] [nf_hook_slow ] ICMP: 192.168.122.1 -> 192.168.122.9 ping request, seq: 1, id: 1523 *ipv4 in chain: INPUT* *packet is dropped by netfilter (NF_DROP)* 400 | ``` 401 | 402 | 监控模式下,也可以使用普通模式下的各种参数,如报文过滤、`--detail`详情显示等。 403 | 404 | ## REQUIREMENTS 405 | 406 | 内核需要支持`CONFIG_BPF`, `CONFIG_KPROBES`, `CONFIG_DEBUG_INFO_BTF`(可选)功能 407 | 408 | ## OS 409 | 410 | Linux 411 | 412 | ## AUTHOR 413 | 414 | Menglong Dong 415 | 416 | ## SEE ALSO 417 | 418 | dropreason(8) 419 | 420 | -------------------------------------------------------------------------------- /shared/bpf/skb_macro.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file defines the macros that are used by BPF programs. As vmlinux.h 3 | * can't contain macro definitions, we have to define them in this 4 | * file instead. 5 | * 6 | * NOTE: This file SHOULD be used by BPF only. 
7 | */ 8 | #ifndef _H_BPF_MACRO 9 | #define _H_BPF_MACRO 10 | 11 | #define AF_INET 2 /* Internet IP Protocol */ 12 | #define AF_INET6 10 /* IP version 6 */ 13 | 14 | #define ETH_P_IP 0x0800 /* Internet Protocol packet */ 15 | #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ 16 | #define ETH_P_ARP 0x0806 /* Address Resolution packet */ 17 | 18 | #define ETH_HLEN 14 /* Total octets in header. */ 19 | 20 | #ifndef IPPROTO_ICMPV6 21 | #define IPPROTO_ICMPV6 58 /* ICMPv6 */ 22 | #endif 23 | 24 | #define ICMPV6_ECHO_REQUEST 128 25 | #define ICMPV6_ECHO_REPLY 129 26 | 27 | #define TC_ACT_UNSPEC (-1) 28 | #define TC_ACT_OK 0 29 | #define TC_ACT_RECLASSIFY 1 30 | #define TC_ACT_SHOT 2 31 | #define TC_ACT_PIPE 3 32 | #define TC_ACT_STOLEN 4 33 | #define TC_ACT_QUEUED 5 34 | #define TC_ACT_REPEAT 6 35 | #define TC_ACT_REDIRECT 7 36 | 37 | #ifndef NULL 38 | #define NULL (void *)0 39 | #endif 40 | 41 | #ifndef ARRAY_SIZE 42 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) 43 | #endif 44 | 45 | /* redefine all the CO-RE usage if BTF not supported */ 46 | #ifdef NO_BTF 47 | #undef bpf_core_type_exists 48 | #define bpf_core_type_exists(type) false 49 | 50 | #undef bpf_core_field_exists 51 | #define bpf_core_field_exists(field...) 
false 52 | 53 | #undef bpf_core_enum_value_exists 54 | #define bpf_core_enum_value_exists(value) false 55 | 56 | #undef bpf_core_field_offset 57 | #define bpf_core_field_offset(type, field) offsetof(type, field) 58 | #endif 59 | 60 | #ifdef __F_NO_PROBE_READ_STR 61 | #define bpf_probe_read_str bpf_probe_read 62 | #endif 63 | 64 | #define likely(x) __builtin_expect(!!(x), 1) 65 | #define unlikely(x) __builtin_expect(!!(x), 0) 66 | 67 | #ifndef READ_ONCE 68 | #define READ_ONCE(x) (*(volatile typeof(x) *)&x) 69 | #define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) 70 | #endif 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /shared/bpf/skb_shared.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file define the struct that we use both in BPF and use space, such 3 | * as the perf event data. 4 | */ 5 | #ifndef _H_BPF_SKB_SHARED 6 | #define _H_BPF_SKB_SHARED 7 | 8 | #define nt_take_2th(ignored, a, ...) a 9 | #define nt_take_3th(ignored, a, b, ...) 
b 10 | 11 | #define __nt_placehold_arg_0 0, 12 | #define __nt_placehold_arg_1 1, 13 | #define __nt_placehold_arg_2 2, 14 | #define __nt_placehold_arg_3 3, 15 | #define __nt_placehold_arg_4 4, 16 | #define __nt_placehold_arg_5 5, 17 | #define __nt_placehold_arg_6 6, 18 | #define __nt_placehold_arg_7 7, 19 | #define __nt_placehold_arg_8 8, 20 | #define __nt_placehold_arg_9 9, 21 | #define __nt_placehold_arg_10 10, 22 | #define __nt_placehold_arg_11 11, 23 | #define __nt_placehold_arg_12 12, 24 | 25 | #define ____nt_ternary_take(a, b, c) nt_take_2th(a b, c) 26 | #define __nt_ternary_take(a, b, c) \ 27 | ____nt_ternary_take(__nt_placehold_arg_##a, b, c) 28 | 29 | /* take b if a offered; else, take c */ 30 | #define nt_ternary_take(a, b, c) __nt_ternary_take(a, b, c) 31 | 32 | #define ___macro_to_str(m) #m 33 | #define __macro_to_str(m) ___macro_to_str(m) 34 | #define macro_to_str(m) __macro_to_str(m) 35 | 36 | #define ICSK_TIME_RETRANS 1 37 | #define ICSK_TIME_DACK 2 38 | #define ICSK_TIME_PROBE0 3 39 | #define ICSK_TIME_EARLY_RETRANS 4 40 | #define ICSK_TIME_LOSS_PROBE 5 41 | #define ICSK_TIME_REO_TIMEOUT 6 42 | 43 | /* Codes for EXT_ECHO (PROBE) */ 44 | #ifndef ICMPV6_EXT_ECHO_REQUEST 45 | #define ICMPV6_EXT_ECHO_REQUEST 160 46 | #endif 47 | #ifndef ICMPV6_EXT_ECHO_REPLY 48 | #define ICMPV6_EXT_ECHO_REPLY 161 49 | #endif 50 | 51 | #define be16 u16 52 | #define be32 u32 53 | 54 | #define ETH_ALEN 6 55 | #define ARPOP_REQUEST 1 56 | #define ARPOP_REPLY 2 57 | 58 | 59 | typedef struct { 60 | u16 sport; 61 | u16 dport; 62 | } l4_min_t; 63 | 64 | typedef struct { 65 | u64 ts; 66 | union { 67 | struct { 68 | u32 saddr; 69 | u32 daddr; 70 | } ipv4; 71 | #ifndef NT_DISABLE_IPV6 72 | struct { 73 | u8 saddr[16]; 74 | u8 daddr[16]; 75 | } ipv6; 76 | #endif 77 | } l3; 78 | union { 79 | struct { 80 | be16 sport; 81 | be16 dport; 82 | u32 seq; 83 | u32 ack; 84 | u8 flags; 85 | } tcp; 86 | struct { 87 | be16 sport; 88 | be16 dport; 89 | } udp; 90 | l4_min_t min; 91 | struct { 92 | 
u8 type; 93 | u8 code; 94 | u16 seq; 95 | u16 id; 96 | } icmp; 97 | struct { 98 | u16 op; 99 | u8 source[ETH_ALEN]; 100 | u8 dest[ETH_ALEN]; 101 | } arp_ext; 102 | struct 103 | { 104 | u32 spi; 105 | u32 seq; 106 | } espheader; 107 | #define field_udp l4.udp 108 | } l4; 109 | u16 proto_l3; 110 | u8 proto_l4; 111 | u8 pad; 112 | } packet_t; 113 | 114 | typedef struct { 115 | u64 ts; 116 | union { 117 | struct { 118 | u32 saddr; 119 | u32 daddr; 120 | } ipv4; 121 | #if 0 122 | struct { 123 | u8 saddr[16]; 124 | u8 daddr[16]; 125 | } ipv6; 126 | #endif 127 | } l3; 128 | union { 129 | struct { 130 | be16 sport; 131 | be16 dport; 132 | u32 packets_out; 133 | u32 retrans_out; 134 | u32 snd_una; 135 | } tcp; 136 | struct { 137 | be16 sport; 138 | be16 dport; 139 | } udp; 140 | l4_min_t min; 141 | } l4; 142 | u32 timer_out; 143 | u32 wqlen; 144 | u32 rqlen; 145 | u16 proto_l3; 146 | u8 proto_l4; 147 | u8 timer_pending; 148 | u8 state; 149 | u8 ca_state; 150 | } sock_t; 151 | 152 | #define TCP_FLAGS_ACK (1 << 4) 153 | #define TCP_FLAGS_PSH (1 << 3) 154 | #define TCP_FLAGS_RST (1 << 2) 155 | #define TCP_FLAGS_SYN (1 << 1) 156 | #define TCP_FLAGS_FIN (1 << 0) 157 | 158 | #define DEFINE_FIELD_STD(type, name) \ 159 | type name; \ 160 | bool enable_##name; 161 | #define DEFINE_FIELD_ARRAY(type, name, size) \ 162 | type name[size]; \ 163 | bool enable_##name; 164 | #define DEFINE_FIELD(type, name, args...) 
\ 165 | nt_take_3th(dummy, ##args, DEFINE_FIELD_ARRAY, \ 166 | DEFINE_FIELD_STD)(type, name, ##args) 167 | 168 | /* used for packet filter condition */ 169 | typedef struct { 170 | u32 saddr; 171 | u32 daddr; 172 | u32 addr; 173 | u32 pkt_len_1; 174 | u32 pkt_len_2; 175 | u32 pad0; 176 | u32 saddr_v6[4]; 177 | u32 daddr_v6[4]; 178 | u32 addr_v6[4]; 179 | u16 sport; 180 | u16 dport; 181 | u16 port; 182 | u16 l3_proto; 183 | u8 l4_proto; 184 | u8 tcp_flags; 185 | u8 saddr_v6_enable:1, 186 | daddr_v6_enable:1, 187 | addr_v6_enable:1; 188 | 189 | #ifdef BPF_DEBUG 190 | bool bpf_debug; 191 | #endif 192 | } pkt_args_t; 193 | 194 | #define args_check(args, attr, value) (args->attr && args->attr != value) 195 | 196 | #define CONFIG_MAP_SIZE 1024 197 | 198 | #ifndef PERF_MAX_STACK_DEPTH 199 | #define PERF_MAX_STACK_DEPTH 127 200 | #endif 201 | typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; 202 | 203 | #define BPF_LOCAL_FUNC_MAPPER(FN, args...) \ 204 | FN(jiffies64, ##args) \ 205 | FN(get_func_ret, ##args) 206 | 207 | #define FN(name) BPF_LOCAL_FUNC_##name, 208 | enum { 209 | BPF_LOCAL_FUNC_MAPPER(FN) 210 | BPF_LOCAL_FUNC_MAX, 211 | }; 212 | #undef FN 213 | 214 | #endif 215 | -------------------------------------------------------------------------------- /shared/bpf/vmlinux.h: -------------------------------------------------------------------------------- 1 | #if defined(__TARGET_ARCH_x86) 2 | #include "vmlinux_x86.h" 3 | #elif defined(__TARGET_ARCH_arm64) 4 | #include "vmlinux_arm64.h" 5 | #elif defined(__TARGET_ARCH_loongarch) 6 | #include "vmlinux_loongarch64.h" 7 | #endif 8 | -------------------------------------------------------------------------------- /shared/bpf_utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "bpf_utils.h" 17 
| 18 | int compat_bpf_attach_kprobe(int fd, char *name, bool ret) 19 | { 20 | struct perf_event_attr attr = {}; 21 | char buf[1024], target[128]; 22 | int id, err, i = 0; 23 | 24 | attr.type = PERF_TYPE_TRACEPOINT; 25 | attr.sample_type = PERF_SAMPLE_RAW; 26 | attr.sample_period = 1; 27 | attr.wakeup_events = 1; 28 | 29 | sprintf(target, "%s%s", ret ? "ret_" : "", name); 30 | 31 | /* replace '.' with '_' in the event name, as it don't support 32 | * '.' in the kprobe event name. 33 | */ 34 | while (target[i] != '\0') { 35 | if (target[i] == '.') 36 | target[i] = '_'; 37 | i++; 38 | } 39 | 40 | sprintf(buf, "/sys/kernel/debug/tracing/events/kprobes/%s/id", 41 | target); 42 | 43 | if (file_exist(buf)) 44 | goto exist; 45 | 46 | sprintf(buf, "(echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events) 2>&1", 47 | ret ? 'r' : 'p', target, name); 48 | if (simple_exec(buf)) { 49 | pr_warn("failed to create kprobe: %s\n", target); 50 | return -1; 51 | } 52 | sprintf(buf, "/sys/kernel/debug/tracing/events/kprobes/%s/id", 53 | target); 54 | exist:; 55 | int efd = open(buf, O_RDONLY, 0); 56 | if (efd < 0) { 57 | pr_warn("failed to open event %s\n", name); 58 | return -1; 59 | } 60 | 61 | err = read(efd, buf, sizeof(buf)); 62 | if (err < 0 || err >= sizeof(buf)) { 63 | pr_warn("read from '%s' failed '%s'\n", target, strerror(errno)); 64 | return -1; 65 | } 66 | 67 | close(efd); 68 | 69 | buf[err] = 0; 70 | id = atoi(buf); 71 | attr.config = id; 72 | 73 | efd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0); 74 | if (efd < 0) { 75 | pr_warn("event %d fd %d err %s\n", id, efd, strerror(errno)); 76 | return -1; 77 | } 78 | ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); 79 | ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); 80 | 81 | return 0; 82 | } 83 | 84 | static struct btf *local_btf; 85 | const struct btf_type *btf_get_type(char *name) 86 | { 87 | const struct btf_type *t; 88 | int id; 89 | 90 | if (!local_btf) 91 | local_btf= btf__load_vmlinux_btf(); 92 | 93 | id = 
btf__find_by_name(local_btf, name); 94 | if (id < 0) 95 | return NULL; 96 | 97 | t = btf__type_by_id(local_btf, id); 98 | return t; 99 | } 100 | 101 | int btf_get_arg_count(char *name) 102 | { 103 | const struct btf_type *t; 104 | 105 | t = btf_get_type(name); 106 | if (!t) 107 | return -ENOENT; 108 | 109 | t = btf__type_by_id(local_btf, t->type); 110 | if (!t) 111 | return -ENOENT; 112 | 113 | return btf_vlen(t); 114 | } 115 | -------------------------------------------------------------------------------- /shared/bpf_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_BPF_UTILS 2 | #define _H_BPF_UTILS 3 | 4 | #include 5 | #include 6 | 7 | #include "bpf/skb_shared.h" 8 | 9 | #define BPF_PROG_FD(name) (bpf_program__fd(obj->progs.name)) 10 | #define BPF_MAP_FD(name) (bpf_map__fd(obj->maps.name)) 11 | 12 | 13 | extern long int syscall (long int __sysno, ...); 14 | 15 | #define bpf_set_config(skel, sec, value) do { \ 16 | int fd = bpf_map__fd(skel->maps.m_config); \ 17 | u8 buf[CONFIG_MAP_SIZE] = {}; \ 18 | int key = 0; \ 19 | \ 20 | if (fd < 0) { \ 21 | pr_err("failed to get config map: %d\n",\ 22 | fd); \ 23 | break; \ 24 | } \ 25 | \ 26 | memcpy(buf, &value, sizeof(value)); \ 27 | bpf_map_update_elem(fd, &key, buf, 0); \ 28 | } while (0) 29 | 30 | #define bpf_set_config_field(skel, sec, type, name, value) do { \ 31 | int fd = bpf_map__fd(skel->maps.m_config); \ 32 | u8 buf[CONFIG_MAP_SIZE] = {}; \ 33 | type *args = (void *)buf; \ 34 | int key = 0; \ 35 | \ 36 | if (fd < 0) { \ 37 | pr_err("failed to get config map: %d\n",\ 38 | fd); \ 39 | break; \ 40 | } \ 41 | \ 42 | bpf_map_lookup_elem(fd, &key, args); \ 43 | args->name = value; \ 44 | bpf_map_update_elem(fd, &key, args, 0); \ 45 | } while (0) 46 | 47 | int compat_bpf_attach_kprobe(int fd, char *name, bool ret); 48 | const struct btf_type *btf_get_type(char *name); 49 | int btf_get_arg_count(char *name); 50 | 51 | #ifndef BPF_NO_GLOBAL_DATA 52 | #undef 
BPF_FUNC_CHECK 53 | #define BPF_FUNC_CHECK(name, data, type) \ 54 | data[BPF_LOCAL_FUNC_##name] = libbpf_probe_bpf_helper(type, \ 55 | BPF_FUNC_##name, NULL) == 1; 56 | 57 | #define bpf_func_init(skel, type) \ 58 | BPF_LOCAL_FUNC_MAPPER(BPF_FUNC_CHECK, skel->rodata->bpf_func_exist, type) 59 | #else 60 | #define bpf_func_init(data, type) 61 | #endif 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /shared/pkt_utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #define _LINUX_IN_H 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "pkt_utils.h" 15 | 16 | static time_t time_offset; 17 | static struct tm *convert_ts_to_date(u64 ts) 18 | { 19 | time_t tmp; 20 | 21 | if (!time_offset) { 22 | struct sysinfo s_info; 23 | sysinfo(&s_info); 24 | 25 | time(&time_offset); 26 | time_offset -= s_info.uptime; 27 | } 28 | 29 | tmp = time_offset + (ts / 1000000000); 30 | return localtime(&tmp); 31 | } 32 | 33 | int ts_print_ts(char *buf, u64 ts, bool date_format) 34 | { 35 | struct tm *p; 36 | 37 | if (date_format) { 38 | p = convert_ts_to_date(ts); 39 | return sprintf(buf, "[%d-%d-%d %02d:%02d:%02d.%06lld] ", 1900 + p->tm_year, 40 | 1 + p->tm_mon, p->tm_mday, p->tm_hour, p->tm_min, 41 | p->tm_sec, ts % 1000000000 / 1000); 42 | } else { 43 | return sprintf(buf, "[%llu.%06llu] ", ts / 1000000000, 44 | ts % 1000000000 / 1000); 45 | } 46 | } 47 | 48 | static void ntomac(u8 mac[], char *dst) 49 | { 50 | for (int i = 0; i < 6; i++) { 51 | sprintf(dst + (i * 3), "%02X", mac[i]); 52 | if (i < 5) 53 | dst[(i * 3) + 2] = ':'; 54 | } 55 | } 56 | 57 | void ts_print_packet(char *buf, packet_t *pkt, char *minfo, 58 | bool date_format) 59 | { 60 | static char saddr[MAX_ADDR_LENGTH], daddr[MAX_ADDR_LENGTH]; 61 | char *l4_desc; 62 | u8 flags, l4; 63 | int pos; 64 | 65 | pos = ts_print_ts(buf, 
pkt->ts, date_format); 66 | if (minfo) 67 | BUF_FMT("%s", minfo); 68 | 69 | if (!pkt->proto_l3) { 70 | BUF_FMT("unknow"); 71 | return; 72 | } 73 | 74 | switch (pkt->proto_l3) { 75 | case ETH_P_ARP: 76 | case ETH_P_IP: 77 | inet_ntop(AF_INET, (void *)&pkt->l3.ipv4.saddr, saddr, 78 | sizeof(saddr)); 79 | inet_ntop(AF_INET, (void *)&pkt->l3.ipv4.daddr, daddr, 80 | sizeof(daddr)); 81 | 82 | if (pkt->proto_l3 == ETH_P_IP) 83 | break; 84 | 85 | if (pkt->l4.arp_ext.op == ARPOP_REPLY) { 86 | static char mac[MAX_ADDR_LENGTH]; 87 | 88 | ntomac(pkt->l4.arp_ext.source, mac); 89 | BUF_FMT("ARP: %s is at %s", saddr, mac); 90 | } else { 91 | BUF_FMT("ARP: who has %s, tell %s", daddr, saddr); 92 | } 93 | return; 94 | #ifndef NT_DISABLE_IPV6 95 | case ETH_P_IPV6: 96 | inet_ntop(AF_INET6, (void *)pkt->l3.ipv6.saddr, saddr, 97 | sizeof(saddr)); 98 | inet_ntop(AF_INET6, (void *)pkt->l3.ipv6.daddr, daddr, 99 | sizeof(daddr)); 100 | break; 101 | #endif 102 | default: 103 | BUF_FMT("ether protocol: 0x%04x", pkt->proto_l3); 104 | return; 105 | } 106 | 107 | l4 = pkt->proto_l4; 108 | l4_desc = i2l4(l4); 109 | if (l4_desc) 110 | BUF_FMT("%s: ", l4_desc); 111 | else 112 | BUF_FMT("%d: ", l4); 113 | 114 | switch (l4) { 115 | case IPPROTO_IP: 116 | case IPPROTO_TCP: 117 | case IPPROTO_UDP: 118 | BUF_FMT("%s:%d -> %s:%d", 119 | saddr, ntohs(pkt->l4.min.sport), 120 | daddr, ntohs(pkt->l4.min.dport)); 121 | break; 122 | case IPPROTO_ICMPV6: 123 | case IPPROTO_ICMP: 124 | case IPPROTO_ESP: 125 | BUF_FMT("%s -> %s", saddr, daddr); 126 | break; 127 | default: 128 | BUF_FMT("%s -> %s", saddr, daddr); 129 | return; 130 | } 131 | 132 | switch (l4) { 133 | case IPPROTO_IP: 134 | case IPPROTO_TCP: 135 | flags = pkt->l4.tcp.flags; 136 | #define CONVERT_FLAG(mask, name) ((flags & mask) ? 
name : "") 137 | BUF_FMT(" seq:%u, ack:%u, flags:%s%s%s%s%s", 138 | pkt->l4.tcp.seq, 139 | pkt->l4.tcp.ack, 140 | CONVERT_FLAG(TCP_FLAGS_SYN, "S"), 141 | CONVERT_FLAG(TCP_FLAGS_ACK, "A"), 142 | CONVERT_FLAG(TCP_FLAGS_RST, "R"), 143 | CONVERT_FLAG(TCP_FLAGS_PSH, "P"), 144 | CONVERT_FLAG(TCP_FLAGS_FIN, "F")); 145 | break; 146 | case IPPROTO_ICMPV6: 147 | case IPPROTO_ICMP: 148 | switch (pkt->l4.icmp.type) { 149 | default: 150 | BUF_FMT(" type: %u, code: %u, ", pkt->l4.icmp.type, 151 | pkt->l4.icmp.code); 152 | break; 153 | case ICMPV6_ECHO_REQUEST: 154 | case ICMP_ECHO: 155 | BUF_FMT(" ping request, "); 156 | break; 157 | case ICMPV6_EXT_ECHO_REQUEST: 158 | BUF_FMT(" ping request(ext), "); 159 | break; 160 | case ICMPV6_ECHO_REPLY: 161 | case ICMP_ECHOREPLY: 162 | BUF_FMT(" ping reply, "); 163 | break; 164 | case ICMPV6_EXT_ECHO_REPLY: 165 | BUF_FMT(" ping reply(ext), "); 166 | break; 167 | } 168 | BUF_FMT("seq: %u, id: %u", ntohs(pkt->l4.icmp.seq), 169 | ntohs(pkt->l4.icmp.id)); 170 | break; 171 | case IPPROTO_ESP: 172 | BUF_FMT(" spi:0x%x seq:0x%x", ntohl(pkt->l4.espheader.spi), 173 | ntohl(pkt->l4.espheader.seq)); 174 | break; 175 | default: 176 | break; 177 | } 178 | } 179 | 180 | static const char *timer_name[] = { 181 | [ICSK_TIME_RETRANS] = "retrans", 182 | [ICSK_TIME_DACK] = "dack", 183 | [ICSK_TIME_PROBE0] = "probe0", 184 | [ICSK_TIME_EARLY_RETRANS] = "early_retrans", 185 | [ICSK_TIME_LOSS_PROBE] = "loss_probe", 186 | [ICSK_TIME_REO_TIMEOUT] = "reo_timeout", 187 | }; 188 | static const char *state_name[] = { 189 | [0] = "UNKNOW", 190 | [TCP_ESTABLISHED] = "ESTABLISHED", 191 | [TCP_SYN_SENT] = "SYN_SENT", 192 | [TCP_SYN_RECV] = "SYN_RECV", 193 | [TCP_FIN_WAIT1] = "FIN_WAIT1", 194 | [TCP_FIN_WAIT2] = "FIN_WAIT2", 195 | [TCP_TIME_WAIT] = "TIME_WAIT", 196 | [TCP_CLOSE] = "CLOSE", 197 | [TCP_CLOSE_WAIT] = "CLOSE_WAIT", 198 | [TCP_LAST_ACK] = "LAST_ACK", 199 | [TCP_LISTEN] = "LISTEN", 200 | [TCP_CLOSING] = "CLOSING", 201 | }; 202 | static const char *ca_name[] = { 
203 | [TCP_CA_Open] = "CA_Open", 204 | [TCP_CA_Disorder] = "CA_Disorder", 205 | [TCP_CA_CWR] = "CA_CWR", 206 | [TCP_CA_Recovery] = "CA_Recovery", 207 | [TCP_CA_Loss] = "CA_Loss", 208 | }; 209 | 210 | typedef struct { 211 | u8 icsk_ca_state:5, 212 | icsk_ca_initialized:1, 213 | icsk_ca_setsockopt:1, 214 | icsk_ca_dst_locked:1; 215 | 216 | } tcp_ca_data_t; 217 | 218 | void ts_print_sock(char *buf, sock_t *ske, char *minfo, bool date_format) 219 | { 220 | static char saddr[MAX_ADDR_LENGTH], daddr[MAX_ADDR_LENGTH]; 221 | u64 ts = ske->ts; 222 | int pos = 0, hz; 223 | struct tm *p; 224 | u8 l4; 225 | 226 | if (date_format) { 227 | p = convert_ts_to_date(ts); 228 | BUF_FMT("[%d-%d-%d %02d:%02d:%02d.%06lld] ", 1900 + p->tm_year, 229 | 1 + p->tm_mon, p->tm_mday, p->tm_hour, p->tm_min, 230 | p->tm_sec, ts % 1000000000 / 1000); 231 | } else { 232 | BUF_FMT("[%llu.%06llu] ", ts / 1000000000, 233 | ts % 1000000000 / 1000); 234 | } 235 | 236 | if (minfo) 237 | BUF_FMT("%s", minfo); 238 | 239 | if (!ske->proto_l3) { 240 | BUF_FMT("unknow"); 241 | return; 242 | } 243 | 244 | switch (ske->proto_l3) { 245 | case ETH_P_IP: 246 | inet_ntop(AF_INET, (void *)&ske->l3.ipv4.saddr, saddr, 247 | sizeof(saddr)); 248 | inet_ntop(AF_INET, (void *)&ske->l3.ipv4.daddr, daddr, 249 | sizeof(daddr)); 250 | break; 251 | case ETH_P_IPV6: 252 | sprintf(saddr, "ipv6"); 253 | sprintf(daddr, "ipv6"); 254 | break; 255 | #if 0 256 | case ETH_P_IPV6: 257 | inet_ntop(AF_INET6, (void *)ske->l3.ipv6.saddr, saddr, 258 | sizeof(saddr)); 259 | inet_ntop(AF_INET6, (void *)ske->l3.ipv6.daddr, daddr, 260 | sizeof(daddr)); 261 | goto print_l4; 262 | #endif 263 | default: 264 | BUF_FMT("ether protocol: %u", ske->proto_l3); 265 | return; 266 | } 267 | 268 | l4 = ske->proto_l4; 269 | BUF_FMT("%s: ", i2l4(l4)); 270 | switch (l4) { 271 | case IPPROTO_TCP: 272 | case IPPROTO_UDP: 273 | BUF_FMT("%s:%d -> %s:%d", 274 | saddr, ntohs(ske->l4.min.sport), 275 | daddr, ntohs(ske->l4.min.dport)); 276 | break; 277 | default: 278 | 
BUF_FMT("%s -> %s", saddr, daddr); 279 | return; 280 | } 281 | 282 | switch (l4) { 283 | case IPPROTO_TCP: { 284 | tcp_ca_data_t *ca_state = (void *)&ske->ca_state; 285 | BUF_FMT(" %s %s out:(p%u r%u) unack:%u", state_name[ske->state], 286 | ca_name[ca_state->icsk_ca_state], 287 | ske->l4.tcp.packets_out, 288 | ske->l4.tcp.retrans_out, 289 | ske->l4.tcp.snd_una); 290 | } 291 | case IPPROTO_UDP: 292 | hz = kernel_hz(); 293 | hz = hz > 0 ? hz : 1; 294 | BUF_FMT(" mem:(w%u r%u)", ske->wqlen, ske->rqlen); 295 | if (ske->timer_pending) 296 | BUF_FMT(" timer:(%s, %u.%03us)", 297 | timer_name[ske->timer_pending], 298 | ske->timer_out / hz, 299 | ((ske->timer_out * 1000) / hz) % 1000); 300 | break; 301 | default: 302 | break; 303 | } 304 | } 305 | -------------------------------------------------------------------------------- /shared/pkt_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_PKT_UTILS 2 | #define _H_PKT_UTILS 3 | 4 | #include 5 | #include 6 | 7 | #define MAX_ADDR_LENGTH 48 8 | #define PARAM_SET(name, value) \ 9 | obj->rodata->enable_##name = true; \ 10 | obj->rodata->arg_##name = value 11 | 12 | #define BUF_FMT_INIT(fmt, args...) \ 13 | do { \ 14 | pos = sprintf(buf, fmt, ##args); \ 15 | } while (0) 16 | 17 | #define BUF_FMT(fmt, args...) 
pos += sprintf(buf + pos, fmt, ##args) 18 | 19 | void ts_print_packet(char *buf, packet_t *pkt, char *minfo, 20 | bool date_format); 21 | void ts_print_sock(char *buf, sock_t *ske, char *minfo, bool date_format); 22 | int ts_print_ts(char *buf, u64 ts, bool date_format); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | trace_group.c 2 | kprobe_trace.h 3 | nettrace 4 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | ROOT ?= $(abspath ../) 2 | bpf_progs := progs/kprobe 3 | progs := nettrace 4 | prog-nettrace-origin = \ 5 | trace.c $(COMMON_SHARED) trace_probe.c trace_tracing.c \ 6 | analysis.c $(COMPONENT)/parse_sym.c trace_group.c \ 7 | dropreason.c rstreason.c 8 | prog-nettrace = $(prog-nettrace-origin) nettrace.c 9 | 10 | ifdef COMPAT 11 | NO_GLOBAL_DATA := 1 12 | NO_BTF := 1 13 | INLINE := 1 14 | endif 15 | 16 | ifndef NO_BTF 17 | bpf_progs += progs/tracing progs/feat_args_ext 18 | endif 19 | 20 | BPF_EXTRA_DEP := kheaders.h progs/core.c progs/core.h 21 | include ../common.mk 22 | 23 | cmd_kversion := awk -F '=' 'NR>10{exit;} \ 24 | $$1~/^VERSION/{ \ 25 | sub(/ /, "", $$2); a=$$2; \ 26 | } \ 27 | $$1~/^PATCHLEVEL/{ \ 28 | sub(/ /, "", $$2); b=$$2 \ 29 | } \ 30 | $$1~/^SUBLEVEL/{ \ 31 | sub(/ /, "", $$2); c=$$2 \ 32 | } \ 33 | END{printf "%s.%s.%s\n", a, b, c}' \ 34 | $(HEADERS)/Makefile 35 | 36 | str_count = $(shell grep -c $2 $(HEADERS)/$1 2>/dev/null) 37 | struct_field = $(shell awk 'BEGIN{start=0}/struct $2 {/{start=1} \ 38 | /$3/{if (start==1) print 1} \ 39 | /}/{start=0}' $(HEADERS)/$1 2>/dev/null) 40 | 41 | ifdef NO_BTF 42 | ifeq ($(call str_count,include/uapi/linux/bpf.h,BPF_PROG_TYPE_TRACEPOINT),0) 43 | $(error BPF_PROG_TYPE_TRACEPOINT not supported by your kernel!) 
44 | endif 45 | 46 | ifneq ($(call str_count,include/uapi/linux/bpf.h,get_stackid),0) 47 | CFLAGS += -D__F_STACK_TRACE 48 | endif 49 | 50 | ifneq ($(call str_count,include/net/sock.h,sk_protocol.*8),0) 51 | BPF_CFLAGS += -D__F_SK_PRPTOCOL_LEGACY 52 | endif 53 | 54 | ifeq ($(call str_count,include/net/sock.h,__sk_flags_offset),0) 55 | BPF_CFLAGS += -D__F_NO_SK_FLAGS_OFFSET 56 | endif 57 | 58 | ifeq ($(call str_count,include/linux/netfilter.h,nf_hook_entries),0) 59 | BPF_CFLAGS += -D__F_NO_NF_HOOK_ENTRIES 60 | endif 61 | 62 | ifeq ($(call str_count,include/uapi/linux/bpf.h,bpf_probe_read_str),0) 63 | BPF_CFLAGS += -D__F_NO_PROBE_READ_STR 64 | endif 65 | 66 | ifeq ($(call struct_field,include/net/netfilter/nf_tables.h,nft_chain,\*name),) 67 | CFLAGS += -D__F_NFT_NAME_ARRAY 68 | endif 69 | 70 | __KERN_VER := $(shell $(cmd_kversion)) 71 | CFLAGS += -D__KERN_VER=$(__KERN_VER) 72 | CFLAGS += -D__KERN_MAJOR=$(shell echo $(__KERN_VER) | cut -c 1) 73 | else 74 | CFLAGS += -D__F_STACK_TRACE 75 | endif 76 | 77 | progs/kprobe_trace.h: 78 | python3 gen_trace.py probe > progs/kprobe_trace.h 79 | 80 | trace_group.c: trace.yaml 81 | python3 gen_trace.py > trace_group.c 82 | 83 | progs/*.c: progs/kprobe_trace.h 84 | @: 85 | 86 | nettrace.c: $(prog-nettrace-origin) 87 | 88 | all: $(progs) 89 | 90 | install: all 91 | @mkdir -p ${PREFIX}/usr/bin/ 92 | @cp nettrace ${PREFIX}/usr/bin/ 93 | 94 | pack: all 95 | @cp nettrace ${PREFIX}/ 96 | 97 | clean: 98 | rm -rf $(progs) trace_group.c progs/kprobe_trace.h \ 99 | $(bpf_progs) progs/*.o progs/*.skel.h \ 100 | kheaders.h 101 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | # nettrace 2 | 3 | 基于eBPF的内核报文跟踪和诊断工具 4 | -------------------------------------------------------------------------------- /src/analysis.h: -------------------------------------------------------------------------------- 1 | // 
SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_ANALYSIS 4 | #define _H_ANALYSIS 5 | 6 | #include "progs/shared.h" 7 | #include "trace.h" 8 | 9 | enum rule_level { 10 | RULE_INFO, 11 | RULE_WARN, 12 | RULE_ERROR, 13 | }; 14 | 15 | typedef struct { 16 | enum rule_level level; 17 | enum rule_type type; 18 | char *msg; 19 | char *adv; 20 | struct list_head list; 21 | 22 | union { 23 | int expected; 24 | struct { 25 | int min; 26 | int max; 27 | } range; 28 | }; 29 | } rule_t; 30 | 31 | #define ANALY_CTX_ERROR (1 << 0) 32 | #define ANALY_CTX_WARN (1 << 1) 33 | 34 | typedef struct { 35 | struct list_head entries; 36 | struct list_head fakes; 37 | u16 refs; 38 | u16 status; 39 | } analy_ctx_t; 40 | 41 | typedef struct fake_analy_ctx { 42 | analy_ctx_t *ctx; 43 | struct hlist_node hash; 44 | struct list_head list; 45 | u32 key; 46 | u16 refs; 47 | } fake_analy_ctx_t; 48 | 49 | typedef struct { 50 | /* packet that belongs to the same context */ 51 | struct list_head list; 52 | analy_ctx_t *ctx; 53 | fake_analy_ctx_t *fake_ctx; 54 | event_t *event; 55 | /* the first rule matched */ 56 | rule_t *rule; 57 | /* info used in analysis entry log */ 58 | char *msg; 59 | /* info used in analysis context result */ 60 | char *extinfo; 61 | u64 priv; 62 | u32 status; 63 | u16 cpu; 64 | /* this list is used for kretprobe based program */ 65 | struct list_head cpu_list; 66 | } analy_entry_t; 67 | 68 | typedef struct { 69 | retevent_t event; 70 | analy_entry_t *entry; 71 | u64 key; 72 | u16 cpu; 73 | } analy_exit_t; 74 | 75 | typedef struct { 76 | struct list_head list; 77 | u16 size; 78 | u16 cpu; 79 | u8 data[0]; 80 | } data_list_t; 81 | 82 | typedef enum analyzer_result { 83 | RESULT_CONT = 0, 84 | RESULT_CONSUME, 85 | } analyzer_result_t; 86 | 87 | typedef struct analyzer { 88 | analyzer_result_t (*analy_entry)(trace_t *trace, analy_entry_t *e); 89 | analyzer_result_t (*analy_exit)(trace_t *trace, analy_exit_t *e); 90 | char *name; 91 | u32 mode; 92 | } analyzer_t; 93 | 94 | 
#define ANALY_ENTRY_RETURNED (1 << 0) 95 | #define ANALY_ENTRY_EXTINFO (1 << 1) 96 | #define ANALY_ENTRY_MSG (1 << 2) 97 | #define ANALY_ENTRY_ONCPU (1 << 3) 98 | #define ANALY_ENTRY_DLIST (1 << 4) 99 | 100 | #define ANALYZER(name) analyzer_##name 101 | #define DEFINE_ANALYZER_PART(name, type, mode_mask) \ 102 | analyzer_result_t analyzer_##name##_exit(trace_t *trace, \ 103 | analy_exit_t *e) __attribute__((weak)); \ 104 | analyzer_result_t analyzer_##name##_entry(trace_t *trace, \ 105 | analy_entry_t *e) __attribute__((weak)); \ 106 | analyzer_t ANALYZER(name) = { \ 107 | .analy_entry = analyzer_##name##_entry, \ 108 | .analy_exit = analyzer_##name##_exit, \ 109 | .mode = mode_mask, \ 110 | }; \ 111 | analyzer_result_t analyzer_##name##_##type(trace_t *trace, \ 112 | analy_##type##_t *e) 113 | #define DEFINE_ANALYZER_ENTRY(name, mode) \ 114 | DEFINE_ANALYZER_PART(name, entry, mode) 115 | #define DEFINE_ANALYZER_EXIT(name, mode) \ 116 | DEFINE_ANALYZER_PART(name, exit, mode) 117 | #define DEFINE_ANALYZER_EXIT_FUNC(name) \ 118 | analyzer_result_t analyzer_##name##_exit(trace_t *trace, \ 119 | analy_exit_t *e) 120 | 121 | #define DEFINE_ANALYZER_EXIT_FUNC_DEFAULT(name) \ 122 | DEFINE_ANALYZER_EXIT_FUNC(name) \ 123 | { \ 124 | rule_run_ret(e->entry, trace, e->event.val); \ 125 | return RESULT_CONT; \ 126 | } 127 | 128 | #define DECLARE_ANALYZER(name) extern analyzer_t ANALYZER(name) 129 | #define IS_ANALYZER(target, name) (target == &(ANALYZER(name))) 130 | 131 | DECLARE_ANALYZER(drop); 132 | DECLARE_ANALYZER(free); 133 | DECLARE_ANALYZER(clone); 134 | DECLARE_ANALYZER(ret); 135 | DECLARE_ANALYZER(iptable); 136 | DECLARE_ANALYZER(nf); 137 | DECLARE_ANALYZER(qdisc); 138 | DECLARE_ANALYZER(rtt); 139 | DECLARE_ANALYZER(reset); 140 | DECLARE_ANALYZER(default); 141 | 142 | #define define_pure_event(type, name, data) \ 143 | pure_##type *name = \ 144 | (!trace_ctx.detail ? 
(void *)(data) + \ 145 | offsetof(type, __event_filed) : \ 146 | (void *)(data) + \ 147 | offsetof(detail_##type, __event_filed)) 148 | 149 | void ctx_poll_handler(void *raw_ctx, int cpu, void *data, u32 size); 150 | void basic_poll_handler(void *ctx, int cpu, void *data, u32 size); 151 | void async_poll_handler(void *ctx, int cpu, void *data, u32 size); 152 | void latency_poll_handler(void *ctx, int cpu, void *data, u32 size); 153 | 154 | int stats_poll_handler(); 155 | int func_stats_poll_handler(); 156 | 157 | static inline trace_t *get_trace_from_analy_entry(analy_entry_t *e) 158 | { 159 | return get_trace(e->event->func); 160 | } 161 | 162 | static inline trace_t *get_trace_from_analy_exit(analy_exit_t *e) 163 | { 164 | return get_trace(e->event.func); 165 | } 166 | 167 | static inline void get_analy_ctx(analy_ctx_t *ctx) 168 | { 169 | ctx->refs++; 170 | } 171 | 172 | static inline void put_analy_ctx(analy_ctx_t *ctx) 173 | { 174 | ctx->refs--; 175 | } 176 | 177 | static inline u32 get_entry_dela_us(analy_entry_t *n, analy_entry_t *o) 178 | { 179 | if (n == o) 180 | return 0; 181 | 182 | return (n->event->pkt.ts - o->event->pkt.ts) / 1000; 183 | } 184 | 185 | static inline u32 get_lifetime_us(analy_ctx_t *ctx, bool skip_last) 186 | { 187 | analy_entry_t *first, *last; 188 | trace_t *t; 189 | 190 | first = list_first_entry(&ctx->entries, analy_entry_t, list); 191 | last = list_last_entry(&ctx->entries, analy_entry_t, list); 192 | 193 | t = get_trace_from_analy_entry(last); 194 | if (skip_last && !(t->status & TRACE_CFREE)) { 195 | if (first == last) 196 | return 0; 197 | last = list_prev_entry(last, list); 198 | } 199 | 200 | return get_entry_dela_us(last, first); 201 | } 202 | 203 | static inline u32 get_lifetime_ms(analy_ctx_t *ctx, bool skip_last) 204 | { 205 | return get_lifetime_us(ctx, skip_last) / 1000; 206 | } 207 | 208 | static inline void get_fake_analy_ctx(fake_analy_ctx_t *ctx) 209 | { 210 | /* the case of new created fake_ctx */ 211 | if 
(!ctx->refs) 212 | get_analy_ctx(ctx->ctx); 213 | ctx->refs++; 214 | } 215 | 216 | static inline void put_fake_analy_ctx(fake_analy_ctx_t *ctx) 217 | { 218 | ctx->refs--; 219 | if (ctx->refs <= 0) 220 | put_analy_ctx(ctx->ctx); 221 | } 222 | 223 | static inline void entry_set_extinfo(analy_entry_t *e, char *info) 224 | { 225 | e->extinfo = info; 226 | e->status |= ANALY_ENTRY_EXTINFO; 227 | } 228 | 229 | static inline void entry_set_msg(analy_entry_t *e, char *info) 230 | { 231 | e->msg = info; 232 | e->status |= ANALY_ENTRY_MSG; 233 | } 234 | 235 | static inline analy_entry_t *analy_entry_alloc(void *data, u32 size) 236 | { 237 | analy_entry_t *entry = calloc(1, sizeof(*entry)); 238 | int copy_size = size; 239 | void *event; 240 | 241 | if (!entry) 242 | return NULL; 243 | 244 | if (size > MAX_EVENT_SIZE + 8) { 245 | pr_err("trace data is too big! size: %u, max: %lu\n", 246 | size, MAX_EVENT_SIZE); 247 | return NULL; 248 | } 249 | copy_size = MIN(size, MAX_EVENT_SIZE); 250 | event = malloc(copy_size); 251 | 252 | memcpy(event, data, copy_size); 253 | entry->event = event; 254 | return entry; 255 | } 256 | 257 | static inline bool mode_has_context() 258 | { 259 | return trace_ctx.mode_mask & TRACE_MODE_CTX_MASK; 260 | } 261 | 262 | static inline int func_get_type(void *data) 263 | { 264 | return ((event_t *)data)->meta; 265 | } 266 | 267 | #endif 268 | -------------------------------------------------------------------------------- /src/dropreason.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "dropreason.h" 9 | 10 | #define REASON_MAX_COUNT 256 11 | #define REASON_MAX_LEN 32 12 | 13 | static char drop_reasons[REASON_MAX_COUNT][REASON_MAX_LEN] = {}; 14 | static int drop_reason_max; 15 | static bool drop_reason_inited = false; 16 | 17 | /* check if drop reason on kfree_skb is supported */ 18 | bool drop_reason_support() 19 | { 20 | 
return simple_exec("cat /sys/kernel/debug/tracing/events/skb/" 21 | "kfree_skb/format 2>/dev/null | " 22 | "grep NOT_SPECIFIED") == 0; 23 | } 24 | 25 | static int parse_reason_enum() 26 | { 27 | char name[REASON_MAX_LEN]; 28 | int index = 0; 29 | FILE *f; 30 | 31 | f = fopen("/sys/kernel/debug/tracing/events/skb/kfree_skb/format", 32 | "r"); 33 | 34 | if (!f || !fsearch(f, "__print_symbolic")) { 35 | if (f) 36 | fclose(f); 37 | return -1; 38 | } 39 | 40 | while (true) { 41 | if (!fsearch(f, "{") || 42 | fscanf(f, "%d, \"%31[A-Z_0-9]", &index, name) != 2) 43 | break; 44 | strcpy(drop_reasons[index], name); 45 | } 46 | drop_reason_max = index; 47 | drop_reason_inited = true; 48 | 49 | fclose(f); 50 | return 0; 51 | } 52 | 53 | char *get_drop_reason(int index) 54 | { 55 | if (!drop_reason_inited && parse_reason_enum()) 56 | return NULL; 57 | if (index <= 0 || index > drop_reason_max) 58 | return NULL; 59 | 60 | return drop_reasons[index]; 61 | } 62 | -------------------------------------------------------------------------------- /src/dropreason.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_SKB_DROP_REASON 2 | #define _H_SKB_DROP_REASON 3 | 4 | #include 5 | 6 | char *get_drop_reason(int index); 7 | bool drop_reason_support(); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/gen_trace.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | """ script that generate trace group info """ 3 | import sys 4 | import yaml 5 | import re 6 | 7 | global_status = {} 8 | global_names = {} 9 | global_status['trace_index'] = 1 10 | 11 | rule_levels = { 12 | 'info': 'RULE_INFO', 13 | 'warn': 'RULE_WARN', 14 | 'error': 'RULE_ERROR', 15 | } 16 | 17 | rule_types = { 18 | 'eq': 'RULE_RETURN_EQ', 19 | 'lt': 'RULE_RETURN_LT', 20 | 'gt': 'RULE_RETURN_GT', 21 | 'ne': 'RULE_RETURN_NE', 22 | 'range': 'RULE_RETURN_RANGE', 23 | 'any': 
'RULE_RETURN_ANY', 24 | } 25 | 26 | 27 | def parse_names(trace, children): 28 | children.remove(trace) 29 | names = trace['names'] 30 | del trace['names'] 31 | 32 | for name in names: 33 | if isinstance(name, str): 34 | name = {'name': name} 35 | 36 | tmp = dict(trace) 37 | tmp.update(name) 38 | name.update(tmp) 39 | if 'cond' in name: 40 | name['cond'] = name['cond'].replace('"', '\\"') 41 | 42 | children.append(name) 43 | 44 | 45 | def parse_group(group): 46 | """ parse group in yaml file """ 47 | if 'children' not in group: 48 | return 49 | children = group['children'] 50 | i = 0 51 | while i < len(children): 52 | child = children[i] 53 | if 'backup' in child: 54 | child['backup']['is_backup'] = True 55 | 56 | if isinstance(child, str): 57 | children.remove(child) 58 | child = { 59 | "name": child 60 | } 61 | children.insert(i, child) 62 | 63 | parse_group(child) 64 | if 'names' in child: 65 | parse_names(child, children) 66 | continue 67 | i += 1 68 | if 'children' in child: 69 | continue 70 | 71 | name_split = child['name'].split(':') 72 | if len(name_split) > 1: 73 | child['skb'] = int(re.match(r'\d+', name_split[1]).group()) 74 | name_split = child['name'].split('/') 75 | if len(name_split) > 1: 76 | child['sk'] = int(re.match(r'\d+', name_split[1]).group()) 77 | child['name'] = re.match(r'[a-zA-Z_0-9]+', child['name']).group() 78 | 79 | 80 | def gen_group_init(group, name): 81 | return f'''trace_group_t {name} = {{ 82 | .name = "{group['name']}", 83 | .desc = "{group.get('desc')}", 84 | .children = LIST_HEAD_INIT({name}.children), 85 | .traces = LIST_HEAD_INIT({name}.traces), 86 | .list = LIST_HEAD_INIT({name}.list), 87 | }}; 88 | ''' 89 | 90 | 91 | def gen_name(name, is_trace=False): 92 | if is_trace: 93 | name = name.replace('-', '_') 94 | else: 95 | name = ('group_' + name).replace('-', '_') 96 | if name in global_names: 97 | global_names[name] += 1 98 | return f'{name}_{global_names[name]}' 99 | global_names[name] = 0 100 | return name 101 | 102 | 103 | 
btf_data = None 104 | 105 | 106 | def get_arg_count(name): 107 | global btf_data 108 | if not btf_data: 109 | with open("btf.raw", 'r', encoding='utf-8') as btf_file: 110 | btf_data = btf_file.read() 111 | reg_text = f"'{name}' type_id=([0-9]+)" 112 | match = re.search(reg_text, btf_data) 113 | if not match: 114 | return 0 115 | 116 | type_id = match.group(1) 117 | match = re.search(f"\\[{type_id}\\].*vlen=([0-9]+)", btf_data) 118 | return match.group(1) 119 | 120 | 121 | def gen_rules(rules, name): 122 | rule_str, init_str = '', '' 123 | for index, rule in enumerate(rules): 124 | level = rule['level'] 125 | rule_tmp = f'\t.level = {rule_levels[level]},\n' 126 | exps = rule['exp'].split(' ') 127 | rule_type = rule_types[exps[0]] 128 | if exps[0] == 'range': 129 | ranges = exps[1].split('-') 130 | rule_tmp += f'\t.range = {{ .min = {ranges[0]}, .max = {ranges[1]}}},\n' 131 | elif exps[0] != 'any': 132 | rule_tmp += f'\t.expected = {exps[1]},\n' 133 | rule_tmp += f'\t.type = {rule_type},\n' 134 | if 'adv' in rule: 135 | rule_adv = rule["adv"].replace('\n', '\\n') 136 | rule_tmp += f'\t.adv = "{rule_adv}",\n' 137 | msg = f'PFMT_EMPH"{rule["msg"]}"PFMT_END' 138 | if level == 'warn': 139 | msg = f'PFMT_WARN"{rule["msg"]}"PFMT_END' 140 | elif level == 'error': 141 | msg = f'PFMT_ERROR"{rule["msg"]}"PFMT_END' 142 | rule_tmp += f'\t.msg = {msg},\n' 143 | rule_name = f'rule_{name}_{index}' 144 | rule_str += f'rule_t {rule_name} = {{{rule_tmp}}};\n' 145 | init_str += f'\tlist_add_tail(&{rule_name}.list, &{name}.rules);\n' 146 | return (rule_str, init_str) 147 | 148 | 149 | def append_trace_field(field, trace, type='string'): 150 | if type == 'string': 151 | return f'\n\t.{field} = "{trace[field]}",' if field in trace else '' 152 | if type == 'bool': 153 | value = 'true' if trace.get(field) else 'false' 154 | return f'\n\t.{field} = {value},' 155 | if type == 'raw': 156 | if field in trace: 157 | return f'\n\t.{field} = {trace[field]},' 158 | return '' 159 | 160 | 161 | def 
append_filed(field, value, type='string'): 162 | if type == 'string': 163 | return f'\n\t.{field} = "{value}",' 164 | if type == 'bool': 165 | value = 'true' if value else 'false' 166 | return f'\n\t.{field} = {value},' 167 | if type == 'raw': 168 | return f'\n\t.{field} = {value},' 169 | return '' 170 | 171 | 172 | def gen_trace_list(trace, p_name): 173 | name = trace['define_name'] 174 | list_count = trace.get('list_count', 1) 175 | list_count += 1 176 | trace['list_count'] = list_count 177 | trace_name = 'trace_' + name 178 | trace_list = f'{trace_name}_list_{list_count}' 179 | define_str = f''' 180 | trace_list_t {trace_list} = {{ 181 | \t.trace = &{trace_name}, 182 | \t.list = LIST_HEAD_INIT({trace_list}.list) 183 | }}; 184 | ''' 185 | init_str = f'\tlist_add_tail(&{trace_list}.list, &{p_name}.traces);\n' 186 | 187 | return { 188 | 'define_str': define_str, 189 | 'init_str': init_str, 190 | 'probe_str': '', 191 | 'index_str': '' 192 | } 193 | 194 | 195 | def gen_trace(trace, group, p_name): 196 | # trace is already defined, just define corresponding trace_list for it 197 | if 'define_name' in trace: 198 | return gen_trace_list(trace, p_name) 199 | 200 | name = gen_name(trace["name"], True) 201 | trace_name = 'trace_' + name 202 | trace['define_name'] = name 203 | probe_str = '' 204 | skb_str = '' 205 | index_str = f'#define INDEX_{name} {global_status["trace_index"]}\n' 206 | rule_str = '' 207 | init_str = '' 208 | fields_str = '' 209 | skb_index = 0 210 | sk_index = 0 211 | target = trace.get('target') or trace['name'] 212 | 213 | if 'tp' in trace: 214 | trace_type = 'TRACE_TP' 215 | tp = trace['tp'].split('/') 216 | if 'skb' in trace and 'custom' not in trace: 217 | probe_str = f'\tFN_tp({name}, {tp[0]}, {tp[1]}, {trace["skb"]}, {trace["skboffset"]})\t\\\n' 218 | else: 219 | probe_str = f'\tFNC({name})\t\\\n' 220 | else: 221 | trace_type = 'TRACE_FUNCTION' 222 | if 'skb' in trace or 'sk' in trace: 223 | arg_count = '' 224 | if 'monitor' in trace: 225 | if 
'arg_count' not in trace: 226 | trace['arg_count'] = get_arg_count(target) 227 | arg_count = trace['arg_count'] 228 | else: 229 | arg_count = trace['arg_count'] 230 | if not arg_count: 231 | print( 232 | f"BTF not found for {target}, skip monitor", file=sys.stderr) 233 | trace['monitor'] = 0 234 | else: 235 | fields_str += append_trace_field('arg_count', trace, 'raw') 236 | skb = trace['skb'] if 'skb' in trace else '' 237 | sk = trace['sk'] if 'sk' in trace else '' 238 | if 'custom' not in trace: 239 | probe_str = f'\tFN({name}, {skb}, {sk}, {arg_count})\t\\\n' 240 | else: 241 | probe_str = f'\tFNC({name})\t\\\n' 242 | else: 243 | probe_str = f'\tFNC({name})\t\\\n' 244 | trace['custom'] = True 245 | 246 | if 'analyzer' in trace: 247 | analyzer = f'\n\t.analyzer = &ANALYZER({trace["analyzer"]}),' 248 | else: 249 | analyzer = '\n\t.analyzer = &ANALYZER(default),' 250 | 251 | if 'rules' in trace and trace['rules']: 252 | rules = trace['rules'] 253 | (rule_str, _init_str) = gen_rules(rules, trace_name) 254 | init_str += _init_str 255 | 256 | if 'skb' in trace: 257 | trace['skb'] = int(trace.get('skb') or 0) + 1 258 | if 'sk' in trace: 259 | trace['sk'] = int(trace.get('sk') or 0) + 1 260 | 261 | fields_str += append_trace_field('cond', trace) 262 | fields_str += append_trace_field('regex', trace) 263 | fields_str += append_trace_field('msg', trace) 264 | fields_str += append_trace_field('is_backup', trace, 'bool') 265 | fields_str += append_trace_field('probe', trace, 'bool') 266 | fields_str += append_trace_field('monitor', trace, 'raw') 267 | fields_str += append_filed('name', target) 268 | fields_str += append_trace_field('skb', trace, 'raw') 269 | fields_str += append_trace_field('sk', trace, 'raw') 270 | fields_str += append_trace_field('skboffset', trace, 'raw') 271 | fields_str += append_trace_field('skoffset', trace, 'raw') 272 | fields_str += append_trace_field('custom', trace, 'bool') 273 | fields_str += append_trace_field('tp', trace) 274 | 275 | default = 
True 276 | if 'default' in trace: 277 | default = trace['default'] 278 | elif 'default' in group: 279 | default = group['default'] 280 | fields_str += append_filed('def', default, 'bool') 281 | 282 | define_str = f'''trace_t {trace_name} = {{ 283 | \t.desc = "{trace.get('desc') or ''}", 284 | \t.type = {trace_type},{analyzer}{fields_str} 285 | \t.index = INDEX_{name}, 286 | \t.prog = "__trace_{name}", 287 | \t.parent = &{p_name}, 288 | \t.rules = LIST_HEAD_INIT({trace_name}.rules), 289 | }}; 290 | trace_list_t {trace_name}_list = {{ 291 | \t.trace = &{trace_name}, 292 | \t.list = LIST_HEAD_INIT({trace_name}_list.list) 293 | }}; 294 | {rule_str} 295 | ''' 296 | 297 | init_str += f'''\tlist_add_tail(&{trace_name}_list.list, &{p_name}.traces); 298 | \tall_traces[INDEX_{name}] = &{trace_name}; 299 | \tlist_add_tail(&{trace_name}.all, &trace_list); 300 | ''' 301 | global_status['trace_index'] += 1 302 | 303 | return { 304 | 'define_str': define_str, 305 | 'init_str': init_str, 306 | 'probe_str': probe_str, 307 | 'index_str': index_str 308 | } 309 | 310 | 311 | def gen_append(target, source): 312 | target['define_str'] += source['define_str'] 313 | target['init_str'] += source['init_str'] 314 | target['probe_str'] += source['probe_str'] 315 | target['index_str'] += source['index_str'] 316 | 317 | 318 | def gen_group(group, is_root=False): 319 | if 'children' not in group: 320 | return 321 | 322 | result = { 323 | "define_str": '', 324 | "init_str": '', 325 | "probe_str": '', 326 | "index_str": '', 327 | } 328 | 329 | if is_root: 330 | p_name = 'root_group' 331 | group['define_name'] = p_name 332 | result['define_str'] += gen_group_init(group, p_name) 333 | else: 334 | p_name = group['define_name'] 335 | 336 | for child in group['children']: 337 | if 'children' in child: 338 | name = gen_name(child["name"]) 339 | child['define_name'] = name 340 | result['define_str'] += gen_group_init(child, name) 341 | result['init_str'] += f'\tlist_add_tail(&{name}.list, 
&{p_name}.children);\n' 342 | gen_append(result, gen_group(child)) 343 | else: 344 | gen_append(result, gen_trace(child, group, p_name)) 345 | 346 | for child in group['children']: 347 | if 'children' in child: 348 | continue 349 | backup = 'NULL' 350 | if 'backup' in child: 351 | backup = f"&trace_{child['backup']['define_name']}" 352 | result['init_str'] += f"\ttrace_{child['define_name']}.backup = {backup};\n" 353 | return result 354 | 355 | 356 | with open('trace.yaml', 'r', encoding='utf-8') as f: 357 | content = f.read() 358 | root = yaml.load(content, yaml.SafeLoader) 359 | parse_group(root) 360 | 361 | all_result = gen_group(root, root) 362 | all_define_str = all_result['define_str'] 363 | all_init_str = all_result['init_str'] 364 | all_probe_str = all_result['probe_str'] 365 | all_index_str = all_result['index_str'] 366 | 367 | if len(sys.argv) > 1 and sys.argv[1] == 'probe': 368 | print(f'''{all_index_str} 369 | #define TRACE_MAX {global_status['trace_index']} 370 | #define DEFINE_ALL_PROBES(FN, FN_tp, FNC)\t\t\\ 371 | {all_probe_str} 372 | ''') 373 | else: 374 | print(f'''#include "trace.h" 375 | #include "progs/kprobe_trace.h" 376 | #include "analysis.h" 377 | 378 | {all_define_str} 379 | 380 | trace_t *all_traces[TRACE_MAX]; 381 | int trace_count = TRACE_MAX; 382 | LIST_HEAD(trace_list); 383 | 384 | void init_trace_group() 385 | {{ 386 | {all_init_str} 387 | }} 388 | ''') 389 | -------------------------------------------------------------------------------- /src/nettrace.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "nettrace.h" 10 | #include "trace.h" 11 | 12 | arg_config_t config = { 13 | .name = "nettrace", 14 | .summary = "a tool to trace skb in kernel and diagnose network problem", 15 | .desc = "", 16 | }; 17 | 18 | static void do_parse_args(int argc, char *argv[]) 19 | { 20 | bool show_log 
= false, debug = false, version = false; 21 | trace_args_t *trace_args = &trace_ctx.args; 22 | bpf_args_t *bpf_args = &trace_ctx.bpf_args; 23 | pkt_args_t *pkt_args = &bpf_args->pkt; 24 | u8 addr_buf[16], saddr_buf[16], daddr_buf[16]; 25 | u16 addr_pf = 0, saddr_pf = 0, daddr_pf = 0; 26 | int proto_l = 0; 27 | u16 proto; 28 | 29 | option_item_t opts[] = { 30 | { 31 | .lname = "saddr", 32 | .sname = 's', 33 | .dest = saddr_buf, 34 | .type = OPTION_IPV4ORIPV6, 35 | .set = &saddr_pf, 36 | .desc = "filter source ip/ipv6 address", 37 | }, 38 | { 39 | .lname = "daddr", 40 | .sname = 'd', 41 | .dest = daddr_buf, 42 | .type = OPTION_IPV4ORIPV6, 43 | .set = &daddr_pf, 44 | .desc = "filter dest ip/ipv6 address", 45 | }, 46 | { 47 | .lname = "addr", 48 | .dest = addr_buf, 49 | .type = OPTION_IPV4ORIPV6, 50 | .set = &addr_pf, 51 | .desc = "filter source or dest ip/ipv6 address", 52 | }, 53 | { 54 | .lname = "sport", 55 | .sname = 'S', 56 | .dest = &pkt_args->sport, 57 | .type = OPTION_U16BE, 58 | .desc = "filter source TCP/UDP port", 59 | }, 60 | { 61 | .lname = "dport", 62 | .sname = 'D', 63 | .dest = &pkt_args->dport, 64 | .type = OPTION_U16BE, 65 | .desc = "filter dest TCP/UDP port", 66 | }, 67 | { 68 | .lname = "port", 69 | .sname = 'P', 70 | .dest = &pkt_args->port, 71 | .type = OPTION_U16BE, 72 | .desc = "filter source or dest TCP/UDP port", 73 | }, 74 | { 75 | .lname = "proto", 76 | .sname = 'p', 77 | .dest = &proto, 78 | .type = OPTION_PROTO, 79 | .set = &proto_l, 80 | .desc = "filter L3/L4 protocol, such as 'tcp', 'arp'", 81 | }, 82 | { 83 | .lname = "netns", 84 | .dest = &bpf_args->netns, 85 | .type = OPTION_U32, 86 | .desc = "filter by net namespace inode", 87 | }, 88 | { 89 | .lname = "netns-current", 90 | .dest = &trace_args->netns_current, 91 | .type = OPTION_BOOL, 92 | .desc = "filter by current net namespace", 93 | }, 94 | { 95 | .lname = "pid", .type = OPTION_U32, 96 | .dest = &bpf_args->pid, 97 | .desc = "filter by current process id(pid)", 98 | }, 99 | { 100 
| .lname = "min-latency", .dest = &trace_args->min_latency, 101 | .type = OPTION_U32, 102 | .desc = "filter by the minial time to live of the skb in us", 103 | }, 104 | { 105 | .lname = "pkt-len", .dest = &trace_args->pkt_len, 106 | .type = OPTION_STRING, 107 | .desc = "filter by the IP packet length (include header) in byte", 108 | }, 109 | { 110 | .lname = "tcp-flags", .dest = &trace_args->tcp_flags, 111 | .type = OPTION_STRING, 112 | .desc = "filter by TCP flags, such as: SAPR", 113 | }, 114 | { .type = OPTION_BLANK }, 115 | { 116 | .lname = "basic", .dest = &trace_args->basic, 117 | .type = OPTION_BOOL, 118 | .desc = "use 'basic' trace mode, don't trace skb's life", 119 | }, 120 | { 121 | .lname = "diag", .dest = &trace_args->intel, 122 | .type = OPTION_BOOL, 123 | .desc = "enable 'diagnose' mode", 124 | }, 125 | { 126 | .lname = "diag-quiet", .dest = &trace_args->intel_quiet, 127 | .type = OPTION_BOOL, 128 | .desc = "only print abnormal packet", 129 | }, 130 | { 131 | .lname = "diag-keep", .dest = &trace_args->intel_keep, 132 | .type = OPTION_BOOL, 133 | .desc = "don't quit when abnormal packet found", 134 | }, 135 | { 136 | .lname = "drop", .dest = &trace_args->drop, 137 | .type = OPTION_BOOL, 138 | .desc = "skb drop monitor mode, for replace of 'droptrace'", 139 | }, 140 | #ifdef __F_STACK_TRACE 141 | { 142 | .lname = "drop-stack", .dest = &trace_args->drop_stack, 143 | .type = OPTION_BOOL, 144 | .desc = "print the kernel function call stack of kfree_skb", 145 | }, 146 | #endif 147 | { 148 | .lname = "sock", .dest = &trace_args->sock, 149 | .type = OPTION_BOOL, 150 | .desc = "enable 'sock' mode", 151 | }, 152 | { 153 | .lname = "monitor", .dest = &trace_args->monitor, 154 | .type = OPTION_BOOL, 155 | .desc = "enable 'monitor' mode", 156 | }, 157 | { 158 | .lname = "rtt", .dest = &trace_args->rtt, 159 | .type = OPTION_BOOL, 160 | .desc = "enable 'rtt' in statistics mode", 161 | }, 162 | { 163 | .lname = "rtt-detail", .dest = &trace_args->rtt_detail, 164 | 
.type = OPTION_BOOL, 165 | .desc = "enable 'rtt' in detail mode", 166 | }, 167 | { 168 | .lname = "filter-srtt", .dest = &bpf_args->first_rtt, 169 | .type = OPTION_U32, 170 | .desc = "filter by the minial first-acked rtt in ms", 171 | }, 172 | { 173 | .lname = "filter-minrtt", .dest = &bpf_args->last_rtt, 174 | .type = OPTION_U32, 175 | .desc = "filter by the minial last-acked rtt in ms", 176 | }, 177 | { 178 | .lname = "latency-show", .dest = &trace_args->latency_show, 179 | .type = OPTION_BOOL, 180 | .desc = "show latency between kernel functions", 181 | }, 182 | { 183 | .lname = "latency-free", .dest = &bpf_args->latency_free, 184 | .type = OPTION_BOOL, 185 | .desc = "account the latency of skb free", 186 | }, 187 | { 188 | .lname = "latency", .dest = &trace_args->latency, 189 | .type = OPTION_BOOL, 190 | .desc = "enable 'latency' mode", 191 | }, 192 | { 193 | .lname = "latency-summary", .dest = &bpf_args->latency_summary, 194 | .type = OPTION_BOOL, 195 | .desc = "show latency by statistics", 196 | }, 197 | { .type = OPTION_BLANK }, 198 | { 199 | .lname = "trace", .sname = 't', 200 | .dest = &trace_args->traces, 201 | .desc = "enable trace group or trace. 
Some traces are " 202 | "disabled by default, use \"all\" to enable all", 203 | }, 204 | { 205 | .lname = "force", .dest = &trace_args->force, 206 | .type = OPTION_BOOL, 207 | .desc = "skip some check and force load nettrace", 208 | }, 209 | { 210 | .lname = "ret", .dest = &trace_args->ret, 211 | .type = OPTION_BOOL, 212 | .desc = "show function return value", 213 | }, 214 | { 215 | .lname = "detail", .dest = &bpf_args->detail, 216 | .type = OPTION_BOOL, 217 | .desc = "show extern packet info, such as pid, ifname, etc", 218 | }, 219 | { 220 | .lname = "date", .dest = &trace_args->date, 221 | .type = OPTION_BOOL, 222 | .desc = "print timestamp in date-time format", 223 | }, 224 | { 225 | .lname = "count", .sname = 'c', .dest = &trace_args->count, 226 | .type = OPTION_U32, 227 | .desc = "exit after receiving count packets", 228 | }, 229 | { 230 | .lname = "hooks", .dest = &bpf_args->hooks, 231 | .type = OPTION_BOOL, 232 | .desc = "print netfilter hooks if dropping by netfilter", 233 | }, 234 | { 235 | .lname = "tiny-show", .dest = &bpf_args->tiny_output, 236 | .type = OPTION_BOOL, 237 | .desc = "set this option to show less infomation", 238 | }, 239 | { 240 | .lname = "trace-stack", .dest = &trace_args->traces_stack, 241 | .type = OPTION_STRING, 242 | .desc = "print call stack for traces or group", 243 | }, 244 | { 245 | .lname = "trace-matcher", .dest = &trace_args->trace_matcher, 246 | .type = OPTION_STRING, 247 | .desc = "traces that can match packet(default all)", 248 | }, 249 | { 250 | .lname = "trace-exclude", .dest = &trace_args->trace_exclude, 251 | .type = OPTION_STRING, 252 | .desc = "traces that should be disabled", 253 | }, 254 | { 255 | .lname = "trace-noclone", .dest = &trace_args->traces_noclone, 256 | .type = OPTION_BOOL, 257 | .desc = "don't trace skb clone", 258 | }, 259 | { 260 | .lname = "trace-free", .dest = &trace_args->trace_free, 261 | .type = OPTION_STRING, 262 | .desc = "custom the free functions", 263 | }, 264 | { 265 | .lname = 
"func-stats", .dest = &bpf_args->func_stats, 266 | .type = OPTION_BOOL, 267 | .desc = "only do the statistics for function call", 268 | }, 269 | { 270 | .lname = "rate-limit", .dest = &bpf_args->rate_limit, 271 | .type = OPTION_U32, 272 | .desc = "limit the output to N/s, not valid in diag/default mode", 273 | }, 274 | { 275 | .lname = "btf-path", .dest = &trace_args->btf_path, 276 | .type = OPTION_STRING, 277 | .desc = "custom the path of BTF info of vmlinux", 278 | }, 279 | { .type = OPTION_BLANK }, 280 | { 281 | .sname = 'v', .dest = &show_log, 282 | .type = OPTION_BOOL, 283 | .desc = "show log information", 284 | }, 285 | { 286 | .lname = "debug", .dest = &debug, 287 | .type = OPTION_BOOL, 288 | .desc = "show debug information", 289 | }, 290 | #ifdef BPF_DEBUG 291 | { 292 | .lname = "bpf-debug", .dest = &bpf_args->pkt.bpf_debug, 293 | .type = OPTION_BOOL, 294 | .desc = "show bpf debug information", 295 | }, 296 | #endif 297 | { 298 | .lname = "help", 299 | .sname = 'h', 300 | .type = OPTION_HELP, 301 | .desc = "show help information", 302 | }, 303 | { 304 | .lname = "version", .dest = &version, 305 | .sname = 'V', 306 | .type = OPTION_BOOL, 307 | .desc = "show nettrace version", 308 | }, 309 | }; 310 | 311 | if (parse_args(argc, argv, &config, opts, ARRAY_SIZE(opts))) 312 | goto err; 313 | 314 | if (show_log) 315 | set_log_level(1); 316 | 317 | if (!debug) { 318 | /* turn off warning of libbpf */ 319 | libbpf_set_print(NULL); 320 | } else { 321 | set_log_level(2); 322 | } 323 | 324 | if (version) { 325 | pr_version(); 326 | exit(0); 327 | } 328 | 329 | /* convert the args to the eBPF pkt_arg struct */ 330 | #define FILL_ADDR_PROTO(name, subfix, args, pf) if (name##_pf == pf) { \ 331 | memcpy(&(args)->name##subfix, name##_buf, \ 332 | sizeof((args)->name##subfix)); \ 333 | if ((args)->l3_proto && (args)->l3_proto != pf) { \ 334 | pr_err("ip" #subfix " protocol is excepted!\n"); \ 335 | goto err; \ 336 | } \ 337 | (args)->l3_proto = pf; \ 338 | } 339 | #define 
FILL_ADDR(name, args) \ 340 | FILL_ADDR_PROTO(name, _v6, args, ETH_P_IPV6) \ 341 | FILL_ADDR_PROTO(name, , args, ETH_P_IP) 342 | 343 | switch (proto_l) { 344 | case 3: 345 | pkt_args->l3_proto = proto; 346 | break; 347 | case 4: 348 | pkt_args->l4_proto = proto; 349 | break; 350 | default: 351 | break; 352 | } 353 | 354 | /* set L3 protocol if addr is offered */ 355 | FILL_ADDR(saddr, pkt_args) 356 | FILL_ADDR(daddr, pkt_args) 357 | FILL_ADDR(addr, pkt_args) 358 | 359 | pkt_args->saddr_v6_enable = !!saddr_pf; 360 | pkt_args->daddr_v6_enable = !!daddr_pf; 361 | pkt_args->addr_v6_enable = !!addr_pf; 362 | 363 | return; 364 | err: 365 | exit(-EINVAL); 366 | } 367 | 368 | static void do_exit(int code) 369 | { 370 | static bool is_exited = false; 371 | bpf_args_t *bpf_args; 372 | u64 event_count; 373 | 374 | if (is_exited) 375 | return; 376 | 377 | is_exited = true; 378 | bpf_args = get_bpf_args(); 379 | event_count = bpf_args->event_count; 380 | 381 | pr_info("end trace...\n"); 382 | pr_debug("begin destory BPF skel...\n"); 383 | trace_ctx.ops->trace_close(); 384 | pr_debug("BPF skel is destroied\n"); 385 | trace_ctx.stop = true; 386 | 387 | pr_info("total event: %llu, %d context skipped\n", 388 | event_count, ctx_count); 389 | } 390 | 391 | int main(int argc, char *argv[]) 392 | { 393 | init_trace_group(); 394 | do_parse_args(argc, argv); 395 | 396 | if (trace_prepare()) 397 | goto err; 398 | 399 | if (trace_bpf_load_and_attach()) { 400 | pr_err("failed to load bpf\n"); 401 | goto err; 402 | } 403 | 404 | signal(SIGTERM, do_exit); 405 | signal(SIGINT, do_exit); 406 | 407 | pr_info("begin trace...\n"); 408 | trace_poll(trace_ctx); 409 | do_exit(0); 410 | return 0; 411 | err: 412 | return -1; 413 | } 414 | -------------------------------------------------------------------------------- /src/nettrace.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_NETTRACE 4 | #define 
_H_NETTRACE 5 | 6 | #include 7 | 8 | #define pr_version() \ 9 | pr_info("version: " macro_to_str(VERSION) macro_to_str(RELEASE) \ 10 | nt_ternary_take(INLINE_MODE, ", inline", "") \ 11 | nt_ternary_take(NO_BTF, ", no-btf, kernel-" \ 12 | macro_to_str(__KERN_VER), " btf") \ 13 | nt_ternary_take(BPF_NO_GLOBAL_DATA, ", no-global-data", \ 14 | ", global-data") \ 15 | nt_ternary_take(NT_DISABLE_IPV6, ", no-ipv6", "") \ 16 | "\n") 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/progs/core.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_PROG_CORE 2 | #define _H_PROG_CORE 3 | 4 | typedef struct { 5 | u16 func1; 6 | u16 func2; 7 | u32 ts1; 8 | u32 ts2; 9 | } match_val_t; 10 | 11 | typedef struct { 12 | /* the bpf context args */ 13 | void *ctx; 14 | struct sk_buff *skb; 15 | struct sock *sk; 16 | event_t *e; 17 | /* the filter condition stored in map */ 18 | bpf_args_t *args; 19 | union { 20 | /* used by fexit to pass the retval to event */ 21 | u64 retval; 22 | /* match only used in context mode, no conflict with retval */ 23 | match_val_t match_val; 24 | u32 matched; 25 | }; 26 | u16 func; 27 | u8 func_status; 28 | /* don't output the event for this skb */ 29 | u8 no_event:1; 30 | } context_info_t; 31 | 32 | /* init the skb by the index of func args */ 33 | #define DEFINE_KPROBE_SKB(name, skb_index, arg_count) \ 34 | DEFINE_KPROBE_INIT(name, name, arg_count, \ 35 | .skb = ctx_get_arg(ctx, skb_index)) 36 | 37 | /* BPF_NO_GLOBAL_DATA means this kernel version is old, we need to initialize 38 | * all the event data. 
39 | */ 40 | #if defined(BPF_NO_GLOBAL_DATA) || defined(__F_INIT_EVENT) 41 | #define DECLARE_EVENT(type, name) \ 42 | pure_##type __attribute__((__unused__)) *name; \ 43 | type __attribute__((__unused__))__##name; \ 44 | detail_##type __detail_##name = {0}; \ 45 | info->e = (void *)&__detail_##name; \ 46 | if (info->args->detail) { \ 47 | WRITE_ONCE(name, (void *)info->e + \ 48 | offsetof(detail_##type, __event_filed)); \ 49 | } else { \ 50 | WRITE_ONCE(name, (void *)info->e + \ 51 | offsetof(type, __event_filed)); \ 52 | } 53 | 54 | #ifdef __F_OUTPUT_WHOLE 55 | #define handle_event_output(info, e) \ 56 | do_event_output(info, sizeof(__detail_##e)) 57 | #else 58 | #define handle_event_output(info, e) \ 59 | do_event_output(info, (info->args->detail ? sizeof(__detail_##e) : sizeof(__##e))) 60 | #endif 61 | 62 | #else 63 | /* initialize only part event data if not detail */ 64 | #define DECLARE_EVENT(type, name) \ 65 | pure_##type __attribute__((__unused__)) *name; \ 66 | type __attribute__((__unused__))__##name; \ 67 | detail_##type __detail_##name; \ 68 | info->e = (void *)&__detail_##name; \ 69 | int name##_size; \ 70 | if (info->args->detail) { \ 71 | name##_size = sizeof(detail_##type); \ 72 | __builtin_memset(info->e, 0, name##_size); \ 73 | name = offsetof(detail_##type, __event_filed) + \ 74 | (void *)info->e; \ 75 | } else { \ 76 | name##_size = sizeof(type); \ 77 | __builtin_memset(info->e, 0, name##_size); \ 78 | name = offsetof(type, __event_filed) + \ 79 | (void *)info->e; \ 80 | } 81 | 82 | #define handle_event_output(info, e) do_event_output(info, e##_size) 83 | #endif 84 | 85 | #define handle_entry_output(info, e) \ 86 | ({ \ 87 | int err = handle_entry(info); \ 88 | if (!err) \ 89 | handle_event_output(info, e); \ 90 | err; \ 91 | }) 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /src/progs/feat_args_ext.c: -------------------------------------------------------------------------------- 1 | 
/* sink for the value read by the probe program below */
int ret;

/*
 * Feature-probe program attached at fexit of __inet_lookup_listener.
 * It only reads the 11th 64-bit slot of the tracing context (ctx[10])
 * into the global "ret" and returns 0.
 *
 * NOTE(review): presumably the loader checks whether this program can be
 * loaded to detect support for accessing that context slot -- confirm
 * against the user-space feature-probe code.
 */
SEC("fexit/__inet_lookup_listener")
/* keep the ctx[10] access from being optimized away */
__attribute__((optimize("O0")))
int BPF_PROG(feat_probe_args_ext)
{
	ret = (int)ctx[10];
	return 0;
}
\ 52 | __DEFINE_KPROBE_INIT(name, target, info_init) 53 | 54 | #define KPROBE_DEFAULT(name, skb_index, sk_index, dummy) \ 55 | DEFINE_KPROBE_INIT(name, name, dummy, \ 56 | .skb = nt_ternary_take(skb_index, \ 57 | ctx_get_arg(ctx, skb_index),\ 58 | NULL), \ 59 | .sk = nt_ternary_take(sk_index, \ 60 | ctx_get_arg(ctx, sk_index),\ 61 | NULL)) { \ 62 | return default_handle_entry(info); \ 63 | } 64 | 65 | #define DEFINE_TP_INIT(name, cata, tp, info_init...) \ 66 | DECLARE_FAKE_FUNC(fake__##name); \ 67 | SEC("tp/"#cata"/"#tp) \ 68 | int TRACE_NAME(name)(void *ctx) { \ 69 | context_info_t info = { \ 70 | .func = INDEX_##name, \ 71 | .ctx = ctx, \ 72 | .args = (void *)CONFIG(), \ 73 | info_init \ 74 | }; \ 75 | if (pre_handle_entry(&info, INDEX_##name)) \ 76 | return 0; \ 77 | handle_entry_finish(&info, \ 78 | fake__##name(&info)); \ 79 | return 0; \ 80 | } \ 81 | DECLARE_FAKE_FUNC(fake__##name) 82 | #define DEFINE_TP(name, cata, tp, skb_index, offset) \ 83 | DEFINE_TP_INIT(name, cata, tp, \ 84 | .skb = *(void **)(ctx + offset)) 85 | #define TP_DEFAULT(name, cata, tp, skb, offset) \ 86 | DEFINE_TP(name, cata, tp, skb, offset) \ 87 | { \ 88 | return default_handle_entry(info); \ 89 | } 90 | #define FNC(name) 91 | 92 | static inline int handle_exit(struct pt_regs *ctx, int func); 93 | static inline void get_ret(context_info_t *info); 94 | static inline int default_handle_entry(context_info_t *info); 95 | 96 | #include "core.c" 97 | 98 | static __always_inline int get_ret_key(int func) 99 | { 100 | return func; 101 | } 102 | 103 | static inline void get_ret(context_info_t *info) 104 | { 105 | int *ref, key; 106 | 107 | if (!(info->func_status & FUNC_STATUS_RET)) 108 | return; 109 | 110 | key = get_ret_key(info->func); 111 | ref = bpf_map_lookup_elem(&m_ret, &key); 112 | if (!ref) 113 | return; 114 | (*ref)++; 115 | } 116 | 117 | static inline int put_ret(bpf_args_t *args, int func) 118 | { 119 | int *ref, key; 120 | 121 | if (!(get_func_status(args, func) & FUNC_STATUS_RET)) 
122 | return 1; 123 | 124 | key = get_ret_key(func); 125 | ref = bpf_map_lookup_elem(&m_ret, &key); 126 | if (!ref || *ref <= 0) 127 | return 1; 128 | (*ref)--; 129 | return 0; 130 | } 131 | 132 | static inline int handle_exit(struct pt_regs *ctx, int func) 133 | { 134 | bpf_args_t *args = (void *)CONFIG(); 135 | retevent_t event; 136 | 137 | if (!args->ready || put_ret(args, func)) 138 | return 0; 139 | 140 | event = (retevent_t) { 141 | .ts = bpf_ktime_get_ns(), 142 | .func = func, 143 | .meta = FUNC_TYPE_RET, 144 | .val = PT_REGS_RC(ctx), 145 | }; 146 | 147 | if (func == INDEX_skb_clone) 148 | init_ctx_match((void *)event.val, func, false); 149 | 150 | EVENT_OUTPUT(ctx, event); 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /src/progs/shared.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_PROGS_SHARED 2 | #define _H_PROGS_SHARED 3 | 4 | #define MAX_FUNC_STACK 16 5 | 6 | #include 7 | 8 | #include "kprobe_trace.h" 9 | 10 | typedef struct { 11 | pkt_args_t pkt; 12 | u32 trace_mode; 13 | u32 pid; 14 | u32 netns; 15 | u32 max_event; 16 | bool drop_reason; 17 | bool detail; 18 | bool hooks; 19 | bool ready; 20 | bool stack; 21 | bool tiny_output; 22 | bool has_filter; 23 | bool latency_summary; 24 | bool func_stats; 25 | bool match_mode; 26 | bool latency_free; 27 | u32 first_rtt; 28 | u32 last_rtt; 29 | u32 rate_limit; 30 | u32 latency_min; 31 | int __rate_limit; 32 | u64 __last_update; 33 | u8 trace_status[TRACE_MAX]; 34 | u64 event_count; 35 | } bpf_args_t; 36 | 37 | typedef struct { 38 | u16 meta; 39 | u16 func; 40 | u32 key; 41 | union { 42 | packet_t pkt; 43 | sock_t ske; 44 | }; 45 | union { 46 | /* For FEXIT program only for now */ 47 | u64 retval; 48 | struct { 49 | u16 latency_func1; 50 | u16 latency_func2; 51 | u32 latency; 52 | }; 53 | }; 54 | #ifdef __F_STACK_TRACE 55 | u32 stack_id; 56 | #endif 57 | int __event_filed[0]; 58 | } event_t; 59 | 
#undef DEFINE_EVENT
/*
 * Generate the three layouts of a custom event type:
 *   <name>         - event_t header followed by @fields
 *   detail_<name>  - detail_event_t header followed by @fields
 *   pure_<name>    - @fields only
 *
 * The zero-length "__event_filed" member marks where @fields start;
 * DECLARE_EVENT() uses offsetof() on it to locate the custom fields inside
 * either layout. (The identifier spelling is kept as used by that macro.)
 */
#define DEFINE_EVENT(name, fields...)		\
typedef struct {				\
	event_t event;				\
	int __event_filed[0];			\
	fields					\
} name;						\
typedef struct {				\
	detail_event_t event;			\
	int __event_filed[0];			\
	fields					\
} detail_##name;				\
typedef struct {				\
	fields					\
} pure_##name;
/* declare one member of an event inside DEFINE_EVENT() */
#define event_field(type, name) type name;
/*
 * Comparison operators of a return-value rule; stored in rules_ret_t.op[].
 * Values start at 1, so a zeroed op[] slot matches none of them.
 */
enum rule_type {
	/* equal */
	RULE_RETURN_EQ = 1,
	/* not equal */
	RULE_RETURN_NE,
	/* less than */
	RULE_RETURN_LT,
	/* greater than */
	RULE_RETURN_GT,
	/*
	 * in range
	 * NOTE(review): pre_handle_exit() in progs/tracing.c has no case
	 * for this operator -- confirm where ranges are evaluated.
	 */
	RULE_RETURN_RANGE,
	/* always active this rule */
	RULE_RETURN_ANY,
};
218 | #define TRACE_MODE_SOCK_REQUIRE_MASK TRACE_MODE_SOCK_MASK 219 | #define TRACE_MODE_ALL_MASK \ 220 | (TRACE_MODE_SKB_REQUIRE_MASK | TRACE_MODE_MONITOR_MASK |\ 221 | TRACE_MODE_SOCK_REQUIRE_MASK) 222 | #define TRACE_MODE_BPF_CTX_MASK \ 223 | (TRACE_MODE_DIAG_MASK | TRACE_MODE_TIMELINE_MASK | \ 224 | TRACE_MODE_LATENCY_MASK) 225 | #define TRACE_MODE_CTX_MASK \ 226 | (TRACE_MODE_DIAG_MASK | TRACE_MODE_TIMELINE_MASK) 227 | 228 | #define __MACRO_SIZE(macro) sizeof(#macro) 229 | #define MACRO_SIZE(macro) __MACRO_SIZE(macro) 230 | #define __MACRO_CONCAT(a, b) a##b 231 | #define MACRO_CONCAT(a, b) __MACRO_CONCAT(a, b) 232 | 233 | #define TRACE_PREFIX __trace_ 234 | #define TRACE_RET_PREFIX ret__trace_ 235 | #define TRACE_PREFIX_LEN MACRO_SIZE(TRACE_PREFIX) 236 | #define TRACE_NAME(name) MACRO_CONCAT(TRACE_PREFIX, name) 237 | #define TRACE_RET_NAME(name) MACRO_CONCAT(TRACE_RET_PREFIX, name) 238 | 239 | #if defined(__F_NO_SK_FLAGS_OFFSET) && defined(__F_SK_PRPTOCOL_LEGACY) 240 | #define __F_DISABLE_SOCK 241 | #endif 242 | 243 | #ifdef INLINE_MODE 244 | #define __F_INIT_EVENT 245 | #endif 246 | 247 | #endif 248 | -------------------------------------------------------------------------------- /src/progs/tracing.c: -------------------------------------------------------------------------------- 1 | #define KBUILD_MODNAME "" 2 | #define __PROG_TYPE_TRACING 1 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "shared.h" 10 | #include 11 | 12 | #include "kprobe_trace.h" 13 | #include "core.h" 14 | 15 | #define ctx_get_arg(ctx, index) (void *)((unsigned long long *)ctx)[index] 16 | #define info_get_arg(info, index) ctx_get_arg(info->ctx, index) 17 | 18 | #define DECLARE_FAKE_FUNC(name) \ 19 | static __always_inline int name(context_info_t *info) 20 | 21 | /* one trace may have more than one implement */ 22 | #define __DEFINE_KPROBE_INIT(name, target, acount, info_init...) 
/*
 * Define the fentry/fexit programs of @name with the default handler:
 * .skb/.sk are read from the function arguments at @skb_index/@sk_index
 * (or set to NULL, as selected by nt_ternary_take) and the event is
 * produced by default_handle_entry().
 */
#define __KPROBE_DEFAULT(name, skb_index, sk_index, acount)	\
	DEFINE_KPROBE_INIT(name, name, acount,			\
		.skb = nt_ternary_take(skb_index,		\
				       ctx_get_arg(ctx, skb_index),\
				       NULL),			\
		.sk = nt_ternary_take(sk_index,			\
				      ctx_get_arg(ctx, sk_index),\
				      NULL))			\
	{							\
		return default_handle_entry(info);		\
	}
/* expands to nothing: no BPF program is generated for this trace */
#define KPROBE_DUMMY(name, skb_index, sk_index, acount)

/* for now, only generate BPF program for monitor case */
/*
 * Dispatch on @acount through nt_ternary_take: __KPROBE_DEFAULT when it
 * selects the first alternative, KPROBE_DUMMY otherwise.
 * NOTE(review): presumably an empty/zero @acount selects KPROBE_DUMMY --
 * confirm against the nt_ternary_take definition.
 */
#define KPROBE_DEFAULT(name, skb_index, sk_index, acount)	\
	nt_ternary_take(acount, __KPROBE_DEFAULT,		\
			KPROBE_DUMMY)(name, skb_index, sk_index, acount)
/*
 * Tracepoint wrapper for the tracing (tp_btf) flavour: the skb is taken
 * from the context argument array at @skb_index. @offset is accepted for
 * signature parity but is not used by this variant.
 */
#define DEFINE_TP(name, cata, tp, skb_index, offset)	\
	DEFINE_TP_INIT(name, cata, tp,			\
		       .skb = ctx_get_arg(ctx, skb_index))
/* DEFINE_TP() whose handler body is just default_handle_entry() */
#define TP_DEFAULT(name, cata, tp, skb_index, offset)	\
	DEFINE_TP(name, cata, tp, skb_index, offset)	\
	{						\
		return default_handle_entry(info);	\
	}
/* expands to nothing in this program flavour */
#define FNC(name)
115 | */ 116 | #define get_ret(func) 117 | 118 | #include "core.c" 119 | 120 | rules_ret_t rules_all[TRACE_MAX]; 121 | 122 | static __always_inline int pre_handle_exit(void *ctx, int func_index, 123 | u64 *retval, 124 | int arg_count) 125 | { 126 | int i, expected, ret; 127 | rules_ret_t *rules; 128 | bool hit = false; 129 | void *ret_ptr; 130 | 131 | /* this can't happen */ 132 | if (func_index >= TRACE_MAX) 133 | goto no_match; 134 | 135 | rules = &rules_all[func_index]; 136 | if (!rules) 137 | goto no_match; 138 | 139 | *retval = 0; 140 | if (bpf_core_helper_exist(get_func_ret)) { 141 | bpf_get_func_ret(ctx, retval); 142 | } else { 143 | if (!arg_count) 144 | goto no_match; 145 | ret_ptr = ctx + arg_count * 8; 146 | bpf_probe_read_kernel(retval, sizeof(u64), ret_ptr); 147 | } 148 | 149 | ret = (int)*retval; 150 | pr_bpf_debug("func=%d retval=%d\n", func_index, ret); 151 | for (i = 0; i < MAX_RULE_COUNT; i++) { 152 | expected = rules->expected[i]; 153 | switch (rules->op[i]) { 154 | case RULE_RETURN_ANY: 155 | hit = true; 156 | break; 157 | case RULE_RETURN_EQ: 158 | hit = expected == ret; 159 | break; 160 | case RULE_RETURN_LT: 161 | hit = expected < ret; 162 | break; 163 | case RULE_RETURN_GT: 164 | hit = expected > ret; 165 | break; 166 | case RULE_RETURN_NE: 167 | hit = expected != ret; 168 | break; 169 | default: 170 | goto no_match; 171 | } 172 | if (hit) 173 | break; 174 | } 175 | 176 | if (!hit) 177 | goto no_match; 178 | return 0; 179 | no_match: 180 | return -1; 181 | } 182 | -------------------------------------------------------------------------------- /src/rstreason.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "rstreason.h" 9 | 10 | #define REASON_MAX_COUNT 256 11 | #define REASON_MAX_LEN 32 12 | 13 | static char reset_reasons[REASON_MAX_COUNT][REASON_MAX_LEN] = {}; 14 | static int reset_reason_max; 15 | static bool 
reset_reason_inited = false; 16 | static const char *tcp_state_str[] = { 17 | "UNKNOWN", // 0 18 | "TCP_ESTABLISHED", // 1 19 | "TCP_SYN_SENT", // 2 20 | "TCP_SYN_RECV", // 3 21 | "TCP_FIN_WAIT1", // 4 22 | "TCP_FIN_WAIT2", // 5 23 | "TCP_TIME_WAIT", // 6 24 | "TCP_CLOSE", // 7 25 | "TCP_CLOSE_WAIT", // 8 26 | "TCP_LAST_ACK", // 9 27 | "TCP_LISTEN", // 10 28 | "TCP_CLOSING", // 11 29 | "TCP_NEW_SYN_RECV", // 12 30 | "TCP_MAX_STATES" // 13 31 | }; 32 | 33 | /* check if rst reason is supported */ 34 | bool reset_reason_support() 35 | { 36 | return simple_exec("cat /sys/kernel/debug/tracing/events/tcp/" 37 | "tcp_send_reset/format 2>/dev/null | " 38 | "grep NOT_SPECIFIED") == 0; 39 | } 40 | 41 | static int parse_reason_enum() 42 | { 43 | char name[REASON_MAX_LEN]; 44 | int index = 0; 45 | FILE *f; 46 | int symbolics_found = 1; 47 | 48 | f = fopen("/sys/kernel/debug/tracing/events/tcp/tcp_send_reset/format", "r"); 49 | 50 | if (!f || !fsearch(f, "__print_symbolic")) { 51 | if (f) 52 | fclose(f); 53 | return -1; 54 | } 55 | 56 | while (true) { 57 | if (symbolics_found == 1 && 58 | fsearch(f, "__print_symbolic")) { 59 | symbolics_found++; 60 | } 61 | 62 | if (symbolics_found == 2) { 63 | if (!fsearch(f, "{") || 64 | fscanf(f, "%d, \"%31[A-Z_0-9]", &index, name) != 2) 65 | break; 66 | pr_debug("reset_reason[%d] = %s\n", index, name); 67 | strcpy(reset_reasons[index], name); 68 | } else if (feof(f)) { 69 | fclose(f); 70 | return -1; 71 | } 72 | } 73 | reset_reason_max = index; 74 | reset_reason_inited = true; 75 | 76 | fclose(f); 77 | return 0; 78 | } 79 | 80 | char *get_reset_reason(int index) 81 | { 82 | if (!reset_reason_inited && parse_reason_enum()) 83 | return NULL; 84 | if (index <= 0 || index > reset_reason_max) 85 | return NULL; 86 | 87 | return reset_reasons[index]; 88 | } 89 | 90 | const char *get_tcp_state_str(unsigned char state) { 91 | if (state < 0 || state >= sizeof(tcp_state_str) / sizeof(tcp_state_str[0])) { 92 | return "UNKNOWN"; 93 | } 94 | return 
tcp_state_str[state]; 95 | } -------------------------------------------------------------------------------- /src/rstreason.h: -------------------------------------------------------------------------------- 1 | #ifndef _H_TCP_RESET_REASON 2 | #define _H_TCP_RESET_REASON 3 | 4 | #include 5 | 6 | char *get_reset_reason(int index); 7 | bool reset_reason_support(); 8 | const char *get_tcp_state_str(unsigned char state); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/rule.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | -------------------------------------------------------------------------------- /src/trace.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #ifndef _H_TRACE 4 | #define _H_TRACE 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "progs/shared.h" 12 | #include 13 | #include "progs/kprobe_trace.h" 14 | 15 | enum trace_type { 16 | TRACE_FUNCTION, 17 | TRACE_TP, 18 | }; 19 | 20 | struct analyzer; 21 | 22 | #define TRACE_LOADED (1 << 0) 23 | #define TRACE_ENABLE (1 << 1) 24 | #define TRACE_INVALID (1 << 2) 25 | #define TRACE_RET (1 << 3) 26 | #define TRACE_STACK (1 << 4) 27 | #define TRACE_ATTACH_MANUAL (1 << 5) 28 | #define TRACE_RET_ONLY (1 << 6) 29 | #define TRACE_CFREE (1 << 7) 30 | 31 | #define trace_for_each(pos) \ 32 | list_for_each_entry(pos, &trace_list, all) 33 | #define trace_for_each_cond(pos, cond) \ 34 | trace_for_each(pos) \ 35 | if (cond) 36 | 37 | #define bpf_pbn(obj, name) \ 38 | bpf_object__find_program_by_name(obj, name) 39 | 40 | typedef struct trace_group { 41 | char *name; 42 | char *desc; 43 | struct list_head children; 44 | struct list_head list; 45 | struct list_head traces; 46 | } trace_group_t; 47 | 48 | enum { 49 | TRACE_MONITOR_EXIT = 1, 50 | TRACE_MONITOR_ENTRY, 51 | }; 52 | 53 | typedef struct trace { 54 | 
/* name of the kernel function this trace targeted */ 55 | char name[64]; 56 | char *desc; 57 | char *msg; 58 | /* name of the eBPF program */ 59 | char *prog; 60 | enum trace_type type; 61 | char *cond; 62 | char *regex; 63 | char *tp; 64 | /* index of skb in function args, start from 1, 0 means no skb */ 65 | u8 skb; 66 | /* offset of skb in ftrace event */ 67 | u8 skboffset; 68 | /* the same as skb */ 69 | u8 sk; 70 | /* the same as skb_offset */ 71 | u8 skoffset; 72 | /* traces in a global list */ 73 | struct list_head all; 74 | /* traces in the same group */ 75 | struct list_head list; 76 | /* list head of rules that belongs to this trace */ 77 | struct list_head rules; 78 | /* traces that share the same target */ 79 | struct trace *backup; 80 | bool is_backup; 81 | bool probe; 82 | /* if this trace should be enabled by default */ 83 | bool def; 84 | /* if the BPF program is custom of this trace */ 85 | bool custom; 86 | int monitor; 87 | int index; 88 | int arg_count; 89 | u32 status; 90 | trace_group_t *parent; 91 | struct analyzer *analyzer; 92 | } trace_t; 93 | 94 | typedef struct { 95 | struct list_head list; 96 | trace_t * trace; 97 | } trace_list_t; 98 | 99 | typedef struct trace_args { 100 | bool timeline; 101 | bool ret; 102 | bool intel; 103 | bool intel_quiet; 104 | bool intel_keep; 105 | bool basic; 106 | bool monitor; 107 | bool drop; 108 | bool date; 109 | bool drop_stack; 110 | bool show_traces; 111 | bool sock; 112 | bool netns_current; 113 | bool force; 114 | bool latency_show; 115 | bool rtt; 116 | bool rtt_detail; 117 | bool latency; 118 | bool traces_noclone; 119 | u32 min_latency; 120 | char *traces; 121 | char *traces_stack; 122 | char *trace_matcher; 123 | char *trace_exclude; 124 | char *trace_free; 125 | char *pkt_len; 126 | char *tcp_flags; 127 | u32 count; 128 | char *btf_path; 129 | } trace_args_t; 130 | 131 | typedef struct { 132 | /* open and initialize the bpf program */ 133 | int (*trace_load)(); 134 | /* load and attach the bpf 
program */ 135 | int (*trace_attach)(); 136 | void (*trace_poll)(void *ctx, int cpu, void *data, u32 size); 137 | int (*trace_anal)(event_t *e); 138 | void (*trace_close)(); 139 | void (*trace_ready)(); 140 | void (*print_stack)(int key); 141 | void (*trace_feat_probe)(); 142 | bool (*trace_supported)(); 143 | void (*prepare_traces)(); 144 | int (*raw_poll)(); 145 | struct analyzer *analyzer; 146 | } trace_ops_t; 147 | 148 | typedef struct { 149 | trace_ops_t *ops; 150 | trace_args_t args; 151 | bpf_args_t bpf_args; 152 | trace_mode_t mode; 153 | __u64 mode_mask; 154 | bool stop; 155 | /* if drop reason feature is supported */ 156 | bool drop_reason; 157 | /* enable detail output */ 158 | bool detail; 159 | bool skip_last; 160 | bool trace_clone; 161 | struct bpf_object *obj; 162 | /* if reset reason feature is supported */ 163 | bool reset_reason; 164 | } trace_context_t; 165 | 166 | #define TRACE_HAS_ANALYZER(trace, name) IS_ANALYZER(trace->analyzer, name) 167 | #define TRACE_ANALYZER_ENABLED(name) trace_analyzer_enabled(&(ANALYZER(name))) 168 | 169 | #define BPF_ARG_GET(name) (trace_ctx.bpf_args.name) 170 | 171 | extern trace_context_t trace_ctx; 172 | 173 | extern trace_t *all_traces[]; 174 | extern trace_group_t root_group; 175 | extern int trace_count; 176 | extern struct list_head trace_list; 177 | extern u32 ctx_count; 178 | 179 | #define DECLARE_TRACES(name, ...) 
extern trace_t trace_##name; 180 | DEFINE_ALL_PROBES(DECLARE_TRACES, DECLARE_TRACES, DECLARE_TRACES) 181 | 182 | static inline trace_t *get_trace(int index) 183 | { 184 | if (index < 0 || index > TRACE_MAX) 185 | return NULL; 186 | return all_traces[index]; 187 | } 188 | 189 | static inline void set_trace_ops(trace_ops_t *ops) 190 | { 191 | trace_ctx.ops = ops; 192 | } 193 | 194 | static inline void trace_set_enable(trace_t *t) 195 | { 196 | t->status |= TRACE_ENABLE; 197 | } 198 | 199 | static inline bool trace_is_enable(trace_t *t) 200 | { 201 | return t->status & TRACE_ENABLE; 202 | } 203 | 204 | static inline void trace_set_invalid_reason(trace_t *t, const char *reason) 205 | { 206 | if (reason) 207 | pr_debug("trace name=%s, prog=%s is made invalid for: %s\n", 208 | t->name, t->prog, reason); 209 | else 210 | pr_debug("trace name=%s, prog=%s is made invalid\n", 211 | t->name, t->prog); 212 | t->status |= TRACE_INVALID; 213 | } 214 | 215 | static inline void trace_set_invalid(trace_t *t) 216 | { 217 | trace_set_invalid_reason(t, NULL); 218 | } 219 | 220 | static inline bool trace_is_invalid(trace_t *t) 221 | { 222 | return t->status & TRACE_INVALID; 223 | } 224 | 225 | static inline bool trace_is_usable(trace_t *t) 226 | { 227 | return trace_is_enable(t) && !trace_is_invalid(t); 228 | } 229 | 230 | static inline void trace_set_status(int func, int status) 231 | { 232 | trace_ctx.bpf_args.trace_status[func] |= status; 233 | } 234 | 235 | static inline void trace_set_ret(trace_t *t) 236 | { 237 | trace_set_status(t->index, FUNC_STATUS_RET); 238 | t->status |= TRACE_RET; 239 | } 240 | 241 | static inline bool trace_is_ret(trace_t *t) 242 | { 243 | return t->status & TRACE_RET; 244 | } 245 | 246 | static inline void trace_set_retonly(trace_t *t) 247 | { 248 | t->status |= TRACE_RET_ONLY; 249 | } 250 | 251 | static inline bool trace_is_retonly(trace_t *t) 252 | { 253 | return t->status & TRACE_RET_ONLY; 254 | } 255 | 256 | static inline u8 trace_get_status(int func) 
257 | { 258 | return trace_ctx.bpf_args.trace_status[func]; 259 | } 260 | 261 | static inline bool trace_using_sk(trace_t *t) 262 | { 263 | return trace_get_status(t->index) & FUNC_STATUS_SK; 264 | } 265 | 266 | static inline int trace_set_stack(trace_t *t) 267 | { 268 | trace_set_status(t->index, FUNC_STATUS_STACK); 269 | trace_ctx.bpf_args.stack = true; 270 | t->status |= TRACE_STACK; 271 | return 0; 272 | } 273 | 274 | static inline bool trace_is_stack(trace_t *t) 275 | { 276 | return t->status & TRACE_STACK; 277 | } 278 | 279 | static inline bool trace_is_func(trace_t *t) 280 | { 281 | return t->type == TRACE_FUNCTION; 282 | } 283 | 284 | static inline void trace_stop() 285 | { 286 | trace_ctx.stop = true; 287 | } 288 | 289 | static inline bool trace_stopped() 290 | { 291 | return trace_ctx.stop; 292 | } 293 | 294 | static inline bool trace_mode_timeline() 295 | { 296 | return trace_ctx.mode == TRACE_MODE_TIMELINE; 297 | } 298 | 299 | static inline bool trace_mode_diag() 300 | { 301 | return trace_ctx.mode == TRACE_MODE_DIAG; 302 | } 303 | 304 | void trace_show(trace_group_t *group); 305 | void init_trace_group(); 306 | trace_group_t *search_trace_group(char *name); 307 | int trace_prepare(); 308 | int trace_bpf_load_and_attach(); 309 | int trace_poll(); 310 | bool trace_analyzer_enabled(struct analyzer *analyzer); 311 | int trace_pre_load(); 312 | bpf_args_t *get_bpf_args(); 313 | 314 | #endif 315 | -------------------------------------------------------------------------------- /src/trace.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: all 3 | desc: trace the whole kernel network stack 4 | common_rules: 5 | define: &rule_ret_err 6 | exp: ne 0 7 | level: error 8 | define: &rules_nf 9 | - exp: eq 0 10 | level: error 11 | msg: packet is dropped 12 | adv: check your netfilter rule 13 | - exp: eq 1 14 | level: info 15 | msg: packet is accepted 16 | define: &rule_msg 17 | - exp: any 18 | level: info 19 | 
children: 20 | - name: link 21 | desc: link layer (L2) of the network stack 22 | children: 23 | - name: link-in 24 | desc: link layer (L2) of packet in 25 | visual: true 26 | children: 27 | - name: napi_gro_receive_entry 28 | tp: net/napi_gro_receive_entry 29 | skboffset: 24 30 | skb: 3 31 | - name: dev_gro_receive:1 32 | rules: 33 | - exp: eq 4 34 | level: error 35 | msg: packet is dropped by GRO 36 | - name: enqueue_to_backlog:0 37 | analyzer: ret 38 | monitor: 1 39 | rules: 40 | - <<: *rule_ret_err 41 | msg: failed to enqueue to CPU backlog 42 | adv: increase the /proc/sys/net/core/netdev_max_backlog 43 | - name: netif_receive_generic_xdp:0 44 | analyzer: ret 45 | rules: 46 | - exp: eq 1 47 | level: error 48 | msg: packet is dropped by XDP program 49 | adv: check your XDP eBPF program 50 | - exp: eq 3 51 | level: info 52 | msg: packet is transmitted by XDP program 53 | - exp: eq 4 54 | level: info 55 | msg: packet is redirected by XDP program 56 | - name: xdp_do_generic_redirect:1 57 | analyzer: ret 58 | monitor: 1 59 | rules: 60 | - <<: *rule_ret_err 61 | msg: XDP failed to redirect skb 62 | adv: check if the target ifindex exists 63 | - name: __netif_receive_skb_core:0 64 | tp: net/netif_receive_skb 65 | skboffset: 8 66 | - name: link-out 67 | desc: link layer (L2) of packet out 68 | visual: true 69 | children: 70 | - name: __dev_queue_xmit:0 71 | analyzer: ret 72 | monitor: 1 73 | rules: 74 | - <<: *rule_ret_err 75 | msg: failed to queue packet to qdisc 76 | adv: too complex to say 77 | - name: dev_hard_start_xmit:0 78 | rules: 79 | - <<: *rule_msg 80 | msg: skb is successfully sent to the NIC driver 81 | - name: sched 82 | desc: TC(traffic control) module 83 | visual: true 84 | children: 85 | - tcf_classify:0 86 | - cls_bpf_classify:0 87 | - tcf_bpf_act:0 88 | - name: qdisc_dequeue:3 89 | tp: qdisc/qdisc_dequeue 90 | custom: true 91 | skboffset: 32 92 | analyzer: qdisc 93 | - &qdisc_enqueue 94 | name: qdisc_enqueue:2 95 | tp: qdisc/qdisc_enqueue 96 | custom: 
true 97 | skboffset: 24 98 | analyzer: qdisc 99 | - name: ipvlan 100 | desc: ipvlan network interface 101 | default: false 102 | visual: true 103 | children: 104 | - ipvlan_queue_xmit:0 105 | - ipvlan_handle_frame:0 106 | - ipvlan_rcv_frame:1 107 | - ipvlan_xmit_mode_l3:0 108 | - ipvlan_process_v4_outbound:0 109 | - name: bridge 110 | desc: bridge network interface 111 | default: false 112 | visual: true 113 | children: 114 | - name: br_nf_pre_routing:1 115 | analyzer: ret 116 | msg: ebtable in PREROUTING 117 | rules: *rules_nf 118 | - name: br_nf_forward_ip:1 119 | analyzer: ret 120 | msg: ebtable in FORWARD 121 | rules: *rules_nf 122 | - name: br_nf_forward_arp:1 123 | analyzer: ret 124 | msg: ebtable in FORWARD 125 | rules: *rules_nf 126 | - name: br_nf_post_routing:1 127 | analyzer: ret 128 | msg: ebtable in POST_ROUTING 129 | rules: *rules_nf 130 | - name: arp 131 | visual: true 132 | desc: arp protocol 133 | children: 134 | - arp_rcv:0 135 | - arp_process:2 136 | - name: bonding 137 | visual: true 138 | desc: bonding netdevice 139 | children: 140 | - bond_dev_queue_xmit:1 141 | - name: vxlan 142 | visual: true 143 | desc: vxlan model 144 | children: 145 | - __iptunnel_pull_header:0 146 | - vxlan_rcv/0:1 147 | - vxlan_xmit_one:0 148 | - name: vlan 149 | visual: true 150 | desc: vlan module 151 | children: 152 | - vlan_do_receive:0 153 | - vlan_dev_hard_start_xmit:0 154 | - name: ovs 155 | visual: true 156 | desc: openvswitch module 157 | children: 158 | - netdev_port_receive:0 159 | - ovs_vport_receive:1 160 | - ovs_dp_process_packet:0 161 | - name: packet 162 | desc: the process of skb of type PF_PACKET 163 | children: 164 | - name: pkt-in 165 | desc: the process of skb of type PF_PACKET 166 | visual: true 167 | children: 168 | - packet_rcv:0 169 | - tpacket_rcv:0 170 | - name: pkt-output 171 | desc: the process of skb of type PF_PACKET 172 | visual: true 173 | children: 174 | - packet_direct_xmit:0 175 | - name: netfilter 176 | desc: netfilter 
process(filter, nat, etc) 177 | children: 178 | - name: netfilter 179 | desc: base netfilter entry 180 | visual: true 181 | children: 182 | - names: 183 | - &ipt_do_table 184 | name: ipt_do_table 185 | - name: ipt_do_table_legacy 186 | cond: '[ $(verlte "$(uname -r)" "5.16") -eq -1 ]' # valid when kernel < 5.16 187 | backup: *ipt_do_table 188 | analyzer: iptable 189 | monitor: 1 190 | skb: 1 191 | custom: 192 | target: ipt_do_table 193 | rules: 194 | - exp: eq 0 195 | level: error 196 | msg: packet is dropped by iptables/iptables-legacy 197 | adv: check your iptables rule 198 | - exp: eq 1 199 | level: info 200 | msg: packet is accepted 201 | - name: nft_do_chain:0 202 | analyzer: iptable 203 | monitor: 1 204 | custom: 1 205 | rules: 206 | - exp: eq 0 207 | level: error 208 | msg: packet is dropped by iptables/iptables-nft 209 | adv: check your iptables rule 210 | - exp: eq 1 211 | level: info 212 | msg: packet is accepted 213 | - name: nf_nat_manip_pkt:0 214 | analyzer: ret 215 | rules: 216 | - exp: any 217 | msg: NAT happens (packet address will change) 218 | level: warn 219 | - name: nf_hook_slow:0 220 | monitor: 1 221 | analyzer: nf 222 | custom: 1 223 | rules: 224 | - exp: eq -1 225 | level: error 226 | msg: packet is dropped by netfilter (NF_DROP) 227 | adv: check your netfilter rule 228 | - name: conntrack 229 | desc: connection track (used by nat mostly) 230 | default: false 231 | visual: true 232 | children: 233 | - ipv4_confirm:1 234 | - nf_confirm:0 235 | - ipv4_conntrack_in:1 236 | - nf_conntrack_in:3 237 | - ipv4_pkt_to_tuple:0 238 | - tcp_new:1 239 | - tcp_pkt_to_tuple:0 240 | - resolve_normal_ct:2 241 | - tcp_packet:1 242 | - name: tcp_in_window:4 243 | default: true 244 | analyzer: ret 245 | monitor: 1 246 | arg_count: 7 247 | rules: 248 | - exp: eq 0 249 | level: warn 250 | msg: conntrack window check failed (packet out ordering) 251 | adv: | 252 | enable 'nf_conntrack_tcp_be_liberal' with the command 253 | 'echo 1 > 
/proc/sys/net/netfilter/nf_conntrack_tcp_be_liberal' 254 | - __nf_ct_refresh_acct:2 255 | - name: ip 256 | desc: ip protocol layer (L3) of the network stack 257 | children: 258 | - name: ip-in 259 | desc: ip layer of packet in 260 | visual: true 261 | children: 262 | - ip_rcv:0 263 | - ip_rcv_core:0 264 | - ip_rcv_finish:2 265 | - ip_local_deliver:0 266 | - ip_local_deliver_finish:2 267 | - ip_forward:0 268 | - ip_forward_finish:0 269 | - ip6_forward:0 270 | - ip6_rcv_finish:2 271 | - ip6_rcv_core:0 272 | - ipv6_rcv:0 273 | - name: ip-out 274 | desc: ip layer of packet out 275 | visual: true 276 | children: 277 | - name: __ip_queue_xmit:1/0 278 | - __ip_local_out:2/1 279 | - ip_output:2 280 | - ip_finish_output:2 281 | - ip_finish_output_gso:2 282 | - ip_finish_output2:2 283 | - ip6_output:2 284 | - ip6_finish_output:2 285 | - ip6_finish_output2:2 286 | - ip6_send_skb:0 287 | - ip6_local_out:2 288 | - name: xfrm 289 | desc: xfrm module 290 | visual: true 291 | children: 292 | - xfrm4_output:2 293 | - xfrm_output:1 294 | - xfrm_output2:2 295 | - xfrm_output_gso:2 296 | - xfrm_output_resume:1 297 | - xfrm4_transport_output:1 298 | - xfrm4_prepare_output:1 299 | - xfrm4_policy_check:2 300 | - xfrm4_rcv:0 301 | - xfrm_input:0 302 | - xfrm4_transport_input:1 303 | - name: esp 304 | desc: ip layer of packet out 305 | visual: true 306 | children: 307 | - ah_output:1 308 | - esp_output:1 309 | - esp_output_tail:1 310 | - ah_input:1 311 | - esp_input:1 312 | - name: ip-route 313 | desc: ip route for packet in and out 314 | visual: true 315 | children: 316 | - name: fib_validate_source:0 317 | monitor: 1 318 | rules: 319 | - <<: *rule_ret_err 320 | msg: source address valid failed (properly rp_filter fail) 321 | adv: | 322 | check you ip route config or disable rp_filter with command 323 | 'echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter' 324 | - name: ip_route_input_slow:0 325 | monitor: 1 326 | rules: 327 | - <<: *rule_ret_err 328 | msg: failed to route packet in input path 
329 | adv: check packet address and your route 330 | - name: tcp 331 | desc: tcp protocol layer (L4) of the network stack 332 | children: 333 | - name: tcp-in 334 | desc: tcp layer of packet in 335 | visual: true 336 | children: 337 | - tcp_v4_rcv:0 338 | - tcp_v6_rcv:0 339 | - tcp_filter:1 340 | - tcp_child_process:2 341 | - name: tcp_v4_send_reset:1/0 342 | custom: 1 343 | analyzer: reset 344 | rules: 345 | - exp: any 346 | level: error 347 | msg: connection reset initiated by transport layer (TCP stack, skb) 348 | - name: tcp_v6_send_reset:1/0 349 | custom: 1 350 | analyzer: reset 351 | rules: 352 | - exp: any 353 | level: error 354 | msg: connection reset initiated by transport layer (TCP stack, skb) 355 | - tcp_v4_do_rcv:1 356 | - tcp_v6_do_rcv:1 357 | - tcp_rcv_established:1/0 358 | - &tcp_rcv_state_process 359 | name: tcp_rcv_state_process:1/0 360 | rules: 361 | - exp: any 362 | level: info 363 | msg: TCP socket state has changed 364 | - tcp_queue_rcv:1/0 365 | - tcp_data_queue_ofo:1/0 366 | - tcp_ack_probe/0 367 | - tcp_ack:1/0 368 | - tcp_probe_timer/0 369 | - name: tcp_send_probe0/0 370 | rules: 371 | - exp: any 372 | level: info 373 | msg: send zero-window probe packet 374 | - name: __inet_lookup_listener:2 375 | monitor: 1 376 | analyzer: ret 377 | rules: 378 | - exp: eq 0 379 | level: warn 380 | msg: tcp port is not listened 381 | adv: check your target tcp port 382 | - name: inet6_lookup_listener:2 383 | analyzer: ret 384 | monitor: 1 385 | rules: 386 | - exp: eq 0 387 | level: warn 388 | msg: tcp port is not listened 389 | adv: check your target tcp port 390 | - name: tcp_bad_csum:0 391 | tp: tcp/tcp_bad_csum 392 | skboffset: 8 393 | monitor: 2 394 | rules: 395 | - exp: any 396 | level: error 397 | msg: TCP packet has bad csum 398 | - name: tcp-out 399 | desc: tcp layer of packet out 400 | visiual: true 401 | children: 402 | - tcp_sendmsg_locked/0 403 | - name: tcp_skb_entail/0:1 404 | - name: skb_entail/0:1 405 | - __tcp_push_pending_frames/0 406 | 
- name: __tcp_transmit_skb:1/0 407 | rules: 408 | - exp: ne 0 409 | level: warn 410 | msg: failed to xmit skb to ip layer 411 | - name: __tcp_retransmit_skb/0:1 412 | - tcp_rate_skb_delivered/0:1 413 | - name: udp 414 | desc: udp protocol layer (L4) of the network stack 415 | children: 416 | - name: udp-in 417 | desc: udp layer of packet in 418 | visual: true 419 | children: 420 | - udp_rcv:0 421 | - udp_unicast_rcv_skb:1 422 | - udp_queue_rcv_skb:1 423 | - xfrm4_udp_encap_rcv:1 424 | - xfrm4_rcv_encap:0 425 | - __udp_queue_rcv_skb:1 426 | - __udp_enqueue_schedule_skb:1 427 | - name: icmp 428 | desc: icmp(ping) protocol layer (L4) of the network stack 429 | children: 430 | - name: icmp-in 431 | desc: icmp layer of packet in 432 | visual: true 433 | children: 434 | - icmp_rcv:0 435 | - icmp_echo:0 436 | - icmp_reply:1 437 | - icmpv6_rcv:0 438 | - icmpv6_echo_reply:0 439 | - ping_rcv:0 440 | - __ping_queue_rcv_skb:1 441 | - ping_queue_rcv_skb:1 442 | - name: ping_lookup:1 443 | analyzer: ret 444 | rules: 445 | - exp: eq 0 446 | level: warn 447 | msg: icmp socket is not founded 448 | adv: not support 449 | - name: socket 450 | desc: socket releated hooks 451 | visiual: true 452 | children: 453 | - name: tcp-state 454 | desc: TCP socket state releated hooks 455 | children: 456 | - name: inet_listen/0 457 | custom: true 458 | rules: 459 | - exp: any 460 | level: info 461 | msg: TCP socket begin to listen 462 | - tcp_v4_destroy_sock/0 463 | - name: tcp_close/0 464 | rules: 465 | - exp: any 466 | level: info 467 | msg: TCP socket is closed 468 | - *tcp_rcv_state_process 469 | - name: tcp_send_active_reset/0 470 | custom: 1 471 | analyzer: reset 472 | rules: 473 | - exp: any 474 | level: error 475 | msg: connection reset initiated by application (active close, sk) 476 | - name: tcp_ack_update_rtt/0 477 | custom: 1 478 | analyzer: rtt 479 | - name: tcp-congestion 480 | desc: TCP congestion control releated hooks 481 | children: 482 | - tcp_write_timer_handler/0 483 | - 
name: tcp_retransmit_timer/0 484 | monitor: 2 485 | rules: 486 | - exp: any 487 | level: warn 488 | msg: TCP retransmission timer out 489 | - name: tcp_enter_recovery/0 490 | rules: 491 | - exp: any 492 | level: warn 493 | msg: TCP enter conguestion recover 494 | - name: tcp_enter_loss/0 495 | rules: 496 | - exp: any 497 | level: warn 498 | msg: TCP enter conguestion loss 499 | - name: tcp_try_keep_open/0 500 | rules: 501 | - exp: any 502 | level: info 503 | msg: TCP enter conguestion open state 504 | - name: tcp_enter_cwr/0 505 | rules: 506 | - exp: any 507 | level: info 508 | msg: TCP enter conguestion CWR state 509 | - tcp_fastretrans_alert/0 510 | - tcp_rearm_rto/0 511 | - tcp_event_new_data_sent/0 512 | - tcp_schedule_loss_probe/0 513 | - name: tcp-retrans 514 | desc: TCP retransmission releated hooks 515 | children: 516 | - name: tcp_rtx_synack/0 517 | monitor: 2 518 | - name: tcp_retransmit_skb/0 519 | monitor: 2 520 | - name: tcp_rcv_spurious_retrans/0:1 521 | monitor: 2 522 | arg_count: 2 523 | - name: tcp_dsack_set/0 524 | monitor: 2 525 | arg_count: 3 526 | rules: 527 | - exp: any 528 | level: warn 529 | msg: spurious retransmission happened 530 | - name: life 531 | desc: skb clone and free 532 | visual: true 533 | children: 534 | - name: skb_clone 535 | skb: 0 536 | analyzer: clone 537 | rules: 538 | - exp: any 539 | level: info 540 | msg: packet is cloned 541 | - name: consume_skb:0 542 | tp: skb/consume_skb 543 | skboffset: 8 544 | analyzer: free 545 | rules: 546 | - exp: any 547 | level: info 548 | msg: packet is freed (normally) 549 | - name: kfree_skb:0 550 | tp: skb/kfree_skb 551 | skboffset: 8 552 | custom: 1 553 | analyzer: drop 554 | monitor: 1 555 | rules: 556 | - exp: any 557 | level: error 558 | msg: packet is dropped by kernel 559 | - name: __kfree_skb:0 560 | analyzer: free 561 | - name: kfree_skb_partial:0 562 | analyzer: free 563 | - name: skb_attempt_defer_free:0 564 | analyzer: free 565 | 
-------------------------------------------------------------------------------- /src/trace_probe.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "trace.h" 5 | #include "progs/kprobe.skel.h" 6 | #include "analysis.h" 7 | 8 | #define MAX_CPU_COUNT 1024 9 | 10 | const char *kprobe_type = "/sys/bus/event_source/devices/kprobe/type"; 11 | 12 | struct list_head cpus[MAX_CPU_COUNT]; 13 | trace_ops_t probe_ops; 14 | static struct kprobe *skel; 15 | 16 | static void probe_trace_attach_manual(char *prog_name, char *func, 17 | bool retprobe) 18 | { 19 | struct bpf_program *prog; 20 | bool legacy; 21 | int err; 22 | 23 | prog = bpf_pbn(skel->obj, prog_name); 24 | if (!prog) { 25 | pr_verb("failed to find prog %s\n", prog_name); 26 | return; 27 | } 28 | 29 | bpf_program__set_autoattach(prog, false); 30 | legacy = !file_exist(kprobe_type); 31 | 32 | again: 33 | if (!legacy) 34 | err = libbpf_get_error(bpf_program__attach_kprobe(prog, 35 | retprobe, func)); 36 | else 37 | err = compat_bpf_attach_kprobe(bpf_program__fd(prog), 38 | func, retprobe); 39 | 40 | if (err && !legacy) { 41 | pr_verb("retring to attach in legacy mode, prog=%s, func=%s\n", 42 | prog_name, func); 43 | legacy = true; 44 | goto again; 45 | } 46 | 47 | if (err) { 48 | pr_err("failed to manually attach program prog=%s, func=%s\n", 49 | prog_name, func); 50 | return; 51 | } 52 | 53 | pr_verb("manually attach prog %s success\n", prog_name); 54 | } 55 | 56 | static int probe_trace_attach() 57 | { 58 | bool auto_attach = false; 59 | char kret_name[128]; 60 | trace_t *trace; 61 | 62 | again: 63 | trace_for_each(trace) { 64 | if ((auto_attach && !(trace->status & TRACE_ATTACH_MANUAL)) || 65 | (!auto_attach && (trace->status & TRACE_ATTACH_MANUAL))) { 66 | probe_trace_attach_manual(trace->prog, trace->name, false); 67 | if (!trace_is_ret(trace)) 68 | continue; 69 | 70 | sprintf(kret_name, "ret%s", trace->prog); 71 | 
probe_trace_attach_manual(kret_name, trace->name, true); 72 | } 73 | } 74 | 75 | if (!auto_attach && kprobe__attach(skel)) { 76 | /* failed to auto attach, attach manually */ 77 | auto_attach = true; 78 | pr_warn("failed to auto attach kprobe, trying manual attach...\n"); 79 | goto again; 80 | } 81 | 82 | return 0; 83 | } 84 | 85 | /* In kprobe, we only enable the monitor for the traces with "any" rule */ 86 | static void probe_check_monitor() 87 | { 88 | trace_t *trace; 89 | 90 | if (trace_ctx.mode != TRACE_MODE_MONITOR) 91 | return; 92 | 93 | trace_for_each(trace) { 94 | if (!trace_is_func(trace) || trace_is_invalid(trace)) 95 | continue; 96 | 97 | /* kprobe don't support to monitor function exit */ 98 | if (trace->monitor == TRACE_MONITOR_EXIT) { 99 | pr_debug("disabled monitor_exit for kprobe\n"); 100 | trace_set_invalid_reason(trace, "monitor"); 101 | } 102 | } 103 | } 104 | 105 | static int probe_trace_load() 106 | { 107 | DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, 108 | .btf_custom_path = trace_ctx.args.btf_path, 109 | ); 110 | int i = 0; 111 | 112 | skel = kprobe__open_opts(&opts); 113 | if (!skel) { 114 | pr_err("failed to open kprobe-based eBPF\n"); 115 | goto err; 116 | } 117 | pr_debug("eBPF is opened successfully\n"); 118 | 119 | /* set the max entries of perf event map to current cpu count */ 120 | bpf_map__set_max_entries(skel->maps.m_event, get_nprocs_conf()); 121 | bpf_func_init(skel, BPF_PROG_TYPE_KPROBE); 122 | 123 | trace_ctx.obj = skel->obj; 124 | if (trace_pre_load() || kprobe__load(skel)) { 125 | pr_err("failed to load kprobe-based eBPF\n"); 126 | goto err; 127 | } 128 | pr_debug("eBPF is loaded successfully\n"); 129 | 130 | bpf_set_config(skel, bss, trace_ctx.bpf_args); 131 | 132 | for (; i < ARRAY_SIZE(cpus); i++) 133 | INIT_LIST_HEAD(&cpus[i]); 134 | 135 | return 0; 136 | err: 137 | return -1; 138 | } 139 | 140 | static bool is_trace_supported(trace_t *trace) 141 | { 142 | struct kprobe *tmp = kprobe__open(); 143 | struct bpf_program 
*prog; 144 | int err; 145 | 146 | bpf_object__for_each_program(prog, tmp->obj) { 147 | if (strcmp(trace->prog, bpf_program__name(prog)) != 0) 148 | bpf_program__set_autoload(prog, false); 149 | } 150 | err = kprobe__load(tmp); 151 | kprobe__destroy(tmp); 152 | 153 | if (err) 154 | pr_verb("kernel feature probe failed for trace: %s\n", 155 | trace->prog); 156 | else 157 | pr_debug("kernel feature probe success for trace: %s\n", 158 | trace->prog); 159 | 160 | return err == 0; 161 | } 162 | 163 | static void probe_trace_feat_probe() 164 | { 165 | trace_t *trace; 166 | 167 | trace_for_each(trace) { 168 | if (!trace->probe || !trace_is_usable(trace)) 169 | continue; 170 | if (!is_trace_supported(trace)) 171 | trace_set_invalid(trace); 172 | } 173 | } 174 | 175 | void probe_trace_close() 176 | { 177 | if (skel) 178 | kprobe__destroy(skel); 179 | skel = NULL; 180 | } 181 | 182 | static analyzer_result_t probe_analy_exit(trace_t *trace, analy_exit_t *e) 183 | { 184 | analy_entry_t *pos; 185 | int cpu = e->cpu; 186 | 187 | if (cpu > MAX_CPU_COUNT) { 188 | pr_err("cpu count is too big\n"); 189 | goto out; 190 | } 191 | 192 | if (list_empty(&cpus[cpu])) { 193 | pr_debug("no entry found for exit: %s on cpu %d (list empty)\n", 194 | trace->name, cpu); 195 | goto out; 196 | } 197 | 198 | list_for_each_entry(pos, &cpus[cpu], cpu_list) { 199 | if (pos->event->func == e->event.func) 200 | goto found; 201 | } 202 | pr_debug("no entry found for exit: %s on cpu %d; func: %d, " 203 | "last_func: %d\n", trace->name, cpu, e->event.func, 204 | pos->event->func); 205 | goto out; 206 | found: 207 | pos->status |= ANALY_ENTRY_RETURNED; 208 | pos->priv = e->event.val; 209 | list_del(&pos->cpu_list); 210 | put_fake_analy_ctx(pos->fake_ctx); 211 | e->entry = pos; 212 | pos->status &= ~ANALY_ENTRY_ONCPU; 213 | pr_debug("found exit for entry: %s(%x) on cpu %d with return " 214 | "value %llx, ctx:%llx:%u\n", trace->name, pos->event->key, cpu, 215 | e->event.val, PTR2X(pos->ctx), pos->ctx->refs); 
216 | out: 217 | return RESULT_CONT; 218 | } 219 | 220 | static analyzer_result_t probe_analy_entry(trace_t *trace, analy_entry_t *e) 221 | { 222 | struct list_head *list; 223 | 224 | if (!trace_is_ret(trace)) { 225 | pr_debug("entry found for %s(%llx), ctx:%llx:%d\n", trace->name, 226 | (u64)e->event->key, PTR2X(e->ctx), 227 | e->ctx->refs); 228 | goto out; 229 | } 230 | list = &cpus[e->cpu]; 231 | list_add(&e->cpu_list, list); 232 | get_fake_analy_ctx(e->fake_ctx); 233 | pr_debug("mounted entry %s(%llx) on cpu %d, ctx:%llx:%d\n", trace->name, 234 | (u64)e->event->key, e->cpu, PTR2X(e->ctx), 235 | e->ctx->refs); 236 | e->status |= ANALY_ENTRY_ONCPU; 237 | 238 | out: 239 | return RESULT_CONT; 240 | } 241 | 242 | static void probe_trace_ready() 243 | { 244 | bpf_set_config_field(skel, bss, bpf_args_t, ready, true); 245 | } 246 | 247 | #ifdef __F_STACK_TRACE 248 | static void probe_print_stack(int key) 249 | { 250 | if (key <= 0) 251 | { 252 | pr_info("Call Stack Error! Invalid stack id:%d.\n", key); 253 | return; 254 | } 255 | 256 | int map_fd = bpf_map__fd(skel->maps.m_stack); 257 | __u64 ip[PERF_MAX_STACK_DEPTH] = {}; 258 | struct sym_result *sym; 259 | int i = 0; 260 | 261 | if (bpf_map_lookup_elem(map_fd, &key, ip)) { 262 | pr_info("Call Stack Error!\n"); 263 | return; 264 | } 265 | 266 | pr_info("Call Stack:\n"); 267 | for (; i < PERF_MAX_STACK_DEPTH && ip[i]; i++) { 268 | sym = sym_parse(ip[i]); 269 | if (!sym) 270 | break; 271 | pr_info(" -> %s\n", sym->desc); 272 | } 273 | pr_info("\n"); 274 | } 275 | #else 276 | static void probe_print_stack(int key) { } 277 | #endif 278 | 279 | static bool probe_trace_supported() 280 | { 281 | return true; 282 | } 283 | 284 | analyzer_t probe_analyzer = { 285 | .mode = TRACE_MODE_CTX_MASK | TRACE_MODE_TINY_MASK, 286 | .analy_entry = probe_analy_entry, 287 | .analy_exit = probe_analy_exit, 288 | }; 289 | 290 | trace_ops_t probe_ops = { 291 | .trace_attach = probe_trace_attach, 292 | .trace_load = probe_trace_load, 293 | 
.trace_close = probe_trace_close, 294 | .trace_ready = probe_trace_ready, 295 | .trace_feat_probe = probe_trace_feat_probe, 296 | .trace_supported = probe_trace_supported, 297 | .print_stack = probe_print_stack, 298 | .prepare_traces = probe_check_monitor, 299 | .analyzer = &probe_analyzer, 300 | }; 301 | -------------------------------------------------------------------------------- /src/trace_tracing.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "trace.h" 5 | #include "analysis.h" 6 | 7 | /* check whether trampoline is supported by current arch */ 8 | static bool tracing_arch_supported() 9 | { 10 | return simple_exec("cat /proc/kallsyms | " 11 | "grep arch_prepare_bpf_trampoline | " 12 | "grep T") == 0; 13 | } 14 | 15 | static bool tracing_trace_supported() 16 | { 17 | #ifdef NO_BTF 18 | goto failed; 19 | #endif 20 | 21 | /* for now, monitor mode only */ 22 | if (trace_ctx.mode != TRACE_MODE_MONITOR) 23 | goto failed; 24 | 25 | /* TRACING is not supported, skip this handle */ 26 | if (!libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_TRACING, NULL)) 27 | goto failed; 28 | 29 | if (!tracing_arch_supported()) { 30 | pr_warn("trampoline is not supported, skip TRACING\n"); 31 | goto failed; 32 | } 33 | 34 | return true; 35 | failed: 36 | pr_verb("TRACING is not supported, trying others\n"); 37 | return false; 38 | } 39 | 40 | #ifndef NO_BTF 41 | 42 | #include "progs/tracing.skel.h" 43 | #include "progs/feat_args_ext.skel.h" 44 | 45 | #define MAX_CPU_COUNT 1024 46 | 47 | trace_ops_t tracing_ops; 48 | 49 | static struct tracing *skel; 50 | 51 | static bool tracing_support_feat_args_ext() 52 | { 53 | struct feat_args_ext *tmp; 54 | int err; 55 | 56 | tmp = feat_args_ext__open_and_load(); 57 | if (tmp == NULL) 58 | return false; 59 | err = feat_args_ext__attach(tmp); 60 | feat_args_ext__destroy(tmp); 61 | return err == 0; 62 | } 63 | 64 | static void tracing_adjust_target() 65 | { 66 | struct 
bpf_program *prog; 67 | trace_t *trace; 68 | 69 | trace_for_each(trace) { 70 | if (!(trace->status & TRACE_ATTACH_MANUAL)) 71 | continue; 72 | 73 | prog = bpf_pbn(trace_ctx.obj, trace->prog); 74 | /* function name contain "." is not supported by BTF */ 75 | if (prog && strchr(trace->name, '.')) { 76 | trace_set_invalid_reason(trace, "BTF invalid"); 77 | bpf_program__set_autoload(prog, false); 78 | } 79 | 80 | #if 0 81 | tracing_trace_attach_manual(trace->prog, trace->name); 82 | if (!trace_is_ret(trace)) 83 | continue; 84 | 85 | sprintf(kret_name, "ret%s", trace->prog); 86 | tracing_trace_attach_manual(kret_name, trace->name); 87 | #endif 88 | } 89 | } 90 | 91 | static int tracing_trace_attach() 92 | { 93 | return tracing__attach(skel); 94 | } 95 | 96 | static void tracing_load_rules() 97 | { 98 | rule_t *local_rule; 99 | rules_ret_t *rule; 100 | trace_t *trace; 101 | int i; 102 | 103 | trace_for_each(trace) { 104 | if (trace_is_invalid(trace) || !trace_is_enable(trace) || 105 | !trace_is_ret(trace) || !trace_is_func(trace)) 106 | continue; 107 | 108 | rule = &skel->bss->rules_all[trace->index]; 109 | i = 0; 110 | list_for_each_entry(local_rule, &trace->rules, list) { 111 | if (local_rule->level == RULE_INFO) 112 | continue; 113 | rule->expected[i] = local_rule->expected; 114 | rule->op[i] = local_rule->type; 115 | i++; 116 | } 117 | } 118 | } 119 | 120 | static void tracing_check_args() 121 | { 122 | bool support_feat_args_ext, support_btf_modules; 123 | trace_t *trace; 124 | 125 | support_feat_args_ext = tracing_support_feat_args_ext(); 126 | if (!support_feat_args_ext) 127 | pr_warn("tracing kernel function with 6+ arguments is not" 128 | "supportd by your kernel, following functions " 129 | "are skipped:\n"); 130 | 131 | trace_for_each(trace) { 132 | if (trace_is_invalid(trace) || !trace_is_enable(trace) || 133 | !trace_is_func(trace)) 134 | continue; 135 | 136 | if (!support_feat_args_ext && trace->arg_count > 6) { 137 | pr_warn("\t%s\n", trace->name); 138 | 
trace_set_invalid(trace); 139 | } 140 | } 141 | 142 | support_btf_modules = kernel_has_config("DEBUG_INFO_BTF_MODULES"); 143 | if (!support_btf_modules) 144 | pr_warn("CONFIG_DEBUG_INFO_BTF_MODULES is not supported " 145 | "by your kernel, following functions are " 146 | "skipped:\n"); 147 | 148 | trace_for_each(trace) { 149 | if (trace_is_invalid(trace) || !trace_is_enable(trace) || 150 | !trace_is_func(trace)) 151 | continue; 152 | 153 | if (!support_btf_modules && !btf_get_type(trace->name)) { 154 | pr_warn("\t%s\n", trace->name); 155 | trace_set_invalid(trace); 156 | } 157 | } 158 | } 159 | 160 | static int tracing_trace_load() 161 | { 162 | DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, 163 | .btf_custom_path = trace_ctx.args.btf_path, 164 | ); 165 | 166 | skel = tracing__open_opts(&opts); 167 | if (!skel) { 168 | pr_err("failed to open tracing-based eBPF\n"); 169 | goto err; 170 | } 171 | pr_debug("eBPF is opened successfully\n"); 172 | 173 | /* set the max entries of perf event map to current cpu count */ 174 | bpf_map__set_max_entries(skel->maps.m_event, get_nprocs_conf()); 175 | bpf_func_init(skel, BPF_PROG_TYPE_TRACING); 176 | 177 | trace_ctx.obj = skel->obj; 178 | tracing_load_rules(); 179 | tracing_check_args(); 180 | 181 | if (trace_pre_load()) { 182 | pr_err("failed to prepare load\n"); 183 | goto err; 184 | } 185 | 186 | tracing_adjust_target(); 187 | if (tracing__load(skel)) { 188 | pr_err("failed to load tracing-based eBPF\n"); 189 | goto err; 190 | } 191 | pr_debug("eBPF is loaded successfully\n"); 192 | 193 | bpf_set_config(skel, bss, trace_ctx.bpf_args); 194 | 195 | return 0; 196 | err: 197 | return -1; 198 | } 199 | 200 | void tracing_trace_close() 201 | { 202 | if (skel) 203 | tracing__destroy(skel); 204 | skel = NULL; 205 | } 206 | 207 | static analyzer_result_t 208 | tracing_analy_exit(trace_t *trace, analy_exit_t *e) 209 | { 210 | return RESULT_CONT; 211 | } 212 | 213 | static analyzer_result_t 214 | tracing_analy_entry(trace_t *trace, 
analy_entry_t *e) 215 | { 216 | return RESULT_CONT; 217 | } 218 | 219 | static void tracing_trace_ready() 220 | { 221 | bpf_set_config_field(skel, bss, bpf_args_t, ready, true); 222 | } 223 | 224 | static void tracing_print_stack(int key) 225 | { 226 | if (key <= 0) 227 | { 228 | pr_info("Call Stack Error! Invalid stack id:%d.\n", key); 229 | return; 230 | } 231 | 232 | int map_fd = bpf_map__fd(skel->maps.m_stack); 233 | __u64 ip[PERF_MAX_STACK_DEPTH] = {}; 234 | struct sym_result *sym; 235 | int i = 0; 236 | 237 | if (bpf_map_lookup_elem(map_fd, &key, ip)) { 238 | pr_info("Call Stack Error!\n"); 239 | return; 240 | } 241 | 242 | pr_info("Call Stack:\n"); 243 | for (; i < PERF_MAX_STACK_DEPTH && ip[i]; i++) { 244 | sym = sym_parse(ip[i]); 245 | if (!sym) 246 | break; 247 | pr_info(" -> [%llx]%s\n", ip[i], sym->desc); 248 | } 249 | pr_info("\n"); 250 | } 251 | 252 | analyzer_t tracing_analyzer = { 253 | .mode = TRACE_MODE_CTX_MASK, 254 | .analy_entry = tracing_analy_entry, 255 | .analy_exit = tracing_analy_exit, 256 | }; 257 | 258 | trace_ops_t tracing_ops = { 259 | .trace_attach = tracing_trace_attach, 260 | .trace_load = tracing_trace_load, 261 | .trace_close = tracing_trace_close, 262 | .trace_ready = tracing_trace_ready, 263 | .trace_supported = tracing_trace_supported, 264 | .print_stack = tracing_print_stack, 265 | .analyzer = &tracing_analyzer, 266 | }; 267 | 268 | #else 269 | trace_ops_t tracing_ops = { 270 | .trace_supported = tracing_trace_supported, 271 | }; 272 | #endif 273 | -------------------------------------------------------------------------------- /src/vmlinux_header.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MulanPSL-2.0 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #define bpf_tail_call dummy____bpf_tail_call 
21 | #define bpf_get_stackid dummy____bpf_get_stackid 22 | #include 23 | #undef bpf_tail_call 24 | #undef bpf_get_stackid 25 | 26 | #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) 27 | #include 28 | #else 29 | #define NT_DISABLE_NFT 30 | #endif 31 | --------------------------------------------------------------------------------