├── kprobe_sys_execve ├── readme.md ├── .vscode │ ├── settings.json │ └── c_cpp_properties.json ├── execve_data.h ├── clang-8.exe.stackdump ├── kp_execve.user.purec.c ├── Makefile └── kp_execve.kern.c ├── tcp_accelerate ├── client.sh ├── server.sh ├── unload.sh ├── load.sh ├── tcp_accelerate_bypass.kern.c ├── tcp_accelerate.h ├── tcp_accelerate_sockops.kern.c └── Makefile ├── ebpf-kill-example ├── test │ ├── loop.sh │ └── test.sh ├── .gitmodules ├── img │ └── example.png ├── .vscode │ ├── dryrun.log │ ├── configurationCache.log │ └── settings.json ├── Makefile ├── .gitignore ├── src │ ├── Makefile │ ├── user.c │ └── kern.c ├── docs │ └── what-is-ebpf.md ├── README.md └── LICENSE ├── ebpf_helloworld ├── load_helloworld.c ├── Makefile └── kern_helloworld.c ├── tp_execve └── clang │ ├── clang-8.exe.stackdump │ ├── event_help.h │ ├── bpf_help.h │ ├── event.h │ ├── bpf_help.c │ ├── event_help.c │ ├── tp_execve.user.purec.c │ ├── Makefile │ ├── tp_execve.user.btf.c │ └── tp_execve.kern.c ├── rmap_walk_file_snoop.bt ├── .vscode ├── settings.json └── c_cpp_properties.json ├── dcsnoop.bt ├── .clang-format ├── skbtracer.c └── README.md /kprobe_sys_execve/readme.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tcp_accelerate/client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | nc localhost 1000 -------------------------------------------------------------------------------- /tcp_accelerate/server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo socat TCP4-LISTEN:1000,fork exec:cat -------------------------------------------------------------------------------- /ebpf-kill-example/test/loop.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | while : 3 | do 4 | 
sleep 1 5 | done 6 | -------------------------------------------------------------------------------- /ebpf-kill-example/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libbpf"] 2 | path = libbpf 3 | url = https://github.com/libbpf/libbpf 4 | -------------------------------------------------------------------------------- /ebpf-kill-example/img/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubo0067/ebpf_space/HEAD/ebpf-kill-example/img/example.png -------------------------------------------------------------------------------- /ebpf-kill-example/.vscode/dryrun.log: -------------------------------------------------------------------------------- 1 | make.exe --dry-run --keep-going --print-directory 2 | '"make.exe"' �����ڲ����ⲿ���Ҳ���ǿ����еij��� 3 | ���������ļ��� 4 | 5 | -------------------------------------------------------------------------------- /ebpf-kill-example/.vscode/configurationCache.log: -------------------------------------------------------------------------------- 1 | {"buildTargets":[],"launchTargets":[],"customConfigurationProvider":{"workspaceBrowse":{"browsePath":[]},"fileIndex":[]}} -------------------------------------------------------------------------------- /ebpf-kill-example/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "makefile.extensionOutputFolder": "./.vscode", 3 | "C_Cpp.default.configurationProvider": "ms-vscode.makefile-tools" 4 | } -------------------------------------------------------------------------------- /tcp_accelerate/unload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | bpftool prog detach pinned /sys/fs/bpf/bpf_tcpip_bypass msg_verdict pinned /sys/fs/bpf/sock_ops_map 5 | 6 | rm /sys/fs/bpf/bpf_tcpip_bypass 7 | 8 | bpftool cgroup detach 
/sys/fs/cgroup/ sock_ops pinned /sys/fs/bpf/bpf_sockops 9 | 10 | rm /sys/fs/bpf/bpf_sockops 11 | 12 | rm /sys/fs/bpf/sock_ops_map -------------------------------------------------------------------------------- /kprobe_sys_execve/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "bpf.h": "c", 4 | "bpf_helpers.h": "c", 5 | "sched.h": "c", 6 | "bpf_tracing.h": "c", 7 | "trace_common.h": "c", 8 | "stringify.h": "c", 9 | "libbpf.h": "c", 10 | "types.h": "c", 11 | "ptrace.h": "c", 12 | "typeinfo": "c", 13 | "execve_data.h": "c" 14 | } 15 | } -------------------------------------------------------------------------------- /kprobe_sys_execve/execve_data.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-24 15:16:05 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-08-24 16:42:15 6 | */ 7 | 8 | #ifndef __EXECVE_DATA_H_ 9 | #define __EXECVE_DATA_H_ 10 | 11 | #include 12 | 13 | struct data_t 14 | { 15 | pid_t pid; 16 | pid_t tid; 17 | uid_t uid; 18 | __s32 ret; 19 | char comm[ TASK_COMM_LEN ]; 20 | char filename[ 32 ]; 21 | }; 22 | 23 | #endif // __EXECVE_DATA_H_ -------------------------------------------------------------------------------- /ebpf-kill-example/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean build deps test libbpf 2 | 3 | deps: 4 | sudo apt update 5 | sudo apt install -y build-essential git make gcc clang llvm libelf-dev 6 | git submodule update --init 7 | 8 | libbpf: 9 | $(MAKE) --directory=libbpf/src all 10 | DESTDIR=root $(MAKE) --directory=libbpf/src install_headers 11 | 12 | build: libbpf 13 | $(MAKE) --directory=src 14 | 15 | clean: 16 | $(MAKE) --directory=src clean 17 | $(MAKE) --directory=libbpf/src clean 18 | 19 | test: 20 | ./test/test.sh 21 | 22 | .DEFAULT_GOAL := build 23 | 
-------------------------------------------------------------------------------- /ebpf_helloworld/load_helloworld.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: calmwu 3 | * @Date: 2021-02-11 17:19:51 4 | * @Last Modified by: calmwu 5 | * @Last Modified time: 2021-02-11 20:51:42 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | int main(int argc, char **argv) { 14 | if(load_bpf_file("kern_helloworld.o") != 0) { 15 | printf("The kernel didn't load the BPF program\n"); 16 | return -1; 17 | } 18 | 19 | read_trace_pipe(); 20 | 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /tp_execve/clang/clang-8.exe.stackdump: -------------------------------------------------------------------------------- 1 | Stack trace: 2 | Frame Function Args 3 | 000FFFFAB88 0018006332E (0018028BBD8, 0018026AFEE, 000FFFFAB88, 000FFFF9A90) 4 | 000FFFFAB88 0018004977A (00000000000, 00000000000, 00000000000, 0018028BC90) 5 | 000FFFFAB88 001800497B2 (0018028BCD8, 000FFFFAA48, 000FFFFAB88, 00000000000) 6 | 000FFFFAB88 001800C9BF2 (00000000000, 00000000000, 00000000000, 00000000000) 7 | 000FFFFAB88 001800C9D90 (000FFFFABD0, 00000000000, 00000000000, 00000000000) 8 | 0038724D560 001800CB5A5 (000FFFFABD0, 00000000000, 00000000000, 00000000000) 9 | End of stack trace 10 | -------------------------------------------------------------------------------- /tp_execve/clang/event_help.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-01 14:31:28 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 15:00:59 6 | */ 7 | 8 | #ifndef __EVENT_HELP_H__ 9 | #define __EVENT_HELP_H__ 10 | 11 | #include 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif // 16 | 17 | extern struct timespec start_time; 18 | 19 | void handle_event( void* ctx, int32_t cpu, void* data, 
uint32_t size ); 20 | 21 | void handle_lost_event( void* ctx, int32_t cpu, uint64_t lost_cnt ); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif // 26 | 27 | #endif // __EVENT_HELP_H__ -------------------------------------------------------------------------------- /rmap_walk_file_snoop.bt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bpftrace 2 | 3 | #include 4 | #include 5 | 6 | //struct page { 7 | // unsigned long flags; 8 | // // [...] 9 | //}; 10 | 11 | BEGIN 12 | { 13 | printf("Tracing rmap_walk_file lookups... Hit Ctrl-C to end.\n"); 14 | printf("%-8s %-16s %1s %-8s %-8s %-8s %-8s\n", "TIME", "COMM", "T", "flags", "mapcount", "page_type", "STACK"); 15 | } 16 | 17 | kprobe:rmap_walk_file 18 | { 19 | time("%H:%M:%S "); 20 | if(((struct page *)arg0)->_mapcount.counter > 0) { 21 | printf("%-16s R %lu %d %u %s\n", comm, ((struct page *)arg0)->flags, ((struct page *)arg0)->_mapcount.counter, 0, kstack); 22 | } else { 23 | printf("%-16s R %lu %d %u %s\n", comm, ((struct page *)arg0)->flags, -1, ((struct page *)arg0)->page_type, kstack); 24 | } 25 | } -------------------------------------------------------------------------------- /ebpf-kill-example/.gitignore: -------------------------------------------------------------------------------- 1 | *.save 2 | 3 | *.ll 4 | 5 | # Prerequisites 6 | *.d 7 | 8 | # Object files 9 | *.o 10 | *.ko 11 | *.obj 12 | *.elf 13 | 14 | # Linker output 15 | *.ilk 16 | *.map 17 | *.exp 18 | 19 | # Precompiled Headers 20 | *.gch 21 | *.pch 22 | 23 | # Libraries 24 | *.lib 25 | *.a 26 | *.la 27 | *.lo 28 | 29 | # Shared objects (inc. 
Windows DLLs) 30 | *.dll 31 | *.so 32 | *.so.* 33 | *.dylib 34 | 35 | # Executables 36 | *.exe 37 | *.out 38 | *.app 39 | *.i*86 40 | *.x86_64 41 | *.hex 42 | 43 | # Debug files 44 | *.dSYM/ 45 | *.su 46 | *.idb 47 | *.pdb 48 | 49 | # Kernel Module Compile Results 50 | *.mod* 51 | *.cmd 52 | .tmp_versions/ 53 | modules.order 54 | Module.symvers 55 | Mkfile.old 56 | dkms.conf 57 | 58 | # Kernel src files 59 | /kernel-src 60 | /lib64 61 | /include 62 | 63 | # Executable 64 | /src/ebpf-kill-example 65 | -------------------------------------------------------------------------------- /ebpf-kill-example/src/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := ebpf-kill-example 2 | 3 | SRC_DIR = $(shell realpath .) 4 | LIBBPF_DIR = $(SRC_DIR)/../libbpf/src/ 5 | CFLAGS ?= -I$(LIBBPF_DIR)/root/usr/include/ 6 | LDFLAGS ?= -L$(LIBBPF_DIR) 7 | 8 | LIBS = -lbpf -lelf 9 | 10 | all: $(TARGET) kern.o 11 | 12 | .PHONY: clean 13 | 14 | clean: 15 | rm -f $(TARGET) 16 | rm -f kern.o 17 | rm -f kern.ll 18 | 19 | $(TARGET): %: user.c Makefile 20 | #gcc $(CFLAGS) $(LDFLAGS) -o $(TARGET) user.c -Wl,-rpath=$(LIBBPF_DIR) $(LIBS) 21 | #gcc -Wall -Werror -g3 -ggdb3 -o ebpf-kill-example user.c -Wl,-Bstatic -lbpf -Wl,-Bdynamic -lgcc_s -lz -lelf 22 | gcc -Wall -Werror -g3 -ggdb3 -o ebpf-kill-example user.c -Wl,-rpath=/usr/local/lib64 -lbpf -lelf 23 | 24 | 25 | kern.o: kern.c 26 | clang -S \ 27 | -D __BPF_TRACING__ \ 28 | -Wall \ 29 | -Werror \ 30 | -O2 -emit-llvm -c -g kern.c 31 | llc -march=bpf -filetype=obj -o kern.o kern.ll 32 | -------------------------------------------------------------------------------- /ebpf-kill-example/test/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | RED='\033[0;31m' 4 | GREEN='\033[0;32m' 5 | NC='\033[0m' # No Color 6 | 7 | SCRIPT=$(readlink -f "$0") 8 | SCRIPT_PATH=$(dirname "$SCRIPT") 9 | 10 | # stop on error 11 | set -e 12 | 13 | # 
request sudo in advance 14 | [ "$UID" -eq 0 ] || exec sudo bash "$0" "$@" 15 | 16 | echo "-- Loading eBPF program." 17 | sudo ./src/ebpf-kill-example > /tmp/ebpf-kill.log & 18 | 19 | sleep 5 20 | 21 | echo "-- Starting test process to kill." 22 | $SCRIPT_PATH/loop.sh & 23 | echo "-- PID of test process is $!." 24 | 25 | disown 26 | kill -9 $! 27 | 28 | echo "-- Killed. Waiting for eBPF program to terminate .." 29 | sleep 30 30 | 31 | if (grep "$!" /tmp/ebpf-kill.log > /dev/null) 32 | then 33 | printf "${GREEN}[ OK ]${NC} -- eBPF program ran as expected.\n" 34 | exit 0 35 | else 36 | printf "${RED}[ FAIL ]${NC} -- eBPF program did not run as expected.\n" 37 | exit 1 38 | fi 39 | -------------------------------------------------------------------------------- /tp_execve/clang/bpf_help.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-31 11:47:46 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 11:55:31 6 | */ 7 | 8 | #ifndef __BPF_HELP_H__ 9 | #define __BPF_HELP_H__ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #ifdef NSEC_PER_SEC 22 | # undef NSEC_PER_SEC 23 | #endif 24 | #define NSEC_PER_SEC 1000000000ULL 25 | 26 | struct env_t { 27 | bool time; 28 | bool timestamp; 29 | bool fails; 30 | uid_t uid; 31 | bool quote; 32 | const char* name; 33 | const char* line; 34 | bool print_uid; 35 | bool verbose; 36 | int32_t max_args; 37 | }; 38 | 39 | extern struct env_t g_env; 40 | 41 | int bump_memlock_rlimit( void ); 42 | 43 | int bpf_printf( enum libbpf_print_level level, const char* fmt, va_list args ); 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | 49 | #endif // __BPF_HELP_H__ -------------------------------------------------------------------------------- /tp_execve/clang/event.h: -------------------------------------------------------------------------------- 1 
| /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-27 11:58:48 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 19:08:10 6 | */ 7 | 8 | #ifndef __TP_EXECVE_EVENT_H__ 9 | #define __TP_EXECVE_EVENT_H__ 10 | 11 | #include 12 | 13 | #define TASK_COMM_LEN 16 14 | #define ARGSIZE 128 15 | //#define TOTAL_MAX_ARGS 60 16 | #define DEFAULT_MAXARGS 20 17 | #define FULL_MAX_ARGS_ARR ( DEFAULT_MAXARGS * ARGSIZE ) 18 | #define LAST_ARG (FULL_MAX_ARGS_ARR - ARGSIZE) 19 | // Byte offset of event_t.args, i.e. the size of everything that precedes the args array 20 | #define BASE_EVENT_SIZE ((size_t)(&((struct event_t*)0)->args)) 21 | // BASE_EVENT_SIZE plus e->args_size; e is parenthesized so that arguments such as &ev or p + 1 expand correctly 22 | #define EVENT_SIZE(e) (BASE_EVENT_SIZE + (e)->args_size) 23 | 24 | 25 | #ifdef INVALID_UID 26 | # undef INVALID_UID 27 | #endif 28 | #define INVALID_UID ( ( uid_t ) -1 ) 29 | 30 | struct event_t { 31 | pid_t pid; 32 | pid_t ppid; 33 | uid_t uid; 34 | __s32 retval; 35 | __s32 args_count; 36 | __u32 args_size; 37 | char comm[ TASK_COMM_LEN ]; 38 | char args[ FULL_MAX_ARGS_ARR ]; // all args are written into this single array 39 | }; 40 | 41 | #endif // __TP_EXECVE_EVENT_H__ -------------------------------------------------------------------------------- /ebpf-kill-example/docs/what-is-ebpf.md: -------------------------------------------------------------------------------- 1 | # What is eBPF? 2 | 3 | ## Introduction 4 | eBPF or Extended Berkeley Packet Filter is a Linux system allowing developers 5 | to run kernel-space programs from user-space [1]. Programs are compiled to eBPF 6 | byte-code and are run in a VM within the kernel. BPF is a highly advanced VM, 7 | running instructions in a fully isolated environment. It is comparable to the 8 | Java Virtual Machine. eBPF can be used for performance tracing, but also as a 9 | middleware for various different scenarios including security. 10 | 11 | After the program is compiled, eBPF verifies that the program is safe to run. 12 | This prevents the kernel from loading programs that might compromise the 13 | system by crashing the kernel. 
14 | 15 | eBPF does not require restart when loading modules, but can load and unload 16 | on demand. 17 | 18 | ## References 19 | [1] Calavera, D., Fontana, L., & Frazelle, J. (2020). Linux observability with 20 | BPF: Advanced programming for performance analysis and networking. Sebastopol, 21 | CA: O'Reilly Media. 22 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "libbpf.h": "c", 4 | "bpf.h": "c", 5 | "types.h": "c", 6 | "bpf_helpers.h": "c", 7 | "stdint.h": "c", 8 | "ptrace.h": "c", 9 | "bpf_helper_defs.h": "c", 10 | "*.tbl": "c", 11 | "tp_execve.skel.h": "c", 12 | "time.h": "c", 13 | "event.h": "c", 14 | "bpf_help.h": "c", 15 | "bpf_core_read.h": "c", 16 | "ios": "c", 17 | "__tree": "c", 18 | "map": "c", 19 | "filesystem": "c", 20 | "__node_handle": "c", 21 | "execve_data.h": "c", 22 | "*.def": "c", 23 | "strstream": "c", 24 | "cerrno": "c", 25 | "cstdarg": "c", 26 | "iostream": "c", 27 | "streambuf": "c", 28 | "*.tcc": "c", 29 | "workqueue.h": "c", 30 | "timer.h": "c", 31 | "ktime.h": "c", 32 | "jiffies.h": "c", 33 | "trace_common.h": "c", 34 | "tcp_accerlerate.h": "c", 35 | "tcp_accelerate.h": "c", 36 | "bpf_endian.h": "c" 37 | } 38 | } -------------------------------------------------------------------------------- /kprobe_sys_execve/.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "GCC", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "C:\\cygwin64\\usr\\include", 8 | "E:/develope/linux/linux-5.12.9/include", 9 | "E:/develope/linux/linux-5.12.9/include/uapi", 10 | "E:/develope/linux/linux-5.12.9/arch/x86/include/uapi", 11 | "E:/develope/linux/linux-5.12.9/arch/alpha/include", 12 | "E:/develope/linux/linux-5.12.9/tools/lib", 13 | 
"E:/develope/linux/linux-5.12.9/arch/x86/include" 14 | ], 15 | "defines": [ 16 | "_DEBUG", 17 | "UNICODE", 18 | "_UNICODE" 19 | ], 20 | "windowsSdkVersion": "10.0.17763.0", 21 | "compilerPath": "C:/cygwin64/bin/gcc.exe", 22 | "cStandard": "c11", 23 | "cppStandard": "c++17", 24 | "intelliSenseMode": "gcc-x64" 25 | } 26 | ], 27 | "version": 4 28 | } -------------------------------------------------------------------------------- /tp_execve/clang/bpf_help.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-31 11:48:54 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 12:12:10 6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "bpf_help.h" 18 | 19 | struct env_t g_env = { .quote = true, .time = true, .print_uid = true, .timestamp = true, .verbose = true }; 20 | 21 | int bump_memlock_rlimit( void ) { 22 | struct rlimit rlim_new = { 23 | .rlim_cur = RLIM_INFINITY, 24 | .rlim_max = RLIM_INFINITY, 25 | }; 26 | 27 | return setrlimit( RLIMIT_MEMLOCK, &rlim_new ); 28 | } 29 | 30 | /* Writes "level:{N} " followed by the message formatted from fmt/args to stderr. Returns the total number of characters written, or a negative value if either write fails. */ 31 | int bpf_printf( enum libbpf_print_level level, const char* fmt, va_list args ) { 32 | // if ( level == LIBBPF_DEBUG && !g_env.verbose ) { 33 | // return 0; 34 | // } 35 | // Print the prefix on its own instead of splicing fmt into a fixed 128-byte buffer: the previous sprintf() overflowed that buffer for long format strings 36 | int prefix_len = fprintf( stderr, "level:{%d} ", level ); 37 | if ( prefix_len < 0 ) { 38 | return prefix_len; 39 | } 40 | // vfprintf takes the va_list directly, so the caller's format string is used unmodified 41 | int msg_len = vfprintf( stderr, fmt, args ); 42 | return msg_len < 0 ? msg_len : prefix_len + msg_len; 43 | } -------------------------------------------------------------------------------- /tcp_accelerate/load.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | mount -t bpf bpf /sys/fs/bpf/ 5 | 6 | bpftool prog load /home/calmwu/program/ebpf_space/tcp_accelerate/tcp_accelerate_sockops.kern.o "/sys/fs/bpf/bpf_sockops" 7 | 8 | # This attaches the loaded SOCK_OPS program to the cgroup 9 | # This is attached to cgroup so 
that the program applies to all sockets of all tasks placed in the cgroup 10 | bpftool cgroup attach "/sys/fs/cgroup/" sock_ops pinned "/sys/fs/bpf/bpf_sockops" 11 | 12 | MAP_ID=$(bpftool prog show pinned "/sys/fs/bpf/bpf_sockops" | grep -o -E 'map_ids [0-9]+'|cut -d' ' -f2) 13 | sudo bpftool map pin id $MAP_ID "/sys/fs/bpf/sock_ops_map" 14 | 15 | # 将程序加载到内核 16 | # 将程序 pin 到 BPF 文件系统的 /sys/fs/bpf/bpf_tcpip_bypass 位置 17 | # 重用已有的 sockmap,指定了 sockmap 的名字为 sock_ops_map 并且文件路径为 /sys/fs/bpf/sock_ops_map 18 | bpftool prog load /home/calmwu/program/ebpf_space/tcp_accelerate/tcp_accelerate_bypass.kern.o "/sys/fs/bpf/bpf_tcpip_bypass" map name sock_ops_map pinned "/sys/fs/bpf/sock_ops_map" 19 | 20 | bpftool prog attach pinned "/sys/fs/bpf/bpf_tcpip_bypass" msg_verdict pinned "/sys/fs/bpf/sock_ops_map" 21 | 22 | -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "GCC", 5 | "includePath": [ 6 | "${workspaceFolder}/**", 7 | "C:/cygwin64/usr/include", 8 | "E:/develope/linux/linux-5.12.9/include", 9 | "E:/develope/linux/linux-5.12.9/include/uapi", 10 | "E:/develope/linux/linux-5.12.9/arch/x86/include/uapi", 11 | "E:/develope/linux/linux-5.12.9/arch/alpha/include", 12 | "E:/develope/linux/linux-5.12.9/tools/lib", 13 | "E:/develope/linux/linux-5.12.9/arch/x86/include", 14 | "E:/develope/linux/linux-5.12.9/samples/bpf" 15 | ], 16 | "defines": [ 17 | "_DEBUG", 18 | "UNICODE", 19 | "_UNICODE" 20 | ], 21 | "windowsSdkVersion": "10.0.17763.0", 22 | "compilerPath": "C:/cygwin64/bin/gcc.exe", 23 | "cStandard": "c11", 24 | "cppStandard": "c++17", 25 | "intelliSenseMode": "gcc-x64" 26 | } 27 | ], 28 | "version": 4 29 | } -------------------------------------------------------------------------------- /tcp_accelerate/tcp_accelerate_bypass.kern.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-22 17:06:55 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-22 17:51:19 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | //#include 这个需要BTF支持 14 | #include 15 | #include 16 | #include 17 | 18 | #include "tcp_accelerate.h" 19 | 20 | static inline void sk_msg_extractv4_key( struct sk_msg_md* msg, struct sock_key* key ) { 21 | key->sip4 = msg->remote_ip4; 22 | key->dip4 = msg->local_ip4; 23 | key->family = 1; 24 | 25 | key->dport = ( bpf_htonl( msg->local_port ) >> 16 ); 26 | key->sport = FORCE_READ( msg->remote_port ) >> 16; 27 | } 28 | 29 | /* 30 | The SK_MSG program 在调用sendmsg时被执行 31 | */ 32 | SEC( "sk_msg" ) 33 | __s32 bpf_tcpip_bypass( struct sk_msg_md* msg ) { 34 | struct sock_key key = {}; 35 | // 从socket metadata中提取key 36 | sk_msg_extractv4_key( msg, &key ); 37 | // 调用bpf_msg_redirect_hash寻找对应的socket,并根据flag,将数据重定向到socket的某个queue 38 | bpf_msg_redirect_hash( msg, &sock_ops_map, &key, BPF_F_INGRESS ); 39 | return SK_PASS; 40 | } 41 | 42 | char _license[] SEC( "license" ) = "GPL"; 43 | __u32 _version SEC( "version" ) = LINUX_VERSION_CODE; 44 | -------------------------------------------------------------------------------- /ebpf-kill-example/src/user.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int main(int argc, char **argv) { 12 | char path[PATH_MAX]; 13 | sprintf(path, "%s/kern.o", dirname(argv[0])); 14 | 15 | int prog_fd; 16 | int ret; 17 | struct bpf_object *obj; 18 | 19 | if ((ret = bpf_prog_load(path, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd)) != 0) { 20 | printf("The kernel didn't load the BPF program, ret: %d, reason: %s\n", ret, strerror(ret)); 21 | return -1; 22 | } 23 | 24 | if (prog_fd < 1) { 25 | printf("Error creating 
prog_fd\n"); 26 | return -2; 27 | } 28 | 29 | struct bpf_program *prog = 30 | bpf_object__find_program_by_name(obj, "ebpf_kill_example"); 31 | bpf_program__attach(prog); 32 | 33 | printf("eBPF will listen to force kills for the next 30 seconds!\n"); 34 | sleep(30); 35 | 36 | struct bpf_map *kill_map = bpf_object__find_map_by_name(obj, "kill_map"); 37 | int kill_map_fd = bpf_map__fd(kill_map); 38 | long key = -1, prev_key; 39 | 40 | // Iterate over all keys in the map 41 | while (bpf_map_get_next_key(kill_map_fd, &prev_key, &key) == 0) { 42 | printf("%ld was forcefully killed!\n", key); 43 | prev_key = key; 44 | } 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /dcsnoop.bt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bpftrace 2 | /* 3 | * dcsnoop Trace directory entry cache (dcache) lookups. 4 | * For Linux, uses bpftrace and eBPF. 5 | * 6 | * This uses kernel dynamic tracing of kernel functions, lookup_fast() and 7 | * d_lookup(), which will need to be modified to match kernel changes. See 8 | * code comments. 9 | * 10 | * USAGE: dcsnoop.bt 11 | * 12 | * Copyright 2018 Netflix, Inc. 13 | * Licensed under the Apache License, Version 2.0 (the "License") 14 | * 15 | * 08-Sep-2018 Brendan Gregg Created this. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | // from fs/namei.c: 22 | struct nameidata { 23 | struct path path; 24 | struct qstr last; 25 | // [...] 26 | }; 27 | 28 | BEGIN 29 | { 30 | printf("Tracing dcache lookups... 
Hit Ctrl-C to end.\n"); 31 | printf("%-8s %-6s %-16s %1s %s\n", "TIME", "PID", "COMM", "T", "FILE"); 32 | } 33 | 34 | // comment out this block to avoid showing hits: 35 | kprobe:lookup_fast, 36 | kprobe:lookup_fast.constprop.* 37 | { 38 | $nd = (struct nameidata *)arg0; 39 | printf("%-8d %-6d %-16s R %s\n", elapsed / 100000, pid, comm, 40 | str($nd->last.name)); 41 | } 42 | 43 | kprobe:d_lookup 44 | { 45 | $name = (struct qstr *)arg1; 46 | @fname[tid] = $name->name; 47 | } 48 | 49 | kretprobe:d_lookup 50 | /@fname[tid]/ 51 | { 52 | printf("%-8d %-6d %-16s M %s\n", elapsed / 100000, pid, comm, 53 | str(@fname[tid])); 54 | delete(@fname[tid]); 55 | } -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | #https://blog.csdn.net/softimite_zifeng/article/details/78357898 2 | Language: Cpp 3 | BasedOnStyle: WebKit 4 | AccessModifierOffset: -4 5 | Standard: Cpp11 6 | TabWidth: 4 7 | UseTab: ForIndentation 8 | MaxEmptyLinesToKeep: 1 9 | SpaceAfterCStyleCast: true 10 | SpaceBeforeAssignmentOperators: true 11 | SpaceBeforeParens: ControlStatements 12 | SpaceInEmptyParentheses: false 13 | SpacesBeforeTrailingComments: 1 14 | SpacesInAngles: true 15 | SpacesInContainerLiterals: true 16 | SpacesInCStyleCastParentheses: true 17 | SpacesInParentheses: true 18 | SpacesInSquareBrackets: true 19 | BreakBeforeBraces: Custom 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: true 26 | AfterObjCDeclaration: true 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | AlignConsecutiveAssignments: true 33 | #AlignConsecutiveDeclarations: true 34 | ColumnLimit: 120 35 | AlignTrailingComments: true 36 | BreakBeforeInheritanceComma : true 37 | DisableFormat: false 38 | ExperimentalAutoDetectBinPacking: 
false 39 | ConstructorInitializerIndentWidth: 4 40 | ContinuationIndentWidth: 4 41 | IndentWidth: 4 42 | BinPackParameters: false 43 | AllowAllParametersOfDeclarationOnNextLine : true 44 | AlwaysBreakTemplateDeclarations : true 45 | CompactNamespaces : false 46 | FixNamespaceComments : true 47 | IndentCaseLabels : true 48 | PointerAlignment : Left 49 | IndentPPDirectives : AfterHash 50 | ReflowComments: true -------------------------------------------------------------------------------- /ebpf-kill-example/README.md: -------------------------------------------------------------------------------- 1 | # ebpf-kill-example 2 | 3 | *ebpf-kill-example* is an example of an eBPF program hooking into the kill tracepoint. 4 | This project is a Proof-of-Concept (PoC) showing the feasibility and viability of eBPF. 5 | Furthermore, the project shows how to create and run a simple eBPF program. 6 | 7 | ## Installation 8 | 9 | To install ebpf-kill-example, first clone this repository. 10 | 11 | ``` 12 | git clone https://github.com/niclashedam/ebpf-kill-example 13 | ``` 14 | 15 | Install dependencies needed to compile *ebpf-kill-example*. 16 | 17 | ``` 18 | make deps 19 | ``` 20 | 21 | Compile *ebpf-kill-example*. 22 | 23 | ``` 24 | make 25 | ``` 26 | 27 | ## Usage 28 | 29 | Run *ebpf-kill-example*. Super user privileges are required to load the program into the kernel. 30 | 31 | ``` 32 | sudo ./src/ebpf-kill-example 33 | ``` 34 | 35 | ## Test 36 | 37 | To test *ebpf-kill-example*, run `make test`. 38 | This will load the eBPF program, start a looping process and kill it. It will 39 | verify that the eBPF program was invoked when kill was called. 40 | 41 | ``` 42 | nhed@nhed-1:~/Development/ebpf-kill-example$ make test 43 | ./test/test.sh 44 | -- Loading eBPF program. 45 | -- Starting test process to kill. 46 | -- PID of test process is 332996. 47 | -- Killed. Waiting for eBPF program to terminate .. 48 | [ OK ] -- eBPF program ran as expected. 
49 | ``` 50 | 51 | ## Example 52 | ![Example](/img/example.png?raw=true) 53 | 54 | ## Contributing 55 | Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. 56 | 57 | Please make sure to update tests as appropriate. 58 | 59 | ## License 60 | [GPLv3](https://choosealicense.com/licenses/gpl-3.0/) 61 | -------------------------------------------------------------------------------- /kprobe_sys_execve/clang-8.exe.stackdump: -------------------------------------------------------------------------------- 1 | Stack trace: 2 | Frame Function Args 3 | 000FFFFAB88 0018006332E (0018028BBD8, 0018026AFEE, 000FFFFAB88, 000FFFF9A90) 4 | 000FFFFAB88 0018004977A (00000000000, 00000000000, 00000000000, 0018028BC90) 5 | 000FFFFAB88 001800497B2 (0018028BCD8, 000FFFFAA48, 000FFFFAB88, 00000000000) 6 | 000FFFFAB88 001800C9BF2 (00000000000, 00000000000, 00000000000, 00000000000) 7 | 000FFFFAB88 001800C9D90 (000FFFFABD0, 008000EF190, 000FFFFAD70, 00000000000) 8 | 0038724D560 001800CB5A5 (000000004F8, 00800000030, 00000000000, 00000000000) 9 | 0038724D560 001800CB8CD (008000F0660, 000FFFFB010, 000FFFFAEF0, 000FFFFAFE8) 10 | 0038724D560 001801BC7DA (000FFFFB010, 000FFFFB070, 000FFFFB858, 008000EF3B0) 11 | 0038724D560 001801BCB15 (00000000004, 008000EEFE0, 008000EF9F8, 008000EF3B0) 12 | 0038724D560 0018013E96B (00000000004, 008000EEFE0, 008000EF9F8, 008000EF3B0) 13 | 0038724D560 003F04EFB80 (001800D9A7C, 00000000000, 008000EF5B0, 000FFFFB180) 14 | 000FFFFB858 003F04F0735 (00000000035, 008000AD498, 000FFFFB460, 00300000000) 15 | 000FFFFB2C0 003D2C667DA (00180325DB0, 00000000000, 00180045A89, 00000000000) 16 | 008000EF190 003D2C43468 (00000000035, 008000ACAC8, 00000000010, 000FFFFBC70) 17 | 000FFFFB900 003D2C43BA6 (0018013E96B, 000FFFFB9F0, 0018005E556, 000FFFFBC70) 18 | 00180325DB0 003D2C4B9FC (00000000000, 000FFFFBBE0, 00000000000, 000FFFFBB80) 19 | 00000000001 00100411182 (0018023A780, 00000000007, 00000000000, 000FFFFCCE0) 20 | 
000FFFFCCE0 0018004AF2D (00000000000, 00000000000, 00000000000, 00000000000) 21 | 000FFFFFFF0 00180048886 (00000000000, 00000000000, 00000000000, 00000000000) 22 | 000FFFFFFF0 00180048934 (00000000000, 00000000000, 00000000000, 00000000000) 23 | End of stack trace 24 | -------------------------------------------------------------------------------- /ebpf_helloworld/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := ebpf-helloworld 2 | 3 | CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/testing/selftests/bpf 4 | CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/lib 5 | CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/include 6 | #CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/include/uapi 7 | CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/perf 8 | #CCINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/include 9 | 10 | LOADINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/samples/bpf 11 | LOADINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/lib 12 | LOADINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/perf 13 | LOADINCLUDE += -I/home/calm/Downloads/kernels/linux-5.10.14/tools/include 14 | 15 | LIBS = -lbpf -lelf 16 | 17 | all: ebpf-helloworld kern_helloworld.o 18 | 19 | .PHONY: clean 20 | 21 | clean: 22 | rm -f $(TARGET) 23 | rm -f *.o 24 | 25 | $(TARGET): %: load_helloworld.c 26 | #gcc $(CFLAGS) $(LDFLAGS) -o $(TARGET) user.c -Wl,-rpath=$(LIBBPF_DIR) $(LIBS) 27 | #gcc -Wall -Werror -g3 -ggdb3 -o ebpf-kill-example user.c -Wl,-Bstatic -lbpf -Wl,-Bdynamic -lgcc_s -lz -lelf 28 | clang -Wall -Werror,-Wunused-variable -Wextra -pedantic -Wshadow -march=native -g3 -ggdb3 -o $(TARGET) \ 29 | /home/calm/Downloads/kernels/linux-5.10.14/samples/bpf/bpf_load.c \ 30 | /home/calm/Downloads/kernels/linux-5.10.14/tools/testing/selftests/bpf/trace_helpers.c \ 31 | load_helloworld.c \ 32 | $(CCINCLUDE) $(LOADINCLUDE) 
-L/usr/local/lib64 -Wl,-rpath=/usr/local/lib64 $(LIBS) 33 | 34 | 35 | kern_helloworld.o: kern_helloworld.c 36 | clang -O2 -target bpf -c kern_helloworld.c $(CCINCLUDE) -D__TARGET_ARCH_x86 -D__KERNEL__ -D__BPF_TRACING__ -D__x86_64__ -o kern_helloworld.o 37 | #clang -S \ 38 | # -D __BPF_TRACING__ \ 39 | # -Wall \ 40 | # -Werror \ 41 | # -O2 -emit-llvm -c -g kern_helloworld.c 42 | #llc -march=bpf -filetype=obj -o kern_helloworld.o kern_helloworld.ll -------------------------------------------------------------------------------- /tcp_accelerate/tcp_accelerate.h: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-22 17:07:38 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-22 17:08:48 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #ifndef FORCE_READ 12 | # define FORCE_READ( X ) ( *( volatile typeof( X )* ) &X ) 13 | #endif 14 | 15 | #define printk( fmt, ... ) \ 16 | ( { \ 17 | char ____fmt[] = fmt; \ 18 | bpf_trace_printk( ____fmt, sizeof( ____fmt ), ##__VA_ARGS__ ); \ 19 | } ) 20 | 21 | struct sock_key { 22 | __u32 sip4; 23 | __u32 dip4; 24 | __u8 family; 25 | __u8 pad1; 26 | __u16 pad2; 27 | // this padding required for 64bit alignment 28 | // else ebpf kernel verifier rejects loading 29 | // of the program 30 | __u32 pad3; 31 | __u32 sport; 32 | __u32 dport; 33 | } __attribute__( ( packed ) ); 34 | 35 | // struct { 36 | // __uint( type, BPF_MAP_TYPE_SOCKHASH ); 37 | // __uint( max_entries, 65535 ); 38 | // __type( key, struct sock_key ); 39 | // __type( value, __s32 ); 40 | // __uint( map_flags, 0 ); 41 | // __uint( key_size, sizeof( struct sock_key ) ); 42 | // __uint( value_size, sizeof( __s32 ) ); 43 | // } sock_ops_map_1 SEC( ".maps" ); 44 | 45 | // struct { 46 | // __uint(type, BPF_MAP_TYPE_HASH); 47 | // __uint(max_entries, 64); 48 | // __type(key, __u32); 49 | // __type(value, __u64); 50 | // } sockhash SEC(".maps"); 51 | 52 | struct bpf_map_def SEC( "maps" ) 
sock_ops_map = { 53 | .type = BPF_MAP_TYPE_SOCKHASH, 54 | .key_size = sizeof(struct sock_key), 55 | .value_size = sizeof(int), 56 | .max_entries = 65535, 57 | .map_flags = 0, 58 | }; 59 | 60 | // map的定义不同,上面会创建失败,Error in bpf_create_map_xattr(sock_ops_map):ERROR: strerror_r(-524)=22(-524) -------------------------------------------------------------------------------- /tcp_accelerate/tcp_accelerate_sockops.kern.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-22 10:39:52 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-22 17:51:16 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | //#include 这个需要BTF支持 14 | #include 15 | #include 16 | #include 17 | 18 | #include "tcp_accelerate.h" 19 | 20 | static inline void sk_extractv4_key( struct bpf_sock_ops* skops, struct sock_key* key ) { 21 | key->dip4 = skops->remote_ip4; 22 | key->sip4 = skops->local_ip4; 23 | key->family = 2; 24 | 25 | // local_port is in host byte order, and remote_port is in network byte order 26 | key->sport = ( bpf_htonl( skops->local_port ) >> 16 ); 27 | key->dport = FORCE_READ( skops->remote_port ) >> 16; 28 | } 29 | 30 | static inline void bpf_sock_ops_ipv4( struct bpf_sock_ops* skops ) { 31 | struct sock_key key = {}; 32 | 33 | sk_extractv4_key( skops, &key ); 34 | 35 | __s32 ret = bpf_sock_hash_update( skops, &sock_ops_map, &key, BPF_NOEXIST ); 36 | if ( ret != 0 ) { 37 | printk( "FAILED: sock_hash_update ret: %d\n", ret ); 38 | } else { 39 | printk( "remote-ip = %u, local-ip = %u\n", bpf_htonl( skops->remote_ip4 ), bpf_htonl( skops->local_ip4 ) ); 40 | 41 | printk( "<<< ipv4 op = %d, local-port %d --> remote-port %d\n", skops->op, skops->local_port, 42 | bpf_ntohl( skops->remote_port ) ); 43 | } 44 | } 45 | 46 | /* 47 | eBPF program type SOCK_OPS which gets invoked upon TCP events such as connection establishment, tcp retransmit, etc 48 | */ 49 | SEC( "sockops" ) 50 | __s32 
bpf_sockops_v4( struct bpf_sock_ops* skops ) { 51 | __u32 family, op; 52 | 53 | family = skops->family; 54 | op = skops->op; 55 | 56 | switch ( op ) { 57 | case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: // 被动建立连接,目的端发送SYN+ACK会命中 58 | case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: // 主动建立连接,源端发送SYN会命中 59 | if ( family == AF_INET ) { 60 | bpf_sock_ops_ipv4( skops ); 61 | } 62 | break; 63 | default: 64 | break; 65 | } 66 | return 0; 67 | } 68 | 69 | char _license[] SEC( "license" ) = "GPL"; 70 | __u32 _version SEC( "version" ) = LINUX_VERSION_CODE; -------------------------------------------------------------------------------- /ebpf-kill-example/src/kern.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define SIGKILL 9 6 | 7 | #define bpfprint(fmt, ...) \ 8 | ({ \ 9 | char __fmt[] = fmt; \ 10 | bpf_trace_printk(__fmt, sizeof(__fmt), ##__VA_ARGS__); \ 11 | }) 12 | 13 | // Data in this map is accessible in user-space 14 | // The particular syntax with __uint() and __type() and the use of the .maps 15 | // section enable BTF for this map. For an example, "bpftool map dump" will 16 | // show the map structure. 17 | struct { 18 | __uint(type, BPF_MAP_TYPE_HASH); 19 | __type(key, long); 20 | __type(value, char); 21 | __uint(max_entries, 64); 22 | } kill_map SEC(".maps"); 23 | 24 | // This is the tracepoint arguments of the kill functions 25 | // /sys/kernel/debug/tracing/events/syscalls/sys_enter_kill/format 26 | struct syscalls_enter_kill_args { 27 | long long pad; 28 | 29 | long syscall_nr; 30 | long pid; 31 | long sig; 32 | }; 33 | 34 | // 这里定义了BPF_PROG_TYPE_TRACEPOINT类型的BPF Program 35 | SEC("tracepoint/syscalls/sys_enter_kill") 36 | int ebpf_kill_example(struct syscalls_enter_kill_args *ctx) { 37 | // For this tiny example, we will only listen for "kill -9". 
38 | // Bear in mind that there exist many other signals, and it 39 | // may be possible to stop or forcefully terminate processes 40 | // with other signals. 41 | if (ctx->sig != SIGKILL) return 0; 42 | 43 | bpfprint("[calm]---PID %u is being killed! syscall_nr:%u\n", ctx->pid, ctx->syscall_nr); 44 | 45 | // We can call glibc functions in eBPF program if and only if 46 | // they are not too large and do not use any of the risky operations 47 | // disallowed by the eBPF verifier. These include, among others, 48 | // complex loops and floats. 49 | long key = labs(ctx->pid); 50 | char val = 1; // same type as kill_map's declared value (char): the update helper copies value_size bytes from &val, so an int here would store 0 on a big-endian target 51 | 52 | // Mark the PID as killed in the map. 53 | // This will create an entry where the killed PID is set to 1. 54 | bpf_map_update_elem(&kill_map, &key, &val, BPF_NOEXIST); 55 | 56 | return 0; 57 | } 58 | 59 | // Some eBPF programs must be GPL licensed. This depends on program types, 60 | // eBPF helpers used and among other things. As this eBPF program is 61 | // integrating with tracepoints, it must be GPL. 
62 | char _license[] SEC("license") = "GPL"; 63 | -------------------------------------------------------------------------------- /tp_execve/clang/event_help.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-01 14:32:52 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 15:01:12 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "bpf_help.h" 13 | #include "event.h" 14 | #include "event_help.h" 15 | 16 | struct timespec start_time; 17 | 18 | static void time_since_start() { 19 | int64_t secs, nsecs; 20 | static struct timespec cur_time; 21 | double time_diff; 22 | 23 | clock_gettime( CLOCK_MONOTONIC, &cur_time ); 24 | nsecs = cur_time.tv_nsec - start_time.tv_nsec; 25 | secs = cur_time.tv_sec - start_time.tv_sec; 26 | if ( nsecs < 0 ) { 27 | nsecs += NSEC_PER_SEC; 28 | secs--; 29 | } 30 | 31 | time_diff = secs + ( ( double ) nsecs / NSEC_PER_SEC ); // force floating-point division; with an integer NSEC_PER_SEC the sub-second part would truncate to 0 32 | printf( "%-8.3f", time_diff ); 33 | } 34 | 35 | static void inline quoted_symbol( char c ) { 36 | switch ( c ) { 37 | case '"': 38 | putchar( '\\' ); 39 | putchar( '"' ); 40 | break; 41 | case '\t': 42 | putchar( '\\' ); 43 | putchar( 't' ); 44 | break; 45 | case '\n': 46 | putchar( '\\' ); 47 | putchar( 'n' ); 48 | break; 49 | default: 50 | putchar( c ); 51 | break; 52 | } 53 | } 54 | 55 | static void print_args( const struct event_t* e, bool quote ) { 56 | int32_t i, args_counter = 0; 57 | 58 | if ( quote ) 59 | putchar( '"' ); 60 | 61 | for ( i = 0; i < e->args_size && args_counter < e->args_count; i++ ) { 62 | char c = e->args[ i ]; 63 | 64 | if ( quote ) { 65 | if ( c == '\0' ) { 66 | args_counter++; 67 | putchar( '"' ); 68 | putchar( ' ' ); 69 | if ( args_counter < e->args_count ) { 70 | putchar( '"' ); 71 | } 72 | } else { 73 | quoted_symbol( c ); 74 | } 75 | } else { 76 | if ( c == '\0' ) { 77 | args_counter++; 78 | putchar( ' ' ); 79 | } else { 80 | putchar( c ); 81 | } 82 | } 83 | } 84 | if ( 
e->args_count == g_env.max_args + 1 ) { 85 | fputs( " ...", stdout ); 86 | } 87 | } 88 | 89 | void handle_event( void* ctx, int32_t cpu, void* data, uint32_t size ) { 90 | const struct event_t* e = ( const struct event_t* ) data; 91 | time_t t; 92 | struct tm* tm; 93 | char ts[ 32 ]; 94 | 95 | time( &t ); 96 | tm = localtime( &t ); 97 | strftime( ts, sizeof( ts ), "%H:%M:%S", tm ); 98 | 99 | printf( "%-8s ", ts ); 100 | time_since_start(); 101 | printf( "%-6d", e->uid ); 102 | printf( "%-16s %-6d %-6d %3d ", e->comm, e->pid, e->ppid, e->retval ); 103 | print_args( e, true ); 104 | putchar( '\n' ); 105 | } 106 | 107 | void handle_lost_event( void* ctx, int32_t cpu, uint64_t lost_cnt ) { 108 | fprintf( stderr, "Lost %lu events on CPU #%d!\n", lost_cnt, cpu ); 109 | } -------------------------------------------------------------------------------- /tcp_accelerate/Makefile: -------------------------------------------------------------------------------- 1 | APP_TAG = tcp_accelerate 2 | OUTPUT := .output 3 | # :前面的变量不能使用后面的变量,只能使用前面已定义好了的变量 4 | SRCS = $(wildcard *.c) 5 | OBJS := $(patsubst %.c, %.o, $(SRCS)) 6 | 7 | BPF_KERN_OBJS = $(APP_TAG)_bypass.kern.o $(APP_TAG)_sockops.kern.o 8 | 9 | BPFTOOL ?= /usr/sbin/bpftool 10 | CFLAGS := -g -O2 -Wall 11 | INCLUDES = 12 | LLC = llc 13 | CC = gcc 14 | CLANG = clang 15 | OPT = opt 16 | LLVM_DIS ?= llvm-dis 17 | KERNEL_SRC = /usr/src/linux-5.12.9 18 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/' | sed 's/ppc64le/powerpc/' | sed 's/mips.*/mips/') 19 | 20 | LIBS = $(KERNEL_SRC)/tools/lib/bpf/libbpf.a -lelf -lz 21 | 22 | KERNEL_SRC_INCLUDE := \ 23 | -I$(KERNEL_SRC)/arch/$(ARCH)/include \ 24 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated \ 25 | -I$(KERNEL_SRC)/include \ 26 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/uapi \ 27 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated/uapi \ 28 | -I$(KERNEL_SRC)/include/uapi \ 29 | -I$(KERNEL_SRC)/include/generated/uapi \ 30 | -I$(KERNEL_SRC)/samples/bpf \ 31 | 
-I$(KERNEL_SRC)/tools/testing/selftests/bpf \ 32 | -I$(KERNEL_SRC)/tools/lib \ 33 | -include $(KERNEL_SRC)/include/linux/kconfig.h 34 | # -include $(KERNEL_SRC)/include/linux/compiler-version.h 35 | 36 | 37 | K_CFLAGS = -g -O2 -Wall -nostdinc -isystem `$(CLANG) -print-file-name=include` 38 | K_CFLAGS += -D__KERNEL__ -D__BPF_TRACING__ -D__TARGET_ARCH_$(ARCH) 39 | K_CFLAGS += -Wunused -Wall \ 40 | -Wno-compare-distinct-pointer-types \ 41 | -Wno-pointer-sign \ 42 | -Wno-gnu-variable-sized-type-not-at-end \ 43 | -Wno-address-of-packed-member \ 44 | -Wno-tautological-compare \ 45 | -Wno-unknown-warning-option \ 46 | -fno-stack-protector 47 | 48 | K_CFLAGS += -I./ $(KERNEL_SRC_INCLUDE) 49 | 50 | 51 | COMMON_HELPER_OBJS = \ 52 | $(KERNEL_SRC)/tools/testing/selftests/bpf/trace_helpers.o 53 | 54 | .PHONY: all 55 | all : $(BPF_KERN_OBJS) 56 | 57 | ifeq ("$(origin V)", "command line") 58 | VERBOSE = $(V) 59 | endif 60 | ifndef VERBOSE 61 | VERBOSE = 0 62 | endif 63 | 64 | ifeq ($(VERBOSE),1) 65 | Q = 66 | msg = 67 | else 68 | Q = @ 69 | msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; 70 | endif 71 | 72 | $(OUTPUT): 73 | $(call msg,MKDIR,$@) 74 | $(Q)mkdir -p $@ 75 | 76 | $(APP_TAG)_bypass.kern.o: %.kern.o: %.kern.c 77 | $(call msg,BPF,$@) 78 | $(Q)$(CLANG) $(K_CFLAGS) -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ 79 | $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ 80 | $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ 81 | 82 | $(APP_TAG)_sockops.kern.o: %.kern.o: %.kern.c 83 | $(call msg,BPF,$@) 84 | $(Q)$(CLANG) $(K_CFLAGS) -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ 85 | $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ 86 | $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ 87 | 88 | .PHONY: clean 89 | clean: 90 | $(call msg, CLEANING) 91 | $(Q)rm -rf $(OUTPUT) $(BPF_KERN_OBJS) -------------------------------------------------------------------------------- /ebpf_helloworld/kern_helloworld.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * @Author: calmwu 3 | * @Date: 2021-02-11 17:16:06 4 | * @Last Modified by: calmwu 5 | * @Last Modified time: 2021-02-11 20:56:10 6 | */ 7 | 8 | //#include 9 | //#include 10 | //#include 11 | //#include 12 | #include 13 | #include 14 | #include 15 | //#include 16 | //#include 17 | //#include 18 | 19 | #define ARG_MAX 16 /* # chars in a file name */ 20 | 21 | # define printk(fmt, ...) \ 22 | ({ \ 23 | char ____fmt[] = fmt; \ 24 | bpf_trace_printk(____fmt, sizeof(____fmt), \ 25 | ##__VA_ARGS__); \ 26 | }) 27 | 28 | struct syscalls_enter_execve_args { 29 | u64 pad; 30 | 31 | u64 syscall_nr; 32 | const char *filename_ptr; 33 | char *const *argv; 34 | char *const *envp; 35 | }; 36 | 37 | SEC("tracepoint/syscalls/sys_enter_execve") 38 | int bpf_prog(struct syscalls_enter_execve_args *ctx) { 39 | // char msg[] = "Hello, BPF world!"; 40 | // bpfprint(msg); 41 | 42 | // char arg[ARG_MAX] = {}; 43 | 44 | //char fmt_1[] = "execve program: %s"; 45 | char comm[16]; 46 | bpf_get_current_comm(&comm, sizeof(comm)); 47 | printk("execve program: %s", comm); 48 | 49 | //char fmt_2[] = "execve process running with PID: %d"; 50 | u64 pid = bpf_get_current_pid_tgid() >> 32; // upper 32 bits are the tgid (the user-space PID); the lower 32 bits are the thread id 51 | printk("execve process running with PID: %d", pid); 52 | 53 | //char fmt[] = "execve syscall_nr{%u} filename_ptr:[%s]\n"; 54 | printk("execve syscall_nr{%u} filename_ptr:[%s]", ctx->syscall_nr, ctx->filename_ptr); 55 | 56 | // At most three arguments; this output is the same as /sys/kernel/debug/tracing/trace_pipe 57 | printk("execve filename: %lx argv: %lx, envp: %lx", (u64)(ctx->filename_ptr), (u64)(ctx->argv), (u64)(ctx->envp)); 58 | 59 | //printk("execve argv[0]: %p", (ctx->argv)[0]); 60 | 61 | // char arg0[ARG_MAX] = {}; 62 | 63 | // // // bpf_probe_read_user_str(arg0, ARG_MAX, ((char **)(ctx->argv))[0]); 64 | // // // printk("execve program first args:%s\n", ((char *const 65 | // // // *)(ctx->argv))[0]); 66 | // char *const *argv_pp = (char *const 
*)ctx->argv; 67 | // bpf_probe_read(arg0, ARG_MAX - 1, (const void*)*argv_pp); 68 | // printk("execve argv[0]: %s\n", arg0); 69 | 70 | // // char fmtArgv[] = "argc:%d"; 71 | // char *const *argv_pp = (char *const *)(ctx->argv); 72 | // for (char *argv_p = *argv_pp; argv_p;) { 73 | // if (NULL == argv_p) { 74 | // break; 75 | // } 76 | // argv_pp = argv_pp + 1; 77 | // argv_p = *argv_pp; 78 | // } 79 | 80 | // //bpf_probe_read(&arg, sizeof(arg), (void*)argv[i]); 81 | // // else { 82 | // // bpf_trace_printk(fmtArgv, sizeof(fmtArgv), i); 83 | // // } 84 | // //(void)(fmtArgv); 85 | // bpf_trace_printk(fmtArgv, sizeof(fmtArgv), i); 86 | // } 87 | printk("-------------------\n"); 88 | return 0; 89 | } 90 | 91 | char _license[] SEC("license") = "GPL"; -------------------------------------------------------------------------------- /tp_execve/clang/tp_execve.user.purec.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-09-01 10:32:41 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 17:21:31 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | // #include 14 | // #include 15 | // #include 16 | // #include 17 | // #include 18 | 19 | #include "bpf_help.h" 20 | #include "event.h" 21 | #include "event_help.h" 22 | 23 | #define PERF_BUFFER_PAGES 64 24 | 25 | int32_t main( int32_t argc, char** argv ) { 26 | int32_t err = 0; 27 | int32_t map_fd, ret, j = 0; 28 | 29 | struct perf_buffer_opts pb_opts = {}; 30 | struct perf_buffer* pb = NULL; 31 | 32 | struct bpf_object* obj; 33 | struct bpf_program* prog; 34 | struct bpf_link* links[ 2 ]; 35 | 36 | const char* kern_obj = "tp_execve.kern.o"; 37 | 38 | fprintf( stderr, "main: %s\n", argv[ 0 ] ); 39 | 40 | libbpf_set_print( bpf_printf ); 41 | 42 | err = bump_memlock_rlimit(); 43 | if ( err ) { 44 | fprintf( stderr, "failed to increase rlimit: %d\n", err ); 45 | return 1; 46 | } 47 | 48 | obj = 
bpf_object__open_file( kern_obj, NULL ); 49 | if ( libbpf_get_error( obj ) ) { 50 | fprintf( stderr, "ERROR: opening BPF object file failed\n" ); 51 | return 0; 52 | } 53 | 54 | /* load BPF program */ 55 | if ( bpf_object__load( obj ) ) { 56 | fprintf( stderr, "ERROR: loading BPF object file failed\n" ); 57 | goto cleanup; 58 | } 59 | 60 | // find map 61 | map_fd = bpf_object__find_map_fd_by_name( obj, "execve_perf_evt_map" ); 62 | if ( map_fd < 0 ) { 63 | fprintf( stderr, "ERROR: finding a map in obj file failed\n" ); 64 | goto cleanup; 65 | } 66 | 67 | bpf_object__for_each_program( prog, obj ) { 68 | prog->log_level = 1; 69 | links[ j ] = bpf_program__attach( prog ); 70 | if ( libbpf_get_error( links[ j ] ) ) { 71 | fprintf( stderr, "%d: bpf_program__attach failed\n", j ); 72 | links[ j ] = NULL; 73 | goto cleanup; 74 | } 75 | fprintf( stderr, "%d bpf program attach successed\n", j ); 76 | j++; 77 | } 78 | 79 | printf( "%-9s", "TIME" ); 80 | printf( "%-8s ", "TIME(s)" ); 81 | printf( "%-6s ", "UID" ); 82 | printf( "%-16s %-6s %-6s %3s %s\n", "PCOMM", "PID", "PPID", "RET", "ARGS" ); 83 | // setup perf event callback 84 | pb_opts.sample_cb = handle_event; 85 | pb_opts.lost_cb = handle_lost_event; 86 | pb = perf_buffer__new( map_fd, PERF_BUFFER_PAGES, &pb_opts ); 87 | err = libbpf_get_error( pb ); 88 | if ( err ) { 89 | pb = NULL; 90 | fprintf( stderr, "failed to open perf buffer: %d\n", err ); 91 | goto cleanup; 92 | } 93 | 94 | // loop perf event 95 | while ( ( err = perf_buffer__poll( pb, 100 ) ) >= 0 ) { } 96 | printf( "Error polling perf buffer: %d\n", err ); 97 | 98 | cleanup: 99 | for ( j--; j >= 0; j-- ) 100 | bpf_link__destroy( links[ j ] ); 101 | 102 | bpf_object__close( obj ); 103 | 104 | return 0; 105 | } -------------------------------------------------------------------------------- /kprobe_sys_execve/kp_execve.user.purec.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 
2021-08-23 15:19:48 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 14:22:49 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define TASK_COMM_LEN 16 18 | 19 | #include "execve_data.h" 20 | 21 | static void print_bpf_output( void* ctx, int cpu, void* data, __u32 size ) { 22 | struct data_t* d = data; 23 | fprintf( stderr, "pid:%d, tid:%d uid:%d, ret:%d cpu:%d executing program:%s filename:[%s]\n", 24 | d->pid, d->tid, d->uid, d->ret, cpu, d->comm, d->filename ); 25 | } 26 | 27 | int main( int argc, char** argv ) { 28 | 29 | struct perf_buffer_opts pb_opts = {}; 30 | struct bpf_link *links[2] = {}; 31 | struct bpf_program *prog; 32 | struct perf_buffer *pb; 33 | struct bpf_object *obj; 34 | int map_fd, ret = 0, j = 0; 35 | 36 | const char* kern_obj = "kp_execve.kern.o"; 37 | 38 | obj = bpf_object__open_file( kern_obj, NULL ); 39 | if ( libbpf_get_error( obj ) ) { 40 | fprintf( stderr, "ERROR: opening BPF object file failed\n" ); 41 | return 0; 42 | } 43 | 44 | /* load BPF program */ 45 | if ( bpf_object__load( obj ) ) { 46 | fprintf( stderr, "ERROR: loading BPF object file failed\n" ); 47 | goto cleanup; 48 | } 49 | 50 | // find perf event map 51 | map_fd = bpf_object__find_map_fd_by_name( obj, "execve_perf_evt_map" ); 52 | if ( map_fd < 0 ) { 53 | fprintf( stderr, "ERROR: finding a map in obj file failed\n" ); 54 | goto cleanup; 55 | } 56 | 57 | bpf_object__for_each_program( prog, obj ) { 58 | //prog->log_level = 1; 59 | links[ j ] = bpf_program__attach( prog ); 60 | if ( libbpf_get_error( links[ j ] ) ) { 61 | fprintf( stderr, "%d: bpf_program__attach failed\n", j ); 62 | links[ j ] = NULL; 63 | goto cleanup; 64 | } 65 | fprintf( stderr, "%d bpf program attach successed\n", j ); 66 | j++; 67 | } 68 | 69 | // prog = bpf_object__find_program_by_name( obj, "kprobe_sys_execve" ); 70 | // if ( !prog ) { 71 | // fprintf( stderr, "ERROR: finding a prog in obj file failed\n" 
); 72 | // goto cleanup; 73 | // } 74 | 75 | // link = bpf_program__attach( prog ); 76 | // if ( libbpf_get_error( link ) ) { 77 | // fprintf( stderr, "ERROR: bpf_program__attach failed\n" ); 78 | // link = NULL; 79 | // goto cleanup; 80 | // } 81 | 82 | pb_opts.sample_cb = print_bpf_output; 83 | // 8: 为每个CPU分配8个页,也就是32KB的缓冲区 84 | pb = perf_buffer__new( map_fd, 8, &pb_opts ); 85 | ret = libbpf_get_error( pb ); 86 | if ( ret ) { 87 | printf( "failed to setup perf_buffer: %d\n", ret );//🤣🤞 88 | return 1; 89 | } 90 | 91 | // read_trace_pipe(); 92 | while ( ( ret = perf_buffer__poll( pb, 100 ) ) >= 0 ) { 93 | } 94 | 95 | cleanup: 96 | for ( j--; j >= 0; j-- ) 97 | bpf_link__destroy( links[ j ] ); 98 | 99 | bpf_object__close( obj ); 100 | return 0; 101 | } -------------------------------------------------------------------------------- /kprobe_sys_execve/Makefile: -------------------------------------------------------------------------------- 1 | APP_EXECVE_BTF = kp_execve_btf 2 | APP_EXECVE_PUREC = kp_execve_clang 3 | 4 | APP_TAG = kp_execve 5 | OUTPUT := .output 6 | # :前面的变量不能使用后面的变量,只能使用前面已定义好了的变量 7 | SRCS = $(wildcard *.c) 8 | OBJS := $(patsubst %.c, %.o, $(SRCS)) 9 | 10 | BPF_KERN_OBJ = $(patsubst %,%.kern.o,$(APP_TAG)) 11 | BPF_USER_PUREC_OBJ = $(patsubst %,%.user.purec.o,$(APP_TAG)) 12 | BPF_USER_BTF_OBJ = $(patsubst %,%.user.btf.o,$(APP_TAG)) 13 | 14 | # 编写的辅助函数 15 | #HELP_EXCLUDE_OBJS := $(BPF_KERN_OBJ) $(BPF_USER_PUREC_OBJ) $(BPF_USER_BTF_OBJ) 16 | HELP_OBJS_1 := $(filter-out $(BPF_KERN_OBJ) $(BPF_USER_PUREC_OBJ) $(BPF_USER_BTF_OBJ), $(OBJS)) 17 | HELP_OBJS := $(addprefix $(OUTPUT)/, $(HELP_OBJS_1)) 18 | 19 | BPFTOOL ?= /usr/sbin/bpftool 20 | CFLAGS := -g -O2 -Wall 21 | INCLUDES = 22 | LLC = llc 23 | CC = gcc 24 | CLANG = clang 25 | OPT = opt 26 | LLVM_DIS ?= llvm-dis 27 | KERNEL_SRC = /usr/src/linux-5.12.9 28 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/' | sed 's/ppc64le/powerpc/' | sed 's/mips.*/mips/') 29 | 30 | LIBS = 
$(KERNEL_SRC)/tools/lib/bpf/libbpf.a -lelf -lz 31 | 32 | KERNEL_SRC_INCLUDE := \ 33 | -I$(KERNEL_SRC)/arch/$(ARCH)/include \ 34 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated \ 35 | -I$(KERNEL_SRC)/include \ 36 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/uapi \ 37 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated/uapi \ 38 | -I$(KERNEL_SRC)/include/uapi \ 39 | -I$(KERNEL_SRC)/include/generated/uapi \ 40 | -I$(KERNEL_SRC)/samples/bpf \ 41 | -I$(KERNEL_SRC)/tools/testing/selftests/bpf \ 42 | -I$(KERNEL_SRC)/tools/lib \ 43 | -include $(KERNEL_SRC)/include/linux/kconfig.h 44 | # -include $(KERNEL_SRC)/include/linux/compiler-version.h 45 | 46 | 47 | K_CFLAGS = -g -O2 -Wall -nostdinc -isystem `$(CLANG) -print-file-name=include` 48 | K_CFLAGS += -D__KERNEL__ -D__BPF_TRACING__ -D__TARGET_ARCH_$(ARCH) 49 | K_CFLAGS += -Wunused -Wall \ 50 | -Wno-compare-distinct-pointer-types \ 51 | -Wno-pointer-sign \ 52 | -Wno-gnu-variable-sized-type-not-at-end \ 53 | -Wno-address-of-packed-member \ 54 | -Wno-tautological-compare \ 55 | -Wno-unknown-warning-option \ 56 | -fno-stack-protector 57 | 58 | K_CFLAGS += -I./ $(KERNEL_SRC_INCLUDE) 59 | 60 | U_CFLAGS = -Wunused -Wall -Wno-unused-variable -Wmissing-prototypes -Wstrict-prototypes -g3 -ggdb -DDEBUG 61 | U_CFLAGS += -I./ -I$(KERNEL_SRC)/usr/include 62 | #U_CFLAGS += -I$(KERNEL_SRC)/include 63 | U_CFLAGS += -I$(KERNEL_SRC)/tools/lib -I$(KERNEL_SRC)/tools/include -I$(KERNEL_SRC)/tools/perf 64 | U_CFLAGS += -I$(KERNEL_SRC)/tools/testing/selftests/bpf 65 | 66 | COMMON_HELPER_OBJS = \ 67 | $(KERNEL_SRC)/tools/testing/selftests/bpf/trace_helpers.o 68 | 69 | .PHONY: all 70 | all : $(APP_EXECVE_BTF) $(APP_EXECVE_PUREC) 71 | 72 | ifeq ("$(origin V)", "command line") 73 | VERBOSE = $(V) 74 | endif 75 | ifndef VERBOSE 76 | VERBOSE = 0 77 | endif 78 | 79 | ifeq ($(VERBOSE),1) 80 | Q = 81 | msg = 82 | else 83 | Q = @ 84 | msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; 85 | endif 86 | 87 | $(OUTPUT): 88 | $(call msg,MKDIR,$@) 
89 | $(Q)mkdir -p $@ 90 | 91 | # $^ 所有的依赖文件 92 | $(APP_EXECVE_BTF): %: $(OUTPUT)/$(BPF_USER_BTF_OBJ) $(HELP_OBJS) 93 | $(call msg,APP_EXECVE_BTF,$@) 94 | $(Q)$(CC) -o $@ $^ $(COMMON_HELPER_OBJS) $(LIBS) 95 | 96 | $(APP_EXECVE_PUREC): %: $(OUTPUT)/$(BPF_USER_PUREC_OBJ) $(HELP_OBJS) 97 | $(call msg,APP_EXECVE_PUREC,$@) 98 | $(Q)$(CC) -o $@ $^ $(COMMON_HELPER_OBJS) $(LIBS) 99 | 100 | $(HELP_OBJS): $(OUTPUT)/%.o: %.c 101 | $(call msg,HELP_OBJS,$@) 102 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 103 | 104 | # 推导必须一层一层,如果已经解析出来了,就没法解析%了 105 | $(OUTPUT)/$(BPF_USER_BTF_OBJ): $(OUTPUT)/%.user.btf.o: %.user.btf.c %.skel.h | $(OUTPUT) 106 | $(call msg,BPF_USER_BTF_OBJ,$@) 107 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 108 | 109 | # $< 第一个依赖文件 110 | $(OUTPUT)/$(BPF_USER_PUREC_OBJ): $(OUTPUT)/%.user.purec.o: %.user.purec.c %.kern.o | $(OUTPUT) 111 | $(call msg,BPF_USER_PUREC_OBJ,$@) 112 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 113 | 114 | $(patsubst %,%.skel.h,$(APP_TAG)): $(patsubst %,%.kern.o,$(APP_TAG)) 115 | $(call msg,GEN-SKEL,$@) 116 | $(Q)$(BPFTOOL) gen skeleton $< > $@ 117 | 118 | $(patsubst %,%.kern.o,$(APP_TAG)): $(patsubst %,%.kern.c,$(APP_TAG)) 119 | $(call msg,BPF,$@) 120 | $(Q)$(CLANG) $(K_CFLAGS) -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ 121 | $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ 122 | $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ 123 | 124 | .PHONY: clean 125 | clean: 126 | $(call msg, CLEANING) 127 | $(Q)rm -rf $(OUTPUT) $(APP_EXECVE_PUREC) $(APP_EXECVE_BTF) *.o *.skel.h -------------------------------------------------------------------------------- /tp_execve/clang/Makefile: -------------------------------------------------------------------------------- 1 | APP_EXECVE_BTF = tp_execve_btf 2 | APP_EXECVE_PUREC = tp_execve_clang 3 | 4 | APP_TAG = tp_execve 5 | OUTPUT := .output 6 | # :前面的变量不能使用后面的变量,只能使用前面已定义好了的变量 7 | SRCS = $(wildcard *.c) 8 | OBJS := $(patsubst %.c, %.o, $(SRCS)) 9 | 10 | BPF_KERN_OBJ = $(patsubst %,%.kern.o,$(APP_TAG)) 11 | 
BPF_USER_PUREC_OBJ = $(patsubst %,%.user.purec.o,$(APP_TAG)) 12 | BPF_USER_BTF_OBJ = $(patsubst %,%.user.btf.o,$(APP_TAG)) 13 | 14 | # 编写的辅助函数 15 | #HELP_EXCLUDE_OBJS := $(BPF_KERN_OBJ) $(BPF_USER_PUREC_OBJ) $(BPF_USER_BTF_OBJ) 16 | HELP_OBJS_1 := $(filter-out $(BPF_KERN_OBJ) $(BPF_USER_PUREC_OBJ) $(BPF_USER_BTF_OBJ), $(OBJS)) 17 | HELP_OBJS := $(addprefix $(OUTPUT)/, $(HELP_OBJS_1)) 18 | 19 | BPFTOOL ?= /usr/sbin/bpftool 20 | CFLAGS := -g -O2 -Wall 21 | INCLUDES = 22 | LLC = llc 23 | CC = gcc 24 | CLANG = clang 25 | OPT = opt 26 | LLVM_DIS ?= llvm-dis 27 | KERNEL_SRC = /usr/src/linux-5.12.9 28 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/' | sed 's/ppc64le/powerpc/' | sed 's/mips.*/mips/') 29 | 30 | LIBS = $(KERNEL_SRC)/tools/lib/bpf/libbpf.a -lelf -lz 31 | 32 | KERNEL_SRC_INCLUDE := \ 33 | -I$(KERNEL_SRC)/arch/$(ARCH)/include \ 34 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated \ 35 | -I$(KERNEL_SRC)/include \ 36 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/uapi \ 37 | -I$(KERNEL_SRC)/arch/$(ARCH)/include/generated/uapi \ 38 | -I$(KERNEL_SRC)/include/uapi \ 39 | -I$(KERNEL_SRC)/include/generated/uapi \ 40 | -I$(KERNEL_SRC)/samples/bpf \ 41 | -I$(KERNEL_SRC)/tools/testing/selftests/bpf \ 42 | -I$(KERNEL_SRC)/tools/lib \ 43 | -include $(KERNEL_SRC)/include/linux/kconfig.h 44 | # -include $(KERNEL_SRC)/include/linux/compiler-version.h 45 | 46 | 47 | K_CFLAGS = -g -O2 -Wall -nostdinc -isystem `$(CLANG) -print-file-name=include` 48 | K_CFLAGS += -D__KERNEL__ -D__BPF_TRACING__ -D__TARGET_ARCH_$(ARCH) 49 | K_CFLAGS += -Wunused -Wall \ 50 | -Wno-compare-distinct-pointer-types \ 51 | -Wno-pointer-sign \ 52 | -Wno-gnu-variable-sized-type-not-at-end \ 53 | -Wno-address-of-packed-member \ 54 | -Wno-tautological-compare \ 55 | -Wno-unknown-warning-option \ 56 | -fno-stack-protector 57 | 58 | K_CFLAGS += -I./ $(KERNEL_SRC_INCLUDE) 59 | 60 | U_CFLAGS = -Wunused -Wall -Wno-unused-variable -Wmissing-prototypes -Wstrict-prototypes -g3 -ggdb -DDEBUG 61 | 
U_CFLAGS += -I./ -I$(KERNEL_SRC)/usr/include 62 | #U_CFLAGS += -I$(KERNEL_SRC)/include 63 | U_CFLAGS += -I$(KERNEL_SRC)/tools/lib -I$(KERNEL_SRC)/tools/include -I$(KERNEL_SRC)/tools/perf 64 | U_CFLAGS += -I$(KERNEL_SRC)/tools/testing/selftests/bpf 65 | 66 | COMMON_HELPER_OBJS = \ 67 | $(KERNEL_SRC)/tools/testing/selftests/bpf/trace_helpers.o 68 | 69 | .PHONY: all 70 | all : $(APP_EXECVE_BTF) $(APP_EXECVE_PUREC) 71 | 72 | ifeq ("$(origin V)", "command line") 73 | VERBOSE = $(V) 74 | endif 75 | ifndef VERBOSE 76 | VERBOSE = 0 77 | endif 78 | 79 | ifeq ($(VERBOSE),1) 80 | Q = 81 | msg = 82 | else 83 | Q = @ 84 | msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; 85 | endif 86 | 87 | $(OUTPUT): 88 | $(call msg,MKDIR,$@) 89 | $(Q)mkdir -p $@ 90 | 91 | # $^ 所有的依赖文件 92 | $(APP_EXECVE_BTF): %: $(OUTPUT)/$(BPF_USER_BTF_OBJ) $(HELP_OBJS) 93 | $(call msg,APP_EXECVE_BTF,$@) 94 | $(Q)$(CC) -o $@ $^ $(COMMON_HELPER_OBJS) $(LIBS) 95 | 96 | $(APP_EXECVE_PUREC): %: $(OUTPUT)/$(BPF_USER_PUREC_OBJ) $(HELP_OBJS) 97 | $(call msg,APP_EXECVE_PUREC,$@) 98 | $(Q)$(CC) -o $@ $^ $(COMMON_HELPER_OBJS) $(LIBS) 99 | 100 | $(HELP_OBJS): $(OUTPUT)/%.o: %.c 101 | $(call msg,HELP_OBJS,$@) 102 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 103 | 104 | # 推导必须一层一层,如果已经解析出来了,就没法解析%了 105 | $(OUTPUT)/$(BPF_USER_BTF_OBJ): $(OUTPUT)/%.user.btf.o: %.user.btf.c %.skel.h | $(OUTPUT) 106 | $(call msg,BPF_USER_BTF_OBJ,$@) 107 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 108 | 109 | # $< 第一个依赖文件 110 | $(OUTPUT)/$(BPF_USER_PUREC_OBJ): $(OUTPUT)/%.user.purec.o: %.user.purec.c %.kern.o | $(OUTPUT) 111 | $(call msg,BPF_USER_PUREC_OBJ,$@) 112 | $(Q)$(CC) $(U_CFLAGS) -c $< -o $@ 113 | 114 | $(patsubst %,%.skel.h,$(APP_TAG)): $(patsubst %,%.kern.o,$(APP_TAG)) 115 | $(call msg,GEN-SKEL,$@) 116 | $(Q)$(BPFTOOL) gen skeleton $< > $@ 117 | 118 | $(patsubst %,%.kern.o,$(APP_TAG)): $(patsubst %,%.kern.c,$(APP_TAG)) 119 | $(call msg,BPF,$@) 120 | $(Q)$(CLANG) $(K_CFLAGS) -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | 
\ 121 | $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \ 122 | $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ 123 | 124 | .PHONY: clean 125 | clean: 126 | $(call msg, CLEANING) 127 | $(Q)rm -rf $(OUTPUT) $(APP_EXECVE_PUREC) $(APP_EXECVE_BTF) *.o *.skel.h -------------------------------------------------------------------------------- /kprobe_sys_execve/kp_execve.kern.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-20 10:30:24 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-08-24 19:26:38 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | //#include 13 | //#include 14 | 15 | // #include 16 | // #include 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include "execve_data.h" 23 | 24 | /* 25 | sys_execve(const char __user *filename, 26 | const char __user *const __user *argv, 27 | const char __user *const __user *envp); 28 | */ 29 | 30 | #define printk( fmt, ... 
) \ 31 | ( { \ 32 | char ____fmt[] = fmt; \ 33 | bpf_trace_printk( ____fmt, sizeof( ____fmt ), ##__VA_ARGS__ ); \ 34 | } ) 35 | 36 | #define _( P ) \ 37 | ( { \ 38 | typeof( P ) val = 0; \ 39 | bpf_probe_read( &val, sizeof( val ), &P ); \ 40 | val; \ 41 | } ) 42 | 43 | /* 44 | 使用perf event来保存eBPF数据,从user程序读取 45 | */ 46 | struct { 47 | __uint( type, BPF_MAP_TYPE_PERF_EVENT_ARRAY ); 48 | __uint( key_size, sizeof( __u32 ) ); 49 | __uint( value_size, sizeof( __u32 ) ); // 这里不是传输数据的sizeof 50 | __uint( max_entries, 128 ); 51 | } execve_perf_evt_map SEC( ".maps" ); 52 | 53 | struct { 54 | __uint( type, BPF_MAP_TYPE_HASH ); 55 | __uint( max_entries, 128 ); 56 | __type( key, pid_t ); 57 | __type( value, struct data_t ); 58 | __uint( value_size, sizeof( struct data_t ) ); 59 | } execve_hash SEC( ".maps" ); 60 | 61 | SEC( "kprobe/" SYSCALL( sys_execve ) ) 62 | //int probe_sys_execve( struct pt_regs* ctx ) { 63 | int BPF_KPROBE(probe_sys_execve, const char __user *filename, 64 | const char __user *const __user *argv, 65 | const char __user *const __user *envp) { 66 | 67 | struct data_t data = {}; 68 | 69 | __u64 pid_tgid = bpf_get_current_pid_tgid(); 70 | __u32 pid = pid_tgid >> 32; 71 | __u32 tid = (__u32)pid_tgid; 72 | 73 | data.pid = pid; 74 | data.tid = tid; 75 | data.uid = bpf_get_current_uid_gid(); 76 | // 获取当前的进程名 77 | bpf_get_current_comm( &data.comm, sizeof( data.comm ) ); 78 | 79 | // 读取filename参数内容 80 | const char * filename_t = (const char *)PT_REGS_PARM1(ctx); 81 | bpf_probe_read_user_str( &data.filename, sizeof( data.filename ), filename_t ); 82 | //bpf_probe_read_user_str( &data.filename, sizeof( data.filename ), filename); 83 | printk( "filenameStr:[%s]", data.filename ); 84 | 85 | if ( bpf_map_update_elem( &execve_hash, &tid, &data, BPF_NOEXIST ) ) { 86 | return 0; 87 | } 88 | 89 | // 打印输出 /sys/kernel/debug/tracing/trace_pipe 90 | // (%struct.pt_regs*): too many args to 0x55e14eb7daa8: i64 = Constant<6> 91 | // 上面错误是 bpf_trace_printk 带的参数太多了 
http://kerneltravel.net/blog/2020/ebpf_ljr_no3/ 92 | printk( "pid:%d, tid:%d executing program:%s", pid, data.tid, data.comm ); 93 | printk( "filename:[%s]", data.filename ); 94 | 95 | return 0; 96 | } 97 | 98 | SEC( "kretprobe/" SYSCALL( sys_execve ) ) 99 | int BPF_KRETPROBE( kprobe_sys_execve_exit, int ret ) { 100 | __u32 tid = bpf_get_current_pid_tgid(); 101 | 102 | struct data_t* data; 103 | data = bpf_map_lookup_elem( &execve_hash, &tid ); 104 | if ( data == NULL ) { 105 | return 0; 106 | } 107 | 108 | data->ret = ret; 109 | 110 | printk( "kretprobe sys_execve pid:%d, tid:%d executing program:%s", data->pid, data->tid, data->comm ); 111 | // 使用bpf_perf_event_output将data附加到映射上 112 | // 加上了BPF_F_CURRENT_CPU这个flag后,user测能实时的获得event回调。这是为什么? 113 | bpf_perf_event_output( ctx, &execve_perf_evt_map, BPF_F_CURRENT_CPU, data, sizeof( *data ) ); 114 | bpf_map_delete_elem( &execve_hash, &tid ); 115 | return 0; 116 | } 117 | 118 | char _license[] SEC( "license" ) = "GPL"; 119 | __u32 _version SEC( "version" ) = LINUX_VERSION_CODE; -------------------------------------------------------------------------------- /tp_execve/clang/tp_execve.user.btf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-27 11:57:38 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 17:19:16 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "bpf_help.h" 14 | #include "event_help.h" 15 | #include "event.h" 16 | #include "tp_execve.skel.h" 17 | 18 | #define PERF_BUFFER_PAGES 64 19 | 20 | // struct env_t g_env = { 21 | // .max_args = DEFAULT_MAXARGS, .uid = INVALID_UID, .quote = true, .time = true, .print_uid = true, .timestamp = true 22 | // }; 23 | 24 | // static struct timespec start_time; 25 | 26 | // static void time_since_start() { 27 | // int64_t secs, nsecs; 28 | // static struct timespec cur_time; 29 | // double time_diff; 30 | 31 | // clock_gettime( 
CLOCK_MONOTONIC, &cur_time ); 32 | // nsecs = cur_time.tv_nsec - start_time.tv_nsec; 33 | // secs = cur_time.tv_sec - start_time.tv_sec; 34 | // if ( nsecs < 0 ) { 35 | // nsecs += NSEC_PER_SEC; 36 | // secs--; 37 | // } 38 | 39 | // time_diff = secs + ( nsecs / NSEC_PER_SEC ); 40 | // printf( "%-8.3f", time_diff ); 41 | // } 42 | 43 | // static void inline quoted_symbol( char c ) { 44 | // switch ( c ) { 45 | // case '"': 46 | // putchar( '\\' ); 47 | // putchar( '"' ); 48 | // break; 49 | // case '\t': 50 | // putchar( '\\' ); 51 | // putchar( 't' ); 52 | // break; 53 | // case '\n': 54 | // putchar( '\\' ); 55 | // putchar( 'n' ); 56 | // break; 57 | // default: 58 | // putchar( c ); 59 | // break; 60 | // } 61 | // } 62 | 63 | // static void print_args( const struct event_t* e, bool quote ) { 64 | // int32_t i, args_counter = 0; 65 | 66 | // if ( g_env.quote ) 67 | // putchar( '"' ); 68 | 69 | // for ( i = 0; i < e->args_size && args_counter < e->args_count; i++ ) { 70 | // char c = e->args[ i ]; 71 | 72 | // if ( g_env.quote ) { 73 | // if ( c == '\0' ) { 74 | // args_counter++; 75 | // putchar( '"' ); 76 | // putchar( ' ' ); 77 | // if ( args_counter < e->args_count ) { 78 | // putchar( '"' ); 79 | // } 80 | // } else { 81 | // quoted_symbol( c ); 82 | // } 83 | // } else { 84 | // if ( c == '\0' ) { 85 | // args_counter++; 86 | // putchar( ' ' ); 87 | // } else { 88 | // putchar( c ); 89 | // } 90 | // } 91 | // } 92 | // if ( e->args_count == g_env.max_args + 1 ) { 93 | // fputs( " ...", stdout ); 94 | // } 95 | // } 96 | 97 | // perf event call back function 98 | // static void handle_event( void* ctx, int32_t cpu, void* data, uint32_t size ) { 99 | // const struct event_t* e = ( const struct event_t* ) data; 100 | // time_t t; 101 | // struct tm* tm; 102 | // char ts[ 32 ]; 103 | 104 | // time( &t ); 105 | // tm = localtime( &t ); 106 | // strftime( ts, sizeof( ts ), "%H:%M:%S", tm ); 107 | 108 | // printf( "%-8s ", ts ); 109 | // time_since_start(); 110 
| // printf( "%-6d", e->uid ); 111 | // printf( "%-16s %-6d %-6d %3d ", e->comm, e->pid, e->ppid, e->retval ); 112 | // print_args( e, g_env.quote ); 113 | // putchar( '\n' ); 114 | // } 115 | 116 | // static void handle_lost_event( void* ctx, int32_t cpu, uint64_t lost_cnt ) { 117 | // fprintf( stderr, "Lost %lu events on CPU #%d!\n", lost_cnt, cpu ); 118 | // } 119 | 120 | int32_t main( int32_t argc, char** argv ) { 121 | int32_t err = 0; 122 | struct perf_buffer_opts pb_opts = {}; 123 | struct perf_buffer* pb = NULL; 124 | struct tp_execve_kern* bpf_obj = NULL; 125 | 126 | g_env.max_args = DEFAULT_MAXARGS; 127 | g_env.uid = INVALID_UID; 128 | 129 | fprintf( stderr, "main: %s\n", argv[ 0 ] ); 130 | 131 | libbpf_set_print( bpf_printf ); 132 | 133 | err = bump_memlock_rlimit(); 134 | if ( err ) { 135 | fprintf( stderr, "failed to increase rlimit: %d\n", err ); 136 | return 1; 137 | } 138 | 139 | bpf_obj = tp_execve_kern__open(); 140 | if ( !bpf_obj ) { 141 | fprintf( stderr, "failed to open BPF execve_kern object\n" ); 142 | return 1; 143 | } 144 | 145 | /* initialize global data (filtering options),传递参数控制bpf kern程序的过滤行为 */ 146 | // 在load之前修改只读代码段的变量 147 | bpf_obj->rodata->ignore_failed = !g_env.fails; 148 | bpf_obj->rodata->target_uid = g_env.uid; 149 | // 最大的参数个数 150 | bpf_obj->rodata->max_args = g_env.max_args; 151 | 152 | // 加载bpf kern程序 153 | err = tp_execve_kern__load( bpf_obj ); 154 | if ( err ) { 155 | fprintf( stderr, "failed to load BPF object: %d\n", err ); 156 | goto cleanup; 157 | } 158 | 159 | clock_gettime( CLOCK_MONOTONIC, &start_time ); 160 | err = tp_execve_kern__attach( bpf_obj ); 161 | if ( err ) { 162 | fprintf( stderr, "failed to attach BPF object: %d\n", err ); 163 | goto cleanup; 164 | } 165 | 166 | printf( "%-9s", "TIME" ); 167 | printf( "%-8s ", "TIME(s)" ); 168 | printf( "%-6s ", "UID" ); 169 | printf( "%-16s %-6s %-6s %3s %s\n", "PCOMM", "PID", "PPID", "RET", "ARGS" ); 170 | 171 | // setup event callback 172 | pb_opts.sample_cb = 
handle_event; 173 | pb_opts.lost_cb = handle_lost_event; 174 | pb = perf_buffer__new( bpf_map__fd( bpf_obj->maps.execve_perf_evt_map ), PERF_BUFFER_PAGES, &pb_opts ); 175 | err = libbpf_get_error( pb ); 176 | if ( err ) { 177 | pb = NULL; 178 | fprintf( stderr, "failed to open perf buffer: %d\n", err ); 179 | goto cleanup; 180 | } 181 | 182 | // loop perf event 183 | while ( ( err = perf_buffer__poll( pb, 100 ) ) >= 0 ) { 184 | } 185 | printf( "Error polling perf buffer: %d\n", err ); 186 | 187 | cleanup: 188 | tp_execve_kern__destroy( bpf_obj ); 189 | 190 | return err != 0; 191 | } -------------------------------------------------------------------------------- /tp_execve/clang/tp_execve.kern.c: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: CALM.WU 3 | * @Date: 2021-08-27 10:49:37 4 | * @Last Modified by: CALM.WU 5 | * @Last Modified time: 2021-09-01 19:14:08 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | //#include 这个需要BTF支持 14 | #include 15 | #include 16 | #include 17 | 18 | #include "event.h" 19 | 20 | #define printk( fmt, ... 
) \ 21 | ( { \ 22 | char ____fmt[] = fmt; \ 23 | bpf_trace_printk( ____fmt, sizeof( ____fmt ), ##__VA_ARGS__ ); \ 24 | } ) 25 | 26 | // 缓存event数据,hook tracepoints函数时用于记录数据 27 | struct { 28 | __uint( type, BPF_MAP_TYPE_HASH ); 29 | __uint( max_entries, 1024 ); 30 | __type( key, pid_t ); 31 | __type( value, struct event_t ); 32 | __uint( value_size, sizeof( struct event_t ) ); 33 | } execve_hash SEC( ".maps" ); 34 | 35 | // 将ebpf数据做为event上报 36 | struct { 37 | __uint( type, BPF_MAP_TYPE_PERF_EVENT_ARRAY ); 38 | __uint( key_size, sizeof( __s32 ) ); 39 | __uint( value_size, sizeof( __u32 ) ); // 这里不是传输数据的sizeof 40 | __uint( max_entries, 128 ); 41 | } execve_perf_evt_map SEC( ".maps" ); 42 | 43 | // Based on /sys/kernel/debug/tracing/events/syscalls/sys_enter_execve/format 44 | struct enter_execve_args { 45 | // __s16 common_type; 46 | // char common_flags; 47 | // char common_preempt_count; 48 | // __s32 common_pid; 49 | unsigned long long unused; 50 | __s32 __syscall_nr; 51 | char* filename; 52 | const char* const* argv; 53 | const char* const* envp; 54 | }; 55 | 56 | // Based on /sys/kernel/debug/tracing/events/syscalls/sys_exit_execve/format 57 | struct exit_execve_args { 58 | __s16 common_type; 59 | char common_flags; 60 | char common_preempt_count; 61 | __s32 common_pid; 62 | __s32 __syscall_nr; 63 | __s64 ret; 64 | }; 65 | 66 | static struct event_t empty_event = {}; 67 | 68 | // user程序可以设置具体值,volatile保证变量每次都从内存读取,而不是寄存器和cache 69 | // 相当于外部传入的参数,而控制ebpf内核程序 70 | const volatile __s32 max_args = DEFAULT_MAXARGS; 71 | const volatile uid_t target_uid = INVALID_UID; 72 | const volatile bool ignore_failed = true; 73 | 74 | SEC( "tracepoint/syscalls/sys_enter_execve" ) 75 | __s32 tracepoint__syscalls__sys_enter_execve( struct enter_execve_args* ctx ) { 76 | __u64 id; 77 | pid_t pid, tgid; 78 | uid_t uid; 79 | __u32 ret; 80 | struct event_t* event; 81 | // 内核结构task_struct,用于表示进程和线程 82 | struct task_struct* task; 83 | // 参数地址 84 | const char* argp = NULL; 85 | 86 | // 
获取用户id 87 | uid = bpf_get_current_uid_gid(); 88 | id = bpf_get_current_pid_tgid(); 89 | // 获取进程id 90 | pid = ( pid_t ) (id & 0xffffffff); 91 | // 获取线程组id 92 | tgid = id >> 32; 93 | 94 | // 输出在 /sys/kernel/debug/tracing/trace_pipe 95 | printk( "pid: %d, tgid: %d, uid: %d\n", pid, tgid, uid ); 96 | 97 | if ( target_uid != INVALID_UID && target_uid != uid ) { 98 | // uid和target_uid不相等,直接返回 99 | printk( "target_uid:%d != uid:%d, so return", target_uid, uid ); 100 | return 0; 101 | } 102 | 103 | // 在execve_hash加入成员,key为进程id,value为event_t结构体 104 | if ( bpf_map_update_elem( &execve_hash, &pid, &empty_event, BPF_NOEXIST ) ) { 105 | // bpf hash中key不存在,加入失败,直接返回 106 | return 0; 107 | } 108 | 109 | // 根据pid查找event数据 110 | event = bpf_map_lookup_elem( &execve_hash, &pid ); 111 | if ( !event ) { 112 | return 0; 113 | } 114 | 115 | // tgid使用主线程的pid 116 | event->pid = tgid; 117 | event->uid = uid; 118 | 119 | // 获取当前task_struct 120 | task = ( struct task_struct* ) bpf_get_current_task(); 121 | 122 | 123 | // 通过task获取父进程id, 这个需要BTF,如果内核不支持BTF,那么只有直接去读取,根据地址去读 124 | // event->ppid = ( pid_t ) BPF_CORE_READ( task, real_parent, tgid ); 125 | struct task_struct* real_parent_task; 126 | bpf_probe_read( &real_parent_task, sizeof( real_parent_task ), &task->real_parent ); 127 | bpf_probe_read( &event->ppid, sizeof( event->ppid ), &real_parent_task->pid ); 128 | 129 | event->args_count = 0; 130 | event->args_size = 0; 131 | 132 | // 读取命令名,觉得这个和bpf_get_current_comm应该想用,args[0] 133 | // 命令行参数都是用户空间分配的,所以用***_user_str 134 | // https://stackoverflow.com/questions/67188440/ebpf-cannot-read-argv-and-envp-from-tracepoint-sys-enter-execve 135 | // 先读取第一个参数地址,在读取第一个参数内容 136 | bpf_probe_read( &argp, sizeof( argp ), &ctx->argv[ 0 ] ); 137 | ret = bpf_probe_read_user_str( event->args, ARGSIZE, argp ); 138 | 139 | // ret = bpf_probe_read_user_str( event->args, ARGSIZE, ( const char* ) ctx->argv[ 0 ] ); 140 | if ( ret < ARGSIZE ) { 141 | event->args_size += ret; 142 | } else { 143 | // empty string 144 
| event->args[ 0 ] = '\0'; 145 | event->args_size++; 146 | } 147 | // 参数个数递增 148 | event->args_count++; 149 | 150 | // // 读取第二个参数 151 | // bpf_probe_read( &argp, sizeof( argp ), &ctx->argv[ 1 ] ); 152 | // if ( !argp ) { 153 | // return 0; 154 | // } 155 | 156 | // // 这行代码非常重要,如果不加上,下面代码是没法判断空间是否足够读取ARGSIZE这多字节的。而且BPF Verifier会报错 157 | // if ( event->args_size > LAST_ARG ) 158 | // return 0; 159 | 160 | // ret = bpf_probe_read_user_str( event->args + event->args_size, ARGSIZE, argp ); 161 | // if ( ret > ARGSIZE ) { 162 | // return 0; 163 | // } 164 | 165 | // event->args_size += ret; 166 | // event->args_count++; 167 | 168 | // 告诉编译器,不做循环展开 169 | // #pragma unroll 170 | for ( __s32 i = 1; i < DEFAULT_MAXARGS && i < max_args; i++ ) { 171 | // 读取后续参数地址 172 | ret = bpf_probe_read( &argp, sizeof( argp ), &ctx->argv[ i ] ); 173 | if ( !argp ) { 174 | // 地址为空,说明没有参数 175 | return 0; 176 | } 177 | 178 | // 这行代码非常重要,如果不加上,下面代码是没法判断空间是否足够读取ARGSIZE这多字节的。而且BPF Verifier会报错 179 | if ( event->args_size > LAST_ARG ) 180 | return 0; 181 | 182 | // 读取参数内容 183 | ret = bpf_probe_read_user_str( &event->args[ event->args_size ], ARGSIZE, argp ); 184 | if ( ret > ARGSIZE ) { 185 | printk( "argv[%d] size: %d larger than ARGSIZE", i, ret ); 186 | return 0; 187 | } 188 | 189 | event->args_size += ret; 190 | event->args_count++; 191 | } 192 | 193 | return 0; 194 | } 195 | 196 | SEC( "tracepoint/syscalls/sys_exit_execve" ) 197 | int tracepoint__syscalls__sys_exit_execve( struct exit_execve_args* ctx ) { 198 | __u64 id; 199 | pid_t pid; 200 | __s32 ret; 201 | struct event_t* evt; 202 | 203 | __u32 uid = bpf_get_current_uid_gid(); 204 | 205 | if ( target_uid != INVALID_UID && target_uid != uid ) { 206 | // uid和target_uid不相等,直接返回 207 | printk( "target_uid:%d != uid:%d, so return", target_uid, uid ); 208 | return 0; 209 | } 210 | 211 | id = bpf_get_current_pid_tgid(); 212 | pid = ( pid_t ) id; 213 | // 在execve_hash中查找成员,key为进程id 214 | evt = bpf_map_lookup_elem( &execve_hash, &pid ); 215 | if ( 
!evt ) { 216 | return 0; 217 | } 218 | 219 | // 得到exit_execve的返回值 220 | ret = ctx->ret; 221 | if ( ignore_failed && ret < 0 ) { 222 | // 从execve_hash中删除成员,key为进程id 223 | bpf_map_delete_elem( &execve_hash, &pid ); 224 | return 0; 225 | } 226 | 227 | // 更新event中的返回值 228 | evt->retval = ret; 229 | // 得到应用程序名字 230 | bpf_get_current_comm( &evt->comm, sizeof( evt->comm ) ); 231 | // 计算event数据的实际长度 232 | // size_t event_len = offsetof( struct event_t, args ) + event->args_size; 233 | // bpf_perf_event_output( ctx, &execve_perf_evt_map, BPF_F_CURRENT_CPU, event, sizeof( *event ) ); 234 | 235 | // 下面这种计算长度,会校验报错,R5 unbounded memory access, use 'var &= const' or 'if (var < const)' 236 | // 原因是size_t对应的format格式不对,就算填写%zu也不对 237 | // size_t len = offsetof( struct event_t, args ) + evt->args_size; 238 | __u32 len = offsetof( struct event_t, args ) + evt->args_size; 239 | 240 | // bpf_trace_printk 只支持这些类型,必须对应上,否则verifier报错,%d, %i, %u, %x, %ld, %li, %lu, %lx, %lld, %lli, %llu, %llx, %p, %s 241 | printk( "execute:%s, event length: %u", evt->comm, len ); 242 | 243 | // size_t len = EVENT_SIZE(evt); 244 | if ( len <= sizeof( *evt ) ) { 245 | bpf_perf_event_output( ctx, &execve_perf_evt_map, BPF_F_CURRENT_CPU, evt, len ); 246 | } 247 | return 0; 248 | } 249 | 250 | char _license[] SEC( "license" ) = "GPL"; 251 | __u32 _version SEC( "version" ) = LINUX_VERSION_CODE; -------------------------------------------------------------------------------- /skbtracer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define ROUTE_EVENT_IF 0x0001 12 | #define ROUTE_EVENT_IPTABLE 0x0002 13 | #define ROUTE_EVENT_DROP 0x0004 14 | #define ROUTE_EVENT_NEW 0x0010 15 | 16 | #ifdef __BCC_ARGS__ 17 | __BCC_ARGS_DEFINE__ 18 | #else 19 | #define __BCC_pid 0 20 | #define __BCC_ipaddr 0 21 | #define __BCC_port 0 22 | #define __BCC_icmpid 0 23 | 
#define __BCC_dropstack 0 24 | #define __BCC_callstack 0 25 | #define __BCC_iptable 0 26 | #define __BCC_route 0 27 | #define __BCC_keep 0 28 | #define __BCC_proto 0 29 | #define __BCC_netns 0 30 | #endif 31 | 32 | /* route info as default */ 33 | #if !__BCC_dropstack && !__BCC_iptable && !__BCC_route 34 | #undef __BCC_route 35 | #define __BCC_route 1 36 | #endif 37 | 38 | #if (__BCC_dropstack) || (!__BCC_pid && !__BCC_ipaddr && !__BCC_port && !__BCC_icmpid &&! __BCC_proto && !__BCC_netns) 39 | #undef __BCC_keep 40 | #define __BCC_keep 0 41 | #endif 42 | 43 | BPF_STACK_TRACE(stacks, 2048); 44 | 45 | #define FUNCNAME_MAX_LEN 64 46 | struct event_t { 47 | char func_name[FUNCNAME_MAX_LEN]; 48 | u8 flags; 49 | 50 | // route info 51 | char ifname[IFNAMSIZ]; 52 | u32 netns; 53 | 54 | // pkt info 55 | u8 dest_mac[6]; 56 | u32 len; 57 | u8 ip_version; 58 | u8 l4_proto; 59 | u64 saddr[2]; 60 | u64 daddr[2]; 61 | u8 icmptype; 62 | u16 icmpid; 63 | u16 icmpseq; 64 | u16 sport; 65 | u16 dport; 66 | u16 tcpflags; 67 | 68 | // ipt info 69 | u32 hook; 70 | u8 pf; 71 | u32 verdict; 72 | char tablename[XT_TABLE_MAXNAMELEN]; 73 | u64 ipt_delay; 74 | 75 | void *skb; 76 | // skb info 77 | u8 pkt_type; //skb->pkt_type 78 | 79 | // call stack 80 | int kernel_stack_id; 81 | u64 kernel_ip; 82 | 83 | //time 84 | u64 start_ns; 85 | u64 test; 86 | }; 87 | BPF_PERF_OUTPUT(route_event); 88 | 89 | struct ipt_do_table_args 90 | { 91 | struct sk_buff *skb; 92 | const struct nf_hook_state *state; 93 | struct xt_table *table; 94 | u64 start_ns; 95 | }; 96 | BPF_HASH(cur_ipt_do_table_args, u32, struct ipt_do_table_args); 97 | 98 | union ___skb_pkt_type { 99 | __u8 value; 100 | struct { 101 | __u8 __pkt_type_offset[0]; 102 | __u8 pkt_type:3; 103 | __u8 pfmemalloc:1; 104 | __u8 ignore_df:1; 105 | 106 | __u8 nf_trace:1; 107 | __u8 ip_summed:2; 108 | }; 109 | }; 110 | 111 | #if __BCC_keep 112 | #endif 113 | 114 | #define MAC_HEADER_SIZE 14; 115 | #define member_address(source_struct, source_member) \ 
116 | ({ \ 117 | void* __ret; \ 118 | __ret = (void*) (((char*)source_struct) + offsetof(typeof(*source_struct), source_member)); \ 119 | __ret; \ 120 | }) 121 | #define member_read(destination, source_struct, source_member) \ 122 | do{ \ 123 | bpf_probe_read( \ 124 | destination, \ 125 | sizeof(source_struct->source_member), \ 126 | member_address(source_struct, source_member) \ 127 | ); \ 128 | } while(0) 129 | 130 | enum { 131 | __TCP_FLAG_CWR, 132 | __TCP_FLAG_ECE, 133 | __TCP_FLAG_URG, 134 | __TCP_FLAG_ACK, 135 | __TCP_FLAG_PSH, 136 | __TCP_FLAG_RST, 137 | __TCP_FLAG_SYN, 138 | __TCP_FLAG_FIN 139 | }; 140 | 141 | static void bpf_strncpy(char *dst, const char *src, int n) 142 | { 143 | int i = 0, j; 144 | #define CPY(n) \ 145 | do { \ 146 | for (; i < n; i++) { \ 147 | if (src[i] == 0) return; \ 148 | dst[i] = src[i]; \ 149 | } \ 150 | } while(0) 151 | 152 | for (j = 10; j < 64; j += 10) 153 | CPY(j); 154 | CPY(64); 155 | #undef CPY 156 | } 157 | 158 | #define TCP_FLAGS_INIT(new_flags, orig_flags, flag) \ 159 | do { \ 160 | if (orig_flags & flag) { \ 161 | new_flags |= (1U<<__##flag); \ 162 | } \ 163 | } while (0) 164 | #define init_tcpflags_bits(new_flags, orig_flags) \ 165 | ({ \ 166 | new_flags = 0; \ 167 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_CWR); \ 168 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_ECE); \ 169 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_URG); \ 170 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_ACK); \ 171 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_PSH); \ 172 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_RST); \ 173 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_SYN); \ 174 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_FIN); \ 175 | }) 176 | 177 | static void get_stack(struct pt_regs *ctx, struct event_t *event) 178 | { 179 | event->kernel_stack_id = stacks.get_stackid(ctx, 0); 180 | if (event->kernel_stack_id >= 0) { 181 | u64 ip = PT_REGS_IP(ctx); 182 | u64 page_offset; 183 | // if ip isn't sane, leave 
key ips as zero for later checking 184 | #if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE) 185 | // x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it 186 | page_offset = __PAGE_OFFSET_BASE; 187 | #elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4) 188 | // x64, 4.17, and later 189 | #if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL) 190 | page_offset = __PAGE_OFFSET_BASE_L5; 191 | #else 192 | page_offset = __PAGE_OFFSET_BASE_L4; 193 | #endif 194 | #else 195 | // earlier x86_64 kernels, e.g., 4.6, comes here 196 | // arm64, s390, powerpc, x86_32 197 | page_offset = PAGE_OFFSET; 198 | #endif 199 | if (ip > page_offset) { 200 | event->kernel_ip = ip; 201 | } 202 | } 203 | return; 204 | } 205 | 206 | #define CALL_STACK(ctx, event) \ 207 | do { \ 208 | if (__BCC_callstack) \ 209 | get_stack(ctx, event); \ 210 | } while (0) 211 | 212 | 213 | /** 214 | * Common tracepoint handler. Detect IPv4/IPv6 and 215 | * emit event with address, interface and namespace. 
216 | */ 217 | static int 218 | do_trace_skb(struct event_t *event, void *ctx, struct sk_buff *skb, void *netdev) 219 | { 220 | struct net_device *dev; 221 | 222 | char *head; 223 | char *l2_header_address; 224 | char *l3_header_address; 225 | char *l4_header_address; 226 | 227 | u16 mac_header; 228 | u16 network_header; 229 | 230 | u8 proto_icmp_echo_request; 231 | u8 proto_icmp_echo_reply; 232 | u8 l4_offset_from_ip_header; 233 | 234 | struct icmphdr icmphdr; 235 | union tcp_word_hdr tcphdr; 236 | struct udphdr udphdr; 237 | 238 | // Get device pointer, we'll need it to get the name and network namespace 239 | event->ifname[0] = 0; 240 | if (netdev) 241 | dev = netdev; 242 | else 243 | member_read(&dev, skb, dev); 244 | 245 | bpf_probe_read(&event->ifname, IFNAMSIZ, dev->name); 246 | 247 | if (event->ifname[0] == 0 || dev == NULL) 248 | bpf_strncpy(event->ifname, "nil", IFNAMSIZ); 249 | 250 | event->flags |= ROUTE_EVENT_IF; 251 | 252 | #ifdef CONFIG_NET_NS 253 | struct net* net; 254 | 255 | // Get netns id. 
The code below is equivalent to: event->netns = dev->nd_net.net->ns.inum 256 | possible_net_t *skc_net = &dev->nd_net; 257 | member_read(&net, skc_net, net); 258 | struct ns_common *ns = member_address(net, ns); 259 | member_read(&event->netns, ns, inum); 260 | 261 | // maybe the skb->dev is not init, for this situation, we can get ns by sk->__sk_common.skc_net.net->ns.inum 262 | if (event->netns == 0) { 263 | struct sock *sk; 264 | struct sock_common __sk_common; 265 | struct ns_common* ns2; 266 | member_read(&sk, skb, sk); 267 | if (sk != NULL) { 268 | member_read(&__sk_common, sk, __sk_common); 269 | ns2 = member_address(__sk_common.skc_net.net, ns); 270 | member_read(&event->netns, ns2, inum); 271 | } 272 | } 273 | 274 | 275 | #endif 276 | 277 | member_read(&event->len, skb, len); 278 | member_read(&head, skb, head); 279 | member_read(&mac_header, skb, mac_header); 280 | member_read(&network_header, skb, network_header); 281 | 282 | if(network_header == 0) { 283 | network_header = mac_header + MAC_HEADER_SIZE; 284 | } 285 | 286 | l2_header_address = mac_header + head; 287 | bpf_probe_read(&event->dest_mac, 6, l2_header_address); 288 | 289 | l3_header_address = head + network_header; 290 | bpf_probe_read(&event->ip_version, sizeof(u8), l3_header_address); 291 | event->ip_version = event->ip_version >> 4 & 0xf; 292 | 293 | if (event->ip_version == 4) { 294 | struct iphdr iphdr; 295 | bpf_probe_read(&iphdr, sizeof(iphdr), l3_header_address); 296 | 297 | l4_offset_from_ip_header = iphdr.ihl * 4; 298 | event->l4_proto = iphdr.protocol; 299 | event->saddr[0] = iphdr.saddr; 300 | event->daddr[0] = iphdr.daddr; 301 | 302 | if (event->l4_proto == IPPROTO_ICMP) { 303 | proto_icmp_echo_request = ICMP_ECHO; 304 | proto_icmp_echo_reply = ICMP_ECHOREPLY; 305 | } 306 | 307 | } else if (event->ip_version == 6) { 308 | // Assume no option header --> fixed size header 309 | struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)l3_header_address; 310 | l4_offset_from_ip_header = 
sizeof(*ipv6hdr); 311 | 312 | bpf_probe_read(&event->l4_proto, sizeof(ipv6hdr->nexthdr), (char*)ipv6hdr + offsetof(struct ipv6hdr, nexthdr)); 313 | bpf_probe_read(event->saddr, sizeof(ipv6hdr->saddr), (char*)ipv6hdr + offsetof(struct ipv6hdr, saddr)); 314 | bpf_probe_read(event->daddr, sizeof(ipv6hdr->daddr), (char*)ipv6hdr + offsetof(struct ipv6hdr, daddr)); 315 | 316 | if (event->l4_proto == IPPROTO_ICMPV6) { 317 | proto_icmp_echo_request = ICMPV6_ECHO_REQUEST; 318 | proto_icmp_echo_reply = ICMPV6_ECHO_REPLY; 319 | } 320 | 321 | } else { 322 | return -1; 323 | } 324 | 325 | l4_header_address = l3_header_address + l4_offset_from_ip_header; 326 | switch (event->l4_proto) { 327 | case IPPROTO_ICMPV6: 328 | case IPPROTO_ICMP: 329 | bpf_probe_read(&icmphdr, sizeof(icmphdr), l4_header_address); 330 | if (icmphdr.type != proto_icmp_echo_request && icmphdr.type != proto_icmp_echo_reply) { 331 | return -1; 332 | } 333 | event->icmptype = icmphdr.type; 334 | event->icmpid = be16_to_cpu(icmphdr.un.echo.id); 335 | event->icmpseq = be16_to_cpu(icmphdr.un.echo.sequence); 336 | break; 337 | case IPPROTO_TCP: 338 | bpf_probe_read(&tcphdr, sizeof(tcphdr), l4_header_address); 339 | init_tcpflags_bits(event->tcpflags, tcp_flag_word(&tcphdr)); 340 | event->sport = be16_to_cpu(tcphdr.hdr.source); 341 | event->dport = be16_to_cpu(tcphdr.hdr.dest); 342 | break; 343 | case IPPROTO_UDP: 344 | bpf_probe_read(&udphdr, sizeof(udphdr), l4_header_address); 345 | event->sport = be16_to_cpu(udphdr.source); 346 | event->dport = be16_to_cpu(udphdr.dest); 347 | break; 348 | default: 349 | return -1; 350 | } 351 | 352 | #if __BCC_keep 353 | #endif 354 | 355 | 356 | /* 357 | * netns filter 358 | */ 359 | if (__BCC_netns !=0 && event->netns != 0 && event->netns != __BCC_netns) { 360 | return -1; 361 | } 362 | 363 | /* 364 | * pid filter 365 | */ 366 | #if __BCC_pid 367 | u64 tgid = bpf_get_current_pid_tgid() >> 32; 368 | if (tgid != __BCC_pid) 369 | return -1; 370 | #endif 371 | 372 | /* 373 | * skb 
filter 374 | */ 375 | #if __BCC_ipaddr 376 | if (event->ip_version == 4) { 377 | if (__BCC_ipaddr != event->saddr[0] && __BCC_ipaddr != event->daddr[0]) 378 | return -1; 379 | } else { 380 | return -1; 381 | } 382 | #endif 383 | 384 | #if __BCC_proto 385 | if (__BCC_proto != event->l4_proto) 386 | return -1; 387 | #endif 388 | 389 | #if __BCC_port 390 | if ( (event->l4_proto == IPPROTO_UDP || event->l4_proto == IPPROTO_TCP) && 391 | (__BCC_port != event->sport && __BCC_port != event->dport)) 392 | return -1; 393 | #endif 394 | 395 | #if __BCC_icmpid 396 | if (__BCC_proto == IPPROTO_ICMP && __BCC_icmpid != event->icmpid) 397 | return -1; 398 | #endif 399 | 400 | #if __BCC_keep 401 | #endif 402 | 403 | return 0; 404 | } 405 | 406 | static int 407 | do_trace(void *ctx, struct sk_buff *skb, const char *func_name, void *netdev) 408 | { 409 | struct event_t event = {}; 410 | union ___skb_pkt_type type = {}; 411 | 412 | if (do_trace_skb(&event, ctx, skb, netdev) < 0) 413 | return 0; 414 | 415 | event.skb=skb; 416 | bpf_probe_read(&type.value, 1, ((char*)skb) + offsetof(typeof(*skb), __pkt_type_offset)); 417 | event.pkt_type = type.pkt_type; 418 | 419 | event.start_ns = bpf_ktime_get_ns(); 420 | bpf_strncpy(event.func_name, func_name, FUNCNAME_MAX_LEN); 421 | CALL_STACK(ctx, &event); 422 | route_event.perf_submit(ctx, &event, sizeof(event)); 423 | out: 424 | return 0; 425 | } 426 | 427 | #if __BCC_route 428 | 429 | /* 430 | * netif rcv hook: 431 | * 1) int netif_rx(struct sk_buff *skb) 432 | * 2) int __netif_receive_skb(struct sk_buff *skb) 433 | * 3) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 434 | * 4) ... 
435 | */ 436 | int kprobe__netif_rx(struct pt_regs *ctx, struct sk_buff *skb) 437 | { 438 | return do_trace(ctx, skb, __func__+8, NULL); 439 | } 440 | 441 | int kprobe____netif_receive_skb(struct pt_regs *ctx, struct sk_buff *skb) 442 | { 443 | return do_trace(ctx, skb, __func__+8, NULL); 444 | } 445 | 446 | int kprobe__tpacket_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 447 | { 448 | return do_trace(ctx, skb, __func__+8, orig_dev); 449 | } 450 | 451 | int kprobe__packet_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 452 | { 453 | return do_trace(ctx, skb, __func__+8, orig_dev); 454 | } 455 | 456 | int kprobe__napi_gro_receive(struct pt_regs *ctx, struct napi_struct *napi, struct sk_buff *skb) 457 | { 458 | return do_trace(ctx, skb, __func__+8, NULL); 459 | } 460 | 461 | /* 462 | * netif send hook: 463 | * 1) int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) 464 | * 2) ... 
465 | */ 466 | 467 | int kprobe____dev_queue_xmit(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *sb_dev) 468 | { 469 | return do_trace(ctx, skb, __func__+8, NULL); 470 | } 471 | 472 | /* 473 | * br process hook: 474 | * 1) rx_handler_result_t br_handle_frame(struct sk_buff **pskb) 475 | * 2) int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 476 | * 3) unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 477 | * 4) int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 478 | * 5) int br_pass_frame_up(struct sk_buff *skb) 479 | * 6) int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) 480 | * 7) void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig) 481 | * 8) int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 482 | * 9) unsigned int br_nf_forward_ip(void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 483 | * 10)int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 484 | * 11)unsigned int br_nf_post_routing(void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 485 | * 12)int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 486 | */ 487 | int kprobe__br_handle_frame(struct pt_regs *ctx, struct sk_buff **pskb) 488 | { 489 | return do_trace(ctx, *pskb, __func__+8, NULL); 490 | } 491 | 492 | int kprobe__br_handle_frame_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 493 | { 494 | return do_trace(ctx, skb, __func__+8, NULL); 495 | } 496 | 497 | int kprobe__br_nf_pre_routing(struct pt_regs *ctx, void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 498 | { 499 | return do_trace(ctx, skb, __func__+8, NULL); 500 | } 501 | 502 | int kprobe__br_nf_pre_routing_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct 
sk_buff *skb) 503 | { 504 | return do_trace(ctx, skb, __func__+8, NULL); 505 | } 506 | 507 | int kprobe__br_pass_frame_up(struct pt_regs *ctx, struct sk_buff *skb) 508 | { 509 | return do_trace(ctx, skb, __func__+8, NULL); 510 | } 511 | 512 | int kprobe__br_netif_receive_skb(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 513 | { 514 | return do_trace(ctx, skb, __func__+8, NULL); 515 | } 516 | 517 | int kprobe__br_forward(struct pt_regs *ctx, const void *to, struct sk_buff *skb, bool local_rcv, bool local_orig) 518 | { 519 | return do_trace(ctx, skb, __func__+8, NULL); 520 | } 521 | 522 | int kprobe____br_forward(struct pt_regs *ctx, const void *to, struct sk_buff *skb, bool local_orig) 523 | { 524 | return do_trace(ctx, skb, __func__+8, NULL); 525 | } 526 | 527 | int kprobe__deliver_clone(struct pt_regs *ctx, const void *prev, struct sk_buff *skb, bool local_orig) 528 | { 529 | return do_trace(ctx, skb, __func__+8, NULL); 530 | } 531 | 532 | int kprobe__br_forward_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 533 | { 534 | return do_trace(ctx, skb, __func__+8, NULL); 535 | } 536 | 537 | int kprobe__br_nf_forward_ip(struct pt_regs *ctx, void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 538 | { 539 | return do_trace(ctx, skb, __func__+8, NULL); 540 | } 541 | 542 | int kprobe__br_nf_forward_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 543 | { 544 | return do_trace(ctx, skb, __func__+8, NULL); 545 | } 546 | 547 | int kprobe__br_nf_post_routing(struct pt_regs *ctx, void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 548 | { 549 | return do_trace(ctx, skb, __func__+8, NULL); 550 | } 551 | 552 | int kprobe__br_nf_dev_queue_xmit(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 553 | { 554 | return do_trace(ctx, skb, __func__+8, NULL); 555 | } 556 | 557 | /* 558 | * ip layer: 559 | * 1) int ip_rcv(struct sk_buff 
*skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 560 | * 2) int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 561 | * 3) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) 562 | * 4) int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 563 | * 5) int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 564 | * 6) ... 565 | */ 566 | 567 | int kprobe__ip_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 568 | { 569 | return do_trace(ctx, skb, __func__+8, NULL); 570 | } 571 | 572 | int kprobe__ip_rcv_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 573 | { 574 | return do_trace(ctx, skb, __func__+8, NULL); 575 | } 576 | 577 | int kprobe__ip_output(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 578 | { 579 | return do_trace(ctx, skb, __func__+8, NULL); 580 | } 581 | 582 | int kprobe__ip_finish_output(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 583 | { 584 | return do_trace(ctx, skb, __func__+8, NULL); 585 | } 586 | 587 | #endif 588 | 589 | #if __BCC_iptable 590 | static int 591 | __ipt_do_table_in(struct pt_regs *ctx, struct sk_buff *skb, 592 | const struct nf_hook_state *state, struct xt_table *table) 593 | { 594 | u32 pid = bpf_get_current_pid_tgid(); 595 | 596 | struct ipt_do_table_args args = { 597 | .skb = skb, 598 | .state = state, 599 | .table = table, 600 | }; 601 | args.start_ns = bpf_ktime_get_ns(); 602 | cur_ipt_do_table_args.update(&pid, &args); 603 | 604 | return 0; 605 | }; 606 | 607 | static int 608 | __ipt_do_table_out(struct pt_regs * ctx, struct sk_buff *skb) 609 | { 610 | struct event_t event = {}; 611 | union ___skb_pkt_type type = {}; 612 | struct ipt_do_table_args *args; 613 | u32 pid = bpf_get_current_pid_tgid(); 614 | 615 | args = 
cur_ipt_do_table_args.lookup(&pid); 616 | if (args == 0) 617 | return 0; 618 | 619 | cur_ipt_do_table_args.delete(&pid); 620 | 621 | if (do_trace_skb(&event, ctx, args->skb, NULL) < 0) 622 | return 0; 623 | 624 | event.flags |= ROUTE_EVENT_IPTABLE; 625 | event.ipt_delay = bpf_ktime_get_ns() - args->start_ns; 626 | member_read(&event.hook, args->state, hook); 627 | member_read(&event.pf, args->state, pf); 628 | member_read(&event.tablename, args->table, name); 629 | event.verdict = PT_REGS_RC(ctx); 630 | event.skb=args->skb; 631 | bpf_probe_read(&type.value, 1, ((char*)args->skb) + offsetof(typeof(*args->skb), __pkt_type_offset)); 632 | event.pkt_type = type.pkt_type; 633 | 634 | event.start_ns = bpf_ktime_get_ns(); 635 | CALL_STACK(ctx, &event); 636 | route_event.perf_submit(ctx, &event, sizeof(event)); 637 | 638 | return 0; 639 | } 640 | 641 | int kprobe__ipt_do_table(struct pt_regs *ctx, struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) 642 | { 643 | return __ipt_do_table_in(ctx, skb, state, table); 644 | }; 645 | 646 | /* 647 | * tricky: use ebx as the 1st parms, thus get skb 648 | */ 649 | int kretprobe__ipt_do_table(struct pt_regs *ctx) 650 | { 651 | struct sk_buff *skb=(void*)ctx->bx; 652 | return __ipt_do_table_out(ctx, skb); 653 | } 654 | #endif 655 | 656 | /* Drop tracing (compiled only when __BCC_dropstack is non-zero): kprobe handler for __kfree_skb() that fills an event for the skb, tags it ROUTE_EVENT_DROP and submits it through route_event. Always returns 0. */ 657 | #if __BCC_dropstack 658 | int kprobe____kfree_skb(struct pt_regs *ctx, struct sk_buff *skb) 659 | { 660 | struct event_t event = {}; 661 | /* bail out without emitting anything when do_trace_skb() returns a negative value */ 662 | if (do_trace_skb(&event, ctx, skb, NULL) < 0) 663 | return 0; 664 | /* __func__+8 skips the 8-character "kprobe__" prefix, leaving the probed function's name */ 665 | event.flags |= ROUTE_EVENT_DROP; 666 | event.start_ns = bpf_ktime_get_ns(); 667 | bpf_strncpy(event.func_name, __func__+8, FUNCNAME_MAX_LEN); 668 | get_stack(ctx, &event); 669 | route_event.perf_submit(ctx, &event, sizeof(event)); /* event is a struct value, not a pointer: submit its address and size, as __ipt_do_table_out() does */ 670 | return 0; 671 | } 672 | #endif 673 | 674 | #if 0 675 | int kprobe__ip6t_do_table(struct pt_regs *ctx, struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) 676 | { 677 | return 
__ipt_do_table_in(ctx, skb, state, table); 678 | }; 679 | 680 | int kretprobe__ip6t_do_table(struct pt_regs *ctx) 681 | { 682 | return __ipt_do_table_out(ctx); 683 | } 684 | #endif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EBPF开发总结 2 | 3 | ### 编译内核支持BTF 4 | 5 | 如果在代码中有`#include `,使用了`BPF_CORE_READ`宏,在user程序中`bpf_object__load`就会报如下错误。 6 | 7 | ``` 8 | libbpf: failed to find valid kernel BTF 9 | libbpf: Error loading vmlinux BTF: -3 10 | libbpf: failed to load object 'tp_execve_kern' 11 | libbpf: failed to load BPF skeleton 'tp_execve_kern': -3 12 | failed to load BPF object: -3 13 | ``` 14 | 15 | BTF,即BPF Type Format,它通过pahole将DWARF调试信息转化得到,但是没有那么generic和verbose。它是一种空间高效的、紧凑的、有足够表达能力的格式,一个常用内核的BTF仅需要1-5MB,足以描述C程序的所有类型信息。由于它的简单性和BPF去重算法,对比DWARF,BTF能够缩小100x的尺寸。现在,在运行时总是保留BTF信息是常见做法,它对应内核选项 CONFIG_DEBUG_INFO_BTF=y。 16 | 17 | BTF能够用来增强BPF verifier的能力,**能够允许BPF代码直接访问内核内存,不需要 bpf_probe_read**()。 18 | 19 | 编译支持BTF的内核。 20 | 21 | - .config文件设置CONFIG_DEBUG_INFO_BTF=y,让 Linux 内核在运行时(runtime)一直携带 BTF 信息是可行的 22 | 23 | - 如果是oraclelinux系统,需要codeready仓库才能安装下面的包,dnf config-manager --set-enabled ol8_codeready_builder 24 | 25 | - 安装dwarves。**yum -y install libdwarves1.x86_64 dwarves.x86_64**,如果配置了CONFIG_DEBUG_INFO_BTF=y则必须安装该包 26 | 27 | - 安装**pahole**。一定要用v1.22,发现4.18.0的内核还是用v1.20比较好 28 | 29 | ``` 30 | git clone https://git.kernel.org/pub/scm/devel/pahole/pahole.git 31 | git checkout v1.22 32 | cmake -D__LIB="lib" -DCMAKE_INSTALL_PREFIX="/usr/local" -DEXEC_INSTALL_PREFIX="" . 
33 | make 34 | make install 35 | ``` 36 | 37 | - 在内核源码执行make vmlinux,检查/sys/kernel/btf/vmlinux 38 | 39 | ### BPF Verifier校验报错 40 | 41 | BPF Verifier输出unbounded memory access,需要判断args空间是否足够读取ARGSIZE,没有这个判断就校验就会报错。 42 | 43 | ``` 44 | // 这行代码非常重要,如果不加上,下面代码是没法判断空间是否足够读取ARGSIZE这多字节的。而且BPF Verifier会报错 45 | if ( event->args_size > LAST_ARG ) 46 | return 0; 47 | 48 | // 读取参数内容 49 | ret = bpf_probe_read_user_str( &event->args[ event->args_size ], ARGSIZE, argp ); 50 | if ( ret > ARGSIZE ) { 51 | printk( "argv[%d] size: %d larger than ARGSIZE", i, ret ); 52 | return 0; 53 | } 54 | ``` 55 | 56 | ### eBPF用户程序中的全局变量 57 | 58 | user程序如何初始化kern程序中的变量达到控制效果。patch有个说明 [[v3,bpf-next,1/3\] bpf: add mmap() support for BPF_MAP_TYPE_ARRAY - Patchwork (ozlabs.org)](https://patchwork.ozlabs.org/project/netdev/patch/20191113031518.155618-2-andriin@fb.com/) 。 59 | 60 | 要使用libbpf中`bpf_object__load_skeleton`这个方法做mmap达到改变变量值的效果。 61 | 62 | 变量放在名字是rodata,类型是BPF_MAP_TYPE_ARRAY的map中。这是用户态程序调用的参数: 63 | 64 | ``` 65 | bpf(BPF_MAP_CREATE, {map_type=BPF_MAP_TYPE_ARRAY, key_size=4, value_size=9, max_entries=1, map_flags=BPF_F_RDONLY_PROG|BPF_F_MMAPABLE, inner_map_fd=0, map_name="tp_execv.rodata", map_ifindex=0, btf_fd=3, btf_key_type_id=0, btf_value_type_id=65, btf_vmlinux_value_type_id=0}, 120) = 6 66 | ``` 67 | 68 | 内核代码:SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size),kernel_src\kernel\bpf\syscall.c 69 | 70 | 首先会找到BPF_MAP_TYPE_ARRAY对应的操作op对象,`BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)`,然后构造map对象,`static struct bpf_map *array_map_alloc(union bpf_attr *attr)`。 71 | 72 | ``` 73 | /* allocate all map elements and zero-initialize them */ 74 | if (attr->map_flags & BPF_F_MMAPABLE) { 75 | void *data; 76 | 77 | /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ 78 | data = bpf_map_area_mmapable_alloc(array_size, numa_node); 79 | if (!data) 80 | return ERR_PTR(-ENOMEM); 81 | array = data + PAGE_ALIGN(sizeof(struct bpf_array)) 82 | - offsetof(struct bpf_array, 
value); 83 | } 84 | ``` 85 | 这里面很核心的flag是BPF_F_MMAPABLE,它会在最终的空间分配中带上flags = VM_USERMAP这个标志位。 86 | 87 | 在函数`static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)`的代码中 88 | 89 | ``` 90 | /* kmalloc()'ed memory can't be mmap()'ed */ 91 | if (mmapable) { 92 | BUG_ON(!PAGE_ALIGNED(size)); 93 | align = SHMLBA; 94 | flags = VM_USERMAP; 95 | ``` 96 | 那么这个map的地址空间分配为何要使用**VM_USERMAP**这个标志呢? 97 | 98 | 1. ###### 就是实现了mmap,vmalloc_user + remap_vmalloc_range 99 | 100 | * vmalloc申请一段不连续的物理地址空间,映射到连续的内核虚拟地址上。 101 | * vmalloc_user申请一段不连续的物理地址空间,映射到连续的虚拟地址给user space使用。疑问,这个地址是在User Addresses范围内?不在User Addresses范围,而是在Kernel Addresses范围,只是在分配的vma打上VM_USERMAP的标志。相当于在内核连续地址空间范围内标识一块范围,这个是用户空间使用的。 102 | * vmalloc_user的帮助说明,用于申请一段虚拟地址连续的内存给user space使用,一般情况下这段虚拟内存是当前进程空间的,因此会给它添加一个VM_USERMAP的flag,防止将kernel space的数据泄露到user space。 103 | * vmalloc_user的实践。看到分配的地址是大于0xffff8000000000的,还是内核地址空间。https://www.coolcou.com/linux-kernel/linux-kernel-memory-management-api/the-linux-kernel-vmalloc-user.html 104 | * VM_USERMAP,也是配合函数remap_vmalloc_range使用的,因为这块地址是要用在User Addresses的,所以要重新进行映射,remap_vmalloc_range - map vmalloc pages to userspace。 105 | 106 | 2. 
小结 107 | 108 | - 全局变量使用bpf_object__init_global_data_maps 109 | 110 | - 这个map对应类型是BPF_MAP_TYPE_ARRAY,加上了BPF_F_MMAPABLE标志位,支持内存映射。.map_alloc = array_map_alloc, 111 | 112 | ``` 113 | if (attr->map_flags & BPF_F_MMAPABLE) { 114 | void *data; 115 | 116 | /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ 117 | data = bpf_map_area_mmapable_alloc(array_size, numa_node); 118 | if (!data) 119 | return ERR_PTR(-ENOMEM); 120 | array = data + PAGE_ALIGN(sizeof(struct bpf_array)) 121 | 122 | - offsetof(struct bpf_array, value); 123 | 124 | } 125 | ``` 126 | 127 | if (mmapable) { 128 | BUG_ON(!PAGE_ALIGNED(size)); 129 | align = SHMLBA; 130 | flags = VM_USERMAP; 131 | 132 | - BPF_F_MMAPABLE的目的是实现内存映射的效果,让用户应用程序可以直接访问内核地址空间。用户空间和内核空间共享数据空间,数据存放在物理内存。在创建带有该标志位的MAP时,使用VM_USERMAP来分配内存 133 | 134 | - 每个bpf map的mmap,首先每个bpf map有个fd。 135 | 136 | ``` 137 | int bpf_map_new_fd(struct bpf_map *map, int flags) 138 | { 139 | int ret; 140 | ret = security_bpf_map(map, OPEN_FMODE(flags)); 141 | if (ret < 0) 142 | return ret; 143 | return anon_inode_getfd("bpf-map", &bpf_map_fops, map, 144 | flags | O_CLOEXEC); 145 | } 146 | ``` 147 | 148 | 这里有个bpf_map_fops,上面绑定了fd的对应操作 149 | 150 | ``` 151 | const struct file_operations bpf_map_fops = { 152 | #ifdef CONFIG_PROC_FS 153 | .show_fdinfo = bpf_map_show_fdinfo, 154 | #endif 155 | .release = bpf_map_release, 156 | .read = bpf_dummy_read, 157 | .write = bpf_dummy_write, 158 | .mmap = bpf_map_mmap, 159 | .poll = bpf_map_poll, 160 | }; 161 | ``` 162 | 163 | bpf_map_mmap就实现了内存映射功能,调用了err = map->ops->map_mmap(map, vma);针对BPF_MAP_TYPE_ARRAY这种类型的map,.map_mmap = array_map_mmap,最终是调用了remap_vmalloc_range函数。 164 | 165 | ``` 166 | static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) 167 | { 168 | struct bpf_array *array = container_of(map, struct bpf_array, map); 169 | pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT; 170 | 171 | if (!(map->map_flags & BPF_F_MMAPABLE)) 172 | return -EINVAL; 173 | 174 | if (vma->vm_pgoff * 
PAGE_SIZE + (vma->vm_end - vma->vm_start) > 175 | PAGE_ALIGN((u64)array->map.max_entries * array->elem_size)) 176 | return -EINVAL; 177 | 178 | return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), 179 | vma->vm_pgoff + pgoff); 180 | 181 | } 182 | ``` 183 | 184 | ### dump出对应的源码和bpf指令,在verifier报错后可检查指令 185 | 186 | ``` 187 | llvm-objdump -S --no-show-raw-insn tp_execve.kern.o 188 | ``` 189 | 190 | ### bfptool工具生成xxx.skel.h文件 191 | 192 | 解除对xxx.kern.o的依赖。程序中不用`bpf_object__load`。bpftool gen skeleton %.kern.o > %.skel.h。 193 | 194 | ``` 195 | $(patsubst %,%.skel.h,$(APP_TAG)): $(patsubst %,%.kern.o,$(APP_TAG)) 196 | $(call msg,GEN-SKEL,$@) 197 | $(Q)$(BPFTOOL) gen skeleton $< > $@ 198 | ``` 199 | 200 | ### Open bpf kernel object 201 | 202 | 创建struct bpf_object*对象。加载obj文件用`bpf_object__open_file`,在skel.h中创建obj使用`bpf_object__open_mem` 203 | 204 | ### 查看正在使用BPF Map 205 | 206 | ``` 207 | [root@Thor-CI ~]# bpftool map 208 | 791: hash name execve_hash flags 0x0 209 | key 4B value 2600B max_entries 1024 memlock 2670592B 210 | btf_id 655 211 | 792: perf_event_array name execve_perf_evt flags 0x0 212 | key 4B value 4B max_entries 128 memlock 4096B 213 | 793: array name tp_execv.rodata flags 0x480 214 | key 4B value 9B max_entries 1 memlock 4096B 215 | btf_id 655 frozen 216 | 794: array name tp_execv.bss flags 0x400 217 | key 4B value 2600B max_entries 1 memlock 4096B 218 | btf_id 655 219 | ``` 220 | 221 | ``` 222 | [root@Thor-CI ~]# bpftool map dump name tp_execv.rodata 223 | [{ 224 | "value": { 225 | ".rodata": [{ 226 | "max_args": 20 227 | },{ 228 | "target_uid": 4294967295 229 | },{ 230 | "ignore_failed": true 231 | } 232 | ] 233 | } 234 | } 235 | ] 236 | ``` 237 | 238 | 查看map的数据 239 | 240 | ``` 241 | [root@localhost ~]# bpftool map dump id 283 242 | [{ 243 | "key": 0, 244 | "values": [{ 245 | "cpu": 0, 246 | "value": { 247 | "rx_packets": 0, 248 | "rx_bytes": 0 249 | } 250 | },{ 251 | "cpu": 1, 252 | "value": { 253 | "rx_packets": 0, 254 | "rx_bytes": 0 255 | } 256 | 
},{ 257 | "cpu": 2, 258 | "value": { 259 | "rx_packets": 0, 260 | "rx_bytes": 0 261 | } 262 | ``` 263 | 264 | ### bpf_trace_printk的限制 265 | 266 | - 最大只支持3个参数。 267 | 268 | - 所有程序的输出共享 `/sys/kernel/debug/tracing/trace_pipe` 文件。 269 | 270 | - 该实现方式在数据量大的时候,性能也存在一定的问题。 271 | 272 | - format只支持%d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s。 273 | 274 | ### 创建BPF_MAP_TYPE_SOCKMAP、BPF_MAP_TYPE_SOCKHASH两种类型的map失败 275 | 276 | ``` 277 | libbpf: Error in bpf_create_map_xattr(sock_ops_map):Invalid argument(-22). Retrying without BTF. 278 | libbpf: map 'sock_ops_map': failed to create: Invalid argument(-22) 279 | ``` 280 | 281 | 原因是内核编译时没有配置 CONFIG_BPF_STREAM_PARSER ,在代码中可以查看到 282 | 283 | ``` 284 | #if defined(CONFIG_BPF_STREAM_PARSER) 285 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) 286 | BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) 287 | #endif 288 | ``` 289 | 290 | 配置.config,CONFIG_BPF_STREAM_PARSER=y,重新编译内核。最好配置debug模式。 291 | 292 | ``` 293 | CONFIG_DEBUG_INFO_BTF=y 294 | CONFIG_BPF_STREAM_PARSER=y 295 | CONFIG_DEBUG_INFO=y # with debug symbols 296 | ``` 297 | 298 | ``` 299 | struct bpf_map_def SEC( "maps" ) sock_ops_map = { 300 | .type = BPF_MAP_TYPE_SOCKHASH, 301 | .key_size = sizeof(struct sock_key), 302 | .value_size = sizeof(int), 303 | .max_entries = 65535, 304 | .map_flags = 0, 305 | }; 306 | ``` 307 | 308 | 上面的定义方式能**正确运行**,如果使用下面的方式创建map时会报错:Error in bpf_create_map_xattr(sock_ops_map):ERROR: strerror_r(-524)=22(-524)。 309 | 310 | ``` 311 | struct { 312 | __uint( type, BPF_MAP_TYPE_SOCKHASH ); 313 | __uint( max_entries, 65535 ); 314 | __type( key, struct sock_key ); 315 | __type( value, __s32 ); 316 | __uint( map_flags, 0 ); 317 | __uint( key_size, sizeof( struct sock_key ) ); 318 | __uint( value_size, sizeof( __s32 ) ); 319 | } sock_ops_map_1 SEC( ".maps" ); 320 | ``` 321 | 322 | 但其它类型的map却没有问题,例如BPF_MAP_TYPE_HASH,这种差异问题需要深入研究代码,查看内核源码是可以按上面的编写方式的。 323 | 324 | ``` 325 | struct { 326 | __uint(type, BPF_MAP_TYPE_HASH); 327 | 
__uint(max_entries, 64); 328 | __type(key, __u32); 329 | __type(value, __u64); 330 | } sockhash SEC(".maps"); 331 | ``` 332 | 333 | 加载prog的命令 **bpftool prog load tcp_accelerate_sockops.kern.o "/sys/fs/bpf/bpf_sockops"** 334 | 335 | ### bpftool cgroup attach使用cgroup V2 336 | 337 | 当前systemd支持三种cgroup模式,分别是 338 | 339 | 1. legacy, 采用 cgroup v1 340 | 2. hybrid,混杂模式,既挂载 cgroup v1 也挂载 cgroup v2, 但是在该模式下,cgroup v2 下不使能任何 controller,不用于资源管理,参考[systemd 模式说明](https://github.com/systemd/systemd/pull/10161/files) 341 | 3. unified, 纯粹使用 cgroup v2 342 | 343 | 检查当前系统是否支持cgroup v2 344 | 345 | ``` 346 | [root@Thor-CI sockredir]# grep cgroup /proc/filesystems 347 | nodev cgroup 348 | nodev cgroup2 349 | ``` 350 | 351 | 在内核中开启cgroup v2 352 | 353 | ``` 354 | grubby --update-kernel=ALL --args="systemd.unified_cgroup_hierarchy=1" 355 | reboot 356 | ``` 357 | 358 | 检查cgroup v2是否生效 359 | 360 | ``` 361 | [root@Thor-CI sockredir]# mount | grep cgroup 362 | cgroup2 on /sys/fs/cgroup type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate) 363 | tmpfs on /usr/local/aegis/cgroup type tmpfs (rw,relatime,size=51200k) 364 | ``` 365 | 366 | 相关资料 367 | 368 | - [centos8使用grubby修改内核启动参数 - TinyChen's Studio](https://tinychen.com/20201118-centos8-use-grubby-modify-kernel/) 369 | 370 | - [详解Cgroup V2 | Zorro’s Linux Book (zorrozou.github.io)](https://zorrozou.github.io/docs/详解Cgroup V2.html) 371 | 372 | - [Cgroup V2 Notes | Lifeng (gitee.io)](https://lifeng2221dd1.gitee.io/2020/11/12/cgroup-v2/) 373 | 374 | ### SEC("name")和prog _type、attach_type关系 375 | 376 | 文件libbpf.c中定义了name和prog_type与attach_type的对应关系。部分如下。 377 | 378 | ``` 379 | static const struct bpf_sec_def section_defs[] = { 380 | SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), 381 | SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), 382 | SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), 383 | SEC_DEF("kprobe+", 
KPROBE, 0, SEC_NONE, attach_kprobe), 384 | SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 385 | SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 386 | SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 387 | SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 388 | SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 389 | SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), 390 | SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), 391 | ...... 392 | ``` 393 | 394 | 在实际开发中,**sec只要前缀匹配就能对应上type**。 395 | 396 | ``` 397 | static const struct bpf_sec_def *find_sec_def(const char *sec_name) 398 | { 399 | int i, n = ARRAY_SIZE(section_defs); 400 | for (i = 0; i < n; i++) { 401 | if (strncmp(sec_name, section_defs[i].sec, section_defs[i].len)) 402 | continue; 403 | return &section_defs[i]; 404 | } 405 | return NULL; 406 | } 407 | ``` 408 | 409 | 注册的时候,明确了sec的len。 410 | 411 | ``` 412 | #define SEC_DEF(sec_pfx, ptype, ...) 
\ 413 | { \ 414 | .sec = sec_pfx, .len = sizeof(sec_pfx) - 1, \ 415 | .prog_type = BPF_PROG_TYPE_##ptype, __VA_ARGS__ \ 416 | } 417 | ``` 418 | 419 | ### eBPF的所有hooks 420 | 421 | 查看完整的ebpf hooks列表,文件/uapi/linux/bpf.h中,枚举类型*enum* bpf_attach_type 就是所有的hook点。在libbpf.c代码中通过函数libbpf_prog_type_by_name传入sec name可以获取对应的prog type和attach type。 422 | 423 | ``` 424 | (gdb) p sec_name 425 | $5 = 0x872f70 "sockops" 426 | (gdb) n 427 | 1518 bpf_program__set_ifindex(pos, ifindex); 428 | (gdb) p pos 429 | $6 = (struct bpf_program *) 0x872e90 430 | (gdb) p ifindex 431 | $7 = 0 432 | (gdb) p expected_attach_type 433 | $8 = BPF_CGROUP_SOCK_OPS 434 | (gdb) p prog_type 435 | $9 = BPF_PROG_TYPE_SOCK_OPS 436 | ``` 437 | 438 | 上面的gdb调试结果可清晰的显示这种关系。这篇文章对prog type有详细的说明,[BPF: A Tour of Program Types (oracle.com)](https://blogs.oracle.com/linux/post/bpf-a-tour-of-program-types) 439 | 440 | ### eBPF对象持久化,文件系统/sys/fs/bpf 441 | 442 | [Persistent BPF objects [LWN.net\]](https://lwn.net/Articles/664688/),一般我们会编写一个user space的程序来加载kern的prog,这样ebpf程序的生命周期和用户态程序一致,监控采集显示的程序基本如此。可有些模式下的prog程序是需要类似守护,例如流量控制,转发控制这些,所以在kernel4.4版本提供了持久化能力。会创建一个pin fd在该文件系统下,这个fd就代表一个ebpf object。如果要unpin这个ebpf object,可以直接删除这个文件。 443 | 444 | ``` 445 | err = bpf_obj_pin(bpf_program__fd(prog), pinfile); 446 | err = bpf_object__pin_maps(obj, pinmaps); 447 | ``` 448 | 449 | 使用bpftool命令来持久化ebpf object: 450 | 451 | ``` 452 | [root@192 linux]# bpftool prog load tcp_accelerate_sockops.kern.o "/sys/fs/bpf/bpf_sockops" 453 | [root@192 linux]# bpftool prog 454 | 8: sock_ops name bpf_sockops_v4 tag 532c5c6d79f1461d gpl 455 | loaded_at 2021-10-01T14:26:57+0800 uid 0 456 | xlated 936B jited 533B memlock 4096B map_ids 6 457 | btf_id 4 458 | [root@192 tcp_accelerate]# rm /sys/fs/bpf/bpf_sockops 459 | rm: remove regular empty file '/sys/fs/bpf/bpf_sockops'? 
y 460 | ``` 461 | 462 | ### 编译内核以及eBPF代码 463 | 464 | - [RPM Search (pbone.net)](http://rpm.pbone.net/)搜索源码rpm包, 465 | - [Index of /Linux/cern/centos/7/updates/Source/SPackages (riken.jp)](http://ftp.riken.jp/Linux/cern/centos/7/updates/Source/SPackages/)这个网站 466 | - centos9的源码:[CentOS Stream Mirror](http://mirror.stream.centos.org/9-stream/BaseOS/source/tree/Packages/), 467 | - centos8以及之前的内核源码:[Index of / (centos.org)](https://vault.centos.org/) 468 | 469 | 下载好源码包执行下面命令,解压内核代码 470 | 471 | rpm2cpio kernel-4.18.0-305.el8.src.rpm | cpio -idmv 472 | rpm -ivh kernel-4.18.0-305.el8.src.rpm ----》 安装到/root/rpmbuild/SOURCES 473 | xz -d linux-4.18.0-305.el8.tar.xz 474 | tar -xvf linux-4.18.0-305.el8.tar -C /usr/src 475 | 476 | 配置内核,编译ebpf sample、libbpf.a 477 | 478 | 在最上层makefile中可以设置EXTRAVERSION版本号。 479 | 480 | ``` 481 | VERSION = 4 482 | PATCHLEVEL = 18 483 | SUBLEVEL = 0 484 | EXTRAVERSION = 485 | NAME = calmwu-linux 486 | ``` 487 | 488 | ``` 489 | make mrproper # 在编译内核模块时,会用到make mrproper,目的是把下载的内核还原到初始状态(清除掉.o文件,清除掉一些在make之后生成的备份文件,甚至还清除了.config配置文件)。 在make mrproper时,会首先调用make clean 490 | cp -v /boot/config-$(uname -r) .config 491 | 将.config中配置改为CONFIG_SYSTEM_TRUSTED_KEYS="",或者scripts/config --disable SYSTEM_TRUSTED_KEYS,scripts/config --disable SYSTEM_REVOCATION_KEYS。 492 | make menuconfig 493 | make -j8 494 | make headers_install # /usr/include/linux 495 | make modules -j8 # 编译内核模块 496 | make vmlinux # 编译vmlinux内核映像 497 | make bzImage -j8 # 生成压缩的内核映像 498 | make install # 安装内核映像 499 | make modules_install 500 | ``` 501 | 502 | 在编译的过程中FAILED: load BTF from vmlinux: Invalid argument遇到这种问题,是link-vmlinux.sh脚本中对pahole版本判断加入特性导致的。这个时候要查看脚本确定合适的pahole版本来解决 503 | 504 | ``` 505 | make M=samples/bpf V=1 506 | cd tools/bpf/bpftool 507 | make V=1 508 | make install 509 | ``` 510 | 511 | 编译遇到如下报错时: 512 | 513 | ``` 514 | ./include/linux/page-flags-layout.h:6:10: fatal error: 'generated/bounds.h' file not found 515 | ./include/linux/jiffies.h:13:10: fatal error: 
'generated/timeconst.h' file not found 516 | ``` 517 | 518 | 先执行下make -j 4,编译下内核源码,这些文件就会生成,编译参考文档:[How to compile and install Linux Kernel 5.6.9 from source code - nixCraft (cyberciti.biz)](https://www.cyberciti.biz/tips/compiling-linux-kernel-26.html) 519 | 520 | ### eBPF程序的安全性 521 | 522 | - 字节码只能够调用一小部分指定的 eBPF 帮助函数 523 | - eBPF程序不允许包含无法到达的指令,防止加载无效代码,延迟程序的终止。 524 | - eBPF 程序中循环次数限制且必须在有限时间内结束。 525 | 526 | ### BPF系统调用 527 | 528 | - 内核:uapi/linux/bpf.h、tools/lib/bpf/bpf_helper_defs.h 文件中,*enum* bpf_func_id定义的都是可直接调用的helper functions。 529 | - 用户:[LIBBPF API — libbpf documentation](https://libbpf.readthedocs.io/en/latest/api.html) 530 | - CO-RE:tools/lib/bpf/bpf_core_read.h 531 | 532 | ### BPF程序字段访问 533 | 534 | ``` 535 | task = (struct task_struct *)bpf_get_current_task(); 536 | pid = BPF_PROBE_READ(task, real_parent, pid); 537 | ``` 538 | 539 | BPF_PROBE_READ实际还是通过bpf_probe_read_kernel,基于编译的内核vmlinux.h来读取字段的。如果和目标内核结构偏移量存在差异,读取的数据不是预期的。 540 | 541 | libbpf` 提供了新的宏 `**BPF_CORE_READ**`,它使用 `__builtin_preserve_access_index` 包住被读取的内核空间地址,比如 `task->real_parent`, `real_parent->pid`,那么在编译阶段会将访问路径 `0:57`,`0:54` 作为 relocation 的符号保存在 `.BTF.ext` section 里,其中 `57` 和 `54` 分别是 `real_parent` 和 `pid` 在 v5.8 内核 `task_struct` 结构体里的第 57 和 54 个字段。加载器会根据访问路径来比较源和当前内核对应数据结构的差异。当找到匹配对象后,那么将会以当前内核的偏移地址来修改原先的指令,保证程序可以正确运行;否则将会加载失败。 542 | 543 | ### 函数bpf_map_update_elem 544 | 545 | 函数参数: 546 | 547 | BPF_ANY:0,表示如果元素存在,内核将更新元素;如果不存在,则在映射中创建该元素。 548 | BPF_NOEXIST:1,表示仅在元素不存在时,内核创建元素。 549 | BPF_EXIST:2,表示仅在元素存在时,内核更新元素。 550 | 551 | 内核头文件bpf/bpf_helpers.h,用户空间程序头文件tools/lib/bpf/bpf.h 552 | 用户空间修改映射,区别在于第一个参数改为文件描述符来访问。 553 | 554 | ### 使用Perf Event 555 | 556 | 将BPF代码附加到Perf事件上。Perf事件程序类型定义为BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),Perf是内核的内部分析器,可以产生硬件和软件的性能数据事件。 557 | 558 | 我们可以用Perf事件程序监控很多系统信息,从计算机的CPU到系统中运行的任何软件。当BPF程序附加到Perf事件上时,每次Perf产生分析数据时,程序代码都将被执行,SEC("perf_event")。 559 | 560 | ### 使用bpf_get_stackid获取进程用户态、内核态堆栈 561 | 562 | - 应用程序的函数地址转换为symbols 
name。查看程序elf格式的section,所有symbols信息保存在.symtab 表中。 563 | 564 | readelf --section-headers ./cachestat_cli 565 | readelf --syms ./cachestat_cli 566 | 567 | - 基于软件事件**PERF_TYPE_SOFTWARE**,config描述 568 | 569 | PERF_COUNT_SW_CPU_CLOCK:它报告CPU时钟,即每个CPU的高分辨率计时器,进程堆栈采集使用该事件。 570 | PERF_COUNT_SW_PAGE_FAULTS:这将报告页面错误数 571 | 572 | - perf_event_open函数参数 573 | 574 | pid == 0 && cpu == -1:这可以测量任何CPU上的调用进程/线程。 575 | pid == 0 && cpu >= 0:仅当在指定的CPU上运行时,才测量调用进程/线程。 576 | pid > 0 && cpu == -1:这将测量任何CPU上的指定进程/线程。 577 | pid > 0 && cpu >= 0:仅当在指定的CPU上运行时,才测量指定的进程/线程。 578 | pid == -1 && cpu >= 0:这将测量指定CPU上的所有进程/线程。这需要CAP_SYS_ADMIN功能或/ proc / sys / kernel / perf_event_paranoid值小于1。 579 | pid == -1 && cpu == -1:此设置无效,将返回错误。 580 | 581 | - 用户空间栈帧的内存地址到函数名转换。 582 | 583 | BPF_F_USER_STACK标志可以获取用户空间堆栈列表,栈帧中保存的都是虚拟内存地址,将地址转变为源代码中的函数名(demangle) 584 | 585 | - /proc/pid/maps文件,拟地址在该文件列出的范围里。六列的信息依次为:本段在虚拟内存中的地址范围、本段的权限、偏移地址,即指本段映射地址在文件中的偏移、主设备号与次设备号、文件索引节点号、映射的文件名。kernel会将elf的代码段、数据段映射到虚拟地址空间。 586 | 587 | - 函数名在elf文件中,核心是**elf格式和vma之间的关系**,找到这种对应关系才能通过地址找到函数名。 588 | 589 | - elf是section,maps是segment,前者是链接视角,后者是运行视角。比如代码在链接时放到了text代码段,这个段就是section,同理还有data、bss等,可当执行文件被加载到进程VM中的不同区域时,这个段就是segment了。readelf -l /usr/libexec/netdata/plugins.d/apps.plugin,elf中**只有PT_LOAD段才会被加载到VMA中**。通过这个命令可以看到那些段被加载。(https://blog.csdn.net/rockrockwu/article/details/81707909),[c - relationship between VMA and ELF segments - Stack Overflow](https://stackoverflow.com/questions/33756119/relationship-between-vma-and-elf-segments) 590 | 591 | - segment和VMA并不是一一对应的关系,一个segment可能对应多个VMA。这是由segment中的section属性决定的。 592 | 593 | - readelf -s 第一列地址是It's (relative) virtual address。我实验的结果第一列就是虚拟地址。 594 | 595 | - print_stack() **0x00000000005414d0** rrddim_compare /usr/sbin/netdata 0x0,这是bpf_get_stackid返回的帧地址。 596 | 597 | - readelf -s 第一列地址是It's (relative) virtual address。我实验的结果第一列就是虚拟地址。 598 | 599 | - 但是对于动态库中的函数地址,可以通过/proc/pid/maps中module基地址+readelf第一列的相对地址+偏移量得到函数在地址空间的地址。0x00007f52ff67911b = 0xb + ef110 + 7f52ff58a000 600 | 601 | 
0x00007f52ff67911b __GI___readlink /usr/lib64/libc-2.28.so 0xb 602 | 603 | [root@localhost build]# readelf -s /usr/lib64/libc-2.28.so|grep __GI___readlink 604 | 23266: 00000000000ef110 37 FUNC LOCAL DEFAULT 14 __GI___readlink 605 | 606 | 7f52ff58a000-7f52ff746000 r-xp 00000000 fd:00 7445 /usr/lib64/libc-2.28.so 607 | 7f52ff746000-7f52ff945000 ---p 001bc000 fd:00 7445 /usr/lib64/libc-2.28.so 608 | 7f52ff945000-7f52ff949000 r--p 001bb000 fd:00 7445 /usr/lib64/libc-2.28.so 609 | 7f52ff949000-7f52ff94b000 rw-p 001bf000 fd:00 7445 /usr/lib64/libc-2.28.so 610 | 611 | ### 获取内核所使用的数据结构,解除对内核代码头文件的依赖 612 | 613 | ``` 614 | bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h 615 | ``` 616 | 617 | 判断系统是否支持BTF,这个文件可作为标志。BTF(BPF Type Format, BPF类型格式)是一个元数据的格式,用来将BPF程序的源代码信息编码到调试信息中。调试信息包括BPF程序、映射结构等很多其它信息。BTF调试信息可以内嵌到vmlinux二进制文件中,或者随BPF程序一同使用原生Clang编译时生成。除了描述BPF程序之外,BTF正在成为一个通用的、用来描述所有内核数据结构的格式,在某些方面,它正在成为内核调试信息文件的一种轻量级替代方案,而且比使用内核头文件更加完整和可靠。 618 | 619 | ### selinux和bpftool命令冲突 620 | 621 | 执行bpftool报错 622 | 623 | ``` 624 | [root@localhost pahole]# bpftool prog show 625 | Error: can't get prog by id (794): Permission denied 626 | [root@localhost pahole]# bpftool map show 627 | ``` 628 | 629 | 解决方式 630 | 631 | ``` 632 | ausearch -c 'bpftool' --raw | audit2allow -M my-bpftool 633 | semodule -X 300 -i my-bpftool.pp 634 | ``` 635 | 636 | ### profile eBPF程序 637 | 638 | kernel.bpf_stats_enabled,用来开启收集eBPF程序的状态信息,主要是run_time_ns和run_cnt这两个参数。前者代表内核累计花了多少时间运行这个BPF程序,后者是这个BPF程序累计运行了多少次。 639 | 640 | - 使用`bpftool prog show`命令,执行后直接显示结果 641 | - 使用`cat /proc/$PID/fdinfo/$FD`命令(PID为加载该BPF程序的进程号,FD为该BPF程序对应的文件描述符),执行后直接显示结果 642 | - 使用`BPF_OBJ_GET_INFO_BY_FD`的BPF系统调用方法,编程获取结果 643 | 644 | ### CO-RE 645 | 646 | 一次编译,到处运行,Compile Once – Run Everywhere,将它依赖的软件栈和数据集中在一起. 
647 | 648 | - BTF 类型信息:使得我们能获取内核、BPF 程序类型及 BPF 代码的关键信息, 这也是下面其他部分的基础。 649 | - 编译器(clang):给 BPF C 代码提供了表达能力和记录重定位(relocation)信息的能力。 650 | - BPF loader (libbpf):根据内核的BTF和BPF程序,调整编译后的BPF代码,使其适合在目标内核上运行。 651 | - 内核:虽然对 BPF CO-RE 完全不感知,但提供了一些 BPF 高级特性,使某些高级场景成为可能。 652 | 653 | ### cursor_advance宏的作用 654 | 655 | ``` 656 | /* Packet parsing state machine helpers. */ 657 | #define cursor_advance(_cursor, _len) \ 658 | ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) 659 | ``` 660 | 661 | 调用代码如下: 662 | 663 | ``` 664 | struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet)); 665 | ``` 666 | 667 | 代码结果等价于: 668 | 669 | ``` 670 | { 671 | void *__tmp = cursor; 672 | cursor += sizeof(*ethernet); 673 | ethernet = __tmp; 674 | } 675 | ``` 676 | 677 | ### 安装内核 678 | 679 | 列出仓库中内核:dnf --enablerepo="ol8_baseos_latest" --enablerepo="elrepo-kernel" list available | grep kernel 680 | 681 | 查看包信息:yum info kernel-4.18.0 682 | 683 | 安装内核:yum install kernel-4.18.0-348.7.1.el8_5 684 | 685 | 安装内核源码:在仓库地址[Oracle Linux 8 (x86_64) BaseOS Latest | Oracle, Software. Hardware. Complete.](https://yum.oracle.com/repo/OracleLinux/OL8/baseos/latest/x86_64/index_src.html),找到kernel-4.18.0-348.7.1.el8_5.src.rpm,下载安装。安装路径在/root/rpmbuild/SOURCES目录下。 686 | 687 | 查看安装的内核:rpm -qa|grep kernel 688 | 689 | 解压rpm源码包:cd /usr/src/kernels/,cp linux-4.18.0-348.7.1.el8_5.tar.xz ./,unxz linux-4.18.0-348.7.1.el8_5.tar.xz,tar xf linux-4.18.0-348.7.1.el8_5.tar 690 | 691 | ### bpftool工具使用 692 | 693 | ``` 694 | 1. bpftool prog dump xlated id 105。 695 | ``` 696 | 697 | ### XDP Action小结 698 | 699 | 1. XDP_DROP:在驱动层丢弃报文,通常用于实现DDos或防火墙。(drop)。 700 | 2. XDP_PASS:允许报文上送到内核网络栈,同时处理该报文的CPU会分配并填充一个`skb`,将其传递到GRO引擎。之后的处理与没有XDP程序的过程相同。 701 | 3. XDP_TX:BPF程序通过该选项可以将网络报文从接收到该报文的NIC上发送出去。例如当集群中的部分机器实现了防火墙和负载均衡时,这些机器就可以作为hairpinned模式的负载均衡,在接收到报文,经过XDP BPF修改后将该报文原路发送出去。(send)。 702 | 4. 
XDP_REDIRECT:与XDP_TX类似,但是通过另一个网卡将包发出去。另外, `XDP_REDIRECT` 还可以将包重定向到一个 BPF cpumap,即,当前执行 XDP 程序的 CPU 可以将这个包交给某个远端 CPU,由后者将这个包送到更上层的内核栈,当前 CPU 则继续在这个网卡执行接收和处理包的任务。这和 `XDP_PASS` 类似,但当前 CPU 不用去做将包送到内核协议栈的准备工作(分配 `skb`,初始化等等),这部分开销还是很大的。 703 | 5. XDP_ABORTED:表示程序产生了异常,其行为和 `XDP_DROP`相同,但 `XDP_ABORTED` 会经过 `trace_xdp_exception` tracepoint,因此可以通过 tracing 工具来监控这种非正常行为。 704 | 705 | 对于TX和REDIRECT操作,通常需要做一些数据包数据转换(例如重写mac地址)。 706 | 707 | ### XDP xdp_md结构 708 | 709 | ``` 710 | struct xdp_md { 711 | __u32 data; 712 | __u32 data_end; 713 | __u32 data_meta; 714 | __u32 ingress_ifindex; 715 | __u32 rx_queue_index; 716 | __u32 egress_ifindex; 717 | }; 718 | ``` 719 | 720 | rx_queue_index:rx队列索引。 721 | 722 | ingress/egress_ifindex:接口索引。 723 | 724 | 前三项其实是指针,data指向数据包的开始,data_end指向数据包的结束,data_meta指向元数据区域,xdp程序可以使用该元数据区域存储额外的伴随数据包的元数据。 725 | 726 | ##### BPF_MAP_TYPE_PERCPU_ARRAY数据改变的原子性 727 | 728 | BPF_MAP_TYPE_PERCPU_ARRAY returns a data record specific to the current CPU, and XDP hooks run under softirq, which makes it safe to update without atomic operations. 729 | 730 | 从BPF_MAP_TYPE_PERCPU_ARRAY中查询的value,修改不用加锁。 731 | 732 | ### eBPF中不同类型Program的作用 733 | 734 | [BPF program types and their principles - actorsfit](https://blog.actorsfit.com/a?ID=01750-a415789d-fe05-4a5f-8aa4-3183a1c6d97b) 735 | 736 | 1. 套接字相关Socket的bpf prog type:SOCKET_FILTER, SK_SKB, SOCK_OPS。我们使用socket相关的ebpf prog type去过滤,转发,监控套接字数据。对于socket filtering通常将其附加到原始套接字上,常见的代码如下,用来创建一个原始套接字,针对所有IP包协议类型。 737 | 738 | ``` 739 | s = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 740 | ``` 741 | 742 | - *BPF_PROG_TYPE_SOCKET_FILTER* 743 | 744 | 过滤操作包括丢弃包:program直接return 0。或者修改包:program返回包的长度。 745 | 746 | - *BPF_PROG_TYPE_SOCK_OPS* 747 | 748 | 用来操作套接字选项,例如setsockopt,设置rwnd,mtu等。Program返回0表示成功,负数表示失败。这个Program是附加到cgroup文件描述符上。Program参数是bpf_sock_ops。 749 | 750 | - *BPF_PROG_TYPE_SK_SKB* 751 | 752 | 允许用户访问skb和套接字细节,例如端口、IP,支持套接字之间skb重定向(*https://lwn.net/Articles/731133/*),使用bpf_sk_redirect_map帮助函数去执行重定向。 753 | 754 | 2. 
TC,输入是sk_buff,说明已经经过XDP,内核协议栈已经分配数据包。 755 | 756 | hook触发点:在ingress和egress点都可以触发。 757 | 758 | - ingress hook sch_handle_ingress();由__netif_receive_skb_core触发 759 | - egress hook sch_handle_egress();由__dev_queue_xmit触发。 760 | 761 | 使用场景 762 | 763 | - 容器的策略。传统方式veth pair一端接入到宿主机,所有流量都要经过宿主机的veth,因此可以在这个veth设备上tc ingress和egress hook点上attch tc eBPF。目标地址是容器网络流量会经过主机端的veth tc egress hook,从容器出来的流量会经过主机端的veth tc ingre hook。 764 | - 转发、负载均衡。对容器的出流量做NAT和负载均衡,整个过程对容器是透明的。到了egress的hook点,使用bpf_redirect辅助函数,bpf就可以接管转发逻辑了,将包推送到另一个网络设备的ingress或egress路径上。 765 | - 流量抽样与监控。和xdp类似,可以使用per-cpu的ring-buffer实现流量抽样。在这种场景下,bpf程序将自定义数据、全部或截断的包内容同时推送到一个用户空间应用程序。bpf_skb_event_output使用该函数。 766 | 767 | tc BPF程序返回值 768 | 769 | - TC_ACT_UNSPEC和TC_ACT_OK,将skb向下一阶段传递,在ingress的情况下传递给内核协议栈的更上层,在egress下传递给网络设备驱动,所以说TC是在tx上是链路层的最后一层。唯一的不同是 `TC_ACT_OK` 基于 tc BPF 程序设置的 classid 来 设置 `skb->tc_index`,而 `TC_ACT_UNSPEC` 是通过 tc BPF 程序之外的 BPF 上下文中的 `skb->tc_classid` 设置。 770 | - TC_ACT_SHOT和TC_ACT_STOLEN,两个都是指示内核将包丢弃。 771 | - TC_ACT_SHOT提示内核skb是通过kfree_skb释放的,并返回NET_XMIT_DROP给调用方,作为立即反馈。 772 | - TC_ACT_STOLEN通过consume_skb释放skb,返回NET_XMIT_SUCCESS给上层假装这个包已经被正确发送了。 773 | - TC_ACT_REDIRECT,这个返回码加上bpf_redirect辅助函数,允许重定向一个skb到同一个或另一个设备的ingress或egress路径。能够将包注入另一个设备的ingress或egress路径使得基于BPF的包转发具备了完全的灵活性。 774 | 775 | 3. XDP,XDP钩子尽可能的靠近设备,在内核创建sk_buff metadata之前。为了最大限度地提高性能,同时支持跨设备的通用基础架构。 776 | 777 | - *BPF_PROG_TYPE_XDP* 778 | 779 | XDP允许访问数据包早于包元数据分配,这是适合做防御DDos和负载均衡的地方。这样可以避免分配sk_buff昂贵的开销。Program附加在netlink socket上,如下代码创建netlink socket。Program的参数是xdp metadata指针。 780 | 781 | ``` 782 | socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) 783 | ``` 784 | 785 | ``` 786 | /* user accessible metadata for XDP packet hook * new fields must be added to the end of this structure */ 787 | struct xdp_md { 788 | __u32 data; 789 | __u32 data_end; }; 790 | ``` 791 | 792 | 实际的XDP是实现在驱动层,如果驱动不支持XDP,可选择使用"generic" XDP,这个是现在net/core/dev.c,缺点是没有绕过skb的分配,仅仅是允许XDP用于此设备。 793 | 794 | hook触发点:只能在ingress点触发。 795 | 796 | 4. 
kprobes, tracepoints and perf events 797 | 798 | 5. cgroup相关的program类型。cgroup是用于处理资源的分配,允许和拒绝进程组访问系统资源(CPU、network bandwidth等等),其效果被各种namespace隔离。 799 | 800 | 1. *BPF_PROG_TYPE_CGROUP_SKB* 801 | 802 | Allow or deny network access on IP exit/entry (BPF_CGROUP_INET_INGRESS/BPF_CGROUP_INET_EGRESS). The BPF program should return 1 to allow access. Any other value will cause the function __cgroup_bpf_run_filter_skb() to return -EPERM, which will be propagated to the caller, thus discarding the packet. 803 | 804 | 2. *BPF_PROG_TYPE_CGROUP_SOCK* 805 | 806 | What can you do? Allow or deny network access on various socket-related events (BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS). As mentioned above, the BPF program should return 1 to allow access. Any other value will cause the function __cgroup_bpf_run_filter_sk() to return -EPERM, which will be propagated to the caller, thus discarding the packet. 807 | 808 | ### 资料 809 | 810 | [透视Linux内核,BPF神奇的Linux技术入门-51CTO.COM](https://os.51cto.com/article/703114.html) 811 | 812 | https://mp.weixin.qq.com/s/Xr8ECrS_fR3aCT1vKJ9yIg 813 | -------------------------------------------------------------------------------- /ebpf-kill-example/LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. 
We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 
49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. 
The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------