├── .gitignore ├── .gitmodules ├── Makefile ├── README.org ├── advanced01-xdp-tc-interact └── README.org ├── advanced03-AF_XDP ├── Makefile ├── README.org ├── af_xdp_kern.c └── af_xdp_user.c ├── basic-solutions ├── Makefile ├── README.org ├── xdp_loader.c └── xdp_stats.c ├── basic01-xdp-pass ├── Makefile ├── README.org ├── xdp_pass_kern.c └── xdp_pass_user.c ├── basic02-prog-by-name ├── Makefile ├── README.org ├── xdp_loader.c ├── xdp_offload_nfp.org └── xdp_prog_kern.c ├── basic03-map-counter ├── Makefile ├── README.org ├── common_kern_user.h ├── xdp_load_and_stats.c └── xdp_prog_kern.c ├── basic04-pinning-maps ├── Makefile ├── README.org ├── common_kern_user.h ├── xdp_loader.c ├── xdp_prog_kern.c └── xdp_stats.c ├── common ├── Makefile ├── README.org ├── common.mk ├── common_defines.h ├── common_libbpf.c ├── common_libbpf.h ├── common_params.c ├── common_params.h ├── common_user_bpf_xdp.c ├── common_user_bpf_xdp.h ├── parsing_helpers.h ├── rewrite_helpers.h ├── xdp_stats_kern.h └── xdp_stats_kern_user.h ├── configure ├── experiment01-tailgrow ├── Makefile ├── README.org ├── xdp_data_access_helpers.h ├── xdp_prog_fail1.c ├── xdp_prog_fail2.c ├── xdp_prog_fail3.c ├── xdp_prog_kern.c ├── xdp_prog_kern2.c ├── xdp_prog_kern3.c └── xdp_prog_kern4.c ├── lib ├── Makefile ├── defines.mk └── install │ ├── .gitignore │ ├── include │ └── .gitignore │ └── lib │ └── .gitignore ├── packet-solutions ├── Makefile ├── README.org ├── tc_reply_kern_02.c ├── xdp_prog_kern_02.c ├── xdp_prog_kern_03.c ├── xdp_prog_user.c ├── xdp_vlan01_kern.c └── xdp_vlan02_kern.c ├── packet01-parsing ├── Makefile ├── README.org └── xdp_prog_kern.c ├── packet02-rewriting ├── Makefile ├── README.org └── xdp_prog_kern.c ├── packet03-redirecting ├── Makefile ├── README.org ├── xdp_prog_kern.c └── xdp_prog_user.c ├── setup-testlab ├── README.org ├── ansible │ ├── README.org │ ├── bootstrap-ansible.yml │ ├── group_vars │ │ └── all │ ├── hosts │ ├── roles │ │ └── tutorial │ │ │ ├── files │ │ │ ├── inputrc │ 
│ │ └── ld_usr_local.conf │ │ │ ├── handlers │ │ │ └── main.yml │ │ │ └── tasks │ │ │ └── main.yml │ ├── run-on-hosts.sh │ ├── run-on-localhost.sh │ └── site.yml ├── create_vm_image.org ├── using_vm_image.org └── workarounds.org ├── setup_dependencies.org ├── testenv ├── README.org ├── config.sh ├── setup-env.sh └── testenv.sh ├── tracing01-xdp-simple ├── Makefile ├── README.org ├── trace_load_and_stats.c ├── trace_prog_kern.c └── xdp_prog_kern.c ├── tracing02-xdp-monitor ├── Makefile ├── README.org ├── trace_load_and_stats.c └── trace_prog_kern.c ├── tracing03-xdp-debug-print ├── Makefile ├── README.org ├── trace_read.c └── xdp_prog_kern.c └── tracing04-xdp-tcpdump ├── Makefile ├── README.org ├── xdp_sample_pkts_kern.c └── xdp_sample_pkts_user.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | *.s 10 | *.ll 11 | 12 | # Linker output 13 | *.ilk 14 | *.map 15 | *.exp 16 | 17 | # Precompiled Headers 18 | *.gch 19 | *.pch 20 | 21 | # Libraries 22 | *.lib 23 | *.a 24 | *.la 25 | *.lo 26 | 27 | # Shared objects (inc. 
Windows DLLs) 28 | *.dll 29 | *.so 30 | *.so.* 31 | *.dylib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | *.i*86 38 | *.x86_64 39 | *.hex 40 | 41 | # Debug files 42 | *.dSYM/ 43 | *.su 44 | *.idb 45 | *.pdb 46 | 47 | # Kernel Module Compile Results 48 | *.mod* 49 | *.cmd 50 | .tmp_versions/ 51 | modules.order 52 | Module.symvers 53 | Mkfile.old 54 | dkms.conf 55 | 56 | # Userspace programs 57 | xdp-loader 58 | xdp_loader 59 | xdp_stats 60 | xdp_pass_user 61 | xdp_load_and_stats 62 | xdp_prog_user 63 | af_xdp_user 64 | 65 | # tracing userspace programs 66 | trace_load_and_stats 67 | trace_load_and_stats 68 | trace_read 69 | xdp_sample_pkts_user 70 | 71 | # configure output 72 | config.mk 73 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/libbpf"] 2 | path = lib/libbpf 3 | url = https://github.com/libbpf/libbpf 4 | [submodule "lib/xdp-tools"] 5 | path = lib/xdp-tools 6 | url = https://github.com/xdp-project/xdp-tools 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | # V=<n> given on the command line sets VERBOSE; with VERBOSE=0 sub-makes do not print directories and $(Q) hides commands 3 | ifeq ("$(origin V)", "command line") 4 | VERBOSE = $(V) 5 | endif 6 | ifndef VERBOSE 7 | VERBOSE = 0 8 | endif 9 | 10 | ifeq ($(VERBOSE),0) 11 | MAKEFLAGS += --no-print-directory 12 | Q = @ 13 | endif 14 | # Lesson directories are discovered by name prefix; expand the globs once with := 15 | LESSONS := $(wildcard basic*) $(wildcard packet*) $(wildcard tracing??-*) 16 | # LESSONS += advanced03-AF_XDP 17 | LESSONS_CLEAN := $(addsuffix _clean,$(LESSONS)) 18 | # None of these names is a build output; lib and the lessons are existing directories, so they must be phony to be remade 19 | .PHONY: all lib check_submodule clean clobber distclean $(LESSONS) $(LESSONS_CLEAN) 20 | 21 | all: lib $(LESSONS) 22 | clean: $(LESSONS_CLEAN) 23 | @echo; echo common; $(MAKE) -C common clean 24 | @echo; echo lib; $(MAKE) -C lib clean 25 | 26 | lib: config.mk check_submodule 27 | @echo; echo $@; $(MAKE) 
-C $@ 28 | # "all" lists lib before the lessons; declare that ordering so it also holds under make -j 29 | $(LESSONS): lib 30 | @echo; echo $@; $(MAKE) -C $@ 31 | # Strip only the trailing _clean to recover the lesson directory name 32 | $(LESSONS_CLEAN): 33 | @echo; echo $@; $(MAKE) -C $(patsubst %_clean,%,$@) clean 34 | 35 | config.mk: configure 36 | @sh configure 37 | 38 | clobber: 39 | @touch config.mk 40 | $(Q)$(MAKE) clean 41 | $(Q)rm -f config.mk 42 | 43 | distclean: clobber 44 | # Warn (never fail) when git reports the lib/libbpf submodule with a leading '+', i.e. its checkout differs from the recorded SHA-1 45 | check_submodule: 46 | @if [ -d .git ] && git submodule status lib/libbpf | grep -q '^+'; then \ 47 | echo "" ;\ 48 | echo "** WARNING **: git submodule SHA-1 out-of-sync" ;\ 49 | echo " consider running: git submodule update" ;\ 50 | echo "" ;\ 51 | fi 52 | 53 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: XDP Programming Hands-On Tutorial 3 | #+OPTIONS: ^:nil 4 | 5 | This repository contains a tutorial that aims to introduce you to the basic 6 | steps needed to effectively write programs for the eXpress Data Path (XDP) 7 | system in the Linux kernel, which offers high-performance programmable 8 | packet processing integrated with the kernel. 9 | 10 | The tutorial is composed of a number of lessons, each of which has its own 11 | repository. Start with the lessons starting with "basicXX", and read the 12 | README.org file in each repository for instructions for that lesson. 13 | 14 | Keep reading below for an introduction to XDP and an overview of what you 15 | will learn in this tutorial, or jump [[file:basic01-xdp-pass/README.org][straight to the first lesson]]. 
16 | 17 | * Table of Contents :TOC: 18 | - [[#introduction][Introduction]] 19 | - [[#first-step-setup-dependencies][First step: Setup dependencies]] 20 | - [[#how-the-lessons-are-organised][How the lessons are organised]] 21 | - [[#basic-setup-lessons][Basic setup lessons]] 22 | - [[#packet-processing-lessons][Packet processing lessons]] 23 | - [[#advanced-lessons][Advanced lessons]] 24 | 25 | * Introduction 26 | 27 | XDP is a part of the upstream Linux kernel, and enables users to install 28 | packet processing programs into the kernel, that will be executed for each 29 | arriving packet, before the kernel does any other processing on the data. 30 | The programs are written in restricted C, and compiled into the eBPF byte 31 | code format that is executed and JIT-compiled in the kernel, after being 32 | verified for safety. This approach offers great flexibility and high 33 | performance, and integrates well with the rest of the system. For a general 34 | introduction to XDP, read [[https://github.com/xdp-project/xdp-paper/blob/master/xdp-the-express-data-path.pdf][the academic paper (pdf)]], or the [[https://cilium.readthedocs.io/en/latest/bpf/][Cilium BPF 35 | reference guide]]. 36 | 37 | This tutorial aims to be a practical introduction to the different steps 38 | needed to successfully write useful programs using the XDP system. We assume 39 | you have a basic understanding of Linux networking and how to configure it 40 | with the =iproute2= suite of tools, but assume no prior experience with eBPF 41 | or XDP. Prior programming experience is also helpful: the lessons are all 42 | written in C and they include some basic pointer arithmetic and aliasing. 43 | 44 | The tutorial is a work in progress, and was initially created for use as a 45 | live tutorial at the [[https://www.netdevconf.org/0x13/session.html?tutorial-XDP-hands-on][Netdev Conference]] in Prague in March 2019. 
Since the 46 | kernel BPF subsystem continues to develop at a rapid pace, this tutorial has 47 | not kept up with all developments. However, everything presented here will 48 | work with recent kernels, and this tutorial functions as a self-contained 49 | introduction that anyone can go through to learn the XDP basics. Input and 50 | contributions to advance towards this goal are very welcome; just open 51 | issues or pull requests in the [[https://github.com/xdp-project/xdp-tutorial/][GitHub repository]]. 52 | 53 | * First step: Setup dependencies 54 | 55 | Before you can start completing the steps in this tutorial, you will need to 56 | install a few dependencies on your system. These are described in 57 | [[file:setup_dependencies.org]]. 58 | 59 | We also provide a helper script that will set up a test environment with 60 | virtual interfaces for you to test your code on. This is introduced in the 61 | basic lessons, and also has [[file:testenv/README.org][its own README file]]. 62 | 63 | * How the lessons are organised 64 | The tutorial is organised into a number of lessons; each lesson has its own 65 | subdirectory, and the lessons are grouped by category: 66 | 67 | - Basic setup (directories starting with basicXX) 68 | - Packet processing (directories starting with packetXX) 69 | - Advanced topics (directories starting with advancedXX) 70 | 71 | We recommend you start with the "basic" lessons, and follow the lessons in 72 | each category in numerical order. Read the README.org file in each lesson 73 | directory for instructions on how to complete the lesson. 74 | 75 | ** Basic setup lessons 76 | We recommend you start with these lessons, as they will teach you how to 77 | compile and inspect the eBPF programs that will implement your packet 78 | processing code, how to load them into the kernel, and how to inspect the 79 | state afterwards. As part of the basic lessons you will also be writing an 80 | eBPF program loader that you will need in subsequent lessons. 
81 | 82 | ** Packet processing lessons 83 | Once you have the basics figured out and know how to load programs into the 84 | kernel, you are ready to start processing some packets. The lessons in the 85 | packet processing category will teach you about the different steps needed 86 | to process data packets, including parsing, rewriting, instructing the 87 | kernel about what to do with the packet after processing, and how to use 88 | helpers to access existing kernel functionality. 89 | 90 | ** Advanced lessons 91 | After having completed the lessons in the basic and packet processing 92 | categories, you should be all set to write your first real XDP program that 93 | will do useful processing of the packets coming into the system. However, 94 | there are some slightly more advanced topics that will probably be useful 95 | once you start expanding your program to do more things. 96 | 97 | The topics covered in the advanced lessons include how to make eBPF programs 98 | in other parts of the kernel interact with your XDP program, passing 99 | metadata between programs, best practices for interacting with user space and 100 | kernel features, and how to run multiple XDP programs on a single interface. 101 | -------------------------------------------------------------------------------- /advanced01-xdp-tc-interact/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+Title: Advanced: XDP interacting with TC 3 | #+OPTIONS: ^:nil 4 | 5 | XDP is only one of the available eBPF network hooks. Another very important 6 | eBPF network hook is in the Linux Traffic Control (TC) system, both at 7 | /ingress/ and /egress/ via =clsact=. 8 | 9 | * Lessons 10 | 11 | ** XDP meta-data to TC 12 | 13 | To transfer info between XDP and the network stack there are a number of 14 | options. One option is that XDP can *modify packet headers* before netstack, 15 | e.g. 
pop/push headers influence RX-handler in netstack, or e.g. modify 16 | MAC-src and match that with an iptables rule. 17 | 18 | Another option is XDP "meta-data". The "meta-data" can be written by XDP, 19 | and a TC-hook BPF program can read this, and e.g. update fields in the SKB. 20 | 21 | In the kernel tree there is a BPF-sample that shows how XDP and TC-ingress 22 | hook can cooperate; XDP sets info in meta-data and TC uses this meta-data to 23 | set the SKB mark field. 24 | 25 | The XDP and TC BPF-prog's code is in: [[https://github.com/torvalds/linux/blob/master/samples/bpf/xdp2skb_meta_kern.c][samples/bpf/xdp2skb_meta_kern.c]]. 26 | A shell script to load both XDP and TC via iproute2 is placed in 27 | [[https://github.com/torvalds/linux/blob/master/samples/bpf/xdp2skb_meta.sh][xdp2skb_meta.sh]]. 28 | 29 | ** XDP CPU-redirect solving TC-locking 30 | 31 | A real-world problem is traffic shaping causing lock-congestion on the TC 32 | root qdisc lock (e.g. [[http://vger.kernel.org/lpc-bpf2018.html#session-1][Google's servers experience this]]; also see [[https://doi.org/10.1145/2785956.2787478][article]]). 33 | 34 | The XDP-project has a git-repo for demonstrating how to solve this: 35 | - https://github.com/xdp-project/xdp-cpumap-tc 36 | 37 | It sets up the MQ (Multi-Queue) qdisc per TXQ to have an HTB-shaper. Then it 38 | uses XDP to redirect (via CPUMAP) the traffic to the CPU that is responsible 39 | for handling this egress traffic. In the TC clsact-egress hook, a BPF-prog 40 | stamps the SKB packet with the appropriate HTB class id (via 41 | skb->queue_mapping), such that traffic shaping gets isolated per CPU. 42 | 43 | Note that it depends on a kernel feature that is first available in 44 | kernel v5.1, via [[https://github.com/torvalds/linux/commit/74e31ca850c1][kernel commit 74e31ca850c1]]. 
45 | -------------------------------------------------------------------------------- /advanced03-AF_XDP/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := af_xdp_kern 4 | USER_TARGETS := af_xdp_user 5 | LDLIBS += -lpthread 6 | 7 | COMMON_DIR := ../common 8 | 9 | include $(COMMON_DIR)/common.mk 10 | COMMON_OBJS := $(COMMON_DIR)/common_params.o 11 | COMMON_OBJS += $(COMMON_DIR)/common_user_bpf_xdp.o 12 | -------------------------------------------------------------------------------- /advanced03-AF_XDP/af_xdp_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | #include 4 | 5 | #include 6 | 7 | struct { 8 | __uint(type, BPF_MAP_TYPE_XSKMAP); 9 | __type(key, __u32); 10 | __type(value, __u32); 11 | __uint(max_entries, 64); 12 | } xsks_map SEC(".maps"); 13 | 14 | struct { 15 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 16 | __type(key, __u32); 17 | __type(value, __u32); 18 | __uint(max_entries, 64); 19 | } xdp_stats_map SEC(".maps"); 20 | 21 | SEC("xdp") 22 | int xdp_sock_prog(struct xdp_md *ctx) 23 | { 24 | int index = ctx->rx_queue_index; 25 | __u32 *pkt_count; 26 | 27 | pkt_count = bpf_map_lookup_elem(&xdp_stats_map, &index); 28 | if (pkt_count) { 29 | 30 | /* We pass every other packet */ 31 | if ((*pkt_count)++ & 1) 32 | return XDP_PASS; 33 | } 34 | 35 | /* A set entry here means that the correspnding queue_id 36 | * has an active AF_XDP socket bound to it. 
*/ 37 | if (bpf_map_lookup_elem(&xsks_map, &index)) 38 | return bpf_redirect_map(&xsks_map, index, 0); 39 | 40 | return XDP_PASS; 41 | } 42 | 43 | char _license[] SEC("license") = "GPL"; 44 | -------------------------------------------------------------------------------- /basic-solutions/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | USER_TARGETS := xdp_stats xdp_loader 4 | 5 | COMMON_DIR := ../common 6 | 7 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 8 | 9 | include $(COMMON_DIR)/common.mk 10 | -------------------------------------------------------------------------------- /basic-solutions/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Basic - solutions 3 | #+OPTIONS: ^:nil 4 | 5 | This directory contains solutions to all the assignments in the 6 | [[file:../basic01-xdp-pass/][basic01]], 7 | [[file:../basic02-prog-by-name/][basic02]], 8 | [[file:../basic03-map-counter/][basic03]], and 9 | [[file:../basic04-pinning-maps/][basic04]] lessons. 10 | 11 | * Table of Contents :TOC: 12 | - [[#solutions][Solutions]] 13 | - [[#basic01-loading-your-first-bpf-program][Basic01: loading your first BPF program]] 14 | - [[#basic02-loading-a-program-by-name][Basic02: loading a program by name]] 15 | - [[#basic03-counting-with-bpf-maps][Basic03: counting with BPF maps]] 16 | - [[#basic04-pinning-of-maps][Basic04: pinning of maps]] 17 | 18 | * Solutions 19 | 20 | ** Basic01: loading your first BPF program 21 | 22 | This lesson doesn't contain any assignments except to repeat the steps listed 23 | in the lesson readme file. 24 | 25 | ** Basic02: loading a program by name 26 | 27 | *** Assignment 1: Setting up your test lab 28 | 29 | No code is needed; just repeat the steps listed in the assignment description. 
30 | 31 | *** Assignment 2: Add xdp_abort program 32 | 33 | Just add the following section to the 34 | [[file:../basic02-prog-by-name/xdp_prog_kern.c][xdp_prog_kern.c]] program and 35 | follow the steps listed in the assignment description: 36 | #+begin_example c 37 | SEC("xdp_abort") 38 | int xdp_abort_func(struct xdp_md *ctx) 39 | { 40 | return XDP_ABORTED; 41 | } 42 | #+end_example 43 | 44 | ** Basic03: counting with BPF maps 45 | 46 | The solutions to all three assignments can be found in the following files: 47 | 48 | * The [[file:../basic04-pinning-maps/common_kern_user.h][common_kern_user.h]] file contains the new structure =datarec= definition. 49 | * The [[file:../basic04-pinning-maps/xdp_prog_kern.c][xdp_prog_kern.c]] file contains the new =xdp_stats_map= map definition and the updated =xdp_stats_record_action= function. 50 | 51 | Note that for use in later lessons/assignments the code was moved to the following files: 52 | [[file:../common/xdp_stats_kern_user.h][xdp_stats_kern_user.h]] and 53 | [[file:../common/xdp_stats_kern.h][xdp_stats_kern.h]]. So in order to use the 54 | =xdp_stats_record_action= function in later XDP programs, just include the 55 | following header files: 56 | #+begin_example c 57 | #include "../common/xdp_stats_kern_user.h" 58 | #include "../common/xdp_stats_kern.h" 59 | #+end_example 60 | For a user-space application, only the former header is needed. 61 | 62 | ** Basic04: pinning of maps 63 | 64 | *** Assignment 1: (xdp_stats.c) reload map file-descriptor 65 | 66 | See the [[file:xdp_stats.c][xdp_stats.c]] program in this directory. 67 | 68 | *** Assignment 2: (xdp_loader.c) reuse pinned map 69 | 70 | See the [[file:xdp_loader.c][xdp_loader.c]] program in this directory. 
71 | -------------------------------------------------------------------------------- /basic-solutions/xdp_loader.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | static const char *__doc__ = "XDP loader\n" 3 | " - Allows selecting BPF program --progname name to XDP-attach to --dev\n"; 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include /* depend on kernel-headers installed */ 21 | 22 | #include "../common/common_params.h" 23 | #include "../common/common_user_bpf_xdp.h" 24 | #include "../common/common_libbpf.h" 25 | 26 | static const char *default_filename = "xdp_prog_kern.o"; 27 | 28 | static const struct option_wrapper long_options[] = { 29 | 30 | {{"help", no_argument, NULL, 'h' }, 31 | "Show help", false}, 32 | 33 | {{"dev", required_argument, NULL, 'd' }, 34 | "Operate on device ", "", true}, 35 | 36 | {{"skb-mode", no_argument, NULL, 'S' }, 37 | "Install XDP program in SKB (AKA generic) mode"}, 38 | 39 | {{"native-mode", no_argument, NULL, 'N' }, 40 | "Install XDP program in native mode"}, 41 | 42 | {{"auto-mode", no_argument, NULL, 'A' }, 43 | "Auto-detect SKB or native mode"}, 44 | 45 | {{"force", no_argument, NULL, 'F' }, 46 | "Force install, replacing existing program on interface"}, 47 | 48 | {{"unload", no_argument, NULL, 'U' }, 49 | "Unload XDP program instead of loading"}, 50 | 51 | {{"reuse-maps", no_argument, NULL, 'M' }, 52 | "Reuse pinned maps"}, 53 | 54 | {{"quiet", no_argument, NULL, 'q' }, 55 | "Quiet mode (no output)"}, 56 | 57 | {{"filename", required_argument, NULL, 1 }, 58 | "Load program from ", ""}, 59 | 60 | {{"progname", required_argument, NULL, 2 }, 61 | "Load program from function in the ELF file", ""}, 62 | 63 | {{0, 0, NULL, 0 }, NULL, false} 64 | }; 65 | 66 | #ifndef PATH_MAX 67 | #define PATH_MAX 4096 68 | 
#endif 69 | 70 | const char *pin_basedir = "/sys/fs/bpf"; 71 | const char *map_name = "xdp_stats_map"; 72 | 73 | /* Pinning maps under /sys/fs/bpf in subdir */ 74 | int pin_maps_in_bpf_object(struct bpf_object *bpf_obj, struct config *cfg) 75 | { 76 | char map_filename[PATH_MAX]; 77 | int err, len; 78 | 79 | len = snprintf(map_filename, PATH_MAX, "%s/%s/%s", 80 | cfg->pin_dir, cfg->ifname, map_name); 81 | if (len < 0) { 82 | fprintf(stderr, "ERR: creating map_name\n"); 83 | return EXIT_FAIL_OPTION; 84 | } 85 | 86 | /* Existing/previous XDP prog might not have cleaned up */ 87 | if (access(map_filename, F_OK ) != -1 ) { 88 | if (verbose) 89 | printf(" - Unpinning (remove) prev maps in %s/\n", 90 | cfg->pin_dir); 91 | 92 | /* Basically calls unlink(3) on map_filename */ 93 | err = bpf_object__unpin_maps(bpf_obj, cfg->pin_dir); 94 | if (err) { 95 | fprintf(stderr, "ERR: UNpinning maps in %s\n", cfg->pin_dir); 96 | return EXIT_FAIL_BPF; 97 | } 98 | } 99 | if (verbose) 100 | printf(" - Pinning maps in %s/\n", cfg->pin_dir); 101 | 102 | /* This will pin all maps in our bpf_object */ 103 | err = bpf_object__pin_maps(bpf_obj, cfg->pin_dir); 104 | if (err) { 105 | fprintf(stderr, "ERR: Pinning maps in %s\n", cfg->pin_dir); 106 | return EXIT_FAIL_BPF; 107 | } 108 | 109 | return 0; 110 | } 111 | 112 | int main(int argc, char **argv) 113 | { 114 | struct xdp_program *program; 115 | int err, len; 116 | 117 | struct config cfg = { 118 | .attach_mode = XDP_MODE_NATIVE, 119 | .ifindex = -1, 120 | .do_unload = false, 121 | }; 122 | /* Set default BPF-ELF object file and BPF program name */ 123 | strncpy(cfg.filename, default_filename, sizeof(cfg.filename)); 124 | /* Cmdline options can change progname */ 125 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 126 | 127 | /* Required option */ 128 | if (cfg.ifindex == -1) { 129 | fprintf(stderr, "ERR: required option --dev missing\n\n"); 130 | usage(argv[0], __doc__, long_options, (argc == 1)); 131 | return 
EXIT_FAIL_OPTION; 132 | } 133 | if (cfg.do_unload) { 134 | if (!cfg.reuse_maps) { 135 | /* TODO: Miss unpin of maps on unload */ 136 | } 137 | /* return xdp_link_detach(cfg.ifindex, cfg.xdp_flags, 0); */ 138 | } 139 | 140 | /* Initialize the pin_dir configuration */ 141 | len = snprintf(cfg.pin_dir, 512, "%s/%s", pin_basedir, cfg.ifname); 142 | if (len < 0) { 143 | fprintf(stderr, "ERR: creating pin dirname\n"); 144 | return EXIT_FAIL_OPTION; 145 | } 146 | 147 | 148 | program = load_bpf_and_xdp_attach(&cfg); 149 | if (!program) 150 | return EXIT_FAIL_BPF; 151 | 152 | if (verbose) { 153 | printf("Success: Loaded BPF-object(%s) and used program(%s)\n", 154 | cfg.filename, cfg.progname); 155 | printf(" - XDP prog attached on device:%s(ifindex:%d)\n", 156 | cfg.ifname, cfg.ifindex); 157 | } 158 | 159 | /* Use the --dev name as subdir for exporting/pinning maps */ 160 | if (!cfg.reuse_maps) { 161 | err = pin_maps_in_bpf_object(xdp_program__bpf_obj(program), &cfg); 162 | if (err) { 163 | fprintf(stderr, "ERR: pinning maps\n"); 164 | return err; 165 | } 166 | } 167 | 168 | return EXIT_OK; 169 | } 170 | -------------------------------------------------------------------------------- /basic01-xdp-pass/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_pass_kern 4 | USER_TARGETS := xdp_pass_user 5 | 6 | LLC ?= llc 7 | CLANG ?= clang 8 | CC := gcc 9 | 10 | COMMON_DIR := ../common 11 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 12 | 13 | include $(COMMON_DIR)/common.mk 14 | -------------------------------------------------------------------------------- /basic01-xdp-pass/xdp_pass_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | SEC("xdp") 6 | int xdp_prog_simple(struct xdp_md *ctx) 7 | { 8 | return XDP_PASS; 9 | } 10 | 11 | char 
_license[] SEC("license") = "GPL"; 12 | -------------------------------------------------------------------------------- /basic01-xdp-pass/xdp_pass_user.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | static const char *__doc__ = "Simple XDP prog doing XDP_PASS\n"; 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include /* depend on kernel-headers installed */ 17 | 18 | #include "../common/common_params.h" 19 | #include "../common/common_user_bpf_xdp.h" 20 | 21 | static const struct option_wrapper long_options[] = { 22 | {{"help", no_argument, NULL, 'h' }, 23 | "Show help", false}, 24 | 25 | {{"dev", required_argument, NULL, 'd' }, 26 | "Operate on device ", "", true}, 27 | 28 | {{"skb-mode", no_argument, NULL, 'S' }, 29 | "Install XDP program in SKB (AKA generic) mode"}, 30 | 31 | {{"native-mode", no_argument, NULL, 'N' }, 32 | "Install XDP program in native mode"}, 33 | 34 | {{"auto-mode", no_argument, NULL, 'A' }, 35 | "Auto-detect SKB or native mode"}, 36 | 37 | {{"unload", required_argument, NULL, 'U' }, 38 | "Unload XDP program instead of loading", ""}, 39 | 40 | {{"unload-all", no_argument, NULL, 4 }, 41 | "Unload all XDP programs on device"}, 42 | 43 | {{0, 0, NULL, 0 }, NULL, false} 44 | }; 45 | 46 | 47 | int main(int argc, char **argv) 48 | { 49 | struct bpf_prog_info info = {}; 50 | __u32 info_len = sizeof(info); 51 | char filename[] = "xdp_pass_kern.o"; 52 | char progname[] = "xdp_prog_simple"; 53 | struct xdp_program *prog; 54 | char errmsg[1024]; 55 | int prog_fd, err; // = EXIT_SUCCESS; 56 | 57 | struct config cfg = { 58 | .attach_mode = XDP_MODE_UNSPEC, 59 | .ifindex = -1, 60 | .do_unload = false, 61 | }; 62 | 63 | DECLARE_LIBBPF_OPTS(bpf_object_open_opts, bpf_opts); 64 | DECLARE_LIBXDP_OPTS(xdp_program_opts, xdp_opts, 65 | .open_filename = filename, 66 | 
.prog_name = progname, 67 | .opts = &bpf_opts); 68 | 69 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 70 | /* Required option */ 71 | if (cfg.ifindex == -1) { 72 | fprintf(stderr, "ERR: required option --dev missing\n"); 73 | usage(argv[0], __doc__, long_options, (argc == 1)); 74 | return EXIT_FAIL_OPTION; 75 | } 76 | 77 | /* Unload a program by prog_id, or 78 | * unload all programs on net device 79 | */ 80 | if (cfg.do_unload || cfg.unload_all) { 81 | err = do_unload(&cfg); 82 | if (err) { 83 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 84 | fprintf(stderr, "Couldn't unload XDP program %s: %s\n", 85 | progname, errmsg); 86 | return err; 87 | } 88 | 89 | printf("Success: Unloading XDP prog name: %s\n", progname); 90 | return EXIT_OK; 91 | } 92 | 93 | /* Create an xdp_program froma a BPF ELF object file */ 94 | prog = xdp_program__create(&xdp_opts); 95 | err = libxdp_get_error(prog); 96 | if (err) { 97 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 98 | fprintf(stderr, "Couldn't get XDP program %s: %s\n", 99 | progname, errmsg); 100 | return err; 101 | } 102 | 103 | /* Attach the xdp_program to the net device XDP hook */ 104 | err = xdp_program__attach(prog, cfg.ifindex, cfg.attach_mode, 0); 105 | if (err) { 106 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 107 | fprintf(stderr, "Couldn't attach XDP program on iface '%s' : %s (%d)\n", 108 | cfg.ifname, errmsg, err); 109 | return err; 110 | } 111 | 112 | /* This step is not really needed , BPF-info via bpf-syscall */ 113 | prog_fd = xdp_program__fd(prog); 114 | err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); 115 | if (err) { 116 | fprintf(stderr, "ERR: can't get prog info - %s\n", 117 | strerror(errno)); 118 | return err; 119 | } 120 | 121 | printf("Success: Loading " 122 | "XDP prog name:%s(id:%d) on device:%s(ifindex:%d)\n", 123 | info.name, info.id, cfg.ifname, cfg.ifindex); 124 | return EXIT_OK; 125 | } 126 | 
-------------------------------------------------------------------------------- /basic02-prog-by-name/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := xdp_loader 5 | 6 | COMMON_DIR := ../common 7 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 8 | 9 | include $(COMMON_DIR)/common.mk 10 | 11 | -------------------------------------------------------------------------------- /basic02-prog-by-name/xdp_loader.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | static const char *__doc__ = "XDP loader\n" 3 | " - Specify BPF-object --filename to load \n" 4 | " - and select BPF program --progname name to XDP-attach to --dev\n"; 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include /* depend on kernel-headers installed */ 18 | 19 | #include "../common/common_params.h" 20 | #include "../common/common_user_bpf_xdp.h" 21 | 22 | static const char *default_filename = "xdp_prog_kern.o"; 23 | static const char *default_progname = "xdp_pass_func"; 24 | 25 | static const struct option_wrapper long_options[] = { 26 | {{"help", no_argument, NULL, 'h' }, 27 | "Show help", false}, 28 | 29 | {{"dev", required_argument, NULL, 'd' }, 30 | "Operate on device ", "", true}, 31 | 32 | {{"skb-mode", no_argument, NULL, 'S' }, 33 | "Install XDP program in SKB (AKA generic) mode"}, 34 | 35 | {{"native-mode", no_argument, NULL, 'N' }, 36 | "Install XDP program in native mode"}, 37 | 38 | {{"auto-mode", no_argument, NULL, 'A' }, 39 | "Auto-detect SKB or native mode"}, 40 | 41 | {{"offload-mode",no_argument, NULL, 3 }, 42 | "Hardware offload XDP program to NIC"}, 43 | 44 | {{"unload", required_argument, NULL, 'U' }, 45 | "Unload XDP program instead of loading", ""}, 46 
| 47 | {{"unload-all", no_argument, NULL, 4 }, 48 | "Unload all XDP programs on device"}, 49 | 50 | {{"quiet", no_argument, NULL, 'q' }, 51 | "Quiet mode (no output)"}, 52 | 53 | {{"filename", required_argument, NULL, 1 }, 54 | "Load program from ", ""}, 55 | 56 | {{"progname", required_argument, NULL, 2 }, 57 | "Load program from function in the ELF file", ""}, 58 | 59 | {{0, 0, NULL, 0 }, NULL, false} 60 | }; 61 | 62 | 63 | static void list_avail_progs(struct bpf_object *obj) 64 | { 65 | struct bpf_program *pos; 66 | 67 | printf("BPF object (%s) listing available XDP functions\n", 68 | bpf_object__name(obj)); 69 | 70 | bpf_object__for_each_program(pos, obj) { 71 | if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) 72 | printf(" %s\n", bpf_program__name(pos)); 73 | } 74 | } 75 | 76 | /* Lesson#1: This is a central piece of this lesson: 77 | * - Notice how BPF-ELF obj can have several programs 78 | * - Find by program name via: xdp_program__create 79 | */ 80 | int main(int argc, char **argv) 81 | { 82 | struct config cfg = { 83 | .attach_mode = XDP_MODE_NATIVE, 84 | .ifindex = -1, 85 | .do_unload = false, 86 | }; 87 | struct bpf_object *obj; 88 | char errmsg[1024]; 89 | int err; 90 | 91 | /* Set default BPF-ELF object file and BPF program name */ 92 | strncpy(cfg.filename, default_filename, sizeof(cfg.filename)); 93 | strncpy(cfg.progname, default_progname, sizeof(cfg.progname)); 94 | /* Cmdline options can change these */ 95 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 96 | 97 | /* Required option */ 98 | if (cfg.ifindex == -1) { 99 | fprintf(stderr, "ERR: required option --dev missing\n"); 100 | usage(argv[0], __doc__, long_options, (argc == 1)); 101 | return EXIT_FAIL_OPTION; 102 | } 103 | /* Unload a program by prog_id, or 104 | * unload all programs on net device 105 | */ 106 | if (cfg.do_unload || cfg.unload_all) { 107 | err = do_unload(&cfg); 108 | if (err) { 109 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 110 | fprintf(stderr, "Couldn't 
unload XDP program %s: %s\n", 111 | cfg.progname, errmsg); 112 | return err; 113 | } 114 | 115 | printf("Success: Unloading XDP prog name: %s\n", cfg.progname); 116 | return EXIT_OK; 117 | } 118 | 119 | /* Open a BPF object file */ 120 | DECLARE_LIBBPF_OPTS(bpf_object_open_opts, bpf_opts); 121 | obj = bpf_object__open_file(cfg.filename, &bpf_opts); 122 | err = libbpf_get_error(obj); 123 | if (err) { 124 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 125 | fprintf(stderr, "Couldn't open BPF object file %s: %s\n", 126 | cfg.filename, errmsg); 127 | return err; 128 | } 129 | 130 | /* List available programs */ 131 | if (verbose) 132 | list_avail_progs(obj); 133 | 134 | DECLARE_LIBXDP_OPTS(xdp_program_opts, xdp_opts, 135 | .obj = obj, 136 | .prog_name = cfg.progname); 137 | struct xdp_program *prog = xdp_program__create(&xdp_opts); 138 | err = libxdp_get_error(prog); 139 | if (err) { 140 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 141 | fprintf(stderr, "ERR: loading program %s: %s\n", cfg.progname, errmsg); 142 | exit(EXIT_FAIL_BPF); 143 | } 144 | 145 | /* At this point: BPF-progs are (only) loaded by the kernel, and prog 146 | * is our selected program handle. Next step is attaching this prog 147 | * to a kernel hook point, in this case XDP net_device link-level hook. 
148 | */ 149 | err = xdp_program__attach(prog, cfg.ifindex, cfg.attach_mode, 0); 150 | if (err) { 151 | perror("xdp_program__attach"); 152 | exit(err); 153 | } 154 | 155 | if (verbose) { 156 | printf("Success: Loaded BPF-object(%s) and used program(%s)\n", 157 | cfg.filename, cfg.progname); 158 | printf(" - XDP prog id:%d attached on device:%s(ifindex:%d)\n", 159 | xdp_program__id(prog), cfg.ifname, cfg.ifindex); 160 | } 161 | /* Other BPF programs from ELF file will get freed on exit */ 162 | return EXIT_OK; 163 | } 164 | -------------------------------------------------------------------------------- /basic02-prog-by-name/xdp_offload_nfp.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: XDP hardware offload on Netronome's Agilio SmartNICs 3 | #+OPTIONS: ^:nil 4 | 5 | * Check firmware 6 | 7 | Netronome's Agilio SmartNICs driver =nfp= support XDP offloading, *but* it 8 | requires that the correct firmware is loaded. 9 | 10 | Check the firmware version via ethtool. 11 | 12 | E.g this shows the wrong version: 13 | #+begin_example 14 | $ ethtool -i nfp1 | grep firmware-version 15 | firmware-version: 0.0.3.5 0.22 nic-2.1.13 nic 16 | #+end_example 17 | 18 | It needs to look like: 19 | #+begin_example 20 | $ ethtool -i nfp1 | grep firmware-version 21 | firmware-version: 0.0.3.5 0.22 bpf-2.0.6.124 ebpf 22 | #+end_example 23 | 24 | * Upgrade firmware 25 | 26 | If you need to upgrade the firmware: 27 | - Get the BPF firmware: [[https://help.netronome.com/support/solutions/articles/36000050009-agilio-ebpf-2-0-6-extended-berkeley-packet-filter][here]] 28 | - Follow the [[https://help.netronome.com/support/solutions/articles/36000049975-basic-firmware-user-guide][Netronome: Basic Firmware User Guide]]. 
29 | 30 | The guide says that, after installing the firmware, unloading and reloading the driver 31 | kernel module upgrades the firmware, like this: 32 | 33 | #+begin_example 34 | # reload driver to load new firmware 35 | rmmod nfp; modprobe nfp 36 | #+end_example 37 | 38 | -------------------------------------------------------------------------------- /basic02-prog-by-name/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | /* Notice how this XDP/BPF-program contains several programs in the same source 6 | * file. Although they all share the SEC("xdp") section name, via libbpf 7 | * they can be selected individually, and via their file-descriptor attached to 8 | * a given kernel BPF-hook. 9 | * 10 | * The SEC names below are what the older libbpf bpf_object__find_program_by_title() 11 | * referred to, and the iproute2 utility also uses the section name. Because the 12 | * SEC name is shared here, this lesson's loader selects by C-function name. 13 | * Slightly confusing, the names that get listed by "bpftool prog" are the 14 | * C-function names (below the SEC define). 
15 | */ 16 | 17 | SEC("xdp") 18 | int xdp_pass_func(struct xdp_md *ctx) 19 | { 20 | return XDP_PASS; 21 | } 22 | 23 | SEC("xdp") 24 | int xdp_drop_func(struct xdp_md *ctx) 25 | { 26 | return XDP_DROP; 27 | } 28 | 29 | /* Assignment#2: Add new XDP program section that use XDP_ABORTED */ 30 | 31 | char _license[] SEC("license") = "GPL"; 32 | 33 | /* Hint the avail XDP action return codes are: 34 | 35 | enum xdp_action { 36 | XDP_ABORTED = 0, 37 | XDP_DROP, 38 | XDP_PASS, 39 | XDP_TX, 40 | XDP_REDIRECT, 41 | }; 42 | */ 43 | -------------------------------------------------------------------------------- /basic03-map-counter/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | # Departing from the implicit _user.c scheme 4 | XDP_TARGETS := xdp_prog_kern 5 | USER_TARGETS := xdp_load_and_stats 6 | 7 | COMMON_DIR := ../common 8 | 9 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 10 | include $(COMMON_DIR)/common.mk 11 | -------------------------------------------------------------------------------- /basic03-map-counter/common_kern_user.h: -------------------------------------------------------------------------------- 1 | /* This common_kern_user.h is used by kernel side BPF-progs and 2 | * userspace programs, for sharing common struct's and DEFINEs. 
3 | */ 4 | #ifndef __COMMON_KERN_USER_H 5 | #define __COMMON_KERN_USER_H 6 | 7 | /* This is the data record stored in the map */ 8 | struct datarec { 9 | __u64 rx_packets; 10 | /* Assignment#1: Add byte counters */ 11 | }; 12 | 13 | #ifndef XDP_ACTION_MAX 14 | #define XDP_ACTION_MAX (XDP_REDIRECT + 1) 15 | #endif 16 | 17 | #endif /* __COMMON_KERN_USER_H */ 18 | -------------------------------------------------------------------------------- /basic03-map-counter/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | #include "common_kern_user.h" /* defines: struct datarec; */ 6 | 7 | /* Lesson#1: See how a map is defined. 8 | * - Here an array with XDP_ACTION_MAX (max_)entries are created. 9 | * - The idea is to keep stats per (enum) xdp_action 10 | */ 11 | struct { 12 | __uint(type, BPF_MAP_TYPE_ARRAY); 13 | __type(key, __u32); 14 | __type(value, struct datarec); 15 | __uint(max_entries, XDP_ACTION_MAX); 16 | } xdp_stats_map SEC(".maps"); 17 | 18 | /* LLVM maps __sync_fetch_and_add() as a built-in function to the BPF atomic add 19 | * instruction (that is BPF_STX | BPF_XADD | BPF_W for word sizes) 20 | */ 21 | #ifndef lock_xadd 22 | #define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val)) 23 | #endif 24 | 25 | SEC("xdp") 26 | int xdp_stats1_func(struct xdp_md *ctx) 27 | { 28 | // void *data_end = (void *)(long)ctx->data_end; 29 | // void *data = (void *)(long)ctx->data; 30 | struct datarec *rec; 31 | __u32 key = XDP_PASS; /* XDP_PASS = 2 */ 32 | 33 | /* Lookup in kernel BPF-side return pointer to actual data record */ 34 | rec = bpf_map_lookup_elem(&xdp_stats_map, &key); 35 | /* BPF kernel-side verifier will reject program if the NULL pointer 36 | * check isn't performed here. Even-though this is a static array where 37 | * we know key lookup XDP_PASS always will succeed. 
38 | */ 39 | if (!rec) 40 | return XDP_ABORTED; 41 | 42 | /* Multiple CPUs can access data record. Thus, the accounting needs to 43 | * use an atomic operation. 44 | */ 45 | lock_xadd(&rec->rx_packets, 1); 46 | /* Assignment#1: Add byte counters 47 | * - Hint look at struct xdp_md *ctx (copied below) 48 | * 49 | * Assignment#3: Avoid the atomic operation 50 | * - Hint there is a map type named BPF_MAP_TYPE_PERCPU_ARRAY 51 | */ 52 | 53 | return XDP_PASS; 54 | } 55 | 56 | char _license[] SEC("license") = "GPL"; 57 | 58 | /* Copied from: $KERNEL/include/uapi/linux/bpf.h 59 | * 60 | * User return codes for XDP prog type. 61 | * A valid XDP program must return one of these defined values. All other 62 | * return codes are reserved for future use. Unknown return codes will 63 | * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). 64 | * 65 | enum xdp_action { 66 | XDP_ABORTED = 0, 67 | XDP_DROP, 68 | XDP_PASS, 69 | XDP_TX, 70 | XDP_REDIRECT, 71 | }; 72 | 73 | * user accessible metadata for XDP packet hook 74 | * new fields must be added to the end of this structure 75 | * 76 | struct xdp_md { 77 | // (Note: type __u32 is NOT the real-type) 78 | __u32 data; 79 | __u32 data_end; 80 | __u32 data_meta; 81 | // Below access go through struct xdp_rxq_info 82 | __u32 ingress_ifindex; // rxq->dev->ifindex 83 | __u32 rx_queue_index; // rxq->queue_index 84 | }; 85 | */ 86 | -------------------------------------------------------------------------------- /basic04-pinning-maps/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := xdp_loader 5 | USER_TARGETS += xdp_stats 6 | 7 | COMMON_DIR := ../common 8 | 9 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 10 | 11 | include $(COMMON_DIR)/common.mk 12 | -------------------------------------------------------------------------------- 
/basic04-pinning-maps/common_kern_user.h: -------------------------------------------------------------------------------- 1 | /* This common_kern_user.h is used by kernel side BPF-progs and 2 | * userspace programs, for sharing common struct's and DEFINEs. 3 | */ 4 | #ifndef __COMMON_KERN_USER_H 5 | #define __COMMON_KERN_USER_H 6 | 7 | /* This is the data record stored in the map */ 8 | struct datarec { 9 | __u64 rx_packets; 10 | __u64 rx_bytes; 11 | }; 12 | 13 | #ifndef XDP_ACTION_MAX 14 | #define XDP_ACTION_MAX (XDP_REDIRECT + 1) 15 | #endif 16 | 17 | #endif /* __COMMON_KERN_USER_H */ 18 | -------------------------------------------------------------------------------- /basic04-pinning-maps/xdp_loader.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | static const char *__doc__ = "XDP loader\n" 3 | " - Allows selecting BPF program --progname name to XDP-attach to --dev\n"; 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include /* depend on kernel-headers installed */ 21 | 22 | #include "../common/common_params.h" 23 | #include "../common/common_user_bpf_xdp.h" 24 | #include "../common/common_libbpf.h" 25 | #include "common_kern_user.h" 26 | 27 | static const char *default_filename = "xdp_prog_kern.o"; 28 | 29 | static const struct option_wrapper long_options[] = { 30 | 31 | {{"help", no_argument, NULL, 'h' }, 32 | "Show help", false}, 33 | 34 | {{"dev", required_argument, NULL, 'd' }, 35 | "Operate on device ", "", true}, 36 | 37 | {{"skb-mode", no_argument, NULL, 'S' }, 38 | "Install XDP program in SKB (AKA generic) mode"}, 39 | 40 | {{"native-mode", no_argument, NULL, 'N' }, 41 | "Install XDP program in native mode"}, 42 | 43 | {{"auto-mode", no_argument, NULL, 'A' }, 44 | "Auto-detect SKB or native mode"}, 45 | 46 | {{"force", no_argument, 
NULL, 'F' }, 47 | "Force install, replacing existing program on interface"}, 48 | 49 | {{"unload", no_argument, NULL, 'U' }, 50 | "Unload XDP program instead of loading"}, 51 | 52 | {{"quiet", no_argument, NULL, 'q' }, 53 | "Quiet mode (no output)"}, 54 | 55 | {{"filename", required_argument, NULL, 1 }, 56 | "Load program from ", ""}, 57 | 58 | {{"progname", required_argument, NULL, 2 }, 59 | "Load program from function in the ELF file", ""}, 60 | 61 | {{0, 0, NULL, 0 }, NULL, false} 62 | }; 63 | 64 | #ifndef PATH_MAX 65 | #define PATH_MAX 4096 66 | #endif 67 | 68 | const char *pin_basedir = "/sys/fs/bpf"; 69 | const char *map_name = "xdp_stats_map"; 70 | 71 | /* Pinning maps under /sys/fs/bpf in subdir */ 72 | int pin_maps_in_bpf_object(struct bpf_object *bpf_obj, const char *subdir) 73 | { 74 | char map_filename[PATH_MAX]; 75 | char pin_dir[PATH_MAX]; 76 | int err, len; 77 | 78 | len = snprintf(pin_dir, PATH_MAX, "%s/%s", pin_basedir, subdir); 79 | if (len < 0) { 80 | fprintf(stderr, "ERR: creating pin dirname\n"); 81 | return EXIT_FAIL_OPTION; 82 | } 83 | 84 | len = snprintf(map_filename, PATH_MAX, "%s/%s/%s", 85 | pin_basedir, subdir, map_name); 86 | if (len < 0) { 87 | fprintf(stderr, "ERR: creating map_name\n"); 88 | return EXIT_FAIL_OPTION; 89 | } 90 | 91 | /* Existing/previous XDP prog might not have cleaned up */ 92 | if (access(map_filename, F_OK ) != -1 ) { 93 | if (verbose) 94 | printf(" - Unpinning (remove) prev maps in %s/\n", 95 | pin_dir); 96 | 97 | /* Basically calls unlink(3) on map_filename */ 98 | err = bpf_object__unpin_maps(bpf_obj, pin_dir); 99 | if (err) { 100 | fprintf(stderr, "ERR: UNpinning maps in %s\n", pin_dir); 101 | return EXIT_FAIL_BPF; 102 | } 103 | } 104 | if (verbose) 105 | printf(" - Pinning maps in %s/\n", pin_dir); 106 | 107 | /* This will pin all maps in our bpf_object */ 108 | err = bpf_object__pin_maps(bpf_obj, pin_dir); 109 | if (err) 110 | return EXIT_FAIL_BPF; 111 | 112 | return 0; 113 | } 114 | 115 | int main(int argc, 
char **argv) 116 | { 117 | struct xdp_program *program; 118 | int err; 119 | 120 | struct config cfg = { 121 | .attach_mode = XDP_MODE_NATIVE, 122 | .ifindex = -1, 123 | .do_unload = false, 124 | }; 125 | /* Set default BPF-ELF object file and BPF program name */ 126 | strncpy(cfg.filename, default_filename, sizeof(cfg.filename)); 127 | /* Cmdline options can change progname */ 128 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 129 | 130 | /* Required option */ 131 | if (cfg.ifindex == -1) { 132 | fprintf(stderr, "ERR: required option --dev missing\n\n"); 133 | usage(argv[0], __doc__, long_options, (argc == 1)); 134 | return EXIT_FAIL_OPTION; 135 | } 136 | if (cfg.do_unload) { 137 | /* TODO: Miss unpin of maps on unload */ 138 | /* return xdp_link_detach(cfg.ifindex, cfg.xdp_flags, 0); */ 139 | } 140 | 141 | program = load_bpf_and_xdp_attach(&cfg); 142 | if (!program) 143 | return EXIT_FAIL_BPF; 144 | 145 | if (verbose) { 146 | printf("Success: Loaded BPF-object(%s) and used program(%s)\n", 147 | cfg.filename, cfg.progname); 148 | printf(" - XDP prog attached on device:%s(ifindex:%d)\n", 149 | cfg.ifname, cfg.ifindex); 150 | } 151 | 152 | /* Use the --dev name as subdir for exporting/pinning maps */ 153 | err = pin_maps_in_bpf_object(xdp_program__bpf_obj(program), cfg.ifname); 154 | if (err) { 155 | fprintf(stderr, "ERR: pinning maps\n"); 156 | return err; 157 | } 158 | 159 | return EXIT_OK; 160 | } 161 | -------------------------------------------------------------------------------- /basic04-pinning-maps/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | #include "common_kern_user.h" /* defines: struct datarec; */ 6 | 7 | /* Lesson: See how a map is defined. 8 | * - Here an array with XDP_ACTION_MAX (max_)entries are created. 
9 | * - The idea is to keep stats per (enum) xdp_action 10 | */ 11 | struct { 12 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 13 | __type(key, __u32); 14 | __type(value, struct datarec); 15 | __uint(max_entries, XDP_ACTION_MAX); 16 | } xdp_stats_map SEC(".maps"); 17 | 18 | /* LLVM maps __sync_fetch_and_add() as a built-in function to the BPF atomic add 19 | * instruction (that is BPF_STX | BPF_XADD | BPF_W for word sizes) 20 | */ 21 | #ifndef lock_xadd 22 | #define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val)) 23 | #endif 24 | 25 | static __always_inline 26 | __u32 xdp_stats_record_action(struct xdp_md *ctx, __u32 action) 27 | { 28 | void *data_end = (void *)(long)ctx->data_end; 29 | void *data = (void *)(long)ctx->data; 30 | 31 | if (action >= XDP_ACTION_MAX) 32 | return XDP_ABORTED; 33 | 34 | /* Lookup in kernel BPF-side return pointer to actual data record */ 35 | struct datarec *rec = bpf_map_lookup_elem(&xdp_stats_map, &action); 36 | if (!rec) 37 | return XDP_ABORTED; 38 | 39 | /* Calculate packet length */ 40 | __u64 bytes = data_end - data; 41 | 42 | /* BPF_MAP_TYPE_PERCPU_ARRAY returns a data record specific to current 43 | * CPU and XDP hooks runs under Softirq, which makes it safe to update 44 | * without atomic operations. 
# SPDX-License-Identifier: (GPL-2.0)
LIB_DIR = ../lib
include $(LIB_DIR)/defines.mk

# Objects shared by the userspace programs of the tutorial lessons
OBJS := common_params.o common_user_bpf_xdp.o

CFLAGS += -I$(LIB_DIR)/install/include

# Neither name is a file; without this a stray file called "all" or "clean"
# would silently disable the target.
.PHONY: all clean

all: $(OBJS)

# Each object is rebuilt when its own .c or .h file changes
$(OBJS): %.o: %.c %.h
	$(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<

clean:
	$(Q)rm -f *.o
EXTRA_DEPS +=

# BPF-prog kern and userspace shares struct via header file:
KERN_USER_H ?= $(wildcard common_kern_user.h)

CFLAGS += -I$(LIB_DIR)/install/include $(EXTRA_CFLAGS)
BPF_CFLAGS += -I$(LIB_DIR)/install/include $(EXTRA_CFLAGS)
LDFLAGS += -L$(LIB_DIR)/install/lib

BPF_HEADERS := $(wildcard $(HEADER_DIR)/*/*.h) $(wildcard $(INCLUDE_DIR)/*/*.h)

all: llvm-check $(USER_TARGETS) $(XDP_OBJ) $(COPY_LOADER) $(COPY_STATS)

# all, clean and llvm-check are commands, not files. $(CLANG) and $(LLC) are
# listed so the tool names can be used as prerequisites of llvm-check.
.PHONY: all clean llvm-check $(CLANG) $(LLC)

clean:
	$(Q)rm -f $(USER_TARGETS) $(XDP_OBJ) $(USER_OBJ) $(COPY_LOADER) $(COPY_STATS) *.ll

ifdef COPY_LOADER
# $(MAKE) instead of a literal "make" keeps -j/-n and the jobserver working
$(LOADER_DIR)/$(COPY_LOADER):
	$(Q)$(MAKE) -C $(LOADER_DIR)

$(COPY_LOADER): $(LOADER_DIR)/$(COPY_LOADER)
	$(QUIET_COPY)cp $< $@
endif

ifdef COPY_STATS
$(STATS_DIR)/$(COPY_STATS): $(STATS_DIR)/${COPY_STATS:=.c} $(COMMON_H)
	$(Q)$(MAKE) -C $(STATS_DIR) $(COPY_STATS)

$(COPY_STATS): $(STATS_DIR)/$(COPY_STATS)
	$(QUIET_COPY)cp $< $@
# Needing xdp_stats implies depending on header files:
EXTRA_DEPS += $(COMMON_DIR)/xdp_stats_kern.h $(COMMON_DIR)/xdp_stats_kern_user.h
endif

# For build dependency on this file, if it gets updated
COMMON_MK = $(COMMON_DIR)/common.mk

# Fail early, with a readable message, when clang or llc is not installed
llvm-check: $(CLANG) $(LLC)
	@for TOOL in $^ ; do \
		if ! command -v "$${TOOL}" >/dev/null 2>&1; then \
			echo "*** ERROR: Cannot find tool $${TOOL}" ;\
			exit 1; \
		fi; \
	done

# Build libbpf from the submodule and install its headers. The steps are
# chained with && so a failed build is not masked by a later step succeeding.
$(OBJECT_LIBBPF):
	@if [ ! -d $(LIBBPF_DIR) ]; then \
		echo "Error: Need libbpf submodule" $(LIBBPF_DIR); \
		echo "May need to run git submodule update --init"; \
		exit 1; \
	else \
		cd $(LIBBPF_DIR) && $(MAKE) all OBJDIR=. && \
		mkdir -p build && $(MAKE) install_headers DESTDIR=build OBJDIR=.; \
	fi
-d $(LIBXDP_DIR) ]; then \ 95 | echo "Error: Need libxdp submodule" $(LIBXDP_DIR); \ 96 | echo "May need to run git submodule update --init"; \ 97 | exit 1; \ 98 | else \ 99 | cd $(LIBXDP_DIR) && $(MAKE) all OBJDIR=.; \ 100 | fi 101 | 102 | # Create dependency: detect if C-file change and touch H-file, to trigger 103 | # target $(COMMON_OBJS) 104 | $(COMMON_H): %.h: %.c 105 | touch $@ 106 | 107 | # Detect if any of common obj changed and create dependency on .h-files 108 | $(COMMON_OBJS): %.o: %.h 109 | $(Q)$(MAKE) -C $(COMMON_DIR) 110 | 111 | $(USER_TARGETS): %: %.c $(OBJECT_LIBBPF) $(OBJECT_LIBXDP) Makefile $(COMMON_MK) $(COMMON_OBJS) $(KERN_USER_H) $(EXTRA_DEPS) 112 | $(QUIET_CC)$(CC) -Wall $(CFLAGS) $(LDFLAGS) -o $@ $(COMMON_OBJS) $(LIB_OBJS) \ 113 | $< $(LDLIBS) 114 | 115 | $(XDP_OBJ): %.o: %.c Makefile $(COMMON_MK) $(KERN_USER_H) $(EXTRA_DEPS) $(OBJECT_LIBBPF) 116 | $(QUIET_CLANG)$(CLANG) -S \ 117 | -target bpf \ 118 | -D __BPF_TRACING__ \ 119 | $(BPF_CFLAGS) \ 120 | -Wall \ 121 | -Wno-unused-value \ 122 | -Wno-pointer-sign \ 123 | -Wno-compare-distinct-pointer-types \ 124 | -Werror \ 125 | -O2 -emit-llvm -c -g -o ${@:.o=.ll} $< 126 | $(QUIET_LLC)$(LLC) -march=bpf -filetype=obj -o $@ ${@:.o=.ll} 127 | -------------------------------------------------------------------------------- /common/common_defines.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMMON_DEFINES_H 2 | #define __COMMON_DEFINES_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct config { 10 | enum xdp_attach_mode attach_mode; 11 | __u32 xdp_flags; 12 | int ifindex; 13 | char *ifname; 14 | char ifname_buf[IF_NAMESIZE]; 15 | int redirect_ifindex; 16 | char *redirect_ifname; 17 | char redirect_ifname_buf[IF_NAMESIZE]; 18 | bool do_unload; 19 | __u32 prog_id; 20 | bool reuse_maps; 21 | char pin_dir[512]; 22 | char filename[512]; 23 | char progname[32]; 24 | char src_mac[18]; 25 | char dest_mac[18]; 26 | __u16 xsk_bind_flags; 
27 | int xsk_if_queue; 28 | bool xsk_poll_mode; 29 | bool unload_all; 30 | }; 31 | 32 | /* Defined in common_params.o */ 33 | extern int verbose; 34 | 35 | /* Exit return codes */ 36 | #define EXIT_OK 0 /* == EXIT_SUCCESS (stdlib.h) man exit(3) */ 37 | #define EXIT_FAIL 1 /* == EXIT_FAILURE (stdlib.h) man exit(3) */ 38 | #define EXIT_FAIL_OPTION 2 39 | #define EXIT_FAIL_XDP 30 40 | #define EXIT_FAIL_BPF 40 41 | 42 | #endif /* __COMMON_DEFINES_H */ 43 | -------------------------------------------------------------------------------- /common/common_libbpf.c: -------------------------------------------------------------------------------- 1 | /* Common function that with time should be moved to libbpf */ 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "common_libbpf.h" 10 | 11 | /* From: include/linux/err.h */ 12 | #define MAX_ERRNO 4095 13 | #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) 14 | static inline bool IS_ERR_OR_NULL(const void *ptr) 15 | { 16 | return (!ptr) || IS_ERR_VALUE((unsigned long)ptr); 17 | } 18 | 19 | #define pr_warning printf 20 | 21 | /* As close as possible to libbpf bpf_prog_load_xattr(), with the 22 | * difference of handling pinned maps. 
23 | */ 24 | int bpf_prog_load_xattr_maps(const struct bpf_prog_load_attr_maps *attr, 25 | struct bpf_object **pobj, int *prog_fd) 26 | { 27 | struct bpf_object_open_attr open_attr = { 28 | .file = attr->file, 29 | .prog_type = attr->prog_type, 30 | }; 31 | struct bpf_program *prog, *first_prog = NULL; 32 | enum bpf_attach_type expected_attach_type; 33 | enum bpf_prog_type prog_type; 34 | struct bpf_object *obj; 35 | struct bpf_map *map; 36 | int err; 37 | int i; 38 | 39 | if (!attr) 40 | return -EINVAL; 41 | if (!attr->file) 42 | return -EINVAL; 43 | 44 | 45 | obj = bpf_object__open_xattr(&open_attr); 46 | if (IS_ERR_OR_NULL(obj)) 47 | return -ENOENT; 48 | 49 | bpf_object__for_each_program(prog, obj) { 50 | /* 51 | * If type is not specified, try to guess it based on 52 | * section name. 53 | */ 54 | prog_type = attr->prog_type; 55 | // Was: prog->prog_ifindex = attr->ifindex; 56 | bpf_program__set_ifindex(prog, attr->ifindex); 57 | 58 | expected_attach_type = attr->expected_attach_type; 59 | #if 0 /* Use internal libbpf variables */ 60 | if (prog_type == BPF_PROG_TYPE_UNSPEC) { 61 | err = bpf_program__identify_section(prog, &prog_type, 62 | &expected_attach_type); 63 | if (err < 0) { 64 | bpf_object__close(obj); 65 | return -EINVAL; 66 | } 67 | } 68 | #endif 69 | 70 | bpf_program__set_type(prog, prog_type); 71 | bpf_program__set_expected_attach_type(prog, 72 | expected_attach_type); 73 | 74 | if (!first_prog) 75 | first_prog = prog; 76 | } 77 | 78 | /* Reset attr->pinned_maps.map_fd to identify successful file load */ 79 | for (i = 0; i < attr->nr_pinned_maps; i++) 80 | attr->pinned_maps[i].map_fd = -1; 81 | 82 | bpf_map__for_each(map, obj) { 83 | const char* mapname = bpf_map__name(map); 84 | 85 | if (!bpf_map__is_offload_neutral(map)) 86 | bpf_map__set_ifindex(map, attr->ifindex); 87 | /* Was: map->map_ifindex = attr->ifindex; */ 88 | 89 | for (i = 0; i < attr->nr_pinned_maps; i++) { 90 | struct bpf_pinned_map *pin_map = &attr->pinned_maps[i]; 91 | int fd; 92 | 
93 | if (strcmp(mapname, pin_map->name) != 0) 94 | continue; 95 | 96 | /* Matched, try opening pinned file */ 97 | fd = bpf_obj_get(pin_map->filename); 98 | if (fd > 0) { 99 | /* Use FD from pinned map as replacement */ 100 | bpf_map__reuse_fd(map, fd); 101 | /* TODO: Might want to set internal map "name" 102 | * if opened pinned map didn't, to allow 103 | * bpf_object__find_map_fd_by_name() to work. 104 | */ 105 | pin_map->map_fd = fd; 106 | continue; 107 | } 108 | /* Could not open pinned filename map, then this prog 109 | * should then pin the map, BUT this can only happen 110 | * after bpf_object__load(). 111 | */ 112 | } 113 | } 114 | 115 | if (!first_prog) { 116 | pr_warning("object file doesn't contain bpf program\n"); 117 | bpf_object__close(obj); 118 | return -ENOENT; 119 | } 120 | 121 | err = bpf_object__load(obj); 122 | if (err) { 123 | bpf_object__close(obj); 124 | return -EINVAL; 125 | } 126 | 127 | /* Pin the maps that were not loaded via pinned filename */ 128 | bpf_map__for_each(map, obj) { 129 | const char* mapname = bpf_map__name(map); 130 | 131 | for (i = 0; i < attr->nr_pinned_maps; i++) { 132 | struct bpf_pinned_map *pin_map = &attr->pinned_maps[i]; 133 | int err; 134 | 135 | if (strcmp(mapname, pin_map->name) != 0) 136 | continue; 137 | 138 | /* Matched, check if map is already loaded */ 139 | if (pin_map->map_fd != -1) 140 | continue; 141 | 142 | /* Needs to be pinned */ 143 | err = bpf_map__pin(map, pin_map->filename); 144 | if (err) 145 | continue; 146 | pin_map->map_fd = bpf_map__fd(map); 147 | } 148 | } 149 | 150 | /* Help user if requested map name that doesn't exist */ 151 | for (i = 0; i < attr->nr_pinned_maps; i++) { 152 | struct bpf_pinned_map *pin_map = &attr->pinned_maps[i]; 153 | 154 | if (pin_map->map_fd < 0) 155 | pr_warning("%s() requested mapname:%s not seen\n", 156 | __func__, pin_map->name); 157 | } 158 | 159 | *pobj = obj; 160 | *prog_fd = bpf_program__fd(first_prog); 161 | return 0; 162 | } 163 | 
-------------------------------------------------------------------------------- /common/common_libbpf.h: -------------------------------------------------------------------------------- 1 | /* Common function that with time should be moved to libbpf */ 2 | #ifndef __COMMON_LIBBPF_H 3 | #define __COMMON_LIBBPF_H 4 | 5 | struct bpf_pinned_map { 6 | const char *name; 7 | const char *filename; 8 | int map_fd; 9 | }; 10 | 11 | /* bpf_prog_load_attr extended */ 12 | struct bpf_prog_load_attr_maps { 13 | const char *file; 14 | enum bpf_prog_type prog_type; 15 | enum bpf_attach_type expected_attach_type; 16 | int ifindex; 17 | int nr_pinned_maps; 18 | struct bpf_pinned_map *pinned_maps; 19 | }; 20 | 21 | int bpf_prog_load_xattr_maps(const struct bpf_prog_load_attr_maps *attr, 22 | struct bpf_object **pobj, int *prog_fd); 23 | 24 | #endif /* __COMMON_LIBBPF_H */ 25 | -------------------------------------------------------------------------------- /common/common_params.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include /* XDP_FLAGS_* depend on kernel-headers installed */ 11 | #include 12 | 13 | #include "common_params.h" 14 | 15 | int verbose = 1; 16 | 17 | #define BUFSIZE 30 18 | 19 | void _print_options(const struct option_wrapper *long_options, bool required) 20 | { 21 | int i, pos; 22 | char buf[BUFSIZE]; 23 | 24 | for (i = 0; long_options[i].option.name != 0; i++) { 25 | if (long_options[i].required != required) 26 | continue; 27 | 28 | if (long_options[i].option.val > 64) /* ord('A') = 65 */ 29 | printf(" -%c,", long_options[i].option.val); 30 | else 31 | printf(" "); 32 | pos = snprintf(buf, BUFSIZE, " --%s", long_options[i].option.name); 33 | if (long_options[i].metavar) 34 | snprintf(&buf[pos], BUFSIZE-pos, " %s", long_options[i].metavar); 35 | printf("%-22s", buf); 36 | printf(" %s", long_options[i].help); 37 | 
printf("\n"); 38 | } 39 | } 40 | 41 | void usage(const char *prog_name, const char *doc, 42 | const struct option_wrapper *long_options, bool full) 43 | { 44 | printf("Usage: %s [options]\n", prog_name); 45 | 46 | if (!full) { 47 | printf("Use --help (or -h) to see full option list.\n"); 48 | return; 49 | } 50 | 51 | printf("\nDOCUMENTATION:\n %s\n", doc); 52 | printf("Required options:\n"); 53 | _print_options(long_options, true); 54 | printf("\n"); 55 | printf("Other options:\n"); 56 | _print_options(long_options, false); 57 | printf("\n"); 58 | } 59 | 60 | int option_wrappers_to_options(const struct option_wrapper *wrapper, 61 | struct option **options) 62 | { 63 | int i, num; 64 | struct option *new_options; 65 | for (i = 0; wrapper[i].option.name != 0; i++) {} 66 | num = i; 67 | 68 | new_options = malloc(sizeof(struct option) * num); 69 | if (!new_options) 70 | return -1; 71 | for (i = 0; i < num; i++) { 72 | memcpy(&new_options[i], &wrapper[i], sizeof(struct option)); 73 | } 74 | 75 | *options = new_options; 76 | return 0; 77 | } 78 | 79 | void parse_cmdline_args(int argc, char **argv, 80 | const struct option_wrapper *options_wrapper, 81 | struct config *cfg, const char *doc) 82 | { 83 | struct option *long_options; 84 | bool full_help = false; 85 | int longindex = 0; 86 | char *dest; 87 | int opt; 88 | 89 | if (option_wrappers_to_options(options_wrapper, &long_options)) { 90 | fprintf(stderr, "Unable to malloc()\n"); 91 | exit(EXIT_FAIL_OPTION); 92 | } 93 | 94 | /* Parse commands line args */ 95 | while ((opt = getopt_long(argc, argv, "hd:r:L:R:ASNFU:MQ:czpq", 96 | long_options, &longindex)) != -1) { 97 | switch (opt) { 98 | case 'd': 99 | if (strlen(optarg) >= IF_NAMESIZE) { 100 | fprintf(stderr, "ERR: --dev name too long\n"); 101 | goto error; 102 | } 103 | cfg->ifname = (char *)&cfg->ifname_buf; 104 | strncpy(cfg->ifname, optarg, IF_NAMESIZE); 105 | cfg->ifindex = if_nametoindex(cfg->ifname); 106 | if (cfg->ifindex == 0) { 107 | fprintf(stderr, 108 | 
"ERR: --dev name unknown err(%d):%s\n", 109 | errno, strerror(errno)); 110 | goto error; 111 | } 112 | break; 113 | case 'r': 114 | if (strlen(optarg) >= IF_NAMESIZE) { 115 | fprintf(stderr, "ERR: --redirect-dev name too long\n"); 116 | goto error; 117 | } 118 | cfg->redirect_ifname = (char *)&cfg->redirect_ifname_buf; 119 | strncpy(cfg->redirect_ifname, optarg, IF_NAMESIZE); 120 | cfg->redirect_ifindex = if_nametoindex(cfg->redirect_ifname); 121 | if (cfg->redirect_ifindex == 0) { 122 | fprintf(stderr, 123 | "ERR: --redirect-dev name unknown err(%d):%s\n", 124 | errno, strerror(errno)); 125 | goto error; 126 | } 127 | break; 128 | case 'A': 129 | cfg->attach_mode = XDP_MODE_UNSPEC; 130 | break; 131 | case 'S': 132 | cfg->attach_mode = XDP_MODE_SKB; 133 | cfg->xsk_bind_flags &= ~XDP_ZEROCOPY; 134 | cfg->xsk_bind_flags |= XDP_COPY; 135 | break; 136 | case 'N': 137 | cfg->attach_mode = XDP_MODE_NATIVE; 138 | break; 139 | case 3: /* --offload-mode */ 140 | cfg->attach_mode = XDP_MODE_HW; 141 | break; 142 | case 'M': 143 | cfg->reuse_maps = true; 144 | break; 145 | case 'U': 146 | cfg->do_unload = true; 147 | cfg->prog_id = atoi(optarg); 148 | break; 149 | case 'p': 150 | cfg->xsk_poll_mode = true; 151 | break; 152 | case 'q': 153 | verbose = false; 154 | break; 155 | case 'Q': 156 | cfg->xsk_if_queue = atoi(optarg); 157 | break; 158 | case 1: /* --filename */ 159 | dest = (char *)&cfg->filename; 160 | strncpy(dest, optarg, sizeof(cfg->filename)); 161 | break; 162 | case 2: /* --progname */ 163 | dest = (char *)&cfg->progname; 164 | strncpy(dest, optarg, sizeof(cfg->progname)); 165 | break; 166 | case 'L': /* --src-mac */ 167 | dest = (char *)&cfg->src_mac; 168 | strncpy(dest, optarg, sizeof(cfg->src_mac)); 169 | break; 170 | case 'R': /* --dest-mac */ 171 | dest = (char *)&cfg->dest_mac; 172 | strncpy(dest, optarg, sizeof(cfg->dest_mac)); 173 | break; 174 | case 'c': 175 | cfg->xsk_bind_flags &= ~XDP_ZEROCOPY; 176 | cfg->xsk_bind_flags |= XDP_COPY; 177 | break; 178 | 
case 'z': 179 | cfg->xsk_bind_flags &= ~XDP_COPY; 180 | cfg->xsk_bind_flags |= XDP_ZEROCOPY; 181 | break; 182 | case 4: /* --unload-all */ 183 | cfg->unload_all = true; 184 | break; 185 | case 'h': 186 | full_help = true; 187 | /* fall-through */ 188 | error: 189 | default: 190 | usage(argv[0], doc, options_wrapper, full_help); 191 | free(long_options); 192 | exit(EXIT_FAIL_OPTION); 193 | } 194 | } 195 | free(long_options); 196 | } 197 | -------------------------------------------------------------------------------- /common/common_params.h: -------------------------------------------------------------------------------- 1 | /* This common_user.h is used by userspace programs */ 2 | #ifndef __COMMON_PARAMS_H 3 | #define __COMMON_PARAMS_H 4 | 5 | #include 6 | #include "common_defines.h" 7 | 8 | struct option_wrapper { 9 | struct option option; 10 | char *help; 11 | char *metavar; 12 | bool required; 13 | }; 14 | 15 | void usage(const char *prog_name, const char *doc, 16 | const struct option_wrapper *long_options, bool full); 17 | 18 | void parse_cmdline_args(int argc, char **argv, 19 | const struct option_wrapper *long_options, 20 | struct config *cfg, const char *doc); 21 | 22 | #endif /* __COMMON_PARAMS_H */ 23 | -------------------------------------------------------------------------------- /common/common_user_bpf_xdp.h: -------------------------------------------------------------------------------- 1 | /* Common BPF/XDP functions used by userspace side programs */ 2 | #ifndef __COMMON_USER_BPF_XDP_H 3 | #define __COMMON_USER_BPF_XDP_H 4 | 5 | struct bpf_object *load_bpf_object_file(const char *filename, int ifindex); 6 | struct xdp_program *load_bpf_and_xdp_attach(struct config *cfg); 7 | 8 | const char *action2str(__u32 action); 9 | 10 | int check_map_fd_info(const struct bpf_map_info *info, 11 | const struct bpf_map_info *exp); 12 | 13 | int open_bpf_map_file(const char *pin_dir, 14 | const char *mapname, 15 | struct bpf_map_info *info); 16 | int 
do_unload(struct config *cfg); 17 | 18 | #endif /* __COMMON_USER_BPF_XDP_H */ 19 | -------------------------------------------------------------------------------- /common/parsing_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ 2 | /* 3 | * This file contains parsing functions that are used in the packetXX XDP 4 | * programs. The functions are marked as __always_inline, and fully defined in 5 | * this header file to be included in the BPF program. 6 | * 7 | * Each helper parses a packet header, including doing bounds checking, and 8 | * returns the type of its contents if successful, and -1 otherwise. 9 | * 10 | * For Ethernet and IP headers, the content type is the type of the payload 11 | * (h_proto for Ethernet, nexthdr for IPv6), for ICMP it is the ICMP type field. 12 | * All return values are in host byte order. 13 | * 14 | * The versions of the functions included here are slightly expanded versions of 15 | * the functions in the packet01 lesson. For instance, the Ethernet header 16 | * parsing has support for parsing VLAN tags. 17 | */ 18 | 19 | #ifndef __PARSING_HELPERS_H 20 | #define __PARSING_HELPERS_H 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | /* Header cursor to keep track of current parsing position */ 33 | struct hdr_cursor { 34 | void *pos; 35 | }; 36 | 37 | /* 38 | * struct vlan_hdr - vlan header 39 | * @h_vlan_TCI: priority and VLAN ID 40 | * @h_vlan_encapsulated_proto: packet type ID or len 41 | */ 42 | struct vlan_hdr { 43 | __be16 h_vlan_TCI; 44 | __be16 h_vlan_encapsulated_proto; 45 | }; 46 | 47 | /* 48 | * Struct icmphdr_common represents the common part of the icmphdr and icmp6hdr 49 | * structures. 
50 | */ 51 | struct icmphdr_common { 52 | __u8 type; 53 | __u8 code; 54 | __sum16 cksum; 55 | }; 56 | 57 | /* Allow users of header file to redefine VLAN max depth */ 58 | #ifndef VLAN_MAX_DEPTH 59 | #define VLAN_MAX_DEPTH 2 60 | #endif 61 | 62 | #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ 63 | /* Struct for collecting VLANs after parsing via parse_ethhdr_vlan */ 64 | struct collect_vlans { 65 | __u16 id[VLAN_MAX_DEPTH]; 66 | }; 67 | 68 | static __always_inline int proto_is_vlan(__u16 h_proto) 69 | { 70 | return !!(h_proto == bpf_htons(ETH_P_8021Q) || 71 | h_proto == bpf_htons(ETH_P_8021AD)); 72 | } 73 | 74 | /* Notice, parse_ethhdr() will skip VLAN tags, by advancing nh->pos and returns 75 | * next header EtherType, BUT the ethhdr pointer supplied still points to the 76 | * Ethernet header. Thus, caller can look at eth->h_proto to see if this was a 77 | * VLAN tagged packet. 78 | */ 79 | static __always_inline int parse_ethhdr_vlan(struct hdr_cursor *nh, 80 | void *data_end, 81 | struct ethhdr **ethhdr, 82 | struct collect_vlans *vlans) 83 | { 84 | struct ethhdr *eth = nh->pos; 85 | int hdrsize = sizeof(*eth); 86 | struct vlan_hdr *vlh; 87 | __u16 h_proto; 88 | int i; 89 | 90 | /* Byte-count bounds check; check if current pointer + size of header 91 | * is after data_end. 92 | */ 93 | if (nh->pos + hdrsize > data_end) 94 | return -1; 95 | 96 | nh->pos += hdrsize; 97 | *ethhdr = eth; 98 | vlh = nh->pos; 99 | h_proto = eth->h_proto; 100 | 101 | /* Use loop unrolling to avoid the verifier restriction on loops; 102 | * support up to VLAN_MAX_DEPTH layers of VLAN encapsulation. 
103 | */ 104 | #pragma unroll 105 | for (i = 0; i < VLAN_MAX_DEPTH; i++) { 106 | if (!proto_is_vlan(h_proto)) 107 | break; 108 | 109 | if (vlh + 1 > data_end) 110 | break; 111 | 112 | h_proto = vlh->h_vlan_encapsulated_proto; 113 | if (vlans) /* collect VLAN ids */ 114 | vlans->id[i] = 115 | (bpf_ntohs(vlh->h_vlan_TCI) & VLAN_VID_MASK); 116 | 117 | vlh++; 118 | } 119 | 120 | nh->pos = vlh; 121 | return h_proto; /* network-byte-order */ 122 | } 123 | 124 | static __always_inline int parse_ethhdr(struct hdr_cursor *nh, 125 | void *data_end, 126 | struct ethhdr **ethhdr) 127 | { 128 | /* Expect compiler removes the code that collects VLAN ids */ 129 | return parse_ethhdr_vlan(nh, data_end, ethhdr, NULL); 130 | } 131 | 132 | static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, 133 | void *data_end, 134 | struct ipv6hdr **ip6hdr) 135 | { 136 | struct ipv6hdr *ip6h = nh->pos; 137 | 138 | /* Pointer-arithmetic bounds check; pointer +1 points to after end of 139 | * thing being pointed to. We will be using this style in the remainder 140 | * of the tutorial. 
141 | */ 142 | if (ip6h + 1 > data_end) 143 | return -1; 144 | 145 | nh->pos = ip6h + 1; 146 | *ip6hdr = ip6h; 147 | 148 | return ip6h->nexthdr; 149 | } 150 | 151 | static __always_inline int parse_iphdr(struct hdr_cursor *nh, 152 | void *data_end, 153 | struct iphdr **iphdr) 154 | { 155 | struct iphdr *iph = nh->pos; 156 | int hdrsize; 157 | 158 | if (iph + 1 > data_end) 159 | return -1; 160 | 161 | hdrsize = iph->ihl * 4; 162 | /* Sanity check packet field is valid */ 163 | if(hdrsize < sizeof(*iph)) 164 | return -1; 165 | 166 | /* Variable-length IPv4 header, need to use byte-based arithmetic */ 167 | if (nh->pos + hdrsize > data_end) 168 | return -1; 169 | 170 | nh->pos += hdrsize; 171 | *iphdr = iph; 172 | 173 | return iph->protocol; 174 | } 175 | 176 | static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, 177 | void *data_end, 178 | struct icmp6hdr **icmp6hdr) 179 | { 180 | struct icmp6hdr *icmp6h = nh->pos; 181 | 182 | if (icmp6h + 1 > data_end) 183 | return -1; 184 | 185 | nh->pos = icmp6h + 1; 186 | *icmp6hdr = icmp6h; 187 | 188 | return icmp6h->icmp6_type; 189 | } 190 | 191 | static __always_inline int parse_icmphdr(struct hdr_cursor *nh, 192 | void *data_end, 193 | struct icmphdr **icmphdr) 194 | { 195 | struct icmphdr *icmph = nh->pos; 196 | 197 | if (icmph + 1 > data_end) 198 | return -1; 199 | 200 | nh->pos = icmph + 1; 201 | *icmphdr = icmph; 202 | 203 | return icmph->type; 204 | } 205 | 206 | static __always_inline int parse_icmphdr_common(struct hdr_cursor *nh, 207 | void *data_end, 208 | struct icmphdr_common **icmphdr) 209 | { 210 | struct icmphdr_common *h = nh->pos; 211 | 212 | if (h + 1 > data_end) 213 | return -1; 214 | 215 | nh->pos = h + 1; 216 | *icmphdr = h; 217 | 218 | return h->type; 219 | } 220 | 221 | /* 222 | * parse_udphdr: parse the udp header and return the length of the udp payload 223 | */ 224 | static __always_inline int parse_udphdr(struct hdr_cursor *nh, 225 | void *data_end, 226 | struct udphdr **udphdr) 227 | { 
228 | int len; 229 | struct udphdr *h = nh->pos; 230 | 231 | if (h + 1 > data_end) 232 | return -1; 233 | 234 | nh->pos = h + 1; 235 | *udphdr = h; 236 | 237 | len = bpf_ntohs(h->len) - sizeof(struct udphdr); 238 | if (len < 0) 239 | return -1; 240 | 241 | return len; 242 | } 243 | 244 | /* 245 | * parse_tcphdr: parse and return the length of the tcp header 246 | */ 247 | static __always_inline int parse_tcphdr(struct hdr_cursor *nh, 248 | void *data_end, 249 | struct tcphdr **tcphdr) 250 | { 251 | int len; 252 | struct tcphdr *h = nh->pos; 253 | 254 | if (h + 1 > data_end) 255 | return -1; 256 | 257 | len = h->doff * 4; 258 | /* Sanity check packet field is valid */ 259 | if(len < sizeof(*h)) 260 | return -1; 261 | 262 | /* Variable-length TCP header, need to use byte-based arithmetic */ 263 | if (nh->pos + len > data_end) 264 | return -1; 265 | 266 | nh->pos += len; 267 | *tcphdr = h; 268 | 269 | return len; 270 | } 271 | 272 | #endif /* __PARSING_HELPERS_H */ 273 | -------------------------------------------------------------------------------- /common/rewrite_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-clause) */ 2 | /* 3 | * This file contains functions that are used in the packetXX XDP programs to 4 | * manipulate on packets data. The functions are marked as __always_inline, and 5 | * fully defined in this header file to be included in the BPF program. 6 | */ 7 | 8 | #ifndef __REWRITE_HELPERS_H 9 | #define __REWRITE_HELPERS_H 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | /* Pops the outermost VLAN tag off the packet. Returns the popped VLAN ID on 20 | * success or negative errno on failure. 
21 | */ 22 | static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) 23 | { 24 | void *data_end = (void *)(long)ctx->data_end; 25 | struct ethhdr eth_cpy; 26 | struct vlan_hdr *vlh; 27 | __be16 h_proto; 28 | int vlid; 29 | 30 | if (!proto_is_vlan(eth->h_proto)) 31 | return -1; 32 | 33 | /* Careful with the parenthesis here */ 34 | vlh = (void *)(eth + 1); 35 | 36 | /* Still need to do bounds checking */ 37 | if (vlh + 1 > data_end) 38 | return -1; 39 | 40 | /* Save the TCI for returning (the full 16-bit value in host byte order; priority/DEI bits are not masked off), h_proto for updating Ethernet header */ 41 | vlid = bpf_ntohs(vlh->h_vlan_TCI); 42 | h_proto = vlh->h_vlan_encapsulated_proto; 43 | 44 | /* Make a copy of the outer Ethernet header before we cut it off */ 45 | __builtin_memcpy(&eth_cpy, eth, sizeof(eth_cpy)); 46 | 47 | /* Actually adjust the head pointer */ 48 | if (bpf_xdp_adjust_head(ctx, (int)sizeof(*vlh))) 49 | return -1; 50 | 51 | /* Need to re-evaluate data *and* data_end and do new bounds checking 52 | * after adjusting head 53 | */ 54 | eth = (void *)(long)ctx->data; 55 | data_end = (void *)(long)ctx->data_end; 56 | if (eth + 1 > data_end) 57 | return -1; 58 | 59 | /* Copy back the old Ethernet header and update the proto type */ 60 | __builtin_memcpy(eth, &eth_cpy, sizeof(*eth)); 61 | eth->h_proto = h_proto; 62 | 63 | return vlid; 64 | } 65 | 66 | /* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, 67 | * -1 on failure. 
68 | */ 69 | static __always_inline int vlan_tag_push(struct xdp_md *ctx, 70 | struct ethhdr *eth, int vlid) 71 | { 72 | void *data_end = (void *)(long)ctx->data_end; 73 | struct ethhdr eth_cpy; 74 | struct vlan_hdr *vlh; 75 | 76 | /* First copy the original Ethernet header */ 77 | __builtin_memcpy(&eth_cpy, eth, sizeof(eth_cpy)); 78 | 79 | /* Then add space in front of the packet */ 80 | if (bpf_xdp_adjust_head(ctx, 0 - (int)sizeof(*vlh))) 81 | return -1; 82 | 83 | /* Need to re-evaluate data_end and data after head adjustment, and 84 | * bounds check, even though we know there is enough space (as we 85 | * increased it). 86 | */ 87 | data_end = (void *)(long)ctx->data_end; 88 | eth = (void *)(long)ctx->data; 89 | 90 | if (eth + 1 > data_end) 91 | return -1; 92 | 93 | /* Copy back Ethernet header in the right place, populate VLAN tag with 94 | * ID and proto, and set outer Ethernet header to VLAN type. 95 | */ 96 | __builtin_memcpy(eth, &eth_cpy, sizeof(*eth)); 97 | 98 | vlh = (void *)(eth + 1); 99 | 100 | if (vlh + 1 > data_end) 101 | return -1; 102 | 103 | vlh->h_vlan_TCI = bpf_htons(vlid); 104 | vlh->h_vlan_encapsulated_proto = eth->h_proto; 105 | 106 | eth->h_proto = bpf_htons(ETH_P_8021Q); 107 | return 0; 108 | } 109 | 110 | /* 111 | * Swaps destination and source MAC addresses inside an Ethernet header 112 | */ 113 | static __always_inline void swap_src_dst_mac(struct ethhdr *eth) 114 | { 115 | __u8 h_tmp[ETH_ALEN]; 116 | 117 | __builtin_memcpy(h_tmp, eth->h_source, ETH_ALEN); 118 | __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); 119 | __builtin_memcpy(eth->h_dest, h_tmp, ETH_ALEN); 120 | } 121 | 122 | /* 123 | * Swaps destination and source IPv6 addresses inside an IPv6 header 124 | */ 125 | static __always_inline void swap_src_dst_ipv6(struct ipv6hdr *ipv6) 126 | { 127 | struct in6_addr tmp = ipv6->saddr; 128 | 129 | ipv6->saddr = ipv6->daddr; 130 | ipv6->daddr = tmp; 131 | } 132 | 133 | /* 134 | * Swaps destination and source IPv4 addresses inside an 
IPv4 header 135 | */ 136 | static __always_inline void swap_src_dst_ipv4(struct iphdr *iphdr) 137 | { 138 | __be32 tmp = iphdr->saddr; 139 | 140 | iphdr->saddr = iphdr->daddr; 141 | iphdr->daddr = tmp; 142 | } 143 | 144 | #endif /* __REWRITE_HELPERS_H */ 145 | -------------------------------------------------------------------------------- /common/xdp_stats_kern.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | /* Used *ONLY* by BPF-prog running kernel side. */ 4 | #ifndef __XDP_STATS_KERN_H 5 | #define __XDP_STATS_KERN_H 6 | 7 | /* Data record type 'struct datarec' is defined in common/xdp_stats_kern_user.h, 8 | * programs using this header must first include that file. 9 | */ 10 | #ifndef __XDP_STATS_KERN_USER_H 11 | #warning "You forgot to #include <../common/xdp_stats_kern_user.h>" 12 | #include <../common/xdp_stats_kern_user.h> 13 | #endif 14 | 15 | /* Keeps stats per (enum) xdp_action */ 16 | struct { 17 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 18 | __type(key, __u32); 19 | __type(value, struct datarec); 20 | __uint(max_entries, XDP_ACTION_MAX); 21 | __uint(pinning, LIBBPF_PIN_BY_NAME); 22 | } xdp_stats_map SEC(".maps"); 23 | 24 | static __always_inline 25 | __u32 xdp_stats_record_action(struct xdp_md *ctx, __u32 action) 26 | { 27 | if (action >= XDP_ACTION_MAX) 28 | return XDP_ABORTED; 29 | 30 | /* Lookup in kernel BPF-side return pointer to actual data record */ 31 | struct datarec *rec = bpf_map_lookup_elem(&xdp_stats_map, &action); 32 | if (!rec) 33 | return XDP_ABORTED; 34 | 35 | /* BPF_MAP_TYPE_PERCPU_ARRAY returns a data record specific to current 36 | * CPU and XDP hooks runs under Softirq, which makes it safe to update 37 | * without atomic operations. 
38 | */ 39 | rec->rx_packets++; 40 | rec->rx_bytes += (ctx->data_end - ctx->data); 41 | 42 | return action; 43 | } 44 | 45 | #endif /* __XDP_STATS_KERN_H */ 46 | -------------------------------------------------------------------------------- /common/xdp_stats_kern_user.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | /* Used by BPF-prog kernel side BPF-progs and userspace programs, 4 | * for sharing xdp_stats common struct and DEFINEs. 5 | */ 6 | #ifndef __XDP_STATS_KERN_USER_H 7 | #define __XDP_STATS_KERN_USER_H 8 | 9 | /* This is the data record stored in the map */ 10 | struct datarec { 11 | __u64 rx_packets; 12 | __u64 rx_bytes; 13 | }; 14 | 15 | #ifndef XDP_ACTION_MAX 16 | #define XDP_ACTION_MAX (XDP_REDIRECT + 1) 17 | #endif 18 | 19 | #endif /* __XDP_STATS_KERN_USER_H */ 20 | -------------------------------------------------------------------------------- /experiment01-tailgrow/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern xdp_prog_kern2 4 | XDP_TARGETS += xdp_prog_kern3 5 | XDP_TARGETS += xdp_prog_kern4 6 | XDP_TARGETS += xdp_prog_fail1 7 | XDP_TARGETS += xdp_prog_fail2 8 | XDP_TARGETS += xdp_prog_fail3 9 | 10 | # USER_TARGETS := 11 | 12 | COMMON_DIR = ../common 13 | 14 | COPY_LOADER := xdp-loader 15 | COPY_STATS := xdp_stats 16 | EXTRA_DEPS := $(COMMON_DIR)/parsing_helpers.h 17 | 18 | include $(COMMON_DIR)/common.mk 19 | -------------------------------------------------------------------------------- /experiment01-tailgrow/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Experiment01 - Accessing data at packet end 3 | #+OPTIONS: ^:nil 4 | 5 | This example shows how to access BPF packet data at XDP =data_end=. 
6 | Examples like this are needed, as the programmer needs to convince the 7 | BPF verifier that access bounds are safe. 8 | 9 | * Use-case: tail-grow timestamping 10 | 11 | The BPF helper =bpf_xdp_adjust_tail= is being extended with 12 | capabilities to grow the packet size at tail. To use this for 13 | anything, we need to demo how to access packet data at XDP =data_end=. 14 | 15 | One use-case is to *add timestamps in extended tailroom* at XDP 16 | processing time, which will survive when packet is processed by 17 | network-stack (via XDP_PASS). One way to capture this timestamp is to 18 | use =tcpdump=, which could use this to determine the time spent in 19 | network-stack (on NIC without hardware timestamps). 20 | 21 | In main example [[file:xdp_prog_kern.c]], the =xdp_tailgrow_parse= code 22 | implements this by parsing up-to the IP-layer, and using the 23 | IP header's total-length field ([[https://elixir.bootlin.com/linux/v5.6.10/source/include/uapi/linux/ip.h#L97][iphdr->tot_len]]). See the code for the 24 | strange bounding checks needed to convince the verifier. Notice, this 25 | is limited to IPv4 ICMP packets for testing purposes. 26 | 27 | ** Side-note: extra programs 28 | 29 | Side-note: [[file:xdp_prog_kern.c]] also contains some other smaller 30 | programs to test =bpf_xdp_adjust_tail= grow works, and to benchmark 31 | the overhead when doing =XDP_TX=. Select the other BPF programs via 32 | =xdp_loader= option =--prog== like this: 33 | 34 | #+begin_src sh 35 | sudo ./xdp_loader --dev mlx5p1 --force --prog xdp_tailgrow 36 | sudo ./xdp_loader --dev mlx5p1 --force --prog xdp_tailgrow_tx 37 | #+end_src 38 | 39 | * Alternative methods 40 | 41 | ** Works: Use loop to access data_end 42 | 43 | Code in [[file:xdp_prog_kern2.c]] shows how to find the =data_end=, 44 | *without parsing packet contents*, but by advancing a =data= position 45 | pointer one byte at a time in a bounded loop. 
The bounded loop with 46 | max number of iterations allows the verifier to see the bound. (This 47 | obviously depends on the bounded loop support that was added in kernel 48 | [[https://git.kernel.org/torvalds/c/v5.3-rc1~140^2~179^2^2~5][v5.3]]). 49 | This is not very efficient, but it works. 50 | 51 | * Methods that fail 52 | 53 | Methods for accessing BPF packet data at XDP =data_end= that fail. 54 | 55 | ** Fail#1: Using packet length 56 | 57 | In example [[file:xdp_prog_fail1.c]], we try to use the packet length 58 | (calculated as =data_end - data=) to access the last byte as an offset 59 | added to =data=. The verifier rejects this, as the dynamic length 60 | calculation cannot be used for static analysis. 61 | 62 | #+begin_src sh 63 | sudo ./xdp_loader --dev mlx5p1 --force --file xdp_prog_fail1.o 64 | #+end_src 65 | 66 | ** Fail#2: Use data_end directly 67 | 68 | In example [[file:xdp_prog_fail2.c]], we try to use the =data_end= pointer 69 | more or less directly to find the last byte in the packet. The packet 70 | data [[https://www.mathwords.com/i/interval_notation.htm][interval]] is defined as =[data, data_end)=, meaning that the byte 71 | =data_end= is pointing at is *excluded*. The example tries to access the 72 | 2nd-last byte (to have a code if-construct that doesn't get removed by 73 | compiler optimizations). 
74 | 75 | #+begin_src sh 76 | sudo ./xdp_loader --dev mlx5p1 --force --file xdp_prog_fail2.o 77 | #+end_src 78 | -------------------------------------------------------------------------------- /experiment01-tailgrow/xdp_data_access_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* Copyright (C) 2020 Authors of Cilium 3 | * 4 | * Code copied from cilium/bpf/include/bpf/ctx/xdp.h 5 | * https://github.com/cilium/cilium/blob/master/bpf/include/bpf/ctx/xdp.h 6 | */ 7 | 8 | #ifndef _XDP_DATA_ACCESS_HELPERS_H_ 9 | #define _XDP_DATA_ACCESS_HELPERS_H_ 10 | 11 | /* This must be a mask and all offsets guaranteed to be less than that. */ 12 | //#define __CTX_OFF_MAX 0xff 13 | #define __CTX_OFF_MAX 0x1fff 14 | 15 | #ifndef __maybe_unused 16 | # define __maybe_unused __attribute__((__unused__)) 17 | #endif 18 | 19 | #ifndef EINVAL 20 | # define EINVAL 22 21 | #endif 22 | 23 | #ifndef memcpy 24 | # define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n)) 25 | #endif 26 | 27 | static __always_inline __maybe_unused int 28 | xdp_load_bytes(struct xdp_md *ctx, __u64 off, void *to, const __u64 len) 29 | { 30 | void *from; 31 | int ret; 32 | /* LLVM tends to generate code that verifier doesn't understand, 33 | * so force it the way we want it in order to open up a range 34 | * on the reg. 
35 | */ 36 | asm volatile("r1 = *(u32 *)(%[ctx] +0)\n\t" 37 | "r2 = *(u32 *)(%[ctx] +4)\n\t" 38 | "%[off] &= %[offmax]\n\t" 39 | "r1 += %[off]\n\t" 40 | "%[from] = r1\n\t" 41 | "r1 += %[len]\n\t" 42 | "if r1 > r2 goto +2\n\t" 43 | "%[ret] = 0\n\t" 44 | "goto +1\n\t" 45 | "%[ret] = %[errno]\n\t" 46 | : [ret]"=r"(ret), [from]"=r"(from) 47 | : [ctx]"r"(ctx), [off]"r"(off), [len]"ri"(len), 48 | [offmax]"i"(__CTX_OFF_MAX), [errno]"i"(-EINVAL) 49 | : "r1", "r2"); 50 | if (!ret) 51 | memcpy(to, from, len); 52 | return ret; 53 | } 54 | 55 | static __always_inline __maybe_unused int 56 | xdp_store_bytes(struct xdp_md *ctx, __u64 off, const void *from, 57 | const __u64 len, __u64 flags __maybe_unused) 58 | { 59 | void *to; 60 | int ret; 61 | /* See xdp_load_bytes(). */ 62 | asm volatile("r1 = *(u32 *)(%[ctx] +0)\n\t" 63 | "r2 = *(u32 *)(%[ctx] +4)\n\t" 64 | "%[off] &= %[offmax]\n\t" 65 | "r1 += %[off]\n\t" 66 | "%[to] = r1\n\t" 67 | "r1 += %[len]\n\t" 68 | "if r1 > r2 goto +2\n\t" 69 | "%[ret] = 0\n\t" 70 | "goto +1\n\t" 71 | "%[ret] = %[errno]\n\t" 72 | : [ret]"=r"(ret), [to]"=r"(to) 73 | : [ctx]"r"(ctx), [off]"r"(off), [len]"ri"(len), 74 | [offmax]"i"(__CTX_OFF_MAX), [errno]"i"(-EINVAL) 75 | : "r1", "r2"); 76 | if (!ret) 77 | memcpy(to, from, len); 78 | return ret; 79 | } 80 | 81 | #define ctx_load_bytes xdp_load_bytes 82 | #define ctx_store_bytes xdp_store_bytes 83 | 84 | #endif /* _XDP_DATA_ACCESS_HELPERS_H_ */ 85 | -------------------------------------------------------------------------------- /experiment01-tailgrow/xdp_prog_fail1.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | /* 6 | * This BPF-prog will FAIL, due to verifier rejecting it. 7 | * 8 | * General idea: Use packet length to find and access last byte in 9 | * packet. The verifier cannot see this is safe, as it cannot deduce 10 | * the packet length at verification time. 
11 | */ 12 | 13 | SEC("xdp_fail1") 14 | int _xdp_fail1(struct xdp_md *ctx) 15 | { 16 | void *data_end = (void *)(long)ctx->data_end; 17 | void *data = (void *)(long)ctx->data; 18 | unsigned char *ptr; 19 | void *pos; 20 | 21 | /* (Correct me if I'm wrong) 22 | * 23 | * The verifier cannot use this packet length calculation as 24 | * part of its static analysis. It chooses to use zero as the 25 | * offset value static value. 26 | */ 27 | unsigned int offset = data_end - data; 28 | 29 | pos = data; 30 | 31 | if (pos + offset > data_end) 32 | goto out; 33 | 34 | /* Fails at this line with: 35 | * "invalid access to packet, off=-1 size=1, R1(id=2,off=0,r=0)" 36 | * "R1 offset is outside of the packet" 37 | * 38 | * Because verifer used offset==0 it thinks that we are trying 39 | * to access (data - 1), which is not within [data,data_end) 40 | */ 41 | ptr = pos + (offset - sizeof(*ptr)); 42 | if (*ptr == 0xFF) 43 | return XDP_ABORTED; 44 | out: 45 | return XDP_PASS; 46 | } 47 | -------------------------------------------------------------------------------- /experiment01-tailgrow/xdp_prog_fail2.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | /* 6 | * This BPF-prog will FAIL, due to verifier rejecting it. 7 | * 8 | * General idea: Use data_end point to access last (2nd-last) byte in 9 | * packet. That is not allowed by verifier, as pointer arithmetic on 10 | * pkt_end is prohibited. 
11 | */ 12 | 13 | SEC("xdp_fail2") 14 | int _xdp_fail2(struct xdp_md *ctx) 15 | { 16 | void *data_end = (void *)(long)ctx->data_end; 17 | volatile unsigned char *ptr; 18 | volatile void *pos; 19 | 20 | pos = data_end; 21 | 22 | #pragma clang optimize off 23 | if (pos - 1 > data_end) 24 | goto out; 25 | #pragma clang optimize on 26 | 27 | /* Verifier fails with: "pointer arithmetic on pkt_end prohibited" 28 | */ 29 | ptr = pos - 2; 30 | if (*ptr == 0xFF) 31 | return XDP_ABORTED; 32 | out: 33 | return XDP_PASS; 34 | } 35 | -------------------------------------------------------------------------------- /experiment01-tailgrow/xdp_prog_fail3.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | #define MAX_PACKET_OFF 0x7fff 6 | 7 | /* This is a barrier_var() operation that makes specified variable 8 | * "a black box" for optimizing compiler. 9 | */ 10 | #define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) 11 | 12 | /* 13 | * This BPF-prog will FAIL, due to verifier rejecting it. 14 | * 15 | * General idea: Use packet length to find and access last byte in 16 | * packet. The verifier cannot see this is safe, as it cannot deduce 17 | * the packet length at verification time. 18 | */ 19 | 20 | SEC("xdp_fail3") 21 | int _xdp_fail3(struct xdp_md *ctx) 22 | { 23 | void *data_end = (void *)(long)ctx->data_end; 24 | void *data = (void *)(long)ctx->data; 25 | unsigned char *ptr; 26 | 27 | /* (Correct me if I'm wrong) 28 | * 29 | * The verifier cannot use this packet length calculation as 30 | * part of its static analysis. It chooses to use zero as the 31 | * offset value static value. 
32 | */ 33 | __u64 offset = data_end - data; 34 | 35 | /* Help verifier with bounds checks */ 36 | offset = offset & MAX_PACKET_OFF; /* Give verifier max_value */ 37 | if (offset < 2) 38 | offset = 2; /* Give verifier min_value */ 39 | 40 | if (data + offset > data_end) 41 | goto out; 42 | 43 | /* Fails at this line with: 44 | * "invalid access to packet, off=-1 size=1, R1(id=2,off=0,r=0)" 45 | * "R1 offset is outside of the packet" 46 | * 47 | * Because verifer used offset==0 it thinks that we are trying 48 | * to access (data - 1), which is not within [data,data_end) 49 | */ 50 | ptr = data + (offset - sizeof(*ptr)); 51 | if (*ptr == 0xFF) 52 | return XDP_ABORTED; 53 | out: 54 | return XDP_PASS; 55 | } 56 | -------------------------------------------------------------------------------- /experiment01-tailgrow/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // The parsing helper functions from the packet01 lesson have moved here 8 | #include "../common/parsing_helpers.h" 9 | #include "../common/rewrite_helpers.h" 10 | 11 | /* Defines xdp_stats_map */ 12 | #include "../common/xdp_stats_kern_user.h" 13 | #include "../common/xdp_stats_kern.h" 14 | 15 | struct my_timestamp { 16 | __u16 magic; 17 | __u64 time; 18 | } __attribute__((packed)); 19 | 20 | SEC("xdp_tailgrow_parse") 21 | int grow_parse(struct xdp_md *ctx) 22 | { 23 | void *data_end; 24 | void *data; 25 | int action = XDP_PASS; 26 | int eth_type, ip_type; 27 | struct hdr_cursor nh; 28 | struct iphdr *iphdr; 29 | struct ethhdr *eth; 30 | __u16 ip_tot_len; 31 | 32 | struct my_timestamp *ts; 33 | 34 | /* Increase packet size (at tail) and reload data pointers */ 35 | __u8 offset = sizeof(*ts); 36 | if (bpf_xdp_adjust_tail(ctx, offset)) 37 | goto out; 38 | data_end = (void *)(long)ctx->data_end; 39 | data = (void *)(long)ctx->data; 40 | 41 | /* These keep track 
of the next header type and iterator pointer */ 42 | nh.pos = data; 43 | 44 | eth_type = parse_ethhdr(&nh, data_end, &eth); 45 | if (eth_type < 0) { 46 | action = XDP_ABORTED; 47 | goto out; 48 | } 49 | 50 | if (eth_type == bpf_htons(ETH_P_IP)) { 51 | ip_type = parse_iphdr(&nh, data_end, &iphdr); 52 | } else { 53 | action = XDP_PASS; 54 | goto out; 55 | } 56 | 57 | /* Demo use-case: Add timestamp in extended tailroom to ICMP packets, 58 | * before sending to network-stack via XDP_PASS. This can be 59 | * captured via tcpdump, and provide earlier (XDP layer) timestamp. 60 | */ 61 | if (ip_type == IPPROTO_ICMP) { 62 | 63 | /* Packet size in bytes, including IP header and data */ 64 | ip_tot_len = bpf_ntohs(iphdr->tot_len); 65 | 66 | /* 67 | * Tricks to get past the verifier. Being allowed to use 68 | * packet value iphdr->tot_len, involves bounding possible 69 | * values to please verifier. 70 | */ 71 | if (ip_tot_len < 2) { 72 | /* This check seems strange on unsigned ip_tot_len, 73 | * but is needed, else verifier complains: 74 | * "unbounded min value is not allowed" 75 | */ 76 | goto out; 77 | } 78 | ip_tot_len &= 0xFFF; /* Max 4095 */ 79 | 80 | /* Finding end of packet + offset, and bound access */ 81 | if ((void *)iphdr + ip_tot_len + offset > data_end) { 82 | action = XDP_ABORTED; 83 | goto out; 84 | } 85 | 86 | /* Point ts to end-of-packet, that have been offset extended */ 87 | ts = (void *)iphdr + ip_tot_len; 88 | ts->magic = 0x5354; /* String "TS" in network-byte-order */ 89 | ts->time = bpf_ktime_get_ns(); 90 | } 91 | out: 92 | return xdp_stats_record_action(ctx, action); 93 | } 94 | 95 | SEC("xdp_tailgrow") 96 | int tailgrow_pass(struct xdp_md *ctx) 97 | { 98 | int offset; 99 | 100 | offset = 10; 101 | bpf_xdp_adjust_tail(ctx, offset); 102 | return xdp_stats_record_action(ctx, XDP_PASS); 103 | } 104 | 105 | SEC("xdp_pass") 106 | int xdp_pass_func(struct xdp_md *ctx) 107 | { 108 | return xdp_stats_record_action(ctx, XDP_PASS); 109 | } 110 | 111 | /* For 
/*
 * This example shows how to access the last byte of an XDP packet
 * without parsing the packet contents.
 *
 * It is not very efficient, as it advances an offset from the data pointer
 * one byte at a time in a loop until reaching data_end. This is needed as
 * the verifier only allows accessing data via advancing the position of the
 * data pointer. The bounded loop with a max number of iterations allows the
 * verifier to see the bound.
 *
 * Returns XDP_ABORTED when the last packet byte is 0xFF, XDP_PASS otherwise
 * (including when the packet is shorter than MIN_LEN or data_end is not
 * found within the MTU - MIN_LEN loop iterations).
 */

SEC("xdp_end_loop")
int _xdp_end_loop(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	unsigned char *ptr;
	unsigned int i;
	void *pos;

	/* Assume minimum length to reduce loops needed a bit */
	unsigned int offset = MIN_LEN;

	pos = data;

	/* Verifier can handle this bounded 'basic-loop' construct */
	for (i = 0; i < (MTU - MIN_LEN); i++ ) {
		if (pos + offset > data_end) {
			/* Promise verifier no access beyond data_end */
			goto out;
		}
		if (pos + offset == data_end) {
			/* Found data_end, exit for-loop and read data.
			 *
			 * It seems strange that finding data_end via
			 * moving the pos (data) pointer forward is needed.
			 * This is because pointer arithmetic on pkt_end is
			 * prohibited by the verifier.
			 *
			 * In principle data_end points to a byte that is not
			 * accessible. Thus, accessing the last readable byte
			 * via (data_end - 1) is prohibited by the verifier.
			 */
			goto read;
		}
		offset++;
	}
	/* Show verifier all other cases exit program */
	goto out;

read:
	/* offset equals the packet length here, so (offset - 1) is the index
	 * of the last readable byte.
	 */
	ptr = pos + (offset - sizeof(*ptr)); /* Parentheses needed */
	if (*ptr == 0xFF)
		return XDP_ABORTED;
out:
	return XDP_PASS;
}
# Builds the bundled libbpf and libxdp static libraries into $(LIB_INSTALL).
#
# SYSTEM_LIBBPF, SYSTEM_LIBXDP, OBJECT_LIBBPF, OBJECT_LIBXDP, VERBOSE, Q,
# QUIET_CC and QUIET_INSTALL are not assigned in this file; they are expected
# to come from defines.mk or the config.mk that defines.mk includes.

# Evaluated before defines.mk is included (which defaults CFLAGS with ?=), so
# only a CFLAGS value supplied by the caller replaces the strict defaults.
LIBBPF_CFLAGS:=$(if $(CFLAGS),$(CFLAGS),-g -O2 -Werror -Wall) -fPIC

LIB_DIR = .
LIB_INSTALL := $(LIB_DIR)/install
include defines.mk

SUBDIRS=

# Redirect sub-make output when not verbose; used by both library sections.
ifeq ($(VERBOSE),0)
P:= >/dev/null
endif

# None of these names is a file produced by this Makefile.  "libbpf" in
# particular is also the name of the submodule directory next to this file.
.PHONY: all clean install libbpf libxdp libbpf_clean libxdp_clean

all: $(OBJECT_LIBBPF) $(OBJECT_LIBXDP)
	@set -e; \
	for i in $(SUBDIRS); \
	do echo; echo " $$i"; $(MAKE) -C $$i; done

# Remove everything below $(LIB_INSTALL) except the tracked .gitignore files.
clean: libbpf_clean libxdp_clean
	@set -e; \
	for i in $(SUBDIRS); \
	do $(MAKE) -C $$i clean; done
	$(Q)find $(LIB_INSTALL) -type f -not -name .gitignore -delete
	$(Q)find $(LIB_INSTALL) -type d -empty -delete

# NOTE(review): this descends into "libxdp" and "testing", while the build
# rules below use "xdp-tools" -- confirm these directories exist here.
install:
	install -m 0755 -d $(DESTDIR)$(HDRDIR)
	$(MAKE) -C libxdp install
	$(MAKE) -C testing install

libbpf: $(OBJECT_LIBBPF)
libxdp: libbpf $(OBJECT_LIBXDP)

# Handle libbpf as git submodule
ifeq ($(SYSTEM_LIBBPF),n)

# Detect submodule libbpf source file changes
LIBBPF_SOURCES := $(wildcard libbpf/src/*.[ch])

$(LIB_INSTALL)/lib/libbpf.a: $(LIBBPF_SOURCES)
	@echo ; echo " libbpf"
	$(QUIET_CC)$(MAKE) -C libbpf/src CFLAGS="$(LIBBPF_CFLAGS)" $P
	$(QUIET_INSTALL)$(MAKE) -C libbpf/src DESTDIR=../../$(LIB_INSTALL) PREFIX= install_headers $P
	$(Q)mkdir -p $(@D)
	$(Q)cp -fp libbpf/src/libbpf.a $@

libbpf_clean:
	$(Q)$(MAKE) -C libbpf/src clean $P

else

# System libbpf in use: nothing to clean.
libbpf_clean:
	@:
endif

# Handle libxdp (built from the xdp-tools tree) as git submodule
ifeq ($(SYSTEM_LIBXDP),n)

# Detect submodule libxdp source file changes
LIBXDP_SOURCES := $(wildcard xdp-tools/lib/libxdp/libxdp*.[ch]) xdp-tools/lib/libxdp/xsk.c

$(LIB_INSTALL)/lib/libxdp.a: $(LIBXDP_SOURCES)
	@echo ; echo " libxdp"
	$(QUIET_CC)$(MAKE) -C xdp-tools BUILD_STATIC_ONLY=1 libxdp $P
	$(QUIET_INSTALL)$(MAKE) -C xdp-tools DESTDIR=../../../$(LIB_INSTALL) PREFIX= BUILD_STATIC_ONLY=1 libxdp_install $P

libxdp_clean:
	$(Q)$(MAKE) -C xdp-tools clean $P

else

# System libxdp in use: nothing to clean.
libxdp_clean:
	@:
endif
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# Shared build logic and helper sources live in the sibling common/ directory.
COMMON_DIR := ../common

# BPF objects (packet02/packet03 solutions plus the two VLAN demos) and the
# user-space helper built in this directory.
XDP_TARGETS := xdp_prog_kern_02 xdp_prog_kern_03 tc_reply_kern_02 \
	xdp_vlan01_kern \
	xdp_vlan02_kern
USER_TARGETS := xdp_prog_user

# Inputs interpreted by common.mk (not visible from this file).
COPY_LOADER := xdp-loader
COPY_STATS := xdp_stats
EXTRA_DEPS := $(COMMON_DIR)/parsing_helpers.h
COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o

include $(COMMON_DIR)/common.mk
27 | 28 | *** Assignment 3: Parsing the ICMPv6 header and reacting to it 29 | 30 | See the =parse_icmp6hdr= function from the [[file:../common/parsing_helpers.h][parsing_helpers.h]] 31 | file. The sequence number should be accessed as =bpf_ntohs(icmp6h->icmp6_sequence)= 32 | as it is a 2-byte value in the network order. 33 | 34 | *** Assignment 4: Adding VLAN support 35 | 36 | See the =parse_ethhdr= function from the [[file:../common/parsing_helpers.h][parsing_helpers.h]] file. 37 | 38 | *** Assignment 5: Adding IPv4 support 39 | 40 | See the =parse_iphdr= and =parse_icmphdr= functions from the [[file:../common/parsing_helpers.h][parsing_helpers.h]] file. 41 | 42 | ** Packet02: packet rewriting 43 | 44 | *** Assignment 1: Rewrite port numbers 45 | 46 | An example XDP program can be found in the =xdp_patch_ports= section in the [[file:xdp_prog_kern_02.c][xdp_prog_kern_02.c]] file. The program will decrease by one destination port number in any TCP or UDP packet. 47 | 48 | =fix_port_egress= section in the [[file:tc_reply_kern_02.c][tc_reply_kern_02.c]] file will increase by one source port number in reply packet. 49 | 50 | Steps to do before generate traffic: 51 | tc qdisc add dev eth0 clsact 52 | tc filter add dev eth0 egress bpf da obj tc_reply_kern_02.o sec tc 53 | tc filter show dev eth0 egress 54 | 55 | Clean qdisc clsact: 56 | tc qdisc del dev eth0 clsact 57 | 58 | *** Assignment 2: Remove the outermost VLAN tag 59 | 60 | See the =vlan_tag_pop= function from the [[file:../common/rewrite_helpers.h][rewrite_helpers.h]] file. 61 | An example XDP program can be found in the =xdp_vlan_swap= section in the [[file:xdp_prog_kern_02.c][xdp_prog_kern_02.c]] file. 62 | 63 | *** Assignment 3: Add back a missing VLAN tag 64 | 65 | See the =vlan_tag_push= function from the [[file:../common/rewrite_helpers.h][rewrite_helpers.h]] file. 66 | An example XDP program can be found in the =xdp_vlan_swap= section in the [[file:xdp_prog_kern_02.c][xdp_prog_kern_02.c]] file. 
67 | 68 | ** Packet03: redirecting packets 69 | 70 | *** Assignment 1: Send packets back where they came from 71 | 72 | See the =xdp_icmp_echo= program in the [[file:xdp_prog_kern_03.c][xdp_prog_kern_03.c]] file. 73 | 74 | *** Assignment 2: Redirect packets between two interfaces 75 | 76 | See the =xdp_redirect= program in the [[file:xdp_prog_kern_03.c][xdp_prog_kern_03.c]] file. 77 | 78 | *** Assignment 3: Extend to a bidirectional router 79 | 80 | See the =xdp_redirect_map= program in the [[file:xdp_prog_kern_03.c][xdp_prog_kern_03.c]] file. 81 | User space part of the assignment is implemented in the [[file:xdp_prog_user.c][xdp_prog_user.c]] file. 82 | 83 | *** Assignment 4: Use the BPF helper for routing 84 | 85 | See the =xdp_router= program in the [[file:xdp_prog_kern_03.c][xdp_prog_kern_03.c]] file. 86 | User space part of the assignment is implemented in the [[file:xdp_prog_user.c][xdp_prog_user.c]] file. 87 | -------------------------------------------------------------------------------- /packet-solutions/tc_reply_kern_02.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // The parsing helper functions from the packet01 lesson have moved here 11 | #include "../common/parsing_helpers.h" 12 | 13 | SEC("tc") 14 | int _fix_port_egress(struct __sk_buff *skb) 15 | { 16 | void *data_end = (void *)(long)skb->data_end; 17 | void *data = (void *)(long)skb->data; 18 | struct hdr_cursor nh = { .pos = data }; 19 | int eth_type, ip_type, ret = TC_ACT_OK; 20 | struct ipv6hdr *ipv6hdr; 21 | struct iphdr *iphdr; 22 | struct udphdr *udphdr; 23 | struct tcphdr *tcphdr; 24 | struct ethhdr *eth; 25 | 26 | if (data + sizeof(*eth) > data_end) 27 | goto out; 28 | 29 | eth_type = parse_ethhdr(&nh, data_end, ð); 30 | if (eth_type < 0) 31 | goto out; 32 | 33 | 34 | if (eth_type == bpf_htons(ETH_P_IP)) { 35 | ip_type 
= parse_iphdr(&nh, data_end, &iphdr); 36 | } else if (eth_type == bpf_htons(ETH_P_IPV6)) { 37 | ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); 38 | } else { 39 | goto out; 40 | } 41 | 42 | if (ip_type == IPPROTO_UDP) { 43 | if (parse_udphdr(&nh, data_end, &udphdr) < 0) 44 | goto out; 45 | 46 | udphdr->source = bpf_htons(bpf_ntohs(udphdr->source) + 1); 47 | udphdr->check += bpf_htons(-1); 48 | if (!udphdr->check) 49 | udphdr->check += bpf_htons(-1); 50 | } else if (ip_type == IPPROTO_TCP) { 51 | if (parse_tcphdr(&nh, data_end, &tcphdr) < 0) 52 | goto out; 53 | 54 | tcphdr->source = bpf_htons(bpf_ntohs(tcphdr->source) + 1); 55 | tcphdr->check += bpf_htons(-1); 56 | if (!tcphdr->check) 57 | tcphdr->check += bpf_htons(-1); 58 | } 59 | 60 | out: 61 | return ret; 62 | } 63 | char _license[] SEC("license") = "GPL"; 64 | -------------------------------------------------------------------------------- /packet-solutions/xdp_prog_kern_02.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | // The parsing helper functions from the packet01 lesson have moved here 10 | #include "../common/parsing_helpers.h" 11 | #include "../common/rewrite_helpers.h" 12 | 13 | /* Defines xdp_stats_map */ 14 | #include "../common/xdp_stats_kern_user.h" 15 | #include "../common/xdp_stats_kern.h" 16 | 17 | /* 18 | * Solution to the assignment 1 in lesson packet02 19 | */ 20 | SEC("xdp_patch_ports") 21 | int xdp_patch_ports_func(struct xdp_md *ctx) 22 | { 23 | int action = XDP_PASS; 24 | int eth_type, ip_type; 25 | struct ethhdr *eth; 26 | struct iphdr *iphdr; 27 | struct ipv6hdr *ipv6hdr; 28 | struct udphdr *udphdr; 29 | struct tcphdr *tcphdr; 30 | void *data_end = (void *)(long)ctx->data_end; 31 | void *data = (void *)(long)ctx->data; 32 | struct hdr_cursor nh = { .pos = data }; 33 | 34 | eth_type = parse_ethhdr(&nh, data_end, ð); 35 | if 
(eth_type < 0) { 36 | action = XDP_ABORTED; 37 | goto out; 38 | } 39 | 40 | if (eth_type == bpf_htons(ETH_P_IP)) { 41 | ip_type = parse_iphdr(&nh, data_end, &iphdr); 42 | } else if (eth_type == bpf_htons(ETH_P_IPV6)) { 43 | ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); 44 | } else { 45 | goto out; 46 | } 47 | 48 | if (ip_type == IPPROTO_UDP) { 49 | if (parse_udphdr(&nh, data_end, &udphdr) < 0) { 50 | action = XDP_ABORTED; 51 | goto out; 52 | } 53 | 54 | /* 55 | * We need to update the packet checksum when modifying the header. 56 | * RFC1071 contains an algorithm for in-place updating, which is what we use here 57 | * since we're always just decrementing the port number. Another option would be 58 | * to recompute the full checksum, like: 59 | * 60 | * struct udphdr udphdr_old; 61 | * __u32 csum = udphdr->check; 62 | * udphdr_old = *udphdr; 63 | * udphdr->dest = bpf_htons(bpf_ntohs(udphdr->dest) - 1); 64 | * csum = bpf_csum_diff((__be32 *)&udphdr_old, 4, (__be32 *)udphdr, 4, ~csum); 65 | * udphdr->check = csum_fold_helper(csum); 66 | */ 67 | 68 | udphdr->dest = bpf_htons(bpf_ntohs(udphdr->dest) - 1); 69 | udphdr->check += bpf_htons(1); 70 | if (!udphdr->check) 71 | udphdr->check += bpf_htons(1); 72 | } else if (ip_type == IPPROTO_TCP) { 73 | if (parse_tcphdr(&nh, data_end, &tcphdr) < 0) { 74 | action = XDP_ABORTED; 75 | goto out; 76 | } 77 | tcphdr->dest = bpf_htons(bpf_ntohs(tcphdr->dest) - 1); 78 | tcphdr->check += bpf_htons(1); 79 | if (!tcphdr->check) 80 | tcphdr->check += bpf_htons(1); 81 | } 82 | 83 | out: 84 | return xdp_stats_record_action(ctx, action); 85 | } 86 | 87 | /* 88 | * Solution to the assignments 2 and 3 in lesson packet02: Will pop outermost 89 | * VLAN tag if it exists, otherwise push a new one with ID 1 90 | */ 91 | SEC("xdp_vlan_swap") 92 | int xdp_vlan_swap_func(struct xdp_md *ctx) 93 | { 94 | void *data_end = (void *)(long)ctx->data_end; 95 | void *data = (void *)(long)ctx->data; 96 | 97 | /* These keep track of the next header type and 
iterator pointer */ 98 | struct hdr_cursor nh; 99 | int nh_type; 100 | nh.pos = data; 101 | 102 | struct ethhdr *eth; 103 | nh_type = parse_ethhdr(&nh, data_end, ð); 104 | if (nh_type < 0) 105 | return XDP_PASS; 106 | 107 | if (proto_is_vlan(eth->h_proto)) 108 | vlan_tag_pop(ctx, eth); 109 | else 110 | vlan_tag_push(ctx, eth, 1); 111 | 112 | return XDP_PASS; 113 | } 114 | 115 | SEC("xdp_pass") 116 | int xdp_pass_func(struct xdp_md *ctx) 117 | { 118 | return XDP_PASS; 119 | } 120 | 121 | char _license[] SEC("license") = "GPL"; 122 | -------------------------------------------------------------------------------- /packet-solutions/xdp_prog_user.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | static const char *__doc__ = "XDP redirect helper\n" 4 | " - Allows to populate/query tx_port and redirect_params maps\n"; 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include /* depend on kernel-headers installed */ 23 | 24 | #include "../common/common_params.h" 25 | #include "../common/common_user_bpf_xdp.h" 26 | #include "../common/common_libbpf.h" 27 | 28 | #include "../common/xdp_stats_kern_user.h" 29 | 30 | static const struct option_wrapper long_options[] = { 31 | 32 | {{"help", no_argument, NULL, 'h' }, 33 | "Show help", false}, 34 | 35 | {{"dev", required_argument, NULL, 'd' }, 36 | "Operate on device ", "", true}, 37 | 38 | {{"redirect-dev", required_argument, NULL, 'r' }, 39 | "Redirect to device ", "", true}, 40 | 41 | {{"src-mac", required_argument, NULL, 'L' }, 42 | "Source MAC address of ", "", true }, 43 | 44 | {{"dest-mac", required_argument, NULL, 'R' }, 45 | "Destination MAC address of ", "", true }, 46 | 47 | {{"quiet", no_argument, NULL, 'q' }, 48 | "Quiet mode (no output)"}, 49 | 50 | {{0, 0, NULL, 0 }, NULL, false} 
51 | }; 52 | 53 | static int parse_u8(char *str, unsigned char *x) 54 | { 55 | unsigned long z; 56 | 57 | z = strtoul(str, 0, 16); 58 | if (z > 0xff) 59 | return -1; 60 | 61 | if (x) 62 | *x = z; 63 | 64 | return 0; 65 | } 66 | 67 | static int parse_mac(char *str, unsigned char mac[ETH_ALEN]) 68 | { 69 | if (parse_u8(str, &mac[0]) < 0) 70 | return -1; 71 | if (parse_u8(str + 3, &mac[1]) < 0) 72 | return -1; 73 | if (parse_u8(str + 6, &mac[2]) < 0) 74 | return -1; 75 | if (parse_u8(str + 9, &mac[3]) < 0) 76 | return -1; 77 | if (parse_u8(str + 12, &mac[4]) < 0) 78 | return -1; 79 | if (parse_u8(str + 15, &mac[5]) < 0) 80 | return -1; 81 | 82 | return 0; 83 | } 84 | 85 | static int write_iface_params(int map_fd, unsigned char *src, unsigned char *dest) 86 | { 87 | if (bpf_map_update_elem(map_fd, src, dest, 0) < 0) { 88 | fprintf(stderr, 89 | "WARN: Failed to update bpf map file: err(%d):%s\n", 90 | errno, strerror(errno)); 91 | return -1; 92 | } 93 | 94 | printf("forward: %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x\n", 95 | src[0], src[1], src[2], src[3], src[4], src[5], 96 | dest[0], dest[1], dest[2], dest[3], dest[4], dest[5] 97 | ); 98 | 99 | return 0; 100 | } 101 | 102 | #ifndef PATH_MAX 103 | #define PATH_MAX 4096 104 | #endif 105 | 106 | const char *pin_basedir = "/sys/fs/bpf"; 107 | 108 | int main(int argc, char **argv) 109 | { 110 | int i; 111 | int len; 112 | int map_fd; 113 | bool redirect_map; 114 | char pin_dir[PATH_MAX]; 115 | unsigned char src[ETH_ALEN]; 116 | unsigned char dest[ETH_ALEN]; 117 | 118 | struct config cfg = { 119 | .ifindex = -1, 120 | .redirect_ifindex = -1, 121 | }; 122 | 123 | /* Cmdline options can change progname */ 124 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 125 | 126 | redirect_map = (cfg.ifindex > 0) && (cfg.redirect_ifindex > 0); 127 | 128 | if (cfg.redirect_ifindex > 0 && cfg.ifindex == -1) { 129 | fprintf(stderr, "ERR: required option --dev missing\n\n"); 130 | usage(argv[0], __doc__, 
long_options, (argc == 1)); 131 | return EXIT_FAIL_OPTION; 132 | } 133 | 134 | len = snprintf(pin_dir, PATH_MAX, "%s/%s", pin_basedir, cfg.ifname); 135 | if (len < 0) { 136 | fprintf(stderr, "ERR: creating pin dirname\n"); 137 | return EXIT_FAIL_OPTION; 138 | } 139 | 140 | if (parse_mac(cfg.src_mac, src) < 0) { 141 | fprintf(stderr, "ERR: can't parse mac address %s\n", cfg.src_mac); 142 | return EXIT_FAIL_OPTION; 143 | } 144 | 145 | if (parse_mac(cfg.dest_mac, dest) < 0) { 146 | fprintf(stderr, "ERR: can't parse mac address %s\n", cfg.dest_mac); 147 | return EXIT_FAIL_OPTION; 148 | } 149 | 150 | /* Open the tx_port map corresponding to the cfg.ifname interface */ 151 | map_fd = open_bpf_map_file(pin_dir, "tx_port", NULL); 152 | if (map_fd < 0) { 153 | return EXIT_FAIL_BPF; 154 | } 155 | 156 | printf("map dir: %s\n", pin_dir); 157 | 158 | if (redirect_map) { 159 | /* setup a virtual port for the static redirect */ 160 | i = 0; 161 | bpf_map_update_elem(map_fd, &i, &cfg.redirect_ifindex, 0); 162 | printf("redirect from ifnum=%d to ifnum=%d\n", cfg.ifindex, cfg.redirect_ifindex); 163 | 164 | /* Open the redirect_params map */ 165 | map_fd = open_bpf_map_file(pin_dir, "redirect_params", NULL); 166 | if (map_fd < 0) { 167 | return EXIT_FAIL_BPF; 168 | } 169 | 170 | /* Setup the mapping containing MAC addresses */ 171 | if (write_iface_params(map_fd, src, dest) < 0) { 172 | fprintf(stderr, "can't write iface params\n"); 173 | return 1; 174 | } 175 | } 176 | 177 | return EXIT_OK; 178 | } 179 | -------------------------------------------------------------------------------- /packet-solutions/xdp_vlan01_kern.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | /* NOTICE: Re-defining VLAN header levels to parse */ 9 | #define VLAN_MAX_DEPTH 8 10 | //#include "../common/parsing_helpers.h" 11 | /* 12 | * NOTICE: Copied over 
parts of ../common/parsing_helpers.h 13 | * to make it easier to point out compiler optimizations 14 | */ 15 | 16 | /* Header cursor to keep track of current parsing position */ 17 | struct hdr_cursor { 18 | void *pos; 19 | }; 20 | 21 | static __always_inline int proto_is_vlan(__u16 h_proto) 22 | { 23 | return !!(h_proto == bpf_htons(ETH_P_8021Q) || 24 | h_proto == bpf_htons(ETH_P_8021AD)); 25 | } 26 | 27 | /* 28 | * struct vlan_hdr - vlan header 29 | * @h_vlan_TCI: priority and VLAN ID 30 | * @h_vlan_encapsulated_proto: packet type ID or len 31 | */ 32 | struct vlan_hdr { 33 | __be16 h_vlan_TCI; 34 | __be16 h_vlan_encapsulated_proto; /* NOTICE: unsigned type */ 35 | }; 36 | 37 | /* Notice, parse_ethhdr() will skip VLAN tags, by advancing nh->pos and returns 38 | * next header EtherType, BUT the ethhdr pointer supplied still points to the 39 | * Ethernet header. Thus, caller can look at eth->h_proto to see if this was a 40 | * VLAN tagged packet. 41 | */ 42 | static __always_inline int parse_ethhdr(struct hdr_cursor *nh, void *data_end, 43 | struct ethhdr **ethhdr) 44 | { 45 | struct ethhdr *eth = nh->pos; 46 | int hdrsize = sizeof(*eth); 47 | struct vlan_hdr *vlh; 48 | __u16 h_proto; 49 | int i; 50 | 51 | /* Byte-count bounds check; check if current pointer + size of header 52 | * is after data_end. 53 | */ 54 | if (nh->pos + hdrsize > data_end) 55 | return -1; 56 | 57 | nh->pos += hdrsize; 58 | *ethhdr = eth; 59 | vlh = nh->pos; 60 | h_proto = eth->h_proto; 61 | 62 | /* Use loop unrolling to avoid the verifier restriction on loops; 63 | * support up to VLAN_MAX_DEPTH layers of VLAN encapsulation. 
64 | */ 65 | #pragma unroll 66 | for (i = 0; i < VLAN_MAX_DEPTH; i++) { 67 | if (!proto_is_vlan(h_proto)) 68 | break; 69 | 70 | if (vlh + 1 > data_end) 71 | break; 72 | 73 | h_proto = vlh->h_vlan_encapsulated_proto; 74 | vlh++; 75 | } 76 | 77 | nh->pos = vlh; 78 | return h_proto; /* network-byte-order */ 79 | } 80 | 81 | SEC("xdp_vlan01") 82 | int xdp_vlan_01(struct xdp_md *ctx) 83 | { 84 | void *data_end = (void *)(long)ctx->data_end; 85 | void *data = (void *)(long)ctx->data; 86 | 87 | /* These keep track of the next header type and iterator pointer */ 88 | struct hdr_cursor nh; 89 | int nh_type; 90 | nh.pos = data; 91 | 92 | struct ethhdr *eth; 93 | nh_type = parse_ethhdr(&nh, data_end, ð); 94 | if (nh_type < 0) 95 | return XDP_ABORTED; 96 | 97 | /* The LLVM compiler is very clever, and will remove above walking of 98 | * VLAN headers (the loop unroll). 99 | * 100 | * The returned value nh_type, variable (__u16) h_proto in 101 | * parse_ethhdr(), is only compared against a negative value (signed). 102 | * The compile see that it can remove the VLAN loop, because: 103 | * 1. h_proto = vlh->h_vlan_encapsulated_proto can only be >= 0 104 | * 2. we never read nh->pos (so it removes nh->pos = vlh;). 
105 | */ 106 | 107 | /* Accessing eth pointer is still valid after compiler optimization */ 108 | if (proto_is_vlan(eth->h_proto)) 109 | return XDP_DROP; 110 | 111 | /* Hint: to inspect BPF byte-code run: 112 | * llvm-objdump -S xdp_vlan01_kern.o 113 | */ 114 | return XDP_PASS; 115 | } 116 | -------------------------------------------------------------------------------- /packet-solutions/xdp_vlan02_kern.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | /* NOTICE: Re-defining VLAN header levels to parse */ 10 | #define VLAN_MAX_DEPTH 10 11 | #include "../common/parsing_helpers.h" 12 | 13 | #if 0 14 | #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ 15 | struct collect_vlans { 16 | __u16 id[VLAN_MAX_DEPTH]; 17 | }; 18 | #endif 19 | 20 | #if 0 /* moved to parsing_helpers.h */ 21 | /* Based on parse_ethhdr() */ 22 | static __always_inline int __parse_ethhdr_vlan(struct hdr_cursor *nh, 23 | void *data_end, 24 | struct ethhdr **ethhdr, 25 | struct collect_vlans *vlans) 26 | { 27 | struct ethhdr *eth = nh->pos; 28 | int hdrsize = sizeof(*eth); 29 | struct vlan_hdr *vlh; 30 | __u16 h_proto; 31 | int i; 32 | 33 | /* Byte-count bounds check; check if current pointer + size of header 34 | * is after data_end. 35 | */ 36 | if (nh->pos + hdrsize > data_end) 37 | return -1; 38 | 39 | nh->pos += hdrsize; 40 | *ethhdr = eth; 41 | vlh = nh->pos; 42 | h_proto = eth->h_proto; 43 | 44 | /* Use loop unrolling to avoid the verifier restriction on loops; 45 | * support up to VLAN_MAX_DEPTH layers of VLAN encapsulation. 
46 | */ 47 | #pragma unroll 48 | for (i = 0; i < VLAN_MAX_DEPTH; i++) { 49 | if (!proto_is_vlan(h_proto)) 50 | break; 51 | 52 | if (vlh + 1 > data_end) 53 | break; 54 | 55 | h_proto = vlh->h_vlan_encapsulated_proto; 56 | if (vlans) { 57 | vlans->id[i] = 58 | bpf_ntohs(vlh->h_vlan_TCI) & VLAN_VID_MASK; 59 | } 60 | vlh++; 61 | } 62 | 63 | nh->pos = vlh; 64 | return h_proto; /* network-byte-order */ 65 | } 66 | #endif 67 | 68 | SEC("xdp_vlan02") 69 | int xdp_vlan_02(struct xdp_md *ctx) 70 | { 71 | void *data_end = (void *)(long)ctx->data_end; 72 | void *data = (void *)(long)ctx->data; 73 | 74 | /* These keep track of the next header type and iterator pointer */ 75 | struct hdr_cursor nh; 76 | int eth_type; 77 | nh.pos = data; 78 | 79 | struct collect_vlans vlans; 80 | 81 | struct ethhdr *eth; 82 | 83 | eth_type = parse_ethhdr_vlan(&nh, data_end, ð, &vlans); 84 | if (eth_type < 0) 85 | return XDP_ABORTED; 86 | /* The eth_type have skipped VLAN-types, but collected VLAN ids. The 87 | * eth ptr still points to Ethernet header, thus to check if this is a 88 | * VLAN packet do proto_is_vlan(eth->h_proto). 89 | */ 90 | 91 | /* The LLVM compiler is very clever, it sees that program only access 92 | * 2nd "inner" vlan (array index 1), and only does loop unroll of 2, and 93 | * only does the VLAN_VID_MASK in the 2nd "inner" vlan case. 94 | */ 95 | if (vlans.id[1] == 42) 96 | return XDP_ABORTED; 97 | 98 | /* If using eth_type (even compare against zero), it will cause full 99 | * loop unroll and walking all VLANs (for VLAN_MAX_DEPTH). Still only 100 | * "inner" VLAN is masked out. 101 | */ 102 | #if 0 103 | if (eth_type == 0) 104 | return XDP_PASS; 105 | #endif 106 | 107 | /* Unless we only want to manipulate VLAN, then next step will naturally 108 | * be parsing the next L3 headers. 
This (also) cause compiler to create 109 | * VLAN loop, as this uses nh->pos 110 | */ 111 | #if 0 112 | int ip_type; 113 | struct iphdr *iphdr; 114 | if (eth_type == bpf_htons(ETH_P_IP)) { 115 | ip_type = parse_iphdr(&nh, data_end, &iphdr); 116 | if (eth_type < 0) 117 | return XDP_ABORTED; 118 | 119 | if (ip_type == IPPROTO_UDP) 120 | return XDP_DROP; 121 | } 122 | #endif 123 | /* Hint: to inspect BPF byte-code run: 124 | * llvm-objdump --no-show-raw-insn -S xdp_vlan02_kern.o 125 | */ 126 | return XDP_PASS; 127 | } 128 | -------------------------------------------------------------------------------- /packet01-parsing/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := 5 | 6 | COMMON_DIR := ../common 7 | 8 | COPY_LOADER := xdp-loader 9 | COPY_STATS := xdp_stats 10 | 11 | include $(COMMON_DIR)/common.mk 12 | -------------------------------------------------------------------------------- /packet01-parsing/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | /* Defines xdp_stats_map from packet04 */ 12 | #include "../common/xdp_stats_kern_user.h" 13 | #include "../common/xdp_stats_kern.h" 14 | 15 | /* Header cursor to keep track of current parsing position */ 16 | struct hdr_cursor { 17 | void *pos; 18 | }; 19 | 20 | /* Packet parsing helpers. 21 | * 22 | * Each helper parses a packet header, including doing bounds checking, and 23 | * returns the type of its contents if successful, and -1 otherwise. 24 | * 25 | * For Ethernet and IP headers, the content type is the type of the payload 26 | * (h_proto for Ethernet, nexthdr for IPv6), for ICMP it is the ICMP type field. 
27 | * All return values are in network byte order. 28 | */ 29 | static __always_inline int parse_ethhdr(struct hdr_cursor *nh, 30 | void *data_end, 31 | struct ethhdr **ethhdr) 32 | { 33 | struct ethhdr *eth = nh->pos; 34 | int hdrsize = sizeof(*eth); 35 | 36 | /* Byte-count bounds check; check if current pointer + size of header 37 | * is after data_end. 38 | */ 39 | if (nh->pos + hdrsize > data_end) 40 | return -1; 41 | 42 | nh->pos += hdrsize; 43 | *ethhdr = eth; 44 | 45 | return eth->h_proto; /* network-byte-order */ 46 | } 47 | 48 | /* Assignment 2: Implement and use this */ 49 | /*static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, 50 | void *data_end, 51 | struct ipv6hdr **ip6hdr) 52 | { 53 | }*/ 54 | 55 | /* Assignment 3: Implement and use this */ 56 | /*static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh, 57 | void *data_end, 58 | struct icmp6hdr **icmp6hdr) 59 | { 60 | }*/ 61 | 62 | SEC("xdp") 63 | int xdp_parser_func(struct xdp_md *ctx) 64 | { 65 | void *data_end = (void *)(long)ctx->data_end; 66 | void *data = (void *)(long)ctx->data; 67 | struct ethhdr *eth; 68 | 69 | /* Default action XDP_PASS, imply everything we couldn't parse, or that 70 | * we don't want to deal with, we just pass up the stack and let the 71 | * kernel deal with it. 72 | */ 73 | __u32 action = XDP_PASS; /* Default action */ 74 | 75 | /* These keep track of the next header type and iterator pointer */ 76 | struct hdr_cursor nh; 77 | int nh_type; 78 | 79 | /* Start next header cursor position at data start */ 80 | nh.pos = data; 81 | 82 | /* Packet parsing in steps: Get each header one at a time, aborting if 83 | * parsing fails. Each helper function does sanity checking (is the 84 | * header type in the packet correct?), and bounds checking.
85 | */ 86 | nh_type = parse_ethhdr(&nh, data_end, &eth); 87 | if (nh_type != bpf_htons(ETH_P_IPV6)) 88 | goto out; 89 | 90 | /* Assignment additions go below here */ 91 | 92 | action = XDP_DROP; 93 | out: 94 | return xdp_stats_record_action(ctx, action); /* read via xdp_stats */ 95 | } 96 | 97 | char _license[] SEC("license") = "GPL"; 98 | -------------------------------------------------------------------------------- /packet02-rewriting/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := 5 | 6 | COMMON_DIR := ../common 7 | 8 | COPY_LOADER := xdp-loader 9 | COPY_STATS := xdp_stats 10 | EXTRA_DEPS := $(COMMON_DIR)/parsing_helpers.h 11 | 12 | include $(COMMON_DIR)/common.mk 13 | 14 | -------------------------------------------------------------------------------- /packet02-rewriting/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Packet02 - packet rewriting 3 | #+OPTIONS: ^:nil 4 | 5 | Having completed the packet parsing lesson in packet01, you are now familiar 6 | with how to structure packet parsing, how to make sure you do proper bounds 7 | checking before referencing packet data, and how to decide the final packet 8 | verdict with return codes. In this lesson we build on this to show how to 9 | modify the packet contents.
10 | 11 | * Table of Contents :TOC: 12 | - [[#what-you-will-learn-in-this-lesson][What you will learn in this lesson]] 13 | - [[#rewriting-packet-data-with-direct-memory-access][Rewriting packet data with direct memory access]] 14 | - [[#enlarging-and-shrinking-packet-size][Enlarging and shrinking packet size]] 15 | - [[#assignments][Assignments]] 16 | - [[#assignment-1-rewrite-port-numbers][Assignment 1: Rewrite port numbers]] 17 | - [[#assignment-2-remove-the-outermost-vlan-tag][Assignment 2: Remove the outermost VLAN tag]] 18 | - [[#assignment-3-add-back-a-missing-vlan-tag][Assignment 3: Add back a missing VLAN tag]] 19 | 20 | * What you will learn in this lesson 21 | 22 | ** Rewriting packet data with direct memory access 23 | 24 | As we saw in the previous lesson, the verifier will check that all memory 25 | accesses to packet data first perform correct bounds checking to make sure 26 | it doesn't reference memory outside the packet. This applies not only to 27 | packet data reads, but also to writes; which means that we can rewrite 28 | packet data simply by changing the memory it occupies. We will use this in 29 | the assignments below to modify packet header fields. 30 | 31 | ** Enlarging and shrinking packet size 32 | 33 | While many things can be accomplished simply by rewriting existing packet 34 | data, sometimes it is necessary to add or remove chunks of memory entirely, 35 | for instance to perform encapsulation, or to remove protocol headers from a 36 | packet. The kernel exposes an eBPF helper function to achieve this, which is 37 | called =bpf_xdp_adjust_head()=. This function takes the XDP context object 38 | and an adjustment size as parameter, and will move the header pointer by 39 | this many bytes (i.e., a positive number will shrink the size of the packet 40 | data, while a negative number will add that many bytes to the front of the 41 | packet). 42 | 43 | There are a few things to be aware of when using this helper: 44 | 45 | 1. 
First, it may fail either because the adjustment would make the packet 46 | data too small to contain at least an Ethernet header, or because there 47 | is not enough space in memory before the start of the packet (packet data 48 | is put at a fixed offset from the start of the memory page). 49 | 50 | 2. Second, the helper only adjusts the data pointer. It is up to the XDP 51 | program to ensure that the packet data is valid afterwards. This typically 52 | involves rewriting the Ethernet header at the new start of the packet 53 | location, and adjusting any subsequent header fields to match. 54 | 55 | 3. Finally, the verifier will discard all information about previous bounds 56 | checks after the packet size has been adjusted. This means that the XDP 57 | program needs to perform new bounds checks *including re-evaluating the 58 | data and data_end pointers* after adjusting the packet size. 59 | 60 | There is also a =bpf_xdp_adjust_tail()= which can be used to move the end of 61 | the packet data. From kernel v5.8 it functions identically to 62 | =bpf_xdp_adjust_head()=, before that it was only possible to shrink the 63 | packet at the tail, not grow it. 64 | 65 | * Assignments 66 | 67 | In this lesson we will be creating two programs: One that rewrites the 68 | destination port number of TCP and UDP packets to be one less than its 69 | original. And another that removes the outermost VLAN encapsulation header 70 | if one exists, or adds a new one if it doesn't. 71 | 72 | ** Assignment 1: Rewrite port numbers 73 | 74 | For this assignment you will need to parse the TCP and UDP headers and 75 | rewrite the port number before passing on the packet. These headers are 76 | defined in =<linux/tcp.h>= and =<linux/udp.h>=, respectively. 77 | 78 | Rewriting is simply a matter of writing to the right field in the header 79 | (after parsing it).
E.g.: 80 | 81 | #+begin_src C 82 | udphdr->dest = bpf_htons(bpf_ntohs(udphdr->dest) - 1); 83 | #+end_src 84 | 85 | You can use =tcpdump= to verify that this works. As a packet generator you 86 | can use the =socat= utility. The following will generate a UDP packet to 87 | port 2000 for each line you type on stdin: 88 | 89 | #+begin_example 90 | $ t exec -- socat - 'udp6:[fc00:dead:cafe:1::1]:2000' 91 | #+end_example 92 | 93 | You can view these with =tcpdump=: 94 | 95 | #+begin_example 96 | $ t tcpdump 97 | tcpdump: verbose output suppressed, use -v or -vv for full protocol decode 98 | listening on xdptut-3c93, link-type EN10MB (Ethernet), capture size 262144 bytes 99 | 12:54:31.085948 d2:9e:c0:4f:3b:7b > 32:71:5a:a4:74:c1, ethertype IPv6 (0x86dd), length 67: fc00:dead:cafe:1::2.35126 > fc00:dead:cafe:1::1.2000: UDP, length 5 100 | #+end_example 101 | 102 | When your program is working correctly, the destination port (2000 near the 103 | end of the line) should be 1999 instead. 104 | 105 | ** Assignment 2: Remove the outermost VLAN tag 106 | 107 | In this assignment we will start implementing the program that removes the 108 | outermost VLAN tag if one exists. To do this, fill in the =vlan_tag_pop()= 109 | function that is prototyped in [[file:xdp_prog_kern.c]]. The function prototype 110 | contains the variable definitions and inline comments from our solution to 111 | the issue, to guide you in the implementation. 112 | 113 | Once you have implemented the function, test that it works by setting up a 114 | test environment with VLANs enabled (like in the previous lesson), and run 115 | =t ping --vlan= in one window, while looking at the output of =t tcpdump= in 116 | another. 
You should see no VLAN tags on the echo request packets; the echo 117 | replies will still have VLAN tags, because the kernel will reply to the ping 118 | even though it is targeting a different interface, and the replies will be 119 | routed out the interface that actually has the IP address being pinged 120 | (i.e., the virtual VLAN interface). 121 | 122 | ** Assignment 3: Add back a missing VLAN tag 123 | 124 | In this assignment we will implement the opposite of the previous one: I.e., 125 | the code that adds a VLAN tag if none exists. Just hardcode the VLAN ID to a 126 | value of your choosing; and test the program the same way as with the 127 | previous assignment (but run =t ping= without the =--vlan= parameter, and 128 | verify that the ICMP echo request packets do have a VLAN tag added to them). 129 | -------------------------------------------------------------------------------- /packet02-rewriting/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include <linux/bpf.h> 3 | #include <linux/in.h> 4 | #include <bpf/bpf_helpers.h> 5 | #include <bpf/bpf_endian.h> 6 | 7 | // The parsing helper functions from the packet01 lesson have moved here 8 | #include "../common/parsing_helpers.h" 9 | 10 | /* Defines xdp_stats_map */ 11 | #include "../common/xdp_stats_kern_user.h" 12 | #include "../common/xdp_stats_kern.h" 13 | 14 | /* Pops the outermost VLAN tag off the packet. Returns the popped VLAN ID on 15 | * success or -1 on failure.
16 | */ 17 | static __always_inline int vlan_tag_pop(struct xdp_md *ctx, struct ethhdr *eth) 18 | { 19 | /* 20 | void *data_end = (void *)(long)ctx->data_end; 21 | struct ethhdr eth_cpy; 22 | struct vlan_hdr *vlh; 23 | __be16 h_proto; 24 | */ 25 | int vlid = -1; 26 | 27 | /* Check if there is a vlan tag to pop */ 28 | 29 | /* Still need to do bounds checking */ 30 | 31 | /* Save vlan ID for returning, h_proto for updating Ethernet header */ 32 | 33 | /* Make a copy of the outer Ethernet header before we cut it off */ 34 | 35 | /* Actually adjust the head pointer */ 36 | 37 | /* Need to re-evaluate data *and* data_end and do new bounds checking 38 | * after adjusting head 39 | */ 40 | 41 | /* Copy back the old Ethernet header and update the proto type */ 42 | 43 | 44 | return vlid; 45 | } 46 | 47 | /* Pushes a new VLAN tag after the Ethernet header. Returns 0 on success, 48 | * -1 on failure. 49 | */ 50 | static __always_inline int vlan_tag_push(struct xdp_md *ctx, 51 | struct ethhdr *eth, int vlid) 52 | { 53 | return 0; 54 | } 55 | 56 | /* Implement assignment 1 in this section */ 57 | SEC("xdp") 58 | int xdp_port_rewrite_func(struct xdp_md *ctx) 59 | { 60 | return XDP_PASS; 61 | } 62 | 63 | /* VLAN swapper; will pop outermost VLAN tag if it exists, otherwise push a new 64 | * one with ID 1. Use this for assignments 2 and 3. 65 | */ 66 | SEC("xdp") 67 | int xdp_vlan_swap_func(struct xdp_md *ctx) 68 | { 69 | void *data_end = (void *)(long)ctx->data_end; 70 | void *data = (void *)(long)ctx->data; 71 | 72 | /* These keep track of the next header type and iterator pointer */ 73 | struct hdr_cursor nh; 74 | int nh_type; 75 | nh.pos = data; 76 | 77 | struct ethhdr *eth; 78 | nh_type = parse_ethhdr(&nh, data_end, &eth); 79 | if (nh_type < 0) 80 | return XDP_PASS; 81 | 82 | /* Assignment 2 and 3 will implement these.
For now they do nothing */ 83 | if (proto_is_vlan(eth->h_proto)) 84 | vlan_tag_pop(ctx, eth); 85 | else 86 | vlan_tag_push(ctx, eth, 1); 87 | 88 | return XDP_PASS; 89 | } 90 | 91 | /* Solution to the parsing exercise in lesson packet01. Handles VLANs and legacy 92 | * IP (via the helpers in parsing_helpers.h). 93 | */ 94 | SEC("xdp") 95 | int xdp_parser_func(struct xdp_md *ctx) 96 | { 97 | void *data_end = (void *)(long)ctx->data_end; 98 | void *data = (void *)(long)ctx->data; 99 | 100 | /* Default action XDP_PASS, imply everything we couldn't parse, or that 101 | * we don't want to deal with, we just pass up the stack and let the 102 | * kernel deal with it. 103 | */ 104 | __u32 action = XDP_PASS; /* Default action */ 105 | 106 | /* These keep track of the next header type and iterator pointer */ 107 | struct hdr_cursor nh; 108 | int nh_type; 109 | nh.pos = data; 110 | 111 | struct ethhdr *eth; 112 | 113 | /* Packet parsing in steps: Get each header one at a time, aborting if 114 | * parsing fails. Each helper function does sanity checking (is the 115 | * header type in the packet correct?), and bounds checking. 
116 | */ 117 | nh_type = parse_ethhdr(&nh, data_end, &eth); 118 | 119 | if (nh_type == bpf_htons(ETH_P_IPV6)) { 120 | struct ipv6hdr *ip6h; 121 | struct icmp6hdr *icmp6h; 122 | 123 | nh_type = parse_ip6hdr(&nh, data_end, &ip6h); 124 | if (nh_type != IPPROTO_ICMPV6) 125 | goto out; 126 | 127 | nh_type = parse_icmp6hdr(&nh, data_end, &icmp6h); 128 | if (nh_type != ICMPV6_ECHO_REQUEST) 129 | goto out; 130 | 131 | if (bpf_ntohs(icmp6h->icmp6_sequence) % 2 == 0) 132 | action = XDP_DROP; 133 | 134 | } else if (nh_type == bpf_htons(ETH_P_IP)) { 135 | struct iphdr *iph; 136 | struct icmphdr *icmph; 137 | 138 | nh_type = parse_iphdr(&nh, data_end, &iph); 139 | if (nh_type != IPPROTO_ICMP) 140 | goto out; 141 | 142 | nh_type = parse_icmphdr(&nh, data_end, &icmph); 143 | if (nh_type != ICMP_ECHO) 144 | goto out; 145 | 146 | if (bpf_ntohs(icmph->un.echo.sequence) % 2 == 0) 147 | action = XDP_DROP; 148 | } 149 | out: 150 | return xdp_stats_record_action(ctx, action); 151 | } 152 | 153 | char _license[] SEC("license") = "GPL"; 154 | -------------------------------------------------------------------------------- /packet03-redirecting/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := xdp_prog_user 5 | 6 | COMMON_DIR := ../common 7 | 8 | COPY_LOADER := xdp-loader 9 | COPY_STATS := xdp_stats 10 | EXTRA_DEPS := $(COMMON_DIR)/parsing_helpers.h 11 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 12 | 13 | include $(COMMON_DIR)/common.mk 14 | -------------------------------------------------------------------------------- /packet03-redirecting/xdp_prog_user.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | static const char *__doc__ = "XDP redirect helper\n" 4 | " - Allows to populate/query tx_port and redirect_params maps\n"; 5 | 6 | #include <stdio.h> 7 |
#include <stdlib.h> 8 | #include <string.h> 9 | #include <errno.h> 10 | #include <getopt.h> 11 | #include <stdbool.h> 12 | 13 | #include <locale.h> 14 | #include <unistd.h> 15 | #include <time.h> 16 | 17 | #include <bpf/bpf.h> 18 | #include <bpf/libbpf.h> 19 | 20 | #include <net/if.h> 21 | #include <linux/if_ether.h> 22 | #include <linux/if_link.h> /* depend on kernel-headers installed */ 23 | 24 | #include "../common/common_params.h" 25 | #include "../common/common_user_bpf_xdp.h" 26 | #include "../common/common_libbpf.h" 27 | 28 | #include "../common/xdp_stats_kern_user.h" 29 | 30 | static const struct option_wrapper long_options[] = { 31 | 32 | {{"help", no_argument, NULL, 'h' }, 33 | "Show help", false}, 34 | 35 | {{"dev", required_argument, NULL, 'd' }, 36 | "Operate on device <ifname>", "<ifname>", true}, 37 | 38 | {{"redirect-dev", required_argument, NULL, 'r' }, 39 | "Redirect to device <ifname>", "<ifname>", true}, 40 | 41 | {{"src-mac", required_argument, NULL, 'L' }, 42 | "Source MAC address of <dev>", "<mac>", true }, 43 | 44 | {{"dest-mac", required_argument, NULL, 'R' }, 45 | "Destination MAC address of <redirect-dev>", "<mac>", true }, 46 | 47 | {{"quiet", no_argument, NULL, 'q' }, 48 | "Quiet mode (no output)"}, 49 | 50 | {{0, 0, NULL, 0 }, NULL, false} 51 | }; 52 | 53 | static int parse_mac(char *str, unsigned char mac[ETH_ALEN]) 54 | { 55 | /* Assignment 3: parse a MAC address in this function and place the 56 | * result in the mac array */ 57 | 58 | return 0; 59 | } 60 | 61 | static int write_iface_params(int map_fd, unsigned char *src, unsigned char *dest) 62 | { 63 | if (bpf_map_update_elem(map_fd, src, dest, 0) < 0) { 64 | fprintf(stderr, 65 | "WARN: Failed to update bpf map file: err(%d):%s\n", 66 | errno, strerror(errno)); 67 | return -1; 68 | } 69 | 70 | printf("forward: %02x:%02x:%02x:%02x:%02x:%02x -> %02x:%02x:%02x:%02x:%02x:%02x\n", 71 | src[0], src[1], src[2], src[3], src[4], src[5], 72 | dest[0], dest[1], dest[2], dest[3], dest[4], dest[5] 73 | ); 74 | 75 | return 0; 76 | } 77 | 78 | #ifndef PATH_MAX 79 | #define PATH_MAX 4096 80 | #endif 81 | 82 | const char *pin_basedir = "/sys/fs/bpf"; 83 | 84 | int main(int argc, char **argv) 85 | { 86 | int i; 87
| int len; 88 | int map_fd; 89 | bool redirect_map; 90 | char pin_dir[PATH_MAX]; 91 | unsigned char src[ETH_ALEN]; 92 | unsigned char dest[ETH_ALEN]; 93 | 94 | struct config cfg = { 95 | .ifindex = -1, 96 | .redirect_ifindex = -1, 97 | }; 98 | 99 | /* Cmdline options can change progsec */ 100 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 101 | 102 | redirect_map = (cfg.ifindex > 0) && (cfg.redirect_ifindex > 0); 103 | 104 | if (cfg.redirect_ifindex > 0 && cfg.ifindex == -1) { 105 | fprintf(stderr, "ERR: required option --dev missing\n\n"); 106 | usage(argv[0], __doc__, long_options, (argc == 1)); 107 | return EXIT_FAIL_OPTION; 108 | } 109 | 110 | len = snprintf(pin_dir, PATH_MAX, "%s/%s", pin_basedir, cfg.ifname); 111 | if (len < 0 || len >= PATH_MAX) { 112 | fprintf(stderr, "ERR: creating pin dirname\n"); 113 | return EXIT_FAIL_OPTION; 114 | } 115 | 116 | if (parse_mac(cfg.src_mac, src) < 0) { 117 | fprintf(stderr, "ERR: can't parse mac address %s\n", cfg.src_mac); 118 | return EXIT_FAIL_OPTION; 119 | } 120 | 121 | if (parse_mac(cfg.dest_mac, dest) < 0) { 122 | fprintf(stderr, "ERR: can't parse mac address %s\n", cfg.dest_mac); 123 | return EXIT_FAIL_OPTION; 124 | } 125 | 126 | 127 | /* Assignment 3: open the tx_port map corresponding to the cfg.ifname interface */ 128 | map_fd = -1; 129 | 130 | printf("map dir: %s\n", pin_dir); 131 | 132 | if (redirect_map) { 133 | /* setup a virtual port for the static redirect */ 134 | i = 0; 135 | bpf_map_update_elem(map_fd, &i, &cfg.redirect_ifindex, 0); 136 | printf("redirect from ifnum=%d to ifnum=%d\n", cfg.ifindex, cfg.redirect_ifindex); 137 | 138 | /* Assignment 3: open the redirect_params map corresponding to the cfg.ifname interface */ 139 | map_fd = -1; 140 | 141 | /* Setup the mapping containing MAC addresses */ 142 | if (write_iface_params(map_fd, src, dest) < 0) { 143 | fprintf(stderr, "can't write iface params\n"); 144 | return 1; 145 | } 146 | } 147 | 148 | return EXIT_OK; 149 | } 150 |
-------------------------------------------------------------------------------- /setup-testlab/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+Title: Setup testlab VM image 3 | #+OPTIONS: ^:nil 4 | 5 | It is preferred if you can run the XDP-tutorial directly on your existing 6 | laptop's Linux distro. This requires a minimum kernel version of 4.19, and 7 | preferably kernel version 4.20. If these requirements are met, then you 8 | don't need to setup/use the VM images, and you can instead follow and 9 | install the software packages described in [[file:../setup_dependencies.org]]. 10 | 11 | You /only/ need to read further if you want/need to use the provided VM 12 | (Virtual Machine) image. 13 | 14 | * Howto use VM image 15 | 16 | Howto use the instructor provided VM image is described in 17 | [[file:using_vm_image.org]]. 18 | 19 | * How was this created 20 | 21 | If you are interested in how this VM image was created (based on a Fedora29 22 | Cloud Image) you can read [[file:create_vm_image.org]]. But that is not relevant 23 | for the tutorial. 24 | -------------------------------------------------------------------------------- /setup-testlab/ansible/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+Title: Ansible setup to install XDP-tutorial dependencies 3 | #+OPTIONS: ^:nil 4 | 5 | This directory contains an Ansible setup that installs the needed software 6 | package dependencies for the XDP-tutorial. It has been used on the VM 7 | image that participants are provided. 8 | 9 | * Running 10 | 11 | To run this ansible setup on your own testlab VM, edit the [[file:hosts]] and 12 | update it with the correct VM IP-address. Verify that you can SSH login to 13 | the VM with username: =fedora= and your SSH-key.
14 | 15 | The script [[file:run-on-hosts.sh][run-on-hosts.sh]] can be used to run: 16 | - ansible-playbook -i hosts site.yml 17 | 18 | 19 | * Bootstrapping Fedora 20 | 21 | Notice: This trick is ONLY needed the first time, on a clean/fresh (VM) image. 22 | 23 | For some reason =/usr/bin/python= was not installed in Fedora 29, which 24 | Ansible complains about like this: 25 | 26 | #+begin_example 27 | $ ansible -i hosts --user=root -m ping all 28 | 192.168.122.98 | FAILED! => { 29 | "changed": false, 30 | "module_stderr": "Shared connection to 192.168.122.98 closed.\r\n", 31 | "module_stdout": "/bin/sh: /usr/bin/python: No such file or directory\r\n", 32 | "msg": "The module failed to execute correctly, you probably need to set the interpreter.\nSee stdout/stderr for the exact error", 33 | "rc": 127 34 | } 35 | #+end_example 36 | 37 | The packages python and python-dnf need to be installed. We have added a 38 | file:bootstrap-ansible.yml that performs this via ansible, and it needs to be 39 | run like: 40 | 41 | #+begin_example 42 | ansible-playbook -i hosts --user=root bootstrap-ansible.yml 43 | #+end_example 44 | 45 | Afterwards we can test if the user =fedora= can run ansible: 46 | 47 | #+begin_example 48 | $ ansible -i hosts --user fedora -m ping all 49 | 192.168.122.98 | SUCCESS => { 50 | "changed": false, 51 | "ping": "pong" 52 | } 53 | #+end_example 54 | -------------------------------------------------------------------------------- /setup-testlab/ansible/bootstrap-ansible.yml: -------------------------------------------------------------------------------- 1 | # Bootstrap ansible on Fedora 2 | 3 | # Need this bootstrap trick on newer Fedora servers 4 | # From: http://blog.oddbit.com/2015/10/15/bootstrapping-ansible-on-fedora-23/ 5 | # 6 | # Run as: ansible-playbook -i hosts --user=root bootstrap-ansible.yml 7 | --- 8 | - hosts: all 9 | gather_facts: false 10 | tasks: 11 | - name: Install packages for ansible support 12 | raw: dnf -y -e0 -d0 install python
python-dnf 13 | 14 | # Example to install some packages 15 | #- hosts: all 16 | # tasks: 17 | # - name: Install some packages 18 | # dnf: pkg={{ item }} state=installed 19 | # with_items: 20 | # - xorg-x11-xauth 21 | # - xorg-x11-apps 22 | # - vim 23 | -------------------------------------------------------------------------------- /setup-testlab/ansible/group_vars/all: -------------------------------------------------------------------------------- 1 | # 2 | # group_vars/all is used to set variables that will be used for every host 3 | # 4 | # 5 | # Multiline: 6 | # ---------- 7 | # Values can span multiple lines using | or >. Spanning multiple lines 8 | # using a | will include the newlines. Using a > will ignore newlines; 9 | # it’s used to make what would otherwise be a very long line easier to 10 | # read and edit. In either case the indentation will be ignored. 11 | 12 | ssh_public_key: | 13 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAEAQC+4vvIvwdZDBqVpeTHNv1QVxmWyK9rPTcIeAEssPly9aMe3Z0pzCikKnmH0biQQBn+hY3N7M6lrtE/n5znyClblS7k4Wud2GsZDjwmEYHPsi2/mf8JmJvNkJXTwd/1fOqr4LX4XFRVxbpT4cJ4qmtVSUjzc5I3a0/GJzm/0t9eXFHlIA/Ei+mFWF9b6y+0hWudb3Uwe1AwY1orM8imiHkS6/kztvD1FRJWZswwZi2fb5EfCCfyZCDGNDljueIWGt/I64iMgVBWuttyUxHvKkOd2AZ4up3K2JDnI0RC6pPWW4QiP6DLYYSRX9NofnO2/PaBjuKo1dGyGb8EAM5NrR/UxWAKpVThx8ya/j375ut2OsleCIhoj4Kb1depuZtGdJyj7p6c86K7pum4CqS8ar8XbnGsW1Nt6SaCOdcP4LkZAk/eGVMuRkFbU9C2fDaaipuzlh+kZxWS5/1PZVKfOK+X0xU8c0aBifqhy4kYpsLRkVPwv/EdjYbiKYdRZIJMIRgkyZDSStI2mJAzbIO4VVgGVWeRiVZsPw3nbt6GvgNCd1WGnjvR5LuvrFevDaXgnpsLHU42bwuuq30M+lKh4Ysu0xgniCZsEc7JWZkjzZi7s3I2r6Q2iL6hq+WheDsGEcrOFWo3FDe8mGDkmyGC3SzSUZmhJPav0WfWbWtPnz9a9Tmd48un7fngxYO9lQVBTotJ6uHs9JsrRENWDuSiIUQk9D2XB3x82tJiL3Pb+8CGaJqCzt5Zs0HkfF9K2LMw40ENcWwDMqdXmuHprUuwhTXk1tZtkQqAbghQVx+ivBmaq4issgPfyaUyCnETFjmmNXRirYAsWQUiYmxbz78jUBM7+idEDhvJANCprcgU8W8g8nvv9Gfbg781jCxZim5qm/3PpkSJGfcoQQYQyVguI1a9A2YrtK5EwHNFmVdW0v8Qs9gP60WGOI+2GIk73ePMsD99eowAKMBKAYbcst+sGGY3h8TS3md4TFT3zlgU4EgAOs3NDytxKJWI0G1I7FVVfbWRgIedNxT5Z0g9Mbw+rDo42lI
EL6EBe3q1vkwg0bS7Y0+9HCMKDO3NFDKXhE9b/IYjB2QYb18ZlhxENFZPyXkTKKL5S9O7i5vTQ/efGZRNw/MVB52V6UPh+mY3KIUOkTyafUtlx0ufqshkJklTndRE9oOt/0iuLt1aM6ruK77HYY6gEvnHUZwJXsw4YbiaX+8m9RG7G/otxBihJZQTKO4LnB8XRZj7eZatV4MoXAxXuWMYvsB6Hlzg5J8gWP6egfk2dYoC9SAEko1LpAspMilJDEFtZZepYkMqH8wsFyXgRb5CUrSOh4jSRtBix/uMCPtHq+z77mzKCEZTy3L+Wzbr netoptimizer@github 14 | 15 | -------------------------------------------------------------------------------- /setup-testlab/ansible/hosts: -------------------------------------------------------------------------------- 1 | # This is the an ansible 'hosts' file. 2 | # 3 | # It usually live in /etc/ansible/hosts, but this local copy can be 4 | # invoked like: ansible-playbook -i hosts site.yml 5 | # 6 | # - Comments begin with the '#' character 7 | # - Blank lines are ignored 8 | # - Groups of hosts are delimited by [header] elements 9 | # - You can enter hostnames or ip addresses 10 | # - A hostname/ip can be a member of multiple groups 11 | 12 | [fedora-vm] 13 | 192.168.122.16 14 | 15 | # Idea: run on the VM itself after git-clone 16 | # 127.0.0.1 17 | -------------------------------------------------------------------------------- /setup-testlab/ansible/roles/tutorial/files/inputrc: -------------------------------------------------------------------------------- 1 | # This file ~/.inputrc enables searching via up/down arrow keys 2 | "\e[A": history-search-backward 3 | "\e[B": history-search-forward 4 | 5 | #If that prevents Left and Right from working, fix them like this: 6 | #"\e[C": forward-char 7 | #"\e[D": backward-char 8 | -------------------------------------------------------------------------------- /setup-testlab/ansible/roles/tutorial/files/ld_usr_local.conf: -------------------------------------------------------------------------------- 1 | /usr/local/lib 2 | /usr/local/lib64 3 | -------------------------------------------------------------------------------- /setup-testlab/ansible/roles/tutorial/handlers/main.yml: 
-------------------------------------------------------------------------------- 1 | # handlers : 2 | 3 | # Simple handler for updating the shared library cache 4 | - name: run-ldconfig 5 | action: command /sbin/ldconfig 6 | -------------------------------------------------------------------------------- /setup-testlab/ansible/roles/tutorial/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook contains stuff for XDP-tutorial 3 | 4 | # 5 | # Setup SSH keys 6 | # - Hint: see group_vars/all 7 | # 8 | - name: SSH Authorized Keys 9 | authorized_key: 10 | user="{{ item }}" 11 | key="{{ ssh_public_key }}" 12 | state=present 13 | with_items: 14 | - fedora 15 | - root 16 | 17 | # F29 failed with: 18 | # Aborting, target uses selinux but python bindings (libselinux-python) 19 | # aren't installed! 20 | - name: be sure libselinux-python are installed 21 | dnf: pkg=libselinux-python state=present 22 | 23 | # 24 | # Notify handler to run ldconfig 25 | # See file: roles/tutorial/handlers/main.yml 26 | # 27 | - name: Support shared ld libs in /usr/local/ 28 | copy: src=ld_usr_local.conf dest=/etc/ld.so.conf.d/usr_local.conf owner=root group=root mode=0644 29 | notify: 30 | - run-ldconfig 31 | 32 | - name: Upgrade all packages 33 | dnf: 34 | name: "*" 35 | state: latest 36 | 37 | # 38 | # Copy some shell setup files 39 | # - src= looks in roles/tutorial/files/ 40 | # 41 | - name: Shell inputrc root 42 | copy: src=inputrc dest=/root/.inputrc owner=root group=root mode=0644 43 | - name: Shell inputrc for users 44 | copy: src=inputrc dest=/home/{{ item }}/.inputrc owner={{ item }} group={{ item }} mode=0644 45 | with_items: 46 | - fedora 47 | 48 | # Want have SSH-X11 forwarding working 49 | - name: Packages - SSH-X11 packages 50 | dnf: 51 | name: 52 | - xorg-x11-xauth 53 | - xorg-x11-apps 54 | state: installed 55 | 56 | # Ordinary developement tools 57 | - name: Packages - Devel tools 58 | dnf: 59 | name: 60 | - 
git 61 | - make 62 | - cmake 63 | - gcc 64 | - gdb 65 | - autoconf 66 | - libtool 67 | - emacs 68 | - vim 69 | - stgit 70 | - openssl-devel 71 | - cscope 72 | - pigz 73 | state: installed 74 | 75 | - name: Packages - Devel tools eBPF 76 | dnf: 77 | name: 78 | - clang 79 | - llvm 80 | - kernel-headers 81 | state: installed 82 | 83 | - name: Packages - libbpf build dependencies 84 | dnf: 85 | name: 86 | - elfutils-libelf-devel 87 | state: installed 88 | 89 | - name: Packages - pahole/dwarves build dependencies 90 | dnf: 91 | name: 92 | - elfutils-devel 93 | - zlib-devel 94 | - cmake 95 | state: installed 96 | 97 | 98 | - name: Packages - Extra dependencies eBPF 99 | dnf: 100 | name: 101 | - binutils-devel 102 | - libcap-devel 103 | state: installed 104 | 105 | - name: Packages - Devel tools eBPF extra for bpftrace 106 | dnf: 107 | name: 108 | - clang-devel 109 | - llvm-devel 110 | state: installed 111 | 112 | - name: Packages - XDP-tutorial required tools 113 | dnf: 114 | name: 115 | - ethtool 116 | - iputils 117 | - iproute-tc 118 | - bpftool 119 | - perf 120 | - perl-Time-HiRes 121 | - bash-completion 122 | - redhat-lsb-core 123 | - stow 124 | - socat 125 | - nmap-ncat 126 | - nmap 127 | - hping3 128 | - sysstat 129 | - tcpdump 130 | state: installed 131 | -------------------------------------------------------------------------------- /setup-testlab/ansible/run-on-hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ansible-playbook -i hosts site.yml 3 | -------------------------------------------------------------------------------- /setup-testlab/ansible/run-on-localhost.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ansible-playbook --connection=local --inventory 127.0.0.1, site.yml 3 | -------------------------------------------------------------------------------- /setup-testlab/ansible/site.yml: 
-------------------------------------------------------------------------------- 1 | --- 2 | # This playbook is for XDP-tutorial VM-image installing dependencies 3 | 4 | # Pickup directory: roles/tutorial/ 5 | - name: Apply XDP-tutorial configuration to all nodes 6 | hosts: all 7 | remote_user: fedora 8 | become: true 9 | become_method: sudo 10 | 11 | roles: 12 | - tutorial 13 | 14 | -------------------------------------------------------------------------------- /setup-testlab/create_vm_image.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+Title: Howto create VM image 3 | #+OPTIONS: ^:nil 4 | 5 | This document describes how the virtual machine (VM) image for the 6 | XDP-tutorial was created. 7 | 8 | * Installing needed packages for XDP-tutorial 9 | 10 | The Ansible setup in directory [[file:ansible][ansible/]] is used to define and install the 11 | needed software packages. This was used on the image after the setup below. 12 | 13 | * Downloading images 14 | 15 | Here we download a predefined "cloud" image, and update it to suit the 16 | tutorial. 17 | 18 | Find some alternative images to download here: 19 | - https://alt.fedoraproject.org/ 20 | - https://alt.fedoraproject.org/cloud/ 21 | 22 | Specifically download the: "Cloud Base qcow2 image" (size 294M) 23 | - https://download.fedoraproject.org/pub/fedora/linux/releases/29/Cloud/x86_64/images/Fedora-Cloud-Base-29-1.2.x86_64.qcow2 24 | 25 | Place a copy in: /var/lib/libvirt/images/ 26 | - with filename: F29-xdp-tutorial.qcow2. 27 | 28 | ** Initial setup for cloud-init 29 | 30 | Use of the cloud image is annoying, because it requires creating a special 31 | CDROM image to reconfigure.
After reboots it still want to do some 32 | cloud-init, which either requires mounting below CDROM-image or [[https://fatmin.com/2017/10/19/how-to-disable-cloud-init-in-a-rhel-cloud-image/][disabling 33 | cloud-init]] via cmdline: touch /etc/cloud/cloud-init.disabled 34 | 35 | *** Create: cloud-init.iso 36 | 37 | As describe [[https://www.technovelty.org/linux/running-cloud-images-locally.html][here]], these cloud images need/tries to fetch some setup info. We 38 | need to supply this, else you cannot login to them. This is supplied by 39 | creating a special crafted ISO image. 40 | 41 | Create two files: 42 | 43 | #+begin_example 44 | cat > meta-data << EOF 45 | instance-id: iid-local01 46 | local-hostname: xdp-tutorial 47 | EOF 48 | #+end_example 49 | 50 | #+begin_example 51 | cat > user-data << EOF 52 | #cloud-config 53 | password: xdptut 54 | ssh_pwauth: True 55 | chpasswd: { expire: False } 56 | 57 | ssh_authorized_keys: 58 | - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAEAQC+4vvIvwdZDBqVpeTHNv1QVxmWyK9rPTcIeAEssPly9aMe3Z0pzCikKnmH0biQQBn+hY3N7M6lrtE/n5znyClblS7k4Wud2GsZDjwmEYHPsi2/mf8JmJvNkJXTwd/1fOqr4LX4XFRVxbpT4cJ4qmtVSUjzc5I3a0/GJzm/0t9eXFHlIA/Ei+mFWF9b6y+0hWudb3Uwe1AwY1orM8imiHkS6/kztvD1FRJWZswwZi2fb5EfCCfyZCDGNDljueIWGt/I64iMgVBWuttyUxHvKkOd2AZ4up3K2JDnI0RC6pPWW4QiP6DLYYSRX9NofnO2/PaBjuKo1dGyGb8EAM5NrR/UxWAKpVThx8ya/j375ut2OsleCIhoj4Kb1depuZtGdJyj7p6c86K7pum4CqS8ar8XbnGsW1Nt6SaCOdcP4LkZAk/eGVMuRkFbU9C2fDaaipuzlh+kZxWS5/1PZVKfOK+X0xU8c0aBifqhy4kYpsLRkVPwv/EdjYbiKYdRZIJMIRgkyZDSStI2mJAzbIO4VVgGVWeRiVZsPw3nbt6GvgNCd1WGnjvR5LuvrFevDaXgnpsLHU42bwuuq30M+lKh4Ysu0xgniCZsEc7JWZkjzZi7s3I2r6Q2iL6hq+WheDsGEcrOFWo3FDe8mGDkmyGC3SzSUZmhJPav0WfWbWtPnz9a9Tmd48un7fngxYO9lQVBTotJ6uHs9JsrRENWDuSiIUQk9D2XB3x82tJiL3Pb+8CGaJqCzt5Zs0HkfF9K2LMw40ENcWwDMqdXmuHprUuwhTXk1tZtkQqAbghQVx+ivBmaq4issgPfyaUyCnETFjmmNXRirYAsWQUiYmxbz78jUBM7+idEDhvJANCprcgU8W8g8nvv9Gfbg781jCxZim5qm/3PpkSJGfcoQQYQyVguI1a9A2YrtK5EwHNFmVdW0v8Qs9gP60WGOI+2GIk73ePMsD99eowAKMBKAYbcst+sGGY3h8TS3md4TFT3zlgU4EgAOs3NDytxKJWI0
G1I7FVVfbWRgIedNxT5Z0g9Mbw+rDo42lIEL6EBe3q1vkwg0bS7Y0+9HCMKDO3NFDKXhE9b/IYjB2QYb18ZlhxENFZPyXkTKKL5S9O7i5vTQ/efGZRNw/MVB52V6UPh+mY3KIUOkTyafUtlx0ufqshkJklTndRE9oOt/0iuLt1aM6ruK77HYY6gEvnHUZwJXsw4YbiaX+8m9RG7G/otxBihJZQTKO4LnB8XRZj7eZatV4MoXAxXuWMYvsB6Hlzg5J8gWP6egfk2dYoC9SAEko1LpAspMilJDEFtZZepYkMqH8wsFyXgRb5CUrSOh4jSRtBix/uMCPtHq+z77mzKCEZTy3L+Wzbr netoptimizer@github 59 | EOF 60 | #+end_example 61 | 62 | Create the ISO file that the images can use as CDROM drive: 63 | 64 | #+begin_example 65 | genisoimage -output cloud-init.iso -volid cidata -joliet -rock user-data meta-data 66 | #+end_example 67 | 68 | Then copy init.iso into /var/lib/libvirtd/images as well (if you already 69 | have an non-functional cloud-image, you can connect this images as 70 | virtual-CDROM drive). 71 | 72 | #+begin_example 73 | sudo cp cloud-init.iso /var/lib/libvirt/images/ 74 | #+end_example 75 | 76 | * Initial: Import/use VM-image 77 | 78 | ** Create via virt-install 79 | 80 | You can create a new libvirt machine, that use this image, from the command 81 | line using =virt-install=. 82 | 83 | Notice these two files must exist: 84 | - /var/lib/libvirt/images/F29-xdp-tutorial.qcow2 85 | - /var/lib/libvirt/images/cloud-init.iso 86 | 87 | Use virt-install: 88 | #+begin_example 89 | sudo virt-install --name F29-xdp-tutorial \ 90 | --description 'Fedora 29 - XDP-tutorial' \ 91 | --ram 2048 \ 92 | --vcpus 2 \ 93 | --disk path=/var/lib/libvirt/images/F29-xdp-tutorial.qcow2 \ 94 | --cdrom /var/lib/libvirt/images/cloud-init.iso \ 95 | --os-type linux \ 96 | --os-variant fedora29 \ 97 | --network bridge=virbr0 \ 98 | --graphics vnc,listen=127.0.0.1,port=5901 \ 99 | --noautoconsole 100 | #+end_example 101 | 102 | Another important detail: If the machines doesn't give you a console after 103 | rebooting, then you likely need to (again) add the CDROM drive with the 104 | cloud-init.iso (/var/lib/libvirt/images/cloud-init.iso). This can be done 105 | via virt-manager. 
You can also disable "cloud-init" via command line: 106 | =sudo touch /etc/cloud/cloud-init.disabled= 107 | 108 | ** Default user+password 109 | 110 | End-result: A virtual machine with: 111 | - Username: fedora 112 | - Password: xdptut 113 | 114 | This Fedora-Cloud image has a 10 sec. delay when logging in as root; remove 115 | it by editing /root/.ssh/authorized_keys. 116 | 117 | * Disk capacity 118 | ** Increasing VM disk capacity 119 | 120 | The remaining disk capacity is getting low; it would be unfortunate if this 121 | disrupted the tutorial. Thus, we resize the VM disk. First shut down the VM. 122 | 123 | Following [[https://nullr0ute.com/2018/08/increasing-a-libvirt-kvm-virtual-machine-disk-capacity/][this blogpost]] we resize the disk via the commands below. 124 | 125 | On host-OS machine: 126 | #+begin_example 127 | cd /var/lib/libvirt/images/ 128 | qemu-img resize F29-xdp-tutorial.qcow2 +2G 129 | qemu-img info F29-xdp-tutorial.qcow2 130 | #+end_example 131 | 132 | Now power up the VM, login as root (or use sudo) for next commands on VM: 133 | #+begin_example 134 | echo ", +" | sfdisk -N 1 /dev/vda --no-reread 135 | partprobe 136 | resize2fs /dev/vda1 137 | #+end_example 138 | 139 | ** Reduce VM image size 140 | 141 | Before uploading, use the tool =virt-sparsify= to reduce the disk capacity used 142 | by the VM image. Afterwards, also compress the image to reduce the download size. 143 | 144 | * Extra manual changes 145 | 146 | Cloned some git trees in user 'fedora' home directory.
147 | 148 | List of git trees: 149 | - https://github.com/xdp-project/xdp-tutorial/ 150 | - https://github.com/netoptimizer/network-testing 151 | - git://git.kernel.org/pub/scm/devel/pahole/pahole.git 152 | 153 | ** stow 154 | 155 | Use stow to more easily keep track of manually installed packages under: 156 | =/usr/local/= 157 | 158 | Some manual setup (TODO move this to ansible): 159 | #+begin_example 160 | sudo mkdir /usr/local/stow/ 161 | sudo chown fedora /usr/local/stow/ 162 | sudo chgrp -R adm /usr/local/ 163 | sudo chmod -R g+ws /usr/local/ 164 | #+end_example 165 | 166 | Note, the ansible setup has already added /usr/local/lib to ldconfig. 167 | 168 | ** pahole 169 | 170 | Until LLVM 8.0.0 gets released, we can use pahole to create the BTF-info 171 | in the BPF-ELF files. On Fedora 29 the package 'dwarves', which 172 | contains pahole, is too old to have the needed BTF-features (available in version 173 | 1.12). Thus, we need to build it ourselves. 174 | 175 | Building pahole: 176 | 177 | #+begin_example 178 | cd ~/git/pahole 179 | mkdir build 180 | cd build 181 | cmake -DCMAKE_INSTALL_PREFIX=/usr/local/stow/pahole-git01 -D__LIB=lib .. 182 | make 183 | #+end_example 184 | 185 | Stow part of the setup: 186 | #+begin_example 187 | cd /usr/local/stow/ 188 | stow pahole-git01 189 | sudo ldconfig 190 | #+end_example 191 | 192 | ** Kernel samples/bpf/ 193 | 194 | Uploaded a compiled version of kernel samples/bpf/ directory. 195 | The =xdp_monitor= tool could come in handy for participants. 196 | -------------------------------------------------------------------------------- /setup-testlab/using_vm_image.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+Title: Howto use the VM image 3 | #+OPTIONS: ^:nil 4 | 5 | This document describes how to use the provided VM-image (which was created as described in 6 | [[file:create_vm_image.org]]).
7 | 8 | * Host-OS dependencies 9 | 10 | First of all, the host-OS (likely your laptop) needs some software packages for 11 | running a virtual machine (VM) image. 12 | 13 | ** Fedora: libvirt software setup 14 | 15 | There is a guide for Fedora here: 16 | - https://docs.fedoraproject.org/en-US/quick-docs/getting-started-with-virtualization/ 17 | 18 | Fedora has a package collection called @virtualization. 19 | 20 | #+begin_example 21 | $ dnf groupinfo virtualization 22 | Group: Virtualization 23 | Description: These packages provide a graphical virtualization environment. 24 | Mandatory Packages: 25 | virt-install 26 | Default Packages: 27 | libvirt-daemon-config-network 28 | libvirt-daemon-kvm 29 | qemu-kvm 30 | virt-manager 31 | virt-viewer 32 | Optional Packages: 33 | guestfs-browser 34 | libguestfs-tools 35 | python3-libguestfs 36 | virt-top 37 | #+end_example 38 | 39 | Follow the instructions in the [[https://docs.fedoraproject.org/en-US/quick-docs/getting-started-with-virtualization/][guide link]]: 40 | 41 | #+begin_example 42 | sudo dnf group install --with-optional virtualization 43 | 44 | # After the packages install, start the libvirtd service: 45 | sudo systemctl start libvirtd 46 | 47 | # To start the service on boot, run: 48 | sudo systemctl enable libvirtd 49 | 50 | # I had to restart libvirtd 51 | sudo systemctl restart libvirtd 52 | 53 | # verify that the KVM kernel modules are properly loaded 54 | lsmod | grep kvm 55 | #+end_example 56 | 57 | 58 | * Import/use VM-image 59 | 60 | There are a number of ways to use/import the provided image. 61 | 62 | ** Use via virt-manager 63 | 64 | Create a new virtual machine and import the provided disk image via the virt-manager 65 | interface, selecting "Import existing disk image" and adding the CDROM drive 66 | manually.
67 | 68 | Use graphical tool: virt-manager 69 | - (If not already connected: connect to QEMU/KVM on localhost) 70 | - File -> "New Virtual Machine" 71 | - Radio-button: "Import existing disk image" 72 | - "Browse..." for file: 73 | * Select "F29-xdp-tutorial.qcow2" (Choose Volume) 74 | - Choose the operating system; name: Fedora 29 75 | * Select "Forward" 76 | - Choose Memory and CPU settings 77 | - Choose: Name: "F29-xdp-tutorial" 78 | 79 | ** Use via virt-install 80 | 81 | You can create a new libvirt machine, that use this image, from the command 82 | line using =virt-install=: 83 | 84 | Here we assume you installed the VM image in: 85 | - /var/lib/libvirt/images/F29-xdp-tutorial.qcow2 86 | 87 | #+begin_example 88 | sudo virt-install --name F29-xdp-tutorial \ 89 | --description 'Fedora 29 - XDP-tutorial' \ 90 | --ram 2048 \ 91 | --vcpus 2 \ 92 | --disk path=/var/lib/libvirt/images/F29-xdp-tutorial.qcow2 \ 93 | --cdrom /dev/null \ 94 | --os-type linux \ 95 | --os-variant fedora29 \ 96 | --network bridge=virbr0 \ 97 | --graphics vnc,listen=127.0.0.1,port=5901 \ 98 | --noautoconsole 99 | #+end_example 100 | 101 | Guess you don't prefer the graphical tool virt-manager. You can start a 102 | console login via: 103 | 104 | #+begin_example 105 | sudo virsh console F29-xdp-tutorial 106 | #+end_example 107 | 108 | You should login with user "fedora", observe the IP-address (e.g. ifconfig) 109 | and then instead use SSH to login. To exit the console use: =Ctrl + 5=. 110 | 111 | * Default username+password 112 | 113 | The default username+password for your new VM image is: 114 | - Username: fedora 115 | - Password: xdptut 116 | 117 | You should login and add your own SSH-key to the user "fedora" 118 | authorized_keys, e.g. 
via copy-paste into: 119 | 120 | #+begin_example 121 | cat >> /home/fedora/.ssh/authorized_keys 122 | #+end_example 123 | 124 | -------------------------------------------------------------------------------- /setup-testlab/workarounds.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Setup workarounds 3 | #+OPTIONS: ^:nil 4 | 5 | This file contains notes about needed setup workarounds. 6 | 7 | * SElinux workaround 8 | 9 | In current Fedora 29, SElinux denies bpftool access to listing maps via e.g. 10 | the commands: 11 | 12 | #+begin_example 13 | # bpftool map 14 | # bpftool map list 15 | #+end_example 16 | 17 | Users see this error: 18 | 19 | #+begin_example 20 | $ sudo bpftool map 21 | Error: can't get map by id (13): Permission denied 22 | #+end_example 23 | 24 | Other parts of the bpftool command do work, like listing the running BPF programs: 25 | 26 | #+begin_example 27 | # bpftool prog 28 | # bpftool prog list 29 | #+end_example 30 | 31 | Filed: Red Hat [[https://bugzilla.redhat.com/show_bug.cgi?id=1688668][Bug 1688668]] - SElinux conflict with bpftool map listing 32 | - As of this writing it is already resolved 33 | - But not fully rolled out, so use below workaround 34 | - Fixed in selinux-policy version 3.14.2-51.fc29 35 | 36 | Using the proposed workaround: 37 | - https://bodhi.fedoraproject.org/updates/FEDORA-2019-4cc36fafbb 38 | - sudo dnf upgrade --enablerepo=updates-testing --advisory=FEDORA-2019-4cc36fafbb 39 | 40 | -------------------------------------------------------------------------------- /testenv/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Test environment script 3 | #+OPTIONS: ^:nil 4 | 5 | This directory contains a setup script that you can use to create test 6 | environments for testing your XDP programs.
It works by creating virtual 7 | ethernet (veth) interface pairs and moving one end of each pair to another 8 | network namespace. You can load the XDP program in the other namespace and 9 | send traffic to it through the interface that is visible in the root 10 | namespace. 11 | 12 | Run =./testenv.sh= with no parameter to get a list of available commands, or 13 | run =./testenv.sh --help= to get the full help listing with all options. The 14 | script can maintain several environments active at the same time, and you 15 | can switch between them using the =--name= option. 16 | 17 | If you don't specify a name, the most recently used environment will be 18 | used. If you don't specify a name when setting up a new environment, a 19 | random name will be generated for you. 20 | 21 | Examples: 22 | 23 | Setup new environment named "test": 24 | =./testenv.sh setup --name=test= 25 | 26 | Create a shell alias for easy use of script from anywhere: 27 | =eval $(./testenv.sh alias)= 28 | 29 | See the currently active environment, and a list of all active environment 30 | names (with alias defined as above): 31 | =t status= 32 | 33 | Enter the currently active environment: 34 | =t enter= 35 | 36 | Execute a command inside the environment: 37 | =t exec -- ip a= 38 | 39 | Teardown the environment: 40 | =t teardown= 41 | 42 | * Understanding the network topology 43 | 44 | When setting up a test environment, there will be a virtual link between the 45 | environment inside the new namespace, and the interface visible from the 46 | host system root namespace. The new namespace will be named after the 47 | environment name passed to the script, as will the interface visible in the 48 | outer namespace. The interface *inside* the namespace will always be named 49 | 'veth0'. 
50 | 51 | To illustrate this, creating a test environment with the name 'test01' (with 52 | =t setup --name test01=) will result in the following environment being set 53 | up: 54 | 55 | #+begin_example 56 | +-----------------------------+ +-----------------------------+ 57 | | Root namespace | | Testenv namespace 'test01' | 58 | | | From 'test01' | | 59 | | +--------+ TX-> RX-> +--------+ | 60 | | | test01 +--------------------------+ veth0 | | 61 | | +--------+ <-RX <-TX +--------+ | 62 | | | From 'veth0' | | 63 | +-----------------------------+ +-----------------------------+ 64 | #+end_example 65 | 66 | The 'test01' interface visible in the root namespace is the one we will be 67 | installing XDP programs on in the tutorial lessons. The XDP program will see 68 | packets being *received* on this interface; as you can see from the diagram, 69 | this means all packets being transmitted from inside the new namespace. 70 | 71 | The setup is created this way to simulate the case where the host machine 72 | has physical interfaces; but instead of the traffic arriving from outside 73 | hosts on physical interfaces, it will arrive from inside the namespace on 74 | the virtual interface. This also means that when you generate traffic to 75 | test your XDP programs, you need to generate it from *inside* the test 76 | environment. The =t ping= command will start the ping inside the test 77 | environment by default, and you can run arbitrary programs inside the 78 | environment by using =t exec -- <command>=, or simply spawning a shell with 79 | =t enter=.
80 | -------------------------------------------------------------------------------- /testenv/config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # These are the config options for the testlab 4 | 5 | 6 | SETUP_SCRIPT="$(dirname "$0")/setup-env.sh" 7 | STATEDIR="${TMPDIR:-/tmp}/xdp-tutorial-testlab" 8 | IP6_SUBNET=fc00:dead:cafe # must have exactly three :-separated elements 9 | IP6_PREFIX_SIZE=64 # Size of assigned prefixes 10 | IP6_FULL_PREFIX_SIZE=48 # Size of IP6_SUBNET 11 | IP4_SUBNET=10.11 12 | IP4_PREFIX_SIZE=24 # Size of assigned prefixes 13 | IP4_FULL_PREFIX_SIZE=16 # Size of IP4_SUBNET 14 | VLAN_IDS=(1 2) 15 | GENERATED_NAME_PREFIX="xdptut" 16 | -------------------------------------------------------------------------------- /testenv/setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: GPL-2.0-or-later 3 | # 4 | # Script to setup things inside a test environment, used by testenv.sh for 5 | # executing commands. 
6 | # 7 | # Author: Toke Høiland-Jørgensen (toke@redhat.com) 8 | # Date: 7 March 2019 9 | # Copyright (c) 2019 Red Hat 10 | 11 | 12 | die() 13 | { 14 | echo "$1" >&2 15 | exit 1 16 | } 17 | 18 | [ -n "$TESTENV_NAME" ] || die "TESTENV_NAME missing from environment" 19 | [ -n "$1" ] || die "Usage: $0 " 20 | 21 | set -o nounset 22 | 23 | mount -t bpf bpf /sys/fs/bpf/ || die "Unable to mount /sys/fs/bpf inside test environment" 24 | 25 | exec "$@" 26 | -------------------------------------------------------------------------------- /tracing01-xdp-simple/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | # Departing from the implicit _user.c scheme 4 | XDP_TARGETS := trace_prog_kern xdp_prog_kern 5 | USER_TARGETS := trace_load_and_stats 6 | 7 | COMMON_DIR := ../common 8 | 9 | include $(COMMON_DIR)/common.mk 10 | 11 | -------------------------------------------------------------------------------- /tracing01-xdp-simple/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Tracing01 - monitor xdp tracepoint 3 | #+OPTIONS: ^:nil 4 | 5 | In this lesson we will show how to create and load eBPF program that 6 | hooks on xdp:exception tracepoint and get its values to user space 7 | stats application. 
8 | 9 | * Table of Contents :TOC: 10 | - [[#xdp-tracepoints][XDP tracepoints]] 11 | - [[#tracepoint-program-section][Tracepoint program section]] 12 | - [[#tracepoint-arguments][Tracepoint arguments]] 13 | - [[#tracepoint-attaching][Tracepoint attaching]] 14 | - [[#hash-map][HASH map]] 15 | - [[#assignments][Assignments]] 16 | - [[#assignment-1-setting-up-your-test-lab][Assignment 1: Setting up your test lab]] 17 | - [[#assignment-2-load-tracepoint-monitor-program][Assignment 2: Load tracepoint monitor program]] 18 | 19 | * XDP tracepoints 20 | 21 | The eBPF programs can be attached also to tracepoints. There are 22 | several tracepoints related to the xdp tracepoint subsystem: 23 | 24 | #+begin_src sh 25 | ls /sys/kernel/debug/tracing/events/xdp/ 26 | xdp_cpumap_enqueue 27 | xdp_cpumap_kthread 28 | xdp_devmap_xmit 29 | xdp_exception 30 | xdp_redirect 31 | xdp_redirect_err 32 | xdp_redirect_map 33 | xdp_redirect_map_err 34 | #+end_src 35 | 36 | ** Tracepoint program section 37 | 38 | The bpf library expects the tracepoint eBPF program to be stored 39 | in a section with following name: 40 | 41 | #+begin_src C 42 | tracepoint// 43 | #+end_src 44 | 45 | where == is the tracepoint subsystem and == is 46 | the tracepoint name, which can be done with following construct: 47 | 48 | #+begin_src sh 49 | SEC("tracepoint/xdp/xdp_exception") 50 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) 51 | #+end_src 52 | 53 | ** Tracepoint arguments 54 | 55 | There's single program pointer argument which points 56 | to the structure, that defines the tracepoint fields. 
57 | 58 | Like for xdp:xdp_exception tracepoint: 59 | 60 | #+begin_src C 61 | struct xdp_exception_ctx { 62 | __u64 __pad; // First 8 bytes are not accessible by bpf code 63 | __s32 prog_id; // offset:8; size:4; signed:1; 64 | __u32 act; // offset:12; size:4; signed:0; 65 | __s32 ifindex; // offset:16; size:4; signed:1; 66 | }; 67 | 68 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) 69 | #+end_src 70 | 71 | This struct is exported in tracepoint format file: 72 | 73 | #+begin_src C 74 | # cat /sys/kernel/debug/tracing/events/xdp/xdp_exception/format 75 | ... 76 | field:unsigned short common_type; offset:0; size:2; signed:0; 77 | field:unsigned char common_flags; offset:2; size:1; signed:0; 78 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; 79 | field:int common_pid; offset:4; size:4; signed:1; 80 | 81 | field:int prog_id; offset:8; size:4; signed:1; 82 | field:u32 act; offset:12; size:4; signed:0; 83 | field:int ifindex; offset:16; size:4; signed:1; 84 | ... 85 | #+end_src 86 | 87 | ** Tracepoint attaching 88 | 89 | To load a tracepoint program for this example we use following bpf 90 | library helper functions: 91 | 92 | #+begin_src sh 93 | bpf_object__open_file(cfg->filename, NULL); 94 | #+end_src 95 | 96 | #+begin_src sh 97 | bpf_object__load(obj); 98 | #+end_src 99 | 100 | To attach the program to the tracepoint we need to create a tracepoint 101 | perf event and attach the eBPF program to it, using its file descriptor. 102 | Under the hood this function sets up the PERF_EVENT_IOC_SET_BPF ioctl call: 103 | 104 | #+begin_src sh 105 | bpf_program__attach_tracepoint(prog, "xdp", "xdp_exception"); 106 | #+end_src 107 | 108 | Please check trace_load_and_stats.c load_bpf_and_trace_attach function 109 | for all the details. 
110 | 111 | * HASH map 112 | 113 | This example is using PERCPU HASH map, that stores number of aborted 114 | packets for interface 115 | #+begin_src C 116 | struct { 117 | __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 118 | __type(key, __s32); 119 | __type(value, __u64); 120 | __uint(max_entries, 10); 121 | } xdp_stats_map SEC(".maps"); 122 | #+end_src 123 | 124 | The interface is similar to the ARRAY map except that we need to specifically 125 | create new element in the hash if it does not exist: 126 | 127 | #+begin_src C 128 | /* Lookup in kernel BPF-side returns pointer to actual data. */ 129 | valp = bpf_map_lookup_elem(&xdp_stats_map, &key); 130 | 131 | /* If there's no record for interface, we need to create one, 132 | * with number of packets == 1 133 | */ 134 | if (!valp) { 135 | __u64 one = 1; 136 | return bpf_map_update_elem(&xdp_stats_map, &key, &one, 0) ? 1 : 0; 137 | } 138 | 139 | (*valp)++; 140 | #+end_src 141 | 142 | Please check trace_prog_kern.c for the full code. 143 | 144 | * Assignments 145 | 146 | ** Assignment 1: Setting up your test lab 147 | 148 | In this lesson we will use the setup of the previous lesson: 149 | Basic02 - loading a program by name [[https://github.com/xdp-project/xdp-tutorial/tree/master/basic02-prog-by-name#assignment-2-add-xdp_abort-program]] 150 | 151 | and load XDP program from xdp_prog_kern.o that will abort every 152 | incoming packet: 153 | 154 | #+begin_src C 155 | SEC("xdp_abort") 156 | int xdp_drop_func(struct xdp_md *ctx) 157 | { 158 | return XDP_ABORTED; 159 | } 160 | #+end_src 161 | 162 | with xdp-loader: 163 | Assignment 2: Add xdp_abort program [[https://github.com/xdp-project/xdp-tutorial/tree/master/basic02-prog-by-name#assignment-2-add-xdp_abort-program]] 164 | 165 | Setup the environment: 166 | 167 | #+begin_src sh 168 | $ sudo ../testenv/testenv.sh setup --name veth-basic02 169 | #+end_src 170 | 171 | Load the XDP program, that produces aborted packets: 172 | 173 | #+begin_src sh 174 | $ sudo xdp-loader load 
veth-basic02 xdp_prog_kern.o -n xdp_drop_func 175 | #+end_src 176 | 177 | and generate some packets: 178 | 179 | #+begin_src sh 180 | $ sudo ../testenv/testenv.sh enter --name veth-basic02 181 | # ping fc00:dead:cafe:1::1 182 | PING fc00:dead:cafe:1::1(fc00:dead:cafe:1::1) 56 data bytes 183 | #+end_src 184 | 185 | ** Assignment 2: Load tracepoint monitor program 186 | 187 | Now when you run the trace_load_and_stats application it will 188 | load and attach the tracepoint eBPF program and display number 189 | of aborted packets per interface: 190 | 191 | #+begin_src sh 192 | # ./trace_load_and_stats 193 | Success: Loaded BPF-object(trace_prog_kern.o) 194 | 195 | Collecting stats from BPF map 196 | - BPF map (bpf_map_type:1) id:46 name:xdp_stats_map key_size:4 value_size:4 max_entries:10 197 | 198 | veth-basic02 (2) 199 | veth-basic02 (4) 200 | veth-basic02 (6) 201 | ... 202 | #+end_src 203 | -------------------------------------------------------------------------------- /tracing01-xdp-simple/trace_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | struct { 6 | __uint(type, BPF_MAP_TYPE_PERCPU_HASH); 7 | __type(key, __s32); 8 | __type(value, __u64); 9 | __uint(max_entries, 10); 10 | } xdp_stats_map SEC(".maps"); 11 | 12 | struct xdp_exception_ctx { 13 | __u64 __pad; // First 8 bytes are not accessible by bpf code 14 | __s32 prog_id; // offset:8; size:4; signed:1; 15 | __u32 act; // offset:12; size:4; signed:0; 16 | __s32 ifindex; // offset:16; size:4; signed:1; 17 | }; 18 | 19 | SEC("tracepoint/xdp/xdp_exception") 20 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) 21 | { 22 | __s32 key = ctx->ifindex; 23 | __u64 *valp; /* must match the __u64 value type declared for xdp_stats_map */ 24 | 25 | /* Collecting stats only for XDP_ABORTED action. */ 26 | if (ctx->act != XDP_ABORTED) 27 | return 0; 28 | 29 | /* Lookup in kernel BPF-side returns pointer to actual data.
*/ 30 | valp = bpf_map_lookup_elem(&xdp_stats_map, &key); 31 | 32 | /* If there's no record for interface, we need to create one, 33 | * with number of packets == 1 34 | */ 35 | if (!valp) { 36 | __u64 one = 1; 37 | return bpf_map_update_elem(&xdp_stats_map, &key, &one, 0) ? 1 : 0; 38 | } 39 | 40 | (*valp)++; 41 | return 0; 42 | } 43 | 44 | char _license[] SEC("license") = "GPL"; 45 | -------------------------------------------------------------------------------- /tracing01-xdp-simple/xdp_prog_kern.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | 5 | SEC("xdp") 6 | int xdp_drop_func(struct xdp_md *ctx) 7 | { 8 | return XDP_ABORTED; 9 | } 10 | 11 | char _license[] SEC("license") = "GPL"; 12 | -------------------------------------------------------------------------------- /tracing02-xdp-monitor/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | # Departing from the implicit _user.c scheme 4 | XDP_TARGETS := trace_prog_kern 5 | USER_TARGETS := trace_load_and_stats 6 | 7 | COMMON_DIR := ../common 8 | 9 | COMMON_OBJS := $(COMMON_DIR)/common_user_bpf_xdp.o 10 | 11 | include $(COMMON_DIR)/common.mk 12 | 13 | -------------------------------------------------------------------------------- /tracing02-xdp-monitor/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Tracing02 - monitor xdp tracepoints 3 | #+OPTIONS: ^:nil 4 | 5 | In this lesson we will show how to attach to and monitor all 6 | xdp related tracepoints and some related info to user space 7 | stat application. 
8 | 9 | * Table of Contents :TOC: 10 | - [[#tracepoints][Tracepoints]] 11 | - [[#assignments][Assignments]] 12 | - [[#assignment-1-monitor-all-xdp-tracepoints][Assignment 1: Monitor all xdp tracepoints]] 13 | - [[#alternative-solutions][Alternative solutions]] 14 | - [[#bpftrace][bpftrace]] 15 | - [[#perf-record][perf record]] 16 | 17 | * Tracepoints 18 | 19 | Tracepoints are useful for debugging XDP, especially for XDP_REDIRECT. 20 | 21 | To gain performance XDP_REDIRECT does RX-bulking towards destinations, which 22 | unfortunately means that XDP-prog doesn't get errors directly returned 23 | through the BPF-helper call =bpf_redirect()= (or =bpf_redirect_map=). 24 | Instead these errors can be debugged via using the XDP tracepoint available 25 | in the kernel. 26 | 27 | The bpf library expects the tracepoint eBPF program to be stored 28 | in a section with following name: 29 | 30 | #+begin_example sh 31 | tracepoint// 32 | #+end_example 33 | 34 | where == is the tracepoint subsystem and == is 35 | the tracepoint name, which can be done with following construct: 36 | 37 | #+begin_example sh 38 | SEC("tracepoint/xdp/xdp_exception") 39 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) 40 | #+end_example 41 | 42 | Via the libbpf library =open= and =load= the bpf_object the usual way. E.g. 43 | 44 | #+begin_src C 45 | obj = bpf_object__open_file(cfg->filename, NULL) 46 | bpf_object__load(obj); 47 | #+end_src 48 | 49 | You can then iterate through all the programs and attach 50 | every program to the tracepoint: 51 | 52 | #+begin_src C 53 | bpf_object__for_each_program(prog, obj) { 54 | ... 55 | tp_link = bpf_program__attach_tracepoint(prog, "xdp", tp); 56 | err = libbpf_get_error(tp_link); 57 | ... 58 | } 59 | #+end_src 60 | 61 | for more details please check load_bpf_and_trace_attach function 62 | in [[file:trace_load_and_stats.c]] object. 
63 | 64 | * Assignments 65 | 66 | ** Assignment 1: Monitor all xdp tracepoints 67 | 68 | #+begin_example sh 69 | $ sudo ./trace_load_and_stats 70 | XDP-event CPU:to pps drop-pps extra-info 71 | XDP_REDIRECT total 0 0 Success 72 | XDP_REDIRECT total 0 0 Error 73 | Exception 0 0 11 XDP_UNKNOWN 74 | Exception 1 0 2 XDP_UNKNOWN 75 | Exception 2 0 36 XDP_UNKNOWN 76 | Exception 3 0 29 XDP_UNKNOWN 77 | Exception 4 0 3 XDP_UNKNOWN 78 | Exception 5 0 8 XDP_UNKNOWN 79 | Exception total 0 91 XDP_UNKNOWN 80 | cpumap-kthread total 0 0 0 81 | devmap-xmit total 0 0 0.00 82 | #+end_example 83 | 84 | * Alternative solutions 85 | 86 | ** bpftrace 87 | 88 | With the bpftrace tool it is easy to construct a oneliner that can capture and 89 | e.g. count the events of a given tracepoint. E.g. attaching to all XDP 90 | tracepoints and counting them: 91 | 92 | #+begin_example sh 93 | sudo bpftrace -e 'tracepoint:xdp:* { @cnt[probe] = count(); }' 94 | Attaching 12 probes... 95 | ^C 96 | 97 | @cnt[tracepoint:xdp:mem_connect]: 18 98 | @cnt[tracepoint:xdp:mem_disconnect]: 18 99 | @cnt[tracepoint:xdp:xdp_exception]: 19605 100 | @cnt[tracepoint:xdp:xdp_devmap_xmit]: 1393604 101 | @cnt[tracepoint:xdp:xdp_redirect]: 22292200 102 | #+end_example 103 | 104 | To extract the "ERRNO" being returned as part of the =err= parameter, this 105 | bpftrace oneliner can be useful: 106 | 107 | #+begin_example sh 108 | sudo bpftrace -e \ 109 | 'tracepoint:xdp:xdp_redirect*_err {@redir_errno[-args->err] = count();} 110 | tracepoint:xdp:xdp_devmap_xmit {@devmap_errno[-args->err] = count();}' 111 | #+end_example 112 | 113 | ** perf record 114 | 115 | The perf tool also supports recording tracepoints out of the box: 116 | 117 | #+begin_src sh 118 | perf record -a -e xdp:xdp_redirect_err \ 119 | -e xdp:xdp_redirect_map_err \ 120 | -e xdp:xdp_exception \ 121 | -e xdp:xdp_devmap_xmit 122 | #+end_src 123 | -------------------------------------------------------------------------------- /tracing02-xdp-monitor/trace_prog_kern.c:
-------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | #include 5 | 6 | struct { 7 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 8 | __type(key, __u32); 9 | __type(value, __u64); 10 | __uint(max_entries, 2); 11 | } redirect_err_cnt SEC(".maps"); 12 | 13 | #define XDP_UNKNOWN (XDP_REDIRECT + 1) /* parenthesized so the macro expands safely inside any expression */ 14 | struct { 15 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 16 | __type(key, __u32); 17 | __type(value, __u64); 18 | __uint(max_entries, 6); 19 | } exception_cnt SEC(".maps"); 20 | 21 | /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format 22 | * Code in: kernel/include/trace/events/xdp.h 23 | */ 24 | struct xdp_redirect_ctx { 25 | __u64 pad; // First 8 bytes are not accessible by bpf code 26 | int prog_id; // offset: 8; size:4; signed:1; 27 | __u32 act; // offset:12 size:4; signed:0; 28 | int ifindex; // offset:16 size:4; signed:1; 29 | int err; // offset:20 size:4; signed:1; 30 | int to_ifindex; // offset:24 size:4; signed:1; 31 | __u32 map_id; // offset:28 size:4; signed:0; 32 | int map_index; // offset:32 size:4; signed:1; 33 | }; // offset:36 34 | 35 | enum { 36 | XDP_REDIRECT_SUCCESS = 0, 37 | XDP_REDIRECT_ERROR = 1 38 | }; 39 | 40 | static __always_inline 41 | int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) 42 | { 43 | __u32 key = XDP_REDIRECT_ERROR; 44 | int err = ctx->err; 45 | __u64 *cnt; 46 | 47 | if (!err) 48 | key = XDP_REDIRECT_SUCCESS; 49 | 50 | cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key); 51 | if (!cnt) 52 | return 1; 53 | *cnt += 1; 54 | 55 | return 0; /* Indicate event was filtered (no further processing)*/ 56 | /* 57 | * Returning 1 here would allow e.g. a perf-record tracepoint 58 | * to see and record these events, but it doesn't work well 59 | * in-practice as stopping perf-record also unload this 60 | * bpf_prog. Plus, there is additional overhead of doing so.
61 | */ 62 | } 63 | 64 | SEC("tracepoint/xdp/xdp_redirect_err") 65 | int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) 66 | { 67 | return xdp_redirect_collect_stat(ctx); 68 | } 69 | 70 | SEC("tracepoint/xdp/xdp_redirect_map_err") 71 | int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) 72 | { 73 | return xdp_redirect_collect_stat(ctx); 74 | } 75 | 76 | /* Likely unloaded when prog starts */ 77 | SEC("tracepoint/xdp/xdp_redirect") 78 | int trace_xdp_redirect(struct xdp_redirect_ctx *ctx) 79 | { 80 | return xdp_redirect_collect_stat(ctx); 81 | } 82 | 83 | /* Likely unloaded when prog starts */ 84 | SEC("tracepoint/xdp/xdp_redirect_map") 85 | int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx) 86 | { 87 | return xdp_redirect_collect_stat(ctx); 88 | } 89 | 90 | /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format 91 | * Code in: kernel/include/trace/events/xdp.h 92 | */ 93 | struct xdp_exception_ctx { 94 | __u64 pad; 95 | int prog_id; // offset:0; size:4; signed:1; 96 | __u32 act; // offset:4; size:4; signed:0; 97 | int ifindex; // offset:8; size:4; signed:1; 98 | }; 99 | 100 | SEC("tracepoint/xdp/xdp_exception") 101 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) 102 | { 103 | __u64 *cnt; 104 | __u32 key; 105 | 106 | key = ctx->act; 107 | if (key > XDP_REDIRECT) 108 | key = XDP_UNKNOWN; 109 | 110 | cnt = bpf_map_lookup_elem(&exception_cnt, &key); 111 | if (!cnt) 112 | return 1; 113 | *cnt += 1; 114 | 115 | return 0; 116 | } 117 | 118 | /* Common stats data record shared with _user.c */ 119 | struct datarec { 120 | __u64 processed; 121 | __u64 dropped; 122 | __u64 info; 123 | __u64 err; 124 | }; 125 | #define MAX_CPUS 64 126 | 127 | struct { 128 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 129 | __type(key, __u32); 130 | __type(value, struct datarec); 131 | __uint(max_entries, MAX_CPUS); 132 | } cpumap_enqueue_cnt SEC(".maps"); 133 | 134 | struct { 135 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 136 | __type(key, 
__u32); 137 | __type(value, struct datarec); 138 | __uint(max_entries, 1); 139 | } cpumap_kthread_cnt SEC(".maps"); 140 | 141 | /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format 142 | * Code in: kernel/include/trace/events/xdp.h 143 | */ 144 | struct cpumap_enqueue_ctx { 145 | __u64 pad; 146 | int map_id; // offset: 0; size:4; signed:1; 147 | __u32 act; // offset: 4; size:4; signed:0; 148 | int cpu; // offset: 8; size:4; signed:1; 149 | unsigned int drops; // offset:12; size:4; signed:0; 150 | unsigned int processed; // offset:16; size:4; signed:0; 151 | int to_cpu; // offset:20; size:4; signed:1; 152 | }; 153 | 154 | SEC("tracepoint/xdp/xdp_cpumap_enqueue") 155 | int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) 156 | { 157 | __u32 to_cpu = ctx->to_cpu; 158 | struct datarec *rec; 159 | 160 | if (to_cpu >= MAX_CPUS) 161 | return 1; 162 | 163 | rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); 164 | if (!rec) 165 | return 0; 166 | rec->processed += ctx->processed; 167 | rec->dropped += ctx->drops; 168 | 169 | /* Record bulk events, then userspace can calc average bulk size */ 170 | if (ctx->processed > 0) 171 | rec->info += 1; 172 | 173 | return 0; 174 | } 175 | 176 | /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format 177 | * Code in: kernel/include/trace/events/xdp.h 178 | */ 179 | struct cpumap_kthread_ctx { 180 | __u64 pad; 181 | int map_id; // offset: 0; size:4; signed:1; 182 | __u32 act; // offset: 4; size:4; signed:0; 183 | int cpu; // offset: 8; size:4; signed:1; 184 | unsigned int drops; // offset:12; size:4; signed:0; 185 | unsigned int processed; // offset:16; size:4; signed:0; 186 | int sched; // offset:20; size:4; signed:1; 187 | }; 188 | 189 | SEC("tracepoint/xdp/xdp_cpumap_kthread") 190 | int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) 191 | { 192 | struct datarec *rec; 193 | __u32 key = 0; 194 | 195 | rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); 196 | if 
(!rec) 197 | return 0; 198 | rec->processed += ctx->processed; 199 | rec->dropped += ctx->drops; 200 | 201 | /* Count times kthread yielded CPU via schedule call */ 202 | if (ctx->sched) 203 | rec->info++; 204 | 205 | return 0; 206 | } 207 | struct { 208 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 209 | __type(key, __u32); 210 | __type(value, struct datarec); 211 | __uint(max_entries, 1); 212 | } devmap_xmit_cnt SEC(".maps"); 213 | 214 | /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format 215 | * Code in: kernel/include/trace/events/xdp.h 216 | */ 217 | struct devmap_xmit_ctx { 218 | __u64 pad; 219 | int from_ifindex; // offset: 0; size:4; signed:1; 220 | __u32 act; // offset: 4; size:4; signed:0; 221 | int to_ifindex; // offset: 8; size:4; signed:1; 222 | int drops; // offset:12; size:4; signed:1; 223 | int sent; // offset:16; size:4; signed:1; 224 | int err; // offset:28; size:4; signed:1; 225 | }; 226 | 227 | SEC("tracepoint/xdp/xdp_devmap_xmit") 228 | int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) 229 | { 230 | struct datarec *rec; 231 | __u32 key = 0; 232 | 233 | rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key); 234 | if (!rec) 235 | return 0; 236 | rec->processed += ctx->sent; 237 | rec->dropped += ctx->drops; 238 | 239 | /* Record bulk events, then userspace can calc average bulk size */ 240 | rec->info += 1; 241 | 242 | /* Record error cases, where no frame were sent */ 243 | if (ctx->err) 244 | rec->err++; 245 | 246 | /* Catch API error of drv ndo_xdp_xmit sent more than count */ 247 | if (ctx->drops < 0) 248 | rec->err++; 249 | 250 | return 1; 251 | } 252 | -------------------------------------------------------------------------------- /tracing03-xdp-debug-print/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_prog_kern 4 | USER_TARGETS := trace_read 5 | 6 | COPY_LOADER := xdp-loader 7 | 8 | LLC ?= 
llc 9 | CLANG ?= clang 10 | CC := gcc 11 | 12 | COMMON_DIR := ../common 13 | 14 | include $(COMMON_DIR)/common.mk 15 | -------------------------------------------------------------------------------- /tracing03-xdp-debug-print/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Tracing03 - debug print 3 | #+OPTIONS: ^:nil 4 | 5 | In this lesson we will show how to print messages from an eBPF program 6 | into the tracefs buffer. 7 | 8 | * Table of Contents :TOC: 9 | - [[#ebpf-trace-printk-helper][eBPF trace printk helper]] 10 | - [[#the-tracefs-pipe-reader][The tracefs pipe reader]] 11 | - [[#assignments][Assignments]] 12 | - [[#assignment-1-setting-up-your-test-lab][Assignment 1: Setting up your test lab]] 13 | - [[#assignment-2-run-debug-code][Assignment 2: Run debug code]] 14 | 15 | * eBPF trace printk helper 16 | 17 | The bpf_trace_printk helper function is very useful when debugging or 18 | when there's a need for immediate feedback from the eBPF program. 19 | 20 | It offers limited trace_printk capability and basically stores the message 21 | into the tracefs buffer. 22 | 23 | The bpf_trace_printk interface is: 24 | 25 | #+begin_example sh 26 | #define __bpf_printk(fmt, ...) \ 27 | ({ \ 28 | BPF_PRINTK_FMT_MOD char ____fmt[] = fmt; \ 29 | bpf_trace_printk(____fmt, sizeof(____fmt), \ 30 | ##__VA_ARGS__); \ 31 | }) 32 | 33 | /* 34 | * __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments 35 | * instead of an array of u64. 36 | */ 37 | #define __bpf_vprintk(fmt, args...) 
\ 38 | ({ \ 39 | static const char ___fmt[] = fmt; \ 40 | unsigned long long ___param[___bpf_narg(args)]; \ 41 | \ 42 | _Pragma("GCC diagnostic push") \ 43 | _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 44 | ___bpf_fill(___param, args); \ 45 | _Pragma("GCC diagnostic pop") \ 46 | \ 47 | bpf_trace_vprintk(___fmt, sizeof(___fmt), \ 48 | ___param, sizeof(___param)); \ 49 | }) 50 | 51 | /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args 52 | * Otherwise use __bpf_vprintk 53 | */ 54 | #define ___bpf_pick_printk(...) \ 55 | ___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ 56 | __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, \ 57 | __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\ 58 | __bpf_printk /*1*/, __bpf_printk /*0*/) 59 | 60 | /* Helper macro to print out debug messages */ 61 | #define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args) 62 | #+end_example 63 | 64 | Because the above interface requires to put the size of the format 65 | string it's more convenient to use =bpf_printk(fmt, args...)= helper: 66 | 67 | #+begin_example sh 68 | SEC("xdp") 69 | int xdp_prog_simple(struct xdp_md *ctx) 70 | { 71 | bpf_printk("..."); 72 | return XDP_PASS; 73 | } 74 | #+end_example 75 | 76 | * The tracefs pipe reader 77 | 78 | To retrieve the message printed by bpf_trace_printk, you can either 79 | read tracefs buffer directly: 80 | 81 | #+begin_example sh 82 | $ sudo cat /sys/kernel/debug/tracing/trace_pipe 83 | #+end_example 84 | 85 | Or you can use standard C file-reading/parsing code to get the data: 86 | 87 | #+begin_example sh 88 | stream = fopen(TRACEFS_PIPE, "r"); 89 | 90 | ... 91 | 92 | while ((nread = getline(&line, &len, stream)) != -1) { 93 | #+end_example 94 | 95 | for more details please check on trace_read.c file. 
96 | 97 | * Assignments 98 | 99 | ** Assignment 1: Setting up your test lab 100 | 101 | In this lesson we will use the setup of the previous lesson: 102 | Basic02 - loading a program by name [[https://github.com/xdp-project/xdp-tutorial/tree/master/basic02-prog-by-name#assignment-2-add-xdp_abort-program]] 103 | 104 | Setup the environment: 105 | 106 | #+begin_example sh 107 | $ sudo ../testenv/testenv.sh setup --name veth-basic02 108 | #+end_example 109 | 110 | Load XDP program from xdp_prog_kern.o that will print 111 | ethernet header on every incoming packet: 112 | 113 | #+begin_example sh 114 | $ sudo xdp-loader load veth-basic02 xdp_prog_kern.o -s xdp 115 | #+end_example 116 | 117 | and generate some packets: 118 | 119 | #+begin_example sh 120 | $ sudo ../testenv/testenv.sh enter --name veth-basic02 121 | # ping fc00:dead:cafe:1::1 122 | PING fc00:dead:cafe:1::1(fc00:dead:cafe:1::1) 56 data bytes 123 | #+end_example 124 | 125 | ** Assignment 2: Run debug code 126 | 127 | #+begin_example sh 128 | bpf_printk("src: %llu, dst: %llu, proto: %u\n", 129 | ether_addr_to_u64(eth->h_source), 130 | ether_addr_to_u64(eth->h_dest), 131 | bpf_ntohs(eth->h_proto)); 132 | #+end_example 133 | 134 | You can monitor the message either via tracefs: 135 | 136 | #+begin_example sh 137 | $ sudo cat /sys/kernel/debug/tracing/trace_pipe 138 | ping-28172 [001] ..s1 155229.100016: 0: src: 99726513069783, dst: 63819112930922, proto: 56710 139 | ping-28172 [001] ..s1 155230.124054: 0: src: 99726513069783, dst: 63819112930922, proto: 56710 140 | ping-28172 [001] ..s1 155231.148018: 0: src: 99726513069783, dst: 63819112930922, proto: 56710 141 | ping-28172 [001] ..s1 155232.172022: 0: src: 99726513069783, dst: 63819112930922, proto: 56710 142 | #+end_example 143 | 144 | or with the trace_read application: 145 | 146 | #+begin_example sh 147 | $ sudo ./trace_read 148 | src: 5a:b3:63:62:de:d7 dst: 3a:b:b:8e:5e:6a proto: 56710 149 | src: 5a:b3:63:62:de:d7 dst: 3a:b:b:8e:5e:6a proto: 56710 150 | 
src: 5a:b3:63:62:de:d7 dst: 3a:b:b:8e:5e:6a proto: 56710 151 | ... 152 | #+end_example 153 | -------------------------------------------------------------------------------- /tracing03-xdp-debug-print/trace_read.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | #define _GNU_SOURCE 4 | #include 5 | #include 6 | #include 7 | #include "../common/common_defines.h" 8 | #include 9 | 10 | #define TRACEFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" 11 | 12 | #ifndef PATH_MAX 13 | #define PATH_MAX 4096 14 | #endif 15 | 16 | static void print_ether_addr(const char *type, char *str) 17 | { 18 | __u64 addr; 19 | /* str is NULL when the trace line ended right after the tag: nothing to print */ 20 | if (!str || 1 != sscanf(str, "%llu", &addr)) 21 | return; 22 | /* NOTE(review): viewing the u64 as 6 address bytes assumes a little-endian host - confirm */ 23 | printf("%s: %s ", type, ether_ntoa((struct ether_addr *) &addr)); 24 | } 25 | 26 | int main(int argc, char **argv) 27 | { 28 | FILE *stream; 29 | char *line = NULL; 30 | size_t len = 0; 31 | ssize_t nread; 32 | 33 | stream = fopen(TRACEFS_PIPE, "r"); 34 | if (stream == NULL) { 35 | perror("fopen"); 36 | exit(EXIT_FAILURE); 37 | } 38 | 39 | 40 | while ((nread = getline(&line, &len, stream)) != -1) { 41 | char *tok, *saveptr; 42 | unsigned int proto; 43 | 44 | tok = strtok_r(line, " ", &saveptr); 45 | /* strtok_r() returns NULL once the line is exhausted, so tok is re-checked before every use */ 46 | while (tok) { 47 | if (!strncmp(tok, "src:", 4)) { 48 | tok = strtok_r(NULL, " ", &saveptr); 49 | print_ether_addr("src", tok); 50 | } 51 | 52 | if (tok && !strncmp(tok, "dst:", 4)) { 53 | tok = strtok_r(NULL, " ", &saveptr); 54 | print_ether_addr("dst", tok); 55 | } 56 | 57 | if (tok && !strncmp(tok, "proto:", 6)) { 58 | tok = strtok_r(NULL, " ", &saveptr); 59 | if (tok && 1 == sscanf(tok, "%u", &proto)) 60 | printf("proto: %u", proto); 61 | } 62 | tok = tok ? strtok_r(NULL, " ", &saveptr) : NULL; 63 | } 64 | 65 | printf("\n"); 66 | } 67 | 68 | free(line); 69 | fclose(stream); 70 | return EXIT_OK; 71 | } 72 | -------------------------------------------------------------------------------- /tracing03-xdp-debug-print/xdp_prog_kern.c: 
-------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /* to u64 in host order */ 13 | static inline __u64 ether_addr_to_u64(const __u8 *addr) 14 | { 15 | __u64 u = 0; 16 | int i; 17 | 18 | for (i = ETH_ALEN - 1; i >= 0; i--) 19 | u = u << 8 | addr[i]; 20 | return u; 21 | } 22 | 23 | SEC("xdp") 24 | int xdp_prog_simple(struct xdp_md *ctx) 25 | { 26 | void *data = (void *)(long)ctx->data; 27 | void *data_end = (void *)(long)ctx->data_end; 28 | struct ethhdr *eth = data; 29 | __u64 offset = sizeof(*eth); 30 | 31 | if ((void *)eth + offset > data_end) 32 | return 0; 33 | 34 | bpf_printk("src: %llu, dst: %llu, proto: %u\n", 35 | ether_addr_to_u64(eth->h_source), 36 | ether_addr_to_u64(eth->h_dest), 37 | bpf_ntohs(eth->h_proto)); 38 | 39 | return XDP_PASS; 40 | } 41 | 42 | char _license[] SEC("license") = "GPL"; 43 | -------------------------------------------------------------------------------- /tracing04-xdp-tcpdump/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) 2 | 3 | XDP_TARGETS := xdp_sample_pkts_kern 4 | USER_TARGETS := xdp_sample_pkts_user 5 | LDLIBS+=-lpcap 6 | 7 | COMMON_DIR = ../common 8 | 9 | include $(COMMON_DIR)/common.mk 10 | -------------------------------------------------------------------------------- /tracing04-xdp-tcpdump/README.org: -------------------------------------------------------------------------------- 1 | # -*- fill-column: 76; -*- 2 | #+TITLE: Tutorial: Tracing04 - tcpdump 3 | #+OPTIONS: ^:nil 4 | 5 | In this lesson we will show how to dump the packet samples 6 | from XDP program all the way to the pcap dump file. 
7 | 8 | 9 | * Table of Contents :TOC: 10 | - [[#dump-the-packet-sample][Dump the packet sample]] 11 | - [[#assignments][Assignments]] 12 | - [[#assignment-1-setting-up-your-test-lab][Assignment 1: Setting up your test lab]] 13 | - [[#assignment-2-the-pcap-dump-file][Assignment 2: The PCAP dump file]] 14 | 15 | * Dump the packet sample 16 | 17 | In this example we will show how to send data and packet sample 18 | into user space via perf event. 19 | 20 | First you need to define the event map, which will allow you 21 | to send events to the user space via perf event ring buffer: 22 | 23 | #+begin_example sh 24 | struct { 25 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 26 | __type(key, int); 27 | __type(value, struct S); 28 | __uint(max_entries, MAX_CPUS); 29 | } my_map SEC(".maps"); 30 | #+end_example 31 | 32 | The =value= determines the type of the event data we will 33 | be posting to the user space through perf event ring buffer. 34 | In our case it's =struct S=: 35 | 36 | #+begin_example sh 37 | struct S { 38 | __u16 cookie; 39 | __u16 pkt_len; 40 | } __packed; 41 | #+end_example 42 | 43 | We set the values of the event (=metadata= variable) and pass them 44 | into the =bpf_perf_event_output= call: 45 | 46 | #+begin_example sh 47 | int xdp_sample_prog(struct xdp_md *ctx) 48 | { 49 | void *data_end = (void *)(long)ctx->data_end; 50 | void *data = (void *)(long)ctx->data; 51 | 52 | ... 53 | 54 | __u64 flags = BPF_F_CURRENT_CPU; 55 | __u16 sample_size = (__u16)(data_end - data); 56 | struct S metadata; 57 | 58 | metadata.cookie = 0xdead; 59 | metadata.pkt_len = min(sample_size, SAMPLE_SIZE); 60 | 61 | ret = bpf_perf_event_output(ctx, &my_map, flags, 62 | &metadata, sizeof(metadata)); 63 | ... 
64 | #+end_example 65 | 66 | To add the actual packet dump to the event, we can 67 | set =flags= upper 32 bits with the size of the requested sample 68 | and the =bpf_perf_event_output= will attach the specified 69 | amount of bytes from packet to the perf event: 70 | 71 | 72 | #+begin_example sh 73 | __u64 flags = BPF_F_CURRENT_CPU; 74 | 75 | flags |= (__u64)sample_size << 32; 76 | 77 | ret = bpf_perf_event_output(ctx, &my_map, flags, 78 | &metadata, sizeof(metadata)); 79 | #+end_example 80 | 81 | Please check the whole eBPF code in =xdp_sample_pkts_kern.c= file. 82 | 83 | * Assignments 84 | 85 | ** Assignment 1: Setting up your test lab 86 | 87 | In this lesson we will use the setup of the previous lesson: 88 | Basic02 - loading a program by name [[https://github.com/xdp-project/xdp-tutorial/tree/master/basic02-prog-by-name#assignment-2-add-xdp_abort-program]] 89 | 90 | #+begin_example sh 91 | $ sudo ../testenv/testenv.sh setup --name veth-basic02 92 | #+end_example 93 | 94 | and make some packets: 95 | 96 | #+begin_example sh 97 | $ sudo ../testenv/testenv.sh enter --name veth-basic02 98 | # ping fc00:dead:cafe:1::1 99 | PING fc00:dead:cafe:1::1(fc00:dead:cafe:1::1) 56 data bytes 100 | #+end_example 101 | 102 | ** Assignment 2: The PCAP dump file 103 | 104 | Build the =xdp_sample_pkts_user= dump program; to do so you might have to 105 | install the =libpcap-dev= and the 32 bit libc dev packages. Load the eBPF 106 | kernel packets dump program and store the packets to the dump file: 107 | 108 | #+begin_example sh 109 | $ sudo ./xdp_sample_pkts_user -d veth-basic02 -F 110 | pkt len: 118 bytes. hdr: 76 58 28 55 df 4e fa e2 b6 27 8e 79 86 dd 60 0d 48 1b 00 40 3a 40 fc 00 de ad ca fe 00 ... 111 | pkt len: 118 bytes. hdr: 76 58 28 55 df 4e fa e2 b6 27 8e 79 86 dd 60 0d 48 1b 00 40 3a 40 fc 00 de ad ca fe 00 ... 
112 | ^C 113 | 2 packet samples stored in samples.pcap 114 | #+end_example 115 | 116 | Check the pcap dump with the tcpdump application: 117 | #+begin_example sh 118 | $ tcpdump -r ./samples.pcap 119 | reading from file ./samples.pcap, link-type EN10MB (Ethernet) 120 | 12:12:04.553039 IP6 fc00:dead:cafe:1::2 > krava: ICMP6, echo request, seq 2177, length 64 121 | 12:12:05.576864 IP6 fc00:dead:cafe:1::2 > krava: ICMP6, echo request, seq 2178, length 64 122 | #+end_example 123 | -------------------------------------------------------------------------------- /tracing04-xdp-tcpdump/xdp_sample_pkts_kern.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | #include 3 | #include 4 | 5 | #define SAMPLE_SIZE 1024ul 6 | #define MAX_CPUS 128 7 | 8 | #ifndef __packed 9 | #define __packed __attribute__((packed)) 10 | #endif 11 | 12 | #define min(x, y) ((x) < (y) ? (x) : (y)) 13 | 14 | /* Metadata will be in the perf event before the packet data. */ 15 | struct S { 16 | __u16 cookie; 17 | __u16 pkt_len; 18 | } __packed; 19 | 20 | struct { 21 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 22 | __type(key, int); 23 | __type(value, struct S); 24 | __uint(max_entries, MAX_CPUS); 25 | } my_map SEC(".maps"); 26 | 27 | SEC("xdp") 28 | int xdp_sample_prog(struct xdp_md *ctx) 29 | { 30 | void *data_end = (void *)(long)ctx->data_end; 31 | void *data = (void *)(long)ctx->data; 32 | if (data < data_end) { 33 | /* The XDP perf_event_output handler will use the upper 32 bits 34 | * of the flags argument as a number of bytes to include of the 35 | * packet payload in the event data. If the size is too big, the 36 | * call to bpf_perf_event_output will fail and return -EFAULT. 37 | * 38 | * See bpf_xdp_event_output in net/core/filter.c. 39 | * 40 | * The BPF_F_CURRENT_CPU flag means that the event output fd 41 | * will be indexed by the CPU number in the event map. 
42 | */ 43 | __u64 flags = BPF_F_CURRENT_CPU; 44 | __u16 sample_size = (__u16)(data_end - data); 45 | int ret; 46 | struct S metadata; 47 | 48 | metadata.cookie = 0xdead; 49 | metadata.pkt_len = min(sample_size, SAMPLE_SIZE); 50 | /* Request only the capped length: userspace reads at most pkt_len bytes of packet data */ 51 | flags |= (__u64)metadata.pkt_len << 32; 52 | 53 | ret = bpf_perf_event_output(ctx, &my_map, flags, 54 | &metadata, sizeof(metadata)); 55 | if (ret) 56 | bpf_printk("perf_event_output failed: %d\n", ret); 57 | } 58 | 59 | return XDP_PASS; 60 | } 61 | 62 | char _license[] SEC("license") = "GPL"; 63 | -------------------------------------------------------------------------------- /tracing04-xdp-tcpdump/xdp_sample_pkts_user.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | static const char *__doc__ = "XDP sample packet\n"; 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #define PCAP_DONT_INCLUDE_PCAP_BPF_H 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "../common/common_params.h" 32 | #include "../common/common_user_bpf_xdp.h" 33 | 34 | #ifndef __packed 35 | #define __packed __attribute__((packed)) 36 | #endif 37 | 38 | #define MAX_CPUS 128 39 | 40 | static pcap_t* pd; 41 | static pcap_dumper_t* pdumper; 42 | static unsigned int pcap_pkts; 43 | static struct config cfg = { 44 | .ifindex = -1, 45 | }; 46 | static struct xdp_program *prog; 47 | struct perf_buffer *pb; 48 | 49 | static const char *default_filename = "samples.pcap"; 50 | #define SAMPLE_SIZE 1024 51 | #define NANOSECS_PER_USEC 1000 52 | 53 | static inline int 54 | sys_perf_event_open(struct perf_event_attr *attr, 55 | pid_t pid, int cpu, int group_fd, 56 | unsigned long flags) 57 | { 58 | return syscall(__NR_perf_event_open, attr, 
pid, cpu, group_fd, flags); 59 | } 60 | 61 | static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) 62 | { 63 | struct { 64 | __u16 cookie; 65 | __u16 pkt_len; 66 | __u8 pkt_data[SAMPLE_SIZE]; 67 | } __packed *e = data; 68 | struct pcap_pkthdr h = { 69 | .caplen = e->pkt_len, 70 | .len = e->pkt_len, 71 | }; 72 | struct timespec ts; 73 | int i, err; 74 | 75 | if (e->cookie != 0xdead) 76 | printf("BUG cookie %x sized %d\n", 77 | e->cookie, size); 78 | 79 | err = clock_gettime(CLOCK_MONOTONIC, &ts); 80 | if (err < 0) 81 | printf("Error with clock_gettime! (%i)\n", err); 82 | 83 | h.ts.tv_sec = ts.tv_sec; 84 | h.ts.tv_usec = ts.tv_nsec / NANOSECS_PER_USEC; 85 | 86 | if (verbose) { 87 | printf("pkt len: %-5d bytes. hdr: ", e->pkt_len); 88 | for (i = 0; i < e->pkt_len; i++) 89 | printf("%02x ", e->pkt_data[i]); 90 | printf("\n"); 91 | } 92 | 93 | pcap_dump((u_char *) pdumper, &h, e->pkt_data); 94 | pcap_pkts++; 95 | } 96 | 97 | static void sig_handler(int signo) 98 | { 99 | struct xdp_multiprog *mp = xdp_multiprog__get_from_ifindex(cfg.ifindex); 100 | enum xdp_attach_mode m = xdp_multiprog__attach_mode(mp); 101 | 102 | printf("\n Cleaning up..."); 103 | xdp_program__detach(prog, cfg.ifindex, m, 0); 104 | perf_buffer__free(pb); 105 | pcap_dump_close(pdumper); 106 | pcap_close(pd); 107 | printf("\n%u packet samples stored in %s\n", pcap_pkts, cfg.filename); 108 | exit(0); 109 | } 110 | 111 | static const struct option_wrapper long_options[] = { 112 | {{"help", no_argument, NULL, 'h' }, 113 | "Show help", false}, 114 | 115 | {{"force", no_argument, NULL, 'F' }, 116 | "Force install, replacing existing program on interface"}, 117 | 118 | {{"dev", required_argument, NULL, 'd' }, 119 | "Operate on device ", "", true}, 120 | 121 | {{"filename", required_argument, NULL, 1 }, 122 | "Store packet sample into ", ""}, 123 | 124 | {{"quiet", no_argument, NULL, 'q' }, 125 | "Quiet mode (no output)"}, 126 | 127 | {{0, 0, NULL, 0 }, NULL, false} 128 | }; 129 | 130 | 
int main(int argc, char **argv) 131 | { 132 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 133 | int map_fd; 134 | struct bpf_map *map; 135 | char filename[256]; 136 | int err; 137 | DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); 138 | DECLARE_LIBXDP_OPTS(xdp_program_opts, xdp_opts, 0); 139 | char progname[] = "xdp_sample_prog"; 140 | char errmsg[1024]; 141 | 142 | strncpy(cfg.filename, default_filename, sizeof(cfg.filename)); 143 | 144 | /* Cmdline options can change these */ 145 | parse_cmdline_args(argc, argv, long_options, &cfg, __doc__); 146 | 147 | /* Required option */ 148 | if (cfg.ifindex == -1) { 149 | fprintf(stderr, "ERR: required option --dev missing\n"); 150 | usage(argv[0], __doc__, long_options, (argc == 1)); 151 | return EXIT_FAIL_OPTION; 152 | } 153 | 154 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { 155 | perror("setrlimit(RLIMIT_MEMLOCK)"); 156 | return 1; 157 | } 158 | 159 | snprintf(filename, sizeof(filename), "xdp_sample_pkts_kern.o"); 160 | xdp_opts.open_filename = filename; 161 | xdp_opts.prog_name = progname; 162 | xdp_opts.opts = &opts; 163 | 164 | prog = xdp_program__create(&xdp_opts); 165 | err = libxdp_get_error(prog); 166 | if (err) { 167 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 168 | fprintf(stderr, "ERR: loading program: %s\n", errmsg); 169 | return err; 170 | } 171 | err = xdp_program__attach(prog, cfg.ifindex, cfg.attach_mode, 0); 172 | if (err) { 173 | libxdp_strerror(err, errmsg, sizeof(errmsg)); 174 | fprintf(stderr, "Couldn't attach XDP program on iface '%s' : %s (%d)\n", 175 | cfg.ifname, errmsg, err); 176 | return err; 177 | } 178 | /* From here on the program is attached: failure paths go through out: so it gets detached */ 179 | map = bpf_object__next_map(xdp_program__bpf_obj(prog), NULL); 180 | if (!map) { 181 | fprintf(stderr, "finding a map in obj file failed\n"); 182 | goto out; 183 | } 184 | map_fd = bpf_map__fd(map); 185 | /* signal() returns the previous handler; only SIG_ERR indicates failure */ 186 | if (signal(SIGINT, sig_handler) == SIG_ERR || 187 | signal(SIGHUP, sig_handler) == SIG_ERR || 188 | signal(SIGTERM, sig_handler) == SIG_ERR) { 189 | fprintf(stderr, "signal"); 190 | goto out; 191 | } 192 | pb 
= perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); 193 | err = libbpf_get_error(pb); 194 | if (err) { 195 | fprintf(stderr, "perf_buffer setup failed"); 196 | return 1; 197 | } 198 | 199 | pd = pcap_open_dead(DLT_EN10MB, 65535); 200 | if (!pd) { 201 | perf_buffer__free(pb); 202 | goto out; 203 | } 204 | 205 | pdumper = pcap_dump_open(pd, cfg.filename); 206 | if (!pdumper) { 207 | perf_buffer__free(pb); 208 | pcap_close(pd); 209 | goto out; 210 | } 211 | 212 | while ((err = perf_buffer__poll(pb, 1000)) >= 0) { 213 | } 214 | 215 | return 0; 216 | out: 217 | xdp_program__detach(prog, cfg.ifindex, cfg.attach_mode, 0); 218 | return -1; 219 | } 220 | --------------------------------------------------------------------------------