├── .gitignore ├── Makefile ├── README.md ├── configure ├── docs └── netdev-0x14-XDP-and-the-cloud.pdf ├── include ├── execsnoop.h ├── napi_poll.h ├── net_rx_action.h ├── opensnoop.h ├── ovslatency.h ├── pktdrop.h ├── pktlatency.h ├── sched_tp.h ├── tcp_probe.h ├── tools │ └── linux │ │ ├── compiler-gcc.h │ │ ├── compiler.h │ │ ├── export.h │ │ ├── kernel.h │ │ ├── list.h │ │ ├── poison.h │ │ ├── rbtree.h │ │ └── rbtree_augmented.h ├── tp_verify.h ├── uapi │ └── linux │ │ ├── bpf.h │ │ └── perf_event.h ├── vm_info.h ├── xdp_acl.h ├── xdp_devmap_xmit.h └── xdp_fdb.h ├── ksrc ├── Makefile ├── acl_simple.h ├── acl_vm_rx.c ├── acl_vm_tx.c ├── bpf_debug.h ├── channel_map.c ├── eth_helpers.h ├── execsnoop.c ├── execsnoop_legacy.c ├── flow.h ├── include │ └── asm_goto_workaround.h ├── ipv6_helpers.h ├── kvm-nested.c ├── napi_poll.c ├── net_rx_action.c ├── opensnoop.c ├── ovslatency.c ├── pktdrop.c ├── pktlatency.c ├── rx_acl.c ├── set_current_info.c ├── tcp_probe.c ├── xdp_devmap_xmit.c ├── xdp_dummy.c ├── xdp_l2fwd.c ├── xdp_l3fwd.c ├── xdp_vlan.h └── xdp_vmegress.c ├── libbpf └── usr │ ├── include │ └── bpf │ │ ├── bpf.h │ │ ├── bpf_core_read.h │ │ ├── bpf_endian.h │ │ ├── bpf_helper_defs.h │ │ ├── bpf_helpers.h │ │ ├── bpf_tracing.h │ │ ├── btf.h │ │ ├── libbpf.h │ │ ├── libbpf_common.h │ │ ├── libbpf_util.h │ │ └── xsk.h │ └── lib64 │ ├── libbpf.a │ └── pkgconfig │ └── libbpf.pc ├── scripts ├── l2fwd-demo.sh └── l2fwd-only.sh ├── src ├── Makefile ├── bpf_util.h ├── cgroup_sock.c ├── execsnoop.c ├── flow.h ├── kprobes.c ├── kprobes.h ├── ksyms.c ├── ksyms.h ├── kvm-nested.c ├── libbpf_helpers.c ├── libbpf_helpers.h ├── napi_poll.c ├── net_rx_action.c ├── netmon.c ├── opensnoop.c ├── ovslatency.c ├── parse_pkt.c ├── perf_events.c ├── perf_events.h ├── pktlatency.c ├── print_pkt.c ├── rbtree.c ├── str_utils.c ├── str_utils.h ├── tcp_probe.c ├── timestamps.c ├── timestamps.h ├── tp_verify.c ├── vm_info.c ├── xdp_acl_user.c ├── xdp_devmap_xmit.c ├── xdp_dummy_user.c ├── xdp_l2fwd_user.c └── xdp_l3fwd_user.c └── utils ├── Makefile ├── build-deb.sh └── socktest.c /.gitignore: -------------------------------------------------------------------------------- 1 | obj/ 2 | config.mk 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0 2 | 3 | SUBDIRS = ksrc src 4 | 5 | all: 6 | @for s in $(SUBDIRS); do \ 7 | make -C $$s $(BUILDDIR) all; \ 8 | done 9 | 10 | clean: 11 | @for s in $(SUBDIRS); do \ 12 | make -C $$s $(BUILDDIR) clean; \ 13 | done 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bpf-progs 2 | 3 | This repository is self contained now. It compiles cleanly on Ubuntu 4 | 18.04 for kernel versions 4.14, 4.15, 5.3 and 5.5+. 5 | ``` 6 | apt-get install clang llvm gcc make libelf-dev libpcap-dev 7 | 8 | install linux-headers for kernel version. 9 | ``` 10 | This code is structured to make the data collection as much of a template 11 | as possible, so new programs can copy-modify and focus on the analysis at 12 | hand as much as possible. 13 | 14 | ## netmon 15 | 16 | netmon is similar to dropwatch, but examines the packet headers and summarizes 17 | drops by a number of options: 18 | - source mac, 19 | - destination mac, 20 | - IPv4 source IP, 21 | - IPv4 destination IP, and 22 | - network namespaces. 
23 | 24 | Network namespace support is best effort in determining the association; it has 25 | been used to look at drops for containers. 26 | 27 | ### show packet drops sorted by destination IP 28 | sudo src/obj/netmon -s dip 29 | 30 | ### show packet drops sorted by destination mac 31 | sudo src/obj/netmon -s dmac 32 | 33 | ### TO-DO: 34 | - support for drops at XDP layer 35 | 36 | ## pktlatency 37 | 38 | pktlatency is used to examine the overhead of the host networking stack in 39 | pushing packets to userspace. At the moment it is focused on virtual machines 40 | using tap devices and vhost. The program requires a NIC with PTP support 41 | (e.g., mlx5). I am still fairly new to PTP, so there may well be some 42 | bugs here. 43 | 44 | It too was recently renamed, from skblatency to pktlatency, in anticipation of adding 45 | support for packets pushed to a VM using XDP redirect. 46 | 47 | ### example 48 | sudo src/obj/pktlatency 49 | 50 | ## ovslatency 51 | 52 | ovslatency measures the time to run ovs\_vport\_receive, which is the primary 53 | workhorse for the OVS rx\_handler, netdev\_frame\_hook. 54 | 55 | ## execsnoop / opensnoop 56 | 57 | execsnoop and opensnoop are ebpf versions of what I previously did using 58 | kernel modules. bcc's python versions inspired me to do the deep dive into bpf programs 59 | attached to kprobes and tracepoints to accomplish the same intent with ebpf. 60 | 61 | ### examples 62 | sudo src/obj/execsnoop 63 | sudo src/obj/opensnoop 64 | 65 | ## XDP L2 forwarding 66 | 67 | xdp\_l2fwd handles Layer 2 forwarding between an ingress device (e.g., host 68 | devices) and an egress device (e.g., tap device for VMs). Userspace populates 69 | an FDB (hash map) keyed by \<mac, vlan\> pairs; each entry returns an index into a device 70 | map which contains the device to receive the packet. See scripts/l2fwd-demo.sh 71 | for an example, and the userspace sketch at the end of this README. 72 | 73 | This program is used for the netdev 0x14 tutorial, XDP and the cloud: Using 74 | XDP on hosts and VMs https://netdevconf.info/0x14/session.html?tutorial-XDP-and-the-cloud 75 | 76 | ## Dummy XDP program 77 | 78 | xdp\_dummy is a dummy XDP program that just returns XDP\_PASS. 
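## xdp\_l2fwd userspace sketch

The snippet below is a minimal, illustrative sketch of the userspace side of
xdp\_l2fwd described above: one entry is added to the device map (slot ->
egress netdev) and one entry to the FDB (\<mac, vlan\> -> slot). It assumes the
loader has already pinned both maps and that the FDB value is a u32 slot index;
the pin paths, MAC, VLAN and ifindex shown are made-up placeholders. See
src/xdp\_l2fwd\_user.c and scripts/l2fwd-demo.sh for the real implementation.
```c
/* sketch only: add one <mac, vlan> -> device forwarding entry */
#include <string.h>
#include <stdio.h>
#include <linux/if_ether.h>
#include <bpf/bpf.h>

#include "xdp_fdb.h"		/* struct fdb_key: mac + vlan */

int main(void)
{
	/* hypothetical pin locations created by the loader */
	int fdb_fd = bpf_obj_get("/sys/fs/bpf/fdb_map");
	int dev_fd = bpf_obj_get("/sys/fs/bpf/xdp_fwd_ports");
	struct fdb_key key = { .vlan = 100 };
	__u8 mac[ETH_ALEN] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
	__u32 idx = 1;			/* slot in the device map */
	__u32 tap_ifindex = 42;		/* egress device, e.g. the VM's tap */

	if (fdb_fd < 0 || dev_fd < 0) {
		perror("bpf_obj_get");
		return 1;
	}

	memcpy(key.mac, mac, ETH_ALEN);

	/* device map slot -> egress netdev, then <mac, vlan> -> slot */
	if (bpf_map_update_elem(dev_fd, &idx, &tap_ifindex, BPF_ANY) ||
	    bpf_map_update_elem(fdb_fd, &key, &idx, BPF_ANY)) {
		perror("bpf_map_update_elem");
		return 1;
	}

	return 0;
}
```
Using a device map rather than storing a raw ifindex in the FDB lets the XDP
program redirect with bpf\_redirect\_map, which the kernel can flush to the
egress device in batches.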
79 | -------------------------------------------------------------------------------- /docs/netdev-0x14-XDP-and-the-cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsahern/bpf-progs/e31fa1023bb9af1f977152a69fca5db9ca411f07/docs/netdev-0x14-XDP-and-the-cloud.pdf -------------------------------------------------------------------------------- /include/execsnoop.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _EXECSNOOP_H_ 3 | #define _EXECSNOOP_H_ 4 | 5 | #define ARGSIZE 128 6 | #define MAXARG 20 7 | #define TASK_COMM_LEN 16 8 | 9 | enum event_type { 10 | EVENT_START, 11 | EVENT_ARG, 12 | EVENT_RET, 13 | EVENT_EXIT, 14 | EVENT_MAX, 15 | }; 16 | 17 | struct data { 18 | __u64 time; 19 | __u32 tid; 20 | __u32 pid; 21 | __u32 ppid; 22 | __u16 event_type; 23 | __u16 cpu; 24 | int retval; 25 | char comm[TASK_COMM_LEN]; 26 | char arg[ARGSIZE]; 27 | }; 28 | 29 | /* order of arguments from 30 | * /sys/kernel/tracing/events/syscalls/sys_enter_execve/format 31 | * but skipping all of the common fields: 32 | 33 | field:int __syscall_nr; offset:8; size:4; signed:1; 34 | field:const char * filename; offset:16; size:8; signed:0; 35 | field:const char *const * argv; offset:24; size:8; signed:0; 36 | field:const char *const * envp; offset:32; size:8; signed:0; 37 | */ 38 | 39 | struct execve_enter_args { 40 | unsigned long long unused; 41 | 42 | int __syscall_nr; 43 | const char * filename; 44 | const char *const * argv; 45 | const char *const * envp; 46 | }; 47 | 48 | /* order of arguments from 49 | * /sys/kernel/tracing/events/syscalls/sys_exit_execve/format 50 | * but skipping all of the common fields: 51 | 52 | field:int __syscall_nr; offset:8; size:4; signed:1; 53 | field:long ret; offset:16; size:8; signed:1; 54 | */ 55 | 56 | struct execve_exit_args { 57 | unsigned long long unused; 58 | 59 | int __syscall_nr; 60 | long ret; 61 | }; 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /include/napi_poll.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _NET_NAPI_POLL_H_ 3 | #define _NET_NAPI_POLL_H_ 4 | 5 | #define NAPI_BUCKETS 9 6 | struct napi_poll_hist { 7 | __u64 buckets[NAPI_BUCKETS]; 8 | }; 9 | 10 | /* order of arguments from 11 | * /sys/kernel/debug/tracing/events/napi/napi_poll/format 12 | * but skipping all of the common fields: 13 | * 14 | field:struct napi_struct * napi; offset:8; size:8; signed:0; 15 | field:__data_loc char[] dev_name; offset:16; size:4; signed:1; 16 | field:int work; offset:20; size:4; signed:1; 17 | field:int budget; offset:24; size:4; signed:1; 18 | */ 19 | struct napi_poll_args { 20 | __u64 unused; 21 | 22 | void *napi; 23 | int data_loc_dev_name; 24 | int work; 25 | int budget; 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /include/net_rx_action.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _NET_RX_LATENCY_H_ 3 | #define _NET_RX_LATENCY_H_ 4 | 5 | #define NET_RX_BUCKET_0 5 6 | #define NET_RX_BUCKET_1 10 7 | #define NET_RX_BUCKET_2 25 8 | #define NET_RX_BUCKET_3 50 9 | #define NET_RX_BUCKET_4 100 10 | #define NET_RX_BUCKET_5 500 11 | #define NET_RX_BUCKET_6 1000 12 | #define NET_RX_BUCKET_7 2000 13 | #define 
NET_RX_BUCKET_8 5000 14 | 15 | /* bucket 9 is anything > than bucket 8 */ 16 | /* bucket 10 is errors */ 17 | 18 | #define NET_RX_NUM_BKTS 11 19 | #define NET_RX_ERR_BKT (NET_RX_NUM_BKTS-1) 20 | 21 | struct net_rx_hist_val { 22 | __u64 buckets[NET_RX_NUM_BKTS]; 23 | }; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /include/opensnoop.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _OPENSNOOP_H_ 3 | #define _OPENSNOOP_H_ 4 | 5 | #define ARGSIZE 128 6 | #define TASK_COMM_LEN 16 7 | 8 | 9 | enum event_type { 10 | EVENT_ARG, 11 | EVENT_RET, 12 | }; 13 | 14 | struct data { 15 | __u64 time; 16 | __u32 tid; 17 | __u32 pid; 18 | __u32 ppid; 19 | __u32 flags; 20 | __u32 mode; 21 | __u16 event_type; 22 | __u16 cpu; 23 | int retval; 24 | char comm[TASK_COMM_LEN]; 25 | char filename[ARGSIZE]; 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /include/ovslatency.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _OVSLATENCY_H_ 3 | #define _OVSLATENCY_H_ 4 | 5 | #define OVS_BUCKET_0 10 6 | #define OVS_BUCKET_1 25 7 | #define OVS_BUCKET_2 50 8 | #define OVS_BUCKET_3 100 9 | #define OVS_BUCKET_4 250 10 | #define OVS_BUCKET_5 500 11 | /* bucket 6 is > 5 */ 12 | /* bucket 7 is total number of packets */ 13 | 14 | struct ovslat_hist_val { 15 | __u64 buckets[8]; 16 | }; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /include/pktdrop.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _PKTDROP_H_ 3 | #define _PKTDROP_H_ 4 | 5 | enum event_type { 6 | EVENT_SAMPLE, 7 | EVENT_EXIT, 8 | }; 9 | 10 | struct data { 11 | __u64 time; 12 | __u64 location; 13 | __u64 netns; 14 | __u8 event_type; 15 | __u8 cpu; 16 | __u8 nr_frags; 17 | __u8 pkt_type; 18 | __u16 gso_size; 19 | __be16 protocol; 20 | __u32 reason; 21 | __u32 ifindex; 22 | __u16 vlan_tci; 23 | __be16 vlan_proto; 24 | __u32 pkt_len; 25 | __u8 pkt_data[64]; 26 | }; 27 | 28 | /* order of arguments from 29 | * /sys/kernel/tracing/events/skb/kfree_skb/format 30 | * common fields represented by 'unsigned long long unused;' 31 | 32 | field:void * skbaddr; offset:8; size:8; signed:0; 33 | field:void * location; offset:16; size:8; signed:0; 34 | field:unsigned short protocol; offset:24; size:2; signed:0; 35 | */ 36 | struct kfree_skb_args { 37 | unsigned long long unused; 38 | 39 | void *skbaddr; 40 | void *location; 41 | unsigned short protocol; 42 | unsigned short unused2; 43 | unsigned int reason; 44 | }; 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /include/pktlatency.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _PKTLATENCY_H_ 3 | #define _PKTLATENCY_H_ 4 | 5 | #define PKTLAT_BUCKET_0 25 6 | #define PKTLAT_BUCKET_1 50 7 | #define PKTLAT_BUCKET_2 75 8 | #define PKTLAT_BUCKET_3 100 9 | #define PKTLAT_BUCKET_4 250 10 | #define PKTLAT_BUCKET_5 500 11 | /* bucket 6 is > 5 12 | * bucket 7 is missing timestamps, 13 | * bucket 8 is running sum 14 | */ 15 | #define PKTLAT_MAX_BUCKETS 9 16 | 17 | struct pktlat_ctl { 18 | __u64 ptp_ref; 19 | __u64 mono_ref; 20 | int ifindex_min; /* used to 
ignore packets on eth0, eth1 */ 21 | __u32 latency_gen_sample; /* latency at which a sample is generated */ 22 | __u8 gen_samples; /* send samples to userspace as well as histogram */ 23 | }; 24 | 25 | struct pktlat_hist_key { 26 | __u32 pid; 27 | }; 28 | 29 | struct pktlat_hist_val { 30 | __u64 buckets[PKTLAT_MAX_BUCKETS]; 31 | }; 32 | 33 | enum event_type { 34 | EVENT_SAMPLE, 35 | EVENT_EXIT, 36 | }; 37 | 38 | struct data { 39 | __u64 time; 40 | __s64 tstamp; 41 | __u32 ifindex; 42 | __u32 pkt_len; 43 | __u32 pid; 44 | __u8 event_type; 45 | __u8 cpu; 46 | __be16 protocol; 47 | __u8 pkt_data[64]; 48 | }; 49 | 50 | /* order of arguments from 51 | * /sys/kernel/tracing/events/skb/skb_copy_datagram_iovec/format 52 | * common fields represented by 'unsigned long long unused;' 53 | 54 | field:const void * skbaddr; offset:8; size:8; signed:0; 55 | field:int len; offset:16; size:4; signed:1; 56 | */ 57 | struct skb_dg_iov_args { 58 | unsigned long long unused; 59 | 60 | void *skbaddr; 61 | int len; 62 | }; 63 | 64 | /* order of arguments from 65 | * /sys/kernel/tracing/events/sched/sched_process_exit/format 66 | * common fields represented by 'unsigned long long unused;' 67 | 68 | field:char comm[16]; offset:8; size:16; signed:1; 69 | field:pid_t pid; offset:24; size:4; signed:1; 70 | field:int prio; offset:28; size:4; signed:1; 71 | */ 72 | struct sched_exit_args { 73 | unsigned long long unused; 74 | 75 | char comm[16]; 76 | pid_t pid; 77 | int prio; 78 | }; 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /include/sched_tp.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _SCHED_TP_H_ 3 | #define _SCHED_TP_H_ 4 | 5 | /* order of arguments from 6 | * /sys/kernel/tracing/events/sched/sched_switch/format 7 | * common fields represented by 'unsigned long long unused;' 8 | 9 | field:char prev_comm[16]; offset:8; size:16; signed:1; 10 | field:pid_t prev_pid; offset:24; size:4; signed:1; 11 | field:int prev_prio; offset:28; size:4; signed:1; 12 | field:long prev_state; offset:32; size:8; signed:1; 13 | field:char next_comm[16]; offset:40; size:16; signed:1; 14 | field:pid_t next_pid; offset:56; size:4; signed:1; 15 | field:int next_prio; offset:60; size:4; signed:1; 16 | */ 17 | struct sched_switch_args { 18 | unsigned long long unused; 19 | 20 | char prev_comm[16]; 21 | pid_t prev_pid; 22 | int prev_prio; 23 | long prev_state; 24 | char next_comm[16]; 25 | pid_t next_pid; 26 | int next_prio; 27 | }; 28 | 29 | /* order of arguments from 30 | * /sys/kernel/tracing/events/sched/sched_wakeup/format 31 | * common fields represented by 'unsigned long long unused;' 32 | 33 | field:char comm[16]; offset:8; size:16; signed:1; 34 | field:pid_t pid; offset:24; size:4; signed:1; 35 | field:int prio; offset:28; size:4; signed:1; 36 | field:int success; offset:32; size:4; signed:1; 37 | field:int target_cpu; offset:36; size:4; signed:1; 38 | */ 39 | struct sched_wakeup_args { 40 | unsigned long long unused; 41 | 42 | char comm[16]; 43 | pid_t pid; 44 | int prio; 45 | int success; 46 | int target_cpu; 47 | }; 48 | 49 | /* order of arguments from 50 | * /sys/kernel/tracing/events/sched/sched_process_exit/format 51 | * common fields represented by 'unsigned long long unused;' 52 | 53 | field:char comm[16]; offset:8; size:16; signed:1; 54 | field:pid_t pid; offset:24; size:4; signed:1; 55 | field:int prio; offset:28; size:4; signed:1; 56 | */ 57 | struct sched_exit_args { 58 
| unsigned long long unused; 59 | 60 | char comm[16]; 61 | pid_t pid; 62 | int prio; 63 | }; 64 | 65 | /* order of arguments from 66 | * /sys/kernel/tracing/events/sched/sched_stat_runtime/format 67 | * common fields represented by 'unsigned long long unused;' 68 | 69 | field:char comm[16]; offset:8; size:16; signed:1; 70 | field:pid_t pid; offset:24; size:4; signed:1; 71 | field:u64 runtime; offset:32; size:8; signed:0; 72 | field:u64 vruntime; offset:40; size:8; signed:0; 73 | */ 74 | struct sched_stat_run_args { 75 | unsigned long long unused; 76 | 77 | char comm[16]; 78 | pid_t pid; 79 | u64 runtime; 80 | u64 vruntime; 81 | }; 82 | 83 | /* order of arguments from 84 | * /sys/kernel/tracing/events/sched/sched_stat_wait/format 85 | * common fields represented by 'unsigned long long unused;' 86 | 87 | field:char comm[16]; offset:8; size:16; signed:1; 88 | field:pid_t pid; offset:24; size:4; signed:1; 89 | field:u64 delay; offset:32; size:8; signed:0; 90 | */ 91 | struct sched_stat_wait_args { 92 | unsigned long long unused; 93 | 94 | char comm[16]; 95 | pid_t pid; 96 | u64 delay; 97 | }; 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /include/tcp_probe.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _TCP_PROBE_H_ 3 | #define _TCP_PROBE_H_ 4 | 5 | struct data { 6 | __u64 time; 7 | union { 8 | struct sockaddr s_addr; /* for sa_family check */ 9 | struct sockaddr_in s_in; /* for ipv4 */ 10 | struct sockaddr_in6 s_in6; /* memory allocated */ 11 | }; 12 | union { 13 | struct sockaddr d_addr; /* for sa_family check */ 14 | struct sockaddr_in d_in; /* for ipv4 */ 15 | struct sockaddr_in6 d_in6; /* memory allocated */ 16 | }; 17 | 18 | __u32 mark; 19 | __u16 cpu; 20 | __u16 data_len; /* tcp payload length */ 21 | __u32 snd_nxt; /* next sequence we send */ 22 | __u32 snd_una; /* first byte we want an ack for */ 23 | __u32 snd_cwnd; /* sending congestion window */ 24 | __u32 ssthresh; 25 | __u32 snd_wnd; /* window we expect to receive */ 26 | __u32 rcv_wnd; /* current receiver window */ 27 | __u32 srtt; /* smoothed round trip time */ 28 | }; 29 | 30 | /* order of arguments from 31 | * /sys/kernel/debug/tracing/events/tcp/tcp_probe/format 32 | * but skipping all of the common fields: 33 | * 34 | field:__u8 saddr[sizeof(struct sockaddr_in6)]; offset:8; size:28; signed:0; 35 | field:__u8 daddr[sizeof(struct sockaddr_in6)]; offset:36; size:28; signed:0; 36 | field:__u16 sport; offset:64; size:2; signed:0; 37 | field:__u16 dport; offset:66; size:2; signed:0; 38 | field:__u32 mark; offset:68; size:4; signed:0; 39 | field:__u16 data_len; offset:72; size:2; signed:0; 40 | field:__u32 snd_nxt; offset:76; size:4; signed:0; 41 | field:__u32 snd_una; offset:80; size:4; signed:0; 42 | field:__u32 snd_cwnd; offset:84; size:4; signed:0; 43 | field:__u32 ssthresh; offset:88; size:4; signed:0; 44 | field:__u32 snd_wnd; offset:92; size:4; signed:0; 45 | field:__u32 srtt; offset:96; size:4; signed:0; 46 | field:__u32 rcv_wnd; offset:100; size:4; signed:0; 47 | field:__u64 sock_cookie; offset:104; size:8; signed:0; 48 | 49 | 5.13: 50 | field:__u8 saddr[sizeof(struct sockaddr_in6)]; offset:8; size:28; signed:0; 51 | field:__u8 daddr[sizeof(struct sockaddr_in6)]; offset:36; size:28; signed:0; 52 | field:__u16 sport; offset:64; size:2; signed:0; 53 | field:__u16 dport; offset:66; size:2; signed:0; 54 | field:__u16 family; offset:68; size:2; signed:0; 55 | field:__u32 mark; 
offset:72; size:4; signed:0; 56 | field:__u16 data_len; offset:76; size:2; signed:0; 57 | field:__u32 snd_nxt; offset:80; size:4; signed:0; 58 | field:__u32 snd_una; offset:84; size:4; signed:0; 59 | field:__u32 snd_cwnd; offset:88; size:4; signed:0; 60 | field:__u32 ssthresh; offset:92; size:4; signed:0; 61 | field:__u32 snd_wnd; offset:96; size:4; signed:0; 62 | field:__u32 srtt; offset:100; size:4; signed:0; 63 | field:__u32 rcv_wnd; offset:104; size:4; signed:0; 64 | field:__u64 sock_cookie; offset:112; size:8; signed:0; 65 | */ 66 | struct tcp_probe_args { 67 | unsigned long long unused; 68 | 69 | union { 70 | struct sockaddr s_addr; /* for sa_family check */ 71 | struct sockaddr_in s_in; /* for ipv4 */ 72 | struct sockaddr_in6 s_in6; /* memory allocated */ 73 | }; 74 | union { 75 | struct sockaddr d_addr; /* for sa_family check */ 76 | struct sockaddr_in d_in; /* for ipv4 */ 77 | struct sockaddr_in6 d_in6; /* memory allocated */ 78 | }; 79 | 80 | __u16 sport; 81 | __u16 dport; 82 | __u16 family; 83 | __u16 hole1; 84 | __u32 mark; 85 | __u16 data_len; 86 | __u16 hole2; 87 | __u32 snd_nxt; 88 | __u32 snd_una; 89 | __u32 snd_cwnd; 90 | __u32 ssthresh; 91 | __u32 snd_wnd; 92 | __u32 srtt; 93 | __u32 rcv_wnd; 94 | __u32 hole3; 95 | __u64 sock_cookie; 96 | } __attribute__ ((packed)); 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /include/tools/linux/compiler-gcc.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _TOOLS_LINUX_COMPILER_H_ 3 | #error "Please don't include directly, include instead." 4 | #endif 5 | 6 | /* 7 | * Common definitions for all gcc versions go here. 8 | */ 9 | #ifndef GCC_VERSION 10 | #define GCC_VERSION (__GNUC__ * 10000 \ 11 | + __GNUC_MINOR__ * 100 \ 12 | + __GNUC_PATCHLEVEL__) 13 | #endif 14 | 15 | #if GCC_VERSION >= 70000 && !defined(__CHECKER__) 16 | # define __fallthrough __attribute__ ((fallthrough)) 17 | #endif 18 | 19 | #if GCC_VERSION >= 40300 20 | # define __compiletime_error(message) __attribute__((error(message))) 21 | #endif /* GCC_VERSION >= 40300 */ 22 | 23 | /* &a[0] degrades to a pointer: a different type from an array */ 24 | #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) 25 | 26 | #ifndef __pure 27 | #define __pure __attribute__((pure)) 28 | #endif 29 | #define noinline __attribute__((noinline)) 30 | #ifndef __packed 31 | #define __packed __attribute__((packed)) 32 | #endif 33 | #ifndef __noreturn 34 | #define __noreturn __attribute__((noreturn)) 35 | #endif 36 | #ifndef __aligned 37 | #define __aligned(x) __attribute__((aligned(x))) 38 | #endif 39 | #define __printf(a, b) __attribute__((format(printf, a, b))) 40 | #define __scanf(a, b) __attribute__((format(scanf, a, b))) 41 | 42 | #if GCC_VERSION >= 50100 43 | #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 44 | #endif 45 | -------------------------------------------------------------------------------- /include/tools/linux/compiler.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _TOOLS_LINUX_COMPILER_H_ 3 | #define _TOOLS_LINUX_COMPILER_H_ 4 | 5 | #ifdef __GNUC__ 6 | #include 7 | #endif 8 | 9 | #ifndef __compiletime_error 10 | # define __compiletime_error(message) 11 | #endif 12 | 13 | /* Optimization barrier */ 14 | /* The "volatile" is due to gcc bugs */ 15 | #define barrier() __asm__ __volatile__("": : :"memory") 16 | 17 | #ifndef 
__always_inline 18 | # define __always_inline inline __attribute__((always_inline)) 19 | #endif 20 | 21 | #ifndef noinline 22 | #define noinline 23 | #endif 24 | 25 | /* Are two types/vars the same type (ignoring qualifiers)? */ 26 | #ifndef __same_type 27 | # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) 28 | #endif 29 | 30 | #ifdef __ANDROID__ 31 | /* 32 | * FIXME: Big hammer to get rid of tons of: 33 | * "warning: always_inline function might not be inlinable" 34 | * 35 | * At least on android-ndk-r12/platforms/android-24/arch-arm 36 | */ 37 | #undef __always_inline 38 | #define __always_inline inline 39 | #endif 40 | 41 | #define __user 42 | #define __rcu 43 | #define __read_mostly 44 | 45 | #ifndef __attribute_const__ 46 | # define __attribute_const__ 47 | #endif 48 | 49 | #ifndef __maybe_unused 50 | # define __maybe_unused __attribute__((unused)) 51 | #endif 52 | 53 | #ifndef __used 54 | # define __used __attribute__((__unused__)) 55 | #endif 56 | 57 | #ifndef __packed 58 | # define __packed __attribute__((__packed__)) 59 | #endif 60 | 61 | #ifndef __force 62 | # define __force 63 | #endif 64 | 65 | #ifndef __weak 66 | # define __weak __attribute__((weak)) 67 | #endif 68 | 69 | #ifndef likely 70 | # define likely(x) __builtin_expect(!!(x), 1) 71 | #endif 72 | 73 | #ifndef unlikely 74 | # define unlikely(x) __builtin_expect(!!(x), 0) 75 | #endif 76 | 77 | #ifndef __init 78 | # define __init 79 | #endif 80 | 81 | #ifndef noinline 82 | # define noinline 83 | #endif 84 | 85 | #define uninitialized_var(x) x = *(&(x)) 86 | 87 | #include 88 | 89 | /* 90 | * Following functions are taken from kernel sources and 91 | * break aliasing rules in their original form. 92 | * 93 | * While kernel is compiled with -fno-strict-aliasing, 94 | * perf uses -Wstrict-aliasing=3 which makes build fail 95 | * under gcc 4.4. 96 | * 97 | * Using extra __may_alias__ type to allow aliasing 98 | * in this case. 99 | */ 100 | typedef __u8 __attribute__((__may_alias__)) __u8_alias_t; 101 | typedef __u16 __attribute__((__may_alias__)) __u16_alias_t; 102 | typedef __u32 __attribute__((__may_alias__)) __u32_alias_t; 103 | typedef __u64 __attribute__((__may_alias__)) __u64_alias_t; 104 | 105 | static __always_inline void __read_once_size(const volatile void *p, void *res, int size) 106 | { 107 | switch (size) { 108 | case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break; 109 | case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break; 110 | case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break; 111 | case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break; 112 | default: 113 | barrier(); 114 | __builtin_memcpy((void *)res, (const void *)p, size); 115 | barrier(); 116 | } 117 | } 118 | 119 | static __always_inline void __write_once_size(volatile void *p, void *res, int size) 120 | { 121 | switch (size) { 122 | case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break; 123 | case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break; 124 | case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break; 125 | case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break; 126 | default: 127 | barrier(); 128 | __builtin_memcpy((void *)p, (const void *)res, size); 129 | barrier(); 130 | } 131 | } 132 | 133 | /* 134 | * Prevent the compiler from merging or refetching reads or writes. 
The 135 | * compiler is also forbidden from reordering successive instances of 136 | * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some 137 | * particular ordering. One way to make the compiler aware of ordering is to 138 | * put the two invocations of READ_ONCE or WRITE_ONCE in different C 139 | * statements. 140 | * 141 | * These two macros will also work on aggregate data types like structs or 142 | * unions. If the size of the accessed data type exceeds the word size of 143 | * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will 144 | * fall back to memcpy and print a compile-time warning. 145 | * 146 | * Their two major use cases are: (1) Mediating communication between 147 | * process-level code and irq/NMI handlers, all running on the same CPU, 148 | * and (2) Ensuring that the compiler does not fold, spindle, or otherwise 149 | * mutilate accesses that either do not require ordering or that interact 150 | * with an explicit memory barrier or atomic instruction that provides the 151 | * required ordering. 152 | */ 153 | 154 | #define READ_ONCE(x) \ 155 | ({ \ 156 | union { typeof(x) __val; char __c[1]; } __u = \ 157 | { .__c = { 0 } }; \ 158 | __read_once_size(&(x), __u.__c, sizeof(x)); \ 159 | __u.__val; \ 160 | }) 161 | 162 | #define WRITE_ONCE(x, val) \ 163 | ({ \ 164 | union { typeof(x) __val; char __c[1]; } __u = \ 165 | { .__val = (val) }; \ 166 | __write_once_size(&(x), __u.__c, sizeof(x)); \ 167 | __u.__val; \ 168 | }) 169 | 170 | 171 | #ifndef __fallthrough 172 | # define __fallthrough 173 | #endif 174 | 175 | #endif /* _TOOLS_LINUX_COMPILER_H */ 176 | -------------------------------------------------------------------------------- /include/tools/linux/export.h: -------------------------------------------------------------------------------- 1 | #ifndef _TOOLS_LINUX_EXPORT_H_ 2 | #define _TOOLS_LINUX_EXPORT_H_ 3 | 4 | #define EXPORT_SYMBOL(sym) 5 | #define EXPORT_SYMBOL_GPL(sym) 6 | #define EXPORT_SYMBOL_GPL_FUTURE(sym) 7 | #define EXPORT_UNUSED_SYMBOL(sym) 8 | #define EXPORT_UNUSED_SYMBOL_GPL(sym) 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /include/tools/linux/kernel.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __TOOLS_LINUX_KERNEL_H 3 | #define __TOOLS_LINUX_KERNEL_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #ifndef UINT_MAX 13 | #define UINT_MAX (~0U) 14 | #endif 15 | 16 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 17 | 18 | #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) 19 | #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) 20 | 21 | #ifndef offsetof 22 | #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 23 | #endif 24 | 25 | #ifndef container_of 26 | /** 27 | * container_of - cast a member of a structure out to the containing structure 28 | * @ptr: the pointer to the member. 29 | * @type: the type of the container struct this is embedded in. 30 | * @member: the name of the member within the struct. 
31 | * 32 | */ 33 | #define container_of(ptr, type, member) ({ \ 34 | const typeof(((type *)0)->member) * __mptr = (ptr); \ 35 | (type *)((char *)__mptr - offsetof(type, member)); }) 36 | #endif 37 | 38 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) 39 | #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) 40 | 41 | #ifndef max 42 | #define max(x, y) ({ \ 43 | typeof(x) _max1 = (x); \ 44 | typeof(y) _max2 = (y); \ 45 | (void) (&_max1 == &_max2); \ 46 | _max1 > _max2 ? _max1 : _max2; }) 47 | #endif 48 | 49 | #ifndef min 50 | #define min(x, y) ({ \ 51 | typeof(x) _min1 = (x); \ 52 | typeof(y) _min2 = (y); \ 53 | (void) (&_min1 == &_min2); \ 54 | _min1 < _min2 ? _min1 : _min2; }) 55 | #endif 56 | 57 | #ifndef roundup 58 | #define roundup(x, y) ( \ 59 | { \ 60 | const typeof(y) __y = y; \ 61 | (((x) + (__y - 1)) / __y) * __y; \ 62 | } \ 63 | ) 64 | #endif 65 | 66 | #ifndef BUG_ON 67 | #ifdef NDEBUG 68 | #define BUG_ON(cond) do { if (cond) {} } while (0) 69 | #else 70 | #define BUG_ON(cond) assert(!(cond)) 71 | #endif 72 | #endif 73 | #define BUG() BUG_ON(1) 74 | 75 | #if __BYTE_ORDER == __BIG_ENDIAN 76 | #define cpu_to_le16 bswap_16 77 | #define cpu_to_le32 bswap_32 78 | #define cpu_to_le64 bswap_64 79 | #define le16_to_cpu bswap_16 80 | #define le32_to_cpu bswap_32 81 | #define le64_to_cpu bswap_64 82 | #define cpu_to_be16 83 | #define cpu_to_be32 84 | #define cpu_to_be64 85 | #define be16_to_cpu 86 | #define be32_to_cpu 87 | #define be64_to_cpu 88 | #else 89 | #define cpu_to_le16 90 | #define cpu_to_le32 91 | #define cpu_to_le64 92 | #define le16_to_cpu 93 | #define le32_to_cpu 94 | #define le64_to_cpu 95 | #define cpu_to_be16 bswap_16 96 | #define cpu_to_be32 bswap_32 97 | #define cpu_to_be64 bswap_64 98 | #define be16_to_cpu bswap_16 99 | #define be32_to_cpu bswap_32 100 | #define be64_to_cpu bswap_64 101 | #endif 102 | 103 | int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); 104 | int scnprintf(char * buf, size_t size, const char * fmt, ...); 105 | int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); 106 | 107 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) 108 | 109 | /* 110 | * This looks more complex than it should be. But we need to 111 | * get the type for the ~ right in round_down (it needs to be 112 | * as wide as the result!), and we want to evaluate the macro 113 | * arguments just once each. 
114 | */ 115 | #define __round_mask(x, y) ((__typeof__(x))((y)-1)) 116 | #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) 117 | #define round_down(x, y) ((x) & ~__round_mask(x, y)) 118 | 119 | #define current_gfp_context(k) 0 120 | #define synchronize_rcu() 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /include/tools/linux/poison.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _LINUX_POISON_H 3 | #define _LINUX_POISON_H 4 | 5 | /********** include/linux/list.h **********/ 6 | 7 | /* 8 | * Architectures might want to move the poison pointer offset 9 | * into some well-recognized area such as 0xdead000000000000, 10 | * that is also not mappable by user-space exploits: 11 | */ 12 | #ifdef CONFIG_ILLEGAL_POINTER_VALUE 13 | # define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) 14 | #else 15 | # define POISON_POINTER_DELTA 0 16 | #endif 17 | 18 | #ifdef __cplusplus 19 | #define LIST_POISON1 NULL 20 | #define LIST_POISON2 NULL 21 | #else 22 | /* 23 | * These are non-NULL pointers that will result in page faults 24 | * under normal circumstances, used to verify that nobody uses 25 | * non-initialized list entries. 26 | */ 27 | #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) 28 | #define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) 29 | #endif 30 | 31 | /********** include/linux/timer.h **********/ 32 | /* 33 | * Magic number "tsta" to indicate a static timer initializer 34 | * for the object debugging code. 35 | */ 36 | #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) 37 | 38 | /********** mm/debug-pagealloc.c **********/ 39 | #ifdef CONFIG_PAGE_POISONING_ZERO 40 | #define PAGE_POISON 0x00 41 | #else 42 | #define PAGE_POISON 0xaa 43 | #endif 44 | 45 | /********** mm/page_alloc.c ************/ 46 | 47 | #define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA) 48 | 49 | /********** mm/slab.c **********/ 50 | /* 51 | * Magic nums for obj red zoning. 52 | * Placed in the first word before and the first word after an obj. 53 | */ 54 | #define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ 55 | #define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ 56 | 57 | #define SLUB_RED_INACTIVE 0xbb 58 | #define SLUB_RED_ACTIVE 0xcc 59 | 60 | /* ...and for poisoning */ 61 | #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ 62 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ 63 | #define POISON_END 0xa5 /* end-byte of poisoning */ 64 | 65 | /********** arch/$ARCH/mm/init.c **********/ 66 | #define POISON_FREE_INITMEM 0xcc 67 | 68 | /********** arch/ia64/hp/common/sba_iommu.c **********/ 69 | /* 70 | * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a 71 | * value of "SBAIOMMU POISON\0" for spill-over poisoning. 
72 | */ 73 | 74 | /********** fs/jbd/journal.c **********/ 75 | #define JBD_POISON_FREE 0x5b 76 | #define JBD2_POISON_FREE 0x5c 77 | 78 | /********** drivers/base/dmapool.c **********/ 79 | #define POOL_POISON_FREED 0xa7 /* !inuse */ 80 | #define POOL_POISON_ALLOCATED 0xa9 /* !initted */ 81 | 82 | /********** drivers/atm/ **********/ 83 | #define ATM_POISON_FREE 0x12 84 | #define ATM_POISON 0xdeadbeef 85 | 86 | /********** kernel/mutexes **********/ 87 | #define MUTEX_DEBUG_INIT 0x11 88 | #define MUTEX_DEBUG_FREE 0x22 89 | 90 | /********** security/ **********/ 91 | #define KEY_DESTROY 0xbd 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /include/tools/linux/rbtree.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 | /* 3 | Red Black Trees 4 | (C) 1999 Andrea Arcangeli 5 | 6 | 7 | linux/include/linux/rbtree.h 8 | 9 | To use rbtrees you'll have to implement your own insert and search cores. 10 | This will avoid us to use callbacks and to drop drammatically performances. 11 | I know it's not the cleaner way, but in C (not in C++) to get 12 | performances and genericity... 13 | 14 | See Documentation/rbtree.txt for documentation and samples. 15 | */ 16 | 17 | #ifndef __TOOLS_LINUX_PERF_RBTREE_H 18 | #define __TOOLS_LINUX_PERF_RBTREE_H 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | struct rb_node { 25 | unsigned long __rb_parent_color; 26 | struct rb_node *rb_right; 27 | struct rb_node *rb_left; 28 | } __attribute__((aligned(sizeof(long)))); 29 | /* The alignment might seem pointless, but allegedly CRIS needs it */ 30 | 31 | struct rb_root { 32 | struct rb_node *rb_node; 33 | }; 34 | 35 | #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) 36 | 37 | #define RB_ROOT (struct rb_root) { NULL, } 38 | #define rb_entry(ptr, type, member) container_of(ptr, type, member) 39 | 40 | #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) 41 | 42 | /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ 43 | #define RB_EMPTY_NODE(node) \ 44 | ((node)->__rb_parent_color == (unsigned long)(node)) 45 | #define RB_CLEAR_NODE(node) \ 46 | ((node)->__rb_parent_color = (unsigned long)(node)) 47 | 48 | 49 | extern void rb_insert_color(struct rb_node *, struct rb_root *); 50 | extern void rb_erase(struct rb_node *, struct rb_root *); 51 | 52 | 53 | /* Find logical next and previous nodes in a tree */ 54 | extern struct rb_node *rb_next(const struct rb_node *); 55 | extern struct rb_node *rb_prev(const struct rb_node *); 56 | extern struct rb_node *rb_first(const struct rb_root *); 57 | extern struct rb_node *rb_last(const struct rb_root *); 58 | 59 | /* Postorder iteration - always visit the parent after its children */ 60 | extern struct rb_node *rb_first_postorder(const struct rb_root *); 61 | extern struct rb_node *rb_next_postorder(const struct rb_node *); 62 | 63 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ 64 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 65 | struct rb_root *root); 66 | 67 | static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, 68 | struct rb_node **rb_link) 69 | { 70 | node->__rb_parent_color = (unsigned long)parent; 71 | node->rb_left = node->rb_right = NULL; 72 | 73 | *rb_link = node; 74 | } 75 | 76 | #define rb_entry_safe(ptr, type, member) \ 77 | ({ typeof(ptr) ____ptr = (ptr); \ 78 | ____ptr ? 
rb_entry(____ptr, type, member) : NULL; \ 79 | }) 80 | 81 | /** 82 | * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of 83 | * given type allowing the backing memory of @pos to be invalidated 84 | * 85 | * @pos: the 'type *' to use as a loop cursor. 86 | * @n: another 'type *' to use as temporary storage 87 | * @root: 'rb_root *' of the rbtree. 88 | * @field: the name of the rb_node field within 'type'. 89 | * 90 | * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as 91 | * list_for_each_entry_safe() and allows the iteration to continue independent 92 | * of changes to @pos by the body of the loop. 93 | * 94 | * Note, however, that it cannot handle other modifications that re-order the 95 | * rbtree it is iterating over. This includes calling rb_erase() on @pos, as 96 | * rb_erase() may rebalance the tree, causing us to miss some nodes. 97 | */ 98 | #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ 99 | for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ 100 | pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ 101 | typeof(*pos), field); 1; }); \ 102 | pos = n) 103 | 104 | static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) 105 | { 106 | rb_erase(n, root); 107 | RB_CLEAR_NODE(n); 108 | } 109 | 110 | /* 111 | * Leftmost-cached rbtrees. 112 | * 113 | * We do not cache the rightmost node based on footprint 114 | * size vs number of potential users that could benefit 115 | * from O(1) rb_last(). Just not worth it, users that want 116 | * this feature can always implement the logic explicitly. 117 | * Furthermore, users that want to cache both pointers may 118 | * find it a bit asymmetric, but that's ok. 119 | */ 120 | struct rb_root_cached { 121 | struct rb_root rb_root; 122 | struct rb_node *rb_leftmost; 123 | }; 124 | 125 | #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } 126 | 127 | /* Same as rb_first(), but O(1) */ 128 | #define rb_first_cached(root) (root)->rb_leftmost 129 | 130 | static inline void rb_insert_color_cached(struct rb_node *node, 131 | struct rb_root_cached *root, 132 | bool leftmost) 133 | { 134 | if (leftmost) 135 | root->rb_leftmost = node; 136 | rb_insert_color(node, &root->rb_root); 137 | } 138 | 139 | static inline void rb_erase_cached(struct rb_node *node, 140 | struct rb_root_cached *root) 141 | { 142 | if (root->rb_leftmost == node) 143 | root->rb_leftmost = rb_next(node); 144 | rb_erase(node, &root->rb_root); 145 | } 146 | 147 | static inline void rb_replace_node_cached(struct rb_node *victim, 148 | struct rb_node *new, 149 | struct rb_root_cached *root) 150 | { 151 | if (root->rb_leftmost == victim) 152 | root->rb_leftmost = new; 153 | rb_replace_node(victim, new, &root->rb_root); 154 | } 155 | 156 | #endif /* __TOOLS_LINUX_PERF_RBTREE_H */ 157 | -------------------------------------------------------------------------------- /include/tp_verify.h: -------------------------------------------------------------------------------- 1 | #ifndef _TP_VERIFY_H_ 2 | #define _TP_VERIFY_H_ 3 | 4 | #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) 5 | 6 | struct tp_ctx { 7 | char *tp_field; 8 | unsigned int tp_offset; 9 | unsigned int tp_size; 10 | char *struct_field; 11 | unsigned int struct_offset; 12 | unsigned int struct_size; 13 | }; 14 | 15 | #define TP_ARG(name, offset, size, dtype, sname) \ 16 | { .tp_field = #name, \ 17 | .tp_offset = offset, \ 18 | .tp_size = size, \ 19 | .struct_field = #sname, \ 20 | 
.struct_offset = offsetof(struct dtype, sname), \ 21 | .struct_size = sizeof_field(struct dtype, sname) } 22 | 23 | int tp_validate_context(char *sys_name, char *tp_name, 24 | struct tp_ctx *ctx, unsigned int ctx_entries); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /include/vm_info.h: -------------------------------------------------------------------------------- 1 | #ifndef __VM_INFO_H_ 2 | #define __VM_INFO_H_ 3 | 4 | #include 5 | 6 | struct vm_info 7 | { 8 | __u32 vmid; 9 | __u8 mac[ETH_ALEN]; 10 | __be16 vlan_TCI; /* vlan tag to add on egress redirect */ 11 | __u32 v4addr; 12 | struct in6_addr v6addr; 13 | }; 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /include/xdp_acl.h: -------------------------------------------------------------------------------- 1 | #ifndef __XDP_ACL_H_ 2 | #define __XDP_ACL_H_ 3 | 4 | struct acl_key 5 | { 6 | __be16 port; 7 | __u8 protocol; /* ip protocol (TCP, UDP, ...) */ 8 | }; 9 | 10 | #define ACL_FLAG_ADDR_CHECK (1<<1) 11 | 12 | struct acl_val 13 | { 14 | union { 15 | __u32 ipv4; 16 | struct in6_addr ipv6; 17 | } addr; 18 | 19 | __u8 family; 20 | __u8 flags; 21 | __be16 port; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /include/xdp_devmap_xmit.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _DEVMAP_XMIT_H_ 3 | #define _DEVMAP_XMIT_H_ 4 | 5 | #define DEVMAP_BUCKETS 10 6 | struct devmap_xmit_hist { 7 | __u64 buckets[DEVMAP_BUCKETS]; 8 | }; 9 | 10 | /* order of arguments from 11 | * /sys/kernel/debug/tracing/events/napi/devmap_xmit/format 12 | * but skipping all of the common fields: 13 | * 14 | field:int from_ifindex; offset:8; size:4; signed:1; 15 | field:u32 act; offset:12; size:4; signed:0; 16 | field:int to_ifindex; offset:16; size:4; signed:1; 17 | field:int drops; offset:20; size:4; signed:1; 18 | field:int sent; offset:24; size:4; signed:1; 19 | field:int err; offset:28; size:4; signed:1; 20 | */ 21 | struct devmap_xmit_args { 22 | __u64 unused; 23 | 24 | int from_ifindex; 25 | __u32 act; 26 | int to_ifindex; 27 | int drops; 28 | int sent; 29 | int err; 30 | }; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /include/xdp_fdb.h: -------------------------------------------------------------------------------- 1 | #ifndef _XDP_FDB_H_ 2 | #define _XDP_FDB_H_ 3 | 4 | #include 5 | 6 | struct xdp_stats { 7 | __u64 bytes_fwd; 8 | __u64 pkts_fwd; 9 | __u64 dropped; 10 | }; 11 | 12 | struct fdb_key 13 | { 14 | __u8 mac[ETH_ALEN]; 15 | __u16 vlan; 16 | }; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /ksrc/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0 2 | 3 | include ../config.mk 4 | 5 | ifneq (,$(BUILDDIR)) 6 | OBJDIR = $(BUILDDIR)/ksrc/obj/ 7 | else 8 | OBJDIR = obj/ 9 | endif 10 | 11 | MODS += $(OBJDIR)execsnoop.o 12 | MODS += $(OBJDIR)execsnoop_legacy.o 13 | MODS += $(OBJDIR)opensnoop.o 14 | MODS += $(OBJDIR)kvm-nested.o 15 | 16 | MODS += $(OBJDIR)pktdrop.o 17 | MODS += $(OBJDIR)pktlatency.o 18 | MODS += $(OBJDIR)ovslatency.o 19 | MODS += $(OBJDIR)net_rx_action.o 20 | MODS += $(OBJDIR)napi_poll.o 21 | MODS += $(OBJDIR)xdp_devmap_xmit.o 22 | 23 | MODS += $(OBJDIR)tcp_probe.o 24 | MODS += 
$(OBJDIR)xdp_l2fwd.o 25 | MODS += $(OBJDIR)xdp_l3fwd.o 26 | MODS += $(OBJDIR)xdp_dummy.o 27 | MODS += $(OBJDIR)xdp_vmegress.o 28 | 29 | MODS += $(OBJDIR)acl_vm_tx.o 30 | MODS += $(OBJDIR)acl_vm_rx.o 31 | MODS += $(OBJDIR)rx_acl.o 32 | 33 | VPATH := . 34 | 35 | # rule is based on samples/bpf/Makefile 36 | DEFS = -D__KERNEL__ -D__BPF_TRACING__ -D__TARGET_ARCH_x86 $(EXTRA_DEFS) 37 | 38 | CFLAGS += -g 39 | CFLAGS += -Wno-unused-value -Wno-pointer-sign 40 | CFLAGS += -Wno-compare-distinct-pointer-types 41 | CFLAGS += -Wno-gnu-variable-sized-type-not-at-end 42 | CFLAGS += -Wno-address-of-packed-member 43 | CFLAGS += -Wno-tautological-compare 44 | CFLAGS += -Wno-unknown-warning-option 45 | CFLAGS += -fno-stack-protector 46 | CFLAGS += -fcf-protection 47 | 48 | INCLUDES = -I../include 49 | INCLUDES += -I../libbpf/usr/include 50 | INCLUDES += -I$(KSRC)/arch/x86/include 51 | INCLUDES += -I$(KBLD)/arch/x86/include/generated 52 | INCLUDES += -I$(KBLD)/include 53 | INCLUDES += -I$(KSRC)/include 54 | INCLUDES += -I$(KSRC)/arch/x86/include/uapi 55 | INCLUDES += -I$(KBLD)/arch/x86/include/generated/uapi 56 | INCLUDES += -I$(KSRC)/include/uapi 57 | INCLUDES += -I$(KBLD)/include/generated/uapi 58 | 59 | SINCLUDES = -include $(KSRC)/include/linux/kconfig.h 60 | SINCLUDES += -include include/asm_goto_workaround.h 61 | 62 | # this is to find stdarg.h. Ubuntu has this under x86_64-linux-gnu 63 | # and Fedora is under x86_64-redhat-linux. Let's try 'find'. 64 | GCCVER=$(shell gcc -v 2>&1 | awk '{if ($$0 ~ /gcc version/) {ver=split($$3,n,"."); print n[1]}}') 65 | GCC_INC=$(shell find /usr/lib/gcc/x86_64-*linux*/$(GCCVER) -name include) 66 | NOSTDINC_FLAGS = -nostdinc -isystem $(GCC_INC) 67 | 68 | all: build $(MODS) 69 | 70 | build: 71 | @mkdir -p $(OBJDIR) 72 | 73 | $(OBJDIR)%.o: %.c 74 | $(QUIET_CLANG)$(CLANG) $(NOSTDINC_FLAGS) $(INCLUDES) \ 75 | $(SINCLUDES) $(DEFS) $(CFLAGS) \ 76 | -O2 -emit-llvm $(CLANG_FLAGS) -c $< -o $@.cl 77 | $(QUIET_LLC)$(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ $@.cl 78 | @rm $@.cl 79 | 80 | clean: 81 | @rm -rf $(OBJDIR) 82 | -------------------------------------------------------------------------------- /ksrc/acl_simple.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Copyright (c) 2019-2020 David Ahern 3 | * 4 | * Implement simple address / protocol / port ACL for a 5 | * VM, but implemented in a host. 
6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "xdp_acl.h" 18 | #include "vm_info.h" 19 | #include "eth_helpers.h" 20 | #include "ipv6_helpers.h" 21 | #include "flow.h" 22 | 23 | static __always_inline bool acl_simple(struct ethhdr *eth, struct flow *fl, 24 | bool use_src, struct bpf_map_def *acl_map) 25 | { 26 | struct acl_key key = {}; 27 | struct acl_val *val; 28 | 29 | key.protocol = fl->protocol; 30 | if (key.protocol == IPPROTO_TCP || key.protocol == IPPROTO_UDP) 31 | key.port = fl->ports.dport; 32 | 33 | val = bpf_map_lookup_elem(acl_map, &key); 34 | /* if no entry, pass */ 35 | if (!val) { 36 | /* check for just protocol; maybe a sport ACL */ 37 | key.port = 0; 38 | val = bpf_map_lookup_elem(acl_map, &key); 39 | } 40 | if (!val) 41 | return false; 42 | 43 | /* action on hit */ 44 | if (val->family) { 45 | if (fl->family != val->family) 46 | return false; 47 | } else if (val->flags & ACL_FLAG_ADDR_CHECK) { 48 | if (fl->family != val->family) 49 | return false; 50 | } 51 | if (val->flags & ACL_FLAG_ADDR_CHECK) { 52 | struct in6_addr *v6addr; 53 | __be32 v4addr; 54 | 55 | switch(fl->family) { 56 | case AF_INET: 57 | if (!val->addr.ipv4) 58 | return true; 59 | 60 | v4addr = use_src ? fl->saddr.ipv4 : fl->daddr.ipv4; 61 | if (v4addr != val->addr.ipv4) 62 | return false; 63 | break; 64 | case AF_INET6: 65 | if (ipv6_is_any(&val->addr.ipv6)) 66 | return true; 67 | 68 | v6addr = use_src ? &fl->saddr.ipv6 : &fl->daddr.ipv6; 69 | if (!do_ipv6_addr_cmp(v6addr, &val->addr.ipv6)) 70 | return false; 71 | break; 72 | default: 73 | return false; 74 | } 75 | } 76 | 77 | if (val->port && val->port == fl->ports.sport) 78 | return true; 79 | 80 | return key.port ? true : false; 81 | } 82 | 83 | /* returns true if packet should be dropped; false to continue */ 84 | static __always_inline bool drop_packet(void *data, void *data_end, 85 | struct vm_info *vi, bool rx, 86 | struct flow *fl, 87 | struct bpf_map_def *acl_map) 88 | { 89 | struct ethhdr *eth = data; 90 | bool rc = false; 91 | int ret; 92 | 93 | if (eth + 1 > data_end) 94 | return true; 95 | 96 | /* direction: Tx = to VM, Rx = from VM */ 97 | if (vi && !mac_cmp(vi->mac, rx ? eth->h_source : eth->h_dest)) 98 | return true; 99 | 100 | ret = parse_pkt(fl, data, data_end, 0); 101 | if (ret) 102 | return ret > 0 ? 
false : true; 103 | 104 | /* Rx = from VM: check dest address against ACL 105 | * Tx = to VM: check source address against ACL 106 | */ 107 | if (acl_map) 108 | rc = acl_simple(eth, fl, !rx, acl_map); 109 | 110 | return rc; 111 | } 112 | -------------------------------------------------------------------------------- /ksrc/acl_vm_rx.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Copyright (c) 2019-2020 David Ahern 3 | * 4 | * Rx ACL for a VM - packets from VM 5 | */ 6 | #define KBUILD_MODNAME "acl_vm_rx" 7 | #include 8 | 9 | #include "acl_simple.h" 10 | 11 | struct bpf_map_def SEC("maps") __rx_acl_map = { 12 | .type = BPF_MAP_TYPE_HASH, 13 | .key_size = sizeof(struct acl_key), 14 | .value_size = sizeof(struct acl_val), 15 | .max_entries = 64, 16 | }; 17 | 18 | struct bpf_map_def SEC("maps") __vm_info_map = { 19 | .type = BPF_MAP_TYPE_HASH, 20 | .key_size = sizeof(u32), 21 | .value_size = sizeof(struct vm_info), 22 | .max_entries = 1, 23 | }; 24 | 25 | SEC("classifier/acl_vm_rx") 26 | int tc_acl_vm_rx_prog(struct __sk_buff *skb) 27 | { 28 | void *data_end = (void *)(long)skb->data_end; 29 | void *data = (void *)(long)skb->data; 30 | u32 idx = skb->ifindex; 31 | struct flow fl = {}; 32 | struct vm_info *vi; 33 | bool rc; 34 | 35 | vi = bpf_map_lookup_elem(&__vm_info_map, &idx); 36 | if (!vi) 37 | return TC_ACT_OK; 38 | 39 | rc = drop_packet(data, data_end, vi, true, &fl, &__rx_acl_map); 40 | 41 | return rc ? TC_ACT_SHOT : TC_ACT_OK; 42 | } 43 | 44 | SEC("xdp/acl_vm_rx") 45 | int xdp_acl_vm_rx_prog(struct xdp_md *ctx) 46 | { 47 | void *data_end = (void *)(long)ctx->data_end; 48 | void *data = (void *)(long)ctx->data; 49 | u32 idx = ctx->ingress_ifindex; 50 | struct flow fl = {}; 51 | struct vm_info *vi; 52 | bool rc; 53 | 54 | vi = bpf_map_lookup_elem(&__vm_info_map, &idx); 55 | if (!vi) 56 | return XDP_PASS; 57 | 58 | rc = drop_packet(data, data_end, vi, true, &fl, &__rx_acl_map); 59 | 60 | return rc ? XDP_DROP : XDP_PASS; 61 | } 62 | 63 | char _license[] SEC("license") = "GPL"; 64 | -------------------------------------------------------------------------------- /ksrc/acl_vm_tx.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Copyright (c) 2019-2020 David Ahern 3 | * 4 | * Tx ACL for a VM - packets going to a VM 5 | */ 6 | #define KBUILD_MODNAME "acl_vm_tx" 7 | #include 8 | 9 | #include "acl_simple.h" 10 | 11 | struct bpf_map_def SEC("maps") __tx_acl_map = { 12 | .type = BPF_MAP_TYPE_HASH, 13 | .key_size = sizeof(struct acl_key), 14 | .value_size = sizeof(struct acl_val), 15 | .max_entries = 64, 16 | }; 17 | 18 | struct bpf_map_def SEC("maps") __vm_info_map = { 19 | .type = BPF_MAP_TYPE_HASH, 20 | .key_size = sizeof(u32), 21 | .value_size = sizeof(struct vm_info), 22 | .max_entries = 1, 23 | }; 24 | 25 | SEC("classifier/acl_vm_tx") 26 | int tc_acl_vm_tx_prog(struct __sk_buff *skb) 27 | { 28 | void *data_end = (void *)(long)skb->data_end; 29 | void *data = (void *)(long)skb->data; 30 | u32 idx = skb->ifindex; 31 | struct flow fl = {}; 32 | struct vm_info *vi; 33 | bool rc; 34 | 35 | vi = bpf_map_lookup_elem(&__vm_info_map, &idx); 36 | if (!vi) 37 | return TC_ACT_OK; 38 | 39 | rc = drop_packet(data, data_end, vi, false, &fl, &__tx_acl_map); 40 | return rc ? 
TC_ACT_SHOT : TC_ACT_OK; 41 | } 42 | 43 | SEC("xdp_devmap/acl_vm_tx") 44 | int xdp_acl_vm_tx_prog(struct xdp_md *ctx) 45 | { 46 | void *data_end = (void *)(long)ctx->data_end; 47 | void *data = (void *)(long)ctx->data; 48 | u32 idx = ctx->egress_ifindex; 49 | struct flow fl = {}; 50 | struct vm_info *vi; 51 | bool rc; 52 | 53 | vi = bpf_map_lookup_elem(&__vm_info_map, &idx); 54 | if (!vi) 55 | return XDP_PASS; 56 | 57 | rc = drop_packet(data, data_end, vi, false, &fl, &__tx_acl_map); 58 | 59 | return rc ? XDP_DROP : XDP_PASS; 60 | } 61 | 62 | char _license[] SEC("license") = "GPL"; 63 | -------------------------------------------------------------------------------- /ksrc/bpf_debug.h: -------------------------------------------------------------------------------- 1 | #ifndef _BPF_DEBUG_H 2 | #define _BPF_DEBUG_H 3 | 4 | #define bpf_debug(fmt, ...) \ 5 | { \ 6 | char __fmt[] = fmt; \ 7 | bpf_trace_printk(__fmt, sizeof(__fmt), \ 8 | ##__VA_ARGS__); \ 9 | } 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /ksrc/channel_map.c: -------------------------------------------------------------------------------- 1 | 2 | struct bpf_map_def SEC("maps") channel = { 3 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 4 | .key_size = sizeof(int), 5 | .value_size = sizeof(u32), 6 | .max_entries = MAX_CPUS, 7 | }; 8 | -------------------------------------------------------------------------------- /ksrc/eth_helpers.h: -------------------------------------------------------------------------------- 1 | static __always_inline bool mac_cmp(const unsigned char *mac1, const unsigned char *mac2) 2 | { 3 | u32 a1 = *((u32 *)&mac1[0]); 4 | u32 a2 = *((u32 *)&mac2[0]); 5 | u16 b1 = *((u16 *)&mac1[4]); 6 | u16 b2 = *((u16 *)&mac2[4]); 7 | 8 | return a1 == a2 && b1 == b2; 9 | } 10 | -------------------------------------------------------------------------------- /ksrc/execsnoop.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Snoop process exec(). 
Inspired by execsnoop.py from bcc repository 3 | * 4 | * David Ahern 5 | */ 6 | 7 | #define KBUILD_MODNAME "execsnoop" 8 | #include /* pt_regs via asm/ptrace.h */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "execsnoop.h" 16 | #include "sched_tp.h" 17 | 18 | #include "channel_map.c" 19 | #include "set_current_info.c" 20 | 21 | /* expecting args to be filename, argv, envp */ 22 | SEC("kprobe/execve") 23 | int bpf_sys_execve(struct pt_regs *ctx) 24 | { 25 | struct data data = { 26 | .time = bpf_ktime_get_ns(), 27 | .cpu = (u8) bpf_get_smp_processor_id(), 28 | .event_type = EVENT_START 29 | }; 30 | void *pfilename = (void *)(ctx->di + offsetof(struct pt_regs, di)); 31 | void *pargv = (void *)(ctx->di + offsetof(struct pt_regs, si)); 32 | char *filename, **argv; 33 | bool bail = false; 34 | int i; 35 | 36 | set_current_info(&data); 37 | 38 | if (bpf_probe_read(&filename, sizeof(filename), pfilename) || 39 | bpf_probe_read_str(data.arg, sizeof(data.arg)-1, filename) < 0) { 40 | __builtin_strcpy(data.arg, ""); 41 | bail = true; 42 | } 43 | 44 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 45 | &data, sizeof(data)) < 0 || bail) 46 | goto out; 47 | 48 | data.event_type = EVENT_ARG; 49 | if (bpf_probe_read((void *) &argv, sizeof(void *), pargv)) 50 | goto out; 51 | 52 | /* skip first arg; submitted filename */ 53 | #pragma unroll 54 | for (int i = 1; i <= MAXARG; i++) { 55 | void *ptr = NULL; 56 | 57 | if (bpf_probe_read(&ptr, sizeof(ptr), &argv[i]) || ptr == NULL) 58 | goto out; 59 | if (bpf_probe_read_str(data.arg, sizeof(data.arg)-1, ptr) < 0) 60 | goto out; 61 | 62 | /* give each event a different timestamp */ 63 | data.time++; 64 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 65 | &data, sizeof(data)) < 0) 66 | goto out; 67 | } 68 | 69 | __builtin_strcpy(data.arg, "..."); 70 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 71 | &data, sizeof(data)); 72 | out: 73 | return 0; 74 | } 75 | 76 | SEC("kprobe/execve_ret") 77 | int bpf_sys_execve_ret(struct pt_regs *ctx) 78 | { 79 | struct data data = { 80 | .time = bpf_ktime_get_ns(), 81 | .cpu = (u8) bpf_get_smp_processor_id(), 82 | .event_type = EVENT_RET, 83 | .retval = ctx->ax, 84 | }; 85 | 86 | set_current_info(&data); 87 | 88 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 89 | &data, sizeof(data)); 90 | 91 | return 0; 92 | } 93 | 94 | SEC("tracepoint/syscalls/sys_enter_execve") 95 | int bpf_sys_enter_execve(struct execve_enter_args *ctx) 96 | { 97 | struct data data = { 98 | .time = bpf_ktime_get_ns(), 99 | .cpu = (u8) bpf_get_smp_processor_id(), 100 | .event_type = EVENT_START 101 | }; 102 | int i; 103 | 104 | set_current_info(&data); 105 | 106 | if (bpf_probe_read_str(data.arg, sizeof(data.arg), ctx->filename) < 0) 107 | __builtin_strcpy(data.arg, ""); 108 | 109 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 110 | &data, sizeof(data)) < 0) 111 | goto out; 112 | 113 | data.event_type = EVENT_ARG; 114 | 115 | /* skip first arg; submitted filename */ 116 | #pragma unroll 117 | for (int i = 1; i <= MAXARG; i++) { 118 | void *ptr = NULL; 119 | 120 | if (bpf_probe_read(&ptr, sizeof(ptr), &ctx->argv[i])) 121 | goto out; 122 | if (ptr == NULL) 123 | goto out; 124 | if (bpf_probe_read_user_str(data.arg, sizeof(data.arg), ptr) < 0) 125 | goto out; 126 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 127 | &data, sizeof(data)) < 0) 128 | goto out; 129 | } 130 | 131 | __builtin_strcpy(data.arg, "..."); 132 | bpf_perf_event_output(ctx, 
&channel, BPF_F_CURRENT_CPU, 133 | &data, sizeof(data)); 134 | out: 135 | return 0; 136 | } 137 | 138 | SEC("tracepoint/syscalls/sys_exit_execve") 139 | int bpf_sys_exit_execve(struct execve_exit_args *ctx) 140 | { 141 | struct data data = { 142 | .time = bpf_ktime_get_ns(), 143 | .cpu = (u8) bpf_get_smp_processor_id(), 144 | .event_type = EVENT_RET, 145 | .retval = ctx->ret, 146 | }; 147 | 148 | set_current_info(&data); 149 | 150 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 151 | &data, sizeof(data)); 152 | 153 | return 0; 154 | } 155 | 156 | SEC("tracepoint/sched/sched_process_exit") 157 | int bpf_sched_exit(struct sched_exit_args *ctx) 158 | { 159 | struct data data = { 160 | .time = bpf_ktime_get_ns(), 161 | .cpu = (u8) bpf_get_smp_processor_id(), 162 | .event_type = EVENT_EXIT, 163 | }; 164 | 165 | __builtin_memcpy(data.comm, ctx->comm, 15); 166 | data.pid = ctx->pid; 167 | 168 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 169 | &data, sizeof(data)) < 0) { 170 | } 171 | 172 | return 0; 173 | } 174 | 175 | char _license[] SEC("license") = "GPL"; 176 | int _version SEC("version") = LINUX_VERSION_CODE; 177 | -------------------------------------------------------------------------------- /ksrc/execsnoop_legacy.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Snoop process exec(). Inspired by execsnoop.py from bcc repository 3 | * Version works on older kernels ... e.g., 4.14 4 | * 5 | * David Ahern 6 | */ 7 | 8 | #define KBUILD_MODNAME "execsnoop" 9 | #include /* pt_regs via asm/ptrace.h */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "execsnoop.h" 17 | #include "sched_tp.h" 18 | 19 | #include "channel_map.c" 20 | #include "set_current_info.c" 21 | 22 | /* expecting args to be filename, argv, envp */ 23 | SEC("kprobe/execve") 24 | int bpf_sys_execve(struct pt_regs *ctx) 25 | { 26 | struct data data = { 27 | .time = bpf_ktime_get_ns(), 28 | .cpu = (u8) bpf_get_smp_processor_id(), 29 | .event_type = EVENT_START 30 | }; 31 | void *filename = (void *)ctx->di; 32 | void **argv = (void **)ctx->si; 33 | bool bail = false; 34 | int i; 35 | 36 | set_current_info(&data); 37 | 38 | if (bpf_probe_read_str(data.arg, sizeof(data.arg), filename) < 0) { 39 | strcpy(data.arg, ""); 40 | bail = true; 41 | } 42 | 43 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 44 | &data, sizeof(data)) < 0 || bail) 45 | goto out; 46 | 47 | data.event_type = EVENT_ARG; 48 | 49 | /* skip first arg; submitted filename */ 50 | #pragma unroll 51 | for (int i = 1; i <= MAXARG; i++) { 52 | void *ptr = NULL; 53 | 54 | if (bpf_probe_read(&ptr, sizeof(ptr), &argv[i]) || ptr == NULL) 55 | goto out; 56 | if (bpf_probe_read_str(data.arg, sizeof(data.arg), ptr) < 0) 57 | goto out; 58 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 59 | &data, sizeof(data)) < 0) 60 | goto out; 61 | } 62 | 63 | strcpy(data.arg, "..."); 64 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 65 | &data, sizeof(data)); 66 | out: 67 | return 0; 68 | } 69 | 70 | SEC("kprobe/execve_ret") 71 | int bpf_sys_execve_ret(struct pt_regs *ctx) 72 | { 73 | struct data data = { 74 | .time = bpf_ktime_get_ns(), 75 | .cpu = (u8) bpf_get_smp_processor_id(), 76 | .event_type = EVENT_RET, 77 | .retval = ctx->ax, 78 | }; 79 | 80 | set_current_info(&data); 81 | 82 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 83 | &data, sizeof(data)); 84 | 85 | return 0; 86 | } 87 | 88 | 
SEC("tracepoint/sched/sched_process_exit") 89 | int bpf_sched_exit(struct sched_exit_args *ctx) 90 | { 91 | struct data data = { 92 | .time = bpf_ktime_get_ns(), 93 | .cpu = (u8) bpf_get_smp_processor_id(), 94 | .event_type = EVENT_EXIT, 95 | }; 96 | 97 | memcpy(data.comm, ctx->comm, 15); 98 | data.pid = ctx->pid; 99 | 100 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 101 | &data, sizeof(data)) < 0) { 102 | } 103 | 104 | return 0; 105 | } 106 | 107 | char _license[] SEC("license") = "GPL"; 108 | int _version SEC("version") = LINUX_VERSION_CODE; 109 | -------------------------------------------------------------------------------- /ksrc/flow.h: -------------------------------------------------------------------------------- 1 | #ifndef _FLOW_H_ 2 | #define _FLOW_H_ 3 | // SPDX-License-Identifier: GPL-2.0 4 | /* Copyright (c) 2019-2021 David Ahern 5 | * 6 | * Packet parser 7 | */ 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define ENABLE_FLOW_IPV6 20 | 21 | struct flow_ports { 22 | __be16 sport; 23 | __be16 dport; 24 | }; 25 | 26 | struct flow_icmp { 27 | __u8 type; 28 | __u8 code; 29 | __be16 id; 30 | }; 31 | 32 | #define TCP_FLAG_SYN 1 << 0 33 | #define TCP_FLAG_ACK 1 << 1 34 | 35 | /* used for dissecting packets */ 36 | struct flow { 37 | union { 38 | __u32 ipv4; 39 | #ifdef ENABLE_FLOW_IPV6 40 | struct in6_addr ipv6; 41 | #endif 42 | } saddr; 43 | 44 | union { 45 | __u32 ipv4; 46 | #ifdef ENABLE_FLOW_IPV6 47 | struct in6_addr ipv6; 48 | #endif 49 | } daddr; 50 | 51 | __be16 eth_proto; 52 | __u8 family; /* network address family */ 53 | __u8 protocol; /* L4 protocol */ 54 | __u8 fragment; 55 | __u8 inner_protocol; 56 | __u8 tcp_flags; 57 | __u32 inner_saddr; 58 | __u32 inner_daddr; 59 | 60 | union { 61 | struct flow_ports ports; 62 | struct flow_icmp icmp; 63 | }; 64 | }; 65 | 66 | #define PARSE_STOP_AT_NET 0x1 67 | 68 | #ifdef ENABLE_FLOW_IPV6 69 | static __always_inline int parse_icmp6(struct flow *fl, void *nh, 70 | void *data_end) 71 | { 72 | struct icmp6hdr *icmph = nh; 73 | 74 | if (icmph + 1 > data_end) 75 | return -1; 76 | 77 | fl->icmp.type = icmph->icmp6_type; 78 | fl->icmp.code = icmph->icmp6_code; 79 | 80 | switch (icmph->icmp6_type) { 81 | case ICMPV6_ECHO_REQUEST: 82 | case ICMPV6_ECHO_REPLY: 83 | fl->icmp.id = icmph->icmp6_identifier ? : 1; 84 | break; 85 | } 86 | 87 | return 0; 88 | } 89 | #endif 90 | 91 | static __always_inline int parse_icmp(struct flow *fl, void *nh, 92 | void *data_end) 93 | { 94 | struct icmphdr *icmph = nh; 95 | 96 | if (icmph + 1 > data_end) 97 | return -1; 98 | 99 | fl->icmp.type = icmph->type; 100 | fl->icmp.code = icmph->code; 101 | 102 | switch (icmph->type) { 103 | case ICMP_ECHO: 104 | case ICMP_ECHOREPLY: 105 | case ICMP_TIMESTAMP: 106 | case ICMP_TIMESTAMPREPLY: 107 | fl->icmp.id = icmph->un.echo.id ? 
: 1; 108 | break; 109 | } 110 | 111 | return 0; 112 | } 113 | 114 | static __always_inline int parse_udp(struct flow *fl, void *nh, 115 | void *data_end) 116 | { 117 | struct udphdr *uhdr = nh; 118 | 119 | if (uhdr + 1 > data_end) 120 | return -1; 121 | 122 | fl->ports.sport = uhdr->source; 123 | fl->ports.dport = uhdr->dest; 124 | 125 | return 0; 126 | } 127 | 128 | static __always_inline int parse_tcp(struct flow *fl, void *nh, 129 | void *data_end) 130 | { 131 | struct tcphdr *thdr = nh; 132 | 133 | if (thdr + 1 > data_end) 134 | return -1; 135 | 136 | fl->ports.sport = thdr->source; 137 | fl->ports.dport = thdr->dest; 138 | 139 | if (thdr->syn) 140 | fl->tcp_flags |= TCP_FLAG_SYN; 141 | if (thdr->ack) 142 | fl->tcp_flags |= TCP_FLAG_ACK; 143 | 144 | return 0; 145 | } 146 | 147 | static __always_inline int parse_transport(struct flow *fl, void *nh, 148 | void *data_end) 149 | { 150 | switch (fl->protocol) { 151 | case IPPROTO_TCP: 152 | return parse_tcp(fl, nh, data_end); 153 | case IPPROTO_UDP: 154 | return parse_udp(fl, nh, data_end); 155 | case IPPROTO_ICMP: 156 | return parse_icmp(fl, nh, data_end); 157 | case IPPROTO_ICMPV6: 158 | return parse_icmp6(fl, nh, data_end); 159 | } 160 | return 1; 161 | } 162 | 163 | #ifdef ENABLE_FLOW_IPV6 164 | static __always_inline int parse_v6(struct flow *fl, void *nh, void *data_end, 165 | unsigned int flags) 166 | { 167 | struct ipv6hdr *ip6h = nh; 168 | 169 | if (ip6h + 1 > data_end) 170 | return -1; 171 | 172 | if (ip6h->version != 6) 173 | return -1; 174 | 175 | fl->family = AF_INET6; 176 | fl->protocol = ip6h->nexthdr; 177 | fl->saddr.ipv6 = ip6h->saddr; 178 | fl->daddr.ipv6 = ip6h->daddr; 179 | 180 | if (flags & PARSE_STOP_AT_NET) 181 | return 0; 182 | 183 | nh += sizeof(*ip6h); 184 | return parse_transport(fl, nh, data_end); 185 | } 186 | #endif 187 | 188 | static __always_inline int parse_v4(struct flow *fl, void *nh, void *data_end, 189 | unsigned int flags) 190 | { 191 | struct iphdr *iph = nh; 192 | 193 | if (iph + 1 > data_end) 194 | return -1; 195 | 196 | if (iph->version != 4 || iph->ihl < 5) 197 | return -1; 198 | 199 | fl->family = AF_INET; 200 | fl->saddr.ipv4 = iph->saddr; 201 | fl->daddr.ipv4 = iph->daddr; 202 | fl->protocol = iph->protocol; 203 | 204 | /* fragments won't have the transport header */ 205 | if (ntohs(iph->frag_off) & (IP_MF | IP_OFFSET)) { 206 | fl->fragment = 1; 207 | return 0; 208 | } 209 | 210 | if (flags & PARSE_STOP_AT_NET) 211 | return 0; 212 | 213 | nh += (iph->ihl << 2); 214 | 215 | if (fl->protocol == IPPROTO_IPIP) { 216 | iph = nh; 217 | 218 | if (iph + 1 > data_end) 219 | return -1; 220 | 221 | if (iph->version != 4 || iph->ihl < 5) 222 | return -1; 223 | 224 | fl->inner_saddr = iph->saddr; 225 | fl->inner_daddr = iph->daddr; 226 | fl->inner_protocol = iph->protocol; 227 | if (ntohs(iph->frag_off) & (IP_MF | IP_OFFSET)) { 228 | fl->fragment = 1; 229 | return 0; 230 | } 231 | 232 | nh += (iph->ihl << 2); 233 | } 234 | return parse_transport(fl, nh, data_end); 235 | } 236 | 237 | /* 238 | * rc > 0: unhandled protocol 239 | * rc < 0: error parsing headers 240 | * rc == 0: all good 241 | */ 242 | static __always_inline int parse_pkt(struct flow *fl, void *data, 243 | void *data_end, unsigned int flags) 244 | { 245 | struct ethhdr *eth = data; 246 | void *nh = eth + 1; 247 | u16 eth_proto; 248 | int rc; 249 | 250 | if (nh > data_end) 251 | return true; 252 | 253 | eth_proto = eth->h_proto; 254 | #ifdef SUPPORT_QINQ 255 | if (eth_proto == htons(ETH_P_8021AD)) { 256 | struct vlan_hdr *vhdr; 257 | 258 | vhdr = 
nh; 259 | if (vhdr + 1 > data_end) 260 | return -1; 261 | 262 | nh += sizeof(*vhdr); 263 | eth_proto = vhdr->h_vlan_encapsulated_proto; 264 | } 265 | #endif 266 | if (eth_proto == htons(ETH_P_8021Q)) { 267 | struct vlan_hdr *vhdr; 268 | 269 | vhdr = nh; 270 | if (vhdr + 1 > data_end) 271 | return -1; 272 | 273 | nh += sizeof(*vhdr); 274 | eth_proto = vhdr->h_vlan_encapsulated_proto; 275 | } 276 | 277 | fl->eth_proto = eth_proto; 278 | if (eth_proto == htons(ETH_P_IP)) 279 | rc = parse_v4(fl, nh, data_end, flags); 280 | #ifdef ENABLE_FLOW_IPV6 281 | else if (eth_proto == htons(ETH_P_IPV6)) 282 | rc = parse_v6(fl, nh, data_end, flags); 283 | #endif 284 | else 285 | rc = 1; 286 | 287 | return rc; 288 | } 289 | #endif 290 | -------------------------------------------------------------------------------- /ksrc/include/asm_goto_workaround.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* Copyright (c) 2019 Facebook */ 3 | #ifndef __ASM_GOTO_WORKAROUND_H 4 | #define __ASM_GOTO_WORKAROUND_H 5 | 6 | /* 7 | * This will bring in asm_volatile_goto and asm_inline macro definitions 8 | * if enabled by compiler and config options. 9 | */ 10 | #include 11 | 12 | #ifdef asm_volatile_goto 13 | #undef asm_volatile_goto 14 | #define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") 15 | #endif 16 | 17 | /* 18 | * asm_inline is defined as asm __inline in "include/linux/compiler_types.h" 19 | * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not 20 | * supported by CLANG. 21 | */ 22 | #ifdef asm_inline 23 | #undef asm_inline 24 | #define asm_inline asm 25 | #endif 26 | 27 | #define volatile(x...) volatile("") 28 | #endif 29 | -------------------------------------------------------------------------------- /ksrc/ipv6_helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef _IPV6_HELPERS_H 2 | #define _IPV6_HELPERS_H 3 | 4 | static __always_inline bool do_ipv6_addr_cmp(const struct in6_addr *a1, 5 | const struct in6_addr *a2) 6 | { 7 | return a1->s6_addr32[0] == a2->s6_addr32[0] && 8 | a1->s6_addr32[1] == a2->s6_addr32[1] && 9 | a1->s6_addr32[2] == a2->s6_addr32[2] && 10 | a1->s6_addr32[3] == a2->s6_addr32[3]; 11 | } 12 | 13 | static __always_inline bool ipv6_is_any(const struct in6_addr *a1) 14 | { 15 | struct in6_addr a2 = {}; 16 | 17 | return do_ipv6_addr_cmp(a1, &a2); 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /ksrc/kvm-nested.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* track which processes are doing nested virt 3 | * David Ahern 4 | */ 5 | 6 | #define KBUILD_MODNAME "kvm_nested" 7 | #include 8 | #include 9 | #include 10 | 11 | struct bpf_map_def SEC("maps") nested_virt_map = { 12 | .type = BPF_MAP_TYPE_HASH, 13 | .key_size = sizeof(u64), 14 | .value_size = sizeof(u64), 15 | .max_entries = 512, 16 | }; 17 | 18 | static __always_inline void do_nested_kvm(void) 19 | { 20 | u64 pid = bpf_get_current_pid_tgid(); 21 | u64 *entry; 22 | 23 | entry = bpf_map_lookup_elem(&nested_virt_map, &pid); 24 | if (entry) { 25 | __sync_fetch_and_add(entry, 1); 26 | } else { 27 | u64 val = 1; 28 | 29 | bpf_map_update_elem(&nested_virt_map, &pid, &val, BPF_ANY); 30 | } 31 | } 32 | 33 | SEC("tracepoint/kvm/kvm_nested_vmexit") 34 | int tp_nested_kvm(void *ctx) 35 | { 36 | do_nested_kvm(); 37 | return 0; 38 
| } 39 | 40 | SEC("kprobe/handle_vmresume") 41 | int kp_nested_kvm(void *ctx) 42 | { 43 | do_nested_kvm(); 44 | return 0; 45 | } 46 | 47 | SEC("tracepoint/sched/sched_process_exit") 48 | int bpf_sched_exit(void *ctx) 49 | { 50 | u64 pid = bpf_get_current_pid_tgid(); 51 | u64 *entry; 52 | 53 | entry = bpf_map_lookup_elem(&nested_virt_map, &pid); 54 | if (entry) 55 | bpf_map_delete_elem(&nested_virt_map, &pid); 56 | 57 | return 0; 58 | } 59 | 60 | char _license[] SEC("license") = "GPL"; 61 | int _version SEC("version") = LINUX_VERSION_CODE; 62 | -------------------------------------------------------------------------------- /ksrc/napi_poll.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Track histogram of napi poll 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | 7 | #define KBUILD_MODNAME "napi_poll" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "napi_poll.h" 15 | 16 | #include "bpf_debug.h" 17 | 18 | struct bpf_map_def SEC("maps") napi_poll_map = { 19 | .type = BPF_MAP_TYPE_ARRAY, 20 | .key_size = sizeof(u32), 21 | .value_size = sizeof(struct napi_poll_hist), 22 | .max_entries = 1, 23 | }; 24 | 25 | SEC("tracepoint/napi/napi_poll") 26 | int bpf_napi_poll(struct napi_poll_args *ctx) 27 | { 28 | struct napi_poll_hist *hist; 29 | __u32 idx = 0; 30 | 31 | if (bpf_get_smp_processor_id() != 5) 32 | return 0; 33 | 34 | //bpf_debug("work %d budget %d\n", ctx->work, ctx->budget); 35 | hist = bpf_map_lookup_elem(&napi_poll_map, &idx); 36 | if (hist) { 37 | u64 *c; 38 | 39 | /* update hist entry */ 40 | if (ctx->work == 0) 41 | c = &hist->buckets[0]; 42 | else if (ctx->work == 1) 43 | c = &hist->buckets[1]; 44 | else if (ctx->work == 2) 45 | c = &hist->buckets[2]; 46 | else if (ctx->work < 5) 47 | c = &hist->buckets[3]; 48 | else if (ctx->work < 9) 49 | c = &hist->buckets[4]; 50 | else if (ctx->work < 17) 51 | c = &hist->buckets[5]; 52 | else if (ctx->work < 33) 53 | c = &hist->buckets[6]; 54 | else if (ctx->work < 64) 55 | c = &hist->buckets[7]; 56 | else 57 | c = &hist->buckets[8]; 58 | 59 | __sync_fetch_and_add(c, 1); 60 | } 61 | 62 | return 0; 63 | } 64 | 65 | char _license[] SEC("license") = "GPL"; 66 | int _version SEC("version") = LINUX_VERSION_CODE; 67 | -------------------------------------------------------------------------------- /ksrc/net_rx_action.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Track time to run net_rx_action 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | 7 | #define KBUILD_MODNAME "net_rx_action" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "net_rx_action.h" 15 | 16 | struct bpf_map_def SEC("maps") net_rx_map = { 17 | .type = BPF_MAP_TYPE_ARRAY, 18 | .key_size = sizeof(u32), 19 | .value_size = sizeof(struct net_rx_hist_val), 20 | .max_entries = 1, 21 | }; 22 | 23 | struct net_rx_enter { 24 | u64 t_enter; 25 | int cpu; 26 | }; 27 | 28 | struct bpf_map_def SEC("maps") net_rx_enter_map = { 29 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, 30 | .key_size = sizeof(u32), 31 | .value_size = sizeof(struct net_rx_enter), 32 | .max_entries = 1 33 | }; 34 | 35 | SEC("kprobe/net_rx_action") 36 | int bpf_net_rx_kprobe(struct pt_regs *ctx) 37 | { 38 | struct net_rx_enter *e; 39 | bool inc_error = false; 40 | u32 idx = 0; 41 | 42 | e = bpf_map_lookup_elem(&net_rx_enter_map, &idx); 43 | if (e) { 44 | if (e->t_enter || e->cpu != -1) 45 | 
inc_error = true; 46 | 47 | e->t_enter = bpf_ktime_get_ns(); 48 | e->cpu = bpf_get_smp_processor_id(); 49 | } else { 50 | inc_error = true; 51 | } 52 | 53 | if (inc_error) { 54 | struct net_rx_hist_val *hist; 55 | u32 idx = 0; 56 | 57 | hist = bpf_map_lookup_elem(&net_rx_map, &idx); 58 | if (hist) 59 | __sync_fetch_and_add(&hist->buckets[NET_RX_ERR_BKT], 1); 60 | } 61 | 62 | return 0; 63 | } 64 | 65 | SEC("kprobe/net_rx_action_ret") 66 | int bpf_net_rx_kprobe_ret(struct pt_regs *ctx) 67 | { 68 | struct net_rx_hist_val *hist; 69 | struct net_rx_enter *e; 70 | u32 idx = 0; 71 | 72 | e = bpf_map_lookup_elem(&net_rx_enter_map, &idx); 73 | if (!e) 74 | return 0; 75 | 76 | hist = bpf_map_lookup_elem(&net_rx_map, &idx); 77 | if (!hist) 78 | goto out; 79 | 80 | if (e->cpu != bpf_get_smp_processor_id() || !e->t_enter) { 81 | __sync_fetch_and_add(&hist->buckets[NET_RX_ERR_BKT], 1); 82 | goto out; 83 | } 84 | 85 | if (e->t_enter) { 86 | u64 t = bpf_ktime_get_ns(); 87 | u64 dt = (t - e->t_enter) / 1000; /* nsec to usec */ 88 | u64 *c; 89 | 90 | /* update hist entry */ 91 | if (dt <= NET_RX_BUCKET_0) 92 | c = &hist->buckets[0]; 93 | else if (dt <= NET_RX_BUCKET_1) 94 | c = &hist->buckets[1]; 95 | else if (dt <= NET_RX_BUCKET_2) 96 | c = &hist->buckets[2]; 97 | else if (dt <= NET_RX_BUCKET_3) 98 | c = &hist->buckets[3]; 99 | else if (dt <= NET_RX_BUCKET_4) 100 | c = &hist->buckets[4]; 101 | else if (dt <= NET_RX_BUCKET_5) 102 | c = &hist->buckets[5]; 103 | else if (dt <= NET_RX_BUCKET_6) 104 | c = &hist->buckets[6]; 105 | else if (dt <= NET_RX_BUCKET_7) 106 | c = &hist->buckets[7]; 107 | else if (dt <= NET_RX_BUCKET_8) 108 | c = &hist->buckets[8]; 109 | else 110 | c = &hist->buckets[9]; 111 | 112 | __sync_fetch_and_add(c, 1); 113 | } 114 | out: 115 | e->t_enter = 0; 116 | e->cpu = -1; 117 | return 0; 118 | } 119 | 120 | char _license[] SEC("license") = "GPL"; 121 | int _version SEC("version") = LINUX_VERSION_CODE; 122 | -------------------------------------------------------------------------------- /ksrc/opensnoop.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Entry and return robes on do_sys_open to track file opens 3 | * by processes. Data sent to userspace using perf_events 4 | * and channel map. 
5 | * 6 | * David Ahern 7 | */ 8 | #define KBUILD_MODNAME "opensnoop" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "opensnoop.h" 17 | 18 | #include "channel_map.c" 19 | #include "set_current_info.c" 20 | 21 | SEC("kprobe/do_sys_open") 22 | int bpf_sys_open(struct pt_regs *ctx) 23 | { 24 | struct data data = { 25 | .time = bpf_ktime_get_ns(), 26 | .cpu = (u8) bpf_get_smp_processor_id(), 27 | .event_type = EVENT_ARG, 28 | }; 29 | char *filename = (char *)PT_REGS_PARM2(ctx); 30 | unsigned long flags = PT_REGS_PARM3(ctx); 31 | unsigned long mode = PT_REGS_PARM4(ctx); 32 | 33 | set_current_info(&data); 34 | 35 | bpf_probe_read_str(data.filename, sizeof(data.filename), filename); 36 | data.flags = (u32) flags; 37 | data.mode = (u32) mode; 38 | 39 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 40 | &data, sizeof(data)); 41 | 42 | return 0; 43 | } 44 | 45 | SEC("kprobe/do_sys_open_ret") 46 | int bpf_sys_open_ret(struct pt_regs *ctx) 47 | { 48 | struct data data = { 49 | .time = bpf_ktime_get_ns(), 50 | .cpu = (u8) bpf_get_smp_processor_id(), 51 | .event_type = EVENT_RET, 52 | .retval = ctx->ax 53 | }; 54 | 55 | set_current_info(&data); 56 | 57 | bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 58 | &data, sizeof(data)); 59 | 60 | return 0; 61 | } 62 | 63 | char _license[] SEC("license") = "GPL"; 64 | int _version SEC("version") = LINUX_VERSION_CODE; 65 | -------------------------------------------------------------------------------- /ksrc/ovslatency.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Track latency induced by OVS. 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | 7 | #define KBUILD_MODNAME "ovslatency" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "ovslatency.h" 15 | 16 | struct bpf_map_def SEC("maps") ovslat_map = { 17 | .type = BPF_MAP_TYPE_ARRAY, 18 | .key_size = sizeof(u32), 19 | .value_size = sizeof(struct ovslat_hist_val), 20 | .max_entries = 1, 21 | }; 22 | 23 | struct ovs_enter { 24 | u64 t_enter; 25 | void *skb; 26 | }; 27 | 28 | struct bpf_map_def SEC("maps") ovs_enter_map = { 29 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, 30 | .key_size = sizeof(u32), 31 | .value_size = sizeof(struct ovs_enter), 32 | .max_entries = 1 33 | }; 34 | 35 | SEC("kprobe/ovs_vport_receive") 36 | int bpf_ovs_kprobe(struct pt_regs *ctx) 37 | { 38 | struct ovs_enter *e; 39 | u32 idx = 0; 40 | 41 | e = bpf_map_lookup_elem(&ovs_enter_map, &idx); 42 | if (e) { 43 | e->t_enter = bpf_ktime_get_ns(); 44 | e->skb = (void *)PT_REGS_PARM1(ctx); 45 | } 46 | 47 | return 0; 48 | } 49 | 50 | SEC("kprobe/ovs_vport_receive_ret") 51 | int bpf_ovs_kprobe_ret(struct pt_regs *ctx) 52 | { 53 | struct ovs_enter *e; 54 | u32 idx = 0; 55 | 56 | e = bpf_map_lookup_elem(&ovs_enter_map, &idx); 57 | if (!e) 58 | goto out; 59 | 60 | if (e->t_enter) { 61 | struct ovslat_hist_val *hist; 62 | u64 t = bpf_ktime_get_ns(); 63 | u64 dt = (t - e->t_enter) / 1000; /* nsec to usec */ 64 | 65 | hist = bpf_map_lookup_elem(&ovslat_map, &idx); 66 | if (hist) { 67 | u64 *c; 68 | 69 | __sync_fetch_and_add(&hist->buckets[7], 1); 70 | 71 | /* update hist entry */ 72 | if (dt <= OVS_BUCKET_0) 73 | c = &hist->buckets[0]; 74 | else if (dt <= OVS_BUCKET_1) 75 | c = &hist->buckets[1]; 76 | else if (dt <= OVS_BUCKET_2) 77 | c = &hist->buckets[2]; 78 | else if (dt <= OVS_BUCKET_3) 79 | c = &hist->buckets[3]; 80 | else if (dt <= OVS_BUCKET_4) 81 | c = 
&hist->buckets[4]; 82 | else if (dt <= OVS_BUCKET_5) 83 | c = &hist->buckets[5]; 84 | else 85 | c = &hist->buckets[6]; 86 | 87 | __sync_fetch_and_add(c, 1); 88 | } 89 | } 90 | e->t_enter = 0; 91 | e->skb = NULL; 92 | out: 93 | return 0; 94 | } 95 | 96 | char _license[] SEC("license") = "GPL"; 97 | int _version SEC("version") = LINUX_VERSION_CODE; 98 | -------------------------------------------------------------------------------- /ksrc/pktdrop.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* ebpf program on skb/kfree_skb tracepoint. Adds a sample to perf_event 3 | * buffer for the first 64 bytes of the packet and skb meta-data. 4 | * Attempts to get namespace from skb device or dst attached to skb. 5 | * 6 | * Copyright (c) 2019-2020 David Ahern 7 | */ 8 | #define KBUILD_MODNAME "pktdrop" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "pktdrop.h" 18 | 19 | #include "channel_map.c" 20 | 21 | SEC("tracepoint/skb/kfree_skb") 22 | int bpf_kfree_skb(struct kfree_skb_args *ctx) 23 | { 24 | struct data data = { 25 | .time = bpf_ktime_get_ns(), 26 | .event_type = EVENT_SAMPLE, 27 | .cpu = (u8) bpf_get_smp_processor_id(), 28 | }; 29 | struct sk_buff *skb = ctx->skbaddr; 30 | struct net_device *dev; 31 | u16 mhdr, nhdr, thdr; 32 | unsigned char *head; 33 | unsigned int end; 34 | int ifindex = -1; 35 | u8 pkt_type; 36 | 37 | data.location = (u64)ctx->location; 38 | data.protocol = htons(ctx->protocol); 39 | data.reason = htons(ctx->reason); 40 | 41 | /* Try to find a net_device. Prefer skb->dev but it gets 42 | * dropped at the transport layer. 43 | */ 44 | if (bpf_probe_read(&dev, sizeof(dev), &skb->dev) || !dev || 45 | bpf_probe_read(&ifindex, sizeof(ifindex), &dev->ifindex)) { 46 | unsigned long skb_refdst = 0; 47 | 48 | /* fallback to skb_iif which should be set on ingress */ 49 | if (bpf_probe_read(&ifindex, sizeof(ifindex), &skb->skb_iif)) 50 | ifindex = -1; 51 | 52 | if (!bpf_probe_read(&skb_refdst, sizeof(skb_refdst), 53 | &skb->_skb_refdst) && skb_refdst) { 54 | struct dst_entry *dst; 55 | 56 | dst = (struct dst_entry *)(skb_refdst & SKB_DST_PTRMASK); 57 | bpf_probe_read(&dev, sizeof(dev), &dst->dev); 58 | } 59 | } 60 | 61 | data.ifindex = ifindex; 62 | 63 | /* assumes network namespaces enabled */ 64 | if (dev) 65 | bpf_probe_read(&data.netns, sizeof(data.netns), &dev->nd_net); 66 | 67 | bpf_probe_read(&data.pkt_len, sizeof(data.pkt_len), &skb->len); 68 | if (!bpf_probe_read(&pkt_type, sizeof(pkt_type), &skb->__pkt_type_offset)) 69 | data.pkt_type = pkt_type & 7; 70 | 71 | bpf_probe_read(&data.vlan_tci, sizeof(data.vlan_tci), &skb->vlan_tci); 72 | bpf_probe_read(&data.vlan_proto, sizeof(data.vlan_proto), 73 | &skb->vlan_proto); 74 | 75 | if (!bpf_probe_read(&head, sizeof(head), &skb->head) && 76 | !bpf_probe_read(&mhdr, sizeof(mhdr), &skb->mac_header) && 77 | !bpf_probe_read(&nhdr, sizeof(nhdr), &skb->network_header) && 78 | !bpf_probe_read(&thdr, sizeof(thdr), &skb->transport_header)) { 79 | u8 *skbdata = head + mhdr; 80 | 81 | data.pkt_len += nhdr + thdr; 82 | bpf_probe_read(data.pkt_data, sizeof(data.pkt_data), skbdata); 83 | } 84 | 85 | /* get frags and gso size information if possible. Based on 86 | * the expansion of skb_shinfo(skb) which relies on 87 | * skb_end_pointer which is a function of BITS_PER_LONG. This 88 | * expansion is for 64-bit. 
89 | */ 90 | if (!bpf_probe_read(&end, sizeof(end), &skb->end)) { 91 | struct skb_shared_info *sh; 92 | 93 | sh = (struct skb_shared_info *) (head + end); 94 | bpf_probe_read(&data.nr_frags, sizeof(data.nr_frags), 95 | &sh->nr_frags); 96 | 97 | bpf_probe_read(&data.gso_size, sizeof(data.gso_size), 98 | &sh->gso_size); 99 | } 100 | 101 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 102 | &data, sizeof(data)) < 0) { 103 | } 104 | 105 | return 0; 106 | } 107 | 108 | /* capture network namespace delete */ 109 | SEC("kprobe/fib_net_exit") 110 | int bpf_fib_net_exit(struct pt_regs *ctx) 111 | { 112 | struct data data = { 113 | .time = bpf_ktime_get_ns(), 114 | .event_type = EVENT_EXIT, 115 | .cpu = (u8) bpf_get_smp_processor_id(), 116 | }; 117 | struct net *net = (struct net *)ctx->di; 118 | 119 | if (net) { 120 | data.netns = ctx->di; 121 | 122 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 123 | &data, sizeof(data)) < 0) { 124 | } 125 | } 126 | 127 | return 0; 128 | } 129 | 130 | char _license[] SEC("license") = "GPL"; 131 | int _version SEC("version") = LINUX_VERSION_CODE; 132 | -------------------------------------------------------------------------------- /ksrc/pktlatency.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Monitor packet latency. Latency is measured as the time between 3 | * PTP timestamping in the NIC and handoff to process (currently 4 | * only users of skb_copy_datagram_iovec - e.g., virtual machines). 5 | * 6 | * Data is collected as a histogram per process id with samples 7 | * exceeding a threshold sent to userspace for further analysis 8 | * (e.g., to show affected flow). 9 | * 10 | * Userspace updates control map with a conversion between ptp 11 | * and monotonic timestamps (good enough for the purpose at hand) 12 | * as well threshold for generating samples. 
13 | * 14 | * Copyright (c) 2019-2020 David Ahern 15 | */ 16 | 17 | #define KBUILD_MODNAME "pktlatency" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "pktlatency.h" 25 | 26 | #include "channel_map.c" 27 | 28 | struct bpf_map_def SEC("maps") pktlat_map = { 29 | .type = BPF_MAP_TYPE_HASH, // BPF_MAP_TYPE_PERCPU_HASH 30 | .key_size = sizeof(struct pktlat_hist_key), 31 | .value_size = sizeof(struct pktlat_hist_val), 32 | .max_entries = 512, 33 | }; 34 | 35 | struct bpf_map_def SEC("maps") pktlat_ctl_map = { 36 | .type = BPF_MAP_TYPE_ARRAY, 37 | .key_size = sizeof(u32), 38 | .value_size = sizeof(struct pktlat_ctl), 39 | .max_entries = 1, 40 | }; 41 | 42 | static __always_inline int update_stats(struct pktlat_hist_val *hist, 43 | struct pktlat_ctl *ctl, 44 | u64 tstamp) 45 | { 46 | u64 hw_mono, dt, t; 47 | 48 | if (!tstamp) { 49 | hist->buckets[7]++; 50 | return 0; 51 | } 52 | 53 | /* convert ptp time to monotonic */ 54 | if (tstamp > ctl->ptp_ref) 55 | hw_mono = ctl->mono_ref + (tstamp - ctl->ptp_ref); 56 | else 57 | hw_mono = ctl->mono_ref - (ctl->ptp_ref - tstamp); 58 | 59 | t = bpf_ktime_get_ns(); 60 | dt = (t - hw_mono)/1000; 61 | 62 | if (dt <= PKTLAT_BUCKET_0) 63 | hist->buckets[0]++; 64 | else if (dt <= PKTLAT_BUCKET_1) 65 | hist->buckets[1]++; 66 | else if (dt <= PKTLAT_BUCKET_2) 67 | hist->buckets[2]++; 68 | else if (dt <= PKTLAT_BUCKET_3) 69 | hist->buckets[3]++; 70 | else if (dt <= PKTLAT_BUCKET_4) 71 | hist->buckets[4]++; 72 | else if (dt <= PKTLAT_BUCKET_5) 73 | hist->buckets[5]++; 74 | else 75 | hist->buckets[6]++; 76 | 77 | hist->buckets[8] += dt; 78 | 79 | /* TO-DO: moving average */ 80 | 81 | if (ctl->latency_gen_sample && dt > ctl->latency_gen_sample) 82 | return 1; 83 | 84 | return 0; 85 | } 86 | 87 | static __always_inline void gen_sample(struct skb_dg_iov_args *ctx, 88 | u64 tstamp, int ifindex, u32 pid, 89 | bool with_skb_data) 90 | { 91 | struct data data; 92 | 93 | memset(&data, 0, sizeof(data)); 94 | 95 | data.event_type = EVENT_SAMPLE; 96 | data.time = bpf_ktime_get_ns(); 97 | data.cpu = (u8) bpf_get_smp_processor_id(); 98 | 99 | data.tstamp = tstamp; 100 | data.ifindex = ifindex; 101 | data.pid = pid; 102 | data.pkt_len = ctx->len; 103 | 104 | if (with_skb_data) { 105 | struct sk_buff *skb = ctx->skbaddr; 106 | unsigned char *head; 107 | u16 mac_header; 108 | u8 *skbdata; 109 | 110 | bpf_probe_read(&data.protocol, sizeof(data.protocol), 111 | &skb->protocol); 112 | 113 | if (!bpf_probe_read(&head, sizeof(head), &skb->head) && 114 | !bpf_probe_read(&mac_header, sizeof(mac_header), 115 | &skb->mac_header)) { 116 | skbdata = head + mac_header; 117 | bpf_probe_read(data.pkt_data, sizeof(data.pkt_data), skbdata); 118 | } 119 | } 120 | 121 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 122 | &data, sizeof(data)) < 0) { 123 | } 124 | } 125 | 126 | static __always_inline void get_skb_tstamp(struct sk_buff *skb, u64 *tstamp) 127 | { 128 | unsigned char *head; 129 | unsigned int end; 130 | 131 | if (!bpf_probe_read(&head, sizeof(head), &skb->head) && 132 | !bpf_probe_read(&end, sizeof(end), &skb->end)) { 133 | struct skb_shared_hwtstamps *hwtstamp; 134 | struct skb_shared_info *sh; 135 | 136 | sh = (struct skb_shared_info *) (head + end); 137 | hwtstamp = &sh->hwtstamps; 138 | bpf_probe_read(tstamp, sizeof(*tstamp), &hwtstamp->hwtstamp); 139 | } 140 | } 141 | 142 | SEC("tracepoint/skb/skb_copy_datagram_iovec") 143 | int bpf_skb_dg_iov(struct skb_dg_iov_args *ctx) 144 | { 145 | struct sk_buff *skb = ctx->skbaddr; 146 | 
struct pktlat_hist_key hkey = {}; 147 | struct pktlat_hist_val *hist; 148 | bool with_skb_data = false; 149 | struct pktlat_ctl *ctl; 150 | struct net_device *dev; 151 | int ifindex = -1; 152 | u64 tstamp = 0; 153 | u32 key = 0; 154 | 155 | ctl = bpf_map_lookup_elem(&pktlat_ctl_map, &key); 156 | if (!ctl) 157 | return 0; 158 | 159 | if (bpf_probe_read(&dev, sizeof(dev), &skb->dev)) 160 | ifindex = -2; 161 | else if (!dev) 162 | ifindex = -3; 163 | else if (bpf_probe_read(&ifindex, sizeof(ifindex), &dev->ifindex)) 164 | ifindex = -4; 165 | 166 | /* this should limit samples to tap devices only */ 167 | if (ifindex < ctl->ifindex_min) 168 | goto out; 169 | 170 | get_skb_tstamp(skb, &tstamp); 171 | 172 | hkey.pid = (u32) (bpf_get_current_pid_tgid() >> 32); 173 | 174 | hist = bpf_map_lookup_elem(&pktlat_map, &hkey); 175 | if (hist) { 176 | if (update_stats(hist, ctl, tstamp)) 177 | with_skb_data = true; 178 | } else { 179 | struct pktlat_hist_val hist2; 180 | 181 | memset(&hist2, 0, sizeof(hist2)); 182 | if (update_stats(&hist2, ctl, tstamp)) 183 | with_skb_data = true; 184 | bpf_map_update_elem(&pktlat_map, &hkey, &hist2, BPF_ANY); 185 | } 186 | 187 | if ((tstamp && ctl->gen_samples) || with_skb_data) 188 | gen_sample(ctx, tstamp, ifindex, hkey.pid, with_skb_data); 189 | 190 | out: 191 | return 0; 192 | } 193 | 194 | SEC("tracepoint/sched/sched_process_exit") 195 | int bpf_sched_exit(struct sched_exit_args *ctx) 196 | { 197 | struct pktlat_hist_key hkey = { 198 | .pid = (u32)(bpf_get_current_pid_tgid() >> 32), 199 | }; 200 | struct data data; 201 | 202 | if (!bpf_map_lookup_elem(&pktlat_map, &hkey)) 203 | return 0; 204 | 205 | bpf_map_delete_elem(&pktlat_map, &hkey); 206 | 207 | memset(&data, 0, sizeof(data)); 208 | data.event_type = EVENT_EXIT, 209 | data.time = bpf_ktime_get_ns(); 210 | data.pid = ctx->pid; 211 | data.cpu = (u8) bpf_get_smp_processor_id(); 212 | 213 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 214 | &data, sizeof(data)) < 0) { 215 | } 216 | 217 | return 0; 218 | } 219 | 220 | char _license[] SEC("license") = "GPL"; 221 | int _version SEC("version") = LINUX_VERSION_CODE; 222 | -------------------------------------------------------------------------------- /ksrc/rx_acl.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Copyright (c) 2019-2020 David Ahern 3 | * 4 | * Rx ACL 5 | */ 6 | #define KBUILD_MODNAME "rx_acl" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "xdp_acl.h" 18 | #include "acl_simple.h" 19 | 20 | struct bpf_map_def SEC("maps") rx_acl_map = { 21 | .type = BPF_MAP_TYPE_HASH, 22 | .key_size = sizeof(struct acl_key), 23 | .value_size = sizeof(struct acl_val), 24 | .max_entries = 64, 25 | }; 26 | 27 | SEC("classifier/rx_acl") 28 | int tc_acl_rx_prog(struct __sk_buff *skb) 29 | { 30 | void *data_end = (void *)(long)skb->data_end; 31 | void *data = (void *)(long)skb->data; 32 | u32 idx = skb->ifindex; 33 | struct flow fl = {}; 34 | bool rc; 35 | 36 | rc = drop_packet(data, data_end, NULL, true, &fl, &rx_acl_map); 37 | 38 | return rc ? TC_ACT_SHOT : TC_ACT_OK; 39 | } 40 | 41 | SEC("xdp/rx_acl") 42 | int xdp_rx_acl_prog(struct xdp_md *ctx) 43 | { 44 | void *data_end = (void *)(long)ctx->data_end; 45 | void *data = (void *)(long)ctx->data; 46 | struct flow fl = {}; 47 | bool rc; 48 | 49 | rc = drop_packet(data, data_end, NULL, true, &fl, &rx_acl_map); 50 | 51 | return rc ? 
XDP_DROP : XDP_PASS; 52 | } 53 | 54 | char _license[] SEC("license") = "GPL"; 55 | -------------------------------------------------------------------------------- /ksrc/set_current_info.c: -------------------------------------------------------------------------------- 1 | 2 | /* struct data changes between probes, but all definitions are 3 | * expected to have comm, pid and ppid fields 4 | */ 5 | static __always_inline void set_current_info(struct data *data) 6 | { 7 | struct task_struct *task, *parent; 8 | u64 tgid; 9 | 10 | tgid = bpf_get_current_pid_tgid(); 11 | data->pid = (u32)(tgid >> 32); 12 | data->tid = (u32)tgid; 13 | 14 | task = (struct task_struct *)bpf_get_current_task(); 15 | 16 | if (!bpf_probe_read(&parent, sizeof(parent), &task->real_parent)) 17 | bpf_probe_read(&data->ppid, sizeof(data->ppid), &parent->tgid); 18 | 19 | bpf_get_current_comm(&data->comm, sizeof(data->comm)); 20 | } 21 | -------------------------------------------------------------------------------- /ksrc/tcp_probe.c: -------------------------------------------------------------------------------- 1 | #define KBUILD_MODNAME "tcp_probe" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "tcp_probe.h" 10 | 11 | #include "channel_map.c" 12 | 13 | SEC("tracepoint/tcp/tcp_probe") 14 | int bpf_tcp_probe(struct tcp_probe_args *ctx) 15 | { 16 | struct data data = { 17 | .time = bpf_ktime_get_ns(), 18 | .cpu = bpf_get_smp_processor_id(), 19 | }; 20 | 21 | if (ctx->sport == 22 || ctx->dport == 22) 22 | return 0; 23 | 24 | __builtin_memcpy(&data.s_addr, &ctx->s_addr, sizeof(struct sockaddr_in6)); 25 | __builtin_memcpy(&data.d_addr, &ctx->d_addr, sizeof(struct sockaddr_in6)); 26 | data.mark = ctx->mark; 27 | data.data_len = ctx->data_len; 28 | data.snd_nxt = ctx->snd_nxt; 29 | data.snd_una = ctx->snd_una; 30 | data.snd_cwnd = ctx->snd_cwnd; 31 | data.ssthresh = ctx->ssthresh; 32 | data.snd_wnd = ctx->snd_wnd; 33 | data.srtt = ctx->srtt; 34 | data.rcv_wnd = ctx->rcv_wnd; 35 | 36 | if (bpf_perf_event_output(ctx, &channel, BPF_F_CURRENT_CPU, 37 | &data, sizeof(data)) < 0) { 38 | // TO-DO: track number of failed writes? 39 | // bpf printk?? 
40 | } 41 | 42 | return 0; 43 | } 44 | 45 | char _license[] SEC("license") = "GPL"; 46 | int _version SEC("version") = LINUX_VERSION_CODE; 47 | -------------------------------------------------------------------------------- /ksrc/xdp_devmap_xmit.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Track histogram of work done on devmap flush 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | 7 | #define KBUILD_MODNAME "xdp_devmap_xmit" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "xdp_devmap_xmit.h" 15 | 16 | #include "bpf_debug.h" 17 | 18 | struct bpf_map_def SEC("maps") devmap_xmit_map = { 19 | .type = BPF_MAP_TYPE_ARRAY, 20 | .key_size = sizeof(u32), 21 | .value_size = sizeof(struct devmap_xmit_hist), 22 | .max_entries = 1, 23 | }; 24 | 25 | SEC("tracepoint/xdp/xdp_devmap_xmit") 26 | int bpf_devmap_xmit(struct devmap_xmit_args *ctx) 27 | { 28 | struct devmap_xmit_hist *hist; 29 | __u32 idx = 0; 30 | 31 | hist = bpf_map_lookup_elem(&devmap_xmit_map, &idx); 32 | if (hist) { 33 | u64 *c; 34 | 35 | /* update hist entry */ 36 | if (ctx->sent == 0) 37 | c = &hist->buckets[0]; 38 | else if (ctx->sent == 1) 39 | c = &hist->buckets[1]; 40 | else if (ctx->sent == 2) 41 | c = &hist->buckets[2]; 42 | else if (ctx->sent < 5) 43 | c = &hist->buckets[3]; 44 | else if (ctx->sent < 9) 45 | c = &hist->buckets[4]; 46 | else if (ctx->sent < 16) 47 | c = &hist->buckets[5]; 48 | else if (ctx->sent == 16) 49 | c = &hist->buckets[6]; 50 | else if (ctx->sent < 33) 51 | c = &hist->buckets[7]; 52 | else if (ctx->sent < 64) 53 | c = &hist->buckets[8]; 54 | else 55 | c = &hist->buckets[9]; 56 | 57 | __sync_fetch_and_add(c, 1); 58 | } 59 | 60 | return 0; 61 | } 62 | 63 | char _license[] SEC("license") = "GPL"; 64 | int _version SEC("version") = LINUX_VERSION_CODE; 65 | -------------------------------------------------------------------------------- /ksrc/xdp_dummy.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Dummy XDP program 3 | * 4 | * David Ahern 5 | */ 6 | #define KBUILD_MODNAME "xdp_dummy" 7 | #include 8 | #include 9 | #include 10 | 11 | SEC("xdp_dummy") 12 | int xdp_dummy_prog(struct xdp_md *ctx) 13 | { 14 | //bpf_debug("ingress: device %u queue %u\n", 15 | // ctx->ingress_ifindex, ctx->rx_queue_index); 16 | 17 | return XDP_PASS; 18 | } 19 | 20 | char _license[] SEC("license") = "GPL"; 21 | int _version SEC("version") = LINUX_VERSION_CODE; 22 | -------------------------------------------------------------------------------- /ksrc/xdp_l2fwd.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Example of L2 forwarding via XDP. FDB is a hash table 3 | * returning device index to redirect packet. 
4 | * 5 | * Copyright (c) 2019-2020 David Ahern 6 | */ 7 | #define KBUILD_MODNAME "xdp_l2fwd" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "xdp_fdb.h" 17 | 18 | /* For TX-traffic redirect requires net_device ifindex to be in this devmap */ 19 | struct bpf_map_def SEC("maps") xdp_fwd_ports = { 20 | .type = BPF_MAP_TYPE_DEVMAP_HASH, 21 | .key_size = sizeof(u32), 22 | .value_size = sizeof(struct bpf_devmap_val), 23 | .max_entries = 512, 24 | }; 25 | 26 | /* to device index map */ 27 | struct bpf_map_def SEC("maps") fdb_map = { 28 | .type = BPF_MAP_TYPE_HASH, 29 | .key_size = sizeof(struct fdb_key), 30 | .value_size = sizeof(u32), 31 | .max_entries = 512, 32 | }; 33 | 34 | SEC("xdp_l2fwd") 35 | int xdp_l2fwd_prog(struct xdp_md *ctx) 36 | { 37 | void *data_end = (void *)(long)ctx->data_end; 38 | void *data = (void *)(long)ctx->data; 39 | struct bpf_devmap_val *entry; 40 | struct vlan_hdr *vhdr = NULL; 41 | struct ethhdr *eth; 42 | struct fdb_key key; 43 | u8 smac[ETH_ALEN]; 44 | u16 h_proto = 0; 45 | void *nh; 46 | int rc; 47 | 48 | /* data in context points to ethernet header */ 49 | eth = data; 50 | 51 | /* set pointer to header after ethernet header */ 52 | nh = data + sizeof(*eth); 53 | if (nh > data_end) 54 | return XDP_DROP; // malformed packet 55 | 56 | __builtin_memset(&key, 0, sizeof(key)); 57 | __builtin_memcpy(key.mac, eth->h_dest, ETH_ALEN); 58 | 59 | if (eth->h_proto == htons(ETH_P_8021Q)) { 60 | vhdr = nh; 61 | if (vhdr + 1 > data_end) 62 | return XDP_DROP; // malformed packet 63 | 64 | key.vlan = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; 65 | } 66 | 67 | entry = bpf_map_lookup_elem(&fdb_map, &key); 68 | if (!entry || entry->ifindex == 0) 69 | return XDP_PASS; 70 | 71 | /* Verify redirect index exists in port map */ 72 | if (!bpf_map_lookup_elem(&xdp_fwd_ports, &entry->ifindex)) 73 | return XDP_PASS; 74 | 75 | if (vhdr) { 76 | /* remove VLAN header before hand off to VM */ 77 | h_proto = vhdr->h_vlan_encapsulated_proto; 78 | __builtin_memcpy(smac, eth->h_source, ETH_ALEN); 79 | 80 | if (bpf_xdp_adjust_head(ctx, sizeof(*vhdr))) 81 | return XDP_PASS; 82 | 83 | /* reset data pointers after adjust */ 84 | data = (void *)(long)ctx->data; 85 | data_end = (void *)(long)ctx->data_end; 86 | eth = data; 87 | if (eth + 1 > data_end) 88 | return XDP_DROP; 89 | 90 | __builtin_memcpy(eth->h_dest, key.mac, ETH_ALEN); 91 | __builtin_memcpy(eth->h_source, smac, ETH_ALEN); 92 | eth->h_proto = h_proto; 93 | } 94 | 95 | return bpf_redirect_map(&xdp_fwd_ports, entry->ifindex, 0); 96 | } 97 | 98 | char _license[] SEC("license") = "GPL"; 99 | int _version SEC("version") = LINUX_VERSION_CODE; 100 | -------------------------------------------------------------------------------- /ksrc/xdp_l3fwd.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Example of L3 forwarding via XDP and use of bpf FIB lookup helper. 
3 | * 4 | * Copyright (c) 2017-18 David Ahern 5 | */ 6 | #define KBUILD_MODNAME "xdp_l3fwd" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) 18 | 19 | struct bpf_map_def SEC("maps") xdp_l3fwd_ports = { 20 | .type = BPF_MAP_TYPE_DEVMAP, 21 | .key_size = sizeof(int), 22 | .value_size = sizeof(int), 23 | .max_entries = 512, 24 | }; 25 | 26 | /* from include/net/ip.h */ 27 | static __always_inline int ip_decrease_ttl(struct iphdr *iph) 28 | { 29 | u32 check = (__force u32)iph->check; 30 | 31 | check += (__force u32)htons(0x0100); 32 | iph->check = (__force __sum16)(check + (check >= 0xFFFF)); 33 | return --iph->ttl; 34 | } 35 | 36 | static __always_inline int xdp_l3fwd_flags(struct xdp_md *ctx, u32 flags) 37 | { 38 | void *data_end = (void *)(long)ctx->data_end; 39 | void *data = (void *)(long)ctx->data; 40 | struct bpf_fib_lookup fib_params; 41 | struct ethhdr *eth = data; 42 | struct ipv6hdr *ip6h; 43 | struct iphdr *iph; 44 | u16 h_proto; 45 | u64 nh_off; 46 | int rc; 47 | 48 | nh_off = sizeof(*eth); 49 | if (data + nh_off > data_end) 50 | return XDP_DROP; 51 | 52 | __builtin_memset(&fib_params, 0, sizeof(fib_params)); 53 | 54 | h_proto = eth->h_proto; 55 | if (h_proto == htons(ETH_P_IP)) { 56 | iph = data + nh_off; 57 | 58 | if (iph + 1 > data_end) 59 | return XDP_DROP; 60 | 61 | if (iph->ttl <= 1) 62 | return XDP_PASS; 63 | 64 | fib_params.family = AF_INET; 65 | fib_params.tos = iph->tos; 66 | fib_params.l4_protocol = iph->protocol; 67 | fib_params.tot_len = ntohs(iph->tot_len); 68 | fib_params.ipv4_src = iph->saddr; 69 | fib_params.ipv4_dst = iph->daddr; 70 | } else if (h_proto == htons(ETH_P_IPV6)) { 71 | struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; 72 | struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; 73 | 74 | ip6h = data + nh_off; 75 | if (ip6h + 1 > data_end) 76 | return XDP_DROP; 77 | 78 | if (ip6h->hop_limit <= 1) 79 | return XDP_PASS; 80 | 81 | fib_params.family = AF_INET6; 82 | fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; 83 | fib_params.l4_protocol = ip6h->nexthdr; 84 | fib_params.tot_len = ntohs(ip6h->payload_len); 85 | *src = ip6h->saddr; 86 | *dst = ip6h->daddr; 87 | } else { 88 | return XDP_PASS; 89 | } 90 | 91 | fib_params.ifindex = ctx->ingress_ifindex; 92 | 93 | rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); 94 | if (rc == BPF_FIB_LKUP_RET_SUCCESS) { 95 | if (!bpf_map_lookup_elem(&xdp_l3fwd_ports, &fib_params.ifindex)) 96 | return XDP_PASS; 97 | 98 | if (h_proto == htons(ETH_P_IP)) 99 | ip_decrease_ttl(iph); 100 | else if (h_proto == htons(ETH_P_IPV6)) 101 | ip6h->hop_limit--; 102 | 103 | __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); 104 | __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); 105 | return bpf_redirect_map(&xdp_l3fwd_ports, fib_params.ifindex, 0); 106 | } 107 | 108 | return XDP_PASS; 109 | } 110 | 111 | SEC("xdp_l3fwd") 112 | int xdp_l3fwd_prog(struct xdp_md *ctx) 113 | { 114 | return xdp_l3fwd_flags(ctx, 0); 115 | } 116 | 117 | SEC("xdp_l3fwd_direct") 118 | int xdp_l3fwd_direct_prog(struct xdp_md *ctx) 119 | { 120 | return xdp_l3fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); 121 | } 122 | 123 | char _license[] SEC("license") = "GPL"; 124 | -------------------------------------------------------------------------------- /ksrc/xdp_vlan.h: -------------------------------------------------------------------------------- 1 | #ifndef __XDP_VLAN_H 2 | #define 
__XDP_VLAN_H 3 | 4 | /* 5 | * helpers for pushing/popping vlan for xdp context 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | static __always_inline int xdp_vlan_push(struct xdp_md *ctx, __be16 vlan) 15 | { 16 | void *data_end = (void *)(long)ctx->data_end; 17 | void *data = (void *)(long)ctx->data; 18 | u8 smac[ETH_ALEN], dmac[ETH_ALEN]; 19 | struct ethhdr *eth = data; 20 | struct vlan_hdr *vhdr; 21 | int delta = sizeof(*vhdr); 22 | u16 h_proto; 23 | int rc; 24 | 25 | if (eth + 1 > data_end) 26 | return -1; 27 | 28 | h_proto = eth->h_proto; 29 | __builtin_memcpy(smac, eth->h_source, ETH_ALEN); 30 | __builtin_memcpy(dmac, eth->h_dest, ETH_ALEN); 31 | 32 | if (bpf_xdp_adjust_head(ctx, -delta)) 33 | return -1; 34 | 35 | data = (void *)(long)ctx->data; 36 | data_end = (void *)(long)ctx->data_end; 37 | eth = data; 38 | vhdr = data + sizeof(*eth); 39 | if (vhdr + 1 > data_end) 40 | return -1; 41 | 42 | vhdr->h_vlan_TCI = vlan; 43 | vhdr->h_vlan_encapsulated_proto = h_proto; 44 | 45 | __builtin_memcpy(eth->h_dest, dmac, ETH_ALEN); 46 | __builtin_memcpy(eth->h_source, smac, ETH_ALEN); 47 | eth->h_proto = htons(ETH_P_8021Q); 48 | 49 | return 0; 50 | } 51 | 52 | /* pop vlan header if vlan tag is given one. 53 | * return -1 on error, > 1 if vlan does not match 54 | */ 55 | static __always_inline int xdp_vlan_pop(struct xdp_md *ctx, __be16 vlan) 56 | { 57 | void *data_end = (void *)(long)ctx->data_end; 58 | void *data = (void *)(long)ctx->data; 59 | u8 smac[ETH_ALEN], dmac[ETH_ALEN]; 60 | struct ethhdr *eth = data; 61 | struct vlan_hdr *vhdr; 62 | int delta = sizeof(*vhdr); 63 | u16 h_proto; 64 | int rc; 65 | 66 | if (eth + 1 > data_end) 67 | return -1; 68 | 69 | /* expecting a specific vlan tag */ 70 | if (eth->h_proto != htons(ETH_P_8021Q)) 71 | return 1; 72 | 73 | vhdr = data + sizeof(*eth); 74 | if (vhdr + 1 > data_end) 75 | return -1; 76 | 77 | if (vhdr->h_vlan_TCI != vlan) 78 | return 1; 79 | 80 | __builtin_memcpy(smac, eth->h_source, ETH_ALEN); 81 | __builtin_memcpy(dmac, eth->h_dest, ETH_ALEN); 82 | h_proto = vhdr->h_vlan_encapsulated_proto; 83 | 84 | /* pop vlan header */ 85 | if (bpf_xdp_adjust_head(ctx, delta)) 86 | return -1; 87 | 88 | data = (void *)(long)ctx->data; 89 | data_end = (void *)(long)ctx->data_end; 90 | eth = data; 91 | if (eth + 1 > data_end) 92 | return -1; 93 | 94 | __builtin_memcpy(eth->h_dest, dmac, ETH_ALEN); 95 | __builtin_memcpy(eth->h_source, smac, ETH_ALEN); 96 | eth->h_proto = h_proto; 97 | 98 | return 0; 99 | } 100 | #endif /* __XDP_VLAN_H */ 101 | -------------------------------------------------------------------------------- /ksrc/xdp_vmegress.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Handle traffic from a VM. Expects host NICs to be into a bond 3 | * configured with L3+L4 hashing to spread traffic across ports. 
4 | * 5 | * Copyright (c) 2019-20 David Ahern 6 | */ 7 | #define KBUILD_MODNAME "xdp_vmegress" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "xdp_vlan.h" 15 | #include "acl_simple.h" 16 | 17 | /* TO-DO: pull this from a map */ 18 | #define EGRESS_ETH0 2 19 | #define EGRESS_ETH1 3 20 | 21 | /* For TX-traffic redirect requires net_device ifindex to be in this devmap */ 22 | struct bpf_map_def SEC("maps") __egress_ports = { 23 | .type = BPF_MAP_TYPE_DEVMAP_HASH, 24 | .key_size = sizeof(u32), 25 | .value_size = sizeof(struct bpf_devmap_val), 26 | .max_entries = 2, 27 | }; 28 | 29 | struct bpf_map_def SEC("maps") __acl_map = { 30 | .type = BPF_MAP_TYPE_HASH, 31 | .key_size = sizeof(struct acl_key), 32 | .value_size = sizeof(struct acl_val), 33 | .max_entries = 64, 34 | }; 35 | 36 | struct bpf_map_def SEC("maps") __vm_info_map = { 37 | .type = BPF_MAP_TYPE_HASH, 38 | .key_size = sizeof(u32), 39 | .value_size = sizeof(struct vm_info), 40 | .max_entries = 1, 41 | }; 42 | 43 | static __always_inline u32 bond_hash(struct flow *fl) 44 | { 45 | u32 hash, idx; 46 | 47 | /* flow_icmp and flow_ports are a union in flow 48 | * and both are u32 in size 49 | */ 50 | __builtin_memcpy(&hash, &fl->ports, sizeof(hash)); 51 | 52 | if (fl->family == AF_INET) { 53 | hash ^= fl->daddr.ipv4 ^ fl->saddr.ipv4; 54 | } else if (fl->family == AF_INET6) { 55 | hash ^= ipv6_addr_hash(&fl->daddr.ipv6); 56 | hash ^= ipv6_addr_hash(&fl->saddr.ipv6); 57 | } 58 | 59 | hash ^= (hash >> 16); 60 | hash ^= (hash >> 8); 61 | hash = (hash >> 1); 62 | 63 | idx = hash & 1 ? EGRESS_ETH1 : EGRESS_ETH0; 64 | 65 | return idx; 66 | } 67 | 68 | SEC("xdp/egress") 69 | int xdp_egress_prog(struct xdp_md *ctx) 70 | { 71 | void *data_end = (void *)(long)ctx->data_end; 72 | void *data = (void *)(long)ctx->data; 73 | u32 idx = ctx->ingress_ifindex; 74 | struct ethhdr *eth = data; 75 | struct flow fl = {}; 76 | struct vm_info *vi; 77 | u16 h_proto; 78 | int rc; 79 | 80 | vi = bpf_map_lookup_elem(&__vm_info_map, &idx); 81 | if (!vi) 82 | return XDP_PASS; 83 | 84 | if (drop_packet(data, data_end, vi, true, &fl, &__acl_map)) 85 | return XDP_DROP; 86 | 87 | /* don't redirect broadcast frames */ 88 | if (eth->h_dest[0] == 0xff) 89 | return XDP_PASS; 90 | 91 | if (vi->vlan_TCI && xdp_vlan_push(ctx, vi->vlan_TCI) < 0) 92 | return XDP_PASS; 93 | 94 | idx = bond_hash(&fl); 95 | 96 | return bpf_redirect_map(&__egress_ports, idx, 0); 97 | } 98 | 99 | char _license[] SEC("license") = "GPL"; 100 | -------------------------------------------------------------------------------- /libbpf/usr/include/bpf/bpf_endian.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __BPF_ENDIAN__ 3 | #define __BPF_ENDIAN__ 4 | 5 | /* 6 | * Isolate byte #n and put it into byte #m, for __u##b type. 
7 | * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: 8 | * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 9 | * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 10 | * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 11 | * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 12 | */ 13 | #define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) 14 | 15 | #define ___bpf_swab16(x) ((__u16)( \ 16 | ___bpf_mvb(x, 16, 0, 1) | \ 17 | ___bpf_mvb(x, 16, 1, 0))) 18 | 19 | #define ___bpf_swab32(x) ((__u32)( \ 20 | ___bpf_mvb(x, 32, 0, 3) | \ 21 | ___bpf_mvb(x, 32, 1, 2) | \ 22 | ___bpf_mvb(x, 32, 2, 1) | \ 23 | ___bpf_mvb(x, 32, 3, 0))) 24 | 25 | #define ___bpf_swab64(x) ((__u64)( \ 26 | ___bpf_mvb(x, 64, 0, 7) | \ 27 | ___bpf_mvb(x, 64, 1, 6) | \ 28 | ___bpf_mvb(x, 64, 2, 5) | \ 29 | ___bpf_mvb(x, 64, 3, 4) | \ 30 | ___bpf_mvb(x, 64, 4, 3) | \ 31 | ___bpf_mvb(x, 64, 5, 2) | \ 32 | ___bpf_mvb(x, 64, 6, 1) | \ 33 | ___bpf_mvb(x, 64, 7, 0))) 34 | 35 | /* LLVM's BPF target selects the endianness of the CPU 36 | * it compiles on, or the user specifies (bpfel/bpfeb), 37 | * respectively. The used __BYTE_ORDER__ is defined by 38 | * the compiler, we cannot rely on __BYTE_ORDER from 39 | * libc headers, since it doesn't reflect the actual 40 | * requested byte order. 41 | * 42 | * Note, LLVM's BPF target has different __builtin_bswapX() 43 | * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE 44 | * in bpfel and bpfeb case, which means below, that we map 45 | * to cpu_to_be16(). We could use it unconditionally in BPF 46 | * case, but better not rely on it, so that this header here 47 | * can be used from application and BPF program side, which 48 | * use different targets. 49 | */ 50 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 51 | # define __bpf_ntohs(x) __builtin_bswap16(x) 52 | # define __bpf_htons(x) __builtin_bswap16(x) 53 | # define __bpf_constant_ntohs(x) ___bpf_swab16(x) 54 | # define __bpf_constant_htons(x) ___bpf_swab16(x) 55 | # define __bpf_ntohl(x) __builtin_bswap32(x) 56 | # define __bpf_htonl(x) __builtin_bswap32(x) 57 | # define __bpf_constant_ntohl(x) ___bpf_swab32(x) 58 | # define __bpf_constant_htonl(x) ___bpf_swab32(x) 59 | # define __bpf_be64_to_cpu(x) __builtin_bswap64(x) 60 | # define __bpf_cpu_to_be64(x) __builtin_bswap64(x) 61 | # define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) 62 | # define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) 63 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 64 | # define __bpf_ntohs(x) (x) 65 | # define __bpf_htons(x) (x) 66 | # define __bpf_constant_ntohs(x) (x) 67 | # define __bpf_constant_htons(x) (x) 68 | # define __bpf_ntohl(x) (x) 69 | # define __bpf_htonl(x) (x) 70 | # define __bpf_constant_ntohl(x) (x) 71 | # define __bpf_constant_htonl(x) (x) 72 | # define __bpf_be64_to_cpu(x) (x) 73 | # define __bpf_cpu_to_be64(x) (x) 74 | # define __bpf_constant_be64_to_cpu(x) (x) 75 | # define __bpf_constant_cpu_to_be64(x) (x) 76 | #else 77 | # error "Fix your compiler's __BYTE_ORDER__?!" 78 | #endif 79 | 80 | #define bpf_htons(x) \ 81 | (__builtin_constant_p(x) ? \ 82 | __bpf_constant_htons(x) : __bpf_htons(x)) 83 | #define bpf_ntohs(x) \ 84 | (__builtin_constant_p(x) ? \ 85 | __bpf_constant_ntohs(x) : __bpf_ntohs(x)) 86 | #define bpf_htonl(x) \ 87 | (__builtin_constant_p(x) ? \ 88 | __bpf_constant_htonl(x) : __bpf_htonl(x)) 89 | #define bpf_ntohl(x) \ 90 | (__builtin_constant_p(x) ? 
\ 91 | __bpf_constant_ntohl(x) : __bpf_ntohl(x)) 92 | #define bpf_cpu_to_be64(x) \ 93 | (__builtin_constant_p(x) ? \ 94 | __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) 95 | #define bpf_be64_to_cpu(x) \ 96 | (__builtin_constant_p(x) ? \ 97 | __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) 98 | 99 | #endif /* __BPF_ENDIAN__ */ 100 | -------------------------------------------------------------------------------- /libbpf/usr/include/bpf/bpf_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __BPF_HELPERS__ 3 | #define __BPF_HELPERS__ 4 | 5 | /* 6 | * Note that bpf programs need to include either 7 | * vmlinux.h (auto-generated from BTF) or linux/types.h 8 | * in advance since bpf_helper_defs.h uses such types 9 | * as __u64. 10 | */ 11 | #include "bpf_helper_defs.h" 12 | 13 | #define __uint(name, val) int (*name)[val] 14 | #define __type(name, val) typeof(val) *name 15 | #define __array(name, val) typeof(val) *name[] 16 | 17 | /* Helper macro to print out debug messages */ 18 | #define bpf_printk(fmt, ...) \ 19 | ({ \ 20 | char ____fmt[] = fmt; \ 21 | bpf_trace_printk(____fmt, sizeof(____fmt), \ 22 | ##__VA_ARGS__); \ 23 | }) 24 | 25 | /* 26 | * Helper macro to place programs, maps, license in 27 | * different sections in elf_bpf file. Section names 28 | * are interpreted by elf_bpf loader 29 | */ 30 | #define SEC(NAME) __attribute__((section(NAME), used)) 31 | 32 | #ifndef __always_inline 33 | #define __always_inline __attribute__((always_inline)) 34 | #endif 35 | #ifndef __weak 36 | #define __weak __attribute__((weak)) 37 | #endif 38 | 39 | /* 40 | * Helper macro to manipulate data structures 41 | */ 42 | #ifndef offsetof 43 | #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 44 | #endif 45 | #ifndef container_of 46 | #define container_of(ptr, type, member) \ 47 | ({ \ 48 | void *__mptr = (void *)(ptr); \ 49 | ((type *)(__mptr - offsetof(type, member))); \ 50 | }) 51 | #endif 52 | 53 | /* 54 | * Helper structure used by eBPF C program 55 | * to describe BPF map attributes to libbpf loader 56 | */ 57 | struct bpf_map_def { 58 | unsigned int type; 59 | unsigned int key_size; 60 | unsigned int value_size; 61 | unsigned int max_entries; 62 | unsigned int map_flags; 63 | }; 64 | 65 | enum libbpf_pin_type { 66 | LIBBPF_PIN_NONE, 67 | /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 68 | LIBBPF_PIN_BY_NAME, 69 | }; 70 | 71 | enum libbpf_tristate { 72 | TRI_NO = 0, 73 | TRI_YES = 1, 74 | TRI_MODULE = 2, 75 | }; 76 | 77 | #define __kconfig __attribute__((section(".kconfig"))) 78 | #define __ksym __attribute__((section(".ksyms"))) 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /libbpf/usr/include/bpf/libbpf_common.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | 3 | /* 4 | * Common user-facing libbpf helpers. 
5 | * 6 | * Copyright (c) 2019 Facebook 7 | */ 8 | 9 | #ifndef __LIBBPF_LIBBPF_COMMON_H 10 | #define __LIBBPF_LIBBPF_COMMON_H 11 | 12 | #include 13 | 14 | #ifndef LIBBPF_API 15 | #define LIBBPF_API __attribute__((visibility("default"))) 16 | #endif 17 | 18 | /* Helper macro to declare and initialize libbpf options struct 19 | * 20 | * This dance with uninitialized declaration, followed by memset to zero, 21 | * followed by assignment using compound literal syntax is done to preserve 22 | * ability to use a nice struct field initialization syntax and **hopefully** 23 | * have all the padding bytes initialized to zero. It's not guaranteed though, 24 | * when copying literal, that compiler won't copy garbage in literal's padding 25 | * bytes, but that's the best way I've found and it seems to work in practice. 26 | * 27 | * Macro declares opts struct of given type and name, zero-initializes, 28 | * including any extra padding, it with memset() and then assigns initial 29 | * values provided by users in struct initializer-syntax as varargs. 30 | */ 31 | #define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ 32 | struct TYPE NAME = ({ \ 33 | memset(&NAME, 0, sizeof(struct TYPE)); \ 34 | (struct TYPE) { \ 35 | .sz = sizeof(struct TYPE), \ 36 | __VA_ARGS__ \ 37 | }; \ 38 | }) 39 | 40 | #endif /* __LIBBPF_LIBBPF_COMMON_H */ 41 | -------------------------------------------------------------------------------- /libbpf/usr/include/bpf/libbpf_util.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | /* Copyright (c) 2019 Facebook */ 3 | 4 | #ifndef __LIBBPF_LIBBPF_UTIL_H 5 | #define __LIBBPF_LIBBPF_UTIL_H 6 | 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | /* Use these barrier functions instead of smp_[rw]mb() when they are 14 | * used in a libbpf header file. That way they can be built into the 15 | * application that uses libbpf. 16 | */ 17 | #if defined(__i386__) || defined(__x86_64__) 18 | # define libbpf_smp_rmb() asm volatile("" : : : "memory") 19 | # define libbpf_smp_wmb() asm volatile("" : : : "memory") 20 | # define libbpf_smp_mb() \ 21 | asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") 22 | /* Hinders stores to be observed before older loads. */ 23 | # define libbpf_smp_rwmb() asm volatile("" : : : "memory") 24 | #elif defined(__aarch64__) 25 | # define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") 26 | # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") 27 | # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") 28 | # define libbpf_smp_rwmb() libbpf_smp_mb() 29 | #elif defined(__arm__) 30 | /* These are only valid for armv7 and above */ 31 | # define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") 32 | # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") 33 | # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") 34 | # define libbpf_smp_rwmb() libbpf_smp_mb() 35 | #else 36 | /* Architecture missing native barrier functions. 
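 * Fall back to __sync_synchronize(), a full compiler/CPU barrier; heavier than
 * strictly needed for rmb/wmb, but portable to any architecture.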
*/ 37 | # define libbpf_smp_rmb() __sync_synchronize() 38 | # define libbpf_smp_wmb() __sync_synchronize() 39 | # define libbpf_smp_mb() __sync_synchronize() 40 | # define libbpf_smp_rwmb() __sync_synchronize() 41 | #endif 42 | 43 | #ifdef __cplusplus 44 | } /* extern "C" */ 45 | #endif 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /libbpf/usr/lib64/libbpf.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dsahern/bpf-progs/e31fa1023bb9af1f977152a69fca5db9ca411f07/libbpf/usr/lib64/libbpf.a -------------------------------------------------------------------------------- /libbpf/usr/lib64/pkgconfig/libbpf.pc: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | 3 | prefix=/usr 4 | libdir=/usr/lib64 5 | includedir=${prefix}/include 6 | 7 | Name: libbpf 8 | Description: BPF library 9 | Version: 0.1.0 10 | Libs: -L${libdir} -lbpf 11 | Requires.private: libelf zlib 12 | Cflags: -I${includedir} 13 | -------------------------------------------------------------------------------- /scripts/l2fwd-only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BPFFS=/sys/fs/bpf 4 | BPFTOOL=~/bin/bpftool 5 | 6 | # public vlan 7 | PVLAN=51 8 | 9 | # VM data 10 | VMID=4798884 11 | VMNAME=Droplet-${VMID} 12 | # public data 13 | PMAC=12:41:da:80:1f:71 14 | PV4=10.39.16.67 15 | PV6=fd53:616d:6d60:5::f000 16 | 17 | # VPC data 18 | VMAC=12:30:6d:9f:6f:42 19 | VV4=10.39.32.23 20 | 21 | ################################################################################ 22 | # 23 | pr_msg() 24 | { 25 | echo -e "\e[34m$*\e[00m" 26 | } 27 | 28 | run_cmd() 29 | { 30 | local cmd="$*" 31 | 32 | echo 33 | echo -e "\e[31m${cmd}\e[00m" 34 | sudo $cmd 35 | } 36 | 37 | show_maps() 38 | { 39 | echo 40 | echo -e "\e[31m${BPFTOOL} map sh\e[00m" 41 | sudo ${BPFTOOL} map sh | \ 42 | awk 'BEGIN { skip = 0 } { 43 | if (skip) { 44 | skip-- 45 | } else if ($2 == "lpm_trie") { 46 | skip = 1 47 | } else { 48 | print 49 | } 50 | }' 51 | } 52 | 53 | show_progs() 54 | { 55 | echo 56 | echo -e "\e[31m${BPFTOOL} prog sh\e[00m" 57 | sudo ${BPFTOOL} prog sh | \ 58 | awk 'BEGIN { skip = 0 } { 59 | if (skip) { 60 | skip-- 61 | } else if ($2 == "cgroup_skb") { 62 | skip = 2 63 | } else { 64 | print 65 | } 66 | }' 67 | } 68 | 69 | show_status() 70 | { 71 | show_maps 72 | show_progs 73 | run_cmd ${BPFTOOL} net sh 74 | } 75 | 76 | do_reset() 77 | { 78 | sudo rm -rf ${BPFFS}/map 79 | sudo rm -rf ${BPFFS}/prog 80 | sudo mkdir ${BPFFS}/map 81 | sudo mkdir ${BPFFS}/prog 82 | 83 | for d in eth0 eth1 84 | do 85 | sudo ${BPFTOOL} net detach xdp dev ${d} 86 | sudo ethtool -K ${d} hw-tc-offload on 87 | sudo ethtool -K ${d} rxvlan off 88 | done 89 | } 90 | 91 | ################################################################################ 92 | # start 93 | 94 | do_reset >/dev/null 2>&1 95 | 96 | echo 97 | pr_msg "Create ports map" 98 | pr_msg "- global map used for bulking redirected packets" 99 | 100 | run_cmd ${BPFTOOL} map create ${BPFFS}/map/xdp_fwd_ports \ 101 | type devmap_hash key 4 value 8 entries 512 name xdp_fwd_ports 102 | 103 | echo 104 | pr_msg "Add entries to the egress port map for eth0 (index 2) and eth1 (index 3)" 105 | run_cmd ${BPFTOOL} map update pinned ${BPFFS}/map/xdp_fwd_ports \ 106 | key hex 2 0 0 0 value hex 2 0 0 0 0 0 0 0 107 | run_cmd ${BPFTOOL} map update pinned 
${BPFFS}/map/xdp_fwd_ports \ 108 | key hex 3 0 0 0 value hex 3 0 0 0 0 0 0 0 109 | 110 | echo 111 | pr_msg "load l2fwd program and attach to eth0 and eth1" 112 | 113 | run_cmd ${BPFTOOL} prog load ksrc/obj/xdp_l2fwd.o ${BPFFS}/prog/xdp_l2fwd \ 114 | map name xdp_fwd_ports name xdp_fwd_ports 115 | run_cmd ${BPFTOOL} net attach xdp pinned ${BPFFS}/prog/xdp_l2fwd dev eth0 116 | run_cmd ${BPFTOOL} net attach xdp pinned ${BPFFS}/prog/xdp_l2fwd dev eth1 117 | 118 | echo 119 | pr_msg "Add FDB and port map entries for this VM" 120 | run_cmd src/bin/xdp_l2fwd -v ${PVLAN} -m ${PMAC} -d tapext${VMID} 121 | run_cmd src/bin/xdp_l2fwd -P 122 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0 2 | 3 | include ../config.mk 4 | 5 | ifneq (,$(BUILDDIR)) 6 | OBJDIR = $(BUILDDIR)/src/obj/ 7 | else 8 | OBJDIR = obj/ 9 | endif 10 | 11 | ifneq (,$(BUILDDIR)) 12 | BINDIR = $(BUILDDIR)/src/bin/ 13 | else 14 | BINDIR = bin/ 15 | endif 16 | 17 | MODS += $(BINDIR)tcp_probe 18 | MODS += $(BINDIR)xdp_l2fwd 19 | MODS += $(BINDIR)xdp_l3fwd 20 | MODS += $(BINDIR)xdp_acl 21 | 22 | MODS += $(BINDIR)execsnoop 23 | MODS += $(BINDIR)opensnoop 24 | MODS += $(BINDIR)cgroup_sock 25 | MODS += $(BINDIR)kvm-nested 26 | 27 | MODS += $(BINDIR)netmon 28 | MODS += $(BINDIR)pktlatency 29 | MODS += $(BINDIR)ovslatency 30 | MODS += $(BINDIR)net_rx_action 31 | MODS += $(BINDIR)napi_poll 32 | MODS += $(BINDIR)xdp_devmap_xmit 33 | 34 | MODS += $(BINDIR)xdp_dummy 35 | MODS += $(BINDIR)vm_info 36 | 37 | VPATH := . 38 | 39 | CC = gcc 40 | CFLAGS += -O2 -g -Wall 41 | 42 | INCLUDES = -I../include -I../include/uapi 43 | INCLUDES += -I../include/tools 44 | 45 | ifneq (,$(LIBBPF_DIR)) 46 | BPF_LINK_FEAT := $(shell egrep 'bpf_link_create' $(LIBBPF_DIR)/usr/include/bpf/libbpf.h) 47 | ifneq (,$(BPF_LINK_FEAT)) 48 | CFLAGS += -DHAVE_BPF_LINK_CREATE 49 | endif 50 | else 51 | LIBBPF=-lbpf 52 | endif 53 | 54 | COMMON += $(OBJDIR)perf_events.o 55 | COMMON += $(OBJDIR)kprobes.o 56 | COMMON += $(OBJDIR)timestamps.o 57 | COMMON += $(OBJDIR)libbpf_helpers.o 58 | COMMON += $(OBJDIR)str_utils.o 59 | COMMON += $(OBJDIR)rbtree.o 60 | COMMON += $(OBJDIR)parse_pkt.o 61 | COMMON += $(OBJDIR)print_pkt.o 62 | COMMON += $(OBJDIR)ksyms.o 63 | COMMON += $(OBJDIR)tp_verify.o 64 | 65 | all: build $(MODS) 66 | 67 | build: 68 | @mkdir -p $(OBJDIR) $(BINDIR) 69 | 70 | $(BINDIR)%: $(OBJDIR)%.o $(COMMON) 71 | $(QUIET_LINK)$(CC) $(INCLUDES) $(DEFS) $(CFLAGS) $^ -o $@ $(LDLIBS) 72 | 73 | $(BINDIR)xdp_%: $(OBJDIR)xdp_%_user.o $(COMMON) 74 | $(QUIET_LINK)$(CC) $(INCLUDES) $(DEFS) $(CFLAGS) $^ -o $@ $(LDLIBS) 75 | 76 | $(OBJDIR)%.o: %.c 77 | $(QUIET_CC)$(CC) $(INCLUDES) $(DEFS) $(CFLAGS) -c $^ -o $@ 78 | 79 | $(BINDIR)netmon: $(OBJDIR)netmon.o $(COMMON) 80 | $(QUIET_LINK)$(CC) $(INCLUDES) $(DEFS) $(CFLAGS) $^ -o $@ $(LDLIBS) -lpcap 81 | 82 | clean: 83 | @rm -rf $(OBJDIR) $(BINDIR) 84 | -------------------------------------------------------------------------------- /src/bpf_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * bpf_util.h BPF common code 3 | * 4 | * This program is free software; you can distribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * as published by the Free Software Foundation; either version 7 | * 2 of the License, or (at your option) any later version. 
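 * Provides instruction-building macros (BPF_MOV*, BPF_LD*, BPF_JMP*, ...) mirroring
 * the kernel's own insn helpers, used for hand-assembling small programs such as
 * the ones in cgroup_sock.c.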
8 | * 9 | * Authors: Daniel Borkmann 10 | * Jiri Pirko 11 | */ 12 | 13 | #ifndef __BPF_UTIL__ 14 | #define __BPF_UTIL__ 15 | 16 | #include 17 | #include 18 | 19 | /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ 20 | 21 | #define BPF_ALU64_REG(OP, DST, SRC) \ 22 | ((struct bpf_insn) { \ 23 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ 24 | .dst_reg = DST, \ 25 | .src_reg = SRC, \ 26 | .off = 0, \ 27 | .imm = 0 }) 28 | 29 | #define BPF_ALU32_REG(OP, DST, SRC) \ 30 | ((struct bpf_insn) { \ 31 | .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ 32 | .dst_reg = DST, \ 33 | .src_reg = SRC, \ 34 | .off = 0, \ 35 | .imm = 0 }) 36 | 37 | /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ 38 | 39 | #define BPF_ALU64_IMM(OP, DST, IMM) \ 40 | ((struct bpf_insn) { \ 41 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ 42 | .dst_reg = DST, \ 43 | .src_reg = 0, \ 44 | .off = 0, \ 45 | .imm = IMM }) 46 | 47 | #define BPF_ALU32_IMM(OP, DST, IMM) \ 48 | ((struct bpf_insn) { \ 49 | .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ 50 | .dst_reg = DST, \ 51 | .src_reg = 0, \ 52 | .off = 0, \ 53 | .imm = IMM }) 54 | 55 | /* Short form of mov, dst_reg = src_reg */ 56 | 57 | #define BPF_MOV64_REG(DST, SRC) \ 58 | ((struct bpf_insn) { \ 59 | .code = BPF_ALU64 | BPF_MOV | BPF_X, \ 60 | .dst_reg = DST, \ 61 | .src_reg = SRC, \ 62 | .off = 0, \ 63 | .imm = 0 }) 64 | 65 | #define BPF_MOV32_REG(DST, SRC) \ 66 | ((struct bpf_insn) { \ 67 | .code = BPF_ALU | BPF_MOV | BPF_X, \ 68 | .dst_reg = DST, \ 69 | .src_reg = SRC, \ 70 | .off = 0, \ 71 | .imm = 0 }) 72 | 73 | /* Short form of mov, dst_reg = imm32 */ 74 | 75 | #define BPF_MOV64_IMM(DST, IMM) \ 76 | ((struct bpf_insn) { \ 77 | .code = BPF_ALU64 | BPF_MOV | BPF_K, \ 78 | .dst_reg = DST, \ 79 | .src_reg = 0, \ 80 | .off = 0, \ 81 | .imm = IMM }) 82 | 83 | #define BPF_MOV32_IMM(DST, IMM) \ 84 | ((struct bpf_insn) { \ 85 | .code = BPF_ALU | BPF_MOV | BPF_K, \ 86 | .dst_reg = DST, \ 87 | .src_reg = 0, \ 88 | .off = 0, \ 89 | .imm = IMM }) 90 | 91 | /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ 92 | #define BPF_LD_IMM64(DST, IMM) \ 93 | BPF_LD_IMM64_RAW(DST, 0, IMM) 94 | 95 | #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ 96 | ((struct bpf_insn) { \ 97 | .code = BPF_LD | BPF_DW | BPF_IMM, \ 98 | .dst_reg = DST, \ 99 | .src_reg = SRC, \ 100 | .off = 0, \ 101 | .imm = (__u32) (IMM) }), \ 102 | ((struct bpf_insn) { \ 103 | .code = 0, /* zero is reserved opcode */ \ 104 | .dst_reg = 0, \ 105 | .src_reg = 0, \ 106 | .off = 0, \ 107 | .imm = ((__u64) (IMM)) >> 32 }) 108 | 109 | #ifndef BPF_PSEUDO_MAP_FD 110 | # define BPF_PSEUDO_MAP_FD 1 111 | #endif 112 | 113 | /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ 114 | #define BPF_LD_MAP_FD(DST, MAP_FD) \ 115 | BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) 116 | 117 | 118 | /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ 119 | 120 | #define BPF_LD_ABS(SIZE, IMM) \ 121 | ((struct bpf_insn) { \ 122 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ 123 | .dst_reg = 0, \ 124 | .src_reg = 0, \ 125 | .off = 0, \ 126 | .imm = IMM }) 127 | 128 | /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ 129 | 130 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 131 | ((struct bpf_insn) { \ 132 | .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ 133 | .dst_reg = DST, \ 134 | .src_reg = SRC, \ 135 | .off = OFF, \ 136 | .imm = 0 }) 137 | 138 | /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ 139 | 140 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ 141 | ((struct bpf_insn) { \ 142 | .code = 
BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ 143 | .dst_reg = DST, \ 144 | .src_reg = SRC, \ 145 | .off = OFF, \ 146 | .imm = 0 }) 147 | 148 | /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ 149 | 150 | #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ 151 | ((struct bpf_insn) { \ 152 | .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ 153 | .dst_reg = DST, \ 154 | .src_reg = 0, \ 155 | .off = OFF, \ 156 | .imm = IMM }) 157 | 158 | /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ 159 | 160 | #define BPF_JMP_REG(OP, DST, SRC, OFF) \ 161 | ((struct bpf_insn) { \ 162 | .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ 163 | .dst_reg = DST, \ 164 | .src_reg = SRC, \ 165 | .off = OFF, \ 166 | .imm = 0 }) 167 | 168 | /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ 169 | 170 | #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 171 | ((struct bpf_insn) { \ 172 | .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ 173 | .dst_reg = DST, \ 174 | .src_reg = 0, \ 175 | .off = OFF, \ 176 | .imm = IMM }) 177 | 178 | /* Raw code statement block */ 179 | 180 | #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ 181 | ((struct bpf_insn) { \ 182 | .code = CODE, \ 183 | .dst_reg = DST, \ 184 | .src_reg = SRC, \ 185 | .off = OFF, \ 186 | .imm = IMM }) 187 | 188 | /* Program exit */ 189 | 190 | #define BPF_EXIT_INSN() \ 191 | ((struct bpf_insn) { \ 192 | .code = BPF_JMP | BPF_EXIT, \ 193 | .dst_reg = 0, \ 194 | .src_reg = 0, \ 195 | .off = 0, \ 196 | .imm = 0 }) 197 | 198 | #endif /* __BPF_UTIL__ */ 199 | -------------------------------------------------------------------------------- /src/cgroup_sock.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include "libbpf_helpers.h" 21 | #include "bpf_util.h" 22 | #include "str_utils.h" 23 | 24 | #ifndef ARRAY_SIZE 25 | #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) 26 | #endif 27 | 28 | static char bpf_log_buf[256*1024]; 29 | 30 | static int load_dev_prog(int idx) 31 | { 32 | struct bpf_insn prog[] = { 33 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 34 | BPF_MOV64_IMM(BPF_REG_3, idx), 35 | BPF_MOV64_IMM(BPF_REG_2, 36 | offsetof(struct bpf_sock, bound_dev_if)), 37 | BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, 38 | offsetof(struct bpf_sock, bound_dev_if)), 39 | BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ 40 | BPF_EXIT_INSN(), 41 | }; 42 | size_t size_insns = ARRAY_SIZE(prog); 43 | 44 | return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, size_insns, 45 | "GPL", 0, bpf_log_buf, sizeof(bpf_log_buf)); 46 | } 47 | 48 | static int load_mark_prog(__u32 mark) 49 | { 50 | struct bpf_insn prog[] = { 51 | BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), 52 | BPF_MOV64_IMM(BPF_REG_3, mark), 53 | BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), 54 | BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, 55 | offsetof(struct bpf_sock, mark)), 56 | BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ 57 | BPF_EXIT_INSN(), 58 | }; 59 | size_t size_insns = ARRAY_SIZE(prog); 60 | 61 | return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, size_insns, 62 | "GPL", 0, bpf_log_buf, sizeof(bpf_log_buf)); 63 | } 64 | 65 | #ifdef HAVE_BPF_LINK_CREATE 66 | static bool done; 67 | 68 | static void sig_handler(int signo) 69 | { 70 | printf("Terminating by signal %d\n", signo); 71 | done = 
true; 72 | } 73 | #endif 74 | 75 | static void usage(const char *prog) 76 | { 77 | fprintf(stderr, 78 | "usage: %s [OPTS] cgroup-path\n" 79 | "\nOPTS:\n" 80 | " -i name interface to attach device program\n" 81 | " -l use bpf-link\n" 82 | " -m mark load mark program with given mark\n" 83 | " -M set BPF_F_ALLOW_MULTI flag on attach\n" 84 | " -O set BPF_F_ALLOW_OVERRIDE flag on attach\n" 85 | , prog); 86 | } 87 | 88 | static int load_prog(int ifindex, __u32 mark) 89 | { 90 | int prog_fd; 91 | 92 | if (mark) { 93 | prog_fd = load_mark_prog(mark); 94 | } else if (ifindex > 0) { 95 | prog_fd = load_dev_prog(ifindex); 96 | } else { 97 | fprintf(stderr, "Neither mark nor device option set.\n"); 98 | return -1; 99 | } 100 | 101 | if (prog_fd < 0) 102 | fprintf(stderr, "Failed to load program\n"); 103 | 104 | return prog_fd; 105 | } 106 | 107 | static int do_bpf_link(int cg_fd, const char *path, int ifindex, __u32 mark, 108 | __u32 flags) 109 | { 110 | #ifdef HAVE_BPF_LINK_CREATE 111 | int prog_fd, link_fd; 112 | 113 | prog_fd = load_prog(ifindex, mark); 114 | if (prog_fd < 0) 115 | return 1; 116 | 117 | link_fd = bpf_link_create(prog_fd, cg_fd,BPF_CGROUP_INET_SOCK_CREATE, 118 | NULL); 119 | if (link_fd < 0) { 120 | fprintf(stderr, "Failed to attach program to cgroup\n"); 121 | return 1; 122 | } 123 | 124 | close(cg_fd); 125 | printf("program attached to %s\n", path); 126 | 127 | if (signal(SIGINT, sig_handler) || 128 | signal(SIGHUP, sig_handler) || 129 | signal(SIGTERM, sig_handler)) { 130 | perror("signal"); 131 | return 1; 132 | } 133 | 134 | while (!done) 135 | pause(); 136 | 137 | printf("dropping link\n"); 138 | close(link_fd); 139 | 140 | return 0; 141 | #else 142 | fprintf(stderr, "libbpf does not suppport bpf_link_create\n"); 143 | return 1; 144 | #endif 145 | } 146 | 147 | static int do_prog(int cg_fd, const char *path, int ifindex, __u32 mark, 148 | __u32 flags) 149 | { 150 | int prog_fd; 151 | 152 | prog_fd = load_prog(ifindex, mark); 153 | if (prog_fd < 0) 154 | return 1; 155 | 156 | if (bpf_prog_attach(prog_fd, cg_fd, 157 | BPF_CGROUP_INET_SOCK_CREATE, flags) < 0) { 158 | fprintf(stderr, "Failed to attach program to cgroup\n"); 159 | return 1; 160 | } 161 | 162 | close(cg_fd); 163 | printf("program attached to %s\n", path); 164 | 165 | return 0; 166 | } 167 | 168 | int main(int argc, char **argv) 169 | { 170 | int (*fn)(int cg_fd, const char *path, int ifindex, __u32 mark, 171 | __u32 flags) = do_prog; 172 | int ifindex = -1, cg_fd, opt; 173 | __u32 flags = 0, mark = 0; 174 | unsigned long tmp; 175 | 176 | while ((opt = getopt(argc, argv, ":i:lm:MO")) != -1) { 177 | switch (opt) { 178 | case 'i': 179 | ifindex = if_nametoindex(optarg); 180 | if (ifindex < 0) { 181 | fprintf(stderr, "Invalid device\n"); 182 | return 1; 183 | } 184 | break; 185 | case 'l': 186 | fn = do_bpf_link; 187 | break; 188 | case 'm': 189 | if (str_to_ulong(optarg, &tmp)) { 190 | fprintf(stderr, "Invalid mark\n"); 191 | return 1; 192 | } 193 | mark = (__u32)tmp; 194 | if ((unsigned long)mark != tmp) { 195 | fprintf(stderr, "Invalid mark\n"); 196 | return 1; 197 | } 198 | break; 199 | case 'M': 200 | flags |= BPF_F_ALLOW_MULTI; 201 | break; 202 | case 'O': 203 | flags |= BPF_F_ALLOW_OVERRIDE; 204 | break; 205 | default: 206 | usage(basename(argv[0])); 207 | return 1; 208 | } 209 | } 210 | 211 | if (optind == argc) { 212 | usage(basename(argv[0])); 213 | return 1; 214 | } 215 | 216 | cg_fd = open(argv[optind], O_DIRECTORY | O_RDONLY); 217 | if (cg_fd < 0) { 218 | fprintf(stderr, "Failed to open cgroup path: 
'%s'\n", 219 | strerror(errno)); 220 | return -1; 221 | } 222 | 223 | return fn(cg_fd, argv[optind], ifindex, mark, flags); 224 | } 225 | -------------------------------------------------------------------------------- /src/flow.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef _FLOW_H_ 3 | #define _FLOW_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define NDISC_ROUTER_SOLICITATION 133 11 | #define NDISC_ROUTER_ADVERTISEMENT 134 12 | #define NDISC_NEIGHBOUR_SOLICITATION 135 13 | #define NDISC_NEIGHBOUR_ADVERTISEMENT 136 14 | 15 | // https://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml 16 | #define IPPROTO_VRRP 112 17 | 18 | #ifndef ETH_P_LLDP 19 | #define ETH_P_LLDP 0x88CC /* Link Layer Discovery Protocol */ 20 | #endif 21 | 22 | /* from linux/if_vlan.h */ 23 | #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ 24 | #define VLAN_PRIO_SHIFT 13 25 | #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ 26 | 27 | struct vlan_hdr { 28 | __be16 h_vlan_TCI; 29 | __be16 h_vlan_encapsulated_proto; 30 | }; 31 | 32 | struct arpdata { 33 | __u8 ar_sha[ETH_ALEN]; 34 | __u8 ar_sip[4]; 35 | __u8 ar_tha[ETH_ALEN]; 36 | __u8 ar_tip[4]; 37 | }; 38 | 39 | /* not handling Q-in-Q at the moment */ 40 | struct flow_vlan { 41 | __u16 outer_vlan_TCI; 42 | }; 43 | 44 | struct flow_arp { 45 | __u16 op; 46 | struct arpdata data; 47 | }; 48 | 49 | struct flow_tcp { 50 | __u16 sport; 51 | __u16 dport; 52 | bool fin; 53 | bool syn; 54 | bool rst; 55 | bool ack; 56 | }; 57 | 58 | struct flow_udp { 59 | __u16 sport; 60 | __u16 dport; 61 | }; 62 | 63 | struct flow_icmp6 { 64 | __u8 icmp6_type; 65 | __u8 icmp6_code; 66 | }; 67 | 68 | struct flow_transport { 69 | __u8 proto; 70 | 71 | union { 72 | struct flow_tcp tcp; 73 | struct flow_udp udp; 74 | struct flow_icmp6 icmp6; 75 | }; 76 | }; 77 | 78 | struct flow_ip4 { 79 | __u32 saddr; 80 | __u32 daddr; 81 | struct flow_transport trans; 82 | }; 83 | 84 | struct flow_ip6 { 85 | struct in6_addr saddr; 86 | struct in6_addr daddr; 87 | struct flow_transport trans; 88 | }; 89 | 90 | struct flow { 91 | /* only interested in ethernet frames */ 92 | __u8 dmac[ETH_ALEN]; 93 | __u8 smac[ETH_ALEN]; 94 | __u16 proto; /* network protocol */ 95 | 96 | bool has_vlan; 97 | 98 | struct flow_vlan vlan; 99 | 100 | union { 101 | struct flow_arp arp; 102 | struct flow_ip4 ip4; 103 | struct flow_ip6 ip6; 104 | }; 105 | }; 106 | 107 | int parse_pkt(struct flow *flow, __u8 protocol, const __u8 *data, int len); 108 | void print_flow(const struct flow *fl); 109 | void print_pkt(__u16 protocol, const __u8 *data, int len); 110 | int cmp_flow(const struct flow *fl1, const struct flow *fl2); 111 | int cmp_flow_reverse(const struct flow *fl1, const struct flow *fl2); 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /src/kprobes.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "kprobes.h" 11 | #include "perf_events.h" 12 | 13 | static int kprobes_event_id(const char *event) 14 | { 15 | char filename[PATH_MAX]; 16 | int fd, n, id = -1; 17 | char buf[64] = {}; 18 | 19 | /* "probes" directory for some use cases? 
*/ 20 | snprintf(filename, sizeof(filename), "%s/events/kprobes/%s/id", 21 | TRACINGFS, event); 22 | 23 | fd = open(filename, O_RDONLY); 24 | if (fd < 0) { 25 | fprintf(stderr, "Failed to open '%s' to learn id for tracing event '%s'\n", 26 | filename, event); 27 | return -1; 28 | } 29 | 30 | n = read(fd, buf, sizeof(buf)-1); 31 | if (n < 0) { 32 | fprintf(stderr, "Failed to open '%s' to learn kprobe type\n", 33 | filename); 34 | } else { 35 | id = atoi(buf); 36 | } 37 | close(fd); 38 | 39 | return id; 40 | } 41 | 42 | static int do_kprobe_event(const char *event) 43 | { 44 | char filename[PATH_MAX]; 45 | int rc = 0; 46 | int fd; 47 | 48 | snprintf(filename, sizeof(filename), "%s/kprobe_events", TRACINGFS); 49 | 50 | fd = open(filename, O_WRONLY|O_APPEND); 51 | if (fd < 0) { 52 | fprintf(stderr, "Failed to open '%s' to learn id for event '%s'\n", 53 | filename, event); 54 | return -1; 55 | } 56 | if (write(fd, event, strlen(event)) != strlen(event)) { 57 | fprintf(stderr, "Failed writing event '%s' to '%s'\n", 58 | event, filename); 59 | rc = -1; 60 | } 61 | close(fd); 62 | 63 | return rc; 64 | } 65 | 66 | static int kprobe_perf_event_legacy(int prog_fd, const char *func, 67 | bool retprobe) 68 | { 69 | char event[128], pname[64]; 70 | char t = 'p'; 71 | int id; 72 | 73 | if (strlen(func) + 10 > sizeof(pname)) { 74 | fprintf(stderr, 75 | "buf size too small in kprobe_perf_event_legacy\n"); 76 | return -1; 77 | } 78 | if (retprobe) 79 | t = 'r'; 80 | 81 | /* probe: p:kprobes/p__ 82 | * retprobe: r:kprobes/r__ 83 | * delete: -:kprobes/
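 * (event names are generated as "%c_%s_%d" -- probe type, function, pid -- per the snprintf below)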

__ 84 | */ 85 | snprintf(pname, sizeof(pname), "%c_%s_%d", t, func, getpid()); 86 | if (prog_fd < 0) 87 | snprintf(event, sizeof(event), "-:kprobes/%s", pname); 88 | else 89 | snprintf(event, sizeof(event), "%c:kprobes/%s %s", t, pname, func); 90 | 91 | if (do_kprobe_event(event)) 92 | return -1; 93 | 94 | if (prog_fd < 0) 95 | return 0; 96 | 97 | id = kprobes_event_id(pname); 98 | if (id < 0) { 99 | fprintf(stderr, "Failed to get id for '%s'\n", pname); 100 | return -1; 101 | } 102 | 103 | return perf_event_tp_set_prog(prog_fd, id); 104 | } 105 | 106 | int kprobe_event_type(void) 107 | { 108 | char filename[] = "/sys/bus/event_source/devices/kprobe/type"; 109 | static int kprobe_type = -1; 110 | static bool checked = false; 111 | char buf[64] = {}; 112 | int fd, n; 113 | 114 | if (checked) 115 | return kprobe_type; 116 | 117 | fd = open(filename, O_RDONLY); 118 | if (fd < 0) 119 | return -1; 120 | 121 | n = read(fd, buf, sizeof(buf)-1); 122 | if (n < 0) { 123 | fprintf(stderr, "Failed to open '%s' to learn kprobe type\n", 124 | filename); 125 | } else { 126 | kprobe_type = atoi(buf); 127 | } 128 | close(fd); 129 | 130 | checked = true; 131 | 132 | return kprobe_type; 133 | } 134 | 135 | /* probes is a NULL terminated array of function names to put 136 | * kprobe. bpf program is expected to be named kprobe/%s. 137 | * If retprobe is set, bpf program name is expected to be 138 | * "kprobe/%s_ret" 139 | */ 140 | int kprobe_init(struct bpf_object *obj, struct kprobe_data *probes, 141 | unsigned int count) 142 | { 143 | struct bpf_program *prog; 144 | int prog_fd, attr_type; 145 | unsigned int i; 146 | int rc = 0; 147 | 148 | attr_type = kprobe_event_type(); 149 | 150 | for (i = 0; i < count; ++i) { 151 | char buf[256]; 152 | 153 | if (probes[i].prog) { 154 | snprintf(buf, sizeof(buf), "%s", probes[i].prog); 155 | } else { 156 | snprintf(buf, sizeof(buf), "kprobe/%s%s", 157 | probes[i].func, 158 | probes[i].retprobe ? 
"_ret" : ""); 159 | } 160 | 161 | prog = bpf_object__find_program_by_title(obj, buf); 162 | if (!prog) { 163 | printf("Failed to get prog in obj file\n"); 164 | rc = 1; 165 | continue; 166 | } 167 | prog_fd = bpf_program__fd(prog); 168 | 169 | 170 | if (attr_type < 0) { 171 | probes[i].fd = kprobe_perf_event_legacy(prog_fd, 172 | probes[i].func, 173 | probes[i].retprobe); 174 | } else { 175 | probes[i].fd = kprobe_perf_event(prog_fd, 176 | probes[i].func, 177 | probes[i].retprobe, 178 | attr_type); 179 | } 180 | if (probes[i].fd < 0) { 181 | fprintf(stderr, 182 | "Failed to create perf_event on %s\n", 183 | probes[i].func); 184 | rc = 1; 185 | } 186 | } 187 | 188 | return rc; 189 | } 190 | 191 | void kprobe_cleanup(struct kprobe_data *probes, unsigned int count) 192 | { 193 | unsigned int i; 194 | int attr_type; 195 | 196 | attr_type = kprobe_event_type(); 197 | for (i = 0; i < count; ++i) { 198 | if (probes[i].fd < 0) 199 | continue; 200 | 201 | close(probes[i].fd); 202 | if (attr_type < 0) { 203 | kprobe_perf_event_legacy(-1, probes[i].func, 204 | probes[i].retprobe); 205 | } 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/kprobes.h: -------------------------------------------------------------------------------- 1 | #ifndef _KPROBES_H_ 2 | #define _KPROBES_H_ 3 | 4 | #include 5 | #include 6 | 7 | struct kprobe_data { 8 | const char *prog; 9 | const char *func; 10 | int fd; 11 | bool retprobe; 12 | }; 13 | 14 | int kprobe_init(struct bpf_object *obj, struct kprobe_data *probes, 15 | unsigned int count); 16 | 17 | void kprobe_cleanup(struct kprobe_data *probes, unsigned int count); 18 | 19 | int kprobe_perf_event(int prog_fd, const char *func, int retprobe, 20 | int attr_type); 21 | 22 | int kprobe_event_type(void); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/ksyms.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Resolve addresses to kernel symbols. 3 | * 4 | * Copyright (c) 2009-2020 David Ahern 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "ksyms.h" 15 | #include "str_utils.h" 16 | 17 | static struct rb_root ksyms; 18 | static bool ksyms_initialized; 19 | 20 | struct ksym_s *new_ksym(unsigned long addr, const char *name, const char *mod) 21 | { 22 | struct ksym_s *sym = calloc(1, sizeof(struct ksym_s)); 23 | 24 | if (sym) { 25 | sym->addr = addr; 26 | sym->name = strdup(name); 27 | sym->mod = strdup(mod); 28 | 29 | if (!strncmp(name, "unix", 4)) 30 | sym->is_unix = true; 31 | else if (!strncmp(name, "tcp_", 4)) 32 | sym->is_tcp = true; 33 | } 34 | 35 | return sym; 36 | } 37 | 38 | void free_ksym(struct ksym_s *sym) 39 | { 40 | free(sym->name); 41 | free(sym->mod); 42 | free(sym); 43 | } 44 | 45 | static struct ksym_s *__new_ksym(unsigned long addr, char *fields[], 46 | int nfields) 47 | { 48 | return new_ksym(addr, fields[2], nfields > 3 ? 
fields[3] : "[kernel]"); 49 | } 50 | 51 | /* 52 | * return entry whose addr value is < given argument 53 | */ 54 | struct ksym_s *find_ksym(unsigned long addr) 55 | { 56 | struct rb_node **p = &ksyms.rb_node; 57 | struct rb_node *parent = NULL; 58 | 59 | while (*p != NULL) { 60 | struct ksym_s *sym; 61 | 62 | parent = *p; 63 | 64 | sym = container_of(parent, struct ksym_s, rb_node); 65 | if (addr >= sym->addr && addr < sym->addr_next) 66 | return sym; 67 | 68 | if (addr < sym->addr) 69 | p = &(*p)->rb_left; 70 | else if (addr > sym->addr) 71 | p = &(*p)->rb_right; 72 | } 73 | 74 | return NULL; 75 | } 76 | 77 | struct ksym_s *find_ksym_by_name(const char *name) 78 | { 79 | struct rb_node *node; 80 | 81 | for (node = rb_first(&ksyms); node; node = rb_next(node)) { 82 | struct ksym_s *sym; 83 | 84 | sym = rb_entry(node, struct ksym_s, rb_node); 85 | if (!strcmp(sym->name, name)) 86 | return sym; 87 | } 88 | 89 | return NULL; 90 | } 91 | 92 | /* look for sym with this starting address */ 93 | static struct ksym_s *find_ksym_start(unsigned long addr) 94 | { 95 | struct rb_node **p = &ksyms.rb_node; 96 | struct rb_node *parent = NULL; 97 | 98 | while (*p != NULL) { 99 | struct ksym_s *sym; 100 | 101 | parent = *p; 102 | 103 | sym = container_of(parent, struct ksym_s, rb_node); 104 | if (addr == sym->addr) 105 | return sym; 106 | 107 | if (addr < sym->addr) 108 | p = &(*p)->rb_left; 109 | else if (addr > sym->addr) 110 | p = &(*p)->rb_right; 111 | } 112 | 113 | return NULL; 114 | } 115 | 116 | int insert_ksym(struct ksym_s *new_sym) 117 | { 118 | struct rb_node **node = &ksyms.rb_node; 119 | struct rb_node *parent = NULL; 120 | 121 | if (!new_sym->addr_next) 122 | new_sym->addr_next = new_sym->addr; 123 | 124 | #ifdef KSYM_DEBUG 125 | printf("insert_ksym: %s %s %lx -> %lx\n", 126 | new_sym->name, new_sym->mod, new_sym->addr, new_sym->addr_next); 127 | #endif 128 | while (*node != NULL) { 129 | struct ksym_s *sym; 130 | 131 | parent = *node; 132 | sym = container_of(parent, struct ksym_s, rb_node); 133 | if (new_sym->addr < sym->addr) 134 | node = &(*node)->rb_left; 135 | else if (new_sym->addr > sym->addr) 136 | node = &(*node)->rb_right; 137 | else 138 | return -EEXIST; 139 | } 140 | 141 | rb_link_node(&new_sym->rb_node, parent, node); 142 | rb_insert_color(&new_sym->rb_node, &ksyms); 143 | 144 | return 0; 145 | } 146 | 147 | static void fixup_ksym(struct ksym_s *sym, char *fields[], int nfields) 148 | { 149 | #ifdef KSYM_DEBUG 150 | const char *mod = nfields > 3 ? 
fields[3] : "[kernel]"; 151 | const char *name = fields[2]; 152 | 153 | fprintf(stderr, "2 entries with address %lx: %s and %s [%s]\n", 154 | sym->addr, sym->name, name, mod); 155 | #endif 156 | } 157 | 158 | /* 159 | * expecting lines with the following format: 160 | * addr symbol module 161 | */ 162 | int load_ksyms(const char *file) 163 | { 164 | struct ksym_s *sym = NULL, *prev_sym = NULL; 165 | unsigned long long init_net = 0; 166 | unsigned int lineno = 0; 167 | unsigned long addr = 0; 168 | char line[1024]; 169 | char *fields[4]; 170 | int nfields; 171 | int rc = 0; 172 | char *nl; 173 | FILE *fp; 174 | 175 | if (ksyms_initialized) { 176 | fprintf(stderr, "ksyms already populated\n"); 177 | return -1; 178 | } 179 | 180 | fp = fopen(file, "r"); 181 | if (!fp) { 182 | fprintf(stderr, 183 | "failed to open %s: %s\n", file, strerror(errno)); 184 | return -1; 185 | } 186 | 187 | while(fgets(line, sizeof(line), fp)) 188 | { 189 | const char *stype; 190 | 191 | lineno++; 192 | nl = strchr(line, '\n'); 193 | if (!nl) { 194 | fprintf(stderr, 195 | "failed to read full line at line %u\n", lineno); 196 | rc = -1; 197 | break; 198 | } 199 | 200 | nfields = parsestr(line, " \n\r\t", fields, 4); 201 | if (nfields < 3) { 202 | fprintf(stderr, "line %d: not enough fields\n", lineno); 203 | continue; 204 | } 205 | 206 | if (str_to_ulong_base(fields[0], &addr, 16) != 0) { 207 | fprintf(stderr, 208 | "line %d: failed to convert %s to an integer\n", 209 | lineno, fields[0]); 210 | continue; 211 | } 212 | 213 | stype = fields[1]; 214 | if (*stype != 'T' && *stype != 't') { 215 | if (!strcmp(fields[2], "init_net")) 216 | init_net = addr; 217 | continue; 218 | } 219 | 220 | /* 221 | * check for multiple entries with the same address 222 | */ 223 | if (prev_sym && prev_sym->addr == addr) { 224 | fixup_ksym(prev_sym, fields, nfields); 225 | continue; 226 | } 227 | 228 | sym = find_ksym_start(addr); 229 | if (sym) { 230 | fixup_ksym(sym, fields, nfields); 231 | continue; 232 | } 233 | 234 | sym = __new_ksym(addr, fields, nfields); 235 | if (!sym) { 236 | fprintf(stderr, 237 | "failed to allocate memory for new ksym entry\n"); 238 | rc = -1; 239 | break; 240 | } 241 | 242 | if (prev_sym) { 243 | prev_sym->addr_next = addr ? 
: prev_sym->addr; 244 | rc = insert_ksym(prev_sym); 245 | if (rc) { 246 | fprintf(stderr, 247 | "failed to insert %s [%s] %lx\n", 248 | prev_sym->name, prev_sym->mod, 249 | prev_sym->addr); 250 | break; 251 | } 252 | } 253 | prev_sym = sym; 254 | } 255 | 256 | if (prev_sym) { 257 | prev_sym->addr_next = (unsigned long long)(-1); 258 | insert_ksym(prev_sym); 259 | } 260 | 261 | if (init_net) { 262 | sym = new_ksym(init_net, "init_net", "[kernel]"); 263 | if (sym) 264 | insert_ksym(sym); 265 | } 266 | 267 | fclose(fp); 268 | 269 | ksyms_initialized = true; 270 | 271 | return rc; 272 | } 273 | -------------------------------------------------------------------------------- /src/ksyms.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * kernel address to symbol interface 4 | * 5 | * Copyright (c) 2019-2020 David Ahern 6 | */ 7 | #ifndef _INCLUDE_KSYMS_H_ 8 | #define _INCLUDE_KSYMS_H_ 9 | 10 | #include 11 | 12 | struct ksym_s { 13 | struct rb_node rb_node; 14 | 15 | unsigned long addr; 16 | unsigned long addr_next; 17 | char *name; 18 | char *mod; 19 | 20 | bool is_unix; 21 | bool is_tcp; 22 | }; 23 | 24 | int load_ksyms(const char *file); 25 | struct ksym_s *find_ksym(unsigned long addr); 26 | struct ksym_s *find_ksym_by_name(const char *name); 27 | struct ksym_s *new_ksym(unsigned long addr, const char *name, const char *mod); 28 | int insert_ksym(struct ksym_s *new_sym); 29 | void free_ksym(struct ksym_s *sym); 30 | #endif 31 | -------------------------------------------------------------------------------- /src/kvm-nested.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Dump users of kvm-nested virt 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "libbpf_helpers.h" 19 | #include "perf_events.h" 20 | #include "kprobes.h" 21 | #include "linux/kernel.h" 22 | #include "timestamps.h" 23 | 24 | struct data { 25 | __u64 time; 26 | __u32 cpu; 27 | }; 28 | 29 | bool done; 30 | 31 | static int dump_map(int map_fd) 32 | { 33 | __u64 *key, *prev_key = NULL, val, pid; 34 | char buf[64]; 35 | int err; 36 | 37 | key = calloc(1, sizeof(*key)); 38 | if (!key) { 39 | fprintf(stderr, "Failed to allocate memory for key\n"); 40 | return 1; 41 | } 42 | 43 | printf("\n%s:\n", timestamp(buf, sizeof(buf), 0)); 44 | while(1) { 45 | err = bpf_map_get_next_key(map_fd, prev_key, key); 46 | if (err) { 47 | if (errno == ENOENT) 48 | err = 0; 49 | break; 50 | } 51 | 52 | val = 0; 53 | if (!bpf_map_lookup_elem(map_fd, key, &val)) { 54 | pid = *key; 55 | printf(" tgid %u pid %u count %llu\n", 56 | (__u32)(pid >> 32), (__u32)pid, val); 57 | } 58 | 59 | prev_key = key; 60 | } 61 | 62 | free(key); 63 | return err; 64 | } 65 | 66 | static void sig_handler(int signo) 67 | { 68 | printf("Terminating by signal %d\n", signo); 69 | done = true; 70 | } 71 | 72 | static void print_usage(char *prog) 73 | { 74 | printf( 75 | "usage: %s OPTS\n\n" 76 | " -f bpf-file bpf filename to load\n" 77 | " -t rate time rate (seconds) to dump stats\n" 78 | , basename(prog)); 79 | } 80 | 81 | int main(int argc, char **argv) 82 | { 83 | struct bpf_prog_load_attr prog_load_attr = { }; 84 | char *objfile = "kvm-nested.o"; 85 | struct kprobe_data probes[] = { 86 | { .func = "handle_vmresume", .fd = -1 }, 87 | }; 88 | 
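/* default: attach to the tracepoints below; -k switches to the handle_vmresume kprobe instead */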
const char *tps[] = { 89 | "kvm/kvm_nested_vmexit", 90 | "sched/sched_process_exit", 91 | NULL 92 | }; 93 | bool filename_set = false; 94 | bool use_kprobe = false; 95 | struct bpf_object *obj; 96 | int display_rate = 10; 97 | struct bpf_map *map; 98 | int rc, tmp; 99 | int map_fd; 100 | 101 | while ((rc = getopt(argc, argv, "f:t:k")) != -1) 102 | { 103 | switch(rc) { 104 | case 'f': 105 | objfile = optarg; 106 | filename_set = true; 107 | break; 108 | case 't': 109 | tmp = atoi(optarg); 110 | if (!tmp) { 111 | fprintf(stderr, "Invalid display rate\n"); 112 | return 1; 113 | } 114 | display_rate = tmp; 115 | break; 116 | case 'k': 117 | use_kprobe = true; 118 | break; 119 | default: 120 | print_usage(argv[0]); 121 | return 1; 122 | } 123 | } 124 | 125 | if (signal(SIGINT, sig_handler) || 126 | signal(SIGHUP, sig_handler) || 127 | signal(SIGTERM, sig_handler)) { 128 | perror("signal"); 129 | return 1; 130 | } 131 | 132 | setlinebuf(stdout); 133 | setlinebuf(stderr); 134 | 135 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 136 | return 1; 137 | 138 | map = bpf_object__find_map_by_name(obj, "nested_virt_map"); 139 | if (!map) { 140 | printf("Failed to get map in obj file\n"); 141 | return 1; 142 | } 143 | map_fd = bpf_map__fd(map); 144 | 145 | rc = 1; 146 | if (use_kprobe) { 147 | if (kprobe_init(obj, probes, ARRAY_SIZE(probes))) 148 | goto out; 149 | } else { 150 | if (configure_tracepoints(obj, tps)) 151 | goto out; 152 | } 153 | 154 | rc = 0; 155 | while (!done) { 156 | sleep(display_rate); 157 | if (dump_map(map_fd)) 158 | break; 159 | } 160 | 161 | out: 162 | return rc; 163 | } 164 | -------------------------------------------------------------------------------- /src/libbpf_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __LIBBPF_HELPERS_H 3 | #define __LIBBPF_HELPERS_H 4 | 5 | #include 6 | 7 | int load_obj_file(struct bpf_prog_load_attr *attr, 8 | struct bpf_object **obj, 9 | const char *objfile, bool user_set); 10 | 11 | int bpf_map_get_fd_by_name(const char *name); 12 | int bpf_map_get_fd_by_path(const char *path); 13 | int bpf_map_get_fd(__u32 id, const char *path, const char *name, 14 | const char *desc); 15 | 16 | int bpf_prog_get_fd_by_path(const char *path); 17 | int bpf_prog_get_fd(__u32 id, const char *path, const char *name, 18 | const char *desc); 19 | 20 | int attach_to_dev_generic(int idx, int prog_fd, const char *dev); 21 | int detach_from_dev_generic(int idx, const char *dev); 22 | 23 | int attach_to_dev(int idx, int prog_fd, const char *dev); 24 | int detach_from_dev(int idx, const char *dev); 25 | 26 | int attach_to_dev_tx(int idx, int prog_fd, const char *dev); 27 | int detach_from_dev_tx(int idx, const char *dev); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/napi_poll.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Analyze latency of the OVS. 
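 * Reports a histogram of packets processed per napi_poll invocation.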
3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "napi_poll.h" 19 | #include "libbpf_helpers.h" 20 | #include "perf_events.h" 21 | #include "timestamps.h" 22 | 23 | struct data { 24 | __u64 time; 25 | __u32 cpu; 26 | }; 27 | 28 | static bool done; 29 | 30 | static void dump_buckets(__u64 *buckets, __u64 *prev_buckets) 31 | { 32 | __u64 diff[NAPI_BUCKETS]; 33 | char buf[64]; 34 | int i; 35 | 36 | /* get difference between samples and save 37 | * new sample as old 38 | */ 39 | for (i = 0; i < NAPI_BUCKETS; ++i) { 40 | diff[i] = buckets[i] - prev_buckets[i]; 41 | prev_buckets[i] = buckets[i]; 42 | } 43 | 44 | printf("%s: ", timestamp(buf, sizeof(buf), 0)); 45 | printf("Packets processed per NAPI poll\n"); 46 | printf(" 0 : %'8llu\n", diff[0]); 47 | printf(" 1 : %'8llu\n", diff[1]); 48 | printf(" 2 : %'8llu\n", diff[2]); 49 | printf(" 3-4 : %'8llu\n", diff[3]); 50 | printf(" 5-8 : %'8llu\n", diff[4]); 51 | printf(" 9-16: %'8llu\n", diff[5]); 52 | printf(" 17-32: %'8llu\n", diff[6]); 53 | printf(" 33-63: %'8llu\n", diff[7]); 54 | printf(" 64: %'8llu\n", diff[8]); 55 | } 56 | 57 | static int napi_poll_dump_hist(int hist_map_fd) 58 | { 59 | static __u64 prev_buckets[NAPI_BUCKETS]; 60 | struct napi_poll_hist val; 61 | __u32 idx = 0; 62 | 63 | if (bpf_map_lookup_elem(hist_map_fd, &idx, &val)) { 64 | fprintf(stderr, "Failed to get hist values\n"); 65 | return 1; 66 | } 67 | 68 | dump_buckets(val.buckets, prev_buckets); 69 | printf("\n"); 70 | 71 | return 0; 72 | } 73 | 74 | static void sig_handler(int signo) 75 | { 76 | printf("Terminating by signal %d\n", signo); 77 | done = true; 78 | } 79 | 80 | static void print_usage(char *prog) 81 | { 82 | printf( 83 | "usage: %s OPTS\n\n" 84 | " -f bpf-file bpf filename to load\n" 85 | " -t rate time rate (seconds) to dump stats\n" 86 | , basename(prog)); 87 | } 88 | 89 | int main(int argc, char **argv) 90 | { 91 | struct bpf_prog_load_attr prog_load_attr = { }; 92 | const char *tps[] = { 93 | "napi/napi_poll", 94 | NULL 95 | }; 96 | struct napi_poll_hist hist = {}; 97 | char *objfile = "napi_poll.o"; 98 | bool filename_set = false; 99 | struct bpf_object *obj; 100 | int display_rate = 10; 101 | struct bpf_map *map; 102 | int hist_map_fd; 103 | __u32 idx = 0; 104 | int rc, tmp; 105 | 106 | while ((rc = getopt(argc, argv, "f:t:")) != -1) 107 | { 108 | switch(rc) { 109 | case 'f': 110 | objfile = optarg; 111 | filename_set = true; 112 | break; 113 | case 't': 114 | tmp = atoi(optarg); 115 | if (!tmp) { 116 | fprintf(stderr, "Invalid display rate\n"); 117 | return 1; 118 | } 119 | display_rate = tmp; 120 | break; 121 | default: 122 | print_usage(argv[0]); 123 | return 1; 124 | } 125 | } 126 | 127 | if (signal(SIGINT, sig_handler) || 128 | signal(SIGHUP, sig_handler) || 129 | signal(SIGTERM, sig_handler)) { 130 | perror("signal"); 131 | return 1; 132 | } 133 | 134 | setlinebuf(stdout); 135 | setlinebuf(stderr); 136 | setlocale(LC_NUMERIC, "en_US.utf-8"); 137 | 138 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 139 | return 1; 140 | 141 | map = bpf_object__find_map_by_name(obj, "napi_poll_map"); 142 | if (!map) { 143 | printf("Failed to get histogram map in obj file\n"); 144 | return 1; 145 | } 146 | hist_map_fd = bpf_map__fd(map); 147 | 148 | /* make sure index 0 entry exists */ 149 | bpf_map_update_elem(hist_map_fd, &idx, &hist, BPF_ANY); 150 | 151 | if 
(configure_tracepoints(obj, tps)) 152 | return 1; 153 | 154 | while (!done) { 155 | sleep(display_rate); 156 | if (napi_poll_dump_hist(hist_map_fd)) 157 | break; 158 | } 159 | 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /src/net_rx_action.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Analyze time to run net_rx_action 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "net_rx_action.h" 19 | #include "libbpf_helpers.h" 20 | #include "perf_events.h" 21 | #include "kprobes.h" 22 | #include "timestamps.h" 23 | #include "linux/kernel.h" 24 | 25 | struct data { 26 | __u64 time; 27 | __u32 cpu; 28 | }; 29 | 30 | static bool done; 31 | 32 | static void dump_buckets(__u64 *buckets, __u64 *prev_buckets) 33 | { 34 | __u64 diff[NET_RX_NUM_BKTS]; 35 | char buf[64]; 36 | int i; 37 | 38 | /* get difference between samples and save 39 | * new sample as old 40 | */ 41 | for (i = 0; i < NET_RX_NUM_BKTS; ++i) { 42 | diff[i] = buckets[i] - prev_buckets[i]; 43 | 44 | prev_buckets[i] = buckets[i]; 45 | } 46 | 47 | printf("%s: ", timestamp(buf, sizeof(buf), 0)); 48 | printf("errors: %llu\n", diff[NET_RX_ERR_BKT]); 49 | printf(" time (usec) count\n"); 50 | printf(" 0 - %'7u: %'8llu\n", NET_RX_BUCKET_0, diff[0]); 51 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_0, NET_RX_BUCKET_1, diff[1]); 52 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_1, NET_RX_BUCKET_2, diff[2]); 53 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_2, NET_RX_BUCKET_3, diff[3]); 54 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_3, NET_RX_BUCKET_4, diff[4]); 55 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_4, NET_RX_BUCKET_5, diff[5]); 56 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_5, NET_RX_BUCKET_6, diff[6]); 57 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_6, NET_RX_BUCKET_7, diff[7]); 58 | printf(" %'7u+ - %'7u: %'8llu\n", NET_RX_BUCKET_7, NET_RX_BUCKET_8, diff[8]); 59 | printf(" %'7u+ - up: %'8llu\n", NET_RX_BUCKET_8, diff[9]); 60 | } 61 | 62 | static int net_rx_dump_hist(int hist_map_fd) 63 | { 64 | static __u64 prev_buckets[NET_RX_NUM_BKTS]; 65 | struct net_rx_hist_val val; 66 | __u32 idx = 0; 67 | 68 | if (bpf_map_lookup_elem(hist_map_fd, &idx, &val)) { 69 | fprintf(stderr, "Failed to get hist values\n"); 70 | return 1; 71 | } 72 | 73 | dump_buckets(val.buckets, prev_buckets); 74 | printf("\n"); 75 | 76 | return 0; 77 | } 78 | 79 | static void sig_handler(int signo) 80 | { 81 | printf("Terminating by signal %d\n", signo); 82 | done = true; 83 | } 84 | 85 | static void print_usage(char *prog) 86 | { 87 | printf( 88 | "usage: %s OPTS\n\n" 89 | " -f bpf-file bpf filename to load\n" 90 | " -t rate time rate (seconds) to dump stats\n" 91 | , basename(prog)); 92 | } 93 | 94 | int main(int argc, char **argv) 95 | { 96 | struct bpf_prog_load_attr prog_load_attr = { }; 97 | struct net_rx_hist_val hist2 = {}; 98 | char *objfile = "net_rx_action.o"; 99 | struct kprobe_data probes[] = { 100 | { .func = "net_rx_action", .fd = -1 }, 101 | { .func = "net_rx_action", .fd = -1, .retprobe = true }, 102 | }; 103 | bool filename_set = false; 104 | struct bpf_object *obj; 105 | int display_rate = 10; 106 | struct bpf_map *map; 107 | int hist_map_fd; 108 | __u32 idx = 0; 109 | int rc, tmp; 110 | 111 | while ((rc = 
getopt(argc, argv, "f:t:")) != -1) 112 | { 113 | switch(rc) { 114 | case 'f': 115 | objfile = optarg; 116 | filename_set = true; 117 | break; 118 | case 't': 119 | tmp = atoi(optarg); 120 | if (!tmp) { 121 | fprintf(stderr, "Invalid display rate\n"); 122 | return 1; 123 | } 124 | display_rate = tmp; 125 | break; 126 | default: 127 | print_usage(argv[0]); 128 | return 1; 129 | } 130 | } 131 | 132 | if (signal(SIGINT, sig_handler) || 133 | signal(SIGHUP, sig_handler) || 134 | signal(SIGTERM, sig_handler)) { 135 | perror("signal"); 136 | return 1; 137 | } 138 | 139 | setlinebuf(stdout); 140 | setlinebuf(stderr); 141 | setlocale(LC_NUMERIC, "en_US.utf-8"); 142 | 143 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 144 | return 1; 145 | 146 | map = bpf_object__find_map_by_name(obj, "net_rx_map"); 147 | if (!map) { 148 | printf("Failed to get histogram map in obj file\n"); 149 | return 1; 150 | } 151 | hist_map_fd = bpf_map__fd(map); 152 | 153 | /* make sure index 0 entry exists */ 154 | bpf_map_update_elem(hist_map_fd, &idx, &hist2, BPF_ANY); 155 | 156 | rc = 1; 157 | if (kprobe_init(obj, probes, ARRAY_SIZE(probes))) 158 | goto out; 159 | 160 | rc = 0; 161 | while (!done) { 162 | sleep(display_rate); 163 | if (net_rx_dump_hist(hist_map_fd)) 164 | break; 165 | } 166 | out: 167 | kprobe_cleanup(probes, ARRAY_SIZE(probes)); 168 | 169 | return rc; 170 | } 171 | -------------------------------------------------------------------------------- /src/opensnoop.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Track calls to open. Similar to and inspired by opensnoop in bcc-tools. 3 | * 4 | * Copyright (c) 2019-2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include "opensnoop.h" 21 | #include "libbpf_helpers.h" 22 | #include "perf_events.h" 23 | #include "kprobes.h" 24 | #include "timestamps.h" 25 | 26 | static bool print_time = true; 27 | static bool print_dt; 28 | static bool done; 29 | 30 | struct task { 31 | struct list_head list; 32 | __u64 time; 33 | __u32 tid; 34 | __u32 pid; 35 | __u32 ppid; 36 | __u32 flags; 37 | __u32 mode; 38 | char comm[TASK_COMM_LEN]; 39 | char *filename; 40 | }; 41 | 42 | LIST_HEAD(entries); 43 | 44 | static void free_task(struct task *task) 45 | { 46 | list_del(&task->list); 47 | free(task->filename); 48 | free(task); 49 | } 50 | 51 | static struct task *get_task(struct data *data, bool create) 52 | { 53 | struct task *task; 54 | 55 | list_for_each_entry(task, &entries, list) { 56 | if (data->tid == task->tid && 57 | data->pid == task->pid && 58 | data->ppid == task->ppid) 59 | return task; 60 | } 61 | 62 | if (!create) 63 | return NULL; 64 | 65 | task = calloc(1, sizeof(*task)); 66 | if (task) { 67 | task->time = data->time; 68 | task->tid = data->tid; 69 | task->pid = data->pid; 70 | task->ppid = data->ppid; 71 | strcpy(task->comm, data->comm); 72 | list_add(&task->list, &entries); 73 | } 74 | return task; 75 | } 76 | 77 | static void print_header(void) 78 | { 79 | printf("\n\n"); 80 | if (print_time) 81 | printf("%15s", "TIME"); 82 | if (print_dt) 83 | printf(" %10s", "DT"); 84 | if (print_time || print_dt) 85 | printf(" "); 86 | 87 | printf("%5s %-16s %6s/%-6s %6s %8s %8s %6s %s\n", 88 | " CPU ", "COMM", "TID", "PID", "PPID", "FLAGS", "MODE", "RET", 89 | "FILENAME"); 90 | fflush(stdout); 91 | } 92 | 93 | static void 
show_timestamps(__u64 start, __u64 end) 94 | { 95 | char buf[64]; 96 | 97 | if (print_time) 98 | printf("%15s", timestamp(buf, sizeof(buf), start)); 99 | 100 | if (print_dt) 101 | print_time_usecs(end - start); 102 | 103 | printf(" "); 104 | } 105 | 106 | bool show_header; 107 | 108 | static __u64 event_timestamp(struct perf_event_ctx *ctx, void *_data) 109 | { 110 | struct data *data = _data; 111 | 112 | return data->time; 113 | } 114 | 115 | static void process_event(struct perf_event_ctx *ctx, void *_data) 116 | { 117 | struct data *data = _data; 118 | struct task *task; 119 | 120 | if (show_header) { 121 | show_header = false; 122 | print_header(); 123 | } 124 | 125 | task = get_task(data, data->event_type == EVENT_ARG); 126 | if (!task) { 127 | printf("Failed to get task entry\n"); 128 | return; 129 | } 130 | 131 | switch (data->event_type) { 132 | case EVENT_ARG: 133 | task->filename = strdup(data->filename); 134 | break; 135 | case EVENT_RET: 136 | if (print_time || print_dt) 137 | show_timestamps(task->time, data->time); 138 | printf("[%03d] %-16s %6d/%-6d %6d %8x %8x %6d %s\n", 139 | data->cpu, task->comm, task->tid, task->pid, 140 | task->ppid, task->flags, task->mode, data->retval, 141 | task->filename); 142 | free_task(task); 143 | break; 144 | } 145 | } 146 | 147 | static int opensnoop_complete(struct perf_event_ctx *ctx) 148 | { 149 | show_header = true; 150 | perf_event_process_events(ctx); 151 | 152 | return done; 153 | } 154 | 155 | static void sig_handler(int signo) 156 | { 157 | printf("Terminating by signal %d\n", signo); 158 | done = true; 159 | } 160 | 161 | int main(int argc, char **argv) 162 | { 163 | struct bpf_prog_load_attr prog_load_attr = { 164 | .prog_type = BPF_PROG_TYPE_KPROBE, 165 | }; 166 | struct perf_event_ctx ctx = { 167 | .event_timestamp = event_timestamp, 168 | .process_event = process_event, 169 | .complete_fn = opensnoop_complete, 170 | .data_size = sizeof(struct data), 171 | }; 172 | char *objfile = "opensnoop.o"; 173 | bool filename_set = false; 174 | struct kprobe_data probes[] = { 175 | { .func = "do_sys_open", .fd = -1 }, 176 | { .func = "do_sys_open", .fd = -1, .retprobe = true }, 177 | }; 178 | struct bpf_object *obj; 179 | int nevents = 1000; 180 | int rc; 181 | 182 | if (argc > 1) { 183 | objfile = argv[1]; 184 | filename_set = true; 185 | } 186 | 187 | if (set_reftime()) 188 | return 1; 189 | 190 | if (signal(SIGINT, sig_handler) || 191 | signal(SIGHUP, sig_handler) || 192 | signal(SIGTERM, sig_handler)) { 193 | perror("signal"); 194 | return 1; 195 | } 196 | 197 | setlinebuf(stdout); 198 | setlinebuf(stderr); 199 | setlocale(LC_NUMERIC, "en_US.utf-8"); 200 | 201 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 202 | return 1; 203 | 204 | rc = 1; 205 | if (kprobe_init(obj, probes, ARRAY_SIZE(probes))) 206 | goto out; 207 | 208 | if (perf_event_configure(&ctx, obj, "channel", nevents)) 209 | goto out; 210 | 211 | /* main event loop */ 212 | rc = perf_event_loop(&ctx); 213 | out: 214 | perf_event_close(&ctx); 215 | kprobe_cleanup(probes, ARRAY_SIZE(probes)); 216 | 217 | return rc; 218 | } 219 | -------------------------------------------------------------------------------- /src/ovslatency.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Analyze latency of the OVS. 
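 * Entry/return kprobes on ovs_vport_receive feed a microsecond latency histogram.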
3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ovslatency.h" 19 | #include "libbpf_helpers.h" 20 | #include "perf_events.h" 21 | #include "kprobes.h" 22 | #include "timestamps.h" 23 | #include "linux/kernel.h" 24 | 25 | struct data { 26 | __u64 time; 27 | __u32 cpu; 28 | }; 29 | 30 | static bool done; 31 | 32 | static void dump_buckets(__u64 *buckets, __u64 *prev_buckets) 33 | { 34 | __u64 diff[8]; 35 | char buf[64]; 36 | int i; 37 | 38 | /* get difference between samples and save 39 | * new sample as old 40 | */ 41 | for (i = 0; i < 8; ++i) { 42 | diff[i] = buckets[i] - prev_buckets[i]; 43 | 44 | prev_buckets[i] = buckets[i]; 45 | } 46 | 47 | printf("%s: ", timestamp(buf, sizeof(buf), 0)); 48 | if (diff[7] == 0) { 49 | printf("No packets\n"); 50 | return; 51 | } 52 | 53 | printf("total number of packets %llu:\n", diff[7]); 54 | printf(" time (usec) count\n"); 55 | printf(" 0 - %4u: %'8llu\n", OVS_BUCKET_0, diff[0]); 56 | printf(" %4u+ - %4u: %'8llu\n", OVS_BUCKET_0, OVS_BUCKET_1, diff[1]); 57 | printf(" %4u+ - %4u: %'8llu\n", OVS_BUCKET_1, OVS_BUCKET_2, diff[2]); 58 | printf(" %4u+ - %4u: %'8llu\n", OVS_BUCKET_2, OVS_BUCKET_3, diff[3]); 59 | printf(" %4u+ - %4u: %'8llu\n", OVS_BUCKET_3, OVS_BUCKET_4, diff[4]); 60 | printf(" %4u+ - %4u: %'8llu\n", OVS_BUCKET_4, OVS_BUCKET_5, diff[5]); 61 | printf(" %4u+ - up: %'8llu\n", OVS_BUCKET_5, diff[6]); 62 | } 63 | 64 | static int ovslat_dump_hist(int hist_map_fd) 65 | { 66 | static __u64 prev_buckets[8]; 67 | struct ovslat_hist_val val; 68 | __u32 idx = 0; 69 | 70 | if (bpf_map_lookup_elem(hist_map_fd, &idx, &val)) { 71 | fprintf(stderr, "Failed to get hist values\n"); 72 | return 1; 73 | } 74 | 75 | dump_buckets(val.buckets, prev_buckets); 76 | printf("\n"); 77 | 78 | return 0; 79 | } 80 | 81 | static void sig_handler(int signo) 82 | { 83 | printf("Terminating by signal %d\n", signo); 84 | done = true; 85 | } 86 | 87 | static void print_usage(char *prog) 88 | { 89 | printf( 90 | "usage: %s OPTS\n\n" 91 | " -f bpf-file bpf filename to load\n" 92 | " -t rate time rate (seconds) to dump stats\n" 93 | , basename(prog)); 94 | } 95 | 96 | int main(int argc, char **argv) 97 | { 98 | struct bpf_prog_load_attr prog_load_attr = { }; 99 | struct ovslat_hist_val hist2 = {}; 100 | char *objfile = "ovslatency.o"; 101 | struct kprobe_data probes[] = { 102 | { .func = "ovs_vport_receive", .fd = -1 }, 103 | { .func = "ovs_vport_receive", .fd = -1, .retprobe = true }, 104 | }; 105 | bool filename_set = false; 106 | struct bpf_object *obj; 107 | int display_rate = 10; 108 | struct bpf_map *map; 109 | int hist_map_fd; 110 | __u32 idx = 0; 111 | int rc, tmp; 112 | 113 | while ((rc = getopt(argc, argv, "f:t:")) != -1) 114 | { 115 | switch(rc) { 116 | case 'f': 117 | objfile = optarg; 118 | filename_set = true; 119 | break; 120 | case 't': 121 | tmp = atoi(optarg); 122 | if (!tmp) { 123 | fprintf(stderr, "Invalid display rate\n"); 124 | return 1; 125 | } 126 | display_rate = tmp; 127 | break; 128 | default: 129 | print_usage(argv[0]); 130 | return 1; 131 | } 132 | } 133 | 134 | if (signal(SIGINT, sig_handler) || 135 | signal(SIGHUP, sig_handler) || 136 | signal(SIGTERM, sig_handler)) { 137 | perror("signal"); 138 | return 1; 139 | } 140 | 141 | setlinebuf(stdout); 142 | setlinebuf(stderr); 143 | setlocale(LC_NUMERIC, "en_US.utf-8"); 144 | 145 | if (load_obj_file(&prog_load_attr, &obj, objfile, 
filename_set)) 146 | return 1; 147 | 148 | map = bpf_object__find_map_by_name(obj, "ovslat_map"); 149 | if (!map) { 150 | printf("Failed to get histogram map in obj file\n"); 151 | return 1; 152 | } 153 | hist_map_fd = bpf_map__fd(map); 154 | 155 | /* make sure index 0 entry exists */ 156 | bpf_map_update_elem(hist_map_fd, &idx, &hist2, BPF_ANY); 157 | 158 | rc = 1; 159 | if (kprobe_init(obj, probes, ARRAY_SIZE(probes))) 160 | goto out; 161 | 162 | rc = 0; 163 | while (!done) { 164 | sleep(display_rate); 165 | if (ovslat_dump_hist(hist_map_fd)) 166 | break; 167 | } 168 | 169 | out: 170 | kprobe_cleanup(probes, ARRAY_SIZE(probes)); 171 | return rc; 172 | } 173 | -------------------------------------------------------------------------------- /src/perf_events.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __PERF_EVENTS_H 3 | #define __PERF_EVENTS_H 4 | 5 | #include 6 | #include 7 | 8 | #define TRACINGFS "/sys/kernel/debug/tracing" 9 | 10 | struct perf_event_ctx { 11 | /* called before starting data collection */ 12 | void (*start_fn)(void); 13 | 14 | /* called for each event */ 15 | enum bpf_perf_event_ret (*output_fn)(struct perf_event_ctx *ctx, 16 | void *data, int size); 17 | 18 | /* users of cache API */ 19 | __u64 (*event_timestamp)(struct perf_event_ctx *ctx, void *data); 20 | void (*process_event)(struct perf_event_ctx *ctx, void *data); 21 | 22 | /* called at the end of a polling loop; non-0 terminates polling */ 23 | int (*complete_fn)(struct perf_event_ctx *ctx); 24 | 25 | int *pmu_fds; 26 | struct perf_event_mmap_page **headers; 27 | int num_cpus; 28 | 29 | int data_size; 30 | int page_size; 31 | int page_cnt; /* pages per mmap */ 32 | __u64 total_events; 33 | __u64 time_drops; 34 | }; 35 | 36 | int sys_perf_event_open(struct perf_event_attr *attr, 37 | int cpu, unsigned long flags); 38 | 39 | int perf_event_tp_set_prog(int prog_fd, __u64 config); 40 | int configure_tracepoints(struct bpf_object *obj, const char *tps[]); 41 | 42 | int perf_event_syscall(int prog_fd, const char *name); 43 | 44 | /* attach channel map to perf */ 45 | int perf_event_configure(struct perf_event_ctx *ctx, struct bpf_object *obj, 46 | const char *map_name, int nevents); 47 | int configure_raw_tracepoints(struct bpf_object *obj, const char *tps[]); 48 | 49 | void perf_event_close(struct perf_event_ctx *ctx); 50 | 51 | int perf_event_loop(struct perf_event_ctx *ctx); 52 | 53 | void perf_event_process_events(struct perf_event_ctx *ctx); 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /src/print_pkt.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Functions to pretty print packet headers 3 | * 4 | * David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "flow.h" 19 | #include "str_utils.h" 20 | 21 | static void print_tcp(const struct flow_tcp *fl, const char *src, 22 | const char *dst) 23 | { 24 | printf(" src=%s/%d -> dst=%s/%d TCP", 25 | src, fl->sport, dst, fl->dport); 26 | 27 | if (fl->syn) 28 | printf(" SYN"); 29 | if (fl->ack) 30 | printf(" ACK"); 31 | if (fl->fin) 32 | printf(" FIN"); 33 | if (fl->rst) 34 | printf(" RST"); 35 | 36 | printf("\n"); 37 | } 38 | 39 | static void print_udp(const struct flow_udp *fl, const char *src, 40 | const char 
*dst) 41 | { 42 | printf(" src=%s/%d -> dst=%s/%d UDP\n", 43 | src, fl->sport, dst, fl->dport); 44 | } 45 | 46 | static void print_icmp6(const struct flow_icmp6 *fli, const char *src, 47 | const char *dst) 48 | { 49 | printf(" src=%s -> dst=%s ICMP ", src, dst); 50 | switch(fli->icmp6_type) { 51 | case NDISC_ROUTER_SOLICITATION: 52 | printf("router solicitation\n"); 53 | break; 54 | case NDISC_ROUTER_ADVERTISEMENT: 55 | printf("router advertisement\n"); 56 | break; 57 | case NDISC_NEIGHBOUR_SOLICITATION: 58 | printf("neighbor solicitation\n"); 59 | break; 60 | case NDISC_NEIGHBOUR_ADVERTISEMENT: 61 | printf("neighbor advertisement\n"); 62 | break; 63 | case ICMPV6_ECHO_REQUEST: 64 | printf("echo request\n"); 65 | break; 66 | case ICMPV6_ECHO_REPLY: 67 | printf("echo reply\n"); 68 | break; 69 | default: 70 | printf("unknown %u/%u\n", fli->icmp6_type, fli->icmp6_code); 71 | } 72 | } 73 | 74 | static void print_transport(const struct flow_transport *fl, 75 | const char *src, const char *dst) 76 | { 77 | switch(fl->proto) { 78 | case IPPROTO_TCP: 79 | print_tcp(&fl->tcp, src, dst); 80 | break; 81 | case IPPROTO_UDP: 82 | print_udp(&fl->udp, src, dst); 83 | break; 84 | case IPPROTO_VRRP: 85 | printf(" VRRP: src=%s -> dst=%s\n", src, dst); 86 | break; 87 | case IPPROTO_ICMPV6: 88 | print_icmp6(&fl->icmp6, src, dst); 89 | break; 90 | default: 91 | printf(" protocol %u: src=%s -> dst=%s\n", 92 | fl->proto, src, dst); 93 | } 94 | } 95 | 96 | static void print_ipv6(const struct flow_ip6 *fl6) 97 | { 98 | char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; 99 | 100 | inet_ntop(AF_INET6, &fl6->saddr, src, sizeof(src)); 101 | inet_ntop(AF_INET6, &fl6->daddr, dst, sizeof(dst)); 102 | 103 | print_transport(&fl6->trans, src, dst); 104 | } 105 | 106 | static void print_ipv4(const struct flow_ip4 *fl4) 107 | { 108 | char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 109 | 110 | inet_ntop(AF_INET, &fl4->saddr, src, sizeof(src)); 111 | inet_ntop(AF_INET, &fl4->daddr, dst, sizeof(dst)); 112 | 113 | print_transport(&fl4->trans, src, dst); 114 | } 115 | 116 | static void print_arphdr(const struct flow_arp *fla) 117 | { 118 | char addr[INET_ADDRSTRLEN]; 119 | 120 | inet_ntop(AF_INET, &fla->data.ar_sip, addr, sizeof(addr)); 121 | printf("sender: %s ", addr); 122 | print_mac(fla->data.ar_sha, false); 123 | 124 | inet_ntop(AF_INET, &fla->data.ar_tip, addr, sizeof(addr)); 125 | printf(" target: %s ", addr); 126 | print_mac(fla->data.ar_tha, false); 127 | } 128 | 129 | static void print_arp(const struct flow_arp *fla) 130 | { 131 | printf(" "); 132 | 133 | switch(fla->op) { 134 | case ARPOP_REQUEST: 135 | printf("arp request: "); 136 | break; 137 | case ARPOP_REPLY: 138 | printf("arp reply: "); 139 | break; 140 | case ARPOP_RREQUEST: 141 | printf("rarp request: "); 142 | break; 143 | case ARPOP_RREPLY: 144 | printf("rarp reply: "); 145 | break; 146 | default: 147 | printf("arp op %x: ", fla->op); 148 | break; 149 | } 150 | print_arphdr(fla); 151 | printf("\n"); 152 | } 153 | 154 | void print_flow(const struct flow *fl) 155 | { 156 | print_mac(fl->smac, false); 157 | printf(" -> "); 158 | print_mac(fl->dmac, false); 159 | 160 | if (fl->has_vlan) { 161 | __u16 vlan, prio; 162 | 163 | vlan = fl->vlan.outer_vlan_TCI & VLAN_VID_MASK; 164 | printf(" vlan %u", vlan); 165 | 166 | prio = (fl->vlan.outer_vlan_TCI & VLAN_PRIO_MASK); 167 | prio >>= VLAN_PRIO_SHIFT; 168 | if (prio) 169 | printf(" prio %u", prio); 170 | } 171 | 172 | switch(fl->proto) { 173 | case ETH_P_ARP: 174 | print_arp(&fl->arp); 175 | break; 176 | case ETH_P_IP: 
177 | print_ipv4(&fl->ip4); 178 | break; 179 | case ETH_P_IPV6: 180 | print_ipv6(&fl->ip6); 181 | break; 182 | case ETH_P_LLDP: 183 | printf(" LLDP\n"); 184 | break; 185 | default: 186 | printf(" ethernet protocol %x\n", fl->proto); 187 | } 188 | } 189 | 190 | void print_pkt(__u16 protocol, const __u8 *data, int len) 191 | { 192 | struct flow fl = {}; 193 | 194 | if (parse_pkt(&fl, protocol, data, len)) 195 | printf("*** failed to parse packet ***\n"); 196 | else 197 | print_flow(&fl); 198 | } 199 | -------------------------------------------------------------------------------- /src/str_utils.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* 3 | * String conversion and parsing functions. 4 | * 5 | * David Ahern 6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "str_utils.h" 15 | 16 | static int str_to_int_base(const char *str, int min, int max, int *value, int base) 17 | { 18 | int number; 19 | char *end; 20 | 21 | errno = 0; 22 | number = (int) strtol(str, &end, base); 23 | 24 | if ( ((*end == '\0') || (*end == '\n')) && (end != str) && 25 | (errno != ERANGE) && (min <= number) && (number <= max)) { 26 | *value = number; 27 | return 0; 28 | } 29 | 30 | return -1; 31 | } 32 | 33 | int str_to_int(const char *str, int min, int max, int *value) 34 | { 35 | return str_to_int_base(str, min, max, value, 0); 36 | } 37 | 38 | int str_to_ushort(const char *str, unsigned short *us) 39 | { 40 | int i; 41 | 42 | if (str_to_int(str, 0, 0xFFFF, &i) != 0) 43 | return -1; 44 | 45 | *us = (unsigned short) (i); 46 | 47 | return 0; 48 | } 49 | 50 | int str_to_ulong_base(const char *str, unsigned long *ul, int base) 51 | { 52 | char *end; 53 | 54 | *ul= strtoul(str, &end, base); 55 | if (*end != '\0') 56 | return -1; 57 | 58 | return 0; 59 | } 60 | 61 | int str_to_ulong(const char *str, unsigned long *ul) 62 | { 63 | return str_to_ulong_base(str, ul, 0); 64 | } 65 | 66 | int str_to_ullong(const char *str, unsigned long long *ul) 67 | { 68 | char *end; 69 | 70 | *ul= strtoull(str, &end, 0); 71 | if (*end != '\0') 72 | return -1; 73 | 74 | return 0; 75 | } 76 | 77 | int str_to_mac(const char *str, unsigned char *mac) 78 | { 79 | int rc = -1, m, i; 80 | char *s = strdup(str), *p, *d, tmp[3]; 81 | 82 | if (!s) 83 | return -1; 84 | 85 | p = s; 86 | tmp[2] = '\0'; 87 | for (i = 0; i < ETH_ALEN; ++i) { 88 | if (*p == '\0') 89 | goto out; 90 | 91 | d = strchr(p, ':'); 92 | if (d) { 93 | *d = '\0'; 94 | if (strlen(p) > 2) 95 | goto out; 96 | 97 | strcpy(tmp, p); 98 | p = d + 1; 99 | } else { 100 | strncpy(tmp, p, 2); 101 | p += 2; 102 | } 103 | 104 | if (str_to_int_base(tmp, 0, 0xFF, &m, 16) != 0) 105 | goto out; 106 | 107 | mac[i] = m; 108 | } 109 | 110 | if (*p == '\0') 111 | rc = 0; 112 | out: 113 | free(s); 114 | 115 | return rc; 116 | } 117 | 118 | int get_ifidx(const char *arg) 119 | { 120 | int idx; 121 | 122 | idx = if_nametoindex(arg); 123 | if (!idx) 124 | idx = strtoul(arg, NULL, 0); 125 | 126 | return idx; 127 | } 128 | 129 | /* find parameters in a string -- based on Harbison and Steele, p. 
291 */ 130 | int parsestr(char *str, char *delims, char *fields[], int nmax) 131 | { 132 | int n; 133 | 134 | if (!str || (*str == '\0')) 135 | return 0; 136 | 137 | n = 0; 138 | fields[0] = strtok(str, delims); 139 | while ((fields[n] != (char *) NULL) && (n < (nmax-1))) { 140 | ++n; 141 | fields[n] = strtok(NULL, delims); 142 | } 143 | 144 | if ((n == (nmax - 1)) && (fields[n] != (char *) NULL)) 145 | ++n; 146 | 147 | return n; 148 | } 149 | 150 | void print_mac(const __u8 *mac, bool reverse) 151 | { 152 | if (reverse) 153 | printf("%.02x:%.02x:%.02x:%.02x:%.02x:%.02x", 154 | mac[5], mac[4], mac[3], mac[2], mac[1], mac[0]); 155 | else 156 | printf("%.02x:%.02x:%.02x:%.02x:%.02x:%.02x", 157 | mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); 158 | } 159 | -------------------------------------------------------------------------------- /src/str_utils.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __STR_UTILS_H 3 | #define __STR_UTILS_H 4 | 5 | #include 6 | #include 7 | 8 | int str_to_int(const char *str, int min, int max, int *value); 9 | int str_to_ushort(const char *str, unsigned short *us); 10 | int str_to_ulong(const char *str, unsigned long *ul); 11 | int str_to_ulong_base(const char *str, unsigned long *ul, int base); 12 | int str_to_ullong(const char *str, unsigned long long *ul); 13 | int str_to_mac(const char *str, unsigned char *mac); 14 | int get_ifidx(const char *arg); 15 | 16 | int parsestr(char *str, char *delims, char *fields[], int nmax); 17 | void print_mac(const __u8 *mac, bool reverse); 18 | #endif 19 | -------------------------------------------------------------------------------- /src/timestamps.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Various time related helpers. 3 | * 4 | * phc_open code copied from linuxptp. 
5 | * hw timestamp code based on example in 6 | * tools/testing/selftests/networking/timestamping/hwtstamp_config.c 7 | * 8 | * David Ahern 9 | */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "timestamps.h" 27 | 28 | static __u64 mono_ref; 29 | static struct timeval tod_ref; 30 | 31 | /* convert monotonic clock to realtime */ 32 | static void tod_from_mono(__u64 stime, struct timeval *tv_res) 33 | { 34 | unsigned long long dt; 35 | struct timeval tv_dt; 36 | 37 | if (stime > mono_ref) { 38 | dt = stime - mono_ref; 39 | tv_dt.tv_sec = (time_t) (dt / NSEC_PER_SEC); 40 | tv_dt.tv_usec = (dt - tv_dt.tv_sec * NSEC_PER_SEC) / 1000; 41 | timeradd(&tod_ref, &tv_dt, tv_res); 42 | } else { 43 | dt = mono_ref - stime; 44 | tv_dt.tv_sec = (time_t) (dt / NSEC_PER_SEC); 45 | tv_dt.tv_usec = (dt - tv_dt.tv_sec * NSEC_PER_SEC) / 1000; 46 | timersub(&tod_ref, &tv_dt, tv_res); 47 | } 48 | } 49 | 50 | char *timestamp(char *buf, int len, __u64 stime) 51 | { 52 | struct timeval tv; 53 | 54 | buf[0] = '\0'; 55 | if (len < 64) 56 | return buf; 57 | 58 | if (mono_ref == 0 && stime) { 59 | unsigned long secs, usecs; 60 | unsigned long long nsecs; 61 | 62 | nsecs = stime; 63 | secs = nsecs / NSEC_PER_SEC; 64 | nsecs -= secs * NSEC_PER_SEC; 65 | usecs = nsecs / NSEC_PER_USEC; 66 | snprintf(buf, len, "%5lu.%06lu", secs, usecs); 67 | 68 | return buf; 69 | } 70 | 71 | if (stime) 72 | tod_from_mono(stime, &tv); 73 | else 74 | gettimeofday(&tv, NULL); 75 | 76 | return timestamp_tv(&tv, buf, len); 77 | } 78 | 79 | char *timestamp_tv(const struct timeval *tv, char *buf, int len) 80 | { 81 | struct tm ltime; 82 | 83 | if (localtime_r(&tv->tv_sec, <ime) == NULL) 84 | buf[0] = '\0'; 85 | else { 86 | char date[64]; 87 | 88 | strftime(date, sizeof(date), "%H:%M:%S", <ime); 89 | snprintf(buf, len, "%s.%06d", date, (int) tv->tv_usec); 90 | } 91 | 92 | return buf; 93 | } 94 | 95 | __u64 get_time_ns(clockid_t clk) 96 | { 97 | struct timespec ts; 98 | 99 | if (clock_gettime(clk, &ts) != 0) { 100 | fprintf(stderr, "clock_gettime(CLOCK_MONOTONIC) failed\n"); 101 | return 0; 102 | } 103 | 104 | return (__u64)ts_to_ull(&ts); 105 | } 106 | 107 | /* used to convert monotonic timestamps to time-of-day. 
108 | * good enough for the purpose at hand 109 | */ 110 | int set_reftime(void) 111 | { 112 | if (gettimeofday(&tod_ref, NULL) != 0) { 113 | fprintf(stderr, "gettimeofday failed\n"); 114 | return 1; 115 | } 116 | 117 | mono_ref = get_time_ns(CLOCK_MONOTONIC); 118 | 119 | return 0; 120 | } 121 | 122 | static int tstamp_sd = -1; 123 | 124 | /* based on example usage in 125 | * tools/testing/selftests/networking/timestamping/hwtstamp_config.c 126 | */ 127 | static int do_hw_tstamp(const char *dev, int rx_filter, int tx_type) 128 | { 129 | unsigned long cmd = SIOCSHWTSTAMP; 130 | struct hwtstamp_config config = { 131 | .tx_type = tx_type, 132 | .rx_filter = rx_filter, 133 | }; 134 | struct ifreq ifr = {}; 135 | 136 | if (tstamp_sd < 1) { 137 | tstamp_sd = socket(AF_INET, SOCK_DGRAM, 0); 138 | if (tstamp_sd < 0) { 139 | fprintf(stderr, "Failed to open ipv4 datagram socket\n"); 140 | return -1; 141 | } 142 | } 143 | 144 | strcpy(ifr.ifr_name, dev); 145 | ifr.ifr_data = (caddr_t)&config; 146 | 147 | if (ioctl(tstamp_sd, cmd, &ifr)) { 148 | perror("ioctl"); 149 | return 1; 150 | } 151 | 152 | return 0; 153 | } 154 | 155 | int enable_hw_tstamp(const char *dev) 156 | { 157 | return do_hw_tstamp(dev, HWTSTAMP_FILTER_ALL, HWTSTAMP_TX_OFF); 158 | } 159 | 160 | int disable_hw_tstamp(const char *dev) 161 | { 162 | return do_hw_tstamp(dev, HWTSTAMP_FILTER_NONE, HWTSTAMP_TX_OFF); 163 | } 164 | 165 | int enable_sw_tstamp(void) 166 | { 167 | int val = SOF_TIMESTAMPING_RX_SOFTWARE; 168 | 169 | if (tstamp_sd < 1) { 170 | tstamp_sd = socket(AF_INET, SOCK_DGRAM, 0); 171 | if (tstamp_sd < 0) { 172 | fprintf(stderr, "Failed to open ipv4 datagram socket\n"); 173 | return -1; 174 | } 175 | } 176 | 177 | if (setsockopt(tstamp_sd, SOL_SOCKET, SO_TIMESTAMPING, 178 | &val, sizeof(val))) { 179 | fprintf(stderr, "Failed to set SO_TIMESTAMPING socket option\n"); 180 | return 1; 181 | } 182 | 183 | val = 1; 184 | if (setsockopt(tstamp_sd, SOL_SOCKET, SO_TIMESTAMPNS, 185 | &val, sizeof(val))) { 186 | fprintf(stderr, "Failed to set SO_TIMESTAMPNS socket option\n"); 187 | return 1; 188 | } 189 | 190 | return 0; 191 | } 192 | 193 | /* copied from linuxptp */ 194 | 195 | #include 196 | #include 197 | 198 | #define CLOCKFD 3 199 | #define FD_TO_CLOCKID(fd) ((clockid_t) ((((unsigned int) ~fd) << 3) | CLOCKFD)) 200 | 201 | static inline int clock_adjtime(clockid_t id, struct timex *tx) 202 | { 203 | return syscall(__NR_clock_adjtime, id, tx); 204 | } 205 | 206 | clockid_t phc_open(const char *phc) 207 | { 208 | struct timex tx = {}; 209 | struct timespec ts; 210 | clockid_t clkid; 211 | int fd; 212 | 213 | fd = open(phc, O_RDWR); 214 | if (fd < 0) 215 | return CLOCK_INVALID; 216 | 217 | clkid = FD_TO_CLOCKID(fd); 218 | /* check if clkid is valid */ 219 | if (clock_gettime(clkid, &ts)) { 220 | close(fd); 221 | return CLOCK_INVALID; 222 | } 223 | 224 | if (clock_adjtime(clkid, &tx)) { 225 | close(fd); 226 | return CLOCK_INVALID; 227 | } 228 | 229 | return clkid; 230 | } 231 | 232 | /* end copied from linuxptp */ 233 | -------------------------------------------------------------------------------- /src/timestamps.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __TIMESTAMPS_H 3 | #define __TIMESTAMPS_H 4 | 5 | #include 6 | 7 | #define USEC_PER_SEC 1000000ULL 8 | #define NSEC_PER_SEC 1000000000ULL 9 | #define NSEC_PER_MSEC 1000000ULL 10 | #define NSEC_PER_USEC 1000ULL 11 | 12 | #define CLOCK_INVALID -1 13 | 14 | static inline unsigned long long 
ts_to_ull(struct timespec *ts) 15 | { 16 | return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; 17 | } 18 | 19 | static inline struct timeval ns_to_timeval(const __s64 nsec) 20 | { 21 | struct timeval tv; 22 | 23 | tv.tv_sec = nsec / NSEC_PER_SEC; 24 | tv.tv_usec = (nsec % NSEC_PER_SEC) / NSEC_PER_USEC; 25 | 26 | return tv; 27 | } 28 | 29 | static inline void print_time_msec(unsigned long long nsecs, int width) 30 | { 31 | unsigned long msecs; 32 | unsigned long usecs; 33 | 34 | msecs = nsecs / NSEC_PER_MSEC; 35 | nsecs -= msecs * NSEC_PER_MSEC; 36 | usecs = nsecs / NSEC_PER_USEC; 37 | printf(" %*lu.%03lu", width, msecs, usecs); 38 | } 39 | 40 | /* print nanosecond timestamp as sec.usec */ 41 | static inline void print_time_usecs(unsigned long long nsecs) 42 | { 43 | unsigned long secs, usecs; 44 | 45 | secs = nsecs / NSEC_PER_SEC; 46 | nsecs -= secs * NSEC_PER_SEC; 47 | usecs = nsecs / NSEC_PER_USEC; 48 | printf("%lu.%06lu", secs, usecs); 49 | } 50 | 51 | int set_reftime(void); 52 | char *timestamp(char *buf, int len, __u64 stime); 53 | char *timestamp_tv(const struct timeval *tv, char *buf, int len); 54 | 55 | int enable_sw_tstamp(void); 56 | int enable_hw_tstamp(const char *dev); 57 | int disable_hw_tstamp(const char *dev); 58 | 59 | clockid_t phc_open(const char *phc); 60 | __u64 get_time_ns(clockid_t clk); 61 | #endif 62 | -------------------------------------------------------------------------------- /src/tp_verify.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "tp_verify.h" 11 | #include "str_utils.h" 12 | 13 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) 14 | 15 | int tp_validate_context(char *sys_name, char *tp_name, 16 | struct tp_ctx *ctx, unsigned int ctx_entries) 17 | { 18 | unsigned int lineno = 0; 19 | int n, i = 0, rc = -1; 20 | char buf[PATH_MAX]; 21 | FILE *fp; 22 | 23 | n = snprintf(buf, sizeof(buf) - 1, 24 | "/sys/kernel/debug/tracing/events/%s/%s/format", 25 | sys_name, tp_name); 26 | buf[n] = '\0'; 27 | 28 | fp = fopen(buf, "r"); 29 | if (!fp) { 30 | fprintf(stderr, "Failed to open %s: %s: %d\n", 31 | buf, strerror(errno), errno); 32 | return -1; 33 | } 34 | 35 | while(fgets(buf, sizeof(buf), fp)) { 36 | unsigned short offs, sz; 37 | char *fields[10], *b; 38 | 39 | lineno++; 40 | 41 | /* remove '[' to ']' in names such as 42 | * "name[sizeof(struct blah)];" 43 | */ 44 | b = strchr(buf, '['); 45 | if (b) { 46 | while(*b != ';') { 47 | *b = ' '; 48 | b++; 49 | } 50 | } 51 | 52 | /* only care about lines with field:, offset: and size: */ 53 | 54 | n = parsestr(buf, "\n\t :;", fields, ARRAY_SIZE(fields)); 55 | if (n != 9) 56 | continue; 57 | 58 | if (strcmp(fields[0], "field") || 59 | strcmp(fields[3], "offset") || 60 | strcmp(fields[5], "size")) 61 | continue; 62 | 63 | 64 | if (str_to_ushort(fields[4], &offs) || 65 | str_to_ushort(fields[6], &sz)) { 66 | fprintf(stderr, "Line %d: Failed to convert offset or size\n", 67 | lineno); 68 | goto out; 69 | } 70 | 71 | /* skipping common lines */ 72 | if (offs < 8) 73 | continue; 74 | 75 | if (i >= ctx_entries) { 76 | fprintf(stderr, "Tracepoint %s:%s has new fields after %s\n", 77 | sys_name, tp_name, fields[2]); 78 | break; 79 | } 80 | 81 | if (strcmp(fields[2], ctx[i].tp_field)) { 82 | fprintf(stderr, 83 | "Line %d: Unexpected field name: expected \"%s\" have \"%s\"\n", 84 | lineno, ctx[i].tp_field, fields[2]); 85 | goto out; 86 | } 87 | 88 | if 
(ctx[i].tp_offset != offs) { 89 | fprintf(stderr, 90 | "Line %d: Field %s has unexpected offset: expected %d, have %d\n", 91 | lineno, fields[2], ctx[i].tp_offset, offs); 92 | goto out; 93 | } 94 | 95 | if (ctx[i].tp_size != sz) { 96 | fprintf(stderr, 97 | "Line %d: Field %s has unexpected size: expected %d, have %d\n", 98 | lineno, fields[2], ctx[i].tp_size, sz); 99 | goto out; 100 | } 101 | 102 | //printf("Match: line %d: field %s -> %s offset %d -> %d size %d -> %d\n", 103 | // lineno, ctx[i].tp_field, fields[2], ctx[i].tp_offset, 104 | // offs, ctx[i].tp_size, sz); 105 | i++; 106 | } 107 | 108 | if (i == ctx_entries) { 109 | rc = 0; 110 | printf("TP matches\n"); 111 | } else 112 | fprintf(stderr, "Tracepoint %s:%s has chopped fields\n", 113 | sys_name, tp_name); 114 | out: 115 | fclose(fp); 116 | return rc; 117 | } 118 | -------------------------------------------------------------------------------- /src/vm_info.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Manage VM info map 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "vm_info.h" 22 | #include "str_utils.h" 23 | #include "libbpf_helpers.h" 24 | 25 | static int show_entries(int fd, bool cli_arg) 26 | { 27 | __u32 *key, *prev_key = NULL; 28 | struct bpf_map_info info = {}; 29 | struct vm_info val; 30 | char buf[IFNAMSIZ]; 31 | char v4str[64]; 32 | char v6str[64]; 33 | int err, i; 34 | __u32 len; 35 | 36 | len = sizeof(info); 37 | if (bpf_obj_get_info_by_fd(fd, &info, &len)) { 38 | fprintf(stderr, "Failed to get map info: %s: %d", 39 | strerror(errno), errno); 40 | return 1; 41 | } 42 | 43 | if (info.type != BPF_MAP_TYPE_HASH || 44 | info.key_size != sizeof(__u32) || 45 | info.value_size != sizeof(struct vm_info)) { 46 | fprintf(stderr, "Incompatible map\n"); 47 | return 1; 48 | } 49 | 50 | key = calloc(1, sizeof(*key)); 51 | if (!key) { 52 | fprintf(stderr, "Failed to allocate memory for key\n"); 53 | return 1; 54 | } 55 | 56 | for (i = 0; ; ++i) { 57 | err = bpf_map_get_next_key(fd, prev_key, key); 58 | if (err) { 59 | if (errno == ENOENT) 60 | err = 0; 61 | break; 62 | } 63 | 64 | memset(&val, 0, sizeof(val)); 65 | if (bpf_map_lookup_elem(fd, key, &val)) 66 | goto next_key; 67 | 68 | 69 | if (if_indextoname(*key, buf) == NULL) { 70 | fprintf(stderr, "WARNING: stale device index\n"); 71 | snprintf(buf, IFNAMSIZ, "-"); 72 | } 73 | 74 | inet_ntop(AF_INET, &val.v4addr, v4str, sizeof(v4str)); 75 | inet_ntop(AF_INET6, &val.v6addr, v6str, sizeof(v6str)); 76 | 77 | if (cli_arg) { 78 | printf(" -i %u -d %u -m ", val.vmid, *key); 79 | print_mac(val.mac, false); 80 | if (val.vlan_TCI) 81 | printf(" -v %u", ntohs(val.vlan_TCI)); 82 | printf(" -4 %s -6 %s\n", v4str, v6str); 83 | } else { 84 | printf(" device key %u / %s vm %u mac ", 85 | *key, buf, val.vmid); 86 | print_mac(val.mac, false); 87 | if (val.vlan_TCI) 88 | printf(" vlan %u", ntohs(val.vlan_TCI)); 89 | printf(" v4 %s v6 %s\n", v4str, v6str); 90 | } 91 | next_key: 92 | prev_key = key; 93 | } 94 | 95 | free(key); 96 | return err; 97 | } 98 | 99 | static int remove_entry(int fd, __u32 idx) 100 | { 101 | int rc; 102 | 103 | rc = bpf_map_delete_elem(fd, &idx); 104 | if (rc) 105 | fprintf(stderr, "Failed to delete VM entry\n"); 106 | 107 | return rc; 108 | } 109 | 110 | static int parse_v6_addr(const 
char *arg, struct in6_addr *addr) 111 | { 112 | struct in6_addr any_in6 = {}; 113 | 114 | if (*arg == '\0') 115 | return -1; 116 | 117 | if (inet_pton(AF_INET6, arg, addr) == 0 || 118 | memcmp(addr, &any_in6, sizeof(any_in6)) == 0) { 119 | fprintf(stderr, "Invalid IPv6 address\n"); 120 | return -1; 121 | } 122 | 123 | return 0; 124 | } 125 | 126 | static int parse_v4_addr(const char *arg, __u32 *addr) 127 | { 128 | struct in_addr in; 129 | 130 | if (*arg == '\0') 131 | return -1; 132 | 133 | if (inet_pton(AF_INET, arg, &in) == 0 || 134 | in.s_addr == 0) { 135 | fprintf(stderr, "Invalid IPv4 address\n"); 136 | return -1; 137 | } 138 | 139 | *addr = in.s_addr; 140 | 141 | return 0; 142 | } 143 | 144 | static void usage(const char *prog) 145 | { 146 | fprintf(stderr, 147 | "usage: %s [OPTS]\n" 148 | "\nOPTS:\n" 149 | " -I id VM info map id (default: by name vm_info_map)\n" 150 | " -i id VM id\n" 151 | " -4 addr IPv4 network address for VM\n" 152 | " -6 addr IPv6 network address for VM\n" 153 | " -m mac mac address for VM\n" 154 | " -d device tap device for VM\n" 155 | " -v vlan egress vlan tci\n" 156 | " -r remove entry (only device arg needed)\n" 157 | " -P print map entries\n" 158 | , prog); 159 | } 160 | 161 | int main(int argc, char **argv) 162 | { 163 | __u32 map_id = 0, ifindex = 0; 164 | bool print_entries = false; 165 | bool cli_arg = false; 166 | struct vm_info vi = {}; 167 | bool delete = false; 168 | unsigned long tmp; 169 | int fd, opt, ret; 170 | 171 | while ((opt = getopt(argc, argv, ":I:i:4:6:m:v:d:rPC")) != -1) { 172 | switch (opt) { 173 | case 'I': 174 | if (str_to_ulong(optarg, &tmp)) { 175 | fprintf(stderr, "Invalid map id\n"); 176 | return 1; 177 | } 178 | map_id = (__u32)tmp; 179 | break; 180 | case 'i': 181 | if (str_to_ulong(optarg, &tmp)) { 182 | fprintf(stderr, "Invalid map id\n"); 183 | return 1; 184 | } 185 | vi.vmid = (__u32)tmp; 186 | break; 187 | case '4': 188 | if (parse_v4_addr(optarg, &vi.v4addr)) { 189 | fprintf(stderr, "Invalid IPv4 address\n"); 190 | return 1; 191 | } 192 | break; 193 | case '6': 194 | if (parse_v6_addr(optarg, &vi.v6addr)) { 195 | fprintf(stderr, "Invalid IPv4 address\n"); 196 | return 1; 197 | } 198 | break; 199 | case 'm': 200 | if (str_to_mac(optarg, vi.mac)) { 201 | fprintf(stderr, "Invalid mac address\n"); 202 | return 1; 203 | } 204 | break; 205 | case 'v': 206 | if (str_to_int(optarg, 1, 4095, &ret)) { 207 | fprintf(stderr, "Invalid vlan\n"); 208 | return 1; 209 | } 210 | vi.vlan_TCI = htons(ret); 211 | break; 212 | case 'd': 213 | ifindex = if_nametoindex(optarg); 214 | if (!ifindex) { 215 | if (str_to_int(optarg, 0, INT_MAX, &ret)) { 216 | fprintf(stderr, "Invalid device\n"); 217 | return 1; 218 | } 219 | ifindex = (__u32)ret; 220 | } 221 | break; 222 | case 'r': 223 | delete = true; 224 | break; 225 | case 'C': 226 | cli_arg = true; 227 | /* fallthrough */ 228 | case 'P': 229 | print_entries = true; 230 | break; 231 | default: 232 | usage(basename(argv[0])); 233 | return 1; 234 | } 235 | } 236 | 237 | if (map_id) { 238 | fd = bpf_map_get_fd_by_id(map_id); 239 | if (fd < 0) { 240 | fprintf(stderr, 241 | "Failed to get fd for fdb map id, %u: %s: %d\n", 242 | map_id, strerror(errno), errno); 243 | return 1; 244 | } 245 | } else { 246 | fd = bpf_map_get_fd_by_name("vm_info_map"); 247 | if (fd < 0) { 248 | fprintf(stderr, "Failed to get fd for vm_info map: %s: %d\n", 249 | strerror(errno), errno); 250 | return 1; 251 | } 252 | } 253 | 254 | if (print_entries) 255 | return show_entries(fd, cli_arg); 256 | 257 | if (!ifindex) { 258 | 
fprintf(stderr, "Device index required\n"); 259 | return 1; 260 | } 261 | 262 | if (delete) 263 | return remove_entry(fd, ifindex); 264 | 265 | if (!vi.vmid) { 266 | fprintf(stderr, "VM id required\n"); 267 | return 1; 268 | } 269 | 270 | /* add device to port map and then add fdb entry */ 271 | ret = bpf_map_update_elem(fd, &ifindex, &vi, BPF_ANY); 272 | if (ret) { 273 | fprintf(stderr, "Failed to add VM entry: %s: %d\n", 274 | strerror(errno), errno); 275 | return ret; 276 | } 277 | 278 | return 0; 279 | } 280 | -------------------------------------------------------------------------------- /src/xdp_devmap_xmit.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Analyze latency of the OVS. 3 | * 4 | * Copyright (c) 2020 David Ahern 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "xdp_devmap_xmit.h" 19 | #include "libbpf_helpers.h" 20 | #include "perf_events.h" 21 | #include "timestamps.h" 22 | 23 | struct data { 24 | __u64 time; 25 | __u32 cpu; 26 | }; 27 | 28 | static bool done; 29 | 30 | static void dump_buckets(__u64 *buckets, __u64 *prev_buckets) 31 | { 32 | __u64 diff[DEVMAP_BUCKETS]; 33 | char buf[64]; 34 | int i; 35 | 36 | /* get difference between samples and save 37 | * new sample as old 38 | */ 39 | for (i = 0; i < DEVMAP_BUCKETS; ++i) { 40 | diff[i] = buckets[i] - prev_buckets[i]; 41 | prev_buckets[i] = buckets[i]; 42 | } 43 | 44 | printf("%s: ", timestamp(buf, sizeof(buf), 0)); 45 | printf("Batching per xdp devmap xmit\n"); 46 | printf(" 0: %'8llu\n", diff[0]); 47 | printf(" 1: %'8llu\n", diff[1]); 48 | printf(" 2: %'8llu\n", diff[2]); 49 | printf(" 3-4: %'8llu\n", diff[3]); 50 | printf(" 5-8: %'8llu\n", diff[4]); 51 | printf(" 9-15: %'8llu\n", diff[5]); 52 | printf(" 16: %'8llu\n", diff[6]); 53 | printf(" 17-32: %'8llu\n", diff[7]); 54 | printf(" 33-63: %'8llu\n", diff[8]); 55 | printf(" 64: %'8llu\n", diff[9]); 56 | } 57 | 58 | static int devmap_xmit_dump_hist(int hist_map_fd) 59 | { 60 | static __u64 prev_buckets[DEVMAP_BUCKETS]; 61 | struct devmap_xmit_hist val; 62 | __u32 idx = 0; 63 | 64 | if (bpf_map_lookup_elem(hist_map_fd, &idx, &val)) { 65 | fprintf(stderr, "Failed to get hist values\n"); 66 | return 1; 67 | } 68 | 69 | dump_buckets(val.buckets, prev_buckets); 70 | printf("\n"); 71 | 72 | return 0; 73 | } 74 | 75 | static void sig_handler(int signo) 76 | { 77 | printf("Terminating by signal %d\n", signo); 78 | done = true; 79 | } 80 | 81 | static void print_usage(char *prog) 82 | { 83 | printf( 84 | "usage: %s OPTS\n\n" 85 | " -f bpf-file bpf filename to load\n" 86 | " -t rate time rate (seconds) to dump stats\n" 87 | , basename(prog)); 88 | } 89 | 90 | int main(int argc, char **argv) 91 | { 92 | struct bpf_prog_load_attr prog_load_attr = { }; 93 | const char *tps[] = { 94 | "xdp/xdp_devmap_xmit", 95 | NULL 96 | }; 97 | struct devmap_xmit_hist hist = {}; 98 | char *objfile = "xdp_devmap_xmit.o"; 99 | bool filename_set = false; 100 | struct bpf_object *obj; 101 | int display_rate = 10; 102 | struct bpf_map *map; 103 | int hist_map_fd; 104 | __u32 idx = 0; 105 | int rc, tmp; 106 | 107 | while ((rc = getopt(argc, argv, "f:t:")) != -1) 108 | { 109 | switch(rc) { 110 | case 'f': 111 | objfile = optarg; 112 | filename_set = true; 113 | break; 114 | case 't': 115 | tmp = atoi(optarg); 116 | if (!tmp) { 117 | fprintf(stderr, "Invalid display rate\n"); 118 | return 1; 
119 | } 120 | display_rate = tmp; 121 | break; 122 | default: 123 | print_usage(argv[0]); 124 | return 1; 125 | } 126 | } 127 | 128 | if (signal(SIGINT, sig_handler) || 129 | signal(SIGHUP, sig_handler) || 130 | signal(SIGTERM, sig_handler)) { 131 | perror("signal"); 132 | return 1; 133 | } 134 | 135 | setlinebuf(stdout); 136 | setlinebuf(stderr); 137 | setlocale(LC_NUMERIC, "en_US.utf-8"); 138 | 139 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 140 | return 1; 141 | 142 | map = bpf_object__find_map_by_name(obj, "devmap_xmit_map"); 143 | if (!map) { 144 | printf("Failed to get histogram map in obj file\n"); 145 | return 1; 146 | } 147 | hist_map_fd = bpf_map__fd(map); 148 | 149 | /* make sure index 0 entry exists */ 150 | bpf_map_update_elem(hist_map_fd, &idx, &hist, BPF_ANY); 151 | 152 | if (configure_tracepoints(obj, tps)) 153 | return 1; 154 | 155 | while (!done) { 156 | sleep(display_rate); 157 | if (devmap_xmit_dump_hist(hist_map_fd)) 158 | break; 159 | } 160 | 161 | return 0; 162 | } 163 | -------------------------------------------------------------------------------- /src/xdp_dummy_user.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | #include "libbpf_helpers.h" 19 | 20 | static void usage(const char *prog) 21 | { 22 | fprintf(stderr, 23 | "usage: %s [OPTS] interface-list\n" 24 | "\nOPTS:\n" 25 | " -d detach program\n" 26 | " -f bpf-file bpf filename to load\n" 27 | " -g skb mode\n" 28 | , prog); 29 | } 30 | 31 | int main(int argc, char **argv) 32 | { 33 | int (*attach_fn)(int idx, int prog_fd, const char *dev) = attach_to_dev; 34 | int (*detach_fn)(int idx, const char *dev) = detach_from_dev; 35 | struct bpf_prog_load_attr prog_load_attr = { }; 36 | const char *objfile = "xdp_dummy_kern.o"; 37 | const char *pname = "xdp_dummy"; 38 | bool filename_set = false; 39 | struct bpf_program *prog; 40 | struct bpf_object *obj; 41 | int opt, i, prog_fd; 42 | bool attach = true; 43 | int ret = 0; 44 | 45 | while ((opt = getopt(argc, argv, ":df:g")) != -1) { 46 | switch (opt) { 47 | case 'f': 48 | objfile = optarg; 49 | filename_set = true; 50 | break; 51 | case 'd': 52 | attach = false; 53 | break; 54 | case 'g': 55 | attach_fn = attach_to_dev_generic; 56 | detach_fn = detach_from_dev_generic; 57 | break; 58 | default: 59 | usage(basename(argv[0])); 60 | return 1; 61 | } 62 | } 63 | 64 | if (optind == argc) { 65 | usage(basename(argv[0])); 66 | return 1; 67 | } 68 | 69 | if (!attach) { 70 | for (i = optind; i < argc; ++i) { 71 | int idx, err; 72 | 73 | idx = if_nametoindex(argv[i]); 74 | if (!idx) 75 | idx = strtoul(argv[i], NULL, 0); 76 | 77 | if (!idx) { 78 | fprintf(stderr, "Invalid device argument\n"); 79 | return 1; 80 | } 81 | err = detach_fn(idx, argv[i]); 82 | if (err) 83 | ret = err; 84 | } 85 | return ret; 86 | } 87 | 88 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 89 | return 1; 90 | 91 | prog = bpf_object__find_program_by_title(obj, pname); 92 | prog_fd = bpf_program__fd(prog); 93 | if (prog_fd < 0) { 94 | printf("program not found: %s\n", strerror(prog_fd)); 95 | return 1; 96 | } 97 | 98 | for (i = optind; i < argc; ++i) { 99 | int idx, err; 100 | 101 | idx = if_nametoindex(argv[i]); 102 | if (!idx) 103 | idx = strtoul(argv[i], NULL, 0); 104 | 105 | if (!idx) { 
106 | fprintf(stderr, "Invalid device argument\n"); 107 | return 1; 108 | } 109 | 110 | err = attach_fn(idx, prog_fd, argv[i]); 111 | if (err) 112 | ret = err; 113 | } 114 | 115 | return ret; 116 | } 117 | -------------------------------------------------------------------------------- /src/xdp_l3fwd_user.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Example using ebpf at XDP layer for Layer 3 forwarding. 3 | * 4 | * Copyright (c) 2017-2020 David Ahern 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include "libbpf_helpers.h" 24 | #include "str_utils.h" 25 | 26 | static int do_attach(int idx, int prog_fd, int map_fd, const char *name) 27 | { 28 | int err; 29 | 30 | err = attach_to_dev(idx, prog_fd, name); 31 | if (err < 0) 32 | return err; 33 | 34 | /* Adding ifindex as a possible egress TX port */ 35 | err = bpf_map_update_elem(map_fd, &idx, &idx, 0); 36 | if (err) 37 | printf("ERROR: failed using device %s as TX-port\n", name); 38 | 39 | return err; 40 | } 41 | 42 | static void usage(const char *prog) 43 | { 44 | fprintf(stderr, 45 | "usage: %s [OPTS] interface-list\n" 46 | "\nOPTS:\n" 47 | " -f bpf-file bpf filename to load\n" 48 | " -d detach program\n" 49 | " -D direct table lookups (skip fib rules)\n", 50 | prog); 51 | } 52 | 53 | int main(int argc, char **argv) 54 | { 55 | struct bpf_prog_load_attr prog_load_attr = { 56 | .prog_type = BPF_PROG_TYPE_XDP, 57 | }; 58 | const char *objfile = "xdp_l3fwd_kern.o"; 59 | const char *prog_name = "xdp_l3fwd"; 60 | bool filename_set = false; 61 | struct bpf_program *prog; 62 | int prog_fd, map_fd = -1; 63 | struct bpf_object *obj; 64 | int opt, i, idx, err; 65 | bool attach = true; 66 | int ret = 0; 67 | 68 | while ((opt = getopt(argc, argv, ":dDf:")) != -1) { 69 | switch (opt) { 70 | case 'f': 71 | objfile = optarg; 72 | filename_set = true; 73 | break; 74 | case 'd': 75 | attach = false; 76 | break; 77 | case 'D': 78 | prog_name = "xdp_l3fwd_direct"; 79 | break; 80 | default: 81 | usage(basename(argv[0])); 82 | return 1; 83 | } 84 | } 85 | 86 | if (optind == argc) { 87 | usage(basename(argv[0])); 88 | return 1; 89 | } 90 | 91 | if (attach) { 92 | if (load_obj_file(&prog_load_attr, &obj, objfile, filename_set)) 93 | return 1; 94 | 95 | prog = bpf_object__find_program_by_title(obj, prog_name); 96 | prog_fd = bpf_program__fd(prog); 97 | if (prog_fd < 0) { 98 | printf("program not found: %s\n", strerror(prog_fd)); 99 | return 1; 100 | } 101 | map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, 102 | "xdp_tx_ports")); 103 | if (map_fd < 0) { 104 | printf("map not found: %s\n", strerror(map_fd)); 105 | return 1; 106 | } 107 | } 108 | 109 | for (i = optind; i < argc; ++i) { 110 | idx = get_ifidx(argv[i]); 111 | if (!idx) { 112 | fprintf(stderr, "Invalid arg\n"); 113 | return 1; 114 | } 115 | if (attach) 116 | err = do_attach(idx, prog_fd, map_fd, argv[i]); 117 | else 118 | err = detach_from_dev(idx, argv[i]); 119 | 120 | if (err) 121 | ret = err; 122 | } 123 | 124 | return ret; 125 | } 126 | -------------------------------------------------------------------------------- /utils/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -O2 -g -Wall 3 | 4 | PROGS := socktest 5 | 6 | all: $(PROGS) 7 | 8 | %: %.c 9 | $(CC) $(INCLUDES) 
$(DEFS) $(CFLAGS) $^ -o $@ $(LIBS) 10 | 11 | clean: 12 | @rm -f $(PROGS) 13 | -------------------------------------------------------------------------------- /utils/build-deb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dpkg -l | grep -q debhelper 4 | if [ $? -ne 0 ] 5 | then 6 | echo "debhelper package not installed" 7 | exit 1 8 | fi 9 | 10 | set -e 11 | set -x 12 | 13 | KERNEL_VER=$1 14 | [ -z "${KERNEL_VER}" ] && KERNEL_VER=$(uname -r) 15 | 16 | # When linux-headers is installed 17 | KERNDIR=/usr/src/linux-headers-$KERNEL_VER 18 | if [ ! -e $KERNDIR -a -e /lib/modules/${KERNEL_VER}/build ]; then 19 | # When the prebuilt sources are installed 20 | KERNDIR=/build/linux 21 | elif [ ! -e $KERNDIR ]; then 22 | echo error: $KERNDIR does not exist 23 | exit 1 24 | fi 25 | 26 | DEBNAME=dsa-bpf-progs 27 | PACKAGE_NAME=dsa-bpf-progs 28 | PACKAGE_VERSION=1.0 29 | PACKAGE_REVISION=$(git rev-parse --short HEAD) 30 | PACKAGE_REVISION="1+dsa~${PACKAGE_REVISION}" 31 | 32 | SRCDIR=$(pwd) 33 | BUILDDIR=${SRCDIR}/build 34 | 35 | # 36 | # Create debian packaging 37 | # 38 | rm -rf $BUILDDIR 39 | mkdir -p $BUILDDIR/debian 40 | mkdir -p $BUILDDIR/debian/build 41 | 42 | echo 10 > $BUILDDIR/debian/compat 43 | 44 | DATE=$(date '+%a, %d %b %Y %T %z') 45 | 46 | cat > $BUILDDIR/debian/changelog < $DATE 52 | EOF 53 | 54 | cat > $BUILDDIR/debian/control < 59 | Build-Depends: debhelper (>= 5.0.37), 60 | libpcap-dev, 61 | clang, 62 | llvm, 63 | gcc, 64 | make, 65 | libelf-dev 66 | Standards-Version: 3.7.3 67 | 68 | Package: $DEBNAME 69 | Architecture: any 70 | Provides: $DEBNAME 71 | Description: $PACKAGE_NAME userspace 72 | This package contains the $PACKAGE_NAME userspace commands. 73 | 74 | Package: $DEBNAME-$KERNEL_VER 75 | Architecture: any 76 | Provides: $DEBNAME 77 | Description: $PACKAGE_NAME bpf modules 78 | This package contains the $PACKAGE_NAME bpf modules for 79 | the kernel-image-$KERNEL_VER package. 80 | . 81 | If you compiled a custom kernel, you will most likely need to compile 82 | a custom version of this module as well. 83 | EOF 84 | 85 | cat > $BUILDDIR/debian/rules <> $BUILDDIR/debian/rules < $BUILDDIR/debian/$DEBNAME.install 121 | echo "usr/lib/bpf-obj/*/${KVERS}/${PACKAGE_VERSION}-${PACKAGE_REVISION}" > $BUILDDIR/debian/$DEBNAME-$KERNEL_VER.install 122 | 123 | chmod a+x $BUILDDIR/debian/rules 124 | 125 | # FIXME: Create debian/copyright? 
126 | 127 | # Build .deb package 128 | (cd $BUILDDIR; fakeroot debian/rules all) 129 | -------------------------------------------------------------------------------- /utils/socktest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) 8 | { 9 | char name[32]; 10 | socklen_t optlen = sizeof(name); 11 | struct sockaddr_in laddr = { 12 | .sin_family = AF_INET, 13 | .sin_port = htons(12345), 14 | .sin_addr.s_addr = htonl(INADDR_ANY), 15 | }; 16 | unsigned int mark; 17 | int sd; 18 | 19 | sd = socket(AF_INET, SOCK_STREAM, 0); 20 | if (sd < 0) { 21 | perror("socket"); 22 | return 1; 23 | } 24 | 25 | name[0] = '\0'; 26 | if (getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen) < 0) 27 | perror("getsockopt(SO_BINDTODEVICE)"); 28 | else 29 | printf("socket bound to dev %s\n", name); 30 | 31 | optlen = sizeof(mark); 32 | if (getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen) < 0) 33 | perror("getsockopt(SO_MARK)"); 34 | else 35 | printf("socket mark %u\n", mark); 36 | 37 | 38 | if (bind(sd, (struct sockaddr *) &laddr, sizeof(laddr)) < 0) { 39 | perror("bind"); 40 | return 1; 41 | } 42 | if (listen(sd, 1) < 0) { 43 | perror("listen"); 44 | return 1; 45 | } 46 | pause(); 47 | close(sd); 48 | 49 | return 0; 50 | } 51 | --------------------------------------------------------------------------------
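A minimal usage sketch (not a file in this tree) of the timestamp helpers declared in src/timestamps.h and implemented in src/timestamps.c. Only the main() wrapper below is invented for illustration; set_reftime(), get_time_ns(), timestamp() and print_time_usecs() are the repository's own helpers, and it assumes the sketch is compiled and linked against src/timestamps.c. Reading CLOCK_MONOTONIC directly stands in for the ktime_get_ns() values a bpf program would normally report through the perf channel.
```
/* Sketch only -- not part of the repository. Assumes linkage with
 * src/timestamps.c; main() is a hypothetical driver.
 */
#include <stdio.h>
#include <time.h>
#include <linux/types.h>

#include "timestamps.h"

int main(void)
{
	char buf[64];
	__u64 now;

	/* capture a monotonic / wall-clock reference pair once,
	 * before any samples are collected
	 */
	if (set_reftime())
		return 1;

	/* stand-in for a monotonic timestamp reported by a bpf program */
	now = get_time_ns(CLOCK_MONOTONIC);

	/* convert the monotonic nanosecond value to local time of day;
	 * timestamp() requires a buffer of at least 64 bytes
	 */
	printf("%s\n", timestamp(buf, sizeof(buf), now));

	/* print a nanosecond delta as sec.usec */
	print_time_usecs(get_time_ns(CLOCK_MONOTONIC) - now);
	printf("\n");

	return 0;
}
```
This is the same pattern the tools above follow: opensnoop calls set_reftime() at startup and later converts per-event monotonic times via show_timestamps()/timestamp() when printing.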