├── .gitignore ├── Makefile ├── Readme.md ├── TestCodes ├── compile.sh ├── icmp.c ├── mac.c ├── rmi.c ├── rmi.h ├── route.c ├── route_dump.c ├── routemac └── xlb_test.c ├── bpf_helpers.h ├── bpf_load.c ├── bpf_load.h ├── bpf_util.h ├── icmp.c ├── libbpf.h ├── mac.c ├── perf-sys.h ├── rmi.h ├── route.c ├── tools ├── include │ ├── linux │ │ └── bpf.h │ └── uapi │ │ └── linux │ │ ├── bpf.h │ │ └── bpf_common.h └── lib │ └── bpf │ ├── bpf.c │ ├── bpf.h │ ├── libbpf.c │ └── libbpf.h ├── xlb.sh ├── xlb.sh2 ├── xlb.sh3 ├── xlb.sh4 ├── xlb_cmdline.c ├── xlb_common.h ├── xlb_kern.c ├── xlb_user.c ├── xlb_util.c ├── xlb_util.h ├── xlbd.c └── xlbd.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.o 3 | *ll 4 | map 5 | map_cmdline 6 | xlb 7 | xlb_cmdline 8 | xdp_ddos01_blacklist 9 | xdp_ddos01_blacklist_cmdline 10 | xdp_tx_iptunnel 11 | xlb_test 12 | rmi 13 | a.out 14 | xlbd 15 | 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | TARGETS := xlb 3 | 4 | MANAGEMENT_DAEMON := xlbd 5 | 6 | CMDLINE_TOOLS := xlb_cmdline 7 | COMMON_H = ${CMDLINE_TOOLS:_cmdline=_common.h} 8 | 9 | RMI_SOURCES := route.c 10 | RMI_SOURCES += icmp.c 11 | RMI_SOURCES += mac.c 12 | RMI_SOURCES += xlb_util.c 13 | RMI_OBJECTS = ${RMI_SOURCES:.c=.o} 14 | 15 | # Generate file name-scheme based on TARGETS 16 | KERN_SOURCES = ${TARGETS:=_kern.c} 17 | USER_SOURCES = ${TARGETS:=_user.c} 18 | KERN_OBJECTS = ${KERN_SOURCES:.c=.o} 19 | USER_OBJECTS = ${USER_SOURCES:.c=.o} 20 | 21 | # Notice: the kbuilddir can be redefined on make cmdline 22 | kbuilddir ?= /lib/modules/$(shell uname -r)/build/ 23 | KERNEL=$(kbuilddir) 24 | 25 | CFLAGS := -g -O2 -Wall 26 | 27 | # Local copy of kernel/tools/lib/ 28 | #CFLAGS += -I./tools/lib 29 | CFLAGS += -I$(KERNEL)/tools/lib 30 | # 31 | # Local copy of uapi/linux/bpf.h kept under 
./tools/include 32 | # needed due to enum dependency in bpf_helpers.h 33 | #CFLAGS += -I./tools/include 34 | # For building libbpf there is a lot of kernel includes in tools/include/ 35 | CFLAGS += -I$(KERNEL)/tools/include 36 | CFLAGS += -I$(KERNEL)/tools/perf 37 | CFLAGS += -I$(KERNEL)/usr/include 38 | # Strange dependency to "selftests" due to "bpf_util.h" 39 | #CFLAGS += -I$(KERNEL)/tools/testing/selftests/bpf/ 40 | 41 | LDFLAGS= -lelf 42 | 43 | # Objects that xxx_user program is linked with: 44 | OBJECT_BPF_SYSCALLS = tools/lib/bpf/bpf.o 45 | OBJECT_LOADBPF = bpf_load.o 46 | OBJECTS = $(OBJECT_BPF_SYSCALLS) $(OBJECT_LOADBPF) 47 | # 48 | # The tools/lib/bpf/libbpf is avail via a library 49 | OBJECT_BPF_LIBBPF = tools/lib/bpf/libbpf.o 50 | 51 | # Allows pointing LLC/CLANG to another LLVM backend, redefine on cmdline: 52 | # make LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang 53 | LLC ?= llc 54 | CLANG ?= clang 55 | 56 | CC = gcc 57 | 58 | NOSTDINC_FLAGS := -nostdinc -isystem $(shell $(CC) -print-file-name=include) 59 | 60 | # Copy of uapi/linux/bpf.h stored here: 61 | 62 | LINUXINCLUDE := -I$(KERNEL)/arch/x86/include 63 | LINUXINCLUDE += -I$(KERNEL)/arch/x86/include/generated/uapi 64 | LINUXINCLUDE += -I$(KERNEL)/arch/x86/include/generated 65 | LINUXINCLUDE += -I$(KERNEL)/include 66 | LINUXINCLUDE += -I$(KERNEL)/arch/x86/include/uapi 67 | LINUXINCLUDE += -I$(KERNEL)/include/uapi 68 | LINUXINCLUDE += -I$(KERNEL)/include/generated/uapi 69 | LINUXINCLUDE += -include $(KERNEL)/include/linux/kconfig.h 70 | LINUXINCLUDE += -I$(KERNEL)/tools/lib 71 | 72 | #LINUXINCLUDE += -I./tools/include/ 73 | 74 | #EXTRA_CFLAGS=-Werror 75 | EXTRA_CFLAGS= -D__BPF_TRACING__ 76 | 77 | all: dependencies $(TARGETS) $(KERN_OBJECTS) $(CMDLINE_TOOLS) $(MANAGEMENT_DAEMON) 78 | 79 | .PHONY: dependencies clean verify_cmds verify_llvm_target_bpf $(CLANG) $(LLC) 80 | 81 | # Manually define dependencies to e.g. 
include files 82 | napi_monitor: napi_monitor.h 83 | napi_monitor_kern.o: napi_monitor.h 84 | 85 | clean: 86 | @find . -type f \ 87 | \( -name '*~' \ 88 | -o -name '*.ll' \ 89 | -o -name '*.bc' \ 90 | -o -name 'core' \) \ 91 | -exec rm -vf '{}' \; 92 | rm -f $(OBJECTS) 93 | rm -f $(TARGETS) 94 | rm -f $(KERN_OBJECTS) 95 | rm -f $(USER_OBJECTS) 96 | rm -f $(RMI_OBJECTS) 97 | rm -f $(OBJECT_BPF_LIBBPF) libbpf.a 98 | 99 | dependencies: verify_llvm_target_bpf linux-src-devel-headers 100 | 101 | linux-src: 102 | @if ! test -d $(KERNEL)/; then \ 103 | echo "ERROR: Need kernel source code to compile against" ;\ 104 | echo "(Cannot open directory: $(KERNEL))" ;\ 105 | exit 1; \ 106 | else true; fi 107 | 108 | linux-src-libbpf: linux-src 109 | @if ! test -d $(KERNEL)/tools/lib/bpf/; then \ 110 | echo "ERROR: Need kernel source code to compile against" ;\ 111 | echo " and specifically tools/lib/bpf/ "; \ 112 | exit 1; \ 113 | else true; fi 114 | 115 | linux-src-devel-headers: linux-src-libbpf 116 | @if ! test -d $(KERNEL)/usr/include/ ; then \ 117 | echo -n "WARNING: Need kernel source devel headers"; \ 118 | echo " likely need to run:"; \ 119 | echo " (in kernel source dir: $(KERNEL))"; \ 120 | echo -e "\n make headers_install\n"; \ 121 | true ; \ 122 | else true; fi 123 | 124 | verify_cmds: $(CLANG) $(LLC) 125 | @for TOOL in $^ ; do \ 126 | if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \ 127 | echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\ 128 | exit 1; \ 129 | else true; fi; \ 130 | done 131 | 132 | verify_llvm_target_bpf: verify_cmds 133 | @if ! 
(${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \ 134 | echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\ 135 | echo " NOTICE: LLVM version >= 3.7.1 required" ;\ 136 | exit 2; \ 137 | else true; fi 138 | 139 | # Helpers for bpf syscalls (from tools/lib/bpf/bpf.c) 140 | $(OBJECT_BPF_SYSCALLS): %.o: %.c 141 | $(CC) $(CFLAGS) -o $@ -c $< 142 | 143 | $(OBJECT_LOADBPF): bpf_load.c bpf_load.h 144 | $(CC) $(CFLAGS) -o $@ -c $< 145 | 146 | # ISSUE: The libbpf.a library creates a kernel source dependency, for 147 | # include files from tools/include/ 148 | $(OBJECT_BPF_LIBBPF): %.o: %.c 149 | $(CC) $(CFLAGS) -o $@ -c $< 150 | # 151 | libbpf.a: $(OBJECT_BPF_LIBBPF) $(OBJECT_BPF_SYSCALLS) 152 | $(RM) $@; $(AR) rcs $@ $^ 153 | 154 | # Compiling of eBPF restricted-C code with LLVM 155 | # clang option -S generated output file with suffix .ll 156 | # which is the non-binary LLVM assembly language format 157 | # (normally LLVM bitcode format .bc is generated) 158 | # 159 | # Use -Wno-address-of-packed-member as eBPF verifier enforces 160 | # unaligned access checks where necessary 161 | # 162 | $(KERN_OBJECTS): %.o: %.c bpf_helpers.h 163 | $(CLANG) -S $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ 164 | -D__KERNEL__ -D__ASM_SYSREG_H \ 165 | -Wall \ 166 | -Wno-unused-value -Wno-pointer-sign \ 167 | -D__TARGET_ARCH_$(ARCH) \ 168 | -Wno-compare-distinct-pointer-types \ 169 | -Wno-gnu-variable-sized-type-not-at-end \ 170 | -Wno-tautological-compare \ 171 | -Wno-unknown-warning-option \ 172 | -Wno-address-of-packed-member \ 173 | -O2 -emit-llvm -c $< 174 | $(LLC) -march=bpf -filetype=obj -o $@ ${@:.o=.ll} 175 | 176 | $(TARGETS): %: %_user.c $(OBJECTS) Makefile 177 | $(CC) $(CFLAGS) $(OBJECTS) $(LDFLAGS) -o $@ $< 178 | 179 | $(CMDLINE_TOOLS): %: %.c $(OBJECTS) Makefile $(COMMON_H) $(RMI_OBJECTS) rmi.h 180 | $(CC) -g $(CFLAGS) $(OBJECTS) $(RMI_OBJECTS) $(LDFLAGS) -o $@ $< 181 | 182 | $(MANAGEMENT_DAEMON): %: %.c $(OBJECTS) Makefile $(COMMON_H) $(RMI_OBJECTS) 
rmi.h 183 | $(CC) -g $(CFLAGS) $(OBJECTS) $(RMI_OBJECTS) $(LDFLAGS) -o $@ $< -lyaml 184 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # XDP Loadbalancer 2 | 3 | ## To load xdp bytecode 4 | 5 | Load: 6 | ``` 7 | ./xlb -i eth0 -v 8 | ``` 9 | 10 | Unload: 11 | ``` 12 | ./xlb -i eth0 -r 13 | ``` 14 | 15 | Check to see the xdp binary is loaded 16 | ``` 17 | # ip link show dev eth0 18 | 2: eth0: mtu 1500 xdp qdisc mq state UP mode DEFAULT group default qlen 1000 19 | link/ether 52:54:00:11:00:1b brd ff:ff:ff:ff:ff:ff 20 | prog/xdp id 94 tag e09d47c63a72ab36 jited 21 | ``` 22 | The line, "prog/xdp id 94 tag e09d47c63a72ab36 jited" indicate that the xdp byte code is correctly hooked to the eth0. 23 | 24 | ## Setup loadbalancer 25 | 26 | Basic usage of the xlb_cmdline 27 | ``` 28 | Usage: ./xlb_cmdline [...] 29 | -i Interface name(eg. eth0) 30 | -A ServiceIP(a.k.a. VIP) 31 | -t (for TCP, optional, default) 32 | -u (for UDP, optional) 33 | -r WorkerIP 34 | -v verbose 35 | -L list lb table 36 | -l list lbcache 37 | -h Display this help 38 | ``` 39 | 40 | Create service 41 | ``` 42 | ./xlb_cmdline -i eth0 -A 10.1.4.1 -p 80 43 | ``` 44 | 45 | Add real servers 46 | ``` 47 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.24 48 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.23 49 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.22 50 | ``` 51 | 52 | Show registered services. 
53 | ``` 54 | ./xlb_cmdline -i eth0 -L 55 | service: 10.1.4.1:80(6) { 56 | src: 10.0.0.27, dst: 10.0.0.22 (52:54:0:11:0:16) 57 | src: 10.0.0.27, dst: 10.0.0.23 (52:54:0:11:0:17) 58 | src: 10.0.0.27, dst: 10.0.0.24 (52:54:0:11:0:18) 59 | } 60 | ``` 61 | 62 | Delete real servers 63 | ``` 64 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.22 65 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.23 66 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.24 67 | ``` 68 | 69 | Delete service 70 | ``` 71 | ./xlb_cmdline -i eth0 -D 10.1.4.1 -p 80 72 | ``` 73 | 74 | 75 | -------------------------------------------------------------------------------- /TestCodes/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcc -c -o icmp.o icmp.c 4 | gcc -c -o mac.o mac.c 5 | gcc -c -o route.o route.c 6 | gcc -c -o rmi.o rmi.c 7 | gcc -o rmi rmi.o icmp.o mac.o route.o 8 | 9 | -------------------------------------------------------------------------------- /TestCodes/icmp.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | #define PACKETSIZE 64 4 | struct packet 5 | { 6 | struct icmphdr hdr; 7 | char msg[PACKETSIZE-sizeof(struct icmphdr)]; 8 | }; 9 | 10 | int pid=-1; 11 | struct protoent *proto=NULL; 12 | 13 | unsigned short checksum(void *b, int len) 14 | { unsigned short *buf = b; 15 | unsigned int sum=0; 16 | unsigned short result; 17 | 18 | for ( sum = 0; len > 1; len -= 2 ) 19 | sum += *buf++; 20 | if ( len == 1 ) 21 | sum += *(unsigned char*)buf; 22 | sum = (sum >> 16) + (sum & 0xFFFF); 23 | sum += (sum >> 16); 24 | result = ~sum; 25 | return result; 26 | } 27 | 28 | void ping(struct sockaddr_in *addr) 29 | { const int val=255; 30 | int i, sd, cnt=1; 31 | struct packet pckt; 32 | 33 | sd = socket(PF_INET, SOCK_RAW, proto->p_proto); 34 | if ( sd < 0 ) 35 | { 36 | perror("socket"); 37 | return; 38 | } 39 | if ( setsockopt(sd, SOL_IP, IP_TTL, &val, 
sizeof(val)) != 0) 40 | perror("Set TTL option"); 41 | if ( fcntl(sd, F_SETFL, O_NONBLOCK) != 0 ) 42 | perror("Request nonblocking I/O"); 43 | 44 | if (DEBUG) printf("ICMP #%d sent.\n", cnt); 45 | bzero(&pckt, sizeof(pckt)); 46 | pckt.hdr.type = ICMP_ECHO; 47 | pckt.hdr.un.echo.id = pid; 48 | for ( i = 0; i < sizeof(pckt.msg)-1; i++ ) 49 | pckt.msg[i] = i+'0'; 50 | pckt.msg[i] = 0; 51 | pckt.hdr.un.echo.sequence = cnt++; 52 | pckt.hdr.checksum = checksum(&pckt, sizeof(pckt)); 53 | if ( sendto(sd, &pckt, sizeof(pckt), 0, (struct sockaddr*)addr, sizeof(*addr)) <= 0 ) 54 | perror("sendto"); 55 | } 56 | 57 | int icmp_send_1pkt(in_addr_t *dst_ip) 58 | { 59 | struct sockaddr_in addr; 60 | 61 | proto = getprotobyname("ICMP"); 62 | bzero(&addr, sizeof(addr)); 63 | addr.sin_family = AF_INET; 64 | addr.sin_port = 0; 65 | addr.sin_addr.s_addr = *dst_ip; 66 | ping(&addr); 67 | 68 | return 0; 69 | } 70 | 71 | -------------------------------------------------------------------------------- /TestCodes/mac.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | int xlb_get_mac(in_addr_t *host, char *mac, int *dev){ 4 | 5 | int s; 6 | 7 | struct arpreq req; 8 | struct sockaddr_in *sin; 9 | static char buf[256]; 10 | 11 | bzero((caddr_t)&req, sizeof(req)); 12 | 13 | sin = (struct sockaddr_in *)&req.arp_pa; 14 | sin->sin_family = AF_INET; 15 | sin->sin_addr.s_addr = *host; 16 | 17 | if((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ 18 | perror("socket() failed."); 19 | exit(-1); 20 | } 21 | 22 | if_indextoname(*dev, req.arp_dev); 23 | if (DEBUG) printf("ifname= %s\n", req.arp_dev); 24 | 25 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 26 | if(errno == ENXIO){ 27 | 28 | icmp_send_1pkt(&sin->sin_addr.s_addr); 29 | usleep(100000); 30 | 31 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 32 | if(errno == ENXIO){ 33 | printf("%s - no entry.\n", inet_ntop(AF_INET, host, buf, 256)); 34 | // printf("%lu - no entry.\n", *host); 35 | exit(-1); 36 | 
} else { 37 | perror("SIOCGARP"); 38 | exit(-1); 39 | } 40 | } 41 | 42 | } else { 43 | perror("SIOCGARP"); 44 | exit(-1); 45 | } 46 | } 47 | 48 | if(!(req.arp_flags & ATF_COM)){ 49 | printf("Could not get workers Mac address from arp cache.\n"); 50 | exit(-1); 51 | } 52 | 53 | memcpy(mac, req.arp_ha.sa_data, 6); 54 | 55 | return(0); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /TestCodes/rmi.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | char ipaddr[16]; 6 | strcpy(ipaddr, argv[1]); 7 | 8 | char mac[6]; 9 | int dev=0; 10 | 11 | // struct in_addr src_ip, nh_ip, dst_ip; 12 | in_addr_t src_ip, nh_ip, dst_ip; 13 | 14 | inet_pton(AF_INET, argv[1], &dst_ip); 15 | xlb_iproute_get(&dst_ip,&src_ip,&nh_ip, &dev); 16 | 17 | static char buf[256]; 18 | printf("src: %s \n", inet_ntop(AF_INET, &src_ip, buf, 256)); 19 | printf("nexthop: %s \n", inet_ntop(AF_INET, &nh_ip, buf, 256)); 20 | printf("dev: %d \n", dev); 21 | 22 | xlb_get_mac(&nh_ip, mac , &dev); 23 | 24 | char mac_txt[6] = {0}; 25 | ether_ntoa_r((struct ether_addr *)mac, mac_txt); 26 | printf("mac: %s\n", mac_txt ); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /TestCodes/rmi.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | //#include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | /// icmp 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | /// 35 | 36 | #define IFLIST_REPLY_BUFFER 8192 37 | 38 | #ifndef DEBUG 39 | #define DEBUG 0 40 | #endif 41 | 42 | #define NLMSG_TAIL(nmsg) 
\ 43 | ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 44 | 45 | int xlb_parse_route(struct nlmsghdr *nlh, in_addr_t *src_ip, in_addr_t *nh_ip, int *dev); 46 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 47 | int alen); 48 | int xlb_iproute_get(in_addr_t *dst_ip, in_addr_t *src_ip , in_addr_t *nh_ip, int *dev); 49 | int xlb_get_mac(in_addr_t *host, char *mac, int *dev); 50 | 51 | unsigned short checksum(void *b, int len); 52 | void ping(struct sockaddr_in *addr); 53 | int icmp_send_1pkt(in_addr_t *dst_ip); 54 | 55 | -------------------------------------------------------------------------------- /TestCodes/route.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | int xlb_parse_route(struct nlmsghdr *nlh, in_addr_t *src_ip, in_addr_t *nh_ip, int *dev) 4 | { 5 | struct rtmsg *route_entry; 6 | struct rtattr *route_attribute; 7 | int route_attribute_len = 0; 8 | unsigned char route_netmask = 0; 9 | unsigned char route_protocol = 0; 10 | int via = 0; 11 | 12 | route_entry = (struct rtmsg *) NLMSG_DATA(nlh); 13 | 14 | if (route_entry->rtm_table != RT_TABLE_MAIN) 15 | return 1; 16 | 17 | route_netmask = route_entry->rtm_dst_len; 18 | route_protocol = route_entry->rtm_protocol; 19 | route_attribute = (struct rtattr *) RTM_RTA(route_entry); 20 | route_attribute_len = RTM_PAYLOAD(nlh); 21 | 22 | for ( ; RTA_OK(route_attribute, route_attribute_len); \ 23 | route_attribute = RTA_NEXT(route_attribute, route_attribute_len)) 24 | { 25 | 26 | if (route_attribute->rta_type == RTA_DST) 27 | if (via == 0) 28 | memcpy(nh_ip, RTA_DATA(route_attribute), 4); 29 | 30 | if (route_attribute->rta_type == RTA_GATEWAY) 31 | { 32 | memcpy(nh_ip, RTA_DATA(route_attribute), 4); 33 | via = 1; 34 | } 35 | 36 | if (route_attribute->rta_type == RTA_PREFSRC) 37 | memcpy(src_ip, RTA_DATA(route_attribute), 4); 38 | 39 | if (route_attribute->rta_type == RTA_OIF) 40 | memcpy(dev, 
RTA_DATA(route_attribute), sizeof(int)); 41 | } 42 | 43 | return 0; 44 | } 45 | 46 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 47 | int alen) 48 | { 49 | int len = RTA_LENGTH(alen); 50 | struct rtattr *rta; 51 | 52 | if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { 53 | fprintf(stderr, 54 | "addattr_l ERROR: message exceeded bound of %d\n", 55 | maxlen); 56 | return -1; 57 | } 58 | rta = NLMSG_TAIL(n); 59 | rta->rta_type = type; 60 | rta->rta_len = len; 61 | if (alen) 62 | memcpy(RTA_DATA(rta), data, alen); 63 | n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); 64 | return 0; 65 | } 66 | 67 | int xlb_iproute_get(in_addr_t *dst_ip, in_addr_t *src_ip , in_addr_t *nh_ip, int *dev) 68 | { 69 | struct msghdr rtnl_msg; 70 | struct iovec io; 71 | int fd; 72 | 73 | struct { 74 | struct nlmsghdr n; 75 | struct rtmsg r; 76 | char buf[1024]; 77 | } req; 78 | 79 | memset(&rtnl_msg, 0, sizeof(rtnl_msg)); 80 | memset(&req, 0, sizeof(req)); 81 | 82 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 83 | req.n.nlmsg_flags = NLM_F_REQUEST; 84 | req.n.nlmsg_type = RTM_GETROUTE; 85 | req.r.rtm_family = AF_INET; 86 | 87 | 88 | addattr_l(&req.n, sizeof(req), RTA_DST, dst_ip, 4); 89 | 90 | io.iov_base = &req; 91 | io.iov_len = req.n.nlmsg_len; 92 | rtnl_msg.msg_iov = &io; 93 | rtnl_msg.msg_iovlen = 1; 94 | 95 | fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 96 | sendmsg(fd, (struct msghdr *) &rtnl_msg, 0); 97 | 98 | /* parse reply */ 99 | 100 | { 101 | struct nlmsghdr *answer; 102 | struct msghdr rtnl_reply; 103 | struct iovec io_reply; 104 | char reply[IFLIST_REPLY_BUFFER]; 105 | 106 | 107 | memset(&io_reply, 0, sizeof(io_reply)); 108 | memset(&rtnl_reply, 0, sizeof(rtnl_reply)); 109 | 110 | io.iov_base = reply; 111 | io.iov_len = IFLIST_REPLY_BUFFER; 112 | rtnl_reply.msg_iov = &io; 113 | rtnl_reply.msg_iovlen = 1; 114 | 115 | recvmsg(fd, &rtnl_reply, 0); 116 | answer = (struct nlmsghdr *) reply; 117 | 118 | 
xlb_parse_route(answer, src_ip, nh_ip, dev); 119 | } 120 | 121 | close(fd); 122 | 123 | return 0; 124 | } 125 | 126 | -------------------------------------------------------------------------------- /TestCodes/route_dump.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #define IFLIST_REPLY_BUFFER 8192 16 | 17 | typedef struct nl_req_s nl_req_t; 18 | 19 | struct nl_req_s { 20 | struct nlmsghdr hdr; 21 | struct rtmsg r; 22 | char buf[1025]; 23 | }; 24 | 25 | void rtnl_print_route(struct nlmsghdr *nlh) 26 | { 27 | struct rtmsg *route_entry; 28 | struct rtattr *route_attribute; 29 | int route_attribute_len = 0; 30 | unsigned char route_netmask = 0; 31 | unsigned char route_protocol = 0; 32 | char dst_ip[32]; 33 | char gw_ip[32]; 34 | char src_ip[32]; 35 | int via = 0; 36 | 37 | route_entry = (struct rtmsg *) NLMSG_DATA(nlh); 38 | 39 | if (route_entry->rtm_table != RT_TABLE_MAIN) 40 | return; 41 | 42 | route_netmask = route_entry->rtm_dst_len; 43 | route_protocol = route_entry->rtm_protocol; 44 | route_attribute = (struct rtattr *) RTM_RTA(route_entry); 45 | route_attribute_len = RTM_PAYLOAD(nlh); 46 | 47 | for ( ; RTA_OK(route_attribute, route_attribute_len); \ 48 | route_attribute = RTA_NEXT(route_attribute, route_attribute_len)) 49 | { 50 | printf("hello\n"); 51 | if (route_attribute->rta_type == RTA_DST) 52 | { 53 | inet_ntop(AF_INET, RTA_DATA(route_attribute), \ 54 | dst_ip, sizeof(dst_ip)); 55 | } 56 | if (route_attribute->rta_type == RTA_GATEWAY) 57 | { 58 | inet_ntop(AF_INET, RTA_DATA(route_attribute), \ 59 | gw_ip, sizeof(gw_ip)); 60 | via = 1; 61 | } 62 | if (route_attribute->rta_type == RTA_PREFSRC) 63 | { 64 | inet_ntop(AF_INET, RTA_DATA(route_attribute), \ 65 | src_ip, sizeof(src_ip)); 66 | } 67 | } 68 | printf("route to destination --> %s/%d proto %d and gateway %s\n src=%s, 
via=%d\n", \ 69 | dst_ip, route_netmask, route_protocol, gw_ip,src_ip, via); 70 | 71 | } 72 | 73 | #define NLMSG_TAIL(nmsg) \ 74 | ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 75 | 76 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 77 | int alen) 78 | { 79 | int len = RTA_LENGTH(alen); 80 | struct rtattr *rta; 81 | 82 | if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { 83 | fprintf(stderr, 84 | "addattr_l ERROR: message exceeded bound of %d\n", 85 | maxlen); 86 | return -1; 87 | } 88 | rta = NLMSG_TAIL(n); 89 | rta->rta_type = type; 90 | rta->rta_len = len; 91 | if (alen) 92 | memcpy(RTA_DATA(rta), data, alen); 93 | n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); 94 | return 0; 95 | } 96 | 97 | int main(int argc, char **argv) 98 | { 99 | int fd; 100 | 101 | struct msghdr rtnl_msg; /* generic msghdr struct for use with sendmsg */ 102 | struct iovec io; /* IO vector for sendmsg */ 103 | 104 | nl_req_t req; /* structure that describes the rtnetlink packet itself */ 105 | char reply[IFLIST_REPLY_BUFFER]; /* a large buffer to receive lots of link information */ 106 | 107 | pid_t pid = getpid(); /* our process ID to build the correct netlink address */ 108 | int end = 0; /* some flag to end loop parsing */ 109 | 110 | fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 111 | 112 | memset(&rtnl_msg, 0, sizeof(rtnl_msg)); 113 | memset(&req, 0, sizeof(req)); 114 | 115 | req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 116 | req.hdr.nlmsg_type = RTM_GETROUTE; 117 | req.hdr.nlmsg_flags = NLM_F_REQUEST; 118 | req.r.rtm_family = AF_INET; 119 | 120 | // char ipaddr[16]; 121 | // strcpy(ipaddr, argv[1]); 122 | // strcpy(ipaddr, "10.0.0.22"); 123 | __u8 cp[]={10,1,0,22}; 124 | __u8 *ap; 125 | int i; 126 | 127 | addattr_l(&req.hdr, sizeof(req), RTA_DST, cp, 4); 128 | 129 | io.iov_base = &req; 130 | io.iov_len = req.hdr.nlmsg_len; 131 | rtnl_msg.msg_iov = &io; 132 | rtnl_msg.msg_iovlen = 1; 133 | 134 | 
sendmsg(fd, (struct msghdr *) &rtnl_msg, 0); 135 | 136 | /* parse reply */ 137 | 138 | { 139 | int len; 140 | struct nlmsghdr *msg_ptr; /* pointer to current message part */ 141 | 142 | struct msghdr rtnl_reply; /* generic msghdr structure for use with recvmsg */ 143 | struct iovec io_reply; 144 | 145 | memset(&io_reply, 0, sizeof(io_reply)); 146 | memset(&rtnl_reply, 0, sizeof(rtnl_reply)); 147 | 148 | io.iov_base = reply; 149 | io.iov_len = IFLIST_REPLY_BUFFER; 150 | rtnl_reply.msg_iov = &io; 151 | rtnl_reply.msg_iovlen = 1; 152 | 153 | len = recvmsg(fd, &rtnl_reply, 0); /* read as much data as fits in the receive buffer */ 154 | msg_ptr = (struct nlmsghdr *) reply; 155 | rtnl_print_route(msg_ptr); 156 | } 157 | 158 | close(fd); 159 | 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /TestCodes/routemac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ktaka-ccmp/xdp-loadbalancer/2008d0dd7c42cf5ccec53fcfb9a797f5c70042f6/TestCodes/routemac -------------------------------------------------------------------------------- /TestCodes/xlb_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | #define IFLIST_REPLY_BUFFER 8192 25 | #define DEBUG 0 26 | 27 | int xlb_parse_route(struct nlmsghdr *nlh, __u8 *src, __u8 *next, int *dev) 28 | { 29 | struct rtmsg *route_entry; 30 | struct rtattr *route_attribute; 31 | int route_attribute_len = 0; 32 | unsigned char route_netmask = 0; 33 | unsigned char route_protocol = 0; 34 | char dst_ip[32]; 35 | char gw_ip[32]; 36 | char src_ip[32]; 37 | int i, via = 0; 38 | __u8 *addr; 39 | 40 | route_entry 
= (struct rtmsg *) NLMSG_DATA(nlh); 41 | 42 | if (route_entry->rtm_table != RT_TABLE_MAIN) 43 | return 1; 44 | 45 | route_netmask = route_entry->rtm_dst_len; 46 | route_protocol = route_entry->rtm_protocol; 47 | route_attribute = (struct rtattr *) RTM_RTA(route_entry); 48 | route_attribute_len = RTM_PAYLOAD(nlh); 49 | 50 | for ( ; RTA_OK(route_attribute, route_attribute_len); \ 51 | route_attribute = RTA_NEXT(route_attribute, route_attribute_len)) 52 | { 53 | 54 | if (route_attribute->rta_type == RTA_DST) 55 | { 56 | if(DEBUG) inet_ntop(AF_INET, RTA_DATA(route_attribute), dst_ip, sizeof(dst_ip)); 57 | if (via == 0) 58 | memcpy(next, RTA_DATA(route_attribute), 4); 59 | } 60 | 61 | if (route_attribute->rta_type == RTA_GATEWAY) 62 | { 63 | if(DEBUG) inet_ntop(AF_INET, RTA_DATA(route_attribute), gw_ip, sizeof(gw_ip)); 64 | memcpy(next, RTA_DATA(route_attribute), 4); 65 | via = 1; 66 | } 67 | 68 | if (route_attribute->rta_type == RTA_PREFSRC) 69 | { 70 | if(DEBUG) inet_ntop(AF_INET, RTA_DATA(route_attribute), src_ip, sizeof(src_ip)); 71 | memcpy(src, RTA_DATA(route_attribute), 4); 72 | } 73 | 74 | if (route_attribute->rta_type == RTA_OIF) 75 | { 76 | memcpy(dev, RTA_DATA(route_attribute), sizeof(int)); 77 | } 78 | } 79 | 80 | if(DEBUG) 81 | printf("route to destination --> %s/%d proto %d and gateway %s\n src=%s\n", \ 82 | dst_ip, route_netmask, route_protocol, gw_ip,src_ip); 83 | 84 | return 0; 85 | } 86 | 87 | #define NLMSG_TAIL(nmsg) \ 88 | ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 89 | 90 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 91 | int alen) 92 | { 93 | int len = RTA_LENGTH(alen); 94 | struct rtattr *rta; 95 | 96 | if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { 97 | fprintf(stderr, 98 | "addattr_l ERROR: message exceeded bound of %d\n", 99 | maxlen); 100 | return -1; 101 | } 102 | rta = NLMSG_TAIL(n); 103 | rta->rta_type = type; 104 | rta->rta_len = len; 105 | if (alen) 106 | 
memcpy(RTA_DATA(rta), data, alen); 107 | n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); 108 | return 0; 109 | } 110 | 111 | static int xlb_iproute_get(char *dst_ip, __u8 *src , __u8 *next, int *dev) 112 | { 113 | struct msghdr rtnl_msg; 114 | struct iovec io; 115 | int fd; 116 | __u32 addr; 117 | 118 | struct { 119 | struct nlmsghdr n; 120 | struct rtmsg r; 121 | char buf[1024]; 122 | } req; 123 | 124 | memset(&rtnl_msg, 0, sizeof(rtnl_msg)); 125 | memset(&req, 0, sizeof(req)); 126 | 127 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 128 | req.n.nlmsg_flags = NLM_F_REQUEST; 129 | req.n.nlmsg_type = RTM_GETROUTE; 130 | req.r.rtm_family = AF_INET; 131 | 132 | inet_pton(AF_INET, dst_ip , &addr); 133 | 134 | addattr_l(&req.n, sizeof(req), RTA_DST, &addr, 4); 135 | 136 | io.iov_base = &req; 137 | io.iov_len = req.n.nlmsg_len; 138 | rtnl_msg.msg_iov = &io; 139 | rtnl_msg.msg_iovlen = 1; 140 | 141 | fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 142 | sendmsg(fd, (struct msghdr *) &rtnl_msg, 0); 143 | 144 | /* parse reply */ 145 | 146 | { 147 | int len; 148 | struct nlmsghdr *answer; 149 | struct msghdr rtnl_reply; 150 | struct iovec io_reply; 151 | char reply[IFLIST_REPLY_BUFFER]; 152 | 153 | 154 | memset(&io_reply, 0, sizeof(io_reply)); 155 | memset(&rtnl_reply, 0, sizeof(rtnl_reply)); 156 | 157 | io.iov_base = reply; 158 | io.iov_len = IFLIST_REPLY_BUFFER; 159 | rtnl_reply.msg_iov = &io; 160 | rtnl_reply.msg_iovlen = 1; 161 | 162 | len = recvmsg(fd, &rtnl_reply, 0); 163 | answer = (struct nlmsghdr *) reply; 164 | // rtnl_print_route(msg_ptr); 165 | 166 | xlb_parse_route(answer, src, next, dev); 167 | } 168 | 169 | close(fd); 170 | 171 | return 0; 172 | } 173 | 174 | static int xlb_get_mac(__u8 *host, __u8 *mac, int *dev){ 175 | int s; 176 | 177 | struct arpreq req; 178 | struct sockaddr_in *sin; 179 | static char buf[256]; 180 | 181 | // char *host = argv[1]; 182 | 183 | bzero((caddr_t)&req, sizeof(req)); 184 | 185 | sin = (struct sockaddr_in 
*)&req.arp_pa; 186 | sin->sin_family = AF_INET; /* Address Family: Internet */ 187 | sin->sin_addr.s_addr = inet_addr(inet_ntop(AF_INET, host, buf, 256)); 188 | // sin->sin_addr.s_addr = host; 189 | 190 | if((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ 191 | perror("socket() failed."); 192 | exit(-1); 193 | } /* Socket is opened.*/ 194 | 195 | strcpy(req.arp_dev, "eth0"); 196 | 197 | 198 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 199 | if(errno == ENXIO){ 200 | 201 | icmp_send_1pkt(&sin->sin_addr.s_addr); 202 | usleep(100000); 203 | 204 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 205 | if(errno == ENXIO){ 206 | printf("%s - no entry.\n", inet_ntop(AF_INET, host, buf, 256)); 207 | printf("%lu - no entry.\n", *host); 208 | exit(-1); 209 | } else { 210 | perror("SIOCGARP"); 211 | exit(-1); 212 | } 213 | } 214 | 215 | } else { 216 | perror("SIOCGARP"); 217 | exit(-1); 218 | } 219 | } 220 | 221 | memcpy(mac, req.arp_ha.sa_data, 6); 222 | 223 | 224 | return(0); 225 | } 226 | 227 | 228 | int main(int argc, char *argv[]) 229 | { 230 | char ipaddr[16]; 231 | strcpy(ipaddr, argv[1]); 232 | // strcpy(ipaddr, "10.0.0.22"); 233 | 234 | __u8 src[4], nexthop[4], mac[6]; 235 | int dev=0; 236 | 237 | xlb_iproute_get(ipaddr,src,nexthop, &dev); 238 | 239 | xlb_get_mac(nexthop, mac , &dev); 240 | 241 | static char buf[256]; 242 | printf("src: %s \n", inet_ntop(AF_INET, src, buf, 256)); 243 | printf("nexthop: %s \n", inet_ntop(AF_INET, nexthop, buf, 256)); 244 | printf("dev: %d \n", dev); 245 | 246 | char mac_txt[6] = {0}; 247 | ether_ntoa_r((struct ether_addr *)mac, mac_txt); 248 | printf("mac: %s\n", mac_txt ); 249 | 250 | } 251 | -------------------------------------------------------------------------------- /bpf_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* Copied from $(KERNEL)/tools/testing/selftests/bpf/bpf_helpers.h 3 | * Needed by xxx_kern.c files 4 | * 5 | * Pulling this in creates 
a dependency to uapi/linux/bpf.h 6 | * this is maintained under tools/include/uapi/linux/bpf.h 7 | */ 8 | #ifndef __BPF_HELPERS_H 9 | #define __BPF_HELPERS_H 10 | 11 | /* helper macro to place programs, maps, license in 12 | * different sections in elf_bpf file. Section names 13 | * are interpreted by elf_bpf loader 14 | */ 15 | #define SEC(NAME) __attribute__((section(NAME), used)) 16 | 17 | /* helper functions called from eBPF programs written in C */ 18 | static void *(*bpf_map_lookup_elem)(void *map, void *key) = 19 | (void *) BPF_FUNC_map_lookup_elem; 20 | static int (*bpf_map_update_elem)(void *map, void *key, void *value, 21 | unsigned long long flags) = 22 | (void *) BPF_FUNC_map_update_elem; 23 | static int (*bpf_map_delete_elem)(void *map, void *key) = 24 | (void *) BPF_FUNC_map_delete_elem; 25 | static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = 26 | (void *) BPF_FUNC_probe_read; 27 | static unsigned long long (*bpf_ktime_get_ns)(void) = 28 | (void *) BPF_FUNC_ktime_get_ns; 29 | static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) 
= 30 | (void *) BPF_FUNC_trace_printk; 31 | static void (*bpf_tail_call)(void *ctx, void *map, int index) = 32 | (void *) BPF_FUNC_tail_call; 33 | static unsigned long long (*bpf_get_smp_processor_id)(void) = 34 | (void *) BPF_FUNC_get_smp_processor_id; 35 | static unsigned long long (*bpf_get_current_pid_tgid)(void) = 36 | (void *) BPF_FUNC_get_current_pid_tgid; 37 | static unsigned long long (*bpf_get_current_uid_gid)(void) = 38 | (void *) BPF_FUNC_get_current_uid_gid; 39 | static int (*bpf_get_current_comm)(void *buf, int buf_size) = 40 | (void *) BPF_FUNC_get_current_comm; 41 | static unsigned long long (*bpf_perf_event_read)(void *map, 42 | unsigned long long flags) = 43 | (void *) BPF_FUNC_perf_event_read; 44 | static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = 45 | (void *) BPF_FUNC_clone_redirect; 46 | static int (*bpf_redirect)(int ifindex, int flags) = 47 | (void *) BPF_FUNC_redirect; 48 | static int (*bpf_redirect_map)(void *map, int key, int flags) = 49 | (void *) BPF_FUNC_redirect_map; 50 | static int (*bpf_perf_event_output)(void *ctx, void *map, 51 | unsigned long long flags, void *data, 52 | int size) = 53 | (void *) BPF_FUNC_perf_event_output; 54 | static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = 55 | (void *) BPF_FUNC_get_stackid; 56 | static int (*bpf_probe_write_user)(void *dst, void *src, int size) = 57 | (void *) BPF_FUNC_probe_write_user; 58 | static int (*bpf_current_task_under_cgroup)(void *map, int index) = 59 | (void *) BPF_FUNC_current_task_under_cgroup; 60 | static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = 61 | (void *) BPF_FUNC_skb_get_tunnel_key; 62 | static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = 63 | (void *) BPF_FUNC_skb_set_tunnel_key; 64 | static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = 65 | (void *) BPF_FUNC_skb_get_tunnel_opt; 66 | static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = 67 | 
(void *) BPF_FUNC_skb_set_tunnel_opt; 68 | static unsigned long long (*bpf_get_prandom_u32)(void) = 69 | (void *) BPF_FUNC_get_prandom_u32; 70 | static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = 71 | (void *) BPF_FUNC_xdp_adjust_head; 72 | static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = 73 | (void *) BPF_FUNC_xdp_adjust_meta; 74 | static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, 75 | int optlen) = 76 | (void *) BPF_FUNC_setsockopt; 77 | static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, 78 | int optlen) = 79 | (void *) BPF_FUNC_getsockopt; 80 | static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = 81 | (void *) BPF_FUNC_sk_redirect_map; 82 | static int (*bpf_sock_map_update)(void *map, void *key, void *value, 83 | unsigned long long flags) = 84 | (void *) BPF_FUNC_sock_map_update; 85 | static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, 86 | void *buf, unsigned int buf_size) = 87 | (void *) BPF_FUNC_perf_event_read_value; 88 | static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, 89 | unsigned int buf_size) = 90 | (void *) BPF_FUNC_perf_prog_read_value; 91 | static int (*bpf_override_return)(void *ctx, unsigned long rc) = 92 | (void *) BPF_FUNC_override_return; 93 | 94 | /* llvm builtin functions that eBPF C program may use to 95 | * emit BPF_LD_ABS and BPF_LD_IND instructions 96 | */ 97 | struct sk_buff; 98 | unsigned long long load_byte(void *skb, 99 | unsigned long long off) asm("llvm.bpf.load.byte"); 100 | unsigned long long load_half(void *skb, 101 | unsigned long long off) asm("llvm.bpf.load.half"); 102 | unsigned long long load_word(void *skb, 103 | unsigned long long off) asm("llvm.bpf.load.word"); 104 | 105 | /* a helper structure used by eBPF C program 106 | * to describe map attributes to elf_bpf loader 107 | */ 108 | struct bpf_map_def { 109 | unsigned int type; 110 | unsigned int key_size; 111 | unsigned int value_size; 
112 | unsigned int max_entries; 113 | unsigned int map_flags; 114 | unsigned int inner_map_idx; 115 | unsigned int numa_node; 116 | }; 117 | 118 | static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = 119 | (void *) BPF_FUNC_skb_load_bytes; 120 | static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = 121 | (void *) BPF_FUNC_skb_store_bytes; 122 | static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = 123 | (void *) BPF_FUNC_l3_csum_replace; 124 | static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = 125 | (void *) BPF_FUNC_l4_csum_replace; 126 | static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = 127 | (void *) BPF_FUNC_skb_under_cgroup; 128 | static int (*bpf_skb_change_head)(void *, int len, int flags) = 129 | (void *) BPF_FUNC_skb_change_head; 130 | 131 | /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ 132 | #if defined(__TARGET_ARCH_x86) 133 | #define bpf_target_x86 134 | #define bpf_target_defined 135 | #elif defined(__TARGET_ARCH_s930x) 136 | #define bpf_target_s930x 137 | #define bpf_target_defined 138 | #elif defined(__TARGET_ARCH_arm64) 139 | #define bpf_target_arm64 140 | #define bpf_target_defined 141 | #elif defined(__TARGET_ARCH_mips) 142 | #define bpf_target_mips 143 | #define bpf_target_defined 144 | #elif defined(__TARGET_ARCH_powerpc) 145 | #define bpf_target_powerpc 146 | #define bpf_target_defined 147 | #elif defined(__TARGET_ARCH_sparc) 148 | #define bpf_target_sparc 149 | #define bpf_target_defined 150 | #else 151 | #undef bpf_target_defined 152 | #endif 153 | 154 | /* Fall back to what the compiler says */ 155 | #ifndef bpf_target_defined 156 | #if defined(__x86_64__) 157 | #define bpf_target_x86 158 | #elif defined(__s390x__) 159 | #define bpf_target_s930x 160 | #elif defined(__aarch64__) 161 | #define bpf_target_arm64 162 | #elif defined(__mips__) 163 | #define bpf_target_mips 164 | #elif 
defined(__powerpc__) 165 | #define bpf_target_powerpc 166 | #elif defined(__sparc__) 167 | #define bpf_target_sparc 168 | #endif 169 | #endif 170 | 171 | #if defined(bpf_target_x86) 172 | 173 | #define PT_REGS_PARM1(x) ((x)->di) 174 | #define PT_REGS_PARM2(x) ((x)->si) 175 | #define PT_REGS_PARM3(x) ((x)->dx) 176 | #define PT_REGS_PARM4(x) ((x)->cx) 177 | #define PT_REGS_PARM5(x) ((x)->r8) 178 | #define PT_REGS_RET(x) ((x)->sp) 179 | #define PT_REGS_FP(x) ((x)->bp) 180 | #define PT_REGS_RC(x) ((x)->ax) 181 | #define PT_REGS_SP(x) ((x)->sp) 182 | #define PT_REGS_IP(x) ((x)->ip) 183 | 184 | #elif defined(bpf_target_s390x) 185 | 186 | #define PT_REGS_PARM1(x) ((x)->gprs[2]) 187 | #define PT_REGS_PARM2(x) ((x)->gprs[3]) 188 | #define PT_REGS_PARM3(x) ((x)->gprs[4]) 189 | #define PT_REGS_PARM4(x) ((x)->gprs[5]) 190 | #define PT_REGS_PARM5(x) ((x)->gprs[6]) 191 | #define PT_REGS_RET(x) ((x)->gprs[14]) 192 | #define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ 193 | #define PT_REGS_RC(x) ((x)->gprs[2]) 194 | #define PT_REGS_SP(x) ((x)->gprs[15]) 195 | #define PT_REGS_IP(x) ((x)->psw.addr) 196 | 197 | #elif defined(bpf_target_arm64) 198 | 199 | #define PT_REGS_PARM1(x) ((x)->regs[0]) 200 | #define PT_REGS_PARM2(x) ((x)->regs[1]) 201 | #define PT_REGS_PARM3(x) ((x)->regs[2]) 202 | #define PT_REGS_PARM4(x) ((x)->regs[3]) 203 | #define PT_REGS_PARM5(x) ((x)->regs[4]) 204 | #define PT_REGS_RET(x) ((x)->regs[30]) 205 | #define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ 206 | #define PT_REGS_RC(x) ((x)->regs[0]) 207 | #define PT_REGS_SP(x) ((x)->sp) 208 | #define PT_REGS_IP(x) ((x)->pc) 209 | 210 | #elif defined(bpf_target_mips) 211 | 212 | #define PT_REGS_PARM1(x) ((x)->regs[4]) 213 | #define PT_REGS_PARM2(x) ((x)->regs[5]) 214 | #define PT_REGS_PARM3(x) ((x)->regs[6]) 215 | #define PT_REGS_PARM4(x) ((x)->regs[7]) 216 | #define PT_REGS_PARM5(x) ((x)->regs[8]) 217 | #define PT_REGS_RET(x) ((x)->regs[31]) 218 | #define 
PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ 219 | #define PT_REGS_RC(x) ((x)->regs[1]) 220 | #define PT_REGS_SP(x) ((x)->regs[29]) 221 | #define PT_REGS_IP(x) ((x)->cp0_epc) 222 | 223 | #elif defined(bpf_target_powerpc) 224 | 225 | #define PT_REGS_PARM1(x) ((x)->gpr[3]) 226 | #define PT_REGS_PARM2(x) ((x)->gpr[4]) 227 | #define PT_REGS_PARM3(x) ((x)->gpr[5]) 228 | #define PT_REGS_PARM4(x) ((x)->gpr[6]) 229 | #define PT_REGS_PARM5(x) ((x)->gpr[7]) 230 | #define PT_REGS_RC(x) ((x)->gpr[3]) 231 | #define PT_REGS_SP(x) ((x)->sp) 232 | #define PT_REGS_IP(x) ((x)->nip) 233 | 234 | #elif defined(bpf_target_sparc) 235 | 236 | #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) 237 | #define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) 238 | #define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) 239 | #define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) 240 | #define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) 241 | #define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) 242 | #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) 243 | #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) 244 | 245 | /* Should this also be a bpf_target check for the sparc case? 
*/ 246 | #if defined(__arch64__) 247 | #define PT_REGS_IP(x) ((x)->tpc) 248 | #else 249 | #define PT_REGS_IP(x) ((x)->pc) 250 | #endif 251 | 252 | #endif 253 | 254 | #ifdef bpf_target_powerpc 255 | #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) 256 | #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP 257 | #elif bpf_target_sparc 258 | #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) 259 | #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP 260 | #else 261 | #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ 262 | bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) 263 | #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ 264 | bpf_probe_read(&(ip), sizeof(ip), \ 265 | (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) 266 | #endif 267 | 268 | #endif 269 | -------------------------------------------------------------------------------- /bpf_load.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* 3 | * Notice: Modified copy of kernel/samples/bpf/bpf_load.c 4 | * - Up-to-date with kernel v4.14-rc8 5 | * 6 | * Added features: 7 | * - Fixed load order of prog_fd[] program sections 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "libbpf.h" 35 | #include "bpf_load.h" 36 | #include "perf-sys.h" 37 | 38 | #define DEBUGFS "/sys/kernel/debug/tracing/" 39 | 40 | static char license[128]; 41 | static int kern_version; 42 | static bool processed_sec[128]; 43 | char bpf_log_buf[BPF_LOG_BUF_SIZE]; 44 | int map_fd[MAX_MAPS]; 45 | int prog_fd[MAX_PROGS]; 46 | int event_fd[MAX_PROGS]; 47 | int prog_cnt; 48 | int prog_array_fd = -1; 49 | 50 | 
struct bpf_map_data map_data[MAX_MAPS]; 51 | int map_data_count = 0; 52 | 53 | static int populate_prog_array(const char *event, int prog_fd) 54 | { 55 | int ind = atoi(event), err; 56 | 57 | err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); 58 | if (err < 0) { 59 | printf("failed to store prog_fd in prog_array\n"); 60 | return -1; 61 | } 62 | return 0; 63 | } 64 | 65 | static int load_and_attach(const char *event, struct bpf_insn *prog, int size) 66 | { 67 | bool is_socket = strncmp(event, "socket", 6) == 0; 68 | bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; 69 | bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; 70 | bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; 71 | bool is_xdp = strncmp(event, "xdp", 3) == 0; 72 | bool is_perf_event = strncmp(event, "perf_event", 10) == 0; 73 | bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; 74 | bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; 75 | bool is_sockops = strncmp(event, "sockops", 7) == 0; 76 | bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; 77 | size_t insns_cnt = size / sizeof(struct bpf_insn); 78 | enum bpf_prog_type prog_type; 79 | char buf[256]; 80 | int fd, efd, err, id; 81 | struct perf_event_attr attr = {}; 82 | 83 | attr.type = PERF_TYPE_TRACEPOINT; 84 | attr.sample_type = PERF_SAMPLE_RAW; 85 | attr.sample_period = 1; 86 | attr.wakeup_events = 1; 87 | 88 | if (is_socket) { 89 | prog_type = BPF_PROG_TYPE_SOCKET_FILTER; 90 | } else if (is_kprobe || is_kretprobe) { 91 | prog_type = BPF_PROG_TYPE_KPROBE; 92 | } else if (is_tracepoint) { 93 | prog_type = BPF_PROG_TYPE_TRACEPOINT; 94 | } else if (is_xdp) { 95 | prog_type = BPF_PROG_TYPE_XDP; 96 | } else if (is_perf_event) { 97 | prog_type = BPF_PROG_TYPE_PERF_EVENT; 98 | } else if (is_cgroup_skb) { 99 | prog_type = BPF_PROG_TYPE_CGROUP_SKB; 100 | } else if (is_cgroup_sk) { 101 | prog_type = BPF_PROG_TYPE_CGROUP_SOCK; 102 | } else if (is_sockops) { 103 | prog_type = BPF_PROG_TYPE_SOCK_OPS; 104 
| } else if (is_sk_skb) { 105 | prog_type = BPF_PROG_TYPE_SK_SKB; 106 | } else { 107 | printf("Unknown event '%s'\n", event); 108 | return -1; 109 | } 110 | 111 | fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, 112 | bpf_log_buf, BPF_LOG_BUF_SIZE); 113 | if (fd < 0) { 114 | printf("bpf_load_program(prog_cnt=%d) err=%d\n%s", 115 | prog_cnt, errno, bpf_log_buf); 116 | return -1; 117 | } 118 | 119 | prog_fd[prog_cnt++] = fd; 120 | 121 | if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) 122 | return 0; 123 | 124 | if (is_socket || is_sockops || is_sk_skb) { 125 | if (is_socket) 126 | event += 6; 127 | else 128 | event += 7; 129 | if (*event != '/') 130 | return 0; 131 | event++; 132 | if (!isdigit(*event)) { 133 | printf("invalid prog number\n"); 134 | return -1; 135 | } 136 | return populate_prog_array(event, fd); 137 | } 138 | 139 | if (is_kprobe || is_kretprobe) { 140 | if (is_kprobe) 141 | event += 7; 142 | else 143 | event += 10; 144 | 145 | if (*event == 0) { 146 | printf("event name cannot be empty\n"); 147 | return -1; 148 | } 149 | 150 | if (isdigit(*event)) 151 | return populate_prog_array(event, fd); 152 | 153 | snprintf(buf, sizeof(buf), 154 | "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", 155 | is_kprobe ? 
'p' : 'r', event, event); 156 | err = system(buf); 157 | if (err < 0) { 158 | printf("failed to create kprobe '%s' error '%s'\n", 159 | event, strerror(errno)); 160 | return -1; 161 | } 162 | 163 | strcpy(buf, DEBUGFS); 164 | strcat(buf, "events/kprobes/"); 165 | strcat(buf, event); 166 | strcat(buf, "/id"); 167 | } else if (is_tracepoint) { 168 | event += 11; 169 | 170 | if (*event == 0) { 171 | printf("event name cannot be empty\n"); 172 | return -1; 173 | } 174 | strcpy(buf, DEBUGFS); 175 | strcat(buf, "events/"); 176 | strcat(buf, event); 177 | strcat(buf, "/id"); 178 | } 179 | 180 | efd = open(buf, O_RDONLY, 0); 181 | if (efd < 0) { 182 | printf("failed to open event %s\n", event); 183 | return -1; 184 | } 185 | 186 | err = read(efd, buf, sizeof(buf)); 187 | if (err < 0 || err >= sizeof(buf)) { 188 | printf("read from '%s' failed '%s'\n", event, strerror(errno)); 189 | return -1; 190 | } 191 | 192 | close(efd); 193 | 194 | buf[err] = 0; 195 | id = atoi(buf); 196 | attr.config = id; 197 | 198 | efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); 199 | if (efd < 0) { 200 | printf("event %d fd %d err %s\n", id, efd, strerror(errno)); 201 | return -1; 202 | } 203 | event_fd[prog_cnt - 1] = efd; 204 | err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); 205 | if (err < 0) { 206 | printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", 207 | strerror(errno)); 208 | return -1; 209 | } 210 | err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); 211 | if (err < 0) { 212 | printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", 213 | strerror(errno)); 214 | return -1; 215 | } 216 | 217 | return 0; 218 | } 219 | 220 | static int load_maps(struct bpf_map_data *maps, int nr_maps, 221 | fixup_map_cb fixup_map) 222 | { 223 | int i, numa_node; 224 | 225 | for (i = 0; i < nr_maps; i++) { 226 | if (fixup_map) { 227 | fixup_map(&maps[i], i); 228 | /* Allow userspace to assign map FD prior to creation */ 229 | if (maps[i].fd != -1) { 230 | map_fd[i] = maps[i].fd; 231 | 
continue; 232 | } 233 | } 234 | 235 | numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? 236 | maps[i].def.numa_node : -1; 237 | 238 | if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || 239 | maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { 240 | int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; 241 | 242 | map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, 243 | maps[i].name, 244 | maps[i].def.key_size, 245 | inner_map_fd, 246 | maps[i].def.max_entries, 247 | maps[i].def.map_flags, 248 | numa_node); 249 | } else { 250 | map_fd[i] = bpf_create_map_node(maps[i].def.type, 251 | maps[i].name, 252 | maps[i].def.key_size, 253 | maps[i].def.value_size, 254 | maps[i].def.max_entries, 255 | maps[i].def.map_flags, 256 | numa_node); 257 | 258 | /* DEBUG 259 | printf("MapType: %d \n", maps[i].def.type); 260 | printf("MapName: %s \n", maps[i].name); 261 | printf("MapKeysize: %d \n", maps[i].def.key_size); 262 | printf("MapValusize: %d \n", maps[i].def.value_size); 263 | printf("MapMaxEnt: %d \n", maps[i].def.max_entries); 264 | printf("MapMapFlags: %d \n", maps[i].def.map_flags); 265 | printf("mapfd: %d \n", map_fd[i]); 266 | 267 | printf("Hello World2\n"); 268 | */ 269 | 270 | } 271 | if (map_fd[i] < 0) { 272 | printf("failed to create a map: %d %s\n", 273 | errno, strerror(errno)); 274 | return 1; 275 | } 276 | maps[i].fd = map_fd[i]; 277 | 278 | if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) 279 | prog_array_fd = map_fd[i]; 280 | } 281 | return 0; 282 | } 283 | 284 | static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, 285 | GElf_Shdr *shdr, Elf_Data **data) 286 | { 287 | Elf_Scn *scn; 288 | 289 | scn = elf_getscn(elf, i); 290 | if (!scn) 291 | return 1; 292 | 293 | if (gelf_getshdr(scn, shdr) != shdr) 294 | return 2; 295 | 296 | *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); 297 | if (!*shname || !shdr->sh_size) 298 | return 3; 299 | 300 | *data = elf_getdata(scn, 0); 301 | if (!*data || elf_getdata(scn, *data) != NULL) 302 | 
return 4; 303 | 304 | return 0; 305 | } 306 | 307 | static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, 308 | GElf_Shdr *shdr, struct bpf_insn *insn, 309 | struct bpf_map_data *maps, int nr_maps) 310 | { 311 | int i, nrels; 312 | 313 | nrels = shdr->sh_size / shdr->sh_entsize; 314 | 315 | for (i = 0; i < nrels; i++) { 316 | GElf_Sym sym; 317 | GElf_Rel rel; 318 | unsigned int insn_idx; 319 | bool match = false; 320 | int map_idx; 321 | 322 | gelf_getrel(data, i, &rel); 323 | 324 | insn_idx = rel.r_offset / sizeof(struct bpf_insn); 325 | 326 | gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); 327 | 328 | if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { 329 | printf("invalid relo for insn[%d].code 0x%x\n", 330 | insn_idx, insn[insn_idx].code); 331 | return 1; 332 | } 333 | insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; 334 | 335 | /* Match FD relocation against recorded map_data[] offset */ 336 | for (map_idx = 0; map_idx < nr_maps; map_idx++) { 337 | if (maps[map_idx].elf_offset == sym.st_value) { 338 | match = true; 339 | break; 340 | } 341 | } 342 | if (match) { 343 | insn[insn_idx].imm = maps[map_idx].fd; 344 | } else { 345 | printf("invalid relo for insn[%d] no map_data match\n", 346 | insn_idx); 347 | return 1; 348 | } 349 | } 350 | 351 | return 0; 352 | } 353 | 354 | static int cmp_symbols(const void *l, const void *r) 355 | { 356 | const GElf_Sym *lsym = (const GElf_Sym *)l; 357 | const GElf_Sym *rsym = (const GElf_Sym *)r; 358 | 359 | if (lsym->st_value < rsym->st_value) 360 | return -1; 361 | else if (lsym->st_value > rsym->st_value) 362 | return 1; 363 | else 364 | return 0; 365 | } 366 | 367 | static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, 368 | Elf *elf, Elf_Data *symbols, int strtabidx) 369 | { 370 | int map_sz_elf, map_sz_copy; 371 | bool validate_zero = false; 372 | Elf_Data *data_maps; 373 | int i, nr_maps; 374 | GElf_Sym *sym; 375 | Elf_Scn *scn; 376 | 377 | if (maps_shndx < 0) 378 | return 
-EINVAL; 379 | if (!symbols) 380 | return -EINVAL; 381 | 382 | /* Get data for maps section via elf index */ 383 | scn = elf_getscn(elf, maps_shndx); 384 | if (scn) 385 | data_maps = elf_getdata(scn, NULL); 386 | if (!scn || !data_maps) { 387 | printf("Failed to get Elf_Data from maps section %d\n", 388 | maps_shndx); 389 | return -EINVAL; 390 | } 391 | 392 | /* For each map get corrosponding symbol table entry */ 393 | sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); 394 | for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { 395 | assert(nr_maps < MAX_MAPS+1); 396 | if (!gelf_getsym(symbols, i, &sym[nr_maps])) 397 | continue; 398 | if (sym[nr_maps].st_shndx != maps_shndx) 399 | continue; 400 | /* Only increment iif maps section */ 401 | nr_maps++; 402 | } 403 | 404 | /* Align to map_fd[] order, via sort on offset in sym.st_value */ 405 | qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); 406 | 407 | /* Keeping compatible with ELF maps section changes 408 | * ------------------------------------------------ 409 | * The program size of struct bpf_map_def is known by loader 410 | * code, but struct stored in ELF file can be different. 411 | * 412 | * Unfortunately sym[i].st_size is zero. To calculate the 413 | * struct size stored in the ELF file, assume all struct have 414 | * the same size, and simply divide with number of map 415 | * symbols. 416 | */ 417 | map_sz_elf = data_maps->d_size / nr_maps; 418 | map_sz_copy = sizeof(struct bpf_map_def); 419 | if (map_sz_elf < map_sz_copy) { 420 | /* 421 | * Backward compat, loading older ELF file with 422 | * smaller struct, keeping remaining bytes zero. 423 | */ 424 | map_sz_copy = map_sz_elf; 425 | } else if (map_sz_elf > map_sz_copy) { 426 | /* 427 | * Forward compat, loading newer ELF file with larger 428 | * struct with unknown features. Assume zero means 429 | * feature not used. Thus, validate rest of struct 430 | * data is zero. 
431 | */ 432 | validate_zero = true; 433 | } 434 | 435 | /* Memcpy relevant part of ELF maps data to loader maps */ 436 | for (i = 0; i < nr_maps; i++) { 437 | unsigned char *addr, *end; 438 | struct bpf_map_def *def; 439 | const char *map_name; 440 | size_t offset; 441 | 442 | map_name = elf_strptr(elf, strtabidx, sym[i].st_name); 443 | maps[i].name = strdup(map_name); 444 | if (!maps[i].name) { 445 | printf("strdup(%s): %s(%d)\n", map_name, 446 | strerror(errno), errno); 447 | free(sym); 448 | return -errno; 449 | } 450 | 451 | /* Symbol value is offset into ELF maps section data area */ 452 | offset = sym[i].st_value; 453 | def = (struct bpf_map_def *)(data_maps->d_buf + offset); 454 | maps[i].elf_offset = offset; 455 | memset(&maps[i].def, 0, sizeof(struct bpf_map_def)); 456 | memcpy(&maps[i].def, def, map_sz_copy); 457 | 458 | /* Verify no newer features were requested */ 459 | if (validate_zero) { 460 | addr = (unsigned char*) def + map_sz_copy; 461 | end = (unsigned char*) def + map_sz_elf; 462 | for (; addr < end; addr++) { 463 | if (*addr != 0) { 464 | free(sym); 465 | return -EFBIG; 466 | } 467 | } 468 | } 469 | } 470 | 471 | free(sym); 472 | return nr_maps; 473 | } 474 | 475 | static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) 476 | { 477 | int fd, i, ret, maps_shndx = -1, strtabidx = -1; 478 | Elf *elf; 479 | GElf_Ehdr ehdr; 480 | GElf_Shdr shdr, shdr_prog; 481 | Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; 482 | char *shname, *shname_prog; 483 | int nr_maps = 0; 484 | 485 | /* reset global variables */ 486 | kern_version = 0; 487 | memset(license, 0, sizeof(license)); 488 | memset(processed_sec, 0, sizeof(processed_sec)); 489 | 490 | if (elf_version(EV_CURRENT) == EV_NONE) 491 | return 1; 492 | 493 | fd = open(path, O_RDONLY, 0); 494 | if (fd < 0) 495 | return 1; 496 | 497 | elf = elf_begin(fd, ELF_C_READ, NULL); 498 | 499 | if (!elf) 500 | return 1; 501 | 502 | if (gelf_getehdr(elf, &ehdr) != &ehdr) 503 | return 
1; 504 | 505 | /* clear all kprobes */ 506 | i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events"); 507 | 508 | /* scan over all elf sections to get license and map info */ 509 | for (i = 1; i < ehdr.e_shnum; i++) { 510 | 511 | if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 512 | continue; 513 | 514 | if (0) /* helpful for llvm debugging */ 515 | printf("section %d:%s data %p size %zd link %d flags %d\n", 516 | i, shname, data->d_buf, data->d_size, 517 | shdr.sh_link, (int) shdr.sh_flags); 518 | 519 | if (strcmp(shname, "license") == 0) { 520 | processed_sec[i] = true; 521 | memcpy(license, data->d_buf, data->d_size); 522 | } else if (strcmp(shname, "version") == 0) { 523 | processed_sec[i] = true; 524 | if (data->d_size != sizeof(int)) { 525 | printf("invalid size of version section %zd\n", 526 | data->d_size); 527 | return 1; 528 | } 529 | memcpy(&kern_version, data->d_buf, sizeof(int)); 530 | } else if (strcmp(shname, "maps") == 0) { 531 | int j; 532 | 533 | maps_shndx = i; 534 | data_maps = data; 535 | for (j = 0; j < MAX_MAPS; j++) 536 | map_data[j].fd = -1; 537 | } else if (shdr.sh_type == SHT_SYMTAB) { 538 | strtabidx = shdr.sh_link; 539 | symbols = data; 540 | } 541 | } 542 | 543 | ret = 1; 544 | 545 | if (!symbols) { 546 | printf("missing SHT_SYMTAB section\n"); 547 | goto done; 548 | } 549 | 550 | if (data_maps) { 551 | nr_maps = load_elf_maps_section(map_data, maps_shndx, 552 | elf, symbols, strtabidx); 553 | if (nr_maps < 0) { 554 | printf("Error: Failed loading ELF maps (errno:%d):%s\n", 555 | nr_maps, strerror(-nr_maps)); 556 | ret = 1; 557 | goto done; 558 | } 559 | if (load_maps(map_data, nr_maps, fixup_map)) 560 | goto done; 561 | map_data_count = nr_maps; 562 | 563 | processed_sec[maps_shndx] = true; 564 | } 565 | 566 | /* process all relo sections, and rewrite bpf insns for maps */ 567 | for (i = 1; i < ehdr.e_shnum; i++) { 568 | if (processed_sec[i]) 569 | continue; 570 | 571 | if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 
572 | continue; 573 | 574 | if (shdr.sh_type == SHT_REL) { 575 | struct bpf_insn *insns; 576 | 577 | /* locate prog sec that need map fixup (relocations) */ 578 | if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, 579 | &shdr_prog, &data_prog)) 580 | continue; 581 | 582 | if (shdr_prog.sh_type != SHT_PROGBITS || 583 | !(shdr_prog.sh_flags & SHF_EXECINSTR)) 584 | continue; 585 | 586 | insns = (struct bpf_insn *) data_prog->d_buf; 587 | processed_sec[i] = true; /* relo section */ 588 | 589 | if (parse_relo_and_apply(data, symbols, &shdr, insns, 590 | map_data, nr_maps)) 591 | continue; 592 | } 593 | } 594 | 595 | /* load programs */ 596 | for (i = 1; i < ehdr.e_shnum; i++) { 597 | 598 | if (processed_sec[i]) 599 | continue; 600 | 601 | if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 602 | continue; 603 | 604 | if (memcmp(shname, "kprobe/", 7) == 0 || 605 | memcmp(shname, "kretprobe/", 10) == 0 || 606 | memcmp(shname, "tracepoint/", 11) == 0 || 607 | memcmp(shname, "xdp", 3) == 0 || 608 | memcmp(shname, "perf_event", 10) == 0 || 609 | memcmp(shname, "socket", 6) == 0 || 610 | memcmp(shname, "cgroup/", 7) == 0 || 611 | memcmp(shname, "sockops", 7) == 0 || 612 | memcmp(shname, "sk_skb", 6) == 0) { 613 | ret = load_and_attach(shname, data->d_buf, 614 | data->d_size); 615 | if (ret != 0) 616 | goto done; 617 | } 618 | } 619 | 620 | ret = 0; 621 | done: 622 | close(fd); 623 | return ret; 624 | } 625 | 626 | int load_bpf_file(char *path) 627 | { 628 | return do_load_bpf_file(path, NULL); 629 | } 630 | 631 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) 632 | { 633 | return do_load_bpf_file(path, fixup_map); 634 | } 635 | 636 | void read_trace_pipe(void) 637 | { 638 | int trace_fd; 639 | 640 | trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0); 641 | if (trace_fd < 0) 642 | return; 643 | 644 | while (1) { 645 | static char buf[4096]; 646 | ssize_t sz; 647 | 648 | sz = read(trace_fd, buf, sizeof(buf)); 649 | if (sz > 0) { 650 | buf[sz] = 0; 651 | 
puts(buf); 652 | } 653 | } 654 | } 655 | 656 | #define MAX_SYMS 300000 657 | static struct ksym syms[MAX_SYMS]; 658 | static int sym_cnt; 659 | 660 | static int ksym_cmp(const void *p1, const void *p2) 661 | { 662 | return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; 663 | } 664 | 665 | int load_kallsyms(void) 666 | { 667 | FILE *f = fopen("/proc/kallsyms", "r"); 668 | char func[256], buf[256]; 669 | char symbol; 670 | void *addr; 671 | int i = 0; 672 | 673 | if (!f) 674 | return -ENOENT; 675 | 676 | while (!feof(f)) { 677 | if (!fgets(buf, sizeof(buf), f)) 678 | break; 679 | if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) 680 | break; 681 | if (!addr) 682 | continue; 683 | syms[i].addr = (long) addr; 684 | syms[i].name = strdup(func); 685 | i++; 686 | } 687 | sym_cnt = i; 688 | qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); 689 | return 0; 690 | } 691 | 692 | struct ksym *ksym_search(long key) 693 | { 694 | int start = 0, end = sym_cnt; 695 | int result; 696 | 697 | while (start < end) { 698 | size_t mid = start + (end - start) / 2; 699 | 700 | result = key - syms[mid].addr; 701 | if (result < 0) 702 | end = mid; 703 | else if (result > 0) 704 | start = mid + 1; 705 | else 706 | return &syms[mid]; 707 | } 708 | 709 | if (start >= 1 && syms[start - 1].addr < key && 710 | key < syms[start].addr) 711 | /* valid ksym */ 712 | return &syms[start - 1]; 713 | 714 | /* out of range. 
return _stext */ 715 | return &syms[0]; 716 | } 717 | 718 | int set_link_xdp_fd(int ifindex, int fd, __u32 flags) 719 | { 720 | struct sockaddr_nl sa; 721 | int sock, seq = 0, len, ret = -1; 722 | char buf[4096]; 723 | struct nlattr *nla, *nla_xdp; 724 | struct { 725 | struct nlmsghdr nh; 726 | struct ifinfomsg ifinfo; 727 | char attrbuf[64]; 728 | } req; 729 | struct nlmsghdr *nh; 730 | struct nlmsgerr *err; 731 | 732 | memset(&sa, 0, sizeof(sa)); 733 | sa.nl_family = AF_NETLINK; 734 | 735 | sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 736 | if (sock < 0) { 737 | printf("open netlink socket: %s\n", strerror(errno)); 738 | return -1; 739 | } 740 | 741 | if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { 742 | printf("bind to netlink: %s\n", strerror(errno)); 743 | goto cleanup; 744 | } 745 | 746 | memset(&req, 0, sizeof(req)); 747 | req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 748 | req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; 749 | req.nh.nlmsg_type = RTM_SETLINK; 750 | req.nh.nlmsg_pid = 0; 751 | req.nh.nlmsg_seq = ++seq; 752 | req.ifinfo.ifi_family = AF_UNSPEC; 753 | req.ifinfo.ifi_index = ifindex; 754 | 755 | /* started nested attribute for XDP */ 756 | nla = (struct nlattr *)(((char *)&req) 757 | + NLMSG_ALIGN(req.nh.nlmsg_len)); 758 | nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; 759 | nla->nla_len = NLA_HDRLEN; 760 | 761 | /* add XDP fd */ 762 | nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); 763 | nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; 764 | nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); 765 | memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); 766 | nla->nla_len += nla_xdp->nla_len; 767 | 768 | /* if user passed in any flags, add those too */ 769 | if (flags) { 770 | nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); 771 | nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/; 772 | nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); 773 | memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); 774 | nla->nla_len += 
nla_xdp->nla_len; 775 | } 776 | 777 | req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); 778 | 779 | if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { 780 | printf("send to netlink: %s\n", strerror(errno)); 781 | goto cleanup; 782 | } 783 | 784 | len = recv(sock, buf, sizeof(buf), 0); 785 | if (len < 0) { 786 | printf("recv from netlink: %s\n", strerror(errno)); 787 | goto cleanup; 788 | } 789 | 790 | for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); 791 | nh = NLMSG_NEXT(nh, len)) { 792 | if (nh->nlmsg_pid != getpid()) { 793 | printf("Wrong pid %d, expected %d\n", 794 | nh->nlmsg_pid, getpid()); 795 | goto cleanup; 796 | } 797 | if (nh->nlmsg_seq != seq) { 798 | printf("Wrong seq %d, expected %d\n", 799 | nh->nlmsg_seq, seq); 800 | goto cleanup; 801 | } 802 | switch (nh->nlmsg_type) { 803 | case NLMSG_ERROR: 804 | err = (struct nlmsgerr *)NLMSG_DATA(nh); 805 | if (!err->error) 806 | continue; 807 | printf("nlmsg error %s\n", strerror(-err->error)); 808 | goto cleanup; 809 | case NLMSG_DONE: 810 | break; 811 | } 812 | } 813 | 814 | ret = 0; 815 | 816 | cleanup: 817 | close(sock); 818 | return ret; 819 | } 820 | -------------------------------------------------------------------------------- /bpf_load.h: -------------------------------------------------------------------------------- 1 | /* Copy of samples/bpf/bpf_load.h */ 2 | #ifndef __BPF_LOAD_H 3 | #define __BPF_LOAD_H 4 | 5 | #include "libbpf.h" 6 | 7 | #define MAX_MAPS 32 8 | #define MAX_PROGS 32 9 | 10 | struct bpf_map_def { 11 | unsigned int type; 12 | unsigned int key_size; 13 | unsigned int value_size; 14 | unsigned int max_entries; 15 | unsigned int map_flags; 16 | unsigned int inner_map_idx; 17 | unsigned int numa_node; 18 | }; 19 | 20 | struct bpf_map_data { 21 | int fd; 22 | char *name; 23 | size_t elf_offset; 24 | struct bpf_map_def def; 25 | }; 26 | 27 | typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); 28 | 29 | extern int prog_fd[MAX_PROGS]; 30 | extern int event_fd[MAX_PROGS]; 31 | 
extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; 32 | extern int prog_cnt; 33 | 34 | /* There is a one-to-one mapping between map_fd[] and map_data[]. 35 | * The map_data[] just contains more rich info on the given map. 36 | */ 37 | extern int map_fd[MAX_MAPS]; 38 | extern struct bpf_map_data map_data[MAX_MAPS]; 39 | extern int map_data_count; 40 | 41 | /* parses elf file compiled by llvm .c->.o 42 | * . parses 'maps' section and creates maps via BPF syscall 43 | * . parses 'license' section and passes it to syscall 44 | * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by 45 | * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD 46 | * . loads eBPF programs via BPF syscall 47 | * 48 | * One ELF file can contain multiple BPF programs which will be loaded 49 | * and their FDs stored stored in prog_fd array 50 | * 51 | * returns zero on success 52 | */ 53 | int load_bpf_file(char *path); 54 | int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); 55 | 56 | void read_trace_pipe(void); 57 | struct ksym { 58 | long addr; 59 | char *name; 60 | }; 61 | 62 | int load_kallsyms(void); 63 | struct ksym *ksym_search(long key); 64 | 65 | /* UAPI XDP_FLAGS avail in include/linux/if_link.h, but distro are 66 | * lacking behind. 
67 | */ 68 | #ifndef XDP_FLAGS_UPDATE_IF_NOEXIST 69 | #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) 70 | #endif 71 | /* Since v4.12-rc1 : b5cdae3291f7 ("net: Generic XDP") */ 72 | #ifndef XDP_FLAGS_SKB_MODE 73 | #define XDP_FLAGS_SKB_MODE (1U << 1) 74 | #endif 75 | /* Since: v4.12-rc2 : 0489df9a430e ("xdp: add flag to enforce driver mode") */ 76 | #ifndef XDP_FLAGS_DRV_MODE 77 | #define XDP_FLAGS_DRV_MODE (1U << 2) 78 | #endif 79 | /* Since: v4.13-rc1 / ee5d032f7d03 ("xdp: add HW offload mode flag for installing programs")*/ 80 | #ifndef XDP_FLAGS_HW_MODE 81 | #define XDP_FLAGS_HW_MODE (1U << 3) 82 | #undef XDP_FLAGS_MODES 83 | #define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \ 84 | XDP_FLAGS_DRV_MODE | \ 85 | XDP_FLAGS_HW_MODE) 86 | #undef XDP_FLAGS_MASK 87 | #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ 88 | XDP_FLAGS_MODES) 89 | #endif 90 | 91 | int set_link_xdp_fd(int ifindex, int fd, __u32 flags); 92 | #endif 93 | -------------------------------------------------------------------------------- /bpf_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __BPF_UTIL__ 2 | #define __BPF_UTIL__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | static inline unsigned int bpf_num_possible_cpus(void) 10 | { 11 | static const char *fcpu = "/sys/devices/system/cpu/possible"; 12 | unsigned int start, end, possible_cpus = 0; 13 | char buff[128]; 14 | FILE *fp; 15 | 16 | fp = fopen(fcpu, "r"); 17 | if (!fp) { 18 | printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno)); 19 | exit(1); 20 | } 21 | 22 | while (fgets(buff, sizeof(buff), fp)) { 23 | if (sscanf(buff, "%u-%u", &start, &end) == 2) { 24 | possible_cpus = start == 0 ? 
end + 1 : 0; 25 | break; 26 | } 27 | } 28 | 29 | fclose(fp); 30 | if (!possible_cpus) { 31 | printf("Failed to retrieve # possible CPUs!\n"); 32 | exit(1); 33 | } 34 | 35 | return possible_cpus; 36 | } 37 | 38 | #endif /* __BPF_UTIL__ */ 39 | -------------------------------------------------------------------------------- /icmp.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | #define PACKETSIZE 64 4 | struct packet 5 | { 6 | struct icmphdr hdr; 7 | char msg[PACKETSIZE-sizeof(struct icmphdr)]; 8 | }; 9 | 10 | int pid=-1; 11 | struct protoent *proto=NULL; 12 | 13 | unsigned short checksum(void *b, int len) 14 | { unsigned short *buf = b; 15 | unsigned int sum=0; 16 | unsigned short result; 17 | 18 | for ( sum = 0; len > 1; len -= 2 ) 19 | sum += *buf++; 20 | if ( len == 1 ) 21 | sum += *(unsigned char*)buf; 22 | sum = (sum >> 16) + (sum & 0xFFFF); 23 | sum += (sum >> 16); 24 | result = ~sum; 25 | return result; 26 | } 27 | 28 | void ping(struct sockaddr_in *addr) 29 | { const int val=255; 30 | int i, sd, cnt=1; 31 | struct packet pckt; 32 | 33 | sd = socket(PF_INET, SOCK_RAW, proto->p_proto); 34 | if ( sd < 0 ) 35 | { 36 | perror("socket"); 37 | return; 38 | } 39 | if ( setsockopt(sd, SOL_IP, IP_TTL, &val, sizeof(val)) != 0) 40 | perror("Set TTL option"); 41 | if ( fcntl(sd, F_SETFL, O_NONBLOCK) != 0 ) 42 | perror("Request nonblocking I/O"); 43 | 44 | if (DEBUG) printf("ICMP #%d sent.\n", cnt); 45 | bzero(&pckt, sizeof(pckt)); 46 | pckt.hdr.type = ICMP_ECHO; 47 | pckt.hdr.un.echo.id = pid; 48 | for ( i = 0; i < sizeof(pckt.msg)-1; i++ ) 49 | pckt.msg[i] = i+'0'; 50 | pckt.msg[i] = 0; 51 | pckt.hdr.un.echo.sequence = cnt++; 52 | pckt.hdr.checksum = checksum(&pckt, sizeof(pckt)); 53 | if ( sendto(sd, &pckt, sizeof(pckt), 0, (struct sockaddr*)addr, sizeof(*addr)) <= 0 ) 54 | perror("sendto"); 55 | } 56 | 57 | int icmp_send_1pkt(in_addr_t *dst_ip) 58 | { 59 | struct sockaddr_in addr; 60 | 61 | proto = 
getprotobyname("ICMP"); 62 | bzero(&addr, sizeof(addr)); 63 | addr.sin_family = AF_INET; 64 | addr.sin_port = 0; 65 | addr.sin_addr.s_addr = *dst_ip; 66 | ping(&addr); 67 | 68 | return 0; 69 | } 70 | 71 | -------------------------------------------------------------------------------- /libbpf.h: -------------------------------------------------------------------------------- 1 | /* Copied from $(KERNEL)/samples/bpf/libbpf.h 2 | * WARNING: Don't confuse this with tools/lib/bpf/libbpf.h 3 | */ 4 | /* eBPF mini library */ 5 | #ifndef __LIBBPF_H 6 | #define __LIBBPF_H 7 | 8 | /* Notice: This include is tricky because, due to Makefile 9 | * construct of -I$(KERNEL)/tools/lib/ this include find 10 | * tools/lib/bpf/bpf.h which defines the userspace API 11 | */ 12 | #include 13 | 14 | struct bpf_insn; 15 | 16 | /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ 17 | 18 | #define BPF_ALU64_REG(OP, DST, SRC) \ 19 | ((struct bpf_insn) { \ 20 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ 21 | .dst_reg = DST, \ 22 | .src_reg = SRC, \ 23 | .off = 0, \ 24 | .imm = 0 }) 25 | 26 | #define BPF_ALU32_REG(OP, DST, SRC) \ 27 | ((struct bpf_insn) { \ 28 | .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ 29 | .dst_reg = DST, \ 30 | .src_reg = SRC, \ 31 | .off = 0, \ 32 | .imm = 0 }) 33 | 34 | /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ 35 | 36 | #define BPF_ALU64_IMM(OP, DST, IMM) \ 37 | ((struct bpf_insn) { \ 38 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ 39 | .dst_reg = DST, \ 40 | .src_reg = 0, \ 41 | .off = 0, \ 42 | .imm = IMM }) 43 | 44 | #define BPF_ALU32_IMM(OP, DST, IMM) \ 45 | ((struct bpf_insn) { \ 46 | .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ 47 | .dst_reg = DST, \ 48 | .src_reg = 0, \ 49 | .off = 0, \ 50 | .imm = IMM }) 51 | 52 | /* Short form of mov, dst_reg = src_reg */ 53 | 54 | #define BPF_MOV64_REG(DST, SRC) \ 55 | ((struct bpf_insn) { \ 56 | .code = BPF_ALU64 | BPF_MOV | BPF_X, \ 57 | .dst_reg = DST, \ 58 | .src_reg = SRC, \ 59 | .off = 0, \ 60 | 
.imm = 0 }) 61 | 62 | #define BPF_MOV32_REG(DST, SRC) \ 63 | ((struct bpf_insn) { \ 64 | .code = BPF_ALU | BPF_MOV | BPF_X, \ 65 | .dst_reg = DST, \ 66 | .src_reg = SRC, \ 67 | .off = 0, \ 68 | .imm = 0 }) 69 | 70 | /* Short form of mov, dst_reg = imm32 */ 71 | 72 | #define BPF_MOV64_IMM(DST, IMM) \ 73 | ((struct bpf_insn) { \ 74 | .code = BPF_ALU64 | BPF_MOV | BPF_K, \ 75 | .dst_reg = DST, \ 76 | .src_reg = 0, \ 77 | .off = 0, \ 78 | .imm = IMM }) 79 | 80 | #define BPF_MOV32_IMM(DST, IMM) \ 81 | ((struct bpf_insn) { \ 82 | .code = BPF_ALU | BPF_MOV | BPF_K, \ 83 | .dst_reg = DST, \ 84 | .src_reg = 0, \ 85 | .off = 0, \ 86 | .imm = IMM }) 87 | 88 | /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ 89 | #define BPF_LD_IMM64(DST, IMM) \ 90 | BPF_LD_IMM64_RAW(DST, 0, IMM) 91 | 92 | #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ 93 | ((struct bpf_insn) { \ 94 | .code = BPF_LD | BPF_DW | BPF_IMM, \ 95 | .dst_reg = DST, \ 96 | .src_reg = SRC, \ 97 | .off = 0, \ 98 | .imm = (__u32) (IMM) }), \ 99 | ((struct bpf_insn) { \ 100 | .code = 0, /* zero is reserved opcode */ \ 101 | .dst_reg = 0, \ 102 | .src_reg = 0, \ 103 | .off = 0, \ 104 | .imm = ((__u64) (IMM)) >> 32 }) 105 | 106 | #ifndef BPF_PSEUDO_MAP_FD 107 | # define BPF_PSEUDO_MAP_FD 1 108 | #endif 109 | 110 | /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ 111 | #define BPF_LD_MAP_FD(DST, MAP_FD) \ 112 | BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) 113 | 114 | 115 | /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ 116 | 117 | #define BPF_LD_ABS(SIZE, IMM) \ 118 | ((struct bpf_insn) { \ 119 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ 120 | .dst_reg = 0, \ 121 | .src_reg = 0, \ 122 | .off = 0, \ 123 | .imm = IMM }) 124 | 125 | /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ 126 | 127 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 128 | ((struct bpf_insn) { \ 129 | .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ 130 | .dst_reg = DST, \ 131 | .src_reg = SRC, \ 
132 | .off = OFF, \ 133 | .imm = 0 }) 134 | 135 | /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ 136 | 137 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ 138 | ((struct bpf_insn) { \ 139 | .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ 140 | .dst_reg = DST, \ 141 | .src_reg = SRC, \ 142 | .off = OFF, \ 143 | .imm = 0 }) 144 | 145 | /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ 146 | 147 | #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ 148 | ((struct bpf_insn) { \ 149 | .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ 150 | .dst_reg = DST, \ 151 | .src_reg = 0, \ 152 | .off = OFF, \ 153 | .imm = IMM }) 154 | 155 | /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ 156 | 157 | #define BPF_JMP_REG(OP, DST, SRC, OFF) \ 158 | ((struct bpf_insn) { \ 159 | .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ 160 | .dst_reg = DST, \ 161 | .src_reg = SRC, \ 162 | .off = OFF, \ 163 | .imm = 0 }) 164 | 165 | /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ 166 | 167 | #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 168 | ((struct bpf_insn) { \ 169 | .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ 170 | .dst_reg = DST, \ 171 | .src_reg = 0, \ 172 | .off = OFF, \ 173 | .imm = IMM }) 174 | 175 | /* Raw code statement block */ 176 | 177 | #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ 178 | ((struct bpf_insn) { \ 179 | .code = CODE, \ 180 | .dst_reg = DST, \ 181 | .src_reg = SRC, \ 182 | .off = OFF, \ 183 | .imm = IMM }) 184 | 185 | /* Program exit */ 186 | 187 | #define BPF_EXIT_INSN() \ 188 | ((struct bpf_insn) { \ 189 | .code = BPF_JMP | BPF_EXIT, \ 190 | .dst_reg = 0, \ 191 | .src_reg = 0, \ 192 | .off = 0, \ 193 | .imm = 0 }) 194 | 195 | #endif 196 | -------------------------------------------------------------------------------- /mac.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | int xlb_get_mac(in_addr_t *host, char *mac, int *dev){ 4 | 5 | int s; 6 | 7 | 
struct arpreq req; 8 | struct sockaddr_in *sin; 9 | static char buf[256]; 10 | 11 | bzero((caddr_t)&req, sizeof(req)); 12 | 13 | sin = (struct sockaddr_in *)&req.arp_pa; 14 | sin->sin_family = AF_INET; 15 | sin->sin_addr.s_addr = *host; 16 | 17 | if((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ 18 | perror("socket() failed."); 19 | exit(-1); 20 | } 21 | 22 | if_indextoname(*dev, req.arp_dev); 23 | if (DEBUG) printf("ifname= %s\n", req.arp_dev); 24 | 25 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 26 | if(errno == ENXIO){ 27 | 28 | icmp_send_1pkt(&sin->sin_addr.s_addr); 29 | usleep(100000); 30 | 31 | if(ioctl(s, SIOCGARP, (caddr_t)&req) <0){ 32 | if(errno == ENXIO){ 33 | printf("%s - no entry.\n", inet_ntop(AF_INET, host, buf, 256)); 34 | // printf("%lu - no entry.\n", *host); 35 | exit(-1); 36 | } else { 37 | perror("SIOCGARP"); 38 | exit(-1); 39 | } 40 | } 41 | 42 | } else { 43 | perror("SIOCGARP"); 44 | exit(-1); 45 | } 46 | } 47 | 48 | if(!(req.arp_flags & ATF_COM)){ 49 | printf("Could not get workers Mac address from arp cache.\n"); 50 | exit(-1); 51 | } 52 | 53 | memcpy(mac, req.arp_ha.sa_data, 6); 54 | 55 | return(0); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /perf-sys.h: -------------------------------------------------------------------------------- 1 | /* Notice: copy of kernel/tools/perf/perf-sys.h */ 2 | #ifndef _PERF_SYS_H 3 | #define _PERF_SYS_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | /* 11 | #include 12 | #include 13 | #include 14 | 15 | #if defined(__i386__) 16 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); 17 | #define CPUINFO_PROC {"model name"} 18 | #endif 19 | 20 | #if defined(__x86_64__) 21 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); 22 | #define CPUINFO_PROC {"model name"} 23 | #endif 24 | 25 | #ifdef __powerpc__ 26 | #define CPUINFO_PROC {"cpu"} 27 | #endif 28 | 29 | #ifdef __s390__ 30 | #define CPUINFO_PROC {"vendor_id"} 31 | #endif 32 
| 33 | #ifdef __sh__ 34 | #define CPUINFO_PROC {"cpu type"} 35 | #endif 36 | 37 | #ifdef __hppa__ 38 | #define CPUINFO_PROC {"cpu"} 39 | #endif 40 | 41 | #ifdef __sparc__ 42 | #define CPUINFO_PROC {"cpu"} 43 | #endif 44 | 45 | #ifdef __alpha__ 46 | #define CPUINFO_PROC {"cpu model"} 47 | #endif 48 | 49 | #ifdef __ia64__ 50 | #define cpu_relax() asm volatile ("hint @pause" ::: "memory") 51 | #define CPUINFO_PROC {"model name"} 52 | #endif 53 | 54 | #ifdef __arm__ 55 | #define CPUINFO_PROC {"model name", "Processor"} 56 | #endif 57 | 58 | #ifdef __aarch64__ 59 | #define cpu_relax() asm volatile("yield" ::: "memory") 60 | #endif 61 | 62 | #ifdef __mips__ 63 | #define CPUINFO_PROC {"cpu model"} 64 | #endif 65 | 66 | #ifdef __arc__ 67 | #define CPUINFO_PROC {"Processor"} 68 | #endif 69 | 70 | #ifdef __metag__ 71 | #define CPUINFO_PROC {"CPU"} 72 | #endif 73 | 74 | #ifdef __xtensa__ 75 | #define CPUINFO_PROC {"core ID"} 76 | #endif 77 | 78 | #ifdef __tile__ 79 | #define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory") 80 | #define CPUINFO_PROC {"model name"} 81 | #endif 82 | 83 | #ifndef cpu_relax 84 | #define cpu_relax() barrier() 85 | #endif 86 | */ 87 | 88 | static inline int 89 | sys_perf_event_open(struct perf_event_attr *attr, 90 | pid_t pid, int cpu, int group_fd, 91 | unsigned long flags) 92 | { 93 | int fd; 94 | 95 | fd = syscall(__NR_perf_event_open, attr, pid, cpu, 96 | group_fd, flags); 97 | 98 | #ifdef HAVE_ATTR_TEST 99 | if (unlikely(test_attr__enabled)) 100 | test_attr__open(attr, pid, cpu, fd, group_fd, flags); 101 | #endif 102 | return fd; 103 | } 104 | 105 | #endif /* _PERF_SYS_H */ 106 | -------------------------------------------------------------------------------- /rmi.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | //#include 14 | #include 15 
| 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | /// icmp 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | /// 35 | 36 | #define IFLIST_REPLY_BUFFER 8192 37 | 38 | #ifndef DEBUG 39 | #define DEBUG 0 40 | #endif 41 | 42 | #define NLMSG_TAIL(nmsg) \ 43 | ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) 44 | 45 | int xlb_parse_route(struct nlmsghdr *nlh, in_addr_t *src_ip, in_addr_t *nh_ip, int *dev); 46 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 47 | int alen); 48 | int xlb_iproute_get(in_addr_t *dst_ip, in_addr_t *src_ip , in_addr_t *nh_ip, int *dev); 49 | int xlb_get_mac(in_addr_t *host, char *mac, int *dev); 50 | 51 | unsigned short checksum(void *b, int len); 52 | void ping(struct sockaddr_in *addr); 53 | int icmp_send_1pkt(in_addr_t *dst_ip); 54 | 55 | -------------------------------------------------------------------------------- /route.c: -------------------------------------------------------------------------------- 1 | #include "rmi.h" 2 | 3 | int xlb_parse_route(struct nlmsghdr *nlh, in_addr_t *src_ip, in_addr_t *nh_ip, int *dev) 4 | { 5 | struct rtmsg *route_entry; 6 | struct rtattr *route_attribute; 7 | int route_attribute_len = 0; 8 | // unsigned char route_netmask = 0; 9 | // unsigned char route_protocol = 0; 10 | int via = 0; 11 | 12 | route_entry = (struct rtmsg *) NLMSG_DATA(nlh); 13 | 14 | if (route_entry->rtm_table != RT_TABLE_MAIN) 15 | return 1; 16 | 17 | // route_netmask = route_entry->rtm_dst_len; 18 | // route_protocol = route_entry->rtm_protocol; 19 | route_attribute = (struct rtattr *) RTM_RTA(route_entry); 20 | route_attribute_len = RTM_PAYLOAD(nlh); 21 | 22 | for ( ; RTA_OK(route_attribute, route_attribute_len); \ 23 | route_attribute = RTA_NEXT(route_attribute, route_attribute_len)) 24 | { 25 | 26 | if (route_attribute->rta_type == RTA_DST) 27 | if (via == 
0) 28 | memcpy(nh_ip, RTA_DATA(route_attribute), 4); 29 | 30 | if (route_attribute->rta_type == RTA_GATEWAY) 31 | { 32 | memcpy(nh_ip, RTA_DATA(route_attribute), 4); 33 | via = 1; 34 | } 35 | 36 | if (route_attribute->rta_type == RTA_PREFSRC) 37 | memcpy(src_ip, RTA_DATA(route_attribute), 4); 38 | 39 | if (route_attribute->rta_type == RTA_OIF) 40 | memcpy(dev, RTA_DATA(route_attribute), sizeof(int)); 41 | } 42 | 43 | return 0; 44 | } 45 | 46 | int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 47 | int alen) 48 | { 49 | int len = RTA_LENGTH(alen); 50 | struct rtattr *rta; 51 | 52 | if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { 53 | fprintf(stderr, 54 | "addattr_l ERROR: message exceeded bound of %d\n", 55 | maxlen); 56 | return -1; 57 | } 58 | rta = NLMSG_TAIL(n); 59 | rta->rta_type = type; 60 | rta->rta_len = len; 61 | if (alen) 62 | memcpy(RTA_DATA(rta), data, alen); 63 | n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); 64 | return 0; 65 | } 66 | 67 | int xlb_iproute_get(in_addr_t *dst_ip, in_addr_t *src_ip , in_addr_t *nh_ip, int *dev) 68 | { 69 | struct msghdr rtnl_msg; 70 | struct iovec io; 71 | int fd; 72 | 73 | struct { 74 | struct nlmsghdr n; 75 | struct rtmsg r; 76 | char buf[1024]; 77 | } req; 78 | 79 | memset(&rtnl_msg, 0, sizeof(rtnl_msg)); 80 | memset(&req, 0, sizeof(req)); 81 | 82 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); 83 | req.n.nlmsg_flags = NLM_F_REQUEST; 84 | req.n.nlmsg_type = RTM_GETROUTE; 85 | req.r.rtm_family = AF_INET; 86 | 87 | 88 | addattr_l(&req.n, sizeof(req), RTA_DST, dst_ip, 4); 89 | 90 | io.iov_base = &req; 91 | io.iov_len = req.n.nlmsg_len; 92 | rtnl_msg.msg_iov = &io; 93 | rtnl_msg.msg_iovlen = 1; 94 | 95 | fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 96 | sendmsg(fd, (struct msghdr *) &rtnl_msg, 0); 97 | 98 | /* parse reply */ 99 | 100 | { 101 | struct nlmsghdr *answer; 102 | struct msghdr rtnl_reply; 103 | struct iovec io_reply; 104 | char 
reply[IFLIST_REPLY_BUFFER]; 105 | 106 | 107 | memset(&io_reply, 0, sizeof(io_reply)); 108 | memset(&rtnl_reply, 0, sizeof(rtnl_reply)); 109 | 110 | io.iov_base = reply; 111 | io.iov_len = IFLIST_REPLY_BUFFER; 112 | rtnl_reply.msg_iov = &io; 113 | rtnl_reply.msg_iovlen = 1; 114 | 115 | recvmsg(fd, &rtnl_reply, 0); 116 | answer = (struct nlmsghdr *) reply; 117 | 118 | xlb_parse_route(answer, src_ip, nh_ip, dev); 119 | } 120 | 121 | close(fd); 122 | 123 | return 0; 124 | } 125 | 126 | -------------------------------------------------------------------------------- /tools/include/linux/bpf.h: -------------------------------------------------------------------------------- 1 | ../uapi/linux/bpf.h -------------------------------------------------------------------------------- /tools/include/uapi/linux/bpf.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 | * 4 | * Copy of kernel tools/include/uapi/linux/bpf.h 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of version 2 of the GNU General Public 8 | * License as published by the Free Software Foundation. 9 | */ 10 | 11 | /* NOTICE: Need to keep a more updated copy of bpf.h UAPI definitions 12 | * when developing new bpf features. Unfortunately there is an 13 | * include mess, I haven't solved 100% yet. 
Thus, this trick warns 14 | * when another (presumably) older version of this bpf.h UAPI got 15 | * included (likely from distro or kernel source) 16 | */ 17 | #ifdef __LINUX_BPF_H__ 18 | # ifndef __LINUX_BPF_H__git_repo_copy 19 | # warning "This bpf.h git-repo-copy not getting used" 20 | //# else 21 | //# warning "Double include of this bpf.h (consider cleanup)" 22 | # endif 23 | #endif 24 | 25 | #ifndef __LINUX_BPF_H__ 26 | #define __LINUX_BPF_H__ 27 | 28 | #define __LINUX_BPF_H__git_repo_copy 29 | 30 | #include 31 | #include 32 | 33 | /* Extended instruction set based on top of classic BPF */ 34 | 35 | /* instruction classes */ 36 | #define BPF_ALU64 0x07 /* alu mode in double word width */ 37 | 38 | /* ld/ldx fields */ 39 | #define BPF_DW 0x18 /* double word (64-bit) */ 40 | #define BPF_XADD 0xc0 /* exclusive add */ 41 | 42 | /* alu/jmp fields */ 43 | #define BPF_MOV 0xb0 /* mov reg to reg */ 44 | #define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ 45 | 46 | /* change endianness of a register */ 47 | #define BPF_END 0xd0 /* flags for endianness conversion: */ 48 | #define BPF_TO_LE 0x00 /* convert to little-endian */ 49 | #define BPF_TO_BE 0x08 /* convert to big-endian */ 50 | #define BPF_FROM_LE BPF_TO_LE 51 | #define BPF_FROM_BE BPF_TO_BE 52 | 53 | /* jmp encodings */ 54 | #define BPF_JNE 0x50 /* jump != */ 55 | #define BPF_JLT 0xa0 /* LT is unsigned, '<' */ 56 | #define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ 57 | #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ 58 | #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ 59 | #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ 60 | #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ 61 | #define BPF_CALL 0x80 /* function call */ 62 | #define BPF_EXIT 0x90 /* function return */ 63 | 64 | /* Register numbers */ 65 | enum { 66 | BPF_REG_0 = 0, 67 | BPF_REG_1, 68 | BPF_REG_2, 69 | BPF_REG_3, 70 | BPF_REG_4, 71 | BPF_REG_5, 72 | BPF_REG_6, 73 | BPF_REG_7, 74 | BPF_REG_8, 75 | BPF_REG_9, 76 | 
BPF_REG_10, 77 | __MAX_BPF_REG, 78 | }; 79 | 80 | /* BPF has 10 general purpose 64-bit registers and stack frame. */ 81 | #define MAX_BPF_REG __MAX_BPF_REG 82 | 83 | struct bpf_insn { 84 | __u8 code; /* opcode */ 85 | __u8 dst_reg:4; /* dest register */ 86 | __u8 src_reg:4; /* source register */ 87 | __s16 off; /* signed offset */ 88 | __s32 imm; /* signed immediate constant */ 89 | }; 90 | 91 | /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ 92 | struct bpf_lpm_trie_key { 93 | __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ 94 | __u8 data[0]; /* Arbitrary size */ 95 | }; 96 | 97 | /* BPF syscall commands, see bpf(2) man-page for details. */ 98 | enum bpf_cmd { 99 | BPF_MAP_CREATE, 100 | BPF_MAP_LOOKUP_ELEM, 101 | BPF_MAP_UPDATE_ELEM, 102 | BPF_MAP_DELETE_ELEM, 103 | BPF_MAP_GET_NEXT_KEY, 104 | BPF_PROG_LOAD, 105 | BPF_OBJ_PIN, 106 | BPF_OBJ_GET, 107 | BPF_PROG_ATTACH, 108 | BPF_PROG_DETACH, 109 | BPF_PROG_TEST_RUN, 110 | BPF_PROG_GET_NEXT_ID, 111 | BPF_MAP_GET_NEXT_ID, 112 | BPF_PROG_GET_FD_BY_ID, 113 | BPF_MAP_GET_FD_BY_ID, 114 | BPF_OBJ_GET_INFO_BY_FD, 115 | BPF_PROG_QUERY, 116 | }; 117 | 118 | enum bpf_map_type { 119 | BPF_MAP_TYPE_UNSPEC, 120 | BPF_MAP_TYPE_HASH, 121 | BPF_MAP_TYPE_ARRAY, 122 | BPF_MAP_TYPE_PROG_ARRAY, 123 | BPF_MAP_TYPE_PERF_EVENT_ARRAY, 124 | BPF_MAP_TYPE_PERCPU_HASH, 125 | BPF_MAP_TYPE_PERCPU_ARRAY, 126 | BPF_MAP_TYPE_STACK_TRACE, 127 | BPF_MAP_TYPE_CGROUP_ARRAY, 128 | BPF_MAP_TYPE_LRU_HASH, 129 | BPF_MAP_TYPE_LRU_PERCPU_HASH, 130 | BPF_MAP_TYPE_LPM_TRIE, 131 | BPF_MAP_TYPE_ARRAY_OF_MAPS, 132 | BPF_MAP_TYPE_HASH_OF_MAPS, 133 | BPF_MAP_TYPE_DEVMAP, 134 | BPF_MAP_TYPE_SOCKMAP, 135 | BPF_MAP_TYPE_CPUMAP, 136 | }; 137 | 138 | enum bpf_prog_type { 139 | BPF_PROG_TYPE_UNSPEC, 140 | BPF_PROG_TYPE_SOCKET_FILTER, 141 | BPF_PROG_TYPE_KPROBE, 142 | BPF_PROG_TYPE_SCHED_CLS, 143 | BPF_PROG_TYPE_SCHED_ACT, 144 | BPF_PROG_TYPE_TRACEPOINT, 145 | BPF_PROG_TYPE_XDP, 146 | BPF_PROG_TYPE_PERF_EVENT, 147 | BPF_PROG_TYPE_CGROUP_SKB, 148 | 
BPF_PROG_TYPE_CGROUP_SOCK, 149 | BPF_PROG_TYPE_LWT_IN, 150 | BPF_PROG_TYPE_LWT_OUT, 151 | BPF_PROG_TYPE_LWT_XMIT, 152 | BPF_PROG_TYPE_SOCK_OPS, 153 | BPF_PROG_TYPE_SK_SKB, 154 | BPF_PROG_TYPE_CGROUP_DEVICE, 155 | }; 156 | 157 | enum bpf_attach_type { 158 | BPF_CGROUP_INET_INGRESS, 159 | BPF_CGROUP_INET_EGRESS, 160 | BPF_CGROUP_INET_SOCK_CREATE, 161 | BPF_CGROUP_SOCK_OPS, 162 | BPF_SK_SKB_STREAM_PARSER, 163 | BPF_SK_SKB_STREAM_VERDICT, 164 | BPF_CGROUP_DEVICE, 165 | __MAX_BPF_ATTACH_TYPE 166 | }; 167 | 168 | #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE 169 | 170 | /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command 171 | * 172 | * NONE(default): No further bpf programs allowed in the subtree. 173 | * 174 | * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, 175 | * the program in this cgroup yields to sub-cgroup program. 176 | * 177 | * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, 178 | * that cgroup program gets run in addition to the program in this cgroup. 179 | * 180 | * Only one program is allowed to be attached to a cgroup with 181 | * NONE or BPF_F_ALLOW_OVERRIDE flag. 182 | * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will 183 | * release old program and attach the new one. Attach flags has to match. 184 | * 185 | * Multiple programs are allowed to be attached to a cgroup with 186 | * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order 187 | * (those that were attached first, run first) 188 | * The programs of sub-cgroup are executed first, then programs of 189 | * this cgroup and then programs of parent cgroup. 190 | * When children program makes decision (like picking TCP CA or sock bind) 191 | * parent program has a chance to override it. 192 | * 193 | * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. 194 | * A cgroup with NONE doesn't allow any programs in sub-cgroups. 
195 | * Ex1: 196 | * cgrp1 (MULTI progs A, B) -> 197 | * cgrp2 (OVERRIDE prog C) -> 198 | * cgrp3 (MULTI prog D) -> 199 | * cgrp4 (OVERRIDE prog E) -> 200 | * cgrp5 (NONE prog F) 201 | * the event in cgrp5 triggers execution of F,D,A,B in that order. 202 | * if prog F is detached, the execution is E,D,A,B 203 | * if prog F and D are detached, the execution is E,A,B 204 | * if prog F, E and D are detached, the execution is C,A,B 205 | * 206 | * All eligible programs are executed regardless of return code from 207 | * earlier programs. 208 | */ 209 | #define BPF_F_ALLOW_OVERRIDE (1U << 0) 210 | #define BPF_F_ALLOW_MULTI (1U << 1) 211 | 212 | /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the 213 | * verifier will perform strict alignment checking as if the kernel 214 | * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, 215 | * and NET_IP_ALIGN defined to 2. 216 | */ 217 | #define BPF_F_STRICT_ALIGNMENT (1U << 0) 218 | 219 | /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ 220 | #define BPF_PSEUDO_MAP_FD 1 221 | 222 | /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative 223 | * offset to another bpf function 224 | */ 225 | #define BPF_PSEUDO_CALL 1 226 | 227 | /* flags for BPF_MAP_UPDATE_ELEM command */ 228 | #define BPF_ANY 0 /* create new element or update existing */ 229 | #define BPF_NOEXIST 1 /* create new element if it didn't exist */ 230 | #define BPF_EXIST 2 /* update existing element */ 231 | 232 | /* flags for BPF_MAP_CREATE command */ 233 | #define BPF_F_NO_PREALLOC (1U << 0) 234 | /* Instead of having one common LRU list in the 235 | * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list 236 | * which can scale and perform better. 237 | * Note, the LRU nodes (including free nodes) cannot be moved 238 | * across different LRU lists. 
239 | */ 240 | #define BPF_F_NO_COMMON_LRU (1U << 1) 241 | /* Specify numa node during map creation */ 242 | #define BPF_F_NUMA_NODE (1U << 2) 243 | 244 | /* flags for BPF_PROG_QUERY */ 245 | #define BPF_F_QUERY_EFFECTIVE (1U << 0) 246 | 247 | #define BPF_OBJ_NAME_LEN 16U 248 | 249 | /* Flags for accessing BPF object */ 250 | #define BPF_F_RDONLY (1U << 3) 251 | #define BPF_F_WRONLY (1U << 4) 252 | 253 | union bpf_attr { 254 | struct { /* anonymous struct used by BPF_MAP_CREATE command */ 255 | __u32 map_type; /* one of enum bpf_map_type */ 256 | __u32 key_size; /* size of key in bytes */ 257 | __u32 value_size; /* size of value in bytes */ 258 | __u32 max_entries; /* max number of entries in a map */ 259 | __u32 map_flags; /* BPF_MAP_CREATE related 260 | * flags defined above. 261 | */ 262 | __u32 inner_map_fd; /* fd pointing to the inner map */ 263 | __u32 numa_node; /* numa node (effective only if 264 | * BPF_F_NUMA_NODE is set). 265 | */ 266 | char map_name[BPF_OBJ_NAME_LEN]; 267 | __u32 map_ifindex; /* ifindex of netdev to create on */ 268 | }; 269 | 270 | struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ 271 | __u32 map_fd; 272 | __aligned_u64 key; 273 | union { 274 | __aligned_u64 value; 275 | __aligned_u64 next_key; 276 | }; 277 | __u64 flags; 278 | }; 279 | 280 | struct { /* anonymous struct used by BPF_PROG_LOAD command */ 281 | __u32 prog_type; /* one of enum bpf_prog_type */ 282 | __u32 insn_cnt; 283 | __aligned_u64 insns; 284 | __aligned_u64 license; 285 | __u32 log_level; /* verbosity level of verifier */ 286 | __u32 log_size; /* size of user buffer */ 287 | __aligned_u64 log_buf; /* user supplied buffer */ 288 | __u32 kern_version; /* checked when prog_type=kprobe */ 289 | __u32 prog_flags; 290 | char prog_name[BPF_OBJ_NAME_LEN]; 291 | __u32 prog_ifindex; /* ifindex of netdev to prep for */ 292 | }; 293 | 294 | struct { /* anonymous struct used by BPF_OBJ_* commands */ 295 | __aligned_u64 pathname; 296 | __u32 bpf_fd; 297 | __u32 
file_flags; 298 | }; 299 | 300 | struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ 301 | __u32 target_fd; /* container object to attach to */ 302 | __u32 attach_bpf_fd; /* eBPF program to attach */ 303 | __u32 attach_type; 304 | __u32 attach_flags; 305 | }; 306 | 307 | struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ 308 | __u32 prog_fd; 309 | __u32 retval; 310 | __u32 data_size_in; 311 | __u32 data_size_out; 312 | __aligned_u64 data_in; 313 | __aligned_u64 data_out; 314 | __u32 repeat; 315 | __u32 duration; 316 | } test; 317 | 318 | struct { /* anonymous struct used by BPF_*_GET_*_ID */ 319 | union { 320 | __u32 start_id; 321 | __u32 prog_id; 322 | __u32 map_id; 323 | }; 324 | __u32 next_id; 325 | __u32 open_flags; 326 | }; 327 | 328 | struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ 329 | __u32 bpf_fd; 330 | __u32 info_len; 331 | __aligned_u64 info; 332 | } info; 333 | 334 | struct { /* anonymous struct used by BPF_PROG_QUERY command */ 335 | __u32 target_fd; /* container object to query */ 336 | __u32 attach_type; 337 | __u32 query_flags; 338 | __u32 attach_flags; 339 | __aligned_u64 prog_ids; 340 | __u32 prog_cnt; 341 | } query; 342 | } __attribute__((aligned(8))); 343 | 344 | /* BPF helper function descriptions: 345 | * 346 | * void *bpf_map_lookup_elem(&map, &key) 347 | * Return: Map value or NULL 348 | * 349 | * int bpf_map_update_elem(&map, &key, &value, flags) 350 | * Return: 0 on success or negative error 351 | * 352 | * int bpf_map_delete_elem(&map, &key) 353 | * Return: 0 on success or negative error 354 | * 355 | * int bpf_probe_read(void *dst, int size, void *src) 356 | * Return: 0 on success or negative error 357 | * 358 | * u64 bpf_ktime_get_ns(void) 359 | * Return: current ktime 360 | * 361 | * int bpf_trace_printk(const char *fmt, int fmt_size, ...) 
362 | * Return: length of buffer written or negative error 363 | * 364 | * u32 bpf_prandom_u32(void) 365 | * Return: random value 366 | * 367 | * u32 bpf_raw_smp_processor_id(void) 368 | * Return: SMP processor ID 369 | * 370 | * int bpf_skb_store_bytes(skb, offset, from, len, flags) 371 | * store bytes into packet 372 | * @skb: pointer to skb 373 | * @offset: offset within packet from skb->mac_header 374 | * @from: pointer where to copy bytes from 375 | * @len: number of bytes to store into packet 376 | * @flags: bit 0 - if true, recompute skb->csum 377 | * other bits - reserved 378 | * Return: 0 on success or negative error 379 | * 380 | * int bpf_l3_csum_replace(skb, offset, from, to, flags) 381 | * recompute IP checksum 382 | * @skb: pointer to skb 383 | * @offset: offset within packet where IP checksum is located 384 | * @from: old value of header field 385 | * @to: new value of header field 386 | * @flags: bits 0-3 - size of header field 387 | * other bits - reserved 388 | * Return: 0 on success or negative error 389 | * 390 | * int bpf_l4_csum_replace(skb, offset, from, to, flags) 391 | * recompute TCP/UDP checksum 392 | * @skb: pointer to skb 393 | * @offset: offset within packet where TCP/UDP checksum is located 394 | * @from: old value of header field 395 | * @to: new value of header field 396 | * @flags: bits 0-3 - size of header field 397 | * bit 4 - is pseudo header 398 | * other bits - reserved 399 | * Return: 0 on success or negative error 400 | * 401 | * int bpf_tail_call(ctx, prog_array_map, index) 402 | * jump into another BPF program 403 | * @ctx: context pointer passed to next program 404 | * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY 405 | * @index: 32-bit index inside array that selects specific program to run 406 | * Return: 0 on success or negative error 407 | * 408 | * int bpf_clone_redirect(skb, ifindex, flags) 409 | * redirect to another netdev 410 | * @skb: pointer to skb 411 | * @ifindex: ifindex of the net 
device 412 | * @flags: bit 0 - if set, redirect to ingress instead of egress 413 | * other bits - reserved 414 | * Return: 0 on success or negative error 415 | * 416 | * u64 bpf_get_current_pid_tgid(void) 417 | * Return: current->tgid << 32 | current->pid 418 | * 419 | * u64 bpf_get_current_uid_gid(void) 420 | * Return: current_gid << 32 | current_uid 421 | * 422 | * int bpf_get_current_comm(char *buf, int size_of_buf) 423 | * stores current->comm into buf 424 | * Return: 0 on success or negative error 425 | * 426 | * u32 bpf_get_cgroup_classid(skb) 427 | * retrieve a proc's classid 428 | * @skb: pointer to skb 429 | * Return: classid if != 0 430 | * 431 | * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) 432 | * Return: 0 on success or negative error 433 | * 434 | * int bpf_skb_vlan_pop(skb) 435 | * Return: 0 on success or negative error 436 | * 437 | * int bpf_skb_get_tunnel_key(skb, key, size, flags) 438 | * int bpf_skb_set_tunnel_key(skb, key, size, flags) 439 | * retrieve or populate tunnel metadata 440 | * @skb: pointer to skb 441 | * @key: pointer to 'struct bpf_tunnel_key' 442 | * @size: size of 'struct bpf_tunnel_key' 443 | * @flags: room for future extensions 444 | * Return: 0 on success or negative error 445 | * 446 | * u64 bpf_perf_event_read(map, flags) 447 | * read perf event counter value 448 | * @map: pointer to perf_event_array map 449 | * @flags: index of event in the map or bitmask flags 450 | * Return: value of perf event counter read or error code 451 | * 452 | * int bpf_redirect(ifindex, flags) 453 | * redirect to another netdev 454 | * @ifindex: ifindex of the net device 455 | * @flags: 456 | * cls_bpf: 457 | * bit 0 - if set, redirect to ingress instead of egress 458 | * other bits - reserved 459 | * xdp_bpf: 460 | * all bits - reserved 461 | * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error 462 | * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error 463 | * int bpf_redirect_map(map, key, flags) 464 | * redirect to 
endpoint in map 465 | * @map: pointer to dev map 466 | * @key: index in map to lookup 467 | * @flags: -- 468 | * Return: XDP_REDIRECT on success or XDP_ABORT on error 469 | * 470 | * u32 bpf_get_route_realm(skb) 471 | * retrieve a dst's tclassid 472 | * @skb: pointer to skb 473 | * Return: realm if != 0 474 | * 475 | * int bpf_perf_event_output(ctx, map, flags, data, size) 476 | * output perf raw sample 477 | * @ctx: struct pt_regs* 478 | * @map: pointer to perf_event_array map 479 | * @flags: index of event in the map or bitmask flags 480 | * @data: data on stack to be output as raw data 481 | * @size: size of data 482 | * Return: 0 on success or negative error 483 | * 484 | * int bpf_get_stackid(ctx, map, flags) 485 | * walk user or kernel stack and return id 486 | * @ctx: struct pt_regs* 487 | * @map: pointer to stack_trace map 488 | * @flags: bits 0-7 - numer of stack frames to skip 489 | * bit 8 - collect user stack instead of kernel 490 | * bit 9 - compare stacks by hash only 491 | * bit 10 - if two different stacks hash into the same stackid 492 | * discard old 493 | * other bits - reserved 494 | * Return: >= 0 stackid on success or negative error 495 | * 496 | * s64 bpf_csum_diff(from, from_size, to, to_size, seed) 497 | * calculate csum diff 498 | * @from: raw from buffer 499 | * @from_size: length of from buffer 500 | * @to: raw to buffer 501 | * @to_size: length of to buffer 502 | * @seed: optional seed 503 | * Return: csum result or negative error code 504 | * 505 | * int bpf_skb_get_tunnel_opt(skb, opt, size) 506 | * retrieve tunnel options metadata 507 | * @skb: pointer to skb 508 | * @opt: pointer to raw tunnel option data 509 | * @size: size of @opt 510 | * Return: option size 511 | * 512 | * int bpf_skb_set_tunnel_opt(skb, opt, size) 513 | * populate tunnel options metadata 514 | * @skb: pointer to skb 515 | * @opt: pointer to raw tunnel option data 516 | * @size: size of @opt 517 | * Return: 0 on success or negative error 518 | * 519 | * int 
bpf_skb_change_proto(skb, proto, flags) 520 | * Change protocol of the skb. Currently supported is v4 -> v6, 521 | * v6 -> v4 transitions. The helper will also resize the skb. eBPF 522 | * program is expected to fill the new headers via skb_store_bytes 523 | * and lX_csum_replace. 524 | * @skb: pointer to skb 525 | * @proto: new skb->protocol type 526 | * @flags: reserved 527 | * Return: 0 on success or negative error 528 | * 529 | * int bpf_skb_change_type(skb, type) 530 | * Change packet type of skb. 531 | * @skb: pointer to skb 532 | * @type: new skb->pkt_type type 533 | * Return: 0 on success or negative error 534 | * 535 | * int bpf_skb_under_cgroup(skb, map, index) 536 | * Check cgroup2 membership of skb 537 | * @skb: pointer to skb 538 | * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 539 | * @index: index of the cgroup in the bpf_map 540 | * Return: 541 | * == 0 skb failed the cgroup2 descendant test 542 | * == 1 skb succeeded the cgroup2 descendant test 543 | * < 0 error 544 | * 545 | * u32 bpf_get_hash_recalc(skb) 546 | * Retrieve and possibly recalculate skb->hash. 
547 | * @skb: pointer to skb 548 | * Return: hash 549 | * 550 | * u64 bpf_get_current_task(void) 551 | * Returns current task_struct 552 | * Return: current 553 | * 554 | * int bpf_probe_write_user(void *dst, void *src, int len) 555 | * safely attempt to write to a location 556 | * @dst: destination address in userspace 557 | * @src: source address on stack 558 | * @len: number of bytes to copy 559 | * Return: 0 on success or negative error 560 | * 561 | * int bpf_current_task_under_cgroup(map, index) 562 | * Check cgroup2 membership of current task 563 | * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 564 | * @index: index of the cgroup in the bpf_map 565 | * Return: 566 | * == 0 current failed the cgroup2 descendant test 567 | * == 1 current succeeded the cgroup2 descendant test 568 | * < 0 error 569 | * 570 | * int bpf_skb_change_tail(skb, len, flags) 571 | * The helper will resize the skb to the given new size, to be used f.e. 572 | * with control messages. 573 | * @skb: pointer to skb 574 | * @len: new skb length 575 | * @flags: reserved 576 | * Return: 0 on success or negative error 577 | * 578 | * int bpf_skb_pull_data(skb, len) 579 | * The helper will pull in non-linear data in case the skb is non-linear 580 | * and not all of len are part of the linear section. Only needed for 581 | * read/write with direct packet access. 582 | * @skb: pointer to skb 583 | * @len: len to make read/writeable 584 | * Return: 0 on success or negative error 585 | * 586 | * s64 bpf_csum_update(skb, csum) 587 | * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. 588 | * @skb: pointer to skb 589 | * @csum: csum to add 590 | * Return: csum on success or negative error 591 | * 592 | * void bpf_set_hash_invalid(skb) 593 | * Invalidate current skb->hash. 594 | * @skb: pointer to skb 595 | * 596 | * int bpf_get_numa_node_id() 597 | * Return: Id of current NUMA node. 
598 | * 599 | * int bpf_skb_change_head() 600 | * Grows headroom of skb and adjusts MAC header offset accordingly. 601 | * Will extends/reallocae as required automatically. 602 | * May change skb data pointer and will thus invalidate any check 603 | * performed for direct packet access. 604 | * @skb: pointer to skb 605 | * @len: length of header to be pushed in front 606 | * @flags: Flags (unused for now) 607 | * Return: 0 on success or negative error 608 | * 609 | * int bpf_xdp_adjust_head(xdp_md, delta) 610 | * Adjust the xdp_md.data by delta 611 | * @xdp_md: pointer to xdp_md 612 | * @delta: An positive/negative integer to be added to xdp_md.data 613 | * Return: 0 on success or negative on error 614 | * 615 | * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) 616 | * Copy a NUL terminated string from unsafe address. In case the string 617 | * length is smaller than size, the target is not padded with further NUL 618 | * bytes. In case the string length is larger than size, just count-1 619 | * bytes are copied and the last byte is set to NUL. 620 | * @dst: destination address 621 | * @size: maximum number of bytes to copy, including the trailing NUL 622 | * @unsafe_ptr: unsafe address 623 | * Return: 624 | * > 0 length of the string including the trailing NUL on success 625 | * < 0 error 626 | * 627 | * u64 bpf_get_socket_cookie(skb) 628 | * Get the cookie for the socket stored inside sk_buff. 629 | * @skb: pointer to skb 630 | * Return: 8 Bytes non-decreasing number on success or 0 if the socket 631 | * field is missing inside sk_buff 632 | * 633 | * u32 bpf_get_socket_uid(skb) 634 | * Get the owner uid of the socket stored inside sk_buff. 635 | * @skb: pointer to skb 636 | * Return: uid of the socket owner on success or overflowuid if failed. 637 | * 638 | * u32 bpf_set_hash(skb, hash) 639 | * Set full skb->hash. 
640 | * @skb: pointer to skb 641 | * @hash: hash to set 642 | * 643 | * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) 644 | * Calls setsockopt. Not all opts are available, only those with 645 | * integer optvals plus TCP_CONGESTION. 646 | * Supported levels: SOL_SOCKET and IPPROTO_TCP 647 | * @bpf_socket: pointer to bpf_socket 648 | * @level: SOL_SOCKET or IPPROTO_TCP 649 | * @optname: option name 650 | * @optval: pointer to option value 651 | * @optlen: length of optval in bytes 652 | * Return: 0 or negative error 653 | * 654 | * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) 655 | * Calls getsockopt. Not all opts are available. 656 | * Supported levels: IPPROTO_TCP 657 | * @bpf_socket: pointer to bpf_socket 658 | * @level: IPPROTO_TCP 659 | * @optname: option name 660 | * @optval: pointer to option value 661 | * @optlen: length of optval in bytes 662 | * Return: 0 or negative error 663 | * 664 | * int bpf_skb_adjust_room(skb, len_diff, mode, flags) 665 | * Grow or shrink room in sk_buff. 666 | * @skb: pointer to skb 667 | * @len_diff: (signed) amount of room to grow/shrink 668 | * @mode: operation mode (enum bpf_adj_room_mode) 669 | * @flags: reserved for future use 670 | * Return: 0 on success or negative error code 671 | * 672 | * int bpf_sk_redirect_map(map, key, flags) 673 | * Redirect skb to a sock in map using key as a lookup key for the 674 | * sock in map. 
675 | * @map: pointer to sockmap 676 | * @key: key to lookup sock in map 677 | * @flags: reserved for future use 678 | * Return: SK_PASS 679 | * 680 | * int bpf_sock_map_update(skops, map, key, flags) 681 | * @skops: pointer to bpf_sock_ops 682 | * @map: pointer to sockmap to update 683 | * @key: key to insert/update sock in map 684 | * @flags: same flags as map update elem 685 | * 686 | * int bpf_xdp_adjust_meta(xdp_md, delta) 687 | * Adjust the xdp_md.data_meta by delta 688 | * @xdp_md: pointer to xdp_md 689 | * @delta: An positive/negative integer to be added to xdp_md.data_meta 690 | * Return: 0 on success or negative on error 691 | * 692 | * int bpf_perf_event_read_value(map, flags, buf, buf_size) 693 | * read perf event counter value and perf event enabled/running time 694 | * @map: pointer to perf_event_array map 695 | * @flags: index of event in the map or bitmask flags 696 | * @buf: buf to fill 697 | * @buf_size: size of the buf 698 | * Return: 0 on success or negative error code 699 | * 700 | * int bpf_perf_prog_read_value(ctx, buf, buf_size) 701 | * read perf prog attached perf event counter and enabled/running time 702 | * @ctx: pointer to ctx 703 | * @buf: buf to fill 704 | * @buf_size: size of the buf 705 | * Return : 0 on success or negative error code 706 | * 707 | * int bpf_override_return(pt_regs, rc) 708 | * @pt_regs: pointer to struct pt_regs 709 | * @rc: the return value to set 710 | */ 711 | #define __BPF_FUNC_MAPPER(FN) \ 712 | FN(unspec), \ 713 | FN(map_lookup_elem), \ 714 | FN(map_update_elem), \ 715 | FN(map_delete_elem), \ 716 | FN(probe_read), \ 717 | FN(ktime_get_ns), \ 718 | FN(trace_printk), \ 719 | FN(get_prandom_u32), \ 720 | FN(get_smp_processor_id), \ 721 | FN(skb_store_bytes), \ 722 | FN(l3_csum_replace), \ 723 | FN(l4_csum_replace), \ 724 | FN(tail_call), \ 725 | FN(clone_redirect), \ 726 | FN(get_current_pid_tgid), \ 727 | FN(get_current_uid_gid), \ 728 | FN(get_current_comm), \ 729 | FN(get_cgroup_classid), \ 730 | 
FN(skb_vlan_push), \ 731 | FN(skb_vlan_pop), \ 732 | FN(skb_get_tunnel_key), \ 733 | FN(skb_set_tunnel_key), \ 734 | FN(perf_event_read), \ 735 | FN(redirect), \ 736 | FN(get_route_realm), \ 737 | FN(perf_event_output), \ 738 | FN(skb_load_bytes), \ 739 | FN(get_stackid), \ 740 | FN(csum_diff), \ 741 | FN(skb_get_tunnel_opt), \ 742 | FN(skb_set_tunnel_opt), \ 743 | FN(skb_change_proto), \ 744 | FN(skb_change_type), \ 745 | FN(skb_under_cgroup), \ 746 | FN(get_hash_recalc), \ 747 | FN(get_current_task), \ 748 | FN(probe_write_user), \ 749 | FN(current_task_under_cgroup), \ 750 | FN(skb_change_tail), \ 751 | FN(skb_pull_data), \ 752 | FN(csum_update), \ 753 | FN(set_hash_invalid), \ 754 | FN(get_numa_node_id), \ 755 | FN(skb_change_head), \ 756 | FN(xdp_adjust_head), \ 757 | FN(probe_read_str), \ 758 | FN(get_socket_cookie), \ 759 | FN(get_socket_uid), \ 760 | FN(set_hash), \ 761 | FN(setsockopt), \ 762 | FN(skb_adjust_room), \ 763 | FN(redirect_map), \ 764 | FN(sk_redirect_map), \ 765 | FN(sock_map_update), \ 766 | FN(xdp_adjust_meta), \ 767 | FN(perf_event_read_value), \ 768 | FN(perf_prog_read_value), \ 769 | FN(getsockopt), \ 770 | FN(override_return), 771 | 772 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper 773 | * function eBPF program intends to call 774 | */ 775 | #define __BPF_ENUM_FN(x) BPF_FUNC_ ## x 776 | enum bpf_func_id { 777 | __BPF_FUNC_MAPPER(__BPF_ENUM_FN) 778 | __BPF_FUNC_MAX_ID, 779 | }; 780 | #undef __BPF_ENUM_FN 781 | 782 | /* All flags used by eBPF helper functions, placed here. */ 783 | 784 | /* BPF_FUNC_skb_store_bytes flags. */ 785 | #define BPF_F_RECOMPUTE_CSUM (1ULL << 0) 786 | #define BPF_F_INVALIDATE_HASH (1ULL << 1) 787 | 788 | /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. 789 | * First 4 bits are for passing the header field size. 790 | */ 791 | #define BPF_F_HDR_FIELD_MASK 0xfULL 792 | 793 | /* BPF_FUNC_l4_csum_replace flags. 
*/ 794 | #define BPF_F_PSEUDO_HDR (1ULL << 4) 795 | #define BPF_F_MARK_MANGLED_0 (1ULL << 5) 796 | #define BPF_F_MARK_ENFORCE (1ULL << 6) 797 | 798 | /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ 799 | #define BPF_F_INGRESS (1ULL << 0) 800 | 801 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ 802 | #define BPF_F_TUNINFO_IPV6 (1ULL << 0) 803 | 804 | /* BPF_FUNC_get_stackid flags. */ 805 | #define BPF_F_SKIP_FIELD_MASK 0xffULL 806 | #define BPF_F_USER_STACK (1ULL << 8) 807 | #define BPF_F_FAST_STACK_CMP (1ULL << 9) 808 | #define BPF_F_REUSE_STACKID (1ULL << 10) 809 | 810 | /* BPF_FUNC_skb_set_tunnel_key flags. */ 811 | #define BPF_F_ZERO_CSUM_TX (1ULL << 1) 812 | #define BPF_F_DONT_FRAGMENT (1ULL << 2) 813 | 814 | /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and 815 | * BPF_FUNC_perf_event_read_value flags. 816 | */ 817 | #define BPF_F_INDEX_MASK 0xffffffffULL 818 | #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK 819 | /* BPF_FUNC_perf_event_output for sk_buff input context. */ 820 | #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) 821 | 822 | /* Mode for BPF_FUNC_skb_adjust_room helper. */ 823 | enum bpf_adj_room_mode { 824 | BPF_ADJ_ROOM_NET, 825 | }; 826 | 827 | /* user accessible mirror of in-kernel sk_buff. 828 | * new fields can only be added to the end of this structure 829 | */ 830 | struct __sk_buff { 831 | __u32 len; 832 | __u32 pkt_type; 833 | __u32 mark; 834 | __u32 queue_mapping; 835 | __u32 protocol; 836 | __u32 vlan_present; 837 | __u32 vlan_tci; 838 | __u32 vlan_proto; 839 | __u32 priority; 840 | __u32 ingress_ifindex; 841 | __u32 ifindex; 842 | __u32 tc_index; 843 | __u32 cb[5]; 844 | __u32 hash; 845 | __u32 tc_classid; 846 | __u32 data; 847 | __u32 data_end; 848 | __u32 napi_id; 849 | 850 | /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... 
*/ 851 | __u32 family; 852 | __u32 remote_ip4; /* Stored in network byte order */ 853 | __u32 local_ip4; /* Stored in network byte order */ 854 | __u32 remote_ip6[4]; /* Stored in network byte order */ 855 | __u32 local_ip6[4]; /* Stored in network byte order */ 856 | __u32 remote_port; /* Stored in network byte order */ 857 | __u32 local_port; /* stored in host byte order */ 858 | /* ... here. */ 859 | 860 | __u32 data_meta; 861 | }; 862 | 863 | struct bpf_tunnel_key { 864 | __u32 tunnel_id; 865 | union { 866 | __u32 remote_ipv4; 867 | __u32 remote_ipv6[4]; 868 | }; 869 | __u8 tunnel_tos; 870 | __u8 tunnel_ttl; 871 | __u16 tunnel_ext; 872 | __u32 tunnel_label; 873 | }; 874 | 875 | /* Generic BPF return codes which all BPF program types may support. 876 | * The values are binary compatible with their TC_ACT_* counter-part to 877 | * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT 878 | * programs. 879 | * 880 | * XDP is handled seprately, see XDP_*. 881 | */ 882 | enum bpf_ret_code { 883 | BPF_OK = 0, 884 | /* 1 reserved */ 885 | BPF_DROP = 2, 886 | /* 3-6 reserved */ 887 | BPF_REDIRECT = 7, 888 | /* >127 are reserved for prog type specific return codes */ 889 | }; 890 | 891 | struct bpf_sock { 892 | __u32 bound_dev_if; 893 | __u32 family; 894 | __u32 type; 895 | __u32 protocol; 896 | __u32 mark; 897 | __u32 priority; 898 | }; 899 | 900 | #define XDP_PACKET_HEADROOM 256 901 | 902 | /* User return codes for XDP prog type. 903 | * A valid XDP program must return one of these defined values. All other 904 | * return codes are reserved for future use. Unknown return codes will 905 | * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). 
906 | */ 907 | enum xdp_action { 908 | XDP_ABORTED = 0, 909 | XDP_DROP, 910 | XDP_PASS, 911 | XDP_TX, 912 | XDP_REDIRECT, 913 | }; 914 | 915 | /* user accessible metadata for XDP packet hook 916 | * new fields must be added to the end of this structure 917 | */ 918 | struct xdp_md { 919 | __u32 data; 920 | __u32 data_end; 921 | __u32 data_meta; 922 | /* Below access go through struct xdp_rxq_info */ 923 | __u32 ingress_ifindex; /* rxq->dev->ifindex */ 924 | __u32 rx_queue_index; /* rxq->queue_index */ 925 | }; 926 | 927 | enum sk_action { 928 | SK_DROP = 0, 929 | SK_PASS, 930 | }; 931 | 932 | #define BPF_TAG_SIZE 8 933 | 934 | struct bpf_prog_info { 935 | __u32 type; 936 | __u32 id; 937 | __u8 tag[BPF_TAG_SIZE]; 938 | __u32 jited_prog_len; 939 | __u32 xlated_prog_len; 940 | __aligned_u64 jited_prog_insns; 941 | __aligned_u64 xlated_prog_insns; 942 | __u64 load_time; /* ns since boottime */ 943 | __u32 created_by_uid; 944 | __u32 nr_map_ids; 945 | __aligned_u64 map_ids; 946 | char name[BPF_OBJ_NAME_LEN]; 947 | __u32 ifindex; 948 | __u64 netns_dev; 949 | __u64 netns_ino; 950 | } __attribute__((aligned(8))); 951 | 952 | struct bpf_map_info { 953 | __u32 type; 954 | __u32 id; 955 | __u32 key_size; 956 | __u32 value_size; 957 | __u32 max_entries; 958 | __u32 map_flags; 959 | char name[BPF_OBJ_NAME_LEN]; 960 | __u32 ifindex; 961 | __u64 netns_dev; 962 | __u64 netns_ino; 963 | } __attribute__((aligned(8))); 964 | 965 | /* User bpf_sock_ops struct to access socket values and specify request ops 966 | * and their replies. 967 | * Some of this fields are in network (bigendian) byte order and may need 968 | * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). 
969 | * New fields can only be added at the end of this structure 970 | */ 971 | struct bpf_sock_ops { 972 | __u32 op; 973 | union { 974 | __u32 reply; 975 | __u32 replylong[4]; 976 | }; 977 | __u32 family; 978 | __u32 remote_ip4; /* Stored in network byte order */ 979 | __u32 local_ip4; /* Stored in network byte order */ 980 | __u32 remote_ip6[4]; /* Stored in network byte order */ 981 | __u32 local_ip6[4]; /* Stored in network byte order */ 982 | __u32 remote_port; /* Stored in network byte order */ 983 | __u32 local_port; /* stored in host byte order */ 984 | __u32 is_fullsock; /* Some TCP fields are only valid if 985 | * there is a full socket. If not, the 986 | * fields read as zero. 987 | */ 988 | __u32 snd_cwnd; 989 | __u32 srtt_us; /* Averaged RTT << 3 in usecs */ 990 | }; 991 | 992 | /* List of known BPF sock_ops operators. 993 | * New entries can only be added at the end 994 | */ 995 | enum { 996 | BPF_SOCK_OPS_VOID, 997 | BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or 998 | * -1 if default value should be used 999 | */ 1000 | BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized 1001 | * window (in packets) or -1 if default 1002 | * value should be used 1003 | */ 1004 | BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an 1005 | * active connection is initialized 1006 | */ 1007 | BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an 1008 | * active connection is 1009 | * established 1010 | */ 1011 | BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a 1012 | * passive connection is 1013 | * established 1014 | */ 1015 | BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control 1016 | * needs ECN 1017 | */ 1018 | BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is 1019 | * based on the path and may be 1020 | * dependent on the congestion control 1021 | * algorithm. In general it indicates 1022 | * a congestion threshold. 
RTTs above 1023 | * this indicate congestion 1024 | */ 1025 | }; 1026 | 1027 | #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ 1028 | #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ 1029 | 1030 | struct bpf_perf_event_value { 1031 | __u64 counter; 1032 | __u64 enabled; 1033 | __u64 running; 1034 | }; 1035 | 1036 | #define BPF_DEVCG_ACC_MKNOD (1ULL << 0) 1037 | #define BPF_DEVCG_ACC_READ (1ULL << 1) 1038 | #define BPF_DEVCG_ACC_WRITE (1ULL << 2) 1039 | 1040 | #define BPF_DEVCG_DEV_BLOCK (1ULL << 0) 1041 | #define BPF_DEVCG_DEV_CHAR (1ULL << 1) 1042 | 1043 | struct bpf_cgroup_dev_ctx { 1044 | /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ 1045 | __u32 access_type; 1046 | __u32 major; 1047 | __u32 minor; 1048 | }; 1049 | 1050 | #endif /* __LINUX_BPF_H__ */ 1051 | -------------------------------------------------------------------------------- /tools/include/uapi/linux/bpf_common.h: -------------------------------------------------------------------------------- 1 | /* Copy of kernel tools/include/uapi/linux/bpf_common.h 2 | */ 3 | #ifndef _UAPI__LINUX_BPF_COMMON_H__ 4 | #define _UAPI__LINUX_BPF_COMMON_H__ 5 | 6 | /* Instruction classes */ 7 | #define BPF_CLASS(code) ((code) & 0x07) 8 | #define BPF_LD 0x00 9 | #define BPF_LDX 0x01 10 | #define BPF_ST 0x02 11 | #define BPF_STX 0x03 12 | #define BPF_ALU 0x04 13 | #define BPF_JMP 0x05 14 | #define BPF_RET 0x06 15 | #define BPF_MISC 0x07 16 | 17 | /* ld/ldx fields */ 18 | #define BPF_SIZE(code) ((code) & 0x18) 19 | #define BPF_W 0x00 20 | #define BPF_H 0x08 21 | #define BPF_B 0x10 22 | #define BPF_MODE(code) ((code) & 0xe0) 23 | #define BPF_IMM 0x00 24 | #define BPF_ABS 0x20 25 | #define BPF_IND 0x40 26 | #define BPF_MEM 0x60 27 | #define BPF_LEN 0x80 28 | #define BPF_MSH 0xa0 29 | 30 | /* alu/jmp fields */ 31 | #define BPF_OP(code) ((code) & 0xf0) 32 | #define BPF_ADD 0x00 33 | #define BPF_SUB 0x10 34 | #define BPF_MUL 0x20 35 | #define BPF_DIV 0x30 36 | #define BPF_OR 
0x40 37 | #define BPF_AND 0x50 38 | #define BPF_LSH 0x60 39 | #define BPF_RSH 0x70 40 | #define BPF_NEG 0x80 41 | #define BPF_MOD 0x90 42 | #define BPF_XOR 0xa0 43 | 44 | #define BPF_JA 0x00 45 | #define BPF_JEQ 0x10 46 | #define BPF_JGT 0x20 47 | #define BPF_JGE 0x30 48 | #define BPF_JSET 0x40 49 | #define BPF_SRC(code) ((code) & 0x08) 50 | #define BPF_K 0x00 51 | #define BPF_X 0x08 52 | 53 | #ifndef BPF_MAXINSNS 54 | #define BPF_MAXINSNS 4096 55 | #endif 56 | 57 | #endif /* _UAPI__LINUX_BPF_COMMON_H__ */ 58 | -------------------------------------------------------------------------------- /tools/lib/bpf/bpf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * common eBPF ELF operations. 3 | * 4 | * Copyright (C) 2013-2015 Alexei Starovoitov 5 | * Copyright (C) 2015 Wang Nan 6 | * Copyright (C) 2015 Huawei Inc. 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License as published by the Free Software Foundation; 11 | * version 2.1 of the License (not later!) 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU Lesser General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU Lesser General Public 19 | * License along with this program; if not, see 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "bpf.h" 28 | 29 | /* 30 | * When building perf, unistd.h is overridden. __NR_bpf is 31 | * required to be defined explicitly. 
32 | */ 33 | #ifndef __NR_bpf 34 | # if defined(__i386__) 35 | # define __NR_bpf 357 36 | # elif defined(__x86_64__) 37 | # define __NR_bpf 321 38 | # elif defined(__aarch64__) 39 | # define __NR_bpf 280 40 | # elif defined(__sparc__) 41 | # define __NR_bpf 349 42 | # elif defined(__s390__) 43 | # define __NR_bpf 351 44 | # else 45 | # error __NR_bpf not defined. libbpf does not support your arch. 46 | # endif 47 | #endif 48 | 49 | #define min(x, y) ((x) < (y) ? (x) : (y)) 50 | 51 | static inline __u64 ptr_to_u64(const void *ptr) 52 | { 53 | return (__u64) (unsigned long) ptr; 54 | } 55 | 56 | static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, 57 | unsigned int size) 58 | { 59 | return syscall(__NR_bpf, cmd, attr, size); 60 | } 61 | 62 | int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 63 | int key_size, int value_size, int max_entries, 64 | __u32 map_flags, int node) 65 | { 66 | __u32 name_len = name ? strlen(name) : 0; 67 | union bpf_attr attr; 68 | 69 | memset(&attr, '\0', sizeof(attr)); 70 | 71 | attr.map_type = map_type; 72 | attr.key_size = key_size; 73 | attr.value_size = value_size; 74 | attr.max_entries = max_entries; 75 | attr.map_flags = map_flags; 76 | memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 77 | 78 | if (node >= 0) { 79 | attr.map_flags |= BPF_F_NUMA_NODE; 80 | attr.numa_node = node; 81 | } 82 | 83 | return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 84 | } 85 | 86 | int bpf_create_map(enum bpf_map_type map_type, int key_size, 87 | int value_size, int max_entries, __u32 map_flags) 88 | { 89 | return bpf_create_map_node(map_type, NULL, key_size, value_size, 90 | max_entries, map_flags, -1); 91 | } 92 | 93 | int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 94 | int key_size, int value_size, int max_entries, 95 | __u32 map_flags) 96 | { 97 | return bpf_create_map_node(map_type, name, key_size, value_size, 98 | max_entries, map_flags, -1); 99 | } 100 | 101 | int 
bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, 102 | int key_size, int inner_map_fd, int max_entries, 103 | __u32 map_flags, int node) 104 | { 105 | __u32 name_len = name ? strlen(name) : 0; 106 | union bpf_attr attr; 107 | 108 | memset(&attr, '\0', sizeof(attr)); 109 | 110 | attr.map_type = map_type; 111 | attr.key_size = key_size; 112 | attr.value_size = 4; 113 | attr.inner_map_fd = inner_map_fd; 114 | attr.max_entries = max_entries; 115 | attr.map_flags = map_flags; 116 | memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 117 | 118 | if (node >= 0) { 119 | attr.map_flags |= BPF_F_NUMA_NODE; 120 | attr.numa_node = node; 121 | } 122 | 123 | return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 124 | } 125 | 126 | int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, 127 | int key_size, int inner_map_fd, int max_entries, 128 | __u32 map_flags) 129 | { 130 | return bpf_create_map_in_map_node(map_type, name, key_size, 131 | inner_map_fd, max_entries, map_flags, 132 | -1); 133 | } 134 | 135 | int bpf_load_program_name(enum bpf_prog_type type, const char *name, 136 | const struct bpf_insn *insns, 137 | size_t insns_cnt, const char *license, 138 | __u32 kern_version, char *log_buf, 139 | size_t log_buf_sz) 140 | { 141 | int fd; 142 | union bpf_attr attr; 143 | __u32 name_len = name ? 
strlen(name) : 0; 144 | 145 | bzero(&attr, sizeof(attr)); 146 | attr.prog_type = type; 147 | attr.insn_cnt = (__u32)insns_cnt; 148 | attr.insns = ptr_to_u64(insns); 149 | attr.license = ptr_to_u64(license); 150 | attr.log_buf = ptr_to_u64(NULL); 151 | attr.log_size = 0; 152 | attr.log_level = 0; 153 | attr.kern_version = kern_version; 154 | memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 155 | 156 | fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 157 | if (fd >= 0 || !log_buf || !log_buf_sz) 158 | return fd; 159 | 160 | /* Try again with log */ 161 | attr.log_buf = ptr_to_u64(log_buf); 162 | attr.log_size = log_buf_sz; 163 | attr.log_level = 1; 164 | log_buf[0] = 0; 165 | return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 166 | } 167 | 168 | int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 169 | size_t insns_cnt, const char *license, 170 | __u32 kern_version, char *log_buf, 171 | size_t log_buf_sz) 172 | { 173 | return bpf_load_program_name(type, NULL, insns, insns_cnt, license, 174 | kern_version, log_buf, log_buf_sz); 175 | } 176 | 177 | int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 178 | size_t insns_cnt, int strict_alignment, 179 | const char *license, __u32 kern_version, 180 | char *log_buf, size_t log_buf_sz, int log_level) 181 | { 182 | union bpf_attr attr; 183 | 184 | bzero(&attr, sizeof(attr)); 185 | attr.prog_type = type; 186 | attr.insn_cnt = (__u32)insns_cnt; 187 | attr.insns = ptr_to_u64(insns); 188 | attr.license = ptr_to_u64(license); 189 | attr.log_buf = ptr_to_u64(log_buf); 190 | attr.log_size = log_buf_sz; 191 | attr.log_level = log_level; 192 | log_buf[0] = 0; 193 | attr.kern_version = kern_version; 194 | attr.prog_flags = strict_alignment ? 
BPF_F_STRICT_ALIGNMENT : 0; 195 | 196 | return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 197 | } 198 | 199 | int bpf_map_update_elem(int fd, const void *key, const void *value, 200 | __u64 flags) 201 | { 202 | union bpf_attr attr; 203 | 204 | bzero(&attr, sizeof(attr)); 205 | attr.map_fd = fd; 206 | attr.key = ptr_to_u64(key); 207 | attr.value = ptr_to_u64(value); 208 | attr.flags = flags; 209 | 210 | return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); 211 | } 212 | 213 | int bpf_map_lookup_elem(int fd, const void *key, void *value) 214 | { 215 | union bpf_attr attr; 216 | 217 | bzero(&attr, sizeof(attr)); 218 | attr.map_fd = fd; 219 | attr.key = ptr_to_u64(key); 220 | attr.value = ptr_to_u64(value); 221 | 222 | return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); 223 | } 224 | 225 | int bpf_map_delete_elem(int fd, const void *key) 226 | { 227 | union bpf_attr attr; 228 | 229 | bzero(&attr, sizeof(attr)); 230 | attr.map_fd = fd; 231 | attr.key = ptr_to_u64(key); 232 | 233 | return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); 234 | } 235 | 236 | int bpf_map_get_next_key(int fd, const void *key, void *next_key) 237 | { 238 | union bpf_attr attr; 239 | 240 | bzero(&attr, sizeof(attr)); 241 | attr.map_fd = fd; 242 | attr.key = ptr_to_u64(key); 243 | attr.next_key = ptr_to_u64(next_key); 244 | 245 | return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); 246 | } 247 | 248 | int bpf_obj_pin(int fd, const char *pathname) 249 | { 250 | union bpf_attr attr; 251 | 252 | bzero(&attr, sizeof(attr)); 253 | attr.pathname = ptr_to_u64((void *)pathname); 254 | attr.bpf_fd = fd; 255 | 256 | return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); 257 | } 258 | 259 | int bpf_obj_get(const char *pathname) 260 | { 261 | union bpf_attr attr; 262 | 263 | bzero(&attr, sizeof(attr)); 264 | attr.pathname = ptr_to_u64((void *)pathname); 265 | 266 | return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 267 | } 268 | 269 | int bpf_prog_attach(int prog_fd, int target_fd, enum 
bpf_attach_type type, 270 | unsigned int flags) 271 | { 272 | union bpf_attr attr; 273 | 274 | bzero(&attr, sizeof(attr)); 275 | attr.target_fd = target_fd; 276 | attr.attach_bpf_fd = prog_fd; 277 | attr.attach_type = type; 278 | attr.attach_flags = flags; 279 | 280 | return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); 281 | } 282 | 283 | int bpf_prog_detach(int target_fd, enum bpf_attach_type type) 284 | { 285 | union bpf_attr attr; 286 | 287 | bzero(&attr, sizeof(attr)); 288 | attr.target_fd = target_fd; 289 | attr.attach_type = type; 290 | 291 | return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); 292 | } 293 | 294 | int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) 295 | { 296 | union bpf_attr attr; 297 | 298 | bzero(&attr, sizeof(attr)); 299 | attr.target_fd = target_fd; 300 | attr.attach_bpf_fd = prog_fd; 301 | attr.attach_type = type; 302 | 303 | return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); 304 | } 305 | 306 | int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 307 | __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) 308 | { 309 | union bpf_attr attr; 310 | int ret; 311 | 312 | bzero(&attr, sizeof(attr)); 313 | attr.query.target_fd = target_fd; 314 | attr.query.attach_type = type; 315 | attr.query.query_flags = query_flags; 316 | attr.query.prog_cnt = *prog_cnt; 317 | attr.query.prog_ids = ptr_to_u64(prog_ids); 318 | 319 | ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); 320 | if (attach_flags) 321 | *attach_flags = attr.query.attach_flags; 322 | *prog_cnt = attr.query.prog_cnt; 323 | return ret; 324 | } 325 | 326 | int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, 327 | void *data_out, __u32 *size_out, __u32 *retval, 328 | __u32 *duration) 329 | { 330 | union bpf_attr attr; 331 | int ret; 332 | 333 | bzero(&attr, sizeof(attr)); 334 | attr.test.prog_fd = prog_fd; 335 | attr.test.data_in = ptr_to_u64(data); 336 | attr.test.data_out = ptr_to_u64(data_out); 337 | 
attr.test.data_size_in = size; 338 | attr.test.repeat = repeat; 339 | 340 | ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); 341 | if (size_out) 342 | *size_out = attr.test.data_size_out; 343 | if (retval) 344 | *retval = attr.test.retval; 345 | if (duration) 346 | *duration = attr.test.duration; 347 | return ret; 348 | } 349 | 350 | int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) 351 | { 352 | union bpf_attr attr; 353 | int err; 354 | 355 | bzero(&attr, sizeof(attr)); 356 | attr.start_id = start_id; 357 | 358 | err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); 359 | if (!err) 360 | *next_id = attr.next_id; 361 | 362 | return err; 363 | } 364 | 365 | int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) 366 | { 367 | union bpf_attr attr; 368 | int err; 369 | 370 | bzero(&attr, sizeof(attr)); 371 | attr.start_id = start_id; 372 | 373 | err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); 374 | if (!err) 375 | *next_id = attr.next_id; 376 | 377 | return err; 378 | } 379 | 380 | int bpf_prog_get_fd_by_id(__u32 id) 381 | { 382 | union bpf_attr attr; 383 | 384 | bzero(&attr, sizeof(attr)); 385 | attr.prog_id = id; 386 | 387 | return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); 388 | } 389 | 390 | int bpf_map_get_fd_by_id(__u32 id) 391 | { 392 | union bpf_attr attr; 393 | 394 | bzero(&attr, sizeof(attr)); 395 | attr.map_id = id; 396 | 397 | return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); 398 | } 399 | 400 | int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) 401 | { 402 | union bpf_attr attr; 403 | int err; 404 | 405 | bzero(&attr, sizeof(attr)); 406 | attr.info.bpf_fd = prog_fd; 407 | attr.info.info_len = *info_len; 408 | attr.info.info = ptr_to_u64(info); 409 | 410 | err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); 411 | if (!err) 412 | *info_len = attr.info.info_len; 413 | 414 | return err; 415 | } 416 | -------------------------------------------------------------------------------- 
/tools/lib/bpf/bpf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * common eBPF ELF operations. 3 | * 4 | * Copyright (C) 2013-2015 Alexei Starovoitov 5 | * Copyright (C) 2015 Wang Nan 6 | * Copyright (C) 2015 Huawei Inc. 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License as published by the Free Software Foundation; 11 | * version 2.1 of the License (not later!) 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU Lesser General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU Lesser General Public 19 | * License along with this program; if not, see 20 | */ 21 | #ifndef __BPF_BPF_H 22 | #define __BPF_BPF_H 23 | 24 | #include 25 | #include 26 | 27 | int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 28 | int key_size, int value_size, int max_entries, 29 | __u32 map_flags, int node); 30 | int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 31 | int key_size, int value_size, int max_entries, 32 | __u32 map_flags); 33 | int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, 34 | int max_entries, __u32 map_flags); 35 | int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, 36 | int key_size, int inner_map_fd, int max_entries, 37 | __u32 map_flags, int node); 38 | int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, 39 | int key_size, int inner_map_fd, int max_entries, 40 | __u32 map_flags); 41 | 42 | /* Recommend log buffer size */ 43 | #define BPF_LOG_BUF_SIZE (256 * 1024) 44 | int bpf_load_program_name(enum bpf_prog_type type, const char *name, 45 | const struct bpf_insn *insns, 46 | size_t insns_cnt, const 
char *license, 47 | __u32 kern_version, char *log_buf, 48 | size_t log_buf_sz); 49 | int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 50 | size_t insns_cnt, const char *license, 51 | __u32 kern_version, char *log_buf, 52 | size_t log_buf_sz); 53 | int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 54 | size_t insns_cnt, int strict_alignment, 55 | const char *license, __u32 kern_version, 56 | char *log_buf, size_t log_buf_sz, int log_level); 57 | 58 | int bpf_map_update_elem(int fd, const void *key, const void *value, 59 | __u64 flags); 60 | 61 | int bpf_map_lookup_elem(int fd, const void *key, void *value); 62 | int bpf_map_delete_elem(int fd, const void *key); 63 | int bpf_map_get_next_key(int fd, const void *key, void *next_key); 64 | int bpf_obj_pin(int fd, const char *pathname); 65 | int bpf_obj_get(const char *pathname); 66 | int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, 67 | unsigned int flags); 68 | int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); 69 | int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); 70 | int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, 71 | void *data_out, __u32 *size_out, __u32 *retval, 72 | __u32 *duration); 73 | int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); 74 | int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 75 | int bpf_prog_get_fd_by_id(__u32 id); 76 | int bpf_map_get_fd_by_id(__u32 id); 77 | int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 78 | int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 79 | __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); 80 | #endif 81 | -------------------------------------------------------------------------------- /tools/lib/bpf/libbpf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Common eBPF ELF object loading 
operations. 3 | * 4 | * Copyright (C) 2013-2015 Alexei Starovoitov 5 | * Copyright (C) 2015 Wang Nan 6 | * Copyright (C) 2015 Huawei Inc. 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU Lesser General Public 10 | * License as published by the Free Software Foundation; 11 | * version 2.1 of the License (not later!) 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU Lesser General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU Lesser General Public 19 | * License along with this program; if not, see 20 | */ 21 | #ifndef __BPF_LIBBPF_H 22 | #define __BPF_LIBBPF_H 23 | 24 | #include 25 | #include 26 | #include 27 | #include // for size_t 28 | #include 29 | 30 | enum libbpf_errno { 31 | __LIBBPF_ERRNO__START = 4000, 32 | 33 | /* Something wrong in libelf */ 34 | LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START, 35 | LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */ 36 | LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */ 37 | LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */ 38 | LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */ 39 | LIBBPF_ERRNO__RELOC, /* Relocation failed */ 40 | LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */ 41 | LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */ 42 | LIBBPF_ERRNO__PROG2BIG, /* Program too big */ 43 | LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ 44 | LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ 45 | __LIBBPF_ERRNO__END, 46 | }; 47 | 48 | int libbpf_strerror(int err, char *buf, size_t size); 49 | 50 | /* 51 | * In include/linux/compiler-gcc.h, __printf is defined. However 52 | * it should be better if libbpf.h doesn't depend on Linux header file. 
53 | * So instead of __printf, here we use gcc attribute directly. 54 | */ 55 | typedef int (*libbpf_print_fn_t)(const char *, ...) 56 | __attribute__((format(printf, 1, 2))); 57 | 58 | void libbpf_set_print(libbpf_print_fn_t warn, 59 | libbpf_print_fn_t info, 60 | libbpf_print_fn_t debug); 61 | 62 | /* Hide internal to user */ 63 | struct bpf_object; 64 | 65 | struct bpf_object *bpf_object__open(const char *path); 66 | struct bpf_object *bpf_object__open_buffer(void *obj_buf, 67 | size_t obj_buf_sz, 68 | const char *name); 69 | int bpf_object__pin(struct bpf_object *object, const char *path); 70 | void bpf_object__close(struct bpf_object *object); 71 | 72 | /* Load/unload object into/from kernel */ 73 | int bpf_object__load(struct bpf_object *obj); 74 | int bpf_object__unload(struct bpf_object *obj); 75 | const char *bpf_object__name(struct bpf_object *obj); 76 | unsigned int bpf_object__kversion(struct bpf_object *obj); 77 | 78 | struct bpf_object *bpf_object__next(struct bpf_object *prev); 79 | #define bpf_object__for_each_safe(pos, tmp) \ 80 | for ((pos) = bpf_object__next(NULL), \ 81 | (tmp) = bpf_object__next(pos); \ 82 | (pos) != NULL; \ 83 | (pos) = (tmp), (tmp) = bpf_object__next(tmp)) 84 | 85 | typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); 86 | int bpf_object__set_priv(struct bpf_object *obj, void *priv, 87 | bpf_object_clear_priv_t clear_priv); 88 | void *bpf_object__priv(struct bpf_object *prog); 89 | 90 | /* Accessors of bpf_program. 
*/ 91 | struct bpf_program; 92 | struct bpf_program *bpf_program__next(struct bpf_program *prog, 93 | struct bpf_object *obj); 94 | 95 | #define bpf_object__for_each_program(pos, obj) \ 96 | for ((pos) = bpf_program__next(NULL, (obj)); \ 97 | (pos) != NULL; \ 98 | (pos) = bpf_program__next((pos), (obj))) 99 | 100 | typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, 101 | void *); 102 | 103 | int bpf_program__set_priv(struct bpf_program *prog, void *priv, 104 | bpf_program_clear_priv_t clear_priv); 105 | 106 | void *bpf_program__priv(struct bpf_program *prog); 107 | 108 | const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); 109 | 110 | int bpf_program__fd(struct bpf_program *prog); 111 | int bpf_program__pin_instance(struct bpf_program *prog, const char *path, 112 | int instance); 113 | int bpf_program__pin(struct bpf_program *prog, const char *path); 114 | 115 | struct bpf_insn; 116 | 117 | /* 118 | * Libbpf allows callers to adjust BPF programs before being loaded 119 | * into kernel. One program in an object file can be transformed into 120 | * multiple variants to be attached to different code. 121 | * 122 | * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd 123 | * are APIs for this purpose. 124 | * 125 | * - bpf_program_prep_t: 126 | * It defines 'preprocessor', which is a caller-defined function 127 | * passed to libbpf through bpf_program__set_prep(), and will be 128 | * called before the program is loaded. The preprocessor should adjust 129 | * the program once for each instance according to the number 130 | * passed to it. 131 | * 132 | * - bpf_program__set_prep: 133 | * Attaches a preprocessor to a BPF program. The number of instances 134 | * that should be created is also passed through this function. 135 | * 136 | * - bpf_program__nth_fd: 137 | * After the program is loaded, get the resulting fds from the bpf program for 138 | * each instance.
139 | * 140 | * If bpf_program__set_prep() is not used, the program would be loaded 141 | * without adjustment during bpf_object__load(). The program has only 142 | * one instance. In this case bpf_program__fd(prog) is equal to 143 | * bpf_program__nth_fd(prog, 0). 144 | */ 145 | 146 | struct bpf_prog_prep_result { 147 | /* 148 | * If not NULL, load new instruction array. 149 | * If set to NULL, don't load this instance. 150 | */ 151 | struct bpf_insn *new_insn_ptr; 152 | int new_insn_cnt; 153 | 154 | /* If not NULL, the resulting fd is stored to it */ 155 | int *pfd; 156 | }; 157 | 158 | /* 159 | * Parameters of bpf_program_prep_t: 160 | * - prog: The bpf_program being loaded. 161 | * - n: Index of instance being generated. 162 | * - insns: BPF instructions array. 163 | * - insns_cnt: Number of instructions in insns. 164 | * - res: Output parameter, result of transformation. 165 | * 166 | * Return value: 167 | * - Zero: pre-processing succeeded. 168 | * - Non-zero: pre-processing failed, stop loading. 169 | */ 170 | typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, 171 | struct bpf_insn *insns, int insns_cnt, 172 | struct bpf_prog_prep_result *res); 173 | 174 | int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 175 | bpf_program_prep_t prep); 176 | 177 | int bpf_program__nth_fd(struct bpf_program *prog, int n); 178 | 179 | /* 180 | * Adjust type of bpf program. Default is kprobe.
181 | */ 182 | int bpf_program__set_socket_filter(struct bpf_program *prog); 183 | int bpf_program__set_tracepoint(struct bpf_program *prog); 184 | int bpf_program__set_kprobe(struct bpf_program *prog); 185 | int bpf_program__set_sched_cls(struct bpf_program *prog); 186 | int bpf_program__set_sched_act(struct bpf_program *prog); 187 | int bpf_program__set_xdp(struct bpf_program *prog); 188 | int bpf_program__set_perf_event(struct bpf_program *prog); 189 | void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); 190 | 191 | bool bpf_program__is_socket_filter(struct bpf_program *prog); 192 | bool bpf_program__is_tracepoint(struct bpf_program *prog); 193 | bool bpf_program__is_kprobe(struct bpf_program *prog); 194 | bool bpf_program__is_sched_cls(struct bpf_program *prog); 195 | bool bpf_program__is_sched_act(struct bpf_program *prog); 196 | bool bpf_program__is_xdp(struct bpf_program *prog); 197 | bool bpf_program__is_perf_event(struct bpf_program *prog); 198 | 199 | /* 200 | * We don't need __attribute__((packed)) now since it is 201 | * unnecessary for 'bpf_map_def' because they are all aligned. 202 | * In addition, using it will trigger -Wpacked warning message, 203 | * and will be treated as an error due to -Werror. 204 | */ 205 | struct bpf_map_def { 206 | unsigned int type; 207 | unsigned int key_size; 208 | unsigned int value_size; 209 | unsigned int max_entries; 210 | }; 211 | 212 | /* 213 | * There is another 'struct bpf_map' in include/linux/map.h. However, 214 | * it is not a uapi header so no need to consider name clash. 215 | */ 216 | struct bpf_map; 217 | struct bpf_map * 218 | bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); 219 | 220 | /* 221 | * Get bpf_map through the offset of corresponding struct bpf_map_def 222 | * in the bpf object file. 
223 | */ 224 | struct bpf_map * 225 | bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); 226 | 227 | struct bpf_map * 228 | bpf_map__next(struct bpf_map *map, struct bpf_object *obj); 229 | #define bpf_map__for_each(pos, obj) \ 230 | for ((pos) = bpf_map__next(NULL, (obj)); \ 231 | (pos) != NULL; \ 232 | (pos) = bpf_map__next((pos), (obj))) 233 | 234 | int bpf_map__fd(struct bpf_map *map); 235 | const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 236 | const char *bpf_map__name(struct bpf_map *map); 237 | 238 | typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 239 | int bpf_map__set_priv(struct bpf_map *map, void *priv, 240 | bpf_map_clear_priv_t clear_priv); 241 | void *bpf_map__priv(struct bpf_map *map); 242 | int bpf_map__pin(struct bpf_map *map, const char *path); 243 | 244 | long libbpf_get_error(const void *ptr); 245 | 246 | int bpf_prog_load(const char *file, enum bpf_prog_type type, 247 | struct bpf_object **pobj, int *prog_fd); 248 | #endif 249 | -------------------------------------------------------------------------------- /xlb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./xlb -i eth0 -r ;./xlb -i eth0 -v 4 | ./xlb_cmdline -i eth0 -A 10.1.2.1 -p 80 5 | ./xlb_cmdline -i eth0 -a 10.1.2.1 -p 80 -r 10.0.0.24 6 | ./xlb_cmdline -i eth0 -a 10.1.2.1 -p 80 -r 10.0.0.23 7 | ./xlb_cmdline -i eth0 -a 10.1.2.1 -p 80 -r 10.0.0.22 8 | 9 | ./xlb_cmdline -i eth0 -L 10 | 11 | -------------------------------------------------------------------------------- /xlb.sh2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./xlb -i eth0 -r ;./xlb -i eth0 -v 4 | ./xlb_cmdline -i eth0 -A 10.1.4.1 -p 80 5 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.24 6 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.23 7 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.22 8 | 9 | ./xlb_cmdline -i eth0 -A 10.1.4.2 -p 80 10 | 
./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.22 11 | 12 | ./xlb_cmdline -i eth0 -A 10.1.4.3 -p 80 13 | ./xlb_cmdline -i eth0 -a 10.1.4.3 -p 80 -r 10.0.0.22 14 | ./xlb_cmdline -i eth0 -a 10.1.4.3 -p 80 -r 10.0.0.23 15 | 16 | ./xlb_cmdline -i eth0 -L 17 | 18 | -------------------------------------------------------------------------------- /xlb.sh3: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./xlb -i eth0 -r ;./xlb -i eth0 -v 4 | ./xlb_cmdline -i eth0 -A 10.1.4.1 -p 80 5 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.22 6 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.23 7 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.24 8 | 9 | ./xlb_cmdline -i eth0 -A 10.1.4.2 -p 80 10 | ./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.22 11 | ./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.23 12 | ./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.24 13 | 14 | ./xlb_cmdline -i eth0 -L 15 | 16 | echo 17 | read -p "Type enter to continue ..." choice 18 | echo 19 | 20 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.22 21 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.23 22 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.24 23 | ./xlb_cmdline -i eth0 -D 10.1.4.1 -p 80 24 | ./xlb_cmdline -i eth0 -L -v 25 | 26 | echo 27 | read -p "Type enter to continue ..." 
choice 28 | echo 29 | 30 | ./xlb_cmdline -i eth0 -d 10.1.4.2 -p 80 -r 10.0.0.23 31 | ./xlb_cmdline -i eth0 -d 10.1.4.2 -p 80 -r 10.0.0.24 32 | ./xlb_cmdline -i eth0 -D 10.1.4.2 -p 80 33 | ./xlb_cmdline -i eth0 -L -v 34 | 35 | -------------------------------------------------------------------------------- /xlb.sh4: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | createsvc(){ 4 | ./xlb -i eth0 -r ;./xlb -i eth0 5 | ./xlb_cmdline -i eth0 -A 10.1.4.1 -p 80 6 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.22 7 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.23 8 | ./xlb_cmdline -i eth0 -a 10.1.4.1 -p 80 -r 10.0.0.24 9 | 10 | 11 | ./xlb_cmdline -i eth0 -A 10.1.4.2 -p 80 12 | ./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.24 13 | ./xlb_cmdline -i eth0 -a 10.1.4.2 -p 80 -r 10.0.0.23 14 | 15 | } 16 | 17 | deletesvc(){ 18 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.22 19 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.23 20 | ./xlb_cmdline -i eth0 -d 10.1.4.1 -p 80 -r 10.0.0.24 21 | ./xlb_cmdline -i eth0 -D 10.1.4.1 -p 80 22 | 23 | ./xlb_cmdline -i eth0 -d 10.1.4.2 -p 80 -r 10.0.0.23 24 | ./xlb_cmdline -i eth0 -d 10.1.4.2 -p 80 -r 10.0.0.24 25 | ./xlb_cmdline -i eth0 -D 10.1.4.2 -p 80 26 | } 27 | 28 | listsvc(){ 29 | ./xlb_cmdline -i eth0 -L 30 | } 31 | 32 | waitkey(){ 33 | echo 34 | #read -p "Type enter to continue ..." choice 35 | read -p "Type enter... " choice 36 | echo 37 | } 38 | 39 | createsvc 40 | 41 | waitkey 42 | 43 | listsvc 44 | 45 | waitkey 46 | 47 | deletesvc 48 | listsvc 49 | 50 | waitkey 51 | 52 | createsvc 53 | 54 | waitkey 55 | 56 | listsvc 57 | 58 | waitkey 59 | 60 | cat < in an IPv4 header and XDP_TX it out.\n" 25 | "The workers are selected by a round robin manner.\n\n"); 26 | printf("Usage: %s [...]\n", cmd); 27 | printf(" -i Interface name(eg. eth0)\n"); 28 | printf(" -A ServiceIP(a.k.a. 
VIP)\n"); 29 | printf(" -t (for TCP, optional, default)\n"); 30 | printf(" -u (for UDP, optional)\n"); 31 | printf(" -r WorkerIP\n"); 32 | printf(" -v verbose\n"); 33 | printf(" -L list lb table\n"); 34 | printf(" -l list lbcache\n"); 35 | printf(" -h Display this help\n"); 36 | } 37 | 38 | int main(int argc, char **argv) 39 | { 40 | const char *optstr = "i:A:D:a:d:r:p:SLlvhut"; 41 | int port = 0; 42 | struct iptnl_info tnl = {}; 43 | struct vip vip = {}; 44 | int opt; 45 | 46 | int fd_service, fd_linklist, fd_worker, fd_svcid; 47 | 48 | bool do_list = false; 49 | bool monitor = false; 50 | 51 | enum action action = ACTION_LIST; 52 | 53 | tnl.family = AF_UNSPEC; 54 | vip.protocol = IPPROTO_TCP; 55 | 56 | 57 | while ((opt = getopt(argc, argv, optstr)) != -1) { 58 | unsigned short family; 59 | unsigned int *v6; 60 | 61 | switch (opt) { 62 | case 'v': 63 | verbose = 1; 64 | break; 65 | case 'i': 66 | if (strlen(optarg) >= IF_NAMESIZE) { 67 | fprintf(stderr, "ERR: Intereface name too long\n"); 68 | goto error; 69 | } 70 | ifname = (char *)&ifname_buf; 71 | strncpy(ifname, optarg, IF_NAMESIZE); 72 | ifindex = if_nametoindex(ifname); 73 | if (ifindex == 0) { 74 | fprintf(stderr, 75 | "ERR: Interface name unknown err(%d):%s\n", 76 | errno, strerror(errno)); 77 | goto error; 78 | } 79 | break; 80 | case 'A': 81 | action = ACTION_ADD_SVC; 82 | vip.family = parse_ipstr(optarg, vip.daddr.v6); 83 | if (vip.family == AF_UNSPEC) 84 | return 1; 85 | break; 86 | case 'D': 87 | action = ACTION_DEL_SVC; 88 | vip.family = parse_ipstr(optarg, vip.daddr.v6); 89 | if (vip.family == AF_UNSPEC) 90 | return 1; 91 | break; 92 | case 'a': 93 | action = ACTION_ADD_REAL; 94 | vip.family = parse_ipstr(optarg, vip.daddr.v6); 95 | if (vip.family == AF_UNSPEC) 96 | return 1; 97 | break; 98 | case 'd': 99 | action = ACTION_DEL_REAL; 100 | vip.family = parse_ipstr(optarg, vip.daddr.v6); 101 | if (vip.family == AF_UNSPEC) 102 | return 1; 103 | break; 104 | case 'L': 105 | do_list = true; 106 | break; 
107 | case 'l': 108 | monitor = true; 109 | break; 110 | case 'u': 111 | vip.protocol = IPPROTO_UDP; 112 | break; 113 | case 't': 114 | vip.protocol = IPPROTO_TCP; 115 | break; 116 | case 'p': 117 | if (parse_port(optarg, &port)) 118 | return 1; 119 | break; 120 | case 'r': 121 | v6 = tnl.daddr.v6; 122 | 123 | family = parse_ipstr(optarg, v6); 124 | if (family == AF_UNSPEC) 125 | return 1; 126 | if (tnl.family == AF_UNSPEC) { 127 | tnl.family = family; 128 | } else if (tnl.family != family) { 129 | fprintf(stderr, 130 | "The IP version of the src and dst addresses used in the IP encapsulation does not match\n"); 131 | return 1; 132 | } 133 | break; 134 | case 'S': 135 | xdp_flags |= XDP_FLAGS_SKB_MODE; 136 | break; 137 | error: 138 | default: 139 | usage(argv[0]); 140 | return 1; 141 | } 142 | // opt_flags[opt] = 0; 143 | } 144 | 145 | 146 | if (ifindex == -1) { 147 | printf("ERR: required option -i missing"); 148 | usage(argv[0]); 149 | return EXIT_FAIL_OPTION; 150 | } 151 | 152 | vip.dport = htons(port); 153 | 154 | if (action == ACTION_ADD_SVC) { 155 | xlb_add_svc(&vip); 156 | } else if (action == ACTION_DEL_SVC) { 157 | xlb_del_svc(&vip); 158 | } else if (action == ACTION_ADD_REAL) { 159 | xlb_add_real(&vip, &tnl); 160 | } else if (action == ACTION_DEL_REAL) { 161 | xlb_del_real(&vip, &tnl); 162 | } 163 | 164 | if (DEBUG||verbose||do_list) { 165 | list_all(); 166 | } 167 | 168 | if (verbose) { 169 | service_list_all(); 170 | linklist_list_all(); 171 | worker_list_all(); 172 | svcid_list_all(); 173 | } 174 | 175 | if (monitor) { 176 | list_lbcache(); 177 | } 178 | 179 | return 0; 180 | } 181 | -------------------------------------------------------------------------------- /xlb_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Facebook 3 | * Copyright (c) 2018 Cluster Computing Inc. 
4 | * 5 | * This program is free software; you can redistribute it and/or 6 | * modify it under the terms of version 2 of the GNU General Public 7 | * License as published by the Free Software Foundation. 8 | */ 9 | 10 | #ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H 11 | #define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H 12 | 13 | #include 14 | 15 | #define EXIT_OK 0 16 | #define EXIT_FAIL 1 17 | #define EXIT_FAIL_OPTION 2 18 | #define EXIT_FAIL_XDP 3 19 | #define EXIT_FAIL_MAP 20 20 | #define EXIT_FAIL_MAP_KEY 21 21 | #define EXIT_FAIL_MAP_FILE 22 22 | #define EXIT_FAIL_MAP_FS 23 23 | #define EXIT_FAIL_IP 30 24 | #define EXIT_FAIL_PORT 31 25 | #define EXIT_FAIL_BPF 40 26 | #define EXIT_FAIL_BPF_ELF 41 27 | #define EXIT_FAIL_BPF_RELOCATE 42 28 | 29 | #define MAX_IPTNL_ENTRIES 256U 30 | #define MAX_SVC_ENTRIES 256U 31 | 32 | //#define ACTION_ADD (1<<0) 33 | //#define ACTION_DEL (1<<1) 34 | 35 | enum action { 36 | ACTION_LIST, 37 | ACTION_ADD_SVC, 38 | ACTION_DEL_SVC, 39 | ACTION_ADD_REAL, 40 | ACTION_DEL_REAL 41 | }; 42 | 43 | static int verbose = 0; 44 | 45 | //#define DEBUG true 46 | #ifndef DEBUG 47 | #define DEBUG false 48 | #endif 49 | 50 | static const char *file_service = "/sys/fs/bpf/service"; 51 | static const char *file_linklist = "/sys/fs/bpf/linklist"; 52 | static const char *file_worker = "/sys/fs/bpf/worker"; 53 | static const char *file_svcid = "/sys/fs/bpf/svcid"; 54 | static const char *file_lbcache = "/sys/fs/bpf/lbcache"; 55 | 56 | struct vip { 57 | union { 58 | __u32 v6[4]; 59 | __u32 v4; 60 | } daddr; 61 | __u16 dport; 62 | __u16 family; 63 | __u8 protocol; 64 | }; 65 | 66 | struct iptnl_info { 67 | union { 68 | __u32 v6[4]; 69 | __u32 v4; 70 | } saddr; 71 | union { 72 | __u32 v6[4]; 73 | __u32 v4; 74 | } daddr; 75 | __u16 family; 76 | // __u8 dmac[6]; 77 | char dmac[6]; 78 | // struct ether_addr* dmac; 79 | }; 80 | 81 | struct sip { 82 | union { 83 | __u32 v6[4]; 84 | __u32 v4; 85 | } saddr; 86 | __u16 sport; 87 | __u16 family; 88 | __u8 protocol; 89 | }; 
90 | 91 | struct flow { 92 | struct vip vip; 93 | struct sip sip; 94 | }; 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /xlb_kern.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 Facebook 3 | * Copyright (c) 2018 Cluster Computing Inc. 4 | * 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of version 2 of the GNU General Public 8 | * License as published by the Free Software Foundation. 9 | * 10 | */ 11 | #define KBUILD_MODNAME "foo" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "bpf_helpers.h" 20 | #include "xlb_common.h" 21 | 22 | struct bpf_map_def SEC("maps") service = { 23 | .type = BPF_MAP_TYPE_HASH, 24 | .key_size = sizeof(struct vip), 25 | .value_size = sizeof(__u64), 26 | .max_entries = MAX_IPTNL_ENTRIES, 27 | }; 28 | 29 | struct bpf_map_def SEC("maps") linklist = { 30 | .type = BPF_MAP_TYPE_HASH, 31 | .key_size = sizeof(__u64), 32 | .value_size = sizeof(__u64), 33 | .max_entries = MAX_IPTNL_ENTRIES, 34 | }; 35 | 36 | struct bpf_map_def SEC("maps") worker = { 37 | .type = BPF_MAP_TYPE_HASH, 38 | .key_size = sizeof(__u64), 39 | .value_size = sizeof(struct iptnl_info), 40 | .max_entries = 65536, 41 | }; 42 | 43 | struct bpf_map_def SEC("maps") lbcache = { 44 | .type = BPF_MAP_TYPE_LRU_HASH, 45 | // .type = BPF_MAP_TYPE_HASH, 46 | .key_size = sizeof(struct flow), 47 | .value_size = sizeof(__u64), 48 | .max_entries = 200, 49 | // .max_entries = 65536, 50 | }; 51 | 52 | struct bpf_map_def SEC("maps") svcid = { 53 | .type = BPF_MAP_TYPE_HASH, 54 | .key_size = sizeof(__u16), 55 | .value_size = sizeof(struct vip), 56 | .max_entries = 256, 57 | }; 58 | 59 | static __always_inline int get_dport(void *trans_data, void *data_end, 60 | u8 protocol) 61 | { 62 | struct tcphdr *th; 63 | struct udphdr *uh; 64 | 65 | switch 
/* NOTE(review): the opening of get_dport() lies before this chunk; the
 * signature below is reconstructed to mirror get_sport() exactly (the
 * visible tail returns th->dest / uh->dest) — confirm against the full
 * file. */
static __always_inline int get_dport(void *trans_data, void *data_end,
				     u8 protocol)
{
	struct tcphdr *th;
	struct udphdr *uh;

	switch (protocol) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)trans_data;
		if (th + 1 > data_end)	/* bounds check required by BPF verifier */
			return -1;
		return th->dest;	/* network byte order */
	case IPPROTO_UDP:
		uh = (struct udphdr *)trans_data;
		if (uh + 1 > data_end)
			return -1;
		return uh->dest;	/* network byte order */
	default:
		return 0;
	}
}

/* Extract the L4 source port from the transport header at @trans_data.
 * Returns the port in network byte order, -1 if the header would run past
 * @data_end, or 0 for protocols other than TCP/UDP. */
static __always_inline int get_sport(void *trans_data, void *data_end,
				     u8 protocol)
{
	struct tcphdr *th;
	struct udphdr *uh;

	switch (protocol) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)trans_data;
		if (th + 1 > data_end)	/* bounds check required by BPF verifier */
			return -1;
		return th->source;
	case IPPROTO_UDP:
		uh = (struct udphdr *)trans_data;
		if (uh + 1 > data_end)
			return -1;
		return uh->source;
	default:
		return 0;
	}
}

/* Fill in the outer Ethernet header for the encapsulated frame: source MAC
 * is taken from the original frame's destination (our own MAC), destination
 * MAC from the tunnel's next hop. */
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
				       const struct ethhdr *old_eth,
				       const struct iptnl_info *tnl,
				       __be16 h_proto)
{
	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
	new_eth->h_proto = h_proto;
}

/* Placeholder — intentionally empty in this version. */
static __always_inline void update_lbcache_v4(struct ethhdr *new_eth)
{
}

/* Load-balance one IPv4 packet: look up (vip, source) in the lbcache flow
 * table, fall back to round-robin selection via the service/linklist maps,
 * then IPIP-encapsulate the packet toward the chosen worker and XDP_TX it. */
static __always_inline int handle_ipv4(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	struct iptnl_info *tnl;
	struct ethhdr *new_eth;
	struct ethhdr *old_eth;
	struct iphdr *iph = data + sizeof(struct ethhdr);
	u16 *next_iph_u16;
	u16 payload_len;
	struct vip vip = {};
	int dport;
	u32 csum = 0;
	int i;

	if (iph + 1 > data_end)
		return XDP_DROP;

	dport = get_dport(iph + 1, data_end, iph->protocol);
	if (dport == -1)
		return XDP_DROP;

	/* Key for the service map: protocol/family/daddr/dport of the VIP. */
	vip.protocol = iph->protocol;
	vip.family = AF_INET;
	vip.daddr.v4 = iph->daddr;
	vip.dport = dport;
	payload_len = ntohs(iph->tot_len);

	struct flow flow = {};
	__u64 *wkid_p, wkid;
	__u64 *next_wkid_p, next_wkid;
	struct sip sip = {};
	int sport;

	if (iph + 1 > data_end)
		return XDP_DROP;

	sport = get_sport(iph + 1, data_end, iph->protocol);
	if (sport == -1)
		return XDP_DROP;

	sip.protocol = iph->protocol;
	sip.family = AF_INET;
	sip.saddr.v4 = iph->saddr;
	sip.sport = sport;

	/* Flow key = (vip, sip): sticky per-connection worker assignment. */
	flow.vip = vip;
	flow.sip = sip;

	wkid_p = bpf_map_lookup_elem(&lbcache, &flow);
	if (!wkid_p) {
		/* Cache miss: the service map holds the id of the NEXT
		 * worker to use for this VIP. */
		wkid_p = bpf_map_lookup_elem(&service, &vip);
		if (!wkid_p) return XDP_PASS;	/* not one of our VIPs */

		wkid = *wkid_p;
		bpf_map_update_elem(&lbcache, &flow, &wkid, BPF_ANY);

		/* Advance the round-robin cursor: linklist is a circular
		 * list of worker ids per service. */
		next_wkid_p = bpf_map_lookup_elem(&linklist, &wkid);
		if (!next_wkid_p) return XDP_PASS;
		next_wkid = *next_wkid_p;
		bpf_map_update_elem(&service, &vip, &next_wkid, BPF_ANY);
	}

	wkid = *wkid_p;
	tnl = bpf_map_lookup_elem(&worker, &wkid);
	if (!tnl || tnl->family != AF_INET)
		return XDP_PASS;

	/* Grow headroom by one IPv4 header for the IPIP encapsulation. */
	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
		return XDP_DROP;

	/* Pointers are invalidated by adjust_head; reload and re-check. */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;

	new_eth = data;
	iph = data + sizeof(*new_eth);
	old_eth = data + sizeof(*iph);

	if (new_eth + 1 > data_end ||
	    old_eth + 1 > data_end ||
	    iph + 1 > data_end)
		return XDP_DROP;

	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));

	/* Build the outer IPv4 header. */
	iph->version = 4;
	iph->ihl = sizeof(*iph) >> 2;
	iph->frag_off = 0;
	iph->protocol = IPPROTO_IPIP;
	iph->check = 0;
	iph->tos = 0;
	iph->tot_len = htons(payload_len + sizeof(*iph));
	iph->daddr = tnl->daddr.v4;
	iph->saddr = tnl->saddr.v4;
	iph->ttl = 8;

	/* 16-bit one's-complement checksum over the 20-byte header;
	 * unrolled so the verifier sees a bounded loop. */
	next_iph_u16 = (u16 *)iph;
#pragma clang loop unroll(full)
	for (i = 0; i < sizeof(*iph) >> 1; i++)
		csum += *next_iph_u16++;

	iph->check = ~((csum & 0xffff) + (csum >> 16));

	return XDP_TX;
}

SEC("xdp_tx_iptunnel")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;
	struct ethhdr *eth = data;
	__u16 h_proto;

	if (eth + 1 > data_end)
		return XDP_DROP;

	h_proto = eth->h_proto;

	/* Only IPv4 is load-balanced; everything else goes to the stack. */
	if (h_proto == htons(ETH_P_IP))
		return handle_ipv4(xdp);
	else
		return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

/* ---- end of xlb_kern.c. xlb_user.c followed here in the original dump;
 * its GPL header comment and #include list were garbled by extraction. ---- */
8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "bpf_load.h" 25 | #include "libbpf.h" 26 | #include "bpf_util.h" 27 | #include "xlb_common.h" 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | static char ifname_buf[IF_NAMESIZE]; 34 | static char *ifname = NULL; 35 | 36 | static int ifindex = -1; 37 | static __u32 xdp_flags = 0; 38 | 39 | #define NR_MAPS 5 40 | int maps_marked_for_export[MAX_MAPS] = { 0 }; 41 | 42 | static const char* map_idx_to_export_filename(int idx) 43 | { 44 | const char *file = NULL; 45 | 46 | /* Mapping map_fd[idx] to export filenames */ 47 | switch (idx) { 48 | case 0: 49 | file = file_service; 50 | break; 51 | case 1: 52 | file = file_linklist; 53 | break; 54 | case 2: 55 | file = file_worker; 56 | break; 57 | case 3: 58 | file = file_lbcache; 59 | break; 60 | case 4: 61 | file = file_svcid; 62 | break; 63 | default: 64 | break; 65 | } 66 | 67 | if (DEBUG) printf("FileNAME: %s \n", file); 68 | 69 | return file; 70 | } 71 | 72 | static void remove_xdp_program(int ifindex, const char *ifname, __u32 xdp_flags) 73 | { 74 | int i; 75 | fprintf(stderr, "Removing XDP program on ifindex:%d device:%s\n", 76 | ifindex, ifname); 77 | if (ifindex > -1) 78 | set_link_xdp_fd(ifindex, -1, xdp_flags); 79 | 80 | for (i = 0; i < NR_MAPS; i++) { 81 | const char *file = map_idx_to_export_filename(i); 82 | 83 | if (unlink(file) < 0) { 84 | printf("WARN: cannot rm map(%s) file:%s err(%d):%s\n", 85 | map_data[i].name, file, errno, strerror(errno)); 86 | } 87 | } 88 | } 89 | 90 | static void usage(const char *cmd) 91 | { 92 | printf("Start a XDP prog which encapsulates incoming packets\n"); 93 | printf("Usage: %s [...]\n", cmd); 94 | printf(" -i Interface Index\n"); 95 | printf(" -S use skb-mode\n"); 96 | printf(" -N enforce native mode\n"); 97 | printf(" -v verbose\n"); 98 | printf(" -h 
Display this help\n"); 99 | } 100 | 101 | #ifndef BPF_FS_MAGIC 102 | # define BPF_FS_MAGIC 0xcafe4a11 103 | #endif 104 | 105 | static int bpf_fs_check_path(const char *path) 106 | { 107 | struct statfs st_fs; 108 | char *dname, *dir; 109 | int err = 0; 110 | 111 | if (path == NULL) 112 | return -EINVAL; 113 | 114 | dname = strdup(path); 115 | if (dname == NULL) 116 | return -ENOMEM; 117 | 118 | dir = dirname(dname); 119 | if (statfs(dir, &st_fs)) { 120 | fprintf(stderr, "ERR: failed to statfs %s: (%d)%s\n", 121 | dir, errno, strerror(errno)); 122 | err = -errno; 123 | } 124 | free(dname); 125 | 126 | if (!err && st_fs.f_type != BPF_FS_MAGIC) { 127 | fprintf(stderr, 128 | "ERR: specified path %s is not on BPF FS\n\n" 129 | " You need to mount the BPF filesystem type like:\n" 130 | " mount -t bpf bpf /sys/fs/bpf/\n\n", 131 | path); 132 | err = -EINVAL; 133 | } 134 | 135 | return err; 136 | } 137 | 138 | int load_map_file(const char *file, struct bpf_map_data *map_data) 139 | { 140 | int fd; 141 | 142 | if (bpf_fs_check_path(file) < 0) { 143 | exit(EXIT_FAIL_MAP_FS); 144 | } 145 | 146 | fd = bpf_obj_get(file); 147 | if (fd > 0) { /* Great: map file already existed use it */ 148 | if (verbose) 149 | printf(" - Loaded bpf-map:%-30s from file:%s\n", 150 | map_data->name, file); 151 | return fd; 152 | } 153 | return -1; 154 | } 155 | 156 | void pre_load_maps_via_fs(struct bpf_map_data *map_data, int idx) 157 | { 158 | const char *file; 159 | int fd; 160 | 161 | file = map_idx_to_export_filename(idx); 162 | fd = load_map_file(file, map_data); 163 | 164 | if (fd > 0) { 165 | map_data->fd = fd; 166 | } else { 167 | maps_marked_for_export[idx] = 1; 168 | } 169 | } 170 | 171 | int export_map_idx(int map_idx) 172 | { 173 | const char *file; 174 | 175 | file = map_idx_to_export_filename(map_idx); 176 | 177 | if (bpf_obj_pin(map_fd[map_idx], file) != 0) { 178 | fprintf(stderr, "ERR: Cannot pin map(%s) file:%s err(%d):%s\n", 179 | map_data[map_idx].name, file, errno, 
strerror(errno)); 180 | return EXIT_FAIL_MAP; 181 | } 182 | if (verbose) 183 | printf(" - Export bpf-map:%-30s to file:%s\n", 184 | map_data[map_idx].name, file); 185 | return 0; 186 | } 187 | 188 | void export_maps(void) 189 | { 190 | int i; 191 | 192 | for (i = 0; i < NR_MAPS; i++) { 193 | if (maps_marked_for_export[i] == 1) 194 | export_map_idx(i); 195 | } 196 | } 197 | 198 | void chown_maps(uid_t owner, gid_t group) 199 | { 200 | const char *file; 201 | int i; 202 | 203 | for (i = 0; i < NR_MAPS; i++) { 204 | file = map_idx_to_export_filename(i); 205 | 206 | if (chown(file, owner, group) < 0) 207 | fprintf(stderr, 208 | "WARN: Cannot chown file:%s err(%d):%s\n", 209 | file, errno, strerror(errno)); 210 | } 211 | } 212 | 213 | int main(int argc, char **argv) 214 | { 215 | const char *optstr = "i:Shvr"; 216 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 217 | char filename[256]; 218 | int opt; 219 | 220 | uid_t owner = -1; /* -1 result in no-change of owner */ 221 | gid_t group = -1; 222 | 223 | bool rm_xdp_prog = false; 224 | 225 | while ((opt = getopt(argc, argv, optstr)) != -1) { 226 | switch (opt) { 227 | case 'v': 228 | verbose = 1; 229 | break; 230 | case 'r': 231 | rm_xdp_prog = true; 232 | break; 233 | case 'i': 234 | if (strlen(optarg) >= IF_NAMESIZE) { 235 | fprintf(stderr, "ERR: Intereface name too long\n"); 236 | goto error; 237 | } 238 | ifname = (char *)&ifname_buf; 239 | strncpy(ifname, optarg, IF_NAMESIZE); 240 | ifindex = if_nametoindex(ifname); 241 | if (ifindex == 0) { 242 | fprintf(stderr, 243 | "ERR: Interface name unknown err(%d):%s\n", 244 | errno, strerror(errno)); 245 | goto error; 246 | } 247 | break; 248 | case 'S': 249 | xdp_flags |= XDP_FLAGS_SKB_MODE; 250 | break; 251 | error: 252 | default: 253 | usage(argv[0]); 254 | return 1; 255 | } 256 | } 257 | 258 | if (ifindex == -1) { 259 | printf("ERR: required option -i missing"); 260 | usage(argv[0]); 261 | return EXIT_FAIL_OPTION; 262 | } 263 | 264 | if (rm_xdp_prog) { 265 | 
/* Parse @ipstr into @addr (an array of four 32-bit words, v6-sized).
 * Returns the detected address family: AF_INET6, AF_INET (with the unused
 * upper words zeroed), or AF_UNSPEC with a message on stderr. */
int parse_ipstr(const char *ipstr, unsigned int *addr)
{
	if (inet_pton(AF_INET6, ipstr, addr) == 1) {
		return AF_INET6;
	} else if (inet_pton(AF_INET, ipstr, addr) == 1) {
		addr[1] = addr[2] = addr[3] = 0;	/* zero unused v6 words */
		return AF_INET;
	}

	fprintf(stderr, "%s is an invalid IP\n", ipstr);
	return AF_UNSPEC;
}

/* Parse @port_str as a decimal TCP/UDP port into *@port.
 * Returns 0 on success, 1 on any invalid input (message on stderr).
 * *@port is untouched on failure. */
int parse_port(const char *port_str, int *port)
{
	char *end;
	long tmp_port;

	tmp_port = strtol(port_str, &end, 10);
	/* fix: also reject empty input and trailing garbage ("80abc");
	 * the original only range-checked the value strtol managed to
	 * extract, silently accepting malformed strings. */
	if (end == port_str || *end != '\0' ||
	    tmp_port < 1 || tmp_port > 65535) {
		fprintf(stderr, "Invalid port(s):%s\n", port_str);
		return 1;
	}

	*port = (int)tmp_port;
	return 0;
}
/* Pack a dotted-quad IPv4 string and a service id into a single __u64 map
 * key: the service id occupies the bits above 32, the address the low 32.
 * Example: conv("1.2.3.4", 5) == (5ULL << 32) | 0x01020304. */
__u64 conv(char ipadr[], __u16 svcid)
{
	__u64 num = svcid, val;
	char *tok, *ptr;
	char ip_txt[INET_ADDRSTRLEN] = {0};

	/* fix: copy at most size-1 bytes so ip_txt is always NUL-terminated;
	 * the original strncpy() could leave it unterminated and hand
	 * strtok() an unbounded buffer. */
	strncpy(ip_txt, ipadr, INET_ADDRSTRLEN - 1);

	tok = strtok(ip_txt, ".");
	while (tok != NULL) {
		/* fix: base 10 instead of 0 — inet_ntop never emits leading
		 * zeros, but base 0 would misread "010" as octal. */
		val = strtoul(tok, &ptr, 10);
		num = (num << 8) + val;
		tok = strtok(NULL, ".");
	}
	return num;
}
min : ipint; 109 | 110 | } else if (( min < ipint ) && ( ipint < max )){ 111 | 112 | key = min; 113 | bpf_map_lookup_elem(fd, &key, &next); 114 | 115 | while ( next < ipint ){ // find the key where (key < ipint < next) 116 | key = next; 117 | bpf_map_lookup_elem(fd, &key, &next); 118 | } 119 | assert(bpf_map_update_elem(fd, &key, &ipint, BPF_ANY) == 0); 120 | assert(bpf_map_update_elem(fd, &ipint, &next, BPF_ANY) == 0); 121 | } 122 | 123 | } 124 | } 125 | 126 | void lnklst_del_from_map(int fd, struct iptnl_info *vip , __u64 *head){ 127 | __u64 key = *head , next, min, max, ipint; 128 | char ip_txt[INET_ADDRSTRLEN] = {0}; 129 | 130 | int svcint = *head>>32; 131 | 132 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 133 | ipint = conv(ip_txt, svcint); 134 | 135 | if ( bpf_map_lookup_elem(fd, &ipint, &next) != 0 ){ 136 | printf("Worker does not exist!\n"); 137 | return; 138 | } 139 | 140 | if ( ipint == next ) {// last entry. Delete & update head 141 | 142 | assert(bpf_map_delete_elem(fd, &ipint) == 0 ); 143 | 144 | *head = conv("0.0.0.0", svcint); 145 | 146 | } else { 147 | bpf_map_lookup_elem(fd, &key, &next); 148 | // Find minimum 149 | if (key > next){ // if head is the last entry 150 | min = next; 151 | max = key; 152 | } else { 153 | while (key < next){ 154 | key = next; 155 | bpf_map_lookup_elem(fd, &key, &next); 156 | } 157 | min = next; 158 | max = key; 159 | } 160 | 161 | *head = min; 162 | 163 | if ( ipint == min ){ // new entry is the smallest or the largest 164 | 165 | bpf_map_lookup_elem(fd, &ipint, &next); 166 | 167 | assert(bpf_map_update_elem(fd, &max, &next, BPF_ANY) == 0 ); 168 | assert(bpf_map_delete_elem(fd, &ipint) == 0 ); 169 | 170 | *head = next; 171 | 172 | } else if ( max == ipint ){ // new entry is the smallest or the largest 173 | 174 | key = min; 175 | bpf_map_lookup_elem(fd, &key, &next); 176 | 177 | while ( next < ipint ){ // find the key where (key < ipint = next = max) 178 | key = next; 179 | 
bpf_map_lookup_elem(fd, &key, &next); 180 | } 181 | assert(bpf_map_update_elem(fd, &key, &min, BPF_ANY) == 0); 182 | assert(bpf_map_delete_elem(fd, &ipint) == 0); 183 | 184 | } else if (( min < ipint ) && ( ipint < max ) ){ 185 | 186 | key = min; 187 | bpf_map_lookup_elem(fd, &key, &next); 188 | 189 | while ( next < ipint ){ // find the key where (key < ipint = next) 190 | key = next; 191 | bpf_map_lookup_elem(fd, &key, &next); 192 | } 193 | bpf_map_lookup_elem(fd, &ipint, &next); 194 | assert(bpf_map_update_elem(fd, &key, &next, BPF_ANY) == 0); 195 | assert(bpf_map_delete_elem(fd, &ipint) == 0); 196 | } 197 | 198 | } 199 | } 200 | 201 | void svcid_list_all() 202 | { 203 | 204 | __u64 key = 0, next_key; 205 | __u64 head; 206 | 207 | int fd = open_bpf_map(file_svcid); 208 | 209 | while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 210 | key = next_key; 211 | bpf_map_lookup_elem(fd, &key, &head); 212 | 213 | printf("svcid = %llu\n}\n", key); 214 | printf("head = %llu\n}\n", head); 215 | } 216 | 217 | close(fd); 218 | } 219 | 220 | void service_list_all() 221 | { 222 | 223 | struct vip key = {}, next_key; 224 | __u64 head; 225 | char ip_txt[INET_ADDRSTRLEN] = {0}; 226 | 227 | int fd = open_bpf_map(file_service); 228 | 229 | printf("Service List: \n"); 230 | while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 231 | key = next_key; 232 | bpf_map_lookup_elem(fd, &key, &head); 233 | 234 | assert(inet_ntop(key.family, &key.daddr.v4, ip_txt, sizeof(ip_txt))); 235 | printf("{\nVIP: %s\n" , ip_txt); 236 | printf("%d\n", key.protocol ); 237 | printf("%d\n", ntohs(key.dport)); 238 | printf("head = %llu\n}\n", head); 239 | } 240 | printf("\n"); 241 | 242 | close(fd); 243 | } 244 | 245 | void worker_list_all() 246 | { 247 | __u64 key = 0, next_key; 248 | struct iptnl_info value; 249 | char ip_txt[INET_ADDRSTRLEN] = {0}; 250 | char mac_txt[] = "00:00:00:00:00:00"; 251 | 252 | int fd = open_bpf_map(file_worker); 253 | 254 | while (bpf_map_get_next_key(fd, &key, &next_key) 
== 0) { 255 | bpf_map_lookup_elem(fd, &next_key, &value); 256 | 257 | printf("{\nkey: %llu\n" , next_key); 258 | printf("{\nsvcid: %d\n" , next_key>>32); 259 | 260 | assert(inet_ntop(value.family, &value.saddr.v4, ip_txt, sizeof(ip_txt))); 261 | printf("src: %s\n", ip_txt ); 262 | assert(inet_ntop(value.family, &value.daddr.v4, ip_txt, sizeof(ip_txt))); 263 | printf("dst: %s\n", ip_txt ); 264 | assert(ether_ntoa_r((struct ether_addr *)value.dmac, mac_txt)); 265 | printf("mac: %s\n}\n", mac_txt ); 266 | 267 | key = next_key; 268 | } 269 | 270 | close(fd); 271 | } 272 | 273 | void linklist_list_all(){ 274 | 275 | __u64 key = 0, next_key; 276 | __u64 value; 277 | 278 | int fd = open_bpf_map(file_linklist); 279 | 280 | while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 281 | key = next_key; 282 | bpf_map_lookup_elem(fd, &key, &value); 283 | printf("(key, value) = (%llu,%llu)\n" , key, value); 284 | } 285 | close(fd); 286 | } 287 | 288 | void show_worker( __u64 key){ 289 | 290 | struct iptnl_info value; 291 | char daddr_txt[INET_ADDRSTRLEN] = {0}; 292 | char saddr_txt[INET_ADDRSTRLEN] = {0}; 293 | char mac_txt[] = "00:00:00:00:00:00"; 294 | 295 | int fd = open_bpf_map(file_worker); 296 | 297 | if (bpf_map_lookup_elem(fd, &key, &value) == -1 ) return; 298 | 299 | assert(inet_ntop(value.family, &value.saddr.v4, saddr_txt, sizeof(saddr_txt))); 300 | assert(inet_ntop(value.family, &value.daddr.v4, daddr_txt, sizeof(daddr_txt))); 301 | assert(ether_ntoa_r((struct ether_addr *)value.dmac, mac_txt)); 302 | 303 | if (DEBUG) printf("key: %llu\n", key); 304 | 305 | // printf(" dst: %u\n", value.daddr.v4); 306 | printf(" src: %s, dst: %s (%s)\n", saddr_txt, daddr_txt, mac_txt ); 307 | 308 | close(fd); 309 | } 310 | 311 | void list_worker_from_head( __u64 head){ 312 | 313 | __u64 key = head; 314 | __u64 value=0; 315 | 316 | int fd = open_bpf_map(file_linklist); 317 | 318 | printf("{\n"); 319 | while (value != head){ 320 | show_worker(key); 321 | if (bpf_map_lookup_elem(fd, 
&key, &value) != 0) break; 322 | key = value; 323 | } 324 | printf("}\n"); 325 | 326 | close(fd); 327 | } 328 | 329 | void list_all() 330 | { 331 | int fd, flag=0; 332 | struct vip key = {}, next_key; 333 | __u64 head; 334 | char daddr_txt[INET_ADDRSTRLEN] = {0}; 335 | 336 | fd = open_bpf_map(file_service); 337 | 338 | while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 339 | key = next_key; 340 | bpf_map_lookup_elem(fd, &key, &head); 341 | 342 | assert(inet_ntop(key.family, &key.daddr.v4, daddr_txt, sizeof(daddr_txt))); 343 | printf("service(#%d): %s:%d(%d) " , (__u16)(head>>32), daddr_txt, ntohs(key.dport), key.protocol); 344 | 345 | if (DEBUG) printf(", head = %llu ", head); 346 | 347 | list_worker_from_head(head); 348 | flag=1; 349 | } 350 | 351 | if (flag == 0){ 352 | printf("We have no service here.\n"); 353 | } 354 | 355 | close(fd); 356 | } 357 | 358 | void list_lbcache() 359 | { 360 | int fd; 361 | struct flow key = {}, next_key; 362 | __u64 wkid; 363 | 364 | char daddr_txt[INET_ADDRSTRLEN] = {0}; 365 | char saddr_txt[INET_ADDRSTRLEN] = {0}; 366 | 367 | fd = open_bpf_map(file_lbcache); 368 | int fdw = open_bpf_map(file_worker); 369 | 370 | while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { 371 | 372 | key = next_key; 373 | bpf_map_lookup_elem(fd, &key, &wkid); 374 | 375 | inet_ntop(key.vip.family, &key.vip.daddr.v4, daddr_txt, sizeof(daddr_txt)); 376 | inet_ntop(key.sip.family, &key.sip.saddr.v4, saddr_txt, sizeof(saddr_txt)); 377 | 378 | printf(" %s:%d -> %s:%d (%d) => " 379 | ,saddr_txt,ntohs(key.sip.sport) 380 | ,daddr_txt,ntohs(key.vip.dport) 381 | ,key.vip.protocol 382 | ); 383 | 384 | struct iptnl_info value; 385 | char mac_txt[] = "00:00:00:00:00:00"; 386 | 387 | bpf_map_lookup_elem(fdw, &wkid, &value); 388 | inet_ntop(value.family, &value.daddr.v4, daddr_txt, sizeof(daddr_txt)); 389 | assert(ether_ntoa_r((struct ether_addr *)value.dmac, mac_txt)); 390 | printf("%s (%s)\n", daddr_txt, mac_txt ); 391 | 392 | } 393 | 394 | close(fdw); 395 | 
close(fd); 396 | } 397 | 398 | void xlb_add_svc(struct vip* vip) 399 | { 400 | int i; 401 | struct vip vip_tmp; 402 | char ip_txt[INET_ADDRSTRLEN] = {0}; 403 | __u16 svcid = 0; 404 | __u64 head; 405 | 406 | // printf("vip->daddr.v4 = %u \n", &vip->daddr.v4); 407 | // assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 408 | // printf("Adding service \"%s:%d\".\n", ip_txt, ntohs(vip->dport)); 409 | 410 | int fd_service = open_bpf_map(file_service); 411 | int fd_svcid = open_bpf_map(file_svcid); 412 | 413 | // 0. Check if the service already exists. 414 | if (bpf_map_lookup_elem(fd_service, vip, &head) == 0 ){ 415 | // assert(inet_ntop((*vip).family, &(*vip).daddr.v4, ip_txt, sizeof(ip_txt))); 416 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 417 | printf("%s:%d (#%d)\n",ip_txt,ntohs(vip->dport),head>>32); 418 | return; 419 | } 420 | 421 | // 1. Assign svcid and create head(32+8 bit number). 422 | for (i = 1; i < MAX_SVC_ENTRIES ; i++){ 423 | if (bpf_map_lookup_elem(fd_svcid, &i, &vip_tmp) == -1 ){ 424 | svcid = i ; 425 | bpf_map_update_elem(fd_svcid, &i, vip, BPF_NOEXIST); 426 | break ; 427 | } 428 | } 429 | if (svcid == 0) return; 430 | 431 | // printf("Service id %d\n", svcid); 432 | 433 | head = conv("0.0.0.0", svcid); 434 | 435 | // 2. Add service to the service map. 436 | // bpf_map_update_elem(fd_service, &vip->daddr.v4, &head, BPF_NOEXIST); 437 | bpf_map_update_elem(fd_service, vip, &head, BPF_NOEXIST); 438 | 439 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 440 | printf("+%s:%d (#%d)\n",ip_txt,ntohs(vip->dport),svcid); 441 | 442 | close(fd_service); 443 | close(fd_svcid); 444 | } 445 | 446 | void xlb_del_svc(struct vip* vip) 447 | { 448 | char ip_txt[INET_ADDRSTRLEN] = {0}; 449 | __u16 svcid = 0; 450 | __u64 head; 451 | 452 | int fd_service = open_bpf_map(file_service); 453 | int fd_svcid = open_bpf_map(file_svcid); 454 | 455 | // 0. Check if the service & worker exist. 
456 | if (bpf_map_lookup_elem(fd_service, vip, &head) == -1 ){ 457 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 458 | printf("The service \"%s:%d\" does not exist!\n", ip_txt, ntohs(vip->dport)); 459 | return; 460 | } 461 | svcid = head>>32; 462 | 463 | if (head == conv("0.0.0.0", svcid)) { // If there is no worker then remove service 464 | bpf_map_delete_elem(fd_service, vip); 465 | bpf_map_delete_elem(fd_svcid, &svcid); 466 | 467 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 468 | printf("-%s:%d (#%d)\n",ip_txt,ntohs(vip->dport),svcid); 469 | 470 | } else { 471 | printf("\nWorkers still exist for service(#%d)! Delete them first.\n\n",svcid); 472 | // do_list=1; 473 | // return EXIT_FAIL; 474 | } 475 | close(fd_service); 476 | close(fd_svcid); 477 | } 478 | 479 | void xlb_add_real(struct vip* vip, struct iptnl_info* tnl) 480 | { 481 | char ip_txt[INET_ADDRSTRLEN] = {0}; 482 | struct vip vip_tmp; 483 | struct iptnl_info tnl_tmp = {}; 484 | __u16 svcid = 0; 485 | __u64 head, daddrint; 486 | 487 | 488 | in_addr_t nh_ip; 489 | int dev=0; 490 | 491 | xlb_iproute_get(&tnl->daddr.v4, &tnl->saddr.v4, &nh_ip, &dev); 492 | xlb_get_mac(&nh_ip, tnl->dmac , &dev); 493 | 494 | if (DEBUG){ 495 | char buf[256]; 496 | char mac_txt[] = "00:00:00:00:00:00"; 497 | 498 | printf("src: %s \n", inet_ntop(AF_INET, &tnl->saddr.v4, buf, 256)); 499 | assert(ether_ntoa_r((struct ether_addr *)tnl->dmac, mac_txt)); 500 | printf("nexthop: %s (%s) \n", inet_ntop(AF_INET, &nh_ip, buf, 256), mac_txt); 501 | // printf("mac: %s\n", mac_txt ); 502 | } 503 | 504 | int fd_service = open_bpf_map(file_service); 505 | int fd_linklist = open_bpf_map(file_linklist); 506 | int fd_worker = open_bpf_map(file_worker); 507 | int fd_svcid = open_bpf_map(file_svcid); 508 | 509 | // 0. Check if the service & worker exist. 
510 | if (bpf_map_lookup_elem(fd_service, vip, &head) == -1 ){ 511 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 512 | printf("The service \"%s:%d\" does not exist!\n", ip_txt, ntohs(vip->dport)); 513 | return; 514 | } 515 | svcid = head>>32; 516 | 517 | if (bpf_map_lookup_elem(fd_svcid, &svcid, &vip_tmp) == -1 ){ 518 | // No svcid in the fd_svcid map? Unlikey but just checking. 519 | return; 520 | } 521 | 522 | assert(inet_ntop(tnl->family, &tnl->daddr.v4, ip_txt, sizeof(ip_txt))); 523 | daddrint = conv(ip_txt, svcid); 524 | 525 | // 1. Check if the head is for "0.0.0.0" i.e. there's no worker yet. 526 | // If so, generate new head from worker ip. 527 | 528 | if (head == conv("0.0.0.0",svcid)) { 529 | head = daddrint; 530 | } 531 | 532 | // 2. Check if the worker already exists for the service. 533 | if (bpf_map_lookup_elem(fd_worker, &daddrint, &tnl_tmp) == 0 ){ 534 | // printf("\"%s\" already exists for service(#%d)!\n",ip_txt,svcid); 535 | printf(" %s (#%d)\n",ip_txt,svcid); 536 | return; 537 | } 538 | 539 | if (verbose) printf("head old = %llu\n", head); 540 | 541 | // 3. Insert wkrtag into the linked-list. 542 | // 4. Add worker. 543 | // 5. Update service map entry with new head. 
544 | lnklst_add_to_map(fd_linklist, tnl, &head); 545 | bpf_map_update_elem(fd_worker, &daddrint, tnl, BPF_ANY); 546 | bpf_map_update_elem(fd_service, &vip->daddr.v4, &head, BPF_ANY); 547 | 548 | // printf("+ %s added for #%d\n",ip_txt,svcid); 549 | printf("+ %s (#%d)\n",ip_txt,svcid); 550 | 551 | if (verbose) printf("head new = %llu\n", head); 552 | 553 | close(fd_service); 554 | close(fd_svcid); 555 | close(fd_linklist); 556 | close(fd_worker); 557 | } 558 | 559 | void xlb_del_real(struct vip* vip, struct iptnl_info* tnl) 560 | { 561 | char ip_txt[INET_ADDRSTRLEN] = {0}; 562 | struct iptnl_info tnl_tmp = {}; 563 | __u16 svcid = 0; 564 | __u64 head, daddrint; 565 | 566 | 567 | int fd_service = open_bpf_map(file_service); 568 | int fd_linklist = open_bpf_map(file_linklist); 569 | int fd_worker = open_bpf_map(file_worker); 570 | 571 | // 0. Check if the service & worker exist. 572 | if (bpf_map_lookup_elem(fd_service, vip, &head) == -1 ){ 573 | assert(inet_ntop(vip->family, &vip->daddr.v4, ip_txt, sizeof(ip_txt))); 574 | printf("The service \"%s:%d\" does not exist!\n", ip_txt, ntohs(vip->dport)); 575 | return; 576 | } 577 | svcid = head>>32; 578 | 579 | assert(inet_ntop(tnl->family, &tnl->daddr.v4, ip_txt, sizeof(ip_txt))); 580 | daddrint = conv(ip_txt, svcid); 581 | if (bpf_map_lookup_elem(fd_worker, &daddrint, &tnl_tmp) == -1 ){ 582 | printf("%s does not exist for service(#%d)!\n",ip_txt,svcid); 583 | return; 584 | } 585 | 586 | 587 | // 1. Delete wkrtag from the linked-list. 588 | // lnklst_del_from_map(fd_linklist, &tnl, &daddr); 589 | // 2. Delete worker. 590 | // 3. Update service map entry with new head. 
591 | 592 | lnklst_del_from_map(fd_linklist, tnl, &head); 593 | bpf_map_delete_elem(fd_worker, &daddrint); 594 | bpf_map_update_elem(fd_service, &vip->daddr.v4, &head, BPF_ANY); 595 | 596 | // printf(" %s removed from #%d\n",ip_txt,svcid); 597 | printf("- %s (#%d)\n",ip_txt,svcid); 598 | 599 | close(fd_service); 600 | close(fd_linklist); 601 | close(fd_worker); 602 | } 603 | 604 | -------------------------------------------------------------------------------- /xlb_util.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "bpf_load.h" 15 | #include "libbpf.h" 16 | #include "bpf_util.h" 17 | #include 18 | #include "xlb_common.h" 19 | 20 | int parse_ipstr(const char*, unsigned int*); 21 | int parse_port(const char*, int*); 22 | 23 | int open_bpf_map(const char*); 24 | 25 | void lnklst_add_to_map(int, struct iptnl_info *, __u64*); 26 | void lnklst_del_from_map(int, struct iptnl_info*, __u64*); 27 | 28 | void svcid_list_all(); 29 | void service_list_all(); 30 | void worker_list_all(); 31 | void linklist_list_all(); 32 | void show_worker(__u64); 33 | void list_worker_from_head(__u64); 34 | void list_all(); 35 | void list_lbcache(); 36 | 37 | void xlb_add_svc(struct vip*); 38 | void xlb_del_svc(struct vip*); 39 | void xlb_add_real(struct vip*, struct iptnl_info*); 40 | void xlb_del_real(struct vip*, struct iptnl_info*); 41 | 42 | struct _service { 43 | struct vip svc; 44 | struct iptnl_info wkr[256]; 45 | int wkr_count; 46 | }; 47 | 48 | 49 | -------------------------------------------------------------------------------- /xlbd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "xlb_util.h" 3 | 4 | char* conf_yaml; 5 | 6 | 7 | enum state_value { 8 | EXPECT_NONE, 9 | EXPECT_MAP, 10 | EXPECT_IPV4, 11 
| EXPECT_PORT, 12 | }; 13 | 14 | enum vip_or_rip { 15 | NONE, 16 | VIP, 17 | RIP, 18 | }; 19 | 20 | struct parser_state { 21 | int rip_nest_level; 22 | int vip_nest_level; 23 | enum state_value state; 24 | enum vip_or_rip vor; 25 | char *vip; 26 | char *rip; 27 | char *port; 28 | }; 29 | 30 | int svc_num; 31 | 32 | struct _service service[256]; 33 | 34 | void prune_workers(){ 35 | __u64 key = 0, next_key; 36 | struct iptnl_info tnl; 37 | char ip_txt[INET_ADDRSTRLEN] = {0}; 38 | 39 | int fd_worker = open_bpf_map(file_worker); 40 | 41 | while (bpf_map_get_next_key(fd_worker, &key, &next_key) == 0) { 42 | bool doomed_worker = true; 43 | bpf_map_lookup_elem(fd_worker, &next_key, &tnl); 44 | 45 | if(DEBUG){ 46 | printf("\nsvcid: %d\n" , next_key>>32); 47 | assert(inet_ntop(tnl.family, &tnl.daddr.v4, ip_txt, sizeof(ip_txt))); 48 | printf("dst: %s\n", ip_txt ); 49 | } 50 | 51 | struct vip vip; 52 | int svcid = next_key>>32; 53 | 54 | int fd_svcid = open_bpf_map(file_svcid); 55 | bpf_map_lookup_elem(fd_svcid, &svcid, &vip); 56 | close(fd_svcid); 57 | 58 | for (int k=1 ; k < svc_num+1;k++){ 59 | if ( vip.daddr.v4 == service[k].svc.daddr.v4 && 60 | vip.dport == service[k].svc.dport && 61 | vip.protocol == service[k].svc.protocol){ 62 | 63 | for (int l=0 ; l < service[k].wkr_count ;l++){ 64 | if (DEBUG) 65 | printf("%d,%d,%d,%d\n",tnl.daddr.v4,service->wkr[l].daddr.v4,l,service->wkr_count); 66 | 67 | if ( tnl.daddr.v4 == service[k].wkr[l].daddr.v4){ 68 | doomed_worker = false; 69 | break; 70 | } 71 | } 72 | 73 | if (doomed_worker == false) 74 | break; 75 | } 76 | } 77 | 78 | if (doomed_worker==true){ 79 | if (DEBUG){ 80 | assert(inet_ntop(tnl.family, &tnl.daddr.v4, ip_txt, sizeof(ip_txt))); 81 | printf("Worker %s for #%d is doomed\n", ip_txt, svcid); 82 | } 83 | xlb_del_real(&vip,&tnl); 84 | } 85 | 86 | key = next_key; 87 | } 88 | 89 | close(fd_worker); 90 | } 91 | 92 | void prune_services() 93 | { 94 | struct vip key = {}, next_key; 95 | __u64 head,value; 96 | 97 | int 
fd_service = open_bpf_map(file_service); 98 | 99 | while (bpf_map_get_next_key(fd_service, &key, &next_key) == 0) { 100 | key = next_key; 101 | bpf_map_lookup_elem(fd_service, &key, &head); 102 | 103 | bool doomed_service = true; 104 | if (DEBUG) 105 | printf("%d, %d, %d\n",key.daddr.v4, key.dport, key.protocol); 106 | 107 | for (int k=1 ; k < svc_num+1;k++){ 108 | if (DEBUG) 109 | printf("....-> %d, %d, %d\n",service[k].svc.daddr.v4, service[k].svc.dport, service[k].svc.protocol); 110 | 111 | if ( key.daddr.v4 == service[k].svc.daddr.v4 && 112 | key.dport == service[k].svc.dport && 113 | key.protocol == service[k].svc.protocol){ 114 | doomed_service = false; 115 | } 116 | } 117 | 118 | if (doomed_service){ 119 | if (DEBUG){ 120 | char ip_txt[INET_ADDRSTRLEN] = {0}; 121 | assert(inet_ntop(key.family, &key.daddr.v4, ip_txt, sizeof(ip_txt))); 122 | printf("Service %s:%d(%d) is doomed\n", ip_txt, ntohs(key.dport), key.protocol); 123 | } 124 | xlb_del_svc(&key); 125 | } 126 | } 127 | 128 | close(fd_service); 129 | } 130 | 131 | int reflect_yaml() 132 | { 133 | for (int k=1 ; k < svc_num+1;k++){ 134 | xlb_add_svc(&service[k].svc); 135 | for (int l=0 ; l < service[k].wkr_count ;l++){ 136 | xlb_add_real(&service[k].svc, &service[k].wkr[l]); 137 | } 138 | } 139 | 140 | printf("\n"); 141 | 142 | prune_workers(); 143 | prune_services(); 144 | 145 | printf("\n"); 146 | 147 | return 0; 148 | } 149 | 150 | int parse_yaml() 151 | { 152 | struct _rs { 153 | char *ipv4; 154 | }; 155 | 156 | struct _vs { 157 | int num_rs; 158 | char *ipv4; 159 | char *port; 160 | struct _rs rs[256]; 161 | }; 162 | 163 | FILE *fh; 164 | yaml_parser_t parser; 165 | yaml_event_t event; 166 | int nest_level = 0 ; 167 | struct parser_state state = {.state=EXPECT_NONE}; 168 | 169 | struct _vs *vs = malloc(sizeof(struct _vs)*256); 170 | int j=0,i=0; 171 | 172 | fh = fopen(conf_yaml, "rb"); 173 | if(fh == NULL) 174 | printf("Failed to open \"%s\"\n", conf_yaml); 175 | assert(fh); 176 | 177 | 
if(!yaml_parser_initialize(&parser)) 178 | fputs("Failed to initialize parser!\n", stderr); 179 | if(fh == NULL) 180 | fputs("Failed to open file!\n", stderr); 181 | 182 | yaml_parser_set_input_file(&parser, fh); 183 | 184 | do { 185 | if (!yaml_parser_parse(&parser, &event)) { 186 | printf("Parser error %d\n", parser.error); 187 | exit(EXIT_FAILURE); 188 | } 189 | 190 | switch(event.type) 191 | { 192 | case YAML_MAPPING_START_EVENT: 193 | nest_level++; 194 | break; 195 | case YAML_MAPPING_END_EVENT: 196 | nest_level--; 197 | if ( state.rip_nest_level == nest_level) { 198 | // printf("(VIP,PORT,RIP) = (%s,%s,%s)\n", state.vip, state.port, state.rip); 199 | vs[i].rs[j].ipv4 = strdup(state.rip); 200 | j++; 201 | vs[i].num_rs=j; 202 | } 203 | break; 204 | case YAML_SCALAR_EVENT: 205 | 206 | if (strcmp(event.data.scalar.value, "virtual_server") == 0) { 207 | state.state = EXPECT_MAP; 208 | state.vor = VIP; 209 | i++;vs[i].num_rs=0; 210 | // vs[i].num_rs=0;i++; 211 | state.vip_nest_level = nest_level; 212 | } else if (strcmp((char*)event.data.scalar.value, "real_servers") == 0 || 213 | strcmp((char*)event.data.scalar.value, "real_servers") == 0) { 214 | // printf("(VIP,PORT) = (%s,%s)\n", state.vip, state.port); 215 | vs[i].ipv4 = strdup(state.vip); 216 | vs[i].port = strdup(state.port); 217 | j=0; 218 | state.state = EXPECT_MAP; 219 | state.vor = RIP; 220 | state.rip_nest_level = nest_level; 221 | } else if (strcmp((char*)event.data.scalar.value, "ipv4") == 0 ){ 222 | state.state = EXPECT_IPV4; 223 | } else if (strcmp(event.data.scalar.value, "port") == 0 ){ 224 | state.state = EXPECT_PORT; 225 | } else { // parse values 226 | 227 | if (state.vor == VIP && state.state == EXPECT_IPV4 ){ 228 | state.vip = strdup(event.data.scalar.value); 229 | } else if (state.vor == VIP && state.state == EXPECT_PORT){ 230 | state.port = strdup(event.data.scalar.value); 231 | } else if (state.vor == RIP && state.state == EXPECT_IPV4){ 232 | state.rip = strdup(event.data.scalar.value); 
233 | } 234 | 235 | state.state = EXPECT_NONE; 236 | } 237 | break; 238 | 239 | case YAML_NO_EVENT: 240 | case YAML_STREAM_START_EVENT: 241 | case YAML_STREAM_END_EVENT: 242 | case YAML_DOCUMENT_START_EVENT: 243 | case YAML_DOCUMENT_END_EVENT: 244 | case YAML_SEQUENCE_START_EVENT: 245 | case YAML_SEQUENCE_END_EVENT: 246 | case YAML_ALIAS_EVENT: 247 | break; 248 | default: 249 | break; 250 | } 251 | if(event.type != YAML_STREAM_END_EVENT) 252 | yaml_event_delete(&event); 253 | } while(event.type != YAML_STREAM_END_EVENT); 254 | 255 | yaml_event_delete(&event); 256 | yaml_parser_delete(&parser); 257 | fclose(fh); 258 | 259 | svc_num=i; 260 | 261 | /* 262 | for (int k=1 ; k < i+1;k++){ 263 | printf("%s:%s\n",vs[k].ipv4,vs[k].port); 264 | for (int l=0 ; l < vs[k].num_rs ;l++){ 265 | printf(" %s\n",vs[k].rs[l].ipv4); 266 | } 267 | } 268 | printf("\n"); 269 | */ 270 | 271 | for (int k=1 ; k < svc_num+1;k++){ 272 | 273 | service[k].svc.protocol = IPPROTO_TCP; 274 | service[k].svc.family= parse_ipstr(vs[k].ipv4, &service[k].svc.daddr.v6); 275 | 276 | int port=0; 277 | parse_port(vs[k].port, &port); 278 | service[k].svc.dport=htons(port); 279 | 280 | for (int l=0 ; l < vs[k].num_rs ;l++){ 281 | service[k].wkr[l].family=parse_ipstr(vs[k].rs[l].ipv4, &service[k].wkr[l].daddr.v6); 282 | } 283 | service[k].wkr_count = vs[k].num_rs; 284 | } 285 | 286 | free(vs); 287 | return 0; 288 | } 289 | 290 | void sig_reader(int signal){ 291 | printf("recved signal = %d\n",signal); 292 | parse_yaml(); 293 | reflect_yaml(); 294 | } 295 | 296 | int main(int argc, const char *argv[]) 297 | { 298 | struct sigaction sa; 299 | if (argc != 2){ 300 | printf("argc = %d\n", argc); 301 | printf("argc must be 2\n"); 302 | exit(1); 303 | } 304 | conf_yaml = strdup(argv[1]); 305 | parse_yaml(); 306 | reflect_yaml(); 307 | 308 | printf("\nMy pid is: %d\n\n", getpid()); 309 | sa.sa_handler = &sig_reader; 310 | sa.sa_flags = SA_RESTART; 311 | sigfillset(&sa.sa_mask); 312 | 313 | sigaction (SIGUSR1, &sa, 
NULL); 314 | sigaction (SIGHUP, &sa, NULL); 315 | 316 | 317 | while(1) { 318 | sleep(1); 319 | } 320 | 321 | } 322 | -------------------------------------------------------------------------------- /xlbd.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - virtual_server: 3 | ipv4: 10.1.1.1 4 | port: 80 5 | real_servers: 6 | - ipv4: 172.16.51.2 7 | - ipv4: 172.16.57.2 8 | - virtual_server: 9 | ipv4: 10.1.1.2 10 | port: 80 11 | real_servers: 12 | - ipv4: 172.16.51.2 13 | - virtual_server: 14 | ipv4: 10.0.0.1 15 | port: 81 16 | real_servers: 17 | - ipv4: 172.16.51.2 18 | - ipv4: 192.168.51.2 19 | 20 | 21 | --------------------------------------------------------------------------------