├── .gitignore ├── testdata ├── bad.yaml ├── first.yaml ├── input.yaml ├── second.yaml └── third.yaml ├── icon.png ├── main.go ├── ebpf ├── Makefile ├── xconnect.c ├── xconnect.d └── include │ └── bpf │ ├── bpf_helpers.h │ └── bpf_helper_defs.h ├── go.mod ├── Dockerfile ├── Makefile ├── .github └── workflows │ ├── ci.yml │ └── docker.yml ├── LICENSE ├── cmd ├── config.go └── cmd.go ├── pkg └── xdp │ ├── netlink.go │ ├── xdp_test.go │ ├── xdp.go │ └── xdp_bpf.go ├── go.sum └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | xdp-xconnect 3 | -------------------------------------------------------------------------------- /testdata/bad.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | links: 3 | xconnect-1: xconnect-asd -------------------------------------------------------------------------------- /testdata/first.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | links: 3 | xconnect-1: xconnect-1 -------------------------------------------------------------------------------- /testdata/input.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | links: 3 | xconnect-1: xconnect-1 -------------------------------------------------------------------------------- /testdata/second.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | links: 3 | xconnect-1: xconnect-2 -------------------------------------------------------------------------------- /testdata/third.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | links: 3 | xconnect-1: xconnect-3 -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/networkop/xdp-xconnect/HEAD/icon.png -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/networkop/xdp-xconnect/cmd" 8 | ) 9 | 10 | var ( 11 | GitCommit = "latest" 12 | ) 13 | 14 | func main() { 15 | if err := cmd.Run(GitCommit); err != nil { 16 | fmt.Fprintf(os.Stderr, "Error:\n%s\n", err.Error()) 17 | os.Exit(1) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ebpf/Makefile: -------------------------------------------------------------------------------- 1 | 2 | BPF = xconnect.o 3 | BPFCC = clang 4 | BPFCFLAGS = -target bpf -Wall -O2 -emit-llvm -g 5 | BPFCFLAGS += -Iinclude 6 | BPFLC = llc 7 | BPFLCFLAGS = -march=bpf -mcpu=probe -filetype=obj 8 | 9 | all: $(BPF) 10 | 11 | %.o: %.c 12 | $(BPFCC) $(BPFCFLAGS) -c $< -o - | \ 13 | $(BPFLC) $(BPFLCFLAGS) -o $@ 14 | 15 | clean: 16 | rm -f *.o -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/networkop/xdp-xconnect 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/cilium/ebpf v0.3.0 7 | github.com/hashicorp/go-multierror v1.1.0 8 | github.com/pkg/errors v0.9.1 // indirect 9 | github.com/vishvananda/netlink v1.1.0 10 | golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9 11 | gopkg.in/fsnotify.v1 v1.4.7 12 | gopkg.in/yaml.v2 v2.4.0 13 | gotest.tools v2.2.0+incompatible 14 | ) 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=${BUILDPLATFORM} golang:1.15.6-buster as builder 2 | 3 | WORKDIR /src 4 | 5 | ARG LDFLAGS 6 | 7 | COPY go.mod . 8 | COPY go.sum . 
9 | 10 | RUN go mod download 11 | 12 | COPY . . 13 | 14 | ENV CGO_ENABLED=0 15 | ARG TARGETOS 16 | ARG TARGETARCH 17 | 18 | RUN GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -ldflags "${LDFLAGS}" -o xdp-xconnect main.go 19 | 20 | FROM alpine:latest 21 | WORKDIR / 22 | COPY --from=builder /src/xdp-xconnect . 23 | 24 | ENTRYPOINT ["/xdp-xconnect"] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | COMMIT := $(shell git describe --dirty --always) 2 | LDFLAGS := "-s -w -X main.GitCommit=$(COMMIT)" 3 | DOCKER_IMAGE ?= networkop/xdp-xconnect 4 | 5 | generate: 6 | go generate ./... 7 | 8 | build: 9 | go build -o xdp-xconnect main.go 10 | 11 | lint: 12 | golangci-lint run 13 | 14 | test: 15 | go test -race ./... -v 16 | 17 | docker: Dockerfile test 18 | docker buildx build --push \ 19 | --platform linux/amd64 \ 20 | --build-arg LDFLAGS=$(LDFLAGS) \ 21 | -t $(DOCKER_IMAGE):$(COMMIT) \ 22 | -t $(DOCKER_IMAGE):latest . 
-------------------------------------------------------------------------------- /ebpf/xconnect.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define XCONNECT_MAP_SIZE 1024 5 | 6 | struct bpf_map_def SEC("maps") xconnect_map = { 7 | .type = BPF_MAP_TYPE_DEVMAP, 8 | .key_size = sizeof(int), 9 | .value_size = sizeof(int), 10 | .max_entries = XCONNECT_MAP_SIZE, 11 | }; 12 | 13 | 14 | SEC("xdp") 15 | int xdp_xconnect(struct xdp_md *ctx) 16 | { 17 | return bpf_redirect_map(&xconnect_map, ctx->ingress_ifindex, 0); 18 | } 19 | 20 | 21 | char _license[] SEC("license") = "GPL"; 22 | 23 | 24 | -------------------------------------------------------------------------------- /ebpf/xconnect.d: -------------------------------------------------------------------------------- 1 | xconnect.o: xconnect.c /usr/include/linux/bpf.h \ 2 | /usr/include/linux/types.h /usr/include/asm/types.h \ 3 | /usr/include/asm-generic/types.h /usr/include/asm-generic/int-ll64.h \ 4 | /usr/include/asm/bitsperlong.h /usr/include/asm-generic/bitsperlong.h \ 5 | /usr/include/linux/posix_types.h /usr/include/linux/stddef.h \ 6 | /usr/include/asm/posix_types.h /usr/include/asm/posix_types_64.h \ 7 | /usr/include/asm-generic/posix_types.h /usr/include/linux/bpf_common.h \ 8 | /usr/include/bpf/bpf_helpers.h /usr/include/bpf/bpf_helper_defs.h 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | # Linting is in a separate job because golangci-lint is quite slow when 11 | # running it in cold-start mode. 
12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | - name: golangci-lint 18 | uses: golangci/golangci-lint-action@v2 19 | with: 20 | version: v1.29 21 | args: --timeout=2m30s 22 | 23 | unit-test: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/setup-go@v2 27 | with: 28 | go-version: 1.15.x 29 | - uses: actions/checkout@v2 30 | - run: make test 31 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | docker: 11 | strategy: 12 | matrix: 13 | go-version: [1.15.x] 14 | os: [ubuntu-20.04] 15 | runs-on: ${{ matrix.os }} 16 | steps: 17 | - 18 | name: Checkout code 19 | uses: actions/checkout@v2 20 | - 21 | name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v1 23 | - 24 | name: Login to Container Registry 25 | uses: docker/login-action@v1 26 | with: 27 | registry: docker.io 28 | username: ${{ secrets.DOCKER_LOGIN }} 29 | password: ${{ secrets.DOCKER_PASSWORD }} 30 | - 31 | name: Build and Push container images 32 | run: | 33 | make docker -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Michael Kashin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | 
The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ebpf/include/bpf/bpf_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __BPF_HELPERS__ 3 | #define __BPF_HELPERS__ 4 | 5 | #include "bpf_helper_defs.h" 6 | 7 | #define __uint(name, val) int (*name)[val] 8 | #define __type(name, val) typeof(val) *name 9 | 10 | /* Helper macro to print out debug messages */ 11 | #define bpf_printk(fmt, ...) \ 12 | ({ \ 13 | char ____fmt[] = fmt; \ 14 | bpf_trace_printk(____fmt, sizeof(____fmt), \ 15 | ##__VA_ARGS__); \ 16 | }) 17 | 18 | /* 19 | * Helper macro to place programs, maps, license in 20 | * different sections in elf_bpf file. 
Section names 21 | * are interpreted by elf_bpf loader 22 | */ 23 | #define SEC(NAME) __attribute__((section(NAME), used)) 24 | 25 | #ifndef __always_inline 26 | #define __always_inline __attribute__((always_inline)) 27 | #endif 28 | 29 | /* 30 | * Helper structure used by eBPF C program 31 | * to describe BPF map attributes to libbpf loader 32 | */ 33 | struct bpf_map_def { 34 | unsigned int type; 35 | unsigned int key_size; 36 | unsigned int value_size; 37 | unsigned int max_entries; 38 | unsigned int map_flags; 39 | }; 40 | 41 | enum libbpf_pin_type { 42 | LIBBPF_PIN_NONE, 43 | /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 44 | LIBBPF_PIN_BY_NAME, 45 | }; 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "log" 5 | "os" 6 | 7 | "gopkg.in/fsnotify.v1" 8 | "gopkg.in/yaml.v2" 9 | ) 10 | 11 | // Config holds the parsed configuration file 12 | type Config struct { 13 | Links map[string]string `yaml:"links"` 14 | } 15 | 16 | func newFromFile(file string) (*Config, error) { 17 | var cfg Config 18 | log.Printf("Parsing config file: %s", file) 19 | 20 | f, err := os.Open(file) 21 | if err != nil { 22 | return nil, err 23 | } 24 | defer f.Close() 25 | 26 | decoder := yaml.NewDecoder(f) 27 | err = decoder.Decode(&cfg) 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | log.Printf("App configuration: %+v", cfg) 33 | return &cfg, nil 34 | } 35 | 36 | func configWatcher(file string, out chan map[string]string) { 37 | 38 | watcher, err := fsnotify.NewWatcher() 39 | if err != nil { 40 | log.Panicf("Failed to initialise fsnotify: %s", err) 41 | } 42 | defer watcher.Close() 43 | 44 | if err := watcher.Add(file); err != nil { 45 | log.Panicf("Error watching the configuration file: %s", err) 46 | } 47 | 48 | for { 49 | select { 50 | case <-watcher.Events: 51 | cfg, err := 
newFromFile(file) 52 | if err != nil { 53 | log.Printf("Error parsing the configuration file: %s", err) 54 | } else { 55 | out <- cfg.Links 56 | } 57 | 58 | case err := <-watcher.Errors: 59 | log.Printf("Received watcher.Error: %s", err) 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /cmd/cmd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "syscall" 10 | 11 | "os/signal" 12 | 13 | "github.com/networkop/xdp-xconnect/pkg/xdp" 14 | ) 15 | 16 | var ( 17 | configFlag = flag.String("conf", "", "[mandatory] xdp-xconnect configuration file (YAML)") 18 | version = flag.Bool("v", false, "Display version") 19 | ) 20 | 21 | func setupSigHandlers(cancel context.CancelFunc) { 22 | sigs := make(chan os.Signal, 1) 23 | signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP) 24 | 25 | go func() { 26 | sig := <-sigs 27 | log.Printf("Received syscall:%+v", sig) 28 | cancel() 29 | }() 30 | 31 | } 32 | 33 | func printVersion(gitCommit string) { 34 | fmt.Printf("Version: %s\n", gitCommit) 35 | os.Exit(0) 36 | } 37 | 38 | // Run the xdp-xconnect app 39 | func Run(commit string) error { 40 | fmt.Println("eBPF x-connect") 41 | 42 | ctx, cancel := context.WithCancel(context.Background()) 43 | 44 | setupSigHandlers(cancel) 45 | 46 | flag.Parse() 47 | 48 | if *version { 49 | printVersion(commit) 50 | } 51 | 52 | if *configFlag == "" { 53 | flag.Usage() 54 | return fmt.Errorf("-conf flag was not provided") 55 | } 56 | 57 | cfg, err := newFromFile(*configFlag) 58 | if err != nil { 59 | return fmt.Errorf("Parsing configuration file: %s", err) 60 | } 61 | 62 | updateCh := make(chan map[string]string, 1) 63 | go configWatcher(*configFlag, updateCh) 64 | 65 | app, err := xdp.NewXconnectApp(cfg.Links) 66 | if err != nil { 67 | return fmt.Errorf("Loading eBPF: %s", err) 68 | } 69 | 70 | app.Launch(ctx, 
updateCh) 71 | 72 | return nil 73 | } 74 | -------------------------------------------------------------------------------- /pkg/xdp/netlink.go: -------------------------------------------------------------------------------- 1 | package xdp 2 | 3 | import ( 4 | "github.com/hashicorp/go-multierror" 5 | "github.com/vishvananda/netlink" 6 | ) 7 | 8 | func lookupLink(intf string) (*netlink.Link, error) { 9 | link, err := netlink.LinkByName(intf) 10 | if err != nil { 11 | return nil, err 12 | } 13 | return &link, nil 14 | } 15 | 16 | // forcing xdpgeneric for veth because https://www.netdevconf.org/0x13/session.html?talk-veth-xdp 17 | // tuntap also requires this probably for the same reasons 18 | func xdpFlags(linkType string) int { 19 | if linkType == "veth" || linkType == "tuntap" { 20 | return 2 21 | } 22 | return 0 // native xdp (xdpdrv) by default 23 | } 24 | 25 | func (c *App) updateLinkMap(intfs []string) error { 26 | var errs error 27 | 28 | for _, intf := range intfs { 29 | link, err := lookupLink(intf) 30 | if err != nil { 31 | errs = multierror.Append(errs, err) 32 | continue 33 | } 34 | c.linkMap[intf] = link 35 | } 36 | 37 | return errs 38 | } 39 | 40 | func (c *App) cleanupLinkMap(intfs []string) { 41 | for _, intf := range intfs { 42 | delete(c.linkMap, intf) 43 | } 44 | } 45 | 46 | func (c *App) addXdpToLink(intfs []string) error { 47 | 48 | var errs error 49 | for _, intf := range intfs { 50 | link := c.linkMap[intf] 51 | err := netlink.LinkSetXdpFdWithFlags(*link, c.objs.ProgramXdpXconnect.FD(), xdpFlags((*link).Type())) 52 | if err != nil { 53 | errs = multierror.Append(errs, err) 54 | } 55 | } 56 | 57 | return errs 58 | } 59 | 60 | func (c *App) delXdpFromLink(intfs []string) error { 61 | 62 | var errs error 63 | for _, intf := range intfs { 64 | link := c.linkMap[intf] 65 | err := netlink.LinkSetXdpFdWithFlags(*link, -1, xdpFlags((*link).Type())) 66 | if err != nil { 67 | errs = multierror.Append(errs, err) 68 | } 69 | } 70 | 71 | return errs 72 | 
} 73 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cilium/ebpf v0.3.0 h1:LI3lsl5GmTh+OFYamrj8sp+R0yam38zHG6NTDhSlNmQ= 2 | github.com/cilium/ebpf v0.3.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= 3 | github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM= 4 | github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 5 | github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= 6 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 7 | github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI= 8 | github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= 9 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 10 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 11 | github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0= 12 | github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= 13 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k= 14 | github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= 15 | golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 16 | golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9 h1:1/DFK4b7JH8DmkqhUk48onnSfrPzImPoVxuomtbT2nk= 17 | golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 18 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 19 | golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 20 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 21 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 22 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 23 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 24 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 25 | gotest.tools v1.4.0 h1:BjtEgfuw8Qyd+jPvQz8CfoxiO/UjFEidWinwEXZiWv0= 26 | gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= 27 | gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= 28 | -------------------------------------------------------------------------------- /pkg/xdp/xdp_test.go: -------------------------------------------------------------------------------- 1 | package xdp 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "sort" 7 | "testing" 8 | ) 9 | 10 | func TestDiff(t *testing.T) { 11 | tests := []struct { 12 | running map[string]string 13 | candidate map[string]string 14 | new []string 15 | changed []string 16 | removed []string 17 | }{ 18 | { 19 | running: map[string]string{ 20 | "intf-1": "intf-2", 21 | "intf-2": "intf-1", 22 | }, 23 | candidate: map[string]string{ 24 | "intf-1": "intf-3", 25 | "intf-3": "intf-1", 26 | }, 27 | new: []string{"intf-3"}, 28 | changed: []string{"intf-1"}, 29 | removed: []string{"intf-2"}, 30 | }, 31 | { 32 | running: map[string]string{ 33 | "intf-1": "intf-2", 34 | "intf-2": "intf-1", 35 | }, 36 | candidate: map[string]string{ 37 | "intf-1": "intf-2", 38 | "intf-2": "intf-1", 39 | }, 40 | new: nil, 41 | changed: nil, 42 | removed: nil, 43 | }, 44 | { 45 | running: map[string]string{ 46 | "intf-1": "intf-2", 47 | "intf-2": "intf-1", 48 | }, 49 | candidate: map[string]string{ 50 | "intf-1": "intf-2", 51 | "intf-2": "intf-1", 52 | 
"intf-3": "intf-4", 53 | "intf-4": "intf-3", 54 | }, 55 | new: []string{"intf-3", "intf-4"}, 56 | changed: nil, 57 | removed: nil, 58 | }, 59 | { 60 | running: map[string]string{ 61 | "intf-1": "intf-2", 62 | "intf-2": "intf-1", 63 | "intf-3": "intf-4", 64 | "intf-4": "intf-3", 65 | }, 66 | candidate: map[string]string{ 67 | "intf-1": "intf-4", 68 | "intf-4": "intf-1", 69 | "intf-3": "intf-2", 70 | "intf-2": "intf-3", 71 | }, 72 | new: nil, 73 | changed: []string{"intf-1", "intf-2", "intf-3", "intf-4"}, 74 | removed: nil, 75 | }, 76 | } 77 | for i, tt := range tests { 78 | t.Run(fmt.Sprintf("TestDiff_%d", i), func(t *testing.T) { 79 | 80 | new, changed, removed := confDiff(tt.running, tt.candidate) 81 | 82 | sort.Strings(new) 83 | sort.Strings(changed) 84 | sort.Strings(removed) 85 | 86 | if !reflect.DeepEqual(new, tt.new) { 87 | t.Errorf("#%d NEW wanted %v, got: %v", i, tt.new, new) 88 | } 89 | if !reflect.DeepEqual(changed, tt.changed) { 90 | t.Errorf("#%d CHANGED wanted %v, got: %v", i, tt.changed, changed) 91 | } 92 | if !reflect.DeepEqual(removed, tt.removed) { 93 | t.Errorf("#%d REMOVED wanted %v, got: %v", i, tt.removed, removed) 94 | } 95 | }) 96 | } 97 | } 98 | 99 | func TestSymm(t *testing.T) { 100 | tests := []struct { 101 | input map[string]string 102 | output map[string]string 103 | }{ 104 | { 105 | input: map[string]string{ 106 | "intf-1": "intf-2", 107 | }, 108 | output: map[string]string{ 109 | "intf-1": "intf-2", 110 | "intf-2": "intf-1", 111 | }, 112 | }, 113 | { 114 | input: map[string]string{ 115 | "intf-2": "intf-2", 116 | }, 117 | output: map[string]string{ 118 | "intf-2": "intf-2", 119 | }, 120 | }, 121 | { 122 | input: map[string]string{ 123 | "intf-1": "intf-2", 124 | "intf-2": "intf-3", 125 | }, 126 | output: map[string]string{ 127 | "intf-1": "intf-2", 128 | "intf-2": "intf-1", 129 | }, 130 | }, 131 | { 132 | input: map[string]string{ 133 | "intf-3": "intf-2", 134 | "intf-1": "intf-2", 135 | }, 136 | output: map[string]string{ 137 | 
"intf-1": "intf-2", 138 | "intf-2": "intf-1", 139 | }, 140 | }, 141 | { 142 | input: map[string]string{}, 143 | output: map[string]string{}, 144 | }, 145 | } 146 | for i, tt := range tests { 147 | t.Run(fmt.Sprintf("TestSymm_%d", i), func(t *testing.T) { 148 | 149 | output := makeSymm(tt.input) 150 | 151 | if !reflect.DeepEqual(tt.output, output) { 152 | t.Errorf("#%d wanted %v, got: %v", i, tt.output, output) 153 | } 154 | }) 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /pkg/xdp/xdp.go: -------------------------------------------------------------------------------- 1 | package xdp 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "sort" 8 | 9 | "github.com/hashicorp/go-multierror" 10 | "github.com/vishvananda/netlink" 11 | "golang.org/x/sys/unix" 12 | ) 13 | 14 | //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpf xdp ../../ebpf/xconnect.c -- -I./include -O2 -Wall 15 | 16 | // App stores ebpf programs and maps together with the desired state 17 | type App struct { 18 | objs *xdpObjects 19 | input map[string]string 20 | linkMap map[string]*netlink.Link 21 | } 22 | 23 | // NewXconnectApp sets up the XDP xconnect application 24 | // Input expects a map between existing pairs of interface names that will be cross-connected. 
25 | func NewXconnectApp(input map[string]string) (*App, error) { 26 | 27 | c := &App{ 28 | input: make(map[string]string), 29 | linkMap: make(map[string]*netlink.Link), 30 | } 31 | 32 | c.input = makeSymm(input) 33 | 34 | if err := increaseResourceLimits(); err != nil { 35 | return nil, err 36 | } 37 | 38 | specs, err := newXdpSpecs() 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | objs, err := specs.Load(nil) 44 | if err != nil { 45 | return nil, fmt.Errorf("Can't load objects:%s", err) 46 | } 47 | c.objs = objs 48 | 49 | if err := c.init(); err != nil { 50 | return nil, err 51 | } 52 | 53 | return c, nil 54 | } 55 | 56 | // init populates linkMap and eBPF map 57 | func (c *App) init() error { 58 | 59 | var added []string 60 | for intf := range c.input { 61 | added = append(added, intf) 62 | } 63 | 64 | err := c.updateLinkMap(added) 65 | if err != nil { 66 | return err 67 | } 68 | 69 | return c.updateBpfMap(added, []string{}, []string{}) 70 | } 71 | 72 | // cleanup clears netlink XDP configuration and closes eBPF objects 73 | func (c *App) cleanup() error { 74 | 75 | var errs error 76 | 77 | var removed []string 78 | for intf := range c.linkMap { 79 | removed = append(removed, intf) 80 | } 81 | 82 | if err := c.delXdpFromLink(removed); err != nil { 83 | errs = multierror.Append(errs, err) 84 | } 85 | 86 | if err := c.objs.Close(); err != nil { 87 | errs = multierror.Append(errs, err) 88 | } 89 | 90 | c.cleanupLinkMap(removed) 91 | 92 | return errs 93 | } 94 | 95 | // update ensures running state matches the candidate 96 | func (c *App) update(candidates map[string]string) error { 97 | 98 | candidates = makeSymm(candidates) 99 | 100 | added, changed, orphaned := confDiff(c.input, candidates) 101 | 102 | // Dealing with added interfaces 103 | err := c.updateLinkMap(added) 104 | if err != nil { 105 | return err 106 | } 107 | if err := c.addXdpToLink(added); err != nil { 108 | return err 109 | } 110 | 111 | // Updating eBPF map 112 | c.input = candidates 
113 | err = c.updateBpfMap(added, changed, orphaned) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | // Dealing with removed interfaces 119 | if err := c.delXdpFromLink(orphaned); err != nil { 120 | return err 121 | } 122 | c.cleanupLinkMap(orphaned) 123 | 124 | return nil 125 | } 126 | 127 | // updateBpfMap adjusts Bpf Map based on detected changes 128 | func (c *App) updateBpfMap(added, changed, removed []string) error { 129 | var errs error 130 | 131 | for _, intf := range added { 132 | link1 := c.linkMap[intf] 133 | link2 := c.linkMap[c.input[intf]] 134 | if err := c.objs.MapXconnectMap.Put(uint32((*link1).Attrs().Index), uint32((*link2).Attrs().Index)); err != nil { 135 | errs = multierror.Append(errs, err) 136 | } 137 | } 138 | 139 | for _, intf := range changed { 140 | link1 := c.linkMap[intf] 141 | link2 := c.linkMap[c.input[intf]] 142 | if err := c.objs.MapXconnectMap.Put(uint32((*link1).Attrs().Index), uint32((*link2).Attrs().Index)); err != nil { 143 | errs = multierror.Append(errs, err) 144 | } 145 | } 146 | 147 | for _, intf := range removed { 148 | link1 := c.linkMap[intf] 149 | if err := c.objs.MapXconnectMap.Delete(uint32((*link1).Attrs().Index)); err != nil { 150 | errs = multierror.Append(errs, err) 151 | } 152 | } 153 | 154 | return errs 155 | } 156 | 157 | // Launch app, watch for changes and perform "warm" reloads. 158 | // This function blocks forever and context can be used to gracefully stop it. 159 | // updateCh expects a map between interfaces, similar to input of NewXconnectApp. 
160 | func (c *App) Launch(ctx context.Context, updateCh chan map[string]string) { 161 | 162 | var links []string 163 | for link := range c.linkMap { 164 | links = append(links, link) 165 | } 166 | if err := c.addXdpToLink(links); err != nil { 167 | log.Fatalf("Failed to set up XDP on links: %s", err) 168 | } 169 | 170 | for { 171 | select { 172 | case <-ctx.Done(): 173 | log.Printf("ctx.Done") 174 | if err := c.cleanup(); err != nil { 175 | log.Fatalf("Cleanup Failed: %s", err) 176 | } 177 | return 178 | case config := <-updateCh: 179 | if err := c.update(config); err != nil { 180 | log.Printf("Error updating eBPF: %s", err) 181 | } 182 | } 183 | } 184 | } 185 | 186 | // increaseResourceLimits https://prototype-kernel.readthedocs.io/en/latest/bpf/troubleshooting.html#memory-ulimits 187 | func increaseResourceLimits() error { 188 | return unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unix.Rlimit{ 189 | Cur: unix.RLIM_INFINITY, 190 | Max: unix.RLIM_INFINITY, 191 | }) 192 | } 193 | 194 | // makeSymm enforces symmetricity of map[string]string 195 | // first k/v pair wins, repeated values are discarded 196 | func makeSymm(inMap map[string]string) map[string]string { 197 | res := make(map[string]string) 198 | 199 | var keys []string 200 | for k := range inMap { 201 | keys = append(keys, k) 202 | } 203 | sort.Strings(keys) 204 | 205 | for _, k := range keys { 206 | v := inMap[k] 207 | _, keyFound := res[k] 208 | _, valFound := res[v] 209 | if !keyFound && !valFound { 210 | res[k] = v 211 | res[v] = k 212 | } 213 | } 214 | 215 | return res 216 | } 217 | 218 | // confDiff compares the running and candidate configurations 219 | // and returns any new, changed or removed interface names 220 | func confDiff(running, candidates map[string]string) ([]string, []string, []string) { 221 | var new, changed, orphaned []string 222 | for c1, c2 := range candidates { 223 | p2, ok := running[c1] 224 | if !ok { 225 | new = append(new, c1) 226 | } else if p2 != c2 { 227 | changed = append(changed, 
c1) 228 | } 229 | } 230 | 231 | for p1 := range running { 232 | _, ok := candidates[p1] 233 | if !ok { 234 | orphaned = append(orphaned, p1) 235 | } 236 | } 237 | return new, changed, orphaned 238 | } 239 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cross-connect Linux interfaces with XDP redirect 2 | 3 | 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/networkop/xdp-xconnect)](https://goreportcard.com/report/github.com/networkop/xdp-xconnect) 5 | [![GoDoc](https://godoc.org/istio.io/istio?status.svg)](https://pkg.go.dev/github.com/networkop/xdp-xconnect) 6 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 7 | ![Build Status](https://github.com/networkop/xdp-xconnect/actions/workflows/ci.yml/badge.svg) 8 | 9 | ![](icon.png) 10 | 11 | `xdp-xconnect` daemon is a long-running process that uses a YAML file as its configuration API. For example: 12 | 13 | ```yaml 14 | links: 15 | eth0: tap0 16 | veth2: veth3 17 | ``` 18 | 19 | Given the above YAML file, local Linux interfaces will be cross-connected (`eth0<->tap0` and `veth2<->veth3`) with the following command: 20 | 21 | ``` 22 | sudo xdp-xconnect -conf config.yaml 23 | ``` 24 | 25 | This command will block, listening to any changes to the file and will perform "warm" reconfigurations on the fly. 26 | 27 | > Note: due to its nature (loading eBPF progs, maps and interacting with netlink), `xdp-xconnect` requires `NET_ADMIN` capabilities (root privileges are used for simplicity). 28 | 29 | ## Theory 30 | 31 | Each pair of interfaces will have an eBPF program attached to its XDP hook and will use `bpf_redirect_map` eBPF [helper function](https://man7.org/linux/man-pages/man7/bpf-helpers.7.html) to redirect packets directly to the receive queue of the peer interface. 
For mode details read [this](https://github.com/xdp-project/xdp-tutorial/tree/master/packet03-redirecting). 32 | 33 | 34 | ## Prerequisites 35 | 36 | Linux Kernel version > 4.14 (introduced veth XDP support) 37 | Go 38 | 39 | ## Installation 40 | 41 | Binary: 42 | 43 | ``` 44 | go install github.com/networkop/xdp-xconnect@latest 45 | ``` 46 | 47 | Docker: 48 | 49 | ``` 50 | docker pull networkop/xdp-xconnect 51 | ``` 52 | 53 | ## Usage 54 | 55 | Binary: 56 | 57 | ``` 58 | sudo xdp-xconnect -conf input.yaml 59 | ``` 60 | 61 | Docker: 62 | 63 | ``` 64 | docker run --net host -v$(pwd):/xc --privileged networkop/xdp-xconnect -conf /xc/input.yaml 65 | ``` 66 | 67 | Go code: 68 | 69 | 70 | ```go 71 | import "github.com/networkop/xdp-xconnect/pkg/xdp" 72 | 73 | func main() { 74 | 75 | input := map[string]string{"eth1":"tap1"} 76 | 77 | app, err := xdp.NewXconnectApp(input) 78 | // handle error 79 | 80 | updateCh := make(chan map[string]string, 1) 81 | 82 | app.Launch(ctx, updateCh) 83 | } 84 | ``` 85 | 86 | ## Demo 87 | 88 | Create three network namespaces and three veth links: 89 | 90 | 91 | ``` 92 | sudo ip link add dev xconnect-1 type veth peer name xc-1 93 | sudo ip link add dev xconnect-2 type veth peer name xc-2 94 | sudo ip link add dev xconnect-3 type veth peer name xc-3 95 | sudo ip netns add ns1 96 | sudo ip netns add ns2 97 | sudo ip netns add ns3 98 | ``` 99 | 100 | Move one side of each veth link into a correponding namespace and configure an IP address from `169.254.0.0/16` subnet: 101 | 102 | ``` 103 | sudo ip link set xc-1 netns ns1 104 | sudo ip link set xc-2 netns ns2 105 | sudo ip link set xc-3 netns ns3 106 | sudo ip netns exec ns1 ip addr add 169.254.1.10/24 dev xc-1 107 | sudo ip netns exec ns2 ip addr add 169.254.1.20/24 dev xc-2 108 | sudo ip netns exec ns3 ip addr add 169.254.1.30/24 dev xc-3 109 | ``` 110 | 111 | Bring up both sides of veth links 112 | 113 | ``` 114 | sudo ip link set dev xconnect-1 up 115 | sudo ip link set dev xconnect-2 up 116 
| sudo ip link set dev xconnect-3 up 117 | sudo ip netns exec ns1 ip link set dev xc-1 up 118 | sudo ip netns exec ns2 ip link set dev xc-2 up 119 | sudo ip netns exec ns3 ip link set dev xc-3 up 120 | ``` 121 | 122 | At this point there should be no connectivity between IPs in individual namespaces, i.e. the following commands will return no output: 123 | 124 | ``` 125 | sudo ip netns exec ns1 ping 169.254.1.20 & 126 | sudo ip netns exec ns1 ping 169.254.1.30 & 127 | ``` 128 | 129 | Start the `xdp-xconnect` app with and connect `xconnect-1` to itself (see [first.yaml](testdata/first.yaml)): 130 | 131 | ``` 132 | cp testdata/first.yaml testdata/input.yaml 133 | sudo go run ./main.go -conf testdata/input.yaml & 134 | 135 | 2021/03/03 20:08:41 Parsing config file: testdata/input.yaml 136 | 2021/03/03 20:08:41 App configuration: {Links:map[xconnect-1:xconnect-1]} 137 | ``` 138 | 139 | Now update the configuration by replacing the file to connect the first two interfaces (see [second.yaml](testdata/second.yaml)): 140 | 141 | 142 | ``` 143 | cp testdata/second.yaml testdata/input.yaml 144 | 145 | 2021/03/03 20:12:16 Parsing config file: testdata/input.yaml 146 | 2021/03/03 20:12:16 Error parsing the configuration file: EOF 147 | 2021/03/03 20:12:16 Parsing config file: testdata/input.yaml 148 | 2021/03/03 20:12:16 App configuration: {Links:map[xconnect-1:xconnect-2]} 149 | 64 bytes from 169.254.1.20: icmp_seq=113 ttl=64 time=0.068 ms 150 | 64 bytes from 169.254.1.20: icmp_seq=114 ttl=64 time=0.068 ms 151 | 64 bytes from 169.254.1.20: icmp_seq=115 ttl=64 time=0.075 ms 152 | ``` 153 | 154 | This proves that the first and second links are now connected. 
Now swap the connection over to the third link (see [third.yaml](testdata/third.yaml)): 155 | 156 | ``` 157 | cp testdata/third.yaml testdata/input.yaml 158 | 159 | 2021/03/03 20:13:53 Parsing config file: testdata/input.yaml 160 | 2021/03/03 20:13:53 Error parsing the configuration file: EOF 161 | 2021/03/03 20:13:53 Parsing config file: testdata/input.yaml 162 | 2021/03/03 20:13:53 App configuration: {Links:map[xconnect-1:xconnect-3] 163 | 64 bytes from 169.254.1.30: icmp_seq=207 ttl=64 time=0.075 ms 164 | 64 bytes from 169.254.1.30: icmp_seq=208 ttl=64 time=0.070 ms 165 | 64 bytes from 169.254.1.30: icmp_seq=209 ttl=64 time=0.071 ms 166 | 64 bytes from 169.254.1.30: icmp_seq=210 ttl=64 time=0.071 ms 167 | ``` 168 | 169 | Ping replies now come from the third link. Let's see what happens if we provide the malformed input (see [bad.yaml](testdata/bad.yaml)) 170 | 171 | ``` 172 | cp testdata/bad.yaml testdata/input.yaml 173 | 174 | 2021/03/03 20:15:46 Parsing config file: testdata/input.yaml 175 | 2021/03/03 20:15:46 App configuration: {Links:map[xconnect-1:xconnect-asd]} 176 | 2021/03/03 20:15:46 Error updating eBPF: 1 error occurred: 177 | * Link not found 178 | 179 | 64 bytes from 169.254.1.30: icmp_seq=317 ttl=64 time=0.065 ms 180 | 64 bytes from 169.254.1.30: icmp_seq=318 ttl=64 time=0.065 ms 181 | 64 bytes from 169.254.1.30: icmp_seq=319 ttl=64 time=0.064 m 182 | ``` 183 | 184 | The app detected the error and cross-connect continues working with its last known configuration. 
Finally, we can terminate the application which will cleanup all of the configured state: 185 | 186 | 187 | ``` 188 | fg 189 | [1] + 1095444 running sudo go run ./main.go -conf testdata/input.yaml 190 | ^C 191 | 2021/03/03 20:16:44 Received syscall:interrupt 192 | 2021/03/03 20:16:44 ctx.Done 193 | ``` 194 | 195 | Don't forget to cleanup test interfaces and namespaces: 196 | 197 | ``` 198 | sudo ip link del dev xconnect-1 199 | sudo ip link del dev xconnect-2 200 | sudo ip link del dev xconnect-3 201 | sudo ip netns del ns1 202 | sudo ip netns del ns2 203 | sudo ip netns del ns3 204 | ``` 205 | 206 | 207 | 208 | ## Additional Reading and References 209 | 210 | https://github.com/xdp-project/xdp-tutorial 211 | 212 | https://docs.cilium.io/en/stable/bpf/ 213 | 214 | https://qmonnet.github.io/whirl-offload/2020/04/12/llvm-ebpf-asm/ 215 | 216 | https://github.com/takehaya/goxdp-template 217 | 218 | https://github.com/hrntknr/nfNat 219 | 220 | https://github.com/takehaya/Vinbero 221 | 222 | https://github.com/tcfw/vpc 223 | 224 | https://github.com/florianl/tc-skeleton 225 | 226 | https://github.com/cloudflare/rakelimit 227 | 228 | https://github.com/b3a-dev/ebpf-geoip-demo 229 | 230 | https://github.com/lmb/ship-bpf-with-go 231 | 232 | 233 | 234 | 235 | 236 | -------------------------------------------------------------------------------- /pkg/xdp/xdp_bpf.go: -------------------------------------------------------------------------------- 1 | // Code generated by bpf2go; DO NOT EDIT. 
2 | 3 | package xdp 4 | 5 | import ( 6 | "bytes" 7 | "fmt" 8 | "io" 9 | 10 | "github.com/cilium/ebpf" 11 | ) 12 | 13 | type xdpSpecs struct { 14 | ProgramXdpXconnect *ebpf.ProgramSpec `ebpf:"xdp_xconnect"` 15 | MapXconnectMap *ebpf.MapSpec `ebpf:"xconnect_map"` 16 | } 17 | 18 | func newXdpSpecs() (*xdpSpecs, error) { 19 | reader := bytes.NewReader(_XdpBytes) 20 | spec, err := ebpf.LoadCollectionSpecFromReader(reader) 21 | if err != nil { 22 | return nil, fmt.Errorf("can't load xdp: %w", err) 23 | } 24 | 25 | specs := new(xdpSpecs) 26 | if err := spec.Assign(specs); err != nil { 27 | return nil, fmt.Errorf("can't assign xdp: %w", err) 28 | } 29 | 30 | return specs, nil 31 | } 32 | 33 | func (s *xdpSpecs) CollectionSpec() *ebpf.CollectionSpec { 34 | return &ebpf.CollectionSpec{ 35 | Programs: map[string]*ebpf.ProgramSpec{ 36 | "xdp_xconnect": s.ProgramXdpXconnect, 37 | }, 38 | Maps: map[string]*ebpf.MapSpec{ 39 | "xconnect_map": s.MapXconnectMap, 40 | }, 41 | } 42 | } 43 | 44 | func (s *xdpSpecs) Load(opts *ebpf.CollectionOptions) (*xdpObjects, error) { 45 | var objs xdpObjects 46 | if err := s.CollectionSpec().LoadAndAssign(&objs, opts); err != nil { 47 | return nil, err 48 | } 49 | return &objs, nil 50 | } 51 | 52 | func (s *xdpSpecs) Copy() *xdpSpecs { 53 | return &xdpSpecs{ 54 | ProgramXdpXconnect: s.ProgramXdpXconnect.Copy(), 55 | MapXconnectMap: s.MapXconnectMap.Copy(), 56 | } 57 | } 58 | 59 | type xdpObjects struct { 60 | ProgramXdpXconnect *ebpf.Program `ebpf:"xdp_xconnect"` 61 | MapXconnectMap *ebpf.Map `ebpf:"xconnect_map"` 62 | } 63 | 64 | func (o *xdpObjects) Close() error { 65 | for _, closer := range []io.Closer{ 66 | o.ProgramXdpXconnect, 67 | o.MapXconnectMap, 68 | } { 69 | if err := closer.Close(); err != nil { 70 | return err 71 | } 72 | } 73 | return nil 74 | } 75 | 76 | // Do not access this directly. 
77 | var _XdpBytes = []byte("\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\xf7\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x40\x00\x16\x00\x01\x00\x61\x12\x0c\x00\x00\x00\x00\x00\x18\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb7\x03\x00\x00\x00\x00\x00\x00\x85\x00\x00\x00\x33\x00\x00\x00\x95\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x47\x50\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x01\x00\x51\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x11\x01\x25\x0e\x13\x05\x03\x0e\x10\x17\x1b\x0e\x11\x01\x12\x06\x00\x00\x02\x34\x00\x03\x0e\x49\x13\x3f\x19\x3a\x0b\x3b\x0b\x02\x18\x00\x00\x03\x13\x01\x03\x0e\x0b\x0b\x3a\x0b\x3b\x0b\x00\x00\x04\x0d\x00\x03\x0e\x49\x13\x3a\x0b\x3b\x0b\x38\x0b\x00\x00\x05\x24\x00\x03\x0e\x3e\x0b\x0b\x0b\x00\x00\x06\x01\x01\x49\x13\x00\x00\x07\x21\x00\x49\x13\x37\x0b\x00\x00\x08\x24\x00\x03\x0e\x0b\x0b\x3e\x0b\x00\x00\x09\x34\x00\x03\x0e\x49\x13\x3a\x0b\x3b\x05\x00\x00\x0a\x0f\x00\x49\x13\x00\x00\x0b\x15\x01\x49\x13\x27\x19\x00\x00\x0c\x05\x00\x49\x13\x00\x00\x0d\x0f\x00\x00\x00\x0e\x16\x00\x49\x13\x03\x0e\x3a\x0b\x3b\x0b\x00\x00\x0f\x2e\x01\x11\x01\x12\x06\x40\x18\x97\x42\x19\x03\x0e\x3a\x0b\x3b\x0b\x27\x19\x49\x13\x3f\x19\x00\x00\x10\x05\x00\x02\x17\x03\x0e\x3a\x0b\x3b\x0b\x49\x13\x00\x00\x11\x13\x01\x03\x0e\x0b\x0b\x3a\x0b\x3b\x05\x00\x00\x12\x0d\x00\x03\x0e\x49\x13\x3a\x0b\x3b\x05\x38\x0b\x00\x00\x00\x8f\x01\x00\x00\x04\x00\x00\x00\x00\x00\x08\x01\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x02\x00\x00\x00\x00\x3f\x00\x00\x00\x02\x06\x09\x03\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x14\x01\x6c\x04\x00\x00\x00\x00\x84\x00\x00\x00\x01\x6d\x00\x04\x00\x00\x00\x00\x84\x00\x00\x00\x01\x6e\x04\x0
4\x00\x00\x00\x00\x84\x00\x00\x00\x01\x6f\x08\x04\x00\x00\x00\x00\x84\x00\x00\x00\x01\x70\x0c\x04\x00\x00\x00\x00\x84\x00\x00\x00\x01\x71\x10\x00\x05\x00\x00\x00\x00\x07\x04\x02\x00\x00\x00\x00\xa0\x00\x00\x00\x02\x15\x09\x03\x00\x00\x00\x00\x00\x00\x00\x00\x06\xac\x00\x00\x00\x07\xb3\x00\x00\x00\x04\x00\x05\x00\x00\x00\x00\x06\x01\x08\x00\x00\x00\x00\x08\x07\x09\x00\x00\x00\x00\xc6\x00\x00\x00\x04\x06\x05\x0a\xcb\x00\x00\x00\x0b\xe0\x00\x00\x00\x0c\xe7\x00\x00\x00\x0c\xe8\x00\x00\x00\x0c\xf3\x00\x00\x00\x00\x05\x00\x00\x00\x00\x05\x08\x0d\x0e\x84\x00\x00\x00\x00\x00\x00\x00\x03\x1b\x0e\xfe\x00\x00\x00\x00\x00\x00\x00\x03\x1f\x05\x00\x00\x00\x00\x07\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x01\x5a\x00\x00\x00\x00\x02\x0f\x2e\x01\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x02\x0f\x35\x01\x00\x00\x00\x05\x00\x00\x00\x00\x05\x04\x0a\x3a\x01\x00\x00\x11\x00\x00\x00\x00\x18\x05\xb2\x10\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xb3\x10\x00\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xb4\x10\x04\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xb5\x10\x08\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xb7\x10\x0c\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xb8\x10\x10\x12\x00\x00\x00\x00\xe8\x00\x00\x00\x05\xba\x10\x14\x00\x00\x00\x2e\x2e\x2f\x2e\x2e\x2f\x65\x62\x70\x66\x2f\x78\x63\x6f\x6e\x6e\x65\x63\x74\x2e\x63\x00\x2e\x00\x78\x63\x6f\x6e\x6e\x65\x63\x74\x5f\x6d\x61\x70\x00\x74\x79\x70\x65\x00\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\x00\x6b\x65\x79\x5f\x73\x69\x7a\x65\x00\x76\x61\x6c\x75\x65\x5f\x73\x69\x7a\x65\x00\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x00\x6d\x61\x70\x5f\x66\x6c\x61\x67\x73\x00\x62\x70\x66\x5f\x6d\x61\x70\x5f\x64\x65\x66\x00\x5f\x6c\x69\x63\x65\x6e\x73\x65\x00\x63\x68\x61\x72\x00\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\x00\x62\x70\x66\x5f\x72\x65\x64\x69\x72\x65\x63\x74\x5f\x6d\x61\x70\x00\x6c\x6f\x6e\x67\x20\x69\x6e\x74\x00\x5f\x5f\x75\x33\x32\x00\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\x20\x75\x6e\x73\x69\x67\x6e\x6
5\x64\x20\x69\x6e\x74\x00\x5f\x5f\x75\x36\x34\x00\x78\x64\x70\x5f\x78\x63\x6f\x6e\x6e\x65\x63\x74\x00\x69\x6e\x74\x00\x63\x74\x78\x00\x64\x61\x74\x61\x00\x64\x61\x74\x61\x5f\x65\x6e\x64\x00\x64\x61\x74\x61\x5f\x6d\x65\x74\x61\x00\x69\x6e\x67\x72\x65\x73\x73\x5f\x69\x66\x69\x6e\x64\x65\x78\x00\x72\x78\x5f\x71\x75\x65\x75\x65\x5f\x69\x6e\x64\x65\x78\x00\x65\x67\x72\x65\x73\x73\x5f\x69\x66\x69\x6e\x64\x65\x78\x00\x78\x64\x70\x5f\x6d\x64\x00\x9f\xeb\x01\x00\x18\x00\x00\x00\x00\x00\x00\x00\x7c\x01\x00\x00\x7c\x01\x00\x00\x4e\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x02\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x04\x18\x00\x00\x00\x08\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x03\x00\x00\x00\x20\x00\x00\x00\x16\x00\x00\x00\x03\x00\x00\x00\x40\x00\x00\x00\x20\x00\x00\x00\x03\x00\x00\x00\x60\x00\x00\x00\x30\x00\x00\x00\x03\x00\x00\x00\x80\x00\x00\x00\x3f\x00\x00\x00\x03\x00\x00\x00\xa0\x00\x00\x00\x4e\x00\x00\x00\x00\x00\x00\x08\x04\x00\x00\x00\x54\x00\x00\x00\x00\x00\x00\x01\x04\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x0d\x06\x00\x00\x00\x61\x00\x00\x00\x01\x00\x00\x00\x65\x00\x00\x00\x00\x00\x00\x01\x04\x00\x00\x00\x20\x00\x00\x01\x69\x00\x00\x00\x01\x00\x00\x0c\x05\x00\x00\x00\xd7\x00\x00\x00\x05\x00\x00\x04\x14\x00\x00\x00\xe3\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\xe8\x00\x00\x00\x04\x00\x00\x00\x20\x00\x00\x00\xf1\x00\x00\x00\x04\x00\x00\x00\x40\x00\x00\x00\xfc\x00\x00\x00\x04\x00\x00\x00\x60\x00\x00\x00\x08\x01\x00\x00\x04\x00\x00\x00\x80\x00\x00\x00\x12\x01\x00\x00\x00\x00\x00\x0e\x08\x00\x00\x00\x01\x00\x00\x00\x1f\x01\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x08\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x24\x01\x00\x00\x00\x00\x00\x01\x04\x00\x00\x00\x20\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x0e\x0b\x00\x00\x00\x01\x00\x00\x00\x41\x01\x00\x00\x01\x00\x00\x0f\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x49\x01\x00\x00\x01\x00\x00\x0f\x00\x0
0\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x78\x64\x70\x5f\x6d\x64\x00\x64\x61\x74\x61\x00\x64\x61\x74\x61\x5f\x65\x6e\x64\x00\x64\x61\x74\x61\x5f\x6d\x65\x74\x61\x00\x69\x6e\x67\x72\x65\x73\x73\x5f\x69\x66\x69\x6e\x64\x65\x78\x00\x72\x78\x5f\x71\x75\x65\x75\x65\x5f\x69\x6e\x64\x65\x78\x00\x65\x67\x72\x65\x73\x73\x5f\x69\x66\x69\x6e\x64\x65\x78\x00\x5f\x5f\x75\x33\x32\x00\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\x74\x00\x63\x74\x78\x00\x69\x6e\x74\x00\x78\x64\x70\x5f\x78\x63\x6f\x6e\x6e\x65\x63\x74\x00\x78\x64\x70\x00\x2e\x2f\x2e\x2e\x2f\x2e\x2e\x2f\x65\x62\x70\x66\x2f\x78\x63\x6f\x6e\x6e\x65\x63\x74\x2e\x63\x00\x20\x20\x20\x20\x72\x65\x74\x75\x72\x6e\x20\x62\x70\x66\x5f\x72\x65\x64\x69\x72\x65\x63\x74\x5f\x6d\x61\x70\x28\x26\x78\x63\x6f\x6e\x6e\x65\x63\x74\x5f\x6d\x61\x70\x2c\x20\x63\x74\x78\x2d\x3e\x69\x6e\x67\x72\x65\x73\x73\x5f\x69\x66\x69\x6e\x64\x65\x78\x2c\x20\x30\x29\x3b\x00\x62\x70\x66\x5f\x6d\x61\x70\x5f\x64\x65\x66\x00\x74\x79\x70\x65\x00\x6b\x65\x79\x5f\x73\x69\x7a\x65\x00\x76\x61\x6c\x75\x65\x5f\x73\x69\x7a\x65\x00\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x00\x6d\x61\x70\x5f\x66\x6c\x61\x67\x73\x00\x78\x63\x6f\x6e\x6e\x65\x63\x74\x5f\x6d\x61\x70\x00\x63\x68\x61\x72\x00\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\x00\x5f\x6c\x69\x63\x65\x6e\x73\x65\x00\x6c\x69\x63\x65\x6e\x73\x65\x00\x6d\x61\x70\x73\x00\x9f\xeb\x01\x00\x20\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x14\x00\x00\x00\x2c\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x76\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x10\x00\x00\x00\x76\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x7a\x00\x00\x00\x92\x00\x00\x00\x0c\x44\x00\x00\x28\x00\x00\x00\x7a\x00\x00\x00\x92\x00\x00\x00\x05\x44\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\xff\xff\xff\xff\x04\x00\x08\x00\x08\x7c\x0b\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\xca\x00\x00\x00\x04\x00\xa7\x00\x0
0\x00\x08\x01\x01\xfb\x0e\x0d\x00\x01\x01\x01\x01\x00\x00\x00\x01\x00\x00\x01\x2f\x75\x73\x72\x2f\x69\x6e\x63\x6c\x75\x64\x65\x2f\x62\x70\x66\x00\x2e\x2e\x2f\x2e\x2e\x2f\x65\x62\x70\x66\x00\x2f\x75\x73\x72\x2f\x69\x6e\x63\x6c\x75\x64\x65\x2f\x61\x73\x6d\x2d\x67\x65\x6e\x65\x72\x69\x63\x00\x2f\x75\x73\x72\x2f\x69\x6e\x63\x6c\x75\x64\x65\x2f\x6c\x69\x6e\x75\x78\x00\x00\x62\x70\x66\x5f\x68\x65\x6c\x70\x65\x72\x73\x2e\x68\x00\x01\x00\x00\x78\x63\x6f\x6e\x6e\x65\x63\x74\x2e\x63\x00\x02\x00\x00\x69\x6e\x74\x2d\x6c\x6c\x36\x34\x2e\x68\x00\x03\x00\x00\x62\x70\x66\x5f\x68\x65\x6c\x70\x65\x72\x5f\x64\x65\x66\x73\x2e\x68\x00\x01\x00\x00\x62\x70\x66\x2e\x68\x00\x04\x00\x00\x00\x04\x02\x00\x09\x02\x00\x00\x00\x00\x00\x00\x00\x00\x03\x0f\x01\x05\x0c\x0a\x13\x05\x05\x06\x58\x02\x01\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x04\x00\xf1\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x17\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x19\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x62\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x26\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x41\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x58\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x2b\x00\x00\x00\x0
0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x6e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x77\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x7c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x90\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xa1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xc7\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xb0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xcd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xde\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xda\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x28\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xe7\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\xfa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x0a\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x19\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x08\x00\x0
0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x12\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x72\x00\x00\x00\x11\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x47\x00\x00\x00\x11\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x22\x00\x00\x00\x12\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x25\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x21\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x02\x00\x00\x00\x12\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x03\x00\x00\x00\x16\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x23\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x04\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x1f\x00\x00\x00\x2b\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x05\x00\x00\x00\x37\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x25\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x06\x00\x00\x00\x48\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x07\x00\x00\x00\x54\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x08\x00\x00\x00\x60\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x09\x00\x00\x00\x6c\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0a\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0b\x00\x00\x00\x85\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x8c\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0d\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x24\x00\x00\x00\xad\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0e\x00\x00\x00\xb4\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x0f\x00\x00\x00\xbb\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x10\x00\x00\x00\xe1\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x11\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1
2\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x13\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x14\x00\x00\x00\x06\x01\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x1f\x00\x00\x00\x14\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x15\x00\x00\x00\x1f\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x20\x00\x00\x00\x23\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x16\x00\x00\x00\x2f\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x17\x00\x00\x00\x3b\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x18\x00\x00\x00\x44\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x19\x00\x00\x00\x51\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1a\x00\x00\x00\x5e\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1b\x00\x00\x00\x6b\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1c\x00\x00\x00\x78\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1d\x00\x00\x00\x85\x01\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x1e\x00\x00\x00\x74\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x24\x00\x00\x00\x8c\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x25\x00\x00\x00\x2c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\x00\x00\x50\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x22\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x1f\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x1f\x00\x00\x00\x26\x25\x24\x00\x2e\x64\x65\x62\x75\x67\x5f\x61\x62\x62\x72\x65\x76\x00\x2e\x74\x65\x78\x74\x00\x2e\x72\x65\x6c\x2e\x42\x54\x46\x2e\x65\x78\x74\x00\x78\x64\x70\x5f\x78\x63\x6f\x6e\x6e\x65\x63\x74\x00\x6d\x61\x70\x73\x00\x2e\x64\x65\x62\x75\x67\x5f\x73\x74\x72\x00\x2e\x72\x65\x6c\x78\x64\x70\x00\x78\x63\x6f\x6e\x6e\x65\x63\x74\x5f\x6d\x61\x70\x00\x2e\x72\x65\x6c\x2e\x64\x65\x62\x75\x67\x5f\x69\x6e\x66\x6f\x00\x2e\x6c\x6c\x76\x6d\x5f\x61\x64\x64\x72\x73\x69\x67\x00\x5f\x6c\x69\x63\x65\x6e\x73\x65\x00\x2e\x72\x65\x6c\x2e\x64\x65\x62\x75\x67\x5f\x6c\x69\x6e\x65\x00\x2e\x72\x6
5\x6c\x2e\x64\x65\x62\x75\x67\x5f\x66\x72\x61\x6d\x65\x00\x2e\x64\x65\x62\x75\x67\x5f\x6c\x6f\x63\x00\x78\x63\x6f\x6e\x6e\x65\x63\x74\x2e\x63\x00\x2e\x73\x74\x72\x74\x61\x62\x00\x2e\x73\x79\x6d\x74\x61\x62\x00\x2e\x72\x65\x6c\x2e\x42\x54\x46\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb2\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x0f\x00\x00\x00\x00\x00\x00\xcb\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x43\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x38\x0c\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x03\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x2f\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x70\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x84\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x0
0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x9c\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x23\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xab\x00\x00\x00\x00\x00\x00\x00\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x58\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8d\x01\x00\x00\x00\x00\x00\x00\x93\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x54\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x48\x0c\x00\x00\x00\x00\x00\x00\x40\x02\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x09\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x34\x00\x00\x00\x01\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x03\x00\x00\x00\x00\x00\x00\x2f\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xc6\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x4f\x04\x00\x00\x00\x00\x00\x00\xe2\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc2\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x88\x0e\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x0c\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x31\x07\x00\x00\x00\x00\x00\x00\x60\x00\x00\x00\x00\x00\x00\x00\x0
0\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa8\x0e\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x0e\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x8f\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\x07\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x8b\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\x0e\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x10\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x7f\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc0\x07\x00\x00\x00\x00\x00\x00\xce\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x7b\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x0e\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x12\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x64\x00\x00\x00\x03\x4c\xff\x6f\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x0f\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x90\x08\x00\x00\x00\x00\x00\x00\xa8\x03\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x24\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00") 78 | -------------------------------------------------------------------------------- 
/ebpf/include/bpf/bpf_helper_defs.h: -------------------------------------------------------------------------------- 1 | /* This is auto-generated file. See bpf_helpers_doc.py for details. */ 2 | 3 | /* Forward declarations of BPF structs */ 4 | struct bpf_fib_lookup; 5 | struct bpf_perf_event_data; 6 | struct bpf_perf_event_value; 7 | struct bpf_sock; 8 | struct bpf_sock_addr; 9 | struct bpf_sock_ops; 10 | struct bpf_sock_tuple; 11 | struct bpf_spin_lock; 12 | struct bpf_sysctl; 13 | struct bpf_tcp_sock; 14 | struct bpf_tunnel_key; 15 | struct bpf_xfrm_state; 16 | struct pt_regs; 17 | struct sk_reuseport_md; 18 | struct sockaddr; 19 | struct tcphdr; 20 | struct __sk_buff; 21 | struct sk_msg_md; 22 | struct xdp_md; 23 | 24 | /* 25 | * bpf_map_lookup_elem 26 | * 27 | * Perform a lookup in *map* for an entry associated to *key*. 28 | * 29 | * Returns 30 | * Map value associated to *key*, or **NULL** if no entry was 31 | * found. 32 | */ 33 | static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1; 34 | 35 | /* 36 | * bpf_map_update_elem 37 | * 38 | * Add or update the value of the entry associated to *key* in 39 | * *map* with *value*. *flags* is one of: 40 | * 41 | * **BPF_NOEXIST** 42 | * The entry for *key* must not exist in the map. 43 | * **BPF_EXIST** 44 | * The entry for *key* must already exist in the map. 45 | * **BPF_ANY** 46 | * No condition on the existence of the entry for *key*. 47 | * 48 | * Flag value **BPF_NOEXIST** cannot be used for maps of types 49 | * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all 50 | * elements always exist), the helper would return an error. 51 | * 52 | * Returns 53 | * 0 on success, or a negative error in case of failure. 54 | */ 55 | static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2; 56 | 57 | /* 58 | * bpf_map_delete_elem 59 | * 60 | * Delete entry with *key* from *map*. 
61 | * 62 | * Returns 63 | * 0 on success, or a negative error in case of failure. 64 | */ 65 | static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3; 66 | 67 | /* 68 | * bpf_probe_read 69 | * 70 | * For tracing programs, safely attempt to read *size* bytes from 71 | * kernel space address *unsafe_ptr* and store the data in *dst*. 72 | * 73 | * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() 74 | * instead. 75 | * 76 | * Returns 77 | * 0 on success, or a negative error in case of failure. 78 | */ 79 | static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4; 80 | 81 | /* 82 | * bpf_ktime_get_ns 83 | * 84 | * Return the time elapsed since system boot, in nanoseconds. 85 | * 86 | * Returns 87 | * Current *ktime*. 88 | */ 89 | static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5; 90 | 91 | /* 92 | * bpf_trace_printk 93 | * 94 | * This helper is a "printk()-like" facility for debugging. It 95 | * prints a message defined by format *fmt* (of size *fmt_size*) 96 | * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if 97 | * available. It can take up to three additional **u64** 98 | * arguments (as an eBPF helpers, the total number of arguments is 99 | * limited to five). 100 | * 101 | * Each time the helper is called, it appends a line to the trace. 102 | * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is 103 | * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. 104 | * The format of the trace is customizable, and the exact output 105 | * one will get depends on the options set in 106 | * *\/sys/kernel/debug/tracing/trace_options* (see also the 107 | * *README* file under the same directory). However, it usually 108 | * defaults to something like: 109 | * 110 | * :: 111 | * 112 | * telnet-470 [001] .N.. 419421.045894: 0x00000001: 113 | * 114 | * In the above: 115 | * 116 | * * ``telnet`` is the name of the current task. 
117 | * * ``470`` is the PID of the current task. 118 | * * ``001`` is the CPU number on which the task is 119 | * running. 120 | * * In ``.N..``, each character refers to a set of 121 | * options (whether irqs are enabled, scheduling 122 | * options, whether hard/softirqs are running, level of 123 | * preempt_disabled respectively). **N** means that 124 | * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** 125 | * are set. 126 | * * ``419421.045894`` is a timestamp. 127 | * * ``0x00000001`` is a fake value used by BPF for the 128 | * instruction pointer register. 129 | * * ```` is the message formatted with 130 | * *fmt*. 131 | * 132 | * The conversion specifiers supported by *fmt* are similar, but 133 | * more limited than for printk(). They are **%d**, **%i**, 134 | * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, 135 | * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size 136 | * of field, padding with zeroes, etc.) is available, and the 137 | * helper will return **-EINVAL** (but print nothing) if it 138 | * encounters an unknown specifier. 139 | * 140 | * Also, note that **bpf_trace_printk**\ () is slow, and should 141 | * only be used for debugging purposes. For this reason, a notice 142 | * bloc (spanning several lines) is printed to kernel logs and 143 | * states that the helper should not be used "for production use" 144 | * the first time this helper is used (or more precisely, when 145 | * **trace_printk**\ () buffers are allocated). For passing values 146 | * to user space, perf events should be preferred. 147 | * 148 | * Returns 149 | * The number of bytes written to the buffer, or a negative error 150 | * in case of failure. 151 | */ 152 | static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6; 153 | 154 | /* 155 | * bpf_get_prandom_u32 156 | * 157 | * Get a pseudo-random number. 
158 | * 159 | * From a security point of view, this helper uses its own 160 | * pseudo-random internal state, and cannot be used to infer the 161 | * seed of other random functions in the kernel. However, it is 162 | * essential to note that the generator used by the helper is not 163 | * cryptographically secure. 164 | * 165 | * Returns 166 | * A random 32-bit unsigned value. 167 | */ 168 | static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7; 169 | 170 | /* 171 | * bpf_get_smp_processor_id 172 | * 173 | * Get the SMP (symmetric multiprocessing) processor id. Note that 174 | * all programs run with preemption disabled, which means that the 175 | * SMP processor id is stable during all the execution of the 176 | * program. 177 | * 178 | * Returns 179 | * The SMP id of the processor running the program. 180 | */ 181 | static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8; 182 | 183 | /* 184 | * bpf_skb_store_bytes 185 | * 186 | * Store *len* bytes from address *from* into the packet 187 | * associated to *skb*, at *offset*. *flags* are a combination of 188 | * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the 189 | * checksum for the packet after storing the bytes) and 190 | * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ 191 | * **->swhash** and *skb*\ **->l4hash** to 0). 192 | * 193 | * A call to this helper is susceptible to change the underlying 194 | * packet buffer. Therefore, at load time, all checks on pointers 195 | * previously done by the verifier are invalidated and must be 196 | * performed again, if the helper is used in combination with 197 | * direct packet access. 198 | * 199 | * Returns 200 | * 0 on success, or a negative error in case of failure. 201 | */ 202 | static int (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9; 203 | 204 | /* 205 | * bpf_l3_csum_replace 206 | * 207 | * Recompute the layer 3 (e.g. 
IP) checksum for the packet 208 | * associated to *skb*. Computation is incremental, so the helper 209 | * must know the former value of the header field that was 210 | * modified (*from*), the new value of this field (*to*), and the 211 | * number of bytes (2 or 4) for this field, stored in *size*. 212 | * Alternatively, it is possible to store the difference between 213 | * the previous and the new values of the header field in *to*, by 214 | * setting *from* and *size* to 0. For both methods, *offset* 215 | * indicates the location of the IP checksum within the packet. 216 | * 217 | * This helper works in combination with **bpf_csum_diff**\ (), 218 | * which does not update the checksum in-place, but offers more 219 | * flexibility and can handle sizes larger than 2 or 4 for the 220 | * checksum to update. 221 | * 222 | * A call to this helper is susceptible to change the underlying 223 | * packet buffer. Therefore, at load time, all checks on pointers 224 | * previously done by the verifier are invalidated and must be 225 | * performed again, if the helper is used in combination with 226 | * direct packet access. 227 | * 228 | * Returns 229 | * 0 on success, or a negative error in case of failure. 230 | */ 231 | static int (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10; 232 | 233 | /* 234 | * bpf_l4_csum_replace 235 | * 236 | * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the 237 | * packet associated to *skb*. Computation is incremental, so the 238 | * helper must know the former value of the header field that was 239 | * modified (*from*), the new value of this field (*to*), and the 240 | * number of bytes (2 or 4) for this field, stored on the lowest 241 | * four bits of *flags*. Alternatively, it is possible to store 242 | * the difference between the previous and the new values of the 243 | * header field in *to*, by setting *from* and the four lowest 244 | * bits of *flags* to 0. 
For both methods, *offset* indicates the 245 | * location of the IP checksum within the packet. In addition to 246 | * the size of the field, *flags* can be added (bitwise OR) actual 247 | * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left 248 | * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and 249 | * for updates resulting in a null checksum the value is set to 250 | * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates 251 | * the checksum is to be computed against a pseudo-header. 252 | * 253 | * This helper works in combination with **bpf_csum_diff**\ (), 254 | * which does not update the checksum in-place, but offers more 255 | * flexibility and can handle sizes larger than 2 or 4 for the 256 | * checksum to update. 257 | * 258 | * A call to this helper is susceptible to change the underlying 259 | * packet buffer. Therefore, at load time, all checks on pointers 260 | * previously done by the verifier are invalidated and must be 261 | * performed again, if the helper is used in combination with 262 | * direct packet access. 263 | * 264 | * Returns 265 | * 0 on success, or a negative error in case of failure. 266 | */ 267 | static int (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11; 268 | 269 | /* 270 | * bpf_tail_call 271 | * 272 | * This special helper is used to trigger a "tail call", or in 273 | * other words, to jump into another eBPF program. The same stack 274 | * frame is used (but values on stack and in registers for the 275 | * caller are not accessible to the callee). This mechanism allows 276 | * for program chaining, either for raising the maximum number of 277 | * available eBPF instructions, or to execute given programs in 278 | * conditional blocks. For security reasons, there is an upper 279 | * limit to the number of successive tail calls that can be 280 | * performed. 
281 | * 282 | * Upon call of this helper, the program attempts to jump into a 283 | * program referenced at index *index* in *prog_array_map*, a 284 | * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes 285 | * *ctx*, a pointer to the context. 286 | * 287 | * If the call succeeds, the kernel immediately runs the first 288 | * instruction of the new program. This is not a function call, 289 | * and it never returns to the previous program. If the call 290 | * fails, then the helper has no effect, and the caller continues 291 | * to run its subsequent instructions. A call can fail if the 292 | * destination program for the jump does not exist (i.e. *index* 293 | * is superior to the number of entries in *prog_array_map*), or 294 | * if the maximum number of tail calls has been reached for this 295 | * chain of programs. This limit is defined in the kernel by the 296 | * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), 297 | * which is currently set to 32. 298 | * 299 | * Returns 300 | * 0 on success, or a negative error in case of failure. 301 | */ 302 | static int (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12; 303 | 304 | /* 305 | * bpf_clone_redirect 306 | * 307 | * Clone and redirect the packet associated to *skb* to another 308 | * net device of index *ifindex*. Both ingress and egress 309 | * interfaces can be used for redirection. The **BPF_F_INGRESS** 310 | * value in *flags* is used to make the distinction (ingress path 311 | * is selected if the flag is present, egress path otherwise). 312 | * This is the only flag supported for now. 313 | * 314 | * In comparison with **bpf_redirect**\ () helper, 315 | * **bpf_clone_redirect**\ () has the associated cost of 316 | * duplicating the packet buffer, but this can be executed out of 317 | * the eBPF program. 
Conversely, **bpf_redirect**\ () is more 318 | * efficient, but it is handled through an action code where the 319 | * redirection happens only after the eBPF program has returned. 320 | * 321 | * A call to this helper is susceptible to change the underlying 322 | * packet buffer. Therefore, at load time, all checks on pointers 323 | * previously done by the verifier are invalidated and must be 324 | * performed again, if the helper is used in combination with 325 | * direct packet access. 326 | * 327 | * Returns 328 | * 0 on success, or a negative error in case of failure. 329 | */ 330 | static int (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13; 331 | 332 | /* 333 | * bpf_get_current_pid_tgid 334 | * 335 | * 336 | * Returns 337 | * A 64-bit integer containing the current tgid and pid, and 338 | * created as such: 339 | * *current_task*\ **->tgid << 32 \|** 340 | * *current_task*\ **->pid**. 341 | */ 342 | static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14; 343 | 344 | /* 345 | * bpf_get_current_uid_gid 346 | * 347 | * 348 | * Returns 349 | * A 64-bit integer containing the current GID and UID, and 350 | * created as such: *current_gid* **<< 32 \|** *current_uid*. 351 | */ 352 | static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15; 353 | 354 | /* 355 | * bpf_get_current_comm 356 | * 357 | * Copy the **comm** attribute of the current task into *buf* of 358 | * *size_of_buf*. The **comm** attribute contains the name of 359 | * the executable (excluding the path) for the current task. The 360 | * *size_of_buf* must be strictly positive. On success, the 361 | * helper makes sure that the *buf* is NUL-terminated. On failure, 362 | * it is filled with zeroes. 363 | * 364 | * Returns 365 | * 0 on success, or a negative error in case of failure. 
366 | */ 367 | static int (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16; 368 | 369 | /* 370 | * bpf_get_cgroup_classid 371 | * 372 | * Retrieve the classid for the current task, i.e. for the net_cls 373 | * cgroup to which *skb* belongs. 374 | * 375 | * This helper can be used on TC egress path, but not on ingress. 376 | * 377 | * The net_cls cgroup provides an interface to tag network packets 378 | * based on a user-provided identifier for all traffic coming from 379 | * the tasks belonging to the related cgroup. See also the related 380 | * kernel documentation, available from the Linux sources in file 381 | * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. 382 | * 383 | * The Linux kernel has two versions for cgroups: there are 384 | * cgroups v1 and cgroups v2. Both are available to users, who can 385 | * use a mixture of them, but note that the net_cls cgroup is for 386 | * cgroup v1 only. This makes it incompatible with BPF programs 387 | * run on cgroups, which is a cgroup-v2-only feature (a socket can 388 | * only hold data for one version of cgroups at a time). 389 | * 390 | * This helper is only available if the kernel was compiled with 391 | * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to 392 | * "**y**" or to "**m**". 393 | * 394 | * Returns 395 | * The classid, or 0 for the default unconfigured classid. 396 | */ 397 | static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17; 398 | 399 | /* 400 | * bpf_skb_vlan_push 401 | * 402 | * Push a *vlan_tci* (VLAN tag control information) of protocol 403 | * *vlan_proto* to the packet associated to *skb*, then update 404 | * the checksum. Note that if *vlan_proto* is different from 405 | * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to 406 | * be **ETH_P_8021Q**. 407 | * 408 | * A call to this helper is susceptible to change the underlying 409 | * packet buffer. 
Therefore, at load time, all checks on pointers 410 | * previously done by the verifier are invalidated and must be 411 | * performed again, if the helper is used in combination with 412 | * direct packet access. 413 | * 414 | * Returns 415 | * 0 on success, or a negative error in case of failure. 416 | */ 417 | static int (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18; 418 | 419 | /* 420 | * bpf_skb_vlan_pop 421 | * 422 | * Pop a VLAN header from the packet associated to *skb*. 423 | * 424 | * A call to this helper is susceptible to change the underlying 425 | * packet buffer. Therefore, at load time, all checks on pointers 426 | * previously done by the verifier are invalidated and must be 427 | * performed again, if the helper is used in combination with 428 | * direct packet access. 429 | * 430 | * Returns 431 | * 0 on success, or a negative error in case of failure. 432 | */ 433 | static int (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19; 434 | 435 | /* 436 | * bpf_skb_get_tunnel_key 437 | * 438 | * Get tunnel metadata. This helper takes a pointer *key* to an 439 | * empty **struct bpf_tunnel_key** of **size**, that will be 440 | * filled with tunnel metadata for the packet associated to *skb*. 441 | * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which 442 | * indicates that the tunnel is based on IPv6 protocol instead of 443 | * IPv4. 444 | * 445 | * The **struct bpf_tunnel_key** is an object that generalizes the 446 | * principal parameters used by various tunneling protocols into a 447 | * single struct. This way, it can be used to easily make a 448 | * decision based on the contents of the encapsulation header, 449 | * "summarized" in this struct. In particular, it holds the IP 450 | * address of the remote end (IPv4 or IPv6, depending on the case) 451 | * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. 
Also, 452 | * this struct exposes the *key*\ **->tunnel_id**, which is 453 | * generally mapped to a VNI (Virtual Network Identifier), making 454 | * it programmable together with the **bpf_skb_set_tunnel_key**\ 455 | * () helper. 456 | * 457 | * Let's imagine that the following code is part of a program 458 | * attached to the TC ingress interface, on one end of a GRE 459 | * tunnel, and is supposed to filter out all messages coming from 460 | * remote ends with IPv4 address other than 10.0.0.1: 461 | * 462 | * :: 463 | * 464 | * int ret; 465 | * struct bpf_tunnel_key key = {}; 466 | * 467 | * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); 468 | * if (ret < 0) 469 | * return TC_ACT_SHOT; // drop packet 470 | * 471 | * if (key.remote_ipv4 != 0x0a000001) 472 | * return TC_ACT_SHOT; // drop packet 473 | * 474 | * return TC_ACT_OK; // accept packet 475 | * 476 | * This interface can also be used with all encapsulation devices 477 | * that can operate in "collect metadata" mode: instead of having 478 | * one network device per specific configuration, the "collect 479 | * metadata" mode only requires a single device where the 480 | * configuration can be extracted from this helper. 481 | * 482 | * This can be used together with various tunnels such as VXLan, 483 | * Geneve, GRE or IP in IP (IPIP). 484 | * 485 | * Returns 486 | * 0 on success, or a negative error in case of failure. 487 | */ 488 | static int (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20; 489 | 490 | /* 491 | * bpf_skb_set_tunnel_key 492 | * 493 | * Populate tunnel metadata for packet associated to *skb.* The 494 | * tunnel metadata is set to the contents of *key*, of *size*. The 495 | * *flags* can be set to a combination of the following values: 496 | * 497 | * **BPF_F_TUNINFO_IPV6** 498 | * Indicate that the tunnel is based on IPv6 protocol 499 | * instead of IPv4. 
500 | * **BPF_F_ZERO_CSUM_TX** 501 | * For IPv4 packets, add a flag to tunnel metadata 502 | * indicating that checksum computation should be skipped 503 | * and checksum set to zeroes. 504 | * **BPF_F_DONT_FRAGMENT** 505 | * Add a flag to tunnel metadata indicating that the 506 | * packet should not be fragmented. 507 | * **BPF_F_SEQ_NUMBER** 508 | * Add a flag to tunnel metadata indicating that a 509 | * sequence number should be added to tunnel header before 510 | * sending the packet. This flag was added for GRE 511 | * encapsulation, but might be used with other protocols 512 | * as well in the future. 513 | * 514 | * Here is a typical usage on the transmit path: 515 | * 516 | * :: 517 | * 518 | * struct bpf_tunnel_key key; 519 | * populate key ... 520 | * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); 521 | * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); 522 | * 523 | * See also the description of the **bpf_skb_get_tunnel_key**\ () 524 | * helper for additional information. 525 | * 526 | * Returns 527 | * 0 on success, or a negative error in case of failure. 528 | */ 529 | static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21; 530 | 531 | /* 532 | * bpf_perf_event_read 533 | * 534 | * Read the value of a perf event counter. This helper relies on a 535 | * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of 536 | * the perf event counter is selected when *map* is updated with 537 | * perf event file descriptors. The *map* is an array whose size 538 | * is the number of available CPUs, and each cell contains a value 539 | * relative to one CPU. The value to retrieve is indicated by 540 | * *flags*, that contains the index of the CPU to look up, masked 541 | * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to 542 | * **BPF_F_CURRENT_CPU** to indicate that the value for the 543 | * current CPU should be retrieved. 
544 | * 545 | * Note that before Linux 4.13, only hardware perf event can be 546 | * retrieved. 547 | * 548 | * Also, be aware that the newer helper 549 | * **bpf_perf_event_read_value**\ () is recommended over 550 | * **bpf_perf_event_read**\ () in general. The latter has some ABI 551 | * quirks where error and counter value are used as a return code 552 | * (which is wrong to do since ranges may overlap). This issue is 553 | * fixed with **bpf_perf_event_read_value**\ (), which at the same 554 | * time provides more features over the **bpf_perf_event_read**\ 555 | * () interface. Please refer to the description of 556 | * **bpf_perf_event_read_value**\ () for details. 557 | * 558 | * Returns 559 | * The value of the perf event counter read from the map, or a 560 | * negative error code in case of failure. 561 | */ 562 | static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22; 563 | 564 | /* 565 | * bpf_redirect 566 | * 567 | * Redirect the packet to another net device of index *ifindex*. 568 | * This helper is somewhat similar to **bpf_clone_redirect**\ 569 | * (), except that the packet is not cloned, which provides 570 | * increased performance. 571 | * 572 | * Except for XDP, both ingress and egress interfaces can be used 573 | * for redirection. The **BPF_F_INGRESS** value in *flags* is used 574 | * to make the distinction (ingress path is selected if the flag 575 | * is present, egress path otherwise). Currently, XDP only 576 | * supports redirection to the egress interface, and accepts no 577 | * flag at all. 578 | * 579 | * The same effect can be attained with the more generic 580 | * **bpf_redirect_map**\ (), which requires specific maps to be 581 | * used but offers better performance. 582 | * 583 | * Returns 584 | * For XDP, the helper returns **XDP_REDIRECT** on success or 585 | * **XDP_ABORTED** on error. For other program types, the values 586 | * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on 587 | * error. 
588 | */ 589 | static int (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23; 590 | 591 | /* 592 | * bpf_get_route_realm 593 | * 594 | * Retrieve the realm of the route, that is to say the 595 | * **tclassid** field of the destination for the *skb*. The 596 | * identifier retrieved is a user-provided tag, similar to the 597 | * one used with the net_cls cgroup (see description for 598 | * **bpf_get_cgroup_classid**\ () helper), but here this tag is 599 | * held by a route (a destination entry), not by a task. 600 | * 601 | * Retrieving this identifier works with the clsact TC egress hook 602 | * (see also **tc-bpf(8)**), or alternatively on conventional 603 | * classful egress qdiscs, but not on TC ingress path. In case of 604 | * clsact TC egress hook, this has the advantage that, internally, 605 | * the destination entry has not been dropped yet in the transmit 606 | * path. Therefore, the destination entry does not need to be 607 | * artificially held via **netif_keep_dst**\ () for a classful 608 | * qdisc until the *skb* is freed. 609 | * 610 | * This helper is available only if the kernel was compiled with 611 | * **CONFIG_IP_ROUTE_CLASSID** configuration option. 612 | * 613 | * Returns 614 | * The realm of the route for the packet associated to *skb*, or 0 615 | * if none was found. 616 | */ 617 | static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24; 618 | 619 | /* 620 | * bpf_perf_event_output 621 | * 622 | * Write raw *data* blob into a special BPF perf event held by 623 | * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf 624 | * event must have the following attributes: **PERF_SAMPLE_RAW** 625 | * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and 626 | * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. 627 | * 628 | * The *flags* are used to indicate the index in *map* for which 629 | * the value must be put, masked with **BPF_F_INDEX_MASK**. 
630 | * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** 631 | * to indicate that the index of the current CPU core should be 632 | * used. 633 | * 634 | * The value to write, of *size*, is passed through eBPF stack and 635 | * pointed by *data*. 636 | * 637 | * The context of the program *ctx* needs also be passed to the 638 | * helper. 639 | * 640 | * On user space, a program willing to read the values needs to 641 | * call **perf_event_open**\ () on the perf event (either for 642 | * one or for all CPUs) and to store the file descriptor into the 643 | * *map*. This must be done before the eBPF program can send data 644 | * into it. An example is available in file 645 | * *samples/bpf/trace_output_user.c* in the Linux kernel source 646 | * tree (the eBPF program counterpart is in 647 | * *samples/bpf/trace_output_kern.c*). 648 | * 649 | * **bpf_perf_event_output**\ () achieves better performance 650 | * than **bpf_trace_printk**\ () for sharing data with user 651 | * space, and is much better suitable for streaming data from eBPF 652 | * programs. 653 | * 654 | * Note that this helper is not restricted to tracing use cases 655 | * and can be used with programs attached to TC or XDP as well, 656 | * where it allows for passing data to user space listeners. Data 657 | * can be: 658 | * 659 | * * Only custom structs, 660 | * * Only the packet payload, or 661 | * * A combination of both. 662 | * 663 | * Returns 664 | * 0 on success, or a negative error in case of failure. 665 | */ 666 | static int (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25; 667 | 668 | /* 669 | * bpf_skb_load_bytes 670 | * 671 | * This helper was provided as an easy way to load data from a 672 | * packet. It can be used to load *len* bytes from *offset* from 673 | * the packet associated to *skb*, into the buffer pointed by 674 | * *to*. 
675 | * 676 | * Since Linux 4.7, usage of this helper has mostly been replaced 677 | * by "direct packet access", enabling packet data to be 678 | * manipulated with *skb*\ **->data** and *skb*\ **->data_end** 679 | * pointing respectively to the first byte of packet data and to 680 | * the byte after the last byte of packet data. However, it 681 | * remains useful if one wishes to read large quantities of data 682 | * at once from a packet into the eBPF stack. 683 | * 684 | * Returns 685 | * 0 on success, or a negative error in case of failure. 686 | */ 687 | static int (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26; 688 | 689 | /* 690 | * bpf_get_stackid 691 | * 692 | * Walk a user or a kernel stack and return its id. To achieve 693 | * this, the helper needs *ctx*, which is a pointer to the context 694 | * on which the tracing program is executed, and a pointer to a 695 | * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. 696 | * 697 | * The last argument, *flags*, holds the number of stack frames to 698 | * skip (from 0 to 255), masked with 699 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set 700 | * a combination of the following flags: 701 | * 702 | * **BPF_F_USER_STACK** 703 | * Collect a user space stack instead of a kernel stack. 704 | * **BPF_F_FAST_STACK_CMP** 705 | * Compare stacks by hash only. 706 | * **BPF_F_REUSE_STACKID** 707 | * If two different stacks hash into the same *stackid*, 708 | * discard the old one. 709 | * 710 | * The stack id retrieved is a 32 bit long integer handle which 711 | * can be further combined with other data (including other stack 712 | * ids) and used as a key into maps. This can be useful for 713 | * generating a variety of graphs (such as flame graphs or off-cpu 714 | * graphs). 
715 | * 716 | * For walking a stack, this helper is an improvement over 717 | * **bpf_probe_read**\ (), which can be used with unrolled loops 718 | * but is not efficient and consumes a lot of eBPF instructions. 719 | * Instead, **bpf_get_stackid**\ () can collect up to 720 | * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that 721 | * this limit can be controlled with the **sysctl** program, and 722 | * that it should be manually increased in order to profile long 723 | * user stacks (such as stacks for Java programs). To do so, use: 724 | * 725 | * :: 726 | * 727 | * # sysctl kernel.perf_event_max_stack= 728 | * 729 | * Returns 730 | * The positive or null stack id on success, or a negative error 731 | * in case of failure. 732 | */ 733 | static int (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27; 734 | 735 | /* 736 | * bpf_csum_diff 737 | * 738 | * Compute a checksum difference, from the raw buffer pointed by 739 | * *from*, of length *from_size* (that must be a multiple of 4), 740 | * towards the raw buffer pointed by *to*, of size *to_size* 741 | * (same remark). An optional *seed* can be added to the value 742 | * (this can be cascaded, the seed may come from a previous call 743 | * to the helper). 744 | * 745 | * This is flexible enough to be used in several ways: 746 | * 747 | * * With *from_size* == 0, *to_size* > 0 and *seed* set to 748 | * checksum, it can be used when pushing new data. 749 | * * With *from_size* > 0, *to_size* == 0 and *seed* set to 750 | * checksum, it can be used when removing data from a packet. 751 | * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it 752 | * can be used to compute a diff. Note that *from_size* and 753 | * *to_size* do not need to be equal. 754 | * 755 | * This helper can be used in combination with 756 | * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to 757 | * which one can feed in the difference computed with 758 | * **bpf_csum_diff**\ (). 
759 | * 760 | * Returns 761 | * The checksum result, or a negative error code in case of 762 | * failure. 763 | */ 764 | static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28; 765 | 766 | /* 767 | * bpf_skb_get_tunnel_opt 768 | * 769 | * Retrieve tunnel options metadata for the packet associated to 770 | * *skb*, and store the raw tunnel option data to the buffer *opt* 771 | * of *size*. 772 | * 773 | * This helper can be used with encapsulation devices that can 774 | * operate in "collect metadata" mode (please refer to the related 775 | * note in the description of **bpf_skb_get_tunnel_key**\ () for 776 | * more details). A particular example where this can be used is 777 | * in combination with the Geneve encapsulation protocol, where it 778 | * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) 779 | * and retrieving arbitrary TLVs (Type-Length-Value headers) from 780 | * the eBPF program. This allows for full customization of these 781 | * headers. 782 | * 783 | * Returns 784 | * The size of the option data retrieved. 785 | */ 786 | static int (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29; 787 | 788 | /* 789 | * bpf_skb_set_tunnel_opt 790 | * 791 | * Set tunnel options metadata for the packet associated to *skb* 792 | * to the option data contained in the raw buffer *opt* of *size*. 793 | * 794 | * See also the description of the **bpf_skb_get_tunnel_opt**\ () 795 | * helper for additional information. 796 | * 797 | * Returns 798 | * 0 on success, or a negative error in case of failure. 799 | */ 800 | static int (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30; 801 | 802 | /* 803 | * bpf_skb_change_proto 804 | * 805 | * Change the protocol of the *skb* to *proto*. Currently 806 | * supported are transition from IPv4 to IPv6, and from IPv6 to 807 | * IPv4. 
The helper takes care of the groundwork for the 808 | * transition, including resizing the socket buffer. The eBPF 809 | * program is expected to fill the new headers, if any, via 810 | * **skb_store_bytes**\ () and to recompute the checksums with 811 | * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ 812 | * (). The main case for this helper is to perform NAT64 813 | * operations out of an eBPF program. 814 | * 815 | * Internally, the GSO type is marked as dodgy so that headers are 816 | * checked and segments are recalculated by the GSO/GRO engine. 817 | * The size for GSO target is adapted as well. 818 | * 819 | * All values for *flags* are reserved for future usage, and must 820 | * be left at zero. 821 | * 822 | * A call to this helper is susceptible to change the underlying 823 | * packet buffer. Therefore, at load time, all checks on pointers 824 | * previously done by the verifier are invalidated and must be 825 | * performed again, if the helper is used in combination with 826 | * direct packet access. 827 | * 828 | * Returns 829 | * 0 on success, or a negative error in case of failure. 830 | */ 831 | static int (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31; 832 | 833 | /* 834 | * bpf_skb_change_type 835 | * 836 | * Change the packet type for the packet associated to *skb*. This 837 | * comes down to setting *skb*\ **->pkt_type** to *type*, except 838 | * the eBPF program does not have a write access to *skb*\ 839 | * **->pkt_type** beside this helper. Using a helper here allows 840 | * for graceful handling of errors. 841 | * 842 | * The major use case is to change incoming *skb*s to 843 | * **PACKET_HOST** in a programmatic way instead of having to 844 | * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for 845 | * example. 846 | * 847 | * Note that *type* only allows certain values. At this time, they 848 | * are: 849 | * 850 | * **PACKET_HOST** 851 | * Packet is for us. 
852 | * **PACKET_BROADCAST** 853 | * Send packet to all. 854 | * **PACKET_MULTICAST** 855 | * Send packet to group. 856 | * **PACKET_OTHERHOST** 857 | * Send packet to someone else. 858 | * 859 | * Returns 860 | * 0 on success, or a negative error in case of failure. 861 | */ 862 | static int (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32; 863 | 864 | /* 865 | * bpf_skb_under_cgroup 866 | * 867 | * Check whether *skb* is a descendant of the cgroup2 held by 868 | * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. 869 | * 870 | * Returns 871 | * The return value depends on the result of the test, and can be: 872 | * 873 | * * 0, if the *skb* failed the cgroup2 descendant test. 874 | * * 1, if the *skb* succeeded the cgroup2 descendant test. 875 | * * A negative error code, if an error occurred. 876 | */ 877 | static int (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33; 878 | 879 | /* 880 | * bpf_get_hash_recalc 881 | * 882 | * Retrieve the hash of the packet, *skb*\ **->hash**. If it is 883 | * not set, in particular if the hash was cleared due to mangling, 884 | * recompute this hash. Later accesses to the hash can be done 885 | * directly with *skb*\ **->hash**. 886 | * 887 | * Calling **bpf_set_hash_invalid**\ (), changing a packet 888 | * prototype with **bpf_skb_change_proto**\ (), or calling 889 | * **bpf_skb_store_bytes**\ () with the 890 | * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear 891 | * the hash and to trigger a new computation for the next call to 892 | * **bpf_get_hash_recalc**\ (). 893 | * 894 | * Returns 895 | * The 32-bit hash. 896 | */ 897 | static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34; 898 | 899 | /* 900 | * bpf_get_current_task 901 | * 902 | * 903 | * Returns 904 | * A pointer to the current task struct. 
905 | */ 906 | static __u64 (*bpf_get_current_task)(void) = (void *) 35; 907 | 908 | /* 909 | * bpf_probe_write_user 910 | * 911 | * Attempt in a safe way to write *len* bytes from the buffer 912 | * *src* to *dst* in memory. It only works for threads that are in 913 | * user context, and *dst* must be a valid user space address. 914 | * 915 | * This helper should not be used to implement any kind of 916 | * security mechanism because of TOC-TOU attacks, but rather to 917 | * debug, divert, and manipulate execution of semi-cooperative 918 | * processes. 919 | * 920 | * Keep in mind that this feature is meant for experiments, and it 921 | * has a risk of crashing the system and running programs. 922 | * Therefore, when an eBPF program using this helper is attached, 923 | * a warning including PID and process name is printed to kernel 924 | * logs. 925 | * 926 | * Returns 927 | * 0 on success, or a negative error in case of failure. 928 | */ 929 | static int (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36; 930 | 931 | /* 932 | * bpf_current_task_under_cgroup 933 | * 934 | * Check whether the probe is being run is the context of a given 935 | * subset of the cgroup2 hierarchy. The cgroup2 to test is held by 936 | * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. 937 | * 938 | * Returns 939 | * The return value depends on the result of the test, and can be: 940 | * 941 | * * 0, if the *skb* task belongs to the cgroup2. 942 | * * 1, if the *skb* task does not belong to the cgroup2. 943 | * * A negative error code, if an error occurred. 944 | */ 945 | static int (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37; 946 | 947 | /* 948 | * bpf_skb_change_tail 949 | * 950 | * Resize (trim or grow) the packet associated to *skb* to the 951 | * new *len*. The *flags* are reserved for future usage, and must 952 | * be left at zero. 
953 | * 954 | * The basic idea is that the helper performs the needed work to 955 | * change the size of the packet, then the eBPF program rewrites 956 | * the rest via helpers like **bpf_skb_store_bytes**\ (), 957 | * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ () 958 | * and others. This helper is a slow path utility intended for 959 | * replies with control messages. And because it is targeted for 960 | * slow path, the helper itself can afford to be slow: it 961 | * implicitly linearizes, unclones and drops offloads from the 962 | * *skb*. 963 | * 964 | * A call to this helper is susceptible to change the underlying 965 | * packet buffer. Therefore, at load time, all checks on pointers 966 | * previously done by the verifier are invalidated and must be 967 | * performed again, if the helper is used in combination with 968 | * direct packet access. 969 | * 970 | * Returns 971 | * 0 on success, or a negative error in case of failure. 972 | */ 973 | static int (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38; 974 | 975 | /* 976 | * bpf_skb_pull_data 977 | * 978 | * Pull in non-linear data in case the *skb* is non-linear and not 979 | * all of *len* are part of the linear section. Make *len* bytes 980 | * from *skb* readable and writable. If a zero value is passed for 981 | * *len*, then the whole length of the *skb* is pulled. 982 | * 983 | * This helper is only needed for reading and writing with direct 984 | * packet access. 985 | * 986 | * For direct packet access, testing that offsets to access 987 | * are within packet boundaries (test on *skb*\ **->data_end**) is 988 | * susceptible to fail if offsets are invalid, or if the requested 989 | * data is in non-linear parts of the *skb*. On failure the 990 | * program can just bail out, or in the case of a non-linear 991 | * buffer, use a helper to make the data available. The 992 | * **bpf_skb_load_bytes**\ () helper is a first solution to access 993 | * the data.
Another one consists in using **bpf_skb_pull_data** 994 | * to pull in once the non-linear parts, then retesting and 995 | * eventually access the data. 996 | * 997 | * At the same time, this also makes sure the *skb* is uncloned, 998 | * which is a necessary condition for direct write. As this needs 999 | * to be an invariant for the write part only, the verifier 1000 | * detects writes and adds a prologue that is calling 1001 | * **bpf_skb_pull_data()** to effectively unclone the *skb* from 1002 | * the very beginning in case it is indeed cloned. 1003 | * 1004 | * A call to this helper is susceptible to change the underlying 1005 | * packet buffer. Therefore, at load time, all checks on pointers 1006 | * previously done by the verifier are invalidated and must be 1007 | * performed again, if the helper is used in combination with 1008 | * direct packet access. 1009 | * 1010 | * Returns 1011 | * 0 on success, or a negative error in case of failure. 1012 | */ 1013 | static int (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39; 1014 | 1015 | /* 1016 | * bpf_csum_update 1017 | * 1018 | * Add the checksum *csum* into *skb*\ **->csum** in case the 1019 | * driver has supplied a checksum for the entire packet into that 1020 | * field. Return an error otherwise. This helper is intended to be 1021 | * used in combination with **bpf_csum_diff**\ (), in particular 1022 | * when the checksum needs to be updated after data has been 1023 | * written into the packet through direct packet access. 1024 | * 1025 | * Returns 1026 | * The checksum on success, or a negative error code in case of 1027 | * failure. 1028 | */ 1029 | static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40; 1030 | 1031 | /* 1032 | * bpf_set_hash_invalid 1033 | * 1034 | * Invalidate the current *skb*\ **->hash**. 
It can be used after 1035 | * mangling on headers through direct packet access, in order to 1036 | * indicate that the hash is outdated and to trigger a 1037 | * recalculation the next time the kernel tries to access this 1038 | * hash or when the **bpf_get_hash_recalc**\ () helper is called. 1039 | * 1040 | */ 1041 | static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41; 1042 | 1043 | /* 1044 | * bpf_get_numa_node_id 1045 | * 1046 | * Return the id of the current NUMA node. The primary use case 1047 | * for this helper is the selection of sockets for the local NUMA 1048 | * node, when the program is attached to sockets using the 1049 | * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), 1050 | * but the helper is also available to other eBPF program types, 1051 | * similarly to **bpf_get_smp_processor_id**\ (). 1052 | * 1053 | * Returns 1054 | * The id of current NUMA node. 1055 | */ 1056 | static int (*bpf_get_numa_node_id)(void) = (void *) 42; 1057 | 1058 | /* 1059 | * bpf_skb_change_head 1060 | * 1061 | * Grows headroom of packet associated to *skb* and adjusts the 1062 | * offset of the MAC header accordingly, adding *len* bytes of 1063 | * space. It automatically extends and reallocates memory as 1064 | * required. 1065 | * 1066 | * This helper can be used on a layer 3 *skb* to push a MAC header 1067 | * for redirection into a layer 2 device. 1068 | * 1069 | * All values for *flags* are reserved for future usage, and must 1070 | * be left at zero. 1071 | * 1072 | * A call to this helper is susceptible to change the underlying 1073 | * packet buffer. Therefore, at load time, all checks on pointers 1074 | * previously done by the verifier are invalidated and must be 1075 | * performed again, if the helper is used in combination with 1076 | * direct packet access. 1077 | * 1078 | * Returns 1079 | * 0 on success, or a negative error in case of failure. 
1080 | */ 1081 | static int (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43; 1082 | 1083 | /* 1084 | * bpf_xdp_adjust_head 1085 | * 1086 | * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that 1087 | * it is possible to use a negative value for *delta*. This helper 1088 | * can be used to prepare the packet for pushing or popping 1089 | * headers. 1090 | * 1091 | * A call to this helper is susceptible to change the underlying 1092 | * packet buffer. Therefore, at load time, all checks on pointers 1093 | * previously done by the verifier are invalidated and must be 1094 | * performed again, if the helper is used in combination with 1095 | * direct packet access. 1096 | * 1097 | * Returns 1098 | * 0 on success, or a negative error in case of failure. 1099 | */ 1100 | static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44; 1101 | 1102 | /* 1103 | * bpf_probe_read_str 1104 | * 1105 | * Copy a NUL terminated string from an unsafe kernel address 1106 | * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for 1107 | * more details. 1108 | * 1109 | * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() 1110 | * instead. 1111 | * 1112 | * Returns 1113 | * On success, the strictly positive length of the string, 1114 | * including the trailing NUL character. On error, a negative 1115 | * value. 1116 | */ 1117 | static int (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45; 1118 | 1119 | /* 1120 | * bpf_get_socket_cookie 1121 | * 1122 | * If the **struct sk_buff** pointed by *skb* has a known socket, 1123 | * retrieve the cookie (generated by the kernel) of this socket. 1124 | * If no cookie has been set yet, generate a new cookie. Once 1125 | * generated, the socket cookie remains stable for the life of the 1126 | * socket. 
This helper can be useful for monitoring per socket 1127 | * networking traffic statistics as it provides a global socket 1128 | * identifier that can be assumed unique. 1129 | * 1130 | * Returns 1131 | * A 8-byte long non-decreasing number on success, or 0 if the 1132 | * socket field is missing inside *skb*. 1133 | */ 1134 | static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46; 1135 | 1136 | /* 1137 | * bpf_get_socket_uid 1138 | * 1139 | * 1140 | * Returns 1141 | * The owner UID of the socket associated to *skb*. If the socket 1142 | * is **NULL**, or if it is not a full socket (i.e. if it is a 1143 | * time-wait or a request socket instead), **overflowuid** value 1144 | * is returned (note that **overflowuid** might also be the actual 1145 | * UID value for the socket). 1146 | */ 1147 | static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47; 1148 | 1149 | /* 1150 | * bpf_set_hash 1151 | * 1152 | * Set the full hash for *skb* (set the field *skb*\ **->hash**) 1153 | * to value *hash*. 1154 | * 1155 | * Returns 1156 | * 0 1157 | */ 1158 | static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48; 1159 | 1160 | /* 1161 | * bpf_setsockopt 1162 | * 1163 | * Emulate a call to **setsockopt()** on the socket associated to 1164 | * *bpf_socket*, which must be a full socket. The *level* at 1165 | * which the option resides and the name *optname* of the option 1166 | * must be specified, see **setsockopt(2)** for more information. 1167 | * The option value of length *optlen* is pointed by *optval*. 1168 | * 1169 | * This helper actually implements a subset of **setsockopt()**. 1170 | * It supports the following *level*\ s: 1171 | * 1172 | * * **SOL_SOCKET**, which supports the following *optname*\ s: 1173 | * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, 1174 | * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. 
1175 | * * **IPPROTO_TCP**, which supports the following *optname*\ s: 1176 | * **TCP_CONGESTION**, **TCP_BPF_IW**, 1177 | * **TCP_BPF_SNDCWND_CLAMP**. 1178 | * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. 1179 | * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 1180 | * 1181 | * Returns 1182 | * 0 on success, or a negative error in case of failure. 1183 | */ 1184 | static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49; 1185 | 1186 | /* 1187 | * bpf_skb_adjust_room 1188 | * 1189 | * Grow or shrink the room for data in the packet associated to 1190 | * *skb* by *len_diff*, and according to the selected *mode*. 1191 | * 1192 | * There are two supported modes at this time: 1193 | * 1194 | * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer 1195 | * (room space is added or removed below the layer 2 header). 1196 | * 1197 | * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer 1198 | * (room space is added or removed below the layer 3 header). 1199 | * 1200 | * The following flags are supported at this time: 1201 | * 1202 | * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. 1203 | * Adjusting mss in this way is not allowed for datagrams. 1204 | * 1205 | * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, 1206 | * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: 1207 | * Any new space is reserved to hold a tunnel header. 1208 | * Configure skb offsets and other fields accordingly. 1209 | * 1210 | * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, 1211 | * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: 1212 | * Use with ENCAP_L3 flags to further specify the tunnel type. 1213 | * 1214 | * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): 1215 | * Use with ENCAP_L3/L4 flags to further specify the tunnel 1216 | * type; *len* is the length of the inner MAC header. 1217 | * 1218 | * A call to this helper is susceptible to change the underlying 1219 | * packet buffer. 
Therefore, at load time, all checks on pointers 1220 | * previously done by the verifier are invalidated and must be 1221 | * performed again, if the helper is used in combination with 1222 | * direct packet access. 1223 | * 1224 | * Returns 1225 | * 0 on success, or a negative error in case of failure. 1226 | */ 1227 | static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50; 1228 | 1229 | /* 1230 | * bpf_redirect_map 1231 | * 1232 | * Redirect the packet to the endpoint referenced by *map* at 1233 | * index *key*. Depending on its type, this *map* can contain 1234 | * references to net devices (for forwarding packets through other 1235 | * ports), or to CPUs (for redirecting XDP frames to another CPU; 1236 | * but this is only implemented for native XDP (with driver 1237 | * support) as of this writing). 1238 | * 1239 | * The lower two bits of *flags* are used as the return code if 1240 | * the map lookup fails. This is so that the return value can be 1241 | * one of the XDP program return codes up to XDP_TX, as chosen by 1242 | * the caller. Any higher bits in the *flags* argument must be 1243 | * unset. 1244 | * 1245 | * When used to redirect packets to net devices, this helper 1246 | * provides a high performance increase over **bpf_redirect**\ (). 1247 | * This is due to various implementation details of the underlying 1248 | * mechanisms, one of which is the fact that **bpf_redirect_map**\ 1249 | * () tries to send packet as a "bulk" to the device. 1250 | * 1251 | * Returns 1252 | * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. 1253 | */ 1254 | static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51; 1255 | 1256 | /* 1257 | * bpf_sk_redirect_map 1258 | * 1259 | * Redirect the packet to the socket referenced by *map* (of type 1260 | * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and 1261 | * egress interfaces can be used for redirection. 
The 1262 | * **BPF_F_INGRESS** value in *flags* is used to make the 1263 | * distinction (ingress path is selected if the flag is present, 1264 | * egress path otherwise). This is the only flag supported for now. 1265 | * 1266 | * Returns 1267 | * **SK_PASS** on success, or **SK_DROP** on error. 1268 | */ 1269 | static int (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52; 1270 | 1271 | /* 1272 | * bpf_sock_map_update 1273 | * 1274 | * Add an entry to, or update a *map* referencing sockets. The 1275 | * *skops* is used as a new value for the entry associated to 1276 | * *key*. *flags* is one of: 1277 | * 1278 | * **BPF_NOEXIST** 1279 | * The entry for *key* must not exist in the map. 1280 | * **BPF_EXIST** 1281 | * The entry for *key* must already exist in the map. 1282 | * **BPF_ANY** 1283 | * No condition on the existence of the entry for *key*. 1284 | * 1285 | * If the *map* has eBPF programs (parser and verdict), those will 1286 | * be inherited by the socket being added. If the socket is 1287 | * already attached to eBPF programs, this results in an error. 1288 | * 1289 | * Returns 1290 | * 0 on success, or a negative error in case of failure. 1291 | */ 1292 | static int (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53; 1293 | 1294 | /* 1295 | * bpf_xdp_adjust_meta 1296 | * 1297 | * Adjust the address pointed by *xdp_md*\ **->data_meta** by 1298 | * *delta* (which can be positive or negative). Note that this 1299 | * operation modifies the address stored in *xdp_md*\ **->data**, 1300 | * so the latter must be loaded only after the helper has been 1301 | * called. 1302 | * 1303 | * The use of *xdp_md*\ **->data_meta** is optional and programs 1304 | * are not required to use it. The rationale is that when the 1305 | * packet is processed with XDP (e.g. 
as DoS filter), it is 1306 | * possible to push further meta data along with it before passing 1307 | * to the stack, and to give the guarantee that an ingress eBPF 1308 | * program attached as a TC classifier on the same device can pick 1309 | * this up for further post-processing. Since TC works with socket 1310 | * buffers, it remains possible to set from XDP the **mark** or 1311 | * **priority** pointers, or other pointers for the socket buffer. 1312 | * Having this scratch space generic and programmable allows for 1313 | * more flexibility as the user is free to store whatever meta 1314 | * data they need. 1315 | * 1316 | * A call to this helper is susceptible to change the underlying 1317 | * packet buffer. Therefore, at load time, all checks on pointers 1318 | * previously done by the verifier are invalidated and must be 1319 | * performed again, if the helper is used in combination with 1320 | * direct packet access. 1321 | * 1322 | * Returns 1323 | * 0 on success, or a negative error in case of failure. 1324 | */ 1325 | static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54; 1326 | 1327 | /* 1328 | * bpf_perf_event_read_value 1329 | * 1330 | * Read the value of a perf event counter, and store it into *buf* 1331 | * of size *buf_size*. This helper relies on a *map* of type 1332 | * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event 1333 | * counter is selected when *map* is updated with perf event file 1334 | * descriptors. The *map* is an array whose size is the number of 1335 | * available CPUs, and each cell contains a value relative to one 1336 | * CPU. The value to retrieve is indicated by *flags*, that 1337 | * contains the index of the CPU to look up, masked with 1338 | * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to 1339 | * **BPF_F_CURRENT_CPU** to indicate that the value for the 1340 | * current CPU should be retrieved. 
1341 | * 1342 | * This helper behaves in a way close to 1343 | * **bpf_perf_event_read**\ () helper, save that instead of 1344 | * just returning the value observed, it fills the *buf* 1345 | * structure. This allows for additional data to be retrieved: in 1346 | * particular, the enabled and running times (in *buf*\ 1347 | * **->enabled** and *buf*\ **->running**, respectively) are 1348 | * copied. In general, **bpf_perf_event_read_value**\ () is 1349 | * recommended over **bpf_perf_event_read**\ (), which has some 1350 | * ABI issues and provides fewer functionalities. 1351 | * 1352 | * These values are interesting, because hardware PMU (Performance 1353 | * Monitoring Unit) counters are limited resources. When there are 1354 | * more PMU based perf events opened than available counters, 1355 | * kernel will multiplex these events so each event gets certain 1356 | * percentage (but not all) of the PMU time. In case that 1357 | * multiplexing happens, the number of samples or counter value 1358 | * will not reflect the case compared to when no multiplexing 1359 | * occurs. This makes comparison between different runs difficult. 1360 | * Typically, the counter value should be normalized before 1361 | * comparing to other experiments. The usual normalization is done 1362 | * as follows. 1363 | * 1364 | * :: 1365 | * 1366 | * normalized_counter = counter * t_enabled / t_running 1367 | * 1368 | * Where t_enabled is the time enabled for event and t_running is 1369 | * the time running for event since last normalization. The 1370 | * enabled and running times are accumulated since the perf event 1371 | * open. To achieve scaling factor between two invocations of an 1372 | * eBPF program, users can use CPU id as the key (which is 1373 | * typical for perf array usage model) to remember the previous 1374 | * value and do the calculation inside the eBPF program. 1375 | * 1376 | * Returns 1377 | * 0 on success, or a negative error in case of failure.
1378 | */ 1379 | static int (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55; 1380 | 1381 | /* 1382 | * bpf_perf_prog_read_value 1383 | * 1384 | * For an eBPF program attached to a perf event, retrieve the 1385 | * value of the event counter associated to *ctx* and store it in 1386 | * the structure pointed by *buf* and of size *buf_size*. Enabled 1387 | * and running times are also stored in the structure (see 1388 | * description of helper **bpf_perf_event_read_value**\ () for 1389 | * more details). 1390 | * 1391 | * Returns 1392 | * 0 on success, or a negative error in case of failure. 1393 | */ 1394 | static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56; 1395 | 1396 | /* 1397 | * bpf_getsockopt 1398 | * 1399 | * Emulate a call to **getsockopt()** on the socket associated to 1400 | * *bpf_socket*, which must be a full socket. The *level* at 1401 | * which the option resides and the name *optname* of the option 1402 | * must be specified, see **getsockopt(2)** for more information. 1403 | * The retrieved value is stored in the structure pointed by 1404 | * *optval* and of length *optlen*. 1405 | * 1406 | * This helper actually implements a subset of **getsockopt()**. 1407 | * It supports the following *level*\ s: 1408 | * 1409 | * * **IPPROTO_TCP**, which supports *optname* 1410 | * **TCP_CONGESTION**. 1411 | * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. 1412 | * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 1413 | * 1414 | * Returns 1415 | * 0 on success, or a negative error in case of failure.
1416 | */ 1417 | static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57; 1418 | 1419 | /* 1420 | * bpf_override_return 1421 | * 1422 | * Used for error injection, this helper uses kprobes to override 1423 | * the return value of the probed function, and to set it to *rc*. 1424 | * The first argument is the context *regs* on which the kprobe 1425 | * works. 1426 | * 1427 | * This helper works by setting the PC (program counter) 1428 | * to an override function which is run in place of the original 1429 | * probed function. This means the probed function is not run at 1430 | * all. The replacement function just returns with the required 1431 | * value. 1432 | * 1433 | * This helper has security implications, and thus is subject to 1434 | * restrictions. It is only available if the kernel was compiled 1435 | * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration 1436 | * option, and in this case it only works on functions tagged with 1437 | * **ALLOW_ERROR_INJECTION** in the kernel code. 1438 | * 1439 | * Also, the helper is only available for the architectures having 1440 | * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, 1441 | * x86 architecture is the only one to support this feature. 1442 | * 1443 | * Returns 1444 | * 0 1445 | */ 1446 | static int (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58; 1447 | 1448 | /* 1449 | * bpf_sock_ops_cb_flags_set 1450 | * 1451 | * Attempt to set the value of the **bpf_sock_ops_cb_flags** field 1452 | * for the full TCP socket associated to *bpf_sock_ops* to 1453 | * *argval*. 1454 | * 1455 | * The primary use of this field is to determine if there should 1456 | * be calls to eBPF programs of type 1457 | * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP 1458 | * code. A program of the same type can change its value, per 1459 | * connection and as necessary, when the connection is 1460 | * established.
This field is directly accessible for reading, but 1461 | * this helper must be used for updates in order to return an 1462 | * error if an eBPF program tries to set a callback that is not 1463 | * supported in the current kernel. 1464 | * 1465 | * *argval* is a flag array which can combine these flags: 1466 | * 1467 | * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) 1468 | * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) 1469 | * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) 1470 | * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) 1471 | * 1472 | * Therefore, this function can be used to clear a callback flag by 1473 | * setting the appropriate bit to zero. e.g. to disable the RTO 1474 | * callback: 1475 | * 1476 | * **bpf_sock_ops_cb_flags_set(bpf_sock,** 1477 | * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** 1478 | * 1479 | * Here are some examples of where one could call such eBPF 1480 | * program: 1481 | * 1482 | * * When RTO fires. 1483 | * * When a packet is retransmitted. 1484 | * * When the connection terminates. 1485 | * * When a packet is sent. 1486 | * * When a packet is received. 1487 | * 1488 | * Returns 1489 | * Code **-EINVAL** if the socket is not a full TCP socket; 1490 | * otherwise, a positive number containing the bits that could not 1491 | * be set is returned (which comes down to 0 if all bits were set 1492 | * as required). 1493 | */ 1494 | static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59; 1495 | 1496 | /* 1497 | * bpf_msg_redirect_map 1498 | * 1499 | * This helper is used in programs implementing policies at the 1500 | * socket level. If the message *msg* is allowed to pass (i.e. if 1501 | * the verdict eBPF program returns **SK_PASS**), redirect it to 1502 | * the socket referenced by *map* (of type 1503 | * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and 1504 | * egress interfaces can be used for redirection. 
The 1505 | * **BPF_F_INGRESS** value in *flags* is used to make the 1506 | * distinction (ingress path is selected if the flag is present, 1507 | * egress path otherwise). This is the only flag supported for now. 1508 | * 1509 | * Returns 1510 | * **SK_PASS** on success, or **SK_DROP** on error. 1511 | */ 1512 | static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60; 1513 | 1514 | /* 1515 | * bpf_msg_apply_bytes 1516 | * 1517 | * For socket policies, apply the verdict of the eBPF program to 1518 | * the next *bytes* (number of bytes) of message *msg*. 1519 | * 1520 | * For example, this helper can be used in the following cases: 1521 | * 1522 | * * A single **sendmsg**\ () or **sendfile**\ () system call 1523 | * contains multiple logical messages that the eBPF program is 1524 | * supposed to read and for which it should apply a verdict. 1525 | * * An eBPF program only cares to read the first *bytes* of a 1526 | * *msg*. If the message has a large payload, then setting up 1527 | * and calling the eBPF program repeatedly for all bytes, even 1528 | * though the verdict is already known, would create unnecessary 1529 | * overhead. 1530 | * 1531 | * When called from within an eBPF program, the helper sets a 1532 | * counter internal to the BPF infrastructure, that is used to 1533 | * apply the last verdict to the next *bytes*. If *bytes* is 1534 | * smaller than the current data being processed from a 1535 | * **sendmsg**\ () or **sendfile**\ () system call, the first 1536 | * *bytes* will be sent and the eBPF program will be re-run with 1537 | * the pointer for start of data pointing to byte number *bytes* 1538 | * **+ 1**. If *bytes* is larger than the current data being 1539 | * processed, then the eBPF verdict will be applied to multiple 1540 | * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are 1541 | * consumed. 
1542 | * 1543 | * Note that if a socket closes with the internal counter holding 1544 | * a non-zero value, this is not a problem because data is not 1545 | * being buffered for *bytes* and is sent as it is received. 1546 | * 1547 | * Returns 1548 | * 0 1549 | */ 1550 | static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61; 1551 | 1552 | /* 1553 | * bpf_msg_cork_bytes 1554 | * 1555 | * For socket policies, prevent the execution of the verdict eBPF 1556 | * program for message *msg* until *bytes* (byte number) have been 1557 | * accumulated. 1558 | * 1559 | * This can be used when one needs a specific number of bytes 1560 | * before a verdict can be assigned, even if the data spans 1561 | * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme 1562 | * case would be a user calling **sendmsg**\ () repeatedly with 1563 | * 1-byte long message segments. Obviously, this is bad for 1564 | * performance, but it is still valid. If the eBPF program needs 1565 | * *bytes* bytes to validate a header, this helper can be used to 1566 | * prevent the eBPF program to be called again until *bytes* have 1567 | * been accumulated. 1568 | * 1569 | * Returns 1570 | * 0 1571 | */ 1572 | static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62; 1573 | 1574 | /* 1575 | * bpf_msg_pull_data 1576 | * 1577 | * For socket policies, pull in non-linear data from user space 1578 | * for *msg* and set pointers *msg*\ **->data** and *msg*\ 1579 | * **->data_end** to *start* and *end* bytes offsets into *msg*, 1580 | * respectively. 1581 | * 1582 | * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 1583 | * *msg* it can only parse data that the (**data**, **data_end**) 1584 | * pointers have already consumed. For **sendmsg**\ () hooks this 1585 | * is likely the first scatterlist element. But for calls relying 1586 | * on the **sendpage** handler (e.g. 
**sendfile**\ ()) this will 1587 | * be the range (**0**, **0**) because the data is shared with 1588 | * user space and by default the objective is to avoid allowing 1589 | * user space to modify data while (or after) eBPF verdict is 1590 | * being decided. This helper can be used to pull in data and to 1591 | * set the start and end pointer to given values. Data will be 1592 | * copied if necessary (i.e. if data was not linear and if start 1593 | * and end pointers do not point to the same chunk). 1594 | * 1595 | * A call to this helper is susceptible to change the underlying 1596 | * packet buffer. Therefore, at load time, all checks on pointers 1597 | * previously done by the verifier are invalidated and must be 1598 | * performed again, if the helper is used in combination with 1599 | * direct packet access. 1600 | * 1601 | * All values for *flags* are reserved for future usage, and must 1602 | * be left at zero. 1603 | * 1604 | * Returns 1605 | * 0 on success, or a negative error in case of failure. 1606 | */ 1607 | static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63; 1608 | 1609 | /* 1610 | * bpf_bind 1611 | * 1612 | * Bind the socket associated to *ctx* to the address pointed by 1613 | * *addr*, of length *addr_len*. This allows for making outgoing 1614 | * connection from the desired IP address, which can be useful for 1615 | * example when all processes inside a cgroup should use one 1616 | * single IP address on a host that has multiple IP configured. 1617 | * 1618 | * This helper works for IPv4 and IPv6, TCP and UDP sockets. The 1619 | * domain (*addr*\ **->sa_family**) must be **AF_INET** (or 1620 | * **AF_INET6**). Looking for a free port to bind to can be 1621 | * expensive, therefore binding to port is not permitted by the 1622 | * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) 1623 | * must be set to zero. 
1624 | * 1625 | * Returns 1626 | * 0 on success, or a negative error in case of failure. 1627 | */ 1628 | static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64; 1629 | 1630 | /* 1631 | * bpf_xdp_adjust_tail 1632 | * 1633 | * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is 1634 | * only possible to shrink the packet as of this writing, 1635 | * therefore *delta* must be a negative integer. 1636 | * 1637 | * A call to this helper is susceptible to change the underlying 1638 | * packet buffer. Therefore, at load time, all checks on pointers 1639 | * previously done by the verifier are invalidated and must be 1640 | * performed again, if the helper is used in combination with 1641 | * direct packet access. 1642 | * 1643 | * Returns 1644 | * 0 on success, or a negative error in case of failure. 1645 | */ 1646 | static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65; 1647 | 1648 | /* 1649 | * bpf_skb_get_xfrm_state 1650 | * 1651 | * Retrieve the XFRM state (IP transform framework, see also 1652 | * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. 1653 | * 1654 | * The retrieved value is stored in the **struct bpf_xfrm_state** 1655 | * pointed by *xfrm_state* and of length *size*. 1656 | * 1657 | * All values for *flags* are reserved for future usage, and must 1658 | * be left at zero. 1659 | * 1660 | * This helper is available only if the kernel was compiled with 1661 | * **CONFIG_XFRM** configuration option. 1662 | * 1663 | * Returns 1664 | * 0 on success, or a negative error in case of failure. 1665 | */ 1666 | static int (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66; 1667 | 1668 | /* 1669 | * bpf_get_stack 1670 | * 1671 | * Return a user or a kernel stack in bpf program provided buffer. 
1672 | * To achieve this, the helper needs *ctx*, which is a pointer 1673 | * to the context on which the tracing program is executed. 1674 | * To store the stacktrace, the bpf program provides *buf* with 1675 | * a nonnegative *size*. 1676 | * 1677 | * The last argument, *flags*, holds the number of stack frames to 1678 | * skip (from 0 to 255), masked with 1679 | * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set 1680 | * the following flags: 1681 | * 1682 | * **BPF_F_USER_STACK** 1683 | * Collect a user space stack instead of a kernel stack. 1684 | * **BPF_F_USER_BUILD_ID** 1685 | * Collect buildid+offset instead of ips for user stack, 1686 | * only valid if **BPF_F_USER_STACK** is also specified. 1687 | * 1688 | * **bpf_get_stack**\ () can collect up to 1689 | * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject 1690 | * to sufficient large buffer size. Note that 1691 | * this limit can be controlled with the **sysctl** program, and 1692 | * that it should be manually increased in order to profile long 1693 | * user stacks (such as stacks for Java programs). To do so, use: 1694 | * 1695 | * :: 1696 | * 1697 | * # sysctl kernel.perf_event_max_stack= 1698 | * 1699 | * Returns 1700 | * A non-negative value equal to or less than *size* on success, 1701 | * or a negative error in case of failure. 1702 | */ 1703 | static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67; 1704 | 1705 | /* 1706 | * bpf_skb_load_bytes_relative 1707 | * 1708 | * This helper is similar to **bpf_skb_load_bytes**\ () in that 1709 | * it provides an easy way to load *len* bytes from *offset* 1710 | * from the packet associated to *skb*, into the buffer pointed 1711 | * by *to*. The difference to **bpf_skb_load_bytes**\ () is that 1712 | * a fifth argument *start_header* exists in order to select a 1713 | * base offset to start from. 
*start_header* can be one of: 1714 | * 1715 | * **BPF_HDR_START_MAC** 1716 | * Base offset to load data from is *skb*'s mac header. 1717 | * **BPF_HDR_START_NET** 1718 | * Base offset to load data from is *skb*'s network header. 1719 | * 1720 | * In general, "direct packet access" is the preferred method to 1721 | * access packet data, however, this helper is in particular useful 1722 | * in socket filters where *skb*\ **->data** does not always point 1723 | * to the start of the mac header and where "direct packet access" 1724 | * is not available. 1725 | * 1726 | * Returns 1727 | * 0 on success, or a negative error in case of failure. 1728 | */ 1729 | static int (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68; 1730 | 1731 | /* 1732 | * bpf_fib_lookup 1733 | * 1734 | * Do FIB lookup in kernel tables using parameters in *params*. 1735 | * If lookup is successful and result shows packet is to be 1736 | * forwarded, the neighbor tables are searched for the nexthop. 1737 | * If successful (ie., FIB lookup shows forwarding and nexthop 1738 | * is resolved), the nexthop address is returned in ipv4_dst 1739 | * or ipv6_dst based on family, smac is set to mac address of 1740 | * egress device, dmac is set to nexthop mac address, rt_metric 1741 | * is set to metric from route (IPv4/IPv6 only), and ifindex 1742 | * is set to the device index of the nexthop from the FIB lookup. 1743 | * 1744 | * *plen* argument is the size of the passed in struct. 1745 | * *flags* argument can be a combination of one or more of the 1746 | * following values: 1747 | * 1748 | * **BPF_FIB_LOOKUP_DIRECT** 1749 | * Do a direct table lookup vs full lookup using FIB 1750 | * rules. 1751 | * **BPF_FIB_LOOKUP_OUTPUT** 1752 | * Perform lookup from an egress perspective (default is 1753 | * ingress). 1754 | * 1755 | * *ctx* is either **struct xdp_md** for XDP programs or 1756 | * **struct sk_buff** tc cls_act programs. 
1757 | * 1758 | * Returns 1759 | * * < 0 if any input argument is invalid 1760 | * * 0 on success (packet is forwarded, nexthop neighbor exists) 1761 | * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the 1762 | * packet is not forwarded or needs assist from full stack 1763 | */ 1764 | static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69; 1765 | 1766 | /* 1767 | * bpf_sock_hash_update 1768 | * 1769 | * Add an entry to, or update a sockhash *map* referencing sockets. 1770 | * The *skops* is used as a new value for the entry associated to 1771 | * *key*. *flags* is one of: 1772 | * 1773 | * **BPF_NOEXIST** 1774 | * The entry for *key* must not exist in the map. 1775 | * **BPF_EXIST** 1776 | * The entry for *key* must already exist in the map. 1777 | * **BPF_ANY** 1778 | * No condition on the existence of the entry for *key*. 1779 | * 1780 | * If the *map* has eBPF programs (parser and verdict), those will 1781 | * be inherited by the socket being added. If the socket is 1782 | * already attached to eBPF programs, this results in an error. 1783 | * 1784 | * Returns 1785 | * 0 on success, or a negative error in case of failure. 1786 | */ 1787 | static int (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70; 1788 | 1789 | /* 1790 | * bpf_msg_redirect_hash 1791 | * 1792 | * This helper is used in programs implementing policies at the 1793 | * socket level. If the message *msg* is allowed to pass (i.e. if 1794 | * the verdict eBPF program returns **SK_PASS**), redirect it to 1795 | * the socket referenced by *map* (of type 1796 | * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and 1797 | * egress interfaces can be used for redirection. The 1798 | * **BPF_F_INGRESS** value in *flags* is used to make the 1799 | * distinction (ingress path is selected if the flag is present, 1800 | * egress path otherwise). This is the only flag supported for now. 
1801 | * 1802 | * Returns 1803 | * **SK_PASS** on success, or **SK_DROP** on error. 1804 | */ 1805 | static int (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71; 1806 | 1807 | /* 1808 | * bpf_sk_redirect_hash 1809 | * 1810 | * This helper is used in programs implementing policies at the 1811 | * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. 1812 | * if the verdict eBPF program returns **SK_PASS**), redirect it 1813 | * to the socket referenced by *map* (of type 1814 | * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and 1815 | * egress interfaces can be used for redirection. The 1816 | * **BPF_F_INGRESS** value in *flags* is used to make the 1817 | * distinction (ingress path is selected if the flag is present, 1818 | * egress otherwise). This is the only flag supported for now. 1819 | * 1820 | * Returns 1821 | * **SK_PASS** on success, or **SK_DROP** on error. 1822 | */ 1823 | static int (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72; 1824 | 1825 | /* 1826 | * bpf_lwt_push_encap 1827 | * 1828 | * Encapsulate the packet associated to *skb* within a Layer 3 1829 | * protocol header. This header is provided in the buffer at 1830 | * address *hdr*, with *len* its size in bytes. *type* indicates 1831 | * the protocol of the header and can be one of: 1832 | * 1833 | * **BPF_LWT_ENCAP_SEG6** 1834 | * IPv6 encapsulation with Segment Routing Header 1835 | * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, 1836 | * the IPv6 header is computed by the kernel. 1837 | * **BPF_LWT_ENCAP_SEG6_INLINE** 1838 | * Only works if *skb* contains an IPv6 packet. Insert a 1839 | * Segment Routing Header (**struct ipv6_sr_hdr**) inside 1840 | * the IPv6 header. 1841 | * **BPF_LWT_ENCAP_IP** 1842 | * IP encapsulation (GRE/GUE/IPIP/etc).
The outer header 1843 | * must be IPv4 or IPv6, followed by zero or more 1844 | * additional headers, up to **LWT_BPF_MAX_HEADROOM** 1845 | * total bytes in all prepended headers. Please note that 1846 | * if **skb_is_gso**\ (*skb*) is true, no more than two 1847 | * headers can be prepended, and the inner header, if 1848 | * present, should be either GRE or UDP/GUE. 1849 | * 1850 | * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs 1851 | * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can 1852 | * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and 1853 | * **BPF_PROG_TYPE_LWT_XMIT**. 1854 | * 1855 | * A call to this helper is susceptible to change the underlying 1856 | * packet buffer. Therefore, at load time, all checks on pointers 1857 | * previously done by the verifier are invalidated and must be 1858 | * performed again, if the helper is used in combination with 1859 | * direct packet access. 1860 | * 1861 | * Returns 1862 | * 0 on success, or a negative error in case of failure. 1863 | */ 1864 | static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73; 1865 | 1866 | /* 1867 | * bpf_lwt_seg6_store_bytes 1868 | * 1869 | * Store *len* bytes from address *from* into the packet 1870 | * associated to *skb*, at *offset*. Only the flags, tag and TLVs 1871 | * inside the outermost IPv6 Segment Routing Header can be 1872 | * modified through this helper. 1873 | * 1874 | * A call to this helper is susceptible to change the underlying 1875 | * packet buffer. Therefore, at load time, all checks on pointers 1876 | * previously done by the verifier are invalidated and must be 1877 | * performed again, if the helper is used in combination with 1878 | * direct packet access. 1879 | * 1880 | * Returns 1881 | * 0 on success, or a negative error in case of failure. 
1882 | */ 1883 | static int (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74; 1884 | 1885 | /* 1886 | * bpf_lwt_seg6_adjust_srh 1887 | * 1888 | * Adjust the size allocated to TLVs in the outermost IPv6 1889 | * Segment Routing Header contained in the packet associated to 1890 | * *skb*, at position *offset* by *delta* bytes. Only offsets 1891 | * after the segments are accepted. *delta* can be as well 1892 | * positive (growing) as negative (shrinking). 1893 | * 1894 | * A call to this helper is susceptible to change the underlying 1895 | * packet buffer. Therefore, at load time, all checks on pointers 1896 | * previously done by the verifier are invalidated and must be 1897 | * performed again, if the helper is used in combination with 1898 | * direct packet access. 1899 | * 1900 | * Returns 1901 | * 0 on success, or a negative error in case of failure. 1902 | */ 1903 | static int (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75; 1904 | 1905 | /* 1906 | * bpf_lwt_seg6_action 1907 | * 1908 | * Apply an IPv6 Segment Routing action of type *action* to the 1909 | * packet associated to *skb*. Each action takes a parameter 1910 | * contained at address *param*, and of length *param_len* bytes. 1911 | * *action* can be one of: 1912 | * 1913 | * **SEG6_LOCAL_ACTION_END_X** 1914 | * End.X action: Endpoint with Layer-3 cross-connect. 1915 | * Type of *param*: **struct in6_addr**. 1916 | * **SEG6_LOCAL_ACTION_END_T** 1917 | * End.T action: Endpoint with specific IPv6 table lookup. 1918 | * Type of *param*: **int**. 1919 | * **SEG6_LOCAL_ACTION_END_B6** 1920 | * End.B6 action: Endpoint bound to an SRv6 policy. 1921 | * Type of *param*: **struct ipv6_sr_hdr**. 1922 | * **SEG6_LOCAL_ACTION_END_B6_ENCAP** 1923 | * End.B6.Encap action: Endpoint bound to an SRv6 1924 | * encapsulation policy. 1925 | * Type of *param*: **struct ipv6_sr_hdr**. 
1926 | * 1927 | * A call to this helper is susceptible to change the underlying 1928 | * packet buffer. Therefore, at load time, all checks on pointers 1929 | * previously done by the verifier are invalidated and must be 1930 | * performed again, if the helper is used in combination with 1931 | * direct packet access. 1932 | * 1933 | * Returns 1934 | * 0 on success, or a negative error in case of failure. 1935 | */ 1936 | static int (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76; 1937 | 1938 | /* 1939 | * bpf_rc_repeat 1940 | * 1941 | * This helper is used in programs implementing IR decoding, to 1942 | * report a successfully decoded repeat key message. This delays 1943 | * the generation of a key up event for previously generated 1944 | * key down event. 1945 | * 1946 | * Some IR protocols like NEC have a special IR message for 1947 | * repeating last button, for when a button is held down. 1948 | * 1949 | * The *ctx* should point to the lirc sample as passed into 1950 | * the program. 1951 | * 1952 | * This helper is only available if the kernel was compiled with 1953 | * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 1954 | * "**y**". 1955 | * 1956 | * Returns 1957 | * 0 1958 | */ 1959 | static int (*bpf_rc_repeat)(void *ctx) = (void *) 77; 1960 | 1961 | /* 1962 | * bpf_rc_keydown 1963 | * 1964 | * This helper is used in programs implementing IR decoding, to 1965 | * report a successfully decoded key press with *scancode*, 1966 | * *toggle* value in the given *protocol*. The scancode will be 1967 | * translated to a keycode using the rc keymap, and reported as 1968 | * an input key down event. After a period a key up event is 1969 | * generated. This period can be extended by calling either 1970 | * **bpf_rc_keydown**\ () again with the same values, or calling 1971 | * **bpf_rc_repeat**\ ().
1972 | * 1973 | * Some protocols include a toggle bit, in case the button was 1974 | * released and pressed again between consecutive scancodes. 1975 | * 1976 | * The *ctx* should point to the lirc sample as passed into 1977 | * the program. 1978 | * 1979 | * The *protocol* is the decoded protocol number (see 1980 | * **enum rc_proto** for some predefined values). 1981 | * 1982 | * This helper is only available if the kernel was compiled with 1983 | * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 1984 | * "**y**". 1985 | * 1986 | * Returns 1987 | * 0 1988 | */ 1989 | static int (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78; 1990 | 1991 | /* 1992 | * bpf_skb_cgroup_id 1993 | * 1994 | * Return the cgroup v2 id of the socket associated with the *skb*. 1995 | * This is roughly similar to the **bpf_get_cgroup_classid**\ () 1996 | * helper for cgroup v1 by providing a tag resp. identifier that 1997 | * can be matched on or used for map lookups e.g. to implement 1998 | * policy. The cgroup v2 id of a given path in the hierarchy is 1999 | * exposed in user space through the f_handle API in order to get 2000 | * to the same 64-bit id. 2001 | * 2002 | * This helper can be used on TC egress path, but not on ingress, 2003 | * and is available only if the kernel was compiled with the 2004 | * **CONFIG_SOCK_CGROUP_DATA** configuration option. 2005 | * 2006 | * Returns 2007 | * The id is returned or 0 in case the id could not be retrieved. 2008 | */ 2009 | static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79; 2010 | 2011 | /* 2012 | * bpf_get_current_cgroup_id 2013 | * 2014 | * 2015 | * Returns 2016 | * A 64-bit integer containing the current cgroup id based 2017 | * on the cgroup within which the current task is running. 2018 | */ 2019 | static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80; 2020 | 2021 | /* 2022 | * bpf_get_local_storage 2023 | * 2024 | * Get the pointer to the local storage area.
2025 | * The type and the size of the local storage is defined 2026 | * by the *map* argument. 2027 | * The *flags* meaning is specific for each map type, 2028 | * and has to be 0 for cgroup local storage. 2029 | * 2030 | * Depending on the BPF program type, a local storage area 2031 | * can be shared between multiple instances of the BPF program, 2032 | * running simultaneously. 2033 | * 2034 | * A user should care about the synchronization by himself. 2035 | * For example, by using the **BPF_STX_XADD** instruction to alter 2036 | * the shared data. 2037 | * 2038 | * Returns 2039 | * A pointer to the local storage area. 2040 | */ 2041 | static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81; 2042 | 2043 | /* 2044 | * bpf_sk_select_reuseport 2045 | * 2046 | * Select a **SO_REUSEPORT** socket from a 2047 | * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. 2048 | * It checks the selected socket is matching the incoming 2049 | * request in the socket buffer. 2050 | * 2051 | * Returns 2052 | * 0 on success, or a negative error in case of failure. 2053 | */ 2054 | static int (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82; 2055 | 2056 | /* 2057 | * bpf_skb_ancestor_cgroup_id 2058 | * 2059 | * Return id of cgroup v2 that is ancestor of cgroup associated 2060 | * with the *skb* at the *ancestor_level*. The root cgroup is at 2061 | * *ancestor_level* zero and each step down the hierarchy 2062 | * increments the level. If *ancestor_level* == level of cgroup 2063 | * associated with *skb*, then return value will be same as that 2064 | * of **bpf_skb_cgroup_id**\ (). 2065 | * 2066 | * The helper is useful to implement policies based on cgroups 2067 | * that are upper in hierarchy than immediate cgroup associated 2068 | * with *skb*. 2069 | * 2070 | * The format of returned id and helper limitations are same as in 2071 | * **bpf_skb_cgroup_id**\ (). 
2072 | * 2073 | * Returns 2074 | * The id is returned or 0 in case the id could not be retrieved. 2075 | */ 2076 | static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83; 2077 | 2078 | /* 2079 | * bpf_sk_lookup_tcp 2080 | * 2081 | * Look for TCP socket matching *tuple*, optionally in a child 2082 | * network namespace *netns*. The return value must be checked, 2083 | * and if non-**NULL**, released via **bpf_sk_release**\ (). 2084 | * 2085 | * The *ctx* should point to the context of the program, such as 2086 | * the skb or socket (depending on the hook in use). This is used 2087 | * to determine the base network namespace for the lookup. 2088 | * 2089 | * *tuple_size* must be one of: 2090 | * 2091 | * **sizeof**\ (*tuple*\ **->ipv4**) 2092 | * Look for an IPv4 socket. 2093 | * **sizeof**\ (*tuple*\ **->ipv6**) 2094 | * Look for an IPv6 socket. 2095 | * 2096 | * If the *netns* is a negative signed 32-bit integer, then the 2097 | * socket lookup table in the netns associated with the *ctx* will 2098 | * be used. For the TC hooks, this is the netns of the device 2099 | * in the skb. For socket hooks, this is the netns of the socket. 2100 | * If *netns* is any other signed 32-bit value greater than or 2101 | * equal to zero then it specifies the ID of the netns relative to 2102 | * the netns associated with the *ctx*. *netns* values beyond the 2103 | * range of 32-bit integers are reserved for future use. 2104 | * 2105 | * All values for *flags* are reserved for future usage, and must 2106 | * be left at zero. 2107 | * 2108 | * This helper is available only if the kernel was compiled with 2109 | * **CONFIG_NET** configuration option. 2110 | * 2111 | * Returns 2112 | * Pointer to **struct bpf_sock**, or **NULL** in case of failure. 2113 | * For sockets with reuseport option, the **struct bpf_sock** 2114 | * result is from *reuse*\ **->socks**\ [] using the hash of the 2115 | * tuple.
2116 | */ 2117 | static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84; 2118 | 2119 | /* 2120 | * bpf_sk_lookup_udp 2121 | * 2122 | * Look for UDP socket matching *tuple*, optionally in a child 2123 | * network namespace *netns*. The return value must be checked, 2124 | * and if non-**NULL**, released via **bpf_sk_release**\ (). 2125 | * 2126 | * The *ctx* should point to the context of the program, such as 2127 | * the skb or socket (depending on the hook in use). This is used 2128 | * to determine the base network namespace for the lookup. 2129 | * 2130 | * *tuple_size* must be one of: 2131 | * 2132 | * **sizeof**\ (*tuple*\ **->ipv4**) 2133 | * Look for an IPv4 socket. 2134 | * **sizeof**\ (*tuple*\ **->ipv6**) 2135 | * Look for an IPv6 socket. 2136 | * 2137 | * If the *netns* is a negative signed 32-bit integer, then the 2138 | * socket lookup table in the netns associated with the *ctx* will 2139 | * be used. For the TC hooks, this is the netns of the device 2140 | * in the skb. For socket hooks, this is the netns of the socket. 2141 | * If *netns* is any other signed 32-bit value greater than or 2142 | * equal to zero then it specifies the ID of the netns relative to 2143 | * the netns associated with the *ctx*. *netns* values beyond the 2144 | * range of 32-bit integers are reserved for future use. 2145 | * 2146 | * All values for *flags* are reserved for future usage, and must 2147 | * be left at zero. 2148 | * 2149 | * This helper is available only if the kernel was compiled with 2150 | * **CONFIG_NET** configuration option. 2151 | * 2152 | * Returns 2153 | * Pointer to **struct bpf_sock**, or **NULL** in case of failure. 2154 | * For sockets with reuseport option, the **struct bpf_sock** 2155 | * result is from *reuse*\ **->socks**\ [] using the hash of the 2156 | * tuple.
2157 | */ 2158 | static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85; 2159 | 2160 | /* 2161 | * bpf_sk_release 2162 | * 2163 | * Release the reference held by *sock*. *sock* must be a 2164 | * non-**NULL** pointer that was returned from 2165 | * **bpf_sk_lookup_xxx**\ (). 2166 | * 2167 | * Returns 2168 | * 0 on success, or a negative error in case of failure. 2169 | */ 2170 | static int (*bpf_sk_release)(struct bpf_sock *sock) = (void *) 86; 2171 | 2172 | /* 2173 | * bpf_map_push_elem 2174 | * 2175 | * Push an element *value* in *map*. *flags* is one of: 2176 | * 2177 | * **BPF_EXIST** 2178 | * If the queue/stack is full, the oldest element is 2179 | * removed to make room for this. 2180 | * 2181 | * Returns 2182 | * 0 on success, or a negative error in case of failure. 2183 | */ 2184 | static int (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87; 2185 | 2186 | /* 2187 | * bpf_map_pop_elem 2188 | * 2189 | * Pop an element from *map*. 2190 | * 2191 | * Returns 2192 | * 0 on success, or a negative error in case of failure. 2193 | */ 2194 | static int (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88; 2195 | 2196 | /* 2197 | * bpf_map_peek_elem 2198 | * 2199 | * Get an element from *map* without removing it. 2200 | * 2201 | * Returns 2202 | * 0 on success, or a negative error in case of failure. 2203 | */ 2204 | static int (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89; 2205 | 2206 | /* 2207 | * bpf_msg_push_data 2208 | * 2209 | * For socket policies, insert *len* bytes into *msg* at offset 2210 | * *start*. 2211 | * 2212 | * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 2213 | * *msg* it may want to insert metadata or options into the *msg*. 2214 | * This can later be read and used by any of the lower layer BPF 2215 | * hooks. 
2216 | * 2217 | * This helper may fail if under memory pressure (a malloc 2218 | * fails) in these cases BPF programs will get an appropriate 2219 | * error and BPF programs will need to handle them. 2220 | * 2221 | * Returns 2222 | * 0 on success, or a negative error in case of failure. 2223 | */ 2224 | static int (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90; 2225 | 2226 | /* 2227 | * bpf_msg_pop_data 2228 | * 2229 | * Will remove *len* bytes from a *msg* starting at byte *start*. 2230 | * This may result in **ENOMEM** errors under certain situations if 2231 | * an allocation and copy are required due to a full ring buffer. 2232 | * However, the helper will try to avoid doing the allocation 2233 | * if possible. Other errors can occur if input parameters are 2234 | * invalid either due to *start* byte not being valid part of *msg* 2235 | * payload and/or *pop* value being too large. 2236 | * 2237 | * Returns 2238 | * 0 on success, or a negative error in case of failure. 2239 | */ 2240 | static int (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91; 2241 | 2242 | /* 2243 | * bpf_rc_pointer_rel 2244 | * 2245 | * This helper is used in programs implementing IR decoding, to 2246 | * report a successfully decoded pointer movement. 2247 | * 2248 | * The *ctx* should point to the lirc sample as passed into 2249 | * the program. 2250 | * 2251 | * This helper is only available if the kernel was compiled with 2252 | * the **CONFIG_BPF_LIRC_MODE2** configuration option set to 2253 | * "**y**". 2254 | * 2255 | * Returns 2256 | * 0 2257 | */ 2258 | static int (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92; 2259 | 2260 | /* 2261 | * bpf_spin_lock 2262 | * 2263 | * Acquire a spinlock represented by the pointer *lock*, which is 2264 | * stored as part of a value of a map. Taking the lock allows to 2265 | * safely update the rest of the fields in that value.
The 2266 | * spinlock can (and must) later be released with a call to 2267 | * **bpf_spin_unlock**\ (\ *lock*\ ). 2268 | * 2269 | * Spinlocks in BPF programs come with a number of restrictions 2270 | * and constraints: 2271 | * 2272 | * * **bpf_spin_lock** objects are only allowed inside maps of 2273 | * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this 2274 | * list could be extended in the future). 2275 | * * BTF description of the map is mandatory. 2276 | * * The BPF program can take ONE lock at a time, since taking two 2277 | * or more could cause dead locks. 2278 | * * Only one **struct bpf_spin_lock** is allowed per map element. 2279 | * * When the lock is taken, calls (either BPF to BPF or helpers) 2280 | * are not allowed. 2281 | * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not 2282 | * allowed inside a spinlock-ed region. 2283 | * * The BPF program MUST call **bpf_spin_unlock**\ () to release 2284 | * the lock, on all execution paths, before it returns. 2285 | * * The BPF program can access **struct bpf_spin_lock** only via 2286 | * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () 2287 | * helpers. Loading or storing data into the **struct 2288 | * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. 2289 | * * To use the **bpf_spin_lock**\ () helper, the BTF description 2290 | * of the map value must be a struct and have **struct 2291 | * bpf_spin_lock** *anyname*\ **;** field at the top level. 2292 | * Nested lock inside another struct is not allowed. 2293 | * * The **struct bpf_spin_lock** *lock* field in a map value must 2294 | * be aligned on a multiple of 4 bytes in that value. 2295 | * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy 2296 | * the **bpf_spin_lock** field to user space. 2297 | * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from 2298 | * a BPF program, do not update the **bpf_spin_lock** field. 
2299 | * * **bpf_spin_lock** cannot be on the stack or inside a 2300 | * networking packet (it can only be inside of a map values). 2301 | * * **bpf_spin_lock** is available to root only. 2302 | * * Tracing programs and socket filter programs cannot use 2303 | * **bpf_spin_lock**\ () due to insufficient preemption checks 2304 | * (but this may change in the future). 2305 | * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. 2306 | * 2307 | * Returns 2308 | * 0 2309 | */ 2310 | static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93; 2311 | 2312 | /* 2313 | * bpf_spin_unlock 2314 | * 2315 | * Release the *lock* previously locked by a call to 2316 | * **bpf_spin_lock**\ (\ *lock*\ ). 2317 | * 2318 | * Returns 2319 | * 0 2320 | */ 2321 | static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94; 2322 | 2323 | /* 2324 | * bpf_sk_fullsock 2325 | * 2326 | * This helper gets a **struct bpf_sock** pointer such 2327 | * that all the fields in this **bpf_sock** can be accessed. 2328 | * 2329 | * Returns 2330 | * A **struct bpf_sock** pointer on success, or **NULL** in 2331 | * case of failure. 2332 | */ 2333 | static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95; 2334 | 2335 | /* 2336 | * bpf_tcp_sock 2337 | * 2338 | * This helper gets a **struct bpf_tcp_sock** pointer from a 2339 | * **struct bpf_sock** pointer. 2340 | * 2341 | * Returns 2342 | * A **struct bpf_tcp_sock** pointer on success, or **NULL** in 2343 | * case of failure. 2344 | */ 2345 | static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96; 2346 | 2347 | /* 2348 | * bpf_skb_ecn_set_ce 2349 | * 2350 | * Set ECN (Explicit Congestion Notification) field of IP header 2351 | * to **CE** (Congestion Encountered) if current value is **ECT** 2352 | * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 2353 | * and IPv4. 
2354 | * 2355 | * Returns 2356 | * 1 if the **CE** flag is set (either by the current helper call 2357 | * or because it was already present), 0 if it is not set. 2358 | */ 2359 | static int (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97; 2360 | 2361 | /* 2362 | * bpf_get_listener_sock 2363 | * 2364 | * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. 2365 | * **bpf_sk_release**\ () is unnecessary and not allowed. 2366 | * 2367 | * Returns 2368 | * A **struct bpf_sock** pointer on success, or **NULL** in 2369 | * case of failure. 2370 | */ 2371 | static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98; 2372 | 2373 | /* 2374 | * bpf_skc_lookup_tcp 2375 | * 2376 | * Look for TCP socket matching *tuple*, optionally in a child 2377 | * network namespace *netns*. The return value must be checked, 2378 | * and if non-**NULL**, released via **bpf_sk_release**\ (). 2379 | * 2380 | * This function is identical to **bpf_sk_lookup_tcp**\ (), except 2381 | * that it also returns timewait or request sockets. Use 2382 | * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the 2383 | * full structure. 2384 | * 2385 | * This helper is available only if the kernel was compiled with 2386 | * **CONFIG_NET** configuration option. 2387 | * 2388 | * Returns 2389 | * Pointer to **struct bpf_sock**, or **NULL** in case of failure. 2390 | * For sockets with reuseport option, the **struct bpf_sock** 2391 | * result is from *reuse*\ **->socks**\ [] using the hash of the 2392 | * tuple. 2393 | */ 2394 | static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99; 2395 | 2396 | /* 2397 | * bpf_tcp_check_syncookie 2398 | * 2399 | * Check whether *iph* and *th* contain a valid SYN cookie ACK for 2400 | * the listening socket in *sk*.
2401 | * 2402 | * *iph* points to the start of the IPv4 or IPv6 header, while 2403 | * *iph_len* contains **sizeof**\ (**struct iphdr**) or 2404 | * **sizeof**\ (**struct ip6hdr**). 2405 | * 2406 | * *th* points to the start of the TCP header, while *th_len* 2407 | * contains **sizeof**\ (**struct tcphdr**). 2408 | * 2409 | * 2410 | * Returns 2411 | * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative 2412 | * error otherwise. 2413 | */ 2414 | static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100; 2415 | 2416 | /* 2417 | * bpf_sysctl_get_name 2418 | * 2419 | * Get name of sysctl in /proc/sys/ and copy it into provided by 2420 | * program buffer *buf* of size *buf_len*. 2421 | * 2422 | * The buffer is always NUL terminated, unless it's zero-sized. 2423 | * 2424 | * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is 2425 | * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name 2426 | * only (e.g. "tcp_mem"). 2427 | * 2428 | * Returns 2429 | * Number of characters copied (not including the trailing NUL). 2430 | * 2431 | * **-E2BIG** if the buffer wasn't big enough (*buf* will contain 2432 | * truncated name in this case). 2433 | */ 2434 | static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101; 2435 | 2436 | /* 2437 | * bpf_sysctl_get_current_value 2438 | * 2439 | * Get current value of sysctl as it is presented in /proc/sys 2440 | * (incl. newline, etc), and copy it as a string into provided 2441 | * by program buffer *buf* of size *buf_len*. 2442 | * 2443 | * The whole value is copied, no matter what file position user 2444 | * space issued e.g. sys_read at. 2445 | * 2446 | * The buffer is always NUL terminated, unless it's zero-sized. 2447 | * 2448 | * Returns 2449 | * Number of characters copied (not including the trailing NUL).
2450 | * 2451 | * **-E2BIG** if the buffer wasn't big enough (*buf* will contain 2452 | * truncated value in this case). 2453 | * 2454 | * **-EINVAL** if current value was unavailable, e.g. because 2455 | * sysctl is uninitialized and read returns -EIO for it. 2456 | */ 2457 | static int (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102; 2458 | 2459 | /* 2460 | * bpf_sysctl_get_new_value 2461 | * 2462 | * Get new value being written by user space to sysctl (before 2463 | * the actual write happens) and copy it as a string into 2464 | * provided by program buffer *buf* of size *buf_len*. 2465 | * 2466 | * User space may write new value at file position > 0. 2467 | * 2468 | * The buffer is always NUL terminated, unless it's zero-sized. 2469 | * 2470 | * Returns 2471 | * Number of characters copied (not including the trailing NUL). 2472 | * 2473 | * **-E2BIG** if the buffer wasn't big enough (*buf* will contain 2474 | * truncated value in this case). 2475 | * 2476 | * **-EINVAL** if sysctl is being read. 2477 | */ 2478 | static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103; 2479 | 2480 | /* 2481 | * bpf_sysctl_set_new_value 2482 | * 2483 | * Override new value being written by user space to sysctl with 2484 | * value provided by program in buffer *buf* of size *buf_len*. 2485 | * 2486 | * *buf* should contain a string in same form as provided by user 2487 | * space on sysctl write. 2488 | * 2489 | * User space may write new value at file position > 0. To override 2490 | * the whole sysctl value file position should be set to zero. 2491 | * 2492 | * Returns 2493 | * 0 on success. 2494 | * 2495 | * **-E2BIG** if the *buf_len* is too big. 2496 | * 2497 | * **-EINVAL** if sysctl is being read.
2498 | */ 2499 | static int (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104; 2500 | 2501 | /* 2502 | * bpf_strtol 2503 | * 2504 | * Convert the initial part of the string from buffer *buf* of 2505 | * size *buf_len* to a long integer according to the given base 2506 | * and save the result in *res*. 2507 | * 2508 | * The string may begin with an arbitrary amount of white space 2509 | * (as determined by **isspace**\ (3)) followed by a single 2510 | * optional '**-**' sign. 2511 | * 2512 | * Five least significant bits of *flags* encode base, other bits 2513 | * are currently unused. 2514 | * 2515 | * Base must be either 8, 10, 16 or 0 to detect it automatically 2516 | * similar to user space **strtol**\ (3). 2517 | * 2518 | * Returns 2519 | * Number of characters consumed on success. Must be positive but 2520 | * no more than *buf_len*. 2521 | * 2522 | * **-EINVAL** if no valid digits were found or unsupported base 2523 | * was provided. 2524 | * 2525 | * **-ERANGE** if resulting value was out of range. 2526 | */ 2527 | static int (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105; 2528 | 2529 | /* 2530 | * bpf_strtoul 2531 | * 2532 | * Convert the initial part of the string from buffer *buf* of 2533 | * size *buf_len* to an unsigned long integer according to the 2534 | * given base and save the result in *res*. 2535 | * 2536 | * The string may begin with an arbitrary amount of white space 2537 | * (as determined by **isspace**\ (3)). 2538 | * 2539 | * Five least significant bits of *flags* encode base, other bits 2540 | * are currently unused. 2541 | * 2542 | * Base must be either 8, 10, 16 or 0 to detect it automatically 2543 | * similar to user space **strtoul**\ (3). 2544 | * 2545 | * Returns 2546 | * Number of characters consumed on success. Must be positive but 2547 | * no more than *buf_len*.
2548 | * 2549 | * **-EINVAL** if no valid digits were found or unsupported base 2550 | * was provided. 2551 | * 2552 | * **-ERANGE** if resulting value was out of range. 2553 | */ 2554 | static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106; 2555 | 2556 | /* 2557 | * bpf_sk_storage_get 2558 | * 2559 | * Get a bpf-local-storage from a *sk*. 2560 | * 2561 | * Logically, it could be thought of getting the value from 2562 | * a *map* with *sk* as the **key**. From this 2563 | * perspective, the usage is not much different from 2564 | * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this 2565 | * helper enforces the key must be a full socket and the map must 2566 | * be a **BPF_MAP_TYPE_SK_STORAGE** also. 2567 | * 2568 | * Underneath, the value is stored locally at *sk* instead of 2569 | * the *map*. The *map* is used as the bpf-local-storage 2570 | * "type". The bpf-local-storage "type" (i.e. the *map*) is 2571 | * searched against all bpf-local-storages residing at *sk*. 2572 | * 2573 | * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be 2574 | * used such that a new bpf-local-storage will be 2575 | * created if one does not exist. *value* can be used 2576 | * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify 2577 | * the initial value of a bpf-local-storage. If *value* is 2578 | * **NULL**, the new bpf-local-storage will be zero initialized. 2579 | * 2580 | * Returns 2581 | * A bpf-local-storage pointer is returned on success. 2582 | * 2583 | * **NULL** if not found or there was an error in adding 2584 | * a new bpf-local-storage. 2585 | */ 2586 | static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, void *value, __u64 flags) = (void *) 107; 2587 | 2588 | /* 2589 | * bpf_sk_storage_delete 2590 | * 2591 | * Delete a bpf-local-storage from a *sk*. 2592 | * 2593 | * Returns 2594 | * 0 on success. 2595 | * 2596 | * **-ENOENT** if the bpf-local-storage cannot be found.
2597 | */ 2598 | static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *) 108; 2599 | 2600 | /* 2601 | * bpf_send_signal 2602 | * 2603 | * Send signal *sig* to the current task. 2604 | * 2605 | * Returns 2606 | * 0 on success or successfully queued. 2607 | * 2608 | * **-EBUSY** if work queue under nmi is full. 2609 | * 2610 | * **-EINVAL** if *sig* is invalid. 2611 | * 2612 | * **-EPERM** if no permission to send the *sig*. 2613 | * 2614 | * **-EAGAIN** if bpf program can try again. 2615 | */ 2616 | static int (*bpf_send_signal)(__u32 sig) = (void *) 109; 2617 | 2618 | /* 2619 | * bpf_tcp_gen_syncookie 2620 | * 2621 | * Try to issue a SYN cookie for the packet with corresponding 2622 | * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. 2623 | * 2624 | * *iph* points to the start of the IPv4 or IPv6 header, while 2625 | * *iph_len* contains **sizeof**\ (**struct iphdr**) or 2626 | * **sizeof**\ (**struct ip6hdr**). 2627 | * 2628 | * *th* points to the start of the TCP header, while *th_len* 2629 | * contains the length of the TCP header. 2630 | * 2631 | * 2632 | * Returns 2633 | * On success, lower 32 bits hold the generated SYN cookie, 2634 | * followed by 16 bits which hold the MSS value for that cookie, 2635 | * and the top 16 bits are unused.
2636 | * 2637 | * On failure, the returned value is one of the following: 2638 | * 2639 | * **-EINVAL** SYN cookie cannot be issued due to error 2640 | * 2641 | * **-ENOENT** SYN cookie should not be issued (no SYN flood) 2642 | * 2643 | * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies 2644 | * 2645 | * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 2646 | */ 2647 | static __s64 (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110; 2648 | 2649 | /* 2650 | * bpf_skb_output 2651 | * 2652 | * Write raw *data* blob into a special BPF perf event held by 2653 | * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf 2654 | * event must have the following attributes: **PERF_SAMPLE_RAW** 2655 | * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and 2656 | * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. 2657 | * 2658 | * The *flags* are used to indicate the index in *map* for which 2659 | * the value must be put, masked with **BPF_F_INDEX_MASK**. 2660 | * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** 2661 | * to indicate that the index of the current CPU core should be 2662 | * used. 2663 | * 2664 | * The value to write, of *size*, is passed through eBPF stack and 2665 | * pointed by *data*. 2666 | * 2667 | * *ctx* is a pointer to in-kernel struct sk_buff. 2668 | * 2669 | * This helper is similar to **bpf_perf_event_output**\ () but 2670 | * restricted to raw_tracepoint bpf programs. 2671 | * 2672 | * Returns 2673 | * 0 on success, or a negative error in case of failure. 2674 | */ 2675 | static int (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111; 2676 | 2677 | /* 2678 | * bpf_probe_read_user 2679 | * 2680 | * Safely attempt to read *size* bytes from user space address 2681 | * *unsafe_ptr* and store the data in *dst*. 2682 | * 2683 | * Returns 2684 | * 0 on success, or a negative error in case of failure.
2685 | */ 2686 | static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112; 2687 | 2688 | /* 2689 | * bpf_probe_read_kernel 2690 | * 2691 | * Safely attempt to read *size* bytes from kernel space address 2692 | * *unsafe_ptr* and store the data in *dst*. 2693 | * 2694 | * Returns 2695 | * 0 on success, or a negative error in case of failure. 2696 | */ 2697 | static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113; 2698 | 2699 | /* 2700 | * bpf_probe_read_user_str 2701 | * 2702 | * Copy a NUL terminated string from an unsafe user address 2703 | * *unsafe_ptr* to *dst*. The *size* should include the 2704 | * terminating NUL byte. In case the string length is smaller than 2705 | * *size*, the target is not padded with further NUL bytes. If the 2706 | * string length is larger than *size*, just *size*-1 bytes are 2707 | * copied and the last byte is set to NUL. 2708 | * 2709 | * On success, the length of the copied string is returned. This 2710 | * makes this helper useful in tracing programs for reading 2711 | * strings, and more importantly to get its length at runtime. See 2712 | * the following snippet: 2713 | * 2714 | * :: 2715 | * 2716 | * SEC("kprobe/sys_open") 2717 | * void bpf_sys_open(struct pt_regs *ctx) 2718 | * { 2719 | * char buf[PATHLEN]; // PATHLEN is defined to 256 2720 | * int res = bpf_probe_read_user_str(buf, sizeof(buf), 2721 | * ctx->di); 2722 | * 2723 | * // Consume buf, for example push it to 2724 | * // userspace via bpf_perf_event_output(); we 2725 | * // can use res (the string length) as event 2726 | * // size, after checking its boundaries. 2727 | * } 2728 | * 2729 | * In comparison, using **bpf_probe_read_user()** helper here 2730 | * instead to read the string would require estimating the length 2731 | * at compile time, and would often result in copying more memory 2732 | * than necessary.
2733 | * 2734 | * Another useful use case is when parsing individual process 2735 | * arguments or individual environment variables navigating 2736 | * *current*\ **->mm->arg_start** and *current*\ 2737 | * **->mm->env_start**: using this helper and the return value, 2738 | * one can quickly iterate at the right offset of the memory area. 2739 | * 2740 | * Returns 2741 | * On success, the strictly positive length of the string, 2742 | * including the trailing NUL character. On error, a negative 2743 | * value. 2744 | */ 2745 | static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114; 2746 | 2747 | /* 2748 | * bpf_probe_read_kernel_str 2749 | * 2750 | * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* 2751 | * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. 2752 | * 2753 | * Returns 2754 | * On success, the strictly positive length of the string, including 2755 | * the trailing NUL character. On error, a negative value. 2756 | */ 2757 | static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115; 2758 | 2759 | 2760 | --------------------------------------------------------------------------------