├── .gitignore
├── .gitmodules
├── LICENSE
├── Makefile
├── README.md
├── azure-pipelines.yaml
├── headers
    ├── bpf_endian.h
    ├── bpf_legacy.h
    ├── bpf_util.h
    ├── jhash.h
    ├── linux
    │   ├── bpf.h
    │   ├── err.h
    │   ├── if_link.h
    │   └── if_xdp.h
    └── perf-sys.h
├── scripts
    └── ci_test.sh
└── src
    ├── common.h
    ├── keepalive_gre.c
    └── keepalive_gre6.c


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ll 
 2 | *.pcap
 3 | build/
 4 | 
 5 | # Created by https://www.gitignore.io/api/c
 6 | # Edit at https://www.gitignore.io/?templates=c
 7 | 
 8 | ### C ###
 9 | # Prerequisites
10 | *.d
11 | 
12 | # Object files
13 | *.o
14 | *.ko
15 | *.obj
16 | *.elf
17 | 
18 | # Linker output
19 | *.ilk
20 | *.map
21 | *.exp
22 | 
23 | # Precompiled Headers
24 | *.gch
25 | *.pch
26 | 
27 | # Libraries
28 | *.lib
29 | *.a
30 | *.la
31 | *.lo
32 | 
33 | # Shared objects (inc. Windows DLLs)
34 | *.dll
35 | *.so
36 | *.so.*
37 | *.dylib
38 | 
39 | # Executables
40 | *.exe
41 | *.out
42 | *.app
43 | *.i*86
44 | *.x86_64
45 | *.hex
46 | 
47 | # Debug files
48 | *.dSYM/
49 | *.su
50 | *.idb
51 | *.pdb
52 | 
53 | # Kernel Module Compile Results
54 | *.mod*
55 | *.cmd
56 | .tmp_versions/
57 | modules.order
58 | Module.symvers
59 | Mkfile.old
60 | dkms.conf
61 | 
62 | # End of https://www.gitignore.io/api/c


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "libbpf"]
2 | 	path = libbpf
3 | 	url = https://github.com/libbpf/libbpf.git
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 2 | 
 3 | SRC_DIR = src
 4 | BUILD_DIR = build
 5 | 
 6 | XDP_C = $(wildcard $(SRC_DIR)/*.c)
 7 | XDP_OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o, $(XDP_C))
 8 | 
 9 | USER_LIBS :=
10 | EXTRA_DEPS :=
11 | 
12 | LLC ?= llc
13 | CLANG ?= clang
14 | CC ?= gcc
15 | 
16 | LIBBPF_DIR = libbpf/src/
17 | OBJECT_LIBBPF = $(LIBBPF_DIR)/libbpf.a
18 | 
19 | CFLAGS ?= -I$(LIBBPF_DIR)/build/usr/include/ -g
20 | CFLAGS += -I../headers/
21 | LDFLAGS ?= -L$(LIBBPF_DIR)
22 | 
23 | LIBS = -l:libbpf.a -lelf $(USER_LIBS)
24 | 
25 | BPF_CFLAGS ?= -I$(LIBBPF_DIR)/build/usr/include/ -I../headers/
26 | BPF_CFLAGS += -Wall -Wno-unused-value -Wno-pointer-sign -Wno-compare-distinct-pointer-types
27 | BPF_CFLAGS_EXTRA ?= -Werror -Wno-visibility
28 | BPF_CFLAGS_USER ?=
29 | 
30 | ifeq ($(DEBUG), 1)
31 | BPF_CFLAGS_USER += -DDEBUG
32 | endif
33 | 
34 | all: llvm-check $(XDP_OBJ)
35 | 
36 | .PHONY: clean $(CLANG) $(LLC)
37 | 
38 | clean:
39 | 	rm -rf $(LIBBPF_DIR)/build
40 | 	$(MAKE) -C $(LIBBPF_DIR) clean
41 | 	rm -rf $(BUILD_DIR)
42 | 	rm -f *~
43 | 
44 | llvm-check: $(CLANG) $(LLC)
45 | 	@for TOOL in $^ ; do \
46 | 		if [ ! $$(command -v $${TOOL} 2>/dev/null) ]; then \
47 | 			echo "*** ERROR: Cannot find tool $${TOOL}" ;\
48 | 			exit 1; \
49 | 		else true; fi; \
50 | 	done
51 | 
52 | $(BUILD_DIR): 
53 | 	mkdir -p $(BUILD_DIR)
54 | 
55 | $(OBJECT_LIBBPF):
56 | 	@if [ ! -d $(LIBBPF_DIR) ]; then \
57 | 		echo "Error: Need libbpf submodule"; \
58 | 		echo "May need to run git submodule update --init"; \
59 | 		exit 1; \
60 | 	else \
61 | 		cd $(LIBBPF_DIR) && $(MAKE) all; \
62 | 		mkdir -p build; DESTDIR=build $(MAKE) install_headers; \
63 | 	fi
64 | 
65 | $(XDP_OBJ): $(BUILD_DIR)/%.o: $(SRC_DIR)/%.c  $(BUILD_DIR) $(OBJECT_LIBBPF) Makefile $(EXTRA_DEPS)
66 | 	$(CLANG) -S \
67 | 	    -target bpf \
68 | 	    -D __BPF_TRACING__ \
69 | 	    $(BPF_CFLAGS) $(BPF_CFLAGS_EXTRA) $(BPF_CFLAGS_USER) \
70 | 	    -O2 -emit-llvm -c -g -o ${@:.o=.ll} $<
71 | 	$(LLC) -march=bpf -filetype=obj -o $@ ${@:.o=.ll}
72 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # linux-gre-keepalive
  2 | 
  3 | This eBPF program adds high-performance reply-only GRE keepalive support for Linux kernel.
  4 | 
  5 | [![Build Status](https://dev.azure.com/nekomimiswitch/General/_apis/build/status/linux-gre-keepalive?branchName=master)](https://dev.azure.com/nekomimiswitch/General/_build/latest?definitionId=78&branchName=master)
  6 | 
  7 | Note: If you don't want to install anything and don't care about some potential security problems, just enable the following 2 options to get native GRE keepalive support on Linux:
  8 | ```
  9 | sysctl net.ipv4.conf.default.accept_local=1
 10 | sysctl net.ipv4.conf.all.accept_local=1
 11 | ```
 12 | 
 13 | ## Compatiblity
 14 | 
 15 | | Protocol 	| Linux name 	| XDP Executable   	| Tested Vendors  	| Comments    	|
 16 | |----------	|------------	|------------------	|-----------------	|-------------	|
 17 | | GRE      	| gre        	| keepalive_gre.o  	| Cisco, MikroTik 	|             	|
 18 | | GRE6     	| ip6gre     	| keepalive_gre6.o 	| MikroTik       	|             	|
 19 | 
 20 | ## Usage
 21 | 
 22 | Simply load the correct XDP executable on the tunnel interface you just created. For example, assume you have set up the GRE tunnel as `gre0`, to enable GRE keepalive:
 23 | 
 24 | ```shell
 25 | ip link set dev gre0 xdp object build/keepalive_gre.o
 26 | ```
 27 | 
 28 | To disable it without removing the tunnel interface:
 29 | 
 30 | ```shell
 31 | ip link set dev gre0 xdp off
 32 | ```
 33 | 
 34 | Loading an executable on other types of interfaces is considered an undefined behavior.
 35 | 
 36 | ## Caveats
 37 | 
 38 | ### GRE on Cisco IOS XE
 39 | 
 40 | On Cisco IOS XE, you must explicitly configure an ip address or an ipv6 address to make the GRE tunnel actually send something. If you don't configure IP addresses, `debug tunnel keepalive` will still show keepalive packets being sent, but the other end won't receive anything. A valid configuration example:
 41 | 
 42 | ```
 43 | interface Tunnel10
 44 |  ip address 10.0.0.1 255.255.255.0
 45 |  keepalive 1 2
 46 |  tunnel source GigabitEthernet1
 47 |  tunnel destination your.other.end.ip.address
 48 |  tunnel mode gre ip
 49 | ```
 50 | 
 51 | ### GRE6 (ip6gre) keepalive support
 52 | 
 53 | GRE6 keepalive is not supported by:
 54 | 
 55 | * [Cisco IOS XE](https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/interface/configuration/xe-16-6/ir-xe-16-6-book/ir-gre-ipv6-tunls-xe.html#GUID-B8369497-671A-4B51-A749-A81971011A29)
 56 | * [Juniper Junos OS](https://www.juniper.net/documentation/en_US/junos/topics/concept/gre-keepalive-time-overview.html)
 57 | 
 58 | MikroTik RouterOS implements their own GRE IPv6 keepalive with inner GRE header's proto field set to `0x86dd`. This have been implemented by us.
 59 | 
 60 | ## Building
 61 | 
 62 | Assume we are on a Debian 10.
 63 | 
 64 | ```shell
 65 | sudo apt install build-essential clang llvm libelf-dev gcc-multilib linux-headers-$(dpkg --print-architecture)
 66 | make all
 67 | ```
 68 | 
 69 | ### Debugging
 70 | 
 71 | View compiled bytecode:
 72 | 
 73 | ```shell
 74 | llvm-objdump -S build/keepalive_gre.o
 75 | ```
 76 | 
 77 | Enabling debugging output:
 78 | 
 79 | ```c
 80 | #define DEBUG
 81 | #define DEBUG_PRINT_HEADER_SIZE 32
 82 | ```
 83 | 
 84 | Then view debug output after enabling it by:
 85 | 
 86 | ```shell
 87 | cat /sys/kernel/debug/tracing/trace_pipe
 88 | ```
 89 | 
 90 | ## References
 91 | 
 92 | Here's a list of awesome articles and projects I found useful:
 93 | 
 94 | * [BPF and XDP Reference Guide](https://docs.cilium.io/en/latest/bpf/)
 95 | * [xdp-project/xdp-tutorial](https://github.com/xdp-project/xdp-tutorial)
 96 | * [dpino/xdp_ipv6_filter](https://github.com/dpino/xdp_ipv6_filter)
 97 | * [How GRE Keepalives Work](https://www.cisco.com/c/en/us/support/docs/ip/generic-routing-encapsulation-gre/63760-gre-keepalives-63760.html)
 98 | * [OISF/suricata](https://github.com/OISF/suricata)
 99 | * [iovisor/bpf-docs](https://github.com/iovisor/bpf-docs)
100 | * [PaulTimmins/linux-gre-keepalive](https://github.com/PaulTimmins/linux-gre-keepalive)
101 | * [An introduction to Linux virtual interfaces: Tunnels](https://developers.redhat.com/blog/2019/05/17/an-introduction-to-linux-virtual-interfaces-tunnels/)
102 | 


--------------------------------------------------------------------------------
/azure-pipelines.yaml:
--------------------------------------------------------------------------------
 1 | name: $(Date:yyyyMMdd).$(Rev:r)
 2 | 
 3 | trigger:
 4 |   batch: true
 5 |   branches:
 6 |     include: [ "*" ]
 7 |   paths:
 8 |     exclude: [ "README.md" ]
 9 | 
10 | jobs:
11 |   - job: build
12 |     displayName: "Build"
13 |     pool:
14 |       vmImage: "ubuntu-latest"
15 |     workspace:
16 |       clean: all
17 |     timeoutInMinutes: 10
18 | 
19 |     steps:
20 |     - checkout: 'self'
21 |       clean: true
22 |       submodules: 'recursive'
23 | 
24 |     - bash: |
25 |         sudo apt update
26 |         sudo apt install build-essential clang llvm libelf-dev gcc-multilib linux-headers-$(uname -r)
27 |       displayName: 'Install dependencies'
28 | 
29 |     - bash: |
30 |         make DEBUG=1 all
31 |       displayName: 'Build (debug)'
32 | 
33 |     - bash: |
34 |         rm -r ${BUILD_ARTIFACTSTAGINGDIRECTORY}/*
35 |         cp build/* ${BUILD_ARTIFACTSTAGINGDIRECTORY}
36 |       displayName: 'Copy artifacts (debug)'
37 |     
38 |     - task: PublishBuildArtifacts@1
39 |       displayName: 'Publish Artifacts (debug)'
40 |       inputs:
41 |         artifactName: 'debug'
42 | 
43 |     - bash: |
44 |         sudo -E scripts/ci_test.sh
45 |       displayName: 'Test (debug)'
46 | 
47 |     - bash: |
48 |         rm -r build/*
49 |         make all
50 |       displayName: 'Build (production)'
51 | 
52 |     - bash: |
53 |         rm -r ${BUILD_ARTIFACTSTAGINGDIRECTORY}/*
54 |         cp build/* ${BUILD_ARTIFACTSTAGINGDIRECTORY}
55 |       displayName: 'Copy artifacts (production)'
56 |     
57 |     - task: PublishBuildArtifacts@1
58 |       displayName: 'Publish Artifacts (production)'
59 |       inputs:
60 |         artifactName: 'production'
61 | 
62 |     - bash: |
63 |         sudo -E scripts/ci_test.sh
64 |       displayName: 'Test (production)'


--------------------------------------------------------------------------------
/headers/bpf_endian.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: GPL-2.0 */
 2 | /* Copied from $(LINUX)/tools/testing/selftests/bpf/bpf_endian.h */
 3 | #ifndef __BPF_ENDIAN__
 4 | #define __BPF_ENDIAN__
 5 | 
 6 | #include <linux/swab.h>
 7 | 
 8 | /* LLVM's BPF target selects the endianness of the CPU
 9 |  * it compiles on, or the user specifies (bpfel/bpfeb),
10 |  * respectively. The used __BYTE_ORDER__ is defined by
11 |  * the compiler, we cannot rely on __BYTE_ORDER from
12 |  * libc headers, since it doesn't reflect the actual
13 |  * requested byte order.
14 |  *
15 |  * Note, LLVM's BPF target has different __builtin_bswapX()
16 |  * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
17 |  * in bpfel and bpfeb case, which means below, that we map
18 |  * to cpu_to_be16(). We could use it unconditionally in BPF
19 |  * case, but better not rely on it, so that this header here
20 |  * can be used from application and BPF program side, which
21 |  * use different targets.
22 |  */
23 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
24 | # define __bpf_ntohs(x)__builtin_bswap16(x)
25 | # define __bpf_htons(x)__builtin_bswap16(x)
26 | # define __bpf_constant_ntohs(x)___constant_swab16(x)
27 | # define __bpf_constant_htons(x)___constant_swab16(x)
28 | # define __bpf_ntohl(x)__builtin_bswap32(x)
29 | # define __bpf_htonl(x)__builtin_bswap32(x)
30 | # define __bpf_constant_ntohl(x)___constant_swab32(x)
31 | # define __bpf_constant_htonl(x)___constant_swab32(x)
32 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
33 | # define __bpf_ntohs(x)(x)
34 | # define __bpf_htons(x)(x)
35 | # define __bpf_constant_ntohs(x)(x)
36 | # define __bpf_constant_htons(x)(x)
37 | # define __bpf_ntohl(x)(x)
38 | # define __bpf_htonl(x)(x)
39 | # define __bpf_constant_ntohl(x)(x)
40 | # define __bpf_constant_htonl(x)(x)
41 | #else
42 | # error "Fix your compiler's __BYTE_ORDER__?!"
43 | #endif
44 | 
45 | #define bpf_htons(x)\
46 |   (__builtin_constant_p(x) ?\
47 |    __bpf_constant_htons(x) : __bpf_htons(x))
48 | #define bpf_ntohs(x)\
49 |   (__builtin_constant_p(x) ?\
50 |    __bpf_constant_ntohs(x) : __bpf_ntohs(x))
51 | #define bpf_htonl(x)\
52 |   (__builtin_constant_p(x) ?\
53 |    __bpf_constant_htonl(x) : __bpf_htonl(x))
54 | #define bpf_ntohl(x)\
55 |   (__builtin_constant_p(x) ?\
56 |    __bpf_constant_ntohl(x) : __bpf_ntohl(x))
57 | 
58 | #endif /* __BPF_ENDIAN__ */
59 | 


--------------------------------------------------------------------------------
/headers/bpf_legacy.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 2 | #ifndef __BPF_LEGACY__
 3 | #define __BPF_LEGACY__
 4 | 
 5 | /*
 6 |  * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
 7 |  * use outside of samples/bpf
 8 |  */
 9 | struct bpf_map_def_legacy {
10 | 	unsigned int type;
11 | 	unsigned int key_size;
12 | 	unsigned int value_size;
13 | 	unsigned int max_entries;
14 | 	unsigned int map_flags;
15 | 	unsigned int inner_map_idx;
16 | 	unsigned int numa_node;
17 | };
18 | 
19 | #define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
20 | 	struct ____btf_map_##name {				\
21 | 		type_key key;					\
22 | 		type_val value;					\
23 | 	};							\
24 | 	struct ____btf_map_##name				\
25 | 	__attribute__ ((section(".maps." #name), used))		\
26 | 		____btf_map_##name = { }
27 | 
28 | /* llvm builtin functions that eBPF C program may use to
29 |  * emit BPF_LD_ABS and BPF_LD_IND instructions
30 |  */
31 | unsigned long long load_byte(void *skb,
32 | 			     unsigned long long off) asm("llvm.bpf.load.byte");
33 | unsigned long long load_half(void *skb,
34 | 			     unsigned long long off) asm("llvm.bpf.load.half");
35 | unsigned long long load_word(void *skb,
36 | 			     unsigned long long off) asm("llvm.bpf.load.word");
37 | 
38 | #endif
39 | 
40 | 


--------------------------------------------------------------------------------
/headers/bpf_util.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: GPL-2.0 */
 2 | /* Copied from $(LINUX)/tools/testing/selftests/bpf/bpf_util.h */
 3 | #ifndef __BPF_UTIL__
 4 | #define __BPF_UTIL__
 5 | 
 6 | #include <stdio.h>
 7 | #include <stdlib.h>
 8 | #include <string.h>
 9 | #include <errno.h>
10 | 
11 | static inline unsigned int bpf_num_possible_cpus(void)
12 | {
13 | 	static const char *fcpu = "/sys/devices/system/cpu/possible";
14 | 	unsigned int start, end, possible_cpus = 0;
15 | 	char buff[128];
16 | 	FILE *fp;
17 | 	int n;
18 | 
19 | 	fp = fopen(fcpu, "r");
20 | 	if (!fp) {
21 | 		printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
22 | 		exit(1);
23 | 	}
24 | 
25 | 	while (fgets(buff, sizeof(buff), fp)) {
26 | 		n = sscanf(buff, "%u-%u", &start, &end);
27 | 		if (n == 0) {
28 | 			printf("Failed to retrieve # possible CPUs!\n");
29 | 			exit(1);
30 | 		} else if (n == 1) {
31 | 			end = start;
32 | 		}
33 | 		possible_cpus = start == 0 ? end + 1 : 0;
34 | 		break;
35 | 	}
36 | 	fclose(fp);
37 | 
38 | 	return possible_cpus;
39 | }
40 | 
41 | #define __bpf_percpu_val_align	__attribute__((__aligned__(8)))
42 | 
43 | #define BPF_DECLARE_PERCPU(type, name)				\
44 | 	struct { type v; /* padding */ } __bpf_percpu_val_align	\
45 | 		name[bpf_num_possible_cpus()]
46 | #define bpf_percpu(name, cpu) name[(cpu)].v
47 | 
48 | #ifndef ARRAY_SIZE
49 | # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
50 | #endif
51 | 
52 | #ifndef sizeof_field
53 | #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
54 | #endif
55 | 
56 | #ifndef offsetofend
57 | #define offsetofend(TYPE, MEMBER) \
58 | 	(offsetof(TYPE, MEMBER)	+ sizeof_field(TYPE, MEMBER))
59 | #endif
60 | 
61 | #endif /* __BPF_UTIL__ */
62 | 


--------------------------------------------------------------------------------
/headers/jhash.h:
--------------------------------------------------------------------------------
  1 | #ifndef _LINUX_JHASH_H
  2 | #define _LINUX_JHASH_H
  3 | 
  4 | /* Copied from $(LINUX)/include/linux/jhash.h (kernel 4.18) */
  5 | 
  6 | /* jhash.h: Jenkins hash support.
  7 |  *
  8 |  * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
  9 |  *
 10 |  * http://burtleburtle.net/bob/hash/
 11 |  *
 12 |  * These are the credits from Bob's sources:
 13 |  *
 14 |  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 15 |  *
 16 |  * These are functions for producing 32-bit hashes for hash table lookup.
 17 |  * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
 18 |  * are externally useful functions.  Routines to test the hash are included
 19 |  * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
 20 |  * the public domain.  It has no warranty.
 21 |  *
 22 |  * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
 23 |  */
 24 | 
 25 | static inline __u32 rol32(__u32 word, unsigned int shift)
 26 | {
 27 | 	return (word << shift) | (word >> ((-shift) & 31));
 28 | }
 29 | 
 30 | /* copy paste of jhash from kernel sources (include/linux/jhash.h) to make sure
 31 |  * LLVM can compile it into valid sequence of BPF instructions
 32 |  */
 33 | #define __jhash_mix(a, b, c)			\
 34 | {						\
 35 | 	a -= c;  a ^= rol32(c, 4);  c += b;	\
 36 | 	b -= a;  b ^= rol32(a, 6);  a += c;	\
 37 | 	c -= b;  c ^= rol32(b, 8);  b += a;	\
 38 | 	a -= c;  a ^= rol32(c, 16); c += b;	\
 39 | 	b -= a;  b ^= rol32(a, 19); a += c;	\
 40 | 	c -= b;  c ^= rol32(b, 4);  b += a;	\
 41 | }
 42 | 
 43 | #define __jhash_final(a, b, c)			\
 44 | {						\
 45 | 	c ^= b; c -= rol32(b, 14);		\
 46 | 	a ^= c; a -= rol32(c, 11);		\
 47 | 	b ^= a; b -= rol32(a, 25);		\
 48 | 	c ^= b; c -= rol32(b, 16);		\
 49 | 	a ^= c; a -= rol32(c, 4);		\
 50 | 	b ^= a; b -= rol32(a, 14);		\
 51 | 	c ^= b; c -= rol32(b, 24);		\
 52 | }
 53 | 
 54 | #define JHASH_INITVAL		0xdeadbeef
 55 | 
 56 | typedef unsigned int u32;
 57 | 
 58 | /* jhash - hash an arbitrary key
 59 |  * @k: sequence of bytes as key
 60 |  * @length: the length of the key
 61 |  * @initval: the previous hash, or an arbitray value
 62 |  *
 63 |  * The generic version, hashes an arbitrary sequence of bytes.
 64 |  * No alignment or length assumptions are made about the input key.
 65 |  *
 66 |  * Returns the hash value of the key. The result depends on endianness.
 67 |  */
 68 | static inline u32 jhash(const void *key, u32 length, u32 initval)
 69 | {
 70 | 	u32 a, b, c;
 71 | 	const unsigned char *k = key;
 72 | 
 73 | 	/* Set up the internal state */
 74 | 	a = b = c = JHASH_INITVAL + length + initval;
 75 | 
 76 | 	/* All but the last block: affect some 32 bits of (a,b,c) */
 77 | 	while (length > 12) {
 78 | 		a += *(u32 *)(k);
 79 | 		b += *(u32 *)(k + 4);
 80 | 		c += *(u32 *)(k + 8);
 81 | 		__jhash_mix(a, b, c);
 82 | 		length -= 12;
 83 | 		k += 12;
 84 | 	}
 85 | 	/* Last block: affect all 32 bits of (c) */
 86 | 	switch (length) {
 87 | 	case 12: c += (u32)k[11]<<24;	/* fall through */
 88 | 	case 11: c += (u32)k[10]<<16;	/* fall through */
 89 | 	case 10: c += (u32)k[9]<<8;	/* fall through */
 90 | 	case 9:  c += k[8];		/* fall through */
 91 | 	case 8:  b += (u32)k[7]<<24;	/* fall through */
 92 | 	case 7:  b += (u32)k[6]<<16;	/* fall through */
 93 | 	case 6:  b += (u32)k[5]<<8;	/* fall through */
 94 | 	case 5:  b += k[4];		/* fall through */
 95 | 	case 4:  a += (u32)k[3]<<24;	/* fall through */
 96 | 	case 3:  a += (u32)k[2]<<16;	/* fall through */
 97 | 	case 2:  a += (u32)k[1]<<8;	/* fall through */
 98 | 	case 1:  a += k[0];
 99 | 		 __jhash_final(a, b, c);
100 | 	case 0: /* Nothing left to add */
101 | 		break;
102 | 	}
103 | 
104 | 	return c;
105 | }
106 | 
107 | /* jhash2 - hash an array of u32's
108 |  * @k: the key which must be an array of u32's
109 |  * @length: the number of u32's in the key
110 |  * @initval: the previous hash, or an arbitray value
111 |  *
112 |  * Returns the hash value of the key.
113 |  */
114 | static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
115 | {
116 | 	u32 a, b, c;
117 | 
118 | 	/* Set up the internal state */
119 | 	a = b = c = JHASH_INITVAL + (length<<2) + initval;
120 | 
121 | 	/* Handle most of the key */
122 | 	while (length > 3) {
123 | 		a += k[0];
124 | 		b += k[1];
125 | 		c += k[2];
126 | 		__jhash_mix(a, b, c);
127 | 		length -= 3;
128 | 		k += 3;
129 | 	}
130 | 
131 | 	/* Handle the last 3 u32's */
132 | 	switch (length) {
133 | 	case 3: c += k[2];	/* fall through */
134 | 	case 2: b += k[1];	/* fall through */
135 | 	case 1: a += k[0];
136 | 		__jhash_final(a, b, c);
137 | 	case 0:	/* Nothing left to add */
138 | 		break;
139 | 	}
140 | 
141 | 	return c;
142 | }
143 | 
144 | 
145 | /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
146 | static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
147 | {
148 | 	a += initval;
149 | 	b += initval;
150 | 	c += initval;
151 | 
152 | 	__jhash_final(a, b, c);
153 | 
154 | 	return c;
155 | }
156 | 
157 | static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
158 | {
159 | 	return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
160 | }
161 | 
162 | static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
163 | {
164 | 	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
165 | }
166 | 
167 | static inline u32 jhash_1word(u32 a, u32 initval)
168 | {
169 | 	return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
170 | }
171 | 
172 | #endif /* _LINUX_JHASH_H */
173 | 


--------------------------------------------------------------------------------
/headers/linux/bpf.h:
--------------------------------------------------------------------------------
   1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
   2 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3 |  *
   4 |  * This program is free software; you can redistribute it and/or
   5 |  * modify it under the terms of version 2 of the GNU General Public
   6 |  * License as published by the Free Software Foundation.
   7 |  */
   8 | #ifndef _UAPI__LINUX_BPF_H__
   9 | #define _UAPI__LINUX_BPF_H__
  10 | 
  11 | #include <linux/types.h>
  12 | #include <linux/bpf_common.h>
  13 | 
  14 | /* Extended instruction set based on top of classic BPF */
  15 | 
  16 | /* instruction classes */
  17 | #define BPF_JMP32	0x06	/* jmp mode in word width */
  18 | #define BPF_ALU64	0x07	/* alu mode in double word width */
  19 | 
  20 | /* ld/ldx fields */
  21 | #define BPF_DW		0x18	/* double word (64-bit) */
  22 | #define BPF_XADD	0xc0	/* exclusive add */
  23 | 
  24 | /* alu/jmp fields */
  25 | #define BPF_MOV		0xb0	/* mov reg to reg */
  26 | #define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
  27 | 
  28 | /* change endianness of a register */
  29 | #define BPF_END		0xd0	/* flags for endianness conversion: */
  30 | #define BPF_TO_LE	0x00	/* convert to little-endian */
  31 | #define BPF_TO_BE	0x08	/* convert to big-endian */
  32 | #define BPF_FROM_LE	BPF_TO_LE
  33 | #define BPF_FROM_BE	BPF_TO_BE
  34 | 
  35 | /* jmp encodings */
  36 | #define BPF_JNE		0x50	/* jump != */
  37 | #define BPF_JLT		0xa0	/* LT is unsigned, '<' */
  38 | #define BPF_JLE		0xb0	/* LE is unsigned, '<=' */
  39 | #define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
  40 | #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
  41 | #define BPF_JSLT	0xc0	/* SLT is signed, '<' */
  42 | #define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
  43 | #define BPF_CALL	0x80	/* function call */
  44 | #define BPF_EXIT	0x90	/* function return */
  45 | 
  46 | /* Register numbers */
  47 | enum {
  48 | 	BPF_REG_0 = 0,
  49 | 	BPF_REG_1,
  50 | 	BPF_REG_2,
  51 | 	BPF_REG_3,
  52 | 	BPF_REG_4,
  53 | 	BPF_REG_5,
  54 | 	BPF_REG_6,
  55 | 	BPF_REG_7,
  56 | 	BPF_REG_8,
  57 | 	BPF_REG_9,
  58 | 	BPF_REG_10,
  59 | 	__MAX_BPF_REG,
  60 | };
  61 | 
  62 | /* BPF has 10 general purpose 64-bit registers and stack frame. */
  63 | #define MAX_BPF_REG	__MAX_BPF_REG
  64 | 
  65 | struct bpf_insn {
  66 | 	__u8	code;		/* opcode */
  67 | 	__u8	dst_reg:4;	/* dest register */
  68 | 	__u8	src_reg:4;	/* source register */
  69 | 	__s16	off;		/* signed offset */
  70 | 	__s32	imm;		/* signed immediate constant */
  71 | };
  72 | 
  73 | /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
  74 | struct bpf_lpm_trie_key {
  75 | 	__u32	prefixlen;	/* up to 32 for AF_INET, 128 for AF_INET6 */
  76 | 	__u8	data[0];	/* Arbitrary size */
  77 | };
  78 | 
  79 | struct bpf_cgroup_storage_key {
  80 | 	__u64	cgroup_inode_id;	/* cgroup inode id */
  81 | 	__u32	attach_type;		/* program attach type */
  82 | };
  83 | 
  84 | /* BPF syscall commands, see bpf(2) man-page for details. */
  85 | enum bpf_cmd {
  86 | 	BPF_MAP_CREATE,
  87 | 	BPF_MAP_LOOKUP_ELEM,
  88 | 	BPF_MAP_UPDATE_ELEM,
  89 | 	BPF_MAP_DELETE_ELEM,
  90 | 	BPF_MAP_GET_NEXT_KEY,
  91 | 	BPF_PROG_LOAD,
  92 | 	BPF_OBJ_PIN,
  93 | 	BPF_OBJ_GET,
  94 | 	BPF_PROG_ATTACH,
  95 | 	BPF_PROG_DETACH,
  96 | 	BPF_PROG_TEST_RUN,
  97 | 	BPF_PROG_GET_NEXT_ID,
  98 | 	BPF_MAP_GET_NEXT_ID,
  99 | 	BPF_PROG_GET_FD_BY_ID,
 100 | 	BPF_MAP_GET_FD_BY_ID,
 101 | 	BPF_OBJ_GET_INFO_BY_FD,
 102 | 	BPF_PROG_QUERY,
 103 | 	BPF_RAW_TRACEPOINT_OPEN,
 104 | 	BPF_BTF_LOAD,
 105 | 	BPF_BTF_GET_FD_BY_ID,
 106 | 	BPF_TASK_FD_QUERY,
 107 | 	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 108 | };
 109 | 
 110 | enum bpf_map_type {
 111 | 	BPF_MAP_TYPE_UNSPEC,
 112 | 	BPF_MAP_TYPE_HASH,
 113 | 	BPF_MAP_TYPE_ARRAY,
 114 | 	BPF_MAP_TYPE_PROG_ARRAY,
 115 | 	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 116 | 	BPF_MAP_TYPE_PERCPU_HASH,
 117 | 	BPF_MAP_TYPE_PERCPU_ARRAY,
 118 | 	BPF_MAP_TYPE_STACK_TRACE,
 119 | 	BPF_MAP_TYPE_CGROUP_ARRAY,
 120 | 	BPF_MAP_TYPE_LRU_HASH,
 121 | 	BPF_MAP_TYPE_LRU_PERCPU_HASH,
 122 | 	BPF_MAP_TYPE_LPM_TRIE,
 123 | 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 124 | 	BPF_MAP_TYPE_HASH_OF_MAPS,
 125 | 	BPF_MAP_TYPE_DEVMAP,
 126 | 	BPF_MAP_TYPE_SOCKMAP,
 127 | 	BPF_MAP_TYPE_CPUMAP,
 128 | 	BPF_MAP_TYPE_XSKMAP,
 129 | 	BPF_MAP_TYPE_SOCKHASH,
 130 | 	BPF_MAP_TYPE_CGROUP_STORAGE,
 131 | 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 132 | 	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 133 | 	BPF_MAP_TYPE_QUEUE,
 134 | 	BPF_MAP_TYPE_STACK,
 135 | };
 136 | 
 137 | /* Note that tracing related programs such as
 138 |  * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
 139 |  * are not subject to a stable API since kernel internal data
 140 |  * structures can change from release to release and may
 141 |  * therefore break existing tracing BPF programs. Tracing BPF
 142 |  * programs correspond to /a/ specific kernel which is to be
 143 |  * analyzed, and not /a/ specific kernel /and/ all future ones.
 144 |  */
 145 | enum bpf_prog_type {
 146 | 	BPF_PROG_TYPE_UNSPEC,
 147 | 	BPF_PROG_TYPE_SOCKET_FILTER,
 148 | 	BPF_PROG_TYPE_KPROBE,
 149 | 	BPF_PROG_TYPE_SCHED_CLS,
 150 | 	BPF_PROG_TYPE_SCHED_ACT,
 151 | 	BPF_PROG_TYPE_TRACEPOINT,
 152 | 	BPF_PROG_TYPE_XDP,
 153 | 	BPF_PROG_TYPE_PERF_EVENT,
 154 | 	BPF_PROG_TYPE_CGROUP_SKB,
 155 | 	BPF_PROG_TYPE_CGROUP_SOCK,
 156 | 	BPF_PROG_TYPE_LWT_IN,
 157 | 	BPF_PROG_TYPE_LWT_OUT,
 158 | 	BPF_PROG_TYPE_LWT_XMIT,
 159 | 	BPF_PROG_TYPE_SOCK_OPS,
 160 | 	BPF_PROG_TYPE_SK_SKB,
 161 | 	BPF_PROG_TYPE_CGROUP_DEVICE,
 162 | 	BPF_PROG_TYPE_SK_MSG,
 163 | 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 164 | 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 165 | 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 166 | 	BPF_PROG_TYPE_LIRC_MODE2,
 167 | 	BPF_PROG_TYPE_SK_REUSEPORT,
 168 | 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 169 | };
 170 | 
 171 | enum bpf_attach_type {
 172 | 	BPF_CGROUP_INET_INGRESS,
 173 | 	BPF_CGROUP_INET_EGRESS,
 174 | 	BPF_CGROUP_INET_SOCK_CREATE,
 175 | 	BPF_CGROUP_SOCK_OPS,
 176 | 	BPF_SK_SKB_STREAM_PARSER,
 177 | 	BPF_SK_SKB_STREAM_VERDICT,
 178 | 	BPF_CGROUP_DEVICE,
 179 | 	BPF_SK_MSG_VERDICT,
 180 | 	BPF_CGROUP_INET4_BIND,
 181 | 	BPF_CGROUP_INET6_BIND,
 182 | 	BPF_CGROUP_INET4_CONNECT,
 183 | 	BPF_CGROUP_INET6_CONNECT,
 184 | 	BPF_CGROUP_INET4_POST_BIND,
 185 | 	BPF_CGROUP_INET6_POST_BIND,
 186 | 	BPF_CGROUP_UDP4_SENDMSG,
 187 | 	BPF_CGROUP_UDP6_SENDMSG,
 188 | 	BPF_LIRC_MODE2,
 189 | 	BPF_FLOW_DISSECTOR,
 190 | 	__MAX_BPF_ATTACH_TYPE
 191 | };
 192 | 
 193 | #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 194 | 
 195 | /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
 196 |  *
 197 |  * NONE(default): No further bpf programs allowed in the subtree.
 198 |  *
 199 |  * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
 200 |  * the program in this cgroup yields to sub-cgroup program.
 201 |  *
 202 |  * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
 203 |  * that cgroup program gets run in addition to the program in this cgroup.
 204 |  *
 205 |  * Only one program is allowed to be attached to a cgroup with
 206 |  * NONE or BPF_F_ALLOW_OVERRIDE flag.
 207 |  * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
 208 |  * release old program and attach the new one. Attach flags has to match.
 209 |  *
 210 |  * Multiple programs are allowed to be attached to a cgroup with
 211 |  * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
 212 |  * (those that were attached first, run first)
 213 |  * The programs of sub-cgroup are executed first, then programs of
 214 |  * this cgroup and then programs of parent cgroup.
 215 |  * When children program makes decision (like picking TCP CA or sock bind)
 216 |  * parent program has a chance to override it.
 217 |  *
 218 |  * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
 219 |  * A cgroup with NONE doesn't allow any programs in sub-cgroups.
 220 |  * Ex1:
 221 |  * cgrp1 (MULTI progs A, B) ->
 222 |  *    cgrp2 (OVERRIDE prog C) ->
 223 |  *      cgrp3 (MULTI prog D) ->
 224 |  *        cgrp4 (OVERRIDE prog E) ->
 225 |  *          cgrp5 (NONE prog F)
 226 |  * the event in cgrp5 triggers execution of F,D,A,B in that order.
 227 |  * if prog F is detached, the execution is E,D,A,B
 228 |  * if prog F and D are detached, the execution is E,A,B
 229 |  * if prog F, E and D are detached, the execution is C,A,B
 230 |  *
 231 |  * All eligible programs are executed regardless of return code from
 232 |  * earlier programs.
 233 |  */
 234 | #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 235 | #define BPF_F_ALLOW_MULTI	(1U << 1)
 236 | 
 237 | /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
 238 |  * verifier will perform strict alignment checking as if the kernel
 239 |  * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
 240 |  * and NET_IP_ALIGN defined to 2.
 241 |  */
 242 | #define BPF_F_STRICT_ALIGNMENT	(1U << 0)
 243 | 
 244 | /* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
 245 |  * verifier will allow any alignment whatsoever.  On platforms
 246 |  * with strict alignment requirements for loads ands stores (such
 247 |  * as sparc and mips) the verifier validates that all loads and
 248 |  * stores provably follow this requirement.  This flag turns that
 249 |  * checking and enforcement off.
 250 |  *
 251 |  * It is mostly used for testing when we want to validate the
 252 |  * context and memory access aspects of the verifier, but because
 253 |  * of an unaligned access the alignment check would trigger before
 254 |  * the one we are interested in.
 255 |  */
 256 | #define BPF_F_ANY_ALIGNMENT	(1U << 1)
 257 | 
 258 | /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
 259 | #define BPF_PSEUDO_MAP_FD	1
 260 | 
 261 | /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
 262 |  * offset to another bpf function
 263 |  */
 264 | #define BPF_PSEUDO_CALL		1
 265 | 
 266 | /* flags for BPF_MAP_UPDATE_ELEM command */
 267 | #define BPF_ANY		0 /* create new element or update existing */
 268 | #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 269 | #define BPF_EXIST	2 /* update existing element */
 270 | #define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
 271 | 
 272 | /* flags for BPF_MAP_CREATE command */
 273 | #define BPF_F_NO_PREALLOC	(1U << 0)
 274 | /* Instead of having one common LRU list in the
 275 |  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
 276 |  * which can scale and perform better.
 277 |  * Note, the LRU nodes (including free nodes) cannot be moved
 278 |  * across different LRU lists.
 279 |  */
 280 | #define BPF_F_NO_COMMON_LRU	(1U << 1)
 281 | /* Specify numa node during map creation */
 282 | #define BPF_F_NUMA_NODE		(1U << 2)
 283 | 
 284 | #define BPF_OBJ_NAME_LEN 16U
 285 | 
 286 | /* Flags for accessing BPF object */
 287 | #define BPF_F_RDONLY		(1U << 3)
 288 | #define BPF_F_WRONLY		(1U << 4)
 289 | 
 290 | /* Flag for stack_map, store build_id+offset instead of pointer */
 291 | #define BPF_F_STACK_BUILD_ID	(1U << 5)
 292 | 
 293 | /* Zero-initialize hash function seed. This should only be used for testing. */
 294 | #define BPF_F_ZERO_SEED		(1U << 6)
 295 | 
 296 | /* flags for BPF_PROG_QUERY */
 297 | #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 298 | 
 299 | enum bpf_stack_build_id_status {
 300 | 	/* user space need an empty entry to identify end of a trace */
 301 | 	BPF_STACK_BUILD_ID_EMPTY = 0,
 302 | 	/* with valid build_id and offset */
 303 | 	BPF_STACK_BUILD_ID_VALID = 1,
 304 | 	/* couldn't get build_id, fallback to ip */
 305 | 	BPF_STACK_BUILD_ID_IP = 2,
 306 | };
 307 | 
 308 | #define BPF_BUILD_ID_SIZE 20
 309 | struct bpf_stack_build_id {
 310 | 	__s32		status;
 311 | 	unsigned char	build_id[BPF_BUILD_ID_SIZE];
 312 | 	union {
 313 | 		__u64	offset;
 314 | 		__u64	ip;
 315 | 	};
 316 | };
 317 | 
 318 | union bpf_attr {
 319 | 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 320 | 		__u32	map_type;	/* one of enum bpf_map_type */
 321 | 		__u32	key_size;	/* size of key in bytes */
 322 | 		__u32	value_size;	/* size of value in bytes */
 323 | 		__u32	max_entries;	/* max number of entries in a map */
 324 | 		__u32	map_flags;	/* BPF_MAP_CREATE related
 325 | 					 * flags defined above.
 326 | 					 */
 327 | 		__u32	inner_map_fd;	/* fd pointing to the inner map */
 328 | 		__u32	numa_node;	/* numa node (effective only if
 329 | 					 * BPF_F_NUMA_NODE is set).
 330 | 					 */
 331 | 		char	map_name[BPF_OBJ_NAME_LEN];
 332 | 		__u32	map_ifindex;	/* ifindex of netdev to create on */
 333 | 		__u32	btf_fd;		/* fd pointing to a BTF type data */
 334 | 		__u32	btf_key_type_id;	/* BTF type_id of the key */
 335 | 		__u32	btf_value_type_id;	/* BTF type_id of the value */
 336 | 	};
 337 | 
 338 | 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
 339 | 		__u32		map_fd;
 340 | 		__aligned_u64	key;
 341 | 		union {
 342 | 			__aligned_u64 value;
 343 | 			__aligned_u64 next_key;
 344 | 		};
 345 | 		__u64		flags;
 346 | 	};
 347 | 
 348 | 	struct { /* anonymous struct used by BPF_PROG_LOAD command */
 349 | 		__u32		prog_type;	/* one of enum bpf_prog_type */
 350 | 		__u32		insn_cnt;
 351 | 		__aligned_u64	insns;
 352 | 		__aligned_u64	license;
 353 | 		__u32		log_level;	/* verbosity level of verifier */
 354 | 		__u32		log_size;	/* size of user buffer */
 355 | 		__aligned_u64	log_buf;	/* user supplied buffer */
 356 | 		__u32		kern_version;	/* not used */
 357 | 		__u32		prog_flags;
 358 | 		char		prog_name[BPF_OBJ_NAME_LEN];
 359 | 		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
 360 | 		/* For some prog types expected attach type must be known at
 361 | 		 * load time to verify attach type specific parts of prog
 362 | 		 * (context accesses, allowed helpers, etc).
 363 | 		 */
 364 | 		__u32		expected_attach_type;
 365 | 		__u32		prog_btf_fd;	/* fd pointing to BTF type data */
 366 | 		__u32		func_info_rec_size;	/* userspace bpf_func_info size */
 367 | 		__aligned_u64	func_info;	/* func info */
 368 | 		__u32		func_info_cnt;	/* number of bpf_func_info records */
 369 | 		__u32		line_info_rec_size;	/* userspace bpf_line_info size */
 370 | 		__aligned_u64	line_info;	/* line info */
 371 | 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 372 | 	};
 373 | 
 374 | 	struct { /* anonymous struct used by BPF_OBJ_* commands */
 375 | 		__aligned_u64	pathname;
 376 | 		__u32		bpf_fd;
 377 | 		__u32		file_flags;
 378 | 	};
 379 | 
 380 | 	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
 381 | 		__u32		target_fd;	/* container object to attach to */
 382 | 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 383 | 		__u32		attach_type;
 384 | 		__u32		attach_flags;
 385 | 	};
 386 | 
 387 | 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
 388 | 		__u32		prog_fd;
 389 | 		__u32		retval;
 390 | 		__u32		data_size_in;	/* input: len of data_in */
 391 | 		__u32		data_size_out;	/* input/output: len of data_out
 392 | 						 *   returns ENOSPC if data_out
 393 | 						 *   is too small.
 394 | 						 */
 395 | 		__aligned_u64	data_in;
 396 | 		__aligned_u64	data_out;
 397 | 		__u32		repeat;
 398 | 		__u32		duration;
 399 | 	} test;
 400 | 
 401 | 	struct { /* anonymous struct used by BPF_*_GET_*_ID */
 402 | 		union {
 403 | 			__u32		start_id;
 404 | 			__u32		prog_id;
 405 | 			__u32		map_id;
 406 | 			__u32		btf_id;
 407 | 		};
 408 | 		__u32		next_id;
 409 | 		__u32		open_flags;
 410 | 	};
 411 | 
 412 | 	struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
 413 | 		__u32		bpf_fd;
 414 | 		__u32		info_len;
 415 | 		__aligned_u64	info;
 416 | 	} info;
 417 | 
 418 | 	struct { /* anonymous struct used by BPF_PROG_QUERY command */
 419 | 		__u32		target_fd;	/* container object to query */
 420 | 		__u32		attach_type;
 421 | 		__u32		query_flags;
 422 | 		__u32		attach_flags;
 423 | 		__aligned_u64	prog_ids;
 424 | 		__u32		prog_cnt;
 425 | 	} query;
 426 | 
 427 | 	struct {
 428 | 		__u64 name;
 429 | 		__u32 prog_fd;
 430 | 	} raw_tracepoint;
 431 | 
 432 | 	struct { /* anonymous struct for BPF_BTF_LOAD */
 433 | 		__aligned_u64	btf;
 434 | 		__aligned_u64	btf_log_buf;
 435 | 		__u32		btf_size;
 436 | 		__u32		btf_log_size;
 437 | 		__u32		btf_log_level;
 438 | 	};
 439 | 
 440 | 	struct {
 441 | 		__u32		pid;		/* input: pid */
 442 | 		__u32		fd;		/* input: fd */
 443 | 		__u32		flags;		/* input: flags */
 444 | 		__u32		buf_len;	/* input/output: buf len */
 445 | 		__aligned_u64	buf;		/* input/output:
 446 | 						 *   tp_name for tracepoint
 447 | 						 *   symbol for kprobe
 448 | 						 *   filename for uprobe
 449 | 						 */
 450 | 		__u32		prog_id;	/* output: prod_id */
 451 | 		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
 452 | 		__u64		probe_offset;	/* output: probe_offset */
 453 | 		__u64		probe_addr;	/* output: probe_addr */
 454 | 	} task_fd_query;
 455 | } __attribute__((aligned(8)));
 456 | 
 457 | /* The description below is an attempt at providing documentation to eBPF
 458 |  * developers about the multiple available eBPF helper functions. It can be
 459 |  * parsed and used to produce a manual page. The workflow is the following,
 460 |  * and requires the rst2man utility:
 461 |  *
 462 |  *     $ ./scripts/bpf_helpers_doc.py \
 463 |  *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
 464 |  *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
 465 |  *     $ man /tmp/bpf-helpers.7
 466 |  *
 467 |  * Note that in order to produce this external documentation, some RST
 468 |  * formatting is used in the descriptions to get "bold" and "italics" in
 469 |  * manual pages. Also note that the few trailing white spaces are
 470 |  * intentional, removing them would break paragraphs for rst2man.
 471 |  *
 472 |  * Start of BPF helper function descriptions:
 473 |  *
 474 |  * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
 475 |  * 	Description
 476 |  * 		Perform a lookup in *map* for an entry associated to *key*.
 477 |  * 	Return
 478 |  * 		Map value associated to *key*, or **NULL** if no entry was
 479 |  * 		found.
 480 |  *
 481 |  * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
 482 |  * 	Description
 483 |  * 		Add or update the value of the entry associated to *key* in
 484 |  * 		*map* with *value*. *flags* is one of:
 485 |  *
 486 |  * 		**BPF_NOEXIST**
 487 |  * 			The entry for *key* must not exist in the map.
 488 |  * 		**BPF_EXIST**
 489 |  * 			The entry for *key* must already exist in the map.
 490 |  * 		**BPF_ANY**
 491 |  * 			No condition on the existence of the entry for *key*.
 492 |  *
 493 |  * 		Flag value **BPF_NOEXIST** cannot be used for maps of types
 494 |  * 		**BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY**  (all
 495 |  * 		elements always exist), the helper would return an error.
 496 |  * 	Return
 497 |  * 		0 on success, or a negative error in case of failure.
 498 |  *
 499 |  * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
 500 |  * 	Description
 501 |  * 		Delete entry with *key* from *map*.
 502 |  * 	Return
 503 |  * 		0 on success, or a negative error in case of failure.
 504 |  *
 505 |  * int bpf_probe_read(void *dst, u32 size, const void *src)
 506 |  * 	Description
 507 |  * 		For tracing programs, safely attempt to read *size* bytes from
 508 |  * 		address *src* and store the data in *dst*.
 509 |  * 	Return
 510 |  * 		0 on success, or a negative error in case of failure.
 511 |  *
 512 |  * u64 bpf_ktime_get_ns(void)
 513 |  * 	Description
 514 |  * 		Return the time elapsed since system boot, in nanoseconds.
 515 |  * 	Return
 516 |  * 		Current *ktime*.
 517 |  *
 518 |  * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
 519 |  * 	Description
 520 |  * 		This helper is a "printk()-like" facility for debugging. It
 521 |  * 		prints a message defined by format *fmt* (of size *fmt_size*)
 522 |  * 		to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
 523 |  * 		available. It can take up to three additional **u64**
 524 |  * 		arguments (as an eBPF helpers, the total number of arguments is
 525 |  * 		limited to five).
 526 |  *
 527 |  * 		Each time the helper is called, it appends a line to the trace.
 528 |  * 		The format of the trace is customizable, and the exact output
 529 |  * 		one will get depends on the options set in
 530 |  * 		*\/sys/kernel/debug/tracing/trace_options* (see also the
 531 |  * 		*README* file under the same directory). However, it usually
 532 |  * 		defaults to something like:
 533 |  *
 534 |  * 		::
 535 |  *
 536 |  * 			telnet-470   [001] .N.. 419421.045894: 0x00000001: <formatted msg>
 537 |  *
 538 |  * 		In the above:
 539 |  *
 540 |  * 			* ``telnet`` is the name of the current task.
 541 |  * 			* ``470`` is the PID of the current task.
 542 |  * 			* ``001`` is the CPU number on which the task is
 543 |  * 			  running.
 544 |  * 			* In ``.N..``, each character refers to a set of
 545 |  * 			  options (whether irqs are enabled, scheduling
 546 |  * 			  options, whether hard/softirqs are running, level of
 547 |  * 			  preempt_disabled respectively). **N** means that
 548 |  * 			  **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
 549 |  * 			  are set.
 550 |  * 			* ``419421.045894`` is a timestamp.
 551 |  * 			* ``0x00000001`` is a fake value used by BPF for the
 552 |  * 			  instruction pointer register.
 553 |  * 			* ``<formatted msg>`` is the message formatted with
 554 |  * 			  *fmt*.
 555 |  *
 556 |  * 		The conversion specifiers supported by *fmt* are similar, but
 557 |  * 		more limited than for printk(). They are **%d**, **%i**,
 558 |  * 		**%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
 559 |  * 		**%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
 560 |  * 		of field, padding with zeroes, etc.) is available, and the
 561 |  * 		helper will return **-EINVAL** (but print nothing) if it
 562 |  * 		encounters an unknown specifier.
 563 |  *
 564 |  * 		Also, note that **bpf_trace_printk**\ () is slow, and should
 565 |  * 		only be used for debugging purposes. For this reason, a notice
 566 |  * 		bloc (spanning several lines) is printed to kernel logs and
 567 |  * 		states that the helper should not be used "for production use"
 568 |  * 		the first time this helper is used (or more precisely, when
 569 |  * 		**trace_printk**\ () buffers are allocated). For passing values
 570 |  * 		to user space, perf events should be preferred.
 571 |  * 	Return
 572 |  * 		The number of bytes written to the buffer, or a negative error
 573 |  * 		in case of failure.
 574 |  *
 575 |  * u32 bpf_get_prandom_u32(void)
 576 |  * 	Description
 577 |  * 		Get a pseudo-random number.
 578 |  *
 579 |  * 		From a security point of view, this helper uses its own
 580 |  * 		pseudo-random internal state, and cannot be used to infer the
 581 |  * 		seed of other random functions in the kernel. However, it is
 582 |  * 		essential to note that the generator used by the helper is not
 583 |  * 		cryptographically secure.
 584 |  * 	Return
 585 |  * 		A random 32-bit unsigned value.
 586 |  *
 587 |  * u32 bpf_get_smp_processor_id(void)
 588 |  * 	Description
 589 |  * 		Get the SMP (symmetric multiprocessing) processor id. Note that
 590 |  * 		all programs run with preemption disabled, which means that the
 591 |  * 		SMP processor id is stable during all the execution of the
 592 |  * 		program.
 593 |  * 	Return
 594 |  * 		The SMP id of the processor running the program.
 595 |  *
 596 |  * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
 597 |  * 	Description
 598 |  * 		Store *len* bytes from address *from* into the packet
 599 |  * 		associated to *skb*, at *offset*. *flags* are a combination of
 600 |  * 		**BPF_F_RECOMPUTE_CSUM** (automatically recompute the
 601 |  * 		checksum for the packet after storing the bytes) and
 602 |  * 		**BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
 603 |  * 		**->swhash** and *skb*\ **->l4hash** to 0).
 604 |  *
 605 |  * 		A call to this helper is susceptible to change the underlaying
 606 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 607 |  * 		previously done by the verifier are invalidated and must be
 608 |  * 		performed again, if the helper is used in combination with
 609 |  * 		direct packet access.
 610 |  * 	Return
 611 |  * 		0 on success, or a negative error in case of failure.
 612 |  *
 613 |  * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
 614 |  * 	Description
 615 |  * 		Recompute the layer 3 (e.g. IP) checksum for the packet
 616 |  * 		associated to *skb*. Computation is incremental, so the helper
 617 |  * 		must know the former value of the header field that was
 618 |  * 		modified (*from*), the new value of this field (*to*), and the
 619 |  * 		number of bytes (2 or 4) for this field, stored in *size*.
 620 |  * 		Alternatively, it is possible to store the difference between
 621 |  * 		the previous and the new values of the header field in *to*, by
 622 |  * 		setting *from* and *size* to 0. For both methods, *offset*
 623 |  * 		indicates the location of the IP checksum within the packet.
 624 |  *
 625 |  * 		This helper works in combination with **bpf_csum_diff**\ (),
 626 |  * 		which does not update the checksum in-place, but offers more
 627 |  * 		flexibility and can handle sizes larger than 2 or 4 for the
 628 |  * 		checksum to update.
 629 |  *
 630 |  * 		A call to this helper is susceptible to change the underlaying
 631 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 632 |  * 		previously done by the verifier are invalidated and must be
 633 |  * 		performed again, if the helper is used in combination with
 634 |  * 		direct packet access.
 635 |  * 	Return
 636 |  * 		0 on success, or a negative error in case of failure.
 637 |  *
 638 |  * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
 639 |  * 	Description
 640 |  * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
 641 |  * 		packet associated to *skb*. Computation is incremental, so the
 642 |  * 		helper must know the former value of the header field that was
 643 |  * 		modified (*from*), the new value of this field (*to*), and the
 644 |  * 		number of bytes (2 or 4) for this field, stored on the lowest
 645 |  * 		four bits of *flags*. Alternatively, it is possible to store
 646 |  * 		the difference between the previous and the new values of the
 647 |  * 		header field in *to*, by setting *from* and the four lowest
 648 |  * 		bits of *flags* to 0. For both methods, *offset* indicates the
 649 |  * 		location of the IP checksum within the packet. In addition to
 650 |  * 		the size of the field, *flags* can be added (bitwise OR) actual
 651 |  * 		flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
 652 |  * 		untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
 653 |  * 		for updates resulting in a null checksum the value is set to
 654 |  * 		**CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
 655 |  * 		the checksum is to be computed against a pseudo-header.
 656 |  *
 657 |  * 		This helper works in combination with **bpf_csum_diff**\ (),
 658 |  * 		which does not update the checksum in-place, but offers more
 659 |  * 		flexibility and can handle sizes larger than 2 or 4 for the
 660 |  * 		checksum to update.
 661 |  *
 662 |  * 		A call to this helper is susceptible to change the underlaying
 663 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 664 |  * 		previously done by the verifier are invalidated and must be
 665 |  * 		performed again, if the helper is used in combination with
 666 |  * 		direct packet access.
 667 |  * 	Return
 668 |  * 		0 on success, or a negative error in case of failure.
 669 |  *
 670 |  * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
 671 |  * 	Description
 672 |  * 		This special helper is used to trigger a "tail call", or in
 673 |  * 		other words, to jump into another eBPF program. The same stack
 674 |  * 		frame is used (but values on stack and in registers for the
 675 |  * 		caller are not accessible to the callee). This mechanism allows
 676 |  * 		for program chaining, either for raising the maximum number of
 677 |  * 		available eBPF instructions, or to execute given programs in
 678 |  * 		conditional blocks. For security reasons, there is an upper
 679 |  * 		limit to the number of successive tail calls that can be
 680 |  * 		performed.
 681 |  *
 682 |  * 		Upon call of this helper, the program attempts to jump into a
 683 |  * 		program referenced at index *index* in *prog_array_map*, a
 684 |  * 		special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
 685 |  * 		*ctx*, a pointer to the context.
 686 |  *
 687 |  * 		If the call succeeds, the kernel immediately runs the first
 688 |  * 		instruction of the new program. This is not a function call,
 689 |  * 		and it never returns to the previous program. If the call
 690 |  * 		fails, then the helper has no effect, and the caller continues
 691 |  * 		to run its subsequent instructions. A call can fail if the
 692 |  * 		destination program for the jump does not exist (i.e. *index*
 693 |  * 		is superior to the number of entries in *prog_array_map*), or
 694 |  * 		if the maximum number of tail calls has been reached for this
 695 |  * 		chain of programs. This limit is defined in the kernel by the
 696 |  * 		macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
 697 |  * 		which is currently set to 32.
 698 |  * 	Return
 699 |  * 		0 on success, or a negative error in case of failure.
 700 |  *
 701 |  * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
 702 |  * 	Description
 703 |  * 		Clone and redirect the packet associated to *skb* to another
 704 |  * 		net device of index *ifindex*. Both ingress and egress
 705 |  * 		interfaces can be used for redirection. The **BPF_F_INGRESS**
 706 |  * 		value in *flags* is used to make the distinction (ingress path
 707 |  * 		is selected if the flag is present, egress path otherwise).
 708 |  * 		This is the only flag supported for now.
 709 |  *
 710 |  * 		In comparison with **bpf_redirect**\ () helper,
 711 |  * 		**bpf_clone_redirect**\ () has the associated cost of
 712 |  * 		duplicating the packet buffer, but this can be executed out of
 713 |  * 		the eBPF program. Conversely, **bpf_redirect**\ () is more
 714 |  * 		efficient, but it is handled through an action code where the
 715 |  * 		redirection happens only after the eBPF program has returned.
 716 |  *
 717 |  * 		A call to this helper is susceptible to change the underlaying
 718 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 719 |  * 		previously done by the verifier are invalidated and must be
 720 |  * 		performed again, if the helper is used in combination with
 721 |  * 		direct packet access.
 722 |  * 	Return
 723 |  * 		0 on success, or a negative error in case of failure.
 724 |  *
 725 |  * u64 bpf_get_current_pid_tgid(void)
 726 |  * 	Return
 727 |  * 		A 64-bit integer containing the current tgid and pid, and
 728 |  * 		created as such:
 729 |  * 		*current_task*\ **->tgid << 32 \|**
 730 |  * 		*current_task*\ **->pid**.
 731 |  *
 732 |  * u64 bpf_get_current_uid_gid(void)
 733 |  * 	Return
 734 |  * 		A 64-bit integer containing the current GID and UID, and
 735 |  * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
 736 |  *
 737 |  * int bpf_get_current_comm(char *buf, u32 size_of_buf)
 738 |  * 	Description
 739 |  * 		Copy the **comm** attribute of the current task into *buf* of
 740 |  * 		*size_of_buf*. The **comm** attribute contains the name of
 741 |  * 		the executable (excluding the path) for the current task. The
 742 |  * 		*size_of_buf* must be strictly positive. On success, the
 743 |  * 		helper makes sure that the *buf* is NUL-terminated. On failure,
 744 |  * 		it is filled with zeroes.
 745 |  * 	Return
 746 |  * 		0 on success, or a negative error in case of failure.
 747 |  *
 748 |  * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
 749 |  * 	Description
 750 |  * 		Retrieve the classid for the current task, i.e. for the net_cls
 751 |  * 		cgroup to which *skb* belongs.
 752 |  *
 753 |  * 		This helper can be used on TC egress path, but not on ingress.
 754 |  *
 755 |  * 		The net_cls cgroup provides an interface to tag network packets
 756 |  * 		based on a user-provided identifier for all traffic coming from
 757 |  * 		the tasks belonging to the related cgroup. See also the related
 758 |  * 		kernel documentation, available from the Linux sources in file
 759 |  * 		*Documentation/cgroup-v1/net_cls.txt*.
 760 |  *
 761 |  * 		The Linux kernel has two versions for cgroups: there are
 762 |  * 		cgroups v1 and cgroups v2. Both are available to users, who can
 763 |  * 		use a mixture of them, but note that the net_cls cgroup is for
 764 |  * 		cgroup v1 only. This makes it incompatible with BPF programs
 765 |  * 		run on cgroups, which is a cgroup-v2-only feature (a socket can
 766 |  * 		only hold data for one version of cgroups at a time).
 767 |  *
 768 |  * 		This helper is only available is the kernel was compiled with
 769 |  * 		the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
 770 |  * 		"**y**" or to "**m**".
 771 |  * 	Return
 772 |  * 		The classid, or 0 for the default unconfigured classid.
 773 |  *
 774 |  * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 775 |  * 	Description
 776 |  * 		Push a *vlan_tci* (VLAN tag control information) of protocol
 777 |  * 		*vlan_proto* to the packet associated to *skb*, then update
 778 |  * 		the checksum. Note that if *vlan_proto* is different from
 779 |  * 		**ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
 780 |  * 		be **ETH_P_8021Q**.
 781 |  *
 782 |  * 		A call to this helper is susceptible to change the underlaying
 783 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 784 |  * 		previously done by the verifier are invalidated and must be
 785 |  * 		performed again, if the helper is used in combination with
 786 |  * 		direct packet access.
 787 |  * 	Return
 788 |  * 		0 on success, or a negative error in case of failure.
 789 |  *
 790 |  * int bpf_skb_vlan_pop(struct sk_buff *skb)
 791 |  * 	Description
 792 |  * 		Pop a VLAN header from the packet associated to *skb*.
 793 |  *
 794 |  * 		A call to this helper is susceptible to change the underlaying
 795 |  * 		packet buffer. Therefore, at load time, all checks on pointers
 796 |  * 		previously done by the verifier are invalidated and must be
 797 |  * 		performed again, if the helper is used in combination with
 798 |  * 		direct packet access.
 799 |  * 	Return
 800 |  * 		0 on success, or a negative error in case of failure.
 801 |  *
 802 |  * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
 803 |  * 	Description
 804 |  * 		Get tunnel metadata. This helper takes a pointer *key* to an
 805 |  * 		empty **struct bpf_tunnel_key** of **size**, that will be
 806 |  * 		filled with tunnel metadata for the packet associated to *skb*.
 807 |  * 		The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
 808 |  * 		indicates that the tunnel is based on IPv6 protocol instead of
 809 |  * 		IPv4.
 810 |  *
 811 |  * 		The **struct bpf_tunnel_key** is an object that generalizes the
 812 |  * 		principal parameters used by various tunneling protocols into a
 813 |  * 		single struct. This way, it can be used to easily make a
 814 |  * 		decision based on the contents of the encapsulation header,
 815 |  * 		"summarized" in this struct. In particular, it holds the IP
 816 |  * 		address of the remote end (IPv4 or IPv6, depending on the case)
 817 |  * 		in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
 818 |  * 		this struct exposes the *key*\ **->tunnel_id**, which is
 819 |  * 		generally mapped to a VNI (Virtual Network Identifier), making
 820 |  * 		it programmable together with the **bpf_skb_set_tunnel_key**\
 821 |  * 		() helper.
 822 |  *
 823 |  * 		Let's imagine that the following code is part of a program
 824 |  * 		attached to the TC ingress interface, on one end of a GRE
 825 |  * 		tunnel, and is supposed to filter out all messages coming from
 826 |  * 		remote ends with IPv4 address other than 10.0.0.1:
 827 |  *
 828 |  * 		::
 829 |  *
 830 |  * 			int ret;
 831 |  * 			struct bpf_tunnel_key key = {};
 832 |  * 			
 833 |  * 			ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 834 |  * 			if (ret < 0)
 835 |  * 				return TC_ACT_SHOT;	// drop packet
 836 |  * 			
 837 |  * 			if (key.remote_ipv4 != 0x0a000001)
 838 |  * 				return TC_ACT_SHOT;	// drop packet
 839 |  * 			
 840 |  * 			return TC_ACT_OK;		// accept packet
 841 |  *
 842 |  * 		This interface can also be used with all encapsulation devices
 843 |  * 		that can operate in "collect metadata" mode: instead of having
 844 |  * 		one network device per specific configuration, the "collect
 845 |  * 		metadata" mode only requires a single device where the
 846 |  * 		configuration can be extracted from this helper.
 847 |  *
 848 |  * 		This can be used together with various tunnels such as VXLan,
 849 |  * 		Geneve, GRE or IP in IP (IPIP).
 850 |  * 	Return
 851 |  * 		0 on success, or a negative error in case of failure.
 852 |  *
 853 |  * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
 854 |  * 	Description
 855 |  * 		Populate tunnel metadata for packet associated to *skb.* The
 856 |  * 		tunnel metadata is set to the contents of *key*, of *size*. The
 857 |  * 		*flags* can be set to a combination of the following values:
 858 |  *
 859 |  * 		**BPF_F_TUNINFO_IPV6**
 860 |  * 			Indicate that the tunnel is based on IPv6 protocol
 861 |  * 			instead of IPv4.
 862 |  * 		**BPF_F_ZERO_CSUM_TX**
 863 |  * 			For IPv4 packets, add a flag to tunnel metadata
 864 |  * 			indicating that checksum computation should be skipped
 865 |  * 			and checksum set to zeroes.
 866 |  * 		**BPF_F_DONT_FRAGMENT**
 867 |  * 			Add a flag to tunnel metadata indicating that the
 868 |  * 			packet should not be fragmented.
 869 |  * 		**BPF_F_SEQ_NUMBER**
 870 |  * 			Add a flag to tunnel metadata indicating that a
 871 |  * 			sequence number should be added to tunnel header before
 872 |  * 			sending the packet. This flag was added for GRE
 873 |  * 			encapsulation, but might be used with other protocols
 874 |  * 			as well in the future.
 875 |  *
 876 |  * 		Here is a typical usage on the transmit path:
 877 |  *
 878 |  * 		::
 879 |  *
 880 |  * 			struct bpf_tunnel_key key;
 881 |  * 			     populate key ...
 882 |  * 			bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
 883 |  * 			bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
 884 |  *
 885 |  * 		See also the description of the **bpf_skb_get_tunnel_key**\ ()
 886 |  * 		helper for additional information.
 887 |  * 	Return
 888 |  * 		0 on success, or a negative error in case of failure.
 889 |  *
 890 |  * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
 891 |  * 	Description
 892 |  * 		Read the value of a perf event counter. This helper relies on a
 893 |  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
 894 |  * 		the perf event counter is selected when *map* is updated with
 895 |  * 		perf event file descriptors. The *map* is an array whose size
 896 |  * 		is the number of available CPUs, and each cell contains a value
 897 |  * 		relative to one CPU. The value to retrieve is indicated by
 898 |  * 		*flags*, that contains the index of the CPU to look up, masked
 899 |  * 		with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
 900 |  * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
 901 |  * 		current CPU should be retrieved.
 902 |  *
 903 |  * 		Note that before Linux 4.13, only hardware perf event can be
 904 |  * 		retrieved.
 905 |  *
 906 |  * 		Also, be aware that the newer helper
 907 |  * 		**bpf_perf_event_read_value**\ () is recommended over
 908 |  * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
 909 |  * 		quirks where error and counter value are used as a return code
 910 |  * 		(which is wrong to do since ranges may overlap). This issue is
 911 |  * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
 912 |  * 		time provides more features over the **bpf_perf_event_read**\
 913 |  * 		() interface. Please refer to the description of
 914 |  * 		**bpf_perf_event_read_value**\ () for details.
 915 |  * 	Return
 916 |  * 		The value of the perf event counter read from the map, or a
 917 |  * 		negative error code in case of failure.
 918 |  *
 919 |  * int bpf_redirect(u32 ifindex, u64 flags)
 920 |  * 	Description
 921 |  * 		Redirect the packet to another net device of index *ifindex*.
 922 |  * 		This helper is somewhat similar to **bpf_clone_redirect**\
 923 |  * 		(), except that the packet is not cloned, which provides
 924 |  * 		increased performance.
 925 |  *
 926 |  * 		Except for XDP, both ingress and egress interfaces can be used
 927 |  * 		for redirection. The **BPF_F_INGRESS** value in *flags* is used
 928 |  * 		to make the distinction (ingress path is selected if the flag
 929 |  * 		is present, egress path otherwise). Currently, XDP only
 930 |  * 		supports redirection to the egress interface, and accepts no
 931 |  * 		flag at all.
 932 |  *
 933 |  * 		The same effect can be attained with the more generic
 934 |  * 		**bpf_redirect_map**\ (), which requires specific maps to be
 935 |  * 		used but offers better performance.
 936 |  * 	Return
 937 |  * 		For XDP, the helper returns **XDP_REDIRECT** on success or
 938 |  * 		**XDP_ABORTED** on error. For other program types, the values
 939 |  * 		are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
 940 |  * 		error.
 941 |  *
 942 |  * u32 bpf_get_route_realm(struct sk_buff *skb)
 943 |  * 	Description
 944 |  * 		Retrieve the realm or the route, that is to say the
 945 |  * 		**tclassid** field of the destination for the *skb*. The
 946 |  * 		indentifier retrieved is a user-provided tag, similar to the
 947 |  * 		one used with the net_cls cgroup (see description for
 948 |  * 		**bpf_get_cgroup_classid**\ () helper), but here this tag is
 949 |  * 		held by a route (a destination entry), not by a task.
 950 |  *
 951 |  * 		Retrieving this identifier works with the clsact TC egress hook
 952 |  * 		(see also **tc-bpf(8)**), or alternatively on conventional
 953 |  * 		classful egress qdiscs, but not on TC ingress path. In case of
 954 |  * 		clsact TC egress hook, this has the advantage that, internally,
 955 |  * 		the destination entry has not been dropped yet in the transmit
 956 |  * 		path. Therefore, the destination entry does not need to be
 957 |  * 		artificially held via **netif_keep_dst**\ () for a classful
 958 |  * 		qdisc until the *skb* is freed.
 959 |  *
 960 |  * 		This helper is available only if the kernel was compiled with
 961 |  * 		**CONFIG_IP_ROUTE_CLASSID** configuration option.
 962 |  * 	Return
 963 |  * 		The realm of the route for the packet associated to *skb*, or 0
 964 |  * 		if none was found.
 965 |  *
 966 |  * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
 967 |  * 	Description
 968 |  * 		Write raw *data* blob into a special BPF perf event held by
 969 |  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
 970 |  * 		event must have the following attributes: **PERF_SAMPLE_RAW**
 971 |  * 		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
 972 |  * 		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
 973 |  *
 974 |  * 		The *flags* are used to indicate the index in *map* for which
 975 |  * 		the value must be put, masked with **BPF_F_INDEX_MASK**.
 976 |  * 		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
 977 |  * 		to indicate that the index of the current CPU core should be
 978 |  * 		used.
 979 |  *
 980 |  * 		The value to write, of *size*, is passed through eBPF stack and
 981 |  * 		pointed by *data*.
 982 |  *
 983 |  * 		The context of the program *ctx* needs also be passed to the
 984 |  * 		helper.
 985 |  *
 986 |  * 		On user space, a program willing to read the values needs to
 987 |  * 		call **perf_event_open**\ () on the perf event (either for
 988 |  * 		one or for all CPUs) and to store the file descriptor into the
 989 |  * 		*map*. This must be done before the eBPF program can send data
 990 |  * 		into it. An example is available in file
 991 |  * 		*samples/bpf/trace_output_user.c* in the Linux kernel source
 992 |  * 		tree (the eBPF program counterpart is in
 993 |  * 		*samples/bpf/trace_output_kern.c*).
 994 |  *
 995 |  * 		**bpf_perf_event_output**\ () achieves better performance
 996 |  * 		than **bpf_trace_printk**\ () for sharing data with user
 997 |  * 		space, and is much better suitable for streaming data from eBPF
 998 |  * 		programs.
 999 |  *
1000 |  * 		Note that this helper is not restricted to tracing use cases
1001 |  * 		and can be used with programs attached to TC or XDP as well,
1002 |  * 		where it allows for passing data to user space listeners. Data
1003 |  * 		can be:
1004 |  *
1005 |  * 		* Only custom structs,
1006 |  * 		* Only the packet payload, or
1007 |  * 		* A combination of both.
1008 |  * 	Return
1009 |  * 		0 on success, or a negative error in case of failure.
1010 |  *
1011 |  * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
1012 |  * 	Description
1013 |  * 		This helper was provided as an easy way to load data from a
1014 |  * 		packet. It can be used to load *len* bytes from *offset* from
1015 |  * 		the packet associated to *skb*, into the buffer pointed by
1016 |  * 		*to*.
1017 |  *
1018 |  * 		Since Linux 4.7, usage of this helper has mostly been replaced
1019 |  * 		by "direct packet access", enabling packet data to be
1020 |  * 		manipulated with *skb*\ **->data** and *skb*\ **->data_end**
1021 |  * 		pointing respectively to the first byte of packet data and to
1022 |  * 		the byte after the last byte of packet data. However, it
1023 |  * 		remains useful if one wishes to read large quantities of data
1024 |  * 		at once from a packet into the eBPF stack.
1025 |  * 	Return
1026 |  * 		0 on success, or a negative error in case of failure.
1027 |  *
1028 |  * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
1029 |  * 	Description
1030 |  * 		Walk a user or a kernel stack and return its id. To achieve
1031 |  * 		this, the helper needs *ctx*, which is a pointer to the context
1032 |  * 		on which the tracing program is executed, and a pointer to a
1033 |  * 		*map* of type **BPF_MAP_TYPE_STACK_TRACE**.
1034 |  *
1035 |  * 		The last argument, *flags*, holds the number of stack frames to
1036 |  * 		skip (from 0 to 255), masked with
1037 |  * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
1038 |  * 		a combination of the following flags:
1039 |  *
1040 |  * 		**BPF_F_USER_STACK**
1041 |  * 			Collect a user space stack instead of a kernel stack.
1042 |  * 		**BPF_F_FAST_STACK_CMP**
1043 |  * 			Compare stacks by hash only.
1044 |  * 		**BPF_F_REUSE_STACKID**
1045 |  * 			If two different stacks hash into the same *stackid*,
1046 |  * 			discard the old one.
1047 |  *
1048 |  * 		The stack id retrieved is a 32 bit long integer handle which
1049 |  * 		can be further combined with other data (including other stack
1050 |  * 		ids) and used as a key into maps. This can be useful for
1051 |  * 		generating a variety of graphs (such as flame graphs or off-cpu
1052 |  * 		graphs).
1053 |  *
1054 |  * 		For walking a stack, this helper is an improvement over
1055 |  * 		**bpf_probe_read**\ (), which can be used with unrolled loops
1056 |  * 		but is not efficient and consumes a lot of eBPF instructions.
1057 |  * 		Instead, **bpf_get_stackid**\ () can collect up to
1058 |  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
1059 |  * 		this limit can be controlled with the **sysctl** program, and
1060 |  * 		that it should be manually increased in order to profile long
1061 |  * 		user stacks (such as stacks for Java programs). To do so, use:
1062 |  *
1063 |  * 		::
1064 |  *
1065 |  * 			# sysctl kernel.perf_event_max_stack=<new value>
1066 |  * 	Return
1067 |  * 		The positive or null stack id on success, or a negative error
1068 |  * 		in case of failure.
1069 |  *
1070 |  * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
1071 |  * 	Description
1072 |  * 		Compute a checksum difference, from the raw buffer pointed by
1073 |  * 		*from*, of length *from_size* (that must be a multiple of 4),
1074 |  * 		towards the raw buffer pointed by *to*, of size *to_size*
1075 |  * 		(same remark). An optional *seed* can be added to the value
1076 |  * 		(this can be cascaded, the seed may come from a previous call
1077 |  * 		to the helper).
1078 |  *
1079 |  * 		This is flexible enough to be used in several ways:
1080 |  *
1081 |  * 		* With *from_size* == 0, *to_size* > 0 and *seed* set to
1082 |  * 		  checksum, it can be used when pushing new data.
1083 |  * 		* With *from_size* > 0, *to_size* == 0 and *seed* set to
1084 |  * 		  checksum, it can be used when removing data from a packet.
1085 |  * 		* With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
1086 |  * 		  can be used to compute a diff. Note that *from_size* and
1087 |  * 		  *to_size* do not need to be equal.
1088 |  *
1089 |  * 		This helper can be used in combination with
1090 |  * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
1091 |  * 		which one can feed in the difference computed with
1092 |  * 		**bpf_csum_diff**\ ().
1093 |  * 	Return
1094 |  * 		The checksum result, or a negative error code in case of
1095 |  * 		failure.
1096 |  *
1097 |  * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
1098 |  * 	Description
1099 |  * 		Retrieve tunnel options metadata for the packet associated to
1100 |  * 		*skb*, and store the raw tunnel option data to the buffer *opt*
1101 |  * 		of *size*.
1102 |  *
1103 |  * 		This helper can be used with encapsulation devices that can
1104 |  * 		operate in "collect metadata" mode (please refer to the related
1105 |  * 		note in the description of **bpf_skb_get_tunnel_key**\ () for
1106 |  * 		more details). A particular example where this can be used is
1107 |  * 		in combination with the Geneve encapsulation protocol, where it
1108 |  * 		allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
1109 |  * 		and retrieving arbitrary TLVs (Type-Length-Value headers) from
1110 |  * 		the eBPF program. This allows for full customization of these
1111 |  * 		headers.
1112 |  * 	Return
1113 |  * 		The size of the option data retrieved.
1114 |  *
1115 |  * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
1116 |  * 	Description
1117 |  * 		Set tunnel options metadata for the packet associated to *skb*
1118 |  * 		to the option data contained in the raw buffer *opt* of *size*.
1119 |  *
1120 |  * 		See also the description of the **bpf_skb_get_tunnel_opt**\ ()
1121 |  * 		helper for additional information.
1122 |  * 	Return
1123 |  * 		0 on success, or a negative error in case of failure.
1124 |  *
1125 |  * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
1126 |  * 	Description
1127 |  * 		Change the protocol of the *skb* to *proto*. Currently
1128 |  * 		supported are transition from IPv4 to IPv6, and from IPv6 to
1129 |  * 		IPv4. The helper takes care of the groundwork for the
1130 |  * 		transition, including resizing the socket buffer. The eBPF
1131 |  * 		program is expected to fill the new headers, if any, via
1132 |  * 		**skb_store_bytes**\ () and to recompute the checksums with
1133 |  * 		**bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
1134 |  * 		(). The main case for this helper is to perform NAT64
1135 |  * 		operations out of an eBPF program.
1136 |  *
1137 |  * 		Internally, the GSO type is marked as dodgy so that headers are
1138 |  * 		checked and segments are recalculated by the GSO/GRO engine.
1139 |  * 		The size for GSO target is adapted as well.
1140 |  *
1141 |  * 		All values for *flags* are reserved for future usage, and must
1142 |  * 		be left at zero.
1143 |  *
1144 |  * 		A call to this helper is susceptible to change the underlaying
1145 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1146 |  * 		previously done by the verifier are invalidated and must be
1147 |  * 		performed again, if the helper is used in combination with
1148 |  * 		direct packet access.
1149 |  * 	Return
1150 |  * 		0 on success, or a negative error in case of failure.
1151 |  *
1152 |  * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
1153 |  * 	Description
1154 |  * 		Change the packet type for the packet associated to *skb*. This
1155 |  * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
1156 |  * 		the eBPF program does not have a write access to *skb*\
1157 |  * 		**->pkt_type** beside this helper. Using a helper here allows
1158 |  * 		for graceful handling of errors.
1159 |  *
1160 |  * 		The major use case is to change incoming *skb*s to
1161 |  * 		**PACKET_HOST** in a programmatic way instead of having to
1162 |  * 		recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
1163 |  * 		example.
1164 |  *
1165 |  * 		Note that *type* only allows certain values. At this time, they
1166 |  * 		are:
1167 |  *
1168 |  * 		**PACKET_HOST**
1169 |  * 			Packet is for us.
1170 |  * 		**PACKET_BROADCAST**
1171 |  * 			Send packet to all.
1172 |  * 		**PACKET_MULTICAST**
1173 |  * 			Send packet to group.
1174 |  * 		**PACKET_OTHERHOST**
1175 |  * 			Send packet to someone else.
1176 |  * 	Return
1177 |  * 		0 on success, or a negative error in case of failure.
1178 |  *
1179 |  * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
1180 |  * 	Description
1181 |  * 		Check whether *skb* is a descendant of the cgroup2 held by
1182 |  * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
1183 |  * 	Return
1184 |  * 		The return value depends on the result of the test, and can be:
1185 |  *
1186 |  * 		* 0, if the *skb* failed the cgroup2 descendant test.
1187 |  * 		* 1, if the *skb* succeeded the cgroup2 descendant test.
1188 |  * 		* A negative error code, if an error occurred.
1189 |  *
1190 |  * u32 bpf_get_hash_recalc(struct sk_buff *skb)
1191 |  * 	Description
1192 |  * 		Retrieve the hash of the packet, *skb*\ **->hash**. If it is
1193 |  * 		not set, in particular if the hash was cleared due to mangling,
1194 |  * 		recompute this hash. Later accesses to the hash can be done
1195 |  * 		directly with *skb*\ **->hash**.
1196 |  *
1197 |  * 		Calling **bpf_set_hash_invalid**\ (), changing a packet
1198 |  * 		prototype with **bpf_skb_change_proto**\ (), or calling
1199 |  * 		**bpf_skb_store_bytes**\ () with the
1200 |  * 		**BPF_F_INVALIDATE_HASH** are actions susceptible to clear
1201 |  * 		the hash and to trigger a new computation for the next call to
1202 |  * 		**bpf_get_hash_recalc**\ ().
1203 |  * 	Return
1204 |  * 		The 32-bit hash.
1205 |  *
1206 |  * u64 bpf_get_current_task(void)
1207 |  * 	Return
1208 |  * 		A pointer to the current task struct.
1209 |  *
1210 |  * int bpf_probe_write_user(void *dst, const void *src, u32 len)
1211 |  * 	Description
1212 |  * 		Attempt in a safe way to write *len* bytes from the buffer
1213 |  * 		*src* to *dst* in memory. It only works for threads that are in
1214 |  * 		user context, and *dst* must be a valid user space address.
1215 |  *
1216 |  * 		This helper should not be used to implement any kind of
1217 |  * 		security mechanism because of TOC-TOU attacks, but rather to
1218 |  * 		debug, divert, and manipulate execution of semi-cooperative
1219 |  * 		processes.
1220 |  *
1221 |  * 		Keep in mind that this feature is meant for experiments, and it
1222 |  * 		has a risk of crashing the system and running programs.
1223 |  * 		Therefore, when an eBPF program using this helper is attached,
1224 |  * 		a warning including PID and process name is printed to kernel
1225 |  * 		logs.
1226 |  * 	Return
1227 |  * 		0 on success, or a negative error in case of failure.
1228 |  *
1229 |  * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
1230 |  * 	Description
1231 |  * 		Check whether the probe is being run is the context of a given
1232 |  * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
1233 |  * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
1234 |  * 	Return
1235 |  * 		The return value depends on the result of the test, and can be:
1236 |  *
1237 |  * 		* 0, if the *skb* task belongs to the cgroup2.
1238 |  * 		* 1, if the *skb* task does not belong to the cgroup2.
1239 |  * 		* A negative error code, if an error occurred.
1240 |  *
1241 |  * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
1242 |  * 	Description
1243 |  * 		Resize (trim or grow) the packet associated to *skb* to the
1244 |  * 		new *len*. The *flags* are reserved for future usage, and must
1245 |  * 		be left at zero.
1246 |  *
1247 |  * 		The basic idea is that the helper performs the needed work to
1248 |  * 		change the size of the packet, then the eBPF program rewrites
1249 |  * 		the rest via helpers like **bpf_skb_store_bytes**\ (),
1250 |  * 		**bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
1251 |  * 		and others. This helper is a slow path utility intended for
1252 |  * 		replies with control messages. And because it is targeted for
1253 |  * 		slow path, the helper itself can afford to be slow: it
1254 |  * 		implicitly linearizes, unclones and drops offloads from the
1255 |  * 		*skb*.
1256 |  *
1257 |  * 		A call to this helper is susceptible to change the underlaying
1258 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1259 |  * 		previously done by the verifier are invalidated and must be
1260 |  * 		performed again, if the helper is used in combination with
1261 |  * 		direct packet access.
1262 |  * 	Return
1263 |  * 		0 on success, or a negative error in case of failure.
1264 |  *
1265 |  * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
1266 |  * 	Description
1267 |  * 		Pull in non-linear data in case the *skb* is non-linear and not
1268 |  * 		all of *len* are part of the linear section. Make *len* bytes
1269 |  * 		from *skb* readable and writable. If a zero value is passed for
1270 |  * 		*len*, then the whole length of the *skb* is pulled.
1271 |  *
1272 |  * 		This helper is only needed for reading and writing with direct
1273 |  * 		packet access.
1274 |  *
1275 |  * 		For direct packet access, testing that offsets to access
1276 |  * 		are within packet boundaries (test on *skb*\ **->data_end**) is
1277 |  * 		susceptible to fail if offsets are invalid, or if the requested
1278 |  * 		data is in non-linear parts of the *skb*. On failure the
1279 |  * 		program can just bail out, or in the case of a non-linear
1280 |  * 		buffer, use a helper to make the data available. The
1281 |  * 		**bpf_skb_load_bytes**\ () helper is a first solution to access
1282 |  * 		the data. Another one consists in using **bpf_skb_pull_data**
1283 |  * 		to pull in once the non-linear parts, then retesting and
1284 |  * 		eventually access the data.
1285 |  *
1286 |  * 		At the same time, this also makes sure the *skb* is uncloned,
1287 |  * 		which is a necessary condition for direct write. As this needs
1288 |  * 		to be an invariant for the write part only, the verifier
1289 |  * 		detects writes and adds a prologue that is calling
1290 |  * 		**bpf_skb_pull_data()** to effectively unclone the *skb* from
1291 |  * 		the very beginning in case it is indeed cloned.
1292 |  *
1293 |  * 		A call to this helper is susceptible to change the underlaying
1294 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1295 |  * 		previously done by the verifier are invalidated and must be
1296 |  * 		performed again, if the helper is used in combination with
1297 |  * 		direct packet access.
1298 |  * 	Return
1299 |  * 		0 on success, or a negative error in case of failure.
1300 |  *
1301 |  * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
1302 |  * 	Description
1303 |  * 		Add the checksum *csum* into *skb*\ **->csum** in case the
1304 |  * 		driver has supplied a checksum for the entire packet into that
1305 |  * 		field. Return an error otherwise. This helper is intended to be
1306 |  * 		used in combination with **bpf_csum_diff**\ (), in particular
1307 |  * 		when the checksum needs to be updated after data has been
1308 |  * 		written into the packet through direct packet access.
1309 |  * 	Return
1310 |  * 		The checksum on success, or a negative error code in case of
1311 |  * 		failure.
1312 |  *
1313 |  * void bpf_set_hash_invalid(struct sk_buff *skb)
1314 |  * 	Description
1315 |  * 		Invalidate the current *skb*\ **->hash**. It can be used after
1316 |  * 		mangling on headers through direct packet access, in order to
1317 |  * 		indicate that the hash is outdated and to trigger a
1318 |  * 		recalculation the next time the kernel tries to access this
1319 |  * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
1320 |  *
1321 |  * int bpf_get_numa_node_id(void)
1322 |  * 	Description
1323 |  * 		Return the id of the current NUMA node. The primary use case
1324 |  * 		for this helper is the selection of sockets for the local NUMA
1325 |  * 		node, when the program is attached to sockets using the
1326 |  * 		**SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
1327 |  * 		but the helper is also available to other eBPF program types,
1328 |  * 		similarly to **bpf_get_smp_processor_id**\ ().
1329 |  * 	Return
1330 |  * 		The id of current NUMA node.
1331 |  *
1332 |  * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
1333 |  * 	Description
1334 |  * 		Grows headroom of packet associated to *skb* and adjusts the
1335 |  * 		offset of the MAC header accordingly, adding *len* bytes of
1336 |  * 		space. It automatically extends and reallocates memory as
1337 |  * 		required.
1338 |  *
1339 |  * 		This helper can be used on a layer 3 *skb* to push a MAC header
1340 |  * 		for redirection into a layer 2 device.
1341 |  *
1342 |  * 		All values for *flags* are reserved for future usage, and must
1343 |  * 		be left at zero.
1344 |  *
1345 |  * 		A call to this helper is susceptible to change the underlaying
1346 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1347 |  * 		previously done by the verifier are invalidated and must be
1348 |  * 		performed again, if the helper is used in combination with
1349 |  * 		direct packet access.
1350 |  * 	Return
1351 |  * 		0 on success, or a negative error in case of failure.
1352 |  *
1353 |  * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
1354 |  * 	Description
1355 |  * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
1356 |  * 		it is possible to use a negative value for *delta*. This helper
1357 |  * 		can be used to prepare the packet for pushing or popping
1358 |  * 		headers.
1359 |  *
1360 |  * 		A call to this helper is susceptible to change the underlaying
1361 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1362 |  * 		previously done by the verifier are invalidated and must be
1363 |  * 		performed again, if the helper is used in combination with
1364 |  * 		direct packet access.
1365 |  * 	Return
1366 |  * 		0 on success, or a negative error in case of failure.
1367 |  *
1368 |  * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
1369 |  * 	Description
1370 |  * 		Copy a NUL terminated string from an unsafe address
1371 |  * 		*unsafe_ptr* to *dst*. The *size* should include the
1372 |  * 		terminating NUL byte. In case the string length is smaller than
1373 |  * 		*size*, the target is not padded with further NUL bytes. If the
1374 |  * 		string length is larger than *size*, just *size*-1 bytes are
1375 |  * 		copied and the last byte is set to NUL.
1376 |  *
1377 |  * 		On success, the length of the copied string is returned. This
1378 |  * 		makes this helper useful in tracing programs for reading
1379 |  * 		strings, and more importantly to get its length at runtime. See
1380 |  * 		the following snippet:
1381 |  *
1382 |  * 		::
1383 |  *
1384 |  * 			SEC("kprobe/sys_open")
1385 |  * 			void bpf_sys_open(struct pt_regs *ctx)
1386 |  * 			{
1387 |  * 			        char buf[PATHLEN]; // PATHLEN is defined to 256
1388 |  * 			        int res = bpf_probe_read_str(buf, sizeof(buf),
1389 |  * 				                             ctx->di);
1390 |  *
1391 |  * 				// Consume buf, for example push it to
1392 |  * 				// userspace via bpf_perf_event_output(); we
1393 |  * 				// can use res (the string length) as event
1394 |  * 				// size, after checking its boundaries.
1395 |  * 			}
1396 |  *
1397 |  * 		In comparison, using **bpf_probe_read()** helper here instead
1398 |  * 		to read the string would require to estimate the length at
1399 |  * 		compile time, and would often result in copying more memory
1400 |  * 		than necessary.
1401 |  *
1402 |  * 		Another useful use case is when parsing individual process
1403 |  * 		arguments or individual environment variables navigating
1404 |  * 		*current*\ **->mm->arg_start** and *current*\
1405 |  * 		**->mm->env_start**: using this helper and the return value,
1406 |  * 		one can quickly iterate at the right offset of the memory area.
1407 |  * 	Return
1408 |  * 		On success, the strictly positive length of the string,
1409 |  * 		including the trailing NUL character. On error, a negative
1410 |  * 		value.
1411 |  *
1412 |  * u64 bpf_get_socket_cookie(struct sk_buff *skb)
1413 |  * 	Description
1414 |  * 		If the **struct sk_buff** pointed by *skb* has a known socket,
1415 |  * 		retrieve the cookie (generated by the kernel) of this socket.
1416 |  * 		If no cookie has been set yet, generate a new cookie. Once
1417 |  * 		generated, the socket cookie remains stable for the life of the
1418 |  * 		socket. This helper can be useful for monitoring per socket
1419 |  * 		networking traffic statistics as it provides a unique socket
1420 |  * 		identifier per namespace.
1421 |  * 	Return
1422 |  * 		A 8-byte long non-decreasing number on success, or 0 if the
1423 |  * 		socket field is missing inside *skb*.
1424 |  *
1425 |  * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
1426 |  * 	Description
1427 |  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
1428 |  * 		*skb*, but gets socket from **struct bpf_sock_addr** context.
1429 |  * 	Return
1430 |  * 		A 8-byte long non-decreasing number.
1431 |  *
1432 |  * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
1433 |  * 	Description
1434 |  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
1435 |  * 		*skb*, but gets socket from **struct bpf_sock_ops** context.
1436 |  * 	Return
1437 |  * 		A 8-byte long non-decreasing number.
1438 |  *
1439 |  * u32 bpf_get_socket_uid(struct sk_buff *skb)
1440 |  * 	Return
1441 |  * 		The owner UID of the socket associated to *skb*. If the socket
1442 |  * 		is **NULL**, or if it is not a full socket (i.e. if it is a
1443 |  * 		time-wait or a request socket instead), **overflowuid** value
1444 |  * 		is returned (note that **overflowuid** might also be the actual
1445 |  * 		UID value for the socket).
1446 |  *
1447 |  * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
1448 |  * 	Description
1449 |  * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
1450 |  * 		to value *hash*.
1451 |  * 	Return
1452 |  * 		0
1453 |  *
1454 |  * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
1455 |  * 	Description
1456 |  * 		Emulate a call to **setsockopt()** on the socket associated to
1457 |  * 		*bpf_socket*, which must be a full socket. The *level* at
1458 |  * 		which the option resides and the name *optname* of the option
1459 |  * 		must be specified, see **setsockopt(2)** for more information.
1460 |  * 		The option value of length *optlen* is pointed by *optval*.
1461 |  *
1462 |  * 		This helper actually implements a subset of **setsockopt()**.
1463 |  * 		It supports the following *level*\ s:
1464 |  *
1465 |  * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
1466 |  * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
1467 |  * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
1468 |  * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
1469 |  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
1470 |  * 		  **TCP_BPF_SNDCWND_CLAMP**.
1471 |  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
1472 |  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
1473 |  * 	Return
1474 |  * 		0 on success, or a negative error in case of failure.
1475 |  *
1476 |  * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
1477 |  * 	Description
1478 |  * 		Grow or shrink the room for data in the packet associated to
1479 |  * 		*skb* by *len_diff*, and according to the selected *mode*.
1480 |  *
1481 |  *		There are two supported modes at this time:
1482 |  *
1483 |  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
1484 |  *		  (room space is added or removed below the layer 2 header).
1485 |  *
1486 |  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
1487 |  * 		  (room space is added or removed below the layer 3 header).
1488 |  *
1489 |  *		The following flags are supported at this time:
1490 |  *
1491 |  *		* **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
1492 |  *		  Adjusting mss in this way is not allowed for datagrams.
1493 |  *
1494 |  *		* **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
1495 |  *		* **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
1496 |  *		  Any new space is reserved to hold a tunnel header.
1497 |  *		  Configure skb offsets and other fields accordingly.
1498 |  *
1499 |  *		* **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
1500 |  *		* **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
1501 |  *		  Use with ENCAP_L3 flags to further specify the tunnel type.
1502 |  *
1503 |  * 		A call to this helper is susceptible to change the underlaying
1504 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1505 |  * 		previously done by the verifier are invalidated and must be
1506 |  * 		performed again, if the helper is used in combination with
1507 |  * 		direct packet access.
1508 |  * 	Return
1509 |  * 		0 on success, or a negative error in case of failure.
1510 |  *
1511 |  * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
1512 |  * 	Description
1513 |  * 		Redirect the packet to the endpoint referenced by *map* at
1514 |  * 		index *key*. Depending on its type, this *map* can contain
1515 |  * 		references to net devices (for forwarding packets through other
1516 |  * 		ports), or to CPUs (for redirecting XDP frames to another CPU;
1517 |  * 		but this is only implemented for native XDP (with driver
1518 |  * 		support) as of this writing).
1519 |  *
1520 |  * 		All values for *flags* are reserved for future usage, and must
1521 |  * 		be left at zero.
1522 |  *
1523 |  * 		When used to redirect packets to net devices, this helper
1524 |  * 		provides a high performance increase over **bpf_redirect**\ ().
1525 |  * 		This is due to various implementation details of the underlying
1526 |  * 		mechanisms, one of which is the fact that **bpf_redirect_map**\
1527 |  * 		() tries to send packet as a "bulk" to the device.
1528 |  * 	Return
1529 |  * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
1530 |  *
1531 |  * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
1532 |  * 	Description
1533 |  * 		Redirect the packet to the socket referenced by *map* (of type
1534 |  * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
1535 |  * 		egress interfaces can be used for redirection. The
1536 |  * 		**BPF_F_INGRESS** value in *flags* is used to make the
1537 |  * 		distinction (ingress path is selected if the flag is present,
1538 |  * 		egress path otherwise). This is the only flag supported for now.
1539 |  * 	Return
1540 |  * 		**SK_PASS** on success, or **SK_DROP** on error.
1541 |  *
1542 |  * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
1543 |  * 	Description
1544 |  * 		Add an entry to, or update a *map* referencing sockets. The
1545 |  * 		*skops* is used as a new value for the entry associated to
1546 |  * 		*key*. *flags* is one of:
1547 |  *
1548 |  * 		**BPF_NOEXIST**
1549 |  * 			The entry for *key* must not exist in the map.
1550 |  * 		**BPF_EXIST**
1551 |  * 			The entry for *key* must already exist in the map.
1552 |  * 		**BPF_ANY**
1553 |  * 			No condition on the existence of the entry for *key*.
1554 |  *
1555 |  * 		If the *map* has eBPF programs (parser and verdict), those will
1556 |  * 		be inherited by the socket being added. If the socket is
1557 |  * 		already attached to eBPF programs, this results in an error.
1558 |  * 	Return
1559 |  * 		0 on success, or a negative error in case of failure.
1560 |  *
1561 |  * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
1562 |  * 	Description
1563 |  * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
1564 |  * 		*delta* (which can be positive or negative). Note that this
1565 |  * 		operation modifies the address stored in *xdp_md*\ **->data**,
1566 |  * 		so the latter must be loaded only after the helper has been
1567 |  * 		called.
1568 |  *
1569 |  * 		The use of *xdp_md*\ **->data_meta** is optional and programs
1570 |  * 		are not required to use it. The rationale is that when the
1571 |  * 		packet is processed with XDP (e.g. as DoS filter), it is
1572 |  * 		possible to push further meta data along with it before passing
1573 |  * 		to the stack, and to give the guarantee that an ingress eBPF
1574 |  * 		program attached as a TC classifier on the same device can pick
1575 |  * 		this up for further post-processing. Since TC works with socket
1576 |  * 		buffers, it remains possible to set from XDP the **mark** or
1577 |  * 		**priority** pointers, or other pointers for the socket buffer.
1578 |  * 		Having this scratch space generic and programmable allows for
1579 |  * 		more flexibility as the user is free to store whatever meta
1580 |  * 		data they need.
1581 |  *
1582 |  * 		A call to this helper is susceptible to change the underlaying
1583 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1584 |  * 		previously done by the verifier are invalidated and must be
1585 |  * 		performed again, if the helper is used in combination with
1586 |  * 		direct packet access.
1587 |  * 	Return
1588 |  * 		0 on success, or a negative error in case of failure.
1589 |  *
1590 |  * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
1591 |  * 	Description
1592 |  * 		Read the value of a perf event counter, and store it into *buf*
1593 |  * 		of size *buf_size*. This helper relies on a *map* of type
1594 |  * 		**BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
1595 |  * 		counter is selected when *map* is updated with perf event file
1596 |  * 		descriptors. The *map* is an array whose size is the number of
1597 |  * 		available CPUs, and each cell contains a value relative to one
1598 |  * 		CPU. The value to retrieve is indicated by *flags*, that
1599 |  * 		contains the index of the CPU to look up, masked with
1600 |  * 		**BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
1601 |  * 		**BPF_F_CURRENT_CPU** to indicate that the value for the
1602 |  * 		current CPU should be retrieved.
1603 |  *
1604 |  * 		This helper behaves in a way close to
1605 |  * 		**bpf_perf_event_read**\ () helper, save that instead of
1606 |  * 		just returning the value observed, it fills the *buf*
1607 |  * 		structure. This allows for additional data to be retrieved: in
1608 |  * 		particular, the enabled and running times (in *buf*\
1609 |  * 		**->enabled** and *buf*\ **->running**, respectively) are
1610 |  * 		copied. In general, **bpf_perf_event_read_value**\ () is
1611 |  * 		recommended over **bpf_perf_event_read**\ (), which has some
1612 |  * 		ABI issues and provides fewer functionalities.
1613 |  *
1614 |  * 		These values are interesting, because hardware PMU (Performance
1615 |  * 		Monitoring Unit) counters are limited resources. When there are
1616 |  * 		more PMU based perf events opened than available counters,
1617 |  * 		kernel will multiplex these events so each event gets certain
1618 |  * 		percentage (but not all) of the PMU time. In case that
1619 |  * 		multiplexing happens, the number of samples or counter value
1620 |  * 		will not reflect the case compared to when no multiplexing
1621 |  * 		occurs. This makes comparison between different runs difficult.
1622 |  * 		Typically, the counter value should be normalized before
1623 |  * 		comparing to other experiments. The usual normalization is done
1624 |  * 		as follows.
1625 |  *
1626 |  * 		::
1627 |  *
1628 |  * 			normalized_counter = counter * t_enabled / t_running
1629 |  *
1630 |  * 		Where t_enabled is the time enabled for event and t_running is
1631 |  * 		the time running for event since last normalization. The
1632 |  * 		enabled and running times are accumulated since the perf event
1633 |  * 		open. To achieve scaling factor between two invocations of an
1634 |  * 		eBPF program, users can can use CPU id as the key (which is
1635 |  * 		typical for perf array usage model) to remember the previous
1636 |  * 		value and do the calculation inside the eBPF program.
1637 |  * 	Return
1638 |  * 		0 on success, or a negative error in case of failure.
1639 |  *
1640 |  * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
1641 |  * 	Description
1642 |  * 		For en eBPF program attached to a perf event, retrieve the
1643 |  * 		value of the event counter associated to *ctx* and store it in
1644 |  * 		the structure pointed by *buf* and of size *buf_size*. Enabled
1645 |  * 		and running times are also stored in the structure (see
1646 |  * 		description of helper **bpf_perf_event_read_value**\ () for
1647 |  * 		more details).
1648 |  * 	Return
1649 |  * 		0 on success, or a negative error in case of failure.
1650 |  *
1651 |  * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
1652 |  * 	Description
1653 |  * 		Emulate a call to **getsockopt()** on the socket associated to
1654 |  * 		*bpf_socket*, which must be a full socket. The *level* at
1655 |  * 		which the option resides and the name *optname* of the option
1656 |  * 		must be specified, see **getsockopt(2)** for more information.
1657 |  * 		The retrieved value is stored in the structure pointed by
1658 |  * 		*opval* and of length *optlen*.
1659 |  *
1660 |  * 		This helper actually implements a subset of **getsockopt()**.
1661 |  * 		It supports the following *level*\ s:
1662 |  *
1663 |  * 		* **IPPROTO_TCP**, which supports *optname*
1664 |  * 		  **TCP_CONGESTION**.
1665 |  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
1666 |  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
1667 |  * 	Return
1668 |  * 		0 on success, or a negative error in case of failure.
1669 |  *
1670 |  * int bpf_override_return(struct pt_reg *regs, u64 rc)
1671 |  * 	Description
1672 |  * 		Used for error injection, this helper uses kprobes to override
1673 |  * 		the return value of the probed function, and to set it to *rc*.
1674 |  * 		The first argument is the context *regs* on which the kprobe
1675 |  * 		works.
1676 |  *
1677 |  * 		This helper works by setting setting the PC (program counter)
1678 |  * 		to an override function which is run in place of the original
1679 |  * 		probed function. This means the probed function is not run at
1680 |  * 		all. The replacement function just returns with the required
1681 |  * 		value.
1682 |  *
1683 |  * 		This helper has security implications, and thus is subject to
1684 |  * 		restrictions. It is only available if the kernel was compiled
1685 |  * 		with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
1686 |  * 		option, and in this case it only works on functions tagged with
1687 |  * 		**ALLOW_ERROR_INJECTION** in the kernel code.
1688 |  *
1689 |  * 		Also, the helper is only available for the architectures having
1690 |  * 		the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
1691 |  * 		x86 architecture is the only one to support this feature.
1692 |  * 	Return
1693 |  * 		0
1694 |  *
1695 |  * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
1696 |  * 	Description
1697 |  * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
1698 |  * 		for the full TCP socket associated to *bpf_sock_ops* to
1699 |  * 		*argval*.
1700 |  *
1701 |  * 		The primary use of this field is to determine if there should
1702 |  * 		be calls to eBPF programs of type
1703 |  * 		**BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
1704 |  * 		code. A program of the same type can change its value, per
1705 |  * 		connection and as necessary, when the connection is
1706 |  * 		established. This field is directly accessible for reading, but
1707 |  * 		this helper must be used for updates in order to return an
1708 |  * 		error if an eBPF program tries to set a callback that is not
1709 |  * 		supported in the current kernel.
1710 |  *
1711 |  * 		The supported callback values that *argval* can combine are:
1712 |  *
1713 |  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
1714 |  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
1715 |  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
1716 |  *
1717 |  * 		Here are some examples of where one could call such eBPF
1718 |  * 		program:
1719 |  *
1720 |  * 		* When RTO fires.
1721 |  * 		* When a packet is retransmitted.
1722 |  * 		* When the connection terminates.
1723 |  * 		* When a packet is sent.
1724 |  * 		* When a packet is received.
1725 |  * 	Return
1726 |  * 		Code **-EINVAL** if the socket is not a full TCP socket;
1727 |  * 		otherwise, a positive number containing the bits that could not
1728 |  * 		be set is returned (which comes down to 0 if all bits were set
1729 |  * 		as required).
1730 |  *
1731 |  * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
1732 |  * 	Description
1733 |  * 		This helper is used in programs implementing policies at the
1734 |  * 		socket level. If the message *msg* is allowed to pass (i.e. if
1735 |  * 		the verdict eBPF program returns **SK_PASS**), redirect it to
1736 |  * 		the socket referenced by *map* (of type
1737 |  * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
1738 |  * 		egress interfaces can be used for redirection. The
1739 |  * 		**BPF_F_INGRESS** value in *flags* is used to make the
1740 |  * 		distinction (ingress path is selected if the flag is present,
1741 |  * 		egress path otherwise). This is the only flag supported for now.
1742 |  * 	Return
1743 |  * 		**SK_PASS** on success, or **SK_DROP** on error.
1744 |  *
1745 |  * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
1746 |  * 	Description
1747 |  * 		For socket policies, apply the verdict of the eBPF program to
1748 |  * 		the next *bytes* (number of bytes) of message *msg*.
1749 |  *
1750 |  * 		For example, this helper can be used in the following cases:
1751 |  *
1752 |  * 		* A single **sendmsg**\ () or **sendfile**\ () system call
1753 |  * 		  contains multiple logical messages that the eBPF program is
1754 |  * 		  supposed to read and for which it should apply a verdict.
1755 |  * 		* An eBPF program only cares to read the first *bytes* of a
1756 |  * 		  *msg*. If the message has a large payload, then setting up
1757 |  * 		  and calling the eBPF program repeatedly for all bytes, even
1758 |  * 		  though the verdict is already known, would create unnecessary
1759 |  * 		  overhead.
1760 |  *
1761 |  * 		When called from within an eBPF program, the helper sets a
1762 |  * 		counter internal to the BPF infrastructure, that is used to
1763 |  * 		apply the last verdict to the next *bytes*. If *bytes* is
1764 |  * 		smaller than the current data being processed from a
1765 |  * 		**sendmsg**\ () or **sendfile**\ () system call, the first
1766 |  * 		*bytes* will be sent and the eBPF program will be re-run with
1767 |  * 		the pointer for start of data pointing to byte number *bytes*
1768 |  * 		**+ 1**. If *bytes* is larger than the current data being
1769 |  * 		processed, then the eBPF verdict will be applied to multiple
1770 |  * 		**sendmsg**\ () or **sendfile**\ () calls until *bytes* are
1771 |  * 		consumed.
1772 |  *
1773 |  * 		Note that if a socket closes with the internal counter holding
1774 |  * 		a non-zero value, this is not a problem because data is not
1775 |  * 		being buffered for *bytes* and is sent as it is received.
1776 |  * 	Return
1777 |  * 		0
1778 |  *
1779 |  * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
1780 |  * 	Description
1781 |  * 		For socket policies, prevent the execution of the verdict eBPF
1782 |  * 		program for message *msg* until *bytes* (byte number) have been
1783 |  * 		accumulated.
1784 |  *
1785 |  * 		This can be used when one needs a specific number of bytes
1786 |  * 		before a verdict can be assigned, even if the data spans
1787 |  * 		multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
1788 |  * 		case would be a user calling **sendmsg**\ () repeatedly with
1789 |  * 		1-byte long message segments. Obviously, this is bad for
1790 |  * 		performance, but it is still valid. If the eBPF program needs
1791 |  * 		*bytes* bytes to validate a header, this helper can be used to
1792 |  * 		prevent the eBPF program to be called again until *bytes* have
1793 |  * 		been accumulated.
1794 |  * 	Return
1795 |  * 		0
1796 |  *
1797 |  * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
1798 |  * 	Description
1799 |  * 		For socket policies, pull in non-linear data from user space
1800 |  * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
1801 |  * 		**->data_end** to *start* and *end* bytes offsets into *msg*,
1802 |  * 		respectively.
1803 |  *
1804 |  * 		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
1805 |  * 		*msg* it can only parse data that the (**data**, **data_end**)
1806 |  * 		pointers have already consumed. For **sendmsg**\ () hooks this
1807 |  * 		is likely the first scatterlist element. But for calls relying
1808 |  * 		on the **sendpage** handler (e.g. **sendfile**\ ()) this will
1809 |  * 		be the range (**0**, **0**) because the data is shared with
1810 |  * 		user space and by default the objective is to avoid allowing
1811 |  * 		user space to modify data while (or after) eBPF verdict is
1812 |  * 		being decided. This helper can be used to pull in data and to
1813 |  * 		set the start and end pointer to given values. Data will be
1814 |  * 		copied if necessary (i.e. if data was not linear and if start
1815 |  * 		and end pointers do not point to the same chunk).
1816 |  *
1817 |  * 		A call to this helper is susceptible to change the underlaying
1818 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1819 |  * 		previously done by the verifier are invalidated and must be
1820 |  * 		performed again, if the helper is used in combination with
1821 |  * 		direct packet access.
1822 |  *
1823 |  * 		All values for *flags* are reserved for future usage, and must
1824 |  * 		be left at zero.
1825 |  * 	Return
1826 |  * 		0 on success, or a negative error in case of failure.
1827 |  *
1828 |  * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
1829 |  * 	Description
1830 |  * 		Bind the socket associated to *ctx* to the address pointed by
1831 |  * 		*addr*, of length *addr_len*. This allows for making outgoing
1832 |  * 		connection from the desired IP address, which can be useful for
1833 |  * 		example when all processes inside a cgroup should use one
1834 |  * 		single IP address on a host that has multiple IP configured.
1835 |  *
1836 |  * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
1837 |  * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
1838 |  * 		**AF_INET6**). Looking for a free port to bind to can be
1839 |  * 		expensive, therefore binding to port is not permitted by the
1840 |  * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
1841 |  * 		must be set to zero.
1842 |  * 	Return
1843 |  * 		0 on success, or a negative error in case of failure.
1844 |  *
1845 |  * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
1846 |  * 	Description
1847 |  * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
1848 |  * 		only possible to shrink the packet as of this writing,
1849 |  * 		therefore *delta* must be a negative integer.
1850 |  *
1851 |  * 		A call to this helper is susceptible to change the underlaying
1852 |  * 		packet buffer. Therefore, at load time, all checks on pointers
1853 |  * 		previously done by the verifier are invalidated and must be
1854 |  * 		performed again, if the helper is used in combination with
1855 |  * 		direct packet access.
1856 |  * 	Return
1857 |  * 		0 on success, or a negative error in case of failure.
1858 |  *
1859 |  * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
1860 |  * 	Description
1861 |  * 		Retrieve the XFRM state (IP transform framework, see also
1862 |  * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
1863 |  *
1864 |  * 		The retrieved value is stored in the **struct bpf_xfrm_state**
1865 |  * 		pointed by *xfrm_state* and of length *size*.
1866 |  *
1867 |  * 		All values for *flags* are reserved for future usage, and must
1868 |  * 		be left at zero.
1869 |  *
1870 |  * 		This helper is available only if the kernel was compiled with
1871 |  * 		**CONFIG_XFRM** configuration option.
1872 |  * 	Return
1873 |  * 		0 on success, or a negative error in case of failure.
1874 |  *
1875 |  * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
1876 |  * 	Description
1877 |  * 		Return a user or a kernel stack in bpf program provided buffer.
1878 |  * 		To achieve this, the helper needs *ctx*, which is a pointer
1879 |  * 		to the context on which the tracing program is executed.
1880 |  * 		To store the stacktrace, the bpf program provides *buf* with
1881 |  * 		a nonnegative *size*.
1882 |  *
1883 |  * 		The last argument, *flags*, holds the number of stack frames to
1884 |  * 		skip (from 0 to 255), masked with
1885 |  * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
1886 |  * 		the following flags:
1887 |  *
1888 |  * 		**BPF_F_USER_STACK**
1889 |  * 			Collect a user space stack instead of a kernel stack.
1890 |  * 		**BPF_F_USER_BUILD_ID**
1891 |  * 			Collect buildid+offset instead of ips for user stack,
1892 |  * 			only valid if **BPF_F_USER_STACK** is also specified.
1893 |  *
1894 |  * 		**bpf_get_stack**\ () can collect up to
1895 |  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
1896 |  * 		to sufficient large buffer size. Note that
1897 |  * 		this limit can be controlled with the **sysctl** program, and
1898 |  * 		that it should be manually increased in order to profile long
1899 |  * 		user stacks (such as stacks for Java programs). To do so, use:
1900 |  *
1901 |  * 		::
1902 |  *
1903 |  * 			# sysctl kernel.perf_event_max_stack=<new value>
1904 |  * 	Return
1905 |  * 		A non-negative value equal to or less than *size* on success,
1906 |  * 		or a negative error in case of failure.
1907 |  *
1908 |  * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
1909 |  * 	Description
1910 |  * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
1911 |  * 		it provides an easy way to load *len* bytes from *offset*
1912 |  * 		from the packet associated to *skb*, into the buffer pointed
1913 |  * 		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
1914 |  * 		a fifth argument *start_header* exists in order to select a
1915 |  * 		base offset to start from. *start_header* can be one of:
1916 |  *
1917 |  * 		**BPF_HDR_START_MAC**
1918 |  * 			Base offset to load data from is *skb*'s mac header.
1919 |  * 		**BPF_HDR_START_NET**
1920 |  * 			Base offset to load data from is *skb*'s network header.
1921 |  *
1922 |  * 		In general, "direct packet access" is the preferred method to
1923 |  * 		access packet data, however, this helper is in particular useful
1924 |  * 		in socket filters where *skb*\ **->data** does not always point
1925 |  * 		to the start of the mac header and where "direct packet access"
1926 |  * 		is not available.
1927 |  * 	Return
1928 |  * 		0 on success, or a negative error in case of failure.
1929 |  *
1930 |  * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
1931 |  *	Description
1932 |  *		Do FIB lookup in kernel tables using parameters in *params*.
1933 |  *		If lookup is successful and result shows packet is to be
1934 |  *		forwarded, the neighbor tables are searched for the nexthop.
1935 |  *		If successful (ie., FIB lookup shows forwarding and nexthop
1936 |  *		is resolved), the nexthop address is returned in ipv4_dst
1937 |  *		or ipv6_dst based on family, smac is set to mac address of
1938 |  *		egress device, dmac is set to nexthop mac address, rt_metric
1939 |  *		is set to metric from route (IPv4/IPv6 only), and ifindex
1940 |  *		is set to the device index of the nexthop from the FIB lookup.
1941 |  *
1942 |  *		*plen* argument is the size of the passed in struct.
1943 |  *		*flags* argument can be a combination of one or more of the
1944 |  *		following values:
1945 |  *
1946 |  *		**BPF_FIB_LOOKUP_DIRECT**
1947 |  *			Do a direct table lookup vs full lookup using FIB
1948 |  *			rules.
1949 |  *		**BPF_FIB_LOOKUP_OUTPUT**
1950 |  *			Perform lookup from an egress perspective (default is
1951 |  *			ingress).
1952 |  *
1953 |  *		*ctx* is either **struct xdp_md** for XDP programs or
1954 |  *		**struct sk_buff** tc cls_act programs.
1955 |  *	Return
1956 |  *		* < 0 if any input argument is invalid
1957 |  *		*   0 on success (packet is forwarded, nexthop neighbor exists)
1958 |  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
1959 |  *		  packet is not forwarded or needs assist from full stack
1960 |  *
1961 |  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
1962 |  *	Description
1963 |  *		Add an entry to, or update a sockhash *map* referencing sockets.
1964 |  *		The *skops* is used as a new value for the entry associated to
1965 |  *		*key*. *flags* is one of:
1966 |  *
1967 |  *		**BPF_NOEXIST**
1968 |  *			The entry for *key* must not exist in the map.
1969 |  *		**BPF_EXIST**
1970 |  *			The entry for *key* must already exist in the map.
1971 |  *		**BPF_ANY**
1972 |  *			No condition on the existence of the entry for *key*.
1973 |  *
1974 |  *		If the *map* has eBPF programs (parser and verdict), those will
1975 |  *		be inherited by the socket being added. If the socket is
1976 |  *		already attached to eBPF programs, this results in an error.
1977 |  *	Return
1978 |  *		0 on success, or a negative error in case of failure.
1979 |  *
1980 |  * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
1981 |  *	Description
1982 |  *		This helper is used in programs implementing policies at the
1983 |  *		socket level. If the message *msg* is allowed to pass (i.e. if
1984 |  *		the verdict eBPF program returns **SK_PASS**), redirect it to
1985 |  *		the socket referenced by *map* (of type
1986 |  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
1987 |  *		egress interfaces can be used for redirection. The
1988 |  *		**BPF_F_INGRESS** value in *flags* is used to make the
1989 |  *		distinction (ingress path is selected if the flag is present,
1990 |  *		egress path otherwise). This is the only flag supported for now.
1991 |  *	Return
1992 |  *		**SK_PASS** on success, or **SK_DROP** on error.
1993 |  *
1994 |  * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
1995 |  *	Description
1996 |  *		This helper is used in programs implementing policies at the
1997 |  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
1998 |  *		if the verdeict eBPF program returns **SK_PASS**), redirect it
1999 |  *		to the socket referenced by *map* (of type
2000 |  *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
2001 |  *		egress interfaces can be used for redirection. The
2002 |  *		**BPF_F_INGRESS** value in *flags* is used to make the
2003 |  *		distinction (ingress path is selected if the flag is present,
2004 |  *		egress otherwise). This is the only flag supported for now.
2005 |  *	Return
2006 |  *		**SK_PASS** on success, or **SK_DROP** on error.
2007 |  *
2008 |  * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
2009 |  *	Description
2010 |  *		Encapsulate the packet associated to *skb* within a Layer 3
2011 |  *		protocol header. This header is provided in the buffer at
2012 |  *		address *hdr*, with *len* its size in bytes. *type* indicates
2013 |  *		the protocol of the header and can be one of:
2014 |  *
2015 |  *		**BPF_LWT_ENCAP_SEG6**
2016 |  *			IPv6 encapsulation with Segment Routing Header
2017 |  *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
2018 |  *			the IPv6 header is computed by the kernel.
2019 |  *		**BPF_LWT_ENCAP_SEG6_INLINE**
2020 |  *			Only works if *skb* contains an IPv6 packet. Insert a
2021 |  *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
2022 |  *			the IPv6 header.
2023 |  *		**BPF_LWT_ENCAP_IP**
2024 |  *			IP encapsulation (GRE/GUE/IPIP/etc). The outer header
2025 |  *			must be IPv4 or IPv6, followed by zero or more
2026 |  *			additional headers, up to LWT_BPF_MAX_HEADROOM total
2027 |  *			bytes in all prepended headers. Please note that
2028 |  *			if skb_is_gso(skb) is true, no more than two headers
2029 |  *			can be prepended, and the inner header, if present,
2030 |  *			should be either GRE or UDP/GUE.
2031 |  *
2032 |  *		BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of
2033 |  *		type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
2034 |  *		by bpf programs of types BPF_PROG_TYPE_LWT_IN and
2035 |  *		BPF_PROG_TYPE_LWT_XMIT.
2036 |  *
2037 |  * 		A call to this helper is susceptible to change the underlaying
2038 |  * 		packet buffer. Therefore, at load time, all checks on pointers
2039 |  * 		previously done by the verifier are invalidated and must be
2040 |  * 		performed again, if the helper is used in combination with
2041 |  * 		direct packet access.
2042 |  *	Return
2043 |  * 		0 on success, or a negative error in case of failure.
2044 |  *
2045 |  * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
2046 |  *	Description
2047 |  *		Store *len* bytes from address *from* into the packet
2048 |  *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
2049 |  *		inside the outermost IPv6 Segment Routing Header can be
2050 |  *		modified through this helper.
2051 |  *
2052 |  * 		A call to this helper is susceptible to change the underlaying
2053 |  * 		packet buffer. Therefore, at load time, all checks on pointers
2054 |  * 		previously done by the verifier are invalidated and must be
2055 |  * 		performed again, if the helper is used in combination with
2056 |  * 		direct packet access.
2057 |  *	Return
2058 |  * 		0 on success, or a negative error in case of failure.
2059 |  *
2060 |  * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
2061 |  *	Description
2062 |  *		Adjust the size allocated to TLVs in the outermost IPv6
2063 |  *		Segment Routing Header contained in the packet associated to
2064 |  *		*skb*, at position *offset* by *delta* bytes. Only offsets
2065 |  *		after the segments are accepted. *delta* can be as well
2066 |  *		positive (growing) as negative (shrinking).
2067 |  *
2068 |  * 		A call to this helper is susceptible to change the underlaying
2069 |  * 		packet buffer. Therefore, at load time, all checks on pointers
2070 |  * 		previously done by the verifier are invalidated and must be
2071 |  * 		performed again, if the helper is used in combination with
2072 |  * 		direct packet access.
2073 |  *	Return
2074 |  * 		0 on success, or a negative error in case of failure.
2075 |  *
2076 |  * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
2077 |  *	Description
2078 |  *		Apply an IPv6 Segment Routing action of type *action* to the
2079 |  *		packet associated to *skb*. Each action takes a parameter
2080 |  *		contained at address *param*, and of length *param_len* bytes.
2081 |  *		*action* can be one of:
2082 |  *
2083 |  *		**SEG6_LOCAL_ACTION_END_X**
2084 |  *			End.X action: Endpoint with Layer-3 cross-connect.
2085 |  *			Type of *param*: **struct in6_addr**.
2086 |  *		**SEG6_LOCAL_ACTION_END_T**
2087 |  *			End.T action: Endpoint with specific IPv6 table lookup.
2088 |  *			Type of *param*: **int**.
2089 |  *		**SEG6_LOCAL_ACTION_END_B6**
2090 |  *			End.B6 action: Endpoint bound to an SRv6 policy.
2091 |  *			Type of param: **struct ipv6_sr_hdr**.
2092 |  *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
2093 |  *			End.B6.Encap action: Endpoint bound to an SRv6
2094 |  *			encapsulation policy.
2095 |  *			Type of param: **struct ipv6_sr_hdr**.
2096 |  *
2097 |  * 		A call to this helper is susceptible to change the underlaying
2098 |  * 		packet buffer. Therefore, at load time, all checks on pointers
2099 |  * 		previously done by the verifier are invalidated and must be
2100 |  * 		performed again, if the helper is used in combination with
2101 |  * 		direct packet access.
2102 |  *	Return
2103 |  * 		0 on success, or a negative error in case of failure.
2104 |  *
2105 |  * int bpf_rc_repeat(void *ctx)
2106 |  *	Description
2107 |  *		This helper is used in programs implementing IR decoding, to
2108 |  *		report a successfully decoded repeat key message. This delays
2109 |  *		the generation of a key up event for previously generated
2110 |  *		key down event.
2111 |  *
2112 |  *		Some IR protocols like NEC have a special IR message for
2113 |  *		repeating last button, for when a button is held down.
2114 |  *
2115 |  *		The *ctx* should point to the lirc sample as passed into
2116 |  *		the program.
2117 |  *
2118 |  *		This helper is only available is the kernel was compiled with
2119 |  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2120 |  *		"**y**".
2121 |  *	Return
2122 |  *		0
2123 |  *
2124 |  * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
2125 |  *	Description
2126 |  *		This helper is used in programs implementing IR decoding, to
2127 |  *		report a successfully decoded key press with *scancode*,
2128 |  *		*toggle* value in the given *protocol*. The scancode will be
2129 |  *		translated to a keycode using the rc keymap, and reported as
2130 |  *		an input key down event. After a period a key up event is
2131 |  *		generated. This period can be extended by calling either
2132 |  *		**bpf_rc_keydown**\ () again with the same values, or calling
2133 |  *		**bpf_rc_repeat**\ ().
2134 |  *
2135 |  *		Some protocols include a toggle bit, in case the button	was
2136 |  *		released and pressed again between consecutive scancodes.
2137 |  *
2138 |  *		The *ctx* should point to the lirc sample as passed into
2139 |  *		the program.
2140 |  *
2141 |  *		The *protocol* is the decoded protocol number (see
2142 |  *		**enum rc_proto** for some predefined values).
2143 |  *
2144 |  *		This helper is only available is the kernel was compiled with
2145 |  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2146 |  *		"**y**".
2147 |  *	Return
2148 |  *		0
2149 |  *
2150 |  * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
2151 |  * 	Description
2152 |  * 		Return the cgroup v2 id of the socket associated with the *skb*.
2153 |  * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
2154 |  * 		helper for cgroup v1 by providing a tag resp. identifier that
2155 |  * 		can be matched on or used for map lookups e.g. to implement
2156 |  * 		policy. The cgroup v2 id of a given path in the hierarchy is
2157 |  * 		exposed in user space through the f_handle API in order to get
2158 |  * 		to the same 64-bit id.
2159 |  *
2160 |  * 		This helper can be used on TC egress path, but not on ingress,
2161 |  * 		and is available only if the kernel was compiled with the
2162 |  * 		**CONFIG_SOCK_CGROUP_DATA** configuration option.
2163 |  * 	Return
2164 |  * 		The id is returned or 0 in case the id could not be retrieved.
2165 |  *
2166 |  * u64 bpf_get_current_cgroup_id(void)
2167 |  * 	Return
2168 |  * 		A 64-bit integer containing the current cgroup id based
2169 |  * 		on the cgroup within which the current task is running.
2170 |  *
2171 |  * void *bpf_get_local_storage(void *map, u64 flags)
2172 |  *	Description
2173 |  *		Get the pointer to the local storage area.
2174 |  *		The type and the size of the local storage is defined
2175 |  *		by the *map* argument.
2176 |  *		The *flags* meaning is specific for each map type,
2177 |  *		and has to be 0 for cgroup local storage.
2178 |  *
2179 |  *		Depending on the BPF program type, a local storage area
2180 |  *		can be shared between multiple instances of the BPF program,
2181 |  *		running simultaneously.
2182 |  *
2183 |  *		A user should care about the synchronization by himself.
2184 |  *		For example, by using the **BPF_STX_XADD** instruction to alter
2185 |  *		the shared data.
2186 |  *	Return
2187 |  *		A pointer to the local storage area.
2188 |  *
2189 |  * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
2190 |  *	Description
2191 |  *		Select a **SO_REUSEPORT** socket from a
2192 |  *		**BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
2193 |  *		It checks the selected socket is matching the incoming
2194 |  *		request in the socket buffer.
2195 |  *	Return
2196 |  *		0 on success, or a negative error in case of failure.
2197 |  *
2198 |  * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
2199 |  *	Description
2200 |  *		Return id of cgroup v2 that is ancestor of cgroup associated
2201 |  *		with the *skb* at the *ancestor_level*.  The root cgroup is at
2202 |  *		*ancestor_level* zero and each step down the hierarchy
2203 |  *		increments the level. If *ancestor_level* == level of cgroup
2204 |  *		associated with *skb*, then return value will be same as that
2205 |  *		of **bpf_skb_cgroup_id**\ ().
2206 |  *
2207 |  *		The helper is useful to implement policies based on cgroups
2208 |  *		that are upper in hierarchy than immediate cgroup associated
2209 |  *		with *skb*.
2210 |  *
2211 |  *		The format of returned id and helper limitations are same as in
2212 |  *		**bpf_skb_cgroup_id**\ ().
2213 |  *	Return
2214 |  *		The id is returned or 0 in case the id could not be retrieved.
2215 |  *
2216 |  * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
2217 |  *	Description
2218 |  *		Look for TCP socket matching *tuple*, optionally in a child
2219 |  *		network namespace *netns*. The return value must be checked,
2220 |  *		and if non-**NULL**, released via **bpf_sk_release**\ ().
2221 |  *
2222 |  *		The *ctx* should point to the context of the program, such as
2223 |  *		the skb or socket (depending on the hook in use). This is used
2224 |  *		to determine the base network namespace for the lookup.
2225 |  *
2226 |  *		*tuple_size* must be one of:
2227 |  *
2228 |  *		**sizeof**\ (*tuple*\ **->ipv4**)
2229 |  *			Look for an IPv4 socket.
2230 |  *		**sizeof**\ (*tuple*\ **->ipv6**)
2231 |  *			Look for an IPv6 socket.
2232 |  *
2233 |  *		If the *netns* is a negative signed 32-bit integer, then the
2234 |  *		socket lookup table in the netns associated with the *ctx* will
2235 |  *		will be used. For the TC hooks, this is the netns of the device
2236 |  *		in the skb. For socket hooks, this is the netns of the socket.
2237 |  *		If *netns* is any other signed 32-bit value greater than or
2238 |  *		equal to zero then it specifies the ID of the netns relative to
2239 |  *		the netns associated with the *ctx*. *netns* values beyond the
2240 |  *		range of 32-bit integers are reserved for future use.
2241 |  *
2242 |  *		All values for *flags* are reserved for future usage, and must
2243 |  *		be left at zero.
2244 |  *
2245 |  *		This helper is available only if the kernel was compiled with
2246 |  *		**CONFIG_NET** configuration option.
2247 |  *	Return
2248 |  *		Pointer to **struct bpf_sock**, or **NULL** in case of failure.
2249 |  *		For sockets with reuseport option, the **struct bpf_sock**
2250 |  *		result is from **reuse->socks**\ [] using the hash of the tuple.
2251 |  *
2252 |  * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
2253 |  *	Description
2254 |  *		Look for UDP socket matching *tuple*, optionally in a child
2255 |  *		network namespace *netns*. The return value must be checked,
2256 |  *		and if non-**NULL**, released via **bpf_sk_release**\ ().
2257 |  *
2258 |  *		The *ctx* should point to the context of the program, such as
2259 |  *		the skb or socket (depending on the hook in use). This is used
2260 |  *		to determine the base network namespace for the lookup.
2261 |  *
2262 |  *		*tuple_size* must be one of:
2263 |  *
2264 |  *		**sizeof**\ (*tuple*\ **->ipv4**)
2265 |  *			Look for an IPv4 socket.
2266 |  *		**sizeof**\ (*tuple*\ **->ipv6**)
2267 |  *			Look for an IPv6 socket.
2268 |  *
2269 |  *		If the *netns* is a negative signed 32-bit integer, then the
2270 |  *		socket lookup table in the netns associated with the *ctx* will
2271 |  *		will be used. For the TC hooks, this is the netns of the device
2272 |  *		in the skb. For socket hooks, this is the netns of the socket.
2273 |  *		If *netns* is any other signed 32-bit value greater than or
2274 |  *		equal to zero then it specifies the ID of the netns relative to
2275 |  *		the netns associated with the *ctx*. *netns* values beyond the
2276 |  *		range of 32-bit integers are reserved for future use.
2277 |  *
2278 |  *		All values for *flags* are reserved for future usage, and must
2279 |  *		be left at zero.
2280 |  *
2281 |  *		This helper is available only if the kernel was compiled with
2282 |  *		**CONFIG_NET** configuration option.
2283 |  *	Return
2284 |  *		Pointer to **struct bpf_sock**, or **NULL** in case of failure.
2285 |  *		For sockets with reuseport option, the **struct bpf_sock**
2286 |  *		result is from **reuse->socks**\ [] using the hash of the tuple.
2287 |  *
2288 |  * int bpf_sk_release(struct bpf_sock *sock)
2289 |  *	Description
2290 |  *		Release the reference held by *sock*. *sock* must be a
2291 |  *		non-**NULL** pointer that was returned from
2292 |  *		**bpf_sk_lookup_xxx**\ ().
2293 |  *	Return
2294 |  *		0 on success, or a negative error in case of failure.
2295 |  *
2296 |  * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
2297 |  * 	Description
2298 |  * 		Push an element *value* in *map*. *flags* is one of:
2299 |  *
2300 |  * 		**BPF_EXIST**
2301 |  * 			If the queue/stack is full, the oldest element is
2302 |  * 			removed to make room for this.
2303 |  * 	Return
2304 |  * 		0 on success, or a negative error in case of failure.
2305 |  *
2306 |  * int bpf_map_pop_elem(struct bpf_map *map, void *value)
2307 |  * 	Description
2308 |  * 		Pop an element from *map*.
2309 |  * 	Return
2310 |  * 		0 on success, or a negative error in case of failure.
2311 |  *
2312 |  * int bpf_map_peek_elem(struct bpf_map *map, void *value)
2313 |  * 	Description
2314 |  * 		Get an element from *map* without removing it.
2315 |  * 	Return
2316 |  * 		0 on success, or a negative error in case of failure.
2317 |  *
2318 |  * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
2319 |  *	Description
2320 |  *		For socket policies, insert *len* bytes into *msg* at offset
2321 |  *		*start*.
2322 |  *
2323 |  *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
2324 |  *		*msg* it may want to insert metadata or options into the *msg*.
2325 |  *		This can later be read and used by any of the lower layer BPF
2326 |  *		hooks.
2327 |  *
2328 |  *		This helper may fail if under memory pressure (a malloc
2329 |  *		fails) in these cases BPF programs will get an appropriate
2330 |  *		error and BPF programs will need to handle them.
2331 |  *	Return
2332 |  *		0 on success, or a negative error in case of failure.
2333 |  *
2334 |  * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
2335 |  *	Description
2336 |  *		Will remove *pop* bytes from a *msg* starting at byte *start*.
2337 |  *		This may result in **ENOMEM** errors under certain situations if
2338 |  *		an allocation and copy are required due to a full ring buffer.
2339 |  *		However, the helper will try to avoid doing the allocation
2340 |  *		if possible. Other errors can occur if input parameters are
2341 |  *		invalid either due to *start* byte not being valid part of *msg*
2342 |  *		payload and/or *pop* value being to large.
2343 |  *	Return
2344 |  *		0 on success, or a negative error in case of failure.
2345 |  *
2346 |  * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
2347 |  *	Description
2348 |  *		This helper is used in programs implementing IR decoding, to
2349 |  *		report a successfully decoded pointer movement.
2350 |  *
2351 |  *		The *ctx* should point to the lirc sample as passed into
2352 |  *		the program.
2353 |  *
2354 |  *		This helper is only available is the kernel was compiled with
2355 |  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2356 |  *		"**y**".
2357 |  *	Return
2358 |  *		0
2359 |  *
2360 |  * int bpf_spin_lock(struct bpf_spin_lock *lock)
2361 |  *	Description
2362 |  *		Acquire a spinlock represented by the pointer *lock*, which is
2363 |  *		stored as part of a value of a map. Taking the lock allows to
2364 |  *		safely update the rest of the fields in that value. The
2365 |  *		spinlock can (and must) later be released with a call to
2366 |  *		**bpf_spin_unlock**\ (\ *lock*\ ).
2367 |  *
2368 |  *		Spinlocks in BPF programs come with a number of restrictions
2369 |  *		and constraints:
2370 |  *
2371 |  *		* **bpf_spin_lock** objects are only allowed inside maps of
2372 |  *		  types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
2373 |  *		  list could be extended in the future).
2374 |  *		* BTF description of the map is mandatory.
2375 |  *		* The BPF program can take ONE lock at a time, since taking two
2376 |  *		  or more could cause dead locks.
2377 |  *		* Only one **struct bpf_spin_lock** is allowed per map element.
2378 |  *		* When the lock is taken, calls (either BPF to BPF or helpers)
2379 |  *		  are not allowed.
2380 |  *		* The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
2381 |  *		  allowed inside a spinlock-ed region.
2382 |  *		* The BPF program MUST call **bpf_spin_unlock**\ () to release
2383 |  *		  the lock, on all execution paths, before it returns.
2384 |  *		* The BPF program can access **struct bpf_spin_lock** only via
2385 |  *		  the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
2386 |  *		  helpers. Loading or storing data into the **struct
2387 |  *		  bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
2388 |  *		* To use the **bpf_spin_lock**\ () helper, the BTF description
2389 |  *		  of the map value must be a struct and have **struct
2390 |  *		  bpf_spin_lock** *anyname*\ **;** field at the top level.
2391 |  *		  Nested lock inside another struct is not allowed.
2392 |  *		* The **struct bpf_spin_lock** *lock* field in a map value must
2393 |  *		  be aligned on a multiple of 4 bytes in that value.
2394 |  *		* Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
2395 |  *		  the **bpf_spin_lock** field to user space.
2396 |  *		* Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
2397 |  *		  a BPF program, do not update the **bpf_spin_lock** field.
2398 |  *		* **bpf_spin_lock** cannot be on the stack or inside a
2399 |  *		  networking packet (it can only be inside of a map values).
2400 |  *		* **bpf_spin_lock** is available to root only.
2401 |  *		* Tracing programs and socket filter programs cannot use
2402 |  *		  **bpf_spin_lock**\ () due to insufficient preemption checks
2403 |  *		  (but this may change in the future).
2404 |  *		* **bpf_spin_lock** is not allowed in inner maps of map-in-map.
2405 |  *	Return
2406 |  *		0
2407 |  *
2408 |  * int bpf_spin_unlock(struct bpf_spin_lock *lock)
2409 |  *	Description
2410 |  *		Release the *lock* previously locked by a call to
2411 |  *		**bpf_spin_lock**\ (\ *lock*\ ).
2412 |  *	Return
2413 |  *		0
2414 |  *
2415 |  * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
2416 |  *	Description
2417 |  *		This helper gets a **struct bpf_sock** pointer such
2418 |  *		that all the fields in this **bpf_sock** can be accessed.
2419 |  *	Return
2420 |  *		A **struct bpf_sock** pointer on success, or **NULL** in
2421 |  *		case of failure.
2422 |  *
2423 |  * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
2424 |  *	Description
2425 |  *		This helper gets a **struct bpf_tcp_sock** pointer from a
2426 |  *		**struct bpf_sock** pointer.
2427 |  *	Return
2428 |  *		A **struct bpf_tcp_sock** pointer on success, or **NULL** in
2429 |  *		case of failure.
2430 |  *
2431 |  * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
2432 |  *	Description
2433 |  *		Set ECN (Explicit Congestion Notification) field of IP header
2434 |  *		to **CE** (Congestion Encountered) if current value is **ECT**
2435 |  *		(ECN Capable Transport). Otherwise, do nothing. Works with IPv6
2436 |  *		and IPv4.
2437 |  *	Return
2438 |  *		1 if the **CE** flag is set (either by the current helper call
2439 |  *		or because it was already present), 0 if it is not set.
2440 |  *
2441 |  * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
2442 |  *	Description
2443 |  *		Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
2444 |  *		**bpf_sk_release**\ () is unnecessary and not allowed.
2445 |  *	Return
2446 |  *		A **struct bpf_sock** pointer on success, or **NULL** in
2447 |  *		case of failure.
2448 |  *
2449 |  * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
2450 |  *	Description
2451 |  *		Look for TCP socket matching *tuple*, optionally in a child
2452 |  *		network namespace *netns*. The return value must be checked,
2453 |  *		and if non-**NULL**, released via **bpf_sk_release**\ ().
2454 |  *
2455 |  *		This function is identical to bpf_sk_lookup_tcp, except that it
2456 |  *		also returns timewait or request sockets. Use bpf_sk_fullsock
2457 |  *		or bpf_tcp_socket to access the full structure.
2458 |  *
2459 |  *		This helper is available only if the kernel was compiled with
2460 |  *		**CONFIG_NET** configuration option.
2461 |  *	Return
2462 |  *		Pointer to **struct bpf_sock**, or **NULL** in case of failure.
2463 |  *		For sockets with reuseport option, the **struct bpf_sock**
2464 |  *		result is from **reuse->socks**\ [] using the hash of the tuple.
2465 |  *
2466 |  * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
2467 |  * 	Description
2468 |  * 		Check whether iph and th contain a valid SYN cookie ACK for
2469 |  * 		the listening socket in sk.
2470 |  *
2471 |  * 		iph points to the start of the IPv4 or IPv6 header, while
2472 |  * 		iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
2473 |  *
2474 |  * 		th points to the start of the TCP header, while th_len contains
2475 |  * 		sizeof(struct tcphdr).
2476 |  *
2477 |  * 	Return
2478 |  * 		0 if iph and th are a valid SYN cookie ACK, or a negative error
2479 |  * 		otherwise.
2480 |  */
2481 | #define __BPF_FUNC_MAPPER(FN)		\
2482 | 	FN(unspec),			\
2483 | 	FN(map_lookup_elem),		\
2484 | 	FN(map_update_elem),		\
2485 | 	FN(map_delete_elem),		\
2486 | 	FN(probe_read),			\
2487 | 	FN(ktime_get_ns),		\
2488 | 	FN(trace_printk),		\
2489 | 	FN(get_prandom_u32),		\
2490 | 	FN(get_smp_processor_id),	\
2491 | 	FN(skb_store_bytes),		\
2492 | 	FN(l3_csum_replace),		\
2493 | 	FN(l4_csum_replace),		\
2494 | 	FN(tail_call),			\
2495 | 	FN(clone_redirect),		\
2496 | 	FN(get_current_pid_tgid),	\
2497 | 	FN(get_current_uid_gid),	\
2498 | 	FN(get_current_comm),		\
2499 | 	FN(get_cgroup_classid),		\
2500 | 	FN(skb_vlan_push),		\
2501 | 	FN(skb_vlan_pop),		\
2502 | 	FN(skb_get_tunnel_key),		\
2503 | 	FN(skb_set_tunnel_key),		\
2504 | 	FN(perf_event_read),		\
2505 | 	FN(redirect),			\
2506 | 	FN(get_route_realm),		\
2507 | 	FN(perf_event_output),		\
2508 | 	FN(skb_load_bytes),		\
2509 | 	FN(get_stackid),		\
2510 | 	FN(csum_diff),			\
2511 | 	FN(skb_get_tunnel_opt),		\
2512 | 	FN(skb_set_tunnel_opt),		\
2513 | 	FN(skb_change_proto),		\
2514 | 	FN(skb_change_type),		\
2515 | 	FN(skb_under_cgroup),		\
2516 | 	FN(get_hash_recalc),		\
2517 | 	FN(get_current_task),		\
2518 | 	FN(probe_write_user),		\
2519 | 	FN(current_task_under_cgroup),	\
2520 | 	FN(skb_change_tail),		\
2521 | 	FN(skb_pull_data),		\
2522 | 	FN(csum_update),		\
2523 | 	FN(set_hash_invalid),		\
2524 | 	FN(get_numa_node_id),		\
2525 | 	FN(skb_change_head),		\
2526 | 	FN(xdp_adjust_head),		\
2527 | 	FN(probe_read_str),		\
2528 | 	FN(get_socket_cookie),		\
2529 | 	FN(get_socket_uid),		\
2530 | 	FN(set_hash),			\
2531 | 	FN(setsockopt),			\
2532 | 	FN(skb_adjust_room),		\
2533 | 	FN(redirect_map),		\
2534 | 	FN(sk_redirect_map),		\
2535 | 	FN(sock_map_update),		\
2536 | 	FN(xdp_adjust_meta),		\
2537 | 	FN(perf_event_read_value),	\
2538 | 	FN(perf_prog_read_value),	\
2539 | 	FN(getsockopt),			\
2540 | 	FN(override_return),		\
2541 | 	FN(sock_ops_cb_flags_set),	\
2542 | 	FN(msg_redirect_map),		\
2543 | 	FN(msg_apply_bytes),		\
2544 | 	FN(msg_cork_bytes),		\
2545 | 	FN(msg_pull_data),		\
2546 | 	FN(bind),			\
2547 | 	FN(xdp_adjust_tail),		\
2548 | 	FN(skb_get_xfrm_state),		\
2549 | 	FN(get_stack),			\
2550 | 	FN(skb_load_bytes_relative),	\
2551 | 	FN(fib_lookup),			\
2552 | 	FN(sock_hash_update),		\
2553 | 	FN(msg_redirect_hash),		\
2554 | 	FN(sk_redirect_hash),		\
2555 | 	FN(lwt_push_encap),		\
2556 | 	FN(lwt_seg6_store_bytes),	\
2557 | 	FN(lwt_seg6_adjust_srh),	\
2558 | 	FN(lwt_seg6_action),		\
2559 | 	FN(rc_repeat),			\
2560 | 	FN(rc_keydown),			\
2561 | 	FN(skb_cgroup_id),		\
2562 | 	FN(get_current_cgroup_id),	\
2563 | 	FN(get_local_storage),		\
2564 | 	FN(sk_select_reuseport),	\
2565 | 	FN(skb_ancestor_cgroup_id),	\
2566 | 	FN(sk_lookup_tcp),		\
2567 | 	FN(sk_lookup_udp),		\
2568 | 	FN(sk_release),			\
2569 | 	FN(map_push_elem),		\
2570 | 	FN(map_pop_elem),		\
2571 | 	FN(map_peek_elem),		\
2572 | 	FN(msg_push_data),		\
2573 | 	FN(msg_pop_data),		\
2574 | 	FN(rc_pointer_rel),		\
2575 | 	FN(spin_lock),			\
2576 | 	FN(spin_unlock),		\
2577 | 	FN(sk_fullsock),		\
2578 | 	FN(tcp_sock),			\
2579 | 	FN(skb_ecn_set_ce),		\
2580 | 	FN(get_listener_sock),		\
2581 | 	FN(skc_lookup_tcp),		\
2582 | 	FN(tcp_check_syncookie),
2583 | 
2584 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper
2585 |  * function eBPF program intends to call
2586 |  */
2587 | #define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
2588 | enum bpf_func_id {
2589 | 	__BPF_FUNC_MAPPER(__BPF_ENUM_FN)
2590 | 	__BPF_FUNC_MAX_ID,
2591 | };
2592 | #undef __BPF_ENUM_FN
2593 | 
2594 | /* All flags used by eBPF helper functions, placed here. */
2595 | 
2596 | /* BPF_FUNC_skb_store_bytes flags. */
2597 | #define BPF_F_RECOMPUTE_CSUM		(1ULL << 0)
2598 | #define BPF_F_INVALIDATE_HASH		(1ULL << 1)
2599 | 
2600 | /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
2601 |  * First 4 bits are for passing the header field size.
2602 |  */
2603 | #define BPF_F_HDR_FIELD_MASK		0xfULL
2604 | 
2605 | /* BPF_FUNC_l4_csum_replace flags. */
2606 | #define BPF_F_PSEUDO_HDR		(1ULL << 4)
2607 | #define BPF_F_MARK_MANGLED_0		(1ULL << 5)
2608 | #define BPF_F_MARK_ENFORCE		(1ULL << 6)
2609 | 
2610 | /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
2611 | #define BPF_F_INGRESS			(1ULL << 0)
2612 | 
2613 | /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
2614 | #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
2615 | 
2616 | /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
2617 | #define BPF_F_SKIP_FIELD_MASK		0xffULL
2618 | #define BPF_F_USER_STACK		(1ULL << 8)
2619 | /* flags used by BPF_FUNC_get_stackid only. */
2620 | #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
2621 | #define BPF_F_REUSE_STACKID		(1ULL << 10)
2622 | /* flags used by BPF_FUNC_get_stack only. */
2623 | #define BPF_F_USER_BUILD_ID		(1ULL << 11)
2624 | 
2625 | /* BPF_FUNC_skb_set_tunnel_key flags. */
2626 | #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
2627 | #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
2628 | #define BPF_F_SEQ_NUMBER		(1ULL << 3)
2629 | 
2630 | /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
2631 |  * BPF_FUNC_perf_event_read_value flags.
2632 |  */
2633 | #define BPF_F_INDEX_MASK		0xffffffffULL
2634 | #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
2635 | /* BPF_FUNC_perf_event_output for sk_buff input context. */
2636 | #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
2637 | 
2638 | /* Current network namespace */
2639 | #define BPF_F_CURRENT_NETNS		(-1L)
2640 | 
2641 | /* BPF_FUNC_skb_adjust_room flags. */
2642 | #define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
2643 | 
2644 | #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
2645 | #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
2646 | #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
2647 | #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
2648 | 
2649 | /* Mode for BPF_FUNC_skb_adjust_room helper. */
2650 | enum bpf_adj_room_mode {
2651 | 	BPF_ADJ_ROOM_NET,
2652 | 	BPF_ADJ_ROOM_MAC,
2653 | };
2654 | 
2655 | /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
2656 | enum bpf_hdr_start_off {
2657 | 	BPF_HDR_START_MAC,
2658 | 	BPF_HDR_START_NET,
2659 | };
2660 | 
2661 | /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
2662 | enum bpf_lwt_encap_mode {
2663 | 	BPF_LWT_ENCAP_SEG6,
2664 | 	BPF_LWT_ENCAP_SEG6_INLINE,
2665 | 	BPF_LWT_ENCAP_IP,
2666 | };
2667 | 
2668 | #define __bpf_md_ptr(type, name)	\
2669 | union {					\
2670 | 	type name;			\
2671 | 	__u64 :64;			\
2672 | } __attribute__((aligned(8)))
2673 | 
2674 | /* user accessible mirror of in-kernel sk_buff.
2675 |  * new fields can only be added to the end of this structure
2676 |  */
2677 | struct __sk_buff {
2678 | 	__u32 len;
2679 | 	__u32 pkt_type;
2680 | 	__u32 mark;
2681 | 	__u32 queue_mapping;
2682 | 	__u32 protocol;
2683 | 	__u32 vlan_present;
2684 | 	__u32 vlan_tci;
2685 | 	__u32 vlan_proto;
2686 | 	__u32 priority;
2687 | 	__u32 ingress_ifindex;
2688 | 	__u32 ifindex;
2689 | 	__u32 tc_index;
2690 | 	__u32 cb[5];
2691 | 	__u32 hash;
2692 | 	__u32 tc_classid;
2693 | 	__u32 data;
2694 | 	__u32 data_end;
2695 | 	__u32 napi_id;
2696 | 
2697 | 	/* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
2698 | 	__u32 family;
2699 | 	__u32 remote_ip4;	/* Stored in network byte order */
2700 | 	__u32 local_ip4;	/* Stored in network byte order */
2701 | 	__u32 remote_ip6[4];	/* Stored in network byte order */
2702 | 	__u32 local_ip6[4];	/* Stored in network byte order */
2703 | 	__u32 remote_port;	/* Stored in network byte order */
2704 | 	__u32 local_port;	/* stored in host byte order */
2705 | 	/* ... here. */
2706 | 
2707 | 	__u32 data_meta;
2708 | 	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
2709 | 	__u64 tstamp;
2710 | 	__u32 wire_len;
2711 | 	__u32 gso_segs;
2712 | 	__bpf_md_ptr(struct bpf_sock *, sk);
2713 | };
2714 | 
2715 | struct bpf_tunnel_key {
2716 | 	__u32 tunnel_id;
2717 | 	union {
2718 | 		__u32 remote_ipv4;
2719 | 		__u32 remote_ipv6[4];
2720 | 	};
2721 | 	__u8 tunnel_tos;
2722 | 	__u8 tunnel_ttl;
2723 | 	__u16 tunnel_ext;	/* Padding, future use. */
2724 | 	__u32 tunnel_label;
2725 | };
2726 | 
2727 | /* user accessible mirror of in-kernel xfrm_state.
2728 |  * new fields can only be added to the end of this structure
2729 |  */
2730 | struct bpf_xfrm_state {
2731 | 	__u32 reqid;
2732 | 	__u32 spi;	/* Stored in network byte order */
2733 | 	__u16 family;
2734 | 	__u16 ext;	/* Padding, future use. */
2735 | 	union {
2736 | 		__u32 remote_ipv4;	/* Stored in network byte order */
2737 | 		__u32 remote_ipv6[4];	/* Stored in network byte order */
2738 | 	};
2739 | };
2740 | 
2741 | /* Generic BPF return codes which all BPF program types may support.
2742 |  * The values are binary compatible with their TC_ACT_* counter-part to
2743 |  * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
2744 |  * programs.
2745 |  *
2746 |  * XDP is handled seprately, see XDP_*.
2747 |  */
2748 | enum bpf_ret_code {
2749 | 	BPF_OK = 0,
2750 | 	/* 1 reserved */
2751 | 	BPF_DROP = 2,
2752 | 	/* 3-6 reserved */
2753 | 	BPF_REDIRECT = 7,
2754 | 	/* >127 are reserved for prog type specific return codes.
2755 | 	 *
2756 | 	 * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
2757 | 	 *    BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
2758 | 	 *    changed and should be routed based on its new L3 header.
2759 | 	 *    (This is an L3 redirect, as opposed to L2 redirect
2760 | 	 *    represented by BPF_REDIRECT above).
2761 | 	 */
2762 | 	BPF_LWT_REROUTE = 128,
2763 | };
2764 | 
2765 | struct bpf_sock {
2766 | 	__u32 bound_dev_if;
2767 | 	__u32 family;
2768 | 	__u32 type;
2769 | 	__u32 protocol;
2770 | 	__u32 mark;
2771 | 	__u32 priority;
2772 | 	/* IP address also allows 1 and 2 bytes access */
2773 | 	__u32 src_ip4;
2774 | 	__u32 src_ip6[4];
2775 | 	__u32 src_port;		/* host byte order */
2776 | 	__u32 dst_port;		/* network byte order */
2777 | 	__u32 dst_ip4;
2778 | 	__u32 dst_ip6[4];
2779 | 	__u32 state;
2780 | };
2781 | 
2782 | struct bpf_tcp_sock {
2783 | 	__u32 snd_cwnd;		/* Sending congestion window		*/
2784 | 	__u32 srtt_us;		/* smoothed round trip time << 3 in usecs */
2785 | 	__u32 rtt_min;
2786 | 	__u32 snd_ssthresh;	/* Slow start size threshold		*/
2787 | 	__u32 rcv_nxt;		/* What we want to receive next		*/
2788 | 	__u32 snd_nxt;		/* Next sequence we send		*/
2789 | 	__u32 snd_una;		/* First byte we want an ack for	*/
2790 | 	__u32 mss_cache;	/* Cached effective mss, not including SACKS */
2791 | 	__u32 ecn_flags;	/* ECN status bits.			*/
2792 | 	__u32 rate_delivered;	/* saved rate sample: packets delivered */
2793 | 	__u32 rate_interval_us;	/* saved rate sample: time elapsed */
2794 | 	__u32 packets_out;	/* Packets which are "in flight"	*/
2795 | 	__u32 retrans_out;	/* Retransmitted packets out		*/
2796 | 	__u32 total_retrans;	/* Total retransmits for entire connection */
2797 | 	__u32 segs_in;		/* RFC4898 tcpEStatsPerfSegsIn
2798 | 				 * total number of segments in.
2799 | 				 */
2800 | 	__u32 data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
2801 | 				 * total number of data segments in.
2802 | 				 */
2803 | 	__u32 segs_out;		/* RFC4898 tcpEStatsPerfSegsOut
2804 | 				 * The total number of segments sent.
2805 | 				 */
2806 | 	__u32 data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
2807 | 				 * total number of data segments sent.
2808 | 				 */
2809 | 	__u32 lost_out;		/* Lost packets			*/
2810 | 	__u32 sacked_out;	/* SACK'd packets			*/
2811 | 	__u64 bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
2812 | 				 * sum(delta(rcv_nxt)), or how many bytes
2813 | 				 * were acked.
2814 | 				 */
2815 | 	__u64 bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
2816 | 				 * sum(delta(snd_una)), or how many bytes
2817 | 				 * were acked.
2818 | 				 */
2819 | };
2820 | 
2821 | struct bpf_sock_tuple {
2822 | 	union {
2823 | 		struct {
2824 | 			__be32 saddr;
2825 | 			__be32 daddr;
2826 | 			__be16 sport;
2827 | 			__be16 dport;
2828 | 		} ipv4;
2829 | 		struct {
2830 | 			__be32 saddr[4];
2831 | 			__be32 daddr[4];
2832 | 			__be16 sport;
2833 | 			__be16 dport;
2834 | 		} ipv6;
2835 | 	};
2836 | };
2837 | 
2838 | #define XDP_PACKET_HEADROOM 256
2839 | 
2840 | /* User return codes for XDP prog type.
2841 |  * A valid XDP program must return one of these defined values. All other
2842 |  * return codes are reserved for future use. Unknown return codes will
2843 |  * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
2844 |  */
2845 | enum xdp_action {
2846 | 	XDP_ABORTED = 0,
2847 | 	XDP_DROP,
2848 | 	XDP_PASS,
2849 | 	XDP_TX,
2850 | 	XDP_REDIRECT,
2851 | };
2852 | 
2853 | /* user accessible metadata for XDP packet hook
2854 |  * new fields must be added to the end of this structure
2855 |  */
2856 | struct xdp_md {
2857 | 	__u32 data;
2858 | 	__u32 data_end;
2859 | 	__u32 data_meta;
2860 | 	/* Below access go through struct xdp_rxq_info */
2861 | 	__u32 ingress_ifindex; /* rxq->dev->ifindex */
2862 | 	__u32 rx_queue_index;  /* rxq->queue_index  */
2863 | };
2864 | 
2865 | enum sk_action {
2866 | 	SK_DROP = 0,
2867 | 	SK_PASS,
2868 | };
2869 | 
2870 | /* user accessible metadata for SK_MSG packet hook, new fields must
2871 |  * be added to the end of this structure
2872 |  */
2873 | struct sk_msg_md {
2874 | 	__bpf_md_ptr(void *, data);
2875 | 	__bpf_md_ptr(void *, data_end);
2876 | 
2877 | 	__u32 family;
2878 | 	__u32 remote_ip4;	/* Stored in network byte order */
2879 | 	__u32 local_ip4;	/* Stored in network byte order */
2880 | 	__u32 remote_ip6[4];	/* Stored in network byte order */
2881 | 	__u32 local_ip6[4];	/* Stored in network byte order */
2882 | 	__u32 remote_port;	/* Stored in network byte order */
2883 | 	__u32 local_port;	/* stored in host byte order */
2884 | 	__u32 size;		/* Total size of sk_msg */
2885 | };
2886 | 
2887 | struct sk_reuseport_md {
2888 | 	/*
2889 | 	 * Start of directly accessible data. It begins from
2890 | 	 * the tcp/udp header.
2891 | 	 */
2892 | 	__bpf_md_ptr(void *, data);
2893 | 	/* End of directly accessible data */
2894 | 	__bpf_md_ptr(void *, data_end);
2895 | 	/*
2896 | 	 * Total length of packet (starting from the tcp/udp header).
2897 | 	 * Note that the directly accessible bytes (data_end - data)
2898 | 	 * could be less than this "len".  Those bytes could be
2899 | 	 * indirectly read by a helper "bpf_skb_load_bytes()".
2900 | 	 */
2901 | 	__u32 len;
2902 | 	/*
2903 | 	 * Eth protocol in the mac header (network byte order). e.g.
2904 | 	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
2905 | 	 */
2906 | 	__u32 eth_protocol;
2907 | 	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
2908 | 	__u32 bind_inany;	/* Is sock bound to an INANY address? */
2909 | 	__u32 hash;		/* A hash of the packet 4 tuples */
2910 | };
2911 | 
2912 | #define BPF_TAG_SIZE	8
2913 | 
2914 | struct bpf_prog_info {
2915 | 	__u32 type;
2916 | 	__u32 id;
2917 | 	__u8  tag[BPF_TAG_SIZE];
2918 | 	__u32 jited_prog_len;
2919 | 	__u32 xlated_prog_len;
2920 | 	__aligned_u64 jited_prog_insns;
2921 | 	__aligned_u64 xlated_prog_insns;
2922 | 	__u64 load_time;	/* ns since boottime */
2923 | 	__u32 created_by_uid;
2924 | 	__u32 nr_map_ids;
2925 | 	__aligned_u64 map_ids;
2926 | 	char name[BPF_OBJ_NAME_LEN];
2927 | 	__u32 ifindex;
2928 | 	__u32 gpl_compatible:1;
2929 | 	__u64 netns_dev;
2930 | 	__u64 netns_ino;
2931 | 	__u32 nr_jited_ksyms;
2932 | 	__u32 nr_jited_func_lens;
2933 | 	__aligned_u64 jited_ksyms;
2934 | 	__aligned_u64 jited_func_lens;
2935 | 	__u32 btf_id;
2936 | 	__u32 func_info_rec_size;
2937 | 	__aligned_u64 func_info;
2938 | 	__u32 nr_func_info;
2939 | 	__u32 nr_line_info;
2940 | 	__aligned_u64 line_info;
2941 | 	__aligned_u64 jited_line_info;
2942 | 	__u32 nr_jited_line_info;
2943 | 	__u32 line_info_rec_size;
2944 | 	__u32 jited_line_info_rec_size;
2945 | 	__u32 nr_prog_tags;
2946 | 	__aligned_u64 prog_tags;
2947 | 	__u64 run_time_ns;
2948 | 	__u64 run_cnt;
2949 | } __attribute__((aligned(8)));
2950 | 
2951 | struct bpf_map_info {
2952 | 	__u32 type;
2953 | 	__u32 id;
2954 | 	__u32 key_size;
2955 | 	__u32 value_size;
2956 | 	__u32 max_entries;
2957 | 	__u32 map_flags;
2958 | 	char  name[BPF_OBJ_NAME_LEN];
2959 | 	__u32 ifindex;
2960 | 	__u32 :32;
2961 | 	__u64 netns_dev;
2962 | 	__u64 netns_ino;
2963 | 	__u32 btf_id;
2964 | 	__u32 btf_key_type_id;
2965 | 	__u32 btf_value_type_id;
2966 | } __attribute__((aligned(8)));
2967 | 
2968 | struct bpf_btf_info {
2969 | 	__aligned_u64 btf;
2970 | 	__u32 btf_size;
2971 | 	__u32 id;
2972 | } __attribute__((aligned(8)));
2973 | 
2974 | /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
2975 |  * by user and intended to be used by socket (e.g. to bind to, depends on
2976 |  * attach attach type).
2977 |  */
2978 | struct bpf_sock_addr {
2979 | 	__u32 user_family;	/* Allows 4-byte read, but no write. */
2980 | 	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
2981 | 				 * Stored in network byte order.
2982 | 				 */
2983 | 	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
2984 | 				 * Stored in network byte order.
2985 | 				 */
2986 | 	__u32 user_port;	/* Allows 4-byte read and write.
2987 | 				 * Stored in network byte order
2988 | 				 */
2989 | 	__u32 family;		/* Allows 4-byte read, but no write */
2990 | 	__u32 type;		/* Allows 4-byte read, but no write */
2991 | 	__u32 protocol;		/* Allows 4-byte read, but no write */
2992 | 	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read an 4-byte write.
2993 | 				 * Stored in network byte order.
2994 | 				 */
2995 | 	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
2996 | 				 * Stored in network byte order.
2997 | 				 */
2998 | };
2999 | 
3000 | /* User bpf_sock_ops struct to access socket values and specify request ops
3001 |  * and their replies.
3002 |  * Some of this fields are in network (bigendian) byte order and may need
3003 |  * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
3004 |  * New fields can only be added at the end of this structure
3005 |  */
3006 | struct bpf_sock_ops {
3007 | 	__u32 op;
3008 | 	union {
3009 | 		__u32 args[4];		/* Optionally passed to bpf program */
3010 | 		__u32 reply;		/* Returned by bpf program	    */
3011 | 		__u32 replylong[4];	/* Optionally returned by bpf prog  */
3012 | 	};
3013 | 	__u32 family;
3014 | 	__u32 remote_ip4;	/* Stored in network byte order */
3015 | 	__u32 local_ip4;	/* Stored in network byte order */
3016 | 	__u32 remote_ip6[4];	/* Stored in network byte order */
3017 | 	__u32 local_ip6[4];	/* Stored in network byte order */
3018 | 	__u32 remote_port;	/* Stored in network byte order */
3019 | 	__u32 local_port;	/* stored in host byte order */
3020 | 	__u32 is_fullsock;	/* Some TCP fields are only valid if
3021 | 				 * there is a full socket. If not, the
3022 | 				 * fields read as zero.
3023 | 				 */
3024 | 	__u32 snd_cwnd;
3025 | 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
3026 | 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
3027 | 	__u32 state;
3028 | 	__u32 rtt_min;
3029 | 	__u32 snd_ssthresh;
3030 | 	__u32 rcv_nxt;
3031 | 	__u32 snd_nxt;
3032 | 	__u32 snd_una;
3033 | 	__u32 mss_cache;
3034 | 	__u32 ecn_flags;
3035 | 	__u32 rate_delivered;
3036 | 	__u32 rate_interval_us;
3037 | 	__u32 packets_out;
3038 | 	__u32 retrans_out;
3039 | 	__u32 total_retrans;
3040 | 	__u32 segs_in;
3041 | 	__u32 data_segs_in;
3042 | 	__u32 segs_out;
3043 | 	__u32 data_segs_out;
3044 | 	__u32 lost_out;
3045 | 	__u32 sacked_out;
3046 | 	__u32 sk_txhash;
3047 | 	__u64 bytes_received;
3048 | 	__u64 bytes_acked;
3049 | };
3050 | 
3051 | /* Definitions for bpf_sock_ops_cb_flags */
3052 | #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
3053 | #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
3054 | #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
3055 | #define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
3056 | 							 * supported cb flags
3057 | 							 */
3058 | 
3059 | /* List of known BPF sock_ops operators.
3060 |  * New entries can only be added at the end
3061 |  */
3062 | enum {
3063 | 	BPF_SOCK_OPS_VOID,
3064 | 	BPF_SOCK_OPS_TIMEOUT_INIT,	/* Should return SYN-RTO value to use or
3065 | 					 * -1 if default value should be used
3066 | 					 */
3067 | 	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertized
3068 | 					 * window (in packets) or -1 if default
3069 | 					 * value should be used
3070 | 					 */
3071 | 	BPF_SOCK_OPS_TCP_CONNECT_CB,	/* Calls BPF program right before an
3072 | 					 * active connection is initialized
3073 | 					 */
3074 | 	BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,	/* Calls BPF program when an
3075 | 						 * active connection is
3076 | 						 * established
3077 | 						 */
3078 | 	BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,	/* Calls BPF program when a
3079 | 						 * passive connection is
3080 | 						 * established
3081 | 						 */
3082 | 	BPF_SOCK_OPS_NEEDS_ECN,		/* If connection's congestion control
3083 | 					 * needs ECN
3084 | 					 */
3085 | 	BPF_SOCK_OPS_BASE_RTT,		/* Get base RTT. The correct value is
3086 | 					 * based on the path and may be
3087 | 					 * dependent on the congestion control
3088 | 					 * algorithm. In general it indicates
3089 | 					 * a congestion threshold. RTTs above
3090 | 					 * this indicate congestion
3091 | 					 */
3092 | 	BPF_SOCK_OPS_RTO_CB,		/* Called when an RTO has triggered.
3093 | 					 * Arg1: value of icsk_retransmits
3094 | 					 * Arg2: value of icsk_rto
3095 | 					 * Arg3: whether RTO has expired
3096 | 					 */
3097 | 	BPF_SOCK_OPS_RETRANS_CB,	/* Called when skb is retransmitted.
3098 | 					 * Arg1: sequence number of 1st byte
3099 | 					 * Arg2: # segments
3100 | 					 * Arg3: return value of
3101 | 					 *       tcp_transmit_skb (0 => success)
3102 | 					 */
3103 | 	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
3104 | 					 * Arg1: old_state
3105 | 					 * Arg2: new_state
3106 | 					 */
3107 | 	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
3108 | 					 * socket transition to LISTEN state.
3109 | 					 */
3110 | };
3111 | 
3112 | /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
3113 |  * changes between the TCP and BPF versions. Ideally this should never happen.
3114 |  * If it does, we need to add code to convert them before calling
3115 |  * the BPF sock_ops function.
3116 |  */
3117 | enum {
3118 | 	BPF_TCP_ESTABLISHED = 1,
3119 | 	BPF_TCP_SYN_SENT,
3120 | 	BPF_TCP_SYN_RECV,
3121 | 	BPF_TCP_FIN_WAIT1,
3122 | 	BPF_TCP_FIN_WAIT2,
3123 | 	BPF_TCP_TIME_WAIT,
3124 | 	BPF_TCP_CLOSE,
3125 | 	BPF_TCP_CLOSE_WAIT,
3126 | 	BPF_TCP_LAST_ACK,
3127 | 	BPF_TCP_LISTEN,
3128 | 	BPF_TCP_CLOSING,	/* Now a valid state */
3129 | 	BPF_TCP_NEW_SYN_RECV,
3130 | 
3131 | 	BPF_TCP_MAX_STATES	/* Leave at the end! */
3132 | };
3133 | 
3134 | #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
3135 | #define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
3136 | 
3137 | struct bpf_perf_event_value {
3138 | 	__u64 counter;
3139 | 	__u64 enabled;
3140 | 	__u64 running;
3141 | };
3142 | 
3143 | #define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
3144 | #define BPF_DEVCG_ACC_READ	(1ULL << 1)
3145 | #define BPF_DEVCG_ACC_WRITE	(1ULL << 2)
3146 | 
3147 | #define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
3148 | #define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
3149 | 
3150 | struct bpf_cgroup_dev_ctx {
3151 | 	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
3152 | 	__u32 access_type;
3153 | 	__u32 major;
3154 | 	__u32 minor;
3155 | };
3156 | 
3157 | struct bpf_raw_tracepoint_args {
3158 | 	__u64 args[0];
3159 | };
3160 | 
3161 | /* DIRECT:  Skip the FIB rules and go to FIB table associated with device
3162 |  * OUTPUT:  Do lookup from egress perspective; default is ingress
3163 |  */
3164 | #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
3165 | #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
3166 | 
3167 | enum {
3168 | 	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
3169 | 	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
3170 | 	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
3171 | 	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
3172 | 	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
3173 | 	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
3174 | 	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
3175 | 	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
3176 | 	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
3177 | };
3178 | 
3179 | struct bpf_fib_lookup {
3180 | 	/* input:  network family for lookup (AF_INET, AF_INET6)
3181 | 	 * output: network family of egress nexthop
3182 | 	 */
3183 | 	__u8	family;
3184 | 
3185 | 	/* set if lookup is to consider L4 data - e.g., FIB rules */
3186 | 	__u8	l4_protocol;
3187 | 	__be16	sport;
3188 | 	__be16	dport;
3189 | 
3190 | 	/* total length of packet from network header - used for MTU check */
3191 | 	__u16	tot_len;
3192 | 
3193 | 	/* input: L3 device index for lookup
3194 | 	 * output: device index from FIB lookup
3195 | 	 */
3196 | 	__u32	ifindex;
3197 | 
3198 | 	union {
3199 | 		/* inputs to lookup */
3200 | 		__u8	tos;		/* AF_INET  */
3201 | 		__be32	flowinfo;	/* AF_INET6, flow_label + priority */
3202 | 
3203 | 		/* output: metric of fib result (IPv4/IPv6 only) */
3204 | 		__u32	rt_metric;
3205 | 	};
3206 | 
3207 | 	union {
3208 | 		__be32		ipv4_src;
3209 | 		__u32		ipv6_src[4];  /* in6_addr; network order */
3210 | 	};
3211 | 
3212 | 	/* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
3213 | 	 * network header. output: bpf_fib_lookup sets to gateway address
3214 | 	 * if FIB lookup returns gateway route
3215 | 	 */
3216 | 	union {
3217 | 		__be32		ipv4_dst;
3218 | 		__u32		ipv6_dst[4];  /* in6_addr; network order */
3219 | 	};
3220 | 
3221 | 	/* output */
3222 | 	__be16	h_vlan_proto;
3223 | 	__be16	h_vlan_TCI;
3224 | 	__u8	smac[6];     /* ETH_ALEN */
3225 | 	__u8	dmac[6];     /* ETH_ALEN */
3226 | };
3227 | 
3228 | enum bpf_task_fd_type {
3229 | 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
3230 | 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
3231 | 	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
3232 | 	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
3233 | 	BPF_FD_TYPE_UPROBE,		/* filename + offset */
3234 | 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
3235 | };
3236 | 
3237 | struct bpf_flow_keys {
3238 | 	__u16	nhoff;
3239 | 	__u16	thoff;
3240 | 	__u16	addr_proto;			/* ETH_P_* of valid addrs */
3241 | 	__u8	is_frag;
3242 | 	__u8	is_first_frag;
3243 | 	__u8	is_encap;
3244 | 	__u8	ip_proto;
3245 | 	__be16	n_proto;
3246 | 	__be16	sport;
3247 | 	__be16	dport;
3248 | 	union {
3249 | 		struct {
3250 | 			__be32	ipv4_src;
3251 | 			__be32	ipv4_dst;
3252 | 		};
3253 | 		struct {
3254 | 			__u32	ipv6_src[4];	/* in6_addr; network order */
3255 | 			__u32	ipv6_dst[4];	/* in6_addr; network order */
3256 | 		};
3257 | 	};
3258 | };
3259 | 
3260 | struct bpf_func_info {
3261 | 	__u32	insn_off;
3262 | 	__u32	type_id;
3263 | };
3264 | 
3265 | #define BPF_LINE_INFO_LINE_NUM(line_col)	((line_col) >> 10)
3266 | #define BPF_LINE_INFO_LINE_COL(line_col)	((line_col) & 0x3ff)
3267 | 
3268 | struct bpf_line_info {
3269 | 	__u32	insn_off;
3270 | 	__u32	file_name_off;
3271 | 	__u32	line_off;
3272 | 	__u32	line_col;
3273 | };
3274 | 
3275 | struct bpf_spin_lock {
3276 | 	__u32	val;
3277 | };
3278 | #endif /* _UAPI__LINUX_BPF_H__ */
3279 | 


--------------------------------------------------------------------------------
/headers/linux/err.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 2 | 
 3 | #ifndef __LINUX_ERR_H
 4 | #define __LINUX_ERR_H
 5 | 
 6 | #include <linux/types.h>
 7 | #include <asm/errno.h>
 8 | 
 9 | #define MAX_ERRNO       4095
10 | 
11 | #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO)
12 | 
13 | static inline void * ERR_PTR(long error_)
14 | {
15 | 	return (void *) error_;
16 | }
17 | 
18 | static inline long PTR_ERR(const void *ptr)
19 | {
20 | 	return (long) ptr;
21 | }
22 | 
23 | static inline bool IS_ERR(const void *ptr)
24 | {
25 | 	return IS_ERR_VALUE((unsigned long)ptr);
26 | }
27 | 
28 | static inline bool IS_ERR_OR_NULL(const void *ptr)
29 | {
30 | 	return (!ptr) || IS_ERR_VALUE((unsigned long)ptr);
31 | }
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/headers/linux/if_link.h:
--------------------------------------------------------------------------------
   1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
   2 | #ifndef _UAPI_LINUX_IF_LINK_H
   3 | #define _UAPI_LINUX_IF_LINK_H
   4 | 
   5 | #include <linux/types.h>
   6 | #include <linux/netlink.h>
   7 | 
   8 | /* This struct should be in sync with struct rtnl_link_stats64 */
   9 | struct rtnl_link_stats {
  10 | 	__u32	rx_packets;		/* total packets received	*/
  11 | 	__u32	tx_packets;		/* total packets transmitted	*/
  12 | 	__u32	rx_bytes;		/* total bytes received 	*/
  13 | 	__u32	tx_bytes;		/* total bytes transmitted	*/
  14 | 	__u32	rx_errors;		/* bad packets received		*/
  15 | 	__u32	tx_errors;		/* packet transmit problems	*/
  16 | 	__u32	rx_dropped;		/* no space in linux buffers	*/
  17 | 	__u32	tx_dropped;		/* no space available in linux	*/
  18 | 	__u32	multicast;		/* multicast packets received	*/
  19 | 	__u32	collisions;
  20 | 
  21 | 	/* detailed rx_errors: */
  22 | 	__u32	rx_length_errors;
  23 | 	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
  24 | 	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
  25 | 	__u32	rx_frame_errors;	/* recv'd frame alignment error */
  26 | 	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
  27 | 	__u32	rx_missed_errors;	/* receiver missed packet	*/
  28 | 
  29 | 	/* detailed tx_errors */
  30 | 	__u32	tx_aborted_errors;
  31 | 	__u32	tx_carrier_errors;
  32 | 	__u32	tx_fifo_errors;
  33 | 	__u32	tx_heartbeat_errors;
  34 | 	__u32	tx_window_errors;
  35 | 
  36 | 	/* for cslip etc */
  37 | 	__u32	rx_compressed;
  38 | 	__u32	tx_compressed;
  39 | 
  40 | 	__u32	rx_nohandler;		/* dropped, no handler found	*/
  41 | };
  42 | 
  43 | /* The main device statistics structure */
  44 | struct rtnl_link_stats64 {
  45 | 	__u64	rx_packets;		/* total packets received	*/
  46 | 	__u64	tx_packets;		/* total packets transmitted	*/
  47 | 	__u64	rx_bytes;		/* total bytes received 	*/
  48 | 	__u64	tx_bytes;		/* total bytes transmitted	*/
  49 | 	__u64	rx_errors;		/* bad packets received		*/
  50 | 	__u64	tx_errors;		/* packet transmit problems	*/
  51 | 	__u64	rx_dropped;		/* no space in linux buffers	*/
  52 | 	__u64	tx_dropped;		/* no space available in linux	*/
  53 | 	__u64	multicast;		/* multicast packets received	*/
  54 | 	__u64	collisions;
  55 | 
  56 | 	/* detailed rx_errors: */
  57 | 	__u64	rx_length_errors;
  58 | 	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
  59 | 	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
  60 | 	__u64	rx_frame_errors;	/* recv'd frame alignment error */
  61 | 	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
  62 | 	__u64	rx_missed_errors;	/* receiver missed packet	*/
  63 | 
  64 | 	/* detailed tx_errors */
  65 | 	__u64	tx_aborted_errors;
  66 | 	__u64	tx_carrier_errors;
  67 | 	__u64	tx_fifo_errors;
  68 | 	__u64	tx_heartbeat_errors;
  69 | 	__u64	tx_window_errors;
  70 | 
  71 | 	/* for cslip etc */
  72 | 	__u64	rx_compressed;
  73 | 	__u64	tx_compressed;
  74 | 
  75 | 	__u64	rx_nohandler;		/* dropped, no handler found	*/
  76 | };
  77 | 
  78 | /* The struct should be in sync with struct ifmap */
  79 | struct rtnl_link_ifmap {
  80 | 	__u64	mem_start;
  81 | 	__u64	mem_end;
  82 | 	__u64	base_addr;
  83 | 	__u16	irq;
  84 | 	__u8	dma;
  85 | 	__u8	port;
  86 | };
  87 | 
  88 | /*
  89 |  * IFLA_AF_SPEC
  90 |  *   Contains nested attributes for address family specific attributes.
  91 |  *   Each address family may create a attribute with the address family
  92 |  *   number as type and create its own attribute structure in it.
  93 |  *
  94 |  *   Example:
  95 |  *   [IFLA_AF_SPEC] = {
  96 |  *       [AF_INET] = {
  97 |  *           [IFLA_INET_CONF] = ...,
  98 |  *       },
  99 |  *       [AF_INET6] = {
 100 |  *           [IFLA_INET6_FLAGS] = ...,
 101 |  *           [IFLA_INET6_CONF] = ...,
 102 |  *       }
 103 |  *   }
 104 |  */
 105 | 
 106 | enum {
 107 | 	IFLA_UNSPEC,
 108 | 	IFLA_ADDRESS,
 109 | 	IFLA_BROADCAST,
 110 | 	IFLA_IFNAME,
 111 | 	IFLA_MTU,
 112 | 	IFLA_LINK,
 113 | 	IFLA_QDISC,
 114 | 	IFLA_STATS,
 115 | 	IFLA_COST,
 116 | #define IFLA_COST IFLA_COST
 117 | 	IFLA_PRIORITY,
 118 | #define IFLA_PRIORITY IFLA_PRIORITY
 119 | 	IFLA_MASTER,
 120 | #define IFLA_MASTER IFLA_MASTER
 121 | 	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
 122 | #define IFLA_WIRELESS IFLA_WIRELESS
 123 | 	IFLA_PROTINFO,		/* Protocol specific information for a link */
 124 | #define IFLA_PROTINFO IFLA_PROTINFO
 125 | 	IFLA_TXQLEN,
 126 | #define IFLA_TXQLEN IFLA_TXQLEN
 127 | 	IFLA_MAP,
 128 | #define IFLA_MAP IFLA_MAP
 129 | 	IFLA_WEIGHT,
 130 | #define IFLA_WEIGHT IFLA_WEIGHT
 131 | 	IFLA_OPERSTATE,
 132 | 	IFLA_LINKMODE,
 133 | 	IFLA_LINKINFO,
 134 | #define IFLA_LINKINFO IFLA_LINKINFO
 135 | 	IFLA_NET_NS_PID,
 136 | 	IFLA_IFALIAS,
 137 | 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
 138 | 	IFLA_VFINFO_LIST,
 139 | 	IFLA_STATS64,
 140 | 	IFLA_VF_PORTS,
 141 | 	IFLA_PORT_SELF,
 142 | 	IFLA_AF_SPEC,
 143 | 	IFLA_GROUP,		/* Group the device belongs to */
 144 | 	IFLA_NET_NS_FD,
 145 | 	IFLA_EXT_MASK,		/* Extended info mask, VFs, etc */
 146 | 	IFLA_PROMISCUITY,	/* Promiscuity count: > 0 means acts PROMISC */
 147 | #define IFLA_PROMISCUITY IFLA_PROMISCUITY
 148 | 	IFLA_NUM_TX_QUEUES,
 149 | 	IFLA_NUM_RX_QUEUES,
 150 | 	IFLA_CARRIER,
 151 | 	IFLA_PHYS_PORT_ID,
 152 | 	IFLA_CARRIER_CHANGES,
 153 | 	IFLA_PHYS_SWITCH_ID,
 154 | 	IFLA_LINK_NETNSID,
 155 | 	IFLA_PHYS_PORT_NAME,
 156 | 	IFLA_PROTO_DOWN,
 157 | 	IFLA_GSO_MAX_SEGS,
 158 | 	IFLA_GSO_MAX_SIZE,
 159 | 	IFLA_PAD,
 160 | 	IFLA_XDP,
 161 | 	IFLA_EVENT,
 162 | 	IFLA_NEW_NETNSID,
 163 | 	IFLA_IF_NETNSID,
 164 | 	IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
 165 | 	IFLA_CARRIER_UP_COUNT,
 166 | 	IFLA_CARRIER_DOWN_COUNT,
 167 | 	IFLA_NEW_IFINDEX,
 168 | 	IFLA_MIN_MTU,
 169 | 	IFLA_MAX_MTU,
 170 | 	__IFLA_MAX
 171 | };
 172 | 
 173 | 
 174 | #define IFLA_MAX (__IFLA_MAX - 1)
 175 | 
 176 | /* backwards compatibility for userspace */
 177 | #ifndef __KERNEL__
 178 | #define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
 179 | #define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
 180 | #endif
 181 | 
 182 | enum {
 183 | 	IFLA_INET_UNSPEC,
 184 | 	IFLA_INET_CONF,
 185 | 	__IFLA_INET_MAX,
 186 | };
 187 | 
 188 | #define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
 189 | 
 190 | /* ifi_flags.
 191 | 
 192 |    IFF_* flags.
 193 | 
 194 |    The only change is:
 195 |    IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
 196 |    more not changeable by user. They describe link media
 197 |    characteristics and set by device driver.
 198 | 
 199 |    Comments:
 200 |    - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
 201 |    - If neither of these three flags are set;
 202 |      the interface is NBMA.
 203 | 
 204 |    - IFF_MULTICAST does not mean anything special:
 205 |    multicasts can be used on all not-NBMA links.
 206 |    IFF_MULTICAST means that this media uses special encapsulation
 207 |    for multicast frames. Apparently, all IFF_POINTOPOINT and
 208 |    IFF_BROADCAST devices are able to use multicasts too.
 209 |  */
 210 | 
 211 | /* IFLA_LINK.
 212 |    For usual devices it is equal ifi_index.
 213 |    If it is a "virtual interface" (f.e. tunnel), ifi_link
 214 |    can point to real physical interface (f.e. for bandwidth calculations),
 215 |    or maybe 0, what means, that real media is unknown (usual
 216 |    for IPIP tunnels, when route to endpoint is allowed to change)
 217 |  */
 218 | 
 219 | /* Subtype attributes for IFLA_PROTINFO */
 220 | enum {
 221 | 	IFLA_INET6_UNSPEC,
 222 | 	IFLA_INET6_FLAGS,	/* link flags			*/
 223 | 	IFLA_INET6_CONF,	/* sysctl parameters		*/
 224 | 	IFLA_INET6_STATS,	/* statistics			*/
 225 | 	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
 226 | 	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
 227 | 	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 228 | 	IFLA_INET6_TOKEN,	/* device token			*/
 229 | 	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
 230 | 	__IFLA_INET6_MAX
 231 | };
 232 | 
 233 | #define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
 234 | 
 235 | enum in6_addr_gen_mode {
 236 | 	IN6_ADDR_GEN_MODE_EUI64,
 237 | 	IN6_ADDR_GEN_MODE_NONE,
 238 | 	IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
 239 | 	IN6_ADDR_GEN_MODE_RANDOM,
 240 | };
 241 | 
 242 | /* Bridge section */
 243 | 
 244 | enum {
 245 | 	IFLA_BR_UNSPEC,
 246 | 	IFLA_BR_FORWARD_DELAY,
 247 | 	IFLA_BR_HELLO_TIME,
 248 | 	IFLA_BR_MAX_AGE,
 249 | 	IFLA_BR_AGEING_TIME,
 250 | 	IFLA_BR_STP_STATE,
 251 | 	IFLA_BR_PRIORITY,
 252 | 	IFLA_BR_VLAN_FILTERING,
 253 | 	IFLA_BR_VLAN_PROTOCOL,
 254 | 	IFLA_BR_GROUP_FWD_MASK,
 255 | 	IFLA_BR_ROOT_ID,
 256 | 	IFLA_BR_BRIDGE_ID,
 257 | 	IFLA_BR_ROOT_PORT,
 258 | 	IFLA_BR_ROOT_PATH_COST,
 259 | 	IFLA_BR_TOPOLOGY_CHANGE,
 260 | 	IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
 261 | 	IFLA_BR_HELLO_TIMER,
 262 | 	IFLA_BR_TCN_TIMER,
 263 | 	IFLA_BR_TOPOLOGY_CHANGE_TIMER,
 264 | 	IFLA_BR_GC_TIMER,
 265 | 	IFLA_BR_GROUP_ADDR,
 266 | 	IFLA_BR_FDB_FLUSH,
 267 | 	IFLA_BR_MCAST_ROUTER,
 268 | 	IFLA_BR_MCAST_SNOOPING,
 269 | 	IFLA_BR_MCAST_QUERY_USE_IFADDR,
 270 | 	IFLA_BR_MCAST_QUERIER,
 271 | 	IFLA_BR_MCAST_HASH_ELASTICITY,
 272 | 	IFLA_BR_MCAST_HASH_MAX,
 273 | 	IFLA_BR_MCAST_LAST_MEMBER_CNT,
 274 | 	IFLA_BR_MCAST_STARTUP_QUERY_CNT,
 275 | 	IFLA_BR_MCAST_LAST_MEMBER_INTVL,
 276 | 	IFLA_BR_MCAST_MEMBERSHIP_INTVL,
 277 | 	IFLA_BR_MCAST_QUERIER_INTVL,
 278 | 	IFLA_BR_MCAST_QUERY_INTVL,
 279 | 	IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
 280 | 	IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
 281 | 	IFLA_BR_NF_CALL_IPTABLES,
 282 | 	IFLA_BR_NF_CALL_IP6TABLES,
 283 | 	IFLA_BR_NF_CALL_ARPTABLES,
 284 | 	IFLA_BR_VLAN_DEFAULT_PVID,
 285 | 	IFLA_BR_PAD,
 286 | 	IFLA_BR_VLAN_STATS_ENABLED,
 287 | 	IFLA_BR_MCAST_STATS_ENABLED,
 288 | 	IFLA_BR_MCAST_IGMP_VERSION,
 289 | 	IFLA_BR_MCAST_MLD_VERSION,
 290 | 	IFLA_BR_VLAN_STATS_PER_PORT,
 291 | 	IFLA_BR_MULTI_BOOLOPT,
 292 | 	__IFLA_BR_MAX,
 293 | };
 294 | 
 295 | #define IFLA_BR_MAX	(__IFLA_BR_MAX - 1)
 296 | 
 297 | struct ifla_bridge_id {
 298 | 	__u8	prio[2];
 299 | 	__u8	addr[6]; /* ETH_ALEN */
 300 | };
 301 | 
 302 | enum {
 303 | 	BRIDGE_MODE_UNSPEC,
 304 | 	BRIDGE_MODE_HAIRPIN,
 305 | };
 306 | 
 307 | enum {
 308 | 	IFLA_BRPORT_UNSPEC,
 309 | 	IFLA_BRPORT_STATE,	/* Spanning tree state     */
 310 | 	IFLA_BRPORT_PRIORITY,	/* "             priority  */
 311 | 	IFLA_BRPORT_COST,	/* "             cost      */
 312 | 	IFLA_BRPORT_MODE,	/* mode (hairpin)          */
 313 | 	IFLA_BRPORT_GUARD,	/* bpdu guard              */
 314 | 	IFLA_BRPORT_PROTECT,	/* root port protection    */
 315 | 	IFLA_BRPORT_FAST_LEAVE,	/* multicast fast leave    */
 316 | 	IFLA_BRPORT_LEARNING,	/* mac learning */
 317 | 	IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
 318 | 	IFLA_BRPORT_PROXYARP,	/* proxy ARP */
 319 | 	IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
 320 | 	IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
 321 | 	IFLA_BRPORT_ROOT_ID,	/* designated root */
 322 | 	IFLA_BRPORT_BRIDGE_ID,	/* designated bridge */
 323 | 	IFLA_BRPORT_DESIGNATED_PORT,
 324 | 	IFLA_BRPORT_DESIGNATED_COST,
 325 | 	IFLA_BRPORT_ID,
 326 | 	IFLA_BRPORT_NO,
 327 | 	IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
 328 | 	IFLA_BRPORT_CONFIG_PENDING,
 329 | 	IFLA_BRPORT_MESSAGE_AGE_TIMER,
 330 | 	IFLA_BRPORT_FORWARD_DELAY_TIMER,
 331 | 	IFLA_BRPORT_HOLD_TIMER,
 332 | 	IFLA_BRPORT_FLUSH,
 333 | 	IFLA_BRPORT_MULTICAST_ROUTER,
 334 | 	IFLA_BRPORT_PAD,
 335 | 	IFLA_BRPORT_MCAST_FLOOD,
 336 | 	IFLA_BRPORT_MCAST_TO_UCAST,
 337 | 	IFLA_BRPORT_VLAN_TUNNEL,
 338 | 	IFLA_BRPORT_BCAST_FLOOD,
 339 | 	IFLA_BRPORT_GROUP_FWD_MASK,
 340 | 	IFLA_BRPORT_NEIGH_SUPPRESS,
 341 | 	IFLA_BRPORT_ISOLATED,
 342 | 	IFLA_BRPORT_BACKUP_PORT,
 343 | 	__IFLA_BRPORT_MAX
 344 | };
 345 | #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
 346 | 
 347 | struct ifla_cacheinfo {
 348 | 	__u32	max_reasm_len;
 349 | 	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
 350 | 	__u32	reachable_time;
 351 | 	__u32	retrans_time;
 352 | };
 353 | 
 354 | enum {
 355 | 	IFLA_INFO_UNSPEC,
 356 | 	IFLA_INFO_KIND,
 357 | 	IFLA_INFO_DATA,
 358 | 	IFLA_INFO_XSTATS,
 359 | 	IFLA_INFO_SLAVE_KIND,
 360 | 	IFLA_INFO_SLAVE_DATA,
 361 | 	__IFLA_INFO_MAX,
 362 | };
 363 | 
 364 | #define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
 365 | 
 366 | /* VLAN section */
 367 | 
 368 | enum {
 369 | 	IFLA_VLAN_UNSPEC,
 370 | 	IFLA_VLAN_ID,
 371 | 	IFLA_VLAN_FLAGS,
 372 | 	IFLA_VLAN_EGRESS_QOS,
 373 | 	IFLA_VLAN_INGRESS_QOS,
 374 | 	IFLA_VLAN_PROTOCOL,
 375 | 	__IFLA_VLAN_MAX,
 376 | };
 377 | 
 378 | #define IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)
 379 | 
 380 | struct ifla_vlan_flags {
 381 | 	__u32	flags;
 382 | 	__u32	mask;
 383 | };
 384 | 
 385 | enum {
 386 | 	IFLA_VLAN_QOS_UNSPEC,
 387 | 	IFLA_VLAN_QOS_MAPPING,
 388 | 	__IFLA_VLAN_QOS_MAX
 389 | };
 390 | 
 391 | #define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
 392 | 
 393 | struct ifla_vlan_qos_mapping {
 394 | 	__u32 from;
 395 | 	__u32 to;
 396 | };
 397 | 
 398 | /* MACVLAN section */
 399 | enum {
 400 | 	IFLA_MACVLAN_UNSPEC,
 401 | 	IFLA_MACVLAN_MODE,
 402 | 	IFLA_MACVLAN_FLAGS,
 403 | 	IFLA_MACVLAN_MACADDR_MODE,
 404 | 	IFLA_MACVLAN_MACADDR,
 405 | 	IFLA_MACVLAN_MACADDR_DATA,
 406 | 	IFLA_MACVLAN_MACADDR_COUNT,
 407 | 	__IFLA_MACVLAN_MAX,
 408 | };
 409 | 
 410 | #define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
 411 | 
 412 | enum macvlan_mode {
 413 | 	MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
 414 | 	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
 415 | 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 416 | 	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
 417 | 	MACVLAN_MODE_SOURCE  = 16,/* use source MAC address list to assign */
 418 | };
 419 | 
 420 | enum macvlan_macaddr_mode {
 421 | 	MACVLAN_MACADDR_ADD,
 422 | 	MACVLAN_MACADDR_DEL,
 423 | 	MACVLAN_MACADDR_FLUSH,
 424 | 	MACVLAN_MACADDR_SET,
 425 | };
 426 | 
 427 | #define MACVLAN_FLAG_NOPROMISC	1
 428 | 
 429 | /* VRF section */
 430 | enum {
 431 | 	IFLA_VRF_UNSPEC,
 432 | 	IFLA_VRF_TABLE,
 433 | 	__IFLA_VRF_MAX
 434 | };
 435 | 
 436 | #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
 437 | 
 438 | enum {
 439 | 	IFLA_VRF_PORT_UNSPEC,
 440 | 	IFLA_VRF_PORT_TABLE,
 441 | 	__IFLA_VRF_PORT_MAX
 442 | };
 443 | 
 444 | #define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
 445 | 
 446 | /* MACSEC section */
 447 | enum {
 448 | 	IFLA_MACSEC_UNSPEC,
 449 | 	IFLA_MACSEC_SCI,
 450 | 	IFLA_MACSEC_PORT,
 451 | 	IFLA_MACSEC_ICV_LEN,
 452 | 	IFLA_MACSEC_CIPHER_SUITE,
 453 | 	IFLA_MACSEC_WINDOW,
 454 | 	IFLA_MACSEC_ENCODING_SA,
 455 | 	IFLA_MACSEC_ENCRYPT,
 456 | 	IFLA_MACSEC_PROTECT,
 457 | 	IFLA_MACSEC_INC_SCI,
 458 | 	IFLA_MACSEC_ES,
 459 | 	IFLA_MACSEC_SCB,
 460 | 	IFLA_MACSEC_REPLAY_PROTECT,
 461 | 	IFLA_MACSEC_VALIDATION,
 462 | 	IFLA_MACSEC_PAD,
 463 | 	__IFLA_MACSEC_MAX,
 464 | };
 465 | 
 466 | #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 467 | 
 468 | /* XFRM section */
 469 | enum {
 470 | 	IFLA_XFRM_UNSPEC,
 471 | 	IFLA_XFRM_LINK,
 472 | 	IFLA_XFRM_IF_ID,
 473 | 	__IFLA_XFRM_MAX
 474 | };
 475 | 
 476 | #define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
 477 | 
 478 | enum macsec_validation_type {
 479 | 	MACSEC_VALIDATE_DISABLED = 0,
 480 | 	MACSEC_VALIDATE_CHECK = 1,
 481 | 	MACSEC_VALIDATE_STRICT = 2,
 482 | 	__MACSEC_VALIDATE_END,
 483 | 	MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
 484 | };
 485 | 
 486 | /* IPVLAN section */
 487 | enum {
 488 | 	IFLA_IPVLAN_UNSPEC,
 489 | 	IFLA_IPVLAN_MODE,
 490 | 	IFLA_IPVLAN_FLAGS,
 491 | 	__IFLA_IPVLAN_MAX
 492 | };
 493 | 
 494 | #define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
 495 | 
 496 | enum ipvlan_mode {
 497 | 	IPVLAN_MODE_L2 = 0,
 498 | 	IPVLAN_MODE_L3,
 499 | 	IPVLAN_MODE_L3S,
 500 | 	IPVLAN_MODE_MAX
 501 | };
 502 | 
 503 | #define IPVLAN_F_PRIVATE	0x01
 504 | #define IPVLAN_F_VEPA		0x02
 505 | 
 506 | /* VXLAN section */
 507 | enum {
 508 | 	IFLA_VXLAN_UNSPEC,
 509 | 	IFLA_VXLAN_ID,
 510 | 	IFLA_VXLAN_GROUP,	/* group or remote address */
 511 | 	IFLA_VXLAN_LINK,
 512 | 	IFLA_VXLAN_LOCAL,
 513 | 	IFLA_VXLAN_TTL,
 514 | 	IFLA_VXLAN_TOS,
 515 | 	IFLA_VXLAN_LEARNING,
 516 | 	IFLA_VXLAN_AGEING,
 517 | 	IFLA_VXLAN_LIMIT,
 518 | 	IFLA_VXLAN_PORT_RANGE,	/* source port */
 519 | 	IFLA_VXLAN_PROXY,
 520 | 	IFLA_VXLAN_RSC,
 521 | 	IFLA_VXLAN_L2MISS,
 522 | 	IFLA_VXLAN_L3MISS,
 523 | 	IFLA_VXLAN_PORT,	/* destination port */
 524 | 	IFLA_VXLAN_GROUP6,
 525 | 	IFLA_VXLAN_LOCAL6,
 526 | 	IFLA_VXLAN_UDP_CSUM,
 527 | 	IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 528 | 	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 529 | 	IFLA_VXLAN_REMCSUM_TX,
 530 | 	IFLA_VXLAN_REMCSUM_RX,
 531 | 	IFLA_VXLAN_GBP,
 532 | 	IFLA_VXLAN_REMCSUM_NOPARTIAL,
 533 | 	IFLA_VXLAN_COLLECT_METADATA,
 534 | 	IFLA_VXLAN_LABEL,
 535 | 	IFLA_VXLAN_GPE,
 536 | 	IFLA_VXLAN_TTL_INHERIT,
 537 | 	IFLA_VXLAN_DF,
 538 | 	__IFLA_VXLAN_MAX
 539 | };
 540 | #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
 541 | 
 542 | struct ifla_vxlan_port_range {
 543 | 	__be16	low;
 544 | 	__be16	high;
 545 | };
 546 | 
 547 | enum ifla_vxlan_df {
 548 | 	VXLAN_DF_UNSET = 0,
 549 | 	VXLAN_DF_SET,
 550 | 	VXLAN_DF_INHERIT,
 551 | 	__VXLAN_DF_END,
 552 | 	VXLAN_DF_MAX = __VXLAN_DF_END - 1,
 553 | };
 554 | 
 555 | /* GENEVE section */
 556 | enum {
 557 | 	IFLA_GENEVE_UNSPEC,
 558 | 	IFLA_GENEVE_ID,
 559 | 	IFLA_GENEVE_REMOTE,
 560 | 	IFLA_GENEVE_TTL,
 561 | 	IFLA_GENEVE_TOS,
 562 | 	IFLA_GENEVE_PORT,	/* destination port */
 563 | 	IFLA_GENEVE_COLLECT_METADATA,
 564 | 	IFLA_GENEVE_REMOTE6,
 565 | 	IFLA_GENEVE_UDP_CSUM,
 566 | 	IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
 567 | 	IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 568 | 	IFLA_GENEVE_LABEL,
 569 | 	IFLA_GENEVE_TTL_INHERIT,
 570 | 	IFLA_GENEVE_DF,
 571 | 	__IFLA_GENEVE_MAX
 572 | };
 573 | #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
 574 | 
 575 | enum ifla_geneve_df {
 576 | 	GENEVE_DF_UNSET = 0,
 577 | 	GENEVE_DF_SET,
 578 | 	GENEVE_DF_INHERIT,
 579 | 	__GENEVE_DF_END,
 580 | 	GENEVE_DF_MAX = __GENEVE_DF_END - 1,
 581 | };
 582 | 
 583 | /* PPP section */
 584 | enum {
 585 | 	IFLA_PPP_UNSPEC,
 586 | 	IFLA_PPP_DEV_FD,
 587 | 	__IFLA_PPP_MAX
 588 | };
 589 | #define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
 590 | 
 591 | /* GTP section */
 592 | 
 593 | enum ifla_gtp_role {
 594 | 	GTP_ROLE_GGSN = 0,
 595 | 	GTP_ROLE_SGSN,
 596 | };
 597 | 
 598 | enum {
 599 | 	IFLA_GTP_UNSPEC,
 600 | 	IFLA_GTP_FD0,
 601 | 	IFLA_GTP_FD1,
 602 | 	IFLA_GTP_PDP_HASHSIZE,
 603 | 	IFLA_GTP_ROLE,
 604 | 	__IFLA_GTP_MAX,
 605 | };
 606 | #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
 607 | 
 608 | /* Bonding section */
 609 | 
 610 | enum {
 611 | 	IFLA_BOND_UNSPEC,
 612 | 	IFLA_BOND_MODE,
 613 | 	IFLA_BOND_ACTIVE_SLAVE,
 614 | 	IFLA_BOND_MIIMON,
 615 | 	IFLA_BOND_UPDELAY,
 616 | 	IFLA_BOND_DOWNDELAY,
 617 | 	IFLA_BOND_USE_CARRIER,
 618 | 	IFLA_BOND_ARP_INTERVAL,
 619 | 	IFLA_BOND_ARP_IP_TARGET,
 620 | 	IFLA_BOND_ARP_VALIDATE,
 621 | 	IFLA_BOND_ARP_ALL_TARGETS,
 622 | 	IFLA_BOND_PRIMARY,
 623 | 	IFLA_BOND_PRIMARY_RESELECT,
 624 | 	IFLA_BOND_FAIL_OVER_MAC,
 625 | 	IFLA_BOND_XMIT_HASH_POLICY,
 626 | 	IFLA_BOND_RESEND_IGMP,
 627 | 	IFLA_BOND_NUM_PEER_NOTIF,
 628 | 	IFLA_BOND_ALL_SLAVES_ACTIVE,
 629 | 	IFLA_BOND_MIN_LINKS,
 630 | 	IFLA_BOND_LP_INTERVAL,
 631 | 	IFLA_BOND_PACKETS_PER_SLAVE,
 632 | 	IFLA_BOND_AD_LACP_RATE,
 633 | 	IFLA_BOND_AD_SELECT,
 634 | 	IFLA_BOND_AD_INFO,
 635 | 	IFLA_BOND_AD_ACTOR_SYS_PRIO,
 636 | 	IFLA_BOND_AD_USER_PORT_KEY,
 637 | 	IFLA_BOND_AD_ACTOR_SYSTEM,
 638 | 	IFLA_BOND_TLB_DYNAMIC_LB,
 639 | 	__IFLA_BOND_MAX,
 640 | };
 641 | 
 642 | #define IFLA_BOND_MAX	(__IFLA_BOND_MAX - 1)
 643 | 
 644 | enum {
 645 | 	IFLA_BOND_AD_INFO_UNSPEC,
 646 | 	IFLA_BOND_AD_INFO_AGGREGATOR,
 647 | 	IFLA_BOND_AD_INFO_NUM_PORTS,
 648 | 	IFLA_BOND_AD_INFO_ACTOR_KEY,
 649 | 	IFLA_BOND_AD_INFO_PARTNER_KEY,
 650 | 	IFLA_BOND_AD_INFO_PARTNER_MAC,
 651 | 	__IFLA_BOND_AD_INFO_MAX,
 652 | };
 653 | 
 654 | #define IFLA_BOND_AD_INFO_MAX	(__IFLA_BOND_AD_INFO_MAX - 1)
 655 | 
 656 | enum {
 657 | 	IFLA_BOND_SLAVE_UNSPEC,
 658 | 	IFLA_BOND_SLAVE_STATE,
 659 | 	IFLA_BOND_SLAVE_MII_STATUS,
 660 | 	IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
 661 | 	IFLA_BOND_SLAVE_PERM_HWADDR,
 662 | 	IFLA_BOND_SLAVE_QUEUE_ID,
 663 | 	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
 664 | 	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
 665 | 	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
 666 | 	__IFLA_BOND_SLAVE_MAX,
 667 | };
 668 | 
 669 | #define IFLA_BOND_SLAVE_MAX	(__IFLA_BOND_SLAVE_MAX - 1)
 670 | 
 671 | /* SR-IOV virtual function management section */
 672 | 
 673 | enum {
 674 | 	IFLA_VF_INFO_UNSPEC,
 675 | 	IFLA_VF_INFO,
 676 | 	__IFLA_VF_INFO_MAX,
 677 | };
 678 | 
 679 | #define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
 680 | 
 681 | enum {
 682 | 	IFLA_VF_UNSPEC,
 683 | 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
 684 | 	IFLA_VF_VLAN,		/* VLAN ID and QoS */
 685 | 	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 686 | 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 687 | 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
 688 | 	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
 689 | 	IFLA_VF_RSS_QUERY_EN,	/* RSS Redirection Table and Hash Key query
 690 | 				 * on/off switch
 691 | 				 */
 692 | 	IFLA_VF_STATS,		/* network device statistics */
 693 | 	IFLA_VF_TRUST,		/* Trust VF */
 694 | 	IFLA_VF_IB_NODE_GUID,	/* VF Infiniband node GUID */
 695 | 	IFLA_VF_IB_PORT_GUID,	/* VF Infiniband port GUID */
 696 | 	IFLA_VF_VLAN_LIST,	/* nested list of vlans, option for QinQ */
 697 | 	__IFLA_VF_MAX,
 698 | };
 699 | 
 700 | #define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
 701 | 
 702 | struct ifla_vf_mac {
 703 | 	__u32 vf;
 704 | 	__u8 mac[32]; /* MAX_ADDR_LEN */
 705 | };
 706 | 
 707 | struct ifla_vf_vlan {
 708 | 	__u32 vf;
 709 | 	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
 710 | 	__u32 qos;
 711 | };
 712 | 
 713 | enum {
 714 | 	IFLA_VF_VLAN_INFO_UNSPEC,
 715 | 	IFLA_VF_VLAN_INFO,	/* VLAN ID, QoS and VLAN protocol */
 716 | 	__IFLA_VF_VLAN_INFO_MAX,
 717 | };
 718 | 
 719 | #define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
 720 | #define MAX_VLAN_LIST_LEN 1
 721 | 
 722 | struct ifla_vf_vlan_info {
 723 | 	__u32 vf;
 724 | 	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
 725 | 	__u32 qos;
 726 | 	__be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
 727 | };
 728 | 
 729 | struct ifla_vf_tx_rate {
 730 | 	__u32 vf;
 731 | 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
 732 | };
 733 | 
 734 | struct ifla_vf_rate {
 735 | 	__u32 vf;
 736 | 	__u32 min_tx_rate; /* Min Bandwidth in Mbps */
 737 | 	__u32 max_tx_rate; /* Max Bandwidth in Mbps */
 738 | };
 739 | 
 740 | struct ifla_vf_spoofchk {
 741 | 	__u32 vf;
 742 | 	__u32 setting;
 743 | };
 744 | 
 745 | struct ifla_vf_guid {
 746 | 	__u32 vf;
 747 | 	__u64 guid;
 748 | };
 749 | 
 750 | enum {
 751 | 	IFLA_VF_LINK_STATE_AUTO,	/* link state of the uplink */
 752 | 	IFLA_VF_LINK_STATE_ENABLE,	/* link always up */
 753 | 	IFLA_VF_LINK_STATE_DISABLE,	/* link always down */
 754 | 	__IFLA_VF_LINK_STATE_MAX,
 755 | };
 756 | 
 757 | struct ifla_vf_link_state {
 758 | 	__u32 vf;
 759 | 	__u32 link_state;
 760 | };
 761 | 
 762 | struct ifla_vf_rss_query_en {
 763 | 	__u32 vf;
 764 | 	__u32 setting;
 765 | };
 766 | 
 767 | enum {
 768 | 	IFLA_VF_STATS_RX_PACKETS,
 769 | 	IFLA_VF_STATS_TX_PACKETS,
 770 | 	IFLA_VF_STATS_RX_BYTES,
 771 | 	IFLA_VF_STATS_TX_BYTES,
 772 | 	IFLA_VF_STATS_BROADCAST,
 773 | 	IFLA_VF_STATS_MULTICAST,
 774 | 	IFLA_VF_STATS_PAD,
 775 | 	IFLA_VF_STATS_RX_DROPPED,
 776 | 	IFLA_VF_STATS_TX_DROPPED,
 777 | 	__IFLA_VF_STATS_MAX,
 778 | };
 779 | 
 780 | #define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
 781 | 
 782 | struct ifla_vf_trust {
 783 | 	__u32 vf;
 784 | 	__u32 setting;
 785 | };
 786 | 
 787 | /* VF ports management section
 788 |  *
 789 |  *	Nested layout of set/get msg is:
 790 |  *
 791 |  *		[IFLA_NUM_VF]
 792 |  *		[IFLA_VF_PORTS]
 793 |  *			[IFLA_VF_PORT]
 794 |  *				[IFLA_PORT_*], ...
 795 |  *			[IFLA_VF_PORT]
 796 |  *				[IFLA_PORT_*], ...
 797 |  *			...
 798 |  *		[IFLA_PORT_SELF]
 799 |  *			[IFLA_PORT_*], ...
 800 |  */
 801 | 
 802 | enum {
 803 | 	IFLA_VF_PORT_UNSPEC,
 804 | 	IFLA_VF_PORT,			/* nest */
 805 | 	__IFLA_VF_PORT_MAX,
 806 | };
 807 | 
 808 | #define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
 809 | 
 810 | enum {
 811 | 	IFLA_PORT_UNSPEC,
 812 | 	IFLA_PORT_VF,			/* __u32 */
 813 | 	IFLA_PORT_PROFILE,		/* string */
 814 | 	IFLA_PORT_VSI_TYPE,		/* 802.1Qbg (pre-)standard VDP */
 815 | 	IFLA_PORT_INSTANCE_UUID,	/* binary UUID */
 816 | 	IFLA_PORT_HOST_UUID,		/* binary UUID */
 817 | 	IFLA_PORT_REQUEST,		/* __u8 */
 818 | 	IFLA_PORT_RESPONSE,		/* __u16, output only */
 819 | 	__IFLA_PORT_MAX,
 820 | };
 821 | 
 822 | #define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
 823 | 
 824 | #define PORT_PROFILE_MAX	40
 825 | #define PORT_UUID_MAX		16
 826 | #define PORT_SELF_VF		-1
 827 | 
 828 | enum {
 829 | 	PORT_REQUEST_PREASSOCIATE = 0,
 830 | 	PORT_REQUEST_PREASSOCIATE_RR,
 831 | 	PORT_REQUEST_ASSOCIATE,
 832 | 	PORT_REQUEST_DISASSOCIATE,
 833 | };
 834 | 
 835 | enum {
 836 | 	PORT_VDP_RESPONSE_SUCCESS = 0,
 837 | 	PORT_VDP_RESPONSE_INVALID_FORMAT,
 838 | 	PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
 839 | 	PORT_VDP_RESPONSE_UNUSED_VTID,
 840 | 	PORT_VDP_RESPONSE_VTID_VIOLATION,
 841 | 	PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
 842 | 	PORT_VDP_RESPONSE_OUT_OF_SYNC,
 843 | 	/* 0x08-0xFF reserved for future VDP use */
 844 | 	PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
 845 | 	PORT_PROFILE_RESPONSE_INPROGRESS,
 846 | 	PORT_PROFILE_RESPONSE_INVALID,
 847 | 	PORT_PROFILE_RESPONSE_BADSTATE,
 848 | 	PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
 849 | 	PORT_PROFILE_RESPONSE_ERROR,
 850 | };
 851 | 
 852 | struct ifla_port_vsi {
 853 | 	__u8 vsi_mgr_id;
 854 | 	__u8 vsi_type_id[3];
 855 | 	__u8 vsi_type_version;
 856 | 	__u8 pad[3];
 857 | };
 858 | 
 859 | 
 860 | /* IPoIB section */
 861 | 
 862 | enum {
 863 | 	IFLA_IPOIB_UNSPEC,
 864 | 	IFLA_IPOIB_PKEY,
 865 | 	IFLA_IPOIB_MODE,
 866 | 	IFLA_IPOIB_UMCAST,
 867 | 	__IFLA_IPOIB_MAX
 868 | };
 869 | 
 870 | enum {
 871 | 	IPOIB_MODE_DATAGRAM  = 0, /* using unreliable datagram QPs */
 872 | 	IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
 873 | };
 874 | 
 875 | #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 876 | 
 877 | 
 878 | /* HSR section */
 879 | 
 880 | enum {
 881 | 	IFLA_HSR_UNSPEC,
 882 | 	IFLA_HSR_SLAVE1,
 883 | 	IFLA_HSR_SLAVE2,
 884 | 	IFLA_HSR_MULTICAST_SPEC,	/* Last byte of supervision addr */
 885 | 	IFLA_HSR_SUPERVISION_ADDR,	/* Supervision frame multicast addr */
 886 | 	IFLA_HSR_SEQ_NR,
 887 | 	IFLA_HSR_VERSION,		/* HSR version */
 888 | 	__IFLA_HSR_MAX,
 889 | };
 890 | 
 891 | #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
 892 | 
 893 | /* STATS section */
 894 | 
 895 | struct if_stats_msg {
 896 | 	__u8  family;
 897 | 	__u8  pad1;
 898 | 	__u16 pad2;
 899 | 	__u32 ifindex;
 900 | 	__u32 filter_mask;
 901 | };
 902 | 
 903 | /* A stats attribute can be netdev specific or a global stat.
 904 |  * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
 905 |  */
 906 | enum {
 907 | 	IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
 908 | 	IFLA_STATS_LINK_64,
 909 | 	IFLA_STATS_LINK_XSTATS,
 910 | 	IFLA_STATS_LINK_XSTATS_SLAVE,
 911 | 	IFLA_STATS_LINK_OFFLOAD_XSTATS,
 912 | 	IFLA_STATS_AF_SPEC,
 913 | 	__IFLA_STATS_MAX,
 914 | };
 915 | 
 916 | #define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
 917 | 
 918 | #define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR - 1))
 919 | 
 920 | /* These are embedded into IFLA_STATS_LINK_XSTATS:
 921 |  * [IFLA_STATS_LINK_XSTATS]
 922 |  * -> [LINK_XSTATS_TYPE_xxx]
 923 |  *    -> [rtnl link type specific attributes]
 924 |  */
 925 | enum {
 926 | 	LINK_XSTATS_TYPE_UNSPEC,
 927 | 	LINK_XSTATS_TYPE_BRIDGE,
 928 | 	LINK_XSTATS_TYPE_BOND,
 929 | 	__LINK_XSTATS_TYPE_MAX
 930 | };
 931 | #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
 932 | 
 933 | /* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
 934 | enum {
 935 | 	IFLA_OFFLOAD_XSTATS_UNSPEC,
 936 | 	IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
 937 | 	__IFLA_OFFLOAD_XSTATS_MAX
 938 | };
 939 | #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
 940 | 
 941 | /* XDP section */
 942 | 
 943 | #define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
 944 | #define XDP_FLAGS_SKB_MODE		(1U << 1)
 945 | #define XDP_FLAGS_DRV_MODE		(1U << 2)
 946 | #define XDP_FLAGS_HW_MODE		(1U << 3)
 947 | #define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
 948 | 					 XDP_FLAGS_DRV_MODE | \
 949 | 					 XDP_FLAGS_HW_MODE)
 950 | #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
 951 | 					 XDP_FLAGS_MODES)
 952 | 
 953 | /* These are stored into IFLA_XDP_ATTACHED on dump. */
 954 | enum {
 955 | 	XDP_ATTACHED_NONE = 0,
 956 | 	XDP_ATTACHED_DRV,
 957 | 	XDP_ATTACHED_SKB,
 958 | 	XDP_ATTACHED_HW,
 959 | 	XDP_ATTACHED_MULTI,
 960 | };
 961 | 
 962 | enum {
 963 | 	IFLA_XDP_UNSPEC,
 964 | 	IFLA_XDP_FD,
 965 | 	IFLA_XDP_ATTACHED,
 966 | 	IFLA_XDP_FLAGS,
 967 | 	IFLA_XDP_PROG_ID,
 968 | 	IFLA_XDP_DRV_PROG_ID,
 969 | 	IFLA_XDP_SKB_PROG_ID,
 970 | 	IFLA_XDP_HW_PROG_ID,
 971 | 	__IFLA_XDP_MAX,
 972 | };
 973 | 
 974 | #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
 975 | 
 976 | enum {
 977 | 	IFLA_EVENT_NONE,
 978 | 	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
 979 | 	IFLA_EVENT_FEATURES,		/* change in offload features */
 980 | 	IFLA_EVENT_BONDING_FAILOVER,	/* change in active slave */
 981 | 	IFLA_EVENT_NOTIFY_PEERS,	/* re-sent grat. arp/ndisc */
 982 | 	IFLA_EVENT_IGMP_RESEND,		/* re-sent IGMP JOIN */
 983 | 	IFLA_EVENT_BONDING_OPTIONS,	/* change in bonding options */
 984 | };
 985 | 
 986 | /* tun section */
 987 | 
 988 | enum {
 989 | 	IFLA_TUN_UNSPEC,
 990 | 	IFLA_TUN_OWNER,
 991 | 	IFLA_TUN_GROUP,
 992 | 	IFLA_TUN_TYPE,
 993 | 	IFLA_TUN_PI,
 994 | 	IFLA_TUN_VNET_HDR,
 995 | 	IFLA_TUN_PERSIST,
 996 | 	IFLA_TUN_MULTI_QUEUE,
 997 | 	IFLA_TUN_NUM_QUEUES,
 998 | 	IFLA_TUN_NUM_DISABLED_QUEUES,
 999 | 	__IFLA_TUN_MAX,
1000 | };
1001 | 
1002 | #define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
1003 | 
1004 | /* rmnet section */
1005 | 
1006 | #define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
1007 | #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
1008 | #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
1009 | #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
1010 | 
1011 | enum {
1012 | 	IFLA_RMNET_UNSPEC,
1013 | 	IFLA_RMNET_MUX_ID,
1014 | 	IFLA_RMNET_FLAGS,
1015 | 	__IFLA_RMNET_MAX,
1016 | };
1017 | 
1018 | #define IFLA_RMNET_MAX	(__IFLA_RMNET_MAX - 1)
1019 | 
1020 | struct ifla_rmnet_flags {
1021 | 	__u32	flags;
1022 | 	__u32	mask;
1023 | };
1024 | 
1025 | #endif /* _UAPI_LINUX_IF_LINK_H */
1026 | 


--------------------------------------------------------------------------------
/headers/linux/if_xdp.h:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  2 | /*
  3 |  * if_xdp: XDP socket user-space interface
  4 |  * Copyright(c) 2018 Intel Corporation.
  5 |  *
  6 |  * Author(s): Björn Töpel <bjorn.topel@intel.com>
  7 |  *	      Magnus Karlsson <magnus.karlsson@intel.com>
  8 |  */
  9 | 
 10 | #ifndef _LINUX_IF_XDP_H
 11 | #define _LINUX_IF_XDP_H
 12 | 
 13 | #include <linux/types.h>
 14 | 
 15 | /* Options for the sxdp_flags field */
 16 | #define XDP_SHARED_UMEM	(1 << 0)
 17 | #define XDP_COPY	(1 << 1) /* Force copy-mode */
 18 | #define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
 19 | /* If this option is set, the driver might go sleep and in that case
 20 |  * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
 21 |  * set. If it is set, the application need to explicitly wake up the
 22 |  * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
 23 |  * running the driver and the application on the same core, you should
 24 |  * use this option so that the kernel will yield to the user space
 25 |  * application.
 26 |  */
 27 | #define XDP_USE_NEED_WAKEUP (1 << 3)
 28 | 
 29 | /* Flags for xsk_umem_config flags */
 30 | #define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
 31 | 
 32 | struct sockaddr_xdp {
 33 | 	__u16 sxdp_family;
 34 | 	__u16 sxdp_flags;
 35 | 	__u32 sxdp_ifindex;
 36 | 	__u32 sxdp_queue_id;
 37 | 	__u32 sxdp_shared_umem_fd;
 38 | };
 39 | 
 40 | /* XDP_RING flags */
 41 | #define XDP_RING_NEED_WAKEUP (1 << 0)
 42 | 
 43 | struct xdp_ring_offset {
 44 | 	__u64 producer;
 45 | 	__u64 consumer;
 46 | 	__u64 desc;
 47 | 	__u64 flags;
 48 | };
 49 | 
 50 | struct xdp_mmap_offsets {
 51 | 	struct xdp_ring_offset rx;
 52 | 	struct xdp_ring_offset tx;
 53 | 	struct xdp_ring_offset fr; /* Fill */
 54 | 	struct xdp_ring_offset cr; /* Completion */
 55 | };
 56 | 
 57 | /* XDP socket options */
 58 | #define XDP_MMAP_OFFSETS		1
 59 | #define XDP_RX_RING			2
 60 | #define XDP_TX_RING			3
 61 | #define XDP_UMEM_REG			4
 62 | #define XDP_UMEM_FILL_RING		5
 63 | #define XDP_UMEM_COMPLETION_RING	6
 64 | #define XDP_STATISTICS			7
 65 | #define XDP_OPTIONS			8
 66 | 
 67 | struct xdp_umem_reg {
 68 | 	__u64 addr; /* Start of packet data area */
 69 | 	__u64 len; /* Length of packet data area */
 70 | 	__u32 chunk_size;
 71 | 	__u32 headroom;
 72 | 	__u32 flags;
 73 | };
 74 | 
 75 | struct xdp_statistics {
 76 | 	__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
 77 | 	__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
 78 | 	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
 79 | };
 80 | 
 81 | struct xdp_options {
 82 | 	__u32 flags;
 83 | };
 84 | 
 85 | /* Flags for the flags field of struct xdp_options */
 86 | #define XDP_OPTIONS_ZEROCOPY (1 << 0)
 87 | 
 88 | /* Pgoff for mmaping the rings */
 89 | #define XDP_PGOFF_RX_RING			  0
 90 | #define XDP_PGOFF_TX_RING		 0x80000000
 91 | #define XDP_UMEM_PGOFF_FILL_RING	0x100000000ULL
 92 | #define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000ULL
 93 | 
 94 | /* Masks for unaligned chunks mode */
 95 | #define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
 96 | #define XSK_UNALIGNED_BUF_ADDR_MASK \
 97 | 	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
 98 | 
 99 | /* Rx/Tx descriptor */
100 | struct xdp_desc {
101 | 	__u64 addr;
102 | 	__u32 len;
103 | 	__u32 options;
104 | };
105 | 
106 | /* UMEM descriptor is __u64 */
107 | 
108 | #endif /* _LINUX_IF_XDP_H */
109 | 


--------------------------------------------------------------------------------
/headers/perf-sys.h:
--------------------------------------------------------------------------------
 1 | /* SPDX-License-Identifier: GPL-2.0 */
 2 | /* Copied from $(LINUX)/tools/perf/perf-sys.h (kernel 4.18) */
 3 | #ifndef _PERF_SYS_H
 4 | #define _PERF_SYS_H
 5 | 
 6 | #include <unistd.h>
 7 | #include <sys/types.h>
 8 | #include <sys/syscall.h>
 9 | #include <linux/types.h>
10 | #include <linux/perf_event.h>
11 | /*
12 |  * remove the following headers to allow for userspace program compilation
13 |  * #include <linux/compiler.h>
14 |  * #include <asm/barrier.h>
15 |  */
16 | #ifdef __powerpc__
17 | #define CPUINFO_PROC	{"cpu"}
18 | #endif
19 | 
20 | #ifdef __s390__
21 | #define CPUINFO_PROC	{"vendor_id"}
22 | #endif
23 | 
24 | #ifdef __sh__
25 | #define CPUINFO_PROC	{"cpu type"}
26 | #endif
27 | 
28 | #ifdef __hppa__
29 | #define CPUINFO_PROC	{"cpu"}
30 | #endif
31 | 
32 | #ifdef __sparc__
33 | #define CPUINFO_PROC	{"cpu"}
34 | #endif
35 | 
36 | #ifdef __alpha__
37 | #define CPUINFO_PROC	{"cpu model"}
38 | #endif
39 | 
40 | #ifdef __arm__
41 | #define CPUINFO_PROC	{"model name", "Processor"}
42 | #endif
43 | 
44 | #ifdef __mips__
45 | #define CPUINFO_PROC	{"cpu model"}
46 | #endif
47 | 
48 | #ifdef __arc__
49 | #define CPUINFO_PROC	{"Processor"}
50 | #endif
51 | 
52 | #ifdef __xtensa__
53 | #define CPUINFO_PROC	{"core ID"}
54 | #endif
55 | 
56 | #ifndef CPUINFO_PROC
57 | #define CPUINFO_PROC	{ "model name", }
58 | #endif
59 | 
60 | static inline int
61 | sys_perf_event_open(struct perf_event_attr *attr,
62 | 		      pid_t pid, int cpu, int group_fd,
63 | 		      unsigned long flags)
64 | {
65 | 	int fd;
66 | 
67 | 	fd = syscall(__NR_perf_event_open, attr, pid, cpu,
68 | 		     group_fd, flags);
69 | 
70 | #ifdef HAVE_ATTR_TEST
71 | 	if (unlikely(test_attr__enabled))
72 | 		test_attr__open(attr, pid, cpu, fd, group_fd, flags);
73 | #endif
74 | 	return fd;
75 | }
76 | 
77 | #endif /* _PERF_SYS_H */
78 | 


--------------------------------------------------------------------------------
/scripts/ci_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -Eeuo pipefail
 3 | 
 4 | # Usage: 
 5 | #   try_load tunnel_type xdp_executable tunnel_config
 6 | try_load() {
 7 |     TUNNEL_TYPE=$1
 8 |     XDP_EXECUTABLE=$2
 9 |     TUNNEL_CONFIG=${@:3}
10 |     TUNNEL_INTERFACE_NAME=test1
11 | 
12 |     echo "Testing ${XDP_EXECUTABLE} on ${TUNNEL_TYPE}..."
13 | 
14 |     ip link del ${TUNNEL_INTERFACE_NAME} || true
15 |     ip link add ${TUNNEL_INTERFACE_NAME} type ${TUNNEL_TYPE} ${TUNNEL_CONFIG}
16 |     ip link set ${TUNNEL_INTERFACE_NAME} up
17 |     ip link set dev ${TUNNEL_INTERFACE_NAME} xdp object "${XDP_EXECUTABLE}"
18 |     ip link del ${TUNNEL_INTERFACE_NAME}
19 | }
20 | 
21 | if [ $EUID -ne 0 ]; then
22 |     echo "This script must be run as root"
23 |     exit 1
24 | fi
25 | 
26 | cd "$( dirname "${BASH_SOURCE[0]}" )"/..
27 | 
28 | modprobe ip_gre
29 | 
30 | try_load gre build/keepalive_gre.o local 169.254.1.1 remote 169.254.1.2 ttl 255
31 | try_load ip6gre build/keepalive_gre6.o local fd00::1 remote fd00::2 ttl 255
32 | 


--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef __COMMON_H__
 3 | #define __COMMON_H__
 4 | 
 5 | struct gre_hdr {
 6 | 	__be16 flags;
 7 | 	__be16 proto;
 8 | };
 9 | 
10 | // have to be static and __always_inline, otherwise you will have `Error fetching program/map!`
11 | static __always_inline bool compare_ipv6_address(struct in6_addr *a, struct in6_addr *b) {
12 | 	#pragma unroll
13 | 	for (int i = 0; i < 4; ++i) {
14 | 		if (a->in6_u.u6_addr32[i] != b->in6_u.u6_addr32[i]) return false;
15 | 	}
16 | 	return true;
17 | }
18 | 
19 | #endif


--------------------------------------------------------------------------------
/src/keepalive_gre.c:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: GPL-2.0 */
  2 | #include <stddef.h>
  3 | #include <stdbool.h>
  4 | #include <linux/bpf.h>
  5 | #include <linux/in.h>
  6 | #include <linux/if_ether.h>
  7 | #include <linux/if_packet.h>
  8 | #include <linux/ip.h>
  9 | #include <linux/ipv6.h>
 10 | #include <linux/icmp.h>
 11 | #include <linux/icmpv6.h>
 12 | #include <linux/udp.h>
 13 | #include <linux/tcp.h>
 14 | #include <bpf/bpf_helpers.h>
 15 | #include <bpf/bpf_endian.h>
 16 | #include "common.h"
 17 | 
 18 | // enable debug print
 19 | // #define DEBUG
 20 | // enable packet header dump
 21 | // #define DEBUG_PRINT_HEADER_SIZE 32
 22 | 
 23 | char _license[4] SEC("license") = "GPL";
 24 | 
 25 | SEC("prog")
 26 | int xdp_gre_keepalive_func(struct xdp_md *ctx)
 27 | {
 28 | 	// for border checking
 29 | 	void *data_start = (void *)(long)ctx->data;
 30 | 	void *data_end = (void *)(long)ctx->data_end;
 31 | 
 32 | 	// result
 33 | 	__u32 action = XDP_PASS;
 34 | 
 35 | 	// current parsed header position pointer
 36 | 	void *dataptr = data_start;
 37 | 
 38 | 	#ifdef DEBUG
 39 | 		bpf_printk("New packet\n");
 40 | 	#endif
 41 | 
 42 | 	// debug print packet header
 43 | 	#if (defined DEBUG_PRINT_HEADER_SIZE) && (DEBUG_PRINT_HEADER_SIZE > 0)
 44 | 		// check for out of boarder access is necessary, kernel will run static analysis on our program
 45 | 		if ((dataptr + DEBUG_PRINT_HEADER_SIZE) > data_end) {
 46 | 			bpf_printk("Packet size too small, dump failed\n");
 47 | 			goto out;
 48 | 		}
 49 | 		__u8 *data_raw = (__u8 *)dataptr;
 50 | 		bpf_printk("Packet header dump:\n");
 51 | 		#pragma unroll
 52 | 		for (int i = 0; i < DEBUG_PRINT_HEADER_SIZE; ++i) {
 53 | 			bpf_printk("#%d: %x\n", i, data_raw[i]);
 54 | 		}
 55 | 	#endif
 56 | 
 57 | 	struct iphdr *outer_iphdr;
 58 | 
 59 | 	// GRE packet directly starts with an IPv4 header
 60 | 	if ((dataptr + 1) > data_end) goto out;
 61 | 	if ((((__u8 *)dataptr)[0] & 0xF0) != 0x40) {
 62 | 		goto out;
 63 | 	}
 64 | 
 65 | 	if (dataptr + sizeof(struct iphdr) > data_end) return -1;
 66 | 	outer_iphdr = (struct iphdr *)dataptr;
 67 | 	dataptr += sizeof(struct iphdr);
 68 | 
 69 | 	// now we are at the outer GRE header
 70 | 	if (dataptr + sizeof(struct gre_hdr) > data_end) return -1;
 71 | 	struct gre_hdr *outer_grehdr = (struct gre_hdr *)(dataptr);
 72 | 	dataptr += sizeof(struct gre_hdr);
 73 | 	#ifdef DEBUG
 74 | 		bpf_printk("Outer GRE flags=0x%x proto=%x\n", outer_grehdr->flags, outer_grehdr->proto);
 75 | 	#endif
 76 | 
 77 | 	// here is all the headers we need to chop off before sending the packet back
 78 | 	void *cutoff_pos = dataptr;
 79 | 
 80 | 	// parse inner IP header
 81 | 	if (outer_grehdr -> proto == bpf_htons(ETH_P_IP)) {
 82 | 		if (dataptr + 1 > data_end) return -1;
 83 | 		struct iphdr *inner_iphdr = dataptr;
 84 | 		int ip_header_size = (inner_iphdr -> ihl) * 4;
 85 | 		if (dataptr + 20 > data_end) return -1; // workaround kernel static check
 86 | 		if (dataptr + ip_header_size > data_end) return -1;
 87 | 		dataptr += ip_header_size;
 88 | 		__u8 inner_ip_proto = inner_iphdr -> protocol;
 89 | 		#ifdef DEBUG
 90 | 			bpf_printk("IPv4 packet_size=0x%x, proto=0x%x\n", ip_header_size, inner_ip_proto);
 91 | 		#endif
 92 | 
 93 | 		// check if it is a GRE encapsulated in an IPv4 packet
 94 | 		if (inner_ip_proto != IPPROTO_GRE) goto out;
 95 | 
 96 | 		// get the inner GRE header
 97 | 		if (dataptr + sizeof(struct gre_hdr) > data_end) return -1;
 98 | 		struct gre_hdr *inner_grehdr = (struct gre_hdr *)(dataptr);
 99 | 		dataptr += sizeof(struct gre_hdr);
100 | 		#ifdef DEBUG
101 | 			bpf_printk("Inner is GRE4, proto=%x\n", inner_grehdr -> proto);
102 | 		#endif
103 | 
104 | 		// check if the GRE header is keepalive
105 | 		// we need: 
106 | 		// * proto == 0
107 | 		// * ip address match
108 | 		// 
109 | 		if (
110 | 			inner_grehdr -> proto != 0
111 | 			|| inner_iphdr -> saddr != outer_iphdr -> daddr
112 | 			|| inner_iphdr -> daddr != outer_iphdr -> saddr
113 | 			) goto out;
114 | 		#ifdef DEBUG
115 | 			bpf_printk("GRE4 keepalive received!\n");
116 | 		#endif
117 | 
118 | 	} else {
119 | 		// unknown protocol
120 | 		#ifdef DEBUG
121 | 			bpf_printk("Unknown proto %x inside GRE", outer_grehdr->proto);
122 | 		#endif
123 | 		goto out;
124 | 	}
125 | 
126 | 	// remove the header and send the packet back
127 | 	if (bpf_xdp_adjust_head(ctx, (int)(cutoff_pos - data_start))) return -1;
128 | 	action = XDP_TX;
129 | 
130 | out:
131 | 	return action;
132 | }
133 | 


--------------------------------------------------------------------------------
/src/keepalive_gre6.c:
--------------------------------------------------------------------------------
  1 | /* SPDX-License-Identifier: GPL-2.0 */
  2 | #include <stddef.h>
  3 | #include <stdbool.h>
  4 | #include <linux/bpf.h>
  5 | #include <linux/in.h>
  6 | #include <linux/if_ether.h>
  7 | #include <linux/if_packet.h>
  8 | #include <linux/ip.h>
  9 | #include <linux/ipv6.h>
 10 | #include <linux/icmp.h>
 11 | #include <linux/icmpv6.h>
 12 | #include <linux/udp.h>
 13 | #include <linux/tcp.h>
 14 | #include <bpf/bpf_helpers.h>
 15 | #include <bpf/bpf_endian.h>
 16 | #include "common.h"
 17 | 
 18 | // enable debug print
 19 | // #define DEBUG
 20 | // enable packet header dump
 21 | // #define DEBUG_PRINT_HEADER_SIZE 32
 22 | 
 23 | char _license[4] SEC("license") = "GPL";
 24 | 
 25 | SEC("prog")
 26 | int xdp_keepalive_gre6(struct xdp_md *ctx)
 27 | {
 28 | 	// for border checking
 29 | 	void *data_start = (void *)(long)ctx->data;
 30 | 	void *data_end = (void *)(long)ctx->data_end;
 31 | 
 32 | 	// result
 33 | 	__u32 action = XDP_PASS;
 34 | 
 35 | 	// current parsed header position pointer
 36 | 	void *dataptr = data_start;
 37 | 
 38 | 	#ifdef DEBUG
 39 | 		bpf_printk("New packet\n");
 40 | 	#endif
 41 | 
 42 | 	// debug print packet header
 43 | 	#if (defined DEBUG_PRINT_HEADER_SIZE) && (DEBUG_PRINT_HEADER_SIZE > 0)
 44 | 		// check for out of boarder access is necessary, kernel will run static analysis on our program
 45 | 		if ((dataptr + DEBUG_PRINT_HEADER_SIZE) > data_end) {
 46 | 			bpf_printk("Packet size too small, dump failed\n");
 47 | 			goto out;
 48 | 		}
 49 | 		__u8 *data_raw = (__u8 *)dataptr;
 50 | 		bpf_printk("Packet header dump:\n");
 51 | 		#pragma unroll
 52 | 		for (int i = 0; i < DEBUG_PRINT_HEADER_SIZE; ++i) {
 53 | 			bpf_printk("#%d: %x\n", i, data_raw[i]);
 54 | 		}
 55 | 	#endif
 56 | 
 57 | 	struct ipv6hdr *outer_ipv6hdr;
 58 | 
 59 | 	// if the packet is from GREv6 (tunnel mode ip6gre), then it starts with an ethernet header:
 60 | 	// * dst MAC address (6 bytes)
 61 | 	// * src MAC address (6 bytes)
 62 | 	// * ethernet proto (0x86dd, 2 bytes)
 63 | 	// Then comes IPv6 header.
 64 | 	// So we skip the first 12 bytes and verify ethernet proto field and IPv6 header version field
 65 | 	if ((dataptr + 15) > data_end) goto out;
 66 | 	if (!(
 67 |         ((__u16 *)dataptr)[6] == 0xdd86
 68 | 		&& (((__u8 *)dataptr)[14] & 0xF0) == 0x60
 69 |     )) {
 70 |         // cannot verify packet header
 71 |         goto out;
 72 |     }
 73 | 
 74 |     dataptr += 14; // skip to the IPv6 header
 75 | 
 76 |     if (dataptr + sizeof(struct ipv6hdr) > data_end) return -1;
 77 |     outer_ipv6hdr = (struct ipv6hdr *)dataptr;
 78 |     dataptr += sizeof(struct ipv6hdr);
 79 | 
 80 | 	// now we are at the outer GRE header
 81 | 	if (dataptr + sizeof(struct gre_hdr) > data_end) return -1;
 82 | 	struct gre_hdr *outer_grehdr = (struct gre_hdr *)(dataptr);
 83 | 	dataptr += sizeof(struct gre_hdr);
 84 | 	#ifdef DEBUG
 85 | 		bpf_printk("Outer GRE flags=0x%x proto=%x\n", outer_grehdr->flags, outer_grehdr->proto);
 86 | 	#endif
 87 | 
 88 | 	// here is all the headers we need to chop off before sending the packet back
 89 | 	void *cutoff_pos = dataptr;
 90 | 
 91 | 	// parse inner IP header (must be an IPv6 header too)
 92 | 	if (outer_grehdr->proto == bpf_htons(ETH_P_IPV6)) {
 93 | 		if (dataptr + sizeof(struct ipv6hdr) + 1 > data_end) return -1;
 94 | 		struct ipv6hdr *inner_ipv6hdr = (struct ipv6hdr *)(dataptr);
 95 | 		dataptr += sizeof(struct ipv6hdr);
 96 | 		__u8 inner_ip_proto = inner_ipv6hdr -> nexthdr;
 97 | 		#ifdef DEBUG
 98 | 			bpf_printk("IPv6 proto=0x%x\n", inner_ip_proto);
 99 | 		#endif
100 | 
101 | 		// check if it is a GRE encapsulated in an IPv6 packet
102 | 		if (inner_ip_proto != IPPROTO_GRE) goto out;
103 | 
104 | 		// get the inner GRE header
105 | 		if (dataptr + sizeof(struct gre_hdr) > data_end) return -1;
106 | 		struct gre_hdr *inner_grehdr = (struct gre_hdr *)(dataptr);
107 | 		dataptr += sizeof(struct gre_hdr);
108 | 		#ifdef DEBUG
109 | 			bpf_printk("Inner is GRE6, proto %x\n", inner_grehdr -> proto);
110 | 		#endif
111 | 
112 | 		// check if the GRE packet is a keepalive packet
113 | 		if (
114 | 			inner_grehdr -> proto != 0xdd86 // seems to be the case for MikroTik RouterOS, TODO: verify compatibility with other vendors
115 | 			|| !compare_ipv6_address(&(outer_ipv6hdr -> saddr), &(inner_ipv6hdr -> daddr))
116 | 			|| !compare_ipv6_address(&(outer_ipv6hdr -> daddr), &(inner_ipv6hdr -> saddr))
117 | 			) goto out;
118 | 		#ifdef DEBUG
119 | 			bpf_printk("GRE6 keepalive received!\n");
120 | 		#endif
121 | 
122 | 	} else {
123 | 		// unknown protocol
124 | 		#ifdef DEBUG
125 | 			bpf_printk("Unknown proto %x inside GRE", outer_grehdr->proto);
126 | 		#endif
127 | 		goto out;
128 | 	}
129 | 
130 | 	// remove the header and send the packet back
131 | 	if (bpf_xdp_adjust_head(ctx, (int)(cutoff_pos - data_start))) return -1;
132 | 	action = XDP_TX;
133 | 
134 | out:
135 | 	return action;
136 | }
137 | 


--------------------------------------------------------------------------------