├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── examples │   ├── CMakeLists.txt │   ├── README.md │   ├── filter.S │   ├── filter.bc │   ├── filter.bpf │   ├── filter.c │   └── filter_test.c ├── include │   ├── atomic.h │   ├── bpf.h │   ├── bpf_trace.h │   ├── filter.h │   └── uapi │       ├── bpf.h │       ├── bpf_common.h │       └── filter.h └── src     ├── core.c     └── jit.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.RData 2 | *.Rhistory 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tools/llvm_bpf"] 2 | path = tools/llvm_bpf 3 | url = https://github.com/iovisor/llvm_bpf.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(libebpf) 3 | 4 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 5 | 6 | include_directories(${libebpf_SOURCE_DIR}/include) 7 | 8 | set(HEADER_FILES 9 | ${PROJECT_SOURCE_DIR}/include/uapi/bpf.h 10 | ${PROJECT_SOURCE_DIR}/include/uapi/bpf_common.h 11 | ${PROJECT_SOURCE_DIR}/include/uapi/filter.h 12 | ${PROJECT_SOURCE_DIR}/include/atomic.h 13 | ${PROJECT_SOURCE_DIR}/include/bpf.h 14 | ${PROJECT_SOURCE_DIR}/include/bpf_trace.h 15 | ${PROJECT_SOURCE_DIR}/include/filter.h 16 | ) 17 | 18 | set(SOURCE_FILES 19 | src/jit.c 20 | src/core.c 21 | ) 22 | 23 | add_library(ebpf SHARED ${SOURCE_FILES} ${HEADER_FILES}) 24 | target_link_libraries(ebpf udis86) 25 | 26 | add_subdirectory(examples) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. 
You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. 
If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail.
311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Userspace eBPF Library 2 | ====================== 3 | This is a modified port of the Berkeley Packet Filter (BPF) infrastructure from 4 | the Linux kernel to userspace as a shared library. It is derived from early 5 | stages of the RFC patchset and hence may have a few bugs. The goal of this library is 6 | pure experimentation and performance analysis for tracing tools **only**. Support for maps 7 | and packet-filtering-specific instructions has been removed. Before using the library, 8 | please make sure your use-case allows GPLv2-licensed code. 9 | 10 | Building 11 | -------- 12 | This library uses the CMake build system. Make sure you have `cmake` version 2.8 or newer 13 | installed on your system. On Fedora this is usually a simple, 14 | 15 | $ dnf install cmake 16 | 17 | Create a `build` directory and then, 18 | 19 | $ cd build 20 | $ cmake ../ 21 | $ make 22 | 23 | Usage 24 | ----- 25 | You can either give raw BPF instructions inside your target program or use the compiled 26 | filter binary. The `examples` directory contains a simple C-style BPF `filter.c` file which 27 | is compiled to the `filter.bpf` binary. The target program can use a similar style as shown in 28 | `filter_test.c` to load and use the filter binary. Over time this will probably be made easier, 29 | with BPF being generated by a script invoking a small compiler and verifying the generated bytecode. 30 | 31 | The LLVM BPF backend developed by Alexei Starovoitov is provided in the `tools` directory. A BPF target for LLVM 32 | has already been upstreamed, but the backend is provided here as well.
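As a sketch of the first approach (raw instructions), the program below hand-assembles a trivial
always-accept filter with the initializer macros from `include/filter.h` and pushes it through the
same prepare-and-run sequence that `filter_test.c` uses. The file name, include paths, and the toy
two-instruction program are illustrative assumptions, not files shipped in this repository:

    /* raw_test.c (hypothetical): run a hand-written eBPF program. */
    #include <stdio.h>
    #include <string.h>

    #include <bpf.h>        /* pulls in uapi/bpf.h and atomic.h */
    #include <bpf_trace.h>  /* struct bpf_context */
    #include <filter.h>     /* struct bpf_prog, insn macros, BPF_PROG_RUN */

    int main(void)
    {
            /* r0 = 1; return r0  -- unconditionally accept */
            struct bpf_insn insns[] = {
                    BPF_MOV64_IMM(BPF_REG_0, 1),
                    BPF_EXIT_INSN(),
            };
            struct bpf_prog *prog = bpf_prog_alloc(bpf_prog_size(2));
            if (!prog)
                    return 1;
            prog->len = 2;
            prog->jited = 0;
            prog->orig_prog = NULL;
            memcpy(prog->insnsi, insns, sizeof(insns));
            atomic_set(&prog->aux->refcnt, 1);
            prog->aux->is_gpl_compatible = 1;

            fixup_bpf_calls(prog);          /* resolve helper calls (none here) */
            bpf_prog_select_runtime(prog);  /* JIT if available, else interpreter */

            struct bpf_context ctx = {};
            printf("filter returned %u\n",
                   (unsigned int) BPF_PROG_RUN(prog, (void *) &ctx));
            bpf_prog_free(prog);
            return 0;
    }

This should build with the library's `include/` directory on the include path, linked against the
built `libebpf`, mirroring how the `examples` target is set up.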
-------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wl,-rpath,'$$ORIGIN/../build' -L../build") 2 | 3 | set(SOURCE_FILES 4 | filter_test.c 5 | ) 6 | 7 | add_executable(filter_test ${SOURCE_FILES} ${HEADER_FILES}) 8 | target_link_libraries(filter_test ebpf m elf) 9 | 10 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | Using LLVM BPF Backend 2 | ---------------------- 3 | 4 | Even though the LLVM BPF backend has been merged into LLVM, the implementation of the LLVM eBPF backend 5 | provided in the ../tools directory can still be used to generate BPF bytecode from the C-like filter expressions. 6 | 7 | For example, to generate the filter binary: 8 | 9 | $ clang -O2 -I../ -emit-llvm -c filter.c 10 | $ ../tools/llvm_bpf/bld/Debug+Asserts/bin/llc filter.bc -o filter.bpf 11 | 12 | You can also generate assembly to see the BPF instructions: 13 | $ ../tools/llvm_bpf/bld/Debug+Asserts/bin/llc filter.bc -filetype=asm -o filter.S 14 | -------------------------------------------------------------------------------- /examples/filter.S: -------------------------------------------------------------------------------- 1 | .text 2 | .globl filter 3 | .align 8 4 | filter: # @filter 5 | # BB#0: 6 | mov r2, 3239269 7 | stw -4(r10), r2 8 | ldd r1, 0(r1) 9 | mov r2, r10 10 | addi r2, -4 11 | call 3 12 | mov r1, r0 13 | slli r1, 32 14 | srli r1, 32 15 | mov r0, 1 16 | mov r2, 0 17 | jeq r1, r2 goto .LBB0_2 18 | # BB#1: 19 | mov r0, r2 20 | .LBB0_2: 21 | andi r0, 1 22 | ret 23 | 24 | 25 | -------------------------------------------------------------------------------- /examples/filter.bc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuxology/libebpf/5c61bcce16ba662d51ea70c370c999688ad6bde3/examples/filter.bc -------------------------------------------------------------------------------- /examples/filter.bpf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuxology/libebpf/5c61bcce16ba662d51ea70c370c999688ad6bde3/examples/filter.bpf -------------------------------------------------------------------------------- /examples/filter.c: -------------------------------------------------------------------------------- 1 | #include <include/uapi/bpf.h> 2 | #include <include/bpf_trace.h> 3 | 4 | 5 | static int (*bpf_strcmp)(void *ptr1, void *ptr2) = (void *) BPF_FUNC_strcmp; 6 | 7 | struct filt_args { 8 | char *dev; 9 | int prot; 10 | int len; 11 | }; 12 | 13 | int filter(struct bpf_context *ctx) 14 | { 15 | char devname[] = "em1"; 16 | struct filt_args *filt = 0; 17 | 18 | filt = (struct filt_args*) ctx->arg1; 19 | if (bpf_strcmp(&filt->dev, devname) == 0){ 20 | return 1; 21 | } 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/filter_test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Userspace eBPF filter test with generated filter 3 | * 4 | * Copyright (C) 2012 Suchakra Sharma 5 | * 6 | */ 7 | 8 | #include <stdio.h> 9 | #include <stdlib.h> 10 | #include <string.h> 11 | #include <fcntl.h> 12 | #include <unistd.h> 13 | 14 | #include <errno.h> 15 | #include <gelf.h> 16 | #include <bpf.h> 17 | 18 | #include <bpf_trace.h> 19 | #include <filter.h> 20 | 21 |
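/* Overview of this test program: load_bpf_file() pulls the .text section
 * of the llc-generated ELF (filter.bpf) into insn_prog; init_ebpf_prog()
 * copies those instructions into a bpf_prog, fixes up helper calls and
 * selects the runtime (JIT or interpreter); run_filt() then evaluates
 * the filter against a hand-filled bpf_context. */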
22 | /* Global definitions */ 23 | struct bpf_prog *prog; 24 | 25 | struct filt_args { 26 | char *dev; 27 | int prot; 28 | int len; 29 | }; 30 | 31 | struct bpf_insn *insn_prog; 32 | int prog_size = 0; 33 | 34 | static void *u64_to_ptr(__u64 val){ 35 | return (void *) (unsigned long) val; 36 | } 37 | 38 | static __u64 ptr_to_u64(void *ptr){ 39 | return (__u64) (unsigned long) ptr; 40 | } 41 | 42 | unsigned int run_bpf_filter(struct bpf_prog *prog1, struct bpf_context *ctx){ 43 | __u64 ret = BPF_PROG_RUN(prog1, (void*) ctx); 44 | return ret; 45 | } 46 | 47 | static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, 48 | GElf_Shdr *shdr, Elf_Data **data) 49 | { 50 | Elf_Scn *scn; 51 | 52 | scn = elf_getscn(elf, i); 53 | if (!scn) 54 | return 1; 55 | 56 | if (gelf_getshdr(scn, shdr) != shdr) 57 | return 2; 58 | 59 | *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); 60 | if (!*shname || !shdr->sh_size) 61 | return 3; 62 | 63 | *data = elf_getdata(scn, 0); 64 | if (!*data || elf_getdata(scn, *data) != NULL) 65 | return 4; 66 | 67 | return 0; 68 | } 69 | 70 | int load_bpf_file(char *path) 71 | { 72 | int fd, i; 73 | Elf *elf; 74 | GElf_Ehdr ehdr; 75 | GElf_Shdr shdr, shdr_prog; 76 | Elf_Data *data, *data_prog, *symbols = NULL; 77 | char *shname, *shname_prog; 78 | 79 | if (elf_version(EV_CURRENT) == EV_NONE) 80 | return 1; 81 | 82 | fd = open(path, O_RDONLY, 0); 83 | if (fd < 0) 84 | return 1; 85 | 86 | elf = elf_begin(fd, ELF_C_READ, NULL); 87 | 88 | if (!elf) 89 | return 1; 90 | 91 | if (gelf_getehdr(elf, &ehdr) != &ehdr) 92 | return 1; 93 | 94 | /* scan over all elf sections to get license and map info */ 95 | for (i = 1; i < ehdr.e_shnum; i++) { 96 | 97 | if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 98 | continue; 99 | 100 | if (0) 101 | printf("section %d:%s data %p size %zd link %d flags %d\n", 102 | i, shname, data->d_buf, data->d_size, 103 | shdr.sh_link, (int) shdr.sh_flags); 104 | } 105 | #if 1 106 | /* load programs that don't use maps */ 107 | for (i = 1; i < ehdr.e_shnum; i++) { 108 | 109 | if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) 110 | continue; 111 | 112 | if (strcmp(shname, ".text") == 0){ 113 | insn_prog = (struct bpf_insn *) data->d_buf; 114 | prog_size = data->d_size; 115 | printf("DEBUG : section name %s, data %p, size %zd\n", shname, data->d_buf, data->d_size); 116 | } 117 | } 118 | #endif 119 | close(fd); 120 | return 0; 121 | 122 | } 123 | /* Initialize and prepare the eBPF prog */ 124 | unsigned int init_ebpf_prog(void) 125 | { 126 | int ret = 0; 127 | if (load_bpf_file("./filter.bpf") != 0){ 128 | printf("BPF load error\n"); 129 | return 1; 130 | } 131 | char bpf_log_buf[1024]; 132 | unsigned int insn_count = prog_size / sizeof(struct bpf_insn); 133 | 134 | union bpf_attr attr = { 135 | .prog_type = BPF_PROG_TYPE_UNSPEC, 136 | .insns = ptr_to_u64((void*) insn_prog), 137 | .insn_cnt = insn_count, 138 | .license = ptr_to_u64((void *) "GPL"), 139 | .log_buf = ptr_to_u64(bpf_log_buf), 140 | .log_size = 1024, 141 | .log_level = 1, 142 | }; 143 | 144 | prog = bpf_prog_alloc(bpf_prog_size(attr.insn_cnt)); 145 | if (!prog) 146 | return -ENOMEM; 147 | prog->jited = 0; 148 | prog->orig_prog = NULL; 149 | prog->len = attr.insn_cnt; 150 | 151 | memcpy(prog->insnsi, u64_to_ptr(attr.insns), prog->len * sizeof(struct bpf_insn)); 152 | atomic_set(&prog->aux->refcnt, 1); 153 | prog->aux->is_gpl_compatible = 1; 154 | 155 | fixup_bpf_calls(prog); 156 | 157 | // ready for JIT 158 | bpf_prog_select_runtime(prog); 159 | printf("DEBUG : JITed? 
: %d\n", prog->jited); 160 | 161 | /* set context values */ 162 | return ret; 163 | } 164 | 165 | unsigned int run_filt(struct filt_args *fargs) 166 | { 167 | 168 | struct bpf_context bctx = {}; 169 | bctx.arg1 = (__u64) fargs->dev; 170 | bctx.arg2 = (__u64) "em1"; 171 | bctx.arg3 = (__u64) fargs->prot; 172 | bctx.arg4 = (__u64) fargs->len; 173 | 174 | unsigned int ret = 0; 175 | ret = run_bpf_filter(prog, &bctx); 176 | if (ret == 1){ 177 | printf("True\n"); 178 | } 179 | else { 180 | printf("False\n"); 181 | } 182 | 183 | return ret; 184 | } 185 | 186 | 187 | unsigned int cleanup(void) 188 | { 189 | bpf_prog_free(prog); 190 | printf("Freed bpf prog\n"); 191 | return 0; 192 | } 193 | 194 | int main(int argv, char **argc) 195 | { 196 | struct filt_args *args = (struct filt_args *) malloc (sizeof(struct filt_args)); 197 | args->dev = "em1"; 198 | args->prot = 8; 199 | args->len = 110; 200 | 201 | /* Prepare eBPF prog*/ 202 | int ret = init_ebpf_prog(); 203 | ret = run_filt(args); 204 | ret = cleanup(); 205 | free(args); 206 | 207 | return 0; 208 | } 209 | 210 | -------------------------------------------------------------------------------- /include/atomic.h: -------------------------------------------------------------------------------- 1 | #ifndef _ATOMIC_H 2 | #define _ATOMIC_H 3 | 4 | /** 5 | * * Atomic type. 6 | * */ 7 | 8 | typedef struct { 9 | volatile int counter; 10 | } atomic_t; 11 | 12 | #define ATOMIC_INIT(i) { (i) } 13 | 14 | /** 15 | * * Read atomic variable 16 | * * @param v pointer of type atomic_t 17 | * * 18 | * * Atomically reads the value of @v. 19 | * */ 20 | #define atomic_read(v) ((v)->counter) 21 | 22 | /** 23 | * * Set atomic variable 24 | * * @param v pointer of type atomic_t 25 | * * @param i required value 26 | * */ 27 | #define atomic_set(v,i) (((v)->counter) = (i)) 28 | 29 | /** 30 | * * Add to the atomic variable 31 | * * @param i integer value to add 32 | * * @param v pointer of type atomic_t 33 | * */ 34 | static inline void atomic_add( int i, atomic_t *v ) 35 | { 36 | (void)__sync_add_and_fetch(&v->counter, i); 37 | } 38 | 39 | /** 40 | * * Subtract the atomic variable 41 | * * @param i integer value to subtract 42 | * * @param v pointer of type atomic_t 43 | * * 44 | * * Atomically subtracts @i from @v. 45 | * */ 46 | static inline void atomic_sub( int i, atomic_t *v ) 47 | { 48 | (void)__sync_sub_and_fetch(&v->counter, i); 49 | } 50 | 51 | /** 52 | * * Subtract value from variable and test result 53 | * * @param i integer value to subtract 54 | * * @param v pointer of type atomic_t 55 | * * 56 | * * Atomically subtracts @i from @v and returns 57 | * * true if the result is zero, or false for all 58 | * * other cases. 59 | * */ 60 | static inline int atomic_sub_and_test( int i, atomic_t *v ) 61 | { 62 | return !(__sync_sub_and_fetch(&v->counter, i)); 63 | } 64 | 65 | /** 66 | * * Increment atomic variable 67 | * * @param v pointer of type atomic_t 68 | * * 69 | * * Atomically increments @v by 1. 70 | * */ 71 | static inline void atomic_inc( atomic_t *v ) 72 | { 73 | (void)__sync_fetch_and_add(&v->counter, 1); 74 | } 75 | 76 | /** 77 | * * @brief decrement atomic variable 78 | * * @param v: pointer of type atomic_t 79 | * * 80 | * * Atomically decrements @v by 1. Note that the guaranteed 81 | * * useful range of an atomic_t is only 24 bits. 
82 | * */ 83 | static inline void atomic_dec( atomic_t *v ) 84 | { 85 | (void)__sync_fetch_and_sub(&v->counter, 1); 86 | } 87 | 88 | /** 89 | * * @brief Decrement and test 90 | * * @param v pointer of type atomic_t 91 | * * 92 | * * Atomically decrements @v by 1 and 93 | * * returns true if the result is 0, or false for all other 94 | * * cases. 95 | * */ 96 | static inline int atomic_dec_and_test( atomic_t *v ) 97 | { 98 | return !(__sync_sub_and_fetch(&v->counter, 1)); 99 | } 100 | 101 | /** 102 | * * @brief Increment and test 103 | * * @param v pointer of type atomic_t 104 | * * 105 | * * Atomically increments @v by 1 106 | * * and returns true if the result is zero, or false for all 107 | * * other cases. 108 | * */ 109 | static inline int atomic_inc_and_test( atomic_t *v ) 110 | { 111 | return !(__sync_add_and_fetch(&v->counter, 1)); 112 | } 113 | 114 | /** 115 | * * @brief add and test if negative 116 | * * @param v pointer of type atomic_t 117 | * * @param i integer value to add 118 | * * 119 | * * Atomically adds @i to @v and returns true 120 | * * if the result is negative, or false when 121 | * * result is greater than or equal to zero. 122 | * */ 123 | static inline int atomic_add_negative( int i, atomic_t *v ) 124 | { 125 | return (__sync_add_and_fetch(&v->counter, i) < 0); 126 | } 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /include/bpf.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 | * 3 | * This program is free software; you can redistribute it and/or 4 | * modify it under the terms of version 2 of the GNU General Public 5 | * License as published by the Free Software Foundation. 
6 | */ 7 | #ifndef _LINUX_BPF_H 8 | #define _LINUX_BPF_H 1 9 | 10 | #include "../include/uapi/bpf.h" 11 | #include "../include/atomic.h" 12 | //#include 13 | //#include 14 | 15 | struct list_head { 16 | struct list_head *next, *prev; 17 | }; 18 | 19 | struct bpf_map; 20 | 21 | /* map is generic key/value storage optionally accessible by eBPF programs */ 22 | struct bpf_map_ops { 23 | /* funcs callable from userspace (via syscall) */ 24 | struct bpf_map *(*map_alloc)(union bpf_attr *attr); 25 | void (*map_free)(struct bpf_map *); 26 | int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); 27 | 28 | /* funcs callable from userspace and from eBPF programs */ 29 | void *(*map_lookup_elem)(struct bpf_map *map, void *key); 30 | int (*map_update_elem)(struct bpf_map *map, void *key, void *value); 31 | int (*map_delete_elem)(struct bpf_map *map, void *key); 32 | }; 33 | 34 | struct bpf_map { 35 | // atomic_t refcnt; 36 | enum bpf_map_type map_type; 37 | __u32 key_size; 38 | __u32 value_size; 39 | __u32 max_entries; 40 | struct bpf_map_ops *ops; 41 | //struct work_struct work; 42 | }; 43 | 44 | struct bpf_map_type_list { 45 | struct list_head list_node; 46 | struct bpf_map_ops *ops; 47 | enum bpf_map_type type; 48 | }; 49 | 50 | void bpf_register_map_type(struct bpf_map_type_list *tl); 51 | void bpf_map_put(struct bpf_map *map); 52 | struct bpf_map *bpf_map_get(struct fd f); 53 | 54 | /* function argument constraints */ 55 | enum bpf_arg_type { 56 | ARG_ANYTHING = 0, /* any argument is ok */ 57 | 58 | /* the following constraints used to prototype 59 | * bpf_map_lookup/update/delete_elem() functions 60 | */ 61 | ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ 62 | ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ 63 | ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ 64 | 65 | /* the following constraints used to prototype bpf_memcmp() and other 66 | * functions that access data on eBPF program stack 67 | */ 68 | ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ 69 | ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ 70 | }; 71 | 72 | /* type of values returned from helper functions */ 73 | enum bpf_return_type { 74 | RET_INTEGER, /* function returns integer */ 75 | RET_VOID, /* function doesn't return anything */ 76 | RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ 77 | }; 78 | 79 | /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs 80 | * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL 81 | * instructions after verifying 82 | */ 83 | struct bpf_func_proto { 84 | __u64 (*func)(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5); 85 | _Bool gpl_only; 86 | enum bpf_return_type ret_type; 87 | enum bpf_arg_type arg1_type; 88 | enum bpf_arg_type arg2_type; 89 | enum bpf_arg_type arg3_type; 90 | enum bpf_arg_type arg4_type; 91 | enum bpf_arg_type arg5_type; 92 | }; 93 | 94 | /* bpf_context is intentionally undefined structure. Pointer to bpf_context is 95 | * the first argument to eBPF programs. 
96 | * For socket filters: 'struct bpf_context *' == 'struct sk_buff *' 97 | */ 98 | struct bpf_context; 99 | 100 | enum bpf_access_type { 101 | BPF_READ = 1, 102 | BPF_WRITE = 2 103 | }; 104 | 105 | struct bpf_verifier_ops { 106 | /* return eBPF function prototype for verification */ 107 | const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); 108 | 109 | /* return true if 'size' wide access at offset 'off' within bpf_context 110 | * with 'type' (read or write) is allowed 111 | */ 112 | _Bool (*is_valid_access)(int off, int size, enum bpf_access_type type); 113 | }; 114 | 115 | struct bpf_prog_type_list { 116 | struct list_head list_node; 117 | struct bpf_verifier_ops *ops; 118 | enum bpf_prog_type type; 119 | }; 120 | 121 | void bpf_register_prog_type(struct bpf_prog_type_list *tl); 122 | 123 | struct bpf_prog; 124 | 125 | struct bpf_prog_aux { 126 | atomic_t refcnt; 127 | _Bool is_gpl_compatible; 128 | enum bpf_prog_type prog_type; 129 | struct bpf_verifier_ops *ops; 130 | struct bpf_map **used_maps; 131 | __u32 used_map_cnt; 132 | struct bpf_prog *prog; 133 | //struct work_struct work; 134 | }; 135 | 136 | void bpf_prog_put(struct bpf_prog *prog); 137 | struct bpf_prog *bpf_prog_get(__u32 ufd); 138 | /* verify correctness of eBPF program */ 139 | int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); 140 | 141 | /* in-kernel helper functions called from eBPF programs */ 142 | __u64 bpf_map_lookup_elem(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5); 143 | __u64 bpf_map_update_elem(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5); 144 | __u64 bpf_map_delete_elem(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5); 145 | 146 | #endif /* _LINUX_BPF_H */ 147 | -------------------------------------------------------------------------------- /include/bpf_trace.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 | * 3 | * This program is free software; you can redistribute it and/or 4 | * modify it under the terms of version 2 of the GNU General Public 5 | * License as published by the Free Software Foundation. 6 | */ 7 | #ifndef _LINUX_KERNEL_BPF_TRACE_H 8 | #define _LINUX_KERNEL_BPF_TRACE_H 9 | 10 | /* For tracing filters save first six arguments of tracepoint events. 11 | * argN fields match one to one to arguments passed to tracepoint events. 12 | */ 13 | struct bpf_context { 14 | __u64 arg1; 15 | __u64 arg2; 16 | __u64 arg3; 17 | __u64 arg4; 18 | __u64 arg5; 19 | __u64 arg6; 20 | __u64 ret; 21 | }; 22 | 23 | #endif /* _LINUX_KERNEL_BPF_TRACE_H */ 24 | -------------------------------------------------------------------------------- /include/filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Linux Socket Filter Data Structures 3 | */ 4 | #ifndef __LINUX_FILTER_H__ 5 | #define __LINUX_FILTER_H__ 6 | 7 | #include <stdio.h> 8 | #include <stddef.h> 9 | #include <udis86.h> 10 | /* 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | */ 18 | #include "uapi/filter.h" 19 | #include "uapi/bpf.h" 20 | //#include "filter.h" 21 | /* 22 | struct sk_buff; 23 | struct sock; 24 | struct seccomp_data; 25 | */ 26 | 27 | #define CONFIG_BPF_JIT 1 28 | 29 | struct callback_head { 30 | struct callback_head *next; 31 | void (*func)(struct callback_head *head); 32 | }; 33 | #define rcu_head callback_head 34 | 35 | #define max(a,b) \ 36 | ({ __typeof__ (a) _a = (a); \ 37 | __typeof__ (b) _b = (b); \ 38 | _a > _b ? 
_a : _b; }) 39 | 40 | struct bpf_prog_aux; 41 | 42 | /* ArgX, context and stack frame pointer register positions. Note, 43 | * Arg1, Arg2, Arg3, etc are used as argument mappings of function 44 | * calls in BPF_CALL instruction. 45 | */ 46 | #define BPF_REG_ARG1 BPF_REG_1 47 | #define BPF_REG_ARG2 BPF_REG_2 48 | #define BPF_REG_ARG3 BPF_REG_3 49 | #define BPF_REG_ARG4 BPF_REG_4 50 | #define BPF_REG_ARG5 BPF_REG_5 51 | #define BPF_REG_CTX BPF_REG_6 52 | #define BPF_REG_FP BPF_REG_10 53 | 54 | /* Additional register mappings for converted user programs. */ 55 | #define BPF_REG_A BPF_REG_0 56 | #define BPF_REG_X BPF_REG_7 57 | #define BPF_REG_TMP BPF_REG_8 58 | 59 | /* BPF program can access up to 512 bytes of stack space. */ 60 | #define MAX_BPF_STACK 512 61 | 62 | /* Helper macros for filter block array initializers. */ 63 | 64 | /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ 65 | 66 | #define BPF_ALU64_REG(OP, DST, SRC) \ 67 | ((struct bpf_insn) { \ 68 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ 69 | .dst_reg = DST, \ 70 | .src_reg = SRC, \ 71 | .off = 0, \ 72 | .imm = 0 }) 73 | 74 | #define BPF_ALU32_REG(OP, DST, SRC) \ 75 | ((struct bpf_insn) { \ 76 | .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ 77 | .dst_reg = DST, \ 78 | .src_reg = SRC, \ 79 | .off = 0, \ 80 | .imm = 0 }) 81 | 82 | /* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ 83 | 84 | #define BPF_ALU64_IMM(OP, DST, IMM) \ 85 | ((struct bpf_insn) { \ 86 | .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ 87 | .dst_reg = DST, \ 88 | .src_reg = 0, \ 89 | .off = 0, \ 90 | .imm = IMM }) 91 | 92 | #define BPF_ALU32_IMM(OP, DST, IMM) \ 93 | ((struct bpf_insn) { \ 94 | .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ 95 | .dst_reg = DST, \ 96 | .src_reg = 0, \ 97 | .off = 0, \ 98 | .imm = IMM }) 99 | 100 | /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ 101 | 102 | #define BPF_ENDIAN(TYPE, DST, LEN) \ 103 | ((struct bpf_insn) { \ 104 | .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ 105 | .dst_reg = DST, \ 106 | .src_reg = 0, \ 107 | .off = 0, \ 108 | .imm = LEN }) 109 | 110 | /* Short form of mov, dst_reg = src_reg */ 111 | 112 | #define BPF_MOV64_REG(DST, SRC) \ 113 | ((struct bpf_insn) { \ 114 | .code = BPF_ALU64 | BPF_MOV | BPF_X, \ 115 | .dst_reg = DST, \ 116 | .src_reg = SRC, \ 117 | .off = 0, \ 118 | .imm = 0 }) 119 | 120 | #define BPF_MOV32_REG(DST, SRC) \ 121 | ((struct bpf_insn) { \ 122 | .code = BPF_ALU | BPF_MOV | BPF_X, \ 123 | .dst_reg = DST, \ 124 | .src_reg = SRC, \ 125 | .off = 0, \ 126 | .imm = 0 }) 127 | 128 | /* Short form of mov, dst_reg = imm32 */ 129 | 130 | #define BPF_MOV64_IMM(DST, IMM) \ 131 | ((struct bpf_insn) { \ 132 | .code = BPF_ALU64 | BPF_MOV | BPF_K, \ 133 | .dst_reg = DST, \ 134 | .src_reg = 0, \ 135 | .off = 0, \ 136 | .imm = IMM }) 137 | 138 | #define BPF_MOV32_IMM(DST, IMM) \ 139 | ((struct bpf_insn) { \ 140 | .code = BPF_ALU | BPF_MOV | BPF_K, \ 141 | .dst_reg = DST, \ 142 | .src_reg = 0, \ 143 | .off = 0, \ 144 | .imm = IMM }) 145 | 146 | /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ 147 | #define BPF_LD_IMM64(DST, IMM) \ 148 | BPF_LD_IMM64_RAW(DST, 0, IMM) 149 | 150 | #define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ 151 | ((struct bpf_insn) { \ 152 | .code = BPF_LD | BPF_DW | BPF_IMM, \ 153 | .dst_reg = DST, \ 154 | .src_reg = SRC, \ 155 | .off = 0, \ 156 | .imm = (__u32) (IMM) }), \ 157 | ((struct bpf_insn) { \ 158 | .code = 0, /* zero is reserved opcode */ \ 159 | .dst_reg = 0, \ 160 | .src_reg = 0, \ 161 | .off = 0, \ 162 | .imm = ((__u64) (IMM)) >> 32 }) 163 
| 164 | #define BPF_PSEUDO_MAP_FD 1 165 | 166 | /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ 167 | #define BPF_LD_MAP_FD(DST, MAP_FD) \ 168 | BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) 169 | 170 | /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ 171 | 172 | #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ 173 | ((struct bpf_insn) { \ 174 | .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ 175 | .dst_reg = DST, \ 176 | .src_reg = SRC, \ 177 | .off = 0, \ 178 | .imm = IMM }) 179 | 180 | #define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ 181 | ((struct bpf_insn) { \ 182 | .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ 183 | .dst_reg = DST, \ 184 | .src_reg = SRC, \ 185 | .off = 0, \ 186 | .imm = IMM }) 187 | 188 | /* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ 189 | 190 | #define BPF_LD_ABS(SIZE, IMM) \ 191 | ((struct bpf_insn) { \ 192 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ 193 | .dst_reg = 0, \ 194 | .src_reg = 0, \ 195 | .off = 0, \ 196 | .imm = IMM }) 197 | 198 | /* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ 199 | 200 | #define BPF_LD_IND(SIZE, SRC, IMM) \ 201 | ((struct bpf_insn) { \ 202 | .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ 203 | .dst_reg = 0, \ 204 | .src_reg = SRC, \ 205 | .off = 0, \ 206 | .imm = IMM }) 207 | 208 | /* Memory load, dst_reg = *(uint *) (src_reg + off16) */ 209 | 210 | #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 211 | ((struct bpf_insn) { \ 212 | .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ 213 | .dst_reg = DST, \ 214 | .src_reg = SRC, \ 215 | .off = OFF, \ 216 | .imm = 0 }) 217 | 218 | /* Memory store, *(uint *) (dst_reg + off16) = src_reg */ 219 | 220 | #define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ 221 | ((struct bpf_insn) { \ 222 | .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ 223 | .dst_reg = DST, \ 224 | .src_reg = SRC, \ 225 | .off = OFF, \ 226 | .imm = 0 }) 227 | 228 | /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ 229 | 230 | #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ 231 | ((struct bpf_insn) { \ 232 | .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ 233 | .dst_reg = DST, \ 234 | .src_reg = 0, \ 235 | .off = OFF, \ 236 | .imm = IMM }) 237 | 238 | /* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ 239 | 240 | #define BPF_JMP_REG(OP, DST, SRC, OFF) \ 241 | ((struct bpf_insn) { \ 242 | .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ 243 | .dst_reg = DST, \ 244 | .src_reg = SRC, \ 245 | .off = OFF, \ 246 | .imm = 0 }) 247 | 248 | /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ 249 | 250 | #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 251 | ((struct bpf_insn) { \ 252 | .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ 253 | .dst_reg = DST, \ 254 | .src_reg = 0, \ 255 | .off = OFF, \ 256 | .imm = IMM }) 257 | 258 | /* Function call */ 259 | 260 | #define BPF_EMIT_CALL(FUNC) \ 261 | ((struct bpf_insn) { \ 262 | .code = BPF_JMP | BPF_CALL, \ 263 | .dst_reg = 0, \ 264 | .src_reg = 0, \ 265 | .off = 0, \ 266 | .imm = ((FUNC) - __bpf_call_base) }) 267 | 268 | /* Raw code statement block */ 269 | 270 | #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ 271 | ((struct bpf_insn) { \ 272 | .code = CODE, \ 273 | .dst_reg = DST, \ 274 | .src_reg = SRC, \ 275 | .off = OFF, \ 276 | .imm = IMM }) 277 | 278 | /* Program exit */ 279 | 280 | #define BPF_EXIT_INSN() \ 281 | ((struct bpf_insn) { \ 282 | .code = BPF_JMP | BPF_EXIT, \ 283 | .dst_reg = 0, \ 284 | .src_reg = 0, \ 285 | .off = 0, \ 286 | .imm = 0 }) 287 | 288 | 
#define bytes_to_bpf_size(bytes) \ 289 | ({ \ 290 | int bpf_size = -EINVAL; \ 291 | \ 292 | if (bytes == sizeof(__u8)) \ 293 | bpf_size = BPF_B; \ 294 | else if (bytes == sizeof(__u16)) \ 295 | bpf_size = BPF_H; \ 296 | else if (bytes == sizeof(__u32)) \ 297 | bpf_size = BPF_W; \ 298 | else if (bytes == sizeof(__u64)) \ 299 | bpf_size = BPF_DW; \ 300 | \ 301 | bpf_size; \ 302 | }) 303 | 304 | /* Macro to invoke filter function. */ 305 | #define SK_RUN_FILTER(filter, ctx) \ 306 | (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) 307 | 308 | #ifdef CONFIG_COMPAT 309 | /* A struct sock_filter is architecture independent. */ 310 | struct compat_sock_fprog { 311 | __u16 len; 312 | compat_uptr_t filter; /* struct sock_filter * */ 313 | }; 314 | #endif 315 | 316 | struct sock_fprog_kern { 317 | __u16 len; 318 | struct sock_filter *filter; 319 | }; 320 | 321 | struct bpf_binary_header { 322 | unsigned int pages; 323 | __u8 image[]; 324 | }; 325 | 326 | struct bpf_prog { 327 | __u16 pages; /* Number of allocated pages */ 328 | _Bool jited; /* Is our filter JIT'ed? */ 329 | __u32 len; /* Number of filter blocks */ 330 | struct sock_fprog_kern *orig_prog; /* Original BPF program */ 331 | struct bpf_prog_aux *aux; /* Auxiliary fields */ 332 | unsigned int (*bpf_func)(const struct sk_buff *skb, 333 | const struct bpf_insn *filter); 334 | /* Instructions for interpreter */ 335 | union { 336 | struct sock_filter insns[0]; 337 | struct bpf_insn insnsi[0]; 338 | }; 339 | }; 340 | 341 | struct sk_filter { 342 | atomic_t refcnt; 343 | struct rcu_head rcu; 344 | struct bpf_prog *prog; 345 | }; 346 | 347 | #define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) 348 | 349 | static inline unsigned int bpf_prog_size(unsigned int proglen) 350 | { 351 | return max(sizeof(struct bpf_prog), 352 | offsetof(struct bpf_prog, insns[proglen])); 353 | } 354 | 355 | #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) 356 | 357 | #ifdef CONFIG_DEBUG_SET_MODULE_RONX 358 | static inline void bpf_prog_lock_ro(struct bpf_prog *fp) 359 | { 360 | set_memory_ro((unsigned long)fp, fp->pages); 361 | } 362 | 363 | static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) 364 | { 365 | set_memory_rw((unsigned long)fp, fp->pages); 366 | } 367 | #else 368 | static inline void bpf_prog_lock_ro(struct bpf_prog *fp) 369 | { 370 | } 371 | 372 | static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) 373 | { 374 | } 375 | #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ 376 | 377 | int sk_filter(struct sock *sk, struct sk_buff *skb); 378 | 379 | void bpf_prog_select_runtime(struct bpf_prog *fp); 380 | void bpf_prog_free(struct bpf_prog *fp); 381 | 382 | int bpf_convert_filter(struct sock_filter *prog, int len, 383 | struct bpf_insn *new_prog, int *new_len); 384 | 385 | struct bpf_prog *bpf_prog_alloc(unsigned int size); 386 | struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size); 387 | void __bpf_prog_free(struct bpf_prog *fp); 388 | void fixup_bpf_calls(struct bpf_prog *prog); 389 | 390 | static inline void bpf_prog_unlock_free(struct bpf_prog *fp) 391 | { 392 | bpf_prog_unlock_ro(fp); 393 | __bpf_prog_free(fp); 394 | } 395 | 396 | int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); 397 | void bpf_prog_destroy(struct bpf_prog *fp); 398 | 399 | int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 400 | int sk_attach_filter_ebpf(__u32 ufd, struct sock *sk); 401 | int sk_detach_filter(struct sock *sk); 402 | 403 | int bpf_check_classic(const 
struct sock_filter *filter, unsigned int flen); 404 | int sk_get_filter(struct sock *sk, struct sock_filter *filter, 405 | unsigned int len); 406 | 407 | _Bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); 408 | void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); 409 | 410 | __u64 __bpf_call_base(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5); 411 | void bpf_int_jit_compile(struct bpf_prog *fp); 412 | 413 | #ifdef CONFIG_BPF_JIT 414 | typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); 415 | 416 | struct bpf_binary_header * 417 | bpf_jit_binary_alloc(unsigned int proglen, __u8 **image_ptr, 418 | unsigned int alignment, 419 | bpf_jit_fill_hole_t bpf_fill_ill_insns); 420 | void bpf_jit_binary_free(struct bpf_binary_header *hdr); 421 | 422 | void bpf_jit_compile(struct bpf_prog *fp); 423 | void bpf_jit_free(struct bpf_prog *fp); 424 | 425 | static inline void print_hex_dump(char *desc, void *addr, unsigned int len) { 426 | #if 0 427 | int i; 428 | unsigned char buff[len+1]; 429 | unsigned char *pc = (unsigned char*)addr; 430 | 431 | // Output description if given. 432 | if (desc != NULL) 433 | printf ("%s:\n", desc); 434 | 435 | // Process every byte in the data. 436 | for (i = 0; i < len; i++) { 437 | // Multiple of 16 means new line (with line offset). 438 | /* 439 | if ((i % 16) == 0) { 440 | // Just don't print ASCII for the zeroth line. 441 | if (i != 0) 442 | printf ("\n"); 443 | 444 | // Output the offset. 445 | printf (" %04x ", i); 446 | } 447 | */ 448 | // Now the hex code for the specific character. 449 | printf ("%02x ", pc[i]); 450 | } 451 | #endif 452 | 453 | /* x86 specific stuff */ 454 | printf ("\n%s:\n", desc); 455 | 456 | ud_t ud_obj; 457 | ud_init(&ud_obj); 458 | ud_set_input_buffer(&ud_obj, (unsigned char*) addr, len); 459 | ud_set_mode(&ud_obj, 64); 460 | ud_set_syntax(&ud_obj, UD_SYN_ATT); 461 | 462 | while (ud_disassemble(&ud_obj)) { 463 | printf("%-4llx%-24s%-30s\n", (unsigned long long) ud_insn_off(&ud_obj), ud_insn_hex(&ud_obj), ud_insn_asm(&ud_obj)); 464 | } 465 | printf("\n"); 466 | } 467 | 468 | static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, 469 | __u32 pass, void *image) 470 | { 471 | printf("flen=%u proglen=%u pass=%u image=%p\n", 472 | flen, proglen, pass, image); 473 | if (image) 474 | print_hex_dump("BPF JIT code", image, proglen); 475 | // print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 476 | // 16, 1, image, proglen, false); 477 | } 478 | #else 479 | static inline void bpf_jit_compile(struct bpf_prog *fp) 480 | { 481 | } 482 | 483 | static inline void bpf_jit_free(struct bpf_prog *fp) 484 | { 485 | bpf_prog_unlock_free(fp); 486 | } 487 | #endif /* CONFIG_BPF_JIT */ 488 | 489 | #if 0 490 | #define BPF_ANC BIT(15) 491 | 492 | static inline __u16 bpf_anc_helper(const struct sock_filter *ftest) 493 | { 494 | //BUG_ON(ftest->code & BPF_ANC); 495 | 496 | switch (ftest->code) { 497 | case BPF_LD | BPF_W | BPF_ABS: 498 | case BPF_LD | BPF_H | BPF_ABS: 499 | case BPF_LD | BPF_B | BPF_ABS: 500 | #define BPF_ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ 501 | return BPF_ANC | SKF_AD_##CODE 502 | switch (ftest->k) { 503 | BPF_ANCILLARY(PROTOCOL); 504 | BPF_ANCILLARY(PKTTYPE); 505 | BPF_ANCILLARY(IFINDEX); 506 | BPF_ANCILLARY(NLATTR); 507 | BPF_ANCILLARY(NLATTR_NEST); 508 | BPF_ANCILLARY(MARK); 509 | BPF_ANCILLARY(QUEUE); 510 | BPF_ANCILLARY(HATYPE); 511 | BPF_ANCILLARY(RXHASH); 512 | BPF_ANCILLARY(CPU); 513 | BPF_ANCILLARY(ALU_XOR_X); 514 | BPF_ANCILLARY(VLAN_TAG); 515 | BPF_ANCILLARY(VLAN_TAG_PRESENT); 516
| BPF_ANCILLARY(PAY_OFFSET); 517 | BPF_ANCILLARY(RANDOM); 518 | } 519 | /* Fallthrough. */ 520 | default: 521 | return ftest->code; 522 | } 523 | } 524 | 525 | void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, 526 | int k, unsigned int size); 527 | 528 | static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, 529 | unsigned int size, void *buffer) 530 | { 531 | if (k >= 0) 532 | return skb_header_pointer(skb, k, size, buffer); 533 | 534 | return bpf_internal_load_pointer_neg_helper(skb, k, size); 535 | } 536 | #endif 537 | 538 | static inline int bpf_tell_extensions(void) 539 | { 540 | return SKF_AD_MAX; 541 | } 542 | 543 | #endif /* __LINUX_FILTER_H__ */ 544 | -------------------------------------------------------------------------------- /include/uapi/bpf.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 | * 3 | * This program is free software; you can redistribute it and/or 4 | * modify it under the terms of version 2 of the GNU General Public 5 | * License as published by the Free Software Foundation. 6 | */ 7 | #ifndef _UAPI__LINUX_BPF_H__ 8 | #define _UAPI__LINUX_BPF_H__ 9 | 10 | #include <linux/types.h> 11 | #include "bpf_common.h" 12 | 13 | /* Extended instruction set based on top of classic BPF */ 14 | 15 | /* instruction classes */ 16 | #define BPF_ALU64 0x07 /* alu mode in double word width */ 17 | 18 | /* ld/ldx fields */ 19 | #define BPF_DW 0x18 /* double word */ 20 | #define BPF_XADD 0xc0 /* exclusive add */ 21 | 22 | /* alu/jmp fields */ 23 | #define BPF_MOV 0xb0 /* mov reg to reg */ 24 | #define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ 25 | 26 | /* change endianness of a register */ 27 | #define BPF_END 0xd0 /* flags for endianness conversion: */ 28 | #define BPF_TO_LE 0x00 /* convert to little-endian */ 29 | #define BPF_TO_BE 0x08 /* convert to big-endian */ 30 | #define BPF_FROM_LE BPF_TO_LE 31 | #define BPF_FROM_BE BPF_TO_BE 32 | 33 | #define BPF_JNE 0x50 /* jump != */ 34 | #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ 35 | #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ 36 | #define BPF_CALL 0x80 /* function call */ 37 | #define BPF_EXIT 0x90 /* function return */ 38 | 39 | /* Register numbers */ 40 | enum { 41 | BPF_REG_0 = 0, 42 | BPF_REG_1, 43 | BPF_REG_2, 44 | BPF_REG_3, 45 | BPF_REG_4, 46 | BPF_REG_5, 47 | BPF_REG_6, 48 | BPF_REG_7, 49 | BPF_REG_8, 50 | BPF_REG_9, 51 | BPF_REG_10, 52 | __MAX_BPF_REG, 53 | }; 54 | 55 | /* BPF has 10 general purpose 64-bit registers and stack frame. 
*/ 56 | #define MAX_BPF_REG __MAX_BPF_REG 57 | 58 | struct bpf_insn { 59 | __u8 code; /* opcode */ 60 | __u8 dst_reg:4; /* dest register */ 61 | __u8 src_reg:4; /* source register */ 62 | __s16 off; /* signed offset */ 63 | __s32 imm; /* signed immediate constant */ 64 | }; 65 | 66 | /* BPF syscall commands */ 67 | enum bpf_cmd { 68 | /* create a map with given type and attributes 69 | * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) 70 | * returns fd or negative error 71 | * map is deleted when fd is closed 72 | */ 73 | BPF_MAP_CREATE, 74 | 75 | /* lookup key in a given map 76 | * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) 77 | * Using attr->map_fd, attr->key, attr->value 78 | * returns zero and stores found elem into value 79 | * or negative error 80 | */ 81 | BPF_MAP_LOOKUP_ELEM, 82 | 83 | /* create or update key/value pair in a given map 84 | * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) 85 | * Using attr->map_fd, attr->key, attr->value 86 | * returns zero or negative error 87 | */ 88 | BPF_MAP_UPDATE_ELEM, 89 | 90 | /* find and delete elem by key in a given map 91 | * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) 92 | * Using attr->map_fd, attr->key 93 | * returns zero or negative error 94 | */ 95 | BPF_MAP_DELETE_ELEM, 96 | 97 | /* lookup key in a given map and return next key 98 | * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) 99 | * Using attr->map_fd, attr->key, attr->next_key 100 | * returns zero and stores next key or negative error 101 | */ 102 | BPF_MAP_GET_NEXT_KEY, 103 | 104 | /* verify and load eBPF program 105 | * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) 106 | * Using attr->prog_type, attr->insns, attr->license 107 | * returns fd or negative error 108 | */ 109 | BPF_PROG_LOAD, 110 | }; 111 | 112 | enum bpf_map_type { 113 | BPF_MAP_TYPE_UNSPEC, 114 | BPF_MAP_TYPE_HASH, 115 | }; 116 | 117 | enum bpf_prog_type { 118 | BPF_PROG_TYPE_UNSPEC, 119 | BPF_PROG_TYPE_SOCKET_FILTER, 120 | BPF_PROG_TYPE_TRACING_FILTER, 121 | }; 122 | 123 | union bpf_attr { 124 | struct { /* anonymous struct used by BPF_MAP_CREATE command */ 125 | __u32 map_type; /* one of enum bpf_map_type */ 126 | __u32 key_size; /* size of key in bytes */ 127 | __u32 value_size; /* size of value in bytes */ 128 | __u32 max_entries; /* max number of entries in a map */ 129 | }; 130 | 131 | struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ 132 | __u32 map_fd; 133 | __aligned_u64 key; 134 | union { 135 | __aligned_u64 value; 136 | __aligned_u64 next_key; 137 | }; 138 | }; 139 | 140 | struct { /* anonymous struct used by BPF_PROG_LOAD command */ 141 | __u32 prog_type; /* one of enum bpf_prog_type */ 142 | __u32 insn_cnt; 143 | __aligned_u64 insns; 144 | __aligned_u64 license; 145 | __u32 log_level; /* verbosity level of verifier */ 146 | __u32 log_size; /* size of user buffer */ 147 | __aligned_u64 log_buf; /* user supplied buffer */ 148 | }; 149 | } __attribute__((aligned(8))); 150 | 151 | /* integer value in 'imm' field of BPF_CALL instruction selects which helper 152 | * function eBPF program intends to call 153 | */ 154 | enum bpf_func_id { 155 | BPF_FUNC_unspec, 156 | #if 0 157 | BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ 158 | BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value) */ 159 | BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ 160 | BPF_FUNC_fetch_ptr, /* void *bpf_fetch_ptr(void *unsafe_ptr) */ 161 | BPF_FUNC_fetch_u64, /* u64 
bpf_fetch_u64(void *unsafe_ptr) */ 162 | BPF_FUNC_fetch_u32, /* u32 bpf_fetch_u32(void *unsafe_ptr) */ 163 | BPF_FUNC_fetch_u16, /* u16 bpf_fetch_u16(void *unsafe_ptr) */ 164 | BPF_FUNC_fetch_u8, /* u8 bpf_fetch_u8(void *unsafe_ptr) */ 165 | BPF_FUNC_dump_stack, /* void bpf_dump_stack(void) */ 166 | BPF_FUNC_printk, /* int bpf_printk(const char *fmt, int fmt_size, ...) */ 167 | BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ 168 | BPF_FUNC_get_current, /* struct task_struct *bpf_get_current(void) */ 169 | #endif 170 | BPF_FUNC_memcmp, /* int bpf_memcmp(void *unsafe_ptr, void *safe_ptr, int size) */ 171 | BPF_FUNC_dummy, 172 | BPF_FUNC_strcmp, 173 | BPF_FUNC_set_threshold, 174 | __BPF_FUNC_MAX_ID, 175 | }; 176 | 177 | #endif /* _UAPI__LINUX_BPF_H__ */ 178 | -------------------------------------------------------------------------------- /include/uapi/bpf_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _UAPI__LINUX_BPF_COMMON_H__ 2 | #define _UAPI__LINUX_BPF_COMMON_H__ 3 | 4 | /* Instruction classes */ 5 | #define BPF_CLASS(code) ((code) & 0x07) 6 | #define BPF_LD 0x00 7 | #define BPF_LDX 0x01 8 | #define BPF_ST 0x02 9 | #define BPF_STX 0x03 10 | #define BPF_ALU 0x04 11 | #define BPF_JMP 0x05 12 | #define BPF_RET 0x06 13 | #define BPF_MISC 0x07 14 | 15 | /* ld/ldx fields */ 16 | #define BPF_SIZE(code) ((code) & 0x18) 17 | #define BPF_W 0x00 18 | #define BPF_H 0x08 19 | #define BPF_B 0x10 20 | #define BPF_MODE(code) ((code) & 0xe0) 21 | #define BPF_IMM 0x00 22 | #define BPF_ABS 0x20 23 | #define BPF_IND 0x40 24 | #define BPF_MEM 0x60 25 | #define BPF_LEN 0x80 26 | #define BPF_MSH 0xa0 27 | 28 | /* alu/jmp fields */ 29 | #define BPF_OP(code) ((code) & 0xf0) 30 | #define BPF_ADD 0x00 31 | #define BPF_SUB 0x10 32 | #define BPF_MUL 0x20 33 | #define BPF_DIV 0x30 34 | #define BPF_OR 0x40 35 | #define BPF_AND 0x50 36 | #define BPF_LSH 0x60 37 | #define BPF_RSH 0x70 38 | #define BPF_NEG 0x80 39 | #define BPF_MOD 0x90 40 | #define BPF_XOR 0xa0 41 | 42 | #define BPF_JA 0x00 43 | #define BPF_JEQ 0x10 44 | #define BPF_JGT 0x20 45 | #define BPF_JGE 0x30 46 | #define BPF_JSET 0x40 47 | #define BPF_SRC(code) ((code) & 0x08) 48 | #define BPF_K 0x00 49 | #define BPF_X 0x08 50 | 51 | #ifndef BPF_MAXINSNS 52 | #define BPF_MAXINSNS 8192 53 | #endif 54 | 55 | #endif /* _UAPI__LINUX_BPF_COMMON_H__ */ 56 | -------------------------------------------------------------------------------- /include/uapi/filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Linux Socket Filter Data Structures 3 | */ 4 | 5 | #ifndef _UAPI__LINUX_FILTER_H__ 6 | #define _UAPI__LINUX_FILTER_H__ 7 | /* 8 | #include 9 | #include 10 | #include 11 | */ 12 | #include 13 | /* 14 | * Current version of the filter code architecture. 15 | */ 16 | #define BPF_MAJOR_VERSION 1 17 | #define BPF_MINOR_VERSION 1 18 | 19 | /* 20 | * Try and keep these values and structures similar to BSD, especially 21 | * the BPF code definitions which need to match so you can share filters 22 | */ 23 | 24 | struct sock_filter { /* Filter block */ 25 | __u16 code; /* Actual filter code */ 26 | __u8 jt; /* Jump true */ 27 | __u8 jf; /* Jump false */ 28 | __u32 k; /* Generic multiuse field */ 29 | }; 30 | 31 | struct sock_fprog { /* Required for SO_ATTACH_FILTER. 
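 * A classic filter is attached by handing this struct to setsockopt() on
 * a socket, e.g. (sketch):
 *   setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));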
*/ 32 | unsigned short len; /* Number of filter blocks */ 33 | struct sock_filter *filter; 34 | }; 35 | 36 | /* ret - BPF_K and BPF_X also apply */ 37 | #define BPF_RVAL(code) ((code) & 0x18) 38 | #define BPF_A 0x10 39 | 40 | /* misc */ 41 | #define BPF_MISCOP(code) ((code) & 0xf8) 42 | #define BPF_TAX 0x00 43 | #define BPF_TXA 0x80 44 | 45 | /* 46 | * Macros for filter block array initializers. 47 | */ 48 | #ifndef BPF_STMT 49 | #define BPF_STMT(code, k) { (unsigned short)(code), 0, 0, k } 50 | #endif 51 | #ifndef BPF_JUMP 52 | #define BPF_JUMP(code, k, jt, jf) { (unsigned short)(code), jt, jf, k } 53 | #endif 54 | 55 | /* 56 | * Number of scratch memory words for: BPF_ST and BPF_STX 57 | */ 58 | #define BPF_MEMWORDS 16 59 | 60 | /* RATIONALE. Negative offsets are invalid in BPF. 61 | We use them to reference ancillary data. 62 | Unlike introduction new instructions, it does not break 63 | existing compilers/optimizers. 64 | */ 65 | #define SKF_AD_OFF (-0x1000) 66 | #define SKF_AD_PROTOCOL 0 67 | #define SKF_AD_PKTTYPE 4 68 | #define SKF_AD_IFINDEX 8 69 | #define SKF_AD_NLATTR 12 70 | #define SKF_AD_NLATTR_NEST 16 71 | #define SKF_AD_MARK 20 72 | #define SKF_AD_QUEUE 24 73 | #define SKF_AD_HATYPE 28 74 | #define SKF_AD_RXHASH 32 75 | #define SKF_AD_CPU 36 76 | #define SKF_AD_ALU_XOR_X 40 77 | #define SKF_AD_VLAN_TAG 44 78 | #define SKF_AD_VLAN_TAG_PRESENT 48 79 | #define SKF_AD_PAY_OFFSET 52 80 | #define SKF_AD_RANDOM 56 81 | #define SKF_AD_MAX 60 82 | #define SKF_NET_OFF (-0x100000) 83 | #define SKF_LL_OFF (-0x200000) 84 | 85 | 86 | #endif /* _UAPI__LINUX_FILTER_H__ */ 87 | -------------------------------------------------------------------------------- /src/core.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Userspace eBPF Core 3 | * ------------------- 4 | * 5 | * Adapted from the new Kernel eBPF implementation designed by PLUMgrid. 6 | * Experimental release without verifier, useful only for tracing. 7 | * 8 | * Suchakra Sharma 9 | * 10 | * Original Kernel BPF Authors: 11 | * Jay Schulist 12 | * Alexei Starovoitov 13 | * Daniel Borkmann 14 | * 15 | * Other Contributors: 16 | * Andi Kleen - Fix a few bad bugs and races. 17 | * Kris Katterjohn - Added many additional checks in bpf_check_classic() 18 | * 19 | * This program is free software; you can redistribute it and/or 20 | * modify it under the terms of the GNU General Public License 21 | * as published by the Free Software Foundation; either version 22 | * 2 of the License, or (at your option) any later version. 23 | * 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | /* Registers */ 38 | #define BPF_R0 regs[BPF_REG_0] 39 | #define BPF_R1 regs[BPF_REG_1] 40 | #define BPF_R2 regs[BPF_REG_2] 41 | #define BPF_R3 regs[BPF_REG_3] 42 | #define BPF_R4 regs[BPF_REG_4] 43 | #define BPF_R5 regs[BPF_REG_5] 44 | #define BPF_R6 regs[BPF_REG_6] 45 | #define BPF_R7 regs[BPF_REG_7] 46 | #define BPF_R8 regs[BPF_REG_8] 47 | #define BPF_R9 regs[BPF_REG_9] 48 | #define BPF_R10 regs[BPF_REG_10] 49 | 50 | /* Named registers */ 51 | #define DST regs[insn->dst_reg] 52 | #define SRC regs[insn->src_reg] 53 | #define FP regs[BPF_REG_FP] 54 | #define ARG1 regs[BPF_REG_ARG1] 55 | #define CTX regs[BPF_REG_CTX] 56 | #define IMM insn->imm 57 | 58 | #define min_t(type, x, y) ({ \ 59 | type __min1 = (x); \ 60 | type __min2 = (y); \ 61 | __min1 < __min2 ? 
__min1: __min2; }) 62 | 63 | #define NOINLINE __attribute__ ((noinline)) 64 | #define likely(x) __builtin_expect(!!(x), 1) 65 | #define unlikely(x) __builtin_expect(!!(x), 0) 66 | #define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) 67 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) 68 | #define __force __attribute__((force)) 69 | #define __weak __attribute__((weak)) 70 | #define PAGE_SIZE getpagesize() 71 | 72 | /* No hurry in this branch 73 | * 74 | * Exported for the bpf jit load helper. 75 | */ 76 | 77 | #define CONFIG_BPF_JIT 1 78 | 79 | /* Structs for UeBPF-KeBPF trial*/ 80 | struct procdat 81 | { 82 | int thresh; 83 | int miss; 84 | }; 85 | 86 | struct mmap_info 87 | { 88 | void *data; 89 | unsigned int thresh; 90 | int reference; 91 | }; 92 | 93 | 94 | struct bpf_prog *bpf_prog_alloc(unsigned int size) 95 | { 96 | struct bpf_prog_aux *aux; 97 | struct bpf_prog *fp; 98 | 99 | int page_size = getpagesize(); 100 | 101 | size = ROUND_UP(size, page_size); 102 | fp = malloc(size); 103 | if (fp == NULL) 104 | return NULL; 105 | 106 | aux = malloc(sizeof(*aux)); 107 | if (aux == NULL) { 108 | free(fp); 109 | return NULL; 110 | } 111 | 112 | fp->pages = size / page_size; 113 | fp->aux = aux; 114 | 115 | return fp; 116 | } 117 | 118 | struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size) 119 | { 120 | struct bpf_prog *fp; 121 | 122 | int page_size = getpagesize(); 123 | 124 | size = ROUND_UP(size, page_size); 125 | if (size <= fp_old->pages * page_size) 126 | return fp_old; 127 | 128 | fp = malloc(size); 129 | if (fp != NULL) { 130 | memcpy(fp, fp_old, fp_old->pages * page_size); 131 | fp->pages = size / page_size; 132 | 133 | /* We keep fp->aux from fp_old around in the new 134 | * reallocated structure. 135 | */ 136 | fp_old->aux = NULL; 137 | __bpf_prog_free(fp_old); 138 | } 139 | 140 | return fp; 141 | } 142 | 143 | void __bpf_prog_free(struct bpf_prog *fp) 144 | { 145 | free(fp->aux); 146 | free(fp); 147 | } 148 | 149 | #ifdef CONFIG_BPF_JIT 150 | void* alloc_mmap(size_t size) { 151 | void* ptr = mmap(0, size, 152 | PROT_READ | PROT_WRITE | PROT_EXEC, 153 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 154 | if (ptr == (void*)-1) { 155 | perror("mmap"); 156 | return NULL; 157 | } 158 | return ptr; 159 | } 160 | 161 | struct bpf_binary_header * 162 | bpf_jit_binary_alloc(unsigned int proglen, __u8 **image_ptr, 163 | unsigned int alignment, 164 | bpf_jit_fill_hole_t bpf_fill_ill_insns) 165 | { 166 | struct bpf_binary_header *hdr; 167 | unsigned int size, hole, start; 168 | 169 | /* Most of BPF filters are really small, but if some of them 170 | * fill a page, allow at least 128 extra bytes to insert a 171 | * random section of illegal instructions. 172 | */ 173 | size = ROUND_UP(proglen + sizeof(*hdr) + 128, PAGE_SIZE); 174 | hdr = alloc_mmap(size); 175 | if (hdr == NULL) 176 | return NULL; 177 | 178 | /* Fill space with illegal/arch-dep instructions. */ 179 | bpf_fill_ill_insns(hdr, size); 180 | 181 | hdr->pages = size / PAGE_SIZE; 182 | hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), 183 | PAGE_SIZE - sizeof(*hdr)); 184 | start = (rand() % hole) & ~(alignment - 1); 185 | 186 | /* Leave a random number of instructions before BPF code. 
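 * The image starts at a random, alignment-rounded offset inside the
 * mmap'd region, and bpf_fill_ill_insns() has already filled the whole
 * area with illegal instructions, so a stray jump into the hole traps
 * instead of executing attacker-controlled bytes (the kernel's defense
 * against JIT spraying).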
*/ 187 | *image_ptr = &hdr->image[start]; 188 | 189 | return hdr; 190 | } 191 | 192 | void bpf_jit_binary_free(struct bpf_binary_header *hdr) 193 | { 194 | munmap(hdr, hdr->pages * PAGE_SIZE); /* allocated with alloc_mmap(), not malloc() */ 195 | } 196 | #endif /* CONFIG_BPF_JIT */ 197 | 198 | /* Helper functions for BPF_CALL */ 199 | 200 | static __u64 bpf_memcmp(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5) 201 | { 202 | void *ptr1 = (void*) (long) r1; 203 | void *ptr2 = (void*) (long) r2; 204 | __u32 size = (__u32) r3; 205 | if (size < 64) { 206 | return memcmp(ptr1, ptr2, size); 207 | } 208 | return -1; 209 | } 210 | 211 | static __u64 bpf_strcmp(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5) 212 | { 213 | char *ptr1 = (char*) (long) r1; 214 | char *ptr2 = (char*) (long) r2; 215 | return strcmp(ptr1, ptr2); 216 | } 217 | 218 | static __u64 bpf_dummy(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5) 219 | { 220 | printf("In bpf_dummy\n"); return 0; 221 | } 222 | 223 | static __u64 bpf_set_threshold(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5) 224 | { 225 | int configfd; 226 | struct procdat *address = NULL; 227 | 228 | configfd = open("/sys/kernel/debug/ebpflttng", O_RDWR); 229 | if (configfd < 0) 230 | { 231 | perror("Open call failed"); 232 | return -1; 233 | } 234 | 235 | address = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, configfd, 0); 236 | if (address == MAP_FAILED) 237 | { 238 | perror("mmap operation failed"); 239 | close(configfd); 240 | return -1; 241 | } 242 | 243 | printf("Thresh %d\n", address->thresh); 244 | address->thresh = (unsigned int) r1; printf("New Thresh %d\n", address->thresh); 245 | munmap(address, PAGE_SIZE); close(configfd); return 0; 246 | } 247 | 248 | static struct bpf_func_proto filter_funcs[] = { 249 | [BPF_FUNC_memcmp] = { 250 | .func = bpf_memcmp, 251 | .gpl_only = 0, 252 | .ret_type = RET_INTEGER, 253 | .arg1_type = ARG_ANYTHING, 254 | .arg2_type = ARG_PTR_TO_STACK, 255 | .arg3_type = ARG_CONST_STACK_SIZE, 256 | }, 257 | [BPF_FUNC_strcmp] = { 258 | .func = bpf_strcmp, 259 | .gpl_only = 0, 260 | .ret_type = RET_INTEGER, 261 | .arg1_type = ARG_ANYTHING, 262 | .arg2_type = ARG_PTR_TO_STACK, 263 | .arg3_type = ARG_CONST_STACK_SIZE, 264 | }, 265 | [BPF_FUNC_dummy] = { 266 | .func = bpf_dummy, 267 | .gpl_only = 0, 268 | .ret_type = RET_VOID, 269 | }, 270 | [BPF_FUNC_set_threshold] = { 271 | .func = bpf_set_threshold, 272 | .gpl_only = 0, 273 | .ret_type = RET_VOID, 274 | .arg1_type = ARG_ANYTHING, 275 | }, 276 | }; 277 | 278 | static const struct bpf_func_proto *func_proto(enum bpf_func_id func_id) 279 | { 280 | if (func_id < 0 || func_id >= ARRAY_SIZE(filter_funcs)) 281 | return NULL; 282 | return &filter_funcs[func_id]; 283 | } 284 | /* Handle BPF_CALL instructions 285 | * Make them call actual functions 286 | */ 287 | void fixup_bpf_calls(struct bpf_prog *prog) 288 | { 289 | const struct bpf_func_proto *fn; 290 | int i; 291 | 292 | for (i = 0; i < prog->len; i++) { 293 | struct bpf_insn *insn = &prog->insnsi[i]; 294 | if (insn->code == (BPF_JMP | BPF_CALL)) { 295 | fn = func_proto(insn->imm); 296 | if (!fn || !fn->func) { 297 | printf("No func!\n"); continue; } 298 | insn->imm = (__s32) ((unsigned long) fn->func - (unsigned long) __bpf_call_base); 299 | } 300 | } 301 | } 302 | 303 | /* Base function for offset calculation. Needs to go into .text section, 304 | * therefore keeping it non-static as well; will also be used by JITs 305 | * anyway later on, so do not let the compiler omit it.
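 *
 * Worked example of the call fixup: if bpf_memcmp ends up 0x1234 bytes
 * past __bpf_call_base in .text, fixup_bpf_calls() stores 0x1234 in
 * insn->imm; the interpreter then calls
 * (__bpf_call_base + insn->imm)(r1, ..., r5) and the JIT emits a call to
 * __bpf_call_base + imm32, both of which land on bpf_memcmp.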
306 | */ 307 | NOINLINE __u64 __bpf_call_base(__u64 r1, __u64 r2, __u64 r3, __u64 r4, __u64 r5) 308 | { 309 | return 0; 310 | } 311 | 312 | /** 313 | * __bpf_prog_run - run eBPF program on a given context 314 | * @ctx: is the data we are operating on 315 | * @insn: is the array of eBPF instructions 316 | * 317 | * Decode and execute eBPF instructions. 318 | */ 319 | static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) 320 | { 321 | __u64 stack[MAX_BPF_STACK / sizeof(__u64)]; 322 | __u64 regs[MAX_BPF_REG], tmp; 323 | static const void *jumptable[256] = { 324 | [0 ... 255] = &&default_label, 325 | /* Now overwrite non-defaults ... */ 326 | /* 32 bit ALU operations */ 327 | [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, 328 | [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, 329 | [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, 330 | [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, 331 | [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, 332 | [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, 333 | [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, 334 | [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, 335 | [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, 336 | [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, 337 | [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, 338 | [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, 339 | [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, 340 | [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, 341 | [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, 342 | [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, 343 | [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, 344 | [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, 345 | [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, 346 | [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, 347 | [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, 348 | [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, 349 | [BPF_ALU | BPF_NEG] = &&ALU_NEG, 350 | // [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, 351 | // [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, 352 | /* 64 bit ALU operations */ 353 | [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, 354 | [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, 355 | [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, 356 | [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, 357 | [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, 358 | [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, 359 | [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, 360 | [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, 361 | [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, 362 | [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, 363 | [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, 364 | [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, 365 | [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, 366 | [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, 367 | [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, 368 | [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, 369 | [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, 370 | [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, 371 | [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, 372 | [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, 373 | [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, 374 | [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, 375 | [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, 376 | [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, 377 | [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, 378 | /* Call instruction */ 379 | [BPF_JMP | BPF_CALL] = &&JMP_CALL, 380 | /* Jumps */ 381 | [BPF_JMP | BPF_JA] = &&JMP_JA, 382 | [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, 383 | [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, 384 | [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, 385 
| [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, 386 | [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, 387 | [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, 388 | [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, 389 | [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, 390 | [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, 391 | [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, 392 | [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, 393 | [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, 394 | [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, 395 | [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, 396 | /* Program return */ 397 | [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, 398 | /* Store instructions */ 399 | [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, 400 | [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, 401 | [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W, 402 | [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, 403 | [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, 404 | // [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, 405 | [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, 406 | [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, 407 | [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, 408 | [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, 409 | /* Load instructions */ 410 | [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, 411 | [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, 412 | [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, 413 | [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, 414 | [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, 415 | }; 416 | void *ptr; 417 | int off; 418 | 419 | #define CONT ({ insn++; goto select_insn; }) 420 | #define CONT_JMP ({ insn++; goto select_insn; }) 421 | 422 | FP = (__u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; 423 | ARG1 = (__u64) (unsigned long) ctx; 424 | 425 | /* Registers used in classic BPF programs need to be reset first. */ 426 | regs[BPF_REG_A] = 0; 427 | regs[BPF_REG_X] = 0; 428 | 429 | select_insn: 430 | goto *jumptable[insn->code]; 431 | 432 | /* ALU */ 433 | #define ALU(OPCODE, OP) \ 434 | ALU64_##OPCODE##_X: \ 435 | DST = DST OP SRC; \ 436 | CONT; \ 437 | ALU_##OPCODE##_X: \ 438 | DST = (__u32) DST OP (__u32) SRC; \ 439 | CONT; \ 440 | ALU64_##OPCODE##_K: \ 441 | DST = DST OP IMM; \ 442 | CONT; \ 443 | ALU_##OPCODE##_K: \ 444 | DST = (__u32) DST OP (__u32) IMM; \ 445 | CONT; 446 | 447 | ALU(ADD, +) 448 | ALU(SUB, -) 449 | ALU(AND, &) 450 | ALU(OR, |) 451 | ALU(LSH, <<) 452 | ALU(RSH, >>) 453 | ALU(XOR, ^) 454 | ALU(MUL, *) 455 | #undef ALU 456 | ALU_NEG: 457 | DST = (__u32) -DST; 458 | CONT; 459 | ALU64_NEG: 460 | DST = -DST; 461 | CONT; 462 | ALU_MOV_X: 463 | DST = (__u32) SRC; 464 | CONT; 465 | ALU_MOV_K: 466 | DST = (__u32) IMM; 467 | CONT; 468 | ALU64_MOV_X: 469 | DST = SRC; 470 | CONT; 471 | ALU64_MOV_K: 472 | DST = IMM; 473 | CONT; 474 | LD_IMM_DW: 475 | DST = (__u64) (__u32) insn[0].imm | ((__u64) (__u32) insn[1].imm) << 32; 476 | insn++; 477 | CONT; 478 | ALU64_ARSH_X: 479 | (*(__s64 *) &DST) >>= SRC; 480 | CONT; 481 | ALU64_ARSH_K: 482 | (*(__s64 *) &DST) >>= IMM; 483 | CONT; 484 | ALU64_MOD_X: 485 | if (unlikely(SRC == 0)) 486 | return 0; 487 | tmp = DST; 488 | DST = tmp % SRC; 489 | CONT; 490 | ALU_MOD_X: 491 | if (unlikely(SRC == 0)) 492 | return 0; 493 | tmp = (__u32) DST; 494 | DST = (__u32) tmp % (__u32) SRC; 495 | CONT; 496 | ALU64_MOD_K: 497 | tmp = DST; 498 | DST = tmp % (__u32) IMM; 499 | CONT; 500 | ALU_MOD_K: 501 | tmp = (__u32) DST; 502 | DST = (__u32) tmp % (__u32) IMM; 503 | CONT; 504 | ALU64_DIV_X: 505 | if (unlikely(SRC == 0)) 506 | return 0; 507 | DST = DST / SRC; 508 | CONT; 509 | ALU_DIV_X: 510 | if (unlikely(SRC == 0)) 511 | return 0; 512 | tmp = (__u32) DST; 513 | tmp = (__u32) tmp / (__u32) SRC; 514 | DST = (__u32) tmp; 515 | CONT; 516 | ALU64_DIV_K: 517 | DST = DST / (__u32) IMM; 518 | CONT; 519 | ALU_DIV_K: 520 | tmp = (__u32) DST; 521 | tmp = (__u32) tmp / (__u32) IMM; 522 | DST = (__u32) tmp; 523 | CONT; 524 | #if 0 525 | ALU_END_TO_BE: 526 | switch (IMM) { 527 | case 16: 528 | DST = (__force __u16) cpu_to_be16(DST); 529 | break; 530 | case 32: 531 | DST = (__force __u32) cpu_to_be32(DST); 532 | break; 533 | case 64: 534 | DST = (__force __u64) cpu_to_be64(DST); 535 | break; 536 | } 537 | CONT; 538 | ALU_END_TO_LE: 539 | switch (IMM) { 540 | case 16: 541 | DST = (__force __u16) cpu_to_le16(DST); 542 | break; 543 | case 32: 544 | DST = (__force __u32) cpu_to_le32(DST); 545 | break; 546 | case 64: 547 | DST = (__force __u64) cpu_to_le64(DST); 548 | break; 549 | } 550 | CONT; 551 | #endif 552 | /* CALL */ 553 | JMP_CALL: 554 | /* Function call scratches BPF_R1-BPF_R5 registers, 555 | * preserves BPF_R6-BPF_R9, and stores return value 556 | * into BPF_R0. 557 | */ 558 | BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3, 559 | BPF_R4, BPF_R5); 560 | CONT; 561 | 562 | /* JMP */ 563 | JMP_JA: 564 | insn += insn->off; 565 | CONT; 566 | JMP_JEQ_X: 567 | if (DST == SRC) { 568 | insn += insn->off; 569 | CONT_JMP; 570 | } 571 | CONT; 572 | JMP_JEQ_K: 573 | if (DST == IMM) { 574 | insn += insn->off; 575 | CONT_JMP; 576 | } 577 | CONT; 578 | JMP_JNE_X: 579 | if (DST != SRC) { 580 | insn += insn->off; 581 | CONT_JMP; 582 | } 583 | CONT; 584 | JMP_JNE_K: 585 | if (DST != IMM) { 586 | insn += insn->off; 587 | CONT_JMP; 588 | } 589 | CONT; 590 | JMP_JGT_X: 591 | if (DST > SRC) { 592 | insn += insn->off; 593 | CONT_JMP; 594 | } 595 | CONT; 596 | JMP_JGT_K: 597 | if (DST > IMM) { 598 | insn += insn->off; 599 | CONT_JMP; 600 | } 601 | CONT; 602 | JMP_JGE_X: 603 | if (DST >= SRC) { 604 | insn += insn->off; 605 | CONT_JMP; 606 | } 607 | CONT; 608 | JMP_JGE_K: 609 | if (DST >= IMM) { 610 | insn += insn->off; 611 | CONT_JMP; 612 | } 613 | CONT; 614 | JMP_JSGT_X: 615 | if (((__s64) DST) > ((__s64) SRC)) { 616 | insn += insn->off; 617 | CONT_JMP; 618 | } 619 | CONT; 620 | JMP_JSGT_K: 621 | if (((__s64) DST) > ((__s64) IMM)) { 622 | insn += insn->off; 623 | CONT_JMP; 624 | } 625 | CONT; 626 | JMP_JSGE_X: 627 | if (((__s64) DST) >= ((__s64) SRC)) { 628 | insn += insn->off; 629 | CONT_JMP; 630 | } 631 | CONT; 632 | JMP_JSGE_K: 633 | if (((__s64) DST) >= ((__s64) IMM)) { 634 | insn += insn->off; 635 | CONT_JMP; 636 | } 637 | CONT; 638 | JMP_JSET_X: 639 | if (DST & SRC) { 640 | insn += insn->off; 641 | CONT_JMP; 642 | } 643 | CONT; 644 | JMP_JSET_K: 645 | if (DST & IMM) { 646 | insn += insn->off; 647 | CONT_JMP; 648 | } 649 | CONT; 650 | JMP_EXIT: 651 | return BPF_R0; 652 | 653 | /* STX, ST and LDX */ 654 | #define LDST(SIZEOP, SIZE) \ 655 | STX_MEM_##SIZEOP: \ 656 | *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ 657 | CONT; \ 658 | ST_MEM_##SIZEOP: \ 659 | *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \ 660 | CONT; \ 661 | LDX_MEM_##SIZEOP: \ 662 | DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ 663 | CONT; 664 | 665 | LDST(B, __u8) 666 | LDST(H, __u16) 667 | LDST(W, __u32) 668 | LDST(DW, __u64) 669 | #undef LDST 670 | STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ 671 | atomic_add((__u32) SRC, (atomic_t *)(unsigned long) 672 | (DST + insn->off)); 673 | CONT; 674 | 675 | default_label: 676 | /* If we ever reach this, we have a bug somewhere.
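 * An unknown opcode means either junk in the instruction stream or an
 * instruction the interpreter does not implement, e.g. the BPF_END
 * byte-swap ops whose jumptable entries are commented out above.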
*/ 677 | printf("unknown opcode %02x\n", insn->code); 678 | return 0; 679 | } 680 | 681 | void __weak bpf_int_jit_compile(struct bpf_prog *prog) 682 | { 683 | } 684 | 685 | /** 686 | * bpf_prog_select_runtime - select execution runtime for BPF program 687 | * @fp: bpf_prog populated with internal BPF program 688 | * 689 | * try to JIT internal BPF program, if JIT is not available select interpreter 690 | * BPF program will be executed via BPF_PROG_RUN() macro 691 | */ 692 | void bpf_prog_select_runtime(struct bpf_prog *fp) 693 | { 694 | fp->bpf_func = (void *) __bpf_prog_run; 695 | 696 | /* Probe if internal BPF can be JITed */ 697 | bpf_int_jit_compile(fp); 698 | /* Lock whole bpf_prog as read-only */ 699 | bpf_prog_lock_ro(fp); 700 | } 701 | 702 | /* Free internal BPF program */ 703 | void bpf_prog_free(struct bpf_prog *fp) 704 | { 705 | struct bpf_prog_aux *aux = fp->aux; 706 | aux->prog = fp; 707 | bpf_jit_free(aux->prog); 708 | } 709 | -------------------------------------------------------------------------------- /src/jit.c: -------------------------------------------------------------------------------- 1 | /* Userspace BPF JIT compiler (x86_64) 2 | * 3 | * Suchakra Sharma 4 | * 5 | * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) 6 | * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 7 | * 8 | * This program is free software; you can redistribute it and/or 9 | * modify it under the terms of the GNU General Public License 10 | * as published by the Free Software Foundation; version 2 11 | * of the License. 12 | */ 13 | 14 | // Allow JITing 15 | #define CONFIG_BPF_JIT 1 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #define unlikely(x) __builtin_expect(!!(x), 0) 29 | 30 | // 0 for no jit, 1 for jit w/o debug, 2 for JIT with jited code dump 31 | int bpf_jit_enable = 2; 32 | 33 | static inline __u8 *emit_code(__u8 *ptr, __u32 bytes, unsigned int len) 34 | { 35 | if (len == 1) 36 | *ptr = bytes; 37 | else if (len == 2) 38 | *(__u16 *)ptr = bytes; 39 | else { 40 | *(__u32 *)ptr = bytes; 41 | __sync_synchronize(); 42 | } 43 | return ptr + len; 44 | } 45 | 46 | #define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) 47 | 48 | #define EMIT1(b1) EMIT(b1, 1) 49 | #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) 50 | #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) 51 | #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) 52 | #define EMIT1_off32(b1, off) \ 53 | do {EMIT1(b1); EMIT(off, 4); } while (0) 54 | #define EMIT2_off32(b1, b2, off) \ 55 | do {EMIT2(b1, b2); EMIT(off, 4); } while (0) 56 | #define EMIT3_off32(b1, b2, b3, off) \ 57 | do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) 58 | #define EMIT4_off32(b1, b2, b3, b4, off) \ 59 | do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) 60 | 61 | static inline _Bool is_imm8(int value) 62 | { 63 | return value <= 127 && value >= -128; 64 | } 65 | 66 | static inline _Bool is_simm32(__s64 value) 67 | { 68 | return value == (__s64) (__s32) value; 69 | } 70 | 71 | /* mov dst, src */ 72 | #define EMIT_mov(DST, SRC) \ 73 | do {if (DST != SRC) \ 74 | EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ 75 | } while (0) 76 | 77 | static int bpf_size_to_x86_bytes(int bpf_size) 78 | { 79 | if (bpf_size == BPF_W) 80 | return 4; 81 | else if (bpf_size == BPF_H) 82 | return 2; 83 | else if (bpf_size == BPF_B) 84 | return 1; 
85 | else if (bpf_size == BPF_DW) 86 | return 4; /* imm32 */ 87 | else 88 | return 0; 89 | } 90 | 91 | /* list of x86 cond jumps opcodes (. + s8) 92 | * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) 93 | */ 94 | #define X86_JB 0x72 95 | #define X86_JAE 0x73 96 | #define X86_JE 0x74 97 | #define X86_JNE 0x75 98 | #define X86_JBE 0x76 99 | #define X86_JA 0x77 100 | #define X86_JGE 0x7D 101 | #define X86_JG 0x7F 102 | 103 | #define CHOOSE_LOAD_FUNC(K, func) \ 104 | ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) 105 | 106 | /* pick a register outside of BPF range for JIT internal work */ 107 | #define AUX_REG (MAX_BPF_REG + 1) 108 | 109 | /* the following table maps BPF registers to x64 registers. 110 | * x64 register r12 is unused, since if used as base address register 111 | * in load/store instructions, it always needs an extra byte of encoding 112 | */ 113 | static const int reg2hex[] = { 114 | [BPF_REG_0] = 0, /* rax */ 115 | [BPF_REG_1] = 7, /* rdi */ 116 | [BPF_REG_2] = 6, /* rsi */ 117 | [BPF_REG_3] = 2, /* rdx */ 118 | [BPF_REG_4] = 1, /* rcx */ 119 | [BPF_REG_5] = 0, /* r8 */ 120 | [BPF_REG_6] = 3, /* rbx callee saved */ 121 | [BPF_REG_7] = 5, /* r13 callee saved */ 122 | [BPF_REG_8] = 6, /* r14 callee saved */ 123 | [BPF_REG_9] = 7, /* r15 callee saved */ 124 | [BPF_REG_FP] = 5, /* rbp readonly */ 125 | [AUX_REG] = 3, /* r11 temp register */ 126 | }; 127 | 128 | /* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 129 | * which need extra byte of encoding. 130 | * rax,rcx,...,rbp have simpler encoding 131 | */ 132 | static inline _Bool is_ereg(__u32 reg) 133 | { 134 | if (reg == BPF_REG_5 || reg == AUX_REG || 135 | (reg >= BPF_REG_7 && reg <= BPF_REG_9)) 136 | return 1; 137 | else 138 | return 0; 139 | } 140 | 141 | /* add modifiers if 'reg' maps to x64 registers r8..r15 */ 142 | static inline __u8 add_1mod(__u8 byte, __u32 reg) 143 | { 144 | if (is_ereg(reg)) 145 | byte |= 1; 146 | return byte; 147 | } 148 | 149 | static inline __u8 add_2mod(__u8 byte, __u32 r1, __u32 r2) 150 | { 151 | if (is_ereg(r1)) 152 | byte |= 1; 153 | if (is_ereg(r2)) 154 | byte |= 4; 155 | return byte; 156 | } 157 | 158 | /* encode 'dst_reg' register into x64 opcode 'byte' */ 159 | static inline __u8 add_1reg(__u8 byte, __u32 dst_reg) 160 | { 161 | return byte + reg2hex[dst_reg]; 162 | } 163 | 164 | /* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ 165 | static inline __u8 add_2reg(__u8 byte, __u32 dst_reg, __u32 src_reg) 166 | { 167 | return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); 168 | } 169 | 170 | static void jit_fill_hole(void *area, unsigned int size) 171 | { 172 | /* fill whole space with int3 instructions */ 173 | memset(area, 0xcc, size); 174 | } 175 | 176 | struct jit_context { 177 | unsigned int cleanup_addr; /* epilogue code offset */ 178 | _Bool seen_ld_abs; 179 | }; 180 | 181 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, __u8 *image, 182 | int oldproglen, struct jit_context *ctx) 183 | { 184 | struct bpf_insn *insn = bpf_prog->insnsi; 185 | int insn_cnt = bpf_prog->len; 186 | __u8 temp[64]; 187 | int i; 188 | int proglen = 0; 189 | __u8 *prog = temp; 190 | int stacksize = MAX_BPF_STACK + 191 | 32 /* space for rbx, r13, r14, r15 */ + 192 | 8 /* space for skb_copy_bits() buffer */; 193 | 194 | EMIT1(0x55); /* push rbp */ 195 | EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ 196 | 197 | /* sub rsp, stacksize */ 198 | EMIT3_off32(0x48, 0x81, 0xEC, stacksize); 199 | 200 | /* all classic 
BPF filters use R6(rbx) save it */ 201 | 202 | /* mov qword ptr [rbp-X],rbx */ 203 | EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); 204 | 205 | /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 206 | * as temporary, so all tcpdump filters need to spill/fill R7(r13) and 207 | * R8(r14). R9(r15) spill could be made conditional, but there is only 208 | * one 'bpf_error' return path out of helper functions inside bpf_jit.S 209 | * The overhead of extra spill is negligible for any filter other 210 | * than synthetic ones. Therefore not worth adding complexity. 211 | */ 212 | 213 | /* mov qword ptr [rbp-X],r13 */ 214 | EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8); 215 | /* mov qword ptr [rbp-X],r14 */ 216 | EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16); 217 | /* mov qword ptr [rbp-X],r15 */ 218 | EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24); 219 | 220 | /* clear A and X registers */ 221 | EMIT2(0x31, 0xc0); /* xor eax, eax */ 222 | EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ 223 | 224 | for (i = 0; i < insn_cnt; i++, insn++) { 225 | const __s32 imm32 = insn->imm; 226 | __u32 dst_reg = insn->dst_reg; 227 | __u32 src_reg = insn->src_reg; 228 | __u8 b1 = 0, b2 = 0, b3 = 0; 229 | __s64 jmp_offset; 230 | __u8 jmp_cond; 231 | int ilen; 232 | __u8 *func; 233 | 234 | switch (insn->code) { 235 | /* ALU */ 236 | case BPF_ALU | BPF_ADD | BPF_X: 237 | case BPF_ALU | BPF_SUB | BPF_X: 238 | case BPF_ALU | BPF_AND | BPF_X: 239 | case BPF_ALU | BPF_OR | BPF_X: 240 | case BPF_ALU | BPF_XOR | BPF_X: 241 | case BPF_ALU64 | BPF_ADD | BPF_X: 242 | case BPF_ALU64 | BPF_SUB | BPF_X: 243 | case BPF_ALU64 | BPF_AND | BPF_X: 244 | case BPF_ALU64 | BPF_OR | BPF_X: 245 | case BPF_ALU64 | BPF_XOR | BPF_X: 246 | switch (BPF_OP(insn->code)) { 247 | case BPF_ADD: b2 = 0x01; break; 248 | case BPF_SUB: b2 = 0x29; break; 249 | case BPF_AND: b2 = 0x21; break; 250 | case BPF_OR: b2 = 0x09; break; 251 | case BPF_XOR: b2 = 0x31; break; 252 | } 253 | if (BPF_CLASS(insn->code) == BPF_ALU64) 254 | EMIT1(add_2mod(0x48, dst_reg, src_reg)); 255 | else if (is_ereg(dst_reg) || is_ereg(src_reg)) 256 | EMIT1(add_2mod(0x40, dst_reg, src_reg)); 257 | EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); 258 | break; 259 | 260 | /* mov dst, src */ 261 | case BPF_ALU64 | BPF_MOV | BPF_X: 262 | EMIT_mov(dst_reg, src_reg); 263 | break; 264 | 265 | /* mov32 dst, src */ 266 | case BPF_ALU | BPF_MOV | BPF_X: 267 | if (is_ereg(dst_reg) || is_ereg(src_reg)) 268 | EMIT1(add_2mod(0x40, dst_reg, src_reg)); 269 | EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); 270 | break; 271 | 272 | /* neg dst */ 273 | case BPF_ALU | BPF_NEG: 274 | case BPF_ALU64 | BPF_NEG: 275 | if (BPF_CLASS(insn->code) == BPF_ALU64) 276 | EMIT1(add_1mod(0x48, dst_reg)); 277 | else if (is_ereg(dst_reg)) 278 | EMIT1(add_1mod(0x40, dst_reg)); 279 | EMIT2(0xF7, add_1reg(0xD8, dst_reg)); 280 | break; 281 | 282 | case BPF_ALU | BPF_ADD | BPF_K: 283 | case BPF_ALU | BPF_SUB | BPF_K: 284 | case BPF_ALU | BPF_AND | BPF_K: 285 | case BPF_ALU | BPF_OR | BPF_K: 286 | case BPF_ALU | BPF_XOR | BPF_K: 287 | case BPF_ALU64 | BPF_ADD | BPF_K: 288 | case BPF_ALU64 | BPF_SUB | BPF_K: 289 | case BPF_ALU64 | BPF_AND | BPF_K: 290 | case BPF_ALU64 | BPF_OR | BPF_K: 291 | case BPF_ALU64 | BPF_XOR | BPF_K: 292 | if (BPF_CLASS(insn->code) == BPF_ALU64) 293 | EMIT1(add_1mod(0x48, dst_reg)); 294 | else if (is_ereg(dst_reg)) 295 | EMIT1(add_1mod(0x40, dst_reg)); 296 | 297 | switch (BPF_OP(insn->code)) { 298 | case BPF_ADD: b3 = 0xC0; break; 299 | case BPF_SUB: b3 = 0xE8; break; 300 | case BPF_AND: b3 = 
0xE0; break; 301 | case BPF_OR: b3 = 0xC8; break; 302 | case BPF_XOR: b3 = 0xF0; break; 303 | } 304 | 305 | if (is_imm8(imm32)) 306 | EMIT3(0x83, add_1reg(b3, dst_reg), imm32); 307 | else 308 | EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); 309 | break; 310 | 311 | case BPF_ALU64 | BPF_MOV | BPF_K: 312 | /* optimization: if imm32 is positive, 313 | * use 'mov eax, imm32' (which zero-extends imm32) 314 | * to save 2 bytes 315 | */ 316 | if (imm32 < 0) { 317 | /* 'mov rax, imm32' sign extends imm32 */ 318 | b1 = add_1mod(0x48, dst_reg); 319 | b2 = 0xC7; 320 | b3 = 0xC0; 321 | EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); 322 | break; 323 | } 324 | 325 | case BPF_ALU | BPF_MOV | BPF_K: 326 | /* mov %eax, imm32 */ 327 | if (is_ereg(dst_reg)) 328 | EMIT1(add_1mod(0x40, dst_reg)); 329 | EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); 330 | break; 331 | 332 | case BPF_LD | BPF_IMM | BPF_DW: 333 | if (insn[1].code != 0 || insn[1].src_reg != 0 || 334 | insn[1].dst_reg != 0 || insn[1].off != 0) { 335 | /* verifier must catch invalid insns */ 336 | printf("invalid BPF_LD_IMM64 insn\n"); 337 | return -EINVAL; 338 | } 339 | 340 | /* movabsq %rax, imm64 */ 341 | EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); 342 | EMIT(insn[0].imm, 4); 343 | EMIT(insn[1].imm, 4); 344 | 345 | insn++; 346 | i++; 347 | break; 348 | 349 | /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ 350 | case BPF_ALU | BPF_MOD | BPF_X: 351 | case BPF_ALU | BPF_DIV | BPF_X: 352 | case BPF_ALU | BPF_MOD | BPF_K: 353 | case BPF_ALU | BPF_DIV | BPF_K: 354 | case BPF_ALU64 | BPF_MOD | BPF_X: 355 | case BPF_ALU64 | BPF_DIV | BPF_X: 356 | case BPF_ALU64 | BPF_MOD | BPF_K: 357 | case BPF_ALU64 | BPF_DIV | BPF_K: 358 | EMIT1(0x50); /* push rax */ 359 | EMIT1(0x52); /* push rdx */ 360 | 361 | if (BPF_SRC(insn->code) == BPF_X) 362 | /* mov r11, src_reg */ 363 | EMIT_mov(AUX_REG, src_reg); 364 | else 365 | /* mov r11, imm32 */ 366 | EMIT3_off32(0x49, 0xC7, 0xC3, imm32); 367 | 368 | /* mov rax, dst_reg */ 369 | EMIT_mov(BPF_REG_0, dst_reg); 370 | 371 | /* xor edx, edx 372 | * equivalent to 'xor rdx, rdx', but one byte less 373 | */ 374 | EMIT2(0x31, 0xd2); 375 | 376 | if (BPF_SRC(insn->code) == BPF_X) { 377 | /* if (src_reg == 0) return 0 */ 378 | 379 | /* cmp r11, 0 */ 380 | EMIT4(0x49, 0x83, 0xFB, 0x00); 381 | 382 | /* jne .+9 (skip over pop, pop, xor and jmp) */ 383 | EMIT2(X86_JNE, 1 + 1 + 2 + 5); 384 | EMIT1(0x5A); /* pop rdx */ 385 | EMIT1(0x58); /* pop rax */ 386 | EMIT2(0x31, 0xc0); /* xor eax, eax */ 387 | 388 | /* jmp cleanup_addr 389 | * addrs[i] - 11, because there are 11 bytes 390 | * after this insn: div, mov, pop, pop, mov 391 | */ 392 | jmp_offset = ctx->cleanup_addr - (addrs[i] - 11); 393 | EMIT1_off32(0xE9, jmp_offset); 394 | } 395 | 396 | if (BPF_CLASS(insn->code) == BPF_ALU64) 397 | /* div r11 */ 398 | EMIT3(0x49, 0xF7, 0xF3); 399 | else 400 | /* div r11d */ 401 | EMIT3(0x41, 0xF7, 0xF3); 402 | 403 | if (BPF_OP(insn->code) == BPF_MOD) 404 | /* mov r11, rdx */ 405 | EMIT3(0x49, 0x89, 0xD3); 406 | else 407 | /* mov r11, rax */ 408 | EMIT3(0x49, 0x89, 0xC3); 409 | 410 | EMIT1(0x5A); /* pop rdx */ 411 | EMIT1(0x58); /* pop rax */ 412 | 413 | /* mov dst_reg, r11 */ 414 | EMIT_mov(dst_reg, AUX_REG); 415 | break; 416 | 417 | case BPF_ALU | BPF_MUL | BPF_K: 418 | case BPF_ALU | BPF_MUL | BPF_X: 419 | case BPF_ALU64 | BPF_MUL | BPF_K: 420 | case BPF_ALU64 | BPF_MUL | BPF_X: 421 | EMIT1(0x50); /* push rax */ 422 | EMIT1(0x52); /* push rdx */ 423 | 424 | /* mov r11, dst_reg */ 425 | EMIT_mov(AUX_REG, dst_reg); 426 | 
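/* mul leaves the 128-bit product in rdx:rax, which is why rax and rdx
 * were pushed above; the low 64 bits are moved out through r11 and the
 * original rax/rdx values are restored before the case ends.
 */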
427 | if (BPF_SRC(insn->code) == BPF_X) 428 | /* mov rax, src_reg */ 429 | EMIT_mov(BPF_REG_0, src_reg); 430 | else 431 | /* mov rax, imm32 */ 432 | EMIT3_off32(0x48, 0xC7, 0xC0, imm32); 433 | 434 | if (BPF_CLASS(insn->code) == BPF_ALU64) 435 | EMIT1(add_1mod(0x48, AUX_REG)); 436 | else if (is_ereg(AUX_REG)) 437 | EMIT1(add_1mod(0x40, AUX_REG)); 438 | /* mul(q) r11 */ 439 | EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); 440 | 441 | /* mov r11, rax */ 442 | EMIT_mov(AUX_REG, BPF_REG_0); 443 | 444 | EMIT1(0x5A); /* pop rdx */ 445 | EMIT1(0x58); /* pop rax */ 446 | 447 | /* mov dst_reg, r11 */ 448 | EMIT_mov(dst_reg, AUX_REG); 449 | break; 450 | 451 | /* shifts */ 452 | case BPF_ALU | BPF_LSH | BPF_K: 453 | case BPF_ALU | BPF_RSH | BPF_K: 454 | case BPF_ALU | BPF_ARSH | BPF_K: 455 | case BPF_ALU64 | BPF_LSH | BPF_K: 456 | case BPF_ALU64 | BPF_RSH | BPF_K: 457 | case BPF_ALU64 | BPF_ARSH | BPF_K: 458 | if (BPF_CLASS(insn->code) == BPF_ALU64) 459 | EMIT1(add_1mod(0x48, dst_reg)); 460 | else if (is_ereg(dst_reg)) 461 | EMIT1(add_1mod(0x40, dst_reg)); 462 | 463 | switch (BPF_OP(insn->code)) { 464 | case BPF_LSH: b3 = 0xE0; break; 465 | case BPF_RSH: b3 = 0xE8; break; 466 | case BPF_ARSH: b3 = 0xF8; break; 467 | } 468 | EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); 469 | break; 470 | 471 | case BPF_ALU | BPF_LSH | BPF_X: 472 | case BPF_ALU | BPF_RSH | BPF_X: 473 | case BPF_ALU | BPF_ARSH | BPF_X: 474 | case BPF_ALU64 | BPF_LSH | BPF_X: 475 | case BPF_ALU64 | BPF_RSH | BPF_X: 476 | case BPF_ALU64 | BPF_ARSH | BPF_X: 477 | 478 | /* check for bad case when dst_reg == rcx */ 479 | if (dst_reg == BPF_REG_4) { 480 | /* mov r11, dst_reg */ 481 | EMIT_mov(AUX_REG, dst_reg); 482 | dst_reg = AUX_REG; 483 | } 484 | 485 | if (src_reg != BPF_REG_4) { /* common case */ 486 | EMIT1(0x51); /* push rcx */ 487 | 488 | /* mov rcx, src_reg */ 489 | EMIT_mov(BPF_REG_4, src_reg); 490 | } 491 | 492 | /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ 493 | if (BPF_CLASS(insn->code) == BPF_ALU64) 494 | EMIT1(add_1mod(0x48, dst_reg)); 495 | else if (is_ereg(dst_reg)) 496 | EMIT1(add_1mod(0x40, dst_reg)); 497 | 498 | switch (BPF_OP(insn->code)) { 499 | case BPF_LSH: b3 = 0xE0; break; 500 | case BPF_RSH: b3 = 0xE8; break; 501 | case BPF_ARSH: b3 = 0xF8; break; 502 | } 503 | EMIT2(0xD3, add_1reg(b3, dst_reg)); 504 | 505 | if (src_reg != BPF_REG_4) 506 | EMIT1(0x59); /* pop rcx */ 507 | 508 | if (insn->dst_reg == BPF_REG_4) 509 | /* mov dst_reg, r11 */ 510 | EMIT_mov(insn->dst_reg, AUX_REG); 511 | break; 512 | 513 | case BPF_ALU | BPF_END | BPF_FROM_BE: 514 | switch (imm32) { 515 | case 16: 516 | /* emit 'ror %ax, 8' to swap lower 2 bytes */ 517 | EMIT1(0x66); 518 | if (is_ereg(dst_reg)) 519 | EMIT1(0x41); 520 | EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); 521 | break; 522 | case 32: 523 | /* emit 'bswap eax' to swap lower 4 bytes */ 524 | if (is_ereg(dst_reg)) 525 | EMIT2(0x41, 0x0F); 526 | else 527 | EMIT1(0x0F); 528 | EMIT1(add_1reg(0xC8, dst_reg)); 529 | break; 530 | case 64: 531 | /* emit 'bswap rax' to swap 8 bytes */ 532 | EMIT3(add_1mod(0x48, dst_reg), 0x0F, 533 | add_1reg(0xC8, dst_reg)); 534 | break; 535 | } 536 | break; 537 | 538 | case BPF_ALU | BPF_END | BPF_FROM_LE: 539 | break; 540 | 541 | /* ST: *(__u8*)(dst_reg + off) = imm */ 542 | case BPF_ST | BPF_MEM | BPF_B: 543 | if (is_ereg(dst_reg)) 544 | EMIT2(0x41, 0xC6); 545 | else 546 | EMIT1(0xC6); 547 | goto st; 548 | case BPF_ST | BPF_MEM | BPF_H: 549 | if (is_ereg(dst_reg)) 550 | EMIT3(0x66, 0x41, 0xC7); 551 | else 552 | EMIT2(0x66, 0xC7); 553 | goto st; 554 | case BPF_ST | 
BPF_MEM | BPF_W: 555 | if (is_ereg(dst_reg)) 556 | EMIT2(0x41, 0xC7); 557 | else 558 | EMIT1(0xC7); 559 | goto st; 560 | case BPF_ST | BPF_MEM | BPF_DW: 561 | EMIT2(add_1mod(0x48, dst_reg), 0xC7); 562 | 563 | st: if (is_imm8(insn->off)) 564 | EMIT2(add_1reg(0x40, dst_reg), insn->off); 565 | else 566 | EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); 567 | 568 | EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); 569 | break; 570 | 571 | /* STX: *(__u8*)(dst_reg + off) = src_reg */ 572 | case BPF_STX | BPF_MEM | BPF_B: 573 | /* emit 'mov byte ptr [rax + off], al' */ 574 | if (is_ereg(dst_reg) || is_ereg(src_reg) || 575 | /* have to add extra byte for x86 SIL, DIL regs */ 576 | src_reg == BPF_REG_1 || src_reg == BPF_REG_2) 577 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); 578 | else 579 | EMIT1(0x88); 580 | goto stx; 581 | case BPF_STX | BPF_MEM | BPF_H: 582 | if (is_ereg(dst_reg) || is_ereg(src_reg)) 583 | EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); 584 | else 585 | EMIT2(0x66, 0x89); 586 | goto stx; 587 | case BPF_STX | BPF_MEM | BPF_W: 588 | if (is_ereg(dst_reg) || is_ereg(src_reg)) 589 | EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); 590 | else 591 | EMIT1(0x89); 592 | goto stx; 593 | case BPF_STX | BPF_MEM | BPF_DW: 594 | EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); 595 | stx: if (is_imm8(insn->off)) 596 | EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); 597 | else 598 | EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), 599 | insn->off); 600 | break; 601 | 602 | /* LDX: dst_reg = *(__u8*)(src_reg + off) */ 603 | case BPF_LDX | BPF_MEM | BPF_B: 604 | /* emit 'movzx rax, byte ptr [rax + off]' */ 605 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); 606 | goto ldx; 607 | case BPF_LDX | BPF_MEM | BPF_H: 608 | /* emit 'movzx rax, word ptr [rax + off]' */ 609 | EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); 610 | goto ldx; 611 | case BPF_LDX | BPF_MEM | BPF_W: 612 | /* emit 'mov eax, dword ptr [rax+0x14]' */ 613 | if (is_ereg(dst_reg) || is_ereg(src_reg)) 614 | EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); 615 | else 616 | EMIT1(0x8B); 617 | goto ldx; 618 | case BPF_LDX | BPF_MEM | BPF_DW: 619 | /* emit 'mov rax, qword ptr [rax+0x14]' */ 620 | EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); 621 | ldx: /* if insn->off == 0 we can save one extra byte, but 622 | * special case of x86 r13 which always needs an offset 623 | * is not worth the hassle 624 | */ 625 | if (is_imm8(insn->off)) 626 | EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); 627 | else 628 | EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), 629 | insn->off); 630 | break; 631 | 632 | /* STX XADD: lock *(__u32*)(dst_reg + off) += src_reg */ 633 | case BPF_STX | BPF_XADD | BPF_W: 634 | /* emit 'lock add dword ptr [rax + off], eax' */ 635 | if (is_ereg(dst_reg) || is_ereg(src_reg)) 636 | EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); 637 | else 638 | EMIT2(0xF0, 0x01); 639 | goto xadd; 640 | case BPF_STX | BPF_XADD | BPF_DW: 641 | EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); 642 | xadd: if (is_imm8(insn->off)) 643 | EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); 644 | else 645 | EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), 646 | insn->off); 647 | break; 648 | 649 | /* call */ 650 | case BPF_JMP | BPF_CALL: 651 | func = (__u8 *) __bpf_call_base + imm32; 652 | //printf("JIT: func %p, imm32 %p\n", func, imm32); 653 | jmp_offset = func - (image + addrs[i]); 654 | //printf("JIT: jmp_offset %p\n", jmp_offset); 655 | 656 | #if 0 657 | if (!imm32 || !is_simm32(jmp_offset)) { 658 | 
printf("unsupported bpf func %d addr %p image %p\n", 659 | imm32, func, image); 660 | return -EINVAL; 661 | } 662 | #endif 663 | EMIT1_off32(0xE8, jmp_offset); 664 | //EMIT1_off32(0xE8, imm32); 665 | break; 666 | 667 | /* cond jump */ 668 | case BPF_JMP | BPF_JEQ | BPF_X: 669 | case BPF_JMP | BPF_JNE | BPF_X: 670 | case BPF_JMP | BPF_JGT | BPF_X: 671 | case BPF_JMP | BPF_JGE | BPF_X: 672 | case BPF_JMP | BPF_JSGT | BPF_X: 673 | case BPF_JMP | BPF_JSGE | BPF_X: 674 | /* cmp dst_reg, src_reg */ 675 | EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, 676 | add_2reg(0xC0, dst_reg, src_reg)); 677 | goto emit_cond_jmp; 678 | 679 | case BPF_JMP | BPF_JSET | BPF_X: 680 | /* test dst_reg, src_reg */ 681 | EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, 682 | add_2reg(0xC0, dst_reg, src_reg)); 683 | goto emit_cond_jmp; 684 | 685 | case BPF_JMP | BPF_JSET | BPF_K: 686 | /* test dst_reg, imm32 */ 687 | EMIT1(add_1mod(0x48, dst_reg)); 688 | EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); 689 | goto emit_cond_jmp; 690 | 691 | case BPF_JMP | BPF_JEQ | BPF_K: 692 | case BPF_JMP | BPF_JNE | BPF_K: 693 | case BPF_JMP | BPF_JGT | BPF_K: 694 | case BPF_JMP | BPF_JGE | BPF_K: 695 | case BPF_JMP | BPF_JSGT | BPF_K: 696 | case BPF_JMP | BPF_JSGE | BPF_K: 697 | /* cmp dst_reg, imm8/32 */ 698 | EMIT1(add_1mod(0x48, dst_reg)); 699 | 700 | if (is_imm8(imm32)) 701 | EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); 702 | else 703 | EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); 704 | 705 | emit_cond_jmp: /* convert BPF opcode to x86 */ 706 | switch (BPF_OP(insn->code)) { 707 | case BPF_JEQ: 708 | jmp_cond = X86_JE; 709 | break; 710 | case BPF_JSET: 711 | case BPF_JNE: 712 | jmp_cond = X86_JNE; 713 | break; 714 | case BPF_JGT: 715 | /* GT is unsigned '>', JA in x86 */ 716 | jmp_cond = X86_JA; 717 | break; 718 | case BPF_JGE: 719 | /* GE is unsigned '>=', JAE in x86 */ 720 | jmp_cond = X86_JAE; 721 | break; 722 | case BPF_JSGT: 723 | /* signed '>', GT in x86 */ 724 | jmp_cond = X86_JG; 725 | break; 726 | case BPF_JSGE: 727 | /* signed '>=', GE in x86 */ 728 | jmp_cond = X86_JGE; 729 | break; 730 | default: /* to silence gcc warning */ 731 | return -EFAULT; 732 | } 733 | jmp_offset = addrs[i + insn->off] - addrs[i]; 734 | if (is_imm8(jmp_offset)) { 735 | EMIT2(jmp_cond, jmp_offset); 736 | } else if (is_simm32(jmp_offset)) { 737 | EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); 738 | } else { 739 | printf("cond_jmp gen bug %llx\n", jmp_offset); 740 | return -EFAULT; 741 | } 742 | 743 | break; 744 | 745 | case BPF_JMP | BPF_JA: 746 | jmp_offset = addrs[i + insn->off] - addrs[i]; 747 | if (!jmp_offset) 748 | /* optimize out nop jumps */ 749 | break; 750 | emit_jmp: 751 | if (is_imm8(jmp_offset)) { 752 | EMIT2(0xEB, jmp_offset); 753 | } else if (is_simm32(jmp_offset)) { 754 | EMIT1_off32(0xE9, jmp_offset); 755 | } else { 756 | printf("jmp gen bug %llx\n", jmp_offset); 757 | return -EFAULT; 758 | } 759 | break; 760 | 761 | common_load: ctx->seen_ld_abs = 1; 762 | jmp_offset = func - (image + addrs[i]); 763 | if (!func || !is_simm32(jmp_offset)) { 764 | printf("unsupported bpf func %d addr %p image %p\n", 765 | imm32, func, image); 766 | return -EINVAL; 767 | } 768 | if (BPF_MODE(insn->code) == BPF_ABS) { 769 | /* mov %esi, imm32 */ 770 | EMIT1_off32(0xBE, imm32); 771 | } else { 772 | /* mov %rsi, src_reg */ 773 | EMIT_mov(BPF_REG_2, src_reg); 774 | if (imm32) { 775 | if (is_imm8(imm32)) 776 | /* add %esi, imm8 */ 777 | EMIT3(0x83, 0xC6, imm32); 778 | else 779 | /* add %esi, imm32 */ 780 | EMIT2_off32(0x81, 0xC6, imm32); 781 | } 
782 | } 783 | /* skb pointer is in R6 (%rbx), it will be copied into 784 | * %rdi if skb_copy_bits() call is necessary. 785 | * sk_load_* helpers also use %r10 and %r9d. 786 | * See bpf_jit.S 787 | */ 788 | EMIT1_off32(0xE8, jmp_offset); /* call */ 789 | break; 790 | 791 | case BPF_JMP | BPF_EXIT: 792 | if (i != insn_cnt - 1) { 793 | jmp_offset = ctx->cleanup_addr - addrs[i]; 794 | goto emit_jmp; 795 | } 796 | /* update cleanup_addr */ 797 | ctx->cleanup_addr = proglen; 798 | /* mov rbx, qword ptr [rbp-X] */ 799 | EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize); 800 | /* mov r13, qword ptr [rbp-X] */ 801 | EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8); 802 | /* mov r14, qword ptr [rbp-X] */ 803 | EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16); 804 | /* mov r15, qword ptr [rbp-X] */ 805 | EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24); 806 | 807 | EMIT1(0xC9); /* leave */ 808 | EMIT1(0xC3); /* ret */ 809 | break; 810 | 811 | default: 812 | /* By design the x64 JIT supports all BPF instructions. 813 | * This error is seen if a new instruction was added to the 814 | * interpreter but not to the JIT, 815 | * or if there is junk in bpf_prog. 816 | */ 817 | printf("bpf_jit: unknown opcode %02x\n", insn->code); 818 | return -EINVAL; 819 | } 820 | 821 | ilen = prog - temp; 822 | if (image) { 823 | if (unlikely(proglen + ilen > oldproglen)) { 824 | printf("bpf_jit_compile fatal error\n"); 825 | return -EFAULT; 826 | } 827 | memcpy(image + proglen, temp, ilen); 828 | } 829 | proglen += ilen; 830 | addrs[i] = proglen; 831 | prog = temp; 832 | } 833 | return proglen; 834 | } 835 | 836 | void bpf_jit_compile(struct bpf_prog *prog) 837 | { 838 | /* Classic BPF entry point, intentionally left empty: this port JITs only internal eBPF programs via bpf_int_jit_compile(). */ } 839 | 840 | void bpf_int_jit_compile(struct bpf_prog *prog) 841 | { 842 | struct bpf_binary_header *header = NULL; 843 | int proglen, oldproglen = 0; 844 | struct jit_context ctx = {}; 845 | __u8 *image = NULL; 846 | int *addrs; 847 | int pass; 848 | int i; 849 | 850 | /* Get the JIT mode from the BPF_JIT environment variable */ 851 | char *mode = getenv("BPF_JIT"); 852 | 853 | if (mode == NULL || atoi(mode) == 0) 854 | { 855 | return; 856 | } 857 | 858 | if (atoi(mode) == 1) 859 | { 860 | bpf_jit_enable = 1; 861 | } 862 | 863 | if (atoi(mode) == 2) 864 | { 865 | bpf_jit_enable = 2; 866 | } 867 | 868 | if (!bpf_jit_enable) 869 | return; 870 | 871 | if (!prog || !prog->len) 872 | return; 873 | 874 | addrs = malloc(prog->len * sizeof(*addrs)); 875 | if (!addrs) 876 | return; 877 | 878 | /* Before the first pass, make a rough estimate of addrs[]: 879 | * each BPF instruction translates to fewer than 64 bytes. 880 | */ 881 | for (proglen = 0, i = 0; i < prog->len; i++) { 882 | proglen += 64; 883 | addrs[i] = proglen; 884 | } 885 | ctx.cleanup_addr = proglen; 886 | 887 | for (pass = 0; pass < 10; pass++) { 888 | proglen = do_jit(prog, addrs, image, oldproglen, &ctx); 889 | if (proglen <= 0) { 890 | image = NULL; 891 | if (header) 892 | bpf_jit_binary_free(header); 893 | goto out; 894 | } 895 | if (image) { 896 | if (proglen != oldproglen) 897 | printf("bpf_jit: proglen=%d != oldproglen=%d\n", 898 | proglen, oldproglen); 899 | break; 900 | } 901 | if (proglen == oldproglen) { 902 | header = bpf_jit_binary_alloc(proglen, &image, 903 | 1, jit_fill_hole); 904 | if (!header) 905 | goto out; 906 | } 907 | oldproglen = proglen; 908 | } 909 | 910 | if (bpf_jit_enable > 1) 911 | bpf_jit_dump(prog->len, proglen, 0, image); 912 | 913 | if (image) { 914 | size_t size = header->pages * getpagesize(); 915 | mprotect((void*)header, size, PROT_READ | PROT_EXEC); 916 | prog->bpf_func = (void *)image; 917 | prog->jited = 1; 918 | } 919
| out: 920 | free(addrs); 921 | } 922 | 923 | void bpf_jit_free(struct bpf_prog *fp) 924 | { 925 | unsigned long addr = (unsigned long) fp->bpf_func & ~((unsigned long) getpagesize() - 1); 926 | struct bpf_binary_header *header = (void *) addr; 927 | 928 | if (!fp->jited) 929 | goto free_filter; 930 | 931 | bpf_jit_binary_free(header); /* unmaps the page-aligned JIT image */ 932 | 933 | free_filter: 934 | bpf_prog_unlock_free(fp); 935 | } 936 | --------------------------------------------------------------------------------
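A minimal end-to-end usage sketch of the library above. It assumes only what src/core.c and src/jit.c themselves rely on: that struct bpf_prog (declared in include/filter.h, which is assumed to pull in the struct bpf_insn and opcode definitions its declarations use) exposes len, insnsi[] and bpf_func. The two-instruction program, the run_fn_t typedef and the file name are illustrative, not part of the sources.

    /* examples/run_min.c (hypothetical) -- runs "r0 = 42; exit;" */
    #include <stdio.h>
    #include <string.h>
    #include "filter.h"   /* struct bpf_prog, bpf_prog_alloc(), ... */

    typedef unsigned int (*run_fn_t)(void *ctx, const struct bpf_insn *insn);

    int main(void)
    {
            struct bpf_insn insns[] = {
                    { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 42 },
                    { .code = BPF_JMP | BPF_EXIT },
            };
            struct bpf_prog *fp = bpf_prog_alloc(sizeof(*fp) + sizeof(insns));

            if (fp == NULL)
                    return 1;
            fp->len = 2;                          /* number of instructions */
            memcpy(fp->insnsi, insns, sizeof(insns));
            fixup_bpf_calls(fp);                  /* no-op here: no BPF_CALL insns */
            bpf_prog_select_runtime(fp);          /* picks interpreter or JIT */
            printf("ret = %u\n", ((run_fn_t) fp->bpf_func)(NULL, fp->insnsi));
            bpf_prog_free(fp);
            return 0;
    }

Run with BPF_JIT=0 for the interpreter, BPF_JIT=1 for the JIT, or BPF_JIT=2 to also get the udis86 disassembly dump of the generated x86-64 code.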