├── .clang-format ├── .gitignore ├── .gitmodules ├── ebpf-bolt.bpf.c ├── ebpf-bolt.h ├── Makefile ├── README.md ├── LICENSE └── ebpf-bolt.cc /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ebpf-bolt 2 | ebpf-bolt.bpf.o 3 | ebpf-bolt.skel.h 4 | vmlinux.h 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libbpf"] 2 | path = libbpf 3 | url = https://github.com/libbpf/libbpf.git 4 | -------------------------------------------------------------------------------- /ebpf-bolt.bpf.c: -------------------------------------------------------------------------------- 1 | #include "ebpf-bolt.h" 2 | #include "vmlinux.h" 3 | #include 4 | 5 | struct { 6 | __uint(type, BPF_MAP_TYPE_RINGBUF); 7 | __uint(max_entries, 1024 * 1024 /* 1 MB */); 8 | } rb SEC(".maps"); 9 | 10 | SEC("perf_event") 11 | int lbr_branches(void *ctx) { 12 | struct event *e = bpf_ringbuf_reserve(&rb, sizeof(struct event), 0); 13 | if (!e) 14 | return 0; 15 | 16 | long bytes_written = bpf_read_branch_records( 17 | ctx, e->entries, sizeof(struct perf_branch_entry) * ENTRY_CNT, 0); 18 | 19 | if (bytes_written < 0) { 20 | bpf_ringbuf_discard(e, 0); 21 | return -bytes_written; 22 | } 23 | e->size = bytes_written; 24 | bpf_ringbuf_submit(e, 0); 25 | return 0; 26 | } 27 | 28 | char LICENSE[] SEC("license") = "Dual BSD/GPL"; 29 | -------------------------------------------------------------------------------- /ebpf-bolt.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __EBPF_BOLT_H 3 | #define __EBPF_BOLT_H 4 | 5 | 
#define ENTRY_CNT 32 6 | #define MAX_CPU_NR 128 7 | 8 | struct branch_flags { 9 | union { 10 | unsigned long long value; 11 | struct { 12 | unsigned long long mispred : 1; 13 | unsigned long long predicted : 1; 14 | unsigned long long in_tx : 1; 15 | unsigned long long abort : 1; 16 | unsigned long long cycles : 16; 17 | unsigned long long type : 4; 18 | unsigned long long spec : 2; 19 | unsigned long long new_type : 4; 20 | unsigned long long priv : 3; 21 | unsigned long long reserved : 31; 22 | }; 23 | }; 24 | }; 25 | 26 | struct event { 27 | struct entry_t { 28 | unsigned long long from, to; 29 | struct branch_flags flags; 30 | } entries[ENTRY_CNT]; 31 | long size; 32 | }; 33 | 34 | #endif /* __EBPF_BOLT_H */ 35 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Based off github.com/lizrice/learning-ebpf 2 | TARGET = ebpf-bolt 3 | ARCH = x86 4 | CXXFLAGS = -O3 -Wall -std=c++20 5 | BPF_OBJ = ${TARGET:=.bpf.o} 6 | 7 | USER_CC = ${TARGET:=.cc} 8 | USER_SKEL = ${TARGET:=.skel.h} 9 | 10 | COMMON_H = ${TARGET:=.h} 11 | 12 | app: $(TARGET) $(BPF_OBJ) 13 | .PHONY: app 14 | 15 | $(TARGET): $(USER_CC) $(USER_SKEL) $(COMMON_H) 16 | $(CXX) $(CXXFLAGS) -o $(TARGET) $(USER_CC) \ 17 | -L./libbpf/src -l:libbpf.a -lelf -lz \ 18 | -I${CURDIR}/libbpf/install/include -I${CURDIR}/libbpf/include 19 | 20 | %.bpf.o: %.bpf.c vmlinux.h $(COMMON_H) 21 | clang \ 22 | -I${CURDIR}/libbpf/install/include \ 23 | -target bpf \ 24 | -D __BPF_TRACING__ \ 25 | -D __TARGET_ARCH_$(ARCH) \ 26 | -Wall \ 27 | -O2 -g -o $@ -c $< 28 | llvm-strip -g $@ 29 | 30 | $(USER_SKEL): $(BPF_OBJ) 31 | bpftool gen skeleton $< > $@ 32 | 33 | vmlinux.h: 34 | bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h 35 | 36 | clean: 37 | - rm $(BPF_OBJ) 38 | - rm $(TARGET) 39 | - rm $(USER_SKEL) 40 | - rm vmlinux.h 41 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ebpf-bolt: eBPF tool to collect BOLT profile 2 | Collect and aggregate LBR samples using eBPF with minimal profiling overhead. 3 | 4 | Outputs a pre-aggregated BOLT profile suitable for optimizing the profiled binary or converting to other profile formats (fdata or YAML) that can be used with a different binary. 5 | 6 | This tool enables quicker profiling + optimization turnaround time thanks to processing LBR samples on the fly and producing a pre-aggregated profile at the end of the profiling step, ready to be directly consumed by BOLT. 7 | 8 | ## Limitations 9 | 1. Collecting the profile for shared libraries is not yet supported (perf2bolt limitation). 10 | 2. PIE support is experimental. 11 | 3. Only ELF64 (64-bit) binaries are supported. ELF32 (32-bit) binaries are not supported (BOLT limitation). 12 | 13 | ## Prerequisites 14 | This tool makes use of LBR for 0-overhead sampling and [eBPF CO-RE](https://docs.kernel.org/bpf/libbpf/libbpf_overview.html#bpf-co-re-compile-once-run-everywhere) for portability. 15 | - CPU: LBR/branch stack sampling support 16 | - Intel Last Branch Record (LBR): since Pentium 4 Netburst, including all Atom CPUs, Linux 2.6.35. 17 | - AMD LBRv2: since Zen4, Linux v6.1. 18 | - AMD Branch Sampling (BRS): since Zen3 for EPYC, Linux 5.19. Untested. 19 | - ARM Branch Record Buffer Extensions (BRBE): since v9.2-A (Cortex-X4, A720, and A520), Linux v6.1. 20 | - Kernel: Linux 4.16 with `CONFIG_DEBUG_INFO_BTF=y` for BPF CO-RE. 21 | - Compiler: Clang 10 or GCC 12 with BPF target and CO-RE relocations support. 22 | - libelf: 23 | - CentOS: `dnf install elfutils-libelf-devel` 24 | - Ubuntu: `apt install libelf-dev` 25 | 26 | ## Build instructions 27 | 1. 
Clone this repository with libbpf submodule: 28 | ``` 29 | git clone --recurse-submodules https://github.com/aaupov/ebpf-bolt 30 | ``` 31 | 2. Build libbpf: 32 | ``` 33 | cd ebpf-bolt 34 | cd libbpf/src 35 | PREFIX=../install make install 36 | ``` 37 | 3. Build ebpf-bolt tool: 38 | ``` 39 | cd .. 40 | make 41 | ``` 42 | 4. Set tracing capabilities to allow non-root operation: 43 | ``` 44 | sudo setcap "cap_perfmon=+ep cap_bpf=+ep" ebpf-bolt 45 | ``` 46 | 47 | ## Usage 48 | 49 | ``` 50 | $ ./ebpf-bolt -h 51 | Usage: ebpf-bolt [OPTION...] 52 | Collect pre-aggregated BOLT profile. 53 | 54 | USAGE: ebpf-bolt [-f FREQUENCY (max)] -p PID [duration (10s)] 55 | 56 | -f, --frequency=FREQUENCY Sample with a certain frequency 57 | -p, --pid=PID Sample on this PID only 58 | -v, --verbose Verbose debug output 59 | -?, --help Give this help list 60 | --usage Give a short usage message 61 | -V, --version Print program version 62 | 63 | Mandatory or optional arguments to long options are also mandatory or optional 64 | for any corresponding short options. 65 | 66 | Report bugs to https://github.com/aaupov/ebpf-bolt/issues. 67 | ``` 68 | 69 | Example usage: 70 | ``` 71 | ./ebpf-bolt -p `pgrep app` > preagg.data 72 | llvm-bolt app --pa -p preagg.data ... 73 | ``` 74 | Note the `--pa` flag instructing BOLT to read pre-aggregated profile. 
75 | 76 | ## Showcases 77 | 78 | ### Profiling, perf record vs ebpf-bolt 79 | Collecting the profile for Clang for 10 seconds with sampling frequency of 5000 Hz, average of 5 runs: 80 | | | Samples | User time | System time | CPU usage | Max RSS | File size | 81 | | --------- | ------: | --------: | ----------: | --------: | ------: | --------: | 82 | | perf record | 49304±25 | 0.40±0.02s | 0.27±0.01s | 5.4±0.5% | 96.8±0.2MB | 39.2MB | 83 | | ebpf-bolt | 49306±94 | 0.56±0.03s | 0.18±0.01s | 7.0±0.0% | 17.7±0.1MB | 3.4MB | 84 | | | **=** | **+0.16s** | **-0.09s** | **+1.6pp** | **-81.7%** | **-91.3%** | 85 | 86 | Summary: 87 | - Profiling with ebpf-bolt still has minimal overhead in terms of CPU usage, similar to `perf record`. 88 | - Peak memory usage during profiling is reduced significantly (96.8MB -> 17.7MB, -82%). 89 | - ebpf-bolt collects the same number of LBR samples, but produces a much 90 | smaller output file (39.2MB -> 3.4MB, -91%). 91 | - Slightly higher user time (+0.16s) in ebpf-bolt compared to perf is due to 92 | parsing and aggregating LBR samples, but these steps are eliminated from 93 | profile preprocessing in BOLT (-6.84s), which saves time overall. 94 | 95 | ### BOLT processing time, perf.data vs pre-aggregated profile 96 | When perf profile is processed by BOLT, it's parsed using `perf script` commands. 97 | No extra processing is needed for pre-aggregated profile produced by ebpf-bolt. 
98 | 99 | | | Pre-process profile | Process profile | Total rewrite time | 100 | | --------------- | ------------------: | --------------: | -----------------: | 101 | | perf.data | 7.26s | 7.29s | 140.58s | 102 | | pre-aggregated | 0.42s | 6.38s | 132.43s | 103 | | | **-6.84s** | **-0.91s** | **-8.15s** | 104 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /ebpf-bolt.cc: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | // 3 | // Based on runqlen(8) from BCC by Brendan Gregg. 4 | // Based on runqlen from iovisor/BCC by Wenbo Zhang. 
5 | // Amir Ayupov 6 | 7 | #include "ebpf-bolt.h" 8 | #include "ebpf-bolt.skel.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | struct env { 25 | time_t duration; 26 | bool max_freq; 27 | int freq; 28 | int pid; 29 | bool verbose; 30 | } env = {.duration = 10, .max_freq = true, .freq = 99, .pid = -1, .verbose = 0}; 31 | 32 | static volatile bool exiting; 33 | 34 | const char *argp_program_version = "ebpf-bolt 0.3"; 35 | const char *argp_program_bug_address = 36 | "https://github.com/aaupov/ebpf-bolt/issues"; 37 | const char argp_program_doc[] = 38 | "Collect pre-aggregated BOLT profile.\n\n" 39 | "USAGE: ebpf-bolt [-f FREQUENCY (max)] -p PID [duration (10s)]\n"; 40 | 41 | static const struct argp_option opts[] = { 42 | {"pid", 'p', "PID", 0, "Sample on this PID only"}, 43 | {"frequency", 'f', "FREQUENCY", 0, 44 | "Sample with a certain frequency, integer or `max'"}, 45 | {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, 46 | {"verbose", 'v', NULL, 0, "Verbose debug output"}, 47 | {}, 48 | }; 49 | 50 | int read_max_sample_rate() { 51 | int max_freq; 52 | int fd = open("/proc/sys/kernel/perf_event_max_sample_rate", O_RDONLY); 53 | fscanf(fdopen(fd, "r"), "%u", &max_freq); 54 | close(fd); 55 | return max_freq; 56 | } 57 | 58 | static error_t parse_arg(int key, char *arg, struct argp_state *state) { 59 | int max_freq = read_max_sample_rate(); 60 | 61 | static int pos_args; 62 | 63 | switch (key) { 64 | case 'h': 65 | argp_state_help(state, stderr, ARGP_HELP_STD_HELP); 66 | break; 67 | case 'v': 68 | env.verbose = true; 69 | break; 70 | case 'p': 71 | errno = 0; 72 | env.pid = strtol(arg, NULL, 10); 73 | if (errno || env.pid <= 0) { 74 | fprintf(stderr, "Invalid PID: %s\n", arg); 75 | argp_usage(state); 76 | } 77 | break; 78 | case 'f': { 79 | errno = 0; 80 | if (strncmp(arg, "max", strlen("max")) == 0) { 81 
| // default behavior, do nothing 82 | } else { 83 | env.max_freq = false; 84 | env.freq = strtol(arg, NULL, 10); 85 | } 86 | if (errno || env.freq <= 0 || env.freq > max_freq) { 87 | fprintf(stderr, "Invalid freq: %s", arg); 88 | if (env.freq > max_freq) 89 | fprintf(stderr, ": exceeds max_sample_rate %d", max_freq); 90 | fprintf(stderr, "\n"); 91 | argp_usage(state); 92 | } 93 | } break; 94 | case ARGP_KEY_ARG: 95 | errno = 0; 96 | if (pos_args == 0) { 97 | env.duration = strtol(arg, NULL, 10); 98 | if (errno) { 99 | fprintf(stderr, "invalid internal\n"); 100 | argp_usage(state); 101 | } 102 | } else { 103 | fprintf(stderr, "unrecognized positional argument: %s\n", arg); 104 | argp_usage(state); 105 | } 106 | pos_args++; 107 | break; 108 | default: 109 | return ARGP_ERR_UNKNOWN; 110 | } 111 | if (env.pid == -1) { 112 | fprintf(stderr, "Please specify PID\n"); 113 | argp_usage(state); 114 | } 115 | if (env.max_freq) { 116 | env.freq = max_freq; 117 | if (env.verbose) 118 | fprintf(stderr, "Using max_sample_rate from /proc/sys: %d\n", env.freq); 119 | } 120 | return 0; 121 | } 122 | 123 | static int nr_cpus; 124 | 125 | static int open_and_attach_perf_event(int freq, struct bpf_program *prog, 126 | struct bpf_link *links[]) { 127 | struct perf_event_attr attr = { 128 | .type = PERF_TYPE_HARDWARE, 129 | .config = PERF_COUNT_HW_CPU_CYCLES, 130 | .sample_freq = (unsigned)freq, 131 | .sample_type = PERF_SAMPLE_BRANCH_STACK, 132 | .freq = 1, 133 | .branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY, 134 | }; 135 | attr.size = sizeof(attr); 136 | int i, fd; 137 | 138 | for (i = 0; i < nr_cpus; i++) { 139 | fd = syscall(__NR_perf_event_open, &attr, env.pid, i, -1, 0); 140 | if (fd < 0) { 141 | /* Ignore CPU that is offline */ 142 | if (errno == ENODEV) 143 | continue; 144 | fprintf(stderr, "failed to init perf sampling: %s\n", strerror(errno)); 145 | return -1; 146 | } 147 | links[i] = bpf_program__attach_perf_event(prog, fd); 148 | if (!links[i]) { 149 
| fprintf(stderr, "failed to attach perf event on cpu: %d\n", i); 150 | close(fd); 151 | return -1; 152 | } 153 | } 154 | 155 | return 0; 156 | } 157 | 158 | void cleanup_core_btf(struct bpf_object_open_opts *opts) { 159 | if (!opts) 160 | return; 161 | 162 | if (!opts->btf_custom_path) 163 | return; 164 | 165 | unlink(opts->btf_custom_path); 166 | free((void *)opts->btf_custom_path); 167 | } 168 | 169 | struct trace_t { 170 | uint64_t branch, from, to; 171 | bool operator<(const trace_t &O) const { 172 | return std::tie(branch, from, to) < std::tie(O.branch, O.from, O.to); 173 | } 174 | }; 175 | 176 | std::map traces; 177 | 178 | int handle_event(void *ctx, void *data, size_t data_sz) { 179 | auto &traces = *static_cast *>(ctx); 180 | const struct event *e = reinterpret_cast(data); 181 | long entries = e->size / sizeof(event::entry_t); 182 | uint64_t next_branch = -1ULL; 183 | for (int i = 0; i < entries; ++i) { 184 | trace_t trace{e->entries[i].from, e->entries[i].to, next_branch}; 185 | ++traces[trace]; 186 | next_branch = e->entries[i].from; 187 | } 188 | return 0; 189 | } 190 | 191 | void print_aggregated(unsigned long long base_addr, unsigned long long end_addr) { 192 | auto filter_addr = [&](unsigned long long addr) { 193 | if (addr >= base_addr && addr < end_addr) 194 | return addr - base_addr; // PIE, offset from base address 195 | else if (addr < base_addr) 196 | return 0ULL; // avoid conflicting addresses 197 | return addr; // external address, don't care 198 | }; 199 | fprintf(stderr, "%ld traces\n", traces.size()); 200 | for (auto &&[key, cnt] : traces) 201 | printf("T %llx %llx %llx %lu\n", filter_addr(key.branch), 202 | filter_addr(key.from), filter_addr(key.to), cnt); 203 | } 204 | 205 | static int libbpf_print_fn(enum libbpf_print_level level, const char *format, 206 | va_list args) { 207 | if (level == LIBBPF_DEBUG && !env.verbose) 208 | return 0; 209 | return vfprintf(stderr, format, args); 210 | } 211 | 212 | static void sig_handler(int sig) 213 
| { 214 | exiting = true; 215 | } 216 | 217 | static int64_t diff_s(const struct timespec &start, 218 | const struct timespec &end) { 219 | time_t seconds = end.tv_sec - start.tv_sec; 220 | if (end.tv_nsec < start.tv_nsec) 221 | --seconds; 222 | return seconds; 223 | } 224 | 225 | std::pair get_base_address(int pid) { 226 | std::string maps_path = "/proc/" + std::to_string(pid) + "/maps"; 227 | std::ifstream maps_file(maps_path); 228 | if (!maps_file.is_open()) { 229 | fprintf(stderr, "Failed to open %s\n", maps_path.c_str()); 230 | exit(1); 231 | } 232 | uint64_t base_addr = 0; 233 | uint64_t end_addr = 0; 234 | std::string line; 235 | while (std::getline(maps_file, line)) { 236 | std::istringstream iss(line); 237 | std::string address_range, perms, offset, dev, inode, pathname; 238 | if (!(iss >> address_range >> perms >> offset >> dev >> inode)) 239 | continue; 240 | std::getline(iss, pathname); // get the rest of the line 241 | // When looking for the end address, check the first executable mapping 242 | // (r-xp and inode != 0) 243 | if (base_addr && (perms.find('x') == std::string::npos || inode == "0")) 244 | continue; 245 | // Assuming the first mapping belongs to the process... 
246 | size_t dash = address_range.find('-'); 247 | if (dash == std::string::npos) { 248 | fprintf(stderr, "Invalid address range format: %s\n", address_range.c_str()); 249 | exit(1); // No dash found in address range 250 | } 251 | std::string base_addr_str = address_range.substr(0, dash); 252 | std::string end_addr_str = address_range.substr(dash + 1); 253 | if (!base_addr) { 254 | base_addr = std::stoull(base_addr_str, nullptr, 16); 255 | continue; 256 | } else { 257 | end_addr = std::stoull(end_addr_str, nullptr, 16); 258 | break; 259 | } 260 | } 261 | if (base_addr && end_addr) 262 | return {base_addr, end_addr}; 263 | fprintf(stderr, "No base address found for %d\n", pid); 264 | exit(1); // No base address found 265 | } 266 | 267 | bool is_pie_executable(int pid) { 268 | std::string exe_path = "/proc/" + std::to_string(pid) + "/exe"; 269 | struct stat st; 270 | if (lstat(exe_path.c_str(), &st) == -1) { 271 | fprintf(stderr, "Failed to stat %s\n", exe_path.c_str()); 272 | exit(1); 273 | } 274 | // Open the ELF file 275 | FILE *f = fopen(exe_path.c_str(), "rb"); 276 | if (!f) { 277 | fprintf(stderr, "Failed to open %s\n", exe_path.c_str()); 278 | exit(1); 279 | } 280 | unsigned char e_ident[EI_NIDENT]; 281 | if (fread(e_ident, 1, EI_NIDENT, f) != EI_NIDENT) { 282 | fclose(f); 283 | fprintf(stderr, "Failed to read e_ident from %s\n", exe_path.c_str()); 284 | exit(1); 285 | } 286 | if (e_ident[EI_CLASS] != ELFCLASS64) { 287 | fclose(f); 288 | fprintf(stderr, "Only ELF64 is supported (BOLT limitation)\n"); 289 | exit(1); 290 | } 291 | fseek(f, 0, SEEK_SET); 292 | Elf64_Ehdr ehdr; 293 | if (fread(&ehdr, 1, sizeof(ehdr), f) != sizeof(ehdr)) { 294 | fclose(f); 295 | fprintf(stderr, "Failed to read ehdr from %s\n", exe_path.c_str()); 296 | exit(1); 297 | } 298 | if (ehdr.e_type != ET_DYN) { 299 | fclose(f); 300 | if (env.verbose) 301 | fprintf(stderr, "non-ET_DYN\n"); 302 | return false; 303 | } 304 | 305 | // Find dynamic section 306 | fseek(f, ehdr.e_phoff, SEEK_SET); 
307 | for (int i = 0; i < ehdr.e_phnum; ++i) { 308 | Elf64_Phdr phdr; 309 | if (fread(&phdr, 1, sizeof(phdr), f) != sizeof(phdr)) break; 310 | if (phdr.p_type != PT_DYNAMIC) 311 | continue; 312 | size_t dyn_count = phdr.p_filesz / sizeof(Elf64_Dyn); 313 | fseek(f, phdr.p_offset, SEEK_SET); 314 | for (size_t j = 0; j < dyn_count; ++j) { 315 | Elf64_Dyn dyn; 316 | if (fread(&dyn, 1, sizeof(dyn), f) != sizeof(dyn)) break; 317 | if (dyn.d_tag != DT_FLAGS_1) 318 | continue; 319 | if (dyn.d_un.d_val & DF_1_PIE) { 320 | fclose(f); 321 | if (env.verbose) 322 | fprintf(stderr, "DF_1_PIE\n"); 323 | return true; 324 | } else { 325 | fclose(f); 326 | if (env.verbose) 327 | fprintf(stderr, "non-DF_1_PIE\n"); 328 | return false; 329 | } 330 | } 331 | } 332 | // If ET_DYN but no DT_FLAGS_1, check executable bit 333 | if (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) { 334 | fclose(f); 335 | if (env.verbose) 336 | fprintf(stderr, "ET_DYN executable with no DT_FLAGS_1\n"); 337 | return true; 338 | } 339 | fclose(f); 340 | if (env.verbose) 341 | fprintf(stderr, "regular shared object\n"); 342 | return false; 343 | } 344 | 345 | int main(int argc, char **argv) { 346 | int i; 347 | LIBBPF_OPTS(bpf_object_open_opts, open_opts); 348 | static const struct argp argp = { 349 | .options = opts, 350 | .parser = parse_arg, 351 | .doc = argp_program_doc, 352 | }; 353 | struct bpf_link *links[MAX_CPU_NR] = {}; 354 | struct ring_buffer *rb = NULL; 355 | 356 | struct ebpf_bolt_bpf *skel; 357 | int err = 0; 358 | err = argp_parse(&argp, argc, argv, 0, NULL, NULL); 359 | if (err) 360 | return err; 361 | 362 | // PIE support: check if PIE and get base address if so 363 | bool is_pie = is_pie_executable(env.pid); 364 | unsigned long long base_addr = 0; 365 | unsigned long long end_addr = 0; 366 | if (is_pie) { 367 | fprintf(stderr, "PIE executable\n"); 368 | std::tie(base_addr, end_addr) = get_base_address(env.pid); 369 | if (env.verbose) 370 | fprintf(stderr, "Base LOAD address for PID %d: 0x%llx\n", 
env.pid, 371 | base_addr); 372 | } 373 | 374 | nr_cpus = libbpf_num_possible_cpus(); 375 | if (nr_cpus < 0) { 376 | fprintf(stderr, "failed to get # of possible cpus: '%s'!\n", 377 | strerror(-nr_cpus)); 378 | return 1; 379 | } 380 | if (nr_cpus > MAX_CPU_NR) { 381 | fprintf(stderr, "the number of cpu cores is too big, please " 382 | "increase MAX_CPU_NR's value and recompile"); 383 | return 1; 384 | } 385 | 386 | /* Set up libbpf errors and debug info callback */ 387 | libbpf_set_print(libbpf_print_fn); 388 | 389 | skel = ebpf_bolt_bpf__open_opts(&open_opts); 390 | if (!skel) { 391 | fprintf(stderr, "failed to open BPF object\n"); 392 | return 1; 393 | } 394 | err = ebpf_bolt_bpf__load(skel); 395 | if (err) { 396 | fprintf(stderr, "failed to load BPF object: %d\n", err); 397 | goto cleanup; 398 | } 399 | 400 | err = open_and_attach_perf_event(env.freq, skel->progs.lbr_branches, links); 401 | if (err) 402 | goto cleanup; 403 | 404 | /* Set up ring buffer polling */ 405 | rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, &traces, 406 | NULL); 407 | if (!rb) { 408 | err = -1; 409 | fprintf(stderr, "Failed to create ring buffer\n"); 410 | goto cleanup; 411 | } 412 | 413 | if (env.verbose) 414 | fprintf(stderr, "Sampling pid %d for %ld s... 
Hit Ctrl-C to end.\n", 415 | env.pid, env.duration); 416 | 417 | signal(SIGINT, sig_handler); 418 | 419 | struct timespec start_ts, curr_ts; 420 | clock_gettime(CLOCK_MONOTONIC, &start_ts); 421 | 422 | while (1) { 423 | err = ring_buffer__poll(rb, 1000 /* timeout, ms */); 424 | /* Ctrl-C will cause -EINTR */ 425 | if (err == -EINTR) { 426 | err = 0; 427 | break; 428 | } else if (err < 0) { 429 | fprintf(stderr, "Error polling ring buffer: %s\n", strerror(-err)); 430 | goto cleanup; 431 | } 432 | clock_gettime(CLOCK_MONOTONIC, &curr_ts); 433 | if (diff_s(start_ts, curr_ts) >= env.duration) 434 | break; 435 | if (exiting) 436 | break; 437 | } 438 | // Read maps and print aggregated data 439 | print_aggregated(base_addr, end_addr); 440 | cleanup: 441 | for (i = 0; i < nr_cpus; i++) 442 | bpf_link__destroy(links[i]); 443 | ring_buffer__free(rb); 444 | ebpf_bolt_bpf__destroy(skel); 445 | cleanup_core_btf(&open_opts); 446 | 447 | return err; 448 | } 449 | --------------------------------------------------------------------------------