├── .asf.yaml ├── .gitignore ├── .gitmodules ├── README.md ├── docs └── en │ └── latest │ └── developer-guide.md ├── helpers ├── maps.bpf.h ├── trace_helpers.c ├── trace_helpers.h ├── uprobe_helpers.c └── uprobe_helpers.h └── profiler ├── Makefile ├── lua_stacks_map.cpp ├── lua_stacks_map.h ├── lua_state.h ├── profile.bpf.c ├── profile.cpp ├── profile.h ├── stack_printer.cpp └── stack_printer.h /.asf.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | github: 19 | description: Profiler for API Gateway Apache APISIX 20 | homepage: https://apisix.apache.org/ 21 | labels: 22 | - api-gateway 23 | - cloud-native 24 | - nginx 25 | - lua 26 | - luajit 27 | - apigateway 28 | - microservices 29 | - api 30 | - apis 31 | - prof 32 | - profiler 33 | - api-management 34 | - apisix 35 | 36 | 37 | enabled_merge_buttons: 38 | squash: true 39 | # **WARNING**: rebase should only be used 40 | # when backporting multiple commits to the `release/xx` branch 41 | rebase: true 42 | merge: false 43 | 44 | protected_branches: 45 | main: 46 | required_pull_request_reviews: 47 | dismiss_stale_reviews: true 48 | require_code_owner_reviews: true 49 | required_approving_review_count: 2 50 | 51 | 52 | notifications: 53 | commits: notifications@apisix.apache.org 54 | issues: notifications@apisix.apache.org 55 | pullrequests: notifications@apisix.apache.org 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | profiler/.output/* 2 | profiler/*.o 3 | profiler/profile 4 | tools/ 5 | vmlinux/ 6 | profiler/vmlinux.tar 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libbpf"] 2 | path = libbpf 3 | url = https://github.com/libbpf/libbpf.git 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # apisix-profiler 2 | -------------------------------------------------------------------------------- /docs/en/latest/developer-guide.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Developer Guide 3 | --- 4 | 5 | 23 | 24 | ## Overview 25 | 26 | This documentation explains how to develop this project. 27 | 28 | ## Prerequisites 29 | 30 | You may need `clang`, `libelf` and `zlib` to build the project; package names may vary across distros.
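The Makefile also builds `libbpf` from source out of the `libbpf` git submodule (see `.gitmodules` and `LIBBPF_SRC` in `profiler/Makefile`), so check the submodule out before building: ```sh $ git submodule update --init --recursive ```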
31 | 32 | On `Ubuntu/Debian`, you need: 33 | 34 | ```sh 35 | $ apt install clang libelf1 libelf-dev zlib1g-dev 36 | ``` 37 | 38 | On `CentOS/Fedora`, you need: 39 | 40 | ```sh 41 | $ dnf install clang elfutils-libelf elfutils-libelf-devel zlib-devel 42 | ``` 43 | 44 | ## How it works 45 | 46 | First, the eBPF program uses a `uprobe` attached to `libluajit.so` to get the `lua_State` pointer: 47 | 48 | profiler/profile.bpf.c 49 | ```c 50 | static int probe_entry_lua(struct pt_regs *ctx) 51 | { 52 | if (!PT_REGS_PARM1(ctx)) 53 | return 0; 54 | 55 | __u64 pid_tgid = bpf_get_current_pid_tgid(); 56 | __u32 pid = pid_tgid >> 32; 57 | __u32 tid = (__u32)pid_tgid; 58 | struct lua_stack_event event = {}; 59 | 60 | if (targ_pid != -1 && targ_pid != pid) 61 | return 0; 62 | event.pid = pid; 63 | event.L = (void *)PT_REGS_PARM1(ctx); 64 | bpf_map_update_elem(&lua_events, &tid, &event, BPF_ANY); 65 | return 0; 66 | } 67 | ``` 68 | 69 | To get the Lua stack frames, it walks the Lua VM stack backwards in a loop and collects the information of every function it finds; see the `fix_lua_stack` function: 70 | 71 | ```c 72 | cTValue *frame, *nextframe, *bot = tvref(BPF_PROBE_READ_USER(L, stack)) + LJ_FR2; 73 | int i = 0; 74 | frame = nextframe = BPF_PROBE_READ_USER(L, base) - 1; 75 | /* Traverse frames backwards. */ 76 | // to stay within the eBPF verifier instruction limit (1000000), the loop is capped at 15 iterations 77 | for (; i < 15 && frame > bot; i++) 78 | { 79 | if (frame_gc(frame) == obj2gco(L)) 80 | { 81 | level++; /* Skip dummy frames. See lj_err_optype_call(). */ 82 | } 83 | if (level-- == 0) 84 | { 85 | level++; 86 | /* Level found. */ 87 | if (lua_get_funcdata(ctx, frame, eventp, count) != 0) 88 | { 89 | continue; 90 | } 91 | count++; 92 | } 93 | nextframe = frame; 94 | if (frame_islua(frame)) 95 | { 96 | frame = frame_prevl(frame); 97 | } 98 | else 99 | { 100 | if (frame_isvarg(frame)) 101 | level++; /* Skip vararg pseudo-frame. */ 102 | frame = frame_prevd(frame); 103 | } 104 | } 105 | ``` 106 | 107 | After that, it gets the function data and sends the backtrace to user space: 108 | 109 | ```c 110 | static inline int lua_get_funcdata(struct bpf_perf_event_data *ctx, cTValue *frame, struct lua_stack_event *eventp, int level) 111 | { 112 | if (!frame) 113 | return -1; 114 | GCfunc *fn = frame_func(frame); 115 | if (!fn) 116 | return -1; 117 | if (isluafunc(fn)) 118 | { 119 | eventp->type = FUNC_TYPE_LUA; 120 | GCproto *pt = funcproto(fn); 121 | if (!pt) 122 | return -1; 123 | eventp->ffid = BPF_PROBE_READ_USER(pt, firstline); 124 | GCstr *name = proto_chunkname(pt); /* GCstr *name */ 125 | const char *src = strdata(name); 126 | if (!src) 127 | return -1; 128 | bpf_probe_read_user_str(eventp->name, sizeof(eventp->name), src); 129 | bpf_printk("level= %d, fn_name=%s\n", level, eventp->name); 130 | } 131 | else if (iscfunc(fn)) 132 | { 133 | eventp->type = FUNC_TYPE_C; 134 | eventp->funcp = BPF_PROBE_READ_USER(fn, c.f); 135 | } 136 | else if (isffunc(fn)) 137 | { 138 | eventp->type = FUNC_TYPE_F; 139 | eventp->ffid = BPF_PROBE_READ_USER(fn, c.ffid); 140 | } 141 | eventp->level = level; 142 | bpf_perf_event_output(ctx, &lua_event_output, BPF_F_CURRENT_CPU, eventp, sizeof(*eventp)); 143 | return 0; 144 | } 145 | ``` 146 | 147 | In user space, the `user_stack_id` is used to merge the Lua stack with the original user and kernel stacks: 148 | 149 | profiler/profile.cpp: print_fold_user_stack_with_lua 150 | ```c 151 | const struct lua_stack_event* eventp = &(lua_bt->stack[count]); 152 | if (eventp->type == FUNC_TYPE_LUA) 153 | { 154 | if (eventp->ffid) { 155 | printf(";L:%s:%d", eventp->name, eventp->ffid); 156 | } else { 157 | printf(";L:%s", eventp->name); 158 | } 159 | } 160 | else if (eventp->type == FUNC_TYPE_C) 161 | { 162 | sym = syms__map_addr(syms, (unsigned long)eventp->funcp); 163 | if (sym) 164 | { 165 | printf(";C:%s", sym ? sym->name : "[unknown]"); 166 | } 167 | } 168 | else if (eventp->type == FUNC_TYPE_F) 169 | { 170 | printf(";builtin#%d", eventp->ffid); 171 | } 172 | ``` 173 | 174 | If the `user_stack_id` reported with a Lua stack matches the `user_stack_id` of an original stack, that stack contains Lua frames. The program then replaces each `[unknown]` function whose instruction pointer falls inside the LuaJIT VM function range with our Lua stack. This may not be totally correct, but it works for now. After printing the stack, you may use the [FlameGraph](https://github.com/brendangregg/FlameGraph) tool to generate the flame graph for `APISIX`. 175 | 176 | ## Test 177 | 178 | Run `make test` in the test directory.
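As a sketch of the flame-graph step in "How it works": assuming the `profile` binary built by `profiler/Makefile` prints the folded stacks to stdout (the `-p` flag and the way the nginx worker PID is picked below are illustrative assumptions, not documented options), the output can be fed straight to FlameGraph: ```sh $ sudo ./profile -p $(pgrep -o nginx) > out.folded $ git clone https://github.com/brendangregg/FlameGraph $ ./FlameGraph/flamegraph.pl out.folded > apisix.svg ```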
179 | -------------------------------------------------------------------------------- /helpers/maps.bpf.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | // Copyright (c) 2020 Anton Protopopov 3 | #ifndef __MAPS_BPF_H 4 | #define __MAPS_BPF_H 5 | 6 | #include <bpf/bpf_helpers.h> 7 | #include <asm-generic/errno.h> 8 | 9 | static __always_inline void * 10 | bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) 11 | { 12 | void *val; 13 | long err; 14 | 15 | val = bpf_map_lookup_elem(map, key); 16 | if (val) 17 | return val; 18 | 19 | err = bpf_map_update_elem(map, key, init, BPF_NOEXIST); 20 | if (err && err != -EEXIST) 21 | return 0; 22 | 23 | return bpf_map_lookup_elem(map, key); 24 | } 25 | 26 | #endif /* __MAPS_BPF_H */ 27 | -------------------------------------------------------------------------------- /helpers/trace_helpers.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | // Copyright (c) 2020 Wenbo Zhang 3 | // 4 | // Based on ksyms improvements from Andrii Nakryiko, with more helpers added. 5 | // 28-Feb-2020 Wenbo Zhang Created this. 6 | #ifndef _GNU_SOURCE 7 | #define _GNU_SOURCE 8 | #endif 9 | #include <ctype.h> 10 | #include <errno.h> 11 | #include <fcntl.h> 12 | #include <limits.h> 13 | #include <stdio.h> 14 | #include <stdlib.h> 15 | #include <string.h> 16 | #include <time.h> 17 | #include <unistd.h> 18 | #include <sys/resource.h> 19 | #include <bpf/bpf.h> 20 | #include <bpf/btf.h> 21 | #include <bpf/libbpf.h> 22 | #include "trace_helpers.h" 23 | #include "uprobe_helpers.h" 24 | 25 | #define min(x, y) ({ \ 26 | typeof(x) _min1 = (x); \ 27 | typeof(y) _min2 = (y); \ 28 | (void) (&_min1 == &_min2); \ 29 | _min1 < _min2 ? _min1 : _min2; }) 30 | 31 | #define DISK_NAME_LEN 32 32 | 33 | #define MINORBITS 20 34 | #define MINORMASK ((1U << MINORBITS) - 1) 35 | 36 | #define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi)) 37 | 38 | struct ksyms { 39 | struct ksym *syms; 40 | int syms_sz; 41 | int syms_cap; 42 | char *strs; 43 | int strs_sz; 44 | int strs_cap; 45 | }; 46 | 47 | static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, unsigned long addr) 48 | { 49 | size_t new_cap, name_len = strlen(name) + 1; 50 | struct ksym *ksym; 51 | void *tmp; 52 | 53 | if (ksyms->strs_sz + name_len > ksyms->strs_cap) { 54 | new_cap = ksyms->strs_cap * 4 / 3; 55 | if (new_cap < ksyms->strs_sz + name_len) 56 | new_cap = ksyms->strs_sz + name_len; 57 | if (new_cap < 1024) 58 | new_cap = 1024; 59 | tmp = realloc(ksyms->strs, new_cap); 60 | if (!tmp) 61 | return -1; 62 | ksyms->strs = tmp; 63 | ksyms->strs_cap = new_cap; 64 | } 65 | if (ksyms->syms_sz + 1 > ksyms->syms_cap) { 66 | new_cap = ksyms->syms_cap * 4 / 3; 67 | if (new_cap < 1024) 68 | new_cap = 1024; 69 | tmp = realloc(ksyms->syms, sizeof(*ksyms->syms) * new_cap); 70 | if (!tmp) 71 | return -1; 72 | ksyms->syms = tmp; 73 | ksyms->syms_cap = new_cap; 74 | } 75 | 76 | ksym = &ksyms->syms[ksyms->syms_sz]; 77 | /* while constructing, re-use pointer as just a plain offset */ 78 | ksym->name = (void *)(unsigned long)ksyms->strs_sz; 79 | ksym->addr = addr; 80 | 81 | memcpy(ksyms->strs + ksyms->strs_sz, name, name_len); 82 | ksyms->strs_sz += name_len; 83 | ksyms->syms_sz++; 84 | 85 | return 0; 86 | } 87 | 88 | static int ksym_cmp(const void *p1, const void *p2) 89 | { 90 | const struct ksym *s1 = p1, *s2 = p2; 91 | 92 | if (s1->addr == s2->addr) 93 | return strcmp(s1->name, s2->name); 94 | return s1->addr < s2->addr ? 
-1 : 1; 95 | } 96 | 97 | struct ksyms *ksyms__load(void) 98 | { 99 | char sym_type, sym_name[256]; 100 | struct ksyms *ksyms; 101 | unsigned long sym_addr; 102 | int i, ret; 103 | FILE *f; 104 | 105 | f = fopen("/proc/kallsyms", "r"); 106 | if (!f) 107 | return NULL; 108 | 109 | ksyms = calloc(1, sizeof(*ksyms)); 110 | if (!ksyms) 111 | goto err_out; 112 | 113 | while (true) { 114 | ret = fscanf(f, "%lx %c %s%*[^\n]\n", 115 | &sym_addr, &sym_type, sym_name); 116 | if (ret == EOF && feof(f)) 117 | break; 118 | if (ret != 3) 119 | goto err_out; 120 | if (ksyms__add_symbol(ksyms, sym_name, sym_addr)) 121 | goto err_out; 122 | } 123 | 124 | /* now when strings are finalized, adjust pointers properly */ 125 | for (i = 0; i < ksyms->syms_sz; i++) 126 | ksyms->syms[i].name += (unsigned long)ksyms->strs; 127 | 128 | qsort(ksyms->syms, ksyms->syms_sz, sizeof(*ksyms->syms), ksym_cmp); 129 | 130 | fclose(f); 131 | return ksyms; 132 | 133 | err_out: 134 | ksyms__free(ksyms); 135 | fclose(f); 136 | return NULL; 137 | } 138 | 139 | void ksyms__free(struct ksyms *ksyms) 140 | { 141 | if (!ksyms) 142 | return; 143 | 144 | free(ksyms->syms); 145 | free(ksyms->strs); 146 | free(ksyms); 147 | } 148 | 149 | const struct ksym *ksyms__map_addr(const struct ksyms *ksyms, 150 | unsigned long addr) 151 | { 152 | int start = 0, end = ksyms->syms_sz - 1, mid; 153 | unsigned long sym_addr; 154 | 155 | /* find largest sym_addr <= addr using binary search */ 156 | while (start < end) { 157 | mid = start + (end - start + 1) / 2; 158 | sym_addr = ksyms->syms[mid].addr; 159 | 160 | if (sym_addr <= addr) 161 | start = mid; 162 | else 163 | end = mid - 1; 164 | } 165 | 166 | if (start == end && ksyms->syms[start].addr <= addr) 167 | return &ksyms->syms[start]; 168 | return NULL; 169 | } 170 | 171 | const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms, 172 | const char *name) 173 | { 174 | int i; 175 | 176 | for (i = 0; i < ksyms->syms_sz; i++) { 177 | if (strcmp(ksyms->syms[i].name, name) == 0) 178 | return &ksyms->syms[i]; 179 | } 180 | 181 | return NULL; 182 | } 183 | 184 | struct load_range { 185 | uint64_t start; 186 | uint64_t end; 187 | uint64_t file_off; 188 | }; 189 | 190 | enum elf_type { 191 | EXEC, 192 | DYN, 193 | PERF_MAP, 194 | VDSO, 195 | UNKNOWN, 196 | }; 197 | 198 | struct dso { 199 | char *name; 200 | struct load_range *ranges; 201 | int range_sz; 202 | /* Dyn's first text section virtual addr at execution */ 203 | uint64_t sh_addr; 204 | /* Dyn's first text section file offset */ 205 | uint64_t sh_offset; 206 | enum elf_type type; 207 | 208 | struct sym *syms; 209 | int syms_sz; 210 | int syms_cap; 211 | 212 | /* 213 | * libbpf's struct btf is actually a pretty efficient 214 | * "set of strings" data structure, so we create an 215 | * empty one and use it to store symbol names. 
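* As a side effect, identical names are stored only once: btf__add_str returns the offset of the existing copy when the same string is added again.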
216 | */ 217 | struct btf *btf; 218 | }; 219 | 220 | struct map { 221 | uint64_t start_addr; 222 | uint64_t end_addr; 223 | uint64_t file_off; 224 | uint64_t dev_major; 225 | uint64_t dev_minor; 226 | uint64_t inode; 227 | }; 228 | 229 | struct syms { 230 | struct dso *dsos; 231 | int dso_sz; 232 | }; 233 | 234 | static bool is_file_backed(const char *mapname) 235 | { 236 | #define STARTS_WITH(mapname, prefix) \ 237 | (!strncmp(mapname, prefix, sizeof(prefix) - 1)) 238 | 239 | return mapname[0] && !( 240 | STARTS_WITH(mapname, "//anon") || 241 | STARTS_WITH(mapname, "/dev/zero") || 242 | STARTS_WITH(mapname, "/anon_hugepage") || 243 | STARTS_WITH(mapname, "[stack") || 244 | STARTS_WITH(mapname, "/SYSV") || 245 | STARTS_WITH(mapname, "[heap]") || 246 | STARTS_WITH(mapname, "[vsyscall]")); 247 | } 248 | 249 | static bool is_perf_map(const char *path) 250 | { 251 | return false; 252 | } 253 | 254 | static bool is_vdso(const char *path) 255 | { 256 | return !strcmp(path, "[vdso]"); 257 | } 258 | 259 | static int get_elf_type(const char *path) 260 | { 261 | GElf_Ehdr hdr; 262 | void *res; 263 | Elf *e; 264 | int fd; 265 | 266 | if (is_vdso(path)) 267 | return -1; 268 | e = open_elf(path, &fd); 269 | if (!e) 270 | return -1; 271 | res = gelf_getehdr(e, &hdr); 272 | close_elf(e, fd); 273 | if (!res) 274 | return -1; 275 | return hdr.e_type; 276 | } 277 | 278 | static int get_elf_text_scn_info(const char *path, uint64_t *addr, 279 | uint64_t *offset) 280 | { 281 | Elf_Scn *section = NULL; 282 | int fd = -1, err = -1; 283 | GElf_Shdr header; 284 | size_t stridx; 285 | Elf *e = NULL; 286 | char *name; 287 | 288 | e = open_elf(path, &fd); 289 | if (!e) 290 | goto err_out; 291 | err = elf_getshdrstrndx(e, &stridx); 292 | if (err < 0) 293 | goto err_out; 294 | 295 | err = -1; 296 | while ((section = elf_nextscn(e, section)) != 0) { 297 | if (!gelf_getshdr(section, &header)) 298 | continue; 299 | 300 | name = elf_strptr(e, stridx, header.sh_name); 301 | if (name && !strcmp(name, ".text")) { 302 | *addr = (uint64_t)header.sh_addr; 303 | *offset = (uint64_t)header.sh_offset; 304 | err = 0; 305 | break; 306 | } 307 | } 308 | 309 | err_out: 310 | close_elf(e, fd); 311 | return err; 312 | } 313 | 314 | static int syms__add_dso(struct syms *syms, struct map *map, const char *name) 315 | { 316 | struct dso *dso = NULL; 317 | int i, type; 318 | void *tmp; 319 | 320 | for (i = 0; i < syms->dso_sz; i++) { 321 | if (!strcmp(syms->dsos[i].name, name)) { 322 | dso = &syms->dsos[i]; 323 | break; 324 | } 325 | } 326 | 327 | if (!dso) { 328 | tmp = realloc(syms->dsos, (syms->dso_sz + 1) * 329 | sizeof(*syms->dsos)); 330 | if (!tmp) 331 | return -1; 332 | syms->dsos = tmp; 333 | dso = &syms->dsos[syms->dso_sz++]; 334 | memset(dso, 0, sizeof(*dso)); 335 | dso->name = strdup(name); 336 | dso->btf = btf__new_empty(); 337 | } 338 | 339 | tmp = realloc(dso->ranges, (dso->range_sz + 1) * sizeof(*dso->ranges)); 340 | if (!tmp) 341 | return -1; 342 | dso->ranges = tmp; 343 | dso->ranges[dso->range_sz].start = map->start_addr; 344 | dso->ranges[dso->range_sz].end = map->end_addr; 345 | dso->ranges[dso->range_sz].file_off = map->file_off; 346 | dso->range_sz++; 347 | type = get_elf_type(name); 348 | if (type == ET_EXEC) { 349 | dso->type = EXEC; 350 | } else if (type == ET_DYN) { 351 | dso->type = DYN; 352 | if (get_elf_text_scn_info(name, &dso->sh_addr, &dso->sh_offset) < 0) 353 | return -1; 354 | } else if (is_perf_map(name)) { 355 | dso->type = PERF_MAP; 356 | } else if (is_vdso(name)) { 357 | dso->type = VDSO; 358 | } else { 
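/* Neither a recognized ELF type, a perf map, nor the vDSO: dso__load_sym_table() rejects UNKNOWN, so symbol lookups in this DSO simply return NULL. */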
359 | dso->type = UNKNOWN; 360 | } 361 | return 0; 362 | } 363 | 364 | static struct dso *syms__find_dso(const struct syms *syms, unsigned long addr, 365 | uint64_t *offset) 366 | { 367 | struct load_range *range; 368 | struct dso *dso; 369 | int i, j; 370 | 371 | for (i = 0; i < syms->dso_sz; i++) { 372 | dso = &syms->dsos[i]; 373 | for (j = 0; j < dso->range_sz; j++) { 374 | range = &dso->ranges[j]; 375 | if (addr <= range->start || addr >= range->end) 376 | continue; 377 | if (dso->type == DYN || dso->type == VDSO) { 378 | /* Offset within the mmap */ 379 | *offset = addr - range->start + range->file_off; 380 | /* Offset within the ELF for dyn symbol lookup */ 381 | *offset += dso->sh_addr - dso->sh_offset; 382 | } else { 383 | *offset = addr; 384 | } 385 | 386 | return dso; 387 | } 388 | } 389 | 390 | return NULL; 391 | } 392 | 393 | static int dso__load_sym_table_from_perf_map(struct dso *dso) 394 | { 395 | return -1; 396 | } 397 | 398 | static int dso__add_sym(struct dso *dso, const char *name, uint64_t start, 399 | uint64_t size) 400 | { 401 | struct sym *sym; 402 | size_t new_cap; 403 | void *tmp; 404 | int off; 405 | 406 | off = btf__add_str(dso->btf, name); 407 | if (off < 0) 408 | return off; 409 | 410 | if (dso->syms_sz + 1 > dso->syms_cap) { 411 | new_cap = dso->syms_cap * 4 / 3; 412 | if (new_cap < 1024) 413 | new_cap = 1024; 414 | tmp = realloc(dso->syms, sizeof(*dso->syms) * new_cap); 415 | if (!tmp) 416 | return -1; 417 | dso->syms = tmp; 418 | dso->syms_cap = new_cap; 419 | } 420 | 421 | sym = &dso->syms[dso->syms_sz++]; 422 | /* while constructing, re-use pointer as just a plain offset */ 423 | sym->name = (void*)(unsigned long)off; 424 | sym->start = start; 425 | sym->size = size; 426 | sym->offset = 0; 427 | 428 | return 0; 429 | } 430 | 431 | static int sym_cmp(const void *p1, const void *p2) 432 | { 433 | const struct sym *s1 = p1, *s2 = p2; 434 | 435 | if (s1->start == s2->start) 436 | return strcmp(s1->name, s2->name); 437 | return s1->start < s2->start ? -1 : 1; 438 | } 439 | 440 | static int dso__add_syms(struct dso *dso, Elf *e, Elf_Scn *section, 441 | size_t stridx, size_t symsize) 442 | { 443 | Elf_Data *data = NULL; 444 | 445 | while ((data = elf_getdata(section, data)) != 0) { 446 | size_t i, symcount = data->d_size / symsize; 447 | 448 | if (data->d_size % symsize) 449 | return -1; 450 | 451 | for (i = 0; i < symcount; ++i) { 452 | const char *name; 453 | GElf_Sym sym; 454 | 455 | if (!gelf_getsym(data, (int)i, &sym)) 456 | continue; 457 | if (!(name = elf_strptr(e, stridx, sym.st_name))) 458 | continue; 459 | if (name[0] == '\0') 460 | continue; 461 | 462 | if (sym.st_value == 0) 463 | continue; 464 | 465 | if (dso__add_sym(dso, name, sym.st_value, sym.st_size)) 466 | goto err_out; 467 | } 468 | } 469 | 470 | return 0; 471 | 472 | err_out: 473 | return -1; 474 | } 475 | 476 | static void dso__free_fields(struct dso *dso) 477 | { 478 | if (!dso) 479 | return; 480 | 481 | free(dso->name); 482 | free(dso->ranges); 483 | free(dso->syms); 484 | btf__free(dso->btf); 485 | } 486 | 487 | static int dso__load_sym_table_from_elf(struct dso *dso, int fd) 488 | { 489 | Elf_Scn *section = NULL; 490 | Elf *e; 491 | int i; 492 | 493 | e = fd > 0 ? 
open_elf_by_fd(fd) : open_elf(dso->name, &fd); 494 | if (!e) 495 | return -1; 496 | 497 | while ((section = elf_nextscn(e, section)) != 0) { 498 | GElf_Shdr header; 499 | 500 | if (!gelf_getshdr(section, &header)) 501 | continue; 502 | 503 | if (header.sh_type != SHT_SYMTAB && 504 | header.sh_type != SHT_DYNSYM) 505 | continue; 506 | 507 | if (dso__add_syms(dso, e, section, header.sh_link, 508 | header.sh_entsize)) 509 | goto err_out; 510 | } 511 | 512 | /* now when strings are finalized, adjust pointers properly */ 513 | for (i = 0; i < dso->syms_sz; i++) 514 | dso->syms[i].name = 515 | btf__name_by_offset(dso->btf, 516 | (unsigned long)dso->syms[i].name); 517 | 518 | qsort(dso->syms, dso->syms_sz, sizeof(*dso->syms), sym_cmp); 519 | 520 | close_elf(e, fd); 521 | return 0; 522 | 523 | err_out: 524 | dso__free_fields(dso); 525 | close_elf(e, fd); 526 | return -1; 527 | } 528 | 529 | static int create_tmp_vdso_image(struct dso *dso) 530 | { 531 | uint64_t start_addr, end_addr; 532 | long pid = getpid(); 533 | char buf[PATH_MAX]; 534 | void *image = NULL; 535 | char tmpfile[128]; 536 | int ret, fd = -1; 537 | uint64_t sz; 538 | char *name; 539 | FILE *f; 540 | 541 | snprintf(tmpfile, sizeof(tmpfile), "/proc/%ld/maps", pid); 542 | f = fopen(tmpfile, "r"); 543 | if (!f) 544 | return -1; 545 | 546 | while (true) { 547 | ret = fscanf(f, "%lx-%lx %*s %*x %*x:%*x %*u%[^\n]", 548 | &start_addr, &end_addr, buf); 549 | if (ret == EOF && feof(f)) 550 | break; 551 | if (ret != 3) 552 | goto err_out; 553 | 554 | name = buf; 555 | while (isspace(*name)) 556 | name++; 557 | if (!is_file_backed(name)) 558 | continue; 559 | if (is_vdso(name)) 560 | break; 561 | } 562 | 563 | sz = end_addr - start_addr; 564 | image = malloc(sz); 565 | if (!image) 566 | goto err_out; 567 | memcpy(image, (void *)start_addr, sz); 568 | 569 | snprintf(tmpfile, sizeof(tmpfile), 570 | "/tmp/libbpf_%ld_vdso_image_XXXXXX", pid); 571 | fd = mkostemp(tmpfile, O_CLOEXEC); 572 | if (fd < 0) { 573 | fprintf(stderr, "failed to create temp file: %s\n", 574 | strerror(errno)); 575 | goto err_out; 576 | } 577 | /* Unlink the file to avoid leaking */ 578 | if (unlink(tmpfile) == -1) 579 | fprintf(stderr, "failed to unlink %s: %s\n", tmpfile, 580 | strerror(errno)); 581 | if (write(fd, image, sz) == -1) { 582 | fprintf(stderr, "failed to write to vDSO image: %s\n", 583 | strerror(errno)); 584 | close(fd); 585 | fd = -1; 586 | goto err_out; 587 | } 588 | 589 | err_out: 590 | fclose(f); 591 | free(image); 592 | return fd; 593 | } 594 | 595 | static int dso__load_sym_table_from_vdso_image(struct dso *dso) 596 | { 597 | int fd = create_tmp_vdso_image(dso); 598 | 599 | if (fd < 0) 600 | return -1; 601 | return dso__load_sym_table_from_elf(dso, fd); 602 | } 603 | 604 | static int dso__load_sym_table(struct dso *dso) 605 | { 606 | if (dso->type == UNKNOWN) 607 | return -1; 608 | if (dso->type == PERF_MAP) 609 | return dso__load_sym_table_from_perf_map(dso); 610 | if (dso->type == EXEC || dso->type == DYN) 611 | return dso__load_sym_table_from_elf(dso, 0); 612 | if (dso->type == VDSO) 613 | return dso__load_sym_table_from_vdso_image(dso); 614 | return -1; 615 | } 616 | 617 | static struct sym *dso__find_sym(struct dso *dso, uint64_t offset) 618 | { 619 | unsigned long sym_addr; 620 | int start, end, mid; 621 | 622 | if (!dso->syms && dso__load_sym_table(dso)) 623 | return NULL; 624 | 625 | start = 0; 626 | end = dso->syms_sz - 1; 627 | 628 | /* find largest sym_addr <= addr using binary search */ 629 | while (start < end) { 630 | mid = start + (end - 
start + 1) / 2; 631 | sym_addr = dso->syms[mid].start; 632 | 633 | if (sym_addr <= offset) 634 | start = mid; 635 | else 636 | end = mid - 1; 637 | } 638 | 639 | if (start == end && dso->syms[start].start <= offset) { 640 | (dso->syms[start]).offset = offset - dso->syms[start].start; 641 | return &dso->syms[start]; 642 | } 643 | return NULL; 644 | } 645 | 646 | struct syms *syms__load_file(const char *fname) 647 | { 648 | char buf[PATH_MAX], perm[5]; 649 | struct syms *syms; 650 | struct map map; 651 | char *name; 652 | FILE *f; 653 | int ret; 654 | 655 | f = fopen(fname, "r"); 656 | if (!f) 657 | return NULL; 658 | 659 | syms = calloc(1, sizeof(*syms)); 660 | if (!syms) 661 | goto err_out; 662 | 663 | while (true) { 664 | ret = fscanf(f, "%lx-%lx %4s %lx %lx:%lx %lu%[^\n]", 665 | &map.start_addr, &map.end_addr, perm, 666 | &map.file_off, &map.dev_major, 667 | &map.dev_minor, &map.inode, buf); 668 | if (ret == EOF && feof(f)) 669 | break; 670 | if (ret != 8) /* perf-<pid>.map */ 671 | goto err_out; 672 | 673 | if (perm[2] != 'x') 674 | continue; 675 | 676 | name = buf; 677 | while (isspace(*name)) 678 | name++; 679 | if (!is_file_backed(name)) 680 | continue; 681 | 682 | if (syms__add_dso(syms, &map, name)) 683 | goto err_out; 684 | } 685 | 686 | fclose(f); 687 | return syms; 688 | 689 | err_out: 690 | syms__free(syms); 691 | fclose(f); 692 | return NULL; 693 | } 694 | 695 | struct syms *syms__load_pid(pid_t tgid) 696 | { 697 | char fname[128]; 698 | 699 | snprintf(fname, sizeof(fname), "/proc/%ld/maps", (long)tgid); 700 | return syms__load_file(fname); 701 | } 702 | 703 | void syms__free(struct syms *syms) 704 | { 705 | int i; 706 | 707 | if (!syms) 708 | return; 709 | 710 | for (i = 0; i < syms->dso_sz; i++) 711 | dso__free_fields(&syms->dsos[i]); 712 | free(syms->dsos); 713 | free(syms); 714 | } 715 | 716 | const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr) 717 | { 718 | struct dso *dso; 719 | uint64_t offset; 720 | 721 | dso = syms__find_dso(syms, addr, &offset); 722 | if (!dso) 723 | return NULL; 724 | return dso__find_sym(dso, offset); 725 | } 726 | 727 | const struct sym *syms__map_addr_dso(const struct syms *syms, unsigned long addr, 728 | char **dso_name, uint64_t *dso_offset) 729 | { 730 | struct dso *dso; 731 | uint64_t offset; 732 | 733 | dso = syms__find_dso(syms, addr, &offset); 734 | if (!dso) 735 | return NULL; 736 | 737 | *dso_name = dso->name; 738 | *dso_offset = offset; 739 | 740 | return dso__find_sym(dso, offset); 741 | } 742 | 743 | struct syms_cache { 744 | struct { 745 | struct syms *syms; 746 | int tgid; 747 | } *data; 748 | int nr; 749 | }; 750 | 751 | struct syms_cache *syms_cache__new(int nr) 752 | { 753 | struct syms_cache *syms_cache; 754 | 755 | syms_cache = calloc(1, sizeof(*syms_cache)); 756 | if (!syms_cache) 757 | return NULL; 758 | if (nr > 0) 759 | syms_cache->data = calloc(nr, sizeof(*syms_cache->data)); 760 | return syms_cache; 761 | } 762 | 763 | void syms_cache__free(struct syms_cache *syms_cache) 764 | { 765 | int i; 766 | 767 | if (!syms_cache) 768 | return; 769 | 770 | for (i = 0; i < syms_cache->nr; i++) 771 | syms__free(syms_cache->data[i].syms); 772 | free(syms_cache->data); 773 | free(syms_cache); 774 | } 775 | 776 | struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid) 777 | { 778 | void *tmp; 779 | int i; 780 | 781 | for (i = 0; i < syms_cache->nr; i++) { 782 | if (syms_cache->data[i].tgid == tgid) 783 | return syms_cache->data[i].syms; 784 | } 785 | 786 | tmp = realloc(syms_cache->data, 
(syms_cache->nr + 1) * 787 | sizeof(*syms_cache->data)); 788 | if (!tmp) 789 | return NULL; 790 | syms_cache->data = tmp; 791 | syms_cache->data[syms_cache->nr].syms = syms__load_pid(tgid); 792 | syms_cache->data[syms_cache->nr].tgid = tgid; 793 | return syms_cache->data[syms_cache->nr++].syms; 794 | } 795 | 796 | struct partitions { 797 | struct partition *items; 798 | int sz; 799 | }; 800 | 801 | static int partitions__add_partition(struct partitions *partitions, 802 | const char *name, unsigned int dev) 803 | { 804 | struct partition *partition; 805 | void *tmp; 806 | 807 | tmp = realloc(partitions->items, (partitions->sz + 1) * 808 | sizeof(*partitions->items)); 809 | if (!tmp) 810 | return -1; 811 | partitions->items = tmp; 812 | partition = &partitions->items[partitions->sz]; 813 | partition->name = strdup(name); 814 | partition->dev = dev; 815 | partitions->sz++; 816 | 817 | return 0; 818 | } 819 | 820 | struct partitions *partitions__load(void) 821 | { 822 | char part_name[DISK_NAME_LEN]; 823 | unsigned int devmaj, devmin; 824 | unsigned long long nop; 825 | struct partitions *partitions; 826 | char buf[64]; 827 | FILE *f; 828 | 829 | f = fopen("/proc/partitions", "r"); 830 | if (!f) 831 | return NULL; 832 | 833 | partitions = calloc(1, sizeof(*partitions)); 834 | if (!partitions) 835 | goto err_out; 836 | 837 | while (fgets(buf, sizeof(buf), f) != NULL) { 838 | /* skip heading */ 839 | if (buf[0] != ' ' || buf[0] == '\n') 840 | continue; 841 | if (sscanf(buf, "%u %u %llu %s", &devmaj, &devmin, &nop, 842 | part_name) != 4) 843 | goto err_out; 844 | if (partitions__add_partition(partitions, part_name, 845 | MKDEV(devmaj, devmin))) 846 | goto err_out; 847 | } 848 | 849 | fclose(f); 850 | return partitions; 851 | 852 | err_out: 853 | partitions__free(partitions); 854 | fclose(f); 855 | return NULL; 856 | } 857 | 858 | void partitions__free(struct partitions *partitions) 859 | { 860 | int i; 861 | 862 | if (!partitions) 863 | return; 864 | 865 | for (i = 0; i < partitions->sz; i++) 866 | free(partitions->items[i].name); 867 | free(partitions->items); 868 | free(partitions); 869 | } 870 | 871 | const struct partition * 872 | partitions__get_by_dev(const struct partitions *partitions, unsigned int dev) 873 | { 874 | int i; 875 | 876 | for (i = 0; i < partitions->sz; i++) { 877 | if (partitions->items[i].dev == dev) 878 | return &partitions->items[i]; 879 | } 880 | 881 | return NULL; 882 | } 883 | 884 | const struct partition * 885 | partitions__get_by_name(const struct partitions *partitions, const char *name) 886 | { 887 | int i; 888 | 889 | for (i = 0; i < partitions->sz; i++) { 890 | if (strcmp(partitions->items[i].name, name) == 0) 891 | return &partitions->items[i]; 892 | } 893 | 894 | return NULL; 895 | } 896 | 897 | static void print_stars(unsigned int val, unsigned int val_max, int width) 898 | { 899 | int num_stars, num_spaces, i; 900 | bool need_plus; 901 | 902 | num_stars = min(val, val_max) * width / val_max; 903 | num_spaces = width - num_stars; 904 | need_plus = val > val_max; 905 | 906 | for (i = 0; i < num_stars; i++) 907 | printf("*"); 908 | for (i = 0; i < num_spaces; i++) 909 | printf(" "); 910 | if (need_plus) 911 | printf("+"); 912 | } 913 | 914 | void print_log2_hist(unsigned int *vals, int vals_size, const char *val_type) 915 | { 916 | int stars_max = 40, idx_max = -1; 917 | unsigned int val, val_max = 0; 918 | unsigned long long low, high; 919 | int stars, width, i; 920 | 921 | for (i = 0; i < vals_size; i++) { 922 | val = vals[i]; 923 | if (val > 0) 924 | idx_max 
= i; 925 | if (val > val_max) 926 | val_max = val; 927 | } 928 | 929 | if (idx_max < 0) 930 | return; 931 | 932 | printf("%*s%-*s : count distribution\n", idx_max <= 32 ? 5 : 15, "", 933 | idx_max <= 32 ? 19 : 29, val_type); 934 | 935 | if (idx_max <= 32) 936 | stars = stars_max; 937 | else 938 | stars = stars_max / 2; 939 | 940 | for (i = 0; i <= idx_max; i++) { 941 | low = (1ULL << (i + 1)) >> 1; 942 | high = (1ULL << (i + 1)) - 1; 943 | if (low == high) 944 | low -= 1; 945 | val = vals[i]; 946 | width = idx_max <= 32 ? 10 : 20; 947 | printf("%*lld -> %-*lld : %-8d |", width, low, width, high, val); 948 | print_stars(val, val_max, stars); 949 | printf("|\n"); 950 | } 951 | } 952 | 953 | void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base, 954 | unsigned int step, const char *val_type) 955 | { 956 | int i, stars_max = 40, idx_min = -1, idx_max = -1; 957 | unsigned int val, val_max = 0; 958 | 959 | for (i = 0; i < vals_size; i++) { 960 | val = vals[i]; 961 | if (val > 0) { 962 | idx_max = i; 963 | if (idx_min < 0) 964 | idx_min = i; 965 | } 966 | if (val > val_max) 967 | val_max = val; 968 | } 969 | 970 | if (idx_max < 0) 971 | return; 972 | 973 | printf(" %-13s : count distribution\n", val_type); 974 | for (i = idx_min; i <= idx_max; i++) { 975 | val = vals[i]; 976 | printf(" %-10d : %-8d |", base + i * step, val); 977 | print_stars(val, val_max, stars_max); 978 | printf("|\n"); 979 | } 980 | } 981 | 982 | unsigned long long get_ktime_ns(void) 983 | { 984 | struct timespec ts; 985 | 986 | clock_gettime(CLOCK_MONOTONIC, &ts); 987 | return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; 988 | } 989 | 990 | bool is_kernel_module(const char *name) 991 | { 992 | bool found = false; 993 | char buf[64]; 994 | FILE *f; 995 | 996 | f = fopen("/proc/modules", "r"); 997 | if (!f) 998 | return false; 999 | 1000 | while (fgets(buf, sizeof(buf), f) != NULL) { 1001 | if (sscanf(buf, "%s %*s\n", buf) != 1) 1002 | break; 1003 | if (!strcmp(buf, name)) { 1004 | found = true; 1005 | break; 1006 | } 1007 | } 1008 | 1009 | fclose(f); 1010 | return found; 1011 | } 1012 | 1013 | static bool fentry_try_attach(int id) 1014 | { 1015 | struct bpf_insn insns[] = { { .code = BPF_JMP | BPF_EXIT } }; 1016 | LIBBPF_OPTS(bpf_prog_load_opts, opts); 1017 | int prog_fd, attach_fd; 1018 | 1019 | opts.expected_attach_type = BPF_TRACE_FENTRY; 1020 | opts.attach_btf_id = id, 1021 | 1022 | prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACING, "test", NULL, insns, 1, &opts); 1023 | if (prog_fd < 0) 1024 | return false; 1025 | 1026 | attach_fd = bpf_raw_tracepoint_open(NULL, prog_fd); 1027 | if (attach_fd >= 0) 1028 | close(attach_fd); 1029 | 1030 | close(prog_fd); 1031 | return attach_fd >= 0; 1032 | } 1033 | 1034 | bool fentry_can_attach(const char *name, const char *mod) 1035 | { 1036 | const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; 1037 | struct btf *base, *btf = NULL; 1038 | char sysfs_mod[80]; 1039 | int id = -1, err; 1040 | 1041 | base = btf__parse(sysfs_vmlinux, NULL); 1042 | if (!base) { 1043 | err = -errno; 1044 | fprintf(stderr, "failed to parse vmlinux BTF at '%s': %s\n", 1045 | sysfs_vmlinux, strerror(-err)); 1046 | goto err_out; 1047 | } 1048 | if (mod && module_btf_exists(mod)) { 1049 | snprintf(sysfs_mod, sizeof(sysfs_mod), "/sys/kernel/btf/%s", mod); 1050 | btf = btf__parse_split(sysfs_mod, base); 1051 | if (!btf) { 1052 | err = -errno; 1053 | fprintf(stderr, "failed to load BTF from %s: %s\n", 1054 | sysfs_mod, strerror(-err)); 1055 | btf = base; 1056 | base = NULL; 1057 | } 1058 | } else { 1059 
| btf = base; 1060 | base = NULL; 1061 | } 1062 | 1063 | id = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 1064 | 1065 | err_out: 1066 | btf__free(btf); 1067 | btf__free(base); 1068 | return id > 0 && fentry_try_attach(id); 1069 | } 1070 | 1071 | bool kprobe_exists(const char *name) 1072 | { 1073 | char sym_name[256]; 1074 | FILE *f; 1075 | int ret; 1076 | 1077 | f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); 1078 | if (!f) 1079 | goto slow_path; 1080 | 1081 | while (true) { 1082 | ret = fscanf(f, "%s%*[^\n]\n", sym_name); 1083 | if (ret == EOF && feof(f)) 1084 | break; 1085 | if (ret != 1) { 1086 | fprintf(stderr, "failed to read symbol from available_filter_functions\n"); 1087 | break; 1088 | } 1089 | if (!strcmp(name, sym_name)) { 1090 | fclose(f); 1091 | return true; 1092 | } 1093 | } 1094 | 1095 | fclose(f); 1096 | return false; 1097 | 1098 | slow_path: 1099 | f = fopen("/proc/kallsyms", "r"); 1100 | if (!f) 1101 | return false; 1102 | 1103 | while (true) { 1104 | ret = fscanf(f, "%*x %*c %s%*[^\n]\n", sym_name); 1105 | if (ret == EOF && feof(f)) 1106 | break; 1107 | if (ret != 1) { 1108 | fprintf(stderr, "failed to read symbol from kallsyms\n"); 1109 | break; 1110 | } 1111 | if (!strcmp(name, sym_name)) { 1112 | fclose(f); 1113 | return true; 1114 | } 1115 | } 1116 | 1117 | fclose(f); 1118 | return false; 1119 | } 1120 | 1121 | bool vmlinux_btf_exists(void) 1122 | { 1123 | if (!access("/sys/kernel/btf/vmlinux", R_OK)) 1124 | return true; 1125 | return false; 1126 | } 1127 | 1128 | bool module_btf_exists(const char *mod) 1129 | { 1130 | char sysfs_mod[80]; 1131 | 1132 | if (mod) { 1133 | snprintf(sysfs_mod, sizeof(sysfs_mod), "/sys/kernel/btf/%s", mod); 1134 | if (!access(sysfs_mod, R_OK)) 1135 | return true; 1136 | } 1137 | return false; 1138 | } 1139 | -------------------------------------------------------------------------------- /helpers/trace_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __TRACE_HELPERS_H 3 | #define __TRACE_HELPERS_H 4 | 5 | #include <stdbool.h> 6 | 7 | #define NSEC_PER_SEC 1000000000ULL 8 | 9 | struct ksym { 10 | const char *name; 11 | unsigned long addr; 12 | }; 13 | 14 | struct ksyms; 15 | 16 | struct ksyms *ksyms__load(void); 17 | void ksyms__free(struct ksyms *ksyms); 18 | const struct ksym *ksyms__map_addr(const struct ksyms *ksyms, 19 | unsigned long addr); 20 | const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms, 21 | const char *name); 22 | 23 | struct sym { 24 | const char *name; 25 | unsigned long start; 26 | unsigned long size; 27 | unsigned long offset; 28 | }; 29 | 30 | struct syms; 31 | 32 | struct syms *syms__load_pid(int tgid); 33 | struct syms *syms__load_file(const char *fname); 34 | void syms__free(struct syms *syms); 35 | const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr); 36 | const struct sym *syms__map_addr_dso(const struct syms *syms, unsigned long addr, 37 | char **dso_name, uint64_t *dso_offset); 38 | 39 | struct syms_cache; 40 | 41 | struct syms_cache *syms_cache__new(int nr); 42 | struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid); 43 | void syms_cache__free(struct syms_cache *syms_cache); 44 | 45 | struct partition { 46 | char *name; 47 | unsigned int dev; 48 | }; 49 | 50 | struct partitions; 51 | 52 | struct partitions *partitions__load(void); 53 | void partitions__free(struct partitions *partitions); 54 | const struct partition * 55 
| partitions__get_by_dev(const struct partitions *partitions, unsigned int dev); 56 | const struct partition * 57 | partitions__get_by_name(const struct partitions *partitions, const char *name); 58 | 59 | void print_log2_hist(unsigned int *vals, int vals_size, const char *val_type); 60 | void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base, 61 | unsigned int step, const char *val_type); 62 | 63 | unsigned long long get_ktime_ns(void); 64 | 65 | bool is_kernel_module(const char *name); 66 | 67 | /* 68 | * When attempting to use kprobe/kretprobe, please check out new fentry/fexit 69 | * probes, as they provide better performance and usability. But in some 70 | * situations we have to fall back to kprobe/kretprobe probes. This helper 71 | * is used to detect fentry/fexit support for the specified kernel function. 72 | * 73 | * 1. A gap between kernel versions: kernel BTF is exposed 74 | * starting from the 5.4 kernel, but fentry/fexit is actually 75 | * supported starting from 5.5. 76 | * 2. Whether the kernel supports module BTF or not 77 | * 78 | * *name* is the name of a kernel function to be attached to, which can be 79 | * from vmlinux or a kernel module. 80 | * *mod* is a hint that indicates the *name* may reside in module BTF, 81 | * if NULL, it means *name* belongs to vmlinux. 82 | */ 83 | bool fentry_can_attach(const char *name, const char *mod); 84 | 85 | /* 86 | * The name of a kernel function to be attached to may be changed between 87 | * kernel releases. This helper is used to confirm whether the target kernel 88 | * uses a certain function name before attaching. 89 | * 90 | * It is achieved by scanning 91 | * /sys/kernel/debug/tracing/available_filter_functions 92 | * If this file does not exist, it falls back to parsing /proc/kallsyms, 93 | * which is slower. 94 | */ 95 | bool kprobe_exists(const char *name); 96 | 97 | bool vmlinux_btf_exists(void); 98 | bool module_btf_exists(const char *mod); 99 | 100 | #endif /* __TRACE_HELPERS_H */ 101 | -------------------------------------------------------------------------------- /helpers/uprobe_helpers.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | /* Copyright (c) 2021 Google LLC. */ 3 | #ifndef _GNU_SOURCE 4 | #define _GNU_SOURCE 5 | #endif 6 | #include <errno.h> 7 | #include <fcntl.h> 8 | #include <limits.h> 9 | #include <stdbool.h> 10 | #include <stdio.h> 11 | #include <stdlib.h> 12 | #include <string.h> 13 | #include <unistd.h> 14 | #include <sys/stat.h> 15 | #include <sys/types.h> 16 | #include <gelf.h> 17 | 18 | #define warn(...) fprintf(stderr, __VA_ARGS__) 19 | 20 | /* 21 | * Returns 0 on success; -1 on failure. On success, returns via `path` the full 22 | * path to the program for pid. 23 | */ 24 | int get_pid_binary_path(pid_t pid, char *path, size_t path_sz) 25 | { 26 | ssize_t ret; 27 | char proc_pid_exe[32]; 28 | 29 | if (snprintf(proc_pid_exe, sizeof(proc_pid_exe), "/proc/%d/exe", pid) 30 | >= sizeof(proc_pid_exe)) { 31 | warn("snprintf /proc/PID/exe failed"); 32 | return -1; 33 | } 34 | ret = readlink(proc_pid_exe, path, path_sz); 35 | if (ret < 0) { 36 | warn("No such pid %d\n", pid); 37 | return -1; 38 | } 39 | if (ret >= path_sz) { 40 | warn("readlink truncation"); 41 | return -1; 42 | } 43 | path[ret] = '\0'; 44 | 45 | return 0; 46 | } 47 | 48 | /* 49 | * Returns 0 on success; -1 on failure. On success, returns via `path` the full 50 | * path to a library matching the name `lib` that is loaded into pid's address 51 | * space.
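* The match is done on the library basename, so e.g. lib "luajit" would match a mapping like /usr/local/lib/libluajit-5.1.so.2 (an illustrative path, not one the code assumes).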
52 | */ 53 | int get_pid_lib_path(pid_t pid, const char *lib, char *path, size_t path_sz) 54 | { 55 | FILE *maps; 56 | char *p; 57 | char proc_pid_maps[32]; 58 | char line_buf[1024]; 59 | 60 | if (snprintf(proc_pid_maps, sizeof(proc_pid_maps), "/proc/%d/maps", pid) 61 | >= sizeof(proc_pid_maps)) { 62 | warn("snprintf /proc/PID/maps failed"); 63 | return -1; 64 | } 65 | maps = fopen(proc_pid_maps, "r"); 66 | if (!maps) { 67 | warn("No such pid %d\n", pid); 68 | return -1; 69 | } 70 | while (fgets(line_buf, sizeof(line_buf), maps)) { 71 | if (sscanf(line_buf, "%*x-%*x %*s %*x %*s %*u %s", path) != 1) 72 | continue; 73 | /* e.g. /usr/lib/x86_64-linux-gnu/libc-2.31.so */ 74 | p = strrchr(path, '/'); 75 | if (!p) 76 | continue; 77 | if (strncmp(p, "/lib", 4)) 78 | continue; 79 | p += 4; 80 | if (strncmp(lib, p, strlen(lib))) 81 | continue; 82 | p += strlen(lib); 83 | /* libraries can have - or . after the name */ 84 | if (*p != '.' && *p != '-') 85 | continue; 86 | 87 | fclose(maps); 88 | return 0; 89 | } 90 | 91 | warn("Cannot find library %s\n", lib); 92 | fclose(maps); 93 | return -1; 94 | } 95 | 96 | /* 97 | * Returns 0 on success; -1 on failure. On success, returns via `path` the full 98 | * path to the program. 99 | */ 100 | static int which_program(const char *prog, char *path, size_t path_sz) 101 | { 102 | FILE *which; 103 | char cmd[100]; 104 | 105 | if (snprintf(cmd, sizeof(cmd), "which %s", prog) >= sizeof(cmd)) { 106 | warn("snprintf which prog failed"); 107 | return -1; 108 | } 109 | which = popen(cmd, "r"); 110 | if (!which) { 111 | warn("which failed"); 112 | return -1; 113 | } 114 | if (!fgets(path, path_sz, which)) { 115 | warn("fgets which failed"); 116 | pclose(which); 117 | return -1; 118 | } 119 | /* which has a \n at the end of the string */ 120 | path[strlen(path) - 1] = '\0'; 121 | pclose(which); 122 | return 0; 123 | } 124 | 125 | /* 126 | * Returns 0 on success; -1 on failure. On success, returns via `path` the full 127 | * path to the binary for the given pid. 128 | * 1) pid == x, binary == "" : returns the path to x's program 129 | * 2) pid == x, binary == "foo" : returns the path to libfoo linked in x 130 | * 3) pid == 0, binary == "" : failure: need a pid or a binary 131 | * 4) pid == 0, binary == "bar" : returns the path to `which bar` 132 | * 133 | * For case 4), ideally we'd like to search for libbar too, but we don't support 134 | * that yet. 135 | */ 136 | int resolve_binary_path(const char *binary, pid_t pid, char *path, size_t path_sz) 137 | { 138 | if (!strcmp(binary, "")) { 139 | if (!pid) { 140 | warn("Uprobes need a pid or a binary\n"); 141 | return -1; 142 | } 143 | return get_pid_binary_path(pid, path, path_sz); 144 | } 145 | if (pid) 146 | return get_pid_lib_path(pid, binary, path, path_sz); 147 | 148 | if (which_program(binary, path, path_sz)) { 149 | /* 150 | * If the user is tracing a program by name, we can find it. 151 | * But we can't find a library by name yet. We'd need to parse 152 | * ld.so.cache or something similar. 153 | */ 154 | warn("Can't find %s (Need a PID if this is a library)\n", binary); 155 | return -1; 156 | } 157 | return 0; 158 | } 159 | 160 | /* 161 | * Opens an elf at `path` of kind ELF_K_ELF. Returns NULL on failure. On 162 | * success, close with close_elf(e, fd_close). 
163 | */ 164 | Elf *open_elf(const char *path, int *fd_close) 165 | { 166 | int fd; 167 | Elf *e; 168 | 169 | if (elf_version(EV_CURRENT) == EV_NONE) { 170 | warn("elf init failed\n"); 171 | return NULL; 172 | } 173 | fd = open(path, O_RDONLY); 174 | if (fd < 0) { 175 | warn("Could not open %s\n", path); 176 | return NULL; 177 | } 178 | e = elf_begin(fd, ELF_C_READ, NULL); 179 | if (!e) { 180 | warn("elf_begin failed: %s\n", elf_errmsg(-1)); 181 | close(fd); 182 | return NULL; 183 | } 184 | if (elf_kind(e) != ELF_K_ELF) { 185 | warn("elf kind %d is not ELF_K_ELF\n", elf_kind(e)); 186 | elf_end(e); 187 | close(fd); 188 | return NULL; 189 | } 190 | *fd_close = fd; 191 | return e; 192 | } 193 | 194 | Elf *open_elf_by_fd(int fd) 195 | { 196 | Elf *e; 197 | 198 | if (elf_version(EV_CURRENT) == EV_NONE) { 199 | warn("elf init failed\n"); 200 | return NULL; 201 | } 202 | e = elf_begin(fd, ELF_C_READ, NULL); 203 | if (!e) { 204 | warn("elf_begin failed: %s\n", elf_errmsg(-1)); 205 | close(fd); 206 | return NULL; 207 | } 208 | if (elf_kind(e) != ELF_K_ELF) { 209 | warn("elf kind %d is not ELF_K_ELF\n", elf_kind(e)); 210 | elf_end(e); 211 | close(fd); 212 | return NULL; 213 | } 214 | return e; 215 | } 216 | 217 | void close_elf(Elf *e, int fd_close) 218 | { 219 | elf_end(e); 220 | close(fd_close); 221 | } 222 | 223 | /* Returns the offset of a function in the elf file `path`, or -1 on failure. */ 224 | off_t get_elf_func_offset(const char *path, const char *func) 225 | { 226 | off_t ret = -1; 227 | int i, fd = -1; 228 | Elf *e; 229 | Elf_Scn *scn; 230 | Elf_Data *data; 231 | GElf_Ehdr ehdr; 232 | GElf_Shdr shdr[1]; 233 | GElf_Phdr phdr; 234 | GElf_Sym sym[1]; 235 | size_t shstrndx, nhdrs; 236 | char *n; 237 | 238 | e = open_elf(path, &fd); 239 | 240 | if (!gelf_getehdr(e, &ehdr)) 241 | goto out; 242 | 243 | if (elf_getshdrstrndx(e, &shstrndx) != 0) 244 | goto out; 245 | 246 | scn = NULL; 247 | while ((scn = elf_nextscn(e, scn))) { 248 | if (!gelf_getshdr(scn, shdr)) 249 | continue; 250 | if (!(shdr->sh_type == SHT_SYMTAB || shdr->sh_type == SHT_DYNSYM)) 251 | continue; 252 | data = NULL; 253 | while ((data = elf_getdata(scn, data))) { 254 | for (i = 0; gelf_getsym(data, i, sym); i++) { 255 | n = elf_strptr(e, shdr->sh_link, sym->st_name); 256 | if (!n) 257 | continue; 258 | if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) 259 | continue; 260 | if (!strcmp(n, func)) { 261 | ret = sym->st_value; 262 | goto check; 263 | } 264 | } 265 | } 266 | } 267 | 268 | check: 269 | if (ehdr.e_type == ET_EXEC || ehdr.e_type == ET_DYN) { 270 | if (elf_getphdrnum(e, &nhdrs) != 0) { 271 | ret = -1; 272 | goto out; 273 | } 274 | for (i = 0; i < (int)nhdrs; i++) { 275 | if (!gelf_getphdr(e, i, &phdr)) 276 | continue; 277 | if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) 278 | continue; 279 | if (phdr.p_vaddr <= ret && ret < (phdr.p_vaddr + phdr.p_memsz)) { 280 | ret = ret - phdr.p_vaddr + phdr.p_offset; 281 | goto out; 282 | } 283 | } 284 | ret = -1; 285 | } 286 | out: 287 | close_elf(e, fd); 288 | return ret; 289 | } 290 | -------------------------------------------------------------------------------- /helpers/uprobe_helpers.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | /* Copyright (c) 2021 Google LLC. 
*/ 3 | #ifndef __UPROBE_HELPERS_H 4 | #define __UPROBE_HELPERS_H 5 | 6 | #include <sys/types.h> 7 | #include <unistd.h> 8 | #include <gelf.h> 9 | 10 | int get_pid_binary_path(pid_t pid, char *path, size_t path_sz); 11 | int get_pid_lib_path(pid_t pid, const char *lib, char *path, size_t path_sz); 12 | int resolve_binary_path(const char *binary, pid_t pid, char *path, size_t path_sz); 13 | off_t get_elf_func_offset(const char *path, const char *func); 14 | Elf *open_elf(const char *path, int *fd_close); 15 | Elf *open_elf_by_fd(int fd); 16 | void close_elf(Elf *e, int fd_close); 17 | 18 | #endif /* __UPROBE_HELPERS_H */ 19 | -------------------------------------------------------------------------------- /profiler/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 | # from https://github.com/libbpf/libbpf-bootstrap/ 3 | OUTPUT := .output 4 | CLANG ?= clang 5 | LLVM_STRIP ?= llvm-strip 6 | BPFTOOL ?= $(abspath ../tools/bpftool) 7 | LIBBPF_SRC := $(abspath ../libbpf/src) 8 | LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a) 9 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/' | sed 's/ppc64le/powerpc/' | sed 's/mips.*/mips/') 10 | VMLINUX := $(ARCH)/$(ARCH)/vmlinux.h 11 | HELPERS := $(abspath ../helpers) 12 | # Use our own libbpf API headers and Linux UAPI headers distributed with 13 | # libbpf to avoid dependency on system-wide headers, which could be missing or 14 | # outdated 15 | INCLUDES := -I$(OUTPUT) -I../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(HELPERS) 16 | CFLAGS := -g -Wall # -fsanitize=address 17 | 18 | APPS = profile 19 | 20 | # Get Clang's default includes on this system. We'll explicitly add these dirs 21 | # to the includes list when compiling with `-target bpf` because otherwise some 22 | # architecture-specific dirs will be "missing" on some architectures/distros - 23 | # headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h, 24 | # sys/cdefs.h etc. might be missing. 25 | # 26 | # Use '-idirafter': Don't interfere with include mechanics except where the 27 | # build would have failed anyways.
28 | CLANG_BPF_SYS_INCLUDES = $(shell $(CLANG) -v -E - </dev/null 2>&1 \ 29 | | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') 30 | 31 | ifeq ($(V),1) 32 | Q = 33 | msg = 34 | else 35 | Q = @ 36 | msg = @printf ' %-8s %s%s\n' \ 37 | "$(1)" \ 38 | "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \ 39 | "$(if $(3), $(3))"; 40 | MAKEFLAGS += --no-print-directory 41 | endif 42 | 43 | .PHONY: all 44 | all: $(APPS) 45 | 46 | $(VMLINUX): 47 | $(Q)wget https://github.com/yunwei37/apisix-profiler/releases/download/vmlinux/vmlinux.tar 48 | $(Q)tar -xvf vmlinux.tar ../ 49 | $(Q)rm vmlinux.tar 50 | 51 | .PHONY: clean 52 | clean: 53 | $(call msg,CLEAN) 54 | $(Q)rm -rf $(OUTPUT) $(APPS) *.o 55 | 56 | $(OUTPUT) $(OUTPUT)/libbpf: 57 | $(call msg,MKDIR,$@) 58 | $(Q)mkdir -p $@ 59 | 60 | # Build libbpf 61 | $(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf 62 | $(call msg,LIB,$@) 63 | $(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \ 64 | OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \ 65 | INCLUDEDIR= LIBDIR= UAPIDIR= \ 66 | install 67 | 68 | # Build BPF code 69 | $(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) 70 | $(call msg,BPF,$@) 71 | $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) -c $(filter %.c,$^) -o $@ 72 | $(Q)$(LLVM_STRIP) -g $@ # strip useless DWARF info 73 | 74 | # Generate BPF skeletons 75 | $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) 76 | $(call msg,GEN-SKEL,$@) 77 | $(Q)$(BPFTOOL) gen skeleton $< > $@ 78 | 79 | # Build user-space code 80 | $(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h 81 | 82 | $(OUTPUT)/%.o: %.cpp $(wildcard %.h) | $(OUTPUT) 83 | $(call msg,CC,$@) 84 | $(Q)$(CXX) $(CFLAGS) $(INCLUDES) -c $(filter %.cpp,$^) -o $@ 85 | 86 | trace_helpers.o: $(HELPERS)/trace_helpers.c 87 | $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ 88 | 89 | uprobe_helpers.o: $(HELPERS)/uprobe_helpers.c 90 | $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ 91 | 92 | lua_stacks_map.o: lua_stacks_map.cpp profile.h lua_stacks_map.h 93 | $(CXX) $(CFLAGS) $(INCLUDES) -c $(filter %.cpp,$^) -o $@ 94 | 95 | stack_printer.o: stack_printer.cpp profile.h stack_printer.h 96 | $(CXX) $(CFLAGS) $(INCLUDES) -c $(filter %.cpp,$^) -o $@ 97 | 98 | # Build application binary 99 | $(APPS): %: $(OUTPUT)/%.o uprobe_helpers.o trace_helpers.o lua_stacks_map.o stack_printer.o $(LIBBPF_OBJ) | $(OUTPUT) 100 | $(call msg,BINARY,$@) 101 | $(Q)$(CXX) $(CFLAGS) $^ -lelf -lz -o $@ 102 | 103 | # delete failed targets 104 | .DELETE_ON_ERROR: 105 | 106 | # keep intermediate (.skel.h, .bpf.o, etc) targets 107 | .SECONDARY: 108 | -------------------------------------------------------------------------------- /profiler/lua_stacks_map.cpp: -------------------------------------------------------------------------------- 1 | #include "lua_stacks_map.h" 2 | #include 3 | 4 | void lua_stack_map::insert_lua_stack_map(const struct lua_stack_event *e) 5 | { 6 | if (!e) 7 | { 8 | return; 9 | } 10 | auto it = map.find(e->user_stack_id); 11 | if (it == map.end()) 12 | { 13 | lua_stack_backtrace stack = {*e}; 14 | map[e->user_stack_id] = stack; // insert 15 | return; 16 | } 17 | lua_stack_backtrace *stack = &it->second; 18 | stack->push_back(*e); 19 | return; 20 | } 21 | 22 | // return the level of stack in the map 23 | int lua_stack_map::get_lua_stack_backtrace(int user_stack_id, lua_stack_backtrace *stack) 24 | { 25 | if (!stack) 26 | { 27 | return -1; 28 | } 29 | auto it = 
map.find(user_stack_id); 30 | if (it == map.end()) 31 | { 32 | *stack = lua_stack_backtrace{}; 33 | return -1; 34 | } 35 | *stack = it->second; 36 | return stack->size(); 37 | } -------------------------------------------------------------------------------- /profiler/lua_stacks_map.h: -------------------------------------------------------------------------------- 1 | #ifndef LUA_STACKS_HELPER_H 2 | #define LUA_STACKS_HELPER_H 3 | 4 | #define MAX_STACK_DEPTH 64 5 | 6 | #include "profile.h" 7 | #include <vector> 8 | #include <map> 9 | 10 | // lua stack backtrace events 11 | using lua_stack_backtrace = std::vector<lua_stack_event>; 12 | 13 | // The map to collect and preserve the stack events found in perf events. 14 | // The stack info will be printed when the profiler stops. 15 | class lua_stack_map 16 | { 17 | private: 18 | std::map<int, lua_stack_backtrace> map; 19 | 20 | public: 21 | // insert a lua stack event into the map. 22 | // The event will be pushed into the backtrace vector with the same stack_id. 23 | void insert_lua_stack_map(const struct lua_stack_event *event); 24 | // get the lua stack backtrace with the stack_id. 25 | // return the level of stack in the map 26 | int get_lua_stack_backtrace(int user_stack_id, lua_stack_backtrace *stack); 27 | }; 28 | 29 | #endif -------------------------------------------------------------------------------- /profiler/lua_state.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** LuaJIT common internal definitions for profiler to get BTF format. 3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h 4 | ** 5 | ** 17-Jul-2022 Yusheng Zheng modified this from lua.h, lua_state.h and 6 | ** lj_def.h. 7 | */ 8 | 9 | #ifndef __LUA_STATE_H 10 | #define __LUA_STATE_H 11 | 12 | #include <stdarg.h> 13 | #include <stddef.h> 14 | #include <stdint.h> 15 | #include <string.h> 16 | 17 | #define LJ_TARGET_GC64 1 18 | 19 | /* 64 bit GC references. */ 20 | #if LJ_TARGET_GC64 21 | #define LJ_GC64 1 22 | #else 23 | #define LJ_GC64 0 24 | #endif 25 | 26 | /* GCobj reference */ 27 | typedef struct GCRef 28 | { 29 | #if LJ_GC64 30 | uint64_t gcptr64; /* True 64 bit pointer. */ 31 | #else 32 | uint32_t gcptr32; /* Pseudo 32 bit pointer. */ 33 | #endif 34 | } GCRef; 35 | 36 | /* 2-slot frame info. */ 37 | #if LJ_GC64 38 | #define LJ_FR2 1 39 | #else 40 | #define LJ_FR2 0 41 | #endif 42 | 43 | /* Optional defines. */ 44 | #ifndef LJ_FASTCALL 45 | #define LJ_FASTCALL 46 | #endif 47 | #ifndef LJ_NORET 48 | #define LJ_NORET 49 | #endif 50 | #ifndef LJ_NOAPI 51 | #define LJ_NOAPI extern 52 | #endif 53 | #ifndef LJ_LIKELY 54 | #define LJ_LIKELY(x) (x) 55 | #define LJ_UNLIKELY(x) (x) 56 | #endif 57 | 58 | /* Attributes for internal functions. */ 59 | #define LJ_DATA LJ_NOAPI 60 | #define LJ_DATADEF 61 | #define LJ_ASMF LJ_NOAPI 62 | #define LJ_FUNCA LJ_NOAPI 63 | #if defined(ljamalg_c) 64 | #define LJ_FUNC static 65 | #else 66 | #define LJ_FUNC LJ_NOAPI 67 | #endif 68 | #define LJ_FUNC_NORET LJ_FUNC LJ_NORET 69 | #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET 70 | #define LJ_ASMF_NORET LJ_ASMF LJ_NORET 71 | 72 | /* Internal assertions. */ 73 | #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) 74 | #define lj_assert_check(g, c, ...) \ 75 | ((c) ? (void)0 : (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) 76 | #define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) 77 | #else 78 | #define lj_checkapi(c, ...) ((void)L) 79 | #endif 80 | 81 | #ifdef LUA_USE_ASSERT 82 | #define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) 83 | #define lj_assertG(c, ...) 
lj_assert_check(g, (c), __VA_ARGS__) 84 | #define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) 85 | #define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) 86 | #define check_exp(c, e) (lj_assertX((c), #c), (e)) 87 | #else 88 | #define lj_assertG_(g, c, ...) ((void)0) 89 | #define lj_assertG(c, ...) ((void)g) 90 | #define lj_assertL(c, ...) ((void)L) 91 | #define lj_assertX(c, ...) ((void)0) 92 | #define check_exp(c, e) (e) 93 | #endif 94 | 95 | /* Static assertions. */ 96 | #define LJ_ASSERT_NAME2(name, line) name##line 97 | #define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line) 98 | #ifdef __COUNTER__ 99 | #define LJ_STATIC_ASSERT(cond) \ 100 | extern void LJ_ASSERT_NAME(__COUNTER__)(int STATIC_ASSERTION_FAILED[(cond) ? 1 : -1]) 101 | #else 102 | #define LJ_STATIC_ASSERT(cond) \ 103 | extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond) ? 1 : -1]) 104 | #endif 105 | 106 | /* PRNG state. Need this here, details in lj_prng.h. */ 107 | typedef struct PRNGState 108 | { 109 | uint64_t u[4]; 110 | } PRNGState; 111 | 112 | /* Common GC header for all collectable objects. */ 113 | #define GCHeader \ 114 | GCRef nextgc; \ 115 | uint8_t marked; \ 116 | uint8_t gct 117 | /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ 118 | 119 | /* Memory reference */ 120 | typedef struct MRef 121 | { 122 | #if LJ_GC64 123 | uint64_t ptr64; /* True 64 bit pointer. */ 124 | #else 125 | uint32_t ptr32; /* Pseudo 32 bit pointer. */ 126 | #endif 127 | } MRef; 128 | 129 | #if LJ_GC64 130 | #define mref(r, t) ((t *)(void *)(r).ptr64) 131 | #define mrefu(r) ((r).ptr64) 132 | 133 | #define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) 134 | #define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u)) 135 | #define setmrefr(r, v) ((r).ptr64 = (v).ptr64) 136 | #else 137 | #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) 138 | #define mrefu(r) ((r).ptr32) 139 | 140 | #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) 141 | #define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u)) 142 | #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) 143 | #endif 144 | 145 | #define LJ_ALIGN(n) __attribute__((aligned(n))) 146 | 147 | #define LUA_NUMBER double 148 | 149 | /* type of numbers in Lua */ 150 | typedef LUA_NUMBER lua_Number; 151 | 152 | #if LJ_ARCH_ENDIAN == LUAJIT_BE 153 | #define LJ_LE 0 154 | #define LJ_BE 1 155 | #define LJ_ENDIAN_SELECT(le, be) be 156 | #define LJ_ENDIAN_LOHI(lo, hi) hi lo 157 | #else 158 | #define LJ_LE 1 159 | #define LJ_BE 0 160 | #define LJ_ENDIAN_SELECT(le, be) le 161 | #define LJ_ENDIAN_LOHI(lo, hi) lo hi 162 | #endif 163 | 164 | /* Frame link. */ 165 | typedef union 166 | { 167 | int32_t ftsz; /* Frame type and size of previous frame. */ 168 | MRef pcr; /* Or PC for Lua frames. */ 169 | } FrameLink; 170 | 171 | /* Tagged value. */ 172 | typedef LJ_ALIGN(8) union TValue 173 | { 174 | uint64_t u64; /* 64 bit pattern overlaps number. */ 175 | lua_Number n; /* Number object overlaps split tag/value object. */ 176 | #if LJ_GC64 177 | GCRef gcr; /* GCobj reference with tag. */ 178 | int64_t it64; 179 | struct 180 | { 181 | LJ_ENDIAN_LOHI( 182 | int32_t i; /* Integer value. */ 183 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ 184 | ) 185 | }; 186 | #else 187 | struct 188 | { 189 | LJ_ENDIAN_LOHI( 190 | union { 191 | GCRef gcr; /* GCobj reference (if any). */ 192 | int32_t i; /* Integer value. */ 193 | }; 194 | , uint32_t it; /* Internal object tag. Must overlap MSW of number. 
*/ 195 | ) 196 | }; 197 | #endif 198 | #if LJ_FR2 199 | int64_t ftsz; /* Frame type and size of previous frame, or PC. */ 200 | #else 201 | struct 202 | { 203 | LJ_ENDIAN_LOHI( 204 | GCRef func; /* Function for next frame (or dummy L). */ 205 | , FrameLink tp; /* Link to previous frame. */ 206 | ) 207 | } fr; 208 | #endif 209 | struct 210 | { 211 | LJ_ENDIAN_LOHI( 212 | uint32_t lo; /* Lower 32 bits of number. */ 213 | , uint32_t hi; /* Upper 32 bits of number. */ 214 | ) 215 | } u32; 216 | } TValue; 217 | 218 | /* Memory and GC object sizes. */ 219 | typedef uint32_t MSize; 220 | #if LJ_GC64 221 | typedef uint64_t GCSize; 222 | #else 223 | typedef uint32_t GCSize; 224 | #endif 225 | 226 | /* Per-thread state object. */ 227 | struct lua_State 228 | { 229 | GCHeader; 230 | uint8_t dummy_ffid; /* Fake FF_C for curr_funcisL() on dummy frames. */ 231 | uint8_t status; /* Thread status. */ 232 | MRef glref; /* Link to global state. */ 233 | GCRef gclist; /* GC chain. */ 234 | TValue *base; /* Base of currently executing function. */ 235 | TValue *top; /* First free slot in the stack. */ 236 | MRef maxstack; /* Last free slot in the stack. */ 237 | MRef stack; /* Stack base. */ 238 | GCRef openupval; /* List of open upvalues in the stack. */ 239 | GCRef env; /* Thread environment (table of globals). */ 240 | void *cframe; /* End of C stack frame chain. */ 241 | MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */ 242 | void *exdata; /* user extra data pointer. added by OpenResty */ 243 | void *exdata2; /* the 2nd user extra data pointer. added by OpenResty */ 244 | #if LJ_TARGET_ARM 245 | uint32_t unused1; 246 | uint32_t unused2; 247 | #endif 248 | }; 249 | 250 | typedef struct lua_State lua_State; 251 | 252 | typedef int (*lua_CFunction)(lua_State *L); 253 | 254 | typedef const TValue cTValue; 255 | /* Internal object tags. 256 | ** 257 | ** Format for 32 bit GC references (!LJ_GC64): 258 | ** 259 | ** Internal tags overlap the MSW of a number object (must be a double). 260 | ** Interpreted as a double these are special NaNs. The FPU only generates 261 | ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available 262 | ** for use as internal tags. Small negative numbers are used to shorten the 263 | ** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate). 264 | ** 265 | ** ---MSW---.---LSW--- 266 | ** primitive types | itype | | 267 | ** lightuserdata | itype | void * | (32 bit platforms) 268 | ** lightuserdata |ffff|seg| ofs | (64 bit platforms) 269 | ** GC objects | itype | GCRef | 270 | ** int (LJ_DUALNUM)| itype | int | 271 | ** number -------double------ 272 | ** 273 | ** Format for 64 bit GC references (LJ_GC64): 274 | ** 275 | ** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next 276 | ** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer, 277 | ** a zero-extended 32 bit integer or all bits set to 1 for primitive types. 278 | ** 279 | ** ------MSW------.------LSW------ 280 | ** primitive types |1..1|itype|1..................1| 281 | ** GC objects |1..1|itype|-------GCRef--------| 282 | ** lightuserdata |1..1|itype|seg|------ofs-------| 283 | ** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| 284 | ** number ------------double------------- 285 | ** 286 | ** ORDER LJ_T 287 | ** Primitive types nil/false/true must be first, lightuserdata next. 288 | ** GC objects are at the end, table/userdata must be lowest. 289 | ** Also check lj_ir.h for similar ordering constraints. 
290 | */ 291 | #define LJ_TNIL (~0u) 292 | #define LJ_TFALSE (~1u) 293 | #define LJ_TTRUE (~2u) 294 | #define LJ_TLIGHTUD (~3u) 295 | #define LJ_TSTR (~4u) 296 | #define LJ_TUPVAL (~5u) 297 | #define LJ_TTHREAD (~6u) 298 | #define LJ_TPROTO (~7u) 299 | #define LJ_TFUNC (~8u) 300 | #define LJ_TTRACE (~9u) 301 | #define LJ_TCDATA (~10u) 302 | #define LJ_TTAB (~11u) 303 | #define LJ_TUDATA (~12u) 304 | /* This is just the canonical number type used in some places. */ 305 | #define LJ_TNUMX (~13u) 306 | 307 | /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ 308 | #if LJ_64 && !LJ_GC64 309 | #define LJ_TISNUM 0xfffeffffu 310 | #else 311 | #define LJ_TISNUM LJ_TNUMX 312 | #endif 313 | #define LJ_TISTRUECOND LJ_TFALSE 314 | #define LJ_TISPRI LJ_TTRUE 315 | #define LJ_TISGCV (LJ_TSTR + 1) 316 | #define LJ_TISTABUD LJ_TTAB 317 | 318 | /* Type marker for slot holding a traversal index. Must be lightuserdata. */ 319 | #define LJ_KEYINDEX 0xfffe7fffu 320 | 321 | #if LJ_GC64 322 | #define LJ_GCVMASK (((uint64_t)1 << 47) - 1) 323 | #endif 324 | 325 | #if LJ_64 326 | /* To stay within 47 bits, lightuserdata is segmented. */ 327 | #define LJ_LIGHTUD_BITS_SEG 8 328 | #define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG) 329 | #endif 330 | 331 | /* -- Common type definitions --------------------------------------------- */ 332 | 333 | /* Types for handling bytecodes. Need this here, details in lj_bc.h. */ 334 | typedef uint32_t BCIns; /* Bytecode instruction. */ 335 | typedef uint32_t BCPos; /* Bytecode position. */ 336 | typedef uint32_t BCReg; /* Bytecode register. */ 337 | typedef int32_t BCLine; /* Bytecode line number. */ 338 | 339 | /* Internal assembler functions. Never call these directly from C. */ 340 | typedef void (*ASMFunction)(void); 341 | 342 | /* Resizable string buffer. Need this here, details in lj_buf.h. */ 343 | #define SBufHeader \ 344 | char *w, *e, *b; \ 345 | MRef L 346 | typedef struct SBuf 347 | { 348 | SBufHeader; 349 | } SBuf; 350 | 351 | /* Operand ranges and related constants. */ 352 | #define BCMAX_A 0xff 353 | #define BCMAX_B 0xff 354 | #define BCMAX_C 0xff 355 | #define BCMAX_D 0xffff 356 | #define BCBIAS_J 0x8000 357 | #define NO_REG BCMAX_A 358 | #define NO_JMP (~(BCPos)0) 359 | 360 | /* Macros to get instruction fields. */ 361 | #define bc_op(i) ((BCOp)((i)&0xff)) 362 | #define bc_a(i) ((BCReg)(((i) >> 8) & 0xff)) 363 | #define bc_b(i) ((BCReg)((i) >> 24)) 364 | #define bc_c(i) ((BCReg)(((i) >> 16) & 0xff)) 365 | #define bc_d(i) ((BCReg)((i) >> 16)) 366 | #define bc_j(i) ((ptrdiff_t)bc_d(i) - BCBIAS_J) 367 | 368 | /* Macros to set instruction fields. */ 369 | #define setbc_byte(p, x, ofs) \ 370 | ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3 - ofs)] = (uint8_t)(x) 371 | #define setbc_op(p, x) setbc_byte(p, (x), 0) 372 | #define setbc_a(p, x) setbc_byte(p, (x), 1) 373 | #define setbc_b(p, x) setbc_byte(p, (x), 3) 374 | #define setbc_c(p, x) setbc_byte(p, (x), 2) 375 | #define setbc_d(p, x) \ 376 | ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = (uint16_t)(x) 377 | #define setbc_j(p, x) setbc_d(p, (BCPos)((int32_t)(x) + BCBIAS_J)) 378 | 379 | /* Macros to compose instructions. 
*/ 380 | #define BCINS_ABC(o, a, b, c) \ 381 | (((BCIns)(o)) | ((BCIns)(a) << 8) | ((BCIns)(b) << 24) | ((BCIns)(c) << 16)) 382 | #define BCINS_AD(o, a, d) \ 383 | (((BCIns)(o)) | ((BCIns)(a) << 8) | ((BCIns)(d) << 16)) 384 | #define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j) + BCBIAS_J)) 385 | 386 | #if LJ_GC64 387 | #define gcref(r) ((GCobj *)(r).gcptr64) 388 | #define gcrefp(r, t) ((t *)(void *)(r).gcptr64) 389 | #define gcrefu(r) ((r).gcptr64) 390 | #define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64) 391 | 392 | #define setgcref(r, gc) ((r).gcptr64 = (uint64_t) & (gc)->gch) 393 | #define setgcreft(r, gc, it) \ 394 | (r).gcptr64 = (uint64_t) & (gc)->gch | (((uint64_t)(it)) << 47) 395 | #define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) 396 | #define setgcrefnull(r) ((r).gcptr64 = 0) 397 | #define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) 398 | #else 399 | #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) 400 | #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) 401 | #define gcrefu(r) ((r).gcptr32) 402 | #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) 403 | 404 | #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t) & (gc)->gch) 405 | #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) 406 | #define setgcrefnull(r) ((r).gcptr32 = 0) 407 | #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) 408 | #endif 409 | 410 | #define tvref(r) (mref(r, TValue)) 411 | 412 | /* -- String object ------------------------------------------------------- */ 413 | 414 | typedef uint32_t StrHash; /* String hash value. */ 415 | typedef uint32_t StrID; /* String ID. */ 416 | 417 | /* String object header. String payload follows. */ 418 | typedef struct GCstr 419 | { 420 | GCHeader; 421 | uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ 422 | uint8_t hashalg; /* Hash algorithm. */ 423 | StrID sid; /* Interned string ID. */ 424 | StrHash hash; /* Hash of string. */ 425 | MSize len; /* Size of string. */ 426 | } GCstr; 427 | 428 | #define strref(r) (&gcref((r))->str) 429 | #define strdata(s) ((const char *)((s) + 1)) 430 | #define strdatawr(s) ((char *)((s) + 1)) 431 | /* -- Userdata object ----------------------------------------------------- */ 432 | 433 | /* Userdata object. Payload follows. */ 434 | typedef struct GCudata 435 | { 436 | GCHeader; 437 | uint8_t udtype; /* Userdata type. */ 438 | uint8_t unused2; 439 | GCRef env; /* Should be at same offset in GCfunc. */ 440 | MSize len; /* Size of payload. */ 441 | GCRef metatable; /* Must be at same offset in GCtab. */ 442 | uint32_t align1; /* To force 8 byte alignment of the payload. */ 443 | } GCudata; 444 | 445 | /* Userdata types. */ 446 | enum 447 | { 448 | UDTYPE_USERDATA, /* Regular userdata. */ 449 | UDTYPE_IO_FILE, /* I/O library FILE. */ 450 | UDTYPE_FFI_CLIB, /* FFI C library namespace. */ 451 | UDTYPE_BUFFER, /* String buffer. */ 452 | UDTYPE__MAX 453 | }; 454 | 455 | #define uddata(u) ((void *)((u) + 1)) 456 | #define sizeudata(u) (sizeof(struct GCudata) + (u)->len) 457 | 458 | /* -- C data object ------------------------------------------------------- */ 459 | 460 | /* C data object. Payload follows. */ 461 | typedef struct GCcdata 462 | { 463 | GCHeader; 464 | uint16_t ctypeid; /* C type ID. */ 465 | } GCcdata; 466 | 467 | /* Prepended to variable-sized or realigned C data objects. */ 468 | typedef struct GCcdataVar 469 | { 470 | uint16_t offset; /* Offset to allocated memory (relative to GCcdata). */ 471 | uint16_t extra; /* Extra space allocated (incl. 
GCcdata + GCcdatav). */ 472 | MSize len; /* Size of payload. */ 473 | } GCcdataVar; 474 | 475 | #define cdataptr(cd) ((void *)((cd) + 1)) 476 | #define cdataisv(cd) ((cd)->marked & 0x80) 477 | #define cdatav(cd) ((GCcdataVar *)((char *)(cd) - sizeof(GCcdataVar))) 478 | #define cdatavlen(cd) check_exp(cdataisv(cd), cdatav(cd)->len) 479 | #define sizecdatav(cd) (cdatavlen(cd) + cdatav(cd)->extra) 480 | #define memcdatav(cd) ((void *)((char *)(cd)-cdatav(cd)->offset)) 481 | 482 | /* -- Prototype object ---------------------------------------------------- */ 483 | 484 | #define SCALE_NUM_GCO ((int32_t)sizeof(lua_Number) / sizeof(GCRef)) 485 | #define round_nkgc(n) (((n) + SCALE_NUM_GCO - 1) & ~(SCALE_NUM_GCO - 1)) 486 | 487 | typedef struct GCproto 488 | { 489 | GCHeader; 490 | uint8_t numparams; /* Number of parameters. */ 491 | uint8_t framesize; /* Fixed frame size. */ 492 | MSize sizebc; /* Number of bytecode instructions. */ 493 | #if LJ_GC64 494 | uint32_t unused_gc64; 495 | #endif 496 | GCRef gclist; 497 | MRef k; /* Split constant array (points to the middle). */ 498 | MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ 499 | MSize sizekgc; /* Number of collectable constants. */ 500 | MSize sizekn; /* Number of lua_Number constants. */ 501 | MSize sizept; /* Total size including colocated arrays. */ 502 | uint8_t sizeuv; /* Number of upvalues. */ 503 | uint8_t flags; /* Miscellaneous flags (see below). */ 504 | uint16_t trace; /* Anchor for chain of root traces. */ 505 | /* ------ The following fields are for debugging/tracebacks only ------ */ 506 | GCRef chunkname; /* Name of the chunk this function was defined in. */ 507 | BCLine firstline; /* First line of the function definition. */ 508 | BCLine numline; /* Number of lines for the function definition. */ 509 | MRef lineinfo; /* Compressed map from bytecode ins. to source line. */ 510 | MRef uvinfo; /* Upvalue names. */ 511 | MRef varinfo; /* Names and compressed extents of local variables. */ 512 | } GCproto; 513 | 514 | /* Flags for prototype. */ 515 | #define PROTO_CHILD 0x01 /* Has child prototypes. */ 516 | #define PROTO_VARARG 0x02 /* Vararg function. */ 517 | #define PROTO_FFI 0x04 /* Uses BC_KCDATA for FFI datatypes. */ 518 | #define PROTO_NOJIT 0x08 /* JIT disabled for this function. */ 519 | #define PROTO_ILOOP 0x10 /* Patched bytecode with ILOOP etc. */ 520 | /* Only used during parsing. */ 521 | #define PROTO_HAS_RETURN 0x20 /* Already emitted a return. */ 522 | #define PROTO_FIXUP_RETURN 0x40 /* Need to fixup emitted returns. */ 523 | /* Top bits used for counting created closures. */ 524 | #define PROTO_CLCOUNT 0x20 /* Base of saturating 3 bit counter. */ 525 | #define PROTO_CLC_BITS 3 526 | #define PROTO_CLC_POLY (3 * PROTO_CLCOUNT) /* Polymorphic threshold. */ 527 | 528 | #define PROTO_UV_LOCAL 0x8000 /* Upvalue for local slot. */ 529 | #define PROTO_UV_IMMUTABLE 0x4000 /* Immutable upvalue. 
*/ 530 | 531 | #define proto_kgc(pt, idx) \ 532 | check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t) - (intptr_t)(pt)->sizekgc, \ 533 | gcref(mref((pt)->k, GCRef)[(idx)])) 534 | #define proto_knumtv(pt, idx) \ 535 | check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)]) 536 | #define proto_bc(pt) ((BCIns *)((char *)(pt) + sizeof(GCproto))) 537 | #define proto_bcpos(pt, pc) ((BCPos)((pc)-proto_bc(pt))) 538 | #define proto_uv(pt) (mref((pt)->uv, uint16_t)) 539 | 540 | #define proto_chunkname(pt) (strref(BPF_PROBE_READ_USER(pt, chunkname))) 541 | 542 | #define proto_chunknamestr(pt) (strdata(proto_chunkname((pt)))) 543 | #define proto_lineinfo(pt) (mref((pt)->lineinfo, const void)) 544 | #define proto_uvinfo(pt) (mref((pt)->uvinfo, const uint8_t)) 545 | #define proto_varinfo(pt) (mref((pt)->varinfo, const uint8_t)) 546 | 547 | /* -- Upvalue object ------------------------------------------------------ */ 548 | 549 | typedef struct GCupval 550 | { 551 | GCHeader; 552 | uint8_t closed; /* Set if closed (i.e. uv->v == &uv->u.value). */ 553 | uint8_t immutable; /* Immutable value. */ 554 | union 555 | { 556 | TValue tv; /* If closed: the value itself. */ 557 | struct 558 | { /* If open: double linked list, anchored at thread. */ 559 | GCRef prev; 560 | GCRef next; 561 | }; 562 | }; 563 | MRef v; /* Points to stack slot (open) or above (closed). */ 564 | uint32_t dhash; /* Disambiguation hash: dh1 != dh2 => cannot alias. */ 565 | } GCupval; 566 | 567 | #define uvprev(uv_) (&gcref((uv_)->prev)->uv) 568 | #define uvnext(uv_) (&gcref((uv_)->next)->uv) 569 | #define uvval(uv_) (mref((uv_)->v, TValue)) 570 | 571 | /* GC header for generic access to common fields of GC objects. */ 572 | typedef struct GChead 573 | { 574 | GCHeader; 575 | uint8_t unused1; 576 | uint8_t unused2; 577 | GCRef env; 578 | GCRef gclist; 579 | GCRef metatable; 580 | } GChead; 581 | 582 | /* -- Function object (closures) ------------------------------------------ */ 583 | 584 | /* Common header for functions. env should be at same offset in GCudata. */ 585 | #define GCfuncHeader \ 586 | GCHeader; \ 587 | uint8_t ffid; \ 588 | uint8_t nupvalues; \ 589 | GCRef env; \ 590 | GCRef gclist; \ 591 | MRef pc 592 | 593 | typedef struct GCfuncC 594 | { 595 | GCfuncHeader; 596 | lua_CFunction f; /* C function to be called. */ 597 | TValue upvalue[1]; /* Array of upvalues (TValue). */ 598 | } GCfuncC; 599 | 600 | typedef struct GCfuncL 601 | { 602 | GCfuncHeader; 603 | GCRef uvptr[1]; /* Array of _pointers_ to upvalue objects (GCupval). */ 604 | } GCfuncL; 605 | 606 | typedef union GCfunc 607 | { 608 | GCfuncC c; 609 | GCfuncL l; 610 | } GCfunc; 611 | 612 | #define FF_LUA 0 613 | #define FF_C 1 614 | #define isluafunc(fn) (BPF_PROBE_READ_USER(fn, c.ffid) == FF_LUA) 615 | #define iscfunc(fn) (BPF_PROBE_READ_USER(fn, c.ffid) == FF_C) 616 | #define isffunc(fn) (BPF_PROBE_READ_USER(fn, c.ffid) > FF_C) 617 | #define funcproto(fn) \ 618 | check_exp(isluafunc(fn), (GCproto *)(mref(BPF_PROBE_READ_USER((fn), l.pc), char) - sizeof(GCproto))) 619 | #define sizeCfunc(n) (sizeof(GCfuncC) - sizeof(TValue) + sizeof(TValue) * (n)) 620 | #define sizeLfunc(n) (sizeof(GCfuncL) - sizeof(GCRef) + sizeof(GCRef) * (n)) 621 | 622 | typedef struct GCtab 623 | { 624 | GCHeader; 625 | uint8_t nomm; /* Negative cache for fast metamethods. */ 626 | char colo; /* Array colocation. */ 627 | MRef array; /* Array part. */ 628 | GCRef gclist; 629 | GCRef metatable; /* Must be at same offset in GCudata. */ 630 | MRef node; /* Hash part. 
*/ 631 | uint32_t asize; /* Size of array part (keys [0, asize-1]). */ 632 | uint32_t hmask; /* Hash part mask (size of hash part - 1). */ 633 | #if LJ_GC64 634 | MRef freetop; /* Top of free elements. */ 635 | #endif 636 | } GCtab; 637 | 638 | #define sizetabcolo(n) ((n) * sizeof(TValue) + sizeof(GCtab)) 639 | #define tabref(r) (&gcref((r))->tab) 640 | #define noderef(r) (mref((r), Node)) 641 | #define nextnode(n) (mref((n)->next, Node)) 642 | #if LJ_GC64 643 | #define getfreetop(t, n) (noderef((t)->freetop)) 644 | #define setfreetop(t, n, v) (setmref((t)->freetop, (v))) 645 | #else 646 | #define getfreetop(t, n) (noderef((n)->freetop)) 647 | #define setfreetop(t, n, v) (setmref((n)->freetop, (v))) 648 | #endif 649 | 650 | typedef union GCobj 651 | { 652 | GChead gch; 653 | GCstr str; 654 | GCupval uv; 655 | lua_State th; 656 | GCproto pt; 657 | GCfunc fn; 658 | GCcdata cd; 659 | GCtab tab; 660 | GCudata ud; 661 | } GCobj; 662 | 663 | /* Macros to convert a GCobj pointer into a specific value. */ 664 | #define gco2str(o) check_exp((o)->gch.gct == ~LJ_TSTR, &(o)->str) 665 | #define gco2uv(o) check_exp((o)->gch.gct == ~LJ_TUPVAL, &(o)->uv) 666 | #define gco2th(o) check_exp((o)->gch.gct == ~LJ_TTHREAD, &(o)->th) 667 | #define gco2pt(o) check_exp((o)->gch.gct == ~LJ_TPROTO, &(o)->pt) 668 | #define gco2func(o) check_exp((o)->gch.gct == ~LJ_TFUNC, &(o)->fn) 669 | #define gco2cd(o) check_exp((o)->gch.gct == ~LJ_TCDATA, &(o)->cd) 670 | #define gco2tab(o) check_exp((o)->gch.gct == ~LJ_TTAB, &(o)->tab) 671 | #define gco2ud(o) check_exp((o)->gch.gct == ~LJ_TUDATA, &(o)->ud) 672 | 673 | /* Macro to convert any collectable object into a GCobj pointer. */ 674 | #define obj2gco(v) ((GCobj *)(v)) 675 | 676 | #if LJ_GC64 677 | #define gcval(o) ((GCobj *)(gcrefu(BPF_PROBE_READ_USER(o, gcr)) & LJ_GCVMASK)) 678 | 679 | #else 680 | #define gcval(o) (gcref((o)->gcr)) 681 | #endif 682 | 683 | /* -- Lua stack frame ----------------------------------------------------- */ 684 | 685 | /* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned: 686 | ** 687 | ** PC 00 Lua frame 688 | ** delta 001 C frame 689 | ** delta 010 Continuation frame 690 | ** delta 011 Lua vararg frame 691 | ** delta 101 cpcall() frame 692 | ** delta 110 ff pcall() frame 693 | ** delta 111 ff pcall() frame with active hook 694 | */ 695 | enum 696 | { 697 | FRAME_LUA, 698 | FRAME_C, 699 | FRAME_CONT, 700 | FRAME_VARG, 701 | FRAME_LUAP, 702 | FRAME_CP, 703 | FRAME_PCALL, 704 | FRAME_PCALLH 705 | }; 706 | #define FRAME_TYPE 3 707 | #define FRAME_P 4 708 | #define FRAME_TYPEP (FRAME_TYPE | FRAME_P) 709 | 710 | /* Macros to access and modify Lua frames. */ 711 | #if LJ_FR2 712 | /* Two-slot frame info, required for 64 bit PC/GCRef: 713 | ** 714 | ** base-2 base-1 | base base+1 ... 715 | ** [func PC/delta/ft] | [slots ...] 716 | ** ^-- frame | ^-- base ^-- top 717 | ** 718 | ** Continuation frames: 719 | ** 720 | ** base-4 base-3 base-2 base-1 | base base+1 ... 721 | ** [cont PC ] [func PC/delta/ft] | [slots ...] 722 | ** ^-- frame | ^-- base ^-- top 723 | */ 724 | #define frame_gc(f) (gcval((f)-1)) 725 | #define frame_ftsz(f) ((ptrdiff_t)BPF_PROBE_READ_USER(frame, ftsz)) 726 | 727 | #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) 728 | #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) 729 | #define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) 730 | #else 731 | /* One-slot frame info, sufficient for 32 bit PC/GCRef: 732 | ** 733 | ** base-1 | base base+1 ... 
734 | ** lo hi | 735 | ** [func | PC/delta/ft] | [slots ...] 736 | ** ^-- frame | ^-- base ^-- top 737 | ** 738 | ** Continuation frames: 739 | ** 740 | ** base-2 base-1 | base base+1 ... 741 | ** lo hi lo hi | 742 | ** [cont | PC] [func | PC/delta/ft] | [slots ...] 743 | ** ^-- frame | ^-- base ^-- top 744 | */ 745 | #define frame_gc(f) (gcref((f)->fr.func)) 746 | #define frame_ftsz(f) ((ptrdiff_t)BPF_PROBE_READ_USER(f, fr.tp.ftsz)) 747 | 748 | #define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) 749 | #define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) 750 | #define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) 751 | #define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) 752 | #endif 753 | 754 | #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) 755 | #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) 756 | #define frame_islua(f) (frame_type(f) == FRAME_LUA) 757 | #define frame_isc(f) (frame_type(f) == FRAME_C) 758 | #define frame_iscont(f) (frame_typep(f) == FRAME_CONT) 759 | #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) 760 | #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) 761 | #define frame_func(f) (&frame_gc(f)->fn) 762 | 763 | #define frame_delta(f) (frame_ftsz(f) >> 3) 764 | #define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) 765 | 766 | enum 767 | { 768 | LJ_CONT_TAILCALL, 769 | LJ_CONT_FFI_CALLBACK 770 | }; /* Special continuations. */ 771 | 772 | #define frame_iscont_fficb(f) \ 773 | (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) 774 | 775 | static __always_inline BCIns frame_pc_prev(const BCIns *bcins) 776 | { 777 | const BCIns bcins_prev; 778 | bpf_probe_read_user((void *)&bcins_prev, sizeof(bcins_prev), bcins - 1); 779 | return bcins_prev; 780 | } 781 | 782 | #define frame_prevl(f) ((f) - (1 + LJ_FR2 + bc_a(frame_pc_prev(frame_pc(f))))) 783 | #define frame_prevd(f) ((TValue *)((char *)(f)-frame_sized(f))) 784 | #define frame_prev(f) (frame_islua(f) ? frame_prevl(f) : frame_prevd(f)) 785 | 786 | /* -- State objects ------------------------------------------------------- */ 787 | 788 | /* VM states. */ 789 | enum 790 | { 791 | LJ_VMST_INTERP, /* Interpreter. */ 792 | LJ_VMST_C, /* C function. */ 793 | LJ_VMST_GC, /* Garbage collector. */ 794 | LJ_VMST_EXIT, /* Trace exit handler. */ 795 | LJ_VMST_RECORD, /* Trace recorder. */ 796 | LJ_VMST_OPT, /* Optimizer. */ 797 | LJ_VMST_ASM, /* Assembler. */ 798 | LJ_VMST__MAX 799 | }; 800 | 801 | #define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st) 802 | 803 | /* Metamethods. ORDER MM */ 804 | #ifdef LJ_HASFFI 805 | #define MMDEF_FFI(_) _(new) 806 | #else 807 | #define MMDEF_FFI(_) 808 | #endif 809 | 810 | #if LJ_52 || LJ_HASFFI 811 | #define MMDEF_PAIRS(_) _(pairs) _(ipairs) 812 | #else 813 | #define MMDEF_PAIRS(_) 814 | #define MM_pairs 255 815 | #define MM_ipairs 255 816 | #endif 817 | 818 | #define MMDEF(_) \ 819 | _(index) \ 820 | _(newindex) _(gc) _(mode) _(eq) _(len) /* Only the above (fast) metamethods are negative cached (max. 8). */ \ 821 | _(lt) _(le) _(concat) _(call) /* The following must be in ORDER ARITH. */ \ 822 | _(add) _(sub) _(mul) _(div) _(mod) _(pow) _(unm) /* The following are used in the standard libraries. */ \ 823 | _(metatable) _(tostring) MMDEF_FFI(_) MMDEF_PAIRS(_) 824 | 825 | typedef enum 826 | { 827 | #define MMENUM(name) MM_##name, 828 | MMDEF(MMENUM) 829 | #undef MMENUM 830 | MM__MAX, 831 | MM____ = MM__MAX, 832 | MM_FAST = MM_len 833 | } MMS; 834 | 835 | /* GC root IDs. 
*/ 836 | typedef enum 837 | { 838 | GCROOT_MMNAME, /* Metamethod names. */ 839 | GCROOT_MMNAME_LAST = GCROOT_MMNAME + MM__MAX - 1, 840 | GCROOT_BASEMT, /* Metatables for base types. */ 841 | GCROOT_BASEMT_NUM = GCROOT_BASEMT + ~LJ_TNUMX, 842 | GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ 843 | GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ 844 | GCROOT_MAX 845 | } GCRootID; 846 | 847 | /* Garbage collector state. */ 848 | typedef struct GCState 849 | { 850 | GCSize total; /* Memory currently allocated. */ 851 | GCSize threshold; /* Memory threshold. */ 852 | uint8_t currentwhite; /* Current white color. */ 853 | uint8_t state; /* GC state. */ 854 | uint8_t nocdatafin; /* No cdata finalizer called. */ 855 | #if LJ_64 856 | uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ 857 | #else 858 | uint8_t unused1; 859 | #endif 860 | MSize sweepstr; /* Sweep position in string table. */ 861 | GCRef root; /* List of all collectable objects. */ 862 | MRef sweep; /* Sweep position in root list. */ 863 | GCRef gray; /* List of gray objects. */ 864 | GCRef grayagain; /* List of objects for atomic traversal. */ 865 | GCRef weak; /* List of weak tables (to be cleared). */ 866 | GCRef mmudata; /* List of userdata (to be finalized). */ 867 | GCSize debt; /* Debt (how much GC is behind schedule). */ 868 | GCSize estimate; /* Estimate of memory actually in use. */ 869 | MSize stepmul; /* Incremental GC step granularity. */ 870 | MSize pause; /* Pause between successive GC cycles. */ 871 | #if LJ_64 872 | MRef lightudseg; /* Upper bits of lightuserdata segments. */ 873 | #endif 874 | } GCState; 875 | 876 | /* thread status */ 877 | #define LUA_OK 0 878 | #define LUA_YIELD 1 879 | #define LUA_ERRRUN 2 880 | #define LUA_ERRSYNTAX 3 881 | #define LUA_ERRMEM 4 882 | #define LUA_ERRERR 5 883 | 884 | #endif 885 | -------------------------------------------------------------------------------- /profiler/profile.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | /* Copyright (c) 2022 LG Electronics */ 3 | #include "lua_state.h" 4 | #include "profile.h" 5 | #include "maps.bpf.h" 6 | 7 | const volatile bool kernel_stacks_only = false; 8 | const volatile bool user_stacks_only = false; 9 | const volatile bool disable_lua_user_trace = false; 10 | const volatile bool include_idle = false; 11 | const volatile pid_t targ_pid = -1; 12 | const volatile pid_t targ_tid = -1; 13 | const volatile int frame_depth = 15; 14 | 15 | struct 16 | { 17 | __uint(type, BPF_MAP_TYPE_STACK_TRACE); 18 | __type(key, u32); 19 | } stackmap SEC(".maps"); 20 | 21 | struct 22 | { 23 | __uint(type, BPF_MAP_TYPE_HASH); 24 | __type(key, struct stack_key); 25 | __type(value, u64); 26 | __uint(max_entries, MAX_ENTRIES); 27 | } counts SEC(".maps"); 28 | 29 | #define MAX_ENTRIES 10240 30 | 31 | // pass the lua_State pointer from the uprobes to the perf event 32 | // handler, which walks the lua stack and collects function names 33 | struct 34 | { 35 | __uint(type, BPF_MAP_TYPE_HASH); 36 | __uint(max_entries, MAX_ENTRIES); 37 | __type(key, __u32); 38 | __type(value, struct lua_stack_event); 39 | } lua_events SEC(".maps"); 40 | 41 | // output the lua stack events to user space, because we cannot keep 42 | // all of them in eBPF maps 43 | struct 44 | { 45 | __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 46 | __uint(key_size, sizeof(__u32)); 47 | __uint(value_size, sizeof(__u32)); 48 | } lua_event_output SEC(".maps"); 49 | 50 | /* 51 | * If the PAGE_OFFSET macro is not
available in vmlinux.h, treat an ip whose MSB 52 | * (Most Significant Bit) is 1 as a kernel address. 53 | * TODO: use end address of user space to determine the address space of ip 54 | */ 55 | #if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) 56 | #define BITS_PER_ADDR (64) 57 | #define MSB_SET_ULONG (1UL << (BITS_PER_ADDR - 1)) 58 | static __always_inline bool is_kernel_addr(u64 addr) 59 | { 60 | return !!(addr & MSB_SET_ULONG); 61 | } 62 | #else 63 | static __always_inline bool is_kernel_addr(u64 addr) 64 | { 65 | return false; 66 | } 67 | #endif /* __TARGET_ARCH_arm64 || __TARGET_ARCH_x86 */ 68 | 69 | static inline int lua_get_funcdata(struct bpf_perf_event_data *ctx, cTValue *frame, struct lua_stack_event *eventp, int level) 70 | { 71 | if (!frame) 72 | return -1; 73 | GCfunc *fn = frame_func(frame); 74 | if (!fn) 75 | return -1; 76 | if (isluafunc(fn)) 77 | { 78 | eventp->type = FUNC_TYPE_LUA; 79 | GCproto *pt = funcproto(fn); 80 | if (!pt) 81 | return -1; 82 | eventp->ffid = BPF_PROBE_READ_USER(pt, firstline); 83 | GCstr *name = proto_chunkname(pt); /* GCstr *name */ 84 | const char *src = strdata(name); 85 | if (!src) 86 | return -1; 87 | bpf_probe_read_user_str(eventp->name, sizeof(eventp->name), src); 88 | bpf_printk("level= %d, fn_name=%s\n", level, eventp->name); 89 | } 90 | else if (iscfunc(fn)) 91 | { 92 | eventp->type = FUNC_TYPE_C; 93 | eventp->funcp = BPF_PROBE_READ_USER(fn, c.f); 94 | } 95 | else if (isffunc(fn)) 96 | { 97 | eventp->type = FUNC_TYPE_F; 98 | eventp->ffid = BPF_PROBE_READ_USER(fn, c.ffid); 99 | } 100 | eventp->level = level; 101 | bpf_perf_event_output(ctx, &lua_event_output, BPF_F_CURRENT_CPU, eventp, sizeof(*eventp)); 102 | return 0; 103 | } 104 | 105 | static int fix_lua_stack(struct bpf_perf_event_data *ctx, __u32 tid, int stack_id) 106 | { 107 | if (stack_id == 0) 108 | { 109 | return 0; 110 | } 111 | struct lua_stack_event *eventp; 112 | 113 | eventp = bpf_map_lookup_elem(&lua_events, &tid); 114 | if (!eventp) 115 | return 0; 116 | 117 | eventp->user_stack_id = stack_id; 118 | lua_State *L = eventp->L; 119 | if (!L) 120 | return 0; 121 | 122 | // start from the top of the stack and trace back 123 | // count the number of function calls found 124 | int level = 1, count = 0; 125 | 126 | cTValue *frame, *nextframe, *bot = tvref(BPF_PROBE_READ_USER(L, stack)) + LJ_FR2; 127 | int i = 0; 128 | frame = nextframe = BPF_PROBE_READ_USER(L, base) - 1; 129 | /* Traverse frames backwards. */ 130 | // for the eBPF verifier insn limit (1000000), cap the loop at frame_depth (default 15) iterations 131 | for (; i < frame_depth && frame > bot; i++) 132 | { 133 | if (frame_gc(frame) == obj2gco(L)) 134 | { 135 | level++; /* Skip dummy frames. See lj_err_optype_call(). */ 136 | } 137 | if (level-- == 0) 138 | { 139 | level++; 140 | /* Level found. */ 141 | if (lua_get_funcdata(ctx, frame, eventp, count) != 0) 142 | { 143 | continue; 144 | } 145 | count++; 146 | } 147 | nextframe = frame; 148 | if (frame_islua(frame)) 149 | { 150 | frame = frame_prevl(frame); 151 | } 152 | else 153 | { 154 | if (frame_isvarg(frame)) 155 | level++; /* Skip vararg pseudo-frame.
*/ 156 | frame = frame_prevd(frame); 157 | } 158 | } 159 | return 0; 160 | } 161 | 162 | SEC("perf_event") 163 | int do_perf_event(struct bpf_perf_event_data *ctx) 164 | { 165 | __u64 id = bpf_get_current_pid_tgid(); 166 | __u32 pid = id >> 32; 167 | __u32 tid = id; 168 | __u64 *valp; 169 | static const __u64 zero; 170 | struct stack_key key = {}; 171 | 172 | if (!include_idle && tid == 0) 173 | return 0; 174 | 175 | if (targ_pid != -1 && targ_pid != pid) 176 | return 0; 177 | if (targ_tid != -1 && targ_tid != tid) 178 | return 0; 179 | 180 | key.pid = pid; 181 | bpf_get_current_comm(&key.name, sizeof(key.name)); 182 | 183 | if (user_stacks_only) 184 | key.kern_stack_id = -1; 185 | else 186 | key.kern_stack_id = bpf_get_stackid(&ctx->regs, &stackmap, 0); 187 | 188 | if (kernel_stacks_only) 189 | key.user_stack_id = -1; 190 | else 191 | key.user_stack_id = bpf_get_stackid(&ctx->regs, &stackmap, BPF_F_USER_STACK); 192 | 193 | if (key.kern_stack_id >= 0) 194 | { 195 | // populate extras to fix the kernel stack 196 | __u64 ip = PT_REGS_IP(&ctx->regs); 197 | 198 | if (is_kernel_addr(ip)) 199 | { 200 | key.kernel_ip = ip; 201 | } 202 | } 203 | 204 | valp = bpf_map_lookup_or_try_init(&counts, &key, &zero); 205 | if (valp) 206 | __sync_fetch_and_add(valp, 1); 207 | 208 | if (!disable_lua_user_trace && (!valp || *valp <= 1)) 209 | { 210 | // only fetch the lua stack the first time we see a new stack id 211 | fix_lua_stack(ctx, tid, key.user_stack_id); 212 | } 213 | return 0; 214 | } 215 | 216 | static int probe_entry_lua_cancel(struct pt_regs *ctx) 217 | { 218 | if (!PT_REGS_PARM2(ctx)) 219 | return 0; 220 | if (!PT_REGS_PARM4(ctx)) 221 | return 0; 222 | 223 | __u64 pid_tgid = bpf_get_current_pid_tgid(); 224 | __u32 pid = pid_tgid >> 32; 225 | __u32 tid = (__u32)pid_tgid; 226 | 227 | if (targ_pid != -1 && targ_pid != pid) 228 | return 0; 229 | bpf_map_delete_elem(&lua_events, &tid); 230 | return 0; 231 | } 232 | 233 | SEC("uprobe/handle_entry_lua_cancel") 234 | int handle_entry_lua_cancel(struct pt_regs *ctx) 235 | { 236 | return probe_entry_lua_cancel(ctx); 237 | } 238 | 239 | static int probe_entry_lua(struct pt_regs *ctx) 240 | { 241 | if (!PT_REGS_PARM1(ctx)) 242 | return 0; 243 | 244 | __u64 pid_tgid = bpf_get_current_pid_tgid(); 245 | __u32 pid = pid_tgid >> 32; 246 | __u32 tid = (__u32)pid_tgid; 247 | struct lua_stack_event event = {}; 248 | 249 | if (targ_pid != -1 && targ_pid != pid) 250 | return 0; 251 | 252 | event.pid = pid; 253 | event.L = (void *)PT_REGS_PARM1(ctx); 254 | bpf_map_update_elem(&lua_events, &tid, &event, BPF_ANY); 255 | return 0; 256 | } 257 | 258 | SEC("uprobe/handle_entry_lua") 259 | int handle_entry_lua(struct pt_regs *ctx) 260 | { 261 | return probe_entry_lua(ctx); 262 | } 263 | 264 | char LICENSE[] SEC("license") = "GPL"; 265 | -------------------------------------------------------------------------------- /profiler/profile.cpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | 3 | /* 4 | * Copyright (c) 2022 LG Electronics 5 | * 6 | * Based on profile(8) from BCC by Brendan Gregg. 7 | * 28-Dec-2021 Eunseon Lee Created this, 8 | * 17-Jul-2022 Yusheng Zheng modified this.
9 | */ 10 | #include <argp.h> 11 | #include <signal.h> 12 | #include <stdio.h> 13 | #include <stdlib.h> 14 | #include <string.h> 15 | #include <time.h> 16 | #include <unistd.h> 17 | #include <linux/perf_event.h> 18 | #include <asm/unistd.h> 19 | #include <bpf/libbpf.h> 20 | #include <bpf/bpf.h> 21 | #include "profile.h" 22 | #include "lua_stacks_map.h" 23 | #include "profile.skel.h" 24 | #include "stack_printer.h" 25 | 26 | extern "C" 27 | { 28 | #include "trace_helpers.h" 29 | #include "uprobe_helpers.h" 30 | } 31 | 32 | bool exiting = false; 33 | class lua_stack_map lua_bt_map; 34 | 35 | #define warn(...) fprintf(stderr, __VA_ARGS__) 36 | 37 | struct profile_env env = { 38 | .pid = -1, 39 | .tid = -1, 40 | .stack_storage_size = 8192, 41 | .perf_max_stack_depth = 127, 42 | .duration = 3, 43 | .freq = 1, 44 | .sample_freq = 49, 45 | .cpu = -1, 46 | .frame_depth = 15 47 | }; 48 | 49 | #define UPROBE_SIZE 3 50 | 51 | const char *argp_program_version = "profile 0.1"; 52 | const char *argp_program_bug_address = 53 | "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; 54 | const char argp_program_doc[] = 55 | "Profile CPU usage by sampling stack traces at a timed interval.\n" 56 | "\n" 57 | "USAGE: profile [OPTIONS...] [duration]\n" 58 | "EXAMPLES:\n" 59 | " profile # profile stack traces at 49 Hertz until Ctrl-C\n" 60 | " profile -F 99 # profile stack traces at 99 Hertz\n" 61 | " profile -L 185 # only profile thread with TID 185\n" 62 | " profile 5 # profile at 49 Hertz for 5 seconds only\n" 63 | " profile -f # output in folded format for flame graphs\n" 64 | " profile -p 185 # only profile process with PID 185\n" 65 | " profile -U # only show user space stacks (no kernel)\n" 66 | " profile -K # only show kernel space stacks (no user)\n"; 67 | 68 | #define OPT_PERF_MAX_STACK_DEPTH 1 /* --perf-max-stack-depth */ 69 | #define OPT_STACK_STORAGE_SIZE 2 /* --stack-storage-size */ 70 | #define OPT_LUA_USER_STACK_ONLY 3 /* --lua-user-stacks-only */ 71 | #define OPT_DISABLE_LUA_USER_TRACE 4 /* --disable-lua-user-trace */ 72 | #define PERF_BUFFER_PAGES 16 73 | #define PERF_POLL_TIMEOUT_MS 100 74 | 75 | static const struct argp_option opts[] = { 76 | {"pid", 'p', "PID", 0, "profile process with this PID only"}, 77 | {"tid", 'L', "TID", 0, "profile thread with this TID only"}, 78 | {"user-stacks-only", 'U', NULL, 0, 79 | "show stacks from user space only (no kernel space stacks)"}, 80 | {"kernel-stacks-only", 'K', NULL, 0, 81 | "show stacks from kernel space only (no user space stacks)"}, 82 | {"lua-user-stacks-only", OPT_LUA_USER_STACK_ONLY, NULL, 0, 83 | "replace user stacks with lua stack traces (no other user space stacks)"}, 84 | {"disable-lua-user-trace", OPT_DISABLE_LUA_USER_TRACE, NULL, 0, 85 | "disable lua user space stack trace"}, 86 | {"frequency", 'F', "FREQUENCY", 0, "sample frequency, Hertz"}, 87 | {"delimited", 'd', NULL, 0, "insert delimiter between kernel/user stacks"}, 88 | {"include-idle", 'I', NULL, 0, "include CPU idle stacks"}, 89 | {"folded", 'f', NULL, 0, "output folded format, one line per stack (for flame graphs)"}, 90 | {"stack-storage-size", OPT_STACK_STORAGE_SIZE, "STACK-STORAGE-SIZE", 0, 91 | "the number of unique stack traces that can be stored and displayed (default 8192)"}, 92 | {"cpu", 'C', "CPU", 0, "cpu number to run profile on"}, 93 | {"perf-max-stack-depth", OPT_PERF_MAX_STACK_DEPTH, 94 | "PERF-MAX-STACK-DEPTH", 0, "the limit for both kernel and user stack traces (default 127)"}, 95 | {"max-frame-depth", 'D', "DEPTH", 0, "max frame depth for eBPF to travel in the stack (default 15)"}, 96 | {"verbose", 'v', NULL, 0, "Verbose debug output"}, 97 | {NULL, 'h', NULL,
OPTION_HIDDEN, "Show the full help"}, 98 | {}, 99 | }; 100 | 101 | static error_t parse_arg(int key, char *arg, struct argp_state *state) 102 | { 103 | static int pos_args; 104 | 105 | switch (key) 106 | { 107 | case 'h': 108 | argp_state_help(state, stderr, ARGP_HELP_STD_HELP); 109 | break; 110 | case 'v': 111 | env.verbose = true; 112 | break; 113 | case 'p': 114 | errno = 0; 115 | env.pid = strtol(arg, NULL, 10); 116 | if (errno) 117 | { 118 | fprintf(stderr, "invalid PID: %s\n", arg); 119 | argp_usage(state); 120 | } 121 | break; 122 | case 'L': 123 | errno = 0; 124 | env.tid = strtol(arg, NULL, 10); 125 | if (errno || env.tid <= 0) 126 | { 127 | fprintf(stderr, "Invalid TID: %s\n", arg); 128 | argp_usage(state); 129 | } 130 | break; 131 | case 'U': 132 | env.user_stacks_only = true; 133 | break; 134 | case 'K': 135 | env.kernel_stacks_only = true; 136 | break; 137 | case 'F': 138 | errno = 0; 139 | env.sample_freq = strtol(arg, NULL, 10); 140 | if (errno || env.sample_freq <= 0) 141 | { 142 | fprintf(stderr, "invalid FREQUENCY: %s\n", arg); 143 | argp_usage(state); 144 | } 145 | break; 146 | case 'D': 147 | errno = 0; 148 | env.frame_depth = strtol(arg, NULL, 10); 149 | if (errno || env.frame_depth <= 0) 150 | { 151 | fprintf(stderr, "invalid FREQUENCY: %s\n", arg); 152 | argp_usage(state); 153 | } 154 | break; 155 | case 'd': 156 | env.delimiter = true; 157 | break; 158 | case 'I': 159 | env.include_idle = true; 160 | break; 161 | case 'f': 162 | env.folded = true; 163 | break; 164 | case 'C': 165 | errno = 0; 166 | env.cpu = strtol(arg, NULL, 10); 167 | if (errno) 168 | { 169 | fprintf(stderr, "invalid CPU: %s\n", arg); 170 | argp_usage(state); 171 | } 172 | break; 173 | case OPT_PERF_MAX_STACK_DEPTH: 174 | errno = 0; 175 | env.perf_max_stack_depth = strtol(arg, NULL, 10); 176 | if (errno) 177 | { 178 | fprintf(stderr, "invalid perf max stack depth: %s\n", arg); 179 | argp_usage(state); 180 | } 181 | break; 182 | case OPT_STACK_STORAGE_SIZE: 183 | errno = 0; 184 | env.stack_storage_size = strtol(arg, NULL, 10); 185 | if (errno) 186 | { 187 | fprintf(stderr, "invalid stack storage size: %s\n", arg); 188 | argp_usage(state); 189 | } 190 | break; 191 | case OPT_LUA_USER_STACK_ONLY: 192 | env.lua_user_stacks_only = true; 193 | break; 194 | case OPT_DISABLE_LUA_USER_TRACE: 195 | env.disable_lua_user_trace = true; 196 | break; 197 | case ARGP_KEY_ARG: 198 | if (pos_args++) 199 | { 200 | fprintf(stderr, 201 | "Unrecognized positional argument: %s\n", arg); 202 | argp_usage(state); 203 | } 204 | errno = 0; 205 | env.duration = strtol(arg, NULL, 10); 206 | if (errno || env.duration <= 0) 207 | { 208 | fprintf(stderr, "Invalid duration (in s): %s\n", arg); 209 | argp_usage(state); 210 | } 211 | break; 212 | default: 213 | return ARGP_ERR_UNKNOWN; 214 | } 215 | return 0; 216 | } 217 | 218 | static int nr_cpus; 219 | 220 | static int open_and_attach_perf_event(int freq, struct bpf_program *prog, 221 | struct bpf_link *links[]) 222 | { 223 | struct perf_event_attr attr = { 224 | .type = PERF_TYPE_SOFTWARE, 225 | .config = PERF_COUNT_SW_CPU_CLOCK, 226 | .sample_freq = env.sample_freq, 227 | .freq = env.freq, 228 | }; 229 | int i, fd; 230 | 231 | for (i = 0; i < nr_cpus; i++) 232 | { 233 | if (env.cpu != -1 && env.cpu != i) 234 | continue; 235 | 236 | fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0); 237 | if (fd < 0) 238 | { 239 | /* Ignore CPU that is offline */ 240 | if (errno == ENODEV) 241 | continue; 242 | fprintf(stderr, "failed to init perf sampling: %s\n", 243 | strerror(errno)); 
244 | return -1; 245 | } 246 | links[i] = bpf_program__attach_perf_event(prog, fd); 247 | if (!links[i]) 248 | { 249 | fprintf(stderr, "failed to attach perf event on cpu: " 250 | "%d\n", 251 | i); 252 | links[i] = NULL; 253 | close(fd); 254 | return -1; 255 | } 256 | } 257 | 258 | return 0; 259 | } 260 | 261 | static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) 262 | { 263 | if (level == LIBBPF_DEBUG && !env.verbose) 264 | return 0; 265 | return vfprintf(stderr, format, args); 266 | } 267 | 268 | static void sig_handler(int sig) 269 | { 270 | exiting = true; 271 | } 272 | 273 | static void handle_lua_stack_event(void *ctx, int cpu, void *data, __u32 data_sz) 274 | { 275 | const struct lua_stack_event *e = static_cast<const struct lua_stack_event *>(data); 276 | lua_bt_map.insert_lua_stack_map(e); 277 | } 278 | 279 | static void handle_lua_stack_lost_events(void *ctx, int cpu, __u64 lost_cnt) 280 | { 281 | warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); 282 | } 283 | 284 | static struct bpf_link * 285 | attach_lua_func(const char *lua_path, const char *func_name, const bpf_program *prog) 286 | { 287 | off_t func_off = get_elf_func_offset(lua_path, func_name); 288 | if (func_off < 0) 289 | { 290 | warn("could not find %s in %s\n", func_name, lua_path); 291 | return NULL; 292 | } 293 | struct bpf_link *link = bpf_program__attach_uprobe(prog, false, 294 | -1, lua_path, func_off); 295 | if (!link) 296 | { 297 | warn("failed to attach %s: %d\n", func_name, -errno); 298 | return NULL; 299 | } 300 | return link; 301 | } 302 | 303 | static int attach_lua_uprobes(struct profile_bpf *obj, struct bpf_link *links[]) 304 | { 305 | char lua_path[128] = {}; 306 | if (env.pid) 307 | { 308 | int res = 0; 309 | 310 | res = get_pid_lib_path(env.pid, "luajit-5.1.so", lua_path, sizeof(lua_path)); 311 | if (res < 0) 312 | { 313 | fprintf(stderr, "warning: failed to get lib path for pid %d\n", env.pid); 314 | return -1; 315 | } 316 | } 317 | 318 | links[0] = attach_lua_func(lua_path, "lua_resume", obj->progs.handle_entry_lua); 319 | if (!links[0]) 320 | { 321 | return -1; 322 | } 323 | 324 | links[1] = attach_lua_func(lua_path, "lua_pcall", obj->progs.handle_entry_lua); 325 | if (!links[1]) 326 | { 327 | return -1; 328 | } 329 | 330 | links[2] = attach_lua_func(lua_path, "lua_yield", obj->progs.handle_entry_lua_cancel); 331 | if (!links[2]) 332 | { 333 | return -1; 334 | } 335 | return 0; 336 | } 337 | 338 | int main(int argc, char **argv) 339 | { 340 | static const struct argp argp = { 341 | .options = opts, 342 | .parser = parse_arg, 343 | .doc = argp_program_doc, 344 | }; 345 | struct syms_cache *syms_cache = NULL; 346 | struct ksyms *ksyms = NULL; 347 | struct bpf_link *cpu_links[MAX_CPU_NR] = {}; 348 | struct bpf_link *uprobe_links[UPROBE_SIZE] = {}; 349 | struct profile_bpf *obj = nullptr; 350 | struct perf_buffer *pb = nullptr; 351 | int err, i; 352 | const char *stack_context = "user + kernel"; 353 | char thread_context[64]; 354 | char sample_context[64]; 355 | 356 | err = argp_parse(&argp, argc, argv, 0, NULL, NULL); 357 | if (err) 358 | return err; 359 | if (env.user_stacks_only && env.kernel_stacks_only) 360 | { 361 | fprintf(stderr, "user_stacks_only and kernel_stacks_only cannot be used together.\n"); 362 | return 1; 363 | } 364 | 365 | libbpf_set_print(libbpf_print_fn); 366 | libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 367 | 368 | nr_cpus = libbpf_num_possible_cpus(); 369 | if (nr_cpus < 0) 370 | { 371 | printf("failed to get # of possible cpus: '%s'!\n", 372 | strerror(-nr_cpus)); 373 |
return 1; 374 | } 375 | if (nr_cpus > MAX_CPU_NR) 376 | { 377 | fprintf(stderr, "the number of cpu cores is too big, please " 378 | "increase MAX_CPU_NR's value and recompile\n"); 379 | return 1; 380 | } 381 | 382 | obj = profile_bpf__open(); 383 | if (!obj) 384 | { 385 | fprintf(stderr, "failed to open BPF object\n"); 386 | return 1; 387 | } 388 | 389 | /* initialize global data (filtering options) */ 390 | obj->rodata->targ_pid = env.pid; 391 | obj->rodata->targ_tid = env.tid; 392 | obj->rodata->user_stacks_only = env.user_stacks_only; 393 | obj->rodata->kernel_stacks_only = env.kernel_stacks_only; 394 | obj->rodata->include_idle = env.include_idle; 395 | obj->rodata->frame_depth = env.frame_depth; 396 | 397 | bpf_map__set_value_size(obj->maps.stackmap, 398 | env.perf_max_stack_depth * sizeof(unsigned long)); 399 | bpf_map__set_max_entries(obj->maps.stackmap, env.stack_storage_size); 400 | 401 | err = profile_bpf__load(obj); 402 | if (err) 403 | { 404 | fprintf(stderr, "failed to load BPF programs\n"); 405 | fprintf(stderr, "try decreasing the max frame depth with -D and rerun with sudo?\n"); 406 | goto cleanup; 407 | } 408 | ksyms = ksyms__load(); 409 | if (!ksyms) 410 | { 411 | fprintf(stderr, "failed to load kallsyms\n"); 412 | goto cleanup; 413 | } 414 | syms_cache = syms_cache__new(0); 415 | if (!syms_cache) 416 | { 417 | fprintf(stderr, "failed to create syms_cache\n"); 418 | goto cleanup; 419 | } 420 | 421 | err = attach_lua_uprobes(obj, uprobe_links); 422 | if (err < 0) 423 | { 424 | // could not find the lua lib, so skip the lua uprobes 425 | env.disable_lua_user_trace = true; 426 | } 427 | 428 | pb = perf_buffer__new(bpf_map__fd(obj->maps.lua_event_output), PERF_BUFFER_PAGES, 429 | handle_lua_stack_event, handle_lua_stack_lost_events, NULL, NULL); 430 | if (!pb) 431 | { 432 | err = -errno; 433 | warn("failed to open perf buffer: %d\n", err); 434 | goto cleanup; 435 | } 436 | 437 | err = open_and_attach_perf_event(env.freq, obj->progs.do_perf_event, cpu_links); 438 | if (err) 439 | goto cleanup; 440 | 441 | signal(SIGINT, sig_handler); 442 | 443 | if (env.pid != -1) 444 | snprintf(thread_context, sizeof(thread_context), "PID %d", env.pid); 445 | else if (env.tid != -1) 446 | snprintf(thread_context, sizeof(thread_context), "TID %d", env.tid); 447 | else 448 | snprintf(thread_context, sizeof(thread_context), "all threads"); 449 | 450 | snprintf(sample_context, sizeof(sample_context), "%d Hertz", env.sample_freq); 451 | 452 | if (env.user_stacks_only) 453 | stack_context = "user"; 454 | else if (env.kernel_stacks_only) 455 | stack_context = "kernel"; 456 | 457 | if (!env.folded) 458 | { 459 | printf("Sampling at %s of %s by %s stack", sample_context, thread_context, stack_context); 460 | if (env.cpu != -1) 461 | printf(" on CPU#%d", env.cpu); 462 | if (env.duration < 99999999) 463 | printf(" for %d secs.\n", env.duration); 464 | else 465 | printf("... Hit Ctrl-C to end.\n"); 466 | } 467 | 468 | /* 469 | * We'll get sleep interrupted when someone presses Ctrl-C (which will 470 | * be "handled" with noop by sig_handler).
471 | */ 472 | while (!exiting) 473 | { 474 | // poll the perf buffer to drain lua stack events while sampling 475 | err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); 476 | if (err < 0 && err != -EINTR) 477 | { 478 | warn("error polling perf buffer: %s\n", strerror(-err)); 479 | goto cleanup; 480 | } 481 | /* reset err to return 0 if exiting */ 482 | err = 0; 483 | } 484 | 485 | print_stack_trace(ksyms, syms_cache, obj); 486 | 487 | cleanup: 488 | if (env.cpu != -1) 489 | bpf_link__destroy(cpu_links[env.cpu]); 490 | else 491 | { 492 | for (i = 0; i < nr_cpus; i++) 493 | bpf_link__destroy(cpu_links[i]); 494 | } 495 | for (i = 0; i < UPROBE_SIZE; i++) 496 | bpf_link__destroy(uprobe_links[i]); 497 | profile_bpf__destroy(obj); 498 | perf_buffer__free(pb); 499 | syms_cache__free(syms_cache); 500 | ksyms__free(ksyms); 501 | return err != 0; 502 | } 503 | -------------------------------------------------------------------------------- /profiler/profile.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-2-Clause */ 2 | #ifndef __PROFILE_H 3 | #define __PROFILE_H 4 | 5 | #define TASK_COMM_LEN 16 6 | #define MAX_CPU_NR 128 7 | #define MAX_ENTRIES 10240 8 | #define HOST_LEN 80 9 | 10 | struct stack_key 11 | { 12 | unsigned int pid; 13 | unsigned long long kernel_ip; 14 | int user_stack_id; 15 | int kern_stack_id; 16 | char name[TASK_COMM_LEN]; 17 | }; 18 | 19 | enum func_type { 20 | FUNC_TYPE_LUA, 21 | FUNC_TYPE_C, 22 | FUNC_TYPE_F, 23 | FUNC_TYPE_UNKNOWN, 24 | }; 25 | 26 | struct lua_stack_event 27 | { 28 | unsigned int pid; 29 | // key for user_stack_id 30 | int user_stack_id; 31 | // stack level 32 | int level; 33 | // function type 34 | int type; 35 | // function name 36 | char name[HOST_LEN]; 37 | void *funcp; 38 | // line number (lua func) or ffid (ffunc) 39 | int ffid; 40 | // lua state 41 | void *L; 42 | }; 43 | 44 | struct profile_env 45 | { 46 | int pid; 47 | int tid; 48 | bool user_stacks_only; 49 | bool kernel_stacks_only; 50 | // control lua user space stack trace 51 | bool disable_lua_user_trace; 52 | bool lua_user_stacks_only; 53 | int stack_storage_size; 54 | unsigned int perf_max_stack_depth; 55 | int duration; 56 | bool verbose; 57 | bool freq; 58 | unsigned int sample_freq; 59 | bool delimiter; 60 | bool include_idle; 61 | bool folded; 62 | int cpu; 63 | int frame_depth; 64 | }; 65 | 66 | #endif /* __PROFILE_H */ 67 | -------------------------------------------------------------------------------- /profiler/stack_printer.cpp: -------------------------------------------------------------------------------- 1 | #include <argp.h> 2 | #include <signal.h> 3 | #include <stdio.h> 4 | #include <stdlib.h> 5 | #include <string.h> 6 | #include <time.h> 7 | #include <unistd.h> 8 | #include <linux/perf_event.h> 9 | #include <asm/unistd.h> 10 | #include <bpf/libbpf.h> 11 | #include <bpf/bpf.h> 12 | #include "profile.h" 13 | #include "lua_stacks_map.h" 14 | #include "profile.skel.h" 15 | extern "C" 16 | { 17 | #include "trace_helpers.h" 18 | #include "uprobe_helpers.h" 19 | } 20 | 21 | #define warn(...) fprintf(stderr, __VA_ARGS__) 22 | 23 | extern struct profile_env env; 24 | extern class lua_stack_map lua_bt_map; 25 | 26 | /* This structure combines key_t and count which should be sorted together */ 27 | struct key_ext_t 28 | { 29 | struct stack_key k; 30 | __u64 v; 31 | }; 32 | 33 | static int stack_id_err(int stack_id) 34 | { 35 | return (stack_id < 0) && (stack_id != -EFAULT); 36 | } 37 | 38 | static int cmp_counts(const void *dx, const void *dy) 39 | { 40 | __u64 x = ((struct key_ext_t *)dx)->v; 41 | __u64 y = ((struct key_ext_t *)dy)->v; 42 | return x > y ?
-1 : !(x == y); 43 | } 44 | 45 | static bool batch_map_ops = true; /* hope for the best */ 46 | 47 | static bool read_batch_counts_map(int fd, struct key_ext_t *items, __u32 *count) 48 | { 49 | void *in = NULL, *out; 50 | __u32 i, n, n_read = 0; 51 | int err = 0; 52 | __u32 vals[*count]; 53 | struct stack_key keys[*count]; 54 | 55 | while (n_read < *count && !err) 56 | { 57 | n = *count - n_read; 58 | err = bpf_map_lookup_batch(fd, &in, &out, keys + n_read, 59 | vals + n_read, &n, NULL); 60 | if (err && errno != ENOENT) 61 | { 62 | /* we want to propagate EINVAL upper, so that 63 | * the batch_map_ops flag is set to false */ 64 | if (errno != EINVAL) 65 | warn("bpf_map_lookup_batch: %s\n", 66 | strerror(-err)); 67 | return false; 68 | } 69 | n_read += n; 70 | in = out; 71 | } 72 | 73 | for (i = 0; i < n_read; i++) 74 | { 75 | items[i].k.pid = keys[i].pid; 76 | items[i].k.kernel_ip = keys[i].kernel_ip; 77 | items[i].k.user_stack_id = keys[i].user_stack_id; 78 | items[i].k.kern_stack_id = keys[i].kern_stack_id; 79 | strncpy(items[i].k.name, keys[i].name, TASK_COMM_LEN); 80 | items[i].v = vals[i]; 81 | } 82 | 83 | *count = n_read; 84 | return true; 85 | } 86 | 87 | static bool read_counts_map(int fd, struct key_ext_t *items, __u32 *count) 88 | { 89 | struct stack_key empty = {}; 90 | struct stack_key *lookup_key = &empty; 91 | int i = 0; 92 | int err; 93 | 94 | if (batch_map_ops) 95 | { 96 | bool ok = read_batch_counts_map(fd, items, count); 97 | if (!ok && errno == EINVAL) 98 | { 99 | /* fall back to a racy variant */ 100 | batch_map_ops = false; 101 | } 102 | else 103 | { 104 | return ok; 105 | } 106 | } 107 | 108 | if (!items || !count || !*count) 109 | return true; 110 | 111 | while (!bpf_map_get_next_key(fd, lookup_key, &items[i].k)) 112 | { 113 | 114 | err = bpf_map_lookup_elem(fd, &items[i].k, &items[i].v); 115 | if (err < 0) 116 | { 117 | fprintf(stderr, "failed to lookup counts: %d\n", err); 118 | return false; 119 | } 120 | if (items[i].v == 0) 121 | continue; 122 | 123 | lookup_key = &items[i].k; 124 | i++; 125 | } 126 | 127 | *count = i; 128 | return true; 129 | } 130 | 131 | static void print_fold_lua_func(const struct syms *syms, const struct lua_stack_event *eventp) 132 | { 133 | if (!eventp) 134 | { 135 | return; 136 | } 137 | if (eventp->type == FUNC_TYPE_LUA) 138 | { 139 | if (eventp->ffid) 140 | { 141 | printf(";L:%s:%d", eventp->name, eventp->ffid); 142 | } 143 | else 144 | { 145 | printf(";L:%s", eventp->name); 146 | } 147 | } 148 | else if (eventp->type == FUNC_TYPE_C) 149 | { 150 | const struct sym *sym = syms__map_addr(syms, (unsigned long)eventp->funcp); 151 | if (sym) 152 | { 153 | printf(";C:%s", sym ?
--------------------------------------------------------------------------------
/profiler/stack_printer.cpp:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 | #include <errno.h>
5 | #include <stdint.h>
6 | #include <cstddef>
7 | #include <unistd.h>
8 | #include <linux/types.h>
9 | #include <sys/types.h>
10 | #include <bpf/bpf.h>
11 | #include <bpf/libbpf.h>
12 | #include "profile.h"
13 | #include "lua_stacks_map.h"
14 | #include "profile.skel.h"
15 | extern "C"
16 | {
17 | #include "trace_helpers.h"
18 | #include "uprobe_helpers.h"
19 | }
20 | 
21 | #define warn(...) fprintf(stderr, __VA_ARGS__)
22 | 
23 | extern struct profile_env env;
24 | extern class lua_stack_map lua_bt_map;
25 | 
26 | /* This structure combines key_t and count which should be sorted together */
27 | struct key_ext_t
28 | {
29 |     struct stack_key k;
30 |     __u64 v;
31 | };
32 | 
33 | static int stack_id_err(int stack_id)
34 | {
35 |     return (stack_id < 0) && (stack_id != -EFAULT);
36 | }
37 | 
38 | static int cmp_counts(const void *dx, const void *dy)
39 | {
40 |     __u64 x = ((struct key_ext_t *)dx)->v;
41 |     __u64 y = ((struct key_ext_t *)dy)->v;
42 |     return x > y ? -1 : !(x == y); /* descending: -1 if x > y, 0 if equal, 1 if x < y */
43 | }
44 | 
45 | static bool batch_map_ops = true; /* hope for the best */
46 | 
47 | static bool read_batch_counts_map(int fd, struct key_ext_t *items, __u32 *count)
48 | {
49 |     void *in = NULL, *out;
50 |     __u32 i, n, n_read = 0;
51 |     int err = 0;
52 |     __u32 vals[*count];
53 |     struct stack_key keys[*count];
54 | 
55 |     while (n_read < *count && !err)
56 |     {
57 |         n = *count - n_read;
58 |         err = bpf_map_lookup_batch(fd, &in, &out, keys + n_read,
59 |                                    vals + n_read, &n, NULL);
60 |         if (err && errno != ENOENT)
61 |         {
62 |             /* we want to propagate EINVAL up the stack, so that
63 |              * the batch_map_ops flag is set to false */
64 |             if (errno != EINVAL)
65 |                 warn("bpf_map_lookup_batch: %s\n",
66 |                      strerror(-err));
67 |             return false;
68 |         }
69 |         n_read += n;
70 |         in = out;
71 |     }
72 | 
73 |     for (i = 0; i < n_read; i++)
74 |     {
75 |         items[i].k.pid = keys[i].pid;
76 |         items[i].k.kernel_ip = keys[i].kernel_ip;
77 |         items[i].k.user_stack_id = keys[i].user_stack_id;
78 |         items[i].k.kern_stack_id = keys[i].kern_stack_id;
79 |         strncpy(items[i].k.name, keys[i].name, TASK_COMM_LEN);
80 |         items[i].v = vals[i];
81 |     }
82 | 
83 |     *count = n_read;
84 |     return true;
85 | }
86 | 
87 | static bool read_counts_map(int fd, struct key_ext_t *items, __u32 *count)
88 | {
89 |     struct stack_key empty = {};
90 |     struct stack_key *lookup_key = &empty;
91 |     int i = 0;
92 |     int err;
93 | 
94 |     if (batch_map_ops)
95 |     {
96 |         bool ok = read_batch_counts_map(fd, items, count);
97 |         if (!ok && errno == EINVAL)
98 |         {
99 |             /* fall back to a racy variant */
100 |             batch_map_ops = false;
101 |         }
102 |         else
103 |         {
104 |             return ok;
105 |         }
106 |     }
107 | 
108 |     if (!items || !count || !*count)
109 |         return true;
110 | 
111 |     while (!bpf_map_get_next_key(fd, lookup_key, &items[i].k))
112 |     {
113 | 
114 |         err = bpf_map_lookup_elem(fd, &items[i].k, &items[i].v);
115 |         if (err < 0)
116 |         {
117 |             fprintf(stderr, "failed to lookup counts: %d\n", err);
118 |             return false;
119 |         }
120 |         /* skip zero-count entries, but still advance the iteration cursor */
121 |         if (items[i].v == 0) { lookup_key = &items[i].k; continue; }
122 | 
123 |         lookup_key = &items[i].k;
124 |         i++;
125 |     }
126 | 
127 |     *count = i;
128 |     return true;
129 | }
130 | 
131 | static void print_fold_lua_func(const struct syms *syms, const struct lua_stack_event *eventp)
132 | {
133 |     if (!eventp)
134 |     {
135 |         return;
136 |     }
137 |     if (eventp->type == FUNC_TYPE_LUA)
138 |     {
139 |         if (eventp->ffid)
140 |         {
141 |             printf(";L:%s:%d", eventp->name, eventp->ffid);
142 |         }
143 |         else
144 |         {
145 |             printf(";L:%s", eventp->name);
146 |         }
147 |     }
148 |     else if (eventp->type == FUNC_TYPE_C)
149 |     {
150 |         const struct sym *sym = syms__map_addr(syms, (unsigned long)eventp->funcp);
151 |         if (sym)
152 |             printf(";C:%s", sym->name);
153 |         else
154 |             printf(";C:[unknown]");
155 |     }
156 |     else if (eventp->type == FUNC_TYPE_F)
157 |     {
158 |         printf(";builtin#%d", eventp->ffid);
159 |     }
160 |     else
161 |     {
162 |         printf(";[unknown]");
163 |     }
164 | }
165 | 
166 | static void print_fold_user_stack_with_lua(const lua_stack_backtrace *lua_bt, const struct syms *syms, unsigned long *uip, unsigned int nr_uip)
167 | {
168 |     const struct sym *sym = NULL;
169 |     int lua_bt_count = lua_bt->size() - 1;
170 |     for (int j = nr_uip - 1; j >= 0; j--)
171 |     {
172 |         sym = syms__map_addr(syms, uip[j]);
173 |         if (sym)
174 |         {
175 |             if (!env.lua_user_stacks_only)
176 |             {
177 |                 printf(";%s", sym->name);
178 |             }
179 |         }
180 |         else
181 |         {
182 |             if (lua_bt_count >= 0)
183 |             {
184 |                 print_fold_lua_func(syms, &((*lua_bt)[lua_bt_count]));
185 |                 lua_bt_count--;
186 |             }
187 |         }
188 |     }
189 |     while (lua_bt_count >= 0)
190 |     {
191 |         print_fold_lua_func(syms, &((*lua_bt)[lua_bt_count]));
192 |         lua_bt_count--;
193 |     }
194 | }
195 | 
196 | void print_stack_trace(struct ksyms *ksyms, struct syms_cache *syms_cache,
197 |                        struct profile_bpf *obj)
198 | {
199 |     const struct ksym *ksym;
200 |     const struct syms *syms = NULL;
201 |     const struct sym *sym;
202 |     int cfd, sfd;
203 |     lua_stack_backtrace lua_bt = {};
204 |     __u32 nr_count;
205 |     struct stack_key *k;
206 |     __u64 v;
207 |     unsigned long *kip;
208 |     unsigned long *uip;
209 |     bool has_collision = false;
210 |     unsigned int missing_stacks = 0;
211 |     struct key_ext_t counts[MAX_ENTRIES];
212 |     unsigned int nr_kip;
213 |     unsigned int nr_uip;
214 |     int idx = 0;
215 | 
216 |     /* add 1 for kernel_ip */
217 |     kip = (unsigned long *)calloc(env.perf_max_stack_depth + 1, sizeof(*kip));
218 |     if (!kip)
219 |     {
220 |         fprintf(stderr, "failed to alloc kernel ip\n");
221 |         return;
222 |     }
223 | 
224 |     uip = (unsigned long *)calloc(env.perf_max_stack_depth, sizeof(*uip));
225 |     if (!uip)
226 |     {
227 |         fprintf(stderr, "failed to alloc user ip\n");
228 |         goto cleanup; /* kip was already allocated and must be freed */
229 |     }
230 | 
231 |     cfd = bpf_map__fd(obj->maps.counts);
232 |     sfd = bpf_map__fd(obj->maps.stackmap);
233 | 
234 |     nr_count = MAX_ENTRIES;
235 |     if (!read_counts_map(cfd, counts, &nr_count))
236 |     {
237 |         goto cleanup;
238 |     }
239 | 
240 |     qsort(counts, nr_count, sizeof(counts[0]), cmp_counts);
241 | 
242 |     for (std::size_t i = 0; i < nr_count; i++)
243 |     {
244 |         k = &counts[i].k;
245 |         v = counts[i].v;
246 |         nr_uip = 0;
247 |         nr_kip = 0;
248 |         idx = 0; syms = NULL; /* don't carry symbols over from the previous entry */
249 | 
250 |         if (!env.user_stacks_only && stack_id_err(k->kern_stack_id))
251 |         {
252 |             missing_stacks += 1;
253 |             has_collision |= (k->kern_stack_id == -EEXIST);
254 |         }
255 |         if (!env.kernel_stacks_only && stack_id_err(k->user_stack_id))
256 |         {
257 |             missing_stacks += 1;
258 |             has_collision |= (k->user_stack_id == -EEXIST);
259 |         }
260 | 
261 |         if (!env.kernel_stacks_only && k->user_stack_id >= 0)
262 |         {
263 |             if (bpf_map_lookup_elem(sfd, &k->user_stack_id, uip) == 0)
264 |             {
265 |                 /* count the number of ips */
266 |                 while (nr_uip < env.perf_max_stack_depth && uip[nr_uip])
267 |                     nr_uip++;
268 |                 syms = syms_cache__get_syms(syms_cache, k->pid);
269 |             }
270 |             int stack_level = lua_bt_map.get_lua_stack_backtrace(k->user_stack_id, &lua_bt);
271 |             if (env.lua_user_stacks_only && env.folded)
272 |             {
273 |                 if (stack_level <= 0)
274 |                 {
275 |                     // when showing lua user stacks only, skip entries that carry no lua stack
276 |                     continue;
277 |                 }
278 |             }
279 |         }
280 | 
281 |         if (!env.user_stacks_only && k->kern_stack_id >= 0)
282 |         {
283 |             if (k->kernel_ip)
284 |                 kip[nr_kip++] = k->kernel_ip;
285 |             if (bpf_map_lookup_elem(sfd, &k->kern_stack_id, kip + nr_kip) == 0)
286 |             {
287 |                 /* count the number of ips */
288 |                 while (nr_kip < env.perf_max_stack_depth && kip[nr_kip])
289 |                     nr_kip++;
290 |             }
291 |         }
292 | 
293 |         if (env.folded)
294 |         {
295 |             // print folded stack output
296 |             printf("%s", k->name);
297 | 
298 |             if (!env.kernel_stacks_only)
299 |             {
300 |                 if (stack_id_err(k->user_stack_id))
301 |                     printf(";[Missed User Stack]");
302 |                 if (syms)
303 |                 {
304 |                     if (!env.disable_lua_user_trace)
305 |                     {
306 |                         print_fold_user_stack_with_lua(&lua_bt, syms, uip, nr_uip);
307 |                     }
308 |                     else
309 |                     {
310 |                         const struct sym *sym = NULL;
311 |                         for (int j = nr_uip - 1; j >= 0; j--)
312 |                         {
313 |                             sym = syms__map_addr(syms, uip[j]);
314 |                             printf(";%s", sym ? sym->name : "[unknown]");
315 |                         }
316 |                     }
317 |                 }
318 |             }
319 |             if (!env.user_stacks_only)
320 |             {
321 |                 if (env.delimiter && k->user_stack_id >= 0 &&
322 |                     k->kern_stack_id >= 0)
323 |                     printf(";-");
324 | 
325 |                 if (stack_id_err(k->kern_stack_id))
326 |                     printf(";[Missed Kernel Stack]");
327 |                 for (int j = (int)nr_kip - 1; j >= 0; j--)
328 |                 {
329 |                     ksym = ksyms__map_addr(ksyms, kip[j]);
330 |                     printf(";%s", ksym ? ksym->name : "[unknown]");
331 |                 }
332 |             }
333 |             printf(" %lld\n", v);
334 |         }
335 |         else
336 |         {
337 |             // print default multi-line stack output
338 |             if (!env.user_stacks_only)
339 |             {
340 |                 if (stack_id_err(k->kern_stack_id))
341 |                     printf(" [Missed Kernel Stack]\n");
342 |                 for (std::size_t j = 0; j < nr_kip; j++)
343 |                 {
344 |                     ksym = ksyms__map_addr(ksyms, kip[j]);
345 |                     if (ksym)
346 |                         printf(" #%-2d 0x%lx %s+0x%lx\n", idx++, kip[j], ksym->name, kip[j] - ksym->addr);
347 |                     else
348 |                         printf(" #%-2d 0x%lx [unknown]\n", idx++, kip[j]);
349 |                 }
350 |             }
351 | 
352 |             if (!env.kernel_stacks_only)
353 |             {
354 |                 if (env.delimiter && k->kern_stack_id >= 0 &&
355 |                     k->user_stack_id >= 0)
356 |                     printf(" --\n");
357 | 
358 |                 if (stack_id_err(k->user_stack_id))
359 |                     printf(" [Missed User Stack]\n");
360 |                 if (!syms)
361 |                 {
362 |                     for (std::size_t j = 0; j < nr_uip; j++)
363 |                         printf(" #%-2d 0x%016lx [unknown]\n", idx++, uip[j]);
364 |                 }
365 |                 else
366 |                 {
367 |                     for (std::size_t j = 0; j < nr_uip; j++)
368 |                     {
369 |                         char *dso_name;
370 |                         uint64_t dso_offset;
371 |                         sym = syms__map_addr_dso(syms, uip[j], &dso_name, &dso_offset);
372 | 
373 |                         printf(" #%-2d 0x%016lx", idx++, uip[j]);
374 |                         if (sym)
375 |                             printf(" %s+0x%lx", sym->name, sym->offset);
376 |                         if (dso_name)
377 |                             printf(" (%s+0x%lx)", dso_name, dso_offset);
378 |                         printf("\n");
379 |                     }
380 |                 }
381 |             }
382 | 
383 |             printf(" %-16s %s (%d)\n", "-", k->name, k->pid);
384 |             printf(" %lld\n\n", v);
385 |         }
386 |     }
387 | 
388 |     if (missing_stacks > 0)
389 |     {
390 |         fprintf(stderr, "WARNING: %d stack traces could not be displayed.%s\n",
391 |                 missing_stacks, has_collision ? " Consider increasing --stack-storage-size." : "");
392 |     }
393 | 
394 | cleanup:
395 |     free(kip);
396 |     free(uip);
397 | }
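Taken together, the folded branch above emits one line per unique stack: the process comm, then `;`-separated frames from root to leaf (with `L:`, `C:` and `builtin#` markers for Lua functions, C functions and LuaJIT fast functions, and user frames that cannot be symbolized substituted from the recorded Lua backtrace), then a space and the sample count. That is exactly the format consumed by Brendan Gregg's FlameGraph scripts. An illustrative run (the flag name and frame names below are assumptions for the example, not actual output; `env.folded` is the knob the flag would set):

```sh
$ sudo ./profile --folded > out.folded   # '--folded' is an assumed flag name
$ head -1 out.folded
nginx;_start;main;ngx_http_lua_run_thread;L:access_handler:12;C:lj_cf_ngx_log 27
$ git clone https://github.com/brendangregg/FlameGraph
$ ./FlameGraph/flamegraph.pl out.folded > profile.svg
```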
--------------------------------------------------------------------------------
/profiler/stack_printer.h:
--------------------------------------------------------------------------------
1 | #ifndef STACK_PRINTER_H
2 | #define STACK_PRINTER_H
3 | 
4 | void print_stack_trace(struct ksyms *ksyms, struct syms_cache *syms_cache,
5 |                        struct profile_bpf *obj);
6 | 
7 | #endif
8 | 
--------------------------------------------------------------------------------
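As a closing overview, here is a condensed, non-authoritative sketch of how `profile.cpp` wires the pieces in this excerpt together. Option parsing, perf-event/uprobe attachment and error paths are omitted; `ksyms__load` and `syms_cache__new` are assumed to come from `trace_helpers.h` (their `__free` counterparts appear in the cleanup code above), and `profile_bpf__open_and_load` is the standard libbpf skeleton entry point from `profile.skel.h`:

```cpp
#include "profile.skel.h"
#include "stack_printer.h"
extern "C"
{
#include "trace_helpers.h"
}

int run_profiler(void)
{
    struct ksyms *ksyms = ksyms__load();                 /* kernel symbol table */
    struct syms_cache *syms_cache = syms_cache__new(0);  /* per-pid user symbols */
    struct profile_bpf *obj = profile_bpf__open_and_load();
    if (!ksyms || !syms_cache || !obj)
        return 1;

    /* ... attach perf events and LuaJIT uprobes, then poll the perf
     * buffer until interrupted (see the loop in profile.cpp above) ... */

    print_stack_trace(ksyms, syms_cache, obj);           /* drain and print counts */

    profile_bpf__destroy(obj);
    syms_cache__free(syms_cache);
    ksyms__free(ksyms);
    return 0;
}
```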