├── .gitignore ├── lang ├── test.vy ├── kprobe.vy ├── include │ ├── dsl.h │ ├── annot.h │ ├── func.h │ ├── parser.h │ ├── buffer.h │ ├── symtable.h │ ├── bpflib.h │ ├── ir.h │ ├── lexer.h │ ├── probe.h │ ├── insn.h │ ├── ut.h │ └── ast.h ├── main.vy ├── Makefile ├── dsl.c ├── ast.c ├── symtable.c ├── bpflib.c ├── func.c ├── ut.c ├── lexer.c ├── buffer.c ├── annot.c ├── gen.c ├── parser.c ├── ir.c └── probe.c ├── tools ├── pro.vy ├── tracepoint │ ├── net_dev.vy │ ├── execve_exit.vy │ ├── statfs.vy │ ├── rname_ex.vy │ ├── runqlat.vy │ ├── consume_skb.vy │ ├── cpu.vy │ ├── brk.vy │ ├── fault.vy │ ├── ext.vy │ ├── open.vy │ └── mmap.vy ├── kprobe │ ├── file_fault.vy │ └── undump.vy └── doc-zh.md ├── README.md ├── README-zh.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ -------------------------------------------------------------------------------- /lang/test.vy: -------------------------------------------------------------------------------- 1 | #net; 2 | 3 | probe netif_receive_skb { 4 | out("%d\n", args->len); 5 | } 6 | -------------------------------------------------------------------------------- /tools/pro.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | probe sys_exit_execve { 4 | a := 1+ 2; 5 | out("%d\n", a); 6 | } -------------------------------------------------------------------------------- /tools/tracepoint/net_dev.vy: -------------------------------------------------------------------------------- 1 | #net; 2 | 3 | probe net_dev_start_xmit{ 4 | map[comm()] |> count(); 5 | } -------------------------------------------------------------------------------- /tools/kprobe/file_fault.vy: -------------------------------------------------------------------------------- 1 | #kprobe; 2 | 3 | probe filemap_fault{ 4 | map[comm()] |> count(); 5 | out("%s\n", comm()); 6 | } 
-------------------------------------------------------------------------------- /tools/tracepoint/execve_exit.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | probe sys_exit_execve{ 4 | arg := args->ret; 5 | out("%s", arg); 6 | } -------------------------------------------------------------------------------- /tools/tracepoint/statfs.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | probe sys_enter_statfs { 4 | p := args->pathname; 5 | out("%s\n", p); 6 | } -------------------------------------------------------------------------------- /lang/kprobe.vy: -------------------------------------------------------------------------------- 1 | #kprobe; 2 | 3 | probe dev_queue_xmit { 4 | sk := (sk_buff*) arg0; 5 | out("comm: %s len: %d\n", comm(), sk->len); 6 | } 7 | -------------------------------------------------------------------------------- /tools/tracepoint/rname_ex.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | probe sys_enter_renameat2{ 4 | name := args->oldname; 5 | out("%s %s\n", comm(), name); 6 | } -------------------------------------------------------------------------------- /tools/kprobe/undump.vy: -------------------------------------------------------------------------------- 1 | #kprobe; 2 | 3 | BEGIN{ 4 | out("%s\n", "attach unix_stream_read_actor"); 5 | } 6 | 7 | probe unix_stream_read_actor{ 8 | out("%s\n", comm()); 9 | } -------------------------------------------------------------------------------- /tools/tracepoint/runqlat.vy: -------------------------------------------------------------------------------- 1 | #sched; 2 | 3 | BEGIN{ 4 | out("tracing cpu scheduler ..."); 5 | } 6 | 7 | probe sched_wakeup{ 8 | 9 | out("%d\n", args->pid); 10 | } -------------------------------------------------------------------------------- /tools/tracepoint/consume_skb.vy: 
-------------------------------------------------------------------------------- 1 | #skb; 2 | 3 | BEGIN { 4 | out("%s\n", "Tracing unusual skb dorp stacks. Hit Ctrl-C to end."); 5 | } 6 | 7 | probe consume_skb{ 8 | map[comm()] |> count(); 9 | } -------------------------------------------------------------------------------- /tools/tracepoint/cpu.vy: -------------------------------------------------------------------------------- 1 | #power; 2 | 3 | BEGIN { 4 | printf("%s\n", "Sampling CPU freq system-wide & by process. Ctrl-C to end."); 5 | } 6 | 7 | probe cpu_frequency{ 8 | curfreq[cpu()] |> count(); 9 | } -------------------------------------------------------------------------------- /tools/tracepoint/brk.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | BEGIN { 4 | out("%-18s %-16s\n", "PID", "COMM"); 5 | } 6 | 7 | probe sys_enter_brk { 8 | map[comm()] |> count(); 9 | out("%-18d %-16s\n", pid(), comm()); 10 | } -------------------------------------------------------------------------------- /tools/tracepoint/fault.vy: -------------------------------------------------------------------------------- 1 | #exceptions; 2 | 3 | BEGIN { 4 | out("%-18s %-16s\n", "PID", "COMM"); 5 | } 6 | 7 | probe page_fault_user{ 8 | map[comm()] |> count(); 9 | out("%-18d %-16s\n", pid(), comm()); 10 | } -------------------------------------------------------------------------------- /tools/tracepoint/ext.vy: -------------------------------------------------------------------------------- 1 | #ext4; 2 | 3 | BEGIN { 4 | out("%-18s %-16s %-6s\n", "PID", "COMM", "LEN"); 5 | } 6 | 7 | probe ext4_da_write_begin { 8 | arg := args->len; 9 | out("%-18d %-16s %-6d\n", pid(), comm(), arg); 10 | } -------------------------------------------------------------------------------- /lang/include/dsl.h: -------------------------------------------------------------------------------- 1 | #ifndef DSL_H 2 | #define DSL_H 3 | 4 | #include 
"lexer.h" 5 | #include "parser.h" 6 | #include "ast.h" 7 | #include "annot.h" 8 | #include "ir.h" 9 | #include "buffer.h" 10 | #include "probe.h" 11 | 12 | 13 | #endif -------------------------------------------------------------------------------- /tools/tracepoint/open.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | BEGIN { 4 | out("%-18s %-16s %-6s\n", "PID", "COMM", "FILE"); 5 | } 6 | 7 | probe sys_enter_open { 8 | arg := args->filename; 9 | out("%-18d %-16s %-6s\n", pid(), comm(), arg); 10 | } 11 | -------------------------------------------------------------------------------- /tools/tracepoint/mmap.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | BEGIN { 4 | out("%-18s %-16s %-14s %-12s\n", "PID", "COMM", "LEN", "FD"); 5 | } 6 | 7 | probe sys_enter_mmap { 8 | len := args->len; 9 | fd := args->fd; 10 | 11 | out("%-18d %-16s %-14d %-12d\n", pid(), comm(), len, fd); 12 | } -------------------------------------------------------------------------------- /lang/main.vy: -------------------------------------------------------------------------------- 1 | #syscalls; 2 | 3 | BEGIN{ 4 | out("%-18s %-16s %-6s %s\n", "PID", "COMM", "FD", "PATH"); 5 | } 6 | 7 | probe sys_enter_open { 8 | enter[pid()] := args->filename; 9 | } 10 | 11 | probe sys_exit_open { 12 | ret := args->ret; 13 | out("%-18d %-16s %-6d %s\n", pid(), comm(), ret, enter[pid()]); 14 | } 15 | -------------------------------------------------------------------------------- /lang/include/annot.h: -------------------------------------------------------------------------------- 1 | #ifndef ANNOT_H 2 | #define ANNOT_H 3 | 4 | #include 5 | #include 6 | 7 | #include "ast.h" 8 | #include "bpflib.h" 9 | #include "symtable.h" 10 | #include "buffer.h" 11 | #include "probe.h" 12 | 13 | extern void get_annot(node_t *n, ebpf_t *e); 14 | extern void loc_assign(node_t *n, ebpf_t *e); 15 | extern void 
sema(node_t *n, ebpf_t *e); 16 | #endif 17 | -------------------------------------------------------------------------------- /lang/include/func.h: -------------------------------------------------------------------------------- 1 | #ifndef FUNC_H 2 | #define FUNC_H 3 | 4 | #include "ast.h" 5 | #include "bpflib.h" 6 | 7 | typedef struct builtin_t { 8 | const char *name; 9 | int (*annotate)(node_t *call); 10 | int (*compile)(node_t *call, ebpf_t *e); 11 | } builtin_t; 12 | 13 | #define builtin(_name, _annot, _compile) \ 14 | {.name = _name, .annotate = _annot, .compile = _compile} \ 15 | 16 | int global_annot(node_t *call); 17 | int global_compile(node_t *n, ebpf_t *e, type_t type); 18 | #endif -------------------------------------------------------------------------------- /lang/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -g 2 | 3 | HEADERS = include/*.h 4 | 5 | FRONT = lexer.c ast.c parser.c ut.c 6 | SEMA = annot.c func.c symtable.c 7 | BACK = bpflib.c buffer.c probe.c ir.c gen.c 8 | DSL = dsl.c 9 | SRCS = $(FRONT) $(SEMA) $(BACK) $(DSL) 10 | 11 | OBJS = $(SRCS:.c=.o) 12 | 13 | TSRCS = $(FRONT) test.c 14 | TOBJS = $(TSRCS:.c=.o) 15 | TBINS = test.exe 16 | 17 | %.o: %.c 18 | $(CC) $(CFLAGS) -Iinclude -c $< -o $@ 19 | 20 | all: $(OBJS) 21 | $(CC) -o voyant $(OBJS) $(LDFLAGS) 22 | 23 | test: $(TOBJS) 24 | $(CC) -o $(TBINS) $(TOBJS) $(LDFLAGS) 25 | ./$(TBINS) 26 | 27 | ct: 28 | rm -f $(TOBJS) $(TBINS) 29 | 30 | clean: 31 | rm -f $(OBJS) voyant 32 | 33 | .PHONY: all clean test ct -------------------------------------------------------------------------------- /lang/include/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | 4 | #include "lexer.h" 5 | #include "ast.h" 6 | 7 | typedef enum { 8 | LOWEST = 1, 9 | DEC, 10 | ASSIGN, //= 11 | LOG_OR, //|| 12 | LOG_AND, //&& 13 | EQUALS, //== 14 | LESSGREATERA, //> or < 15 | 
SUM, //+ 16 | PRODUCT, //* 17 | PREFIX, //!1 18 | PIPE, 19 | CALL, 20 | INDEX, 21 | } seq_t; 22 | 23 | typedef struct parser_t { 24 | lexer_t* lexer; 25 | token_t* this_tok; 26 | token_t* next_tok; 27 | } parser_t; 28 | 29 | parser_t* parser_init(lexer_t* l); 30 | node_t* parse_expr(parser_t* p, seq_t s); 31 | node_t* parse_block_stmts(parser_t* p); 32 | node_t* parse_program(parser_t* p); 33 | void free_parser(parser_t* p); 34 | #endif 35 | -------------------------------------------------------------------------------- /lang/include/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef BUFFER_H 2 | #define BUFFER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ast.h" 9 | 10 | typedef struct event { 11 | struct perf_event_header hdr; 12 | uint32_t size; 13 | uint64_t type; 14 | uint8_t data[0]; 15 | }__attribute__((packed)) event_t; 16 | 17 | typedef struct lost_event{ 18 | struct perf_event_header hdr; 19 | uint64_t id; 20 | uint64_t lost; 21 | } lost_event_t; 22 | 23 | typedef struct evhandler { 24 | TAILQ_ENTRY(evhandler) node; 25 | uint64_t type; 26 | void* priv; 27 | int (*handle)(event_t* ev, void* priv); 28 | } evhandler_t; 29 | 30 | typedef struct evqueue { 31 | int fd; 32 | struct perf_event_mmap_page* mem; 33 | void* buf; 34 | } evqueue_t; 35 | 36 | typedef struct evpipe { 37 | int mapfd; 38 | uint32_t ncpus; 39 | struct pollfd* poll; 40 | evqueue_t* q; 41 | } evpipe_t; 42 | 43 | struct ret_value { 44 | int val; 45 | unsigned err:1; 46 | unsigned exit:1; 47 | }; 48 | 49 | extern int evpipe_init(evpipe_t* evp, size_t qsize); 50 | extern void evhandler_register(evhandler_t* evh); 51 | extern struct ret_value evpipe_loop(evpipe_t* evp, int* sig, int strict); 52 | extern void map_dump(node_t* n); 53 | #endif 54 | -------------------------------------------------------------------------------- /lang/include/symtable.h: 
-------------------------------------------------------------------------------- 1 | #ifndef SYMTABLE_H 2 | #define SYMTABLE_H 3 | 4 | #include 5 | #include "ast.h" 6 | 7 | typedef enum { 8 | SYM_NONE, 9 | SYM_MAP, 10 | SYM_VAR, 11 | } sym_type; 12 | 13 | typedef struct smap_t{ 14 | int id; 15 | enum bpf_map_type type; 16 | size_t ksize, vsize, nelem; 17 | type_t ktype; 18 | ssize_t kaddr; 19 | node_t *map; 20 | } smap_t; 21 | 22 | typedef struct sym { 23 | sym_type type; 24 | const char *name; 25 | const char *cast; 26 | annot_t vannot; 27 | 28 | union{ 29 | node_t *var; 30 | smap_t *map; 31 | }; 32 | } sym_t; 33 | 34 | typedef struct symtable_t { 35 | size_t cap, len; 36 | sym_t *table; 37 | struct symtable_t *out; 38 | } symtable_t; 39 | 40 | extern symtable_t *symtable_new(); 41 | extern symtable_t *symtable_create(symtable_t *out); 42 | extern sym_t *symtable_get(symtable_t *st, const char *name); 43 | extern int sym_transfer(sym_t *st, node_t *n); 44 | extern void var_dec(symtable_t *st, node_t *var, node_t* expr); 45 | extern void map_dec(symtable_t *st, node_t *map, node_t* expr); 46 | extern sym_t *symtable_add(symtable_t *st, char *name); 47 | extern int sym_ref(symtable_t* st, node_t* node); 48 | extern int symtable_ref(symtable_t *st, node_t *n); 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /lang/include/bpflib.h: -------------------------------------------------------------------------------- 1 | #ifndef BPF_LIB_H 2 | #define BPF_LIB_H 3 | 4 | #include 5 | 6 | #include "buffer.h" 7 | #include "symtable.h" 8 | #include "ast.h" 9 | #include "ut.h" 10 | 11 | typedef struct ebpf_t{ 12 | char* name; 13 | ssize_t sp; 14 | symtable_t *st; 15 | evpipe_t *evp; 16 | struct bpf_insn *ip; 17 | struct bpf_insn prog[BPF_MAXINSNS]; 18 | } ebpf_t; 19 | 20 | extern ebpf_t *ebpf_new(); 21 | extern ssize_t ebpf_addr_get(node_t *n, ebpf_t *e); 22 | extern void ebpf_emit_mapld(ebpf_t *e, int reg, int fd); 23 | 
extern void ebpf_stack_zero(node_t *value, ebpf_t *code, int reg); 24 | extern void ebpf_emit(ebpf_t *code, struct bpf_insn insn); 25 | extern void ebpf_emit_at(struct bpf_insn *at, struct bpf_insn insn); 26 | extern void ebpf_value_copy(ebpf_t* code, ssize_t to, ssize_t from, size_t size); 27 | extern void ebpf_str_to_stack(ebpf_t *code, node_t *value); 28 | extern void ebpf_emit_map_look(ebpf_t* code, int fd, ssize_t kaddr); 29 | extern void ebpf_emit_map_update(ebpf_t* code, int fd, ssize_t kaddr, ssize_t vaddr); 30 | extern void ebpf_emit_count(ebpf_t* code, ssize_t addr); 31 | extern void ebpf_emit_bool(ebpf_t* code, int op, int r0, int r2); 32 | extern void ebpf_emit_read(ebpf_t* code, ssize_t to, int from, size_t size); 33 | extern void ebpf_emit_read_str(ebpf_t* code, ssize_t to, int from, size_t size); 34 | #endif -------------------------------------------------------------------------------- /lang/include/ir.h: -------------------------------------------------------------------------------- 1 | #ifndef IR_H 2 | #define IR_H 3 | 4 | #include "lexer.h" 5 | #include "ast.h" 6 | #include "parser.h" 7 | #include "annot.h" 8 | 9 | enum{ 10 | IR_ADD = 1, 11 | IR_SUB, 12 | IR_MUL, 13 | IR_DIV, 14 | IR_GT, 15 | IR_GE, 16 | IR_IMM, 17 | IR_STR, 18 | IR_MOV, 19 | IR_RETURN, 20 | IR_CALL, 21 | IR_RCALL, 22 | IR_EQ, 23 | IR_NE, 24 | IR_LE, 25 | IR_LT, 26 | IR_AND, 27 | IR_OR, 28 | IR_XOR, 29 | IR_SHL, 30 | IR_SHR, 31 | IR_MOD, 32 | IR_JMP, 33 | IR_BR, 34 | IR_IF_THEN, 35 | IR_IF_END, 36 | IR_ELSE_THEN, 37 | IR_ELSE_END, 38 | IR_MAP_UPDATE, 39 | IR_MAP_LOOK, 40 | IR_MAP_METHOD, 41 | IR_REC, 42 | IR_INIT, 43 | IR_LOAD, 44 | IR_ARG, 45 | IR_COPY, 46 | IR_READ, 47 | IR_CAST, 48 | IR_PUSH, 49 | IR_STW, 50 | IR_LOAD_SPILL, 51 | IR_STORE, 52 | IR_STORE_ARG, 53 | IR_STORE_SPILL, 54 | IR_NOP, 55 | }; 56 | 57 | typedef struct reg_t { 58 | int vn; 59 | int rn; 60 | int def; 61 | int end; 62 | bool spill; 63 | char *str; 64 | } reg_t; 65 | 66 | typedef struct bb_t { 67 | int 
label; 68 | vec_t *ir; 69 | reg_t *parm; 70 | 71 | vec_t *succ; 72 | vec_t *pred; 73 | vec_t *def_regs; 74 | vec_t *in_regs; 75 | vec_t *out_regs; 76 | } bb_t; 77 | 78 | typedef struct ir_t { 79 | int op; 80 | reg_t *r0; 81 | reg_t *r1; 82 | reg_t *r2; 83 | 84 | int imm; 85 | int label; 86 | node_t *value; 87 | bb_t *bb1; 88 | bb_t *bb2; 89 | 90 | ssize_t addr; 91 | size_t size; 92 | 93 | char *name; 94 | int nargs; 95 | reg_t *args[5]; 96 | 97 | vec_t *kill; 98 | reg_t *bbarg; 99 | } ir_t; 100 | 101 | typedef struct prog_t { 102 | char *name; 103 | node_t *ast; 104 | vec_t *data; 105 | vec_t *bbs; 106 | bool is_end; 107 | ebpf_t *ctx; 108 | } prog_t; 109 | 110 | extern reg_t *gen_expr(node_t *n); 111 | extern void gen_stmt(node_t *n); 112 | extern void dyn_assign(node_t *dst, node_t *src); 113 | extern int gen_ir(node_t *n); 114 | extern prog_t *gen_prog(node_t *n); 115 | extern void compile(prog_t* prog); 116 | #endif -------------------------------------------------------------------------------- /lang/include/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H 2 | #define LEXER_H 3 | 4 | #include 5 | 6 | #define is_char(c) isalnum(c) || c == '_' 7 | 8 | #define TOKEN_TYPE_TABLE \ 9 | TYPE(TOKEN_ILLEGAL, "Illegal") \ 10 | TYPE(TOKEN_INT, "Int") \ 11 | TYPE(TOKEN_IDENT, "Ident") \ 12 | TYPE(TOKEN_STRING, "String") \ 13 | TYPE(TOKEN_PROBE, "Probe") \ 14 | TYPE(TOKEN_PROFI, "Kprobe") \ 15 | TYPE(TOKEN_BEGIN, "Begin") \ 16 | TYPE(TOKEN_END, "End") \ 17 | TYPE(TOKEN_SLASH, "Slash") \ 18 | TYPE(TOKEN_COLON, "Colon") \ 19 | TYPE(TOKEN_COMMA, "Comma") \ 20 | TYPE(LEFT_BRACKET, "Left Bracket") \ 21 | TYPE(RIGHT_BRACKET, "Right Bracket") \ 22 | TYPE(LEFT_BLOCK, "Left Block") \ 23 | TYPE(RIGHT_BLOCK, "Right Block") \ 24 | TYPE(TOKEN_UNDERLINE, "Underline") \ 25 | TYPE(LEFT_PAREN, "Left Paren") \ 26 | TYPE(RIGHT_PAREN, "Right Paren") \ 27 | TYPE(TOKEN_ASSIGN, "Assign") \ 28 | TYPE(TOKEN_EQ, "Equal") \ 29 | 
TYPE(TOKEN_SEMICOLON, "Semicolon") \ 30 | TYPE(TOKEN_IF, "If") \ 31 | TYPE(TOKEN_UNROLL, "Unroll") \ 32 | TYPE(TOKEN_DEC, "Dec") \ 33 | TYPE(TOKEN_PLUS, "Plus") \ 34 | TYPE(TOKEN_STAR, "Star") \ 35 | TYPE(TOKEN_SUB, "Sub") \ 36 | TYPE(TOKEN_GE, "Ge") \ 37 | TYPE(TOKEN_GT, "Gt") \ 38 | TYPE(TOKEN_LT, "Lt") \ 39 | TYPE(TOKEN_LE, "Le") \ 40 | TYPE(TOKEN_HASH, "Hash") \ 41 | TYPE(TOKEN_ACCESS, "Access") \ 42 | TYPE(TOKEN_PIPE, "Pipe") \ 43 | TYPE(END_OF_FILE, "End of File") 44 | 45 | #define TYPE(_type, _typestr) _type, 46 | typedef enum token_type { 47 | TOKEN_TYPE_TABLE 48 | } token_type; 49 | #undef TYPE 50 | 51 | typedef struct token_t { 52 | char *literal; 53 | token_type type; 54 | } token_t; 55 | 56 | typedef struct lexer_t { 57 | size_t read_pos; 58 | size_t pos; 59 | char ch; 60 | char *input; 61 | } lexer_t; 62 | 63 | char *read_ident(lexer_t *lexer); 64 | token_type get_type(char *string); 65 | lexer_t *lexer_init(char *string); 66 | token_t *lexer_next_token(lexer_t *lexer); 67 | 68 | void free_token(token_t *tok); 69 | void free_lexer(lexer_t *lex); 70 | 71 | const char *token_to_str(token_type type); 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /lang/dsl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "dsl.h" 6 | #include "ut.h" 7 | 8 | static int term_sig = 0; 9 | static void term(int sig) { 10 | term_sig = sig; 11 | return; 12 | } 13 | 14 | void attach(node_t* node, ebpf_t* ctx, int id) { 15 | switch (node->type) { 16 | case NODE_TEST: 17 | bpf_test_attach(ctx); 18 | break; 19 | case NODE_KPROBE: 20 | bpf_kprobe_attach(ctx, id); 21 | break; 22 | case NODE_PROBE: 23 | bpf_probe_attach(ctx, id); 24 | break; 25 | default: 26 | break; 27 | } 28 | } 29 | 30 | void print_map(symtable_t* st) { 31 | int i; 32 | for (i = 0; i < st->len; i++) { 33 | if (st->table[i].type == SYM_MAP) { 34 | 
map_dump(st->table[i].map->map); 35 | } 36 | } 37 | } 38 | 39 | void _free(node_t* node) { 40 | node_t* head; 41 | vec_t* vec = vec_new(); 42 | size_t i; 43 | 44 | _foreach(head, node) { 45 | vec_push(vec, head); 46 | } 47 | 48 | for (i = 0; i < vec->len; i++) { 49 | node_t* value = vec->data[i]; 50 | free_node(value); 51 | } 52 | free(vec->data); 53 | free(vec); 54 | } 55 | 56 | void run(node_t* node) { 57 | node_t* head; 58 | ebpf_t* code; 59 | prog_t* prog; 60 | symtable_t* st = symtable_new(); 61 | evpipe_t* evp = vcalloc(1, sizeof(*evp)); 62 | evpipe_init(evp, 4<<10); 63 | 64 | _foreach(head, node) { 65 | code = ebpf_new(); 66 | code->evp = evp; 67 | code->st = st; 68 | 69 | sema(head, code); 70 | prog = gen_prog(head); 71 | prog->ctx = code; 72 | compile(prog); 73 | 74 | attach(head, prog->ctx, head->probe.traceid); 75 | } 76 | 77 | siginterrupt(SIGINT, 1); 78 | signal(SIGINT, term); 79 | evpipe_loop(evp, &term_sig, -1); 80 | print_map(st); 81 | } 82 | 83 | int main(int argc, char **argv) { 84 | char* filename, *input; 85 | lexer_t* lexer; 86 | parser_t* parser; 87 | node_t* node; 88 | ebpf_t* code; 89 | prog_t* prog; 90 | int id; 91 | symtable_t* st; 92 | 93 | if (argc != 2) { 94 | verror("should have the two args"); 95 | return 0; 96 | } 97 | 98 | filename = argv[1]; 99 | input = read_file(filename); 100 | 101 | if (!input) { 102 | verror("can not read file"); 103 | } 104 | 105 | lexer = lexer_init(input); 106 | parser = parser_init(lexer); 107 | node = parse_program(parser); 108 | 109 | run(node); 110 | _free(node); 111 | return 0; 112 | } -------------------------------------------------------------------------------- /lang/include/probe.h: -------------------------------------------------------------------------------- 1 | #ifndef SYSCALL_H 2 | #define SYSCALL_H 3 | 4 | #include "annot.h" 5 | #include 6 | 7 | typedef struct profile { 8 | int* efds; 9 | int num; 10 | } profile_t; 11 | 12 | #define BTF_MAX_NR_TYPES 0x7fffffffU 13 | #define 
BTF_MAX_STR_OFFSET 0x7fffffffU 14 | 15 | static struct btf_type btf_void; 16 | 17 | typedef struct btf_t{ 18 | void* raw_data; 19 | void* raw_data_swapped; 20 | __u32 raw_size; 21 | bool swapped_endian; 22 | struct btf_header* hdr; 23 | void* types_data; 24 | size_t types_data_cap; 25 | __u32* type_offs; 26 | size_t type_offs_cap; 27 | __u32 nr_types; 28 | struct btf* base_btf; 29 | int start_id; 30 | int start_str_off; 31 | void* strs_data; 32 | bool strs_deduped; 33 | bool owns_base; 34 | int fd; 35 | int ptr_sz; 36 | } btf_t; 37 | 38 | static inline __u16 btf_vlen(const struct btf_type *t) { 39 | return BTF_INFO_VLEN(t->info); 40 | } 41 | 42 | static inline __u16 btf_kind(const struct btf_type *t) { 43 | return BTF_INFO_KIND(t->info); 44 | } 45 | 46 | static inline struct btf_enum* btf_enum(const struct btf_type* t) 47 | { 48 | return (struct btf_enum*)(t + 1); 49 | } 50 | 51 | static inline struct btf_member* btf_members(const struct btf_type* t) 52 | { 53 | return (struct btf_member*)(t + 1); 54 | } 55 | 56 | static inline struct btf_param* btf_params(const struct btf_type* t) { 57 | return (struct btf_param*)(t + 1); 58 | } 59 | 60 | static inline struct btf_var_secinfo* btf_var_secinfos(const struct btf_type* t) 61 | { 62 | return (struct btf_var_secinfo*)(t + 1); 63 | } 64 | 65 | 66 | static inline struct btf_array* btf_array(const struct btf_type* t) { 67 | return (struct btf_array*)(t + 1); 68 | } 69 | 70 | static inline struct btf_var *btf_var(const struct btf_type *t) 71 | { 72 | return (struct btf_var *)(t + 1); 73 | } 74 | 75 | extern long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); 76 | extern int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn* insns, int insn_cnt); 77 | extern int bpf_map_create(enum bpf_map_type type, int key_sz, int val_sz, int entries); 78 | extern int bpf_map_update(int fd, void* key, void* val, int flags); 79 | extern int bpf_map_lookup(int fd, void* 
key, void* val); 80 | extern int bpf_read_field(field_t* field); 81 | extern int bpf_test_attach(ebpf_t* e); 82 | extern int bpf_get_probe_id(char* name); 83 | extern int bpf_get_kprobe_id(char* name); 84 | extern int bpf_probe_attach(ebpf_t* e, int id); 85 | extern int bpf_kprobe_attach(ebpf_t* ctx, int id); 86 | extern btf_t* btf_load_vmlinux(); 87 | extern int btf_get_field_off(const char *struct_name, const char *field_name); 88 | #endif 89 | -------------------------------------------------------------------------------- /lang/include/insn.h: -------------------------------------------------------------------------------- 1 | #ifndef INSN_H 2 | #define INSN_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define _ALIGN sizeof(int64_t) 9 | #define _ALIGNED(_size) (((_size) + _ALIGN - 1) & ~(_ALIGN - 1)) 10 | 11 | #define BPF_CTX_REG BPF_REG_9 12 | 13 | #define INSN(_code, _dst, _src, _off, _imm) \ 14 | ((struct bpf_insn){ \ 15 | .code = _code, \ 16 | .dst_reg = _dst, \ 17 | .src_reg = _src, \ 18 | .off = _off, \ 19 | .imm = _imm}) 20 | 21 | typedef enum extract{ 22 | EXTRACT_OP_NONE, 23 | EXTRACT_OP_MASK, 24 | EXTRACT_OP_SHIFT, 25 | EXTRACT_OP_DIV_1G, 26 | } extract_op_t; 27 | 28 | typedef enum op_t{ 29 | OP_ILLEGAL, 30 | OP_ADD, 31 | OP_SUB, 32 | OP_MUL, 33 | OP_DIV, 34 | OP_OR, 35 | OP_AND, 36 | OP_LSH, 37 | OP_RSH, 38 | OP_NEG, 39 | OP_MOD, 40 | OP_XOR, 41 | OP_MOV, 42 | OP_EQ, 43 | OP_GT, 44 | OP_GE, 45 | OP_LT, 46 | OP_LE, 47 | OP_NE, 48 | OP_JSGT, 49 | OP_JA, 50 | OP_PIPE, 51 | OP_ACCESS, 52 | } op_t; 53 | 54 | 55 | #define MOV(_dst, _src) INSN(BPF_ALU64 | BPF_MOV | BPF_X, _dst, _src, 0, 0) 56 | #define MOV32_IMM(_dst, _imm) INSN(BPF_ALU | BPF_MOV | BPF_K, _dst, 0, 0, _imm) 57 | #define MOV_IMM(_dst, _imm) INSN(BPF_ALU64 | BPF_MOV | BPF_K, _dst, 0, 0, _imm) 58 | 59 | #define EXIT INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0) 60 | #define CALL(_imm) INSN(BPF_JMP | BPF_CALL, 0, 0, 0, _imm) 61 | 62 | #define JMP(_op, _dst, _src, _off) INSN(BPF_JMP | 
BPF_OP((_op)) | BPF_X, _dst, _src, _off, 0) 63 | #define JMP_IMM(_op, _dst, _imm, _off) INSN(BPF_JMP | BPF_OP((_op)) | BPF_K, _dst, 0, _off, _imm) 64 | 65 | #define ALU(_op, _dst, _src) INSN(BPF_ALU64 | BPF_OP((_op)) | BPF_X, _dst, _src, 0, 0) 66 | #define ALU_IMM(_op, _dst, _imm) INSN(BPF_ALU64 | BPF_OP((_op)) | BPF_K, _dst, 0, 0, _imm) 67 | 68 | #define STW_IMM(_dst, _off, _imm) INSN(BPF_ST | BPF_SIZE(BPF_W) | BPF_MEM, _dst, 0, _off, _imm) 69 | #define STXDW(_dst, _off, _src) INSN(BPF_STX | BPF_SIZE(BPF_DW) | BPF_MEM, _dst, _src, _off, 0) 70 | #define STXB(_dst, _off, _src) INSN(BPF_STX | BPF_SIZE(BPF_B) | BPF_MEM, _dst, _src, _off, 0) 71 | #define STXH(_dst, _off, _src) INSN(BPF_STX | BPF_SIZE(BPF_H) | BPF_MEM, _dst, _src, _off, 0) 72 | #define STXW(_dst, _off, _src) INSN(BPF_STX | BPF_SIZE(BPF_W) | BPF_MEM, _dst, _src, _off, 0) 73 | 74 | #define LDXDW(_dst, _off, _src) INSN(BPF_LDX | BPF_SIZE(BPF_DW) | BPF_MEM, _dst, _src, _off, 0) 75 | #define LDXB(_dst, _off, _src) INSN(BPF_LDX | BPF_SIZE(BPF_B) | BPF_MEM, _dst, _src, _off, 0) 76 | #define LDXH(_dst, _off, _src) INSN(BPF_LDX | BPF_SIZE(BPF_H) | BPF_MEM, _dst, _src, _off, 0) 77 | #define LDXW(_dst, _off, _src) INSN(BPF_LDX | BPF_SIZE(BPF_W) | BPF_MEM, _dst, _src, _off, 0) 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /lang/include/ut.h: -------------------------------------------------------------------------------- 1 | #ifndef UT_H 2 | #define UT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define MAX_ERRNO 4095 11 | #define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO) 12 | #define _size(arr) (sizeof(arr) / sizeof((arr)[0])) 13 | #define _foreach(_n, _in) for ((_n) = (_in); (_n); (_n) = (_n)->next) 14 | #define __printf(a, b) __attribute__((format(printf, a, b))) 15 | 16 | #define _d(_fmt, ...)\ 17 | fprintf(stderr, "DBG %-20s: " _fmt "\n", __func__, ##__VA_ARGS__); 18 | 19 | #define _e(_fmt, ...) 
\ 20 | fprintf(stderr, "ERR %-20s: " _fmt "\n", __func__, ##__VA_ARGS__); 21 | 22 | 23 | #define __pr(_level, _fmt, ...)\ 24 | do{\ 25 | ut_print(_level, "voyant: " _fmt, ##__VA_ARGS__);\ 26 | }while(0) 27 | 28 | #define _pr_warn(_fmt, ...) __pr(PRINT_WARN, _fmt, ##__VA_ARGS__) 29 | #define _pr_info(_fmt, ...) __pr(PRINT_INFO, _fmt, ##__VA_ARGS__) 30 | #define _pr_debug(_fmt, ...) __pr(PRINT_DEBUG, _fmt, ##__VA_ARGS__) 31 | 32 | 33 | static inline bool IS_ERR_OR_NULL(const void* ptr) { 34 | return (!ptr) || IS_ERR_VALUE((unsigned long)ptr); 35 | } 36 | 37 | static inline void* ERR_PTR(long error_) { 38 | return (void*) error_; 39 | } 40 | 41 | static inline long PTR_ERR(const void* ptr) { 42 | return (long) ptr; 43 | } 44 | 45 | static inline long IS_ERR(const void* ptr) { 46 | return IS_ERR_VALUE((unsigned long) ptr); 47 | } 48 | 49 | static inline void* ut_err(void* ret) { 50 | if (IS_ERR(ret)) { 51 | errno = -PTR_ERR(ret); 52 | } 53 | 54 | return IS_ERR(ret) ? NULL : ret; 55 | } 56 | 57 | static inline void* ut_ptr(void* ret) { 58 | if (IS_ERR(ret)) 59 | errno = -PTR_ERR(ret); 60 | 61 | return IS_ERR(ret) ? 
NULL : ret; 62 | } 63 | 64 | static inline int libbpf_err(int ret) { 65 | if (ret < 0) 66 | errno = -ret; 67 | 68 | return ret; 69 | } 70 | 71 | static inline void *ut_reallocarray(void *ptr, size_t nmemb, size_t size) { 72 | size_t total; 73 | 74 | total = nmemb * size; 75 | return realloc(ptr, total); 76 | } 77 | 78 | enum print_level { 79 | PRINT_WARN, 80 | PRINT_INFO, 81 | PRINT_DEBUG, 82 | }; 83 | 84 | typedef int(*ut_print_fn_t)(enum print_level level, const char*, va_list ap); 85 | 86 | typedef struct vec_t { 87 | int len; 88 | int cap; 89 | void **data; 90 | } vec_t; 91 | 92 | extern noreturn void verror(char *fmt, ...); 93 | extern long get_error(const void* ptr); 94 | extern vec_t *vec_new(); 95 | extern void vec_push(vec_t *vec, void *data); 96 | extern bool vec_contains(vec_t *vec, void *elem); 97 | extern bool vec_union(vec_t *vec, void *elem); 98 | extern void *vmalloc(size_t len); 99 | extern void *vcalloc(size_t len1, size_t len2); 100 | extern void *vrealloc(void *p, size_t size); 101 | extern bool vstreq(char *s1, char *s2); 102 | extern char *vstr(char *str); 103 | extern char *str_escape(char *str); 104 | extern FILE *fopenf(const char *mode, const char *fmt, ...); 105 | extern char* read_file(char* name); 106 | extern void output_hist(FILE* fp, int log2, int64_t count, int64_t max); 107 | extern void *ut_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, 108 | size_t cur_cnt, size_t max_cnt, size_t add_cnt); 109 | #endif -------------------------------------------------------------------------------- /lang/ast.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "ut.h" 5 | #include "ast.h" 6 | 7 | node_t *node_new(node_type t) { 8 | node_t *n = vmalloc(sizeof(*n)); 9 | 10 | n->type = t; 11 | return n; 12 | } 13 | 14 | node_t *node_var_new(char *name) { 15 | node_t *n = node_new(NODE_VAR); 16 | 17 | n->name = name; 18 | return n; 19 | } 20 | 21 | node_t 
*node_str_new(char *str) { 22 | node_t *n = node_new(NODE_STR); 23 | n->name = str; 24 | return n; 25 | } 26 | 27 | node_t *node_int_new(size_t integer) { 28 | node_t *n = node_new(NODE_INT); 29 | n->integer = integer; 30 | return n; 31 | } 32 | 33 | node_t *node_expr_new(int opcode, node_t *left, node_t *right) { 34 | node_t *n = node_new(NODE_EXPR); 35 | 36 | n->expr.opcode = opcode; 37 | n->expr.left = left; 38 | n->expr.right = right; 39 | 40 | return n; 41 | } 42 | 43 | node_t *node_assign_new(node_t *left, node_t *expr) { 44 | node_t *n = node_new(NODE_ASSIGN); 45 | 46 | n->assign.op = OP_MOV; 47 | n->assign.lval = left; 48 | n->assign.expr = expr; 49 | 50 | return n; 51 | } 52 | 53 | node_t *node_rec_new(node_t *args) { 54 | node_t *n = node_new(NODE_REC); 55 | n->rec.args = args; 56 | return n; 57 | } 58 | 59 | node_t *node_if_new(node_t *cond, node_t *then, node_t *els) { 60 | node_t *c, *n = node_new(NODE_IF); 61 | 62 | n->iff.cond = cond; 63 | n->iff.then = then; 64 | n->iff.els = els; 65 | 66 | return n; 67 | } 68 | 69 | node_t *node_unroll_new(size_t count, node_t *stmts) { 70 | node_t *n = node_new(NODE_UNROLL); 71 | 72 | n->unroll.count = count; 73 | n->unroll.stmts = stmts; 74 | 75 | return n; 76 | } 77 | 78 | node_t* node_cast_new(char* name, char* value) { 79 | node_t* n = node_new(NODE_CAST); 80 | n->cast.name = name; 81 | n->cast.value = value; 82 | return n; 83 | } 84 | 85 | node_t *node_dec_new(node_t *var, node_t *expr) { 86 | node_t *n = node_new(NODE_DEC); 87 | 88 | n->dec.var = var; 89 | n->dec.expr = expr; 90 | 91 | return n; 92 | } 93 | 94 | node_t *node_probe_new(char *name, node_t *stmts) { 95 | node_t *n = node_new(NODE_PROBE); 96 | 97 | n->probe.name = name; 98 | n->probe.stmts = stmts; 99 | 100 | return n; 101 | } 102 | 103 | node_t *node_kprobe_new(char *name, node_t *stmts) { 104 | node_t *n = node_new(NODE_KPROBE); 105 | 106 | n->probe.name = name; 107 | n->probe.stmts = stmts; 108 | 109 | return n; 110 | } 111 | 112 | node_t* 
node_test_new(char* name, node_t* stmts) { 113 | node_t* n = node_new(NODE_TEST); 114 | 115 | n->probe.name = name; 116 | n->probe.stmts = stmts; 117 | 118 | return n; 119 | } 120 | 121 | static int do_list(node_t *head) { 122 | node_t *elem, *next = head; 123 | 124 | for (elem = next; elem;) { 125 | next = elem->next; 126 | free_node(elem); 127 | elem = next; 128 | } 129 | 130 | return 0; 131 | } 132 | 133 | 134 | void free_node(node_t *node) { 135 | switch (node->type) { 136 | case NODE_PROBE: 137 | case NODE_KPROBE: 138 | free(node->probe.name); 139 | do_list(node->probe.stmts); 140 | break; 141 | case NODE_CALL: 142 | free(node->name); 143 | if (node->call.args) 144 | do_list(node->call.args); 145 | break; 146 | case NODE_DEC: 147 | free(node->dec.var); 148 | free(node->dec.expr); 149 | break; 150 | case NODE_REC: 151 | do_list(node->rec.args); 152 | break; 153 | case NODE_STR: 154 | free(node->name); 155 | break; 156 | default: 157 | break; 158 | } 159 | } -------------------------------------------------------------------------------- /tools/doc-zh.md: -------------------------------------------------------------------------------- 1 | ## cpu篇章 2 | 3 | 4 | ## 内存篇 5 | 6 | ### mmap 7 | 8 | ```c 9 | void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); 10 | ``` 11 | - 参数start:指向欲映射的内存起始地址,通常设为 NULL,代表让系统自动选定地址,映射成功后返回该地址。 12 | - 参数length:代表将文件中多大的部分映射到内存。 13 | - 参数prot: 映射区域的保护方式。可以为以下几种方式的组合。 14 | - 参数flags:影响映射区域的各种特性。在调用mmap()时必须要指定MAP_SHARED 或MAP_PRIVATE。 15 | - 参数fd:要映射到内存中的文件描述符. 
16 | - 参数offset:文件映射的偏移量,通常设置为0,代表从文件最前方开始对应,offset必须是分页大小的整数倍。 17 | 18 | ```c 19 | #syscalls; 20 | 21 | BEGIN { 22 | out("%-18s %-16s %-6s\n", "PID", "COMM", "LEN"); 23 | } 24 | 25 | probe sys_enter_mmap { 26 | arg := args->len; 27 | out("%-18d %-16s %-6d\n", pid(), comm(), arg); 28 | } 29 | ``` 30 | 31 | 32 | ### sys_enter_brk 33 | 34 | 应用程序的数据存放于堆内存中, 堆内存通过 brk(2) 系统 调 用 进行 扩 展。 35 | 跟踪brk(2)调用,并且展示导致增长的用户态调用栈信息相对来说是很有用的分析信息。 36 | 同时还有一个sbrk(2)变体调用。在Linux 中,sbrk(2)是以库函数形式实现的,内部仍 然使用brk(2)系统调用。 37 | 38 | brk(2)这个系统调用可以使用 sys_enter_brk 这个函数来进行跟踪 39 | 40 | ```c 41 | #syscalls; 42 | 43 | BEGIN { 44 | out("%-18s %-16s\n", "PID", "COMM"); 45 | } 46 | 47 | probe sys_enter_brk { 48 | map[comm()] |> count(); 49 | out("%-18d %-16s\n", pid(), comm()); 50 | } 51 | ``` 52 | 53 | ### vmscan 54 | 55 | vmscan(8) 使用vmscan 跟踪点来观 察页换出守护进程(kswapd)的操作,该进程在系统内存压力上升时负责释放内存以便重用。 56 | 57 | ### page_fault_user 58 | 59 | 使用下面这个程序, 我们可以检测用户态的page fault 60 | 61 | ```c 62 | #exceptions; 63 | 64 | BEGIN { 65 | out("%-18s %-16s\n", "PID", "COMM"); 66 | } 67 | 68 | probe page_fault_user{ 69 | map[comm()] |> count(); 70 | out("%-18d %-16s\n", pid(), comm()); 71 | } 72 | ``` 73 | 74 | ## 文件系统 75 | 76 | ### ext4_da_write_begin 77 | 78 | ```c 79 | #ext4; 80 | 81 | BEGIN { 82 | out("%-18s %-16s %-6s\n", "PID", "COMM", "LEN"); 83 | } 84 | 85 | probe ext4_da_write_begin { 86 | arg := args->len; 87 | out("%-18d %-16s %-6d\n", pid(), comm(), arg); 88 | } 89 | ``` 90 | 91 | ### sys_enter_open 92 | 93 | ```c 94 | #syscall; 95 | 96 | probe sys_enter_open { 97 | arg := args->filename; 98 | 99 | out("%s\n", arg); 100 | } 101 | ``` 102 | 103 | ### sys_enter_sync 104 | 105 | 这个函数可以用来检测sync这个系统调用, sync(2)的作用是将修改过的数据写回磁盘 106 | ```c 107 | #syscall; 108 | 109 | probe sys_enter_sync { 110 | out("%s\n", comm()); 111 | } 112 | ``` 113 | 114 | ### sys_enter_read 115 | 116 | 文件描述符到文件名称的转化, 117 | 118 | - 通过tasks truct找到文件描述符表,同时利用FD作为索引值找到对应的file结构体。 文件名称可以从这个结构体中读取 。 scread(2) 用的就是这种方法。 
不过这种方法并不十分稳定:找到文件描述符表的方式(task->files->fdt->fd)利用了内核中的一些内部实现细节,每个内核版本都不一定一样,所以这会导致该脚本无法跨版本使用。 119 | 120 | - 通过跟踪open(2)系统调用,构造一个以PID和FD为键的哈希表,值为对应的文件名和路径名。这样就可以在处理read(2)以及其他系统调用的时候进行查询了。虽然这样增加了一个额外的探针(带来了额外的性能消耗),但是却比较稳定。 121 | 122 | ```c 123 | #syscalls; 124 | 125 | probe sys_enter_read{ 126 | arg := args->filename; 127 | out("%s %s", comm(), arg); 128 | } 129 | ``` 130 | 131 | ## 网络 132 | 133 | `net_dev_start_xmit` 是 Linux 内核网络子系统的一部分,它是一个内联函数,用于启动网络设备的数据包发送流程。 134 | 这个函数通过网络设备操作集(net_device_ops)指定的特定函数来启动给定数据包的发送。 135 | 136 | ```c 137 | #net; 138 | 139 | BEGIN { 140 | out("%s\n", "Tracing unusual skb drop stacks. Hit Ctrl-C to end."); 141 | } 142 | 143 | probe net_dev_start_xmit{ 144 | map[comm()] |> count(); 145 | } 146 | ``` 147 | 148 | ```c 149 | #skb; 150 | 151 | BEGIN { 152 | out("%s\n", "Tracing unusual skb drop stacks. Hit Ctrl-C to end."); 153 | } 154 | 155 | probe consume_skb{ 156 | map[comm()] |> count(); 157 | } 158 | ``` 159 | 160 | ```c 161 | #skb; 162 | 163 | probe kfree_skb { 164 | skb[comm()] |> count(); 165 | } 166 | ``` 167 | 168 | 169 | 170 | ## 安全 171 | 172 | 使用下面这个系统调用来监控对应的容器 173 | ```c 174 | #syscalls; 175 | 176 | probe sys_enter_renameat2{ 177 | name := args->oldname; 178 | out("%s %s\n", comm(), name); 179 | } 180 | ``` -------------------------------------------------------------------------------- /lang/include/ast.h: -------------------------------------------------------------------------------- 1 | #ifndef AST_H 2 | #define AST_H 3 | 4 | #include 5 | #include 6 | #include "insn.h" 7 | 8 | typedef enum node_type{ 9 | NODE_SCRIPT, 10 | NODE_PROBE, 11 | NODE_KPROBE, 12 | NODE_TEST, 13 | NODE_PROBE_PRED, 14 | NODE_IF, 15 | NODE_UNROLL, 16 | NODE_PREFIX_EXPR, 17 | NODE_EXPR, 18 | NODE_LOGAND, 19 | NODE_LOGOR, 20 | NODE_DEC, 21 | NODE_VAR, 22 | NODE_MAP, 23 | NODE_REC, 24 | NODE_ASSIGN, 25 | NODE_CALL, 26 | NODE_STR, 27 | NODE_INT, 28 | NODE_CAST, 29 | } node_type; 30 | 31 | typedef struct node_t node_t; 32 | 33 | typedef enum {
34 | PRODE_KPROBE, 35 | PROBE_PROBE, 36 | } probe_type; 37 | 38 | typedef struct probe_t { 39 | char *name; 40 | int traceid; 41 | node_t* stmts; 42 | } probe_t; 43 | 44 | typedef struct call_t { 45 | node_t *args; 46 | } call_t; 47 | 48 | typedef struct infix_t { 49 | int opcode; 50 | node_t *left, *right; 51 | } infix_t; 52 | 53 | typedef struct prefix_t { 54 | int opcode; 55 | node_t *right; 56 | } prefix_t; 57 | 58 | typedef struct assign_t { 59 | op_t op; 60 | node_t *lval, *expr; 61 | } assign_t; 62 | 63 | typedef struct map_t { 64 | node_t *args; 65 | } map_t; 66 | 67 | typedef struct rec_t { 68 | node_t *args; 69 | } rec_t; 70 | 71 | typedef struct iff_t { 72 | node_t *cond; 73 | node_t *then; 74 | node_t *els; 75 | } iff_t; 76 | 77 | typedef struct unroll_t { 78 | size_t count; 79 | node_t *stmts; 80 | } unroll_t; 81 | 82 | typedef struct dec_t { 83 | node_t *var; 84 | node_t *expr; 85 | } dec_t; 86 | 87 | typedef enum loc_t { 88 | LOC_NOWHERE, 89 | LOC_REG, 90 | LOC_STACK, 91 | } loc_t; 92 | 93 | typedef enum type_t { 94 | TYPE_SCRIPT, 95 | TYPE_PROBE, 96 | TYPE_KPROBE, 97 | TYPE_TEST, 98 | TYPE_PROBE_PRED, 99 | TYPE_IF, 100 | TYPE_UNROLL, 101 | TYPE_PREFIX_EXPR, 102 | TYPE_EXPR, 103 | TYPE_LOGAND, 104 | TYPE_LOGOR, 105 | TYPE_DEC, 106 | TYPE_VAR, 107 | TYPE_MAP, 108 | TYPE_REC, 109 | TYPE_ASSIGN, 110 | TYPE_CALL, 111 | TYPE_STR, 112 | TYPE_CAST, 113 | TYPE_INT, 114 | TYPE_MAP_METHOD, 115 | TYPE_NULL, 116 | } type_t; 117 | 118 | typedef struct annot_t { 119 | type_t type; 120 | int mapid; 121 | int isarg; 122 | size_t ksize; 123 | ssize_t size; 124 | size_t offs; 125 | 126 | loc_t loc; 127 | ssize_t addr; 128 | } annot_t; 129 | 130 | typedef struct field_t{ 131 | char* name; 132 | char* field; 133 | type_t type; 134 | size_t offs; 135 | } field_t; 136 | 137 | typedef struct cast_t{ 138 | char* name, *value; 139 | } cast_t; 140 | 141 | struct node_t { 142 | char *name; 143 | node_type type; 144 | node_t *parent, *next; 145 | 146 | union{ 147 | probe_t 
probe; 148 | infix_t expr; 149 | prefix_t pexpr; 150 | dec_t dec; 151 | iff_t iff; 152 | unroll_t unroll; 153 | call_t call; 154 | map_t map; 155 | rec_t rec; 156 | cast_t cast; 157 | assign_t assign; 158 | size_t integer; 159 | }; 160 | 161 | annot_t annot; 162 | }; 163 | 164 | 165 | extern node_t *node_new(node_type t); 166 | extern node_t *node_probe_new(char *name, node_t *stmts); 167 | extern node_t *node_kprobe_new(char *name, node_t *stmts); 168 | extern node_t *node_test_new(char* name, node_t* stmts); 169 | extern node_t *node_var_new(char *name); 170 | extern node_t *node_int_new(size_t name); 171 | extern node_t *node_str_new(char *str); 172 | extern node_t *node_expr_new(int opcode, node_t *left, node_t *right); 173 | extern node_t *node_if_new(node_t *cond, node_t *then, node_t *els); 174 | extern node_t *node_unroll_new(size_t count, node_t *stmts); 175 | extern node_t *node_rec_new(node_t *args); 176 | extern node_t *node_assign_new(node_t *left, node_t *expr); 177 | extern node_t *node_dec_new(node_t *var, node_t *expr); 178 | extern node_t *node_cast_new(char* name, char* value); 179 | extern void free_node(node_t* node); 180 | 181 | #endif 182 | -------------------------------------------------------------------------------- /lang/symtable.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "symtable.h" 5 | #include "ut.h" 6 | 7 | static void sym_init(symtable_t *st) { 8 | sym_t *sym; 9 | 10 | sym = &st->table[st->len++]; 11 | sym->vannot.type = TYPE_INT; 12 | sym->vannot.size = 8; 13 | sym->name = "#"; 14 | } 15 | 16 | symtable_t *symtable_new() { 17 | symtable_t *st; 18 | 19 | st = vmalloc(sizeof(*st)); 20 | st->cap = 16; 21 | st->table = vcalloc(st->cap, sizeof(*st->table)); 22 | 23 | sym_init(st); 24 | 25 | return st; 26 | } 27 | 28 | symtable_t* symtable_create(symtable_t* out) { 29 | symtable_t* st; 30 | 31 | st = vmalloc(sizeof(*st)); 32 | st->cap = 16; 33 | 
st->table = vcalloc(st->cap, sizeof(*st->table)); 34 | st->out = out; 35 | 36 | sym_init(st); 37 | return st; 38 | } 39 | /* Linear lookup by name in this table only; returns NULL when no entry matches. */ 40 | sym_t *symtable_get(symtable_t *st, const char *name) { 41 | size_t i; 42 | 43 | for (i = 0; i < st->len; i++) { 44 | if (!strcmp(st->table[i].name, name)) { 45 | return &st->table[i]; 46 | } 47 | } 48 | 49 | return NULL; 50 | } 51 | /* Copy the symbol's annotation onto a VAR/MAP node; for a map, also stamp the key type and key address onto the key argument. Aborts via verror() for any other node type. */ 52 | int sym_transfer(sym_t* sym, node_t *n) { 53 | if (n->type != NODE_VAR && n->type != NODE_MAP) { 54 | verror("invalid node type provided"); 55 | } 56 | 57 | if (n->type == NODE_MAP){ 58 | node_t* args; 59 | 60 | args = n->map.args; 61 | args->annot.type = sym->map->ktype; 62 | args->annot.addr = sym->map->kaddr; 63 | } 64 | 65 | n->annot = sym->vannot; 66 | return 0; 67 | } 68 | /* Append a symbol named `name`, growing the table by 16 zeroed slots when it is full. Growth may move the table, so sym_t pointers obtained before the call must not be reused afterwards. Aborts via verror() when the table cannot be grown. */ 69 | sym_t* symtable_add(symtable_t* st, char* name) { 70 | sym_t* sym; 71 | 72 | if (st->len == st->cap) { 73 | st->cap += 16; 74 | st->table = realloc(st->table, st->cap * sizeof(*st->table)); if (!st->table) verror("symtable_add: out of memory"); /* the memset below would otherwise write through NULL */ 75 | memset(&st->table[st->len], 0, 16 * sizeof(*st->table)); 76 | } 77 | 78 | sym = &st->table[st->len++]; 79 | sym->name = name; 80 | 81 | return sym; 82 | } 83 | /* Declare variable `var`; aborts via verror() when the name is already present in `st`. */ 84 | void var_dec(symtable_t* st, node_t* var, node_t* expr) { 85 | char* name; 86 | sym_t* sym; 87 | 88 | name = var->name; 89 | 90 | sym = symtable_get(st, name); 91 | 92 | if (sym) { 93 | verror("Variable '%s' is already defined.", name); 94 | } 95 | 96 | sym = symtable_add(st, name); 97 | sym->type = SYM_VAR; 98 | sym->vannot = var->annot; 99 | 100 | if (expr->annot.type == TYPE_CAST) { 101 | sym->cast = expr->cast.name; 102 | sym->vannot.offs = expr->annot.offs; 103 | } 104 | 105 | sym->var = var; 106 | } 107 | 108 | /* Create the BPF hash map backing `map` (1024 entries) and return its bookkeeping record. NOTE(review): the bpf_map_create() result is stored without a check - confirm how that wrapper reports failure. */ 109 | smap_t* map_create(node_t* map) { 110 | ssize_t ksize, vsize; 111 | smap_t* smap; 112 | 113 | ksize = map->annot.ksize; 114 | vsize = map->annot.size; 115 | 116 | map->annot.mapid = bpf_map_create( 117 | BPF_MAP_TYPE_HASH, ksize, vsize, 1024); 118 | 119 | smap = calloc(1, sizeof(*smap)); if (!smap) verror("map_create: out of memory"); /* the stores below would otherwise dereference NULL */ 120 | 121 | smap->ksize = ksize; 122 | smap->vsize = vsize; 123 |
smap->ktype = map->map.args->annot.type; 124 | smap->id = map->annot.mapid; 125 | smap->map = map; 126 | 127 | return smap; 128 | } 129 | /* Declare map `map`; aborts via verror() when the name is already present in `st`. */ 130 | void map_dec(symtable_t* st, node_t* map, node_t* expr) { 131 | sym_t* sym; 132 | char* name; 133 | smap_t* smap; 134 | 135 | name = map->name; 136 | sym = symtable_get(st, name); 137 | 138 | if (sym) { 139 | verror("map '%s' is already defined.", name); 140 | } 141 | 142 | smap = map_create(map); 143 | 144 | sym = symtable_add(st, name); 145 | sym->type = SYM_MAP; 146 | sym->vannot = map->annot; 147 | sym->map = smap; 148 | } 149 | /* Resolve `var` by name: returns 0 after copying the symbol's annotation onto it, -1 when the name is unknown. */ 150 | int sym_ref(symtable_t* st, node_t* var) { 151 | sym_t* sym; 152 | 153 | sym = symtable_get(st, var->name); 154 | 155 | if (sym) { 156 | sym_transfer(sym, var); 157 | return 0; 158 | } 159 | 160 | return -1; 161 | } 162 | /* Resolve a VAR or MAP node against `st`. Returns sym_ref()'s result for those node types and -1 for every other node type (previously the function fell off its end there and returned an indeterminate value). NOTE(review): -1 mirrors sym_ref()'s "not resolved" code - confirm no caller passes other node types and expects success. */ 163 | int symtable_ref(symtable_t* st, node_t* n) { 164 | switch (n->type) { 165 | case NODE_VAR: 166 | return sym_ref(st, n); 167 | case NODE_MAP: 168 | return sym_ref(st, n); 169 | default: 170 | break; 171 | } return -1; 172 | } -------------------------------------------------------------------------------- /lang/bpflib.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "bpflib.h" 4 | /* Allocate a zeroed program buffer with the instruction pointer at its first slot. */ 5 | ebpf_t *ebpf_new() { 6 | ebpf_t *code = vcalloc(1, sizeof(*code)); 7 | code->ip = code->prog; 8 | return code; 9 | } 10 | /* Append one instruction and advance the instruction pointer. NOTE(review): no capacity check against the prog buffer is visible here. */ 11 | void ebpf_emit(ebpf_t *code, struct bpf_insn insn) { 12 | assert(code != NULL); 13 | *(code->ip)++ = insn; 14 | } 15 | /* Overwrite the instruction stored at `at`. */ 16 | void ebpf_emit_at(struct bpf_insn *at, struct bpf_insn insn) { 17 | assert(at != NULL); 18 | *at = insn; 19 | } 20 | /* Emit the two-slot 64-bit immediate load that places map `fd` into `reg`. */ 21 | void ebpf_emit_mapld(ebpf_t *e, int reg, int fd) { 22 | ebpf_emit(e, INSN(BPF_LD | BPF_DW | BPF_IMM, reg, BPF_PSEUDO_MAP_FD, 0, fd)); 23 | ebpf_emit(e, INSN(0, 0, 0, 0, 0)); 24 | } 25 | /* Reserve value->annot.size bytes by lowering the stack offset; returns the new offset. */ 26 | ssize_t ebpf_addr_get(node_t *value, ebpf_t *code) { 27 | code->sp -= value->annot.size; 28 | return code->sp; 29 | } 30 | 31 | void ebpf_stack_zero(node_t *value, ebpf_t *code,
int reg) { 32 | size_t i; 33 | annot_t to; 34 | size_t size; 35 | 36 | to = value->annot; 37 | size = to.size; 38 | 39 | ebpf_emit(code, MOV_IMM(reg, 0)); 40 | 41 | for (i = 0; i < size; i += sizeof(int64_t)) { 42 | ebpf_emit(code, STXDW(BPF_REG_10, to.addr + i, reg)); 43 | } 44 | } 45 | 46 | void ebpf_str_to_stack(ebpf_t *code, node_t *value) { 47 | ssize_t size, at, left; 48 | void *obj; 49 | uint32_t *str; 50 | 51 | at = value->annot.addr; 52 | size = value->annot.size; 53 | obj = value->name; 54 | str = obj; 55 | left = size / sizeof(*str); 56 | 57 | for (; left; left--, str++, at += sizeof(*str)) { 58 | ebpf_emit(code, STW_IMM(BPF_REG_10, at, *str)); 59 | } 60 | } 61 | 62 | void ebpf_value_copy(ebpf_t *code, ssize_t to, ssize_t from, size_t size) { 63 | while (size >= 8) { 64 | ebpf_emit(code, LDXDW(BPF_REG_0, from, BPF_REG_10)); 65 | ebpf_emit(code, STXDW(BPF_REG_10, to, BPF_REG_0)); 66 | 67 | to += 8; 68 | from += 8; 69 | size -= 8; 70 | } 71 | 72 | if (size >= 4) { 73 | ebpf_emit(code, LDXW(BPF_REG_0, from, BPF_REG_10)); 74 | ebpf_emit(code, STXW(BPF_REG_10, to, BPF_REG_0)); 75 | to += 4; 76 | from += 4; 77 | size -= 4; 78 | } 79 | 80 | if (size >= 2) { 81 | ebpf_emit(code, LDXH(BPF_REG_0, from, BPF_REG_10)); 82 | ebpf_emit(code, STXH(BPF_REG_10, to, BPF_REG_0)); 83 | to += 2; 84 | from += 2; 85 | size -= 2; 86 | } 87 | 88 | if (size) { 89 | ebpf_emit(code, LDXB(BPF_REG_0, from, BPF_REG_10)); 90 | ebpf_emit(code, STXB(BPF_REG_10, to, BPF_REG_0)); 91 | } 92 | } 93 | 94 | void ebpf_emit_map_look(ebpf_t *code, int fd, ssize_t kaddr) { 95 | ebpf_emit_mapld(code, BPF_REG_1, fd); 96 | ebpf_emit(code, MOV(BPF_REG_2, BPF_REG_10)); 97 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_2, kaddr)); 98 | ebpf_emit(code, CALL(BPF_FUNC_map_lookup_elem)); 99 | } 100 | 101 | void ebpf_emit_map_update(ebpf_t *code, int fd, ssize_t kaddr, ssize_t vaddr) { 102 | ebpf_emit_mapld(code, BPF_REG_1, fd); 103 | 104 | ebpf_emit(code, MOV(BPF_REG_2, BPF_REG_10)); 105 | ebpf_emit(code, 
ALU_IMM(BPF_ADD, BPF_REG_2, kaddr)); 106 | 107 | ebpf_emit(code, MOV(BPF_REG_3, BPF_REG_10)); 108 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_3, vaddr)); /* BPF_ADD, matching ebpf_emit_map_look(); ALU_IMM is given BPF_* opcodes everywhere else in this file */ 109 | 110 | ebpf_emit(code, MOV_IMM(BPF_REG_4, 0)); 111 | ebpf_emit(code, CALL(BPF_FUNC_map_update_elem)); 112 | } 113 | /* Emit "load the 8-byte stack slot at fp+addr, add 1, store it back". The load must be as wide as the STXDW store; a 1-byte load made the counter wrap at 256. */ 114 | void ebpf_emit_count(ebpf_t *code, ssize_t addr) { 115 | ebpf_emit(code, LDXDW(BPF_REG_0, addr, BPF_REG_10)); 116 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_0, 1)); 117 | ebpf_emit(code, STXDW(BPF_REG_10, addr, BPF_REG_0)); 118 | } 119 | /* Emit code that sets gregs[r0] to 1 when `op` holds between gregs[r0] and gregs[r2], else 0. r0 and r2 index {R6, R7, R8} and are not range-checked. */ 120 | void ebpf_emit_bool(ebpf_t *code, int op, int r0, int r2) { 121 | int gregs[3] = {BPF_REG_6, BPF_REG_7, BPF_REG_8}; 122 | 123 | ebpf_emit(code, JMP(op, gregs[r0], gregs[r2], 2)); 124 | ebpf_emit(code, MOV_IMM(gregs[r0], 0)); 125 | ebpf_emit(code, JMP_IMM(BPF_JA, 0, 0, 1)); 126 | ebpf_emit(code, MOV_IMM(gregs[r0], 1)); 127 | } 128 | /* Emit a probe_read helper call: R1 = fp + to (destination), R2 = size, R3 = register `from` (source address). */ 129 | void ebpf_emit_read(ebpf_t *code, ssize_t to, int from, size_t size) { 130 | ebpf_emit(code, MOV(BPF_REG_1, BPF_REG_10)); 131 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_1, to)); 132 | ebpf_emit(code, MOV_IMM(BPF_REG_2, size)); 133 | ebpf_emit(code, MOV(BPF_REG_3, from)); 134 | ebpf_emit(code, CALL(BPF_FUNC_probe_read)); 135 | } 136 | /* Same register setup as ebpf_emit_read(), but calls the probe_read_str helper. */ 137 | void ebpf_emit_read_str(ebpf_t *code, ssize_t to, int from, size_t size) { 138 | ebpf_emit(code, MOV(BPF_REG_1, BPF_REG_10)); 139 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_1, to)); 140 | ebpf_emit(code, MOV_IMM(BPF_REG_2, size)); 141 | ebpf_emit(code, MOV(BPF_REG_3, from)); 142 | ebpf_emit(code, CALL(BPF_FUNC_probe_read_str)); 143 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # voyant 2 | 3 | **voyant is a domain-specific language based on the eBPF instruction set (insn) and system calls.** 4 | 5 | Unlike other eBPF tools, it is designed to be lightweight and easily extensible. Three aspects account for this choice: 6 | 7 | 1.
**Light tool:** First of all, no LLVM. LLVM is indeed an exceptional tool for building compiler backends, but its code base is counted in the tens of millions of lines of code; being lightweight is one of my goals, and that rules out LLVM. 8 | 9 | 2. **Easy**: Due to its lightweight design, our DSL is easier to install and can perform effectively even in resource-constrained environments. 10 | 11 | 12 | 3. **Clear:** Third, our DSL offers a level of expressivity similar to that of a general-purpose programming language, while also extending the semantics in certain aspects. 13 | 14 | 15 | ## install 16 | 17 | ```c 18 | git clone 19 | cd lang 20 | make 21 | sudo ./voyant main.vy 22 | ``` 23 | 24 | ## syntax 25 | 26 | 27 | ## attach target 28 | 29 | Currently, our DSL supports two types of mounting targets: one is kernel functions, and the other is tracepoints. I recommend using tracepoints whenever possible, as they are more stable. 30 | 31 | 32 | ### tracepoint 33 | 34 | ```c 35 | #[syscalls]; //<-----event name 36 | 37 | probe [tracepoint] { 38 | .... 39 | } 40 | ``` 41 | 42 | ### kprobe 43 | 44 | ```c 45 | #kprobe; //<-----kprobe string 46 | 47 | probe [kprobe] { 48 | ... 49 | } 50 | ``` 51 | 52 | ## Hello, world 53 | 54 | The out function is similar to the printf function in C. It is typically used to send data from the runtime of our program back to user space. 55 | 56 | ```c 57 | #syscalls; 58 | 59 | probe sys_enter_execve { 60 | out("%s", "Hello, World!"); 61 | } 62 | ``` 63 | 64 | ### variable 65 | 66 | The `:=` symbol is used to declare a variable in the current scope. Currently, we do not support reassigning values to variables.
67 | 68 | ```c 69 | #syscalls; 70 | 71 | probe sys_enter_execve { 72 | a := 1; 73 | out("%d\n", a); 74 | } 75 | ``` 76 | 77 | ### operator 78 | 79 | ```c 80 | #syscalls; 81 | 82 | probe sys_enter_execve{ 83 | a := 4 * 2; 84 | b := 4 + 2; 85 | c := 4 - 2; 86 | d := 4 / 2; 87 | 88 | out("a:%d b:%d c:%d d:%d\n", a, b, c, d); 89 | } 90 | ``` 91 | 92 | 93 | ### helper function 94 | 95 | Our DSL supports several common helper functions, which are essentially the same as those found in eBPF. 96 | 97 | ```c 98 | #syscalls; 99 | 100 | probe sys_enter_open { 101 | out("%-18d %-16s %-6d\n", pid(), comm(), cpu()); 102 | } 103 | ``` 104 | 105 | ### bpf map 106 | 107 | 1. Map: Using the `map[comm()]` statement, we can create a map where the keys are generated by the `comm()` function. 108 | 109 | ```c 110 | #syscalls; 111 | 112 | probe sys_enter_open{ 113 | map[comm()] := pid(); 114 | } 115 | ``` 116 | 117 | 118 | 2. Method Call Operator: The `|>` operator is a special operator that indicates method call semantics. In this case, the map's value is initialized to zero. 119 | ```c 120 | #syscalls; 121 | 122 | probe sys_enter_execve { 123 | enter[comm()] |> count(); 124 | } 125 | 126 | probe sys_exit_execve { 127 | exit[comm()] |> count(); 128 | } 129 | ``` 130 | 131 | 3. Sharing a map across multiple probes 132 | 133 | ```c 134 | #syscalls; 135 | 136 | probe sys_enter_open { 137 | enter[pid()] := args->filename; 138 | } 139 | 140 | probe sys_exit_open { 141 | ret := args->ret; 142 | out("%-18d %-16s %-6d %s\n", pid(), comm(), ret, enter[pid()]); 143 | } 144 | ``` 145 | ### BEGIN 146 | 147 | `BEGIN` is a special probe used to perform tasks before program compilation, such as outputting some prompt messages.
148 | 149 | ```c 150 | #syscalls; 151 | 152 | BEGIN { 153 | out("%-18s %-16s %-6s\n", "PID", "COMM", "CPU"); 154 | } 155 | 156 | probe sys_enter_open { 157 | out("%-18d %-16s %-6d\n", pid(), comm(), cpu()); 158 | } 159 | ``` 160 | 161 | ### probe function args 162 | 163 | In our DSL, we can get tracepoint parameter information using `args->filename`, and the compiler will automatically infer the corresponding parameter type. For example, `args->filename` is of type string. 164 | 165 | ```c 166 | #syscalls; 167 | 168 | probe sys_enter_open{ 169 | arg := args->filename; 170 | out("%-18d %-16s %-6s\n", pid(), comm(), arg); 171 | } 172 | ``` 173 | 174 | ### if stmts 175 | 176 | Our DSL also supports simple if statements, and it will later support corresponding boolean expressions. 177 | 178 | ```c 179 | #syscalls; 180 | 181 | probe sys_enter_execve { 182 | if (cpu() >= 0) { 183 | out("on cpu %d", cpu()); 184 | } 185 | } 186 | ``` 187 | 188 | ### struct fields 189 | 190 | ```c 191 | #kprobe; 192 | 193 | probe dev_queue_xmit { 194 | sk := (sk_buff*) arg0; 195 | out("%d\n", sk->len); 196 | } 197 | ``` -------------------------------------------------------------------------------- /README-zh.md: -------------------------------------------------------------------------------- 1 | # voyant 2 | 3 | **voyant 是一个基于 bpf 指令集的动态追踪领域特定语言**, 4 | 5 | **它相比于其它的动态追踪编程语言具有下面这些优势**: 6 | 7 | 1. **轻量+高性能**: 8 | - 编译器和解析器均手工打造,未借助如yacc或llvm等外部编译器工具。 9 | - 这种自主开发的方法带来了显著的优势, 那就是在一些资源受限的环境中可以得到很好的发挥, 10 | - 文件 `voyant` 的大小是文件 `bpftrace` 的 0.0295560441 倍。 11 | 12 | 2. **语义一致性**: voyant 旨在与通用编程语言保持高度一致性,这样做大大提高了易用性。用户将发现,使用我们的DSL就像使用熟悉的编程语言一样自然和直观。 13 | 14 | 3.
**内核兼容性**: 我们在设计时特别注意与内核的兼容性。尽管BPF最新特性颇具吸引力,但是这些新特性并不被一些旧的内核支持。我们的目的是确保DSL在广泛的内核环境中都能稳定运行,从而满足大多数用户的需求。 15 | 16 | ## 使用 17 | 18 | ### 从源码构建 19 | 20 | ```shell 21 | git clone xxx 22 | cd lang 23 | make 24 | ./voyant main.vy 25 | ``` 26 | 27 | ### 前置检查 28 | ```shell 29 | #检查是否有vmlinux 30 | ls -la /sys/kernel/btf/vmlinux 31 | ``` 32 | 33 | ### tracepoint 34 | 35 | 目前我们推荐将程序挂载到内核的跟踪点(tracepoint)上, 这是因为跟踪点更加稳定 36 | 37 | 在编写我们的 eBPF 程序之前, 我们可以通过下面这几种方式, 来查看跟踪点类型和跟踪点函数参数的类型 38 | ```shell 39 | sudo cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_mmap/format 40 | sudo cat /sys/kernel/debug/tracing/events/syscalls/sys_exit_execve/format 41 | ``` 42 | 43 | 首先我们需要在程序中表示出当前跟踪点的类型, syscalls 表示当前的跟踪点是属于系统调用这个跟踪点类别下面的 44 | ```cpp 45 | #syscalls; 46 | ``` 47 | 48 | 然后选择对应的跟踪点: 49 | ```cpp 50 | probe sys_enter_execve{ 51 | 52 | } 53 | ``` 54 | 55 | ### kprobe 56 | 57 | kprobe是Linux内核提供的另外一种动态跟踪工具,kprobe的好处就是多+动态, 在我们的编程语言中如果你想使用kprobe的话, 首先需要声明 58 | ```c 59 | #kprobe; 60 | ``` 61 | 62 | 然后选择对应的kprobe 63 | ```c 64 | probe do_mmap{ 65 | 66 | } 67 | ``` 68 | 69 | 70 | ### Hello, World 71 | 72 | ```c 73 | #syscalls; 74 | 75 | probe sys_enter_execve { 76 | out("%s\n", "hello, world"); 77 | } 78 | 79 | probe sys_exit_execve{ 80 | out("%s\n", "bye"); 81 | } 82 | ``` 83 | - probe 是一个关键字,其后通常跟随一个指定的跟踪点变量。编译器能够自动推断出该跟踪点的标识符(ID),并将随后的 {} 代码块作为跟踪点的挂载程序。这种设计使得跟踪点的设置和程序的挂载变得简洁而直观。 84 | - 用户态输出函数: out 是一个专为用户态设计的输出信息函数,其语法与 printf 类似,但目前我们仅支持字符串(%s)和整数(%d)两种格式化输出。值得注意的是,out 函数的实现基于 bpf map array 和 perf ring buffer 技术,这种底层实现确保了输出操作的高效性和稳定性。 85 | 86 | 87 | ### 变量 88 | 89 | ```c 90 | #syscalls; 91 | 92 | probe sys_enter_execve { 93 | a := 1; 94 | out("%d\n", a); 95 | } 96 | ``` 97 | - `a := 1` 表示声明一个变量 98 | 99 | 100 | ### 辅助函数 101 | 102 | 在voyant 我们提供了一些辅助函数, 这些辅助函数的返回值有两种类型, 分别是 103 | - 整数类型,pid、cpu 104 | - 字符串类型, comm 105 | 106 | ```c 107 | #syscalls; 108 | 109 | probe sys_enter_execve { 110 | out("pid: %d, cpu: %d", pid(), cpu()); 111 | } 112 | ``` 113 | 114 | ### 跟踪点函数参数 115 | 116 | ```c 117 |
#syscalls; 118 | 119 | probe sys_enter_mmap { 120 | len := args->len; 121 | fd := args->fd; 122 | 123 | out("%-14d %-12d\n", pid(), comm(), len, fd); 124 | } 125 | ``` 126 | 127 | 跟踪点参数有多种类型, 通常有两种类型: 128 | - 整数类型 129 | - 字符串类型 130 | - 复合类型, 这种类型, 会在后面支持 131 | 132 | 133 | ### BPF hash map 134 | 135 | ```c 136 | #syscalls; 137 | 138 | //示例1 139 | probe sys_enter_execve { 140 | map[comm()] |> count(); 141 | } 142 | ``` 143 | - **Map的作用域**: 不同于变量需要声明和做相应的寄存器分配,map的所有的数据都是存放在栈上面的 144 | 145 | - **Map 键值初始化**: 使用 `map[comm()]` 语句,我们可以创建一个 map,其中键由 `comm()` 函数生成,该函数通常返回当前进程的名称。如果 map 中的某个键尚未被赋值,其对应的值将默认初始化为 0。这种设计简化了对进程特定数据的跟踪和管理。 146 | 147 | - **方法调用操作符**: `|>` 是一个特殊的操作符,用于表示方法调用的语义。它的工作方式类似于 Java 中的 `1.add()`,即将数字 1 作为参数传递给 `add()` 方法。这种设计允许我们将操作符用于函数的链式调用,为实现更复杂的数据处理提供了灵活性。 148 | 149 | - **支持函数组合**: 我们的设计允许通过 `|>` 操作符实现多个函数的层级调用,从而创建组合函数的效果。例如,在表达式 `map[pid()] |> count(1) |> hist();` 中,我们首先通过 `pid()` 获取进程 ID,然后调用 `count(1)` 对每个进程的计数进行累加,最后通过 `hist()` 函数生成一个统计直方图。 150 | 151 | - **计数函数**: `count()` 是一个简洁的函数调用,表示每次调用时将对应的计数器值增加 1。这种设计使得对事件或数据点的计数变得直观和易于实现。 152 | 153 | - **用户态的输出:** 在使用map的一系列组合函数的时候, 我们并不需要实时打印其结果, 因为在你结束程序的时候, 我们会在用户态输出 map 的键(key) 和值(value) 154 | 155 | ### 获取跟踪点函数的参数 156 | 157 | 通过 `args->filename` 这种方式用来获取对应跟踪点函数的参数值 158 | ```c 159 | #syscalls; 160 | 161 | probe sys_enter_execve { 162 | arg := args->filename; 163 | out("%s\n", arg); 164 | } 165 | ``` 166 | 167 | 相比较于上面这种跟踪点参数获取, kprobe的参数获取方式就比较复杂了, 我们主要使用 vmlinux 和 btf 的方式来完成偏移量的获取,具体的细节这里并不介绍, 168 | 我们主要介绍一下它的使用方式。 169 | 170 | ```c 171 | #kprobe; 172 | 173 | probe dev_queue_xmit { 174 | sk := (sk_buff*) arg0; 175 | out("len: %d\n", sk->len); 176 | } 177 | ``` 178 | 179 | ### BEGIN 表达式 180 | 181 | ```c 182 | BEGIN { 183 | out("%-18s %-16s %-6s\n", "PID", "COMM", "FILE"); 184 | } 185 | 186 | probe sys_enter_execve { 187 | arg := args->filename; 188 | out("%-18d %-16s %-6s\n", pid(), comm(), arg); 189 | } 190 | ``` 191 | 输出结果: 192 | ```c 193 | PID COMM FILE 194 | 1428705 barad_agent /bin/sh 195 | 
1428706 barad_agent /bin/sh 196 | 1428707 node /bin/sh 197 | 1428708 sh /usr/bin/which 198 | 1428709 node /bin/sh 199 | 1428710 sh /usr/bin/ps 200 | 1428711 node /bin/sh 201 | 1428734 start.sh /usr/bin/whoami 202 | 1428737 start.sh /usr/bin/grep 203 | 1428738 start.sh /usr/bin/grep 204 | 1428739 start.sh /usr/bin/wc 205 | 1428736 start.sh /usr/bin/ps 206 | ``` 207 | BEGIN是一个特殊的探针类型,它仅在脚本开始执行时触发一次。此处,我们利用BEGIN探针来定义一个立即执行的代码块,该代码块负责输出格式化的表头,包括进程ID(PID)、命令名称(COMM)和文件路径(FILE)。 208 | 209 | ### if语句 210 | 211 | ```c 212 | #syscalls; 213 | 214 | probe sys_enter_mmap { 215 | len := args->len; 216 | 217 | if (len > 0) { 218 | out("%s\n", comm()); 219 | } 220 | } 221 | ``` -------------------------------------------------------------------------------- /lang/func.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "func.h" 5 | #include "buffer.h" 6 | #include "ut.h" 7 | 8 | static int annot_rint(node_t* n) { 9 | n->annot.type = TYPE_INT; 10 | n->annot.size = 8; 11 | } 12 | 13 | static int annot_rstr(node_t* n) { 14 | n->annot.type = TYPE_STR; 15 | n->annot.size = _ALIGNED(16); 16 | } 17 | 18 | static void printf_spec(const char* spec, const char* term, void* data, node_t* arg) { 19 | int64_t num; 20 | size_t fmt_len; 21 | char* fmt; 22 | 23 | memcpy(&num, data, sizeof(num)); 24 | fmt_len = term - spec + 1; 25 | fmt = strndup(spec, fmt_len); 26 | 27 | switch(*term) { 28 | case 's': 29 | printf(fmt, (char*)data); 30 | break; 31 | case 'd': 32 | printf(fmt, (int)num); 33 | break; 34 | } 35 | 36 | free(fmt); 37 | } 38 | 39 | static int event_output(event_t* ev, void* _call) { 40 | node_t* arg, *call = _call; 41 | char* fmt, *spec, *name, *str; 42 | void* data = ev->data; 43 | 44 | name = call->call.args->name; 45 | 46 | arg = call->call.args->next->rec.args->next; 47 | str = call->call.args->name; 48 | 49 | str_escape(str); 50 | 51 | for (fmt = str; *fmt; fmt++) { 52 | if (*fmt == '%' && arg) { 53 | spec 
= fmt; 54 | fmt = strpbrk(spec, "scd"); 55 | if (!fmt) 56 | break; 57 | printf_spec(spec, fmt, data, arg); 58 | 59 | data += arg->annot.size; 60 | arg = arg->next; 61 | } else { 62 | fputc(*fmt, stdout); 63 | } 64 | } 65 | return 0; 66 | } 67 | 68 | static int annot_out(node_t* call) { 69 | evhandler_t* evh; 70 | node_t* meta, *head, *varg, *rec; 71 | size_t size; 72 | ssize_t addr; 73 | 74 | varg = call->call.args; 75 | if (!varg) { 76 | verror("should has a string fromat"); 77 | return -1; 78 | } 79 | 80 | evh = vcalloc(1, sizeof(*evh)); 81 | evh->priv = call; 82 | evh->handle = event_output; 83 | 84 | evhandler_register(evh); 85 | 86 | meta = node_int_new(evh->type); 87 | meta->annot.type = TYPE_INT; 88 | meta->annot.size = 8; 89 | meta->next = varg->next; 90 | 91 | rec = node_rec_new(meta); 92 | varg->next = rec; 93 | } 94 | 95 | static int annot_strcmp(node_t* call) { 96 | node_t* arg = call->call.args; 97 | 98 | if (!arg || arg->type != NODE_STR) { 99 | verror("strcmp requires string arguments"); 100 | } 101 | 102 | arg = arg->next; 103 | 104 | if (!arg || arg->type != NODE_STR) { 105 | verror("strcmp requires string arguments"); 106 | } 107 | 108 | call->annot.type = TYPE_INT; 109 | call->annot.size = 8; 110 | } 111 | 112 | 113 | static int compile_rint_func(enum bpf_func_id func, extract_op_t op, ebpf_t* e, node_t* n) { 114 | ebpf_emit(e, CALL(func)); 115 | 116 | switch(op) { 117 | case EXTRACT_OP_MASK: 118 | ebpf_emit(e, ALU_IMM(BPF_AND, BPF_REG_0, 0x7fffffff)); 119 | break; 120 | case EXTRACT_OP_SHIFT: 121 | ebpf_emit(e, ALU_IMM(BPF_RSH, BPF_REG_0, 32)); 122 | break; 123 | case EXTRACT_OP_DIV_1G: 124 | ebpf_emit(e, ALU_IMM(BPF_DIV, BPF_REG_0, 1000000000)); 125 | default: 126 | break; 127 | } 128 | 129 | return 0; 130 | } 131 | 132 | int compile_gid(node_t* n, ebpf_t* e) { 133 | return compile_rint_func(BPF_FUNC_get_current_uid_gid, EXTRACT_OP_SHIFT, e, n); 134 | } 135 | 136 | int compile_uid(node_t* n, ebpf_t* e) { 137 | return 
compile_rint_func(BPF_FUNC_get_current_uid_gid, EXTRACT_OP_MASK, e, n); 138 | } 139 | 140 | int compile_pid(node_t* n, ebpf_t* e) { 141 | return compile_rint_func(BPF_FUNC_get_current_pid_tgid, EXTRACT_OP_SHIFT, e, n); 142 | } 143 | 144 | int compile_tid(node_t* n, ebpf_t* e) { 145 | return compile_rint_func(BPF_FUNC_get_current_pid_tgid, EXTRACT_OP_MASK, e, n); 146 | } 147 | 148 | int compile_ns(node_t* n, ebpf_t* e) { 149 | return compile_rint_func(BPF_FUNC_ktime_get_ns, EXTRACT_OP_NONE, e, n); 150 | } 151 | 152 | int compile_sens(node_t* n, ebpf_t* e) { 153 | return compile_rint_func(BPF_FUNC_ktime_get_ns, EXTRACT_OP_DIV_1G, e, n); 154 | } 155 | 156 | int compile_bns(node_t* n, ebpf_t* e) { 157 | return compile_rint_func(BPF_FUNC_ktime_get_boot_ns, EXTRACT_OP_NONE, e, n); 158 | } 159 | 160 | int compile_cpu(node_t* n, ebpf_t* e) { 161 | return compile_rint_func(BPF_FUNC_get_smp_processor_id, EXTRACT_OP_NONE, e, n); 162 | } 163 | /* Emit a get_stackid helper call: R1 = R9, R2 = the map in call->annot.mapid, R3 = 0 (flags). Returns 0. */ 164 | int compile_stack(node_t* call, ebpf_t* code) { 165 | ebpf_emit(code, MOV(BPF_REG_1, BPF_REG_9)); 166 | ebpf_emit_mapld(code, BPF_REG_2, call->annot.mapid); 167 | ebpf_emit(code, MOV_IMM(BPF_REG_3, 0)); 168 | ebpf_emit(code, CALL(BPF_FUNC_get_stackid)); return 0; 169 | } 170 | /* Builtin table. global_annot() and global_compile() walk it until they reach an entry whose name is NULL, so it must end with that sentinel. */ 171 | static builtin_t global_builtins[] = { 172 | builtin("tid", annot_rint, compile_tid), 173 | builtin("gid", annot_rint, compile_gid), 174 | builtin("pid", annot_rint, compile_pid), 175 | builtin("uid", annot_rint, compile_uid), 176 | builtin("cpu", annot_rint, compile_cpu), 177 | builtin("ns", annot_rint, compile_ns), 178 | builtin("secs", annot_rint, compile_sens), 179 | builtin("bns", annot_rint, compile_bns), 180 | builtin("log", annot_rint, NULL), 181 | builtin("comm", annot_rstr, NULL), 182 | builtin("out", annot_out, NULL), 183 | builtin("strcmp", annot_strcmp, NULL), { .name = NULL }, /* sentinel: terminates the bi->name loops */ 184 | }; 185 | 186 | /* Annotate call node `n` with the builtin of the same name; returns that annotator's result, or -1 when no builtin matches. */ 187 | int global_annot(node_t* n) { 188 | builtin_t* bi; 189 | 190 | for (bi = global_builtins; bi->name; bi++) { 191 | if (vstreq(bi->name, n->name)) 192 | return
bi->annotate(n); 193 | } 194 | 195 | return -1; 196 | } 197 | /* Compile call node `n` with the builtin of the same name. Returns the compile hook's result, or -1 when no builtin matches or the matching builtin has no compile hook (log, comm, out and strcmp register NULL). */ 198 | int global_compile(node_t* n, ebpf_t* code, type_t type) { 199 | builtin_t* bi; 200 | 201 | for (bi = global_builtins; bi->name; bi++) { 202 | if (vstreq(bi->name, n->name)) 203 | return bi->compile ? bi->compile(n, code) : -1; 204 | } 205 | 206 | return -1; 207 | } 208 | /* Placeholder annotator for map methods: does nothing yet and reports success. */ 209 | static int annot_hist(node_t* node) { 210 | (void)node; 211 | return 0; 212 | } 213 | 214 | static builtin_t global_methods[] = { 215 | builtin("count", annot_hist, NULL), 216 | builtin("hist", annot_hist, NULL), 217 | }; 218 | 219 | int global_method(node_t* map) { 220 | return 0; 221 | } -------------------------------------------------------------------------------- /lang/ut.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ut.h" 11 | 12 | noreturn void verror(char *fmt, ...)
{ 13 | va_list ap; 14 | va_start(ap, fmt); 15 | vfprintf(stderr, fmt, ap); 16 | fprintf(stderr, "\n"); 17 | exit(1); 18 | } 19 | 20 | long get_error(const void* ptr) { 21 | if (!IS_ERR_OR_NULL(ptr)) { 22 | return 0; 23 | } 24 | 25 | if (IS_ERR(ptr)) { 26 | errno = -PTR_ERR(ptr); 27 | } 28 | 29 | return -errno; 30 | } 31 | 32 | vec_t *vec_new() { 33 | vec_t *vec = vmalloc(sizeof(vec)); 34 | vec->data = vmalloc(sizeof(void *) * 16); 35 | vec->cap = 16; 36 | vec->len = 0; 37 | return vec; 38 | } 39 | 40 | void vec_push(vec_t *vec, void *elem) { 41 | if (vec->len == vec->cap) { 42 | vec->cap *= 2; 43 | vec->data = realloc(vec->data, sizeof(void *) * vec->cap); 44 | } 45 | vec->data[vec->len++] = elem; 46 | } 47 | 48 | void vec_free(vec_t* vec) { 49 | int i; 50 | 51 | for (i = 0; i < vec->len; i++) { 52 | free(vec->data[i]); 53 | } 54 | 55 | free(vec->data); 56 | free(vec); 57 | } 58 | 59 | bool vec_contains(vec_t *vec, void *elem) { 60 | int i; 61 | 62 | for (i = 0; i < vec->len; i++) { 63 | if (vec->data[i] == elem) { 64 | return true; 65 | } 66 | } 67 | return false; 68 | } 69 | 70 | bool vec_union(vec_t *vec, void *elem) { 71 | if (vec_contains(vec, elem)) { 72 | return false; 73 | } 74 | 75 | vec_push(vec, elem); 76 | return true; 77 | } 78 | 79 | FILE *fopenf(const char *mode, const char *fmt, ...) 
/*
 * str_escape() - decode backslash escapes in `str`, in place.
 *
 * Recognised sequences: \n, \r, \t and \\. Any other sequence, and a lone
 * backslash at the very end of the string, is kept verbatim so no input is
 * silently dropped. The result is never longer than the input.
 *
 * Returns `str`.
 */
char *str_escape(char *str) {
    char *in, *out;

    for (in = out = str; *in; in++, out++) {
        if (*in != '\\') {
            /* Ordinary character: move it down over any gap left by
             * previously decoded escapes. */
            *out = *in;
            continue;
        }

        switch (in[1]) {
        case 'n':
            *out = '\n';
            in++;
            break;
        case 'r':
            *out = '\r';
            in++;
            break;
        case 't':
            *out = '\t';
            in++;
            break;
        case '\\':
            *out = '\\';
            in++;
            break;
        case '\0':
            /* Trailing backslash: keep it and do not step past the NUL. */
            *out = '\\';
            break;
        default:
            /* Unknown escape: keep both characters unchanged. */
            *out++ = '\\';
            in++;
            *out = *in;
            break;
        }
    }

    *out = '\0';
    return str;
}
/*
 * quantize_normalize() - split a power of two into a small number plus a
 * binary-magnitude suffix, for printing histogram bucket bounds.
 *
 * For 2^log2 this stores the suffix for the largest 2^(10*i) not exceeding
 * it in *suffix (NULL below 2^10, then "k", "M", "G", "T", "P", "E") and
 * returns the remaining factor, e.g. log2 = 25 -> returns 32, *suffix = "M".
 *
 * Negative exponents are treated as 0. Exponents beyond the last suffix
 * keep that suffix; the remaining shift is capped so it stays defined for
 * a 32-bit int.
 */
static int quantize_normalize(int log2, const char **suffix) {
    static const char *const s[] = {NULL, "k", "M", "G", "T", "P", "E"};
    const int last = (int)(sizeof(s) / sizeof(s[0])) - 1;
    int i;

    if (log2 < 0)
        log2 = 0;

    /* Never index past the suffix table. */
    for (i = 0; log2 >= 10 && i < last; i++)
        log2 -= 10;

    /* 1 << 31 would overflow a signed 32-bit int. */
    if (log2 > 30)
        log2 = 30;

    *suffix = s[i];
    return 1 << log2;
}
: ""); 231 | } 232 | 233 | fprintf(fp, "\t%8" PRId64 " ", count); 234 | print_bar_ascii(fp, count, max); 235 | fputc('\n', fp); 236 | } 237 | 238 | static int base_pr(enum print_level level, const char* format, va_list args) { 239 | const char* env_var = "VY_LOG_LEVEL"; 240 | static enum print_level min_level = PRINT_INFO; 241 | static bool initialized; 242 | 243 | if (!initialized) { 244 | char* verbosity; 245 | 246 | initialized = true; 247 | verbosity = getenv(env_var); 248 | if (verbosity) { 249 | if (strcasecmp(verbosity, "warn") == 0) { 250 | min_level = PRINT_WARN; 251 | } else if (strcasecmp(verbosity, "debug") == 0) { 252 | min_level = PRINT_DEBUG; 253 | } else if (strcasecmp(verbosity, "info") == 0) { 254 | min_level = PRINT_INFO; 255 | } else { 256 | fprintf(stderr, "voyant: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", 257 | env_var, verbosity); 258 | } 259 | } 260 | } 261 | 262 | if (level > min_level) { 263 | return 0; 264 | } 265 | 266 | return vfprintf(stderr, format, args); 267 | } 268 | 269 | static ut_print_fn_t __vy_pr = base_pr; 270 | 271 | __printf(2, 3) 272 | void ut_print(enum print_level level, const char* format, ...) { 273 | va_list args; 274 | int old_errno; 275 | ut_print_fn_t print_fn; 276 | 277 | print_fn = __atomic_load_n(&__vy_pr, __ATOMIC_RELAXED); 278 | if (!print_fn) 279 | return; 280 | 281 | old_errno = errno; 282 | va_start(args, format); 283 | __vy_pr(level, format, args); 284 | va_end(args); 285 | 286 | errno = old_errno; 287 | } 288 | 289 | char* ut_strerror_r(int err, char* dst, int len) { 290 | int ret = strerror_r(err < 0 ? -err : err, dst, len); 291 | if (ret == -1) 292 | ret = errno; 293 | 294 | if (ret) { 295 | if (ret == EINVAL) 296 | snprintf(dst, len, "unknown error (%d)", err < 0 ? 
/*
 * is_number() - report whether `literal` is a non-empty run of decimal
 * digits.
 *
 * Returns 1 if every character is a digit and there is at least one,
 * 0 otherwise (including for the empty string).
 */
static int is_number(char *literal) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    const unsigned char *p = (const unsigned char *)literal;

    if (*p == '\0')
        return 0;

    for (; *p; p++) {
        if (!isdigit(*p))
            return 0;
    }

    return 1;
}
37 | if (is_number(str)) 38 | return TOKEN_INT; 39 | 40 | return TOKEN_IDENT; 41 | } 42 | 43 | lexer_t *lexer_init(char *s) { 44 | lexer_t *l = vmalloc(sizeof(*l)); 45 | 46 | l->input = vstr(s); 47 | l->read_pos = 1; 48 | l->pos = 0; 49 | l->ch = l->input[0]; 50 | 51 | return l; 52 | } 53 | 54 | static void read_char(lexer_t *l) { 55 | if (l->ch) { 56 | l->pos = l->read_pos; 57 | l->read_pos++; 58 | l->ch = l->input[l->pos]; 59 | } 60 | } 61 | 62 | static void skip_whitespace(lexer_t *l) { 63 | while (l->ch && (l->ch == ' ' 64 | || l->ch == '\n' 65 | || l->ch == '\r' 66 | || l->ch == '\t')) { 67 | 68 | read_char(l); 69 | } 70 | } 71 | 72 | char *read_string(lexer_t *l) { 73 | size_t pos = l->pos + 1; 74 | l->pos++; 75 | 76 | while (l->input[l->pos] != '"' && l->input[l->pos] != 0) { 77 | l->pos++; 78 | } 79 | 80 | size_t len = l->pos - pos; 81 | char *str = vmalloc(len + 1); 82 | 83 | memcpy(str, l->input + pos, len); 84 | str[len] = 0; 85 | l->pos++; 86 | l->read_pos = l->pos + 1; 87 | l->ch = l->input[l->pos]; 88 | 89 | return str; 90 | } 91 | 92 | char *read_ident(lexer_t *l) { 93 | size_t pos = l->pos; 94 | 95 | while (is_char(l->input[l->pos])) { 96 | l->pos++; 97 | } 98 | size_t len = l->pos - pos; 99 | 100 | char *ident = vmalloc(len + 1); 101 | memcpy(ident, l->input + pos, len); 102 | ident[len] = 0; 103 | 104 | l->read_pos = l->pos + 1; 105 | l->ch = l->input[l->pos]; 106 | return ident; 107 | } 108 | 109 | token_t* lexer_next_token(lexer_t *lexer) { 110 | token_t* token = vmalloc(sizeof(*token)); 111 | 112 | skip_whitespace(lexer); 113 | 114 | switch (lexer->ch) { 115 | case '"': 116 | token->type = TOKEN_STRING; 117 | token->literal = read_string(lexer); 118 | return token; 119 | 120 | case ',': 121 | token->type = TOKEN_COMMA; 122 | token->literal = strdup(","); 123 | read_char(lexer); 124 | return token; 125 | 126 | case '(': 127 | token->type = LEFT_PAREN; 128 | token->literal = strdup("("); 129 | read_char(lexer); 130 | return token; 131 | 132 | 
case ')': 133 | token->type = RIGHT_PAREN; 134 | token->literal = strdup(")"); 135 | read_char(lexer); 136 | return token; 137 | 138 | case '[': 139 | token->type = LEFT_BRACKET; 140 | token->literal = strdup("["); 141 | read_char(lexer); 142 | return token; 143 | 144 | case ']': 145 | token->type = RIGHT_BRACKET; 146 | token->literal = strdup("]"); 147 | read_char(lexer); 148 | return token; 149 | 150 | case '{': 151 | token->type = LEFT_BLOCK; 152 | token->literal = strdup("{"); 153 | read_char(lexer); 154 | return token; 155 | 156 | case '}': 157 | token->type = RIGHT_BLOCK; 158 | token->literal = strdup("}"); 159 | read_char(lexer); 160 | return token; 161 | 162 | case ';': 163 | token->type = TOKEN_SEMICOLON; 164 | token->literal = strdup(";"); 165 | read_char(lexer); 166 | return token; 167 | 168 | case '+': 169 | token->type = TOKEN_PLUS; 170 | token->literal = strdup("+"); 171 | read_char(lexer); 172 | return token; 173 | 174 | case '*': 175 | token->type = TOKEN_STAR; 176 | token->literal = strdup("*"); 177 | read_char(lexer); 178 | return token; 179 | 180 | case '#': 181 | token->type = TOKEN_HASH; 182 | token->literal = strdup("#"); 183 | read_char(lexer); 184 | return token; 185 | 186 | case '/': 187 | if (lexer->input[lexer->read_pos] == '/') { 188 | while (lexer->input[lexer->pos] != '\n') { 189 | read_char(lexer); 190 | } 191 | 192 | return lexer_next_token(lexer); 193 | } 194 | 195 | token->type = TOKEN_SLASH; 196 | token->literal = strdup("/"); 197 | read_char(lexer); 198 | return token; 199 | 200 | 201 | case '<': 202 | if (lexer->input[lexer->read_pos] == '=') { 203 | token->type = TOKEN_LE; 204 | token->literal = strdup("<="); 205 | read_char(lexer); 206 | read_char(lexer); 207 | return token; 208 | } 209 | 210 | token->type = TOKEN_LT; 211 | token->literal = strdup("<"); 212 | read_char(lexer); 213 | return token; 214 | 215 | case '>': 216 | if (lexer->input[lexer->read_pos] == '=') { 217 | token->type = TOKEN_GE; 218 | token->literal = 
strdup(">="); 219 | read_char(lexer); 220 | read_char(lexer); 221 | return token; 222 | } 223 | 224 | token->type = TOKEN_GT; 225 | token->literal = strdup(">"); 226 | read_char(lexer); 227 | return token; 228 | 229 | case '-': 230 | if (lexer->input[lexer->read_pos] == '>') { 231 | token->type = TOKEN_ACCESS; 232 | token->literal = strdup("->"); 233 | read_char(lexer); 234 | read_char(lexer); 235 | return token; 236 | } 237 | 238 | token->type = TOKEN_SUB; 239 | token->literal = strdup("-"); 240 | read_char(lexer); 241 | return token; 242 | 243 | case '|': 244 | if (lexer->input[lexer->read_pos] == '>') { 245 | token->type = TOKEN_PIPE; 246 | token->literal = strdup("|>"); 247 | read_char(lexer); 248 | read_char(lexer); 249 | return token; 250 | } 251 | 252 | case '=': 253 | if (lexer->input[lexer->read_pos] == '=') { 254 | token->type = TOKEN_EQ; 255 | token->literal = strdup("=="); 256 | read_char(lexer); 257 | read_char(lexer); 258 | return token; 259 | } 260 | 261 | token->type = TOKEN_ASSIGN; 262 | token->literal = strdup("="); 263 | read_char(lexer); 264 | return token; 265 | 266 | case ':': 267 | if (lexer->input[lexer->read_pos] == '=') { 268 | token->type = TOKEN_DEC; 269 | token->literal = strdup(":="); 270 | read_char(lexer); 271 | read_char(lexer); 272 | return token; 273 | } 274 | case 0: 275 | token->literal = ""; 276 | token->type = END_OF_FILE; 277 | return token; 278 | default: 279 | goto out; 280 | break; 281 | } 282 | 283 | out: 284 | if (is_char(lexer->ch)) { 285 | token->literal = read_ident(lexer); 286 | token->type = get_type(token->literal); 287 | return token; 288 | } else { 289 | token->type = TOKEN_ILLEGAL; 290 | token->literal = NULL; 291 | return token; 292 | } 293 | } 294 | 295 | 296 | const char* token_to_str(token_type type) { 297 | #define TYPE(_type, _type_str) [_type] = _type_str, 298 | static const char* strs[] = { 299 | TOKEN_TYPE_TABLE 300 | }; 301 | #undef TYPE 302 | return strs[type]; 303 | } 304 | 305 | void 
free_token(token_t *tok) { 306 | if (tok->type != END_OF_FILE) { 307 | free(tok->literal); 308 | } 309 | free(tok); 310 | } 311 | 312 | void free_lexer(lexer_t *lex) { 313 | free(lex->input); 314 | free(lex); 315 | } -------------------------------------------------------------------------------- /lang/buffer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "buffer.h" 13 | #include "errno.h" 14 | #include "probe.h" 15 | #include "ut.h" 16 | 17 | static uint64_t next_type = 0; 18 | 19 | TAILQ_HEAD(evhandlers, evhandler); 20 | static struct evhandlers evh_list = TAILQ_HEAD_INITIALIZER(evh_list); 21 | 22 | void evhandler_register(evhandler_t* evh) { 23 | evh->type = next_type++; 24 | TAILQ_INSERT_TAIL(&evh_list, evh, node); 25 | } 26 | 27 | static evhandler_t* evhandler_find(uint64_t type) { 28 | evhandler_t* evh; 29 | 30 | TAILQ_FOREACH(evh, &evh_list, node) { 31 | if (evh->type == type) 32 | return evh; 33 | } 34 | 35 | return NULL; 36 | } 37 | 38 | static struct ret_value event_handle(event_t* ev, size_t size) { 39 | evhandler_t* evh; 40 | 41 | 42 | evh = evhandler_find(ev->type); 43 | if (!evh) { 44 | verror("unknown event: type:%#"PRIx64" size:%#zx\n", 45 | ev->size, size); 46 | return (struct ret_value) { .err = 1, .val = ENOSYS}; 47 | } 48 | 49 | evh->handle(ev, evh->priv); 50 | return (struct ret_value) {}; 51 | } 52 | 53 | void evqueue_init(evpipe_t* evp, uint32_t cpu, size_t size) { 54 | struct perf_event_attr attr = {0}; 55 | evqueue_t* q = &evp->q[cpu]; 56 | int err; 57 | 58 | attr.type = PERF_TYPE_SOFTWARE; 59 | attr.config = PERF_COUNT_SW_BPF_OUTPUT; 60 | attr.sample_type = PERF_SAMPLE_RAW; 61 | attr.wakeup_events = 1; 62 | 63 | q->fd = perf_event_open(&attr, -1, cpu, -1, 0); 64 | if (q->fd < 0) { 65 | verror("could not create queue"); 66 | return q->fd; 67 | } 68 | 69 | 
err = bpf_map_update(evp->mapfd, &cpu, &q->fd, BPF_ANY); 70 | if (err) { 71 | verror("could not link map to queue"); 72 | return err; 73 | } 74 | 75 | 76 | size += sysconf(_SC_PAGESIZE); 77 | q->mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0); 78 | if (q->mem == MAP_FAILED) { 79 | verror("clould not mmap queue"); 80 | return -1; 81 | } 82 | 83 | evp->poll[cpu].fd = q->fd; 84 | evp->poll[cpu].events = POLLIN; 85 | 86 | return 0; 87 | } 88 | 89 | 90 | int evpipe_init(evpipe_t* evp, size_t qsize) { 91 | uint32_t cpu; 92 | int err; 93 | 94 | evp->ncpus = sysconf(_SC_NPROCESSORS_ONLN); 95 | evp->mapfd = bpf_map_create(BPF_MAP_TYPE_PERF_EVENT_ARRAY, sizeof(uint32_t), sizeof(int), evp->ncpus); 96 | 97 | if (evp->mapfd < 0) { 98 | verror("clould not create map in evpipe init"); 99 | return evp->mapfd; 100 | } 101 | 102 | evp->q = vcalloc(evp->ncpus, sizeof(*evp->q)); 103 | evp->poll = vcalloc(evp->ncpus, sizeof(*evp->poll)); 104 | 105 | for (cpu = 0; cpu < evp->ncpus; cpu++) { 106 | evqueue_init(evp, cpu, qsize); 107 | } 108 | } 109 | 110 | 111 | static inline uint64_t __get_head(struct perf_event_mmap_page* mem) { 112 | uint64_t head = *((volatile uint64_t *)&mem->data_head); 113 | 114 | asm volatile("" ::: "memory"); 115 | return head; 116 | } 117 | 118 | static inline void __set_tail(struct perf_event_mmap_page* mem, uint64_t tail) { 119 | asm volatile("" ::: "memory"); 120 | mem->data_tail = tail; 121 | } 122 | 123 | 124 | struct ret_value evqueue_drain(evqueue_t* q) { 125 | struct lost_event* lost; 126 | struct ret_value ret = {}; 127 | 128 | uint16_t size, offs, head, tail; 129 | uint8_t* base, *this, *next; 130 | event_t* ev; 131 | int err = 0; 132 | 133 | size = q->mem->data_size; 134 | offs = q->mem->data_offset; 135 | base = (uint8_t*)q->mem + offs; 136 | 137 | for (head = __get_head(q->mem); q->mem->data_tail != head; 138 | __set_tail(q->mem, q->mem->data_tail+ev->hdr.size)) { 139 | tail = q->mem->data_tail; 140 | this = base + (tail % 
size); 141 | ev = (void*) this; 142 | next = base + (tail + ev->hdr.size) % size; 143 | 144 | if (next < this) { 145 | size_t left = (base + size) - this; 146 | q->buf = realloc(q->buf, ev->hdr.size); 147 | memcpy(q->buf, this, left); 148 | memcpy(q->buf + left, base, ev->hdr.size - left); 149 | ev = q->buf; 150 | } 151 | 152 | switch(ev->hdr.type) { 153 | case PERF_RECORD_SAMPLE: 154 | ret = event_handle(ev, ev->hdr.size); 155 | break; 156 | case PERF_RECORD_LOST: 157 | lost = (void*) ev; 158 | _e("lost %"PRId64" events\n", lost->lost); 159 | ret.err = 1; 160 | ret.val = EOVERFLOW; 161 | break; 162 | default: 163 | _e("unknown perf event %#"PRIx32"\n", ev->hdr.type); 164 | ret.err = 1; 165 | ret.val = EINVAL; 166 | break; 167 | } 168 | 169 | if (ret.err || ret.exit) 170 | break; 171 | } 172 | 173 | return ret; 174 | } 175 | 176 | struct ret_value evpipe_loop(evpipe_t* evp, int* sig, int timeout) { 177 | struct ret_value ret; 178 | int cpu, ready; 179 | 180 | for (;!(*sig);) { 181 | ready = poll(evp->poll, evp->ncpus, timeout); 182 | 183 | if (ready < 0) { 184 | ret.err = 1; 185 | ret.val = errno; 186 | return ret; 187 | } 188 | 189 | if (timeout == -1) { 190 | assert(ready); 191 | } else if (ready == 0) { 192 | return ret; 193 | } 194 | 195 | for (cpu = 0; ready && (cpu < evp->ncpus); cpu++) { 196 | if (!(evp->poll[cpu].revents & POLLIN)) 197 | continue; 198 | ret = evqueue_drain(&evp->q[cpu]); 199 | 200 | if (ret.err | ret.exit) 201 | return ret; 202 | 203 | ready--; 204 | } 205 | } 206 | return ret; 207 | } 208 | 209 | static void __key_workaround(int fd, void* key, size_t key_sz, void* val) { 210 | FILE* fp; 211 | int err; 212 | 213 | fp = fopen("/dev/urandom", "r"); 214 | 215 | while (1) { 216 | err = bpf_map_lookup(fd, key, val); 217 | if (err) 218 | break; 219 | 220 | if (fread(key, key_sz, 1, fp) != 1) 221 | break; 222 | } 223 | fclose(fp); 224 | } 225 | 226 | void dump_str(FILE* fp, node_t* str, void* data) { 227 | int size = (int) str->annot.size; 228 | 
229 | fprintf(fp, "%-*.*s", size, size, (const char*)data); 230 | } 231 | 232 | void dump_int(FILE* fp, node_t* integer, void* data) { 233 | int64_t num; 234 | 235 | memcpy(&num, data, sizeof(num)); 236 | fprintf(fp, "%8" PRId64, num); 237 | } 238 | 239 | 240 | void dump(FILE* fp, node_t* n, void* data) { 241 | switch (n->annot.type) { 242 | case TYPE_STR: 243 | dump_str(fp, n, data); 244 | break; 245 | case TYPE_INT: 246 | dump_int(fp, n, data); 247 | break; 248 | default: 249 | _e("err map or key type"); 250 | break; 251 | } 252 | } 253 | 254 | void dump_rec(FILE* fp, node_t* rec, void* data, int len) { 255 | node_t* first, *varg; 256 | int backets = 0; 257 | 258 | first = rec->rec.args; 259 | if (!first || !len) 260 | return; 261 | 262 | if (first->next && (len > 1)) { 263 | fputs("[ ", fp); 264 | backets = 1; 265 | } 266 | 267 | _foreach(varg, first) { 268 | if (varg != first) 269 | fputs(", ", fp); 270 | dump(fp, varg, data); 271 | data += varg->annot.size; 272 | 273 | if (!(--len)) 274 | break; 275 | } 276 | 277 | if (backets) 278 | fputs(" ]", fp); 279 | } 280 | 281 | int cmp_node(node_t* node, const void* a, const void* b) { 282 | node_t* arg; 283 | 284 | switch (node->annot.type) { 285 | case TYPE_INT: 286 | break; 287 | case TYPE_REC: 288 | break; 289 | default: 290 | break; 291 | } 292 | } 293 | 294 | void cmp_map(const void* ak, const void* bk, void* _map) { 295 | node_t* map = _map; 296 | node_t* rec = map->map.args; 297 | const void* av = ak + rec->annot.size; 298 | const void* bv = bk + rec->annot.size; 299 | int cmp; 300 | 301 | cmp = cmp_node(rec, ak, bk); 302 | if (cmp) 303 | return cmp_node(rec, ak, bk); 304 | 305 | return cmp_node(map, av, bv); 306 | } 307 | 308 | void map_dump(node_t* map) { 309 | node_t* arg; 310 | int err, c = 0; 311 | size_t fd, rsize, ksize, vsize; 312 | char* key, *val, *data; 313 | 314 | arg = map->map.args; 315 | fd = map->annot.mapid; 316 | ksize = arg->annot.size; 317 | vsize = map->annot.size; 318 | rsize = ksize + 
vsize; 319 | 320 | data = vmalloc(rsize * 1024); 321 | key = data; 322 | val = data + ksize; 323 | 324 | __key_workaround(fd, key, ksize, val); 325 | 326 | for (err = bpf_map_next(fd, key, key); !err; 327 | err = bpf_map_next(fd, key-rsize, key)) { 328 | 329 | err = bpf_map_lookup(fd, key, val); 330 | if (err) 331 | goto out_free; 332 | c++; 333 | 334 | key += rsize; 335 | val += rsize; 336 | } 337 | 338 | qsort_r(data, c, rsize, cmp_map, map); 339 | 340 | printf("\n%s\n", map->name, c); 341 | for (key = data, val = data+ksize; c > 0; c--) { 342 | dump(stdout, arg, key); 343 | fputs("\t", stdout); 344 | dump(stdout, map, val); 345 | fputs("\n", stdout); 346 | 347 | key += rsize; 348 | val += rsize; 349 | } 350 | 351 | out_free: 352 | free(data); 353 | } -------------------------------------------------------------------------------- /lang/annot.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "annot.h" 6 | #include "func.h" 7 | #include "ut.h" 8 | 9 | #define STRING_SIZE 64 10 | 11 | void annot_int(node_t* integer) { 12 | integer->annot.type = TYPE_INT; 13 | integer->annot.size = sizeof(integer->integer); 14 | } 15 | 16 | void annot_str(node_t* string) { 17 | size_t size; 18 | 19 | size = strlen(string->name) + 1; 20 | if (size > STRING_SIZE) { 21 | verror("string is to long (over %d bytes): %s", STRING_SIZE, string->name); 22 | } 23 | string->annot.type = TYPE_STR; 24 | string->annot.size = _ALIGNED(size); 25 | } 26 | 27 | static int annot_value(node_t *value) { 28 | int err = 0; 29 | 30 | switch (value->type) { 31 | case NODE_INT: 32 | annot_int(value); 33 | break; 34 | case NODE_STR: 35 | annot_str(value); 36 | break; 37 | case NODE_CALL: 38 | err = global_annot(value); 39 | break; 40 | default: 41 | break; 42 | } 43 | 44 | return err; 45 | } 46 | 47 | static void annot_map_args(node_t *map, ebpf_t *e) { 48 | node_t *arg; 49 | ssize_t ksize; 50 | 51 | arg = map->map.args; 
/*
 * is_arg() - report whether `name` is exactly "arg" followed by a single
 * decimal digit ("arg0" .. "arg9").
 *
 * Returns non-zero on a match, 0 otherwise.
 */
static inline int is_arg(const char* name) {
    /*
     * Each test guards the next read: name[3] is only examined once the
     * first three characters matched, and name[4] only once name[3] is a
     * digit, so the string is never read past its terminator.
     */
    return strncmp(name, "arg", 3) == 0
        && name[3] >= '0' && name[3] <= '9'
        && name[4] == '\0';
}
{ 142 | size_t size; 143 | char* arg, name; 144 | int num; 145 | 146 | arg = expr->cast.value; 147 | num = arg[3]; 148 | 149 | expr->annot.size = 0; 150 | expr->annot.type = TYPE_CAST; 151 | expr->annot.offs = num - '0'; 152 | } 153 | 154 | void annot_struct_filed(node_t* expr, ebpf_t* ctx) { 155 | sym_t* sym; 156 | int offs; 157 | char* sname, *filed; 158 | 159 | sym = symtable_get(ctx->st, expr->expr.left->name); 160 | sname = sym->cast; 161 | filed = expr->expr.right->name; 162 | 163 | offs = btf_get_field_off(sname, filed); 164 | 165 | expr->annot.offs = offs; 166 | expr->annot.size = 8; 167 | expr->annot.type = TYPE_INT; 168 | } 169 | 170 | void annot_accses(node_t* expr, ebpf_t* ctx) { 171 | sym_t* sym; 172 | 173 | sym = symtable_get(ctx->st, expr->expr.left->name); 174 | if (!sym) { 175 | annot_probe_args(expr, ctx); 176 | return; 177 | } 178 | 179 | annot_struct_filed(expr, ctx); 180 | } 181 | 182 | void annot_expr(node_t* expr, ebpf_t* ctx) { 183 | node_t* left, *right; 184 | int opcode; 185 | 186 | left = expr->expr.left; 187 | right = expr->expr.right; 188 | opcode = expr->expr.opcode; 189 | 190 | switch (opcode) { 191 | case OP_PIPE: 192 | annot_map_method(expr, ctx); 193 | break; 194 | case OP_ACCESS: 195 | annot_accses(expr, ctx); 196 | break; 197 | default: 198 | get_annot(left, ctx); 199 | get_annot(right, ctx); 200 | expr->annot.type = TYPE_INT; 201 | expr->annot.size = 8; 202 | break; 203 | } 204 | } 205 | 206 | void annot_rec(node_t *n, ebpf_t *code) { 207 | node_t *arg; 208 | ssize_t size = 0; 209 | 210 | _foreach(arg, n->rec.args) { 211 | get_annot(arg, code); 212 | 213 | size += arg->annot.size; 214 | } 215 | 216 | n->annot.size = size; 217 | n->annot.type = TYPE_REC; 218 | } 219 | 220 | void annot_probe(node_t* probe, ebpf_t* ctx) { 221 | int id; 222 | 223 | switch (probe->type) { 224 | case NODE_KPROBE: 225 | id = bpf_get_kprobe_id(probe->probe.name); 226 | break; 227 | case NODE_PROBE: 228 | ctx->name = probe->probe.name; 229 | id = 
bpf_get_probe_id(ctx->name); 230 | break; 231 | default: 232 | break; 233 | } 234 | 235 | probe->probe.traceid = id; 236 | } 237 | 238 | void sym_ref_assign(node_t* node, ebpf_t* code) { 239 | node_t* arg; 240 | arg = node->map.args; 241 | 242 | symtable_ref(code->st, node); 243 | arg->annot.addr = ebpf_addr_get(node, code); 244 | } 245 | 246 | 247 | void get_annot(node_t *node, ebpf_t *code) { 248 | switch (node->type) { 249 | case NODE_KPROBE: 250 | case NODE_PROBE: 251 | annot_probe(node, code); 252 | break; 253 | case NODE_CALL: 254 | case NODE_INT: 255 | case NODE_STR: 256 | annot_value(node); 257 | break; 258 | case NODE_VAR: 259 | symtable_ref(code->st, node); 260 | break; 261 | case NODE_MAP: 262 | sym_ref_assign(node, code); 263 | break; 264 | case NODE_EXPR: 265 | annot_expr(node, code); 266 | break; 267 | case NODE_DEC: 268 | annot_dec(node, code); 269 | break; 270 | case NODE_CAST: 271 | annot_cast(node, code); 272 | break; 273 | case NODE_REC: 274 | annot_rec(node, code); 275 | break; 276 | default: 277 | break; 278 | } 279 | } 280 | 281 | void assign_stack(node_t* node, ebpf_t *code) { 282 | node->annot.addr = ebpf_addr_get(node, code); 283 | } 284 | 285 | void assign_data(node_t* node, ssize_t addr) { 286 | switch (node->annot.type) { 287 | case TYPE_STR: 288 | node->annot.addr = addr; 289 | break; 290 | case TYPE_VAR: 291 | node->annot.addr = addr; 292 | break; 293 | default: 294 | break; 295 | } 296 | } 297 | 298 | void assign_dec(node_t *dec, ebpf_t *code) { 299 | node_t *var, *expr; 300 | ssize_t addr; 301 | sym_t *sym; 302 | 303 | var = dec->dec.var; 304 | expr = dec->dec.expr; 305 | sym = symtable_get(code->st, var->name); 306 | 307 | if (expr->type == NODE_CAST) { 308 | return; 309 | } 310 | 311 | if (var->type == NODE_MAP) { 312 | node_t *args; 313 | args = var->map.args; 314 | addr = ebpf_addr_get(args, code); 315 | 316 | args->annot.addr = addr; 317 | sym->map->kaddr = addr; 318 | } 319 | 320 | addr = ebpf_addr_get(var, code); 321 | 
var->annot.addr = addr; 322 | sym->vannot.addr = addr; 323 | 324 | assign_data(expr, addr); 325 | } 326 | 327 | void assign_rec(node_t *node, ebpf_t *code) { 328 | node_t *head; 329 | size_t offs; 330 | assign_stack(node, code); 331 | 332 | offs = node->annot.addr; 333 | 334 | _foreach(head, node->rec.args) { 335 | head->annot.addr = offs; 336 | offs += head->annot.size; 337 | } 338 | } 339 | 340 | void assign_method(node_t* expr, ebpf_t* code) { 341 | node_t* map, *args; 342 | sym_t* sym; 343 | ssize_t addr; 344 | 345 | map = expr->expr.left; 346 | args = map->map.args; 347 | sym = symtable_get(code->st, map->name); 348 | 349 | addr = ebpf_addr_get(args, code); 350 | 351 | args->annot.addr = addr; 352 | sym->map->kaddr = addr; 353 | 354 | addr = ebpf_addr_get(map, code); 355 | 356 | sym->vannot.addr = addr; 357 | map->annot.addr = addr; 358 | } 359 | 360 | void assign_expr(node_t* node, ebpf_t* code) { 361 | int op = node->expr.opcode; 362 | node_t* left = node->expr.left; 363 | node_t* right = node->expr.right; 364 | 365 | switch (op) { 366 | case OP_PIPE: 367 | assign_method(node, code); 368 | return; 369 | default: 370 | break; 371 | } 372 | 373 | if (left->type == NODE_MAP) { 374 | left->annot.addr = ebpf_addr_get(left, code); 375 | } 376 | 377 | if (right->type == NODE_MAP) { 378 | right->annot.addr = ebpf_addr_get(right, code); 379 | } 380 | } 381 | 382 | void loc_assign(node_t* node, ebpf_t* code) { 383 | switch (node->type) { 384 | case NODE_DEC: 385 | assign_dec(node, code); 386 | break; 387 | case NODE_REC: 388 | assign_rec(node, code); 389 | break; 390 | case NODE_EXPR: 391 | assign_expr(node, code); 392 | break; 393 | default: 394 | break; 395 | } 396 | } 397 | 398 | static int do_list(node_t *head, ebpf_t *ctx) { 399 | node_t *elem, *next = head; 400 | 401 | for (elem = next; elem;) { 402 | next = elem->next; 403 | sema(elem, ctx); 404 | elem = next; 405 | } 406 | 407 | return 0; 408 | } 409 | 410 | void sema(node_t *node, ebpf_t *ctx) { 411 | 412 | 
/*
 * Emit one step of the log2 search used by emit_log2().
 *
 * If `src` >= (1 << _bit) (signed compare) the two ALU instructions run:
 * `dst += _bit` and `src >>= _bit`.  Otherwise the unconditional jump skips
 * both.  Each expansion makes exactly four ebpf_emit() calls.
 *
 * Must be expanded in a scope that provides `code`, `dst` and `src`.
 * Wrapped in do { } while (0) so that `LOG2_CMP(n);` is a single statement.
 */
#define LOG2_CMP(_bit) \
    do { \
        ebpf_emit(code, JMP_IMM(BPF_JSGE, src, (1 << (_bit)), 1)); \
        ebpf_emit(code, JMP_IMM(BPF_JA, 0, 0, 2)); \
        ebpf_emit(code, ALU_IMM(BPF_ADD, dst, (_bit))); \
        ebpf_emit(code, ALU_IMM(BPF_RSH, src, (_bit))); \
    } while (0)
JMP_IMM(BPF_JA, 0, 0, 2)); 41 | ebpf_emit(code, ALU_IMM(BPF_ADD, dst, 32)); 42 | ebpf_emit(code, ALU_IMM(BPF_RSH, src, 32)); 43 | 44 | LOG2_CMP(16); 45 | LOG2_CMP(8); 46 | LOG2_CMP(4); 47 | LOG2_CMP(2); 48 | LOG2_CMP(1); 49 | return 0; 50 | } 51 | 52 | void compile_map_update(ebpf_t* code, node_t* var) { 53 | ssize_t kaddr, vaddr, size; 54 | int fd; 55 | 56 | vaddr = var->annot.addr; 57 | kaddr = var->map.args->annot.addr; 58 | size = var->annot.ksize; 59 | fd = var->annot.mapid; 60 | 61 | ebpf_emit_map_update(code, fd, kaddr, vaddr); 62 | } 63 | 64 | void compile_map_look(ebpf_t* code, node_t* map, ir_t* ir) { 65 | int fd; 66 | ssize_t kaddr, vaddr, vsize; 67 | 68 | fd = map->annot.mapid; 69 | kaddr = map->map.args->annot.addr; 70 | vsize = map->annot.size; 71 | vaddr = map->annot.addr; 72 | 73 | ebpf_emit_map_look(code, fd, kaddr); 74 | 75 | ebpf_emit_read(code, vaddr, BPF_REG_0, vsize); 76 | 77 | if (map->annot.type == TYPE_INT) { 78 | ebpf_emit(code, LDXDW(gregs[ir->r0->rn], vaddr, BPF_REG_10)); 79 | } 80 | 81 | } 82 | 83 | void map_count(node_t* map, ebpf_t* code) { 84 | int fd; 85 | ssize_t kaddr, vaddr, vsize; 86 | 87 | fd = map->annot.mapid; 88 | kaddr = map->map.args->annot.addr; 89 | vsize = map->annot.size; 90 | vaddr = map->annot.addr; 91 | 92 | ebpf_stack_zero(map, code, 0); 93 | 94 | ebpf_emit_map_look(code, fd, kaddr); 95 | ebpf_emit(code, JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8)); 96 | ebpf_emit_read(code, vaddr, BPF_REG_0, vsize); 97 | 98 | ebpf_emit_count(code, vaddr); 99 | ebpf_emit_map_update(code, fd, kaddr, vaddr); 100 | } 101 | 102 | void compile_comm(node_t* n, ebpf_t* e) { 103 | size_t i; 104 | 105 | for (i = 0; i < n->annot.size; i += 4) { 106 | ebpf_emit(e, STW_IMM(BPF_REG_10, n->annot.addr+i, 0)); 107 | } 108 | 109 | ebpf_emit(e, MOV(BPF_REG_1, BPF_REG_10)); 110 | ebpf_emit(e, ALU_IMM(OP_ADD, BPF_REG_1, n->annot.addr)); 111 | ebpf_emit(e, MOV_IMM(BPF_REG_2, n->annot.size)); 112 | ebpf_emit(e, CALL(BPF_FUNC_get_current_comm)); 113 | } 114 | 115 
| void compile_rec(node_t* n, ebpf_t* code) { 116 | ssize_t addr, size; 117 | node_t* arg; 118 | int id; 119 | 120 | id = code->evp->mapfd; 121 | addr = n->annot.addr; 122 | size = n->annot.size; 123 | 124 | ebpf_emit(code, MOV(BPF_REG_1, BPF_REG_9)); 125 | ebpf_emit_mapld(code, BPF_REG_2, id); 126 | 127 | ebpf_emit(code, MOV32_IMM(BPF_REG_3, BPF_F_CURRENT_CPU)); 128 | ebpf_emit(code, MOV(BPF_REG_4, BPF_REG_10)); 129 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_4, addr)); 130 | 131 | ebpf_emit(code, MOV_IMM(BPF_REG_5, size)); 132 | ebpf_emit(code, CALL(BPF_FUNC_perf_event_output)); 133 | } 134 | 135 | void compile_call(node_t* n, ebpf_t* e) { 136 | if (n->annot.type == TYPE_REC) { 137 | compile_rec(n, e); 138 | return; 139 | } 140 | } 141 | 142 | void to_stack(node_t* obj, ebpf_t* code) { 143 | switch (obj->type) { 144 | case NODE_STR: 145 | ebpf_str_to_stack(code, obj); 146 | break; 147 | case NODE_CALL: 148 | compile_comm(obj, code); 149 | break; 150 | default: 151 | break; 152 | } 153 | } 154 | 155 | void store_data(vec_t* vec, ebpf_t* e) { 156 | int i, len; 157 | node_t* obj; 158 | 159 | len = vec->len; 160 | 161 | for (i = 0; i < len; i++) { 162 | obj = (node_t*)vec->data[i]; 163 | to_stack(obj, e); 164 | } 165 | } 166 | 167 | void copy_data(ebpf_t* ebpf, ir_t* ir) { 168 | ssize_t to, from; 169 | size_t size; 170 | sym_t* sym; 171 | node_t* n = ir->value; 172 | 173 | sym = symtable_get(ebpf->st, n->name); 174 | to = n->annot.addr; 175 | from = sym->vannot.addr; 176 | size = n->annot.size; 177 | 178 | if (n->annot.type == TYPE_INT) { 179 | ebpf_emit(ebpf, LDXDW(gregs[ir->r0->rn], from, BPF_REG_10)); 180 | return; 181 | } 182 | 183 | ebpf_value_copy(ebpf, to, from, size); 184 | } 185 | 186 | void read_trace_args(ir_t* ir, ebpf_t* code) { 187 | node_t* right, *expr; 188 | ssize_t addr; 189 | size_t size, offs; 190 | 191 | expr = ir->value; 192 | right = expr->expr.right; 193 | 194 | addr = expr->annot.addr; 195 | size = expr->annot.size; 196 | offs = 
right->annot.offs; 197 | 198 | switch (right->annot.type) { 199 | case TYPE_INT: 200 | ebpf_emit(code, LDXDW(BPF_REG_0, offs, BPF_REG_9)); 201 | ebpf_emit(code, MOV(gregs[ir->r0->rn], BPF_REG_0)); 202 | break; 203 | case TYPE_STR: 204 | ebpf_emit(code, MOV(BPF_REG_1, BPF_REG_10)); 205 | ebpf_emit(code, ALU_IMM(BPF_ADD, BPF_REG_1, addr)); 206 | ebpf_emit(code, MOV_IMM(BPF_REG_2, size)); 207 | ebpf_emit(code, LDXDW(BPF_REG_3, offs, BPF_REG_9)); 208 | ebpf_emit(code, CALL(BPF_FUNC_probe_read_user_str)); 209 | break; 210 | default: 211 | break; 212 | } 213 | } 214 | 215 | void read_kprobe_args(ir_t* ir, ebpf_t* ctx) { 216 | ssize_t o, size, addr, offs; 217 | sym_t* sym; 218 | node_t* node; 219 | char* name; 220 | 221 | node = ir->value; 222 | name = node->expr.left->name; 223 | size = node->annot.size; 224 | addr = node->annot.addr; 225 | offs = node->annot.offs; 226 | 227 | sym = symtable_get(ctx->st, name); 228 | 229 | switch (sym->vannot.offs) { 230 | case 0: 231 | o = btf_get_field_off("pt_regs", "di"); 232 | break; 233 | case 1: 234 | o = btf_get_field_off("pt_regs", "si"); 235 | break; 236 | case 2: 237 | o = btf_get_field_off("pt_regs", "dx"); 238 | default: 239 | break; 240 | } 241 | 242 | ebpf_emit(ctx, LDXDW(BPF_REG_3, o, BPF_REG_9)); 243 | ebpf_emit(ctx, MOV(BPF_REG_1, BPF_REG_10)); 244 | ebpf_emit(ctx, ALU_IMM(BPF_ADD, BPF_REG_1, addr)); 245 | ebpf_emit(ctx, MOV_IMM(BPF_REG_2, size)); 246 | ebpf_emit(ctx, ALU_IMM(BPF_ADD, BPF_REG_3, offs)); 247 | ebpf_emit(ctx, CALL(BPF_FUNC_probe_read_kernel)); 248 | 249 | ebpf_emit(ctx, LDXDW(gregs[ir->r0->rn], addr, BPF_REG_10)); 250 | } 251 | 252 | //todo refactor 253 | void read_args(ir_t* ir, ebpf_t* code) { 254 | if (code->name == NULL) { 255 | read_kprobe_args(ir, code); 256 | } else { 257 | read_trace_args(ir, code); 258 | } 259 | } 260 | 261 | 262 | void compile_ir(ir_t* ir, ebpf_t* code) { 263 | ssize_t addr; 264 | int r0 = ir->r0 ? ir->r0->rn : 0; 265 | int r1 = ir->r1 ? ir->r1->rn : 0; 266 | int r2 = ir->r2 ? 
ir->r2->rn : 0; 267 | 268 | switch (ir->op) { 269 | case IR_IMM: 270 | ebpf_emit(code, MOV_IMM(gregs[r0], ir->imm)); 271 | break; 272 | case IR_SUB: 273 | ebpf_emit(code, ALU(BPF_SUB, gregs[r0], gregs[r2])); 274 | break; 275 | case IR_ADD: 276 | ebpf_emit(code, ALU(BPF_ADD, gregs[r0], gregs[r2])); 277 | break; 278 | case IR_EQ: 279 | ebpf_emit_bool(code, BPF_JEQ, r0, r2); 280 | break; 281 | case IR_MUL: 282 | ebpf_emit(code, ALU(BPF_MUL, gregs[r0], gregs[r2])); 283 | break; 284 | case IR_DIV: 285 | ebpf_emit(code, ALU(BPF_DIV, gregs[r0], gregs[r2])); 286 | break; 287 | case IR_GT: 288 | ebpf_emit_bool(code, BPF_JGT, r0, r2); 289 | break; 290 | case IR_GE: 291 | ebpf_emit_bool(code, BPF_JGE, r0, r2); 292 | break; 293 | case IR_LT: 294 | ebpf_emit_bool(code, BPF_JGT, r2, r0); 295 | break; 296 | case IR_LE: 297 | ebpf_emit_bool(code, BPF_JGE, r2, r0); 298 | break; 299 | case IR_COPY: 300 | copy_data(code, ir); 301 | break; 302 | case IR_INIT: 303 | ebpf_stack_zero(ir->value, code, BPF_REG_0); 304 | break; 305 | case IR_STORE: 306 | addr = ir->value->annot.addr; 307 | ebpf_emit(code, STXDW(BPF_REG_10, addr, gregs[r2])); 308 | break; 309 | case IR_ARG: 310 | ebpf_emit(code, STXDW(BPF_REG_10, ir->addr, gregs[r0])); 311 | break; 312 | case IR_MAP_UPDATE: 313 | compile_map_update(code, ir->value); 314 | break; 315 | case IR_MAP_LOOK: 316 | compile_map_look(code, ir->value, ir); 317 | break; 318 | case IR_RCALL: 319 | global_compile(ir->value, code, 0); 320 | ebpf_emit(code, MOV(gregs[r0], BPF_REG_0)); 321 | break; 322 | case IR_CALL: 323 | compile_call(ir->value, code); 324 | break; 325 | case IR_BR: 326 | ebpf_emit(code, MOV(BPF_REG_0, gregs[r2])); 327 | break; 328 | case IR_IF_THEN: 329 | at = code->ip; 330 | ebpf_emit(code, if_then_insn); 331 | break; 332 | case IR_IF_END: 333 | ebpf_emit_at(at, JMP_IMM(BPF_JEQ, 0, 0, code->ip-at-1)); 334 | break; 335 | case IR_MAP_METHOD: 336 | map_count(ir->value, code); 337 | break; 338 | case IR_READ: 339 | read_args(ir, code); 340 
| break; 341 | case IR_RETURN: 342 | ebpf_emit(code, MOV_IMM(BPF_REG_0, 0)); 343 | ebpf_emit(code, EXIT); 344 | break; 345 | default: 346 | break; 347 | } 348 | } 349 | 350 | void compile(prog_t* prog) { 351 | int i, j; 352 | bb_t* bb; 353 | ir_t* ir; 354 | ebpf_t* e; 355 | 356 | e = prog->ctx; 357 | 358 | ebpf_emit(e, MOV(BPF_CTX_REG, BPF_REG_1)); 359 | store_data(prog->data, e); 360 | 361 | for (i = 0; i < prog->bbs->len; i++) { 362 | bb = prog->bbs->data[i]; 363 | for (j = 0; j < bb->ir->len; j++) { 364 | ir = bb->ir->data[j]; 365 | compile_ir(ir, e); 366 | } 367 | } 368 | } 369 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright yumosx zhengel2022@163.com 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /lang/parser.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "lexer.h" 8 | #include "parser.h" 9 | #include "ut.h" 10 | 11 | static bool current(parser_t *parser, token_type type) { 12 | return parser->this_tok->type == type; 13 | } 14 | 15 | static bool expect(parser_t *parser, token_type type) { 16 | return parser->next_tok->type == type; 17 | } 18 | 19 | static void advance(parser_t *parser) { 20 | if (parser->this_tok != NULL) 21 | free_token(parser->this_tok); 22 | 23 | parser->this_tok = parser->next_tok; 24 | parser->next_tok = lexer_next_token(parser->lexer); 25 | } 26 | 27 | static void bad_token(parser_t* parser, token_type type, bool is_next) { 28 | char* token, *expect; 29 | 30 | if (is_next) { 31 | token = parser->this_tok->literal; 32 | expect = token_to_str(type); 33 | 34 | verror("Parsing error: 
expected next token to be [%s], got [%s] instead", expect, token); 35 | return; 36 | } 37 | 38 | printf("%s %s\n", token, expect); 39 | token = parser->this_tok->literal; 40 | expect = token_to_str(type); 41 | 42 | verror("Parser error: expected this token to be [%s] got [%s] instead", expect, token); 43 | 44 | } 45 | 46 | parser_t *parser_init(lexer_t *lexer) { 47 | parser_t *parser = vmalloc(sizeof(*parser)); 48 | 49 | parser->lexer = lexer; 50 | parser->this_tok = NULL; 51 | parser->next_tok = NULL; 52 | 53 | advance(parser); 54 | advance(parser); 55 | 56 | return parser; 57 | } 58 | 59 | bool expect_next_token(parser_t* parser, token_type type) { 60 | if (expect(parser, type)) { 61 | advance(parser); 62 | return true; 63 | } 64 | 65 | return false; 66 | } 67 | 68 | static seq_t 69 | get_token_seq(token_type type) { 70 | switch (type) { 71 | case TOKEN_EQ: 72 | return EQUALS; 73 | 74 | case TOKEN_SUB: 75 | return SUM; 76 | 77 | case TOKEN_PLUS: 78 | return SUM; 79 | 80 | case TOKEN_STAR: 81 | return PRODUCT; 82 | 83 | case TOKEN_SLASH: 84 | return PRODUCT; 85 | 86 | case LEFT_PAREN: 87 | return CALL; 88 | 89 | case LEFT_BRACKET: 90 | return INDEX; 91 | 92 | case TOKEN_ASSIGN: 93 | return ASSIGN; 94 | 95 | case TOKEN_DEC: 96 | return DEC; 97 | 98 | case TOKEN_PIPE: 99 | return PIPE; 100 | 101 | case TOKEN_LE: 102 | case TOKEN_LT: 103 | case TOKEN_GE: 104 | case TOKEN_GT: 105 | return LESSGREATERA; 106 | 107 | case TOKEN_ACCESS: 108 | return SUM; 109 | default: 110 | return LOWEST; 111 | } 112 | } 113 | 114 | static int get_op(token_type type) { 115 | switch (type) { 116 | case TOKEN_ASSIGN: 117 | return OP_MOV; 118 | 119 | case TOKEN_STAR: 120 | return OP_MUL; 121 | 122 | case TOKEN_PLUS: 123 | return OP_ADD; 124 | 125 | case TOKEN_SUB: 126 | return OP_SUB; 127 | 128 | case TOKEN_SLASH: 129 | return OP_DIV; 130 | 131 | case TOKEN_PIPE: 132 | return OP_PIPE; 133 | 134 | case TOKEN_GE: 135 | return OP_GE; 136 | 137 | case TOKEN_GT: 138 | return OP_GT; 139 | 140 | 
case TOKEN_LT: 141 | return OP_LT; 142 | 143 | case TOKEN_LE: 144 | return OP_LE; 145 | 146 | case TOKEN_EQ: 147 | return OP_EQ; 148 | 149 | case TOKEN_ACCESS: 150 | return OP_ACCESS; 151 | 152 | default: 153 | return OP_ILLEGAL; 154 | } 155 | } 156 | 157 | static node_t *parse_integer(char *name) { 158 | size_t integer = 0; 159 | char *s = name; 160 | 161 | while (*s != '\0') { 162 | integer = (integer * 10) + (*s++ - '0'); 163 | } 164 | 165 | return node_int_new(integer); 166 | } 167 | 168 | static node_t* parse_dec(parser_t *parser, node_t *var) { 169 | node_t* expr; 170 | int seq; 171 | 172 | seq = get_token_seq(parser->this_tok->type); 173 | 174 | if (expect_next_token(parser, LEFT_PAREN)) { 175 | advance(parser); 176 | char* name = vstr(parser->this_tok->literal); 177 | 178 | advance(parser); 179 | advance(parser); 180 | advance(parser); 181 | 182 | char* arg_name = vstr(parser->this_tok->literal); 183 | expr = node_cast_new(name, arg_name); 184 | } else { 185 | advance(parser); 186 | expr = parse_expr(parser, seq); 187 | } 188 | 189 | return node_dec_new(var, expr); 190 | } 191 | 192 | node_t* parse_assign(parser_t *parser, node_t *left) { 193 | node_t *right; 194 | int seq; 195 | 196 | seq = get_token_seq(parser->this_tok->type); 197 | advance(parser); 198 | 199 | right = parse_expr(parser, seq); 200 | 201 | return node_assign_new(left, right); 202 | } 203 | 204 | node_t* parse_infix_expr(parser_t *parser, node_t *left) { 205 | node_t *right; 206 | seq_t seq; 207 | int opcode; 208 | 209 | opcode = get_op(parser->this_tok->type); 210 | seq = get_token_seq(parser->this_tok->type); 211 | advance(parser); 212 | 213 | right = parse_expr(parser, seq); 214 | 215 | return node_expr_new(opcode, left, right); 216 | } 217 | 218 | node_t* parse_call_args(parser_t* parser) { 219 | node_t *n, *head; 220 | 221 | advance(parser); 222 | 223 | n = parse_expr(parser, LOWEST); 224 | head = n; 225 | 226 | while (expect(parser, TOKEN_COMMA)) { 227 | advance(parser); 228 | 
advance(parser); 229 | n->next = parse_expr(parser, LOWEST); 230 | n = n->next; 231 | } 232 | 233 | if (!expect_next_token(parser, RIGHT_PAREN)) { 234 | verror("expect a right paren"); 235 | return NULL; 236 | } 237 | 238 | return head; 239 | } 240 | 241 | node_t *parse_call_expr(parser_t *parser, node_t *left) { 242 | left->type = NODE_CALL; 243 | 244 | if (expect(parser, RIGHT_PAREN)) { 245 | advance(parser); 246 | return left; 247 | } 248 | 249 | left->call.args = parse_call_args(parser); 250 | return left; 251 | } 252 | 253 | node_t *parse_map_args(parser_t *p) { 254 | node_t *n, *head; 255 | 256 | n = parse_expr(p, LOWEST); 257 | head = n; 258 | 259 | while (expect(p, TOKEN_COMMA)) { 260 | advance(p); 261 | advance(p); 262 | n->next = parse_expr(p, LOWEST); 263 | n = n->next; 264 | } 265 | 266 | if (!expect_next_token(p, RIGHT_BRACKET)) { 267 | return NULL; 268 | } 269 | 270 | return head; 271 | } 272 | 273 | node_t *parse_map_expr(parser_t *p, node_t *left) { 274 | left->type = NODE_MAP; 275 | advance(p); 276 | left->map.args = parse_map_args(p); 277 | return left; 278 | } 279 | 280 | node_t *parse_unroll_stmts(parser_t *p) { 281 | char *str; 282 | node_t *stmts; 283 | size_t count = 0; 284 | 285 | if (!expect_next_token(p, LEFT_PAREN)) { 286 | return NULL; 287 | } 288 | 289 | str = p->next_tok->literal; 290 | 291 | while (*str != '\0') { 292 | count = (count * 10) + (*str++ - '0'); 293 | } 294 | advance(p); 295 | 296 | if (!expect_next_token(p, RIGHT_PAREN)) { 297 | return NULL; 298 | } 299 | advance(p); 300 | 301 | stmts = parse_block_stmts(p); 302 | return node_unroll_new(count, stmts); 303 | } 304 | 305 | node_t *parse_if_stmts(parser_t *p) { 306 | node_t *cond, *stmts; 307 | 308 | if (!expect_next_token(p, LEFT_PAREN)) { 309 | return NULL; 310 | } 311 | 312 | advance(p); 313 | cond = parse_expr(p, LOWEST); 314 | advance(p); 315 | advance(p); 316 | 317 | stmts = parse_block_stmts(p); 318 | 319 | return node_if_new(cond, stmts, NULL); 320 | } 321 | 322 | 
/*
 * Parse the statements of a block into a ->next linked list.
 *
 * Statements are parsed until the look-ahead token is RIGHT_BLOCK or
 * END_OF_FILE.  A statement that fails to parse is skipped.  If the first
 * statement fails, the list starts at the first one that succeeds, so a
 * NULL tail is never dereferenced.  Returns the head of the list, or NULL
 * when no statement parsed.
 */
node_t *parse_block_stmts(parser_t *p) {
    node_t *head, *tail;

    advance(p);

    head = tail = parse_expr(p, LOWEST);

    advance(p);

    while (!expect(p, RIGHT_BLOCK) && !expect(p, END_OF_FILE)) {
        node_t *stmt = parse_expr(p, LOWEST);

        if (stmt != NULL) {
            if (tail == NULL) {
                head = stmt;
            } else {
                tail->next = stmt;
            }
            tail = stmt;
        }
        advance(p);
    }
    advance(p);
    return head;
}
node_t* stmts, *pred; 412 | 413 | if (!expect_next_token(parser, TOKEN_IDENT)) { 414 | return NULL; 415 | } 416 | 417 | name = strdup(parser->this_tok->literal); 418 | 419 | if (!vstreq("kprobe", event) && event) { 420 | flag = 1; 421 | char* str = calloc(100, sizeof(char)); 422 | snprintf(str, 100, "%s/%s", event, name); 423 | free(name); 424 | name = str; 425 | } 426 | 427 | advance(parser); 428 | 429 | if (parser->this_tok->type == TOKEN_SLASH) { 430 | advance(parser); 431 | pred = parse_expr(parser, LOWEST); 432 | advance(parser); 433 | advance(parser); 434 | } 435 | 436 | stmts = parse_block_stmts(parser); 437 | 438 | if (!flag) { 439 | return node_kprobe_new(name, stmts); 440 | } 441 | 442 | return node_probe_new(name, stmts); 443 | } 444 | 445 | 446 | node_t* parse_script(parser_t* parser, char* event) { 447 | char* name; 448 | node_t* stmts; 449 | 450 | name = parser->this_tok->literal; 451 | 452 | if (current(parser, TOKEN_BEGIN) || current(parser, TOKEN_END)) { 453 | name = strdup(name); 454 | advance(parser); 455 | 456 | stmts = parse_block_stmts(parser); 457 | advance(parser); 458 | 459 | return node_test_new(name, stmts); 460 | } 461 | 462 | if (current(parser, TOKEN_PROBE)) { 463 | stmts = parse_probe(parser, event); 464 | advance(parser); 465 | return stmts; 466 | } 467 | 468 | return NULL; 469 | } 470 | 471 | char* parse_event(parser_t *parser) { 472 | if (!current(parser, TOKEN_HASH)) { 473 | bad_token(parser, TOKEN_HASH, false); 474 | return NULL; 475 | } 476 | 477 | char* name; 478 | 479 | if (!expect_next_token(parser, TOKEN_IDENT)) { 480 | bad_token(parser, TOKEN_IDENT, true); 481 | return NULL; 482 | } 483 | 484 | name = strdup(parser->this_tok->literal); 485 | 486 | if (!expect_next_token(parser, TOKEN_SEMICOLON)) { 487 | bad_token(parser, TOKEN_SEMICOLON, true); 488 | return NULL; 489 | } 490 | 491 | return name; 492 | } 493 | 494 | node_t* parse_program(parser_t* parser) { 495 | char* name; 496 | node_t* head, *node; 497 | 498 | name = 
parse_event(parser); 499 | advance(parser); 500 | 501 | node = parse_script(parser, name); 502 | 503 | head = node; 504 | head->name = name; 505 | 506 | while (parser->next_tok->type != END_OF_FILE){ 507 | node_t *script = parse_script(parser, name); 508 | if (script) { 509 | node->next = script; 510 | node = node->next; 511 | } 512 | } 513 | 514 | free_parser(parser); 515 | 516 | return head; 517 | } 518 | 519 | void free_parser(parser_t *parser) { 520 | free_lexer(parser->lexer); 521 | free_token(parser->this_tok); 522 | free_token(parser->next_tok); 523 | } -------------------------------------------------------------------------------- /lang/ir.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "ir.h" 4 | #include "ut.h" 5 | #include "insn.h" 6 | #include "probe.h" 7 | #include "buffer.h" 8 | 9 | static prog_t *prog; 10 | static bb_t *curbb; 11 | static int nreg = 1; 12 | static int nlabel = 1; 13 | static int regnum = 3; 14 | 15 | static bb_t *bb_new() { 16 | bb_t *bb = calloc(1, sizeof(*bb)); 17 | 18 | bb->label = nlabel++; 19 | bb->ir = vec_new(); 20 | bb->succ = vec_new(); 21 | bb->pred = vec_new(); 22 | bb->def_regs = vec_new(); 23 | bb->in_regs = vec_new(); 24 | bb->out_regs = vec_new(); 25 | 26 | vec_push(prog->bbs, bb); 27 | 28 | return bb; 29 | } 30 | 31 | static ir_t *ir_new(int op) { 32 | ir_t *ir = calloc(1, sizeof(*ir)); 33 | ir->op = op; 34 | vec_push(curbb->ir, ir); 35 | return ir; 36 | } 37 | 38 | static reg_t *reg_new() { 39 | reg_t *reg = calloc(1, sizeof(*reg)); 40 | reg->vn = nreg++; 41 | reg->rn = -1; 42 | return reg; 43 | } 44 | 45 | static ir_t *emit(int op, reg_t *r0, reg_t *r1, reg_t *r2) { 46 | ir_t *ir = ir_new(op); 47 | ir->r0 = r0; 48 | ir->r1 = r1; 49 | ir->r2 = r2; 50 | return ir; 51 | } 52 | 53 | static ir_t* ir_exit() { 54 | ir_t* ir = ir_new(IR_RETURN); 55 | return ir; 56 | } 57 | 58 | static ir_t* if_then() { 59 | ir_t *ir = ir_new(IR_IF_THEN); 60 | return ir; 61 | 
} 62 | 63 | static ir_t* then_end() { 64 | ir_t *ir = ir_new(IR_IF_END); 65 | return ir; 66 | } 67 | 68 | static ir_t* else_then() { 69 | ir_t *ir = ir_new(IR_ELSE_THEN); 70 | return ir; 71 | } 72 | 73 | static ir_t* else_end() { 74 | ir_t *ir = ir_new(IR_ELSE_END); 75 | return ir; 76 | } 77 | 78 | static ir_t* map_update(node_t *map) { 79 | ir_t *ir = ir_new(IR_MAP_UPDATE); 80 | ir->value = map; 81 | return ir; 82 | } 83 | 84 | static ir_t* map_look(node_t* map) { 85 | ir_t* ir = ir_new(IR_MAP_LOOK); 86 | ir->value = map; 87 | return ir; 88 | } 89 | 90 | static ir_t* map_count(node_t* map) { 91 | ir_t* ir = ir_new(IR_MAP_METHOD); 92 | ir->value = map; 93 | return ir; 94 | } 95 | 96 | static ir_t *br(reg_t *r, bb_t *then, bb_t *els) { 97 | ir_t *ir = ir_new(IR_BR); 98 | ir->r2 = r; 99 | ir->bb1 = then; 100 | ir->bb2 = els; 101 | 102 | return ir; 103 | } 104 | 105 | static ir_t *jmp(bb_t *bb) { 106 | ir_t *ir = ir_new(IR_JMP); 107 | 108 | ir->bb1 = bb; 109 | return ir; 110 | } 111 | 112 | static reg_t *imm(node_t *n) { 113 | ir_t *ir = ir_new(IR_IMM); 114 | 115 | ir->r0 = reg_new(); 116 | ir->imm = n->integer; 117 | 118 | return ir->r0; 119 | } 120 | 121 | 122 | static void push(node_t *value) { 123 | vec_push(prog->data, value); 124 | } 125 | 126 | static ir_t *init(node_t *var) { 127 | ir_t *ir; 128 | 129 | ir = ir_new(IR_INIT); 130 | ir->value = var; 131 | 132 | return ir; 133 | } 134 | 135 | static reg_t* var_copy(node_t* var) { 136 | ir_t* ir = ir_new(IR_COPY); 137 | ir->r0 = reg_new(); 138 | ir->value = var; 139 | 140 | return ir->r0; 141 | } 142 | 143 | static reg_t* map_copy(node_t* map) { 144 | ir_t* ir = ir_new(IR_MAP_LOOK); 145 | ir->r0 = reg_new(); 146 | ir->value = map; 147 | 148 | return ir->r0; 149 | } 150 | 151 | static reg_t* arg_read(node_t* expr) { 152 | ir_t* ir = ir_new(IR_READ); 153 | ir->r0 = reg_new(); 154 | ir->value = expr; 155 | 156 | return ir->r0; 157 | } 158 | 159 | static ir_t* ir_struct(node_t* expr, node_t* dst) { 160 | ir_t* ir = 
ir_new(IR_CAST); 161 | ir->value = expr; 162 | return ir; 163 | } 164 | 165 | ir_t *store(node_t *dst, reg_t *src) { 166 | ir_t *ir; 167 | 168 | ir = ir_new(IR_STORE); 169 | 170 | ir->value = dst; 171 | ir->r0 = src; 172 | 173 | return ir; 174 | } 175 | 176 | ir_t* arg_reg_to_stack(node_t* arg, reg_t* reg) { 177 | ir_t *ir; 178 | 179 | ir = ir_new(IR_ARG); 180 | 181 | ir->value = arg; 182 | ir->r0 = reg; 183 | ir->addr = arg->annot.addr; 184 | ir->size = arg->annot.size; 185 | 186 | return ir; 187 | } 188 | 189 | reg_t* ret(node_t* call) { 190 | ir_t* ir; 191 | 192 | ir = ir_new(IR_RCALL); 193 | ir->r0 = reg_new(); 194 | ir->value = call; 195 | 196 | return ir->r0; 197 | } 198 | 199 | static void gen_noret_call(node_t* call) { 200 | ir_t* ir; 201 | node_t* rec; 202 | node_t* arg; 203 | 204 | if (call->call.args) { 205 | rec = call->call.args->next; 206 | _foreach(arg, rec->rec.args) { 207 | dyn_args(arg); 208 | } 209 | } 210 | 211 | ir = ir_new(IR_CALL); 212 | ir->value = rec; 213 | } 214 | 215 | 216 | static reg_t *binop(int op, node_t *node) { 217 | reg_t *r1, *r2, *r3; 218 | 219 | r1 = reg_new(); 220 | r2 = gen_expr(node->expr.left); 221 | r3 = gen_expr(node->expr.right); 222 | 223 | emit(op, r1, r2, r3); 224 | 225 | return r1; 226 | } 227 | 228 | reg_t* gen_binop(node_t *n) { 229 | switch (n->expr.opcode) { 230 | case OP_ADD: 231 | return binop(IR_ADD, n); 232 | case OP_SUB: 233 | return binop(IR_SUB, n); 234 | case OP_DIV: 235 | return binop(IR_DIV, n); 236 | case OP_MUL: 237 | return binop(IR_MUL, n); 238 | case OP_GT: 239 | return binop(IR_GT, n); 240 | case OP_GE: 241 | return binop(IR_GE, n); 242 | case OP_LT: 243 | return binop(IR_LT, n); 244 | case OP_LE: 245 | return binop(IR_LE, n); 246 | case OP_EQ: 247 | return binop(IR_EQ, n); 248 | case OP_ACCESS: 249 | return arg_read(n); 250 | default: 251 | break; 252 | } 253 | } 254 | 255 | reg_t* gen_expr(node_t *expr) { 256 | switch (expr->type) { 257 | case NODE_INT: 258 | return imm(expr); 259 | case 
NODE_EXPR: 260 | return gen_binop(expr); 261 | case NODE_CALL: 262 | return ret(expr); 263 | case NODE_VAR: 264 | return var_copy(expr); 265 | case NODE_MAP: 266 | dyn_args(expr->map.args); 267 | return map_copy(expr); 268 | default: 269 | break; 270 | } 271 | } 272 | 273 | void reg_to_stack(node_t* dst, node_t* src) { 274 | reg_t* r1; 275 | 276 | r1 = gen_expr(src); 277 | init(dst); 278 | store(dst, r1); 279 | } 280 | 281 | void direct_to_stack(node_t* dst, node_t* src) { 282 | switch (src->type) { 283 | case NODE_CALL: 284 | push(src); 285 | break; 286 | case NODE_STR: 287 | push(src); 288 | break; 289 | case NODE_VAR: 290 | var_copy(dst); 291 | break; 292 | case NODE_MAP: 293 | dyn_args(dst->map.args); 294 | map_copy(dst); 295 | break; 296 | case NODE_EXPR: 297 | arg_read(src); 298 | break; 299 | default: 300 | break; 301 | } 302 | } 303 | 304 | void dyn_assign(node_t* dst, node_t* src) { 305 | switch (dst->annot.type) { 306 | case TYPE_INT: 307 | reg_to_stack(dst, src); 308 | break; 309 | case TYPE_STR: 310 | direct_to_stack(dst, src); 311 | break; 312 | case TYPE_CAST: 313 | ir_struct(dst, src); 314 | break; 315 | default: 316 | break; 317 | } 318 | } 319 | 320 | 321 | void dyn_int_store(node_t* dst) { 322 | reg_t* reg; 323 | 324 | switch (dst->type) { 325 | case NODE_INT: 326 | reg = gen_expr(dst); 327 | break; 328 | case NODE_CALL: 329 | reg = gen_expr(dst); 330 | break; 331 | case NODE_VAR: 332 | reg = gen_expr(dst); 333 | break; 334 | case NODE_EXPR: 335 | reg = gen_expr(dst); 336 | break; 337 | case NODE_MAP: 338 | reg = gen_expr(dst); 339 | break; 340 | default: 341 | break; 342 | } 343 | 344 | arg_reg_to_stack(dst, reg); 345 | } 346 | 347 | void dyn_str_store(node_t* dst) { 348 | switch (dst->type) { 349 | case NODE_STR: 350 | push(dst); 351 | break; 352 | case NODE_CALL: 353 | push(dst); 354 | break; 355 | case NODE_VAR: 356 | var_copy(dst); 357 | break; 358 | case NODE_MAP: 359 | dyn_args(dst->map.args); 360 | map_copy(dst); 361 | break; 362 | case 
NODE_EXPR: 363 | arg_read(dst); 364 | break; 365 | default: 366 | break; 367 | } 368 | } 369 | 370 | void dyn_args(node_t* dst) { 371 | switch (dst->annot.type) { 372 | case TYPE_INT: 373 | dyn_int_store(dst); 374 | break; 375 | case TYPE_STR: 376 | dyn_str_store(dst); 377 | break; 378 | default: 379 | break; 380 | } 381 | } 382 | 383 | void gen_map_method(node_t* expr) { 384 | node_t* map; 385 | 386 | map = expr->expr.left; 387 | 388 | dyn_args(map->map.args); 389 | map_count(map); 390 | } 391 | 392 | void gen_dec(node_t *dec) { 393 | node_t *var, *expr; 394 | ssize_t addr; 395 | 396 | var = dec->dec.var; 397 | expr = dec->dec.expr; 398 | 399 | switch (var->type) { 400 | case NODE_MAP: 401 | dyn_args(var->map.args); 402 | dyn_assign(var, expr); 403 | map_update(var); 404 | break; 405 | case NODE_VAR: 406 | dyn_assign(var, expr); 407 | break; 408 | default: 409 | break; 410 | } 411 | } 412 | 413 | void gen_iff(node_t *n) { 414 | node_t* stmt; 415 | bb_t *then = bb_new(); 416 | bb_t *els = bb_new(); 417 | bb_t *last = bb_new(); 418 | 419 | br(gen_binop(n->iff.cond), then, els); 420 | 421 | curbb = then; 422 | 423 | if_then(); 424 | gen_stmt(n->iff.then); 425 | jmp(last); 426 | 427 | 428 | then_end(); 429 | 430 | curbb = els; 431 | 432 | if (n->iff.els){ 433 | else_then(); 434 | gen_stmt(n->iff.els); 435 | else_end(); 436 | } 437 | 438 | jmp(last); 439 | 440 | curbb = last; 441 | } 442 | 443 | void gen_stmt(node_t *n) { 444 | switch (n->type) { 445 | case NODE_IF: 446 | gen_iff(n); 447 | break; 448 | case NODE_DEC: 449 | gen_dec(n); 450 | break; 451 | case NODE_CALL: 452 | gen_noret_call(n); 453 | break; 454 | case NODE_EXPR: 455 | gen_map_method(n); 456 | break; 457 | default: 458 | verror("not match stmts type"); 459 | break; 460 | } 461 | } 462 | 463 | int gen_ir(node_t *n) { 464 | node_t *head; 465 | 466 | curbb = bb_new(); 467 | bb_t *bb = bb_new(); 468 | jmp(bb); 469 | curbb = bb; 470 | 471 | _foreach(head, n->probe.stmts) { 472 | gen_stmt(head); 473 | } 474 | 
475 | ir_exit(); 476 | return 0; 477 | } 478 | 479 | prog_t *prog_new(node_t *n) { 480 | prog_t *p = vmalloc(sizeof(*p)); 481 | p->ast = n; 482 | p->data = vec_new(); 483 | p->bbs = vec_new(); 484 | return p; 485 | } 486 | 487 | static void init_def_regs(bb_t *bb) { 488 | ir_t* ir; 489 | int i; 490 | 491 | for (i = 0; i < bb->ir->len; i++) { 492 | ir = bb->ir->data[i]; 493 | if (ir->r0) { 494 | vec_union(bb->def_regs, ir->r0); 495 | } 496 | } 497 | } 498 | 499 | static void ir_cfg(bb_t *bb, reg_t *reg) { 500 | if (!reg || vec_contains(bb->def_regs, reg)) 501 | return; 502 | 503 | if (!vec_union(bb->in_regs, reg)) 504 | return; 505 | 506 | for (int i = 0; i < bb->pred->len; i++) { 507 | bb_t *pred = bb->pred->data[i]; 508 | 509 | if (vec_union(pred->out_regs, reg)) { 510 | ir_cfg(pred, reg); 511 | } 512 | } 513 | } 514 | 515 | static void ir_init_it_regs(bb_t *bb, ir_t *ir) { 516 | int i; 517 | 518 | ir_cfg(bb, ir->r1); 519 | ir_cfg(bb, ir->r2); 520 | ir_cfg(bb, ir->bbarg); 521 | 522 | if (ir->op == IR_CALL) { 523 | for (i = 0; i < ir->nargs; i++) { 524 | ir_cfg(bb, ir->args[i]); 525 | } 526 | } 527 | } 528 | 529 | void ir_liveness(prog_t *prog) { 530 | int i, j; 531 | bb_t *bb; 532 | ir_t *ir; 533 | 534 | for (i = 0; i < prog->bbs->len; i++) { 535 | bb = prog->bbs->data[i]; 536 | 537 | init_def_regs(bb); 538 | 539 | for (j = 0; j < bb->ir->len; j++) { 540 | ir = bb->ir->data[j]; 541 | ir_init_it_regs(bb, ir); 542 | } 543 | } 544 | } 545 | 546 | static void ir_set_end(reg_t *reg, int ic) { 547 | if (reg && reg->end < ic) { 548 | reg->end = ic; 549 | } 550 | } 551 | 552 | static void ir_trans(bb_t *bb) { 553 | vec_t *v = vec_new(); 554 | ir_t *ir, *ir2; 555 | int i; 556 | 557 | for (i = 0; i < bb->ir->len; i++) { 558 | ir = bb->ir->data[i]; 559 | 560 | if (!ir->r0 || !ir->r1) { 561 | vec_push(v, ir); 562 | continue; 563 | } 564 | 565 | assert(ir->r0 != ir->r1); 566 | 567 | ir2 = calloc(1, sizeof(*ir2)); 568 | ir2->op = IR_MOV; 569 | ir2->r0 = ir->r0; 570 | ir2->r2 = 
ir->r1; 571 | vec_push(v, ir2); 572 | 573 | ir->r1 = ir->r0; 574 | vec_push(v, ir); 575 | } 576 | 577 | bb->ir = v; 578 | } 579 | 580 | static vec_t *ir_collect(prog_t *prog) { 581 | vec_t *vec = vec_new(); 582 | int ic = 1; 583 | int i, j, k; 584 | bb_t* bb; 585 | ir_t* ir; 586 | 587 | for (i = 0; i < prog->bbs->len; i++) { 588 | bb = prog->bbs->data[i]; 589 | 590 | for (j = 0; j < bb->ir->len; j++, ic++) { 591 | ir = bb->ir->data[j]; 592 | 593 | if (ir->r0 && !ir->r0->def) { 594 | ir->r0->def = ic; 595 | vec_push(vec, ir->r0); 596 | } 597 | 598 | ir_set_end(ir->r1, ic); 599 | ir_set_end(ir->r2, ic); 600 | ir_set_end(ir->bbarg, ic); 601 | 602 | if (ir->op == IR_CALL) { 603 | for (k = 0; k < ir->nargs; k++) 604 | ir_set_end(ir->args[k], ic); 605 | } 606 | } 607 | 608 | for (j = 0; j < bb->out_regs->len; j++) { 609 | reg_t *reg = bb->out_regs->data[j]; 610 | ir_set_end(reg, ic); 611 | } 612 | } 613 | 614 | return vec; 615 | } 616 | 617 | static int ir_spill(reg_t **used) { 618 | int i, k = 0; 619 | for (i = 1; i < regnum; i++) { 620 | if (used[k]->end < used[i]->end) { 621 | k = i; 622 | } 623 | } 624 | return k; 625 | } 626 | 627 | void ir_scan(vec_t *regs) { 628 | int i, j, k; 629 | bool found; 630 | reg_t **used = calloc(regnum, sizeof(reg_t *)); 631 | 632 | for (i = 0; i < regs->len; i++) { 633 | reg_t *reg = regs->data[i]; 634 | found = false; 635 | 636 | for (j = 0; j < regnum - 1; j++) { 637 | if (used[j] && reg->def < used[j]->end) { 638 | continue; 639 | } 640 | reg->rn = j; 641 | used[j] = reg; 642 | found = true; 643 | break; 644 | } 645 | 646 | if (found) 647 | continue; 648 | 649 | used[regnum - 1] = reg; 650 | 651 | k = ir_spill(used); 652 | reg->rn = k; 653 | used[k]->rn = regnum - 1; 654 | used[k]->spill = true; 655 | used[k] = reg; 656 | } 657 | } 658 | 659 | void ir_regs_alloc(prog_t *prog) { 660 | int i; 661 | bb_t *bb; 662 | vec_t *regs; 663 | node_t *var; 664 | 665 | for (i = 0; i < prog->bbs->len; i++) { 666 | bb = prog->bbs->data[i]; 667 | 
ir_trans(bb); 668 | } 669 | 670 | regs = ir_collect(prog); 671 | ir_scan(regs); 672 | } 673 | 674 | prog_t *gen_prog(node_t *n) { 675 | prog = prog_new(n); 676 | 677 | gen_ir(n); 678 | ir_liveness(prog); 679 | ir_regs_alloc(prog); 680 | 681 | return prog; 682 | } 683 | -------------------------------------------------------------------------------- /lang/probe.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "annot.h" 20 | #include "probe.h" 21 | #include "ut.h" 22 | 23 | #define LOG_BUF_SIZE 1 << 20 24 | char bpf_log_buf[LOG_BUF_SIZE]; 25 | 26 | static __u64 ptr_to_u64(const void* ptr) { 27 | return (__u64) (unsigned long) ptr; 28 | } 29 | 30 | int _bpf(enum bpf_cmd cmd, union bpf_attr *attr) { 31 | int r = (int) syscall(__NR_bpf, cmd, attr, sizeof(*attr)); 32 | if (r < 0) 33 | return -errno; 34 | return r; 35 | } 36 | 37 | long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { 38 | return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 39 | } 40 | 41 | int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn* insns, int insn_cnt) { 42 | union bpf_attr attr = { 43 | .prog_type = type, 44 | .insns = ptr_to_u64(insns), 45 | .insn_cnt = insn_cnt, 46 | .license = ptr_to_u64("GPL"), 47 | .log_buf = ptr_to_u64(bpf_log_buf), 48 | .log_size = LOG_BUF_SIZE, 49 | .log_level = 1, 50 | .kern_version = LINUX_VERSION_CODE, 51 | }; 52 | 53 | return _bpf(BPF_PROG_LOAD, &attr); 54 | } 55 | 56 | 57 | int bpf_map_create(enum bpf_map_type type, int ksize, int size, int entries) { 58 | union bpf_attr attr = { 59 | .map_type = type, 60 | .key_size = ksize, 61 | .value_size = size, 62 | .max_entries = entries, 63 | }; 64 | 65 
| return _bpf(BPF_MAP_CREATE, &attr); 66 | } 67 | 68 | int bpf_test_attach(ebpf_t* ctx) { 69 | union bpf_attr attr; 70 | int id; 71 | 72 | memset(&attr, 0, sizeof(attr)); 73 | id = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, ctx->prog, ctx->ip-ctx->prog); 74 | attr.test.prog_fd = id; 75 | 76 | return _bpf(BPF_PROG_TEST_RUN, &attr); 77 | } 78 | 79 | 80 | int bpf_kprobe_attach(ebpf_t* ctx, int id) { 81 | struct perf_event_attr attr = {}; 82 | 83 | int ed, bd; 84 | 85 | attr.type = PERF_TYPE_TRACEPOINT; 86 | attr.sample_type = PERF_SAMPLE_RAW; 87 | attr.sample_period = 1; 88 | attr.wakeup_events = 1; 89 | attr.config = id; 90 | 91 | bd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, ctx->prog, ctx->ip - ctx->prog); 92 | 93 | if (bd < 0) { 94 | perror("bpf"); 95 | fprintf(stderr, "bpf verifier:\n%s\n", bpf_log_buf); 96 | return 1; 97 | } 98 | 99 | ed = perf_event_open(&attr, -1, 0, -1, 0); 100 | 101 | if (ed < 0){ 102 | perror("perf_event_open"); 103 | return 1; 104 | } 105 | 106 | if (ioctl(ed, PERF_EVENT_IOC_ENABLE, 0)) { 107 | perror("perf enable"); 108 | return 1; 109 | } 110 | 111 | if (ioctl(ed, PERF_EVENT_IOC_SET_BPF, bd)) { 112 | perror("perf attach"); 113 | return 1; 114 | } 115 | 116 | return 0; 117 | } 118 | 119 | 120 | int bpf_probe_attach(ebpf_t* ctx, int id) { 121 | struct perf_event_attr attr = {}; 122 | 123 | int ed, bd; 124 | 125 | attr.type = PERF_TYPE_TRACEPOINT; 126 | attr.sample_type = PERF_SAMPLE_RAW; 127 | attr.sample_period = 1; 128 | attr.wakeup_events = 1; 129 | attr.config = id; 130 | 131 | bd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, ctx->prog, ctx->ip - ctx->prog); 132 | 133 | if (bd < 0) { 134 | perror("bpf"); 135 | fprintf(stderr, "bpf verifier:\n%s\n", bpf_log_buf); 136 | return 1; 137 | } 138 | 139 | ed = perf_event_open(&attr, -1, 0, -1, 0); 140 | 141 | if (ed < 0){ 142 | perror("perf_event_open"); 143 | return 1; 144 | } 145 | 146 | if (ioctl(ed, PERF_EVENT_IOC_ENABLE, 0)) { 147 | perror("perf enable"); 148 | return 1; 149 | } 150 | 151 | if 
(ioctl(ed, PERF_EVENT_IOC_SET_BPF, bd)) { 152 | perror("perf attach"); 153 | return 1; 154 | } 155 | 156 | return 0; 157 | } 158 | 159 | static int profile_perf_event_open(profile_t* profile, int cpu, int freq) { 160 | struct perf_event_attr attr = {}; 161 | int err = 0, i = profile->num, bd; 162 | 163 | attr.type = PERF_TYPE_SOFTWARE; 164 | attr.config = PERF_COUNT_SW_CPU_CLOCK; 165 | attr.freq = 1; 166 | attr.sample_freq = freq; 167 | 168 | profile->efds[i] = perf_event_open(&attr, -1, cpu, -1, 0); 169 | 170 | if (profile->efds[i] < 0) { 171 | return -errno; 172 | } 173 | 174 | if (ioctl(profile->efds[i], PERF_EVENT_IOC_ENABLE, 0)) { 175 | close(profile->efds[i]); 176 | return -errno; 177 | } 178 | 179 | profile->num++; 180 | return 0; 181 | } 182 | 183 | void profile_attach(ebpf_t* code) { 184 | int ncpus; 185 | profile_t* profile; 186 | 187 | ncpus = sysconf(_SC_NPROCESSORS_ONLN); 188 | profile = vcalloc(1, sizeof(*profile)); 189 | } 190 | 191 | 192 | type_t get_filed_type(char* name, unsigned long size, unsigned long sign) { 193 | int s = 1; 194 | 195 | if (!strncmp(name, "signed ", sizeof("signed"))){ 196 | name += sizeof("signed"); 197 | } else if (!strncmp(name, "unsigned ", sizeof("unsigned"))){ 198 | name += sizeof("unsigned"); 199 | } else{ 200 | s = 0; 201 | } 202 | 203 | if (!strcmp(name, "int") || !strcmp(name, "long")) { 204 | return TYPE_INT; 205 | } else if(!strcmp(name, "const char *")) { 206 | return TYPE_STR; 207 | } else { 208 | return TYPE_NULL; 209 | } 210 | } 211 | 212 | int bpf_read_field(field_t* field) { 213 | FILE* fmt; 214 | unsigned long offs, size, sign, len = 0; 215 | char line[0x80]; 216 | 217 | fmt = fopenf("r", "/sys/kernel/debug/tracing/events/%s/format", field->name); 218 | 219 | if (!fmt) { 220 | fclose(fmt); 221 | error("can't open the file"); 222 | return; 223 | } 224 | 225 | char* save, *offs_s, *size_s, *sign_s; 226 | char* type_s, *str, *tname; 227 | 228 | 229 | while (fgets(line, sizeof(line), fmt)) { 230 | if 
(!strstr(line, "field:")) 231 | continue; 232 | 233 | type_s = strtok_r(line, ";", &save); 234 | offs_s = strtok_r(NULL, ";", &save); 235 | size_s = strtok_r(NULL, ";", &save); 236 | sign_s = strtok_r(NULL, ";", &save); 237 | 238 | if (!(type_s && offs_s && size_s && sign_s)) { 239 | _e("read type_s, off_s error"); 240 | } 241 | 242 | type_s += sizeof("field:"); 243 | offs_s += sizeof("offset:"); 244 | size_s += sizeof("size:"); 245 | sign_s += sizeof("signed:"); 246 | 247 | offs = strtol(offs_s, NULL, 0); 248 | size = strtoul(size_s, NULL, 0); 249 | sign = strtoul(sign_s, NULL, 0); 250 | 251 | if (!type_s) { 252 | _e("type not found"); 253 | } 254 | 255 | tname = rindex(type_s, ' '); 256 | *tname++ = '\0'; 257 | 258 | if (!strcmp(tname, field->field)) { 259 | field->offs = offs; 260 | field->type = get_filed_type(type_s, size, sign); 261 | return 0; 262 | } 263 | } 264 | 265 | return 0; 266 | } 267 | 268 | 269 | const char *reg_names[] = { 270 | "r15", 271 | "r14", 272 | "r13", 273 | "r12", 274 | "bp", 275 | "bx", 276 | "r11", 277 | "r10", 278 | "r9", 279 | "r8", 280 | "ax", 281 | "cx", 282 | "dx", 283 | "si", 284 | "di", 285 | "orig_ax", 286 | "ip", 287 | "cs", 288 | "flags", 289 | "sp", 290 | "ss", 291 | NULL 292 | }; 293 | 294 | int arch_reg_width(void) { 295 | return sizeof(uint64_t); 296 | } 297 | 298 | int arch_reg_atoi(const char *name) { 299 | int reg; 300 | 301 | for (reg = 0; reg_names[reg]; reg++) { 302 | if (!strcmp(reg_names[reg], name)) 303 | return reg; 304 | } 305 | 306 | return -ENOENT; 307 | } 308 | 309 | int arch_reg_arg(int num) { 310 | switch (num) { 311 | case 0: return arch_reg_atoi("di"); 312 | case 1: return arch_reg_atoi("si"); 313 | case 2: return arch_reg_atoi("dx"); 314 | case 3: return arch_reg_atoi("r10"); 315 | case 4: return arch_reg_atoi("r8"); 316 | case 5: return arch_reg_atoi("r9"); 317 | } 318 | 319 | return -ENOSYS; 320 | } 321 | 322 | int bpf_get_probe_id(char* name) { 323 | char* buffer; 324 | FILE* fp; 325 | int number; 
326 | 327 | buffer = vmalloc(256); 328 | sprintf(buffer, "/sys/kernel/debug/tracing/events/%s/id", name); 329 | 330 | fp = fopen(buffer, "r"); 331 | 332 | if (fp == NULL) { 333 | verror("Error opening file"); 334 | return 1; 335 | } 336 | 337 | if (fscanf(fp, "%d", &number) != 1) { 338 | fprintf(stderr, "Error reading number from file\n"); 339 | fclose(fp); 340 | return 1; 341 | } 342 | 343 | free(buffer); 344 | return number; 345 | } 346 | 347 | int bpf_get_kprobe_id(char* func) { 348 | FILE* fp; 349 | char str[128]; 350 | 351 | sprintf(str, "echo 'p %s' >/sys/kernel/debug/tracing/kprobe_events", func); 352 | system(str); 353 | 354 | sprintf(str, "/sys/kernel/debug/tracing/events/kprobes/p_%s_0/id", func); 355 | fp = fopen(str, "r"); 356 | if (!fp) 357 | return -1; 358 | 359 | fgets(str, sizeof(str), fp); 360 | fclose(fp); 361 | 362 | return strtol(str, NULL, 0); 363 | } 364 | 365 | static int bpf_map_op(enum bpf_cmd cmd, int fd, void* key, void* val, int flags) { 366 | union bpf_attr attr = { 367 | .map_fd = fd, 368 | .key = ptr_to_u64(key), 369 | .value = ptr_to_u64(val), 370 | .flags = flags, 371 | }; 372 | 373 | return _bpf(cmd, &attr); 374 | } 375 | 376 | int bpf_map_lookup(int fd, void* key, void* val) { 377 | return bpf_map_op(BPF_MAP_LOOKUP_ELEM, fd, key, val, 0); 378 | } 379 | 380 | int bpf_map_update(int fd, void* key, void* val, int flags) { 381 | return bpf_map_op(BPF_MAP_UPDATE_ELEM, fd, key, val, flags); 382 | } 383 | 384 | int bpf_map_next(int fd, void* key, void* next_key) { 385 | return bpf_map_op(BPF_MAP_GET_NEXT_KEY, fd, key, next_key, 0); 386 | } 387 | 388 | int bpf_map_delete(int fd, void* key, void* val) { 389 | return bpf_map_op(BPF_MAP_DELETE_ELEM, fd, key, val, 0); 390 | } 391 | 392 | int bpf_map_close(int fd){ 393 | close(fd); 394 | } 395 | 396 | int perf_event_enable(int id) { 397 | if (ioctl(id, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP)) { 398 | return -1; 399 | } 400 | return 0; 401 | } 402 | 403 | static void btf_bswap_hdr(struct 
btf_header *h) { 404 | h->magic = bswap_16(h->magic); 405 | h->hdr_len = bswap_32(h->hdr_len); 406 | h->type_off = bswap_32(h->type_off); 407 | h->type_len = bswap_32(h->type_len); 408 | h->str_off = bswap_32(h->str_off); 409 | h->str_len = bswap_32(h->str_len); 410 | } 411 | 412 | static int btf_parse_hdr(btf_t *btf) { 413 | struct btf_header *hdr = btf->hdr; 414 | __u32 meta_left; 415 | 416 | if (btf->raw_size < sizeof(struct btf_header)) { 417 | _pr_debug("BTF header not found\n"); 418 | return -EINVAL; 419 | } 420 | 421 | if (hdr->magic == bswap_16(BTF_MAGIC)) { 422 | btf->swapped_endian = true; 423 | if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { 424 | _pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", 425 | bswap_32(hdr->hdr_len)); 426 | return -ENOTSUP; 427 | } 428 | btf_bswap_hdr(hdr); 429 | } else if (hdr->magic != BTF_MAGIC) { 430 | _pr_debug("Invalid BTF magic: %x\n", hdr->magic); 431 | return -EINVAL; 432 | } 433 | 434 | if (btf->raw_size < hdr->hdr_len) { 435 | _pr_debug("BTF header len %u larger than data size %u\n", 436 | hdr->hdr_len, btf->raw_size); 437 | return -EINVAL; 438 | } 439 | 440 | meta_left = btf->raw_size - hdr->hdr_len; 441 | if (meta_left < (long long)hdr->str_off + hdr->str_len) { 442 | _pr_debug("Invalid BTF total size: %u\n", btf->raw_size); 443 | return -EINVAL; 444 | } 445 | 446 | if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { 447 | _pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", 448 | hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); 449 | return -EINVAL; 450 | } 451 | 452 | if (hdr->type_off % 4) { 453 | _pr_debug("BTF type section is not aligned to 4 bytes\n"); 454 | return -EINVAL; 455 | } 456 | 457 | return 0; 458 | } 459 | 460 | 461 | 462 | __u32 btf__type_cnt(const btf_t* btf) { 463 | return btf->start_id + btf->nr_types; 464 | } 465 | 466 | static int btf_parse_str_sec(btf_t *btf) { 467 | const 
struct btf_header *hdr = btf->hdr; 468 | const char *start = btf->strs_data; 469 | const char *end = start + btf->hdr->str_len; 470 | 471 | if (btf->base_btf && hdr->str_len == 0) 472 | return 0; 473 | if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { 474 | _pr_debug("Invalid BTF string section\n"); 475 | return -EINVAL; 476 | } 477 | if (!btf->base_btf && start[0]) { 478 | _pr_debug("Invalid BTF string section\n"); 479 | return -EINVAL; 480 | } 481 | return 0; 482 | } 483 | 484 | static void btf_bswap_type_base(struct btf_type *t) { 485 | t->name_off = bswap_32(t->name_off); 486 | t->info = bswap_32(t->info); 487 | t->type = bswap_32(t->type); 488 | } 489 | 490 | 491 | static int btf_type_size(const struct btf_type *t) { 492 | const int base_size = sizeof(struct btf_type); 493 | __u16 vlen = btf_vlen(t); 494 | 495 | switch (btf_kind(t)) { 496 | case BTF_KIND_FWD: 497 | case BTF_KIND_CONST: 498 | case BTF_KIND_VOLATILE: 499 | case BTF_KIND_RESTRICT: 500 | case BTF_KIND_PTR: 501 | case BTF_KIND_TYPEDEF: 502 | case BTF_KIND_FUNC: 503 | case BTF_KIND_FLOAT: 504 | return base_size; 505 | case BTF_KIND_INT: 506 | return base_size + sizeof(__u32); 507 | case BTF_KIND_ENUM: 508 | return base_size + vlen * sizeof(struct btf_enum); 509 | case BTF_KIND_ARRAY: 510 | return base_size + sizeof(struct btf_array); 511 | case BTF_KIND_STRUCT: 512 | case BTF_KIND_UNION: 513 | return base_size + vlen * sizeof(struct btf_member); 514 | case BTF_KIND_FUNC_PROTO: 515 | return base_size + vlen * sizeof(struct btf_param); 516 | case BTF_KIND_VAR: 517 | return base_size + sizeof(struct btf_var); 518 | case BTF_KIND_DATASEC: 519 | return base_size + vlen * sizeof(struct btf_var_secinfo); 520 | default: 521 | _pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); 522 | return -EINVAL; 523 | } 524 | } 525 | 526 | static int btf_bswap_type_rest(struct btf_type *t) 527 | { 528 | struct btf_var_secinfo *v; 529 | struct btf_enum64 *e64; 530 | struct btf_member *m; 531 | 
struct btf_array *a; 532 | struct btf_param *p; 533 | struct btf_enum *e; 534 | __u16 vlen = btf_vlen(t); 535 | int i; 536 | 537 | switch (btf_kind(t)) { 538 | case BTF_KIND_FWD: 539 | case BTF_KIND_CONST: 540 | case BTF_KIND_VOLATILE: 541 | case BTF_KIND_RESTRICT: 542 | case BTF_KIND_PTR: 543 | case BTF_KIND_TYPEDEF: 544 | case BTF_KIND_FUNC: 545 | case BTF_KIND_FLOAT: 546 | return 0; 547 | case BTF_KIND_INT: 548 | *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); 549 | return 0; 550 | case BTF_KIND_ENUM: 551 | for (i = 0, e = btf_enum(t); i < vlen; i++, e++) { 552 | e->name_off = bswap_32(e->name_off); 553 | e->val = bswap_32(e->val); 554 | } 555 | return 0; 556 | case BTF_KIND_ARRAY: 557 | a = btf_array(t); 558 | a->type = bswap_32(a->type); 559 | a->index_type = bswap_32(a->index_type); 560 | a->nelems = bswap_32(a->nelems); 561 | return 0; 562 | case BTF_KIND_STRUCT: 563 | case BTF_KIND_UNION: 564 | for (i = 0, m = btf_members(t); i < vlen; i++, m++) { 565 | m->name_off = bswap_32(m->name_off); 566 | m->type = bswap_32(m->type); 567 | m->offset = bswap_32(m->offset); 568 | } 569 | return 0; 570 | case BTF_KIND_FUNC_PROTO: 571 | for (i = 0, p = btf_params(t); i < vlen; i++, p++) { 572 | p->name_off = bswap_32(p->name_off); 573 | p->type = bswap_32(p->type); 574 | } 575 | return 0; 576 | case BTF_KIND_VAR: 577 | btf_var(t)->linkage = bswap_32(btf_var(t)->linkage); 578 | return 0; 579 | case BTF_KIND_DATASEC: 580 | for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) { 581 | v->type = bswap_32(v->type); 582 | v->offset = bswap_32(v->offset); 583 | v->size = bswap_32(v->size); 584 | } 585 | return 0; 586 | default: 587 | _pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); 588 | return -EINVAL; 589 | } 590 | } 591 | 592 | static void *btf_add_type_offs_mem(btf_t *btf, size_t add_cnt) { 593 | return ut_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), 594 | btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); 595 | } 596 | 597 | static int 
btf_add_type_idx_entry(btf_t* btf, __u32 type_off) { 598 | __u32* p; 599 | 600 | p = btf_add_type_offs_mem(btf, 1); 601 | if (!p) 602 | return -ENOMEM; 603 | 604 | *p = type_off; 605 | return 0; 606 | } 607 | 608 | 609 | static int btf_parse_type_sec(btf_t *btf) { 610 | struct btf_header *hdr = btf->hdr; 611 | void *next_type = btf->types_data; 612 | void *end_type = next_type + hdr->type_len; 613 | int err, type_size; 614 | 615 | while (next_type + sizeof(struct btf_type) <= end_type) { 616 | if (btf->swapped_endian) 617 | btf_bswap_type_base(next_type); 618 | 619 | type_size = btf_type_size(next_type); 620 | if (type_size < 0) 621 | return type_size; 622 | if (next_type + type_size > end_type) { 623 | _pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types); 624 | return -EINVAL; 625 | } 626 | 627 | if (btf->swapped_endian && btf_bswap_type_rest(next_type)) 628 | return -EINVAL; 629 | 630 | err = btf_add_type_idx_entry(btf, next_type - btf->types_data); 631 | if (err) 632 | return err; 633 | 634 | next_type += type_size; 635 | btf->nr_types++; 636 | } 637 | 638 | if (next_type != end_type) { 639 | _pr_warn("BTF types data is malformed\n"); 640 | return -EINVAL; 641 | } 642 | 643 | return 0; 644 | } 645 | 646 | 647 | static bool btf_is_modifiable(const btf_t *btf) 648 | { 649 | return (void *)btf->hdr != btf->raw_data; 650 | } 651 | 652 | 653 | void btf_free(btf_t* btf) { 654 | if (IS_ERR_OR_NULL(btf)) { 655 | return; 656 | } 657 | 658 | if (btf->fd >= 0) 659 | close(btf->fd); 660 | 661 | if (btf_is_modifiable(btf)) { 662 | free(btf->hdr); 663 | free(btf->types_data); 664 | } 665 | 666 | free(btf->raw_data); 667 | free(btf->raw_data_swapped); 668 | free(btf->type_offs); 669 | 670 | if (btf->owns_base) 671 | btf_free(btf->base_btf); 672 | 673 | free(btf); 674 | } 675 | 676 | 677 | static btf_t* btf_new(const void* data, __u32 size, btf_t* base_btf) { 678 | btf_t* btf; 679 | int err; 680 | 681 | btf = calloc(1, sizeof(struct btf_t)); 682 | if (!btf) 
683 | return ERR_PTR(-ENOMEM); 684 | 685 | btf->nr_types = 0; 686 | btf->start_id = 1; 687 | btf->start_str_off = 0; 688 | btf->fd = -1; 689 | 690 | if (base_btf) { 691 | btf->base_btf = base_btf; 692 | btf->start_id = btf__type_cnt(base_btf); 693 | btf->start_str_off = base_btf->hdr->str_len; 694 | } 695 | 696 | btf->raw_data = malloc(size); 697 | if (!btf->raw_data) { 698 | err = -ENOMEM; 699 | goto done; 700 | } 701 | memcpy(btf->raw_data, data, size); 702 | btf->raw_size = size; 703 | btf->hdr = btf->raw_data; 704 | 705 | err = btf_parse_hdr(btf); 706 | if (err) 707 | goto done; 708 | 709 | btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; 710 | btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; 711 | 712 | err = btf_parse_str_sec(btf); 713 | err = err? : btf_parse_type_sec(btf); 714 | if (err) 715 | goto done; 716 | done: 717 | if (err) { 718 | btf_free(btf); 719 | return ERR_PTR(err); 720 | } 721 | 722 | return btf; 723 | } 724 | 725 | btf_t* btf_parse_raw(const char* path) { 726 | btf_t* btf = NULL; 727 | void* data = NULL; 728 | FILE* file = NULL; 729 | __u16 magic; 730 | int err = 0; 731 | long sz; 732 | 733 | file = fopen(path, "rbe"); 734 | if (!file) { 735 | err = -errno; 736 | goto err_out; 737 | } 738 | 739 | if (fread(&magic, 1, sizeof(magic), file) < sizeof(magic)) { 740 | err = -EIO; 741 | goto err_out; 742 | } 743 | 744 | if (magic != BTF_MAGIC) { 745 | err = -EPROTO; 746 | goto err_out; 747 | } 748 | 749 | if (fseek(file, 0, SEEK_END)) { 750 | err = -errno; 751 | goto err_out; 752 | } 753 | 754 | sz = ftell(file); 755 | if (sz < 0) { 756 | err = -errno; 757 | goto err_out; 758 | } 759 | 760 | if (fseek(file, 0, SEEK_SET)) { 761 | err = -errno; 762 | goto err_out; 763 | } 764 | 765 | data = malloc(sz); 766 | if (!data) { 767 | err = -ENOMEM; 768 | goto err_out; 769 | } 770 | 771 | if (fread(data, 1, sz, file) < sz) { 772 | err = -EIO; 773 | goto err_out; 774 | } 775 | btf = btf_new(data, sz, NULL); 776 
| err_out: 777 | free(data); 778 | if (file) { 779 | fclose(file); 780 | } 781 | return err ? ERR_PTR(err) : btf; 782 | } 783 | 784 | static btf_t* btf_parse(const char* path) { 785 | struct btf_t* btf; 786 | int err = 0; 787 | 788 | btf = btf_parse_raw(path); 789 | err = get_error(btf); 790 | if (!err) 791 | return btf; 792 | 793 | if (err != -EPROTO) 794 | return ERR_PTR(err); 795 | } 796 | 797 | static struct btf_t* vy_btf__parse(const char* path) { 798 | return ut_err(btf_parse(path)); 799 | } 800 | 801 | 802 | btf_t* btf_load_vmlinux() { 803 | const char* sysfs_btf_path = "/sys/kernel/btf/vmlinux"; 804 | char path[PATH_MAX+1]; 805 | 806 | struct utsname buf; 807 | struct btf *btf; 808 | int i, err; 809 | 810 | if (faccessat(AT_FDCWD, sysfs_btf_path, F_OK, AT_EACCESS) < 0) { 811 | _pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", 812 | sysfs_btf_path); 813 | } else { 814 | btf = vy_btf__parse(sysfs_btf_path); 815 | if (!btf) { 816 | err = -errno; 817 | _pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); 818 | return NULL; 819 | } 820 | _pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); 821 | return btf; 822 | } 823 | } 824 | 825 | 826 | struct btf_type* btf_type_by_id(const btf_t* btf, __u32 type_id) { 827 | if (type_id == 0) 828 | return ; 829 | if (type_id < btf->start_id) 830 | return btf_type_by_id(btf->base_btf, type_id); 831 | 832 | return btf->types_data + btf->type_offs[type_id-btf->start_id]; 833 | } 834 | 835 | const struct btf_type* btf__type_by_id(btf_t* btf, __u32 type_id) { 836 | if (type_id >= btf->start_id + btf->nr_types) 837 | return errno = EINVAL, NULL; 838 | 839 | return btf_type_by_id(btf, type_id); 840 | } 841 | 842 | static const void* btf_strs_data(const btf_t* btf) { 843 | return btf->strs_data ? 
btf->strs_data : NULL; 844 | } 845 | 846 | 847 | const char* btf__str_by_offset(const btf_t* btf, __u32 offset) { 848 | if (offset < btf->start_str_off) { 849 | return btf__str_by_offset(btf->base_btf, offset); 850 | } else if (offset - btf->start_str_off < btf->hdr->str_len) { 851 | return btf_strs_data(btf) + (offset - btf->start_str_off); 852 | } else { 853 | return errno = EINVAL, NULL; 854 | } 855 | } 856 | 857 | const char* btf__name_by_offset(const btf_t* btf, __u32 offset) { 858 | return btf__str_by_offset(btf, offset); 859 | } 860 | 861 | static __s32 btf_find_by_name_kind( 862 | const struct btf* btf, int start_id, const char* type_name, __u32 kind) 863 | { 864 | __u32 i, nr_types = btf__type_cnt(btf); 865 | if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) { 866 | return 0; 867 | } 868 | 869 | for (i = start_id; i < nr_types; i++) { 870 | const struct btf_type* type = btf__type_by_id(btf, i); 871 | const char* name; 872 | 873 | if (btf_kind(type) != kind) { 874 | continue; 875 | } 876 | 877 | name = btf__name_by_offset(btf, type->name_off); 878 | 879 | if (name && !strcmp(type_name, name)) 880 | return i; 881 | } 882 | 883 | return libbpf_err(-ENOENT); 884 | } 885 | 886 | __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind) { 887 | return btf_find_by_name_kind(btf, 1, type_name, kind); 888 | } 889 | 890 | int btf_get_field_off(const char *struct_name, const char *field_name) { 891 | int offset = -1; 892 | int struct_id; 893 | struct btf_member *member; 894 | const struct btf_type *type; 895 | btf_t* btf; 896 | 897 | btf = btf_load_vmlinux(); 898 | 899 | struct_id = btf__find_by_name_kind(btf, struct_name, BTF_KIND_STRUCT); 900 | if (struct_id < 0) { 901 | verror("can't find structure %s", struct_name); 902 | } 903 | type = btf__type_by_id(btf, struct_id); 904 | if (!type) 905 | verror("can t get btf_type for %s", struct_name); 906 | 907 | member = (struct btf_member *)(type + 1); 908 | for (size_t i = 0; i < 
BTF_INFO_VLEN(type->info); ++i, ++member) { 909 | const char *cur_name = btf__name_by_offset(btf, member->name_off); 910 | if (!cur_name || !vstreq(cur_name, field_name)) 911 | continue; 912 | 913 | if (BTF_INFO_KFLAG(type->info)) 914 | offset = BTF_MEMBER_BIT_OFFSET(member->offset); 915 | else 916 | offset = member->offset; 917 | 918 | break; 919 | } 920 | 921 | if (offset < 0 || offset % 8) 922 | return -ENOENT; 923 | 924 | return offset / 8; 925 | } --------------------------------------------------------------------------------