├── README.md ├── iptables_trace_ex.py ├── skbtracer.py ├── LICENSE └── skbtracer.c /README.md: -------------------------------------------------------------------------------- 1 | # skbtracer 2 | 3 | skbtracer 基于 ebpf 技术的 skb 网络包路径追踪利器, 实现代码基于 [BCC](https://github.com/iovisor/bcc) (required Linux Kernel 4.15+) 4 | 5 | ## 使用样例 6 | 7 | ``` 8 | skbtracer.py # trace all packets 9 | skbtracer.py --proto=icmp -H 1.2.3.4 --icmpid 22 # trace icmp packet with addr=1.2.3.4 and icmpid=22 10 | skbtracer.py --proto=tcp -H 1.2.3.4 -P 22 # trace tcp packet with addr=1.2.3.4:22 11 | skbtracer.py --proto=udp -H 1.2.3.4 -P 22 # trace udp packet wich addr=1.2.3.4:22 12 | skbtracer.py -t -T -p 1 --debug -P 80 -H 127.0.0.1 --proto=tcp --kernel-stack --icmpid=100 -N 10000 13 | ``` 14 | 15 | 运行效果 16 | 17 | ```bash 18 | $ sudo ./skbtracer.py -c 100 19 | time NETWORK_NS CPU INTERFACE DEST_MAC IP_LEN PKT_INFO TRACE_INFO 20 | [06:47:28 ][4026531992] 0 b'nil' 00042de08c77 196 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a594e0.0:b'ip_output' 21 | [06:47:28 ][4026531992] 0 b'eth0' 00042de08c77 196 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a594e0.0:b'ip_finish_output' 22 | [06:47:28 ][4026531992] 0 b'eth0' 00042de08c77 196 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a594e0.0:b'__dev_queue_xmit' 23 | [06:47:28 ][4026531992] 0 b'nil' 000439849c02 76 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ee0.0:b'ip_output' 24 | [06:47:28 ][4026531992] 0 b'eth0' 000439849c02 76 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ee0.0:b'ip_finish_output' 25 | [06:47:28 ][4026531992] 0 b'eth0' 000439849c02 76 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ee0.0:b'__dev_queue_xmit' 26 | [06:47:28 ][4026531992] 0 b'nil' 000429e08c77 228 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ae0.0:b'ip_output' 27 | [06:47:28 ][4026531992] 0 b'eth0' 000429e08c77 228 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ae0.0:b'ip_finish_output' 28 | [06:47:28 ][4026531992] 0 b'eth0' 000429e08c77 228 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ae0.0:b'__dev_queue_xmit' 29 | [06:47:28 ][4026531992] 0 b'nil' 000439e08c77 76 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ce0.0:b'ip_output' 30 | [06:47:28 ][4026531992] 0 b'eth0' 000439e08c77 76 T_ACK,PSH:172.17.0.14:22->101.87.140.43:18359 ffff8a7572a59ce0.0:b'ip_finish_output' 31 | ``` 32 | 33 | ## 功能增强 34 | 35 | 1. 调整基于抓取数量的实现(更加精准,避免了部分环境下异常被忽略) 36 | 2. 增加了 ip 长度的字段 37 | 3. 增加了运行 cpu 的字段 38 | 39 | 本文代码来自于 [gist](https://gist.github.com/chendotjs/194768c411f15ecfec11e7235c435fa0 40 | ) 41 | 42 | 更通用的网络方案参见仓库 [WeaveWorks tcptracer-bpf](https://github.com/weaveworks/tcptracer-bpf) 43 | 44 | ## 相关文档 45 | 46 | * [使用 ebpf 深入分析容器网络 dup 包问题](https://blog.csdn.net/alex_yangchuansheng/article/details/104058072) 47 | * [使用 Linux tracepoint、perf 和 eBPF 跟踪数据包 (2017)](https://github.com/DavadDi/bpf_study/blob/master/trace-packet-with-tracepoint-perf-ebpf/index_zh.md) 48 | -------------------------------------------------------------------------------- /iptables_trace_ex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from bcc import BPF 3 | 4 | prog = """ 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #define MAC_HEADER_SIZE 14; 14 | #define member_address(source_struct, source_member) \ 15 | ({ \ 16 | void* __ret; \ 17 | __ret = (void*) (((char*)source_struct) + offsetof(typeof(*source_struct), source_member)); \ 18 | __ret; \ 19 | }) 20 | #define member_read(destination, source_struct, source_member) \ 21 | do{ \ 22 | bpf_probe_read( \ 23 | destination, \ 24 | sizeof(source_struct->source_member), \ 25 | member_address(source_struct, source_member) \ 26 | ); \ 27 | } while(0) 28 | 29 | struct ipt_do_table_args 30 | { 31 | struct sk_buff *skb; 32 | const struct nf_hook_state *state; 33 | struct xt_table *table; 34 | u64 start_ns; 35 | }; 36 | 37 | BPF_HASH(cur_ipt_do_table_args, u32, struct ipt_do_table_args); 38 | 39 | int kprobe__ipt_do_table(struct pt_regs *ctx, struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) 40 | { 41 | u32 pid = bpf_get_current_pid_tgid(); 42 | 43 | struct ipt_do_table_args args = { 44 | .skb = skb, 45 | .state = state, 46 | .table = table, 47 | }; 48 | 49 | args.start_ns = bpf_ktime_get_ns(); 50 | cur_ipt_do_table_args.update(&pid, &args); 51 | 52 | return 0; 53 | }; 54 | 55 | struct event_data_t { 56 | void *skb; 57 | u32 pid; 58 | u32 hook; 59 | u32 verdict; 60 | u8 pf; 61 | u8 reserv[3]; 62 | char table[XT_TABLE_MAXNAMELEN]; 63 | }; 64 | 65 | BPF_PERF_OUTPUT(open_events); 66 | 67 | /* 68 | * tricky: use ebx as the 1st parms, thus get skb 69 | */ 70 | int kretprobe__ipt_do_table(struct pt_regs *ctx) 71 | { 72 | struct ipt_do_table_args *args; 73 | u32 pid = bpf_get_current_pid_tgid(); 74 | struct event_data_t evt = {}; 75 | 76 | args = cur_ipt_do_table_args.lookup(&pid); 77 | if (args == 0) 78 | return 0; 79 | 80 | cur_ipt_do_table_args.delete(&pid); 81 | 82 | evt.pid = pid; 83 | evt.skb = args->skb; 84 | member_read(&evt.hook, args->state, hook); 85 | //member_read(&evt.pf, args->state, pf); 86 | member_read(&evt.table, args->table, name); 87 | evt.verdict = PT_REGS_RC(ctx); 88 | 89 | open_events.perf_submit(ctx, &evt, sizeof(evt)); 90 | return 0; 91 | } 92 | 93 | """ 94 | 95 | # uapi/linux/netfilter.h 96 | NF_VERDICT_NAME = [ 97 | 'DROP', 98 | 'ACCEPT', 99 | 'STOLEN', 100 | 'QUEUE', 101 | 'REPEAT', 102 | 'STOP', 103 | ] 104 | 105 | # uapi/linux/netfilter.h 106 | # net/ipv4/netfilter/ip_tables.c 107 | HOOKNAMES = [ 108 | "PREROUTING", 109 | "INPUT", 110 | "FORWARD", 111 | "OUTPUT", 112 | "POSTROUTING", 113 | ] 114 | 115 | def _get(l, index, default): 116 | ''' 117 | Get element at index in l or return the default 118 | ''' 119 | if index < len(l): 120 | return l[index] 121 | return default 122 | 123 | def print_event(cpu, data, size): 124 | event = b["open_events"].event(data) 125 | 126 | hook = _get(HOOKNAMES, event.hook, "~UNK~") 127 | verdict = _get(NF_VERDICT_NAME, event.verdict, "~UNK~") 128 | 129 | print("%-10d %-16x %-12s %-12s %-10s"%(event.pid, event.skb, event.table, hook, verdict)) 130 | 131 | b = BPF(text=prog) 132 | b["open_events"].open_perf_buffer(print_event) 133 | 134 | print("pid skb_addr table hook verdict") 135 | 136 | while True: 137 | try: 138 | b.perf_buffer_poll() 139 | except KeyboardInterrupt: 140 | exit() 141 | 142 | -------------------------------------------------------------------------------- /skbtracer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | import socket 6 | from socket import inet_ntop, AF_INET, AF_INET6 7 | from bcc import BPF 8 | import ctypes as ct 9 | import subprocess 10 | from struct import pack 11 | import argparse 12 | import time 13 | import struct 14 | 15 | examples = """examples: 16 | skbtracer.py # trace all packets 17 | skbtracer.py --proto=icmp -H 1.2.3.4 --icmpid 22 # trace icmp packet with addr=1.2.3.4 and icmpid=22 18 | skbtracer.py --proto=tcp -H 1.2.3.4 -P 22 # trace tcp packet with addr=1.2.3.4:22 19 | skbtracer.py --proto=udp -H 1.2.3.4 -P 22 # trace udp packet wich addr=1.2.3.4:22 20 | skbtracer.py -t -T -p 1 --debug -P 80 -H 127.0.0.1 --proto=tcp --kernel-stack --icmpid=100 -N 10000 21 | """ 22 | 23 | parser = argparse.ArgumentParser( 24 | description="Trace any packet through TCP/IP stack", 25 | formatter_class=argparse.RawDescriptionHelpFormatter, 26 | epilog=examples) 27 | 28 | parser.add_argument("-H", "--ipaddr", type=str, 29 | help="ip address") 30 | 31 | parser.add_argument("--proto", type=str, 32 | help="tcp|udp|icmp|any ") 33 | 34 | parser.add_argument("--icmpid", type=int, default=0, 35 | help="trace icmp id") 36 | 37 | parser.add_argument("-c", "--catch-count", type=int, default=1000000, 38 | help="catch and print count") 39 | 40 | parser.add_argument("-P", "--port", type=int, default=0, 41 | help="udp or tcp port") 42 | 43 | parser.add_argument("-p", "--pid", type=int, default=0, 44 | help="trace this PID only") 45 | 46 | parser.add_argument("-N", "--netns", type=int, default=0, 47 | help="trace this Network Namespace only") 48 | 49 | parser.add_argument("--dropstack", action="store_true", 50 | help="output kernel stack trace when drop packet") 51 | 52 | parser.add_argument("--callstack", action="store_true", 53 | help="output kernel stack trace") 54 | 55 | parser.add_argument("--iptable", action="store_true", 56 | help="output iptable path") 57 | 58 | parser.add_argument("--route", action="store_true", 59 | help="output route path") 60 | 61 | parser.add_argument("--keep", action="store_true", 62 | help="keep trace packet all lifetime") 63 | 64 | parser.add_argument("-T", "--time", action="store_true", 65 | help="show HH:MM:SS timestamp") 66 | 67 | parser.add_argument("-t", "--timestamp", action="store_true", 68 | help="show timestamp in seconds at us resolution") 69 | 70 | parser.add_argument("--ebpf", action="store_true", 71 | help=argparse.SUPPRESS) 72 | 73 | parser.add_argument("--debug", action="store_true", 74 | help=argparse.SUPPRESS) 75 | 76 | args = parser.parse_args() 77 | if args.debug == True: 78 | print("pid=%d time=%d timestamp=%d ipaddr=%s port=%d netns=%d proto=%s icmpid=%d dropstack=%d" % \ 79 | (args.pid,args.time,args.timestamp,args.ipaddr, args.port,args.netns,args.proto,args.icmpid, args.dropstack)) 80 | sys.exit() 81 | 82 | 83 | ipproto={} 84 | #ipproto["tcp"]="IPPROTO_TCP" 85 | ipproto["tcp"]="6" 86 | #ipproto["udp"]="IPPROTO_UDP" 87 | ipproto["udp"]="17" 88 | #ipproto["icmp"]="IPPROTO_ICMP" 89 | ipproto["icmp"]="1" 90 | proto = 0 if args.proto == None else (0 if ipproto.get(args.proto) == None else ipproto[args.proto]) 91 | #ipaddr=socket.htonl(struct.unpack("I",socket.inet_aton("0" if args.ipaddr == None else args.ipaddr))[0]) 92 | #port=socket.htons(args.port) 93 | ipaddr=(struct.unpack("I",socket.inet_aton("0" if args.ipaddr == None else args.ipaddr))[0]) 94 | port=(args.port) 95 | icmpid=socket.htons(args.icmpid) 96 | 97 | bpf_def="#define __BCC_ARGS__\n" 98 | bpf_args="#define __BCC_pid (%d)\n" % (args.pid) 99 | bpf_args+="#define __BCC_ipaddr (0x%x)\n" % (ipaddr) 100 | bpf_args+="#define __BCC_port (%d)\n" % (port) 101 | bpf_args+="#define __BCC_netns (%d)\n" % (args.netns) 102 | bpf_args+="#define __BCC_proto (%s)\n" % (proto) 103 | bpf_args+="#define __BCC_icmpid (%d)\n" % (icmpid) 104 | bpf_args+="#define __BCC_dropstack (%d)\n" % (args.dropstack) 105 | bpf_args+="#define __BCC_callstack (%d)\n" % (args.callstack) 106 | bpf_args+="#define __BCC_iptable (%d)\n" % (args.iptable) 107 | bpf_args+="#define __BCC_route (%d)\n" % (args.route) 108 | bpf_args+="#define __BCC_keep (%d)\n" % (args.keep) 109 | 110 | bpf_text=open(r"skbtracer.c", "r").read() 111 | bpf_text=bpf_def + bpf_text 112 | bpf_text=bpf_text.replace("__BCC_ARGS_DEFINE__", bpf_args) 113 | 114 | if args.ebpf == True: 115 | print("%s" % (bpf_text)) 116 | sys.exit() 117 | 118 | # uapi/linux/if.h 119 | IFNAMSIZ = 16 120 | 121 | # uapi/linux/netfilter/x_tables.h 122 | XT_TABLE_MAXNAMELEN = 32 123 | 124 | # uapi/linux/netfilter.h 125 | NF_VERDICT_NAME = [ 126 | 'DROP', 127 | 'ACCEPT', 128 | 'STOLEN', 129 | 'QUEUE', 130 | 'REPEAT', 131 | 'STOP', 132 | ] 133 | 134 | # uapi/linux/netfilter.h 135 | # net/ipv4/netfilter/ip_tables.c 136 | HOOKNAMES = [ 137 | "PREROUTING", 138 | "INPUT", 139 | "FORWARD", 140 | "OUTPUT", 141 | "POSTROUTING", 142 | ] 143 | 144 | TCPFLAGS = [ 145 | "CWR", 146 | "ECE", 147 | "URG", 148 | "ACK", 149 | "PSH", 150 | "RST", 151 | "SYN", 152 | "FIN", 153 | ] 154 | 155 | ROUTE_EVENT_IF = 0x0001 156 | ROUTE_EVENT_IPTABLE = 0x0002 157 | ROUTE_EVENT_DROP = 0x0004 158 | ROUTE_EVENT_NEW = 0x0010 159 | FUNCNAME_MAX_LEN = 64 160 | 161 | class TestEvt(ct.Structure): 162 | _fields_ = [ 163 | ("func_name", ct.c_char * FUNCNAME_MAX_LEN), 164 | ("flags", ct.c_ubyte), 165 | ("cpu", ct.c_ubyte), 166 | ("ifname", ct.c_char * IFNAMSIZ), 167 | ("netns", ct.c_uint), 168 | 169 | ("dest_mac", ct.c_ubyte * 6), 170 | ("len", ct.c_uint), 171 | ("ip_version", ct.c_ubyte), 172 | ("l4_proto", ct.c_ubyte), 173 | ("tot_len", ct.c_ushort), 174 | ("saddr", ct.c_ulonglong * 2), 175 | ("daddr", ct.c_ulonglong * 2), 176 | ("icmptype", ct.c_ubyte), 177 | ("icmpid", ct.c_ushort), 178 | ("icmpseq", ct.c_ushort), 179 | ("sport", ct.c_ushort), 180 | ("dport", ct.c_ushort), 181 | ("tcpflags", ct.c_ushort), 182 | ("hook", ct.c_uint), 183 | ("pf", ct.c_ubyte), 184 | ("verdict", ct.c_uint), 185 | ("tablename", ct.c_char * XT_TABLE_MAXNAMELEN), 186 | ("ipt_delay", ct.c_ulonglong), 187 | 188 | ("skb", ct.c_ulonglong), 189 | ("pkt_type", ct.c_ubyte), 190 | 191 | ("kernel_stack_id", ct.c_int), 192 | ("kernel_ip", ct.c_ulonglong), 193 | 194 | ("start_ns", ct.c_ulonglong), 195 | ("test", ct.c_ulonglong) 196 | ] 197 | 198 | 199 | def _get(l, index, default): 200 | ''' 201 | Get element at index in l or return the default 202 | ''' 203 | if index < len(l): 204 | return l[index] 205 | return default 206 | def _get_tcpflags(tcpflags): 207 | flag="" 208 | start=1 209 | for index in range(len(TCPFLAGS)): 210 | if (tcpflags & (1< 0: 225 | kernel_tmp = stack_traces.walk(event.kernel_stack_id) 226 | # fix kernel stack 227 | for addr in kernel_tmp: 228 | kernel_stack.append(addr) 229 | for addr in kernel_stack: 230 | print((" %s" % b.ksym(addr))) 231 | 232 | earliest_ts = 0 233 | def time_str(event): 234 | if args.timestamp: 235 | global earliest_ts 236 | if earliest_ts == 0: 237 | earliest_ts = event.start_ns 238 | return "%-7.6f " % ((event.start_ns - earliest_ts) / 1000000000.0) 239 | elif args.time: 240 | return "%-7s " % time.strftime("%H:%M:%S") 241 | else: 242 | return "%-7s " % time.strftime("%H:%M:%S") 243 | 244 | def event_printer(cpu, data, size): 245 | args.catch_count = args.catch_count - 1 246 | 247 | if args.catch_count < 0: 248 | global is_done 249 | is_done = True 250 | return 251 | 252 | # Decode event 253 | event = ct.cast(data, ct.POINTER(TestEvt)).contents 254 | 255 | if event.ip_version == 4: 256 | saddr = inet_ntop(AF_INET, pack("=I", event.saddr[0])) 257 | daddr = inet_ntop(AF_INET, pack("=I", event.daddr[0])) 258 | elif event.ip_version == 6: 259 | saddr = inet_ntop(AF_INET6, event.saddr) 260 | daddr = inet_ntop(AF_INET6, event.daddr) 261 | else: 262 | return 263 | 264 | mac_info = ''.join('%02x' % b for b in event.dest_mac) 265 | 266 | if event.l4_proto == socket.IPPROTO_TCP: 267 | pkt_info = "T_%s:%s:%u->%s:%u" % (_get_tcpflags(event.tcpflags), saddr, event.sport, daddr, event.dport) 268 | elif event.l4_proto == socket.IPPROTO_UDP: 269 | pkt_info = "U:%s:%u->%s:%u" % (saddr, event.sport, daddr, event.dport) 270 | elif event.l4_proto == socket.IPPROTO_ICMP: 271 | if event.icmptype in [8, 128]: 272 | pkt_info = "I_request:%s->%s" % (saddr, daddr) 273 | elif event.icmptype in [0, 129]: 274 | pkt_info = "I_reply:%s->%s" % (saddr, daddr) 275 | else: 276 | pkt_info = "I:%s->%s" % (saddr, daddr) 277 | else: 278 | pkt_info = "%u:%s->%s" % (event.l4_proto, saddr, daddr) 279 | 280 | iptables = "" 281 | if event.flags & ROUTE_EVENT_IPTABLE == ROUTE_EVENT_IPTABLE: 282 | verdict = _get(NF_VERDICT_NAME, event.verdict, "~UNK~") 283 | hook = _get(HOOKNAMES, event.hook, "~UNK~") 284 | iptables = "%u.%s.%s.%s " % (event.pf, event.tablename, hook, verdict) 285 | 286 | trace_info = "%x.%u:%s%s" % (event.skb, event.pkt_type, iptables, event.func_name) 287 | 288 | # Print event 289 | print("[%-8s][%-10s] %-6s %-18s %-12s %-6s %-40s %s" % (time_str(event), event.netns, event.cpu, event.ifname, mac_info, event.tot_len, pkt_info, trace_info)) 290 | print_stack(event) 291 | 292 | is_done = False 293 | 294 | if __name__ == "__main__": 295 | b = BPF(text=bpf_text) 296 | b["route_event"].open_perf_buffer(event_printer) 297 | 298 | print("%-10s %-12s %-6s %-18s %-12s %-6s %-40s %s" % ('time', 'NETWORK_NS', 'CPU', 'INTERFACE', 'DEST_MAC', 'IP_LEN', 'PKT_INFO', 'TRACE_INFO')) 299 | 300 | try: 301 | while is_done == False: 302 | b.kprobe_poll(1) 303 | except KeyboardInterrupt: 304 | print("Cancel by User Ctrl+C\n") 305 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /skbtracer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define ROUTE_EVENT_IF 0x0001 12 | #define ROUTE_EVENT_IPTABLE 0x0002 13 | #define ROUTE_EVENT_DROP 0x0004 14 | #define ROUTE_EVENT_NEW 0x0010 15 | 16 | #ifdef __BCC_ARGS__ 17 | __BCC_ARGS_DEFINE__ 18 | #else 19 | #define __BCC_pid 0 20 | #define __BCC_ipaddr 0 21 | #define __BCC_port 0 22 | #define __BCC_icmpid 0 23 | #define __BCC_dropstack 0 24 | #define __BCC_callstack 0 25 | #define __BCC_iptable 0 26 | #define __BCC_route 0 27 | #define __BCC_keep 0 28 | #define __BCC_proto 0 29 | #define __BCC_netns 0 30 | #endif 31 | 32 | /* route info as default */ 33 | #if !__BCC_dropstack && !__BCC_iptable && !__BCC_route 34 | #undef __BCC_route 35 | #define __BCC_route 1 36 | #endif 37 | 38 | #if (__BCC_dropstack) || (!__BCC_pid && !__BCC_ipaddr && !__BCC_port && !__BCC_icmpid &&! __BCC_proto && !__BCC_netns) 39 | #undef __BCC_keep 40 | #define __BCC_keep 0 41 | #endif 42 | 43 | BPF_STACK_TRACE(stacks, 2048); 44 | 45 | #define FUNCNAME_MAX_LEN 64 46 | struct event_t { 47 | char func_name[FUNCNAME_MAX_LEN]; 48 | u8 flags; 49 | u8 cpu; 50 | 51 | // route info 52 | char ifname[IFNAMSIZ]; 53 | u32 netns; 54 | 55 | // pkt info 56 | u8 dest_mac[6]; 57 | u32 len; 58 | u8 ip_version; 59 | u8 l4_proto; 60 | u16 tot_len; 61 | u64 saddr[2]; 62 | u64 daddr[2]; 63 | u8 icmptype; 64 | u16 icmpid; 65 | u16 icmpseq; 66 | u16 sport; 67 | u16 dport; 68 | u16 tcpflags; 69 | 70 | // ipt info 71 | u32 hook; 72 | u8 pf; 73 | u32 verdict; 74 | char tablename[XT_TABLE_MAXNAMELEN]; 75 | u64 ipt_delay; 76 | 77 | void *skb; 78 | // skb info 79 | u8 pkt_type; //skb->pkt_type 80 | 81 | // call stack 82 | int kernel_stack_id; 83 | u64 kernel_ip; 84 | 85 | //time 86 | u64 start_ns; 87 | u64 test; 88 | }; 89 | BPF_PERF_OUTPUT(route_event); 90 | 91 | struct ipt_do_table_args 92 | { 93 | struct sk_buff *skb; 94 | const struct nf_hook_state *state; 95 | struct xt_table *table; 96 | u64 start_ns; 97 | }; 98 | BPF_HASH(cur_ipt_do_table_args, u32, struct ipt_do_table_args); 99 | 100 | union ___skb_pkt_type { 101 | __u8 value; 102 | struct { 103 | __u8 __pkt_type_offset[0]; 104 | __u8 pkt_type:3; 105 | __u8 pfmemalloc:1; 106 | __u8 ignore_df:1; 107 | 108 | __u8 nf_trace:1; 109 | __u8 ip_summed:2; 110 | }; 111 | }; 112 | 113 | #if __BCC_keep 114 | #endif 115 | 116 | #define MAC_HEADER_SIZE 14; 117 | #define member_address(source_struct, source_member) \ 118 | ({ \ 119 | void* __ret; \ 120 | __ret = (void*) (((char*)source_struct) + offsetof(typeof(*source_struct), source_member)); \ 121 | __ret; \ 122 | }) 123 | #define member_read(destination, source_struct, source_member) \ 124 | do{ \ 125 | bpf_probe_read( \ 126 | destination, \ 127 | sizeof(source_struct->source_member), \ 128 | member_address(source_struct, source_member) \ 129 | ); \ 130 | } while(0) 131 | 132 | enum { 133 | __TCP_FLAG_CWR, 134 | __TCP_FLAG_ECE, 135 | __TCP_FLAG_URG, 136 | __TCP_FLAG_ACK, 137 | __TCP_FLAG_PSH, 138 | __TCP_FLAG_RST, 139 | __TCP_FLAG_SYN, 140 | __TCP_FLAG_FIN 141 | }; 142 | 143 | static void bpf_strncpy(char *dst, const char *src, int n) 144 | { 145 | int i = 0, j; 146 | #define CPY(n) \ 147 | do { \ 148 | for (; i < n; i++) { \ 149 | if (src[i] == 0) return; \ 150 | dst[i] = src[i]; \ 151 | } \ 152 | } while(0) 153 | 154 | for (j = 10; j < 64; j += 10) 155 | CPY(j); 156 | CPY(64); 157 | #undef CPY 158 | } 159 | 160 | #define TCP_FLAGS_INIT(new_flags, orig_flags, flag) \ 161 | do { \ 162 | if (orig_flags & flag) { \ 163 | new_flags |= (1U<<__##flag); \ 164 | } \ 165 | } while (0) 166 | #define init_tcpflags_bits(new_flags, orig_flags) \ 167 | ({ \ 168 | new_flags = 0; \ 169 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_CWR); \ 170 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_ECE); \ 171 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_URG); \ 172 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_ACK); \ 173 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_PSH); \ 174 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_RST); \ 175 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_SYN); \ 176 | TCP_FLAGS_INIT(new_flags, orig_flags, TCP_FLAG_FIN); \ 177 | }) 178 | 179 | static void get_stack(struct pt_regs *ctx, struct event_t *event) 180 | { 181 | event->kernel_stack_id = stacks.get_stackid(ctx, 0); 182 | if (event->kernel_stack_id >= 0) { 183 | u64 ip = PT_REGS_IP(ctx); 184 | u64 page_offset; 185 | // if ip isn't sane, leave key ips as zero for later checking 186 | #if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE) 187 | // x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it 188 | page_offset = __PAGE_OFFSET_BASE; 189 | #elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4) 190 | // x64, 4.17, and later 191 | #if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL) 192 | page_offset = __PAGE_OFFSET_BASE_L5; 193 | #else 194 | page_offset = __PAGE_OFFSET_BASE_L4; 195 | #endif 196 | #else 197 | // earlier x86_64 kernels, e.g., 4.6, comes here 198 | // arm64, s390, powerpc, x86_32 199 | page_offset = PAGE_OFFSET; 200 | #endif 201 | if (ip > page_offset) { 202 | event->kernel_ip = ip; 203 | } 204 | } 205 | return; 206 | } 207 | 208 | #define CALL_STACK(ctx, event) \ 209 | do { \ 210 | if (__BCC_callstack) \ 211 | get_stack(ctx, event); \ 212 | } while (0) 213 | 214 | 215 | /** 216 | * Common tracepoint handler. Detect IPv4/IPv6 and 217 | * emit event with address, interface and namespace. 218 | */ 219 | static int 220 | do_trace_skb(struct event_t *event, void *ctx, struct sk_buff *skb, void *netdev) 221 | { 222 | struct net_device *dev; 223 | 224 | char *head; 225 | char *l2_header_address; 226 | char *l3_header_address; 227 | char *l4_header_address; 228 | 229 | u16 mac_header; 230 | u16 network_header; 231 | 232 | u8 proto_icmp_echo_request; 233 | u8 proto_icmp_echo_reply; 234 | u8 l4_offset_from_ip_header; 235 | 236 | struct icmphdr icmphdr; 237 | union tcp_word_hdr tcphdr; 238 | struct udphdr udphdr; 239 | 240 | // Get device pointer, we'll need it to get the name and network namespace 241 | event->ifname[0] = 0; 242 | if (netdev) 243 | dev = netdev; 244 | else 245 | member_read(&dev, skb, dev); 246 | 247 | bpf_probe_read(&event->ifname, IFNAMSIZ, dev->name); 248 | 249 | if (event->ifname[0] == 0 || dev == NULL) 250 | bpf_strncpy(event->ifname, "nil", IFNAMSIZ); 251 | 252 | event->flags |= ROUTE_EVENT_IF; 253 | 254 | #ifdef CONFIG_NET_NS 255 | struct net* net; 256 | 257 | // Get netns id. The code below is equivalent to: event->netns = dev->nd_net.net->ns.inum 258 | possible_net_t *skc_net = &dev->nd_net; 259 | member_read(&net, skc_net, net); 260 | struct ns_common *ns = member_address(net, ns); 261 | member_read(&event->netns, ns, inum); 262 | 263 | // maybe the skb->dev is not init, for this situation, we can get ns by sk->__sk_common.skc_net.net->ns.inum 264 | if (event->netns == 0) { 265 | struct sock *sk; 266 | struct sock_common __sk_common; 267 | struct ns_common* ns2; 268 | member_read(&sk, skb, sk); 269 | if (sk != NULL) { 270 | member_read(&__sk_common, sk, __sk_common); 271 | ns2 = member_address(__sk_common.skc_net.net, ns); 272 | member_read(&event->netns, ns2, inum); 273 | } 274 | } 275 | 276 | 277 | #endif 278 | event->cpu = bpf_get_smp_processor_id(); 279 | member_read(&event->len, skb, len); 280 | member_read(&head, skb, head); 281 | member_read(&mac_header, skb, mac_header); 282 | member_read(&network_header, skb, network_header); 283 | 284 | if(network_header == 0) { 285 | network_header = mac_header + MAC_HEADER_SIZE; 286 | } 287 | 288 | l2_header_address = mac_header + head; 289 | bpf_probe_read(&event->dest_mac, 6, l2_header_address); 290 | 291 | l3_header_address = head + network_header; 292 | bpf_probe_read(&event->ip_version, sizeof(u8), l3_header_address); 293 | event->ip_version = event->ip_version >> 4 & 0xf; 294 | 295 | if (event->ip_version == 4) { 296 | struct iphdr iphdr; 297 | bpf_probe_read(&iphdr, sizeof(iphdr), l3_header_address); 298 | 299 | l4_offset_from_ip_header = iphdr.ihl * 4; 300 | event->l4_proto = iphdr.protocol; 301 | event->saddr[0] = iphdr.saddr; 302 | event->daddr[0] = iphdr.daddr; 303 | event->tot_len = ntohs(iphdr.tot_len); 304 | 305 | if (event->l4_proto == IPPROTO_ICMP) { 306 | proto_icmp_echo_request = ICMP_ECHO; 307 | proto_icmp_echo_reply = ICMP_ECHOREPLY; 308 | } 309 | 310 | } else if (event->ip_version == 6) { 311 | // Assume no option header --> fixed size header 312 | struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)l3_header_address; 313 | l4_offset_from_ip_header = sizeof(*ipv6hdr); 314 | 315 | bpf_probe_read(&event->l4_proto, sizeof(ipv6hdr->nexthdr), (char*)ipv6hdr + offsetof(struct ipv6hdr, nexthdr)); 316 | bpf_probe_read(event->saddr, sizeof(ipv6hdr->saddr), (char*)ipv6hdr + offsetof(struct ipv6hdr, saddr)); 317 | bpf_probe_read(event->daddr, sizeof(ipv6hdr->daddr), (char*)ipv6hdr + offsetof(struct ipv6hdr, daddr)); 318 | bpf_probe_read(&event->tot_len, sizeof(ipv6hdr->payload_len), (char*)ipv6hdr + offsetof(struct ipv6hdr, payload_len)); 319 | event->tot_len = ntohs(event->tot_len); 320 | 321 | if (event->l4_proto == IPPROTO_ICMPV6) { 322 | proto_icmp_echo_request = ICMPV6_ECHO_REQUEST; 323 | proto_icmp_echo_reply = ICMPV6_ECHO_REPLY; 324 | } 325 | 326 | } else { 327 | return -1; 328 | } 329 | 330 | l4_header_address = l3_header_address + l4_offset_from_ip_header; 331 | switch (event->l4_proto) { 332 | case IPPROTO_ICMPV6: 333 | case IPPROTO_ICMP: 334 | bpf_probe_read(&icmphdr, sizeof(icmphdr), l4_header_address); 335 | if (icmphdr.type != proto_icmp_echo_request && icmphdr.type != proto_icmp_echo_reply) { 336 | return -1; 337 | } 338 | event->icmptype = icmphdr.type; 339 | event->icmpid = be16_to_cpu(icmphdr.un.echo.id); 340 | event->icmpseq = be16_to_cpu(icmphdr.un.echo.sequence); 341 | break; 342 | case IPPROTO_TCP: 343 | bpf_probe_read(&tcphdr, sizeof(tcphdr), l4_header_address); 344 | init_tcpflags_bits(event->tcpflags, tcp_flag_word(&tcphdr)); 345 | event->sport = be16_to_cpu(tcphdr.hdr.source); 346 | event->dport = be16_to_cpu(tcphdr.hdr.dest); 347 | break; 348 | case IPPROTO_UDP: 349 | bpf_probe_read(&udphdr, sizeof(udphdr), l4_header_address); 350 | event->sport = be16_to_cpu(udphdr.source); 351 | event->dport = be16_to_cpu(udphdr.dest); 352 | break; 353 | default: 354 | return -1; 355 | } 356 | 357 | #if __BCC_keep 358 | #endif 359 | 360 | 361 | /* 362 | * netns filter 363 | */ 364 | if (__BCC_netns !=0 && event->netns != 0 && event->netns != __BCC_netns) { 365 | return -1; 366 | } 367 | 368 | /* 369 | * pid filter 370 | */ 371 | #if __BCC_pid 372 | u64 tgid = bpf_get_current_pid_tgid() >> 32; 373 | if (tgid != __BCC_pid) 374 | return -1; 375 | #endif 376 | 377 | /* 378 | * skb filter 379 | */ 380 | #if __BCC_ipaddr 381 | if (event->ip_version == 4) { 382 | if (__BCC_ipaddr != event->saddr[0] && __BCC_ipaddr != event->daddr[0]) 383 | return -1; 384 | } else { 385 | return -1; 386 | } 387 | #endif 388 | 389 | #if __BCC_proto 390 | if (__BCC_proto != event->l4_proto) 391 | return -1; 392 | #endif 393 | 394 | #if __BCC_port 395 | if ( (event->l4_proto == IPPROTO_UDP || event->l4_proto == IPPROTO_TCP) && 396 | (__BCC_port != event->sport && __BCC_port != event->dport)) 397 | return -1; 398 | #endif 399 | 400 | #if __BCC_icmpid 401 | if (__BCC_proto == IPPROTO_ICMP && __BCC_icmpid != event->icmpid) 402 | return -1; 403 | #endif 404 | 405 | #if __BCC_keep 406 | #endif 407 | 408 | return 0; 409 | } 410 | 411 | static int 412 | do_trace(void *ctx, struct sk_buff *skb, const char *func_name, void *netdev) 413 | { 414 | struct event_t event = {}; 415 | union ___skb_pkt_type type = {}; 416 | 417 | if (do_trace_skb(&event, ctx, skb, netdev) < 0) 418 | return 0; 419 | 420 | event.skb=skb; 421 | bpf_probe_read(&type.value, 1, ((char*)skb) + offsetof(typeof(*skb), __pkt_type_offset)); 422 | event.pkt_type = type.pkt_type; 423 | 424 | event.start_ns = bpf_ktime_get_ns(); 425 | bpf_strncpy(event.func_name, func_name, FUNCNAME_MAX_LEN); 426 | CALL_STACK(ctx, &event); 427 | route_event.perf_submit(ctx, &event, sizeof(event)); 428 | out: 429 | return 0; 430 | } 431 | 432 | #if __BCC_route 433 | 434 | /* 435 | * netif rcv hook: 436 | * 1) int netif_rx(struct sk_buff *skb) 437 | * 2) int __netif_receive_skb(struct sk_buff *skb) 438 | * 3) gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 439 | * 4) ... 440 | */ 441 | int kprobe__netif_rx(struct pt_regs *ctx, struct sk_buff *skb) 442 | { 443 | return do_trace(ctx, skb, __func__+8, NULL); 444 | } 445 | 446 | int kprobe____netif_receive_skb(struct pt_regs *ctx, struct sk_buff *skb) 447 | { 448 | return do_trace(ctx, skb, __func__+8, NULL); 449 | } 450 | 451 | int kprobe__tpacket_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 452 | { 453 | return do_trace(ctx, skb, __func__+8, orig_dev); 454 | } 455 | 456 | int kprobe__packet_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 457 | { 458 | return do_trace(ctx, skb, __func__+8, orig_dev); 459 | } 460 | 461 | int kprobe__napi_gro_receive(struct pt_regs *ctx, struct napi_struct *napi, struct sk_buff *skb) 462 | { 463 | return do_trace(ctx, skb, __func__+8, NULL); 464 | } 465 | 466 | /* 467 | * netif send hook: 468 | * 1) int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) 469 | * 2) ... 470 | */ 471 | 472 | int kprobe____dev_queue_xmit(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *sb_dev) 473 | { 474 | return do_trace(ctx, skb, __func__+8, NULL); 475 | } 476 | 477 | /* 478 | * br process hook: 479 | * 1) rx_handler_result_t br_handle_frame(struct sk_buff **pskb) 480 | * 2) int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 481 | * 3) unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 482 | * 4) int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 483 | * 5) int br_pass_frame_up(struct sk_buff *skb) 484 | * 6) int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) 485 | * 7) void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig) 486 | * 8) int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 487 | * 9) unsigned int br_nf_forward_ip(void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 488 | * 10)int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 489 | * 11)unsigned int br_nf_post_routing(void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 490 | * 12)int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) 491 | */ 492 | int kprobe__br_handle_frame(struct pt_regs *ctx, struct sk_buff **pskb) 493 | { 494 | return do_trace(ctx, *pskb, __func__+8, NULL); 495 | } 496 | 497 | int kprobe__br_handle_frame_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 498 | { 499 | return do_trace(ctx, skb, __func__+8, NULL); 500 | } 501 | 502 | int kprobe__br_nf_pre_routing(struct pt_regs *ctx, void *priv, struct sk_buff *skb, const struct nf_hook_state *state) 503 | { 504 | return do_trace(ctx, skb, __func__+8, NULL); 505 | } 506 | 507 | int kprobe__br_nf_pre_routing_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 508 | { 509 | return do_trace(ctx, skb, __func__+8, NULL); 510 | } 511 | 512 | int kprobe__br_pass_frame_up(struct pt_regs *ctx, struct sk_buff *skb) 513 | { 514 | return do_trace(ctx, skb, __func__+8, NULL); 515 | } 516 | 517 | int kprobe__br_netif_receive_skb(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 518 | { 519 | return do_trace(ctx, skb, __func__+8, NULL); 520 | } 521 | 522 | int kprobe__br_forward(struct pt_regs *ctx, const void *to, struct sk_buff *skb, bool local_rcv, bool local_orig) 523 | { 524 | return do_trace(ctx, skb, __func__+8, NULL); 525 | } 526 | 527 | int kprobe____br_forward(struct pt_regs *ctx, const void *to, struct sk_buff *skb, bool local_orig) 528 | { 529 | return do_trace(ctx, skb, __func__+8, NULL); 530 | } 531 | 532 | int kprobe__deliver_clone(struct pt_regs *ctx, const void *prev, struct sk_buff *skb, bool local_orig) 533 | { 534 | return do_trace(ctx, skb, __func__+8, NULL); 535 | } 536 | 537 | int kprobe__br_forward_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 538 | { 539 | return do_trace(ctx, skb, __func__+8, NULL); 540 | } 541 | 542 | int kprobe__br_nf_forward_ip(struct pt_regs *ctx, void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 543 | { 544 | return do_trace(ctx, skb, __func__+8, NULL); 545 | } 546 | 547 | int kprobe__br_nf_forward_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 548 | { 549 | return do_trace(ctx, skb, __func__+8, NULL); 550 | } 551 | 552 | int kprobe__br_nf_post_routing(struct pt_regs *ctx, void *priv,struct sk_buff *skb,const struct nf_hook_state *state) 553 | { 554 | return do_trace(ctx, skb, __func__+8, NULL); 555 | } 556 | 557 | int kprobe__br_nf_dev_queue_xmit(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 558 | { 559 | return do_trace(ctx, skb, __func__+8, NULL); 560 | } 561 | 562 | /* 563 | * ip layer: 564 | * 1) int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 565 | * 2) int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 566 | * 3) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) 567 | * 4) int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) 568 | * 5) int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 569 | * 6) ... 570 | */ 571 | 572 | int kprobe__ip_rcv(struct pt_regs *ctx, struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 573 | { 574 | return do_trace(ctx, skb, __func__+8, NULL); 575 | } 576 | 577 | int kprobe__ip_rcv_finish(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 578 | { 579 | return do_trace(ctx, skb, __func__+8, NULL); 580 | } 581 | 582 | int kprobe__ip_output(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 583 | { 584 | return do_trace(ctx, skb, __func__+8, NULL); 585 | } 586 | 587 | int kprobe__ip_finish_output(struct pt_regs *ctx, struct net *net, struct sock *sk, struct sk_buff *skb) 588 | { 589 | return do_trace(ctx, skb, __func__+8, NULL); 590 | } 591 | 592 | #endif 593 | 594 | #if __BCC_iptable 595 | static int 596 | __ipt_do_table_in(struct pt_regs *ctx, struct sk_buff *skb, 597 | const struct nf_hook_state *state, struct xt_table *table) 598 | { 599 | u32 pid = bpf_get_current_pid_tgid(); 600 | 601 | struct ipt_do_table_args args = { 602 | .skb = skb, 603 | .state = state, 604 | .table = table, 605 | }; 606 | args.start_ns = bpf_ktime_get_ns(); 607 | cur_ipt_do_table_args.update(&pid, &args); 608 | 609 | return 0; 610 | }; 611 | 612 | static int 613 | __ipt_do_table_out(struct pt_regs * ctx, struct sk_buff *skb) 614 | { 615 | struct event_t event = {}; 616 | union ___skb_pkt_type type = {}; 617 | struct ipt_do_table_args *args; 618 | u32 pid = bpf_get_current_pid_tgid(); 619 | 620 | args = cur_ipt_do_table_args.lookup(&pid); 621 | if (args == 0) 622 | return 0; 623 | 624 | cur_ipt_do_table_args.delete(&pid); 625 | 626 | if (do_trace_skb(&event, ctx, args->skb, NULL) < 0) 627 | return 0; 628 | 629 | event.flags |= ROUTE_EVENT_IPTABLE; 630 | event.ipt_delay = bpf_ktime_get_ns() - args->start_ns; 631 | member_read(&event.hook, args->state, hook); 632 | member_read(&event.pf, args->state, pf); 633 | member_read(&event.tablename, args->table, name); 634 | event.verdict = PT_REGS_RC(ctx); 635 | event.skb=args->skb; 636 | bpf_probe_read(&type.value, 1, ((char*)args->skb) + offsetof(typeof(*args->skb), __pkt_type_offset)); 637 | event.pkt_type = type.pkt_type; 638 | 639 | event.start_ns = bpf_ktime_get_ns(); 640 | CALL_STACK(ctx, &event); 641 | route_event.perf_submit(ctx, &event, sizeof(event)); 642 | 643 | return 0; 644 | } 645 | 646 | int kprobe__ipt_do_table(struct pt_regs *ctx, struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) 647 | { 648 | return __ipt_do_table_in(ctx, skb, state, table); 649 | }; 650 | 651 | /* 652 | * tricky: use ebx as the 1st parms, thus get skb 653 | */ 654 | int kretprobe__ipt_do_table(struct pt_regs *ctx) 655 | { 656 | struct sk_buff *skb=(void*)ctx->bx; 657 | return __ipt_do_table_out(ctx, skb); 658 | } 659 | #endif 660 | 661 | 662 | #if __BCC_dropstack 663 | int kprobe____kfree_skb(struct pt_regs *ctx, struct sk_buff *skb) 664 | { 665 | struct event_t event = {}; 666 | 667 | if (do_trace_skb(&event, ctx, skb, NULL) < 0) 668 | return 0; 669 | 670 | event.flags |= ROUTE_EVENT_DROP; 671 | event.start_ns = bpf_ktime_get_ns(); 672 | bpf_strncpy(event.func_name, __func__+8, FUNCNAME_MAX_LEN); 673 | get_stack(ctx, &event); 674 | route_event.perf_submit(ctx, event, sizeof(*event)); 675 | return 0; 676 | } 677 | #endif 678 | 679 | #if 0 680 | int kprobe__ip6t_do_table(struct pt_regs *ctx, struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table) 681 | { 682 | return __ipt_do_table_in(ctx, skb, state, table); 683 | }; 684 | 685 | int kretprobe__ip6t_do_table(struct pt_regs *ctx) 686 | { 687 | return __ipt_do_table_out(ctx); 688 | } 689 | #endif 690 | --------------------------------------------------------------------------------