├── .gitignore ├── go.mod ├── inode ├── trace_386.go ├── trace_amd64.go ├── trace_arm64.go └── inode.go ├── connect ├── trace_linux_amd64.go ├── trace_linux_arm64.go ├── trace_linux_386.go └── connect.go ├── pkg ├── kallsyms │ ├── kallsyms_test.go │ └── kallsyms.go ├── alloc │ └── alloc.go └── kversion │ └── kversion.go ├── .github └── workflows │ └── build.yml ├── cmd └── systracer │ ├── listen.go │ ├── connect.go │ ├── main.go │ └── watch.go ├── compile_test.go ├── listen ├── trace_linux_386.go ├── trace_linux_amd64.go ├── trace_linux_arm64.go └── listen.go ├── README.md ├── package.go ├── rcnotify ├── trace_amd64.go ├── trace_arm64.go ├── trace_386.go └── rcnotify.go ├── go.sum ├── tracefs.go ├── handle.go ├── LICENSE ├── compile.go └── manager.go /.gitignore: -------------------------------------------------------------------------------- 1 | /systracer 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/chaitin/systracer 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277 7 | github.com/pkg/errors v0.9.1 8 | github.com/spf13/cobra v1.6.0 9 | github.com/stretchr/testify v1.8.0 10 | go.uber.org/zap v1.23.0 11 | golang.org/x/sync v0.1.0 12 | golang.org/x/sys v0.2.0 13 | ) 14 | -------------------------------------------------------------------------------- /inode/trace_386.go: -------------------------------------------------------------------------------- 1 | package inode 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySecurityInodePin_V2_6_24 struct { 8 | systracer.ProbeEvent 9 | Inode uint64 `tracing:"%ax"` 10 | Name string `tracing:"%dx,Name ~ \"systracer.inode_pin.*\""` 11 | } 12 | 13 | type entrySecurityInodePin_V5_12 struct { 14 | systracer.ProbeEvent 15 | Inode uint64 `tracing:"%dx"` 16 | Name string `tracing:"%cx,Name ~ 
\"systracer.inode_pin.*\""` 17 | } 18 | -------------------------------------------------------------------------------- /inode/trace_amd64.go: -------------------------------------------------------------------------------- 1 | package inode 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySecurityInodePin_V2_6_24 struct { 8 | systracer.ProbeEvent 9 | Inode uint64 `tracing:"%di"` 10 | Name string `tracing:"%si,Name ~ \"systracer.inode_pin.*\""` 11 | } 12 | 13 | type entrySecurityInodePin_V5_12 struct { 14 | systracer.ProbeEvent 15 | Inode uint64 `tracing:"%si"` 16 | Name string `tracing:"%dx,Name ~ \"systracer.inode_pin.*\""` 17 | } 18 | -------------------------------------------------------------------------------- /inode/trace_arm64.go: -------------------------------------------------------------------------------- 1 | package inode 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySecurityInodePin_V2_6_24 struct { 8 | systracer.ProbeEvent 9 | Inode uint64 `tracing:"%x0"` 10 | Name string `tracing:"%x1,Name ~ \"systracer.inode_pin.*\""` 11 | } 12 | 13 | type entrySecurityInodePin_V5_12 struct { 14 | systracer.ProbeEvent 15 | Inode uint64 `tracing:"%x1"` 16 | Name string `tracing:"%x2,Name ~ \"systracer.inode_pin.*\""` 17 | } 18 | -------------------------------------------------------------------------------- /connect/trace_linux_amd64.go: -------------------------------------------------------------------------------- 1 | package connect 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySyscallConnectInet4 struct { 8 | systracer.ProbeEvent 9 | FD int64 `tracing:"%di"` 10 | Family uint16 `tracing:"+0(%si),Family == 2"` 11 | Port uint16 `tracing:"+2(%si),,bigendian"` 12 | Address uint32 `tracing:"+4(%si),,bigendian"` 13 | Len uint64 `tracing:"%dx,Len >= 16"` 14 | } 15 | 16 | type entrySyscallConnectInet6 struct { 17 | systracer.ProbeEvent 18 | FD int64 `tracing:"%di"` 19 | Family 
uint16 `tracing:"+0(%si),Family == 10"` 20 | Port uint16 `tracing:"+2(%si),,bigendian"` 21 | FlowInfo uint32 `tracing:"+4(%si)"` 22 | Address0 uint32 `tracing:"+8(%si),,bigendian"` 23 | Address1 uint32 `tracing:"+12(%si),,bigendian"` 24 | Address2 uint32 `tracing:"+16(%si),,bigendian"` 25 | Address3 uint32 `tracing:"+20(%si),,bigendian"` 26 | Scope uint32 `tracing:"+24(%si)"` 27 | Len uint64 `tracing:"%dx,Len >= 28"` 28 | } 29 | 30 | type exitSyscallConnect struct { 31 | systracer.ReturnEvent 32 | Errno int32 `tracing:"%ax"` 33 | } 34 | 35 | type entryInetProtocolConnect struct { 36 | systracer.ProbeEvent 37 | 38 | // (struct socket*)->type 39 | Type uint16 `tracing:"+4(%di)"` 40 | } 41 | -------------------------------------------------------------------------------- /connect/trace_linux_arm64.go: -------------------------------------------------------------------------------- 1 | package connect 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySyscallConnectInet4 struct { 8 | systracer.ProbeEvent 9 | FD int64 `tracing:"%x0"` 10 | Family uint16 `tracing:"+0(%x1),Family == 2"` 11 | Port uint16 `tracing:"+2(%x1),,bigendian"` 12 | Address uint32 `tracing:"+4(%x1),,bigendian"` 13 | Len uint64 `tracing:"%x2,Len >= 16"` 14 | } 15 | 16 | type entrySyscallConnectInet6 struct { 17 | systracer.ProbeEvent 18 | FD int64 `tracing:"%x0"` 19 | Family uint16 `tracing:"+0(%x1),Family == 10"` 20 | Port uint16 `tracing:"+2(%x1),,bigendian"` 21 | FlowInfo uint32 `tracing:"+4(%x1)"` 22 | Address0 uint32 `tracing:"+8(%x1),,bigendian"` 23 | Address1 uint32 `tracing:"+12(%x1),,bigendian"` 24 | Address2 uint32 `tracing:"+16(%x1),,bigendian"` 25 | Address3 uint32 `tracing:"+20(%x1),,bigendian"` 26 | Scope uint32 `tracing:"+24(%x1)"` 27 | Len uint64 `tracing:"%x2,Len >= 28"` 28 | } 29 | 30 | type exitSyscallConnect struct { 31 | systracer.ReturnEvent 32 | Errno int32 `tracing:"%x0"` 33 | } 34 | 35 | type entryInetProtocolConnect struct { 36 | systracer.ProbeEvent 
37 | 38 | // (struct socket*)->type 39 | Type uint16 `tracing:"+4(%x0)"` 40 | } 41 | -------------------------------------------------------------------------------- /connect/trace_linux_386.go: -------------------------------------------------------------------------------- 1 | package connect 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySyscallConnectInet4 struct { 8 | systracer.ProbeEvent 9 | FD int32 `tracing:"+4(%sp)"` 10 | Family uint16 `tracing:"+0(+8(%sp)),Family == 2"` 11 | Port uint16 `tracing:"+2(+8(%sp)),,bigendian"` 12 | Address uint32 `tracing:"+4(+8(%sp)),,bigendian"` 13 | Len uint32 `tracing:"+12(%sp),Len >= 16"` 14 | } 15 | 16 | type entrySyscallConnectInet6 struct { 17 | systracer.ProbeEvent 18 | FD int32 `tracing:"+4(%sp)"` 19 | Family uint16 `tracing:"+0(+8(%sp)),Family == 10"` 20 | Port uint16 `tracing:"+2(+8(%sp)),,bigendian"` 21 | FlowInfo uint32 `tracing:"+4(+8(%sp))"` 22 | Address0 uint32 `tracing:"+8(+8(%sp)),,bigendian"` 23 | Address1 uint32 `tracing:"+12(+8(%sp)),,bigendian"` 24 | Address2 uint32 `tracing:"+16(+8(%sp)),,bigendian"` 25 | Address3 uint32 `tracing:"+20(+8(%sp)),,bigendian"` 26 | Scope uint32 `tracing:"+24(+8(%sp))"` 27 | Len uint64 `tracing:"+12(%sp),Len >= 28"` 28 | } 29 | 30 | type exitSyscallConnect struct { 31 | systracer.ReturnEvent 32 | Errno int32 `tracing:"%ax"` 33 | } 34 | 35 | type entryInetProtocolConnect struct { 36 | systracer.ProbeEvent 37 | 38 | // (struct socket*)->type 39 | Type uint16 `tracing:"+4(+0(%sp))"` 40 | } 41 | -------------------------------------------------------------------------------- /pkg/kallsyms/kallsyms_test.go: -------------------------------------------------------------------------------- 1 | package kallsyms 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test(t *testing.T) { 11 | assert := assert.New(t) 12 | symtabs := Parse(bytes.Trim([]byte(` 13 | ffffffff90d38450 t do_open 14 | 
ffffffffc01920e0 r __func__.24 [video] 15 | ffffffff91ff03ae d .LC1 16 | ffffffffc027b1b0 r .LC1 [drm] 17 | ffffffffc0275fa8 r .LC1 [drm] 18 | ffffffffc0ba88a0 t do_open [nfs] 19 | `), "\n"), nil) 20 | 21 | coreSymtab := symtabs[""] 22 | assert.NotNil(coreSymtab) 23 | assert.Equal(uint64(0xffffffff91ff03ae), coreSymtab.Lookup(".LC1", "Dd")) 24 | assert.Equal(uint64(0), coreSymtab.Lookup(".LC1", "Tt")) 25 | assert.Equal(uint64(0xffffffff90d38450), coreSymtab.Lookup("do_open", "Tt")) 26 | 27 | nfsSymtab := symtabs["nfs"] 28 | assert.NotNil(nfsSymtab) 29 | assert.Equal(uint64(0xffffffffc0ba88a0), nfsSymtab.Lookup("do_open", "Tt")) 30 | 31 | drmSymtab := symtabs["drm"] 32 | assert.NotNil(drmSymtab) 33 | assert.Equal(uint64(0), drmSymtab.Lookup(".LC1", "Dd")) 34 | assert.Equal(uint64(0xffffffffc0275fa8), drmSymtab.Lookup(".LC1", "Rr")) 35 | 36 | videoSymtab := symtabs["video"] 37 | assert.NotNil(videoSymtab) 38 | assert.Equal(uint64(0), videoSymtab.Lookup(".LC1", "Dd")) 39 | assert.Equal(uint64(0xffffffffc01920e0), videoSymtab.Lookup("__func__.24", "Rr")) 40 | } 41 | -------------------------------------------------------------------------------- /pkg/alloc/alloc.go: -------------------------------------------------------------------------------- 1 | // package alloc is the id allocator that circulates the 2 | // next id as the id allocator. 3 | // 4 | // When the allocation space is large, the next id behind 5 | // the current id is very likely to be unused in most cases, 6 | // which involves only a single operation while allocating. 7 | // 8 | // The upper limit of this allocator is O(N), where N is the 9 | // current number of elements in use. 10 | package alloc 11 | 12 | // Alloc allocates the ID by circularly seeking for the 13 | // next available identity. 14 | // 15 | // Please notice that since the index 0 has been reserved 16 | // for invalid index, it will be returned whenever the 17 | // allocation has failed. 
// Alloc allocates an ID by circularly seeking for the next available
// identity after id.
//
// Candidates are probed in the order id+1, id+2, ..., upperLimit and
// then 1, 2, ..., id. The first candidate for which occupied reports
// false is returned. An upperLimit of 0 means that the whole uint64
// range may be used.
//
// Please notice that since the index 0 has been reserved for the
// invalid index, it is returned whenever the allocation has failed,
// that is when every candidate is occupied.
func Alloc(
	id, upperLimit uint64, occupied func(uint64) bool,
) uint64 {
	if upperLimit == 0 {
		upperLimit = ^uint64(0)
	}

	// firstFree scans the inclusive range [lo, hi] and returns the
	// first identity that is not occupied, or 0 when there is none.
	// The termination test is an equality check performed before the
	// increment, so the scan cannot overflow even when hi is the
	// maximum uint64 value.
	firstFree := func(lo, hi uint64) uint64 {
		if lo > hi {
			return 0
		}
		for candidate := lo; ; candidate++ {
			if !occupied(candidate) {
				return candidate
			}
			if candidate == hi {
				return 0
			}
		}
	}

	// Seek forward first. In most cases the very first candidate
	// (id+1) is free, so this costs a single call to occupied. The
	// guard also ensures that id+1 cannot overflow and that no
	// identity beyond upperLimit is ever returned.
	if id < upperLimit {
		if newID := firstFree(id+1, upperLimit); newID != 0 {
			return newID
		}
	}

	// Wrap around and seek from the lowest valid identity up to and
	// including id itself, which may have been released meanwhile.
	wrapEnd := id
	if wrapEnd > upperLimit {
		wrapEnd = upperLimit
	}
	return firstFree(1, wrapEnd)
}
actions/download-artifact@v3 35 | with: 36 | name: systracer 37 | path: build/ 38 | - run: | 39 | ls -la build 40 | cat build/*.md5.txt 41 | cat build/*.sha256.txt 42 | - uses: softprops/action-gh-release@v1 43 | if: startsWith(github.ref, 'refs/tags/') 44 | with: 45 | body: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" 46 | draft: true 47 | files: build/* 48 | -------------------------------------------------------------------------------- /cmd/systracer/listen.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/aegistudio/shaft" 8 | "go.uber.org/zap" 9 | "golang.org/x/sync/errgroup" 10 | "golang.org/x/sys/unix" 11 | 12 | "github.com/chaitin/systracer/listen" 13 | ) 14 | 15 | var ( 16 | listenEnabled bool 17 | ) 18 | 19 | func initListenModule() shaft.Option { 20 | if !(allEnabled || listenEnabled) { 21 | return shaft.Module() 22 | } 23 | return shaft.Module( 24 | listen.Module, 25 | shaft.Provide(func( 26 | ctx context.Context, group *errgroup.Group, 27 | logger *zap.SugaredLogger, ch <-chan listen.Event, 28 | ) ([]moduleBarrier, error) { 29 | group.Go(func() error { 30 | for { 31 | var event listen.Event 32 | select { 33 | case <-ctx.Done(): 34 | return nil 35 | case event = <-ch: 36 | } 37 | eventContext := fmt.Sprintf("%s %d", 38 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID) 39 | eventAddr := "" 40 | switch event.Family { 41 | case unix.AF_INET: 42 | eventAddr = fmt.Sprintf("%s:%d", 43 | event.Addr, event.Port) 44 | case unix.AF_INET6: 45 | eventAddr = fmt.Sprintf("[%s]:%d", 46 | event.Addr, event.Port) 47 | } 48 | switch event.Op { 49 | case listen.OpListenStart: 50 | logger.Infof( 51 | "%s - listen_tcp(%d, %q, %d)", 52 | eventContext, *event.FD, 53 | eventAddr, *event.Backlog) 54 | case listen.OpListenEnd: 55 | logger.Infof( 56 | "%s - unlisten_tcp(%q)", 57 | eventContext, eventAddr) 58 
| } 59 | } 60 | }) 61 | return nil, nil 62 | }), 63 | ) 64 | } 65 | 66 | func init() { 67 | moduleInits = append(moduleInits, initListenModule) 68 | rootCmd.PersistentFlags().BoolVar( 69 | &listenEnabled, "listen", listenEnabled, 70 | "collect listen event for logging") 71 | } 72 | -------------------------------------------------------------------------------- /compile_test.go: -------------------------------------------------------------------------------- 1 | package systracer 2 | 3 | import ( 4 | "bytes" 5 | "reflect" 6 | "strings" 7 | "testing" 8 | "unsafe" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // OpenEvent is a demo event dedicated for being 14 | // used as example of parsing. 15 | type OpenEvent struct { 16 | ProbeEvent 17 | Dir int64 `tracing:"%di"` 18 | Filename string `tracing:"%si"` 19 | Flags uint64 `tracing:"%dx"` 20 | Mode uint64 `tracing:"%cx"` 21 | } 22 | 23 | // TestOpenEvent is the event for performing tests 24 | // strongly associated with the open event. 25 | func TestOpenEvent(t *testing.T) { 26 | var err error 27 | assert := assert.New(t) 28 | 29 | // Attempt to compile and validate OpenEvent. 30 | desc, err := compileTraceEvent( 31 | reflect.TypeOf(OpenEvent{})) 32 | assert.NoError(err) 33 | if err != nil { 34 | return 35 | } 36 | assert.Equal(4, len(desc.fields), "number of fields") 37 | assert.Equal("", desc.initialCondition, "initial condition") 38 | 39 | // Evaluate and test the format of open event. 40 | expectedFormat := strings.Join([]string{ 41 | "Dir=%di:s64", "FilenameStart=%si:u64", 42 | "Filename=+0(%si):string", "FilenameEnd=%si:u64", 43 | "Flags=%dx:u64", "Mode=%cx:u64", 44 | }, " ") 45 | actualFormat := desc.format() 46 | assert.Equal(expectedFormat, actualFormat, "format") 47 | 48 | // Attempt to parse and fill the event struct. 
// regexpSymbolItem matches one line of the kernel symbol listing: a
// hexadecimal address, a one-character symbol type, the symbol name
// and, optionally, a tab followed by the owning module in brackets.
var regexpSymbolItem = regexp.MustCompilePOSIX(
	`^([0-9a-f]+) ([A-Za-z0-9]) ([^ \t]+)(\t\[([^]]+)\])?$`)

func init() {
	regexpSymbolItem.Longest()
}

// symbol is a single occurrence of a named symbol.
type symbol struct {
	addr uint64
	typ  byte
}

// SymbolTable is the parsed symbol table of the kernel core or of one
// kernel module, indexed by symbol name.
type SymbolTable struct {
	table map[string][]symbol
}

// Lookup returns the address of the symbol called name whose type is
// one of the characters in types.
//
// When several entries qualify, the one that was parsed last wins.
// Zero is returned when no entry qualifies.
func (t *SymbolTable) Lookup(name, types string) uint64 {
	candidates := t.table[name]
	for idx := len(candidates) - 1; idx >= 0; idx-- {
		entry := candidates[idx]
		if bytes.IndexAny([]byte{entry.typ}, types) < 0 {
			continue
		}
		return entry.addr
	}
	return 0
}

// Parse the kallsyms data and return the parsed symbol tables, keyed
// by module name. Symbols of the kernel core are stored under the
// empty module name.
//
// Lines that do not look like a symbol item are skipped. When
// interestedModules is not nil, symbols of modules that are not among
// its keys are skipped too.
func Parse(
	kallsyms []byte, interestedModules map[string]struct{},
) map[string]*SymbolTable {
	tables := make(map[string]*SymbolTable)
	for remaining := kallsyms; len(remaining) > 0; {
		// Cut the next line off the remaining input; the last line
		// needs no terminating newline.
		line := remaining
		if eol := bytes.IndexByte(remaining, '\n'); eol >= 0 {
			line, remaining = remaining[:eol], remaining[eol+1:]
		} else {
			remaining = nil
		}

		// Submatches: 1 address, 2 type, 3 name, 4 module with its
		// decoration, 5 bare module name.
		fields := regexpSymbolItem.FindSubmatch(line)
		if len(fields) == 0 {
			continue
		}
		module := string(fields[5])
		if interestedModules != nil {
			if _, wanted := interestedModules[module]; !wanted {
				continue
			}
		}

		owner := tables[module]
		if owner == nil {
			owner = &SymbolTable{
				table: make(map[string][]symbol),
			}
			tables[module] = owner
		}

		// The pattern only admits hexadecimal digits here, so the
		// parse error is deliberately ignored.
		addr, _ := strconv.ParseUint(string(fields[1]), 16, 64)
		name := string(fields[3])
		owner.table[name] = append(owner.table[name], symbol{
			addr: addr,
			typ:  fields[2][0],
		})
	}
	return tables
}
entryProtocolListenInet4_V2_6_12 struct { 26 | systracer.ProbeEvent 27 | Sk StructSockListenInet4 `tracing:"+20(%ax)"` 28 | } 29 | 30 | type entryProtocolListenInet4_V5_3 struct { 31 | systracer.ProbeEvent 32 | Sk StructSockListenInet4 `tracing:"+16(%ax)"` 33 | } 34 | 35 | type StructSockListenInet6 struct { 36 | // (struct socket*)->sk 37 | Port uint16 `tracing:"+14({1})"` 38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"` 39 | Address0 uint32 `tracing:"+56({1}),,bigendian"` 40 | Address1 uint32 `tracing:"+60({1}),,bigendian"` 41 | Address2 uint32 `tracing:"+64({1}),,bigendian"` 42 | Address3 uint32 `tracing:"+68({1}),,bigendian"` 43 | } 44 | 45 | type entryProtocolListenInet6_V2_6_12 struct { 46 | systracer.ProbeEvent 47 | Sk StructSockListenInet6 `tracing:"+20(%ax)"` 48 | } 49 | 50 | type entryProtocolListenInet6_V5_3 struct { 51 | systracer.ProbeEvent 52 | Sk StructSockListenInet6 `tracing:"+16(%ax)"` 53 | } 54 | 55 | type entryTCPCloseInet4 struct { 56 | systracer.ProbeEvent 57 | 58 | // (struct socket*)->sk 59 | // Sk uint64 `tracing:"%ax"` 60 | 61 | Address uint32 `tracing:"+4(%ax),,bigendian"` 62 | Port uint16 `tracing:"+14(%ax)"` 63 | Family uint16 `tracing:"+16(%ax),Family == 2"` 64 | State uint8 `tracing:"+18(%ax),State == 10"` 65 | } 66 | 67 | type entryTCPCloseInet6 struct { 68 | systracer.ProbeEvent 69 | 70 | // (struct socket*)->sk 71 | // Sk uint64 `tracing:"ax"` 72 | 73 | Port uint16 `tracing:"+14(%ax)"` 74 | Family uint16 `tracing:"+16(%ax),Family == 10"` 75 | State uint8 `tracing:"+18(%ax),State == 10"` 76 | Address0 uint32 `tracing:"+56(%ax),,bigendian"` 77 | Address1 uint32 `tracing:"+60(%ax),,bigendian"` 78 | Address2 uint32 `tracing:"+64(%ax),,bigendian"` 79 | Address3 uint32 `tracing:"+68(%ax),,bigendian"` 80 | } 81 | -------------------------------------------------------------------------------- /listen/trace_linux_amd64.go: -------------------------------------------------------------------------------- 1 | package listen 2 | 3 | 
import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySyscallListen struct { 8 | systracer.ProbeEvent 9 | FD int64 `tracing:"%di"` 10 | Backlog int64 `tracing:"%si"` 11 | } 12 | 13 | type exitSyscallListen struct { 14 | systracer.ReturnEvent 15 | Errno int32 `tracing:"%ax"` 16 | } 17 | 18 | type StructSockListenInet4 struct { 19 | // (struct socket*)({1})->sk 20 | Address uint32 `tracing:"+4({1}),,bigendian"` 21 | Port uint16 `tracing:"+14({1})"` 22 | Family uint16 `tracing:"+16({1}),{0}Family == 2"` 23 | } 24 | 25 | type entryProtocolListenInet4_V2_6_12 struct { 26 | systracer.ProbeEvent 27 | Sk StructSockListenInet4 `tracing:"+32(%di)"` 28 | } 29 | 30 | type entryProtocolListenInet4_V5_3 struct { 31 | systracer.ProbeEvent 32 | Sk StructSockListenInet4 `tracing:"+24(%di)"` 33 | } 34 | 35 | type StructSockListenInet6 struct { 36 | // (struct socket*)({1})->sk 37 | Port uint16 `tracing:"+14({1})"` 38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"` 39 | Address0 uint32 `tracing:"+72({1}),,bigendian"` 40 | Address1 uint32 `tracing:"+76({1}),,bigendian"` 41 | Address2 uint32 `tracing:"+80({1}),,bigendian"` 42 | Address3 uint32 `tracing:"+84({1}),,bigendian"` 43 | } 44 | 45 | type entryProtocolListenInet6_V2_6_12 struct { 46 | systracer.ProbeEvent 47 | Sk StructSockListenInet6 `tracing:"+32(%di)"` 48 | } 49 | 50 | type entryProtocolListenInet6_V5_3 struct { 51 | systracer.ProbeEvent 52 | Sk StructSockListenInet6 `tracing:"+24(%di)"` 53 | } 54 | 55 | type entryTCPCloseInet4 struct { 56 | systracer.ProbeEvent 57 | 58 | // (struct socket*)->sk 59 | // Sk uint64 `tracing:"di"` 60 | 61 | Address uint32 `tracing:"+4(%di),,bigendian"` 62 | Port uint16 `tracing:"+14(%di)"` 63 | Family uint16 `tracing:"+16(%di),Family == 2"` 64 | State uint8 `tracing:"+18(%di),State == 10"` 65 | } 66 | 67 | type entryTCPCloseInet6 struct { 68 | systracer.ProbeEvent 69 | 70 | // (struct socket*)->sk 71 | // Sk uint64 `tracing:"di"` 72 | 73 | Port uint16 `tracing:"+14(%di)"` 74 
| Family uint16 `tracing:"+16(%di),Family == 10"` 75 | State uint8 `tracing:"+18(%di),State == 10"` 76 | Address0 uint32 `tracing:"+72(%di),,bigendian"` 77 | Address1 uint32 `tracing:"+76(%di),,bigendian"` 78 | Address2 uint32 `tracing:"+80(%di),,bigendian"` 79 | Address3 uint32 `tracing:"+84(%di),,bigendian"` 80 | } 81 | -------------------------------------------------------------------------------- /listen/trace_linux_arm64.go: -------------------------------------------------------------------------------- 1 | package listen 2 | 3 | import ( 4 | "github.com/chaitin/systracer" 5 | ) 6 | 7 | type entrySyscallListen struct { 8 | systracer.ProbeEvent 9 | FD int64 `tracing:"%x0"` 10 | Backlog int64 `tracing:"%x1"` 11 | } 12 | 13 | type exitSyscallListen struct { 14 | systracer.ReturnEvent 15 | Errno int32 `tracing:"%x0"` 16 | } 17 | 18 | type StructSockListenInet4 struct { 19 | // (struct socket*)({1})->sk 20 | Address uint32 `tracing:"+4({1}),,bigendian"` 21 | Port uint16 `tracing:"+14({1})"` 22 | Family uint16 `tracing:"+16({1}),{0}Family == 2"` 23 | } 24 | 25 | type entryProtocolListenInet4_V2_6_12 struct { 26 | systracer.ProbeEvent 27 | Sk StructSockListenInet4 `tracing:"+32(%x0)"` 28 | } 29 | 30 | type entryProtocolListenInet4_V5_3 struct { 31 | systracer.ProbeEvent 32 | Sk StructSockListenInet4 `tracing:"+24(%x0)"` 33 | } 34 | 35 | type StructSockListenInet6 struct { 36 | // (struct socket*)({1})->sk 37 | Port uint16 `tracing:"+14({1})"` 38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"` 39 | Address0 uint32 `tracing:"+72({1}),,bigendian"` 40 | Address1 uint32 `tracing:"+76({1}),,bigendian"` 41 | Address2 uint32 `tracing:"+80({1}),,bigendian"` 42 | Address3 uint32 `tracing:"+84({1}),,bigendian"` 43 | } 44 | 45 | type entryProtocolListenInet6_V2_6_12 struct { 46 | systracer.ProbeEvent 47 | Sk StructSockListenInet6 `tracing:"+32(%x0)"` 48 | } 49 | 50 | type entryProtocolListenInet6_V5_3 struct { 51 | systracer.ProbeEvent 52 | Sk StructSockListenInet6 
`tracing:"+24(%x0)"` 53 | } 54 | 55 | type entryTCPCloseInet4 struct { 56 | systracer.ProbeEvent 57 | 58 | // (struct socket*)->sk 59 | // Sk uint64 `tracing:"x0"` 60 | 61 | Address uint32 `tracing:"+4(%x0),,bigendian"` 62 | Port uint16 `tracing:"+14(%x0)"` 63 | Family uint16 `tracing:"+16(%x0),Family == 2"` 64 | State uint8 `tracing:"+18(%x0),State == 10"` 65 | } 66 | 67 | type entryTCPCloseInet6 struct { 68 | systracer.ProbeEvent 69 | 70 | // (struct socket*)->sk 71 | // Sk uint64 `tracing:"x0"` 72 | 73 | Port uint16 `tracing:"+14(%x0)"` 74 | Family uint16 `tracing:"+16(%x0),Family == 10"` 75 | State uint8 `tracing:"+18(%x0),State == 10"` 76 | Address0 uint32 `tracing:"+72(%x0),,bigendian"` 77 | Address1 uint32 `tracing:"+76(%x0),,bigendian"` 78 | Address2 uint32 `tracing:"+80(%x0),,bigendian"` 79 | Address3 uint32 `tracing:"+84(%x0),,bigendian"` 80 | } 81 | -------------------------------------------------------------------------------- /cmd/systracer/connect.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "syscall" 7 | 8 | "github.com/aegistudio/shaft" 9 | "go.uber.org/zap" 10 | "golang.org/x/sync/errgroup" 11 | "golang.org/x/sys/unix" 12 | 13 | "github.com/chaitin/systracer/connect" 14 | ) 15 | 16 | var ( 17 | connectEnabled bool 18 | ) 19 | 20 | func initConnectModule() shaft.Option { 21 | if !(allEnabled || connectEnabled) { 22 | return shaft.Module() 23 | } 24 | return shaft.Module( 25 | connect.Module, 26 | shaft.Provide(func( 27 | ctx context.Context, group *errgroup.Group, 28 | logger *zap.SugaredLogger, ch <-chan connect.Event, 29 | ) ([]moduleBarrier, error) { 30 | group.Go(func() error { 31 | for { 32 | var event connect.Event 33 | select { 34 | case <-ctx.Done(): 35 | return nil 36 | case event = <-ch: 37 | } 38 | eventContext := fmt.Sprintf("%s %d", 39 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID) 40 | eventAddr := "" 41 | 
switch event.Family { 42 | case unix.AF_INET: 43 | eventAddr = fmt.Sprintf("%s:%d", 44 | event.Addr, event.Port) 45 | case unix.AF_INET6: 46 | eventAddr = fmt.Sprintf("[%s]:%d", 47 | event.Addr, event.Port) 48 | } 49 | eventType := fmt.Sprintf("%d", event.Type) 50 | switch event.Type { 51 | case unix.SOCK_STREAM: 52 | eventType = "tcp" 53 | case unix.SOCK_DGRAM: 54 | eventType = "udp" 55 | case unix.SOCK_RAW, unix.SOCK_PACKET: 56 | eventType = "raw" 57 | } 58 | switch event.Op { 59 | case connect.OpConnectStart: 60 | logger.Infof( 61 | "%s - connect_%s(%d, %q)", 62 | eventContext, eventType, 63 | event.FD, eventAddr) 64 | case connect.OpConnectEnd: 65 | eventResult := "0" 66 | if event.Errno != nil { 67 | if errno := *event.Errno; errno != 0 { 68 | eventResult = fmt.Sprintf("%d (%s)", 69 | errno, syscall.Errno(-errno)) 70 | } 71 | } 72 | logger.Infof( 73 | "%s - connect_%s(%d, %q) = %s", 74 | eventContext, eventType, 75 | event.FD, eventAddr, eventResult) 76 | } 77 | } 78 | }) 79 | return nil, nil 80 | }), 81 | ) 82 | } 83 | 84 | func init() { 85 | moduleInits = append(moduleInits, initConnectModule) 86 | rootCmd.PersistentFlags().BoolVar( 87 | &connectEnabled, "connect", connectEnabled, 88 | "collect connect event for logging") 89 | } 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SysTracer: Linux 系统活动跟踪器 2 | 3 | ![build](https://github.com/chaitin/systracer/actions/workflows/build.yml/badge.svg) 4 | ![release](https://img.shields.io/github/release/chaitin/systracer) 5 | ![update](https://img.shields.io/github/release-date/chaitin/systracer.svg?color=blue&label=update) 6 | 7 | SysTracer 是长亭所开发的,用于跟踪 Linux 关键活动的监控程序。 8 | 9 | 得益于 Linux Tracing 技术和内核分析手段,SysTracer 的监控和跟踪是事件驱动且实时的。 10 | 11 | ![](https://github.com/chaitin/systrace-blob/raw/7a79506047ea3baf7b6b41d3318dfde438e89b34/screenshot.gif) 12 | 13 |

使用 SysTracer 监控 Docker Pull+Run 时的网络和文件活动

14 | 15 | ## 功能和使用方法 16 | 17 | 各 CPU 架构下的 SysTracer 的可执行文件(静态编译)可以在 [Releases](https://github.com/chaitin/systracer/releases) 页面下载,请注意验证文件完整性。 18 | 19 | 执行 SysTracer 需要 root 权限,但 SysTracer 不会在系统上创建持久化文件或修改系统配置。 20 | 21 | ### 网络活动监控 22 | 23 | 当前 SysTracer 支持基于 IPv4 和 IPv6 的网络连接和网络监听活动的监控。 24 | 25 | 网络连接监控基于进程通过 `connect` 系统调用发起远程连接的跟踪,记录了进程用于发起远程连接的主动套接字 FD,远程连接的协议(TCP 或 UDP 等)以及远程服务器的地址。 26 | 27 | 网络监听监控基于进程通过 `listen` 系统调用发起网络监听的跟踪,记录了进程用于监听的被动套接字 FD,监听的网络地址以及 Backlog 大小。 28 | 29 | 通过 `./systracer --connect` 或 `./systracer --all` 可以启用对网络连接的监控,通过 `./systracer --listen` 或 `./systracer --all` 可以启用对网络监听的监控。 30 | 31 | ### 文件操作递归监控 32 | 33 | 当前 SysTracer 支持对文件操作进行递归监控,即用户指定监控目录和所关心的事件集合,SysTracer 输出在监控目录下发生的文件事件。 34 | 35 | 所谓递归监控是指,不仅监控指定的监控目录下的文件事件,还监控其子目录及所有后代目录下发生的文件事件。 36 | 37 | 当前支持的文件操作包括:(文件、目录)创建、(文件、目录)删除、移动或重命名、属性(权限、所有用户、所有组)修改、创建符号连接、创建硬连接。 38 | 39 | 如果同时指定了具有父子关系的监控目录(如 `/usr` 和 `/usr/lib`),则子目录的事件集合将覆盖父级目录的,父级目录下的其他目录不受影响。 40 | 41 | 通过 `./systracer --watch "events=path"` 可以添加一个监控目录,如 `./systracer --watch "all=/etc"`。 42 | 43 | 参数中的 `events` 指定了监控事件的列表,可以为以下事件的集合,事件之间通过 `,` 分隔: 44 | 45 | - `all`:监控所有支持的文件事件。 46 | - `create`:监控普通文件的创建,输出创建的文件路径和权限。 47 | - `mkdir`:监控目录的创建,输出创建的目录路径和权限。 48 | - `mknod`:监控特殊文件(管道、套接字、字符设备、块设备等)的创建,输出创建的文件路径、权限和设备 ID。 49 | - `delete`:监控文件的删除,输出删除的文件路径。 50 | - `rmdir`:监控目录的删除,输出删除的目录路径。 51 | - `rename`:监控文件或目录的移动或重命名,输出重命名前后的文件路径。 52 | - `attrib`:监控文件或目录属性(权限、所有用户、所有组)的修改,输出修改的文件路径和属性。 53 | - `symlink`:监控符号连接的创建,输出软连接的内容和软连接的路径。 54 | - `link`:监控硬连接的创建,输出链接的源路径和目标路径。 55 | 56 | 譬如 `./systracer --watch "all=/usr" --watch "create,mkdir=/usr/lib"` 就同时添加了对 `/usr` 目录下所有支持的文件事件的递归监控,以及 `/usr/lib` 目录下文件和目录创建事件的递归监控。 57 | 58 | 值得注意的是,SysTracer 只会上报已经成功完成的文件操作事件。 59 | 60 | ## 工作原理 61 | 62 | ![](https://github.com/chaitin/systrace-blob/blob/ad3d1d292b82cdbb4c1349a28d6110f73c690231/architecture.png) 63 | 64 | 我们都知道,Linux 内核为用户进程管理了各种可访问的资源,用户进程需要发起系统调用来使用这些资源。不同资源的操作和处理由 Linux 内核中对应的子系统完成。 65 | 66 | SysTracer 通过向 Linux 中不同子系统插入 KProbe,当子系统的代码执行到 KProbe 
// moduleBarrier is an empty marker type. Module providers return a
// []moduleBarrier and the final invocation of the root command takes a
// []moduleBarrier parameter.
//
// NOTE(review): presumably this makes the injector run every module
// provider before the final invocation; confirm against shaft.
type moduleBarrier struct{}

var (
	// moduleInits collects the module initializers registered by the
	// init functions of this package.
	moduleInits []func() shaft.Option

	// allEnabled is bound to the --all flag.
	allEnabled bool

	// logLevel is bound to the --log-level flag.
	logLevel = "info"
)

// rootCmd is the systracer command. Its PreRunE adds the option
// returned by every registered module initializer to the command, and
// its RunE executes the module assembled below.
var rootCmd = &cobra.Command{
	Use:  "systracer",
	Long: "Linux system activity tracer",
	PreRunE: func(cmd *cobra.Command, _ []string) error {
		for _, moduleInit := range moduleInits {
			if err := serpent.AddOption(
				cmd, moduleInit()); err != nil {
				return err
			}
		}
		return nil
	},
	RunE: serpent.Executor(shaft.Module(
		// Derive a cancellable context and an error group from the
		// command context and hand them to the rest of the stack.
		shaft.Stack(func(
			next func(*errgroup.Group, context.Context) error,
			rootCtx serpent.CommandContext,
		) error {
			cancelCtx, cancel := context.WithCancel(rootCtx)
			group, ctx := errgroup.WithContext(cancelCtx)
			// Deferred calls run in reverse order: the context is
			// cancelled first, then the group is awaited.
			defer func() { _ = group.Wait() }()
			defer cancel()
			return next(group, ctx)
		}),
		// Log that initialization is complete, then block until
		// every goroutine of the group has returned.
		shaft.Invoke(func(
			group *errgroup.Group, _ []moduleBarrier,
			logger *zap.SugaredLogger,
		) error {
			logger.Info("initialization complete")
			return group.Wait()
		}),
		// Create the trace manager from the collected options.
		shaft.Provide(func(
			ctx context.Context, group *errgroup.Group,
			options []systracer.Option,
		) (systracer.Manager, error) {
			return systracer.New(ctx, group, options...)
		}),
		// Build a console logger writing to stderr at the requested
		// level and hand both flavours of it to the rest of the stack.
		shaft.Stack(func(
			next func(*zap.Logger, *zap.SugaredLogger) error,
		) error {
			level, err := zapcore.ParseLevel(logLevel)
			if err != nil {
				return err
			}
			consoleLevel := zap.NewAtomicLevelAt(level)
			consoleConfig := zap.NewDevelopmentEncoderConfig()
			consoleConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder
			consoleErrors := zapcore.Lock(os.Stderr)
			consoleEncoder := zapcore.NewConsoleEncoder(consoleConfig)
			loggerCore := zapcore.NewCore(
				consoleEncoder, consoleErrors, consoleLevel)
			logger := zap.New(loggerCore)
			sugaredLogger := logger.Sugar()
			// The error returned by Sync is not checked here.
			defer logger.Sync()
			return next(logger, sugaredLogger)
		}),
		inode.Module,
	)).RunE,
}

// init registers the --all and --log-level persistent flags.
func init() {
	rootCmd.PersistentFlags().BoolVar(
		&allEnabled, "all", allEnabled,
		"capture all supported events")
	rootCmd.PersistentFlags().StringVar(
		&logLevel, "log-level", logLevel,
		"setup the log level of the logger")
}

// main runs the root command with a context that is cancelled on
// os.Interrupt, and exits with status 1 when the command fails.
func main() {
	ctx, cancel := signal.NotifyContext(
		context.Background(), os.Interrupt)
	defer cancel()
	if err := serpent.ExecuteContext(ctx, rootCmd); err != nil {
		os.Exit(1)
	}
}
3 | package kversion 4 | 5 | import ( 6 | "fmt" 7 | "io/ioutil" 8 | "regexp" 9 | "strconv" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | // Version stores the kernel version using semantic 15 | // versioning, but converted to a 64bit numeric value. 16 | type Version uint64 17 | 18 | // Predefined component for composing into version. 19 | const ( 20 | offsetPreRelease = 0 21 | bitsPreRelease = 32 22 | offsetPatch = bitsPreRelease 23 | bitsPatch = 16 24 | offsetMinor = offsetPatch + bitsPatch 25 | bitsMinor = 8 26 | offsetMajor = offsetMinor + bitsMinor 27 | bitsMajor = 64 - offsetMajor 28 | ) 29 | 30 | // Major returns the value of the major version. 31 | func (v Version) Major() int64 { 32 | return (int64(v) >> offsetMajor) & ((1 << bitsMajor) - 1) 33 | } 34 | 35 | // Minor returns the value of the minor version. 36 | func (v Version) Minor() int64 { 37 | return (int64(v) >> offsetMinor) & ((1 << bitsMinor) - 1) 38 | } 39 | 40 | // Patch returns the value of the patch version. 41 | func (v Version) Patch() int64 { 42 | return (int64(v) >> offsetPatch) & ((1 << bitsPatch) - 1) 43 | } 44 | 45 | // PreRelease returns the value of the pre-release version. 46 | func (v Version) PreRelease() int64 { 47 | return (int64(v) >> offsetPreRelease) & ((1 << bitsPreRelease) - 1) 48 | } 49 | 50 | // String formats the kernel version as triplets. 51 | func (v Version) String() string { 52 | return fmt.Sprintf("%d.%d.%d-%d", 53 | v.Major(), v.Minor(), v.Patch(), v.PreRelease()) 54 | } 55 | 56 | // regexpKv is the regular expression for parsing 57 | // the kernel version string. 58 | var regexKv = regexp.MustCompile( 59 | `([0-9]+)\.([0-9]+)(\.[0-9]+)?(-[0-9]+)?`) 60 | 61 | // Parse the specified kernel version. 62 | func Parse(version string) (Version, error) { 63 | var err error 64 | kv := []byte(version) 65 | 66 | // Parse the provided kernel version. 
67 | m := regexKv.FindSubmatchIndex(kv) 68 | if len(m) < 10 || m[0] != 0 { 69 | return Version(0), errors.Wrapf( 70 | err, "malformed %q", version) 71 | } 72 | 73 | // Parse the major, minor and patch version. 74 | majorComponent := string(kv[m[2]:m[3]]) 75 | major, err := strconv.ParseUint(majorComponent, 10, bitsMajor) 76 | if err != nil { 77 | return Version(0), errors.Wrapf( 78 | err, "invalid major %q", majorComponent) 79 | } 80 | minorComponent := string(kv[m[4]:m[5]]) 81 | minor, err := strconv.ParseUint(minorComponent, 10, bitsMinor) 82 | if err != nil { 83 | return Version(0), errors.Wrapf( 84 | err, "invalid minor %q", minorComponent) 85 | } 86 | 87 | // Check the optional kernel version. 88 | var patch uint64 89 | if m[6] >= 0 && m[7] >= 0 { 90 | patchComponent := string(kv[m[6]+1 : m[7]]) 91 | patch, err = strconv.ParseUint(patchComponent, 10, bitsPatch) 92 | if err != nil { 93 | return Version(0), errors.Wrapf( 94 | err, "invalid patch %q", patchComponent) 95 | } 96 | } 97 | var preRelease uint64 98 | if m[8] >= 0 && m[9] >= 0 { 99 | preReleaseComponent := string(kv[m[8]+1 : m[9]]) 100 | preRelease, err = strconv.ParseUint( 101 | preReleaseComponent, 10, bitsPreRelease) 102 | if err != nil { 103 | return Version(0), errors.Wrapf( 104 | err, "invalid pre-release %q", preReleaseComponent) 105 | } 106 | } 107 | 108 | // Return the parsed version result. 109 | return Version(preRelease | 110 | (major << offsetMajor) | 111 | (minor << offsetMinor) | 112 | (patch << offsetPatch)), nil 113 | } 114 | 115 | // Must forcefully parses the version and panics if 116 | // the version specified cannot resolve. 117 | func Must(version string) Version { 118 | v, err := Parse(version) 119 | if err != nil { 120 | panic(err) 121 | } 122 | return v 123 | } 124 | 125 | // Current is the version retrieved when the process 126 | // has just been initialized. 127 | var Current Version 128 | 129 | // init initializes the current version retrieved 130 | // from the kernel. 
// Event stores common event data made by all types of
// concrete tracing events. The format is defined by
// "/tracing/trace" file.
//
// The comm field is omitted since it is always
// incomplete (rendered as "<...>") and is not essentially
// required by all events.
type Event struct {
	TaskPID   uint32
	Timestamp time.Time
	epoch     time.Duration
}
// Trace is a controlling handle for trace events.
//
// The trace handle is initially not started to avoid
// deadlocking when used as collectors. The caller must
// manually activate them after their master thread
// has been initialized.
//
// The trace can also be stopped at runtime; it is
// recommended to disable certain tracing when there are
// no subscribers and the master thread need not
// track the real time state with the trace.
type Trace interface {
	ID() uint64
	SetCondition(string) error
	SetEnabled(bool)
	GetDone() uint64
	GetLost() uint64
	Close()
}
102 | TraceUProbe( 103 | library, location string, handler interface{}, 104 | ) (Trace, <-chan struct{}, error) 105 | } 106 | 107 | type option struct { 108 | tracefsPath string 109 | instanceName string 110 | limitInterval time.Duration 111 | logger *zap.Logger 112 | } 113 | 114 | // Option to initialize the systrace subsystem. 115 | type Option func(*option) 116 | 117 | // WithTraceFSPath is the path of the tracefs. The 118 | // default value is "/sys/kernel/debug/tracing". 119 | func WithTraceFSPath(path string) Option { 120 | return func(opt *option) { 121 | opt.tracefsPath = path 122 | } 123 | } 124 | 125 | // WithInstanceName is the name of the trace instance. 126 | // The default value is "systrace". 127 | func WithInstanceName(name string) Option { 128 | return func(opt *option) { 129 | opt.instanceName = name 130 | } 131 | } 132 | 133 | // WithLimitInterval specifies the interval of receiving 134 | // event from trace pipe. Setting this value to 0 will 135 | // disable the reception limit. The default value is 0. 136 | func WithLimitInterval(dur time.Duration) Option { 137 | return func(opt *option) { 138 | opt.limitInterval = dur 139 | } 140 | } 141 | 142 | // WithLogger specifies the logger for the manager. 143 | // The default value is zap.L(). 144 | func WithLogger(logger *zap.Logger) Option { 145 | return func(opt *option) { 146 | opt.logger = logger 147 | } 148 | } 149 | 150 | // WithOptions aggregate a set of options together. 151 | func WithOptions(opts ...Option) Option { 152 | return func(o *option) { 153 | for _, opt := range opts { 154 | opt(o) 155 | } 156 | } 157 | } 158 | 159 | // newOption creates the option with all default values. 
160 | func newOption() *option { 161 | return &option{ 162 | tracefsPath: "/sys/kernel/debug/tracing", 163 | instanceName: "systrace", 164 | logger: zap.L(), 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /rcnotify/trace_amd64.go: -------------------------------------------------------------------------------- 1 | package rcnotify 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/chaitin/systracer" 7 | ) 8 | 9 | type eventFsnotify struct { 10 | TaskPID uint32 11 | Timestamp time.Time 12 | Inode uint64 13 | Access uint32 14 | ModifyAttrib uint32 15 | CloseOpen uint32 16 | Dentry uint32 17 | Filename string 18 | Visited *uint8 19 | } 20 | 21 | type entryFsnotify_V2_6_32 struct { 22 | systracer.ProbeEvent 23 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 24 | 25 | Inode uint64 `tracing:"%di"` 26 | Access uint32 `tracing:"%si,Access == 0,bit[0]"` 27 | ModifyAttrib uint32 `tracing:"%si,,bit[1:2]"` 28 | CloseOpen uint32 `tracing:"%si,CloseOpen == 0,bit[3:5]"` 29 | Dentry uint32 `tracing:"%si,,bit[6:12]"` 30 | Filename string `tracing:"+8(%r8)"` 31 | } 32 | 33 | type entryFsnotify_V5_9 struct { 34 | systracer.ProbeEvent 35 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 36 | 37 | Access uint32 `tracing:"%di,Access == 0,bit[0]"` 38 | ModifyAttrib uint32 `tracing:"%di,,bit[1:2]"` 39 | CloseOpen uint32 `tracing:"%di,CloseOpen == 0,bit[3:5]"` 40 | Dentry uint32 `tracing:"%di,,bit[6:12]"` 41 | Dir uint64 `tracing:"%cx"` 42 | Filename string `tracing:"+8(%r8)"` 43 | Inode uint64 `tracing:"%r9"` 44 | } 45 | 46 | type entryFsnotifyParent_V5_9 struct { 47 | systracer.ProbeEvent 48 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 49 | 50 | Filename string `tracing:"+40(%di)"` 51 | Inode uint64 `tracing:"+48(%di)"` 52 | Access uint32 `tracing:"%si,Access == 0,bit[0]"` 53 | ModifyAttrib uint32 `tracing:"%si,,bit[1:2]"` 54 | CloseOpen uint32 
`tracing:"%si,CloseOpen == 0,bit[3:5]"` 55 | Dentry uint32 `tracing:"%si,,bit[6:12]"` 56 | } 57 | 58 | type path struct { 59 | N0 systracer.StringAddr `tracing:"+40({1})"` 60 | N1 systracer.StringAddr `tracing:"+40(+24({1}))"` 61 | N2 systracer.StringAddr `tracing:"+40(+24(+24({1})))"` 62 | N3 systracer.StringAddr `tracing:"+40(+24(+24(+24({1}))))"` 63 | N4 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24({1})))))"` 64 | N5 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24({1}))))))"` 65 | N6 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24({1})))))))"` 66 | N7 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24({1}))))))))"` 67 | N8 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"` 68 | 69 | I0 uint64 `tracing:"+48({1})"` 70 | I1 uint64 `tracing:"+48(+24({1}))"` 71 | I2 uint64 `tracing:"+48(+24(+24({1})))"` 72 | I3 uint64 `tracing:"+48(+24(+24(+24({1}))))"` 73 | I4 uint64 `tracing:"+48(+24(+24(+24(+24({1})))))"` 74 | I5 uint64 `tracing:"+48(+24(+24(+24(+24(+24({1}))))))"` 75 | I6 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24({1})))))))"` 76 | I7 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24({1}))))))))"` 77 | I8 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"` 78 | I9 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24(+24({1}))))))))))"` 79 | } 80 | 81 | func (d path) extract() ([]string, []uint64) { 82 | nodes := []systracer.StringAddr{ 83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8, 84 | } 85 | resultPath := extractPathComponent(nodes) 86 | inodes := []uint64{ 87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9, 88 | } 89 | resultInodes := inodes[:len(resultPath)+1] 90 | return resultPath, resultInodes 91 | } 92 | 93 | type entrySecurityInodeRename struct { 94 | SrcDir uint64 `tracing:"%di"` 95 | DstDir uint64 `tracing:"%dx"` 96 | } 97 | 98 | type entrySecurityInodeRenameSource struct { 99 | systracer.ProbeEvent 100 | Event entrySecurityInodeRename 101 | 
// entrySecurityInodeSetattr is the probe-entry record for the inode
// setattr hook. Per the tracing tags, Path is resolved from %di and
// the remaining fields are read at fixed offsets from %si.
// NOTE(review): offsets 0/4/8/12 presumably mirror the leading
// members of the kernel's attribute structure — confirm.
type entrySecurityInodeSetattr struct {
	systracer.ProbeEvent
	Path  path   `tracing:"%di"`
	Valid uint32 `tracing:"+0(%si)"`
	Mode  uint16 `tracing:"+4(%si)"`
	Uid   uint32 `tracing:"+8(%si)"`
	Gid   uint32 `tracing:"+12(%si)"`
}
// entryFsnotify_V2_6_32 is a probe-entry record whose fields are
// fetched according to their tracing tags.
// NOTE(review): the suffix suggests this layout targets kernels
// starting from 2.6.32 — confirm against the code that selects it.
type entryFsnotify_V2_6_32 struct {
	systracer.ProbeEvent
	// Record-level condition over the fields declared below.
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	// Access, ModifyAttrib, CloseOpen and Dentry are all taken from
	// %x1, each selecting a different bit range of it; Access and
	// CloseOpen additionally carry a per-field condition.
	Inode        uint64 `tracing:"%x0"`
	Access       uint32 `tracing:"%x1,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%x1,,bit[1:2]"`
	CloseOpen    uint32 `tracing:"%x1,CloseOpen == 0,bit[3:5]"`
	Dentry       uint32 `tracing:"%x1,,bit[6:12]"`
	Filename     string `tracing:"+8(%x4)"`
}
systracer.StringAddr `tracing:"+40(+24(+24(+24({1}))))"` 63 | N4 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24({1})))))"` 64 | N5 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24({1}))))))"` 65 | N6 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24({1})))))))"` 66 | N7 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24({1}))))))))"` 67 | N8 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"` 68 | 69 | I0 uint64 `tracing:"+48({1})"` 70 | I1 uint64 `tracing:"+48(+24({1}))"` 71 | I2 uint64 `tracing:"+48(+24(+24({1})))"` 72 | I3 uint64 `tracing:"+48(+24(+24(+24({1}))))"` 73 | I4 uint64 `tracing:"+48(+24(+24(+24(+24({1})))))"` 74 | I5 uint64 `tracing:"+48(+24(+24(+24(+24(+24({1}))))))"` 75 | I6 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24({1})))))))"` 76 | I7 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24({1}))))))))"` 77 | I8 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"` 78 | I9 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24(+24({1}))))))))))"` 79 | } 80 | 81 | func (d path) extract() ([]string, []uint64) { 82 | nodes := []systracer.StringAddr{ 83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8, 84 | } 85 | resultPath := extractPathComponent(nodes) 86 | inodes := []uint64{ 87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9, 88 | } 89 | resultInodes := inodes[:len(resultPath)+1] 90 | return resultPath, resultInodes 91 | } 92 | 93 | type entrySecurityInodeRename struct { 94 | SrcDir uint64 `tracing:"%x0"` 95 | DstDir uint64 `tracing:"%x2"` 96 | } 97 | 98 | type entrySecurityInodeRenameSource struct { 99 | systracer.ProbeEvent 100 | Event entrySecurityInodeRename 101 | Source path `tracing:"%x1"` 102 | } 103 | 104 | type entrySecurityInodeRenameTarget struct { 105 | systracer.ProbeEvent 106 | Event entrySecurityInodeRename 107 | Target path `tracing:"%x3"` 108 | } 109 | 110 | type entrySecurityInodeCreate struct { 111 | systracer.ProbeEvent 112 | Dir uint64 
`tracing:"%x0"` 113 | Path path `tracing:"%x1"` 114 | Mode uint16 `tracing:"%x2"` 115 | } 116 | 117 | type entrySecurityInodeMknod struct { 118 | systracer.ProbeEvent 119 | Dir uint64 `tracing:"%x0"` 120 | Path path `tracing:"%x1"` 121 | Mode uint16 `tracing:"%x2"` 122 | Dev uint32 `tracing:"%x3"` 123 | } 124 | 125 | type entrySecurityInodeMkdir struct { 126 | systracer.ProbeEvent 127 | Dir uint64 `tracing:"%x0"` 128 | Path path `tracing:"%x1"` 129 | Mode uint16 `tracing:"%x2"` 130 | } 131 | 132 | type entrySecurityInodeLink struct { 133 | Dir uint64 `tracing:"%x1"` 134 | } 135 | 136 | type entrySecurityInodeLinkSource struct { 137 | systracer.ProbeEvent 138 | Event entrySecurityInodeLink 139 | Source path `tracing:"%x0"` 140 | } 141 | 142 | type entrySecurityInodeLinkTarget struct { 143 | systracer.ProbeEvent 144 | Event entrySecurityInodeLink 145 | Target path `tracing:"%x2"` 146 | } 147 | 148 | type entrySecurityInodeSymlink struct { 149 | systracer.ProbeEvent 150 | Dir uint64 `tracing:"%x0"` 151 | Path path `tracing:"%x1"` 152 | Name string `tracing:"%x2"` 153 | } 154 | 155 | type entrySecurityInodeUnlink struct { 156 | systracer.ProbeEvent 157 | Path path `tracing:"%x1"` 158 | } 159 | 160 | type entrySecurityInodeRmdir struct { 161 | systracer.ProbeEvent 162 | Path path `tracing:"%x1"` 163 | } 164 | 165 | type entrySecurityInodeSetattr struct { 166 | systracer.ProbeEvent 167 | Path path `tracing:"%x0"` 168 | Valid uint32 `tracing:"+0(%x1)"` 169 | Mode uint16 `tracing:"+4(%x1)"` 170 | Uid uint32 `tracing:"+8(%x1)"` 171 | Gid uint32 `tracing:"+12(%x1)"` 172 | } 173 | -------------------------------------------------------------------------------- /rcnotify/trace_386.go: -------------------------------------------------------------------------------- 1 | package rcnotify 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/chaitin/systracer" 7 | ) 8 | 9 | type eventFsnotify struct { 10 | TaskPID uint32 11 | Timestamp time.Time 12 | Inode uint64 13 | Access uint32 14 | 
ModifyAttrib uint32 15 | CloseOpen uint32 16 | Dentry uint32 17 | Filename string 18 | Visited *uint8 19 | } 20 | 21 | type entryFsnotify_V2_6_32 struct { 22 | systracer.ProbeEvent 23 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 24 | 25 | Inode uint32 `tracing:"%ax"` 26 | Access uint32 `tracing:"%dx,Access == 0,bit[0]"` 27 | ModifyAttrib uint32 `tracing:"%dx,,bit[1:2]"` 28 | CloseOpen uint32 `tracing:"%dx,CloseOpen == 0,bit[3:5]"` 29 | Dentry uint32 `tracing:"%dx,,bit[6:12]"` 30 | Filename string `tracing:"+8(+8(%sp))"` 31 | } 32 | 33 | type entryFsnotify_V5_9 struct { 34 | systracer.ProbeEvent 35 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 36 | 37 | Access uint32 `tracing:"%ax,Access == 0,bit[0]"` 38 | ModifyAttrib uint32 `tracing:"%ax,,bit[1:2]"` 39 | CloseOpen uint32 `tracing:"%ax,CloseOpen == 0,bit[3:5]"` 40 | Dentry uint32 `tracing:"%ax,,bit[6:12]"` 41 | Dir uint32 `tracing:"+4(%sp)"` 42 | Filename string `tracing:"+8(+8(%sp))"` 43 | Inode uint32 `tracing:"+12(%sp)` 44 | } 45 | 46 | type entryFsnotifyParent_V5_9 struct { 47 | systracer.ProbeEvent 48 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"` 49 | 50 | Filename string `tracing:"+40(%ax)"` 51 | Inode uint64 `tracing:"+48(%ax)"` 52 | Access uint32 `tracing:"%dx,Access == 0,bit[0]"` 53 | ModifyAttrib uint32 `tracing:"%dx,,bit[1:2]"` 54 | CloseOpen uint32 `tracing:"%dx,CloseOpen == 0,bit[3:5]"` 55 | Dentry uint32 `tracing:"%dx,,bit[6:12]"` 56 | } 57 | 58 | type path struct { 59 | N0 systracer.StringAddr `tracing:"+28({1})"` 60 | N1 systracer.StringAddr `tracing:"+28(+16({1}))"` 61 | N2 systracer.StringAddr `tracing:"+28(+16(+16({1})))"` 62 | N3 systracer.StringAddr `tracing:"+28(+16(+16(+16({1}))))"` 63 | N4 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16({1})))))"` 64 | N5 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16({1}))))))"` 65 | N6 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16({1})))))))"` 66 | N7 
systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16(+16({1}))))))))"` 67 | N8 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16(+16(+16({1})))))))))"` 68 | 69 | I0 uint64 `tracing:"+32({1})"` 70 | I1 uint64 `tracing:"+32(+16({1}))"` 71 | I2 uint64 `tracing:"+32(+16(+16({1})))"` 72 | I3 uint64 `tracing:"+32(+16(+16(+16({1}))))"` 73 | I4 uint64 `tracing:"+32(+16(+16(+16(+16({1})))))"` 74 | I5 uint64 `tracing:"+32(+16(+16(+16(+16(+16({1}))))))"` 75 | I6 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16({1})))))))"` 76 | I7 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16({1}))))))))"` 77 | I8 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16(+16({1})))))))))"` 78 | I9 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16(+16(+16({1}))))))))))"` 79 | } 80 | 81 | func (d path) extract() ([]string, []uint64) { 82 | nodes := []systracer.StringAddr{ 83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8, 84 | } 85 | resultPath := extractPathComponent(nodes) 86 | inodes := []uint64{ 87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9, 88 | } 89 | resultInodes := inodes[:len(resultPath)+1] 90 | return resultPath, resultInodes 91 | } 92 | 93 | type entrySecurityInodeRename struct { 94 | SrcDir uint64 `tracing:"%ax"` 95 | DstDir uint64 `tracing:"%cx"` 96 | } 97 | 98 | type entrySecurityInodeRenameSource struct { 99 | systracer.ProbeEvent 100 | Event entrySecurityInodeRename 101 | Source path `tracing:"%dx"` 102 | } 103 | 104 | type entrySecurityInodeRenameTarget struct { 105 | systracer.ProbeEvent 106 | Event entrySecurityInodeRename 107 | Target path `tracing:"+4(%sp)"` 108 | } 109 | 110 | type entrySecurityInodeCreate struct { 111 | systracer.ProbeEvent 112 | Dir uint64 `tracing:"%ax"` 113 | Path path `tracing:"%dx"` 114 | Mode uint16 `tracing:"%cx"` 115 | } 116 | 117 | type entrySecurityInodeMknod struct { 118 | systracer.ProbeEvent 119 | Dir uint64 `tracing:"%ax"` 120 | Path path `tracing:"%dx"` 121 | Mode uint16 `tracing:"%cx"` 122 | Dev uint32 
`tracing:"+4(%sp)"` 123 | } 124 | 125 | type entrySecurityInodeMkdir struct { 126 | systracer.ProbeEvent 127 | Dir uint64 `tracing:"%ax"` 128 | Path path `tracing:"%dx"` 129 | Mode uint16 `tracing:"%cx"` 130 | } 131 | 132 | type entrySecurityInodeLink struct { 133 | Dir uint64 `tracing:"%dx"` 134 | } 135 | 136 | type entrySecurityInodeLinkSource struct { 137 | systracer.ProbeEvent 138 | Event entrySecurityInodeLink 139 | Source path `tracing:"%ax"` 140 | } 141 | 142 | type entrySecurityInodeLinkTarget struct { 143 | systracer.ProbeEvent 144 | Event entrySecurityInodeLink 145 | Target path `tracing:"%cx"` 146 | } 147 | 148 | type entrySecurityInodeSymlink struct { 149 | systracer.ProbeEvent 150 | Dir uint64 `tracing:"%ax"` 151 | Path path `tracing:"%dx"` 152 | Name string `tracing:"%cx"` 153 | } 154 | 155 | type entrySecurityInodeUnlink struct { 156 | systracer.ProbeEvent 157 | Path path `tracing:"%dx"` 158 | } 159 | 160 | type entrySecurityInodeRmdir struct { 161 | systracer.ProbeEvent 162 | Path path `tracing:"%dx"` 163 | } 164 | 165 | type entrySecurityInodeSetattr struct { 166 | systracer.ProbeEvent 167 | Path path `tracing:"%di"` 168 | Valid uint32 `tracing:"+0(%si)"` 169 | Mode uint16 `tracing:"+4(%si)"` 170 | Uid uint32 `tracing:"+8(%si)"` 171 | Gid uint32 `tracing:"+12(%si)"` 172 | } 173 | -------------------------------------------------------------------------------- /cmd/systracer/watch.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "strings" 8 | "syscall" 9 | 10 | "github.com/aegistudio/shaft" 11 | "github.com/pkg/errors" 12 | "go.uber.org/zap" 13 | "golang.org/x/sync/errgroup" 14 | 15 | "github.com/chaitin/systracer/rcnotify" 16 | ) 17 | 18 | var ( 19 | watches []string 20 | ) 21 | 22 | func initWatchModule() shaft.Option { 23 | if len(watches) == 0 { 24 | return shaft.Module() 25 | } 26 | return shaft.Module( 27 | rcnotify.Module, 28 | 
shaft.Provide(func( 29 | ctx context.Context, group *errgroup.Group, 30 | logger *zap.SugaredLogger, manager *rcnotify.Manager, 31 | ) ([]moduleBarrier, error) { 32 | // Attempt to parse the watch argumets. 33 | var options []rcnotify.Option 34 | for _, watch := range watches { 35 | pathIndex := strings.Index(watch, "=") 36 | if pathIndex <= 0 { 37 | return nil, errors.New( 38 | `"watch must be of format "="`) 39 | } 40 | events := watch[:pathIndex] 41 | path := watch[pathIndex+1:] 42 | var matcher func(rcnotify.Op, string) rcnotify.Option 43 | matcher = rcnotify.WatchDir 44 | var flags rcnotify.Op 45 | for _, event := range strings.Split(events, ",") { 46 | switch event { 47 | case "all": 48 | flags |= rcnotify.OpAll 49 | case "create": 50 | flags |= rcnotify.OpCreate 51 | case "mknod": 52 | flags |= rcnotify.OpMknod 53 | case "mkdir": 54 | flags |= rcnotify.OpMkdir 55 | case "delete": 56 | flags |= rcnotify.OpDelete 57 | case "rmdir": 58 | flags |= rcnotify.OpRmdir 59 | case "rename": 60 | flags |= rcnotify.OpRename 61 | case "attrib": 62 | flags |= rcnotify.OpAttrib 63 | case "link": 64 | flags |= rcnotify.OpLink 65 | case "symlink": 66 | flags |= rcnotify.OpSymlink 67 | case "dir": 68 | matcher = rcnotify.WatchDir 69 | case "file": 70 | matcher = rcnotify.WatchFile 71 | default: 72 | return nil, errors.Errorf( 73 | "unknown event %q", event) 74 | } 75 | } 76 | options = append(options, matcher(flags, path)) 77 | } 78 | watcher, err := manager.Watch(options...) 
79 | if err != nil { 80 | return nil, err 81 | } 82 | group.Go(func() error { 83 | defer watcher.Close() 84 | for { 85 | var event rcnotify.Event 86 | select { 87 | case <-ctx.Done(): 88 | return nil 89 | case event = <-watcher.C: 90 | } 91 | eventContext := fmt.Sprintf("%s %d", 92 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID) 93 | sourcePath := "(unknown)" 94 | if event.Source != nil { 95 | sourcePath = fmt.Sprintf("%q", *event.Source) 96 | } 97 | targetPath := "(unknown)" 98 | if event.Target != nil { 99 | targetPath = fmt.Sprintf("%q", *event.Target) 100 | } 101 | var fileMode os.FileMode 102 | if event.Mode != nil { 103 | fileMode = os.FileMode(*event.Mode & 0777) 104 | switch *event.Mode & syscall.S_IFMT { 105 | case syscall.S_IFBLK: 106 | fileMode |= os.ModeDevice 107 | case syscall.S_IFCHR: 108 | fileMode |= os.ModeDevice | os.ModeCharDevice 109 | case syscall.S_IFDIR: 110 | fileMode |= os.ModeDir 111 | case syscall.S_IFIFO: 112 | fileMode |= os.ModeNamedPipe 113 | case syscall.S_IFLNK: 114 | fileMode |= os.ModeSymlink 115 | case syscall.S_IFREG: 116 | // nothing to do 117 | case syscall.S_IFSOCK: 118 | fileMode |= os.ModeSocket 119 | } 120 | if (*event.Mode & syscall.S_ISGID) != 0 { 121 | fileMode |= os.ModeSetgid 122 | } 123 | if (*event.Mode & syscall.S_ISUID) != 0 { 124 | fileMode |= os.ModeSetuid 125 | } 126 | if (*event.Mode & syscall.S_ISVTX) != 0 { 127 | fileMode |= os.ModeSticky 128 | } 129 | } 130 | switch event.Op { 131 | case rcnotify.OpCreate: 132 | logger.Infof("%s - create(%s, %q)", 133 | eventContext, targetPath, fileMode) 134 | case rcnotify.OpMkdir: 135 | logger.Infof("%s - mkdir(%s, %q)", 136 | eventContext, targetPath, fileMode) 137 | case rcnotify.OpMknod: 138 | logger.Infof("%s - mknod(%s, %q, %d)", 139 | eventContext, targetPath, 140 | fileMode, *event.Dev) 141 | case rcnotify.OpDelete: 142 | logger.Infof("%s - delete(%s)", 143 | eventContext, targetPath) 144 | case rcnotify.OpRmdir: 145 | logger.Infof("%s - 
rmdir(%s)", 146 | eventContext, targetPath) 147 | case rcnotify.OpRename: 148 | logger.Infof("%s - rename(%s, %s)", 149 | eventContext, sourcePath, targetPath) 150 | case rcnotify.OpAttrib: 151 | if event.Attr&rcnotify.AttrMode != 0 { 152 | logger.Infof("%s - chmod(%s, %q)", 153 | eventContext, targetPath, fileMode) 154 | } 155 | if event.Attr&rcnotify.AttrUID != 0 { 156 | logger.Infof("%s - chown_uid(%s, %d)", 157 | eventContext, targetPath, *event.Uid) 158 | } 159 | if event.Attr&rcnotify.AttrGID != 0 { 160 | logger.Infof("%s - chown_gid(%s, %d)", 161 | eventContext, targetPath, *event.Gid) 162 | } 163 | case rcnotify.OpLink: 164 | logger.Infof("%s - link(%s, %s)", 165 | eventContext, sourcePath, targetPath) 166 | case rcnotify.OpSymlink: 167 | logger.Infof("%s - symlink(%s, %s)", 168 | eventContext, sourcePath, targetPath) 169 | } 170 | } 171 | return nil 172 | }) 173 | return nil, nil 174 | }), 175 | ) 176 | } 177 | 178 | func init() { 179 | moduleInits = append(moduleInits, initWatchModule) 180 | rootCmd.PersistentFlags().StringArrayVarP( 181 | &watches, "watch", "w", watches, 182 | "specify list of watches for directory events") 183 | } 184 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277 h1:SI22BGHhuRdM/dxnCb5wO/5yhNAeN0JMqgEK/dJujyM= 2 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277/go.mod h1:78gJgtia9zBYrzBRlRe5vDSEtWNfN6PbHX2Y7WNzghI= 3 | github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= 4 | github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= 5 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= 10 | github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 11 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 12 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 13 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 14 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 15 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 16 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 17 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 18 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 21 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 22 | github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI= 23 | github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= 24 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 25 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 26 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 27 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 28 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 29 | github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 30 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 31 | github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= 32 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 33 | github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= 34 | go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= 35 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= 36 | go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= 37 | go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= 38 | go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= 39 | go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= 40 | go.uber.org/zap v1.23.0 h1:OjGQ5KQDEUawVHxNwQgPpiypGHOxo2mNZsOqTak4fFY= 41 | go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY= 42 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 43 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 44 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 45 | golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 46 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 47 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 48 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 49 | golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= 50 | golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 51 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 52 | golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= 53 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 54 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 55 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 56 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 57 | golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 58 | golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 59 | golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A= 60 | golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 61 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 62 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 63 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 64 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 65 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 66 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 67 | golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= 68 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 69 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 70 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 71 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 72 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 73 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 74 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 75 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 76 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 77 | -------------------------------------------------------------------------------- /tracefs.go: -------------------------------------------------------------------------------- 1 | package systracer 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "syscall" 10 | 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | // setError represents a set of errors that could be returned 15 | // by tracefs when operating on a set of entities. 16 | type setError struct { 17 | Op string 18 | Arg []string 19 | Err []error 20 | } 21 | 22 | // Error returns the formatted error string. 23 | func (e *setError) Error() string { 24 | var errString []string 25 | for _, err := range e.Err { 26 | errString = append(errString, err.Error()) 27 | } 28 | return fmt.Sprintf( 29 | "errors returned while %s(%q): %s", e.Op, 30 | strings.Join(e.Arg, ", "), 31 | strings.Join(errString, "\n")) 32 | } 33 | 34 | // disableInstance will attempt to disable all events associated 35 | // with a single instance. 
36 | func disableInstance(tracefs, instance string) error { 37 | if instance == "" { 38 | return errors.New("invalid empty instance name") 39 | } 40 | set := &setError{ 41 | Op: "disableInstance", 42 | Arg: []string{tracefs, instance}, 43 | } 44 | 45 | // Walk the instance event directory and disable event. 46 | if err := filepath.Walk( 47 | filepath.Join(tracefs, "instances", instance), 48 | func(path string, info os.FileInfo, err error) error { 49 | if err != nil { 50 | set.Err = append(set.Err, err) 51 | } 52 | if info == nil { 53 | return nil 54 | } 55 | if info.Name() == "enable" && !info.IsDir() { 56 | if err = ioutil.WriteFile(path, []byte("0"), 57 | os.FileMode(0600)); err != nil { 58 | set.Err = append(set.Err, err) 59 | } 60 | } 61 | return nil 62 | }); err != nil { 63 | set.Err = append(set.Err, err) 64 | } 65 | if len(set.Err) > 0 { 66 | return set 67 | } 68 | return nil 69 | } 70 | 71 | // removeInstance will attempt to remove an instance from 72 | // currently registered traces. 73 | func removeInstance(tracefs, instance string) error { 74 | if instance == "" { 75 | return errors.New("invalid empty instance name") 76 | } 77 | 78 | // We won't have to remove the instance if the instance 79 | // has already been deleted. 80 | set := &setError{ 81 | Op: "removeInstance", 82 | Arg: []string{tracefs, instance}, 83 | } 84 | instancePath := filepath.Join(tracefs, "instances", instance) 85 | var stat syscall.Stat_t 86 | if err := syscall.Stat(instancePath, &stat); err != nil { 87 | if err == syscall.ENOENT { 88 | return nil 89 | } 90 | set.Err = append(set.Err, err) 91 | return set 92 | } 93 | 94 | // Disable current tracing of the instance. 95 | if err := ioutil.WriteFile( 96 | filepath.Join(instancePath, "tracing_on"), 97 | []byte("0"), os.FileMode(0600)); err != nil { 98 | set.Err = append(set.Err, err) 99 | return set 100 | } 101 | 102 | // Cleanup content of all ring buffers in the instance. 
103 | // 104 | // XXX: though it is unnecessary, there's a bug 105 | // (RingBufferDetonator) that exists in kernel ranged from 106 | // 3.10 to 5.14-rc3, which means it should exist in exactly 107 | // all linux that cloudwalker agent operates on. 108 | // 109 | // https://github.com/torvalds/linux/commit/67f0d6d9883c13174669f88adac4f0ee656cc16a 110 | // 111 | // When the bug is triggered, if will stuck inside a deadloop 112 | // that can only be bailed out by disabling the tracing and 113 | // cleanup the ring buffers. 114 | if err := ioutil.WriteFile( 115 | filepath.Join(instancePath, "trace"), 116 | nil, os.FileMode(0600)); err != nil { 117 | set.Err = append(set.Err, err) 118 | return set 119 | } 120 | 121 | // If the instance could be removed directly, we will just 122 | // attempt to remove and return. And if we will only try 123 | // to perform more work if it is EBUSY. 124 | err := syscall.Rmdir(instancePath) 125 | if err == nil || err == syscall.ENOENT { 126 | return nil 127 | } 128 | if err != syscall.EBUSY { 129 | set.Err = append(set.Err, err) 130 | return set 131 | } 132 | 133 | // Record the errors generated while disabling instance. 134 | // 135 | // Please notice that it is only considered an error when we 136 | // cannot remove the instance directory. 137 | if err := disableInstance(tracefs, instance); err != nil { 138 | if subset, ok := err.(*setError); ok { 139 | set.Err = append(set.Err, subset.Err...) 140 | } else { 141 | set.Err = append(set.Err, err) 142 | } 143 | } 144 | 145 | // Remove the root directory of instance. 146 | err = syscall.Rmdir(instancePath) 147 | if err == nil || err == syscall.ENOENT { 148 | return nil 149 | } 150 | set.Err = append(set.Err, err) 151 | return set 152 | } 153 | 154 | // removeProbe will attempt to remove a single probe from 155 | // specified file, while disabling all of them. 
156 | func removeProbe(tracefs, typ, namespace, probe string) error { 157 | if typ == "" { 158 | return errors.New("invalid empty typ name") 159 | } 160 | if namespace == "" { 161 | return errors.New("invalid empty namespace name") 162 | } 163 | if probe == "" { 164 | return errors.New("invalid empty probe name") 165 | } 166 | 167 | // Attempt to open the probe manifest first. Under no 168 | // circumstance should the open fail. 169 | var err error 170 | set := &setError{ 171 | Op: "removeProbe", 172 | Arg: []string{tracefs, typ, namespace, probe}, 173 | } 174 | fd, err := syscall.Open(filepath.Join(tracefs, typ), 175 | syscall.O_WRONLY|syscall.O_APPEND, 0600) 176 | if err != nil { 177 | set.Err = append(set.Err, err) 178 | return set 179 | } 180 | defer func() { _ = syscall.Close(fd) }() 181 | 182 | // Attempt to remove the probe from the file. 183 | eraseWord := []byte(fmt.Sprintf( 184 | "-:%s/%s", namespace, probe)) 185 | _, err = syscall.Write(fd, eraseWord) 186 | if err == nil || err == syscall.ENOENT { 187 | return nil 188 | } 189 | if err != syscall.EBUSY { 190 | set.Err = append(set.Err, err) 191 | return set 192 | } 193 | 194 | // Disable the probe in all of the item list. 195 | if err = ioutil.WriteFile(filepath.Join( 196 | tracefs, "events", namespace, probe, "enable"), 197 | []byte("0"), os.FileMode(0600)); err != nil { 198 | set.Err = append(set.Err, err) 199 | } 200 | dirents, err := ioutil.ReadDir( 201 | filepath.Join(tracefs, "instances")) 202 | if err != nil && !os.IsNotExist(err) { 203 | set.Err = append(set.Err, err) 204 | } 205 | for _, dirent := range dirents { 206 | if !dirent.IsDir() { 207 | continue 208 | } 209 | if err = ioutil.WriteFile(filepath.Join( 210 | tracefs, "instances", dirent.Name(), 211 | "events", namespace, probe, "enable"), 212 | []byte("0"), os.FileMode(0600)); err != nil { 213 | set.Err = append(set.Err, err) 214 | } 215 | } 216 | 217 | // Reattempt to disable the probe from the file. 
218 | _, err = syscall.Write(fd, eraseWord) 219 | if err == nil || err == syscall.ENOENT { 220 | return nil 221 | } 222 | set.Err = append(set.Err, err) 223 | return set 224 | } 225 | 226 | // removeAllProbe will remove all probes under namespace. 227 | func removeAllProbe(tracefs, typ, namespace string) error { 228 | if typ == "" { 229 | return errors.New("invalid empty typ name") 230 | } 231 | if namespace == "" { 232 | return errors.New("invalid empty namespace name") 233 | } 234 | 235 | // Iterate and invoke remove method on the events. 236 | var err error 237 | set := &setError{ 238 | Op: "removeAllProbe", 239 | Arg: []string{tracefs, typ, namespace}, 240 | } 241 | dirents, err := ioutil.ReadDir( 242 | filepath.Join(tracefs, "events", namespace)) 243 | if err != nil && !os.IsNotExist(err) { 244 | set.Err = append(set.Err, err) 245 | } 246 | for _, dirent := range dirents { 247 | if !dirent.IsDir() { 248 | continue 249 | } 250 | if err := removeProbe(tracefs, typ, namespace, 251 | dirent.Name()); err != nil { 252 | if subset, ok := err.(*setError); ok { 253 | set.Err = append(set.Err, subset.Err...) 254 | } else { 255 | set.Err = append(set.Err, err) 256 | } 257 | } 258 | } 259 | if len(set.Err) > 0 { 260 | return set 261 | } 262 | return nil 263 | } 264 | -------------------------------------------------------------------------------- /connect/connect.go: -------------------------------------------------------------------------------- 1 | // Package connect defines the event source of network 2 | // connection events on linux. 3 | package connect 4 | 5 | import ( 6 | "context" 7 | "encoding/binary" 8 | "net" 9 | "time" 10 | 11 | "github.com/aegistudio/shaft" 12 | 13 | "github.com/chaitin/systracer" 14 | ) 15 | 16 | // Op is the event op of connect event. 17 | type Op uint8 18 | 19 | const ( 20 | OpConnectStart = Op(iota) 21 | OpConnectEnd 22 | ) 23 | 24 | // Event is the generated event of this module. 
25 | type Event struct { 26 | Op Op 27 | PID uint32 28 | Timestamp time.Time 29 | FD int 30 | Errno *int32 31 | Family uint16 32 | Type uint16 33 | Addr string 34 | FlowInfo *uint32 35 | Scope *uint32 36 | Port uint16 37 | } 38 | 39 | // collector is the event's collector. 40 | type collector struct { 41 | ctx context.Context 42 | ch chan<- Event 43 | registries map[uint32]*Event 44 | } 45 | 46 | func (c *collector) dispatch(event Event) { 47 | select { 48 | case <-c.ctx.Done(): 49 | case c.ch <- event: 50 | } 51 | } 52 | 53 | // handleConnectInet4 handles the event triggered when 54 | // a syscall connect or its equivalences are encountered 55 | // and represents a IPv4 event record. 56 | // 57 | // connect(FD, &sockaddr_in{ 58 | // .sin_family = AF_INET = 2, 59 | // .sin_port = Port, 60 | // .sin_addr = { Address }, 61 | // }, sizeof(sockaddr_in) == 16) 62 | func (col *collector) handleConnectInet4( 63 | event entrySyscallConnectInet4, 64 | ) { 65 | connectEvent := &Event{} 66 | connectEvent.Timestamp = event.Timestamp 67 | connectEvent.PID = event.TaskPID 68 | connectEvent.FD = int(event.FD) 69 | connectEvent.Family = event.Family 70 | connectEvent.Port = event.Port 71 | var ipv4 [4]byte 72 | binary.BigEndian.PutUint32(ipv4[:], event.Address) 73 | connectEvent.Addr = net.IP(ipv4[:]).String() 74 | col.registries[event.TaskPID] = connectEvent 75 | } 76 | 77 | // handleConnectInet6 handles the event triggered when 78 | // a syscall connect or its equivalences are encountered 79 | // and represents a IPv4 event record. 
80 | // 81 | // connect(FD, &sockaddr_in6{ 82 | // .sin6_family = AF_INET6 = 10, 83 | // .sin6_port = Port, 84 | // .sin6_flowinfo = FlowInfo, 85 | // .sin6_addr = in6_addr{ 86 | // Address0, Address1, Address2, Address3, 87 | // }, 88 | // .sin6_scope_id = Scope, 89 | // }, sizeof(sockaddr_in6) = 28}) 90 | func (col *collector) handleConnectInet6( 91 | event entrySyscallConnectInet6, 92 | ) { 93 | connectEvent := &Event{} 94 | connectEvent.Timestamp = event.Timestamp 95 | connectEvent.PID = event.TaskPID 96 | connectEvent.FD = int(event.FD) 97 | connectEvent.Family = event.Family 98 | connectEvent.Port = event.Port 99 | connectEvent.FlowInfo = new(uint32) 100 | *connectEvent.FlowInfo = event.FlowInfo 101 | connectEvent.Scope = new(uint32) 102 | *connectEvent.Scope = event.Scope 103 | var ipv6 [16]byte 104 | binary.BigEndian.PutUint32(ipv6[0:4], event.Address0) 105 | binary.BigEndian.PutUint32(ipv6[4:8], event.Address1) 106 | binary.BigEndian.PutUint32(ipv6[8:12], event.Address2) 107 | binary.BigEndian.PutUint32(ipv6[12:16], event.Address3) 108 | connectEvent.Addr = net.IP(ipv6[:]).String() 109 | col.registries[event.TaskPID] = connectEvent 110 | } 111 | 112 | // handleExitConnect handles the event when the connect 113 | // syscall or its equivalences have returned. This should 114 | // generate the connect end event, and delete the record 115 | // since it has been completed. 
func (col *collector) handleExitConnect(
	event exitSyscallConnect,
) {
	// Only calls whose entry has been recorded by one of the
	// handleConnectInet* handlers are reported.
	connectEvent := col.registries[event.TaskPID]
	if connectEvent == nil {
		return
	}
	// Work on a copy: the end event carries its own timestamp,
	// op and errno.
	connectEndEvent := *connectEvent
	connectEndEvent.Timestamp = event.Timestamp
	connectEndEvent.Op = OpConnectEnd
	connectEndEvent.Errno = new(int32)
	*connectEndEvent.Errno = event.Errno
	delete(col.registries, event.TaskPID)
	col.dispatch(connectEndEvent)
}

// handleInetProtocolConnect is the event triggered when
// the proto_ops->connect corresponded functions are called
// (e.g. inet_stream_connect and inet_dgram_connect).
//
// The type field will be fetched at this point, which
// will query the (struct socket*)->type field. And an
// connect start event must be generated after that.
func (col *collector) handleInetProtocolConnect(
	event entryInetProtocolConnect,
) {
	connectEvent := col.registries[event.TaskPID]
	if connectEvent == nil {
		return
	}
	// The type is written into the stored record as well, so
	// the end event generated later carries it too.
	connectEvent.Type = event.Type
	connectStartEvent := *connectEvent
	connectStartEvent.Op = OpConnectStart
	col.dispatch(connectStartEvent)
}

// stackConnectEventSource attaches all probes required by the
// connect event source, enables them once the last attached probe
// has been synchronized, and then runs next with the channel the
// events are published on. The probes are closed again when next
// returns.
//
// It returns systracer.ErrBadTracePoint when none of the syscall
// connect candidates could be attached, nil when the context is
// cancelled before the probes are synchronized, and the result of
// next otherwise.
func stackConnectEventSource(
	next func(<-chan Event) error,
	rootCtx context.Context, manager systracer.Manager,
) error {
	// Attempt to initialize the connect data source.
	ctx, cancel := context.WithCancel(rootCtx)
	defer cancel()
	var lastSyncCh <-chan struct{}
	eventCh := make(chan Event)

	// Create the connect event collector first.
	collector := &collector{
		ctx:        ctx,
		ch:         eventCh,
		registries: make(map[uint32]*Event),
	}

	// Attempt to attach to the inet_dgram_connect and
	// the inet_stream_connect first.
	inetDgramConnect, _, err := manager.TraceKProbe(
		"inet_dgram_connect",
		collector.handleInetProtocolConnect)
	if err != nil {
		return err
	}
	defer inetDgramConnect.Close()

	inetStreamConnect, _, err := manager.TraceKProbe(
		"inet_stream_connect",
		collector.handleInetProtocolConnect)
	if err != nil {
		return err
	}
	defer inetStreamConnect.Close()

	// Attempt to attach to correct location of the
	// syscall connect. Please notice that once a point
	// of tracing is found, the other functions must
	// also attach to that point.
	var exitConnect, connectInet4, connectInet6 systracer.Trace
	candidates := []string{
		"sys_connect", "__sys_connect",
	}
	for _, candidate := range candidates {
		var syncCh <-chan struct{}

		// Try to attach to the kretprobe of candidate.
		// nolint
		exitConnect, syncCh, err = manager.TraceKProbe(
			candidate, collector.handleExitConnect)
		if err == systracer.ErrBadTracePoint {
			continue
		}
		if err != nil {
			return err
		}
		lastSyncCh = syncCh
		// The loop is left right after the first candidate
		// that attaches, so these defers are registered at
		// most once each.
		defer exitConnect.Close()

		// Try to attach to the connect ipv4 event.
		connectInet4, syncCh, err = manager.TraceKProbe(
			candidate, collector.handleConnectInet4)
		if err != nil {
			return err
		}
		lastSyncCh = syncCh
		defer connectInet4.Close()

		// Try to attach to the connect ipv6 event.
		// This is optional because some older kernel
		// may have no ipv6 support.
		connectInet6, syncCh, err = manager.TraceKProbe(
			candidate, collector.handleConnectInet6)
		if err != nil && err != systracer.ErrBadTracePoint {
			return err
		}
		if connectInet6 != nil {
			lastSyncCh = syncCh
			defer connectInet6.Close()
		}

		// Creation completed for now.
		break
	}
	if exitConnect == nil {
		return systracer.ErrBadTracePoint
	}

	// Wait for the synchronization of probe point.
	select {
	case <-ctx.Done():
		return nil
	case <-lastSyncCh:
	}
	// NOTE: this second deferred cancel is not redundant. Deferred
	// calls run in reverse order, so this one runs before every
	// probe Close registered above, while the first one would only
	// run after them. Cancelling first releases any handler that is
	// blocked in dispatch before the probes are closed.
	defer cancel()
	inetDgramConnect.SetEnabled(true)
	inetStreamConnect.SetEnabled(true)
	exitConnect.SetEnabled(true)
	connectInet4.SetEnabled(true)
	if connectInet6 != nil {
		connectInet6.SetEnabled(true)
	}
	return next(eventCh)
}

// Module is the DI module of connect event.
//
// The module requires a context and a trace manager, and
// injects an event channel of <-chan Event.
var Module = shaft.Stack(stackConnectEventSource)

--------------------------------------------------------------------------------
/listen/listen.go:
--------------------------------------------------------------------------------
// Package listen defines the event source of socket listen
// events on linux.
package listen

import (
	"context"
	"encoding/binary"
	"net"
	"syscall"
	"time"

	"github.com/aegistudio/shaft"
	"github.com/pkg/errors"

	"github.com/chaitin/systracer"
	"github.com/chaitin/systracer/pkg/kversion"
)

// Op is the listen event op for linux.
//
// The operation involves a listen and unlisten event. The
// listen event is issued when a socket has successfully
// listen while the unlisten event is issued when a
// listening socket is closed.
type Op uint8

const (
	OpListenStart = Op(iota)
	OpListenEnd
)

// Event is the standard information for a linux listen
// event. Since it is only possible to listen TCP socket
// (?->0x0a), we can omit out the type judgement.
33 | type Event struct { 34 | Op Op 35 | Timestamp time.Time 36 | PID uint32 37 | FD *int 38 | Family uint16 // AF_* 39 | Addr string 40 | Port uint16 41 | Backlog *int 42 | } 43 | 44 | // collector is the collector for the linux listen events. 45 | // It keeps track of listen state registries and and will 46 | // periodically perform cleanup. 47 | type collector struct { 48 | ctx context.Context 49 | ch chan<- Event 50 | starts map[uint32]*Event 51 | } 52 | 53 | func (c *collector) dispatch(event Event) { 54 | select { 55 | case <-c.ctx.Done(): 56 | case c.ch <- event: 57 | } 58 | } 59 | 60 | // handleEntryListen handles the event triggered when 61 | // a syscall listen or its equivalences are encoutered. 62 | // We can only decode the address family when the 63 | // inet_listen or inet6_listen is called. 64 | // 65 | // listen(FD, Backlog) 66 | func (col *collector) handleEntryListen( 67 | event entrySyscallListen, 68 | ) { 69 | listenEvent := &Event{} 70 | listenEvent.Timestamp = event.Timestamp 71 | listenEvent.PID = event.TaskPID 72 | listenEvent.FD = new(int) 73 | *listenEvent.FD = int(event.FD) 74 | listenEvent.Backlog = new(int) 75 | *listenEvent.Backlog = int(event.Backlog) 76 | col.starts[event.TaskPID] = listenEvent 77 | } 78 | 79 | // handleProtocolListenInet4 handles the event triggered 80 | // when it enters inet_listen. 81 | // 82 | // inet_listen(&socket{ 83 | // ... 
84 | // .sk = &sock{ 85 | // .skc_rcv_saddr = Address, 86 | // .skc_num = Port, 87 | // .skc_family = AF_INET = 2, 88 | // //.skc_state = != 0x0a, 89 | // }, 90 | // }, Backlog) 91 | func (col *collector) handleProtocolListenInet4( 92 | event systracer.ProbeEvent, sk StructSockListenInet4, 93 | ) { 94 | listenEvent := col.starts[event.TaskPID] 95 | if listenEvent == nil { 96 | return 97 | } 98 | listenEvent.Timestamp = event.Timestamp 99 | listenEvent.Family = syscall.AF_INET 100 | var ipv4 [4]byte 101 | binary.BigEndian.PutUint32(ipv4[:], sk.Address) 102 | listenEvent.Addr = net.IP(ipv4[:]).String() 103 | listenEvent.Port = sk.Port 104 | } 105 | 106 | // handleProtocolListenInet4_V2_6_12 handles the inet_listen ipv4 107 | // tracepoint event for linux version 2.6.12 ~ 5.3 (excluded). 108 | func (col *collector) handleProtocolListenInet4_V2_6_12( 109 | event entryProtocolListenInet4_V2_6_12, 110 | ) { 111 | col.handleProtocolListenInet4(event.ProbeEvent, event.Sk) 112 | } 113 | 114 | // handleProtocolListenInet4_v5_3 handles the inet_listen ipv4 115 | // tracepoint event for linux version above 5.3 (included). 116 | func (col *collector) handleProtocolListenInet4_V5_3( 117 | event entryProtocolListenInet4_V5_3, 118 | ) { 119 | col.handleProtocolListenInet4(event.ProbeEvent, event.Sk) 120 | } 121 | 122 | // handleProtocolListenInet6 handles the event triggered 123 | // when it enters inet6_listen. 124 | // 125 | // inet_listen(&socket{ 126 | // ... 
127 | // .sk = &sock{ 128 | // .skc_num = Port, 129 | // .skc_family = AF_INET6 = 10, 130 | // .sin_v6_rev_saddr = Address, 131 | // }, 132 | // }, Backlog) 133 | func (col *collector) handleProtocolListenInet6( 134 | event systracer.ProbeEvent, sk StructSockListenInet6, 135 | ) { 136 | listenEvent := col.starts[event.TaskPID] 137 | if listenEvent == nil { 138 | return 139 | } 140 | listenEvent.Timestamp = event.Timestamp 141 | listenEvent.Family = syscall.AF_INET6 142 | var ipv6 [16]byte 143 | binary.BigEndian.PutUint32(ipv6[0:4], sk.Address0) 144 | binary.BigEndian.PutUint32(ipv6[4:8], sk.Address1) 145 | binary.BigEndian.PutUint32(ipv6[8:12], sk.Address2) 146 | binary.BigEndian.PutUint32(ipv6[12:16], sk.Address3) 147 | listenEvent.Addr = net.IP(ipv6[:]).String() 148 | listenEvent.Port = sk.Port 149 | } 150 | 151 | // handleProtocolListenInet4_V2_6_12 handles the inet_listen ipv6 152 | // tracepoint event for linux version 2.6.12 ~ 5.3 (excluded). 153 | func (col *collector) handleProtocolListenInet6_V2_6_12( 154 | event entryProtocolListenInet6_V2_6_12, 155 | ) { 156 | col.handleProtocolListenInet6(event.ProbeEvent, event.Sk) 157 | } 158 | 159 | // handleProtocolListenInet4_v5_3 handles the inet_listen ipv6 160 | // tracepoint event for linux version above 5.3 (included). 161 | func (col *collector) handleProtocolListenInet6_V5_3( 162 | event entryProtocolListenInet6_V5_3, 163 | ) { 164 | col.handleProtocolListenInet6(event.ProbeEvent, event.Sk) 165 | } 166 | 167 | // handleExitListen handles the event when the listen 168 | // syscall or its equivalences have returned. 169 | // 170 | // This should generate the listen event when the retcode 171 | // is 0, and the address family is known to us. 
172 | func (col *collector) handleExitListen( 173 | event exitSyscallListen, 174 | ) { 175 | listenEvent := col.starts[event.TaskPID] 176 | if listenEvent == nil { 177 | return 178 | } 179 | listenStartEvent := *listenEvent 180 | listenStartEvent.Op = OpListenStart 181 | listenStartEvent.Timestamp = event.Timestamp 182 | delete(col.starts, event.TaskPID) 183 | if event.Errno == 0 && listenEvent.Family != 0 { 184 | col.dispatch(listenStartEvent) 185 | } 186 | } 187 | 188 | // handleTCPCloseInet4 handles the event triggered 189 | // when it enters tcp_close. 190 | // 191 | // tcp_close(&socket{ 192 | // ... 193 | // .sk = &sock{ 194 | // .skc_rcv_saddr = Address, 195 | // .skc_num = Port, 196 | // .skc_family = AF_INET = 2, 197 | // .skc_state = == 0x0a, 198 | // }, 199 | // }) 200 | func (col *collector) handleTCPCloseInet4( 201 | event entryTCPCloseInet4, 202 | ) { 203 | if event.State != 10 { 204 | return 205 | } 206 | var listenEndEvent Event 207 | listenEndEvent.Op = OpListenEnd 208 | listenEndEvent.PID = event.TaskPID 209 | listenEndEvent.Timestamp = event.Timestamp 210 | listenEndEvent.Family = syscall.AF_INET 211 | var ipv4 [4]byte 212 | binary.BigEndian.PutUint32(ipv4[:], event.Address) 213 | listenEndEvent.Addr = net.IP(ipv4[:]).String() 214 | listenEndEvent.Port = event.Port 215 | col.dispatch(listenEndEvent) 216 | } 217 | 218 | // handleTCPCloseInet6 handles the event triggered 219 | // when it enters tcp_close. 220 | // 221 | // tcp_close(&socket{ 222 | // ... 
223 | // .sk = &sock{ 224 | // .skc_num = Port, 225 | // .skc_family = AF_INET6 = 10, 226 | // .sin_v6_rev_saddr = Address, 227 | // .skc_state = == 0x0a, 228 | // }, 229 | // }) 230 | func (col *collector) handleTCPCloseInet6( 231 | event entryTCPCloseInet6, 232 | ) { 233 | if event.State != 10 { 234 | return 235 | } 236 | var listenEndEvent Event 237 | listenEndEvent.Op = OpListenEnd 238 | listenEndEvent.PID = event.TaskPID 239 | listenEndEvent.Timestamp = event.Timestamp 240 | listenEndEvent.Family = syscall.AF_INET6 241 | var ipv6 [16]byte 242 | binary.BigEndian.PutUint32(ipv6[0:4], event.Address0) 243 | binary.BigEndian.PutUint32(ipv6[4:8], event.Address1) 244 | binary.BigEndian.PutUint32(ipv6[8:12], event.Address2) 245 | binary.BigEndian.PutUint32(ipv6[12:16], event.Address3) 246 | listenEndEvent.Addr = net.IP(ipv6[:]).String() 247 | listenEndEvent.Port = event.Port 248 | col.dispatch(listenEndEvent) 249 | } 250 | 251 | func stackListenEventSource( 252 | next func(<-chan Event) error, 253 | rootCtx context.Context, manager systracer.Manager, 254 | ) error { 255 | // Attempt to initialize the listen data source. 256 | ctx, cancel := context.WithCancel(rootCtx) 257 | defer cancel() 258 | var lastSyncCh <-chan struct{} 259 | eventCh := make(chan Event) 260 | 261 | // Create the listen event collector first. 262 | collector := &collector{ 263 | ctx: ctx, 264 | ch: eventCh, 265 | starts: make(map[uint32]*Event), 266 | } 267 | 268 | // Search the event collector handler for IPv4 event. 
269 | var handleProtocolListenInet4, handleProtocolListenInet6 interface{} 270 | if kversion.Current >= kversion.Must("5.3") { 271 | handleProtocolListenInet4 = collector.handleProtocolListenInet4_V5_3 272 | handleProtocolListenInet6 = collector.handleProtocolListenInet6_V5_3 273 | } else if kversion.Current >= kversion.Must("2.6.12") { 274 | handleProtocolListenInet4 = collector.handleProtocolListenInet4_V2_6_12 275 | handleProtocolListenInet6 = collector.handleProtocolListenInet6_V2_6_12 276 | } else { 277 | return errors.Errorf("listen event unsupported") 278 | } 279 | 280 | // Attempt to attach to the inet_listen first. 281 | listenInet4, _, err := manager.TraceKProbe( 282 | "inet_listen", handleProtocolListenInet4) 283 | if err != nil { 284 | return err 285 | } 286 | defer listenInet4.Close() 287 | listenInet6, _, err := manager.TraceKProbe( 288 | "inet_listen", handleProtocolListenInet6) 289 | if err != nil { 290 | return err 291 | } 292 | defer listenInet6.Close() 293 | 294 | // Attempt to attach to the inet_release then. 295 | shutdownInet4, _, err := manager.TraceKProbe( 296 | "tcp_close", collector.handleTCPCloseInet4) 297 | if err != nil { 298 | return err 299 | } 300 | defer shutdownInet4.Close() 301 | 302 | shutdownInet6, _, err := manager.TraceKProbe( 303 | "tcp_close", collector.handleTCPCloseInet6) 304 | if err != nil { 305 | return err 306 | } 307 | defer shutdownInet6.Close() 308 | 309 | // Attempt to attach to correct location of the 310 | // syscall listen. Please notice that once a point 311 | // of tracing is found, the other functions must 312 | // also attach to that point. 313 | var exitListen, entryListen systracer.Trace 314 | candidates := []string{ 315 | "sys_listen", "__sys_listen", 316 | } 317 | for _, candidate := range candidates { 318 | var syncCh <-chan struct{} 319 | 320 | // Try to attach to the kretprobe of candidate. 
321 | exitListen, _, err = manager.TraceKProbe( 322 | candidate, collector.handleExitListen) 323 | if err == systracer.ErrBadTracePoint { 324 | continue 325 | } 326 | if err != nil { 327 | return err 328 | } 329 | defer exitListen.Close() 330 | 331 | // Try to attach to the syscall entry event. 332 | // nolint 333 | entryListen, syncCh, err = manager.TraceKProbe( 334 | candidate, collector.handleEntryListen) 335 | if err != nil { 336 | return err 337 | } 338 | defer entryListen.Close() 339 | lastSyncCh = syncCh 340 | 341 | // Creation completed for now. 342 | break 343 | } 344 | if exitListen == nil { 345 | return systracer.ErrBadTracePoint 346 | } 347 | 348 | // Wait for the completion of entry initialization. 349 | select { 350 | case <-ctx.Done(): 351 | return nil 352 | case <-lastSyncCh: 353 | } 354 | defer cancel() 355 | listenInet4.SetEnabled(true) 356 | listenInet6.SetEnabled(true) 357 | shutdownInet4.SetEnabled(true) 358 | shutdownInet6.SetEnabled(true) 359 | exitListen.SetEnabled(true) 360 | entryListen.SetEnabled(true) 361 | return next(eventCh) 362 | } 363 | 364 | // Module is the DI module of listen event. 365 | // 366 | // The module requires a context and a trace manager, and 367 | // injects an event channel of <-chan Event. 368 | var Module = shaft.Stack(stackListenEventSource) 369 | -------------------------------------------------------------------------------- /handle.go: -------------------------------------------------------------------------------- 1 | package systracer 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io/ioutil" 8 | "os" 9 | "path/filepath" 10 | "reflect" 11 | "strconv" 12 | "sync/atomic" 13 | "syscall" 14 | 15 | "github.com/pkg/errors" 16 | ) 17 | 18 | // traceHandle refers to a single trace registry managed 19 | // by the trace manager and can update its options. 
20 | type traceHandle struct { 21 | id uint64 22 | createTime uint64 23 | numDone uint64 24 | numLoss uint64 25 | ctx context.Context 26 | enableCh chan *traceEnableRequest 27 | closeCh chan *traceCloseRequest 28 | conditionCh chan *conditionUpdateRequest 29 | handler interface{} 30 | desc *traceEventDescriptor 31 | condition string 32 | typ string 33 | enabled bool 34 | } 35 | 36 | // getProbeName formats the probe name. 37 | func (t *traceHandle) getProbeName() string { 38 | return fmt.Sprintf("probe_%x_%x", t.createTime, t.id) 39 | } 40 | 41 | // getEnableFilePath evaluates the path for setting the 42 | // probe enabled or disabled. 43 | func (t *traceHandle) getEnableFilePath( 44 | root, namespace string, 45 | ) string { 46 | probeName := t.getProbeName() 47 | return filepath.Join(root, "instances", 48 | namespace, "events", namespace, probeName, "enable") 49 | } 50 | 51 | // parseProbeName convert from probe name to id. 52 | // 53 | // If the probe name cannot be parsed, it will return 0 54 | // directly, which is not an valid id for probe. 55 | func parseProbeName(name []byte) (createTime, id uint64) { 56 | if !bytes.HasPrefix(name, []byte("probe_")) { 57 | return 58 | } 59 | name = name[len("probe_"):] 60 | if index := bytes.Index(name, []byte("_")); index > 0 { 61 | createTime, _ = strconv.ParseUint( 62 | string(name[:index]), 16, 64) 63 | id, _ = strconv.ParseUint( 64 | string(name[index+1:]), 16, 64) 65 | } 66 | return 67 | } 68 | 69 | // ID is the current ID of the trace handle. 70 | func (t *traceHandle) ID() uint64 { 71 | return t.id 72 | } 73 | 74 | // GetDone retrieves the number of done events. 75 | func (t *traceHandle) GetDone() uint64 { 76 | return atomic.LoadUint64(&t.numDone) 77 | } 78 | 79 | // GetLost retrieves the number of lost events. 80 | func (t *traceHandle) GetLost() uint64 { 81 | return atomic.LoadUint64(&t.numLoss) 82 | } 83 | 84 | // complete increment the corresponding counter. 
85 | func (t *traceHandle) complete(success bool) { 86 | if success { 87 | atomic.AddUint64(&t.numDone, 1) 88 | } else { 89 | atomic.AddUint64(&t.numLoss, 1) 90 | } 91 | } 92 | 93 | // traceEnableRequest is the request to enable or 94 | // disable the handle. 95 | type traceEnableRequest struct { 96 | enabled bool 97 | handle *traceHandle 98 | doneCh chan struct{} 99 | } 100 | 101 | // SetEnabled requests for the enable state update. 102 | func (t *traceHandle) SetEnabled(enabled bool) { 103 | if t.enabled == enabled { 104 | return 105 | } 106 | req := &traceEnableRequest{ 107 | enabled: enabled, 108 | handle: t, 109 | doneCh: make(chan struct{}), 110 | } 111 | select { 112 | case <-t.ctx.Done(): 113 | return 114 | case t.enableCh <- req: 115 | <-req.doneCh 116 | } 117 | } 118 | 119 | // setEnabled flips the state of the handle. 120 | func (t *traceHandle) setEnabled( 121 | root, namespace string, enabled bool, 122 | ) error { 123 | if t.enabled == enabled { 124 | return nil 125 | } 126 | enableString := []byte("0") 127 | if enabled { 128 | enableString = []byte("1") 129 | 130 | // XXX: when converting from enabled to disabled 131 | // status, the condition might always be reset, 132 | // so we should at least attempt to reset the 133 | // condition before restarting. 134 | // 135 | // We will revert to disabled status if the error 136 | // cannot be resolved. 137 | if err := t.updateCondition( 138 | root, namespace, t.condition); err != nil { 139 | return err 140 | } 141 | } 142 | 143 | // /instances//events///enable. 144 | enableFilePath := t.getEnableFilePath(root, namespace) 145 | if err := ioutil.WriteFile(enableFilePath, 146 | enableString, os.FileMode(0600)); err != nil { 147 | return err 148 | } 149 | t.enabled = enabled 150 | return nil 151 | } 152 | 153 | // traceCloseRequest is the request to close the handle. 
154 | type traceCloseRequest struct { 155 | handle *traceHandle 156 | doneCh chan struct{} 157 | } 158 | 159 | // Close will send the message to the manager. 160 | func (t *traceHandle) Close() { 161 | req := &traceCloseRequest{ 162 | handle: t, 163 | doneCh: make(chan struct{}), 164 | } 165 | select { 166 | case <-t.ctx.Done(): 167 | return 168 | case t.closeCh <- req: 169 | } 170 | select { 171 | case <-t.ctx.Done(): 172 | return 173 | case <-req.doneCh: 174 | } 175 | } 176 | 177 | // conditionUpdateRequest is the request to update condition 178 | // of the current trace handle. 179 | type conditionUpdateRequest struct { 180 | handle *traceHandle 181 | err error 182 | condition string 183 | doneCh chan struct{} 184 | } 185 | 186 | // SetCondition will dispatch the condition to manager 187 | // and waits for its result. 188 | func (t *traceHandle) SetCondition(condition string) error { 189 | req := &conditionUpdateRequest{ 190 | handle: t, 191 | condition: condition, 192 | doneCh: make(chan struct{}), 193 | } 194 | select { 195 | case <-t.ctx.Done(): 196 | return t.ctx.Err() 197 | case t.conditionCh <- req: 198 | <-req.doneCh 199 | return req.err 200 | } 201 | } 202 | 203 | // evaluateCondition evaluates the condition string for 204 | // specified two conditions. 205 | func evaluateCondition(left, right string) string { 206 | switch { 207 | case left == "" && right != "": 208 | return right 209 | case left != "" && right != "": 210 | return fmt.Sprintf("(%s) && (%s)", left, right) 211 | case left != "" && right == "": 212 | return left 213 | default: 214 | return "0" 215 | } 216 | } 217 | 218 | // updateCondition is the real function to set condition. 219 | func (t *traceHandle) updateCondition( 220 | root, namespace, condition string, 221 | ) error { 222 | // /instances//events///filter. 
223 | target := filepath.Join( 224 | root, "instances", namespace, "events", 225 | namespace, t.getProbeName(), "filter") 226 | 227 | // Evaluate the old condition so it could be recovered 228 | // if there's error encountered. 229 | oldCondition := evaluateCondition( 230 | t.desc.initialCondition, t.condition) 231 | defer func() { 232 | if t.condition != condition { 233 | // XXX: attempt to rollback to previous condition, 234 | // and will disable the probe if it cannot be 235 | // actually reverted. 236 | err := ioutil.WriteFile(target, 237 | []byte(oldCondition), os.FileMode(0600)) 238 | if err != nil { 239 | enableFilePath := t.getEnableFilePath( 240 | root, namespace) 241 | _ = ioutil.WriteFile(enableFilePath, 242 | []byte("0"), os.FileMode(0600)) 243 | } 244 | } 245 | }() 246 | 247 | // Evaluate the new condition and update it. 248 | newCondition := evaluateCondition( 249 | t.desc.initialCondition, condition) 250 | err := ioutil.WriteFile(target, 251 | []byte(newCondition), os.FileMode(0600)) 252 | if err != nil { 253 | // Report error directly if it is not EINVAL. 254 | pathErr, ok := err.(*os.PathError) 255 | if !ok { 256 | return err 257 | } 258 | if pathErr.Err != syscall.EINVAL { 259 | return err 260 | } 261 | 262 | // Attempt to fetch and report the error cause. 263 | cause, readErr := ioutil.ReadFile(target) 264 | if readErr != nil { 265 | return err 266 | } 267 | return errors.Errorf( 268 | "filter expression %q syntax error: %s", 269 | newCondition, string(cause)) 270 | } 271 | t.condition = condition 272 | return nil 273 | } 274 | 275 | // init attempt to initialize a specific probe, this 276 | // must be done after the fields inside the trace handle 277 | // have already been initialized. 278 | func (t *traceHandle) init( 279 | root, namespace, tracepoint string, 280 | ) error { 281 | var err error 282 | var probeCreated bool 283 | 284 | // Determine the type prefix of the probe. 
285 | var prefix string 286 | switch t.desc.meta { 287 | case typeProbeEvent: 288 | prefix = "p" 289 | case typeReturnEvent: 290 | prefix = "r" 291 | default: 292 | return errors.Errorf( 293 | "type %s is not supported", t.desc.meta) 294 | } 295 | 296 | // Evaluate the probe name and insertion statement. 297 | probeName := t.getProbeName() 298 | probeHeader := fmt.Sprintf("%s:%s/%s %s", 299 | prefix, namespace, probeName, tracepoint) 300 | probeExpr := probeHeader + " " + t.desc.format() 301 | 302 | // Open and write the tracepoint into manifest. 303 | fd, err := syscall.Open(filepath.Join(root, t.typ), 304 | syscall.O_WRONLY|syscall.O_APPEND, 0600) 305 | if err != nil { 306 | return err 307 | } 308 | defer func() { _ = syscall.Close(fd) }() 309 | if _, err = syscall.Write(fd, []byte(probeHeader)); err != nil { 310 | if err == syscall.EINVAL || err == syscall.ENOENT { 311 | return ErrBadTracePoint 312 | } 313 | return err 314 | } 315 | if err = removeProbe( 316 | root, t.typ, namespace, probeName); err != nil { 317 | return err 318 | } 319 | if _, err = syscall.Write(fd, []byte(probeExpr)); err != nil { 320 | if err == syscall.EINVAL { 321 | return errors.Errorf( 322 | "probe expression %q syntax error", probeExpr) 323 | } 324 | return err 325 | } 326 | defer func() { 327 | // Remove the tracepoint from the tracefs. 328 | if !probeCreated { 329 | _ = removeProbe(root, t.typ, namespace, probeName) 330 | } 331 | }() 332 | 333 | // Set the initial condition of the probe. 334 | if err = t.updateCondition(root, namespace, ""); err != nil { 335 | return err 336 | } 337 | 338 | // Attempt to enable the probe, this should reveals some 339 | // problem when the specified trace point is actually 340 | // invalid, especially for those in uprobe. 
341 | enableFilePath := t.getEnableFilePath(root, namespace) 342 | if err = ioutil.WriteFile(enableFilePath, 343 | []byte("1"), os.FileMode(0600)); err != nil { 344 | return err 345 | } 346 | if err = ioutil.WriteFile(enableFilePath, 347 | []byte("0"), os.FileMode(0600)); err != nil { 348 | return err 349 | } 350 | probeCreated = true 351 | return nil 352 | } 353 | 354 | // destroy will attempt to remove the single probe. 355 | func (t *traceHandle) destroy(root, namespace string) { 356 | _ = removeProbe(root, t.typ, namespace, t.getProbeName()) 357 | t.id = 0 358 | } 359 | 360 | // parseEventHandler is the common code to parse and 361 | // compile the event handler. 362 | func parseEventHandler( 363 | handler interface{}, 364 | ) (*traceEventDescriptor, error) { 365 | handlerType := reflect.TypeOf(handler) 366 | if kind := handlerType.Kind(); kind != reflect.Func { 367 | return nil, errors.Wrapf( 368 | errors.Errorf("invalid kind %s", kind), 369 | "parse event handler") 370 | } 371 | if handlerType.NumIn() != 1 { 372 | return nil, errors.Wrapf( 373 | errors.Errorf("invalid input amount"), 374 | "parse event handler") 375 | } 376 | typ := handlerType.In(0) 377 | desc, err := compileTraceEvent(typ) 378 | if err != nil { 379 | return nil, errors.Wrapf(err, "parse event") 380 | } 381 | return desc, nil 382 | } 383 | 384 | // TraceKProbe will register a kprobe event. 385 | func (mgr *traceManager) TraceKProbe( 386 | location string, handler interface{}, 387 | ) (Trace, <-chan struct{}, error) { 388 | desc, err := parseEventHandler(handler) 389 | if err != nil { 390 | return nil, nil, err 391 | } 392 | return mgr.createTrace("kprobe_events", 393 | location, handler, desc) 394 | } 395 | 396 | // TraceUProbe will register a uprobe event. 
397 | func (mgr *traceManager) TraceUProbe( 398 | library, location string, handler interface{}, 399 | ) (Trace, <-chan struct{}, error) { 400 | desc, err := parseEventHandler(handler) 401 | if err != nil { 402 | return nil, nil, err 403 | } 404 | return mgr.createTrace("uprobe_events", 405 | fmt.Sprintf("%s:%s", library, location), 406 | handler, desc) 407 | } 408 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2022 Chaitin Tech 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /inode/inode.go: -------------------------------------------------------------------------------- 1 | // Package inode provides service for uniquely pinning and 2 | // addressing an inode for path. 3 | package inode 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | "strconv" 12 | "strings" 13 | "sync" 14 | "syscall" 15 | "time" 16 | 17 | "github.com/aegistudio/shaft" 18 | "github.com/pkg/errors" 19 | "golang.org/x/sync/errgroup" 20 | 21 | "github.com/chaitin/systracer" 22 | "github.com/chaitin/systracer/pkg/alloc" 23 | "github.com/chaitin/systracer/pkg/kversion" 24 | ) 25 | 26 | // inodePinResult is the response captured corresponding to 27 | // each inode pinning request. 28 | type inodePinResult struct { 29 | inode, cookie uint64 30 | } 31 | 32 | // collector is the collector for receiving the reaction of 33 | // the inode pinning, and send it back to the master thread. 
34 | type collector struct { 35 | rootCtx context.Context 36 | resultCh chan<- inodePinResult 37 | } 38 | 39 | // handleInodePin is the handler for inode pinning captured 40 | // by security_inode_getsecurity. We filter out only the 41 | // "security.systracer.inode_pin.*". 42 | func (col *collector) handleInodePin( 43 | name string, inode uint64, 44 | ) { 45 | prefix := "systracer.inode_pin." 46 | if !strings.HasPrefix(name, prefix) { 47 | return 48 | } 49 | cookie, err := strconv.ParseUint( 50 | name[len(prefix):], 16, 64) 51 | if err != nil { 52 | return 53 | } 54 | result := inodePinResult{ 55 | inode: inode, 56 | cookie: cookie, 57 | } 58 | select { 59 | case <-col.rootCtx.Done(): 60 | case col.resultCh <- result: 61 | } 62 | } 63 | 64 | // handleSecurityInodePin handles the inode pinning event 65 | // from version 2.6.24 (inclusive) to 5.12 (exclusive). 66 | // 67 | // security_inode_getsecurity( 68 | // Inode, "systracer.inode_pin.${hex Cookie}") 69 | func (col *collector) handleSecurityInodePin_V2_6_24( 70 | event entrySecurityInodePin_V2_6_24, 71 | ) { 72 | col.handleInodePin(event.Name, event.Inode) 73 | } 74 | 75 | // handleSecurityInodePin handles the inode pinning event 76 | // from 5.12 (inclusive) to now. 77 | // 78 | // security_inode_getsecurity( 79 | // MountNS, Inode, "systracer.inode_pin.${hex Cookie}") 80 | func (col *collector) handleSecurityInodePin_V5_12( 81 | event entrySecurityInodePin_V5_12, 82 | ) { 83 | col.handleInodePin(event.Name, event.Inode) 84 | } 85 | 86 | // inodePin is the state of deduplicated inode which holds 87 | // strong reference to the open inode to keep the validity 88 | // of the addressing result. 89 | type inodePin struct { 90 | id uint64 91 | name string 92 | inode uint64 93 | file *os.File 94 | ref uint64 95 | doneCh chan struct{} 96 | } 97 | 98 | // Inode is the actually pinned inode. 
99 | type Inode struct { 100 | manager *Manager 101 | inner *inodePin 102 | once sync.Once 103 | } 104 | 105 | // Inode returns the address of the pinned inode. 106 | func (inode *Inode) Inode() uint64 { 107 | return inode.inner.inode 108 | } 109 | 110 | // Unpin removes the strong reference held by caller. 111 | func (inode *Inode) Unpin() { 112 | inode.once.Do(func() { 113 | inode.manager.unpin(inode.inner) 114 | }) 115 | runtime.SetFinalizer(inode, nil) 116 | } 117 | 118 | // inodePinRequest for performing inode pinning. 119 | type inodePinRequest struct { 120 | name string 121 | mode int 122 | doneCh chan struct{} 123 | result *inodePin 124 | err error 125 | } 126 | 127 | // Manager for performing and managing inode pins. 128 | type Manager struct { 129 | rootCtx context.Context 130 | pinCh chan *inodePinRequest 131 | unpinCh chan *inodePin 132 | } 133 | 134 | // pin requests for requesting and opening an inode pin. 135 | func (m *Manager) pin(name string, mode int) (*Inode, error) { 136 | abs, err := filepath.Abs(name) 137 | if err != nil { 138 | return nil, err 139 | } 140 | req := &inodePinRequest{ 141 | name: abs, 142 | mode: mode, 143 | doneCh: make(chan struct{}), 144 | } 145 | select { 146 | case <-m.rootCtx.Done(): 147 | return nil, m.rootCtx.Err() 148 | case m.pinCh <- req: 149 | } 150 | select { 151 | case <-m.rootCtx.Done(): 152 | return nil, m.rootCtx.Err() 153 | case <-req.doneCh: 154 | } 155 | if req.err != nil { 156 | return nil, req.err 157 | } 158 | select { 159 | case <-m.rootCtx.Done(): 160 | return nil, m.rootCtx.Err() 161 | case <-req.result.doneCh: 162 | } 163 | result := &Inode{ 164 | inner: req.result, 165 | manager: m, 166 | } 167 | runtime.SetFinalizer(result, func(value *Inode) { 168 | value.Unpin() 169 | }) 170 | return result, nil 171 | } 172 | 173 | // PinFile is the request for pinning single file. 
174 | func (m *Manager) PinFile(name string) (*Inode, error) { 175 | return m.pin(name, syscall.O_RDONLY|syscall.O_CLOEXEC) 176 | } 177 | 178 | // PinDir is the request for pinning single dir. 179 | func (m *Manager) PinDir(name string) (*Inode, error) { 180 | return m.pin(name, 181 | syscall.O_RDONLY|syscall.O_DIRECTORY|syscall.O_CLOEXEC) 182 | } 183 | 184 | // unpin is the request for closing an inode pin. 185 | func (m *Manager) unpin(p *inodePin) { 186 | select { 187 | case <-m.rootCtx.Done(): 188 | case m.unpinCh <- p: 189 | } 190 | } 191 | 192 | // managerState is the state triggered by 193 | // either pin completion event, retest timers and 194 | // registrations/unregistrations. 195 | type managerState struct { 196 | last uint64 197 | cookieBase uint64 198 | 199 | all map[uint64]*inodePin 200 | names map[string]*inodePin 201 | cookies map[uint64]*inodePin 202 | } 203 | 204 | // close destroys all allocated instances in the state. 205 | func (s *managerState) close() { 206 | for _, pin := range s.all { 207 | _ = pin.file.Close() 208 | pin.id = 0 209 | } 210 | } 211 | 212 | // performInodePin executes the actual inode pinning with 213 | // our specified fd and cookie. 214 | func performInodePin(fd uintptr, cookie uint64) { 215 | filename := fmt.Sprintf("/proc/self/fd/%d", fd) 216 | attribute := fmt.Sprintf( 217 | "security.systracer.inode_pin.%x", cookie) 218 | var buf [1024]byte 219 | _, _ = syscall.Getxattr(filename, attribute, buf[:]) 220 | } 221 | 222 | // pin attempts to allocate and create a pin in the state. 223 | func (s *managerState) pin( 224 | name string, flag int, 225 | ) (rpin *inodePin, rerr error) { 226 | // Attempt to open the specified file for later use, 227 | // please notice that the file might be swapped for 228 | // later use and will not close then. 
229 | fd, err := syscall.Open(name, flag, 0) 230 | if err != nil { 231 | return nil, err 232 | } 233 | f := os.NewFile(uintptr(fd), name) 234 | defer func() { 235 | if f != nil { 236 | _ = f.Close() 237 | } 238 | }() 239 | 240 | // If there's previous node for the file, attempt to 241 | // allocate specified node for the file. 242 | if previous, ok := s.names[name]; ok { 243 | // Retrieve previous and current file information. 244 | newInfo, err := f.Stat() 245 | if err != nil { 246 | return nil, err 247 | } 248 | newStat := newInfo.Sys().(*syscall.Stat_t) 249 | oldInfo, err := previous.file.Stat() 250 | if err != nil { 251 | return nil, err 252 | } 253 | oldStat := oldInfo.Sys().(*syscall.Stat_t) 254 | 255 | // Compare the information and return the previous 256 | // one if they are the same. 257 | if newStat.Dev == oldStat.Dev && 258 | newStat.Ino == oldStat.Ino && 259 | newStat.Rdev == oldStat.Rdev { 260 | previous.ref++ 261 | return previous, nil 262 | } 263 | } 264 | 265 | // Attempt to allocate a new node for the subscription. 266 | id := alloc.Alloc(s.last, uint64(1<<48), func(id uint64) bool { 267 | return s.all[id] != nil 268 | }) 269 | if id == 0 { 270 | return nil, errors.New( 271 | "cannot allocate more inode pin") 272 | } 273 | created := &inodePin{ 274 | id: id, 275 | name: name, 276 | file: f, 277 | ref: 1, 278 | doneCh: make(chan struct{}), 279 | } 280 | s.all[id] = created 281 | s.last = id 282 | s.names[name] = created 283 | f = nil 284 | 285 | // Mark the file and create a new cookie here. 286 | s.cookieBase++ 287 | cookie := s.cookieBase 288 | s.cookies[cookie] = created 289 | performInodePin(created.file.Fd(), cookie) 290 | return created, nil 291 | } 292 | 293 | // unpin attempts to decrement reference and potentially 294 | // remove a pin from the state. 
295 | func (s *managerState) unpin(p *inodePin) { 296 | if p.id == 0 { 297 | return 298 | } 299 | p.ref-- 300 | if p.ref > 0 { 301 | return 302 | } 303 | if s.names[p.name] == p { 304 | delete(s.names, p.name) 305 | } 306 | delete(s.all, p.id) 307 | if p.file != nil { 308 | _ = p.file.Close() 309 | p.file = nil 310 | } 311 | p.id = 0 312 | } 313 | 314 | // reallocateCookie will attempt to reset current cookies. 315 | func (s *managerState) reallocateCookie() { 316 | newCookies := make(map[uint64]*inodePin) 317 | for _, target := range s.cookies { 318 | if target.id == 0 { 319 | continue 320 | } 321 | s.cookieBase++ 322 | cookie := s.cookieBase 323 | newCookies[cookie] = target 324 | performInodePin(target.file.Fd(), cookie) 325 | } 326 | s.cookies = newCookies 327 | } 328 | 329 | // handleResult handles the inode pin result. 330 | func (s *managerState) handleResult( 331 | event inodePinResult, 332 | ) { 333 | target := s.cookies[event.cookie] 334 | if target == nil { 335 | return 336 | } 337 | delete(s.cookies, event.cookie) 338 | target.inode = event.inode 339 | close(target.doneCh) 340 | } 341 | 342 | // hasPending see whether there's pending pind request. 343 | func (s *managerState) hasPending() bool { 344 | return len(s.cookies) != 0 345 | } 346 | 347 | // runMasterThread executes the master thread. 348 | func (m *Manager) runMasterThread( 349 | resultCh <-chan inodePinResult, 350 | ) { 351 | var ticker *time.Ticker 352 | defer func() { 353 | if ticker != nil { 354 | ticker.Stop() 355 | } 356 | }() 357 | state := &managerState{ 358 | all: make(map[uint64]*inodePin), 359 | names: make(map[string]*inodePin), 360 | cookies: make(map[uint64]*inodePin), 361 | } 362 | defer state.close() 363 | for { 364 | var tickCh <-chan time.Time 365 | if ticker != nil { 366 | tickCh = ticker.C 367 | } 368 | 369 | // Serve user request, inode pin event and 370 | // reallocate tick within select. 
371 | select { 372 | case <-m.rootCtx.Done(): 373 | return 374 | case event := <-resultCh: 375 | state.handleResult(event) 376 | case req := <-m.pinCh: 377 | func() { 378 | defer close(req.doneCh) 379 | req.result, req.err = state.pin( 380 | req.name, req.mode) 381 | }() 382 | case req := <-m.unpinCh: 383 | state.unpin(req) 384 | case <-tickCh: 385 | state.reallocateCookie() 386 | } 387 | 388 | // Setup or shutdown current reallocate ticker. 389 | if state.hasPending() { 390 | if ticker == nil { 391 | ticker = time.NewTicker(5 * time.Second) 392 | } 393 | } else { 394 | if ticker != nil { 395 | ticker.Stop() 396 | ticker = nil 397 | } 398 | } 399 | } 400 | } 401 | 402 | // stackInodeManager will attempt to create an inode pin 403 | // manager and stack it for later operations. 404 | func stackInodeManager( 405 | next func(*Manager) error, 406 | rootCtx context.Context, group *errgroup.Group, 407 | manager systracer.Manager, 408 | ) error { 409 | // Setup the collector for receiving events. 410 | resultCh := make(chan inodePinResult) 411 | collector := &collector{ 412 | rootCtx: rootCtx, 413 | resultCh: resultCh, 414 | } 415 | 416 | // Attach to the security_inode_getsecurity 417 | // for receiving file hook result. 418 | var target interface{} 419 | target = collector.handleSecurityInodePin_V2_6_24 420 | if kversion.Current >= kversion.Must("5.12") { 421 | target = collector.handleSecurityInodePin_V5_12 422 | } 423 | inodePinProbe, syncCh, err := manager.TraceKProbe( 424 | "security_inode_getsecurity", target) 425 | if err != nil { 426 | return err 427 | } 428 | defer inodePinProbe.Close() 429 | 430 | // Wait for the completion of probe creation. 431 | select { 432 | case <-rootCtx.Done(): 433 | return nil 434 | case <-syncCh: 435 | } 436 | 437 | // Startup the inode pin master thread. 
438 | result := &Manager{ 439 | rootCtx: rootCtx, 440 | pinCh: make(chan *inodePinRequest), 441 | unpinCh: make(chan *inodePin), 442 | } 443 | group.Go(func() error { 444 | inodePinProbe.SetEnabled(true) 445 | result.runMasterThread(resultCh) 446 | return nil 447 | }) 448 | return next(result) 449 | } 450 | 451 | // Module is the DI module of the inode manager. 452 | // 453 | // The module requires a context, an errgroup and a trace 454 | // manager, and injects an inode pin manager. 455 | var Module = shaft.Stack(stackInodeManager) 456 | -------------------------------------------------------------------------------- /compile.go: -------------------------------------------------------------------------------- 1 | package systracer 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | "reflect" 8 | "strconv" 9 | "strings" 10 | "unsafe" 11 | 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | // traceEventField contains information about what to do 16 | // with the field comes at first. 17 | type traceEventField interface { 18 | // format returns the format string that could be 19 | // set as the fetch expression. 20 | // 21 | // It is usually in the form of =:, 22 | // and some field might be composite and contains 23 | // multiple of such form. 24 | format() string 25 | 26 | // fill will attempt to parse the input string and 27 | // fill information into the struct. 28 | fill(input []byte, data uintptr) (int, error) 29 | } 30 | 31 | // bytesFault is the sequence of fault bytes. 32 | var bytesFault = []byte("(fault)") 33 | 34 | // bytesHex is the sequence of hexdecimal number. 35 | var bytesHex = []byte("0x") 36 | 37 | // traceFillInteger will attempt to parse the 38 | // content from the number field and later place it 39 | // inside the specified address. 
40 | func traceFillInteger( 41 | addr uintptr, kind reflect.Kind, 42 | bigEndian bool, number []byte, 43 | ) (step int, err error) { 44 | // If fault is encountered, the string will 45 | // shift forward for the size of fault and 46 | // left the remained fields unchanged. 47 | if bytes.HasPrefix(number, bytesFault) { 48 | return len(bytesFault), nil 49 | } 50 | 51 | // Attempt to parse it as string. 52 | base := 10 53 | offset := 0 54 | negative := false 55 | if bytes.HasPrefix(number, bytesHex) { 56 | base = 16 57 | offset = len(bytesHex) 58 | } else if len(number) > 0 && number[0] == '-' { 59 | offset = 1 60 | negative = true 61 | } 62 | 63 | // Seek for the next space or end of line. 64 | step = offset 65 | for step < len(number) && 66 | number[step] != ' ' && number[step] != '\n' { 67 | step++ 68 | } 69 | 70 | // Attempt to parse the number and return. 71 | var v uint64 72 | v, err = strconv.ParseUint( 73 | string(number[offset:step]), base, 64) 74 | if err != nil { 75 | return 76 | } 77 | 78 | // Transform byte order if mismatched order. 79 | if bigEndian { 80 | switch kind { 81 | case reflect.Uint16, reflect.Int16: 82 | var buf [2]byte 83 | *(*uint16)((unsafe.Pointer)(&buf[0])) = uint16(v) 84 | v = uint64(binary.BigEndian.Uint16(buf[:])) 85 | case reflect.Uint32, reflect.Int32: 86 | var buf [4]byte 87 | *(*uint32)((unsafe.Pointer)(&buf[0])) = uint32(v) 88 | v = uint64(binary.BigEndian.Uint32(buf[:])) 89 | case reflect.Uint64, reflect.Int64: 90 | var buf [8]byte 91 | *(*uint64)((unsafe.Pointer)(&buf[0])) = uint64(v) 92 | v = uint64(binary.BigEndian.Uint64(buf[:])) 93 | } 94 | } 95 | 96 | // Negate the number if it is negative. 97 | if negative { 98 | v = uint64(-int64(v)) 99 | } 100 | 101 | // Switch the concrete type of integer and fill. 
102 | switch kind { 103 | case reflect.Uint8: 104 | *(*uint8)((unsafe.Pointer)(addr)) = uint8(v) 105 | case reflect.Int8: 106 | *(*int8)((unsafe.Pointer)(addr)) = int8(v) 107 | case reflect.Uint16: 108 | *(*uint16)((unsafe.Pointer)(addr)) = uint16(v) 109 | case reflect.Int16: 110 | *(*int16)((unsafe.Pointer)(addr)) = int16(v) 111 | case reflect.Uint32: 112 | *(*uint32)((unsafe.Pointer)(addr)) = uint32(v) 113 | case reflect.Int32: 114 | *(*int32)((unsafe.Pointer)(addr)) = int32(v) 115 | case reflect.Uint64: 116 | *(*uint64)((unsafe.Pointer)(addr)) = uint64(v) 117 | case reflect.Int64: 118 | *(*int64)((unsafe.Pointer)(addr)) = int64(v) 119 | } 120 | return 121 | } 122 | 123 | // traceIntegerField is a field corresponding to integer. 124 | type traceIntegerField struct { 125 | name string 126 | offset uintptr 127 | fetch string 128 | kind reflect.Kind 129 | typename string 130 | bigEndian bool 131 | } 132 | 133 | // format returns the format for the field. 134 | func (f traceIntegerField) format() string { 135 | return fmt.Sprintf("%s=%s%s", 136 | f.name, f.fetch, f.typename) 137 | } 138 | 139 | // fill parsed integer data and move forward. 140 | func (f traceIntegerField) fill( 141 | input []byte, data uintptr, 142 | ) (forward int, err error) { 143 | bytesName := []byte(f.name + "=") 144 | if !bytes.HasPrefix(input, bytesName) { 145 | return 0, errors.Errorf( 146 | "expect integer field start token %q", f.name) 147 | } 148 | forward, err = traceFillInteger(data+f.offset, 149 | f.kind, f.bigEndian, input[len(bytesName):]) 150 | forward += len(bytesName) 151 | return 152 | } 153 | 154 | // traceStringField is a field corresponding to string. 155 | // 156 | // XXX: despite outputing double quote symbols, all string 157 | // variables written from kernel is not quoted and written 158 | // to buffer directly. And this behaviour is even not 159 | // fixed by kernel yet (>=5.0). 
160 | // 161 | // To work around, we enforce the kernel to output the 162 | // canary to identify the end of string. The canary is 163 | // either default to the string address or manually 164 | // specified, and must be hard to detect. 165 | type traceStringField struct { 166 | name string 167 | offset uintptr 168 | fetch string 169 | canary string 170 | 171 | isStringAddr bool 172 | } 173 | 174 | // format returns the format for the field. 175 | func (f traceStringField) format() string { 176 | return fmt.Sprintf( 177 | "%sStart=%s:u64 %s=+0(%s):string %sEnd=%s:u64", 178 | f.name, f.canary, f.name, f.fetch, f.name, f.canary) 179 | } 180 | 181 | // fill parsed string data and move forward. 182 | func (f traceStringField) fill( 183 | input []byte, addr uintptr, 184 | ) (forward int, err error) { 185 | pointer := (unsafe.Pointer)(addr + f.offset) 186 | 187 | // Extract the string start token canaries. 188 | bytesStart := []byte(f.name + "Start=") 189 | if !bytes.HasPrefix(input, bytesStart) { 190 | return 0, errors.Errorf( 191 | "expect string field start token %q", f.name) 192 | } 193 | var address uint64 194 | addressStep, err := traceFillInteger( 195 | uintptr(unsafe.Pointer(&address)), 196 | reflect.Uint64, false, input[len(bytesStart):]) 197 | if err != nil { 198 | return 0, err 199 | } 200 | if f.isStringAddr { 201 | (*StringAddr)(pointer).Addr = address 202 | } 203 | 204 | // Construct the string end token canaries. 205 | lenFirstPortion := len(bytesStart) + addressStep 206 | bytesEnd := input[len(bytesStart):lenFirstPortion] 207 | bytesEnd = []byte(" " + f.name + "End=" + string(bytesEnd)) 208 | 209 | // Attempt to find the enclosing part of the 210 | // string in the input. 
211 | addressEnd := bytes.Index(input[lenFirstPortion:], bytesEnd) 212 | if addressEnd < 0 { 213 | return 0, errors.Errorf( 214 | "expect string field end token %q", f.name) 215 | } 216 | forward = lenFirstPortion + addressEnd + len(bytesEnd) 217 | 218 | // Construct and parse the string. 219 | bytesString := input[lenFirstPortion+1 : lenFirstPortion+addressEnd] 220 | bytesMiddle := []byte(f.name + "=") 221 | if !bytes.HasPrefix(bytesString, bytesMiddle) { 222 | err = errors.Errorf( 223 | "expect string field middle token %q", f.name) 224 | return 225 | } 226 | bytesString = bytesString[len(bytesMiddle):] 227 | if bytes.Equal(bytesString, bytesFault) { 228 | return 229 | } 230 | bytesString = bytesString[1 : len(bytesString)-1] 231 | if f.isStringAddr { 232 | (*StringAddr)(pointer).String = string(bytesString) 233 | } else { 234 | *(*string)(pointer) = string(bytesString) 235 | } 236 | return 237 | } 238 | 239 | // traceEventDescriptor describes the way to process event. 240 | // 241 | // The first field will always be untagged and must be one of 242 | // the offspring of tracing.Event (e.g. tracing.ProbeEvent and 243 | // tracing.ReturnEvent). The first field determines how will 244 | // the events be registered and processed. 245 | type traceEventDescriptor struct { 246 | typ reflect.Type 247 | meta reflect.Type 248 | fields []traceEventField 249 | 250 | initialCondition string 251 | } 252 | 253 | // format returns the event field format concatenated. 254 | func (efd traceEventDescriptor) format() string { 255 | var formats []string 256 | for _, field := range efd.fields { 257 | formats = append(formats, field.format()) 258 | } 259 | return strings.Join(formats, " ") 260 | } 261 | 262 | // mapIntegerName is the map from the kind to the name. 
263 | var mapIntegerName = map[reflect.Kind]string{ 264 | reflect.Uint8: ":u8", 265 | reflect.Int8: ":s8", 266 | reflect.Uint16: ":u16", 267 | reflect.Int16: ":s16", 268 | reflect.Uint32: ":u32", 269 | reflect.Int32: ":s32", 270 | reflect.Uint64: ":u64", 271 | reflect.Int64: ":s64", 272 | } 273 | 274 | // mapIntegerSize is the map from the kind to the size. 275 | var mapIntegerSize = map[reflect.Kind]uint64{ 276 | reflect.Uint8: 8, 277 | reflect.Int8: 8, 278 | reflect.Uint16: 16, 279 | reflect.Int16: 16, 280 | reflect.Uint32: 32, 281 | reflect.Int32: 32, 282 | reflect.Uint64: 64, 283 | reflect.Int64: 64, 284 | } 285 | 286 | // compileTraceEvent will attempt to parse the fields and 287 | // convert the event specified by type into the event 288 | // descriptor. 289 | func compileTraceEvent( 290 | typ reflect.Type, 291 | ) (*traceEventDescriptor, error) { 292 | result := &traceEventDescriptor{ 293 | typ: typ, 294 | } 295 | 296 | // Ensure that the specified type should be struct. 297 | if kind := typ.Kind(); kind != reflect.Struct { 298 | return nil, errors.Errorf("invalid kind %q", kind) 299 | } 300 | 301 | // Detect and collect first field. 302 | if typ.NumField() == 0 { 303 | return nil, errors.New("empty struct") 304 | } 305 | firstField := typ.Field(0) 306 | if !firstField.Anonymous { 307 | return nil, errors.New("first field must be anonymous") 308 | } 309 | result.meta = firstField.Type 310 | switch result.meta { 311 | case typeProbeEvent: 312 | case typeReturnEvent: 313 | default: 314 | return nil, errors.Errorf( 315 | "type %s cannot be first field", result.meta) 316 | } 317 | 318 | // Perform conversion of each field recursively. 
319 | var stackTyp []reflect.Type 320 | var stackIndex []int 321 | var stackOffset []uintptr 322 | var stackNames []string 323 | var stackArgs [][]string 324 | var conds []string 325 | stackTyp = append(stackTyp, typ) 326 | stackIndex = append(stackIndex, 1) 327 | stackOffset = append(stackOffset, 0) 328 | stackNames = append(stackNames, "") 329 | stackArgs = append(stackArgs, nil) 330 | for len(stackTyp) > 0 { 331 | // Fetch current field for parsing. 332 | currentTyp := stackTyp[len(stackTyp)-1] 333 | currentIndex := stackIndex[len(stackIndex)-1] 334 | if currentTyp.NumField() <= currentIndex { 335 | stackTyp = stackTyp[:len(stackTyp)-1] 336 | stackIndex = stackIndex[:len(stackIndex)-1] 337 | stackOffset = stackOffset[:len(stackOffset)-1] 338 | stackArgs = stackArgs[:len(stackArgs)-1] 339 | if len(stackIndex) > 0 { 340 | stackIndex[len(stackIndex)-1]++ 341 | } 342 | continue 343 | } 344 | currentField := typ.FieldByIndex(stackIndex) 345 | currentKind := currentField.Type.Kind() 346 | fieldOffset := stackOffset[len(stackOffset)-1] + 347 | currentField.Offset 348 | tag := currentField.Tag.Get("tracing") 349 | 350 | // Apply alternations to the tracing tag with 351 | // arguments specified on stack. 352 | currentPrefix := strings.Join(stackNames, "_") 353 | tag = strings.ReplaceAll(tag, "{0}", currentPrefix) 354 | for i, value := range stackArgs[len(stackArgs)-1] { 355 | tag = strings.ReplaceAll(tag, 356 | fmt.Sprintf("{%d}", i+1), value) 357 | } 358 | 359 | // Specially processing for the condition field, 360 | // which is special embedding of condition. 361 | if currentField.Type == typeCondition { 362 | if tag != "" { 363 | conds = append(conds, tag) 364 | } 365 | stackIndex[len(stackIndex)-1]++ 366 | continue 367 | } 368 | args := strings.Split(tag, ",") 369 | 370 | // Specially processing for the struct kind, 371 | // which is considered as embedding. 
372 | if currentKind == reflect.Struct && 373 | currentField.Type != typeStringAddr { 374 | stackTyp = append(stackTyp, currentField.Type) 375 | stackIndex = append(stackIndex, 0) 376 | stackOffset = append(stackOffset, fieldOffset) 377 | stackNames = append(stackNames, currentField.Name) 378 | stackArgs = append(stackArgs, args) 379 | continue 380 | } 381 | 382 | // Analyze and prepare current parameter. 383 | // 384 | // The first two parameters must always be 385 | // fetcher and condition (optional). Callers 386 | // could add more conditions after that. 387 | if tag == "" { 388 | stackIndex[len(stackIndex)-1]++ 389 | continue 390 | } 391 | fetch := args[0] 392 | if len(args) > 1 && args[1] != "" { 393 | conds = append(conds, args[1]) 394 | } 395 | 396 | // Evaluate current name and offset. 397 | if currentField.Anonymous { 398 | return nil, errors.New( 399 | "cannot embed non-struct field") 400 | } 401 | fieldName := currentPrefix + currentField.Name 402 | 403 | // Fallback tracing.StringAddr to string kind. 404 | isStringAddr := false 405 | if currentField.Type == typeStringAddr { 406 | currentKind = reflect.String 407 | isStringAddr = true 408 | } 409 | 410 | // Find out the kind of the current field. 
411 | switch currentKind { 412 | case reflect.Uint8, reflect.Int8, 413 | reflect.Uint16, reflect.Int16, 414 | reflect.Uint32, reflect.Int32, 415 | reflect.Uint64, reflect.Int64: 416 | var bigEndian bool 417 | typename := mapIntegerName[currentKind] 418 | for i := 2; i < len(args); i++ { 419 | arg := args[i] 420 | switch { 421 | case arg == "": 422 | case arg == "bigendian": 423 | bigEndian = true 424 | case strings.HasPrefix(arg, "bit[") && 425 | strings.HasSuffix(arg, "]"): 426 | size := mapIntegerSize[currentKind] 427 | start := arg[len("bit[") : len(arg)-1] 428 | end := start 429 | if col := strings.Index(start, ":"); col >= 0 { 430 | start = start[0:col] 431 | end = end[col+1:] 432 | } 433 | vstart, err := strconv.ParseUint(start, 10, 64) 434 | if err != nil { 435 | return nil, errors.Errorf( 436 | "malformed start %q: %s", arg, err) 437 | } 438 | vend, err := strconv.ParseUint(end, 10, 64) 439 | if err != nil { 440 | return nil, errors.Errorf( 441 | "malformed end %q: %s", arg, err) 442 | } 443 | if vstart > vend || vend >= size { 444 | return nil, errors.Errorf( 445 | `invalid bit range "%d:%d"`, vstart, vend) 446 | } 447 | typename = fmt.Sprintf(":b%d@%d/%d", 448 | vend-vstart+1, vstart, size) 449 | default: 450 | return nil, errors.Errorf( 451 | "unknown modifier %q", arg) 452 | } 453 | } 454 | result.fields = append(result.fields, 455 | &traceIntegerField{ 456 | name: fieldName, 457 | offset: fieldOffset, 458 | fetch: fetch, 459 | kind: currentKind, 460 | typename: typename, 461 | bigEndian: bigEndian, 462 | }) 463 | case reflect.String: 464 | canary := fetch 465 | if len(args) > 2 && args[2] != "" { 466 | canary = fetch 467 | } 468 | result.fields = append(result.fields, 469 | &traceStringField{ 470 | name: fieldName, 471 | offset: fieldOffset, 472 | fetch: fetch, 473 | canary: canary, 474 | isStringAddr: isStringAddr, 475 | }) 476 | default: 477 | return nil, errors.Errorf( 478 | "unacceptible kind %s", currentKind) 479 | } 480 | 
stackIndex[len(stackIndex)-1]++ 481 | } 482 | 483 | // Evaluate the initial condition for the queries. 484 | if len(conds) == 1 { 485 | result.initialCondition = conds[0] 486 | } else if len(conds) > 1 { 487 | result.initialCondition = "(" + strings.Join( 488 | conds, ") && (") + ")" 489 | } 490 | 491 | return result, nil 492 | } 493 | 494 | // fill will parse the given log and fill the content. 495 | func (efd traceEventDescriptor) fill( 496 | data uintptr, log []byte, 497 | ) (forward int, err error) { 498 | forward = 0 499 | for i, field := range efd.fields { 500 | // Remove spaces encountered. 501 | for log[forward] == ' ' { 502 | forward++ 503 | } 504 | if log[forward] == '\n' { 505 | err = errors.Errorf( 506 | "unexpected truncation in field #%d", i) 507 | return 508 | } 509 | 510 | // Start parsing the field. 511 | var current int 512 | current, err = field.fill(log[forward:], data) 513 | forward += current 514 | if err != nil { 515 | return 516 | } 517 | } 518 | return 519 | } 520 | -------------------------------------------------------------------------------- /rcnotify/rcnotify.go: -------------------------------------------------------------------------------- 1 | package rcnotify 2 | 3 | import ( 4 | "context" 5 | "path/filepath" 6 | "runtime" 7 | "sync" 8 | "time" 9 | 10 | "github.com/aegistudio/shaft" 11 | "github.com/pkg/errors" 12 | 13 | "github.com/chaitin/systracer" 14 | "github.com/chaitin/systracer/inode" 15 | "github.com/chaitin/systracer/pkg/kversion" 16 | ) 17 | 18 | // extractPathComponent is the code for creating a 19 | // valid portion of path component. 20 | func extractPathComponent(src []systracer.StringAddr) []string { 21 | var result []string 22 | for i := 0; i < len(src); i++ { 23 | if src[i].Addr == 0 { 24 | break 25 | } 26 | if i > 0 && src[i].Addr == src[i-1].Addr { 27 | break 28 | } 29 | result = append(result, src[i].String) 30 | } 31 | return result 32 | } 33 | 34 | // Op is the file event op for linux. 
35 | // 36 | // The event operations are defined dedicated for linux, 37 | // and some extra information will be filled based on 38 | // different event type. 39 | // 40 | // The operations can be or-ed together to represent 41 | // set of events for notification. 42 | type Op uint64 43 | 44 | const ( 45 | OpCreate = Op(1 << iota) 46 | OpMkdir 47 | OpMknod 48 | OpDelete 49 | OpRmdir 50 | OpRename 51 | OpAttrib 52 | OpLink 53 | OpSymlink 54 | 55 | OpAll = OpCreate | OpMkdir | OpDelete | OpRmdir | 56 | OpRename | OpAttrib | OpLink | OpSymlink 57 | ) 58 | 59 | // Attr indicates valid fields in the attribute event. 60 | // 61 | // These fields are or-ed together to represent the set 62 | // of fields that has been updated by the event. 63 | type Attr uint32 64 | 65 | const ( 66 | AttrMode = Attr(1 << iota) 67 | AttrUID 68 | AttrGID 69 | ) 70 | 71 | // eventRaw is the trasported event on linux waiting 72 | // to be dispatched to master. 73 | // 74 | // Master should translate and lookup using the source 75 | // and target path and translate file events. 76 | type eventRaw struct { 77 | op Op 78 | timestamp time.Time 79 | pid uint32 80 | source path 81 | target path 82 | attr Attr 83 | mode *uint16 84 | dev *uint32 85 | symlink *string 86 | uid *uint32 87 | gid *uint32 88 | } 89 | 90 | // eventRegistry is the common registry holding 91 | // information used for later notification. 92 | type eventRegistry struct { 93 | event eventRaw 94 | targetInode uint64 95 | sourceInode uint64 96 | } 97 | 98 | // Event is standard format of linux directory event. 99 | type Event struct { 100 | Op Op 101 | Timestamp time.Time 102 | PID uint32 103 | Target *string 104 | Source *string 105 | Attr Attr 106 | Mode *uint16 107 | Dev *uint32 108 | Uid *uint32 109 | Gid *uint32 110 | } 111 | 112 | // dispatchPolicy stores information for the dispatcher, 113 | // including the file name corresponding to inode, and 114 | // its related flags. 
115 | // 116 | // while dispatching events, the subscriber works in a 117 | // hierarchical manner, the dispatch policy nearer to 118 | // the leaf will be applied first. 119 | type dispatchPolicy struct { 120 | name string 121 | opFlags Op 122 | } 123 | 124 | // subscriber stores the information for dispatching 125 | // the subscribed events to the subscriber. 126 | type subscriber struct { 127 | ctx context.Context 128 | done *uint8 129 | allOpFlags Op 130 | eventCh chan<- Event 131 | policies map[uint64]dispatchPolicy 132 | } 133 | 134 | // composeSuffix composes the suffix with path. 135 | func composeSuffix(components []string) string { 136 | size := len(components) 137 | result := make([]string, size) 138 | for j := 0; j < size; j++ { 139 | result[j] = components[size-j-1] 140 | } 141 | return filepath.Join(result...) 142 | } 143 | 144 | // evaluatePathPolicy attempts to evaluate the path and 145 | // calculate the policies for the path. 146 | func (s *subscriber) evaluatePathPolicy(p path) (*string, Op) { 147 | paths, inodes := p.extract() 148 | for i, inode := range inodes { 149 | if inode == 0 { 150 | continue 151 | } 152 | policy, ok := s.policies[inode] 153 | if !ok { 154 | continue 155 | } 156 | targetSuffix := composeSuffix(paths[:i]) 157 | result := new(string) 158 | *result = filepath.Join(policy.name, targetSuffix) 159 | return result, policy.opFlags 160 | } 161 | return nil, Op(0) 162 | } 163 | 164 | // dispatch is the handler for dispatching event 165 | // to receivers. 166 | func (s *subscriber) dispatch( 167 | rawEvent eventRaw, visited *uint8, 168 | ) { 169 | if s.done == visited { 170 | // The event has already been dispatched, 171 | // so we won't dispatch it again here. 172 | return 173 | } 174 | s.done = visited 175 | if s.allOpFlags&rawEvent.op == 0 { 176 | return 177 | } 178 | 179 | // Prepare the base for the new dispatching of 180 | // specified file event. 
181 | var event Event 182 | event.Op = rawEvent.op 183 | event.PID = rawEvent.pid 184 | event.Timestamp = rawEvent.timestamp 185 | event.Attr = rawEvent.attr 186 | event.Mode = rawEvent.mode 187 | event.Dev = rawEvent.dev 188 | event.Uid = rawEvent.uid 189 | event.Gid = rawEvent.gid 190 | event.Source = rawEvent.symlink 191 | 192 | // Compose the path parameters for the event. 193 | var opFlags, allFlags Op 194 | event.Target, allFlags = s.evaluatePathPolicy(rawEvent.target) 195 | switch rawEvent.op { 196 | case OpRename, OpLink: 197 | event.Source, opFlags = s.evaluatePathPolicy(rawEvent.source) 198 | allFlags |= opFlags 199 | case OpSymlink: 200 | event.Source = rawEvent.symlink 201 | } 202 | 203 | // Dispatch the collected event to subscriber. 204 | if allFlags&rawEvent.op == 0 { 205 | return 206 | } 207 | select { 208 | case <-s.ctx.Done(): 209 | case s.eventCh <- event: 210 | } 211 | } 212 | 213 | // collector is the collector for the linux file related 214 | // events. It keeps track of file state registries and 215 | // will periodically perform cleanup. 216 | type collector struct { 217 | registries map[uint32]*eventRegistry 218 | dispatchMap *sync.Map 219 | } 220 | 221 | // allocateRename will attempt to allocate a new 222 | // or previously existing registry for rename. 
223 | func (col *collector) allocateRename( 224 | taskPID uint32, event entrySecurityInodeRename, 225 | ) *eventRegistry { 226 | registry := col.registries[taskPID] 227 | if registry != nil { 228 | if registry.event.op != OpRename || 229 | registry.sourceInode != event.SrcDir || 230 | registry.targetInode != event.DstDir { 231 | delete(col.registries, taskPID) 232 | registry = nil 233 | } 234 | } 235 | if registry == nil { 236 | registry = &eventRegistry{ 237 | event: eventRaw{ 238 | op: OpRename, 239 | }, 240 | sourceInode: event.SrcDir, 241 | targetInode: event.DstDir, 242 | } 243 | col.registries[taskPID] = registry 244 | } 245 | return registry 246 | } 247 | 248 | // handleRenameSource handles the event triggered 249 | // when renaming the file and is captured by our 250 | // trace probe. 251 | // 252 | // security_inode_rename(sourcePath, &dentry{ 253 | // = Source, 254 | // }, targetPath, ...) 255 | func (col *collector) handleRenameSource( 256 | event entrySecurityInodeRenameSource, 257 | ) { 258 | registry := col.allocateRename( 259 | event.TaskPID, event.Event) 260 | registry.event.source = event.Source 261 | } 262 | 263 | // handleRenameTarget handles the event triggered 264 | // when renaming the file and is captured by our 265 | // trace probe. 266 | // 267 | // security_inode_rename(sourcePath, ..., 268 | // targetPath, &dentry{ 269 | // = Target, 270 | // }) 271 | func (col *collector) handleRenameTarget( 272 | event entrySecurityInodeRenameTarget, 273 | ) { 274 | registry := col.allocateRename( 275 | event.TaskPID, event.Event) 276 | registry.event.target = event.Target 277 | } 278 | 279 | // handleCreate handles the event triggered when 280 | // creating a file and is captured by our trace probe. 
281 | // 282 | // security_inode_create(targetInode, &dentry{ 283 | // = targetPath, 284 | // }, mode, dev) 285 | func (col *collector) handleCreate( 286 | event entrySecurityInodeCreate, 287 | ) { 288 | registry := &eventRegistry{ 289 | event: eventRaw{ 290 | op: OpCreate, 291 | mode: new(uint16), 292 | target: event.Path, 293 | }, 294 | targetInode: event.Dir, 295 | } 296 | *registry.event.mode = event.Mode 297 | col.registries[event.TaskPID] = registry 298 | } 299 | 300 | // handleMknod handles the event triggered when 301 | // creating a device and is captured by our 302 | // trace probe. 303 | // 304 | // security_inode_mknod(targetInode, &dentry{ 305 | // = targetPath, 306 | // }, mode, dev) 307 | func (col *collector) handleMknod( 308 | event entrySecurityInodeMknod, 309 | ) { 310 | registry := &eventRegistry{ 311 | event: eventRaw{ 312 | op: OpMknod, 313 | mode: new(uint16), 314 | dev: new(uint32), 315 | target: event.Path, 316 | }, 317 | targetInode: event.Dir, 318 | } 319 | *registry.event.mode = event.Mode 320 | *registry.event.dev = event.Dev 321 | col.registries[event.TaskPID] = registry 322 | } 323 | 324 | // handleMkdir handles the event triggered when 325 | // creating a direcotry and is captured by our 326 | // trace probe. 327 | // 328 | // security_inode_mkdir(targetInode, &dentry{ 329 | // = targetPath, 330 | // }, mode) 331 | func (col *collector) handleMkdir( 332 | event entrySecurityInodeMkdir, 333 | ) { 334 | registry := &eventRegistry{ 335 | event: eventRaw{ 336 | op: OpMkdir, 337 | mode: new(uint16), 338 | target: event.Path, 339 | }, 340 | targetInode: event.Dir, 341 | } 342 | *registry.event.mode = event.Mode 343 | col.registries[event.TaskPID] = registry 344 | } 345 | 346 | // allocateLink will attempt to allocate a new 347 | // or previously existing registry for link. 
348 | func (col *collector) allocateLink( 349 | taskPID uint32, event entrySecurityInodeLink, 350 | ) *eventRegistry { 351 | registry := col.registries[taskPID] 352 | if registry != nil { 353 | if registry.event.op != OpLink || 354 | registry.targetInode != event.Dir { 355 | delete(col.registries, taskPID) 356 | registry = nil 357 | } 358 | } 359 | if registry == nil { 360 | registry = &eventRegistry{ 361 | event: eventRaw{ 362 | op: OpLink, 363 | }, 364 | targetInode: event.Dir, 365 | } 366 | col.registries[taskPID] = registry 367 | } 368 | return registry 369 | } 370 | 371 | // handleLinkSource handles the event triggered 372 | // when creating hard link of the file and is captured 373 | // by our trace probe. 374 | // 375 | // security_inode_link(source, &dentry{ 376 | // = Dir, 377 | // }, ...) 378 | func (col *collector) handleLinkSource( 379 | event entrySecurityInodeLinkSource, 380 | ) { 381 | registry := col.allocateLink( 382 | event.TaskPID, event.Event) 383 | registry.event.source = event.Source 384 | } 385 | 386 | // handleRenameTarget handles the event triggered 387 | // when creating hard link of the file and is captured 388 | // by our trace probe. 389 | // 390 | // security_inode_link(..., &dentry{ 391 | // = Target, 392 | // }, target) 393 | func (col *collector) handleLinkTarget( 394 | event entrySecurityInodeLinkTarget, 395 | ) { 396 | registry := col.allocateLink( 397 | event.TaskPID, event.Event) 398 | registry.event.target = event.Target 399 | } 400 | 401 | // handleSymlink handles the event triggered when 402 | // creating soft link of the file and is captured by 403 | // our trace probe. 
404 | // 405 | // security_inode_symlink(targetInode, &dentry{ 406 | // = Target, 407 | // }, source) 408 | func (col *collector) handleSymlink( 409 | event entrySecurityInodeSymlink, 410 | ) { 411 | registry := &eventRegistry{ 412 | event: eventRaw{ 413 | op: OpSymlink, 414 | target: event.Path, 415 | symlink: new(string), 416 | }, 417 | targetInode: event.Dir, 418 | } 419 | *registry.event.symlink = event.Name 420 | col.registries[event.TaskPID] = registry 421 | } 422 | 423 | // handleUnlink handles the event triggered when 424 | // removing a file and is captured by our trace probe. 425 | // 426 | // security_inode_unlink(targetInode, &dentry{ 427 | // = targetPath, 428 | // }) 429 | func (col *collector) handleUnlink( 430 | event entrySecurityInodeUnlink, 431 | ) { 432 | registry := &eventRegistry{ 433 | event: eventRaw{ 434 | op: OpDelete, 435 | target: event.Path, 436 | }, 437 | targetInode: event.Path.I0, 438 | } 439 | col.registries[event.TaskPID] = registry 440 | } 441 | 442 | // handleRmdir handles the event triggered when 443 | // removing a direcotry and is captured by our 444 | // trace probe. 445 | // 446 | // security_inode_rmdir(targetInode, &dentry{ 447 | // = targetPath, 448 | // }) 449 | func (col *collector) handleRmdir( 450 | event entrySecurityInodeRmdir, 451 | ) { 452 | registry := &eventRegistry{ 453 | event: eventRaw{ 454 | op: OpRmdir, 455 | target: event.Path, 456 | }, 457 | targetInode: event.Path.I0, 458 | } 459 | col.registries[event.TaskPID] = registry 460 | } 461 | 462 | // handleSetattr handles the event triggered when 463 | // updating a file attributes and is captured by 464 | // our trace probe. 
465 | // 466 | // security_inode_setattr(&dentry{ 467 | // = targetPath, 468 | // }, &iattr{ 469 | // ia_mode = Mode, 470 | // ia_uid = Uid, 471 | // ia_gid = Gid, 472 | // }) 473 | func (col *collector) handleSetattr( 474 | event entrySecurityInodeSetattr, 475 | ) { 476 | registry := &eventRegistry{ 477 | event: eventRaw{ 478 | op: OpAttrib, 479 | target: event.Path, 480 | }, 481 | targetInode: event.Path.I0, 482 | } 483 | if Attr(event.Valid)&AttrMode != 0 { 484 | registry.event.attr |= AttrMode 485 | registry.event.mode = new(uint16) 486 | *registry.event.mode = event.Mode 487 | } 488 | if Attr(event.Valid)&AttrUID != 0 { 489 | registry.event.attr |= AttrUID 490 | registry.event.uid = new(uint32) 491 | *registry.event.uid = event.Uid 492 | } 493 | if Attr(event.Valid)&AttrGID != 0 { 494 | registry.event.attr |= AttrGID 495 | registry.event.gid = new(uint32) 496 | *registry.event.gid = event.Gid 497 | } 498 | if registry.event.attr == 0 { 499 | // No event we are interested, so we will 500 | // just skip reporting the events. 501 | return 502 | } 503 | col.registries[event.TaskPID] = registry 504 | } 505 | 506 | // handleFsnotify_V2_6_32 handles the fsnotify event 507 | // from 2.6.32 (inclusive) to 5.9 (exclusive). 508 | func (col *collector) handleFsnotify_V2_6_32( 509 | event entryFsnotify_V2_6_32, 510 | ) { 511 | if _, ok := col.registries[event.TaskPID]; !ok { 512 | return 513 | } 514 | col.handleFsnotify(eventFsnotify{ 515 | TaskPID: event.TaskPID, 516 | Timestamp: event.Timestamp, 517 | Inode: uint64(event.Inode), 518 | Access: event.Access, 519 | ModifyAttrib: event.ModifyAttrib, 520 | CloseOpen: event.CloseOpen, 521 | Dentry: event.Dentry, 522 | Filename: event.Filename, 523 | }) 524 | } 525 | 526 | // handleFsnotify_V5_9 handles the fsnotify event 527 | // from 5.9 (inclusive) to now. 
528 | func (col *collector) handleFsnotify_V5_9( 529 | event entryFsnotify_V5_9, 530 | ) { 531 | if _, ok := col.registries[event.TaskPID]; !ok { 532 | return 533 | } 534 | baseEvent := eventFsnotify{ 535 | TaskPID: event.TaskPID, 536 | Timestamp: event.Timestamp, 537 | Access: event.Access, 538 | ModifyAttrib: event.ModifyAttrib, 539 | CloseOpen: event.CloseOpen, 540 | Dentry: event.Dentry, 541 | Filename: event.Filename, 542 | Visited: new(uint8), 543 | } 544 | if event.Inode != 0 { 545 | baseEvent.Inode = uint64(event.Inode) 546 | col.handleFsnotify(baseEvent) 547 | } 548 | if event.Dir != 0 { 549 | baseEvent.Inode = uint64(event.Dir) 550 | col.handleFsnotify(baseEvent) 551 | } 552 | } 553 | 554 | // handleFsnotifyParent_V5_9 handles the fsnotify 555 | // parent event from 5.9 (inclusive) to now. 556 | func (col *collector) handleFsnotifyParent_V5_9( 557 | event entryFsnotifyParent_V5_9, 558 | ) { 559 | if _, ok := col.registries[event.TaskPID]; !ok { 560 | return 561 | } 562 | col.handleFsnotify(eventFsnotify{ 563 | TaskPID: event.TaskPID, 564 | Timestamp: event.Timestamp, 565 | Inode: uint64(event.Inode), 566 | Access: event.Access, 567 | ModifyAttrib: event.ModifyAttrib, 568 | CloseOpen: event.CloseOpen, 569 | Dentry: event.Dentry, 570 | Filename: event.Filename, 571 | }) 572 | } 573 | 574 | // handleFsnotify handles the event triggered when 575 | // fsnotify dispatch call is invoked and is captured 576 | // by our trace probe. 577 | func (col *collector) handleFsnotify( 578 | event eventFsnotify, 579 | ) { 580 | registry := col.registries[event.TaskPID] 581 | if registry == nil { 582 | return 583 | } 584 | 585 | // Judge whether it is dispatch condition. 
586 | switch registry.event.op { 587 | case OpSymlink: 588 | fallthrough 589 | case OpLink: 590 | fallthrough 591 | case OpCreate, OpMkdir, OpMknod: 592 | switch event.Dentry { 593 | case 4: 594 | if event.Inode != registry.targetInode { 595 | return 596 | } 597 | default: 598 | return 599 | } 600 | case OpAttrib: 601 | switch event.ModifyAttrib { 602 | case 2: 603 | if event.Inode != registry.targetInode { 604 | return 605 | } 606 | default: 607 | return 608 | } 609 | case OpDelete, OpRmdir: 610 | switch { 611 | case event.ModifyAttrib == 2: 612 | fallthrough 613 | case event.Dentry == 16: 614 | if event.Inode != registry.targetInode { 615 | return 616 | } 617 | default: 618 | return 619 | } 620 | case OpRename: 621 | switch event.Dentry { 622 | case 1: 623 | if event.Inode != registry.sourceInode { 624 | return 625 | } 626 | case 2: 627 | if event.Inode != registry.targetInode { 628 | return 629 | } 630 | default: 631 | return 632 | } 633 | default: 634 | return 635 | } 636 | 637 | // Dispatch the stored event at this point. 638 | delete(col.registries, event.TaskPID) 639 | registry.event.pid = event.TaskPID 640 | registry.event.timestamp = event.Timestamp 641 | _, targetInodes := registry.event.target.extract() 642 | for _, inode := range targetInodes { 643 | if subs, ok := col.dispatchMap.Load(inode); ok { 644 | for _, sub := range subs.([]*subscriber) { 645 | sub.dispatch(registry.event, event.Visited) 646 | } 647 | } 648 | } 649 | if registry.event.op&(OpRename|OpLink) != 0 { 650 | _, sourceInodes := registry.event.source.extract() 651 | for _, inode := range sourceInodes { 652 | if subs, ok := col.dispatchMap.Load(inode); ok { 653 | for _, sub := range subs.([]*subscriber) { 654 | sub.dispatch(registry.event, event.Visited) 655 | } 656 | } 657 | } 658 | } 659 | } 660 | 661 | // Watcher is the subscription of the dispatch info. 
662 | type Watcher struct { 663 | C <-chan Event 664 | mgr *Manager 665 | cancel context.CancelFunc 666 | sub *subscriber 667 | inodes []*inode.Inode 668 | once sync.Once 669 | } 670 | 671 | func (s *Watcher) Close() { 672 | s.cancel() 673 | s.once.Do(func() { 674 | s.mgr.evict(s.sub) 675 | for _, inode := range s.inodes { 676 | inode.Unpin() 677 | } 678 | s.inodes = nil 679 | }) 680 | runtime.SetFinalizer(s, nil) 681 | } 682 | 683 | // Manager is the manager for all directory events. 684 | type Manager struct { 685 | ctx context.Context 686 | mtx sync.Mutex 687 | subsets map[uint64]map[*subscriber]struct{} 688 | dispatchMap *sync.Map 689 | inodeMgr *inode.Manager 690 | } 691 | 692 | func (m *Manager) evict(sub *subscriber) { 693 | m.mtx.Lock() 694 | defer m.mtx.Unlock() 695 | for inode := range sub.policies { 696 | subset, ok := m.subsets[inode] 697 | if !ok { 698 | continue 699 | } 700 | delete(subset, sub) 701 | if len(subset) == 0 { 702 | delete(m.subsets, inode) 703 | m.dispatchMap.Delete(inode) 704 | } 705 | var remainings []*subscriber 706 | for remaining := range subset { 707 | remainings = append(remainings, remaining) 708 | } 709 | m.dispatchMap.Store(inode, remainings) 710 | } 711 | } 712 | 713 | func (m *Manager) emplace(sub *subscriber) { 714 | m.mtx.Lock() 715 | defer m.mtx.Unlock() 716 | for inode := range sub.policies { 717 | subset, ok := m.subsets[inode] 718 | if !ok { 719 | subset = make(map[*subscriber]struct{}) 720 | m.subsets[inode] = subset 721 | } 722 | subset[sub] = struct{}{} 723 | var updated []*subscriber 724 | for item := range subset { 725 | updated = append(updated, item) 726 | } 727 | m.dispatchMap.Store(inode, updated) 728 | } 729 | } 730 | 731 | type watchPoint struct { 732 | name string 733 | watch func(*inode.Manager) (*inode.Inode, error) 734 | opFlags Op 735 | } 736 | 737 | type option struct { 738 | watchPoints []watchPoint 739 | } 740 | 741 | // Option is the options for creating watcher. 
742 | type Option func(*option) 743 | 744 | // WatchFile specifies a file for watching. 745 | func WatchFile(opFlags Op, file string) Option { 746 | return func(opt *option) { 747 | opt.watchPoints = append(opt.watchPoints, watchPoint{ 748 | name: file, 749 | watch: func(mgr *inode.Manager) (*inode.Inode, error) { 750 | return mgr.PinFile(file) 751 | }, 752 | opFlags: opFlags, 753 | }) 754 | } 755 | } 756 | 757 | // WatchDir specifies a directory for watching. 758 | func WatchDir(opFlags Op, dir string) Option { 759 | return func(opt *option) { 760 | opt.watchPoints = append(opt.watchPoints, watchPoint{ 761 | name: dir, 762 | watch: func(mgr *inode.Manager) (*inode.Inode, error) { 763 | return mgr.PinDir(dir) 764 | }, 765 | opFlags: opFlags, 766 | }) 767 | } 768 | } 769 | 770 | // WithOptions aggregates a set of options for execution. 771 | func WithOptions(opts ...Option) Option { 772 | return func(option *option) { 773 | for _, opt := range opts { 774 | opt(option) 775 | } 776 | } 777 | } 778 | 779 | // Watch with specified options and returns error. 780 | func (mgr *Manager) Watch(opts ...Option) (*Watcher, error) { 781 | var option option 782 | WithOptions(opts...)(&option) 783 | 784 | // Attempt to create pins for specified watchers. 
785 | created := false 786 | ctx, cancel := context.WithCancel(mgr.ctx) 787 | defer func() { 788 | if !created { 789 | cancel() 790 | } 791 | }() 792 | eventCh := make(chan Event) 793 | subscriber := &subscriber{ 794 | ctx: ctx, 795 | done: new(uint8), 796 | eventCh: eventCh, 797 | policies: make(map[uint64]dispatchPolicy), 798 | } 799 | result := &Watcher{ 800 | C: eventCh, 801 | mgr: mgr, 802 | cancel: cancel, 803 | sub: subscriber, 804 | } 805 | for _, watchPoint := range option.watchPoints { 806 | pin, err := watchPoint.watch(mgr.inodeMgr) 807 | if err != nil { 808 | return nil, err 809 | } 810 | defer func() { 811 | if !created { 812 | pin.Unpin() 813 | } 814 | }() 815 | subscriber.policies[pin.Inode()] = dispatchPolicy{ 816 | name: watchPoint.name, 817 | opFlags: watchPoint.opFlags, 818 | } 819 | subscriber.allOpFlags |= watchPoint.opFlags 820 | result.inodes = append(result.inodes, pin) 821 | } 822 | 823 | // Attempt to emplace all the modifications to map 824 | // and return the result. 825 | defer func() { 826 | if !created { 827 | mgr.evict(subscriber) 828 | } 829 | }() 830 | mgr.emplace(subscriber) 831 | runtime.SetFinalizer(result, func(value *Watcher) { 832 | value.Close() 833 | }) 834 | created = true 835 | return result, nil 836 | } 837 | 838 | // stackRcnotifyManager will attempt to create a rcnotify 839 | // manager and stack it for later operations. 840 | func stackRcnotifyManager( 841 | next func(*Manager) error, 842 | rootCtx context.Context, manager systracer.Manager, 843 | inodeMgr *inode.Manager, 844 | ) error { 845 | dispatchMap := new(sync.Map) 846 | result := &Manager{ 847 | ctx: rootCtx, 848 | subsets: make(map[uint64]map[*subscriber]struct{}), 849 | dispatchMap: dispatchMap, 850 | inodeMgr: inodeMgr, 851 | } 852 | collector := &collector{ 853 | registries: make(map[uint32]*eventRegistry), 854 | dispatchMap: dispatchMap, 855 | } 856 | 857 | // Attach to the fsnotify dispatcher first. 
858 | var fsnotifyHandler interface{} 859 | fsnotifyHandler = collector.handleFsnotify_V2_6_32 860 | if kversion.Current >= kversion.Must("5.9") { 861 | fsnotifyHandler = collector.handleFsnotify_V5_9 862 | } 863 | fsnotify, _, err := manager.TraceKProbe( 864 | "fsnotify", fsnotifyHandler) 865 | if err != nil { 866 | return err 867 | } 868 | defer fsnotify.Close() 869 | 870 | // There's also fsnotify parent handler for those 871 | // version >= 5.9, we will also register them here. 872 | var fsnotifyParent systracer.Trace 873 | if kversion.Current >= kversion.Must("5.9") { 874 | fsnotifyParent, _, err = manager.TraceKProbe( 875 | "__fsnotify_parent", 876 | collector.handleFsnotifyParent_V5_9) 877 | if err != nil { 878 | return err 879 | } 880 | defer fsnotifyParent.Close() 881 | } 882 | 883 | // Define a collection of probe points and their 884 | // associated probes for registering. 885 | probes := map[string][]interface{}{ 886 | "security_inode_rename": { 887 | collector.handleRenameSource, 888 | collector.handleRenameTarget, 889 | }, 890 | "security_inode_create": { 891 | collector.handleCreate, 892 | }, 893 | "security_inode_mknod": { 894 | collector.handleMknod, 895 | }, 896 | "security_inode_mkdir": { 897 | collector.handleMkdir, 898 | }, 899 | "security_inode_link": { 900 | collector.handleLinkSource, 901 | collector.handleLinkTarget, 902 | }, 903 | "security_inode_symlink": { 904 | collector.handleSymlink, 905 | }, 906 | "security_inode_unlink": { 907 | collector.handleUnlink, 908 | }, 909 | "security_inode_rmdir": { 910 | collector.handleRmdir, 911 | }, 912 | "security_inode_setattr": { 913 | collector.handleSetattr, 914 | }, 915 | } 916 | var lastSyncCh <-chan struct{} 917 | var registries []systracer.Trace 918 | for point, handlers := range probes { 919 | for _, handler := range handlers { 920 | registry, syncCh, err := manager. 
921 | TraceKProbe(point, handler) 922 | if err != nil { 923 | return errors.Wrapf(err, 924 | "initializing %s", handler) 925 | } 926 | defer registry.Close() 927 | lastSyncCh = syncCh 928 | registries = append(registries, registry) 929 | } 930 | } 931 | 932 | // Wait for synchronization of the kprobe registry. 933 | select { 934 | case <-rootCtx.Done(): 935 | return nil 936 | case <-lastSyncCh: 937 | } 938 | fsnotify.SetEnabled(true) 939 | if fsnotifyParent != nil { 940 | fsnotifyParent.SetEnabled(true) 941 | } 942 | for _, registry := range registries { 943 | registry.SetEnabled(true) 944 | } 945 | return next(result) 946 | } 947 | 948 | // Module is the DI module of the rcnotify manager. 949 | // 950 | // The module requires a context, a trace manager and 951 | // an inode manager, and injects a rcnotify manager. 952 | var Module = shaft.Stack(stackRcnotifyManager) 953 | -------------------------------------------------------------------------------- /manager.go: -------------------------------------------------------------------------------- 1 | package systracer 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "io/ioutil" 8 | "os" 9 | "path/filepath" 10 | "reflect" 11 | "strconv" 12 | "sync" 13 | "syscall" 14 | "time" 15 | "unsafe" 16 | 17 | "github.com/pkg/errors" 18 | "go.uber.org/zap" 19 | "golang.org/x/sync/errgroup" 20 | "golang.org/x/sys/unix" 21 | 22 | "github.com/chaitin/systracer/pkg/alloc" 23 | "github.com/chaitin/systracer/pkg/kversion" 24 | ) 25 | 26 | // epollNotWorking indicates whether there's support for 27 | // polling tracing pipe with epoll. 28 | // 29 | // XXX: on linux version 3.10, the epoll will fail to 30 | // generate edge trigger event for tracefs files, rendering 31 | // the trace to be not working. 32 | // 33 | // To prevent so, if the polling is not working, we will 34 | // enforce it to always read something from the buffer. 
35 | var epollNotWorking = kversion.Current < kversion.Must("3.11") 36 | 37 | // traceCreateRequest is the request for creating an 38 | // instance of trace, and wait for creation completion. 39 | type traceCreateRequest struct { 40 | handle *traceHandle 41 | err error 42 | handler interface{} 43 | desc *traceEventDescriptor 44 | typ string 45 | tracepoint string 46 | doneCh chan struct{} 47 | syncCh <-chan struct{} 48 | } 49 | 50 | // createTrace is the request to create a trace object 51 | // with the dispatched request. 52 | func (mgr *traceManager) createTrace( 53 | typ, tracepoint string, 54 | handler interface{}, desc *traceEventDescriptor, 55 | ) (Trace, <-chan struct{}, error) { 56 | req := &traceCreateRequest{ 57 | handler: handler, 58 | desc: desc, 59 | typ: typ, 60 | tracepoint: tracepoint, 61 | doneCh: make(chan struct{}), 62 | } 63 | select { 64 | case <-mgr.rootCtx.Done(): 65 | return nil, nil, mgr.rootCtx.Err() 66 | case mgr.createCh <- req: 67 | } 68 | 69 | select { 70 | case <-mgr.rootCtx.Done(): 71 | return nil, nil, mgr.rootCtx.Err() 72 | case <-req.doneCh: 73 | var handle Trace 74 | if req.handle != nil { 75 | handle = req.handle 76 | } 77 | return handle, req.syncCh, req.err 78 | } 79 | } 80 | 81 | // traceManager implements tracing.TraceManager. 82 | type traceManager struct { 83 | rootCtx context.Context 84 | lastErr error 85 | createCh chan *traceCreateRequest 86 | fetchCh chan *fetchWriterStateRequest 87 | } 88 | 89 | // cleanupNamespace cleans up specified namespace. 
90 | func cleanupNamespace(log *zap.SugaredLogger, root, namespace string) { 91 | logger := log.With( 92 | zap.String("root", root), 93 | zap.String("namespace", namespace), 94 | ) 95 | if err := removeAllProbe( 96 | root, "kprobe_events", namespace); err != nil { 97 | logger.Infof("remove kprobes: %s", err) 98 | } 99 | if err := removeAllProbe( 100 | root, "uprobe_events", namespace); err != nil { 101 | logger.Infof("remove uprobes: %s", err) 102 | } 103 | if err := removeInstance(root, namespace); err != nil { 104 | logger.Infof("remove instance: %s", err) 105 | } 106 | } 107 | 108 | // traceManagerState holds registry of traces. 109 | type traceManagerState struct { 110 | rootCtx context.Context 111 | root string 112 | namespace string 113 | traceID uint64 114 | registries map[uint64]*traceHandle 115 | enableCh chan *traceEnableRequest 116 | closeCh chan *traceCloseRequest 117 | conditionCh chan *conditionUpdateRequest 118 | syncCh chan struct{} 119 | } 120 | 121 | // destroy will clean up all previously allocated 122 | // instances of traces. 123 | func (s *traceManagerState) destroy() { 124 | for _, registry := range s.registries { 125 | registry.destroy(s.root, s.namespace) 126 | } 127 | } 128 | 129 | // markUnsync is the action to mark current manager state 130 | // is not synchronized with the writer, and must perform 131 | // the synchronization action, returning channel for 132 | // synchronization completion notification. 133 | func (s *traceManagerState) markUnsync() <-chan struct{} { 134 | if s.syncCh == nil { 135 | s.syncCh = make(chan struct{}) 136 | } 137 | return s.syncCh 138 | } 139 | 140 | // markSync is the action to mark current manager state 141 | // as up-to-date with the writer thread. 
142 | func (s *traceManagerState) markSync() { 143 | if s.syncCh == nil { 144 | return 145 | } 146 | close(s.syncCh) 147 | s.syncCh = nil 148 | 149 | // XXX: the request synchronization is a process of 150 | // copy-on-write, since the number of items in the 151 | // table is far less than data to process. 152 | newRegistries := make(map[uint64]*traceHandle) 153 | for id, handle := range s.registries { 154 | newRegistries[id] = handle 155 | } 156 | s.registries = newRegistries 157 | } 158 | 159 | // handleCreate will handle the request of creation. 160 | func (s *traceManagerState) handleCreate( 161 | request *traceCreateRequest, 162 | ) { 163 | defer close(request.doneCh) 164 | defer func() { 165 | if err := recover(); err != nil { 166 | request.err = errors.Wrap(errors.Errorf( 167 | "handleCreate panics: %s", err), 168 | "allocate trace") 169 | } 170 | }() 171 | request.err = func() error { 172 | // Attempt to allocate a new identity. 173 | newTraceID := alloc.Alloc(s.traceID, 0, 174 | func(id uint64) bool { 175 | return s.registries[id] != nil 176 | }) 177 | if newTraceID == 0 { 178 | return errors.Wrap(errors.New( 179 | "no available trace ID"), 180 | "allocate trace") 181 | } 182 | s.traceID = newTraceID 183 | 184 | // Allocate and initialize the trace. 
185 | handle := &traceHandle{ 186 | id: newTraceID, 187 | createTime: uint64(time.Now().UnixNano()), 188 | ctx: s.rootCtx, 189 | enableCh: s.enableCh, 190 | closeCh: s.closeCh, 191 | conditionCh: s.conditionCh, 192 | handler: request.handler, 193 | desc: request.desc, 194 | typ: request.typ, 195 | } 196 | if err := handle.init(s.root, s.namespace, 197 | request.tracepoint); err != nil { 198 | if err != ErrBadTracePoint { 199 | return errors.Wrap(err, 200 | "initialize trace") 201 | } 202 | return ErrBadTracePoint 203 | } 204 | s.registries[newTraceID] = handle 205 | request.handle = handle 206 | request.syncCh = s.markUnsync() 207 | return nil 208 | }() 209 | } 210 | 211 | // handleEnable will handle the request of start. 212 | func (s *traceManagerState) handleEnable( 213 | request *traceEnableRequest, 214 | ) error { 215 | defer close(request.doneCh) 216 | if request.handle.id == 0 { 217 | return nil 218 | } 219 | return request.handle.setEnabled( 220 | s.root, s.namespace, request.enabled) 221 | } 222 | 223 | // handleRemove will handle the request of deletion. 224 | func (s *traceManagerState) handleRemove( 225 | request *traceCloseRequest, 226 | ) { 227 | defer close(request.doneCh) 228 | if request.handle.id == 0 { 229 | return 230 | } 231 | delete(s.registries, request.handle.id) 232 | request.handle.destroy(s.root, s.namespace) 233 | s.markUnsync() 234 | } 235 | 236 | // handleCondition will handle the request of condition. 237 | func (s *traceManagerState) handleCondition( 238 | request *conditionUpdateRequest, 239 | ) { 240 | defer close(request.doneCh) 241 | if request.handle.id == 0 { 242 | return 243 | } 244 | err := request.handle.updateCondition( 245 | s.root, s.namespace, request.condition) 246 | if err != nil { 247 | request.err = errors.Wrap(err, 248 | "update trace condition") 249 | } 250 | } 251 | 252 | // traceWriterState is the state held on writer thread. 
253 | type traceWriterState struct { 254 | registries map[uint64]*traceHandle 255 | baseTime time.Time 256 | baseEpoch time.Duration 257 | logger *zap.SugaredLogger 258 | } 259 | 260 | // fetchWriterStateRequest is an internal request for 261 | // retrieving trace writer state and modify it. 262 | type fetchWriterStateRequest struct { 263 | state *traceWriterState 264 | doneCh chan struct{} 265 | } 266 | 267 | // fetchWriterState requests for the trace writer state. 268 | func (mgr *traceManager) fetchWriterState() ( 269 | *traceWriterState, error, 270 | ) { 271 | req := &fetchWriterStateRequest{ 272 | doneCh: make(chan struct{}), 273 | } 274 | select { 275 | case <-mgr.rootCtx.Done(): 276 | return nil, mgr.rootCtx.Err() 277 | case mgr.fetchCh <- req: 278 | } 279 | select { 280 | case <-mgr.rootCtx.Done(): 281 | return nil, mgr.rootCtx.Err() 282 | case <-req.doneCh: 283 | return req.state, nil 284 | } 285 | } 286 | 287 | // pow10 is the series of exponents to 10^exponent values. 288 | var pow10 = [10]uint64{ 289 | 1, 290 | 10, 291 | 100, 292 | 1000, 293 | 10000, 294 | 100000, 295 | 1000000, 296 | 10000000, 297 | 100000000, 298 | 1000000000, 299 | } 300 | 301 | // parseSecond parses the number representing the value 302 | // of second (with period dot). 303 | func parseSecond(value []byte) (time.Duration, error) { 304 | dotIndex := bytes.Index(value, []byte(".")) 305 | var beforeDot, afterDot []byte 306 | if dotIndex < 0 { 307 | beforeDot = value 308 | afterDot = nil 309 | } else { 310 | beforeDot = value[:dotIndex] 311 | afterDot = value[dotIndex+1:] 312 | } 313 | var result int64 314 | 315 | // Parse the component before the dot. 316 | if len(beforeDot) > 0 { 317 | val, err := strconv.ParseUint( 318 | string(beforeDot), 10, 64) 319 | if err != nil { 320 | return time.Duration(0), err 321 | } 322 | result += int64(val * pow10[9]) 323 | } 324 | 325 | // Parse the component after the dot. 
326 | if len(afterDot) > 0 { 327 | if len(afterDot) > 9 { 328 | afterDot = afterDot[0:9] 329 | } 330 | val, err := strconv.ParseUint( 331 | string(afterDot), 10, 64) 332 | if err != nil { 333 | return time.Duration(0), err 334 | } 335 | result += int64(val * pow10[9-len(afterDot)]) 336 | } 337 | 338 | return time.Duration(result), nil 339 | } 340 | 341 | // handleData will process the input of reader. 342 | // 343 | // This operation will limit the epoch of event and 344 | // produces the end epoch, which will ensures that 345 | // events will not process more than once when used 346 | // under the circumstances like side chain mitigating. 347 | func (s *traceWriterState) handleData( 348 | input []byte, startEpoch time.Duration, 349 | ) time.Duration { 350 | limitEpoch := startEpoch 351 | 352 | // Loop and parse input data. 353 | for len(input) > 0 { 354 | func() { 355 | var err error 356 | 357 | // Skip the current strip of input and find 358 | // the dash character. 359 | if len(input) < 17 || input[16] != '-' { 360 | return 361 | } 362 | input = input[17:] 363 | 364 | // Skip and read the PID sequence. 365 | var taskPID uint32 366 | for i := 0; i < len(input); i++ { 367 | if input[i] == ' ' { 368 | value, err := strconv.ParseUint( 369 | string(input[:i]), 10, 32) 370 | input = input[i+1:] 371 | if err != nil { 372 | s.logger.Debugf( 373 | "parse taskid %q: %s", 374 | string(input[:i]), err) 375 | return 376 | } 377 | taskPID = uint32(value) 378 | break 379 | } 380 | if input[i] < '0' || input[i] > '9' { 381 | return 382 | } 383 | } 384 | 385 | // Skip the central portion of CPUID and IRQ. 386 | input = bytes.TrimLeft(input, " ") 387 | for i := 0; i < len(input); i++ { 388 | if input[i] == ' ' { 389 | input = input[i+1:] 390 | break 391 | } 392 | } 393 | for i := 0; i < len(input); i++ { 394 | if input[i] == ' ' { 395 | input = input[i+1:] 396 | break 397 | } 398 | } 399 | 400 | // Parse the duration since the timepoint of 401 | // start of the boot time. 
(If this could not 402 | // be completed, the timestamp will be now). 403 | var epoch time.Duration 404 | for i := 0; i < len(input); i++ { 405 | if input[i] == ':' { 406 | epoch, err = parseSecond( 407 | bytes.TrimSpace(input[:i])) 408 | if err != nil { 409 | s.logger.Debugf( 410 | "parse epoch %q: %s", 411 | string(input[:i]), err) 412 | return 413 | } 414 | input = input[i+1:] 415 | break 416 | } 417 | if input[i] == '.' || input[i] == ' ' { 418 | continue 419 | } 420 | if input[i] < '0' || input[i] > '9' { 421 | return 422 | } 423 | } 424 | timestamp := s.baseTime.Add(epoch - s.baseEpoch) 425 | 426 | // Judge whether the event is earlier than the 427 | // limit epoch, and we will just parse and skip 428 | // that event if it happens so. 429 | if limitEpoch != 0 && limitEpoch >= epoch { 430 | return 431 | } 432 | limitEpoch = epoch 433 | 434 | // Read the portion of the message key. 435 | var key []byte 436 | for i := 0; i < len(input); i++ { 437 | if input[i] == ':' { 438 | key = input[:i] 439 | input = input[i+1:] 440 | break 441 | } else if input[i] == '\n' { 442 | key = input[:i] 443 | input = input[i:] 444 | break 445 | } 446 | } 447 | createTime, id := parseProbeName( 448 | bytes.TrimSpace(key)) 449 | if id == 0 { 450 | return 451 | } 452 | handle := s.registries[id] 453 | if handle == nil || 454 | handle.createTime != createTime { 455 | return 456 | } 457 | 458 | // Skip the parenthesis of trace. 459 | input = bytes.TrimLeft(input, " ") 460 | if len(input) > 0 && input[0] == '(' { 461 | for i := 0; i < len(input); i++ { 462 | if input[i] == ')' { 463 | input = input[i+1:] 464 | break 465 | } 466 | } 467 | } 468 | input = bytes.TrimLeft(input, " ") 469 | 470 | // A counter for recording whether the handle 471 | // has been called successfully. 
472 | var handleSuccess bool 473 | defer func() { 474 | handle.complete(handleSuccess) 475 | }() 476 | defer func() { 477 | if err := recover(); err != nil { 478 | s.logger.Errorf( 479 | "handle #%d panics: %s", 480 | handle.id, err) 481 | } 482 | }() 483 | 484 | // Attempt to allocate the instance of 485 | // event before we call handler. 486 | argument := reflect.New(handle.desc.typ) 487 | baseEvent := (*Event)( 488 | unsafe.Pointer(argument.Pointer())) 489 | baseEvent.TaskPID = taskPID 490 | baseEvent.Timestamp = timestamp 491 | baseEvent.epoch = epoch 492 | 493 | // Fill the fields in the log event. 494 | offset, err := handle.desc.fill( 495 | argument.Pointer(), input) 496 | input = input[offset:] 497 | if err != nil { 498 | s.logger.Errorf( 499 | "handle #%d errors: %s", 500 | handle.id, err) 501 | return 502 | } 503 | 504 | // If the handle is not enabled, just don't 505 | // invoke the function and return. 506 | if !handle.enabled { 507 | return 508 | } 509 | 510 | // Invoke the function and complete the processing. 511 | f := reflect.ValueOf(handle.handler) 512 | _ = f.Call([]reflect.Value{ 513 | reflect.Indirect(argument), 514 | }) 515 | handleSuccess = true 516 | }() 517 | 518 | // Seek for the next endline and forward. 519 | index := bytes.Index(input, []byte("\n")) 520 | if index < 0 { 521 | break 522 | } 523 | input = input[index+1:] 524 | } 525 | 526 | return limitEpoch 527 | } 528 | 529 | // maxReadPacketSize is the maximum size allowed for 530 | // the manager reader packet. 531 | const maxReadPacketSize = 10 * 1024 * 1024 532 | 533 | // runReaderThread will execute the reader thread 534 | // with specified pipe and channel. 
535 | func (mgr *traceManager) runReaderThread( 536 | tracePipe *os.File, spliceIn, spliceOut int, 537 | sendCh chan<- []byte, 538 | ) error { 539 | var err error 540 | conn, err := tracePipe.SyscallConn() 541 | if err != nil { 542 | return errors.Wrap(err, "syscall connect") 543 | } 544 | 545 | for { 546 | var data []byte 547 | tracePipeConsume := func(fd uintptr) error { 548 | for len(data) < maxReadPacketSize { 549 | // XXX: trace pipe file supports splicing right 550 | // at its initial implementation, and unlike 551 | // its read counterpart, it contains nearly no 552 | // backward goto statement, which reduces its 553 | // chance for triggering known bug in the kernel. 554 | n, err := unix.Splice( 555 | int(fd), nil, spliceOut, nil, 556 | maxReadPacketSize, unix.SPLICE_F_NONBLOCK) 557 | if n > 0 { 558 | // Read and splice next data in buffer. 559 | buf := make([]byte, n) 560 | m, err := syscall.Read(spliceIn, buf) 561 | if err != nil { 562 | return err 563 | } 564 | data = append(data, buf[:m]...) 565 | } else if n == 0 { 566 | // No more data to read now, we will 567 | // just exit and return error. 568 | return syscall.EBADF 569 | } else if err == syscall.EAGAIN || 570 | err == syscall.EWOULDBLOCK || 571 | err == syscall.EINTR { 572 | // Current buffer has been emptied, 573 | // now we should perform the action. 574 | return nil 575 | } else { 576 | return err 577 | } 578 | } 579 | return nil 580 | } 581 | 582 | if epollNotWorking { 583 | // If epoll is not working, we will always 584 | // attempt to read from the epoll pipe, this 585 | // requires the minimum limit timeout to be 586 | // non-zero to prevent creating a busy looping. 
587 | _ = tracePipeConsume(tracePipe.Fd()) 588 | } else { 589 | var innerErr error 590 | if err := conn.Read(func(fd uintptr) bool { 591 | innerErr := tracePipeConsume(fd) 592 | if innerErr != nil { 593 | return true 594 | } 595 | return len(data) > 0 596 | }); err != nil { 597 | // XXX: the error is from standard library, 598 | // internal/poll.ErrFileClosing, the piece 599 | // of code above is provided by standard 600 | // library, so it is safe to do so. 601 | if err.Error() == "use of closed file" { 602 | err = nil 603 | } 604 | return errors.Wrap(err, "read pipe") 605 | } 606 | if innerErr != nil { 607 | return errors.Wrap(innerErr, "read pipe") 608 | } 609 | } 610 | 611 | // Create and copy out buffer, and send data 612 | // back to the manager thread. 613 | select { 614 | case <-mgr.rootCtx.Done(): 615 | return nil 616 | case sendCh <- data: 617 | } 618 | } 619 | } 620 | 621 | // synchronizeRegistryRequest is the request communicating 622 | // between the master and writer. 623 | type synchronizeRegistryRequest struct { 624 | registries map[uint64]*traceHandle 625 | } 626 | 627 | // currentSyncRequest retrieve the current request of 628 | // synchronization from the trace manager state. 629 | func (s *traceManagerState) currentSyncRequest() ( 630 | request *synchronizeRegistryRequest, 631 | ) { 632 | if s.syncCh == nil { 633 | return nil 634 | } 635 | return &synchronizeRegistryRequest{ 636 | registries: s.registries, 637 | } 638 | } 639 | 640 | // minimumTickerInterval is the interval which is the lowest 641 | // frequecy the writer thread could operate on. 642 | var minimumTickerInterval = 50 * time.Millisecond 643 | 644 | // runWriterThread will execute the writer thread for 645 | // processing data from the reader and side chain. 
646 | func (mgr *traceManager) runWriterThread( 647 | syncCh <-chan *synchronizeRegistryRequest, 648 | receiveCh <-chan []byte, limitInterval time.Duration, 649 | logger *zap.SugaredLogger, 650 | ) error { 651 | // Writer state for handling the dispatch relation 652 | // of the trace data payload. 653 | state := &traceWriterState{ 654 | registries: make(map[uint64]*traceHandle), 655 | logger: logger, 656 | } 657 | 658 | // Initialize the ticker which limits the reader 659 | // production rate. 660 | var tick *time.Ticker 661 | defer func() { 662 | if tick != nil { 663 | tick.Stop() 664 | } 665 | }() 666 | 667 | // Clamp the minimum of timeout to a value 668 | // so that the reader thread will not be trapped 669 | // in a raging busy loop in realtime mode. 670 | if epollNotWorking { 671 | if limitInterval < minimumTickerInterval { 672 | limitInterval = minimumTickerInterval 673 | } 674 | } 675 | 676 | // Must not be too small, or delivering 677 | // time event will iteself brings load. 678 | if limitInterval > minimumTickerInterval { 679 | tick = time.NewTicker(limitInterval) 680 | } 681 | 682 | // Execute the writer thread for handling data 683 | // from the reader thread and side chain. 684 | received := false 685 | for { 686 | // Create the channel of tick flipping and 687 | // reader consuming. 688 | var timerCh <-chan time.Time 689 | var currentReceiveCh <-chan []byte 690 | if tick != nil && received { 691 | timerCh = tick.C 692 | currentReceiveCh = nil 693 | } else { 694 | timerCh = nil 695 | currentReceiveCh = receiveCh 696 | } 697 | 698 | // Wait for the next tick for reception. 
699 | select { 700 | case <-mgr.rootCtx.Done(): 701 | return nil 702 | case req := <-mgr.fetchCh: 703 | req.state = state 704 | close(req.doneCh) 705 | case data := <-currentReceiveCh: 706 | received = true 707 | _ = state.handleData(data, time.Duration(0)) 708 | case <-timerCh: 709 | received = false 710 | case request := <-syncCh: 711 | state.registries = request.registries 712 | } 713 | } 714 | } 715 | 716 | // runMasterThread will execute the master thread 717 | // after the environment has been setup. 718 | func (mgr *traceManager) runMasterThread( 719 | tracePipe *os.File, spliceIn, spliceOut int, 720 | root, namespace string, log *zap.SugaredLogger, 721 | syncCh chan *synchronizeRegistryRequest, 722 | ) error { 723 | defer cleanupNamespace(log, root, namespace) 724 | defer func() { _ = tracePipe.Close() }() 725 | defer func() { 726 | _ = syscall.Close(spliceIn) 727 | _ = syscall.Close(spliceOut) 728 | }() 729 | 730 | // Registries of all available probes. 731 | state := &traceManagerState{ 732 | rootCtx: mgr.rootCtx, 733 | root: root, 734 | namespace: namespace, 735 | registries: make(map[uint64]*traceHandle), 736 | enableCh: make(chan *traceEnableRequest), 737 | closeCh: make(chan *traceCloseRequest), 738 | conditionCh: make(chan *conditionUpdateRequest), 739 | } 740 | defer state.destroy() 741 | 742 | // Loop and handle trace manager events. 
743 | for { 744 | var currentSyncCh chan<- *synchronizeRegistryRequest 745 | syncRequest := state.currentSyncRequest() 746 | if syncRequest != nil { 747 | currentSyncCh = syncCh 748 | } 749 | select { 750 | case <-mgr.rootCtx.Done(): 751 | return nil 752 | case req := <-mgr.createCh: 753 | state.handleCreate(req) 754 | case req := <-state.enableCh: 755 | if err := state.handleEnable(req); err != nil { 756 | log.Errorf( 757 | "cannot enable handle #%d: %s", 758 | req.handle.id, err) 759 | } 760 | case req := <-state.closeCh: 761 | state.handleRemove(req) 762 | case req := <-state.conditionCh: 763 | state.handleCondition(req) 764 | case currentSyncCh <- syncRequest: 765 | state.markSync() 766 | } 767 | } 768 | } 769 | 770 | // newInternal will create an instance of the manager. 771 | func newInternal( 772 | ctx context.Context, group *errgroup.Group, options ...Option, 773 | ) (*traceManager, error) { 774 | var err error 775 | option := newOption() 776 | WithOptions(options...)(option) 777 | logger := option.logger.Named("systracer").Sugar() 778 | root := option.tracefsPath 779 | namespace := option.instanceName 780 | 781 | // Verify that the specified file system is tracefs 782 | // or debugfs, the debugfs directory must have last 783 | // component name of tracing. 784 | var fs unix.Statfs_t 785 | if err := unix.Statfs(root, &fs); err != nil { 786 | return nil, err 787 | } 788 | isValidFileSystem := false 789 | if fs.Type == unix.TRACEFS_MAGIC { 790 | isValidFileSystem = true 791 | } else if fs.Type == unix.DEBUGFS_MAGIC && 792 | filepath.Base(root) == "tracing" { 793 | isValidFileSystem = true 794 | } 795 | if !isValidFileSystem { 796 | return nil, errors.Errorf( 797 | "invalid file system with magic %x", fs.Type) 798 | } 799 | 800 | // Attempt to clean up previous run pass of manager. 
801 | hasCreated := false 802 | cleanupNamespace(logger, root, namespace) 803 | defer func() { 804 | if !hasCreated { 805 | cleanupNamespace(logger, root, namespace) 806 | } 807 | }() 808 | 809 | // Create a new namespace under the specified directory. 810 | if err := unix.Mkdir(filepath.Join(root, "instances", 811 | namespace), 0600); err != nil && err != unix.EEXIST { 812 | return nil, errors.Errorf( 813 | "cannot create instance %q: %s", namespace, err) 814 | } 815 | 816 | // Clear the content of previous trace. 817 | if err = ioutil.WriteFile(filepath.Join( 818 | root, "instances", namespace, "tracing_on"), 819 | []byte("0"), os.FileMode(0600)); err != nil { 820 | return nil, err 821 | } 822 | if err = ioutil.WriteFile(filepath.Join( 823 | root, "instances", namespace, "trace"), 824 | []byte(""), os.FileMode(0600)); err != nil { 825 | return nil, err 826 | } 827 | 828 | // Setup trace data recording parameters. 829 | if err = ioutil.WriteFile(filepath.Join( 830 | root, "instances", namespace, "trace_clock"), 831 | []byte("global"), os.FileMode(0600)); err != nil { 832 | return nil, err 833 | } 834 | traceOptions := []string{ 835 | "print-parent", "nosym-offset", "nosym-addr", 836 | "noverbose", "nohex", "nobin", "noblock", 837 | "nostacktrace", "trace_printk", "noftrace-preempt", 838 | "nobranch", "noannotate", "nouserstacktrace", 839 | "nosym-userobj", "noprintk-msg-only", 840 | "context-info", "nolatency-format", 841 | "nosleep-time", "nograph-time", 842 | "norecord-cmd", "norecord-tgid", 843 | "nodisable-on-free", "irq-info", 844 | "nomarkers", "nofunction-trace", 845 | "notest_nop_accept", "notest_nop_reject", 846 | } 847 | for _, traceOption := range traceOptions { 848 | _ = ioutil.WriteFile(filepath.Join( 849 | root, "instances", namespace, "trace_options"), 850 | []byte(traceOption), os.FileMode(0600)) 851 | } 852 | 853 | // Re-enable the trace instance after setup. 
854 | if err = ioutil.WriteFile(filepath.Join( 855 | root, "instances", namespace, "tracing_on"), 856 | []byte("1"), os.FileMode(0600)); err != nil { 857 | return nil, err 858 | } 859 | 860 | // Attempt to open the trace pipe of the manager. 861 | fd, err := syscall.Open(filepath.Join( 862 | root, "instances", namespace, "trace_pipe"), 863 | syscall.O_RDONLY|syscall.O_NONBLOCK, 0400) 864 | if err != nil { 865 | return nil, err 866 | } 867 | tracePipe := os.NewFile(uintptr(fd), "trace_pipe") 868 | defer func() { 869 | if !hasCreated { 870 | _ = tracePipe.Close() 871 | } 872 | }() 873 | 874 | // Attempt to create splice pipe for reading. 875 | var spliceFd [2]int 876 | if err := syscall.Pipe2(spliceFd[:], 877 | syscall.O_NONBLOCK|syscall.O_CLOEXEC); err != nil { 878 | return nil, err 879 | } 880 | spliceIn, spliceOut := spliceFd[0], spliceFd[1] 881 | defer func() { 882 | if !hasCreated { 883 | _ = syscall.Close(spliceIn) 884 | _ = syscall.Close(spliceOut) 885 | } 886 | }() 887 | 888 | // Attempt update the capacity of the trace pipe to 889 | // increase the capacity of the event tracing. 890 | // 891 | // However the program still works without this 892 | // setup, it is just an optional optimization. 893 | _, _ = unix.FcntlInt(uintptr(spliceOut), 894 | unix.F_SETPIPE_SZ, maxReadPacketSize) 895 | 896 | // Start the new trace manager and return. 
897 | receiveCh := make(chan []byte) 898 | syncCh := make(chan *synchronizeRegistryRequest) 899 | manager := &traceManager{ 900 | rootCtx: ctx, 901 | createCh: make(chan *traceCreateRequest), 902 | fetchCh: make(chan *fetchWriterStateRequest), 903 | } 904 | group.Go(func() error { 905 | return manager.runMasterThread( 906 | tracePipe, spliceIn, spliceOut, 907 | root, namespace, logger, syncCh) 908 | }) 909 | group.Go(func() error { 910 | return manager.runReaderThread( 911 | tracePipe, spliceIn, spliceOut, receiveCh) 912 | }) 913 | group.Go(func() error { 914 | return manager.runWriterThread( 915 | syncCh, receiveCh, option.limitInterval, logger) 916 | }) 917 | hasCreated = true 918 | return manager, nil 919 | } 920 | 921 | type calibrateEvent struct { 922 | ProbeEvent 923 | Condition `tracing:"Name ~ \"/proc/self/calibrate/*\"` 924 | 925 | Name StringAddr `tracing:"$arg2"` 926 | } 927 | 928 | // New will create an instance of the manager. 929 | func New( 930 | ctx context.Context, group *errgroup.Group, options ...Option, 931 | ) (Manager, error) { 932 | // The implementation will be splitted into two steps, 933 | // first we create the uncalibrated manager, then we 934 | // attempt to calibrate it and return it to caller. 935 | calibrated := false 936 | cancelCtx, cancel := context.WithCancel(ctx) 937 | innerGroup, innerCtx := errgroup.WithContext(cancelCtx) 938 | defer func() { 939 | if !calibrated { 940 | cancel() 941 | _ = innerGroup.Wait() 942 | } 943 | }() 944 | manager, err := newInternal(innerCtx, innerGroup, options...) 
945 | if err != nil { 946 | return nil, err 947 | } 948 | state, err := manager.fetchWriterState() 949 | if err != nil { 950 | return nil, err 951 | } 952 | calibrateDone := make(chan struct{}) 953 | var calibrateOnce sync.Once 954 | symbols := []string{"vfs_fstatat", "vfs_statx"} 955 | var calibrateErr error 956 | registered := false 957 | for _, symbol := range symbols { 958 | calibrate, _, err := manager.TraceKProbe(symbol, func( 959 | event calibrateEvent, 960 | ) { 961 | str := filepath.Base(event.Name.String) 962 | unixNano, err := strconv.ParseUint(str, 16, 64) 963 | if err != nil { 964 | return 965 | } 966 | baseTime := time.Unix(0, int64(unixNano)) 967 | if !state.baseTime.IsZero() { 968 | startNew := baseTime.Add(-event.epoch) 969 | startOld := state.baseTime.Add(-state.baseEpoch) 970 | if startOld.Sub(startNew) > 500*time.Millisecond { 971 | return 972 | } 973 | } 974 | state.baseTime = baseTime 975 | state.baseEpoch = event.epoch 976 | calibrateOnce.Do(func() { 977 | close(calibrateDone) 978 | }) 979 | }) 980 | if err != nil { 981 | calibrateErr = err 982 | continue 983 | } 984 | registered = true 985 | defer calibrate.Close() 986 | calibrate.SetEnabled(true) 987 | } 988 | if !registered { 989 | return nil, calibrateErr 990 | } 991 | var stat unix.Stat_t 992 | unixNano := time.Now().UnixNano() 993 | _ = unix.Fstatat(unix.AT_FDCWD, fmt.Sprintf( 994 | "/proc/self/calibrate/%x", unixNano), &stat, 0) 995 | select { 996 | case <-ctx.Done(): 997 | return nil, ctx.Err() 998 | case <-time.After(5 * time.Second): 999 | return nil, errors.New("calibration timed out") 1000 | case <-calibrateDone: 1001 | } 1002 | group.Go(func() error { 1003 | defer cancel() 1004 | <-innerCtx.Done() 1005 | return innerGroup.Wait() 1006 | }) 1007 | calibrated = true 1008 | return manager, nil 1009 | } 1010 | --------------------------------------------------------------------------------