├── .gitignore
├── go.mod
├── inode
├── trace_386.go
├── trace_amd64.go
├── trace_arm64.go
└── inode.go
├── connect
├── trace_linux_amd64.go
├── trace_linux_arm64.go
├── trace_linux_386.go
└── connect.go
├── pkg
├── kallsyms
│ ├── kallsyms_test.go
│ └── kallsyms.go
├── alloc
│ └── alloc.go
└── kversion
│ └── kversion.go
├── .github
└── workflows
│ └── build.yml
├── cmd
└── systracer
│ ├── listen.go
│ ├── connect.go
│ ├── main.go
│ └── watch.go
├── compile_test.go
├── listen
├── trace_linux_386.go
├── trace_linux_amd64.go
├── trace_linux_arm64.go
└── listen.go
├── README.md
├── package.go
├── rcnotify
├── trace_amd64.go
├── trace_arm64.go
├── trace_386.go
└── rcnotify.go
├── go.sum
├── tracefs.go
├── handle.go
├── LICENSE
├── compile.go
└── manager.go
/.gitignore:
--------------------------------------------------------------------------------
1 | /systracer
2 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/chaitin/systracer
2 |
3 | go 1.16
4 |
5 | require (
6 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277
7 | github.com/pkg/errors v0.9.1
8 | github.com/spf13/cobra v1.6.0
9 | github.com/stretchr/testify v1.8.0
10 | go.uber.org/zap v1.23.0
11 | golang.org/x/sync v0.1.0
12 | golang.org/x/sys v0.2.0
13 | )
14 |
--------------------------------------------------------------------------------
/inode/trace_386.go:
--------------------------------------------------------------------------------
1 | package inode
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySecurityInodePin_V2_6_24 struct {
8 | systracer.ProbeEvent
9 | Inode uint64 `tracing:"%ax"`
10 | Name string `tracing:"%dx,Name ~ \"systracer.inode_pin.*\""`
11 | }
12 |
13 | type entrySecurityInodePin_V5_12 struct {
14 | systracer.ProbeEvent
15 | Inode uint64 `tracing:"%dx"`
16 | Name string `tracing:"%cx,Name ~ \"systracer.inode_pin.*\""`
17 | }
18 |
--------------------------------------------------------------------------------
/inode/trace_amd64.go:
--------------------------------------------------------------------------------
1 | package inode
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySecurityInodePin_V2_6_24 struct {
8 | systracer.ProbeEvent
9 | Inode uint64 `tracing:"%di"`
10 | Name string `tracing:"%si,Name ~ \"systracer.inode_pin.*\""`
11 | }
12 |
13 | type entrySecurityInodePin_V5_12 struct {
14 | systracer.ProbeEvent
15 | Inode uint64 `tracing:"%si"`
16 | Name string `tracing:"%dx,Name ~ \"systracer.inode_pin.*\""`
17 | }
18 |
--------------------------------------------------------------------------------
/inode/trace_arm64.go:
--------------------------------------------------------------------------------
1 | package inode
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySecurityInodePin_V2_6_24 struct {
8 | systracer.ProbeEvent
9 | Inode uint64 `tracing:"%x0"`
10 | Name string `tracing:"%x1,Name ~ \"systracer.inode_pin.*\""`
11 | }
12 |
13 | type entrySecurityInodePin_V5_12 struct {
14 | systracer.ProbeEvent
15 | Inode uint64 `tracing:"%x1"`
16 | Name string `tracing:"%x2,Name ~ \"systracer.inode_pin.*\""`
17 | }
18 |
--------------------------------------------------------------------------------
/connect/trace_linux_amd64.go:
--------------------------------------------------------------------------------
1 | package connect
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySyscallConnectInet4 struct {
8 | systracer.ProbeEvent
9 | FD int64 `tracing:"%di"`
10 | Family uint16 `tracing:"+0(%si),Family == 2"`
11 | Port uint16 `tracing:"+2(%si),,bigendian"`
12 | Address uint32 `tracing:"+4(%si),,bigendian"`
13 | Len uint64 `tracing:"%dx,Len >= 16"`
14 | }
15 |
16 | type entrySyscallConnectInet6 struct {
17 | systracer.ProbeEvent
18 | FD int64 `tracing:"%di"`
19 | Family uint16 `tracing:"+0(%si),Family == 10"`
20 | Port uint16 `tracing:"+2(%si),,bigendian"`
21 | FlowInfo uint32 `tracing:"+4(%si)"`
22 | Address0 uint32 `tracing:"+8(%si),,bigendian"`
23 | Address1 uint32 `tracing:"+12(%si),,bigendian"`
24 | Address2 uint32 `tracing:"+16(%si),,bigendian"`
25 | Address3 uint32 `tracing:"+20(%si),,bigendian"`
26 | Scope uint32 `tracing:"+24(%si)"`
27 | Len uint64 `tracing:"%dx,Len >= 28"`
28 | }
29 |
30 | type exitSyscallConnect struct {
31 | systracer.ReturnEvent
32 | Errno int32 `tracing:"%ax"`
33 | }
34 |
35 | type entryInetProtocolConnect struct {
36 | systracer.ProbeEvent
37 |
38 | // (struct socket*)->type
39 | Type uint16 `tracing:"+4(%di)"`
40 | }
41 |
--------------------------------------------------------------------------------
/connect/trace_linux_arm64.go:
--------------------------------------------------------------------------------
1 | package connect
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySyscallConnectInet4 struct {
8 | systracer.ProbeEvent
9 | FD int64 `tracing:"%x0"`
10 | Family uint16 `tracing:"+0(%x1),Family == 2"`
11 | Port uint16 `tracing:"+2(%x1),,bigendian"`
12 | Address uint32 `tracing:"+4(%x1),,bigendian"`
13 | Len uint64 `tracing:"%x2,Len >= 16"`
14 | }
15 |
16 | type entrySyscallConnectInet6 struct {
17 | systracer.ProbeEvent
18 | FD int64 `tracing:"%x0"`
19 | Family uint16 `tracing:"+0(%x1),Family == 10"`
20 | Port uint16 `tracing:"+2(%x1),,bigendian"`
21 | FlowInfo uint32 `tracing:"+4(%x1)"`
22 | Address0 uint32 `tracing:"+8(%x1),,bigendian"`
23 | Address1 uint32 `tracing:"+12(%x1),,bigendian"`
24 | Address2 uint32 `tracing:"+16(%x1),,bigendian"`
25 | Address3 uint32 `tracing:"+20(%x1),,bigendian"`
26 | Scope uint32 `tracing:"+24(%x1)"`
27 | Len uint64 `tracing:"%x2,Len >= 28"`
28 | }
29 |
30 | type exitSyscallConnect struct {
31 | systracer.ReturnEvent
32 | Errno int32 `tracing:"%x0"`
33 | }
34 |
35 | type entryInetProtocolConnect struct {
36 | systracer.ProbeEvent
37 |
38 | // (struct socket*)->type
39 | Type uint16 `tracing:"+4(%x0)"`
40 | }
41 |
--------------------------------------------------------------------------------
/connect/trace_linux_386.go:
--------------------------------------------------------------------------------
1 | package connect
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySyscallConnectInet4 struct {
8 | systracer.ProbeEvent
9 | FD int32 `tracing:"+4(%sp)"`
10 | Family uint16 `tracing:"+0(+8(%sp)),Family == 2"`
11 | Port uint16 `tracing:"+2(+8(%sp)),,bigendian"`
12 | Address uint32 `tracing:"+4(+8(%sp)),,bigendian"`
13 | Len uint32 `tracing:"+12(%sp),Len >= 16"`
14 | }
15 |
16 | type entrySyscallConnectInet6 struct {
17 | systracer.ProbeEvent
18 | FD int32 `tracing:"+4(%sp)"`
19 | Family uint16 `tracing:"+0(+8(%sp)),Family == 10"`
20 | Port uint16 `tracing:"+2(+8(%sp)),,bigendian"`
21 | FlowInfo uint32 `tracing:"+4(+8(%sp))"`
22 | Address0 uint32 `tracing:"+8(+8(%sp)),,bigendian"`
23 | Address1 uint32 `tracing:"+12(+8(%sp)),,bigendian"`
24 | Address2 uint32 `tracing:"+16(+8(%sp)),,bigendian"`
25 | Address3 uint32 `tracing:"+20(+8(%sp)),,bigendian"`
26 | Scope uint32 `tracing:"+24(+8(%sp))"`
27 | Len uint64 `tracing:"+12(%sp),Len >= 28"`
28 | }
29 |
30 | type exitSyscallConnect struct {
31 | systracer.ReturnEvent
32 | Errno int32 `tracing:"%ax"`
33 | }
34 |
35 | type entryInetProtocolConnect struct {
36 | systracer.ProbeEvent
37 |
38 | // (struct socket*)->type
39 | Type uint16 `tracing:"+4(+0(%sp))"`
40 | }
41 |
--------------------------------------------------------------------------------
/pkg/kallsyms/kallsyms_test.go:
--------------------------------------------------------------------------------
1 | package kallsyms
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func Test(t *testing.T) {
11 | assert := assert.New(t)
12 | symtabs := Parse(bytes.Trim([]byte(`
13 | ffffffff90d38450 t do_open
14 | ffffffffc01920e0 r __func__.24 [video]
15 | ffffffff91ff03ae d .LC1
16 | ffffffffc027b1b0 r .LC1 [drm]
17 | ffffffffc0275fa8 r .LC1 [drm]
18 | ffffffffc0ba88a0 t do_open [nfs]
19 | `), "\n"), nil)
20 |
21 | coreSymtab := symtabs[""]
22 | assert.NotNil(coreSymtab)
23 | assert.Equal(uint64(0xffffffff91ff03ae), coreSymtab.Lookup(".LC1", "Dd"))
24 | assert.Equal(uint64(0), coreSymtab.Lookup(".LC1", "Tt"))
25 | assert.Equal(uint64(0xffffffff90d38450), coreSymtab.Lookup("do_open", "Tt"))
26 |
27 | nfsSymtab := symtabs["nfs"]
28 | assert.NotNil(nfsSymtab)
29 | assert.Equal(uint64(0xffffffffc0ba88a0), nfsSymtab.Lookup("do_open", "Tt"))
30 |
31 | drmSymtab := symtabs["drm"]
32 | assert.NotNil(drmSymtab)
33 | assert.Equal(uint64(0), drmSymtab.Lookup(".LC1", "Dd"))
34 | assert.Equal(uint64(0xffffffffc0275fa8), drmSymtab.Lookup(".LC1", "Rr"))
35 |
36 | videoSymtab := symtabs["video"]
37 | assert.NotNil(videoSymtab)
38 | assert.Equal(uint64(0), videoSymtab.Lookup(".LC1", "Dd"))
39 | assert.Equal(uint64(0xffffffffc01920e0), videoSymtab.Lookup("__func__.24", "Rr"))
40 | }
41 |
--------------------------------------------------------------------------------
/pkg/alloc/alloc.go:
--------------------------------------------------------------------------------
1 | // package alloc is the id allocator that circulates the
2 | // next id as the id allocator.
3 | //
4 | // When the allocation space is large, the next id behind
5 | // the current id is very likely to be unused in most cases,
6 | // which involves only a single operation while allocating.
7 | //
8 | // The upper limit of this allocator is O(N), where N is the
9 | // current number of elements in use.
10 | package alloc
11 |
// Alloc allocates the ID by circularly seeking for the next
// available identity after id.
//
// Candidates are probed in the order id+1, id+2, ..., upperLimit
// and then 1, 2, ..., id-1. The first candidate for which occupied
// reports false is returned. An upperLimit of 0 means that the
// whole uint64 range may be used.
//
// Please notice that since the index 0 has been reserved
// for invalid index, it will be returned whenever the
// allocation has failed.
func Alloc(
	id, upperLimit uint64, occupied func(uint64) bool,
) uint64 {
	if upperLimit == 0 {
		upperLimit = ^uint64(0)
	}

	// Forward scan. Its first iteration is the fast path: the
	// value next to id is very likely to be free as long as the
	// limit is great enough.
	//
	// The bound is checked before incrementing, so the candidate
	// can neither exceed upperLimit nor wrap around to 0 when
	// upperLimit is the greatest uint64.
	for newID := id; newID < upperLimit; {
		newID++
		if !occupied(newID) {
			return newID
		}
	}

	// Wrap around and seek from the smallest valid identity up
	// to (but excluding) id, never going beyond upperLimit.
	for newID := uint64(1); newID < id && newID <= upperLimit; newID++ {
		if !occupied(newID) {
			return newID
		}
	}
	return 0
}
50 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on: [push, pull_request]
3 | jobs:
4 | build:
5 | strategy:
6 | matrix:
7 | include:
8 | - GOARCH: "386"
9 | name: "systracer-386"
10 | - GOARCH: "amd64"
11 | name: "systracer"
12 | - GOARCH: "arm64"
13 | name: "systracer-aarch64"
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v3
17 | - uses: actions/setup-go@v3
18 | with:
19 | go-version: '1.18'
20 | - run: |
21 | mkdir -p build
22 | GOOS=linux GOARCH=${{ matrix.GOARCH }} CGO_ENABLED=0 go build -ldflags '-extldflags="-static"' -tags osusergo,netgo -o build/${{ matrix.name }} ./cmd/systracer/
23 | md5sum build/${{ matrix.name }} | tee build/${{ matrix.name }}.md5.txt
24 | sha256sum build/${{ matrix.name }} | tee build/${{matrix.name }}.sha256.txt
25 | - uses: actions/upload-artifact@v3
26 | with:
27 | name: systracer
28 | path: build/*/**
29 | release:
30 | needs: build
31 | runs-on: ubuntu-latest
32 | steps:
33 | - uses: actions/checkout@v3
34 | - uses: actions/download-artifact@v3
35 | with:
36 | name: systracer
37 | path: build/
38 | - run: |
39 | ls -la build
40 | cat build/*.md5.txt
41 | cat build/*.sha256.txt
42 | - uses: softprops/action-gh-release@v1
43 | if: startsWith(github.ref, 'refs/tags/')
44 | with:
45 | body: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
46 | draft: true
47 | files: build/*
48 |
--------------------------------------------------------------------------------
/cmd/systracer/listen.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 |
7 | "github.com/aegistudio/shaft"
8 | "go.uber.org/zap"
9 | "golang.org/x/sync/errgroup"
10 | "golang.org/x/sys/unix"
11 |
12 | "github.com/chaitin/systracer/listen"
13 | )
14 |
15 | var (
16 | listenEnabled bool
17 | )
18 |
19 | func initListenModule() shaft.Option {
20 | if !(allEnabled || listenEnabled) {
21 | return shaft.Module()
22 | }
23 | return shaft.Module(
24 | listen.Module,
25 | shaft.Provide(func(
26 | ctx context.Context, group *errgroup.Group,
27 | logger *zap.SugaredLogger, ch <-chan listen.Event,
28 | ) ([]moduleBarrier, error) {
29 | group.Go(func() error {
30 | for {
31 | var event listen.Event
32 | select {
33 | case <-ctx.Done():
34 | return nil
35 | case event = <-ch:
36 | }
37 | eventContext := fmt.Sprintf("%s %d",
38 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID)
39 | eventAddr := ""
40 | switch event.Family {
41 | case unix.AF_INET:
42 | eventAddr = fmt.Sprintf("%s:%d",
43 | event.Addr, event.Port)
44 | case unix.AF_INET6:
45 | eventAddr = fmt.Sprintf("[%s]:%d",
46 | event.Addr, event.Port)
47 | }
48 | switch event.Op {
49 | case listen.OpListenStart:
50 | logger.Infof(
51 | "%s - listen_tcp(%d, %q, %d)",
52 | eventContext, *event.FD,
53 | eventAddr, *event.Backlog)
54 | case listen.OpListenEnd:
55 | logger.Infof(
56 | "%s - unlisten_tcp(%q)",
57 | eventContext, eventAddr)
58 | }
59 | }
60 | })
61 | return nil, nil
62 | }),
63 | )
64 | }
65 |
66 | func init() {
67 | moduleInits = append(moduleInits, initListenModule)
68 | rootCmd.PersistentFlags().BoolVar(
69 | &listenEnabled, "listen", listenEnabled,
70 | "collect listen event for logging")
71 | }
72 |
--------------------------------------------------------------------------------
/compile_test.go:
--------------------------------------------------------------------------------
1 | package systracer
2 |
3 | import (
4 | "bytes"
5 | "reflect"
6 | "strings"
7 | "testing"
8 | "unsafe"
9 |
10 | "github.com/stretchr/testify/assert"
11 | )
12 |
13 | // OpenEvent is a demo event dedicated for being
14 | // used as example of parsing.
15 | type OpenEvent struct {
16 | ProbeEvent
17 | Dir int64 `tracing:"%di"`
18 | Filename string `tracing:"%si"`
19 | Flags uint64 `tracing:"%dx"`
20 | Mode uint64 `tracing:"%cx"`
21 | }
22 |
23 | // TestOpenEvent is the event for performing tests
24 | // strongly associated with the open event.
25 | func TestOpenEvent(t *testing.T) {
26 | var err error
27 | assert := assert.New(t)
28 |
29 | // Attempt to compile and validate OpenEvent.
30 | desc, err := compileTraceEvent(
31 | reflect.TypeOf(OpenEvent{}))
32 | assert.NoError(err)
33 | if err != nil {
34 | return
35 | }
36 | assert.Equal(4, len(desc.fields), "number of fields")
37 | assert.Equal("", desc.initialCondition, "initial condition")
38 |
39 | // Evaluate and test the format of open event.
40 | expectedFormat := strings.Join([]string{
41 | "Dir=%di:s64", "FilenameStart=%si:u64",
42 | "Filename=+0(%si):string", "FilenameEnd=%si:u64",
43 | "Flags=%dx:u64", "Mode=%cx:u64",
44 | }, " ")
45 | actualFormat := desc.format()
46 | assert.Equal(expectedFormat, actualFormat, "format")
47 |
48 | // Attempt to parse and fill the event struct.
49 | var testEvent1 OpenEvent
50 | testLog1 := bytes.Trim([]byte(`
51 | Dir=-100 FilenameStart=-12345678 Filename="/proc/self/status" FilenameEnd=-12345678 Flags=0x8000 Mode=0x0
52 | `), "\n")
53 | len1, err := desc.fill(
54 | uintptr(unsafe.Pointer(&testEvent1)), testLog1)
55 | assert.NoError(err)
56 | if err != nil {
57 | return
58 | }
59 | assert.Equal(len(testLog1), len1, "log read")
60 | assert.Equal(int64(-100), testEvent1.Dir, "dirfd")
61 | assert.Equal("/proc/self/status", testEvent1.Filename, "filename")
62 | assert.Equal(uint64(0x8000), testEvent1.Flags, "flags")
63 | assert.Equal(uint64(0), testEvent1.Mode, "mode")
64 | }
65 |
--------------------------------------------------------------------------------
/pkg/kallsyms/kallsyms.go:
--------------------------------------------------------------------------------
1 | // Package kallsyms loads and parses the kernel symbol table
2 | // so that other modules can attach kprobe to functions.
3 | package kallsyms
4 |
5 | import (
6 | "bytes"
7 | "regexp"
8 | "strconv"
9 | )
10 |
// regexpSymbolItem matches a single line of kallsyms data, which is
// "<hex address> <type> <name>", optionally followed by a tab and
// the owning module enclosed in square brackets.
var regexpSymbolItem = regexp.MustCompilePOSIX(
	`^([0-9a-f]+) ([A-Za-z0-9]) ([^ \t]+)(\t\[([^]]+)\])?$`)

func init() {
	regexpSymbolItem.Longest()
}

// symbol is a single entry of a symbol table: the address of
// the symbol and its one-letter type.
type symbol struct {
	addr uint64
	typ  byte
}

// SymbolTable is the parsed symbol table from kernel.
//
// A name may map to multiple symbols, which are kept in the
// order they were parsed.
type SymbolTable struct {
	table map[string][]symbol
}

// Lookup looks up the symbol in a module.
//
// The types argument is the set of acceptable type letters, e.g.
// "Tt". Symbols sharing the name are examined from the most
// recently parsed one backwards, and the address of the first one
// with an acceptable type is returned. 0 is returned when there
// is no such symbol.
func (t *SymbolTable) Lookup(name, types string) uint64 {
	accepted := []byte(types)
	syms := t.table[name]
	for i := len(syms) - 1; i >= 0; i-- {
		if bytes.IndexByte(accepted, syms[i].typ) >= 0 {
			return syms[i].addr
		}
	}
	return 0
}

// Parse the kallsyms data and return the parsed symbol table.
//
// The result is keyed by module name, with the empty name holding
// the symbols that carry no module suffix. When interestedModules
// is not nil, only symbols of the modules present in it are kept.
// Lines that are malformed, or whose address does not fit into 64
// bits, are skipped.
func Parse(
	kallsyms []byte, interestedModules map[string]struct{},
) map[string]*SymbolTable {
	result := make(map[string]*SymbolTable)
	for len(kallsyms) > 0 {
		// Cut the next line off the front of the input.
		var current []byte
		if index := bytes.IndexByte(kallsyms, '\n'); index < 0 {
			current, kallsyms = kallsyms, nil
		} else {
			current, kallsyms = kallsyms[:index], kallsyms[index+1:]
		}

		// 0: the whole string
		// 1: symbol address
		// 2: symbol type
		// 3: symbol name
		// 4: string with module string
		// 5: module containing symbol
		matches := regexpSymbolItem.FindSubmatch(current)
		if len(matches) == 0 {
			continue
		}
		module := string(matches[5])
		if interestedModules != nil {
			if _, ok := interestedModules[module]; !ok {
				continue
			}
		}

		// The pattern only admits hex digits, so the sole failure
		// is an address out of the 64-bit range. Recording the
		// clamped value would yield a bogus address, so drop it.
		addr, err := strconv.ParseUint(string(matches[1]), 16, 64)
		if err != nil {
			continue
		}

		table := result[module]
		if table == nil {
			table = &SymbolTable{
				table: make(map[string][]symbol),
			}
			result[module] = table
		}
		name := string(matches[3])
		table.table[name] = append(table.table[name], symbol{
			addr: addr,
			typ:  matches[2][0],
		})
	}
	return result
}
86 |
--------------------------------------------------------------------------------
/listen/trace_linux_386.go:
--------------------------------------------------------------------------------
1 | package listen
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
// entrySyscallListen is the probe event carrying the descriptor
// and the backlog, presumably captured when entering the listen
// syscall.
//
// NOTE(review): the %di/%si registers and the 64-bit field types
// are the same as in the amd64 variant of this struct, whereas the
// other 386 probes in this package fetch their argument from %ax.
// Confirm these are the right argument locations on 386.
type entrySyscallListen struct {
	systracer.ProbeEvent
	FD      int64 `tracing:"%di"`
	Backlog int64 `tracing:"%si"`
}
12 |
13 | type exitSyscallListen struct {
14 | systracer.ReturnEvent
15 | Errno int32 `tracing:"%ax"`
16 | }
17 |
18 | type StructSockListenInet4 struct {
19 | // (struct socket*)->sk
20 | Address uint32 `tracing:"+4({1}),,bigendian"`
21 | Port uint16 `tracing:"+14({1})"`
22 | Family uint16 `tracing:"+16({1}),{0}Family == 2"`
23 | }
24 |
25 | type entryProtocolListenInet4_V2_6_12 struct {
26 | systracer.ProbeEvent
27 | Sk StructSockListenInet4 `tracing:"+20(%ax)"`
28 | }
29 |
30 | type entryProtocolListenInet4_V5_3 struct {
31 | systracer.ProbeEvent
32 | Sk StructSockListenInet4 `tracing:"+16(%ax)"`
33 | }
34 |
35 | type StructSockListenInet6 struct {
36 | // (struct socket*)->sk
37 | Port uint16 `tracing:"+14({1})"`
38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"`
39 | Address0 uint32 `tracing:"+56({1}),,bigendian"`
40 | Address1 uint32 `tracing:"+60({1}),,bigendian"`
41 | Address2 uint32 `tracing:"+64({1}),,bigendian"`
42 | Address3 uint32 `tracing:"+68({1}),,bigendian"`
43 | }
44 |
45 | type entryProtocolListenInet6_V2_6_12 struct {
46 | systracer.ProbeEvent
47 | Sk StructSockListenInet6 `tracing:"+20(%ax)"`
48 | }
49 |
50 | type entryProtocolListenInet6_V5_3 struct {
51 | systracer.ProbeEvent
52 | Sk StructSockListenInet6 `tracing:"+16(%ax)"`
53 | }
54 |
55 | type entryTCPCloseInet4 struct {
56 | systracer.ProbeEvent
57 |
58 | // (struct socket*)->sk
59 | // Sk uint64 `tracing:"%ax"`
60 |
61 | Address uint32 `tracing:"+4(%ax),,bigendian"`
62 | Port uint16 `tracing:"+14(%ax)"`
63 | Family uint16 `tracing:"+16(%ax),Family == 2"`
64 | State uint8 `tracing:"+18(%ax),State == 10"`
65 | }
66 |
67 | type entryTCPCloseInet6 struct {
68 | systracer.ProbeEvent
69 |
70 | // (struct socket*)->sk
71 | // Sk uint64 `tracing:"ax"`
72 |
73 | Port uint16 `tracing:"+14(%ax)"`
74 | Family uint16 `tracing:"+16(%ax),Family == 10"`
75 | State uint8 `tracing:"+18(%ax),State == 10"`
76 | Address0 uint32 `tracing:"+56(%ax),,bigendian"`
77 | Address1 uint32 `tracing:"+60(%ax),,bigendian"`
78 | Address2 uint32 `tracing:"+64(%ax),,bigendian"`
79 | Address3 uint32 `tracing:"+68(%ax),,bigendian"`
80 | }
81 |
--------------------------------------------------------------------------------
/listen/trace_linux_amd64.go:
--------------------------------------------------------------------------------
1 | package listen
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySyscallListen struct {
8 | systracer.ProbeEvent
9 | FD int64 `tracing:"%di"`
10 | Backlog int64 `tracing:"%si"`
11 | }
12 |
13 | type exitSyscallListen struct {
14 | systracer.ReturnEvent
15 | Errno int32 `tracing:"%ax"`
16 | }
17 |
18 | type StructSockListenInet4 struct {
19 | // (struct socket*)({1})->sk
20 | Address uint32 `tracing:"+4({1}),,bigendian"`
21 | Port uint16 `tracing:"+14({1})"`
22 | Family uint16 `tracing:"+16({1}),{0}Family == 2"`
23 | }
24 |
25 | type entryProtocolListenInet4_V2_6_12 struct {
26 | systracer.ProbeEvent
27 | Sk StructSockListenInet4 `tracing:"+32(%di)"`
28 | }
29 |
30 | type entryProtocolListenInet4_V5_3 struct {
31 | systracer.ProbeEvent
32 | Sk StructSockListenInet4 `tracing:"+24(%di)"`
33 | }
34 |
35 | type StructSockListenInet6 struct {
36 | // (struct socket*)({1})->sk
37 | Port uint16 `tracing:"+14({1})"`
38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"`
39 | Address0 uint32 `tracing:"+72({1}),,bigendian"`
40 | Address1 uint32 `tracing:"+76({1}),,bigendian"`
41 | Address2 uint32 `tracing:"+80({1}),,bigendian"`
42 | Address3 uint32 `tracing:"+84({1}),,bigendian"`
43 | }
44 |
45 | type entryProtocolListenInet6_V2_6_12 struct {
46 | systracer.ProbeEvent
47 | Sk StructSockListenInet6 `tracing:"+32(%di)"`
48 | }
49 |
50 | type entryProtocolListenInet6_V5_3 struct {
51 | systracer.ProbeEvent
52 | Sk StructSockListenInet6 `tracing:"+24(%di)"`
53 | }
54 |
55 | type entryTCPCloseInet4 struct {
56 | systracer.ProbeEvent
57 |
58 | // (struct socket*)->sk
59 | // Sk uint64 `tracing:"di"`
60 |
61 | Address uint32 `tracing:"+4(%di),,bigendian"`
62 | Port uint16 `tracing:"+14(%di)"`
63 | Family uint16 `tracing:"+16(%di),Family == 2"`
64 | State uint8 `tracing:"+18(%di),State == 10"`
65 | }
66 |
67 | type entryTCPCloseInet6 struct {
68 | systracer.ProbeEvent
69 |
70 | // (struct socket*)->sk
71 | // Sk uint64 `tracing:"di"`
72 |
73 | Port uint16 `tracing:"+14(%di)"`
74 | Family uint16 `tracing:"+16(%di),Family == 10"`
75 | State uint8 `tracing:"+18(%di),State == 10"`
76 | Address0 uint32 `tracing:"+72(%di),,bigendian"`
77 | Address1 uint32 `tracing:"+76(%di),,bigendian"`
78 | Address2 uint32 `tracing:"+80(%di),,bigendian"`
79 | Address3 uint32 `tracing:"+84(%di),,bigendian"`
80 | }
81 |
--------------------------------------------------------------------------------
/listen/trace_linux_arm64.go:
--------------------------------------------------------------------------------
1 | package listen
2 |
3 | import (
4 | "github.com/chaitin/systracer"
5 | )
6 |
7 | type entrySyscallListen struct {
8 | systracer.ProbeEvent
9 | FD int64 `tracing:"%x0"`
10 | Backlog int64 `tracing:"%x1"`
11 | }
12 |
13 | type exitSyscallListen struct {
14 | systracer.ReturnEvent
15 | Errno int32 `tracing:"%x0"`
16 | }
17 |
18 | type StructSockListenInet4 struct {
19 | // (struct socket*)({1})->sk
20 | Address uint32 `tracing:"+4({1}),,bigendian"`
21 | Port uint16 `tracing:"+14({1})"`
22 | Family uint16 `tracing:"+16({1}),{0}Family == 2"`
23 | }
24 |
25 | type entryProtocolListenInet4_V2_6_12 struct {
26 | systracer.ProbeEvent
27 | Sk StructSockListenInet4 `tracing:"+32(%x0)"`
28 | }
29 |
30 | type entryProtocolListenInet4_V5_3 struct {
31 | systracer.ProbeEvent
32 | Sk StructSockListenInet4 `tracing:"+24(%x0)"`
33 | }
34 |
35 | type StructSockListenInet6 struct {
36 | // (struct socket*)({1})->sk
37 | Port uint16 `tracing:"+14({1})"`
38 | Family uint16 `tracing:"+16({1}),{0}Family == 10"`
39 | Address0 uint32 `tracing:"+72({1}),,bigendian"`
40 | Address1 uint32 `tracing:"+76({1}),,bigendian"`
41 | Address2 uint32 `tracing:"+80({1}),,bigendian"`
42 | Address3 uint32 `tracing:"+84({1}),,bigendian"`
43 | }
44 |
45 | type entryProtocolListenInet6_V2_6_12 struct {
46 | systracer.ProbeEvent
47 | Sk StructSockListenInet6 `tracing:"+32(%x0)"`
48 | }
49 |
50 | type entryProtocolListenInet6_V5_3 struct {
51 | systracer.ProbeEvent
52 | Sk StructSockListenInet6 `tracing:"+24(%x0)"`
53 | }
54 |
55 | type entryTCPCloseInet4 struct {
56 | systracer.ProbeEvent
57 |
58 | // (struct socket*)->sk
59 | // Sk uint64 `tracing:"x0"`
60 |
61 | Address uint32 `tracing:"+4(%x0),,bigendian"`
62 | Port uint16 `tracing:"+14(%x0)"`
63 | Family uint16 `tracing:"+16(%x0),Family == 2"`
64 | State uint8 `tracing:"+18(%x0),State == 10"`
65 | }
66 |
67 | type entryTCPCloseInet6 struct {
68 | systracer.ProbeEvent
69 |
70 | // (struct socket*)->sk
71 | // Sk uint64 `tracing:"x0"`
72 |
73 | Port uint16 `tracing:"+14(%x0)"`
74 | Family uint16 `tracing:"+16(%x0),Family == 10"`
75 | State uint8 `tracing:"+18(%x0),State == 10"`
76 | Address0 uint32 `tracing:"+72(%x0),,bigendian"`
77 | Address1 uint32 `tracing:"+76(%x0),,bigendian"`
78 | Address2 uint32 `tracing:"+80(%x0),,bigendian"`
79 | Address3 uint32 `tracing:"+84(%x0),,bigendian"`
80 | }
81 |
--------------------------------------------------------------------------------
/cmd/systracer/connect.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "syscall"
7 |
8 | "github.com/aegistudio/shaft"
9 | "go.uber.org/zap"
10 | "golang.org/x/sync/errgroup"
11 | "golang.org/x/sys/unix"
12 |
13 | "github.com/chaitin/systracer/connect"
14 | )
15 |
16 | var (
17 | connectEnabled bool
18 | )
19 |
20 | func initConnectModule() shaft.Option {
21 | if !(allEnabled || connectEnabled) {
22 | return shaft.Module()
23 | }
24 | return shaft.Module(
25 | connect.Module,
26 | shaft.Provide(func(
27 | ctx context.Context, group *errgroup.Group,
28 | logger *zap.SugaredLogger, ch <-chan connect.Event,
29 | ) ([]moduleBarrier, error) {
30 | group.Go(func() error {
31 | for {
32 | var event connect.Event
33 | select {
34 | case <-ctx.Done():
35 | return nil
36 | case event = <-ch:
37 | }
38 | eventContext := fmt.Sprintf("%s %d",
39 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID)
40 | eventAddr := ""
41 | switch event.Family {
42 | case unix.AF_INET:
43 | eventAddr = fmt.Sprintf("%s:%d",
44 | event.Addr, event.Port)
45 | case unix.AF_INET6:
46 | eventAddr = fmt.Sprintf("[%s]:%d",
47 | event.Addr, event.Port)
48 | }
49 | eventType := fmt.Sprintf("%d", event.Type)
50 | switch event.Type {
51 | case unix.SOCK_STREAM:
52 | eventType = "tcp"
53 | case unix.SOCK_DGRAM:
54 | eventType = "udp"
55 | case unix.SOCK_RAW, unix.SOCK_PACKET:
56 | eventType = "raw"
57 | }
58 | switch event.Op {
59 | case connect.OpConnectStart:
60 | logger.Infof(
61 | "%s - connect_%s(%d, %q)",
62 | eventContext, eventType,
63 | event.FD, eventAddr)
64 | case connect.OpConnectEnd:
65 | eventResult := "0"
66 | if event.Errno != nil {
67 | if errno := *event.Errno; errno != 0 {
68 | eventResult = fmt.Sprintf("%d (%s)",
69 | errno, syscall.Errno(-errno))
70 | }
71 | }
72 | logger.Infof(
73 | "%s - connect_%s(%d, %q) = %s",
74 | eventContext, eventType,
75 | event.FD, eventAddr, eventResult)
76 | }
77 | }
78 | })
79 | return nil, nil
80 | }),
81 | )
82 | }
83 |
84 | func init() {
85 | moduleInits = append(moduleInits, initConnectModule)
86 | rootCmd.PersistentFlags().BoolVar(
87 | &connectEnabled, "connect", connectEnabled,
88 | "collect connect event for logging")
89 | }
90 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SysTracer: Linux 系统活动跟踪器
2 |
3 | 
4 | 
5 | 
6 |
7 | SysTracer 是长亭所开发的,用于跟踪 Linux 关键活动的监控程序。
8 |
9 | 得益于 Linux Tracing 技术和内核分析手段,SysTracer 的监控和跟踪是事件驱动且实时的。
10 |
11 | 
12 |
13 |
使用 SysTracer 监控 Docker Pull+Run 时的网络和文件活动
14 |
15 | ## 功能和使用方法
16 |
17 | 各 CPU 架构下的 SysTracer 的可执行文件(静态编译)可以在 [Releases](https://github.com/chaitin/systracer/releases) 页面下载,请注意验证文件完整性。
18 |
19 | 执行 SysTracer 需要 root 权限,但 SysTracer 不会在系统上创建持久化文件或修改系统配置。
20 |
21 | ### 网络活动监控
22 |
23 | 当前 SysTracer 支持基于 IPv4 和 IPv6 的网络连接和网络监听活动的监控。
24 |
25 | 网络连接监控基于进程通过 `connect` 系统调用发起远程连接的跟踪,记录了进程用于发起远程连接的主动套接字 FD,远程连接的协议(TCP 或 UDP 等)以及远程服务器的地址。
26 |
27 | 网络监听监控基于进程通过 `listen` 系统调用发起网络监听的跟踪,记录了进程用于监听的被动套接字 FD,监听的网络地址以及 Backlog 大小。
28 |
29 | 通过 `./systracer --connect` 或 `./systracer --all` 可以启用对网络连接的监控,通过 `./systracer --listen` 或 `./systracer --all` 可以启用对网络监听的监控。
30 |
31 | ### 文件操作递归监控
32 |
33 | 当前 SysTracer 支持对文件操作进行递归监控,即用户指定监控目录和所关心的事件集合,SysTracer 输出在监控目录下发生的文件事件。
34 |
35 | 所谓递归监控是指,不仅监控指定的监控目录下的文件事件,还监控其子目录及所有后代目录下发生的文件事件。
36 |
37 | 当前支持的文件操作包括:(文件、目录)创建、(文件、目录)删除、移动或重命名、属性(权限、所有用户、所有组)修改、创建符号连接、创建硬连接。
38 |
39 | 如果同时指定了具有父子关系的监控目录(如 `/usr` 和 `/usr/lib`),则子目录的事件集合将覆盖父级目录的,父级目录下的其他目录不受影响。
40 |
41 | 通过 `./systracer --watch "<events>=<path>"` 可以添加一个监控目录,如 `./systracer --watch "all=/etc"`。
42 |
43 | 参数中的 `events` 指定了监控事件的列表,可以为以下事件的集合,事件之间通过 `,` 分隔:
44 |
45 | - `all`:监控所有支持的文件事件。
46 | - `create`:监控普通文件的创建,输出创建的文件路径和权限。
47 | - `mkdir`:监控目录的创建,输出创建的目录路径和权限。
48 | - `mknod`:监控特殊文件(管道、套接字、字符设备、块设备等)的创建,输出创建的文件路径、权限和设备 ID。
49 | - `delete`:监控文件的删除,输出删除的文件路径。
50 | - `rmdir`:监控目录的删除,输出删除的目录路径。
51 | - `rename`:监控文件或目录的移动或重命名,输出重命名前后的文件路径。
52 | - `attrib`:监控文件或目录属性(权限、所有用户、所有组)的修改,输出修改的文件路径和属性。
53 | - `symlink`:监控符号连接的创建,输出软连接的内容和软连接的路径。
54 | - `link`:监控硬连接的创建,输出链接的源路径和目标路径。
55 |
56 | 譬如 `./systracer --watch "all=/usr" --watch "create,mkdir=/usr/lib"` 就同时添加了对 `/usr` 目录下所有支持的文件事件的递归监控,以及 `/usr/lib` 目录下文件和目录创建事件的递归监控。
57 |
58 | 值得注意的是,SysTracer 只会上报已经成功完成的文件操作事件。
59 |
60 | ## 工作原理
61 |
62 | 
63 |
64 | 我们都知道,Linux 内核为用户进程管理了各种可访问的资源,用户进程需要发起系统调用来使用这些资源。不同资源的操作和处理由 Linux 内核中对应的子系统完成。
65 |
66 | SysTracer 通过向 Linux 中不同子系统插入 KProbe,当子系统的代码执行到 KProbe 处,便会采集我们设定的信息,并将数据写入 Linux Tracing 的事件环形缓冲中。写入事件循环缓冲的过程是非阻塞且无锁的。
67 |
68 | SysTracer 运行 Linux Tracing 事件循环缓冲的消费循环,拉取其中记录的数据并进行处理,即可产生各种类型的事件。
69 |
70 | 内核的各种原生监控机制(如 netlink connector、inotify、fanotify 等),都是先由各种上层监控程序编写用户态代码和内核态代码,对应的内核态代码作为一个半通用机制合并到 Linux 内核中,供该监控程序本身及其他可能的监控程序使用。
71 |
72 | 使用内核原生监控机制的缺点在于,过于为原始需求的监控程序“量身定做”了,以致于要获取其采集的信息以外的任何事件信息都困难重重。而使用 Linux Tracing 则可以根据应用程序自己的采集需求定制,不受当前内核监控机制的实现状态所影响。
73 |
74 | ## 许可协议
75 |
76 | 本项目基于 [Apache-2.0](LICENSE) 协议进行开源和许可。
77 |
--------------------------------------------------------------------------------
/cmd/systracer/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "os"
6 | "os/signal"
7 |
8 | "github.com/aegistudio/shaft"
9 | "github.com/aegistudio/shaft/serpent"
10 | "github.com/spf13/cobra"
11 | "go.uber.org/zap"
12 | "go.uber.org/zap/zapcore"
13 | "golang.org/x/sync/errgroup"
14 |
15 | "github.com/chaitin/systracer"
16 | "github.com/chaitin/systracer/inode"
17 | )
18 |
// moduleBarrier is an empty marker type. Module providers return a
// []moduleBarrier and the root command's final invocation takes a
// []moduleBarrier parameter, so the value itself carries no data.
type moduleBarrier struct{}
20 |
var (
	// moduleInits lists the module initializers; each one is called in
	// rootCmd's PreRunE and its result is attached to the command.
	moduleInits []func() shaft.Option
	// allEnabled is bound to the persistent "--all" flag.
	allEnabled bool
	// logLevel is bound to the persistent "--log-level" flag and parsed
	// with zapcore.ParseLevel when the logger is built.
	logLevel = "info"
)
26 |
// rootCmd is the root cobra command of the systracer binary. Its run
// function is assembled from the shaft options listed below plus the
// options contributed by the registered module initializers.
var rootCmd = &cobra.Command{
	Use: "systracer",
	Long: "Linux system activity tracer",
	// PreRunE calls every registered module initializer and attaches the
	// returned option to the command, stopping at the first error.
	PreRunE: func(cmd *cobra.Command, _ []string) error {
		for _, moduleInit := range moduleInits {
			if err := serpent.AddOption(
				cmd, moduleInit()); err != nil {
				return err
			}
		}
		return nil
	},
	RunE: serpent.Executor(shaft.Module(
		// Derive a cancellable context and an errgroup from the command
		// context and hand both to the rest of the stack. Deferred calls
		// run in reverse order: the context is cancelled first, then the
		// group is waited for (its error is deliberately discarded here).
		shaft.Stack(func(
			next func(*errgroup.Group, context.Context) error,
			rootCtx serpent.CommandContext,
		) error {
			cancelCtx, cancel := context.WithCancel(rootCtx)
			group, ctx := errgroup.WithContext(cancelCtx)
			defer func() { _ = group.Wait() }()
			defer cancel()
			return next(group, ctx)
		}),
		// Log that initialization finished, then block until every
		// goroutine in the group returns and report the group's error.
		// The []moduleBarrier parameter is requested but never read.
		shaft.Invoke(func(
			group *errgroup.Group, _ []moduleBarrier,
			logger *zap.SugaredLogger,
		) error {
			logger.Info("initialization complete")
			return group.Wait()
		}),
		// Build the trace manager from the context, the group and the
		// collected systracer options.
		shaft.Provide(func(
			ctx context.Context, group *errgroup.Group,
			options []systracer.Option,
		) (systracer.Manager, error) {
			return systracer.New(ctx, group, options...)
		}),
		// Build a colored console logger writing to stderr at the level
		// named by logLevel and pass it on in both plain and sugared
		// form; the logger is synced when the rest of the stack returns.
		shaft.Stack(func(
			next func(*zap.Logger, *zap.SugaredLogger) error,
		) error {
			level, err := zapcore.ParseLevel(logLevel)
			if err != nil {
				return err
			}
			consoleLevel := zap.NewAtomicLevelAt(level)
			consoleConfig := zap.NewDevelopmentEncoderConfig()
			consoleConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder
			consoleErrors := zapcore.Lock(os.Stderr)
			consoleEncoder := zapcore.NewConsoleEncoder(consoleConfig)
			loggerCore := zapcore.NewCore(
				consoleEncoder, consoleErrors, consoleLevel)
			logger := zap.New(loggerCore)
			sugaredLogger := logger.Sugar()
			defer logger.Sync()
			return next(logger, sugaredLogger)
		}),
		inode.Module,
	)).RunE,
}
85 |
86 | func init() {
87 | rootCmd.PersistentFlags().BoolVar(
88 | &allEnabled, "all", allEnabled,
89 | "capture all supported events")
90 | rootCmd.PersistentFlags().StringVar(
91 | &logLevel, "log-level", logLevel,
92 | "setup the log level of the logger")
93 | }
94 |
95 | func main() {
96 | ctx, cancel := signal.NotifyContext(
97 | context.Background(), os.Interrupt)
98 | defer cancel()
99 | if err := serpent.ExecuteContext(ctx, rootCmd); err != nil {
100 | os.Exit(1)
101 | }
102 | }
103 |
--------------------------------------------------------------------------------
/pkg/kversion/kversion.go:
--------------------------------------------------------------------------------
1 | // Package kversion fetches the linux kernel version,
2 | // and parse them with semantic versioning.
3 | package kversion
4 |
5 | import (
6 | "fmt"
7 | "io/ioutil"
8 | "regexp"
9 | "strconv"
10 |
11 | "github.com/pkg/errors"
12 | )
13 |
// Version stores the kernel version using semantic
// versioning, but converted to a 64bit numeric value.
//
// The major component occupies the most significant bits, followed by
// minor, patch and pre-release, so comparing two Version values as
// integers compares them component by component in that order.
type Version uint64
17 |
// Predefined component for composing into version.
//
// Components are packed from the least significant bit upwards:
// pre-release (32 bits), patch (16 bits), minor (8 bits) and major
// (the remaining 8 bits of the 64-bit value).
const (
	offsetPreRelease = 0
	bitsPreRelease = 32
	offsetPatch = bitsPreRelease
	bitsPatch = 16
	offsetMinor = offsetPatch + bitsPatch
	bitsMinor = 8
	offsetMajor = offsetMinor + bitsMinor
	bitsMajor = 64 - offsetMajor
)
29 |
30 | // Major returns the value of the major version.
31 | func (v Version) Major() int64 {
32 | return (int64(v) >> offsetMajor) & ((1 << bitsMajor) - 1)
33 | }
34 |
35 | // Minor returns the value of the minor version.
36 | func (v Version) Minor() int64 {
37 | return (int64(v) >> offsetMinor) & ((1 << bitsMinor) - 1)
38 | }
39 |
40 | // Patch returns the value of the patch version.
41 | func (v Version) Patch() int64 {
42 | return (int64(v) >> offsetPatch) & ((1 << bitsPatch) - 1)
43 | }
44 |
45 | // PreRelease returns the value of the pre-release version.
46 | func (v Version) PreRelease() int64 {
47 | return (int64(v) >> offsetPreRelease) & ((1 << bitsPreRelease) - 1)
48 | }
49 |
50 | // String formats the kernel version as triplets.
51 | func (v Version) String() string {
52 | return fmt.Sprintf("%d.%d.%d-%d",
53 | v.Major(), v.Minor(), v.Patch(), v.PreRelease())
54 | }
55 |
56 | // regexpKv is the regular expression for parsing
57 | // the kernel version string.
58 | var regexKv = regexp.MustCompile(
59 | `([0-9]+)\.([0-9]+)(\.[0-9]+)?(-[0-9]+)?`)
60 |
61 | // Parse the specified kernel version.
62 | func Parse(version string) (Version, error) {
63 | var err error
64 | kv := []byte(version)
65 |
66 | // Parse the provided kernel version.
67 | m := regexKv.FindSubmatchIndex(kv)
68 | if len(m) < 10 || m[0] != 0 {
69 | return Version(0), errors.Wrapf(
70 | err, "malformed %q", version)
71 | }
72 |
73 | // Parse the major, minor and patch version.
74 | majorComponent := string(kv[m[2]:m[3]])
75 | major, err := strconv.ParseUint(majorComponent, 10, bitsMajor)
76 | if err != nil {
77 | return Version(0), errors.Wrapf(
78 | err, "invalid major %q", majorComponent)
79 | }
80 | minorComponent := string(kv[m[4]:m[5]])
81 | minor, err := strconv.ParseUint(minorComponent, 10, bitsMinor)
82 | if err != nil {
83 | return Version(0), errors.Wrapf(
84 | err, "invalid minor %q", minorComponent)
85 | }
86 |
87 | // Check the optional kernel version.
88 | var patch uint64
89 | if m[6] >= 0 && m[7] >= 0 {
90 | patchComponent := string(kv[m[6]+1 : m[7]])
91 | patch, err = strconv.ParseUint(patchComponent, 10, bitsPatch)
92 | if err != nil {
93 | return Version(0), errors.Wrapf(
94 | err, "invalid patch %q", patchComponent)
95 | }
96 | }
97 | var preRelease uint64
98 | if m[8] >= 0 && m[9] >= 0 {
99 | preReleaseComponent := string(kv[m[8]+1 : m[9]])
100 | preRelease, err = strconv.ParseUint(
101 | preReleaseComponent, 10, bitsPreRelease)
102 | if err != nil {
103 | return Version(0), errors.Wrapf(
104 | err, "invalid pre-release %q", preReleaseComponent)
105 | }
106 | }
107 |
108 | // Return the parsed version result.
109 | return Version(preRelease |
110 | (major << offsetMajor) |
111 | (minor << offsetMinor) |
112 | (patch << offsetPatch)), nil
113 | }
114 |
115 | // Must forcefully parses the version and panics if
116 | // the version specified cannot resolve.
117 | func Must(version string) Version {
118 | v, err := Parse(version)
119 | if err != nil {
120 | panic(err)
121 | }
122 | return v
123 | }
124 |
// Current is the version of the running kernel, retrieved once when
// the process has just been initialized (it is assigned by this
// package's init function).
var Current Version
128 |
129 | // init initializes the current version retrieved
130 | // from the kernel.
131 | func init() {
132 | kv, kverr := ioutil.ReadFile("/proc/sys/kernel/osrelease")
133 | if kverr != nil {
134 | panic(kverr)
135 | }
136 | Current = Must(string(kv))
137 | }
138 |
--------------------------------------------------------------------------------
/package.go:
--------------------------------------------------------------------------------
1 | // Package systracer is the framework of linux event tracing
2 | // system developed by Chaitin Tech.
3 | package systracer
4 |
5 | import (
6 | "reflect"
7 | "time"
8 |
9 | "github.com/pkg/errors"
10 | "go.uber.org/zap"
11 | )
12 |
// ErrBadTracePoint is the sentinel error returned when the target
// trace point cannot be attached to.
var ErrBadTracePoint = errors.New("bad tracepoint")
16 |
// Condition is a common embedded field for defining an extra
// condition on the event struct that embeds it. The condition
// text is written in the embedded field's `tracing` struct tag.
type Condition struct{}

// typeCondition is the reflected type of Condition, presumably
// used to recognize the embedded field by its type.
var typeCondition = reflect.TypeOf(Condition{})
23 |
// StringAddr is the special type used in the place of a plain
// string field when the address the string was read from is
// meaningful as well: String holds the decoded text and Addr
// the accompanying address value.
type StringAddr struct {
	String string
	Addr uint64
}

// typeStringAddr is the reflected type of StringAddr, presumably
// used to recognize such fields by their type.
var typeStringAddr = reflect.TypeOf(StringAddr{})
34 |
// Event stores the common data carried by every type of
// concrete tracing event. The format is the one defined by
// the kernel's "tracing/trace" file.
//
// The comm field is omitted since it is always incomplete
// (rendered as "<...>") and is not essentially required by
// all events.
type Event struct {
	TaskPID uint32
	Timestamp time.Time
	epoch time.Duration
}
47 |
// ProbeEvent is the event triggered when touching any of the
// breakpoints inside a function. Embed it in a handler's event
// struct to request a kprobe or uprobe (see Manager).
type ProbeEvent struct {
	Event
}

// typeProbeEvent is the reflected type identifying probe events.
var typeProbeEvent = reflect.TypeOf(ProbeEvent{})
56 |
// ReturnEvent is the event triggered when a return instruction
// in a function is executed. Embed it in a handler's event
// struct to request a kretprobe or uretprobe (see Manager).
type ReturnEvent struct {
	Event
}

// typeReturnEvent is the reflected type identifying return events.
var typeReturnEvent = reflect.TypeOf(ReturnEvent{})
65 |
// Trace is a controlling handle for trace events.
//
// The trace handle is initially not started to avoid
// deadlocking when used as collectors. The caller must
// manually activate them after their master thread
// has been initialized.
//
// And the trace can be stopped at runtime, it is
// recommended to disable certain tracing when there's
// no subscribers and the master thread need not
// track the real time state with the trace.
type Trace interface {
	ID() uint64
	SetCondition(string) error
	SetEnabled(bool)
	GetDone() uint64
	GetLost() uint64
	Close()
}
85 |
// Manager is the manager of traces.
//
// The manager is the monolithic consumer that reads from the
// trace pipe "<tracefs>/instances/<instance>/trace_pipe"
// and generates events per registered event.
type Manager interface {
	// TraceKProbe creates either a kprobe (when handled
	// event is ProbeEvent) or a kretprobe (when handled
	// event is ReturnEvent).
	TraceKProbe(
		location string, handler interface{},
	) (Trace, <-chan struct{}, error)

	// TraceUProbe creates either a uprobe (when handled
	// event is ProbeEvent) or a uretprobe (when handled
	// event is ReturnEvent).
	TraceUProbe(
		library, location string, handler interface{},
	) (Trace, <-chan struct{}, error)
}
106 |
// option collects the settings applied by the Option functions;
// newOption supplies the defaults.
type option struct {
	// tracefsPath is set by WithTraceFSPath.
	tracefsPath string
	// instanceName is set by WithInstanceName.
	instanceName string
	// limitInterval is set by WithLimitInterval; zero by default.
	limitInterval time.Duration
	// logger is set by WithLogger.
	logger *zap.Logger
}
113 |
// Option to initialize the systrace subsystem. Each Option is a
// function that mutates the internal option set it is given.
type Option func(*option)
116 |
117 | // WithTraceFSPath is the path of the tracefs. The
118 | // default value is "/sys/kernel/debug/tracing".
119 | func WithTraceFSPath(path string) Option {
120 | return func(opt *option) {
121 | opt.tracefsPath = path
122 | }
123 | }
124 |
125 | // WithInstanceName is the name of the trace instance.
126 | // The default value is "systrace".
127 | func WithInstanceName(name string) Option {
128 | return func(opt *option) {
129 | opt.instanceName = name
130 | }
131 | }
132 |
133 | // WithLimitInterval specifies the interval of receiving
134 | // event from trace pipe. Setting this value to 0 will
135 | // disable the reception limit. The default value is 0.
136 | func WithLimitInterval(dur time.Duration) Option {
137 | return func(opt *option) {
138 | opt.limitInterval = dur
139 | }
140 | }
141 |
142 | // WithLogger specifies the logger for the manager.
143 | // The default value is zap.L().
144 | func WithLogger(logger *zap.Logger) Option {
145 | return func(opt *option) {
146 | opt.logger = logger
147 | }
148 | }
149 |
150 | // WithOptions aggregate a set of options together.
151 | func WithOptions(opts ...Option) Option {
152 | return func(o *option) {
153 | for _, opt := range opts {
154 | opt(o)
155 | }
156 | }
157 | }
158 |
159 | // newOption creates the option with all default values.
160 | func newOption() *option {
161 | return &option{
162 | tracefsPath: "/sys/kernel/debug/tracing",
163 | instanceName: "systrace",
164 | logger: zap.L(),
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/rcnotify/trace_amd64.go:
--------------------------------------------------------------------------------
1 | package rcnotify
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/chaitin/systracer"
7 | )
8 |
// eventFsnotify is a plain record of an fsnotify probe hit; its fields
// mirror those captured by the entryFsnotify* probe structs in this file.
// NOTE(review): Visited has no counterpart in the probe structs; its
// meaning is defined by the code consuming this struct — confirm there.
type eventFsnotify struct {
	TaskPID uint32
	Timestamp time.Time
	Inode uint64
	Access uint32
	ModifyAttrib uint32
	CloseOpen uint32
	Dentry uint32
	Filename string
	Visited *uint8
}
20 |
// entryFsnotify_V2_6_32 describes the values fetched on entry to the
// probed fsnotify function; the suffix suggests the layout targets
// kernels starting at 2.6.32 (verify against the registration code).
//
// Access, ModifyAttrib, CloseOpen and Dentry are bit ranges of the same
// register. Access and CloseOpen carry a "== 0" condition of their own,
// and the embedded Condition adds one over ModifyAttrib and Dentry.
type entryFsnotify_V2_6_32 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Inode uint64 `tracing:"%di"`
	Access uint32 `tracing:"%si,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%si,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%si,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%si,,bit[6:12]"`
	Filename string `tracing:"+8(%r8)"`
}
32 |
// entryFsnotify_V5_9 is the counterpart of entryFsnotify_V2_6_32 with a
// different argument order and an additional Dir value; the suffix
// suggests it targets kernels starting at 5.9 (verify against the
// registration code).
type entryFsnotify_V5_9 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Access uint32 `tracing:"%di,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%di,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%di,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%di,,bit[6:12]"`
	Dir uint64 `tracing:"%cx"`
	Filename string `tracing:"+8(%r8)"`
	Inode uint64 `tracing:"%r9"`
}
45 |
// entryFsnotifyParent_V5_9 reads Filename and Inode through the first
// argument, at the same offsets the path struct in this file uses for
// a name and an inode, and takes the bit fields from the second one.
// NOTE(review): the name suggests the fsnotify parent hook on kernels
// starting at 5.9 — confirm against the registration code.
type entryFsnotifyParent_V5_9 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Filename string `tracing:"+40(%di)"`
	Inode uint64 `tracing:"+48(%di)"`
	Access uint32 `tracing:"%si,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%si,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%si,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%si,,bit[6:12]"`
}
57 |
// path captures up to nine name components (N0..N8) and ten inode
// values (I0..I9) by following the same pointer offset repeatedly from
// a base address: level k dereferences the +24 link k times before
// reading the name at +40 or the inode at +48.
//
// "{1}" is a placeholder for the base address; it is presumably
// replaced by the tag of the field that embeds path (confirm in the tag
// compiler).
// NOTE(review): the offsets look like struct dentry's d_parent,
// d_name.name and d_inode on 64-bit kernels — confirm.
type path struct {
	N0 systracer.StringAddr `tracing:"+40({1})"`
	N1 systracer.StringAddr `tracing:"+40(+24({1}))"`
	N2 systracer.StringAddr `tracing:"+40(+24(+24({1})))"`
	N3 systracer.StringAddr `tracing:"+40(+24(+24(+24({1}))))"`
	N4 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24({1})))))"`
	N5 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24({1}))))))"`
	N6 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24({1})))))))"`
	N7 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24({1}))))))))"`
	N8 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"`

	I0 uint64 `tracing:"+48({1})"`
	I1 uint64 `tracing:"+48(+24({1}))"`
	I2 uint64 `tracing:"+48(+24(+24({1})))"`
	I3 uint64 `tracing:"+48(+24(+24(+24({1}))))"`
	I4 uint64 `tracing:"+48(+24(+24(+24(+24({1})))))"`
	I5 uint64 `tracing:"+48(+24(+24(+24(+24(+24({1}))))))"`
	I6 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24({1})))))))"`
	I7 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24({1}))))))))"`
	I8 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"`
	I9 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24(+24({1}))))))))))"`
}
80 |
81 | func (d path) extract() ([]string, []uint64) {
82 | nodes := []systracer.StringAddr{
83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8,
84 | }
85 | resultPath := extractPathComponent(nodes)
86 | inodes := []uint64{
87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9,
88 | }
89 | resultInodes := inodes[:len(resultPath)+1]
90 | return resultPath, resultInodes
91 | }
92 |
// entrySecurityInodeRename holds the two directory values shared by
// both rename probe events (hook inferred from the type name:
// security_inode_rename).
type entrySecurityInodeRename struct {
	SrcDir uint64 `tracing:"%di"`
	DstDir uint64 `tracing:"%dx"`
}

// entrySecurityInodeRenameSource is the probe event carrying the shared
// rename values plus the path chain read through the source argument.
type entrySecurityInodeRenameSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Source path `tracing:"%si"`
}

// entrySecurityInodeRenameTarget is the probe event carrying the shared
// rename values plus the path chain read through the target argument.
type entrySecurityInodeRenameTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Target path `tracing:"%cx"`
}
109 |
// entrySecurityInodeCreate is the probe event for what the name
// suggests is the security_inode_create hook: the directory value, the
// path chain of the entry and the mode.
type entrySecurityInodeCreate struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%di"`
	Path path `tracing:"%si"`
	Mode uint16 `tracing:"%dx"`
}
116 |
// entrySecurityInodeMknod is the probe event for what the name suggests
// is the security_inode_mknod hook: the directory value, the path chain
// of the entry, the mode and the device number.
type entrySecurityInodeMknod struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%di"`
	Path path `tracing:"%si"`
	Mode uint16 `tracing:"%dx"`
	Dev uint32 `tracing:"%cx"`
}
124 |
// entrySecurityInodeMkdir is the probe event for what the name suggests
// is the security_inode_mkdir hook: the directory value, the path chain
// of the entry and the mode.
type entrySecurityInodeMkdir struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%di"`
	Path path `tracing:"%si"`
	Mode uint16 `tracing:"%dx"`
}
131 |
// entrySecurityInodeLink holds the directory value shared by both link
// probe events (hook inferred from the type name: security_inode_link).
type entrySecurityInodeLink struct {
	Dir uint64 `tracing:"%si"`
}

// entrySecurityInodeLinkSource is the probe event carrying the shared
// link value plus the path chain read through the source argument.
type entrySecurityInodeLinkSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Source path `tracing:"%di"`
}

// entrySecurityInodeLinkTarget is the probe event carrying the shared
// link value plus the path chain read through the target argument.
type entrySecurityInodeLinkTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Target path `tracing:"%dx"`
}
147 |
// entrySecurityInodeSymlink is the probe event for what the name
// suggests is the security_inode_symlink hook: the directory value, the
// path chain of the entry and a string fetched from the third argument.
type entrySecurityInodeSymlink struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%di"`
	Path path `tracing:"%si"`
	Name string `tracing:"%dx"`
}
154 |
// entrySecurityInodeUnlink is the probe event for what the name
// suggests is the security_inode_unlink hook; only the path chain read
// through the second argument is captured.
type entrySecurityInodeUnlink struct {
	systracer.ProbeEvent
	Path path `tracing:"%si"`
}
159 |
// entrySecurityInodeRmdir is the probe event for what the name suggests
// is the security_inode_rmdir hook; only the path chain read through
// the second argument is captured.
type entrySecurityInodeRmdir struct {
	systracer.ProbeEvent
	Path path `tracing:"%si"`
}
164 |
// entrySecurityInodeSetattr is the probe event for what the name
// suggests is the security_inode_setattr hook: the path chain read
// through the first argument and four values read at fixed offsets
// from the second one.
// NOTE(review): the offsets look like the leading members of struct
// iattr (ia_valid, ia_mode, ia_uid, ia_gid) — confirm.
type entrySecurityInodeSetattr struct {
	systracer.ProbeEvent
	Path path `tracing:"%di"`
	Valid uint32 `tracing:"+0(%si)"`
	Mode uint16 `tracing:"+4(%si)"`
	Uid uint32 `tracing:"+8(%si)"`
	Gid uint32 `tracing:"+12(%si)"`
}
173 |
--------------------------------------------------------------------------------
/rcnotify/trace_arm64.go:
--------------------------------------------------------------------------------
1 | package rcnotify
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/chaitin/systracer"
7 | )
8 |
// eventFsnotify is a plain record of an fsnotify probe hit; its fields
// mirror those captured by the entryFsnotify* probe structs in this file.
// NOTE(review): Visited has no counterpart in the probe structs; its
// meaning is defined by the code consuming this struct — confirm there.
type eventFsnotify struct {
	TaskPID uint32
	Timestamp time.Time
	Inode uint64
	Access uint32
	ModifyAttrib uint32
	CloseOpen uint32
	Dentry uint32
	Filename string
	Visited *uint8
}
20 |
// entryFsnotify_V2_6_32 describes the values fetched on entry to the
// probed fsnotify function; the suffix suggests the layout targets
// kernels starting at 2.6.32 (verify against the registration code).
//
// Access, ModifyAttrib, CloseOpen and Dentry are bit ranges of the same
// register. Access and CloseOpen carry a "== 0" condition of their own,
// and the embedded Condition adds one over ModifyAttrib and Dentry.
type entryFsnotify_V2_6_32 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Inode uint64 `tracing:"%x0"`
	Access uint32 `tracing:"%x1,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%x1,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%x1,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%x1,,bit[6:12]"`
	Filename string `tracing:"+8(%x4)"`
}
32 |
// entryFsnotify_V5_9 is the counterpart of entryFsnotify_V2_6_32 with a
// different argument order and an additional Dir value; the suffix
// suggests it targets kernels starting at 5.9 (verify against the
// registration code).
type entryFsnotify_V5_9 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Access uint32 `tracing:"%x0,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%x0,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%x0,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%x0,,bit[6:12]"`
	Dir uint64 `tracing:"%x3"`
	Filename string `tracing:"+8(%x4)"`
	Inode uint64 `tracing:"%x5"`
}
45 |
// entryFsnotifyParent_V5_9 reads Filename and Inode through the first
// argument, at the same offsets the path struct in this file uses for
// a name and an inode, and takes the bit fields from the second one.
// NOTE(review): the name suggests the fsnotify parent hook on kernels
// starting at 5.9 — confirm against the registration code.
type entryFsnotifyParent_V5_9 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Filename string `tracing:"+40(%x0)"`
	Inode uint64 `tracing:"+48(%x0)"`
	Access uint32 `tracing:"%x1,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%x1,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%x1,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%x1,,bit[6:12]"`
}
57 |
// path captures up to nine name components (N0..N8) and ten inode
// values (I0..I9) by following the same pointer offset repeatedly from
// a base address: level k dereferences the +24 link k times before
// reading the name at +40 or the inode at +48.
//
// "{1}" is a placeholder for the base address; it is presumably
// replaced by the tag of the field that embeds path (confirm in the tag
// compiler).
// NOTE(review): the offsets look like struct dentry's d_parent,
// d_name.name and d_inode on 64-bit kernels — confirm.
type path struct {
	N0 systracer.StringAddr `tracing:"+40({1})"`
	N1 systracer.StringAddr `tracing:"+40(+24({1}))"`
	N2 systracer.StringAddr `tracing:"+40(+24(+24({1})))"`
	N3 systracer.StringAddr `tracing:"+40(+24(+24(+24({1}))))"`
	N4 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24({1})))))"`
	N5 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24({1}))))))"`
	N6 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24({1})))))))"`
	N7 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24({1}))))))))"`
	N8 systracer.StringAddr `tracing:"+40(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"`

	I0 uint64 `tracing:"+48({1})"`
	I1 uint64 `tracing:"+48(+24({1}))"`
	I2 uint64 `tracing:"+48(+24(+24({1})))"`
	I3 uint64 `tracing:"+48(+24(+24(+24({1}))))"`
	I4 uint64 `tracing:"+48(+24(+24(+24(+24({1})))))"`
	I5 uint64 `tracing:"+48(+24(+24(+24(+24(+24({1}))))))"`
	I6 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24({1})))))))"`
	I7 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24({1}))))))))"`
	I8 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24({1})))))))))"`
	I9 uint64 `tracing:"+48(+24(+24(+24(+24(+24(+24(+24(+24(+24({1}))))))))))"`
}
80 |
81 | func (d path) extract() ([]string, []uint64) {
82 | nodes := []systracer.StringAddr{
83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8,
84 | }
85 | resultPath := extractPathComponent(nodes)
86 | inodes := []uint64{
87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9,
88 | }
89 | resultInodes := inodes[:len(resultPath)+1]
90 | return resultPath, resultInodes
91 | }
92 |
// entrySecurityInodeRename holds the two directory values shared by
// both rename probe events (hook inferred from the type name:
// security_inode_rename).
type entrySecurityInodeRename struct {
	SrcDir uint64 `tracing:"%x0"`
	DstDir uint64 `tracing:"%x2"`
}

// entrySecurityInodeRenameSource is the probe event carrying the shared
// rename values plus the path chain read through the source argument.
type entrySecurityInodeRenameSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Source path `tracing:"%x1"`
}

// entrySecurityInodeRenameTarget is the probe event carrying the shared
// rename values plus the path chain read through the target argument.
type entrySecurityInodeRenameTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Target path `tracing:"%x3"`
}
109 |
// entrySecurityInodeCreate is the probe event for what the name
// suggests is the security_inode_create hook: the directory value, the
// path chain of the entry and the mode.
type entrySecurityInodeCreate struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%x0"`
	Path path `tracing:"%x1"`
	Mode uint16 `tracing:"%x2"`
}
116 |
// entrySecurityInodeMknod is the probe event for what the name suggests
// is the security_inode_mknod hook: the directory value, the path chain
// of the entry, the mode and the device number.
type entrySecurityInodeMknod struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%x0"`
	Path path `tracing:"%x1"`
	Mode uint16 `tracing:"%x2"`
	Dev uint32 `tracing:"%x3"`
}
124 |
// entrySecurityInodeMkdir is the probe event for what the name suggests
// is the security_inode_mkdir hook: the directory value, the path chain
// of the entry and the mode.
type entrySecurityInodeMkdir struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%x0"`
	Path path `tracing:"%x1"`
	Mode uint16 `tracing:"%x2"`
}
131 |
// entrySecurityInodeLink holds the directory value shared by both link
// probe events (hook inferred from the type name: security_inode_link).
type entrySecurityInodeLink struct {
	Dir uint64 `tracing:"%x1"`
}

// entrySecurityInodeLinkSource is the probe event carrying the shared
// link value plus the path chain read through the source argument.
type entrySecurityInodeLinkSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Source path `tracing:"%x0"`
}

// entrySecurityInodeLinkTarget is the probe event carrying the shared
// link value plus the path chain read through the target argument.
type entrySecurityInodeLinkTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Target path `tracing:"%x2"`
}
147 |
// entrySecurityInodeSymlink is the probe event for what the name
// suggests is the security_inode_symlink hook: the directory value, the
// path chain of the entry and a string fetched from the third argument.
type entrySecurityInodeSymlink struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%x0"`
	Path path `tracing:"%x1"`
	Name string `tracing:"%x2"`
}
154 |
// entrySecurityInodeUnlink is the probe event for what the name
// suggests is the security_inode_unlink hook; only the path chain read
// through the second argument is captured.
type entrySecurityInodeUnlink struct {
	systracer.ProbeEvent
	Path path `tracing:"%x1"`
}
159 |
// entrySecurityInodeRmdir is the probe event for what the name suggests
// is the security_inode_rmdir hook; only the path chain read through
// the second argument is captured.
type entrySecurityInodeRmdir struct {
	systracer.ProbeEvent
	Path path `tracing:"%x1"`
}
164 |
// entrySecurityInodeSetattr is the probe event for what the name
// suggests is the security_inode_setattr hook: the path chain read
// through the first argument and four values read at fixed offsets
// from the second one.
// NOTE(review): the offsets look like the leading members of struct
// iattr (ia_valid, ia_mode, ia_uid, ia_gid) — confirm.
type entrySecurityInodeSetattr struct {
	systracer.ProbeEvent
	Path path `tracing:"%x0"`
	Valid uint32 `tracing:"+0(%x1)"`
	Mode uint16 `tracing:"+4(%x1)"`
	Uid uint32 `tracing:"+8(%x1)"`
	Gid uint32 `tracing:"+12(%x1)"`
}
173 |
--------------------------------------------------------------------------------
/rcnotify/trace_386.go:
--------------------------------------------------------------------------------
1 | package rcnotify
2 |
3 | import (
4 | "time"
5 |
6 | "github.com/chaitin/systracer"
7 | )
8 |
// eventFsnotify is a plain record of an fsnotify probe hit; its fields
// mirror those captured by the entryFsnotify* probe structs in this file.
// NOTE(review): Visited has no counterpart in the probe structs; its
// meaning is defined by the code consuming this struct — confirm there.
type eventFsnotify struct {
	TaskPID uint32
	Timestamp time.Time
	Inode uint64
	Access uint32
	ModifyAttrib uint32
	CloseOpen uint32
	Dentry uint32
	Filename string
	Visited *uint8
}
20 |
// entryFsnotify_V2_6_32 describes the values fetched on entry to the
// probed fsnotify function; the suffix suggests the layout targets
// kernels starting at 2.6.32 (verify against the registration code).
//
// Access, ModifyAttrib, CloseOpen and Dentry are bit ranges of the same
// register. Access and CloseOpen carry a "== 0" condition of their own,
// and the embedded Condition adds one over ModifyAttrib and Dentry.
// Filename is read through a value taken from the stack.
type entryFsnotify_V2_6_32 struct {
	systracer.ProbeEvent
	systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`

	Inode uint32 `tracing:"%ax"`
	Access uint32 `tracing:"%dx,Access == 0,bit[0]"`
	ModifyAttrib uint32 `tracing:"%dx,,bit[1:2]"`
	CloseOpen uint32 `tracing:"%dx,CloseOpen == 0,bit[3:5]"`
	Dentry uint32 `tracing:"%dx,,bit[6:12]"`
	Filename string `tracing:"+8(+8(%sp))"`
}
32 |
33 | type entryFsnotify_V5_9 struct {
34 | systracer.ProbeEvent
35 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`
36 |
37 | Access uint32 `tracing:"%ax,Access == 0,bit[0]"`
38 | ModifyAttrib uint32 `tracing:"%ax,,bit[1:2]"`
39 | CloseOpen uint32 `tracing:"%ax,CloseOpen == 0,bit[3:5]"`
40 | Dentry uint32 `tracing:"%ax,,bit[6:12]"`
41 | Dir uint32 `tracing:"+4(%sp)"`
42 | Filename string `tracing:"+8(+8(%sp))"`
43 | Inode uint32 `tracing:"+12(%sp)`
44 | }
45 |
46 | type entryFsnotifyParent_V5_9 struct {
47 | systracer.ProbeEvent
48 | systracer.Condition `tracing:"(ModifyAttrib == 2) || (Dentry != 0)"`
49 |
50 | Filename string `tracing:"+40(%ax)"`
51 | Inode uint64 `tracing:"+48(%ax)"`
52 | Access uint32 `tracing:"%dx,Access == 0,bit[0]"`
53 | ModifyAttrib uint32 `tracing:"%dx,,bit[1:2]"`
54 | CloseOpen uint32 `tracing:"%dx,CloseOpen == 0,bit[3:5]"`
55 | Dentry uint32 `tracing:"%dx,,bit[6:12]"`
56 | }
57 |
// path captures up to nine name components (N0..N8) and ten inode
// values (I0..I9) by following the same pointer offset repeatedly from
// a base address: level k dereferences the +16 link k times before
// reading the name at +28 or the inode at +32.
//
// "{1}" is a placeholder for the base address; it is presumably
// replaced by the tag of the field that embeds path (confirm in the tag
// compiler).
// NOTE(review): the offsets look like struct dentry's d_parent,
// d_name.name and d_inode on 32-bit kernels — confirm.
type path struct {
	N0 systracer.StringAddr `tracing:"+28({1})"`
	N1 systracer.StringAddr `tracing:"+28(+16({1}))"`
	N2 systracer.StringAddr `tracing:"+28(+16(+16({1})))"`
	N3 systracer.StringAddr `tracing:"+28(+16(+16(+16({1}))))"`
	N4 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16({1})))))"`
	N5 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16({1}))))))"`
	N6 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16({1})))))))"`
	N7 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16(+16({1}))))))))"`
	N8 systracer.StringAddr `tracing:"+28(+16(+16(+16(+16(+16(+16(+16(+16({1})))))))))"`

	I0 uint64 `tracing:"+32({1})"`
	I1 uint64 `tracing:"+32(+16({1}))"`
	I2 uint64 `tracing:"+32(+16(+16({1})))"`
	I3 uint64 `tracing:"+32(+16(+16(+16({1}))))"`
	I4 uint64 `tracing:"+32(+16(+16(+16(+16({1})))))"`
	I5 uint64 `tracing:"+32(+16(+16(+16(+16(+16({1}))))))"`
	I6 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16({1})))))))"`
	I7 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16({1}))))))))"`
	I8 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16(+16({1})))))))))"`
	I9 uint64 `tracing:"+32(+16(+16(+16(+16(+16(+16(+16(+16(+16({1}))))))))))"`
}
80 |
81 | func (d path) extract() ([]string, []uint64) {
82 | nodes := []systracer.StringAddr{
83 | d.N0, d.N1, d.N2, d.N3, d.N4, d.N5, d.N6, d.N7, d.N8,
84 | }
85 | resultPath := extractPathComponent(nodes)
86 | inodes := []uint64{
87 | d.I0, d.I1, d.I2, d.I3, d.I4, d.I5, d.I6, d.I7, d.I8, d.I9,
88 | }
89 | resultInodes := inodes[:len(resultPath)+1]
90 | return resultPath, resultInodes
91 | }
92 |
// entrySecurityInodeRename holds the two directory values shared by
// both rename probe events (hook inferred from the type name:
// security_inode_rename).
type entrySecurityInodeRename struct {
	SrcDir uint64 `tracing:"%ax"`
	DstDir uint64 `tracing:"%cx"`
}

// entrySecurityInodeRenameSource is the probe event carrying the shared
// rename values plus the path chain read through the source argument.
type entrySecurityInodeRenameSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Source path `tracing:"%dx"`
}

// entrySecurityInodeRenameTarget is the probe event carrying the shared
// rename values plus the path chain read through the target argument,
// which is fetched from the stack on this architecture.
type entrySecurityInodeRenameTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeRename
	Target path `tracing:"+4(%sp)"`
}
109 |
// entrySecurityInodeCreate is the probe event for what the name
// suggests is the security_inode_create hook: the directory value, the
// path chain of the entry and the mode.
type entrySecurityInodeCreate struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%ax"`
	Path path `tracing:"%dx"`
	Mode uint16 `tracing:"%cx"`
}
116 |
// entrySecurityInodeMknod is the probe event for what the name suggests
// is the security_inode_mknod hook: the directory value, the path chain
// of the entry, the mode and the device number (the latter fetched from
// the stack on this architecture).
type entrySecurityInodeMknod struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%ax"`
	Path path `tracing:"%dx"`
	Mode uint16 `tracing:"%cx"`
	Dev uint32 `tracing:"+4(%sp)"`
}
124 |
// entrySecurityInodeMkdir is the probe event for what the name suggests
// is the security_inode_mkdir hook: the directory value, the path chain
// of the entry and the mode.
type entrySecurityInodeMkdir struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%ax"`
	Path path `tracing:"%dx"`
	Mode uint16 `tracing:"%cx"`
}
131 |
// entrySecurityInodeLink holds the directory value shared by both link
// probe events (hook inferred from the type name: security_inode_link).
type entrySecurityInodeLink struct {
	Dir uint64 `tracing:"%dx"`
}

// entrySecurityInodeLinkSource is the probe event carrying the shared
// link value plus the path chain read through the source argument.
type entrySecurityInodeLinkSource struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Source path `tracing:"%ax"`
}

// entrySecurityInodeLinkTarget is the probe event carrying the shared
// link value plus the path chain read through the target argument.
type entrySecurityInodeLinkTarget struct {
	systracer.ProbeEvent
	Event entrySecurityInodeLink
	Target path `tracing:"%cx"`
}
147 |
// entrySecurityInodeSymlink is the probe event for what the name
// suggests is the security_inode_symlink hook: the directory value, the
// path chain of the entry and a string fetched from the third argument.
type entrySecurityInodeSymlink struct {
	systracer.ProbeEvent
	Dir uint64 `tracing:"%ax"`
	Path path `tracing:"%dx"`
	Name string `tracing:"%cx"`
}
154 |
// entrySecurityInodeUnlink is the probe event for what the name
// suggests is the security_inode_unlink hook; only the path chain read
// through the second argument is captured.
type entrySecurityInodeUnlink struct {
	systracer.ProbeEvent
	Path path `tracing:"%dx"`
}
159 |
// entrySecurityInodeRmdir is the probe event for what the name suggests
// is the security_inode_rmdir hook; only the path chain read through
// the second argument is captured.
type entrySecurityInodeRmdir struct {
	systracer.ProbeEvent
	Path path `tracing:"%dx"`
}
164 |
165 | type entrySecurityInodeSetattr struct {
166 | systracer.ProbeEvent
167 | Path path `tracing:"%di"`
168 | Valid uint32 `tracing:"+0(%si)"`
169 | Mode uint16 `tracing:"+4(%si)"`
170 | Uid uint32 `tracing:"+8(%si)"`
171 | Gid uint32 `tracing:"+12(%si)"`
172 | }
173 |
--------------------------------------------------------------------------------
/cmd/systracer/watch.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "os"
7 | "strings"
8 | "syscall"
9 |
10 | "github.com/aegistudio/shaft"
11 | "github.com/pkg/errors"
12 | "go.uber.org/zap"
13 | "golang.org/x/sync/errgroup"
14 |
15 | "github.com/chaitin/systracer/rcnotify"
16 | )
17 |
var (
	// watches holds the raw watch specifications; initWatchModule splits
	// each one at the first "=" into an event list and a path, and
	// returns an empty module when there are none.
	// NOTE(review): presumably filled from the "--watch" flag — the
	// flag registration is not in view here.
	watches []string
)
21 |
22 | func initWatchModule() shaft.Option {
23 | if len(watches) == 0 {
24 | return shaft.Module()
25 | }
26 | return shaft.Module(
27 | rcnotify.Module,
28 | shaft.Provide(func(
29 | ctx context.Context, group *errgroup.Group,
30 | logger *zap.SugaredLogger, manager *rcnotify.Manager,
31 | ) ([]moduleBarrier, error) {
32 | // Attempt to parse the watch argumets.
33 | var options []rcnotify.Option
34 | for _, watch := range watches {
35 | pathIndex := strings.Index(watch, "=")
36 | if pathIndex <= 0 {
37 | return nil, errors.New(
38 | `"watch must be of format "="`)
39 | }
40 | events := watch[:pathIndex]
41 | path := watch[pathIndex+1:]
42 | var matcher func(rcnotify.Op, string) rcnotify.Option
43 | matcher = rcnotify.WatchDir
44 | var flags rcnotify.Op
45 | for _, event := range strings.Split(events, ",") {
46 | switch event {
47 | case "all":
48 | flags |= rcnotify.OpAll
49 | case "create":
50 | flags |= rcnotify.OpCreate
51 | case "mknod":
52 | flags |= rcnotify.OpMknod
53 | case "mkdir":
54 | flags |= rcnotify.OpMkdir
55 | case "delete":
56 | flags |= rcnotify.OpDelete
57 | case "rmdir":
58 | flags |= rcnotify.OpRmdir
59 | case "rename":
60 | flags |= rcnotify.OpRename
61 | case "attrib":
62 | flags |= rcnotify.OpAttrib
63 | case "link":
64 | flags |= rcnotify.OpLink
65 | case "symlink":
66 | flags |= rcnotify.OpSymlink
67 | case "dir":
68 | matcher = rcnotify.WatchDir
69 | case "file":
70 | matcher = rcnotify.WatchFile
71 | default:
72 | return nil, errors.Errorf(
73 | "unknown event %q", event)
74 | }
75 | }
76 | options = append(options, matcher(flags, path))
77 | }
78 | watcher, err := manager.Watch(options...)
79 | if err != nil {
80 | return nil, err
81 | }
82 | group.Go(func() error {
83 | defer watcher.Close()
84 | for {
85 | var event rcnotify.Event
86 | select {
87 | case <-ctx.Done():
88 | return nil
89 | case event = <-watcher.C:
90 | }
91 | eventContext := fmt.Sprintf("%s %d",
92 | event.Timestamp.Format("2006-01-02T15:04:05.999999999"), event.PID)
93 | sourcePath := "(unknown)"
94 | if event.Source != nil {
95 | sourcePath = fmt.Sprintf("%q", *event.Source)
96 | }
97 | targetPath := "(unknown)"
98 | if event.Target != nil {
99 | targetPath = fmt.Sprintf("%q", *event.Target)
100 | }
101 | var fileMode os.FileMode
102 | if event.Mode != nil {
103 | fileMode = os.FileMode(*event.Mode & 0777)
104 | switch *event.Mode & syscall.S_IFMT {
105 | case syscall.S_IFBLK:
106 | fileMode |= os.ModeDevice
107 | case syscall.S_IFCHR:
108 | fileMode |= os.ModeDevice | os.ModeCharDevice
109 | case syscall.S_IFDIR:
110 | fileMode |= os.ModeDir
111 | case syscall.S_IFIFO:
112 | fileMode |= os.ModeNamedPipe
113 | case syscall.S_IFLNK:
114 | fileMode |= os.ModeSymlink
115 | case syscall.S_IFREG:
116 | // nothing to do
117 | case syscall.S_IFSOCK:
118 | fileMode |= os.ModeSocket
119 | }
120 | if (*event.Mode & syscall.S_ISGID) != 0 {
121 | fileMode |= os.ModeSetgid
122 | }
123 | if (*event.Mode & syscall.S_ISUID) != 0 {
124 | fileMode |= os.ModeSetuid
125 | }
126 | if (*event.Mode & syscall.S_ISVTX) != 0 {
127 | fileMode |= os.ModeSticky
128 | }
129 | }
130 | switch event.Op {
131 | case rcnotify.OpCreate:
132 | logger.Infof("%s - create(%s, %q)",
133 | eventContext, targetPath, fileMode)
134 | case rcnotify.OpMkdir:
135 | logger.Infof("%s - mkdir(%s, %q)",
136 | eventContext, targetPath, fileMode)
137 | case rcnotify.OpMknod:
138 | logger.Infof("%s - mknod(%s, %q, %d)",
139 | eventContext, targetPath,
140 | fileMode, *event.Dev)
141 | case rcnotify.OpDelete:
142 | logger.Infof("%s - delete(%s)",
143 | eventContext, targetPath)
144 | case rcnotify.OpRmdir:
145 | logger.Infof("%s - rmdir(%s)",
146 | eventContext, targetPath)
147 | case rcnotify.OpRename:
148 | logger.Infof("%s - rename(%s, %s)",
149 | eventContext, sourcePath, targetPath)
150 | case rcnotify.OpAttrib:
151 | if event.Attr&rcnotify.AttrMode != 0 {
152 | logger.Infof("%s - chmod(%s, %q)",
153 | eventContext, targetPath, fileMode)
154 | }
155 | if event.Attr&rcnotify.AttrUID != 0 {
156 | logger.Infof("%s - chown_uid(%s, %d)",
157 | eventContext, targetPath, *event.Uid)
158 | }
159 | if event.Attr&rcnotify.AttrGID != 0 {
160 | logger.Infof("%s - chown_gid(%s, %d)",
161 | eventContext, targetPath, *event.Gid)
162 | }
163 | case rcnotify.OpLink:
164 | logger.Infof("%s - link(%s, %s)",
165 | eventContext, sourcePath, targetPath)
166 | case rcnotify.OpSymlink:
167 | logger.Infof("%s - symlink(%s, %s)",
168 | eventContext, sourcePath, targetPath)
169 | }
170 | }
171 | return nil
172 | })
173 | return nil, nil
174 | }),
175 | )
176 | }
177 |
178 | func init() {
179 | moduleInits = append(moduleInits, initWatchModule)
180 | rootCmd.PersistentFlags().StringArrayVarP(
181 | &watches, "watch", "w", watches,
182 | "specify list of watches for directory events")
183 | }
184 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277 h1:SI22BGHhuRdM/dxnCb5wO/5yhNAeN0JMqgEK/dJujyM=
2 | github.com/aegistudio/shaft v0.0.0-20221023182702-de3982a0a277/go.mod h1:78gJgtia9zBYrzBRlRe5vDSEtWNfN6PbHX2Y7WNzghI=
3 | github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
4 | github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
5 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
9 | github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc=
10 | github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
11 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
12 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
13 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
14 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
15 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
16 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
17 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
18 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
21 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
22 | github.com/spf13/cobra v1.6.0 h1:42a0n6jwCot1pUmomAp4T7DeMD+20LFv4Q54pxLf2LI=
23 | github.com/spf13/cobra v1.6.0/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
24 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
25 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
26 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
27 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
28 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
29 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
30 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
31 | github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
32 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
33 | github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
34 | go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
35 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
36 | go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI=
37 | go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
38 | go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4=
39 | go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
40 | go.uber.org/zap v1.23.0 h1:OjGQ5KQDEUawVHxNwQgPpiypGHOxo2mNZsOqTak4fFY=
41 | go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY=
42 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
43 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
44 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
45 | golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
46 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
47 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
48 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
49 | golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
50 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
51 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
52 | golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
53 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
54 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
55 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
56 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
57 | golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
58 | golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
59 | golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
60 | golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
61 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
62 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
63 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
64 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
65 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
66 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
67 | golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
68 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
69 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
70 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
71 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
72 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
73 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
74 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
75 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
76 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
77 |
--------------------------------------------------------------------------------
/tracefs.go:
--------------------------------------------------------------------------------
1 | package systracer
2 |
3 | import (
4 | "fmt"
5 | "io/ioutil"
6 | "os"
7 | "path/filepath"
8 | "strings"
9 | "syscall"
10 |
11 | "github.com/pkg/errors"
12 | )
13 |
// setError aggregates the errors collected while a tracefs
// operation was applied to a set of entities.
//
// Op names the operation, Arg records the arguments it was
// invoked with and Err holds the individual failures.
type setError struct {
	Op  string
	Arg []string
	Err []error
}

// Error renders the operation name, its arguments (joined by
// ", " and quoted as a single string) and the messages of all
// collected errors, one per line.
func (e *setError) Error() string {
	messages := make([]string, 0, len(e.Err))
	for i := range e.Err {
		messages = append(messages, e.Err[i].Error())
	}
	arguments := strings.Join(e.Arg, ", ")
	details := strings.Join(messages, "\n")
	return fmt.Sprintf(
		"errors returned while %s(%q): %s", e.Op, arguments, details)
}
33 |
34 | // disableInstance will attempt to disable all events associated
35 | // with a single instance.
36 | func disableInstance(tracefs, instance string) error {
37 | if instance == "" {
38 | return errors.New("invalid empty instance name")
39 | }
40 | set := &setError{
41 | Op: "disableInstance",
42 | Arg: []string{tracefs, instance},
43 | }
44 |
45 | // Walk the instance event directory and disable event.
46 | if err := filepath.Walk(
47 | filepath.Join(tracefs, "instances", instance),
48 | func(path string, info os.FileInfo, err error) error {
49 | if err != nil {
50 | set.Err = append(set.Err, err)
51 | }
52 | if info == nil {
53 | return nil
54 | }
55 | if info.Name() == "enable" && !info.IsDir() {
56 | if err = ioutil.WriteFile(path, []byte("0"),
57 | os.FileMode(0600)); err != nil {
58 | set.Err = append(set.Err, err)
59 | }
60 | }
61 | return nil
62 | }); err != nil {
63 | set.Err = append(set.Err, err)
64 | }
65 | if len(set.Err) > 0 {
66 | return set
67 | }
68 | return nil
69 | }
70 |
71 | // removeInstance will attempt to remove an instance from
72 | // currently registered traces.
73 | func removeInstance(tracefs, instance string) error {
74 | if instance == "" {
75 | return errors.New("invalid empty instance name")
76 | }
77 |
78 | // We won't have to remove the instance if the instance
79 | // has already been deleted.
80 | set := &setError{
81 | Op: "removeInstance",
82 | Arg: []string{tracefs, instance},
83 | }
84 | instancePath := filepath.Join(tracefs, "instances", instance)
85 | var stat syscall.Stat_t
86 | if err := syscall.Stat(instancePath, &stat); err != nil {
87 | if err == syscall.ENOENT {
88 | return nil
89 | }
90 | set.Err = append(set.Err, err)
91 | return set
92 | }
93 |
94 | // Disable current tracing of the instance.
95 | if err := ioutil.WriteFile(
96 | filepath.Join(instancePath, "tracing_on"),
97 | []byte("0"), os.FileMode(0600)); err != nil {
98 | set.Err = append(set.Err, err)
99 | return set
100 | }
101 |
102 | // Cleanup content of all ring buffers in the instance.
103 | //
104 | // XXX: though it is unnecessary, there's a bug
105 | // (RingBufferDetonator) that exists in kernel ranged from
106 | // 3.10 to 5.14-rc3, which means it should exist in exactly
107 | // all linux that cloudwalker agent operates on.
108 | //
109 | // https://github.com/torvalds/linux/commit/67f0d6d9883c13174669f88adac4f0ee656cc16a
110 | //
111 | // When the bug is triggered, if will stuck inside a deadloop
112 | // that can only be bailed out by disabling the tracing and
113 | // cleanup the ring buffers.
114 | if err := ioutil.WriteFile(
115 | filepath.Join(instancePath, "trace"),
116 | nil, os.FileMode(0600)); err != nil {
117 | set.Err = append(set.Err, err)
118 | return set
119 | }
120 |
121 | // If the instance could be removed directly, we will just
122 | // attempt to remove and return. And if we will only try
123 | // to perform more work if it is EBUSY.
124 | err := syscall.Rmdir(instancePath)
125 | if err == nil || err == syscall.ENOENT {
126 | return nil
127 | }
128 | if err != syscall.EBUSY {
129 | set.Err = append(set.Err, err)
130 | return set
131 | }
132 |
133 | // Record the errors generated while disabling instance.
134 | //
135 | // Please notice that it is only considered an error when we
136 | // cannot remove the instance directory.
137 | if err := disableInstance(tracefs, instance); err != nil {
138 | if subset, ok := err.(*setError); ok {
139 | set.Err = append(set.Err, subset.Err...)
140 | } else {
141 | set.Err = append(set.Err, err)
142 | }
143 | }
144 |
145 | // Remove the root directory of instance.
146 | err = syscall.Rmdir(instancePath)
147 | if err == nil || err == syscall.ENOENT {
148 | return nil
149 | }
150 | set.Err = append(set.Err, err)
151 | return set
152 | }
153 |
154 | // removeProbe will attempt to remove a single probe from
155 | // specified file, while disabling all of them.
156 | func removeProbe(tracefs, typ, namespace, probe string) error {
157 | if typ == "" {
158 | return errors.New("invalid empty typ name")
159 | }
160 | if namespace == "" {
161 | return errors.New("invalid empty namespace name")
162 | }
163 | if probe == "" {
164 | return errors.New("invalid empty probe name")
165 | }
166 |
167 | // Attempt to open the probe manifest first. Under no
168 | // circumstance should the open fail.
169 | var err error
170 | set := &setError{
171 | Op: "removeProbe",
172 | Arg: []string{tracefs, typ, namespace, probe},
173 | }
174 | fd, err := syscall.Open(filepath.Join(tracefs, typ),
175 | syscall.O_WRONLY|syscall.O_APPEND, 0600)
176 | if err != nil {
177 | set.Err = append(set.Err, err)
178 | return set
179 | }
180 | defer func() { _ = syscall.Close(fd) }()
181 |
182 | // Attempt to remove the probe from the file.
183 | eraseWord := []byte(fmt.Sprintf(
184 | "-:%s/%s", namespace, probe))
185 | _, err = syscall.Write(fd, eraseWord)
186 | if err == nil || err == syscall.ENOENT {
187 | return nil
188 | }
189 | if err != syscall.EBUSY {
190 | set.Err = append(set.Err, err)
191 | return set
192 | }
193 |
194 | // Disable the probe in all of the item list.
195 | if err = ioutil.WriteFile(filepath.Join(
196 | tracefs, "events", namespace, probe, "enable"),
197 | []byte("0"), os.FileMode(0600)); err != nil {
198 | set.Err = append(set.Err, err)
199 | }
200 | dirents, err := ioutil.ReadDir(
201 | filepath.Join(tracefs, "instances"))
202 | if err != nil && !os.IsNotExist(err) {
203 | set.Err = append(set.Err, err)
204 | }
205 | for _, dirent := range dirents {
206 | if !dirent.IsDir() {
207 | continue
208 | }
209 | if err = ioutil.WriteFile(filepath.Join(
210 | tracefs, "instances", dirent.Name(),
211 | "events", namespace, probe, "enable"),
212 | []byte("0"), os.FileMode(0600)); err != nil {
213 | set.Err = append(set.Err, err)
214 | }
215 | }
216 |
217 | // Reattempt to disable the probe from the file.
218 | _, err = syscall.Write(fd, eraseWord)
219 | if err == nil || err == syscall.ENOENT {
220 | return nil
221 | }
222 | set.Err = append(set.Err, err)
223 | return set
224 | }
225 |
226 | // removeAllProbe will remove all probes under namespace.
227 | func removeAllProbe(tracefs, typ, namespace string) error {
228 | if typ == "" {
229 | return errors.New("invalid empty typ name")
230 | }
231 | if namespace == "" {
232 | return errors.New("invalid empty namespace name")
233 | }
234 |
235 | // Iterate and invoke remove method on the events.
236 | var err error
237 | set := &setError{
238 | Op: "removeAllProbe",
239 | Arg: []string{tracefs, typ, namespace},
240 | }
241 | dirents, err := ioutil.ReadDir(
242 | filepath.Join(tracefs, "events", namespace))
243 | if err != nil && !os.IsNotExist(err) {
244 | set.Err = append(set.Err, err)
245 | }
246 | for _, dirent := range dirents {
247 | if !dirent.IsDir() {
248 | continue
249 | }
250 | if err := removeProbe(tracefs, typ, namespace,
251 | dirent.Name()); err != nil {
252 | if subset, ok := err.(*setError); ok {
253 | set.Err = append(set.Err, subset.Err...)
254 | } else {
255 | set.Err = append(set.Err, err)
256 | }
257 | }
258 | }
259 | if len(set.Err) > 0 {
260 | return set
261 | }
262 | return nil
263 | }
264 |
--------------------------------------------------------------------------------
/connect/connect.go:
--------------------------------------------------------------------------------
1 | // Package connect defines the event source of network
2 | // connection events on linux.
3 | package connect
4 |
5 | import (
6 | "context"
7 | "encoding/binary"
8 | "net"
9 | "time"
10 |
11 | "github.com/aegistudio/shaft"
12 |
13 | "github.com/chaitin/systracer"
14 | )
15 |
// Op tells the two kinds of connect events apart.
type Op uint8

const (
	// OpConnectStart marks the event dispatched by
	// handleInetProtocolConnect.
	OpConnectStart Op = iota

	// OpConnectEnd marks the event dispatched by
	// handleExitConnect.
	OpConnectEnd
)
23 |
24 | // Event is the generated event of this module.
25 | type Event struct {
26 | Op Op
27 | PID uint32
28 | Timestamp time.Time
29 | FD int
30 | Errno *int32
31 | Family uint16
32 | Type uint16
33 | Addr string
34 | FlowInfo *uint32
35 | Scope *uint32
36 | Port uint16
37 | }
38 |
39 | // collector is the event's collector.
40 | type collector struct {
41 | ctx context.Context
42 | ch chan<- Event
43 | registries map[uint32]*Event
44 | }
45 |
46 | func (c *collector) dispatch(event Event) {
47 | select {
48 | case <-c.ctx.Done():
49 | case c.ch <- event:
50 | }
51 | }
52 |
53 | // handleConnectInet4 handles the event triggered when
54 | // a syscall connect or its equivalences are encountered
55 | // and represents a IPv4 event record.
56 | //
57 | // connect(FD, &sockaddr_in{
58 | // .sin_family = AF_INET = 2,
59 | // .sin_port = Port,
60 | // .sin_addr = { Address },
61 | // }, sizeof(sockaddr_in) == 16)
62 | func (col *collector) handleConnectInet4(
63 | event entrySyscallConnectInet4,
64 | ) {
65 | connectEvent := &Event{}
66 | connectEvent.Timestamp = event.Timestamp
67 | connectEvent.PID = event.TaskPID
68 | connectEvent.FD = int(event.FD)
69 | connectEvent.Family = event.Family
70 | connectEvent.Port = event.Port
71 | var ipv4 [4]byte
72 | binary.BigEndian.PutUint32(ipv4[:], event.Address)
73 | connectEvent.Addr = net.IP(ipv4[:]).String()
74 | col.registries[event.TaskPID] = connectEvent
75 | }
76 |
77 | // handleConnectInet6 handles the event triggered when
78 | // a syscall connect or its equivalences are encountered
79 | // and represents a IPv4 event record.
80 | //
81 | // connect(FD, &sockaddr_in6{
82 | // .sin6_family = AF_INET6 = 10,
83 | // .sin6_port = Port,
84 | // .sin6_flowinfo = FlowInfo,
85 | // .sin6_addr = in6_addr{
86 | // Address0, Address1, Address2, Address3,
87 | // },
88 | // .sin6_scope_id = Scope,
89 | // }, sizeof(sockaddr_in6) = 28})
90 | func (col *collector) handleConnectInet6(
91 | event entrySyscallConnectInet6,
92 | ) {
93 | connectEvent := &Event{}
94 | connectEvent.Timestamp = event.Timestamp
95 | connectEvent.PID = event.TaskPID
96 | connectEvent.FD = int(event.FD)
97 | connectEvent.Family = event.Family
98 | connectEvent.Port = event.Port
99 | connectEvent.FlowInfo = new(uint32)
100 | *connectEvent.FlowInfo = event.FlowInfo
101 | connectEvent.Scope = new(uint32)
102 | *connectEvent.Scope = event.Scope
103 | var ipv6 [16]byte
104 | binary.BigEndian.PutUint32(ipv6[0:4], event.Address0)
105 | binary.BigEndian.PutUint32(ipv6[4:8], event.Address1)
106 | binary.BigEndian.PutUint32(ipv6[8:12], event.Address2)
107 | binary.BigEndian.PutUint32(ipv6[12:16], event.Address3)
108 | connectEvent.Addr = net.IP(ipv6[:]).String()
109 | col.registries[event.TaskPID] = connectEvent
110 | }
111 |
112 | // handleExitConnect handles the event when the connect
113 | // syscall or its equivalences have returned. This should
114 | // generate the connect end event, and delete the record
115 | // since it has been completed.
116 | func (col *collector) handleExitConnect(
117 | event exitSyscallConnect,
118 | ) {
119 | connectEvent := col.registries[event.TaskPID]
120 | if connectEvent == nil {
121 | return
122 | }
123 | connectEndEvent := *connectEvent
124 | connectEndEvent.Timestamp = event.Timestamp
125 | connectEndEvent.Op = OpConnectEnd
126 | connectEndEvent.Errno = new(int32)
127 | *connectEndEvent.Errno = event.Errno
128 | delete(col.registries, event.TaskPID)
129 | col.dispatch(connectEndEvent)
130 | }
131 |
132 | // handleInetProtocolConnect is the event triggered when
133 | // the proto_ops->connect corresponded functions are called
134 | // (e.g. inet_stream_connect and inet_dgram_connect).
135 | //
136 | // The type field will be fetched at this point, which
137 | // will query the (struct socket*)->type field. And an
138 | // connect start event must be generated after that.
139 | func (col *collector) handleInetProtocolConnect(
140 | event entryInetProtocolConnect,
141 | ) {
142 | connectEvent := col.registries[event.TaskPID]
143 | if connectEvent == nil {
144 | return
145 | }
146 | connectEvent.Type = event.Type
147 | connectStartEvent := *connectEvent
148 | connectStartEvent.Op = OpConnectStart
149 | col.dispatch(connectStartEvent)
150 | }
151 |
152 | func stackConnectEventSource(
153 | next func(<-chan Event) error,
154 | rootCtx context.Context, manager systracer.Manager,
155 | ) error {
156 | // Attempt to initialize the connect data source.
157 | ctx, cancel := context.WithCancel(rootCtx)
158 | defer cancel()
159 | var lastSyncCh <-chan struct{}
160 | eventCh := make(chan Event)
161 |
162 | // Create the connect event collector first.
163 | collector := &collector{
164 | ctx: ctx,
165 | ch: eventCh,
166 | registries: make(map[uint32]*Event),
167 | }
168 |
169 | // Attempt to attach to the inet_dgram_connect and
170 | // the inet_stream_connect first.
171 | inetDgramConnect, _, err := manager.TraceKProbe(
172 | "inet_dgram_connect",
173 | collector.handleInetProtocolConnect)
174 | if err != nil {
175 | return err
176 | }
177 | defer inetDgramConnect.Close()
178 |
179 | inetStreamConnect, _, err := manager.TraceKProbe(
180 | "inet_stream_connect",
181 | collector.handleInetProtocolConnect)
182 | if err != nil {
183 | return err
184 | }
185 | defer inetStreamConnect.Close()
186 |
187 | // Attempt to attach to correct location of the
188 | // syscall connect. Please notice that once a point
189 | // of tracing is found, the other functions must
190 | // also attach to that point.
191 | var exitConnect, connectInet4, connectInet6 systracer.Trace
192 | candidates := []string{
193 | "sys_connect", "__sys_connect",
194 | }
195 | for _, candidate := range candidates {
196 | var syncCh <-chan struct{}
197 |
198 | // Try to attach to the kretprobe of candidate.
199 | // nolint
200 | exitConnect, syncCh, err = manager.TraceKProbe(
201 | candidate, collector.handleExitConnect)
202 | if err == systracer.ErrBadTracePoint {
203 | continue
204 | }
205 | if err != nil {
206 | return err
207 | }
208 | lastSyncCh = syncCh
209 | defer exitConnect.Close()
210 |
211 | // Try to attach to the connect ipv4 event.
212 | connectInet4, syncCh, err = manager.TraceKProbe(
213 | candidate, collector.handleConnectInet4)
214 | if err != nil {
215 | return err
216 | }
217 | lastSyncCh = syncCh
218 | defer connectInet4.Close()
219 |
220 | // Try to attach to the connect ipv6 event.
221 | // This is optional because some older kernel
222 | // may have no ipv6 support.
223 | connectInet6, syncCh, err = manager.TraceKProbe(
224 | candidate, collector.handleConnectInet6)
225 | if err != nil && err != systracer.ErrBadTracePoint {
226 | return err
227 | }
228 | if connectInet6 != nil {
229 | lastSyncCh = syncCh
230 | defer connectInet6.Close()
231 | }
232 |
233 | // Creation completed for now.
234 | break
235 | }
236 | if exitConnect == nil {
237 | return systracer.ErrBadTracePoint
238 | }
239 |
240 | // Wait for the synchronization of probe point.
241 | select {
242 | case <-ctx.Done():
243 | return nil
244 | case <-lastSyncCh:
245 | }
246 | defer cancel()
247 | inetDgramConnect.SetEnabled(true)
248 | inetStreamConnect.SetEnabled(true)
249 | exitConnect.SetEnabled(true)
250 | connectInet4.SetEnabled(true)
251 | if connectInet6 != nil {
252 | connectInet6.SetEnabled(true)
253 | }
254 | return next(eventCh)
255 | }
256 |
257 | // Module is the DI module of connect event.
258 | //
259 | // The module requires a context and a trace manager, and
260 | // injects an event channel of <-chan Event.
261 | var Module = shaft.Stack(stackConnectEventSource)
262 |
--------------------------------------------------------------------------------
/listen/listen.go:
--------------------------------------------------------------------------------
1 | package listen
2 |
3 | import (
4 | "context"
5 | "encoding/binary"
6 | "net"
7 | "syscall"
8 | "time"
9 |
10 | "github.com/aegistudio/shaft"
11 | "github.com/pkg/errors"
12 |
13 | "github.com/chaitin/systracer"
14 | "github.com/chaitin/systracer/pkg/kversion"
15 | )
16 |
// Op is the kind of a listen event on linux.
//
// Two kinds exist: the listen event, issued once a socket has
// successfully started listening, and the unlisten event, issued
// when a listening socket is closed.
type Op uint8

const (
	// OpListenStart marks the listen event.
	OpListenStart Op = iota

	// OpListenEnd marks the unlisten event.
	OpListenEnd
)
29 |
30 | // Event is the standard information for a linux listen
31 | // event. Since it is only possible to listen TCP socket
32 | // (?->0x0a), we can omit out the type judgement.
33 | type Event struct {
34 | Op Op
35 | Timestamp time.Time
36 | PID uint32
37 | FD *int
38 | Family uint16 // AF_*
39 | Addr string
40 | Port uint16
41 | Backlog *int
42 | }
43 |
44 | // collector is the collector for the linux listen events.
45 | // It keeps track of listen state registries and and will
46 | // periodically perform cleanup.
47 | type collector struct {
48 | ctx context.Context
49 | ch chan<- Event
50 | starts map[uint32]*Event
51 | }
52 |
53 | func (c *collector) dispatch(event Event) {
54 | select {
55 | case <-c.ctx.Done():
56 | case c.ch <- event:
57 | }
58 | }
59 |
60 | // handleEntryListen handles the event triggered when
61 | // a syscall listen or its equivalences are encoutered.
62 | // We can only decode the address family when the
63 | // inet_listen or inet6_listen is called.
64 | //
65 | // listen(FD, Backlog)
66 | func (col *collector) handleEntryListen(
67 | event entrySyscallListen,
68 | ) {
69 | listenEvent := &Event{}
70 | listenEvent.Timestamp = event.Timestamp
71 | listenEvent.PID = event.TaskPID
72 | listenEvent.FD = new(int)
73 | *listenEvent.FD = int(event.FD)
74 | listenEvent.Backlog = new(int)
75 | *listenEvent.Backlog = int(event.Backlog)
76 | col.starts[event.TaskPID] = listenEvent
77 | }
78 |
79 | // handleProtocolListenInet4 handles the event triggered
80 | // when it enters inet_listen.
81 | //
82 | // inet_listen(&socket{
83 | // ...
84 | // .sk = &sock{
85 | // .skc_rcv_saddr = Address,
86 | // .skc_num = Port,
87 | // .skc_family = AF_INET = 2,
88 | // //.skc_state = != 0x0a,
89 | // },
90 | // }, Backlog)
91 | func (col *collector) handleProtocolListenInet4(
92 | event systracer.ProbeEvent, sk StructSockListenInet4,
93 | ) {
94 | listenEvent := col.starts[event.TaskPID]
95 | if listenEvent == nil {
96 | return
97 | }
98 | listenEvent.Timestamp = event.Timestamp
99 | listenEvent.Family = syscall.AF_INET
100 | var ipv4 [4]byte
101 | binary.BigEndian.PutUint32(ipv4[:], sk.Address)
102 | listenEvent.Addr = net.IP(ipv4[:]).String()
103 | listenEvent.Port = sk.Port
104 | }
105 |
106 | // handleProtocolListenInet4_V2_6_12 handles the inet_listen ipv4
107 | // tracepoint event for linux version 2.6.12 ~ 5.3 (excluded).
108 | func (col *collector) handleProtocolListenInet4_V2_6_12(
109 | event entryProtocolListenInet4_V2_6_12,
110 | ) {
111 | col.handleProtocolListenInet4(event.ProbeEvent, event.Sk)
112 | }
113 |
114 | // handleProtocolListenInet4_v5_3 handles the inet_listen ipv4
115 | // tracepoint event for linux version above 5.3 (included).
116 | func (col *collector) handleProtocolListenInet4_V5_3(
117 | event entryProtocolListenInet4_V5_3,
118 | ) {
119 | col.handleProtocolListenInet4(event.ProbeEvent, event.Sk)
120 | }
121 |
122 | // handleProtocolListenInet6 handles the event triggered
123 | // when it enters inet6_listen.
124 | //
125 | // inet_listen(&socket{
126 | // ...
127 | // .sk = &sock{
128 | // .skc_num = Port,
129 | // .skc_family = AF_INET6 = 10,
130 | // .sin_v6_rev_saddr = Address,
131 | // },
132 | // }, Backlog)
133 | func (col *collector) handleProtocolListenInet6(
134 | event systracer.ProbeEvent, sk StructSockListenInet6,
135 | ) {
136 | listenEvent := col.starts[event.TaskPID]
137 | if listenEvent == nil {
138 | return
139 | }
140 | listenEvent.Timestamp = event.Timestamp
141 | listenEvent.Family = syscall.AF_INET6
142 | var ipv6 [16]byte
143 | binary.BigEndian.PutUint32(ipv6[0:4], sk.Address0)
144 | binary.BigEndian.PutUint32(ipv6[4:8], sk.Address1)
145 | binary.BigEndian.PutUint32(ipv6[8:12], sk.Address2)
146 | binary.BigEndian.PutUint32(ipv6[12:16], sk.Address3)
147 | listenEvent.Addr = net.IP(ipv6[:]).String()
148 | listenEvent.Port = sk.Port
149 | }
150 |
151 | // handleProtocolListenInet4_V2_6_12 handles the inet_listen ipv6
152 | // tracepoint event for linux version 2.6.12 ~ 5.3 (excluded).
153 | func (col *collector) handleProtocolListenInet6_V2_6_12(
154 | event entryProtocolListenInet6_V2_6_12,
155 | ) {
156 | col.handleProtocolListenInet6(event.ProbeEvent, event.Sk)
157 | }
158 |
159 | // handleProtocolListenInet4_v5_3 handles the inet_listen ipv6
160 | // tracepoint event for linux version above 5.3 (included).
161 | func (col *collector) handleProtocolListenInet6_V5_3(
162 | event entryProtocolListenInet6_V5_3,
163 | ) {
164 | col.handleProtocolListenInet6(event.ProbeEvent, event.Sk)
165 | }
166 |
167 | // handleExitListen handles the event when the listen
168 | // syscall or its equivalences have returned.
169 | //
170 | // This should generate the listen event when the retcode
171 | // is 0, and the address family is known to us.
172 | func (col *collector) handleExitListen(
173 | event exitSyscallListen,
174 | ) {
175 | listenEvent := col.starts[event.TaskPID]
176 | if listenEvent == nil {
177 | return
178 | }
179 | listenStartEvent := *listenEvent
180 | listenStartEvent.Op = OpListenStart
181 | listenStartEvent.Timestamp = event.Timestamp
182 | delete(col.starts, event.TaskPID)
183 | if event.Errno == 0 && listenEvent.Family != 0 {
184 | col.dispatch(listenStartEvent)
185 | }
186 | }
187 |
188 | // handleTCPCloseInet4 handles the event triggered
189 | // when it enters tcp_close.
190 | //
191 | // tcp_close(&socket{
192 | // ...
193 | // .sk = &sock{
194 | // .skc_rcv_saddr = Address,
195 | // .skc_num = Port,
196 | // .skc_family = AF_INET = 2,
197 | // .skc_state = == 0x0a,
198 | // },
199 | // })
200 | func (col *collector) handleTCPCloseInet4(
201 | event entryTCPCloseInet4,
202 | ) {
203 | if event.State != 10 {
204 | return
205 | }
206 | var listenEndEvent Event
207 | listenEndEvent.Op = OpListenEnd
208 | listenEndEvent.PID = event.TaskPID
209 | listenEndEvent.Timestamp = event.Timestamp
210 | listenEndEvent.Family = syscall.AF_INET
211 | var ipv4 [4]byte
212 | binary.BigEndian.PutUint32(ipv4[:], event.Address)
213 | listenEndEvent.Addr = net.IP(ipv4[:]).String()
214 | listenEndEvent.Port = event.Port
215 | col.dispatch(listenEndEvent)
216 | }
217 |
218 | // handleTCPCloseInet6 handles the event triggered
219 | // when it enters tcp_close.
220 | //
221 | // tcp_close(&socket{
222 | // ...
223 | // .sk = &sock{
224 | // .skc_num = Port,
225 | // .skc_family = AF_INET6 = 10,
226 | // .sin_v6_rev_saddr = Address,
227 | // .skc_state = == 0x0a,
228 | // },
229 | // })
230 | func (col *collector) handleTCPCloseInet6(
231 | event entryTCPCloseInet6,
232 | ) {
233 | if event.State != 10 {
234 | return
235 | }
236 | var listenEndEvent Event
237 | listenEndEvent.Op = OpListenEnd
238 | listenEndEvent.PID = event.TaskPID
239 | listenEndEvent.Timestamp = event.Timestamp
240 | listenEndEvent.Family = syscall.AF_INET6
241 | var ipv6 [16]byte
242 | binary.BigEndian.PutUint32(ipv6[0:4], event.Address0)
243 | binary.BigEndian.PutUint32(ipv6[4:8], event.Address1)
244 | binary.BigEndian.PutUint32(ipv6[8:12], event.Address2)
245 | binary.BigEndian.PutUint32(ipv6[12:16], event.Address3)
246 | listenEndEvent.Addr = net.IP(ipv6[:]).String()
247 | listenEndEvent.Port = event.Port
248 | col.dispatch(listenEndEvent)
249 | }
250 |
// stackListenEventSource sets up every probe required to
// produce listen events and then hands the event channel
// over to next.
//
// It registers the IPv4 and IPv6 handlers on inet_listen and
// tcp_close, plus the entry and exit handlers on the first
// listen syscall symbol that can be traced. All probes are
// registered first and only enabled right before next is
// invoked; they are closed by the deferred calls once this
// function returns.
//
// It returns the registration error if any probe could not
// be created, systracer.ErrBadTracePoint if none of the
// syscall candidates is traceable, nil if the context is
// done before the entry probe is synchronized, and the
// result of next otherwise.
func stackListenEventSource(
	next func(<-chan Event) error,
	rootCtx context.Context, manager systracer.Manager,
) error {
	// Attempt to initialize the listen data source.
	ctx, cancel := context.WithCancel(rootCtx)
	defer cancel()
	var lastSyncCh <-chan struct{}
	eventCh := make(chan Event)

	// Create the listen event collector first.
	collector := &collector{
		ctx:    ctx,
		ch:     eventCh,
		starts: make(map[uint32]*Event),
	}

	// Pick the IPv4 and IPv6 inet_listen handlers matching
	// the version of the running kernel.
	var handleProtocolListenInet4, handleProtocolListenInet6 interface{}
	if kversion.Current >= kversion.Must("5.3") {
		handleProtocolListenInet4 = collector.handleProtocolListenInet4_V5_3
		handleProtocolListenInet6 = collector.handleProtocolListenInet6_V5_3
	} else if kversion.Current >= kversion.Must("2.6.12") {
		handleProtocolListenInet4 = collector.handleProtocolListenInet4_V2_6_12
		handleProtocolListenInet6 = collector.handleProtocolListenInet6_V2_6_12
	} else {
		return errors.Errorf("listen event unsupported")
	}

	// Attempt to attach to the inet_listen first, once for
	// each of the address families.
	listenInet4, _, err := manager.TraceKProbe(
		"inet_listen", handleProtocolListenInet4)
	if err != nil {
		return err
	}
	defer listenInet4.Close()
	listenInet6, _, err := manager.TraceKProbe(
		"inet_listen", handleProtocolListenInet6)
	if err != nil {
		return err
	}
	defer listenInet6.Close()

	// Attempt to attach to the tcp_close then, once for
	// each of the address families.
	shutdownInet4, _, err := manager.TraceKProbe(
		"tcp_close", collector.handleTCPCloseInet4)
	if err != nil {
		return err
	}
	defer shutdownInet4.Close()

	shutdownInet6, _, err := manager.TraceKProbe(
		"tcp_close", collector.handleTCPCloseInet6)
	if err != nil {
		return err
	}
	defer shutdownInet6.Close()

	// Attempt to attach to correct location of the
	// syscall listen. Please notice that once a point
	// of tracing is found, the other functions must
	// also attach to that point.
	var exitListen, entryListen systracer.Trace
	candidates := []string{
		"sys_listen", "__sys_listen",
	}
	for _, candidate := range candidates {
		var syncCh <-chan struct{}

		// Try to attach the exit handler to the candidate,
		// and move on to the next candidate only when the
		// trace point itself is reported as bad.
		exitListen, _, err = manager.TraceKProbe(
			candidate, collector.handleExitListen)
		if err == systracer.ErrBadTracePoint {
			continue
		}
		if err != nil {
			return err
		}
		defer exitListen.Close()

		// Try to attach to the syscall entry event.
		// nolint
		entryListen, syncCh, err = manager.TraceKProbe(
			candidate, collector.handleEntryListen)
		if err != nil {
			return err
		}
		defer entryListen.Close()
		lastSyncCh = syncCh

		// Creation completed for now.
		break
	}
	if exitListen == nil {
		return systracer.ErrBadTracePoint
	}

	// Wait for the completion of entry initialization,
	// which is signaled through the channel returned along
	// with the last registered probe.
	select {
	case <-ctx.Done():
		return nil
	case <-lastSyncCh:
	}
	defer cancel()

	// Enable all probes only after everything is in place.
	listenInet4.SetEnabled(true)
	listenInet6.SetEnabled(true)
	shutdownInet4.SetEnabled(true)
	shutdownInet6.SetEnabled(true)
	exitListen.SetEnabled(true)
	entryListen.SetEnabled(true)
	return next(eventCh)
}
363 |
// Module is the DI module of listen event, built by stacking
// stackListenEventSource.
//
// The module requires a context and a trace manager, and
// injects an event channel of <-chan Event.
var Module = shaft.Stack(stackListenEventSource)
369 |
--------------------------------------------------------------------------------
/handle.go:
--------------------------------------------------------------------------------
1 | package systracer
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "io/ioutil"
8 | "os"
9 | "path/filepath"
10 | "reflect"
11 | "strconv"
12 | "sync/atomic"
13 | "syscall"
14 |
15 | "github.com/pkg/errors"
16 | )
17 |
// traceHandle refers to a single trace registry managed
// by the trace manager and can update its options.
type traceHandle struct {
	// id and createTime together form the probe name, see
	// getProbeName. The id is reset to 0 by destroy.
	id         uint64
	createTime uint64

	// numDone and numLoss count the completed and the lost
	// events, and are only accessed atomically.
	numDone uint64
	numLoss uint64

	// ctx aborts the pending requests of this handle once
	// it is done.
	ctx context.Context

	// enableCh, closeCh and conditionCh carry the requests
	// issued by SetEnabled, Close and SetCondition.
	enableCh    chan *traceEnableRequest
	closeCh     chan *traceCloseRequest
	conditionCh chan *conditionUpdateRequest

	// handler is presumably the callback the probe has been
	// registered with -- TODO confirm against createTrace.
	handler interface{}

	// desc is the compiled descriptor of the event, which
	// provides the probe format and the initial condition.
	desc *traceEventDescriptor

	// condition is the last condition that has been applied
	// successfully by updateCondition.
	condition string

	// typ is the name of the file under the tracefs root
	// that the probe definition is written to.
	typ string

	// enabled is the state last written by setEnabled.
	enabled bool
}
35 |
36 | // getProbeName formats the probe name.
37 | func (t *traceHandle) getProbeName() string {
38 | return fmt.Sprintf("probe_%x_%x", t.createTime, t.id)
39 | }
40 |
41 | // getEnableFilePath evaluates the path for setting the
42 | // probe enabled or disabled.
43 | func (t *traceHandle) getEnableFilePath(
44 | root, namespace string,
45 | ) string {
46 | probeName := t.getProbeName()
47 | return filepath.Join(root, "instances",
48 | namespace, "events", namespace, probeName, "enable")
49 | }
50 |
// parseProbeName converts a probe name of the form
// "probe_<createTime>_<id>", with both numbers written in
// hexadecimal, back to its creation time and id.
//
// If the probe name cannot be parsed, including the case
// that either number is malformed or does not fit into 64
// bits, it returns 0 for both values. 0 is not a valid id
// for a probe, so callers can tell the failure apart.
func parseProbeName(name []byte) (createTime, id uint64) {
	const prefix = "probe_"
	if !bytes.HasPrefix(name, []byte(prefix)) {
		return 0, 0
	}
	rest := name[len(prefix):]

	// The separator must come after a non-empty creation time.
	sep := bytes.IndexByte(rest, '_')
	if sep <= 0 {
		return 0, 0
	}

	// strconv.ParseUint reports the maximum value together with
	// a range error on overflow, so the errors must be checked
	// rather than trusting the returned numbers.
	parsedTime, err := strconv.ParseUint(string(rest[:sep]), 16, 64)
	if err != nil {
		return 0, 0
	}
	parsedID, err := strconv.ParseUint(string(rest[sep+1:]), 16, 64)
	if err != nil {
		return 0, 0
	}
	return parsedTime, parsedID
}
68 |
// ID is the current ID of the trace handle, which is also
// the second component of its probe name.
func (t *traceHandle) ID() uint64 {
	return t.id
}
73 |
// GetDone retrieves the number of done events. The counter
// is loaded atomically.
func (t *traceHandle) GetDone() uint64 {
	return atomic.LoadUint64(&t.numDone)
}
78 |
// GetLost retrieves the number of lost events. The counter
// is loaded atomically.
func (t *traceHandle) GetLost() uint64 {
	return atomic.LoadUint64(&t.numLoss)
}
83 |
84 | // complete increment the corresponding counter.
85 | func (t *traceHandle) complete(success bool) {
86 | if success {
87 | atomic.AddUint64(&t.numDone, 1)
88 | } else {
89 | atomic.AddUint64(&t.numLoss, 1)
90 | }
91 | }
92 |
// traceEnableRequest is the request to enable or
// disable the handle.
type traceEnableRequest struct {
	// enabled is the requested state.
	enabled bool
	// handle is the trace handle to update.
	handle *traceHandle
	// doneCh is waited on by the requester until the request
	// has been processed.
	doneCh chan struct{}
}
100 |
101 | // SetEnabled requests for the enable state update.
102 | func (t *traceHandle) SetEnabled(enabled bool) {
103 | if t.enabled == enabled {
104 | return
105 | }
106 | req := &traceEnableRequest{
107 | enabled: enabled,
108 | handle: t,
109 | doneCh: make(chan struct{}),
110 | }
111 | select {
112 | case <-t.ctx.Done():
113 | return
114 | case t.enableCh <- req:
115 | <-req.doneCh
116 | }
117 | }
118 |
119 | // setEnabled flips the state of the handle.
120 | func (t *traceHandle) setEnabled(
121 | root, namespace string, enabled bool,
122 | ) error {
123 | if t.enabled == enabled {
124 | return nil
125 | }
126 | enableString := []byte("0")
127 | if enabled {
128 | enableString = []byte("1")
129 |
130 | // XXX: when converting from enabled to disabled
131 | // status, the condition might always be reset,
132 | // so we should at least attempt to reset the
133 | // condition before restarting.
134 | //
135 | // We will revert to disabled status if the error
136 | // cannot be resolved.
137 | if err := t.updateCondition(
138 | root, namespace, t.condition); err != nil {
139 | return err
140 | }
141 | }
142 |
143 | // /instances//events///enable.
144 | enableFilePath := t.getEnableFilePath(root, namespace)
145 | if err := ioutil.WriteFile(enableFilePath,
146 | enableString, os.FileMode(0600)); err != nil {
147 | return err
148 | }
149 | t.enabled = enabled
150 | return nil
151 | }
152 |
// traceCloseRequest is the request to close the handle.
type traceCloseRequest struct {
	// handle is the trace handle to close.
	handle *traceHandle
	// doneCh is waited on by the requester until the request
	// has been processed.
	doneCh chan struct{}
}
158 |
// Close will send the message to the manager.
//
// It sends a close request over the close channel and waits
// for it to be processed. Both steps are abandoned as soon
// as the context of the handle is done.
func (t *traceHandle) Close() {
	req := &traceCloseRequest{
		handle: t,
		doneCh: make(chan struct{}),
	}
	// Hand the request over unless the context is done.
	select {
	case <-t.ctx.Done():
		return
	case t.closeCh <- req:
	}
	// Wait for the completion unless the context is done.
	select {
	case <-t.ctx.Done():
		return
	case <-req.doneCh:
	}
}
176 |
// conditionUpdateRequest is the request to update condition
// of the current trace handle.
type conditionUpdateRequest struct {
	// handle is the trace handle to update.
	handle *traceHandle
	// err is the outcome of the update, which is read by the
	// requester after doneCh has been signaled.
	err error
	// condition is the condition requested by the caller.
	condition string
	// doneCh is waited on by the requester until the request
	// has been processed.
	doneCh chan struct{}
}
185 |
186 | // SetCondition will dispatch the condition to manager
187 | // and waits for its result.
188 | func (t *traceHandle) SetCondition(condition string) error {
189 | req := &conditionUpdateRequest{
190 | handle: t,
191 | condition: condition,
192 | doneCh: make(chan struct{}),
193 | }
194 | select {
195 | case <-t.ctx.Done():
196 | return t.ctx.Err()
197 | case t.conditionCh <- req:
198 | <-req.doneCh
199 | return req.err
200 | }
201 | }
202 |
// evaluateCondition combines two conditions into a single
// condition string.
//
// When both are present, each of them is parenthesized and
// they are joined by "&&". When only one is present, it is
// returned as is. When both are empty, "0" is returned.
func evaluateCondition(left, right string) string {
	hasLeft, hasRight := left != "", right != ""
	if hasLeft && hasRight {
		return fmt.Sprintf("(%s) && (%s)", left, right)
	}
	if hasLeft {
		return left
	}
	if hasRight {
		return right
	}
	return "0"
}
217 |
// updateCondition is the real function to set condition.
//
// It combines the initial condition of the descriptor with
// the specified condition and writes the result to the
// filter file of the probe. The specified condition is
// remembered by the handle only when the write succeeds.
//
// When the write fails with EINVAL, the content of the
// filter file is read back and reported as the cause of a
// syntax error. Other errors are returned as they are.
func (t *traceHandle) updateCondition(
	root, namespace, condition string,
) error {
	// <root>/instances/<namespace>/events/<namespace>/<probe>/filter.
	target := filepath.Join(
		root, "instances", namespace, "events",
		namespace, t.getProbeName(), "filter")

	// Evaluate the old condition so it could be recovered
	// if there's error encountered.
	oldCondition := evaluateCondition(
		t.desc.initialCondition, t.condition)
	defer func() {
		// The conditions still differ on return only when the
		// new condition has not been applied.
		if t.condition != condition {
			// XXX: attempt to rollback to previous condition,
			// and will disable the probe if it cannot be
			// actually reverted.
			err := ioutil.WriteFile(target,
				[]byte(oldCondition), os.FileMode(0600))
			if err != nil {
				enableFilePath := t.getEnableFilePath(
					root, namespace)
				_ = ioutil.WriteFile(enableFilePath,
					[]byte("0"), os.FileMode(0600))
			}
		}
	}()

	// Evaluate the new condition and update it.
	newCondition := evaluateCondition(
		t.desc.initialCondition, condition)
	err := ioutil.WriteFile(target,
		[]byte(newCondition), os.FileMode(0600))
	if err != nil {
		// Report error directly if it is not EINVAL.
		pathErr, ok := err.(*os.PathError)
		if !ok {
			return err
		}
		if pathErr.Err != syscall.EINVAL {
			return err
		}

		// Attempt to fetch and report the error cause, falling
		// back to the write error if it cannot be read.
		cause, readErr := ioutil.ReadFile(target)
		if readErr != nil {
			return err
		}
		return errors.Errorf(
			"filter expression %q syntax error: %s",
			newCondition, string(cause))
	}
	t.condition = condition
	return nil
}
274 |
// init attempts to initialize a specific probe, this
// must be done after the fields inside the trace handle
// have already been initialized.
//
// The probe is written to the file named by t.typ under
// root, its initial condition is applied, and it is turned
// on and off once to verify that it can be enabled. The
// probe is left disabled on success, and is removed again
// if any step after its creation fails.
//
// ErrBadTracePoint is returned when writing the probe header
// is rejected with EINVAL or ENOENT.
func (t *traceHandle) init(
	root, namespace, tracepoint string,
) error {
	var err error
	var probeCreated bool

	// Determine the type prefix of the probe.
	var prefix string
	switch t.desc.meta {
	case typeProbeEvent:
		prefix = "p"
	case typeReturnEvent:
		prefix = "r"
	default:
		return errors.Errorf(
			"type %s is not supported", t.desc.meta)
	}

	// Evaluate the probe name and insertion statement.
	probeName := t.getProbeName()
	probeHeader := fmt.Sprintf("%s:%s/%s %s",
		prefix, namespace, probeName, tracepoint)
	probeExpr := probeHeader + " " + t.desc.format()

	// Open and write the tracepoint into manifest.
	fd, err := syscall.Open(filepath.Join(root, t.typ),
		syscall.O_WRONLY|syscall.O_APPEND, 0600)
	if err != nil {
		return err
	}
	defer func() { _ = syscall.Close(fd) }()

	// Write the header alone first, so that a rejection here
	// is attributed to the trace point rather than to the
	// arguments of the probe.
	if _, err = syscall.Write(fd, []byte(probeHeader)); err != nil {
		if err == syscall.EINVAL || err == syscall.ENOENT {
			return ErrBadTracePoint
		}
		return err
	}

	// Remove the header-only probe before writing the full
	// probe expression under the same name.
	if err = removeProbe(
		root, t.typ, namespace, probeName); err != nil {
		return err
	}
	if _, err = syscall.Write(fd, []byte(probeExpr)); err != nil {
		if err == syscall.EINVAL {
			return errors.Errorf(
				"probe expression %q syntax error", probeExpr)
		}
		return err
	}
	defer func() {
		// Remove the tracepoint from the tracefs.
		if !probeCreated {
			_ = removeProbe(root, t.typ, namespace, probeName)
		}
	}()

	// Set the initial condition of the probe.
	if err = t.updateCondition(root, namespace, ""); err != nil {
		return err
	}

	// Attempt to enable the probe, this should reveals some
	// problem when the specified trace point is actually
	// invalid, especially for those in uprobe.
	enableFilePath := t.getEnableFilePath(root, namespace)
	if err = ioutil.WriteFile(enableFilePath,
		[]byte("1"), os.FileMode(0600)); err != nil {
		return err
	}
	if err = ioutil.WriteFile(enableFilePath,
		[]byte("0"), os.FileMode(0600)); err != nil {
		return err
	}
	probeCreated = true
	return nil
}
353 |
// destroy will attempt to remove the single probe.
//
// The removal is best effort, so its error is discarded on
// purpose. The id of the handle is reset to 0 afterwards.
func (t *traceHandle) destroy(root, namespace string) {
	_ = removeProbe(root, t.typ, namespace, t.getProbeName())
	t.id = 0
}
359 |
360 | // parseEventHandler is the common code to parse and
361 | // compile the event handler.
362 | func parseEventHandler(
363 | handler interface{},
364 | ) (*traceEventDescriptor, error) {
365 | handlerType := reflect.TypeOf(handler)
366 | if kind := handlerType.Kind(); kind != reflect.Func {
367 | return nil, errors.Wrapf(
368 | errors.Errorf("invalid kind %s", kind),
369 | "parse event handler")
370 | }
371 | if handlerType.NumIn() != 1 {
372 | return nil, errors.Wrapf(
373 | errors.Errorf("invalid input amount"),
374 | "parse event handler")
375 | }
376 | typ := handlerType.In(0)
377 | desc, err := compileTraceEvent(typ)
378 | if err != nil {
379 | return nil, errors.Wrapf(err, "parse event")
380 | }
381 | return desc, nil
382 | }
383 |
// TraceKProbe will register a kprobe event.
//
// The handler is parsed by parseEventHandler first, and the
// trace is then created in "kprobe_events" at the specified
// location. The results of createTrace are returned as is.
func (mgr *traceManager) TraceKProbe(
	location string, handler interface{},
) (Trace, <-chan struct{}, error) {
	desc, err := parseEventHandler(handler)
	if err != nil {
		return nil, nil, err
	}
	return mgr.createTrace("kprobe_events",
		location, handler, desc)
}
395 |
396 | // TraceUProbe will register a uprobe event.
397 | func (mgr *traceManager) TraceUProbe(
398 | library, location string, handler interface{},
399 | ) (Trace, <-chan struct{}, error) {
400 | desc, err := parseEventHandler(handler)
401 | if err != nil {
402 | return nil, nil, err
403 | }
404 | return mgr.createTrace("uprobe_events",
405 | fmt.Sprintf("%s:%s", library, location),
406 | handler, desc)
407 | }
408 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright 2022 Chaitin Tech
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/inode/inode.go:
--------------------------------------------------------------------------------
1 | // Package inode provides service for uniquely pinning and
2 | // addressing an inode for path.
3 | package inode
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "os"
9 | "path/filepath"
10 | "runtime"
11 | "strconv"
12 | "strings"
13 | "sync"
14 | "syscall"
15 | "time"
16 |
17 | "github.com/aegistudio/shaft"
18 | "github.com/pkg/errors"
19 | "golang.org/x/sync/errgroup"
20 |
21 | "github.com/chaitin/systracer"
22 | "github.com/chaitin/systracer/pkg/alloc"
23 | "github.com/chaitin/systracer/pkg/kversion"
24 | )
25 |
// inodePinResult is the response captured corresponding to
// each inode pinning request.
type inodePinResult struct {
	// inode is the inode value captured by the probe, and
	// cookie is the number parsed from the hexadecimal suffix
	// of the captured attribute name.
	inode, cookie uint64
}
31 |
// collector is the collector for receiving the reaction of
// the inode pinning, and send it back to the master thread.
type collector struct {
	// rootCtx aborts the delivery of results once it is done.
	rootCtx context.Context
	// resultCh is the channel that the results are sent to.
	resultCh chan<- inodePinResult
}
38 |
39 | // handleInodePin is the handler for inode pinning captured
40 | // by security_inode_getsecurity. We filter out only the
41 | // "security.systracer.inode_pin.*".
42 | func (col *collector) handleInodePin(
43 | name string, inode uint64,
44 | ) {
45 | prefix := "systracer.inode_pin."
46 | if !strings.HasPrefix(name, prefix) {
47 | return
48 | }
49 | cookie, err := strconv.ParseUint(
50 | name[len(prefix):], 16, 64)
51 | if err != nil {
52 | return
53 | }
54 | result := inodePinResult{
55 | inode: inode,
56 | cookie: cookie,
57 | }
58 | select {
59 | case <-col.rootCtx.Done():
60 | case col.resultCh <- result:
61 | }
62 | }
63 |
// handleSecurityInodePin_V2_6_24 handles the inode pinning
// event from version 2.6.24 (inclusive) to 5.12 (exclusive).
//
//	security_inode_getsecurity(
//	    Inode, "systracer.inode_pin.${hex Cookie}")
//
// It forwards the captured name and inode to handleInodePin.
func (col *collector) handleSecurityInodePin_V2_6_24(
	event entrySecurityInodePin_V2_6_24,
) {
	col.handleInodePin(event.Name, event.Inode)
}
74 |
// handleSecurityInodePin_V5_12 handles the inode pinning
// event from 5.12 (inclusive) to now.
//
//	security_inode_getsecurity(
//	    MountNS, Inode, "systracer.inode_pin.${hex Cookie}")
//
// It forwards the captured name and inode to handleInodePin.
func (col *collector) handleSecurityInodePin_V5_12(
	event entrySecurityInodePin_V5_12,
) {
	col.handleInodePin(event.Name, event.Inode)
}
85 |
// inodePin is the state of a deduplicated inode. It holds a
// strong reference to the open file so that the addressing
// result stays valid while the pin is alive.
//
// id is the identifier allocated by the manager state and is
// reset to zero once the pin has been released. inode is
// filled in when the matching pin result arrives, at which
// point doneCh is closed. ref counts the outstanding holders.
type inodePin struct {
	id     uint64
	name   string
	inode  uint64
	file   *os.File
	ref    uint64
	doneCh chan struct{}
}
97 |
// Inode is the handle of an actually pinned inode returned to
// callers. once guarantees that the underlying pin is released
// at most one time per handle.
type Inode struct {
	manager *Manager
	inner   *inodePin
	once    sync.Once
}
104 |
105 | // Inode returns the address of the pinned inode.
106 | func (inode *Inode) Inode() uint64 {
107 | return inode.inner.inode
108 | }
109 |
110 | // Unpin removes the strong reference held by caller.
111 | func (inode *Inode) Unpin() {
112 | inode.once.Do(func() {
113 | inode.manager.unpin(inode.inner)
114 | })
115 | runtime.SetFinalizer(inode, nil)
116 | }
117 |
// inodePinRequest is the message sent to the master thread
// for performing inode pinning.
//
// name and mode are the path and open flags used to open the
// file. doneCh is closed by the master thread once result and
// err have been filled in.
type inodePinRequest struct {
	name   string
	mode   int
	doneCh chan struct{}
	result *inodePin
	err    error
}
126 |
// Manager performs and manages inode pins. Requests are
// forwarded to the master thread through pinCh and unpinCh,
// and every blocking operation gives up once rootCtx is done.
type Manager struct {
	rootCtx context.Context
	pinCh   chan *inodePinRequest
	unpinCh chan *inodePin
}
133 |
134 | // pin requests for requesting and opening an inode pin.
135 | func (m *Manager) pin(name string, mode int) (*Inode, error) {
136 | abs, err := filepath.Abs(name)
137 | if err != nil {
138 | return nil, err
139 | }
140 | req := &inodePinRequest{
141 | name: abs,
142 | mode: mode,
143 | doneCh: make(chan struct{}),
144 | }
145 | select {
146 | case <-m.rootCtx.Done():
147 | return nil, m.rootCtx.Err()
148 | case m.pinCh <- req:
149 | }
150 | select {
151 | case <-m.rootCtx.Done():
152 | return nil, m.rootCtx.Err()
153 | case <-req.doneCh:
154 | }
155 | if req.err != nil {
156 | return nil, req.err
157 | }
158 | select {
159 | case <-m.rootCtx.Done():
160 | return nil, m.rootCtx.Err()
161 | case <-req.result.doneCh:
162 | }
163 | result := &Inode{
164 | inner: req.result,
165 | manager: m,
166 | }
167 | runtime.SetFinalizer(result, func(value *Inode) {
168 | value.Unpin()
169 | })
170 | return result, nil
171 | }
172 |
173 | // PinFile is the request for pinning single file.
174 | func (m *Manager) PinFile(name string) (*Inode, error) {
175 | return m.pin(name, syscall.O_RDONLY|syscall.O_CLOEXEC)
176 | }
177 |
178 | // PinDir is the request for pinning single dir.
179 | func (m *Manager) PinDir(name string) (*Inode, error) {
180 | return m.pin(name,
181 | syscall.O_RDONLY|syscall.O_DIRECTORY|syscall.O_CLOEXEC)
182 | }
183 |
184 | // unpin is the request for closing an inode pin.
185 | func (m *Manager) unpin(p *inodePin) {
186 | select {
187 | case <-m.rootCtx.Done():
188 | case m.unpinCh <- p:
189 | }
190 | }
191 |
// managerState is the state owned by the master thread. It is
// updated by pin completion events, retest timers and
// registrations/unregistrations.
//
// last is the most recently allocated pin id and cookieBase is
// the counter from which new cookies are drawn. all indexes
// the live pins by id, names maps a path to the pin most
// recently created for it, and cookies holds the pins that
// are still waiting for their pin result.
type managerState struct {
	last       uint64
	cookieBase uint64

	all     map[uint64]*inodePin
	names   map[string]*inodePin
	cookies map[uint64]*inodePin
}
203 |
204 | // close destroys all allocated instances in the state.
205 | func (s *managerState) close() {
206 | for _, pin := range s.all {
207 | _ = pin.file.Close()
208 | pin.id = 0
209 | }
210 | }
211 |
// performInodePin triggers the actual inode pinning for our
// fd and cookie by querying the extended attribute
// "security.systracer.inode_pin.${hex cookie}" on
// /proc/self/fd/${fd}. The outcome of the query is ignored on
// purpose: only the call itself matters.
func performInodePin(fd uintptr, cookie uint64) {
	var (
		path = fmt.Sprintf("/proc/self/fd/%d", fd)
		attr = fmt.Sprintf(
			"security.systracer.inode_pin.%x", cookie)
	)
	scratch := make([]byte, 1024)
	_, _ = syscall.Getxattr(path, attr, scratch)
}
221 |
222 | // pin attempts to allocate and create a pin in the state.
223 | func (s *managerState) pin(
224 | name string, flag int,
225 | ) (rpin *inodePin, rerr error) {
226 | // Attempt to open the specified file for later use,
227 | // please notice that the file might be swapped for
228 | // later use and will not close then.
229 | fd, err := syscall.Open(name, flag, 0)
230 | if err != nil {
231 | return nil, err
232 | }
233 | f := os.NewFile(uintptr(fd), name)
234 | defer func() {
235 | if f != nil {
236 | _ = f.Close()
237 | }
238 | }()
239 |
240 | // If there's previous node for the file, attempt to
241 | // allocate specified node for the file.
242 | if previous, ok := s.names[name]; ok {
243 | // Retrieve previous and current file information.
244 | newInfo, err := f.Stat()
245 | if err != nil {
246 | return nil, err
247 | }
248 | newStat := newInfo.Sys().(*syscall.Stat_t)
249 | oldInfo, err := previous.file.Stat()
250 | if err != nil {
251 | return nil, err
252 | }
253 | oldStat := oldInfo.Sys().(*syscall.Stat_t)
254 |
255 | // Compare the information and return the previous
256 | // one if they are the same.
257 | if newStat.Dev == oldStat.Dev &&
258 | newStat.Ino == oldStat.Ino &&
259 | newStat.Rdev == oldStat.Rdev {
260 | previous.ref++
261 | return previous, nil
262 | }
263 | }
264 |
265 | // Attempt to allocate a new node for the subscription.
266 | id := alloc.Alloc(s.last, uint64(1<<48), func(id uint64) bool {
267 | return s.all[id] != nil
268 | })
269 | if id == 0 {
270 | return nil, errors.New(
271 | "cannot allocate more inode pin")
272 | }
273 | created := &inodePin{
274 | id: id,
275 | name: name,
276 | file: f,
277 | ref: 1,
278 | doneCh: make(chan struct{}),
279 | }
280 | s.all[id] = created
281 | s.last = id
282 | s.names[name] = created
283 | f = nil
284 |
285 | // Mark the file and create a new cookie here.
286 | s.cookieBase++
287 | cookie := s.cookieBase
288 | s.cookies[cookie] = created
289 | performInodePin(created.file.Fd(), cookie)
290 | return created, nil
291 | }
292 |
293 | // unpin attempts to decrement reference and potentially
294 | // remove a pin from the state.
295 | func (s *managerState) unpin(p *inodePin) {
296 | if p.id == 0 {
297 | return
298 | }
299 | p.ref--
300 | if p.ref > 0 {
301 | return
302 | }
303 | if s.names[p.name] == p {
304 | delete(s.names, p.name)
305 | }
306 | delete(s.all, p.id)
307 | if p.file != nil {
308 | _ = p.file.Close()
309 | p.file = nil
310 | }
311 | p.id = 0
312 | }
313 |
314 | // reallocateCookie will attempt to reset current cookies.
315 | func (s *managerState) reallocateCookie() {
316 | newCookies := make(map[uint64]*inodePin)
317 | for _, target := range s.cookies {
318 | if target.id == 0 {
319 | continue
320 | }
321 | s.cookieBase++
322 | cookie := s.cookieBase
323 | newCookies[cookie] = target
324 | performInodePin(target.file.Fd(), cookie)
325 | }
326 | s.cookies = newCookies
327 | }
328 |
329 | // handleResult handles the inode pin result.
330 | func (s *managerState) handleResult(
331 | event inodePinResult,
332 | ) {
333 | target := s.cookies[event.cookie]
334 | if target == nil {
335 | return
336 | }
337 | delete(s.cookies, event.cookie)
338 | target.inode = event.inode
339 | close(target.doneCh)
340 | }
341 |
342 | // hasPending see whether there's pending pind request.
343 | func (s *managerState) hasPending() bool {
344 | return len(s.cookies) != 0
345 | }
346 |
347 | // runMasterThread executes the master thread.
348 | func (m *Manager) runMasterThread(
349 | resultCh <-chan inodePinResult,
350 | ) {
351 | var ticker *time.Ticker
352 | defer func() {
353 | if ticker != nil {
354 | ticker.Stop()
355 | }
356 | }()
357 | state := &managerState{
358 | all: make(map[uint64]*inodePin),
359 | names: make(map[string]*inodePin),
360 | cookies: make(map[uint64]*inodePin),
361 | }
362 | defer state.close()
363 | for {
364 | var tickCh <-chan time.Time
365 | if ticker != nil {
366 | tickCh = ticker.C
367 | }
368 |
369 | // Serve user request, inode pin event and
370 | // reallocate tick within select.
371 | select {
372 | case <-m.rootCtx.Done():
373 | return
374 | case event := <-resultCh:
375 | state.handleResult(event)
376 | case req := <-m.pinCh:
377 | func() {
378 | defer close(req.doneCh)
379 | req.result, req.err = state.pin(
380 | req.name, req.mode)
381 | }()
382 | case req := <-m.unpinCh:
383 | state.unpin(req)
384 | case <-tickCh:
385 | state.reallocateCookie()
386 | }
387 |
388 | // Setup or shutdown current reallocate ticker.
389 | if state.hasPending() {
390 | if ticker == nil {
391 | ticker = time.NewTicker(5 * time.Second)
392 | }
393 | } else {
394 | if ticker != nil {
395 | ticker.Stop()
396 | ticker = nil
397 | }
398 | }
399 | }
400 | }
401 |
402 | // stackInodeManager will attempt to create an inode pin
403 | // manager and stack it for later operations.
404 | func stackInodeManager(
405 | next func(*Manager) error,
406 | rootCtx context.Context, group *errgroup.Group,
407 | manager systracer.Manager,
408 | ) error {
409 | // Setup the collector for receiving events.
410 | resultCh := make(chan inodePinResult)
411 | collector := &collector{
412 | rootCtx: rootCtx,
413 | resultCh: resultCh,
414 | }
415 |
416 | // Attach to the security_inode_getsecurity
417 | // for receiving file hook result.
418 | var target interface{}
419 | target = collector.handleSecurityInodePin_V2_6_24
420 | if kversion.Current >= kversion.Must("5.12") {
421 | target = collector.handleSecurityInodePin_V5_12
422 | }
423 | inodePinProbe, syncCh, err := manager.TraceKProbe(
424 | "security_inode_getsecurity", target)
425 | if err != nil {
426 | return err
427 | }
428 | defer inodePinProbe.Close()
429 |
430 | // Wait for the completion of probe creation.
431 | select {
432 | case <-rootCtx.Done():
433 | return nil
434 | case <-syncCh:
435 | }
436 |
437 | // Startup the inode pin master thread.
438 | result := &Manager{
439 | rootCtx: rootCtx,
440 | pinCh: make(chan *inodePinRequest),
441 | unpinCh: make(chan *inodePin),
442 | }
443 | group.Go(func() error {
444 | inodePinProbe.SetEnabled(true)
445 | result.runMasterThread(resultCh)
446 | return nil
447 | })
448 | return next(result)
449 | }
450 |
// Module is the DI module of the inode manager, built by
// stacking stackInodeManager.
//
// The module requires a context, an errgroup and a trace
// manager, and injects an inode pin manager.
var Module = shaft.Stack(stackInodeManager)
456 |
--------------------------------------------------------------------------------
/compile.go:
--------------------------------------------------------------------------------
1 | package systracer
2 |
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "fmt"
7 | "reflect"
8 | "strconv"
9 | "strings"
10 | "unsafe"
11 |
12 | "github.com/pkg/errors"
13 | )
14 |
// traceEventField describes how a single field of a trace
// event is declared to the kernel and how it is parsed back
// from the trace log.
type traceEventField interface {
	// format returns the format string that could be
	// set as the fetch expression.
	//
	// It is usually in the form of name=fetch:type,
	// and some fields might be composite and contain
	// several of such forms separated by spaces.
	format() string

	// fill will attempt to parse the input string and
	// fill information into the struct located at data.
	// It returns the number of bytes consumed from input.
	fill(input []byte, data uintptr) (int, error)
}
30 |
// bytesFault is the byte sequence found in the log in place
// of a value that could not be fetched.
var bytesFault = []byte("(fault)")
33 |
// bytesHex is the prefix of a hexadecimal number.
var bytesHex = []byte("0x")
36 |
37 | // traceFillInteger will attempt to parse the
38 | // content from the number field and later place it
39 | // inside the specified address.
40 | func traceFillInteger(
41 | addr uintptr, kind reflect.Kind,
42 | bigEndian bool, number []byte,
43 | ) (step int, err error) {
44 | // If fault is encountered, the string will
45 | // shift forward for the size of fault and
46 | // left the remained fields unchanged.
47 | if bytes.HasPrefix(number, bytesFault) {
48 | return len(bytesFault), nil
49 | }
50 |
51 | // Attempt to parse it as string.
52 | base := 10
53 | offset := 0
54 | negative := false
55 | if bytes.HasPrefix(number, bytesHex) {
56 | base = 16
57 | offset = len(bytesHex)
58 | } else if len(number) > 0 && number[0] == '-' {
59 | offset = 1
60 | negative = true
61 | }
62 |
63 | // Seek for the next space or end of line.
64 | step = offset
65 | for step < len(number) &&
66 | number[step] != ' ' && number[step] != '\n' {
67 | step++
68 | }
69 |
70 | // Attempt to parse the number and return.
71 | var v uint64
72 | v, err = strconv.ParseUint(
73 | string(number[offset:step]), base, 64)
74 | if err != nil {
75 | return
76 | }
77 |
78 | // Transform byte order if mismatched order.
79 | if bigEndian {
80 | switch kind {
81 | case reflect.Uint16, reflect.Int16:
82 | var buf [2]byte
83 | *(*uint16)((unsafe.Pointer)(&buf[0])) = uint16(v)
84 | v = uint64(binary.BigEndian.Uint16(buf[:]))
85 | case reflect.Uint32, reflect.Int32:
86 | var buf [4]byte
87 | *(*uint32)((unsafe.Pointer)(&buf[0])) = uint32(v)
88 | v = uint64(binary.BigEndian.Uint32(buf[:]))
89 | case reflect.Uint64, reflect.Int64:
90 | var buf [8]byte
91 | *(*uint64)((unsafe.Pointer)(&buf[0])) = uint64(v)
92 | v = uint64(binary.BigEndian.Uint64(buf[:]))
93 | }
94 | }
95 |
96 | // Negate the number if it is negative.
97 | if negative {
98 | v = uint64(-int64(v))
99 | }
100 |
101 | // Switch the concrete type of integer and fill.
102 | switch kind {
103 | case reflect.Uint8:
104 | *(*uint8)((unsafe.Pointer)(addr)) = uint8(v)
105 | case reflect.Int8:
106 | *(*int8)((unsafe.Pointer)(addr)) = int8(v)
107 | case reflect.Uint16:
108 | *(*uint16)((unsafe.Pointer)(addr)) = uint16(v)
109 | case reflect.Int16:
110 | *(*int16)((unsafe.Pointer)(addr)) = int16(v)
111 | case reflect.Uint32:
112 | *(*uint32)((unsafe.Pointer)(addr)) = uint32(v)
113 | case reflect.Int32:
114 | *(*int32)((unsafe.Pointer)(addr)) = int32(v)
115 | case reflect.Uint64:
116 | *(*uint64)((unsafe.Pointer)(addr)) = uint64(v)
117 | case reflect.Int64:
118 | *(*int64)((unsafe.Pointer)(addr)) = int64(v)
119 | }
120 | return
121 | }
122 |
// traceIntegerField is a field corresponding to integer.
//
// name is the name used in the fetch expression and in the
// log, offset locates the field inside the event struct,
// fetch is the fetch expression, typename is the type suffix
// appended to it, and bigEndian requests the byte order
// transformation while filling.
type traceIntegerField struct {
	name      string
	offset    uintptr
	fetch     string
	kind      reflect.Kind
	typename  string
	bigEndian bool
}
132 |
133 | // format returns the format for the field.
134 | func (f traceIntegerField) format() string {
135 | return fmt.Sprintf("%s=%s%s",
136 | f.name, f.fetch, f.typename)
137 | }
138 |
139 | // fill parsed integer data and move forward.
140 | func (f traceIntegerField) fill(
141 | input []byte, data uintptr,
142 | ) (forward int, err error) {
143 | bytesName := []byte(f.name + "=")
144 | if !bytes.HasPrefix(input, bytesName) {
145 | return 0, errors.Errorf(
146 | "expect integer field start token %q", f.name)
147 | }
148 | forward, err = traceFillInteger(data+f.offset,
149 | f.kind, f.bigEndian, input[len(bytesName):])
150 | forward += len(bytesName)
151 | return
152 | }
153 |
// traceStringField is a field corresponding to string.
//
// XXX: despite outputting double quote symbols, all string
// variables written from the kernel are not escaped and are
// written to the buffer directly. This behaviour has not
// been fixed by the kernel yet (>=5.0).
//
// To work around it, we force the kernel to output a canary
// before and after the string to identify its end. The canary
// either defaults to the string address or is manually
// specified, and must be hard to forge.
//
// isStringAddr tells that the destination is a StringAddr
// rather than a plain string, so the address is stored too.
type traceStringField struct {
	name   string
	offset uintptr
	fetch  string
	canary string

	isStringAddr bool
}
173 |
174 | // format returns the format for the field.
175 | func (f traceStringField) format() string {
176 | return fmt.Sprintf(
177 | "%sStart=%s:u64 %s=+0(%s):string %sEnd=%s:u64",
178 | f.name, f.canary, f.name, f.fetch, f.name, f.canary)
179 | }
180 |
181 | // fill parsed string data and move forward.
182 | func (f traceStringField) fill(
183 | input []byte, addr uintptr,
184 | ) (forward int, err error) {
185 | pointer := (unsafe.Pointer)(addr + f.offset)
186 |
187 | // Extract the string start token canaries.
188 | bytesStart := []byte(f.name + "Start=")
189 | if !bytes.HasPrefix(input, bytesStart) {
190 | return 0, errors.Errorf(
191 | "expect string field start token %q", f.name)
192 | }
193 | var address uint64
194 | addressStep, err := traceFillInteger(
195 | uintptr(unsafe.Pointer(&address)),
196 | reflect.Uint64, false, input[len(bytesStart):])
197 | if err != nil {
198 | return 0, err
199 | }
200 | if f.isStringAddr {
201 | (*StringAddr)(pointer).Addr = address
202 | }
203 |
204 | // Construct the string end token canaries.
205 | lenFirstPortion := len(bytesStart) + addressStep
206 | bytesEnd := input[len(bytesStart):lenFirstPortion]
207 | bytesEnd = []byte(" " + f.name + "End=" + string(bytesEnd))
208 |
209 | // Attempt to find the enclosing part of the
210 | // string in the input.
211 | addressEnd := bytes.Index(input[lenFirstPortion:], bytesEnd)
212 | if addressEnd < 0 {
213 | return 0, errors.Errorf(
214 | "expect string field end token %q", f.name)
215 | }
216 | forward = lenFirstPortion + addressEnd + len(bytesEnd)
217 |
218 | // Construct and parse the string.
219 | bytesString := input[lenFirstPortion+1 : lenFirstPortion+addressEnd]
220 | bytesMiddle := []byte(f.name + "=")
221 | if !bytes.HasPrefix(bytesString, bytesMiddle) {
222 | err = errors.Errorf(
223 | "expect string field middle token %q", f.name)
224 | return
225 | }
226 | bytesString = bytesString[len(bytesMiddle):]
227 | if bytes.Equal(bytesString, bytesFault) {
228 | return
229 | }
230 | bytesString = bytesString[1 : len(bytesString)-1]
231 | if f.isStringAddr {
232 | (*StringAddr)(pointer).String = string(bytesString)
233 | } else {
234 | *(*string)(pointer) = string(bytesString)
235 | }
236 | return
237 | }
238 |
// traceEventDescriptor describes the way to process an event.
//
// The first field of the event struct must always be
// anonymous and be either a probe event or a return event.
// Its type is recorded in meta and determines how the event
// will be registered and processed.
//
// fields holds the compiled fields in declaration order, and
// initialCondition is the conjunction of all the conditions
// collected from the tags.
type traceEventDescriptor struct {
	typ    reflect.Type
	meta   reflect.Type
	fields []traceEventField

	initialCondition string
}
252 |
253 | // format returns the event field format concatenated.
254 | func (efd traceEventDescriptor) format() string {
255 | var formats []string
256 | for _, field := range efd.fields {
257 | formats = append(formats, field.format())
258 | }
259 | return strings.Join(formats, " ")
260 | }
261 |
// mapIntegerName is the map from the integer kind to the
// type suffix appended to its fetch expression.
var mapIntegerName = map[reflect.Kind]string{
	reflect.Uint8:  ":u8",
	reflect.Int8:   ":s8",
	reflect.Uint16: ":u16",
	reflect.Int16:  ":s16",
	reflect.Uint32: ":u32",
	reflect.Int32:  ":s32",
	reflect.Uint64: ":u64",
	reflect.Int64:  ":s64",
}
273 |
// mapIntegerSize is the map from the integer kind to its
// size in bits, used for validating bit ranges.
var mapIntegerSize = map[reflect.Kind]uint64{
	reflect.Uint8:  8,
	reflect.Int8:   8,
	reflect.Uint16: 16,
	reflect.Int16:  16,
	reflect.Uint32: 32,
	reflect.Int32:  32,
	reflect.Uint64: 64,
	reflect.Int64:  64,
}
285 |
286 | // compileTraceEvent will attempt to parse the fields and
287 | // convert the event specified by type into the event
288 | // descriptor.
289 | func compileTraceEvent(
290 | typ reflect.Type,
291 | ) (*traceEventDescriptor, error) {
292 | result := &traceEventDescriptor{
293 | typ: typ,
294 | }
295 |
296 | // Ensure that the specified type should be struct.
297 | if kind := typ.Kind(); kind != reflect.Struct {
298 | return nil, errors.Errorf("invalid kind %q", kind)
299 | }
300 |
301 | // Detect and collect first field.
302 | if typ.NumField() == 0 {
303 | return nil, errors.New("empty struct")
304 | }
305 | firstField := typ.Field(0)
306 | if !firstField.Anonymous {
307 | return nil, errors.New("first field must be anonymous")
308 | }
309 | result.meta = firstField.Type
310 | switch result.meta {
311 | case typeProbeEvent:
312 | case typeReturnEvent:
313 | default:
314 | return nil, errors.Errorf(
315 | "type %s cannot be first field", result.meta)
316 | }
317 |
318 | // Perform conversion of each field recursively.
319 | var stackTyp []reflect.Type
320 | var stackIndex []int
321 | var stackOffset []uintptr
322 | var stackNames []string
323 | var stackArgs [][]string
324 | var conds []string
325 | stackTyp = append(stackTyp, typ)
326 | stackIndex = append(stackIndex, 1)
327 | stackOffset = append(stackOffset, 0)
328 | stackNames = append(stackNames, "")
329 | stackArgs = append(stackArgs, nil)
330 | for len(stackTyp) > 0 {
331 | // Fetch current field for parsing.
332 | currentTyp := stackTyp[len(stackTyp)-1]
333 | currentIndex := stackIndex[len(stackIndex)-1]
334 | if currentTyp.NumField() <= currentIndex {
335 | stackTyp = stackTyp[:len(stackTyp)-1]
336 | stackIndex = stackIndex[:len(stackIndex)-1]
337 | stackOffset = stackOffset[:len(stackOffset)-1]
338 | stackArgs = stackArgs[:len(stackArgs)-1]
339 | if len(stackIndex) > 0 {
340 | stackIndex[len(stackIndex)-1]++
341 | }
342 | continue
343 | }
344 | currentField := typ.FieldByIndex(stackIndex)
345 | currentKind := currentField.Type.Kind()
346 | fieldOffset := stackOffset[len(stackOffset)-1] +
347 | currentField.Offset
348 | tag := currentField.Tag.Get("tracing")
349 |
350 | // Apply alternations to the tracing tag with
351 | // arguments specified on stack.
352 | currentPrefix := strings.Join(stackNames, "_")
353 | tag = strings.ReplaceAll(tag, "{0}", currentPrefix)
354 | for i, value := range stackArgs[len(stackArgs)-1] {
355 | tag = strings.ReplaceAll(tag,
356 | fmt.Sprintf("{%d}", i+1), value)
357 | }
358 |
359 | // Specially processing for the condition field,
360 | // which is special embedding of condition.
361 | if currentField.Type == typeCondition {
362 | if tag != "" {
363 | conds = append(conds, tag)
364 | }
365 | stackIndex[len(stackIndex)-1]++
366 | continue
367 | }
368 | args := strings.Split(tag, ",")
369 |
370 | // Specially processing for the struct kind,
371 | // which is considered as embedding.
372 | if currentKind == reflect.Struct &&
373 | currentField.Type != typeStringAddr {
374 | stackTyp = append(stackTyp, currentField.Type)
375 | stackIndex = append(stackIndex, 0)
376 | stackOffset = append(stackOffset, fieldOffset)
377 | stackNames = append(stackNames, currentField.Name)
378 | stackArgs = append(stackArgs, args)
379 | continue
380 | }
381 |
382 | // Analyze and prepare current parameter.
383 | //
384 | // The first two parameters must always be
385 | // fetcher and condition (optional). Callers
386 | // could add more conditions after that.
387 | if tag == "" {
388 | stackIndex[len(stackIndex)-1]++
389 | continue
390 | }
391 | fetch := args[0]
392 | if len(args) > 1 && args[1] != "" {
393 | conds = append(conds, args[1])
394 | }
395 |
396 | // Evaluate current name and offset.
397 | if currentField.Anonymous {
398 | return nil, errors.New(
399 | "cannot embed non-struct field")
400 | }
401 | fieldName := currentPrefix + currentField.Name
402 |
403 | // Fallback tracing.StringAddr to string kind.
404 | isStringAddr := false
405 | if currentField.Type == typeStringAddr {
406 | currentKind = reflect.String
407 | isStringAddr = true
408 | }
409 |
410 | // Find out the kind of the current field.
411 | switch currentKind {
412 | case reflect.Uint8, reflect.Int8,
413 | reflect.Uint16, reflect.Int16,
414 | reflect.Uint32, reflect.Int32,
415 | reflect.Uint64, reflect.Int64:
416 | var bigEndian bool
417 | typename := mapIntegerName[currentKind]
418 | for i := 2; i < len(args); i++ {
419 | arg := args[i]
420 | switch {
421 | case arg == "":
422 | case arg == "bigendian":
423 | bigEndian = true
424 | case strings.HasPrefix(arg, "bit[") &&
425 | strings.HasSuffix(arg, "]"):
426 | size := mapIntegerSize[currentKind]
427 | start := arg[len("bit[") : len(arg)-1]
428 | end := start
429 | if col := strings.Index(start, ":"); col >= 0 {
430 | start = start[0:col]
431 | end = end[col+1:]
432 | }
433 | vstart, err := strconv.ParseUint(start, 10, 64)
434 | if err != nil {
435 | return nil, errors.Errorf(
436 | "malformed start %q: %s", arg, err)
437 | }
438 | vend, err := strconv.ParseUint(end, 10, 64)
439 | if err != nil {
440 | return nil, errors.Errorf(
441 | "malformed end %q: %s", arg, err)
442 | }
443 | if vstart > vend || vend >= size {
444 | return nil, errors.Errorf(
445 | `invalid bit range "%d:%d"`, vstart, vend)
446 | }
447 | typename = fmt.Sprintf(":b%d@%d/%d",
448 | vend-vstart+1, vstart, size)
449 | default:
450 | return nil, errors.Errorf(
451 | "unknown modifier %q", arg)
452 | }
453 | }
454 | result.fields = append(result.fields,
455 | &traceIntegerField{
456 | name: fieldName,
457 | offset: fieldOffset,
458 | fetch: fetch,
459 | kind: currentKind,
460 | typename: typename,
461 | bigEndian: bigEndian,
462 | })
463 | case reflect.String:
464 | canary := fetch
465 | if len(args) > 2 && args[2] != "" {
466 | canary = fetch
467 | }
468 | result.fields = append(result.fields,
469 | &traceStringField{
470 | name: fieldName,
471 | offset: fieldOffset,
472 | fetch: fetch,
473 | canary: canary,
474 | isStringAddr: isStringAddr,
475 | })
476 | default:
477 | return nil, errors.Errorf(
478 | "unacceptible kind %s", currentKind)
479 | }
480 | stackIndex[len(stackIndex)-1]++
481 | }
482 |
483 | // Evaluate the initial condition for the queries.
484 | if len(conds) == 1 {
485 | result.initialCondition = conds[0]
486 | } else if len(conds) > 1 {
487 | result.initialCondition = "(" + strings.Join(
488 | conds, ") && (") + ")"
489 | }
490 |
491 | return result, nil
492 | }
493 |
494 | // fill will parse the given log and fill the content.
495 | func (efd traceEventDescriptor) fill(
496 | data uintptr, log []byte,
497 | ) (forward int, err error) {
498 | forward = 0
499 | for i, field := range efd.fields {
500 | // Remove spaces encountered.
501 | for log[forward] == ' ' {
502 | forward++
503 | }
504 | if log[forward] == '\n' {
505 | err = errors.Errorf(
506 | "unexpected truncation in field #%d", i)
507 | return
508 | }
509 |
510 | // Start parsing the field.
511 | var current int
512 | current, err = field.fill(log[forward:], data)
513 | forward += current
514 | if err != nil {
515 | return
516 | }
517 | }
518 | return
519 | }
520 |
--------------------------------------------------------------------------------
/rcnotify/rcnotify.go:
--------------------------------------------------------------------------------
1 | package rcnotify
2 |
3 | import (
4 | "context"
5 | "path/filepath"
6 | "runtime"
7 | "sync"
8 | "time"
9 |
10 | "github.com/aegistudio/shaft"
11 | "github.com/pkg/errors"
12 |
13 | "github.com/chaitin/systracer"
14 | "github.com/chaitin/systracer/inode"
15 | "github.com/chaitin/systracer/pkg/kversion"
16 | )
17 |
18 | // extractPathComponent is the code for creating a
19 | // valid portion of path component.
20 | func extractPathComponent(src []systracer.StringAddr) []string {
21 | var result []string
22 | for i := 0; i < len(src); i++ {
23 | if src[i].Addr == 0 {
24 | break
25 | }
26 | if i > 0 && src[i].Addr == src[i-1].Addr {
27 | break
28 | }
29 | result = append(result, src[i].String)
30 | }
31 | return result
32 | }
33 |
// Op is the file event op for linux.
//
// The event operations are defined dedicated for linux,
// and some extra information will be filled based on
// different event type.
//
// The operations can be or-ed together to represent
// set of events for notification.
type Op uint64

const (
	OpCreate = Op(1 << iota)
	OpMkdir
	OpMknod
	OpDelete
	OpRmdir
	OpRename
	OpAttrib
	OpLink
	OpSymlink

	// OpAll is the set of every operation defined above,
	// OpMknod included.
	OpAll = OpCreate | OpMkdir | OpMknod | OpDelete | OpRmdir |
		OpRename | OpAttrib | OpLink | OpSymlink
)
58 |
// Attr indicates valid fields in the attribute event.
//
// These fields are or-ed together to represent the set
// of fields that have been updated by the event.
type Attr uint32

const (
	AttrMode = Attr(1 << iota)
	AttrUID
	AttrGID
)
70 |
// eventRaw is the transported event on linux waiting
// to be dispatched to master.
//
// Master should look up the source and target paths
// and translate them into file events for subscribers.
// The pointer fields are copied into the dispatched
// event as they are.
type eventRaw struct {
	op        Op
	timestamp time.Time
	pid       uint32
	source    path
	target    path
	attr      Attr
	mode      *uint16
	dev       *uint32
	symlink   *string
	uid       *uint32
	gid       *uint32
}
89 |
// eventRegistry is the common registry holding the
// information of an event being assembled, together
// with the source and target inodes it was created for.
type eventRegistry struct {
	event       eventRaw
	targetInode uint64
	sourceInode uint64
}
97 |
// Event is the standard format of linux directory event
// delivered to subscribers.
//
// Target is the path evaluated from the target of the raw
// event. Source is the evaluated source path for rename
// and link events, and the symlink content otherwise.
type Event struct {
	Op        Op
	Timestamp time.Time
	PID       uint32
	Target    *string
	Source    *string
	Attr      Attr
	Mode      *uint16
	Dev       *uint32
	Uid       *uint32
	Gid       *uint32
}
111 |
// dispatchPolicy stores information for the dispatcher,
// including the file name corresponding to an inode, and
// its related flags.
//
// While dispatching events, the subscriber works in a
// hierarchical manner: the dispatch policy nearer to
// the leaf will be applied first.
type dispatchPolicy struct {
	name    string
	opFlags Op
}
123 |
// subscriber stores the information for dispatching
// the subscribed events to the subscriber.
//
// done remembers the marker of the event dispatched
// most recently so that one event is not dispatched to
// the same subscriber twice. policies is indexed by
// inode.
type subscriber struct {
	ctx        context.Context
	done       *uint8
	allOpFlags Op
	eventCh    chan<- Event
	policies   map[uint64]dispatchPolicy
}
133 |
// composeSuffix joins the components into a path suffix. The
// components are given from the innermost one to the
// outermost one, so they are joined in reverse order.
func composeSuffix(components []string) string {
	reversed := make([]string, 0, len(components))
	for i := len(components) - 1; i >= 0; i-- {
		reversed = append(reversed, components[i])
	}
	return filepath.Join(reversed...)
}
143 |
144 | // evaluatePathPolicy attempts to evaluate the path and
145 | // calculate the policies for the path.
146 | func (s *subscriber) evaluatePathPolicy(p path) (*string, Op) {
147 | paths, inodes := p.extract()
148 | for i, inode := range inodes {
149 | if inode == 0 {
150 | continue
151 | }
152 | policy, ok := s.policies[inode]
153 | if !ok {
154 | continue
155 | }
156 | targetSuffix := composeSuffix(paths[:i])
157 | result := new(string)
158 | *result = filepath.Join(policy.name, targetSuffix)
159 | return result, policy.opFlags
160 | }
161 | return nil, Op(0)
162 | }
163 |
164 | // dispatch is the handler for dispatching event
165 | // to receivers.
166 | func (s *subscriber) dispatch(
167 | rawEvent eventRaw, visited *uint8,
168 | ) {
169 | if s.done == visited {
170 | // The event has already been dispatched,
171 | // so we won't dispatch it again here.
172 | return
173 | }
174 | s.done = visited
175 | if s.allOpFlags&rawEvent.op == 0 {
176 | return
177 | }
178 |
179 | // Prepare the base for the new dispatching of
180 | // specified file event.
181 | var event Event
182 | event.Op = rawEvent.op
183 | event.PID = rawEvent.pid
184 | event.Timestamp = rawEvent.timestamp
185 | event.Attr = rawEvent.attr
186 | event.Mode = rawEvent.mode
187 | event.Dev = rawEvent.dev
188 | event.Uid = rawEvent.uid
189 | event.Gid = rawEvent.gid
190 | event.Source = rawEvent.symlink
191 |
192 | // Compose the path parameters for the event.
193 | var opFlags, allFlags Op
194 | event.Target, allFlags = s.evaluatePathPolicy(rawEvent.target)
195 | switch rawEvent.op {
196 | case OpRename, OpLink:
197 | event.Source, opFlags = s.evaluatePathPolicy(rawEvent.source)
198 | allFlags |= opFlags
199 | case OpSymlink:
200 | event.Source = rawEvent.symlink
201 | }
202 |
203 | // Dispatch the collected event to subscriber.
204 | if allFlags&rawEvent.op == 0 {
205 | return
206 | }
207 | select {
208 | case <-s.ctx.Done():
209 | case s.eventCh <- event:
210 | }
211 | }
212 |
// collector is the collector for the linux file related
// events. It keeps the pending event registries recorded by
// the security_inode_* probe handlers until a matching
// fsnotify event shows up and the event is dispatched.
type collector struct {
	// registries holds the pending event of each task, keyed
	// by the task PID reported by the trace probe.
	registries map[uint32]*eventRegistry

	// dispatchMap maps a watched inode number to the
	// []*subscriber slice interested in that inode.
	dispatchMap *sync.Map
}
220 |
221 | // allocateRename will attempt to allocate a new
222 | // or previously existing registry for rename.
223 | func (col *collector) allocateRename(
224 | taskPID uint32, event entrySecurityInodeRename,
225 | ) *eventRegistry {
226 | registry := col.registries[taskPID]
227 | if registry != nil {
228 | if registry.event.op != OpRename ||
229 | registry.sourceInode != event.SrcDir ||
230 | registry.targetInode != event.DstDir {
231 | delete(col.registries, taskPID)
232 | registry = nil
233 | }
234 | }
235 | if registry == nil {
236 | registry = &eventRegistry{
237 | event: eventRaw{
238 | op: OpRename,
239 | },
240 | sourceInode: event.SrcDir,
241 | targetInode: event.DstDir,
242 | }
243 | col.registries[taskPID] = registry
244 | }
245 | return registry
246 | }
247 |
248 | // handleRenameSource handles the event triggered
249 | // when renaming the file and is captured by our
250 | // trace probe.
251 | //
252 | // security_inode_rename(sourcePath, &dentry{
253 | // = Source,
254 | // }, targetPath, ...)
255 | func (col *collector) handleRenameSource(
256 | event entrySecurityInodeRenameSource,
257 | ) {
258 | registry := col.allocateRename(
259 | event.TaskPID, event.Event)
260 | registry.event.source = event.Source
261 | }
262 |
263 | // handleRenameTarget handles the event triggered
264 | // when renaming the file and is captured by our
265 | // trace probe.
266 | //
267 | // security_inode_rename(sourcePath, ...,
268 | // targetPath, &dentry{
269 | // = Target,
270 | // })
271 | func (col *collector) handleRenameTarget(
272 | event entrySecurityInodeRenameTarget,
273 | ) {
274 | registry := col.allocateRename(
275 | event.TaskPID, event.Event)
276 | registry.event.target = event.Target
277 | }
278 |
279 | // handleCreate handles the event triggered when
280 | // creating a file and is captured by our trace probe.
281 | //
282 | // security_inode_create(targetInode, &dentry{
283 | // = targetPath,
284 | // }, mode, dev)
285 | func (col *collector) handleCreate(
286 | event entrySecurityInodeCreate,
287 | ) {
288 | registry := &eventRegistry{
289 | event: eventRaw{
290 | op: OpCreate,
291 | mode: new(uint16),
292 | target: event.Path,
293 | },
294 | targetInode: event.Dir,
295 | }
296 | *registry.event.mode = event.Mode
297 | col.registries[event.TaskPID] = registry
298 | }
299 |
300 | // handleMknod handles the event triggered when
301 | // creating a device and is captured by our
302 | // trace probe.
303 | //
304 | // security_inode_mknod(targetInode, &dentry{
305 | // = targetPath,
306 | // }, mode, dev)
307 | func (col *collector) handleMknod(
308 | event entrySecurityInodeMknod,
309 | ) {
310 | registry := &eventRegistry{
311 | event: eventRaw{
312 | op: OpMknod,
313 | mode: new(uint16),
314 | dev: new(uint32),
315 | target: event.Path,
316 | },
317 | targetInode: event.Dir,
318 | }
319 | *registry.event.mode = event.Mode
320 | *registry.event.dev = event.Dev
321 | col.registries[event.TaskPID] = registry
322 | }
323 |
324 | // handleMkdir handles the event triggered when
325 | // creating a direcotry and is captured by our
326 | // trace probe.
327 | //
328 | // security_inode_mkdir(targetInode, &dentry{
329 | // = targetPath,
330 | // }, mode)
331 | func (col *collector) handleMkdir(
332 | event entrySecurityInodeMkdir,
333 | ) {
334 | registry := &eventRegistry{
335 | event: eventRaw{
336 | op: OpMkdir,
337 | mode: new(uint16),
338 | target: event.Path,
339 | },
340 | targetInode: event.Dir,
341 | }
342 | *registry.event.mode = event.Mode
343 | col.registries[event.TaskPID] = registry
344 | }
345 |
346 | // allocateLink will attempt to allocate a new
347 | // or previously existing registry for link.
348 | func (col *collector) allocateLink(
349 | taskPID uint32, event entrySecurityInodeLink,
350 | ) *eventRegistry {
351 | registry := col.registries[taskPID]
352 | if registry != nil {
353 | if registry.event.op != OpLink ||
354 | registry.targetInode != event.Dir {
355 | delete(col.registries, taskPID)
356 | registry = nil
357 | }
358 | }
359 | if registry == nil {
360 | registry = &eventRegistry{
361 | event: eventRaw{
362 | op: OpLink,
363 | },
364 | targetInode: event.Dir,
365 | }
366 | col.registries[taskPID] = registry
367 | }
368 | return registry
369 | }
370 |
371 | // handleLinkSource handles the event triggered
372 | // when creating hard link of the file and is captured
373 | // by our trace probe.
374 | //
375 | // security_inode_link(source, &dentry{
376 | // = Dir,
377 | // }, ...)
378 | func (col *collector) handleLinkSource(
379 | event entrySecurityInodeLinkSource,
380 | ) {
381 | registry := col.allocateLink(
382 | event.TaskPID, event.Event)
383 | registry.event.source = event.Source
384 | }
385 |
386 | // handleRenameTarget handles the event triggered
387 | // when creating hard link of the file and is captured
388 | // by our trace probe.
389 | //
390 | // security_inode_link(..., &dentry{
391 | // = Target,
392 | // }, target)
393 | func (col *collector) handleLinkTarget(
394 | event entrySecurityInodeLinkTarget,
395 | ) {
396 | registry := col.allocateLink(
397 | event.TaskPID, event.Event)
398 | registry.event.target = event.Target
399 | }
400 |
401 | // handleSymlink handles the event triggered when
402 | // creating soft link of the file and is captured by
403 | // our trace probe.
404 | //
405 | // security_inode_symlink(targetInode, &dentry{
406 | // = Target,
407 | // }, source)
408 | func (col *collector) handleSymlink(
409 | event entrySecurityInodeSymlink,
410 | ) {
411 | registry := &eventRegistry{
412 | event: eventRaw{
413 | op: OpSymlink,
414 | target: event.Path,
415 | symlink: new(string),
416 | },
417 | targetInode: event.Dir,
418 | }
419 | *registry.event.symlink = event.Name
420 | col.registries[event.TaskPID] = registry
421 | }
422 |
423 | // handleUnlink handles the event triggered when
424 | // removing a file and is captured by our trace probe.
425 | //
426 | // security_inode_unlink(targetInode, &dentry{
427 | // = targetPath,
428 | // })
429 | func (col *collector) handleUnlink(
430 | event entrySecurityInodeUnlink,
431 | ) {
432 | registry := &eventRegistry{
433 | event: eventRaw{
434 | op: OpDelete,
435 | target: event.Path,
436 | },
437 | targetInode: event.Path.I0,
438 | }
439 | col.registries[event.TaskPID] = registry
440 | }
441 |
442 | // handleRmdir handles the event triggered when
443 | // removing a direcotry and is captured by our
444 | // trace probe.
445 | //
446 | // security_inode_rmdir(targetInode, &dentry{
447 | // = targetPath,
448 | // })
449 | func (col *collector) handleRmdir(
450 | event entrySecurityInodeRmdir,
451 | ) {
452 | registry := &eventRegistry{
453 | event: eventRaw{
454 | op: OpRmdir,
455 | target: event.Path,
456 | },
457 | targetInode: event.Path.I0,
458 | }
459 | col.registries[event.TaskPID] = registry
460 | }
461 |
462 | // handleSetattr handles the event triggered when
463 | // updating a file attributes and is captured by
464 | // our trace probe.
465 | //
466 | // security_inode_setattr(&dentry{
467 | // = targetPath,
468 | // }, &iattr{
469 | // ia_mode = Mode,
470 | // ia_uid = Uid,
471 | // ia_gid = Gid,
472 | // })
473 | func (col *collector) handleSetattr(
474 | event entrySecurityInodeSetattr,
475 | ) {
476 | registry := &eventRegistry{
477 | event: eventRaw{
478 | op: OpAttrib,
479 | target: event.Path,
480 | },
481 | targetInode: event.Path.I0,
482 | }
483 | if Attr(event.Valid)&AttrMode != 0 {
484 | registry.event.attr |= AttrMode
485 | registry.event.mode = new(uint16)
486 | *registry.event.mode = event.Mode
487 | }
488 | if Attr(event.Valid)&AttrUID != 0 {
489 | registry.event.attr |= AttrUID
490 | registry.event.uid = new(uint32)
491 | *registry.event.uid = event.Uid
492 | }
493 | if Attr(event.Valid)&AttrGID != 0 {
494 | registry.event.attr |= AttrGID
495 | registry.event.gid = new(uint32)
496 | *registry.event.gid = event.Gid
497 | }
498 | if registry.event.attr == 0 {
499 | // No event we are interested, so we will
500 | // just skip reporting the events.
501 | return
502 | }
503 | col.registries[event.TaskPID] = registry
504 | }
505 |
506 | // handleFsnotify_V2_6_32 handles the fsnotify event
507 | // from 2.6.32 (inclusive) to 5.9 (exclusive).
508 | func (col *collector) handleFsnotify_V2_6_32(
509 | event entryFsnotify_V2_6_32,
510 | ) {
511 | if _, ok := col.registries[event.TaskPID]; !ok {
512 | return
513 | }
514 | col.handleFsnotify(eventFsnotify{
515 | TaskPID: event.TaskPID,
516 | Timestamp: event.Timestamp,
517 | Inode: uint64(event.Inode),
518 | Access: event.Access,
519 | ModifyAttrib: event.ModifyAttrib,
520 | CloseOpen: event.CloseOpen,
521 | Dentry: event.Dentry,
522 | Filename: event.Filename,
523 | })
524 | }
525 |
526 | // handleFsnotify_V5_9 handles the fsnotify event
527 | // from 5.9 (inclusive) to now.
528 | func (col *collector) handleFsnotify_V5_9(
529 | event entryFsnotify_V5_9,
530 | ) {
531 | if _, ok := col.registries[event.TaskPID]; !ok {
532 | return
533 | }
534 | baseEvent := eventFsnotify{
535 | TaskPID: event.TaskPID,
536 | Timestamp: event.Timestamp,
537 | Access: event.Access,
538 | ModifyAttrib: event.ModifyAttrib,
539 | CloseOpen: event.CloseOpen,
540 | Dentry: event.Dentry,
541 | Filename: event.Filename,
542 | Visited: new(uint8),
543 | }
544 | if event.Inode != 0 {
545 | baseEvent.Inode = uint64(event.Inode)
546 | col.handleFsnotify(baseEvent)
547 | }
548 | if event.Dir != 0 {
549 | baseEvent.Inode = uint64(event.Dir)
550 | col.handleFsnotify(baseEvent)
551 | }
552 | }
553 |
554 | // handleFsnotifyParent_V5_9 handles the fsnotify
555 | // parent event from 5.9 (inclusive) to now.
556 | func (col *collector) handleFsnotifyParent_V5_9(
557 | event entryFsnotifyParent_V5_9,
558 | ) {
559 | if _, ok := col.registries[event.TaskPID]; !ok {
560 | return
561 | }
562 | col.handleFsnotify(eventFsnotify{
563 | TaskPID: event.TaskPID,
564 | Timestamp: event.Timestamp,
565 | Inode: uint64(event.Inode),
566 | Access: event.Access,
567 | ModifyAttrib: event.ModifyAttrib,
568 | CloseOpen: event.CloseOpen,
569 | Dentry: event.Dentry,
570 | Filename: event.Filename,
571 | })
572 | }
573 |
574 | // handleFsnotify handles the event triggered when
575 | // fsnotify dispatch call is invoked and is captured
576 | // by our trace probe.
577 | func (col *collector) handleFsnotify(
578 | event eventFsnotify,
579 | ) {
580 | registry := col.registries[event.TaskPID]
581 | if registry == nil {
582 | return
583 | }
584 |
585 | // Judge whether it is dispatch condition.
586 | switch registry.event.op {
587 | case OpSymlink:
588 | fallthrough
589 | case OpLink:
590 | fallthrough
591 | case OpCreate, OpMkdir, OpMknod:
592 | switch event.Dentry {
593 | case 4:
594 | if event.Inode != registry.targetInode {
595 | return
596 | }
597 | default:
598 | return
599 | }
600 | case OpAttrib:
601 | switch event.ModifyAttrib {
602 | case 2:
603 | if event.Inode != registry.targetInode {
604 | return
605 | }
606 | default:
607 | return
608 | }
609 | case OpDelete, OpRmdir:
610 | switch {
611 | case event.ModifyAttrib == 2:
612 | fallthrough
613 | case event.Dentry == 16:
614 | if event.Inode != registry.targetInode {
615 | return
616 | }
617 | default:
618 | return
619 | }
620 | case OpRename:
621 | switch event.Dentry {
622 | case 1:
623 | if event.Inode != registry.sourceInode {
624 | return
625 | }
626 | case 2:
627 | if event.Inode != registry.targetInode {
628 | return
629 | }
630 | default:
631 | return
632 | }
633 | default:
634 | return
635 | }
636 |
637 | // Dispatch the stored event at this point.
638 | delete(col.registries, event.TaskPID)
639 | registry.event.pid = event.TaskPID
640 | registry.event.timestamp = event.Timestamp
641 | _, targetInodes := registry.event.target.extract()
642 | for _, inode := range targetInodes {
643 | if subs, ok := col.dispatchMap.Load(inode); ok {
644 | for _, sub := range subs.([]*subscriber) {
645 | sub.dispatch(registry.event, event.Visited)
646 | }
647 | }
648 | }
649 | if registry.event.op&(OpRename|OpLink) != 0 {
650 | _, sourceInodes := registry.event.source.extract()
651 | for _, inode := range sourceInodes {
652 | if subs, ok := col.dispatchMap.Load(inode); ok {
653 | for _, sub := range subs.([]*subscriber) {
654 | sub.dispatch(registry.event, event.Visited)
655 | }
656 | }
657 | }
658 | }
659 | }
660 |
// Watcher is the subscription of the dispatch info.
type Watcher struct {
	// C is the channel on which the watched events arrive.
	C <-chan Event

	// mgr is the manager that created this watcher.
	mgr *Manager

	// cancel cancels the context of the subscriber.
	cancel context.CancelFunc

	// sub is the subscriber registered in the manager.
	sub *subscriber

	// inodes are the pinned inodes of the watch points,
	// which are unpinned when the watcher is closed.
	inodes []*inode.Inode

	// once guards the one-shot release done by Close.
	once sync.Once
}
670 |
671 | func (s *Watcher) Close() {
672 | s.cancel()
673 | s.once.Do(func() {
674 | s.mgr.evict(s.sub)
675 | for _, inode := range s.inodes {
676 | inode.Unpin()
677 | }
678 | s.inodes = nil
679 | })
680 | runtime.SetFinalizer(s, nil)
681 | }
682 |
// Manager is the manager for all directory events.
type Manager struct {
	// ctx is the parent of the context of every watcher.
	ctx context.Context

	// mtx is held by emplace and evict while they update
	// subsets and dispatchMap.
	mtx sync.Mutex

	// subsets maps an inode number to the set of
	// subscribers holding a policy for that inode.
	subsets map[uint64]map[*subscriber]struct{}

	// dispatchMap maps an inode number to a []*subscriber
	// snapshot of the matching subset; it is the same map
	// the collector loads from when dispatching.
	dispatchMap *sync.Map

	// inodeMgr pins the inodes of the watch points.
	inodeMgr *inode.Manager
}
691 |
692 | func (m *Manager) evict(sub *subscriber) {
693 | m.mtx.Lock()
694 | defer m.mtx.Unlock()
695 | for inode := range sub.policies {
696 | subset, ok := m.subsets[inode]
697 | if !ok {
698 | continue
699 | }
700 | delete(subset, sub)
701 | if len(subset) == 0 {
702 | delete(m.subsets, inode)
703 | m.dispatchMap.Delete(inode)
704 | }
705 | var remainings []*subscriber
706 | for remaining := range subset {
707 | remainings = append(remainings, remaining)
708 | }
709 | m.dispatchMap.Store(inode, remainings)
710 | }
711 | }
712 |
713 | func (m *Manager) emplace(sub *subscriber) {
714 | m.mtx.Lock()
715 | defer m.mtx.Unlock()
716 | for inode := range sub.policies {
717 | subset, ok := m.subsets[inode]
718 | if !ok {
719 | subset = make(map[*subscriber]struct{})
720 | m.subsets[inode] = subset
721 | }
722 | subset[sub] = struct{}{}
723 | var updated []*subscriber
724 | for item := range subset {
725 | updated = append(updated, item)
726 | }
727 | m.dispatchMap.Store(inode, updated)
728 | }
729 | }
730 |
// watchPoint describes a single file or directory that a
// watcher has been asked to watch.
type watchPoint struct {
	// name is the path given by the caller, which becomes
	// the name of the dispatch policy.
	name string

	// watch pins the inode of the watch point.
	watch func(*inode.Manager) (*inode.Inode, error)

	// opFlags are the operations to report for this point.
	opFlags Op
}
736 |
// option accumulates the settings applied by Option
// functions before a watcher is created.
type option struct {
	// watchPoints are the requested watch points, in the
	// order the options were applied.
	watchPoints []watchPoint
}
740 |
// Option is an option for creating a watcher. It is a
// function that modifies the accumulated option set.
type Option func(*option)
743 |
744 | // WatchFile specifies a file for watching.
745 | func WatchFile(opFlags Op, file string) Option {
746 | return func(opt *option) {
747 | opt.watchPoints = append(opt.watchPoints, watchPoint{
748 | name: file,
749 | watch: func(mgr *inode.Manager) (*inode.Inode, error) {
750 | return mgr.PinFile(file)
751 | },
752 | opFlags: opFlags,
753 | })
754 | }
755 | }
756 |
757 | // WatchDir specifies a directory for watching.
758 | func WatchDir(opFlags Op, dir string) Option {
759 | return func(opt *option) {
760 | opt.watchPoints = append(opt.watchPoints, watchPoint{
761 | name: dir,
762 | watch: func(mgr *inode.Manager) (*inode.Inode, error) {
763 | return mgr.PinDir(dir)
764 | },
765 | opFlags: opFlags,
766 | })
767 | }
768 | }
769 |
770 | // WithOptions aggregates a set of options for execution.
771 | func WithOptions(opts ...Option) Option {
772 | return func(option *option) {
773 | for _, opt := range opts {
774 | opt(option)
775 | }
776 | }
777 | }
778 |
779 | // Watch with specified options and returns error.
780 | func (mgr *Manager) Watch(opts ...Option) (*Watcher, error) {
781 | var option option
782 | WithOptions(opts...)(&option)
783 |
784 | // Attempt to create pins for specified watchers.
785 | created := false
786 | ctx, cancel := context.WithCancel(mgr.ctx)
787 | defer func() {
788 | if !created {
789 | cancel()
790 | }
791 | }()
792 | eventCh := make(chan Event)
793 | subscriber := &subscriber{
794 | ctx: ctx,
795 | done: new(uint8),
796 | eventCh: eventCh,
797 | policies: make(map[uint64]dispatchPolicy),
798 | }
799 | result := &Watcher{
800 | C: eventCh,
801 | mgr: mgr,
802 | cancel: cancel,
803 | sub: subscriber,
804 | }
805 | for _, watchPoint := range option.watchPoints {
806 | pin, err := watchPoint.watch(mgr.inodeMgr)
807 | if err != nil {
808 | return nil, err
809 | }
810 | defer func() {
811 | if !created {
812 | pin.Unpin()
813 | }
814 | }()
815 | subscriber.policies[pin.Inode()] = dispatchPolicy{
816 | name: watchPoint.name,
817 | opFlags: watchPoint.opFlags,
818 | }
819 | subscriber.allOpFlags |= watchPoint.opFlags
820 | result.inodes = append(result.inodes, pin)
821 | }
822 |
823 | // Attempt to emplace all the modifications to map
824 | // and return the result.
825 | defer func() {
826 | if !created {
827 | mgr.evict(subscriber)
828 | }
829 | }()
830 | mgr.emplace(subscriber)
831 | runtime.SetFinalizer(result, func(value *Watcher) {
832 | value.Close()
833 | })
834 | created = true
835 | return result, nil
836 | }
837 |
838 | // stackRcnotifyManager will attempt to create a rcnotify
839 | // manager and stack it for later operations.
840 | func stackRcnotifyManager(
841 | next func(*Manager) error,
842 | rootCtx context.Context, manager systracer.Manager,
843 | inodeMgr *inode.Manager,
844 | ) error {
845 | dispatchMap := new(sync.Map)
846 | result := &Manager{
847 | ctx: rootCtx,
848 | subsets: make(map[uint64]map[*subscriber]struct{}),
849 | dispatchMap: dispatchMap,
850 | inodeMgr: inodeMgr,
851 | }
852 | collector := &collector{
853 | registries: make(map[uint32]*eventRegistry),
854 | dispatchMap: dispatchMap,
855 | }
856 |
857 | // Attach to the fsnotify dispatcher first.
858 | var fsnotifyHandler interface{}
859 | fsnotifyHandler = collector.handleFsnotify_V2_6_32
860 | if kversion.Current >= kversion.Must("5.9") {
861 | fsnotifyHandler = collector.handleFsnotify_V5_9
862 | }
863 | fsnotify, _, err := manager.TraceKProbe(
864 | "fsnotify", fsnotifyHandler)
865 | if err != nil {
866 | return err
867 | }
868 | defer fsnotify.Close()
869 |
870 | // There's also fsnotify parent handler for those
871 | // version >= 5.9, we will also register them here.
872 | var fsnotifyParent systracer.Trace
873 | if kversion.Current >= kversion.Must("5.9") {
874 | fsnotifyParent, _, err = manager.TraceKProbe(
875 | "__fsnotify_parent",
876 | collector.handleFsnotifyParent_V5_9)
877 | if err != nil {
878 | return err
879 | }
880 | defer fsnotifyParent.Close()
881 | }
882 |
883 | // Define a collection of probe points and their
884 | // associated probes for registering.
885 | probes := map[string][]interface{}{
886 | "security_inode_rename": {
887 | collector.handleRenameSource,
888 | collector.handleRenameTarget,
889 | },
890 | "security_inode_create": {
891 | collector.handleCreate,
892 | },
893 | "security_inode_mknod": {
894 | collector.handleMknod,
895 | },
896 | "security_inode_mkdir": {
897 | collector.handleMkdir,
898 | },
899 | "security_inode_link": {
900 | collector.handleLinkSource,
901 | collector.handleLinkTarget,
902 | },
903 | "security_inode_symlink": {
904 | collector.handleSymlink,
905 | },
906 | "security_inode_unlink": {
907 | collector.handleUnlink,
908 | },
909 | "security_inode_rmdir": {
910 | collector.handleRmdir,
911 | },
912 | "security_inode_setattr": {
913 | collector.handleSetattr,
914 | },
915 | }
916 | var lastSyncCh <-chan struct{}
917 | var registries []systracer.Trace
918 | for point, handlers := range probes {
919 | for _, handler := range handlers {
920 | registry, syncCh, err := manager.
921 | TraceKProbe(point, handler)
922 | if err != nil {
923 | return errors.Wrapf(err,
924 | "initializing %s", handler)
925 | }
926 | defer registry.Close()
927 | lastSyncCh = syncCh
928 | registries = append(registries, registry)
929 | }
930 | }
931 |
932 | // Wait for synchronization of the kprobe registry.
933 | select {
934 | case <-rootCtx.Done():
935 | return nil
936 | case <-lastSyncCh:
937 | }
938 | fsnotify.SetEnabled(true)
939 | if fsnotifyParent != nil {
940 | fsnotifyParent.SetEnabled(true)
941 | }
942 | for _, registry := range registries {
943 | registry.SetEnabled(true)
944 | }
945 | return next(result)
946 | }
947 |
// Module is the DI module of the rcnotify manager, built by
// stacking stackRcnotifyManager.
//
// The module requires a context, a trace manager and
// an inode manager, and injects a rcnotify manager.
var Module = shaft.Stack(stackRcnotifyManager)
953 |
--------------------------------------------------------------------------------
/manager.go:
--------------------------------------------------------------------------------
1 | package systracer
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "fmt"
7 | "io/ioutil"
8 | "os"
9 | "path/filepath"
10 | "reflect"
11 | "strconv"
12 | "sync"
13 | "syscall"
14 | "time"
15 | "unsafe"
16 |
17 | "github.com/pkg/errors"
18 | "go.uber.org/zap"
19 | "golang.org/x/sync/errgroup"
20 | "golang.org/x/sys/unix"
21 |
22 | "github.com/chaitin/systracer/pkg/alloc"
23 | "github.com/chaitin/systracer/pkg/kversion"
24 | )
25 |
// epollNotWorking indicates that polling the tracing pipe
// with epoll does not work on the running kernel, which is
// assumed for every kernel older than 3.11.
//
// XXX: on linux version 3.10, epoll fails to generate edge
// triggered events for tracefs files, which renders the
// trace non-functional.
//
// To work around this, when polling is not working we
// enforce always reading something from the buffer.
var epollNotWorking = kversion.Current < kversion.Must("3.11")
36 |
// traceCreateRequest is the request for creating an
// instance of trace, and wait for creation completion.
type traceCreateRequest struct {
	// handle is the created trace, set on success.
	handle *traceHandle

	// err is the error of the creation, if any.
	err error

	// handler, desc and typ are copied into the created
	// trace handle.
	handler interface{}
	desc    *traceEventDescriptor
	typ     string

	// tracepoint is passed to the initialization of the
	// trace handle.
	tracepoint string

	// doneCh is closed once the request has been handled.
	doneCh chan struct{}

	// syncCh is the synchronization completion channel
	// obtained when the trace has been registered.
	syncCh <-chan struct{}
}
49 |
50 | // createTrace is the request to create a trace object
51 | // with the dispatched request.
52 | func (mgr *traceManager) createTrace(
53 | typ, tracepoint string,
54 | handler interface{}, desc *traceEventDescriptor,
55 | ) (Trace, <-chan struct{}, error) {
56 | req := &traceCreateRequest{
57 | handler: handler,
58 | desc: desc,
59 | typ: typ,
60 | tracepoint: tracepoint,
61 | doneCh: make(chan struct{}),
62 | }
63 | select {
64 | case <-mgr.rootCtx.Done():
65 | return nil, nil, mgr.rootCtx.Err()
66 | case mgr.createCh <- req:
67 | }
68 |
69 | select {
70 | case <-mgr.rootCtx.Done():
71 | return nil, nil, mgr.rootCtx.Err()
72 | case <-req.doneCh:
73 | var handle Trace
74 | if req.handle != nil {
75 | handle = req.handle
76 | }
77 | return handle, req.syncCh, req.err
78 | }
79 | }
80 |
// traceManager implements tracing.TraceManager.
type traceManager struct {
	// rootCtx aborts pending requests once it is done.
	rootCtx context.Context

	// NOTE(review): presumably the terminal error of the
	// manager; its usage is not visible here, confirm.
	lastErr error

	// createCh carries the trace creation requests.
	createCh chan *traceCreateRequest

	// fetchCh carries the writer state fetch requests.
	fetchCh chan *fetchWriterStateRequest
}
88 |
89 | // cleanupNamespace cleans up specified namespace.
90 | func cleanupNamespace(log *zap.SugaredLogger, root, namespace string) {
91 | logger := log.With(
92 | zap.String("root", root),
93 | zap.String("namespace", namespace),
94 | )
95 | if err := removeAllProbe(
96 | root, "kprobe_events", namespace); err != nil {
97 | logger.Infof("remove kprobes: %s", err)
98 | }
99 | if err := removeAllProbe(
100 | root, "uprobe_events", namespace); err != nil {
101 | logger.Infof("remove uprobes: %s", err)
102 | }
103 | if err := removeInstance(root, namespace); err != nil {
104 | logger.Infof("remove instance: %s", err)
105 | }
106 | }
107 |
// traceManagerState holds registry of traces.
type traceManagerState struct {
	// rootCtx is handed to every created trace handle.
	rootCtx context.Context

	// root and namespace are passed to the operations of
	// the trace handles (init, enable, destroy, ...).
	root      string
	namespace string

	// traceID is the most recently allocated trace ID, the
	// starting point for allocating the next one.
	traceID uint64

	// registries maps the trace ID to its handle.
	registries map[uint64]*traceHandle

	// enableCh, closeCh and conditionCh are handed to every
	// created trace handle.
	enableCh    chan *traceEnableRequest
	closeCh     chan *traceCloseRequest
	conditionCh chan *conditionUpdateRequest

	// syncCh is non-nil while the state is marked as not
	// synchronized, and is closed by markSync.
	syncCh chan struct{}
}
120 |
121 | // destroy will clean up all previously allocated
122 | // instances of traces.
123 | func (s *traceManagerState) destroy() {
124 | for _, registry := range s.registries {
125 | registry.destroy(s.root, s.namespace)
126 | }
127 | }
128 |
129 | // markUnsync is the action to mark current manager state
130 | // is not synchronized with the writer, and must perform
131 | // the synchronization action, returning channel for
132 | // synchronization completion notification.
133 | func (s *traceManagerState) markUnsync() <-chan struct{} {
134 | if s.syncCh == nil {
135 | s.syncCh = make(chan struct{})
136 | }
137 | return s.syncCh
138 | }
139 |
140 | // markSync is the action to mark current manager state
141 | // as up-to-date with the writer thread.
142 | func (s *traceManagerState) markSync() {
143 | if s.syncCh == nil {
144 | return
145 | }
146 | close(s.syncCh)
147 | s.syncCh = nil
148 |
149 | // XXX: the request synchronization is a process of
150 | // copy-on-write, since the number of items in the
151 | // table is far less than data to process.
152 | newRegistries := make(map[uint64]*traceHandle)
153 | for id, handle := range s.registries {
154 | newRegistries[id] = handle
155 | }
156 | s.registries = newRegistries
157 | }
158 |
159 | // handleCreate will handle the request of creation.
160 | func (s *traceManagerState) handleCreate(
161 | request *traceCreateRequest,
162 | ) {
163 | defer close(request.doneCh)
164 | defer func() {
165 | if err := recover(); err != nil {
166 | request.err = errors.Wrap(errors.Errorf(
167 | "handleCreate panics: %s", err),
168 | "allocate trace")
169 | }
170 | }()
171 | request.err = func() error {
172 | // Attempt to allocate a new identity.
173 | newTraceID := alloc.Alloc(s.traceID, 0,
174 | func(id uint64) bool {
175 | return s.registries[id] != nil
176 | })
177 | if newTraceID == 0 {
178 | return errors.Wrap(errors.New(
179 | "no available trace ID"),
180 | "allocate trace")
181 | }
182 | s.traceID = newTraceID
183 |
184 | // Allocate and initialize the trace.
185 | handle := &traceHandle{
186 | id: newTraceID,
187 | createTime: uint64(time.Now().UnixNano()),
188 | ctx: s.rootCtx,
189 | enableCh: s.enableCh,
190 | closeCh: s.closeCh,
191 | conditionCh: s.conditionCh,
192 | handler: request.handler,
193 | desc: request.desc,
194 | typ: request.typ,
195 | }
196 | if err := handle.init(s.root, s.namespace,
197 | request.tracepoint); err != nil {
198 | if err != ErrBadTracePoint {
199 | return errors.Wrap(err,
200 | "initialize trace")
201 | }
202 | return ErrBadTracePoint
203 | }
204 | s.registries[newTraceID] = handle
205 | request.handle = handle
206 | request.syncCh = s.markUnsync()
207 | return nil
208 | }()
209 | }
210 |
211 | // handleEnable will handle the request of start.
212 | func (s *traceManagerState) handleEnable(
213 | request *traceEnableRequest,
214 | ) error {
215 | defer close(request.doneCh)
216 | if request.handle.id == 0 {
217 | return nil
218 | }
219 | return request.handle.setEnabled(
220 | s.root, s.namespace, request.enabled)
221 | }
222 |
223 | // handleRemove will handle the request of deletion.
224 | func (s *traceManagerState) handleRemove(
225 | request *traceCloseRequest,
226 | ) {
227 | defer close(request.doneCh)
228 | if request.handle.id == 0 {
229 | return
230 | }
231 | delete(s.registries, request.handle.id)
232 | request.handle.destroy(s.root, s.namespace)
233 | s.markUnsync()
234 | }
235 |
236 | // handleCondition will handle the request of condition.
237 | func (s *traceManagerState) handleCondition(
238 | request *conditionUpdateRequest,
239 | ) {
240 | defer close(request.doneCh)
241 | if request.handle.id == 0 {
242 | return
243 | }
244 | err := request.handle.updateCondition(
245 | s.root, s.namespace, request.condition)
246 | if err != nil {
247 | request.err = errors.Wrap(err,
248 | "update trace condition")
249 | }
250 | }
251 |
// traceWriterState is the state held on writer thread.
type traceWriterState struct {
	// registries maps the trace ID parsed from the probe
	// name to the handle receiving the event.
	registries map[uint64]*traceHandle

	// baseTime and baseEpoch anchor the event epochs: the
	// timestamp of an event is baseTime plus the difference
	// between its epoch and baseEpoch.
	baseTime  time.Time
	baseEpoch time.Duration

	// logger receives the debug messages of the parser.
	logger *zap.SugaredLogger
}
259 |
// fetchWriterStateRequest is an internal request for
// retrieving trace writer state and modify it.
type fetchWriterStateRequest struct {
	// state is the writer state handed back to the
	// requester once doneCh is closed.
	state *traceWriterState

	// doneCh is awaited by the requester.
	// NOTE(review): presumably closed by the writer after
	// it has filled state; confirm at the receiving side.
	doneCh chan struct{}
}
266 |
267 | // fetchWriterState requests for the trace writer state.
268 | func (mgr *traceManager) fetchWriterState() (
269 | *traceWriterState, error,
270 | ) {
271 | req := &fetchWriterStateRequest{
272 | doneCh: make(chan struct{}),
273 | }
274 | select {
275 | case <-mgr.rootCtx.Done():
276 | return nil, mgr.rootCtx.Err()
277 | case mgr.fetchCh <- req:
278 | }
279 | select {
280 | case <-mgr.rootCtx.Done():
281 | return nil, mgr.rootCtx.Err()
282 | case <-req.doneCh:
283 | return req.state, nil
284 | }
285 | }
286 |
// pow10 is the table of powers of ten, where pow10[i]
// holds the value of 10^i.
var pow10 = [10]uint64{
	1,
	10,
	100,
	1000,
	10000,
	100000,
	1000000,
	10000000,
	100000000,
	1000000000,
}

// parseSecond parses a decimal number of seconds, with an
// optional fraction after a period dot, into a duration.
//
// Either side of the dot may be empty and counts as zero, so
// "12", "12.", ".5" and "" are all accepted. The fraction is
// truncated to nanosecond precision, digits beyond the ninth
// are ignored without being validated.
//
// An error is returned when a component is not an unsigned
// decimal number, or when the number of seconds is too large
// to be represented by time.Duration.
func parseSecond(value []byte) (time.Duration, error) {
	// secondLimit is a conservative bound: every whole second
	// count below it, plus any sub-second fraction, still
	// fits into the int64 nanoseconds of time.Duration.
	const secondLimit = uint64(1<<63-1) / 1000000000

	beforeDot, afterDot := value, []byte(nil)
	if dotIndex := bytes.IndexByte(value, '.'); dotIndex >= 0 {
		beforeDot = value[:dotIndex]
		afterDot = value[dotIndex+1:]
	}
	var result int64

	// Parse the component before the dot.
	if len(beforeDot) > 0 {
		val, err := strconv.ParseUint(
			string(beforeDot), 10, 64)
		if err != nil {
			return time.Duration(0), err
		}
		if val >= secondLimit {
			// The multiplication below would overflow and
			// silently yield a bogus duration.
			return time.Duration(0), &strconv.NumError{
				Func: "parseSecond",
				Num:  string(beforeDot),
				Err:  strconv.ErrRange,
			}
		}
		result += int64(val * pow10[9])
	}

	// Parse the component after the dot.
	if len(afterDot) > 0 {
		if len(afterDot) > 9 {
			afterDot = afterDot[:9]
		}
		val, err := strconv.ParseUint(
			string(afterDot), 10, 64)
		if err != nil {
			return time.Duration(0), err
		}
		// Scale the digits up to nanoseconds, e.g. "5" is
		// 5 * 10^8 nanoseconds.
		result += int64(val * pow10[9-len(afterDot)])
	}

	return time.Duration(result), nil
}
340 |
341 | // handleData will process the input of reader.
342 | //
343 | // This operation will limit the epoch of event and
344 | // produces the end epoch, which will ensures that
345 | // events will not process more than once when used
346 | // under the circumstances like side chain mitigating.
347 | func (s *traceWriterState) handleData(
348 | input []byte, startEpoch time.Duration,
349 | ) time.Duration {
350 | limitEpoch := startEpoch
351 |
352 | // Loop and parse input data.
353 | for len(input) > 0 {
354 | func() {
355 | var err error
356 |
357 | // Skip the current strip of input and find
358 | // the dash character.
359 | if len(input) < 17 || input[16] != '-' {
360 | return
361 | }
362 | input = input[17:]
363 |
364 | // Skip and read the PID sequence.
365 | var taskPID uint32
366 | for i := 0; i < len(input); i++ {
367 | if input[i] == ' ' {
368 | value, err := strconv.ParseUint(
369 | string(input[:i]), 10, 32)
370 | input = input[i+1:]
371 | if err != nil {
372 | s.logger.Debugf(
373 | "parse taskid %q: %s",
374 | string(input[:i]), err)
375 | return
376 | }
377 | taskPID = uint32(value)
378 | break
379 | }
380 | if input[i] < '0' || input[i] > '9' {
381 | return
382 | }
383 | }
384 |
385 | // Skip the central portion of CPUID and IRQ.
386 | input = bytes.TrimLeft(input, " ")
387 | for i := 0; i < len(input); i++ {
388 | if input[i] == ' ' {
389 | input = input[i+1:]
390 | break
391 | }
392 | }
393 | for i := 0; i < len(input); i++ {
394 | if input[i] == ' ' {
395 | input = input[i+1:]
396 | break
397 | }
398 | }
399 |
400 | // Parse the duration since the timepoint of
401 | // start of the boot time. (If this could not
402 | // be completed, the timestamp will be now).
403 | var epoch time.Duration
404 | for i := 0; i < len(input); i++ {
405 | if input[i] == ':' {
406 | epoch, err = parseSecond(
407 | bytes.TrimSpace(input[:i]))
408 | if err != nil {
409 | s.logger.Debugf(
410 | "parse epoch %q: %s",
411 | string(input[:i]), err)
412 | return
413 | }
414 | input = input[i+1:]
415 | break
416 | }
417 | if input[i] == '.' || input[i] == ' ' {
418 | continue
419 | }
420 | if input[i] < '0' || input[i] > '9' {
421 | return
422 | }
423 | }
424 | timestamp := s.baseTime.Add(epoch - s.baseEpoch)
425 |
426 | // Judge whether the event is earlier than the
427 | // limit epoch, and we will just parse and skip
428 | // that event if it happens so.
429 | if limitEpoch != 0 && limitEpoch >= epoch {
430 | return
431 | }
432 | limitEpoch = epoch
433 |
434 | // Read the portion of the message key.
435 | var key []byte
436 | for i := 0; i < len(input); i++ {
437 | if input[i] == ':' {
438 | key = input[:i]
439 | input = input[i+1:]
440 | break
441 | } else if input[i] == '\n' {
442 | key = input[:i]
443 | input = input[i:]
444 | break
445 | }
446 | }
447 | createTime, id := parseProbeName(
448 | bytes.TrimSpace(key))
449 | if id == 0 {
450 | return
451 | }
452 | handle := s.registries[id]
453 | if handle == nil ||
454 | handle.createTime != createTime {
455 | return
456 | }
457 |
458 | // Skip the parenthesis of trace.
459 | input = bytes.TrimLeft(input, " ")
460 | if len(input) > 0 && input[0] == '(' {
461 | for i := 0; i < len(input); i++ {
462 | if input[i] == ')' {
463 | input = input[i+1:]
464 | break
465 | }
466 | }
467 | }
468 | input = bytes.TrimLeft(input, " ")
469 |
470 | // A counter for recording whether the handle
471 | // has been called successfully.
472 | var handleSuccess bool
473 | defer func() {
474 | handle.complete(handleSuccess)
475 | }()
476 | defer func() {
477 | if err := recover(); err != nil {
478 | s.logger.Errorf(
479 | "handle #%d panics: %s",
480 | handle.id, err)
481 | }
482 | }()
483 |
484 | // Attempt to allocate the instance of
485 | // event before we call handler.
486 | argument := reflect.New(handle.desc.typ)
487 | baseEvent := (*Event)(
488 | unsafe.Pointer(argument.Pointer()))
489 | baseEvent.TaskPID = taskPID
490 | baseEvent.Timestamp = timestamp
491 | baseEvent.epoch = epoch
492 |
493 | // Fill the fields in the log event.
494 | offset, err := handle.desc.fill(
495 | argument.Pointer(), input)
496 | input = input[offset:]
497 | if err != nil {
498 | s.logger.Errorf(
499 | "handle #%d errors: %s",
500 | handle.id, err)
501 | return
502 | }
503 |
504 | // If the handle is not enabled, just don't
505 | // invoke the function and return.
506 | if !handle.enabled {
507 | return
508 | }
509 |
510 | // Invoke the function and complete the processing.
511 | f := reflect.ValueOf(handle.handler)
512 | _ = f.Call([]reflect.Value{
513 | reflect.Indirect(argument),
514 | })
515 | handleSuccess = true
516 | }()
517 |
518 | // Seek for the next endline and forward.
519 | index := bytes.Index(input, []byte("\n"))
520 | if index < 0 {
521 | break
522 | }
523 | input = input[index+1:]
524 | }
525 |
526 | return limitEpoch
527 | }
528 |
// maxReadPacketSize is the maximum size (10 MiB) allowed
// for the manager reader packet.
const maxReadPacketSize = 10 * 1024 * 1024
532 |
533 | // runReaderThread will execute the reader thread
534 | // with specified pipe and channel.
535 | func (mgr *traceManager) runReaderThread(
536 | tracePipe *os.File, spliceIn, spliceOut int,
537 | sendCh chan<- []byte,
538 | ) error {
539 | var err error
540 | conn, err := tracePipe.SyscallConn()
541 | if err != nil {
542 | return errors.Wrap(err, "syscall connect")
543 | }
544 |
545 | for {
546 | var data []byte
547 | tracePipeConsume := func(fd uintptr) error {
548 | for len(data) < maxReadPacketSize {
549 | // XXX: trace pipe file supports splicing right
550 | // at its initial implementation, and unlike
551 | // its read counterpart, it contains nearly no
552 | // backward goto statement, which reduces its
553 | // chance for triggering known bug in the kernel.
554 | n, err := unix.Splice(
555 | int(fd), nil, spliceOut, nil,
556 | maxReadPacketSize, unix.SPLICE_F_NONBLOCK)
557 | if n > 0 {
558 | // Read and splice next data in buffer.
559 | buf := make([]byte, n)
560 | m, err := syscall.Read(spliceIn, buf)
561 | if err != nil {
562 | return err
563 | }
564 | data = append(data, buf[:m]...)
565 | } else if n == 0 {
566 | // No more data to read now, we will
567 | // just exit and return error.
568 | return syscall.EBADF
569 | } else if err == syscall.EAGAIN ||
570 | err == syscall.EWOULDBLOCK ||
571 | err == syscall.EINTR {
572 | // Current buffer has been emptied,
573 | // now we should perform the action.
574 | return nil
575 | } else {
576 | return err
577 | }
578 | }
579 | return nil
580 | }
581 |
582 | if epollNotWorking {
583 | // If epoll is not working, we will always
584 | // attempt to read from the epoll pipe, this
585 | // requires the minimum limit timeout to be
586 | // non-zero to prevent creating a busy looping.
587 | _ = tracePipeConsume(tracePipe.Fd())
588 | } else {
589 | var innerErr error
590 | if err := conn.Read(func(fd uintptr) bool {
591 | innerErr := tracePipeConsume(fd)
592 | if innerErr != nil {
593 | return true
594 | }
595 | return len(data) > 0
596 | }); err != nil {
597 | // XXX: the error is from standard library,
598 | // internal/poll.ErrFileClosing, the piece
599 | // of code above is provided by standard
600 | // library, so it is safe to do so.
601 | if err.Error() == "use of closed file" {
602 | err = nil
603 | }
604 | return errors.Wrap(err, "read pipe")
605 | }
606 | if innerErr != nil {
607 | return errors.Wrap(innerErr, "read pipe")
608 | }
609 | }
610 |
611 | // Create and copy out buffer, and send data
612 | // back to the manager thread.
613 | select {
614 | case <-mgr.rootCtx.Done():
615 | return nil
616 | case sendCh <- data:
617 | }
618 | }
619 | }
620 |
// synchronizeRegistryRequest is the request communicating
// between the master and writer.
type synchronizeRegistryRequest struct {
	// registries is the set of trace handles carried by the
	// request, keyed by a 64-bit identifier.
	registries map[uint64]*traceHandle
}
626 |
627 | // currentSyncRequest retrieve the current request of
628 | // synchronization from the trace manager state.
629 | func (s *traceManagerState) currentSyncRequest() (
630 | request *synchronizeRegistryRequest,
631 | ) {
632 | if s.syncCh == nil {
633 | return nil
634 | }
635 | return &synchronizeRegistryRequest{
636 | registries: s.registries,
637 | }
638 | }
639 |
// minimumTickerInterval is the smallest interval (that is, the
// highest frequency) the writer thread ticker could operate on.
var minimumTickerInterval = 50 * time.Millisecond
643 |
644 | // runWriterThread will execute the writer thread for
645 | // processing data from the reader and side chain.
646 | func (mgr *traceManager) runWriterThread(
647 | syncCh <-chan *synchronizeRegistryRequest,
648 | receiveCh <-chan []byte, limitInterval time.Duration,
649 | logger *zap.SugaredLogger,
650 | ) error {
651 | // Writer state for handling the dispatch relation
652 | // of the trace data payload.
653 | state := &traceWriterState{
654 | registries: make(map[uint64]*traceHandle),
655 | logger: logger,
656 | }
657 |
658 | // Initialize the ticker which limits the reader
659 | // production rate.
660 | var tick *time.Ticker
661 | defer func() {
662 | if tick != nil {
663 | tick.Stop()
664 | }
665 | }()
666 |
667 | // Clamp the minimum of timeout to a value
668 | // so that the reader thread will not be trapped
669 | // in a raging busy loop in realtime mode.
670 | if epollNotWorking {
671 | if limitInterval < minimumTickerInterval {
672 | limitInterval = minimumTickerInterval
673 | }
674 | }
675 |
676 | // Must not be too small, or delivering
677 | // time event will iteself brings load.
678 | if limitInterval > minimumTickerInterval {
679 | tick = time.NewTicker(limitInterval)
680 | }
681 |
682 | // Execute the writer thread for handling data
683 | // from the reader thread and side chain.
684 | received := false
685 | for {
686 | // Create the channel of tick flipping and
687 | // reader consuming.
688 | var timerCh <-chan time.Time
689 | var currentReceiveCh <-chan []byte
690 | if tick != nil && received {
691 | timerCh = tick.C
692 | currentReceiveCh = nil
693 | } else {
694 | timerCh = nil
695 | currentReceiveCh = receiveCh
696 | }
697 |
698 | // Wait for the next tick for reception.
699 | select {
700 | case <-mgr.rootCtx.Done():
701 | return nil
702 | case req := <-mgr.fetchCh:
703 | req.state = state
704 | close(req.doneCh)
705 | case data := <-currentReceiveCh:
706 | received = true
707 | _ = state.handleData(data, time.Duration(0))
708 | case <-timerCh:
709 | received = false
710 | case request := <-syncCh:
711 | state.registries = request.registries
712 | }
713 | }
714 | }
715 |
716 | // runMasterThread will execute the master thread
717 | // after the environment has been setup.
718 | func (mgr *traceManager) runMasterThread(
719 | tracePipe *os.File, spliceIn, spliceOut int,
720 | root, namespace string, log *zap.SugaredLogger,
721 | syncCh chan *synchronizeRegistryRequest,
722 | ) error {
723 | defer cleanupNamespace(log, root, namespace)
724 | defer func() { _ = tracePipe.Close() }()
725 | defer func() {
726 | _ = syscall.Close(spliceIn)
727 | _ = syscall.Close(spliceOut)
728 | }()
729 |
730 | // Registries of all available probes.
731 | state := &traceManagerState{
732 | rootCtx: mgr.rootCtx,
733 | root: root,
734 | namespace: namespace,
735 | registries: make(map[uint64]*traceHandle),
736 | enableCh: make(chan *traceEnableRequest),
737 | closeCh: make(chan *traceCloseRequest),
738 | conditionCh: make(chan *conditionUpdateRequest),
739 | }
740 | defer state.destroy()
741 |
742 | // Loop and handle trace manager events.
743 | for {
744 | var currentSyncCh chan<- *synchronizeRegistryRequest
745 | syncRequest := state.currentSyncRequest()
746 | if syncRequest != nil {
747 | currentSyncCh = syncCh
748 | }
749 | select {
750 | case <-mgr.rootCtx.Done():
751 | return nil
752 | case req := <-mgr.createCh:
753 | state.handleCreate(req)
754 | case req := <-state.enableCh:
755 | if err := state.handleEnable(req); err != nil {
756 | log.Errorf(
757 | "cannot enable handle #%d: %s",
758 | req.handle.id, err)
759 | }
760 | case req := <-state.closeCh:
761 | state.handleRemove(req)
762 | case req := <-state.conditionCh:
763 | state.handleCondition(req)
764 | case currentSyncCh <- syncRequest:
765 | state.markSync()
766 | }
767 | }
768 | }
769 |
770 | // newInternal will create an instance of the manager.
771 | func newInternal(
772 | ctx context.Context, group *errgroup.Group, options ...Option,
773 | ) (*traceManager, error) {
774 | var err error
775 | option := newOption()
776 | WithOptions(options...)(option)
777 | logger := option.logger.Named("systracer").Sugar()
778 | root := option.tracefsPath
779 | namespace := option.instanceName
780 |
781 | // Verify that the specified file system is tracefs
782 | // or debugfs, the debugfs directory must have last
783 | // component name of tracing.
784 | var fs unix.Statfs_t
785 | if err := unix.Statfs(root, &fs); err != nil {
786 | return nil, err
787 | }
788 | isValidFileSystem := false
789 | if fs.Type == unix.TRACEFS_MAGIC {
790 | isValidFileSystem = true
791 | } else if fs.Type == unix.DEBUGFS_MAGIC &&
792 | filepath.Base(root) == "tracing" {
793 | isValidFileSystem = true
794 | }
795 | if !isValidFileSystem {
796 | return nil, errors.Errorf(
797 | "invalid file system with magic %x", fs.Type)
798 | }
799 |
800 | // Attempt to clean up previous run pass of manager.
801 | hasCreated := false
802 | cleanupNamespace(logger, root, namespace)
803 | defer func() {
804 | if !hasCreated {
805 | cleanupNamespace(logger, root, namespace)
806 | }
807 | }()
808 |
809 | // Create a new namespace under the specified directory.
810 | if err := unix.Mkdir(filepath.Join(root, "instances",
811 | namespace), 0600); err != nil && err != unix.EEXIST {
812 | return nil, errors.Errorf(
813 | "cannot create instance %q: %s", namespace, err)
814 | }
815 |
816 | // Clear the content of previous trace.
817 | if err = ioutil.WriteFile(filepath.Join(
818 | root, "instances", namespace, "tracing_on"),
819 | []byte("0"), os.FileMode(0600)); err != nil {
820 | return nil, err
821 | }
822 | if err = ioutil.WriteFile(filepath.Join(
823 | root, "instances", namespace, "trace"),
824 | []byte(""), os.FileMode(0600)); err != nil {
825 | return nil, err
826 | }
827 |
828 | // Setup trace data recording parameters.
829 | if err = ioutil.WriteFile(filepath.Join(
830 | root, "instances", namespace, "trace_clock"),
831 | []byte("global"), os.FileMode(0600)); err != nil {
832 | return nil, err
833 | }
834 | traceOptions := []string{
835 | "print-parent", "nosym-offset", "nosym-addr",
836 | "noverbose", "nohex", "nobin", "noblock",
837 | "nostacktrace", "trace_printk", "noftrace-preempt",
838 | "nobranch", "noannotate", "nouserstacktrace",
839 | "nosym-userobj", "noprintk-msg-only",
840 | "context-info", "nolatency-format",
841 | "nosleep-time", "nograph-time",
842 | "norecord-cmd", "norecord-tgid",
843 | "nodisable-on-free", "irq-info",
844 | "nomarkers", "nofunction-trace",
845 | "notest_nop_accept", "notest_nop_reject",
846 | }
847 | for _, traceOption := range traceOptions {
848 | _ = ioutil.WriteFile(filepath.Join(
849 | root, "instances", namespace, "trace_options"),
850 | []byte(traceOption), os.FileMode(0600))
851 | }
852 |
853 | // Re-enable the trace instance after setup.
854 | if err = ioutil.WriteFile(filepath.Join(
855 | root, "instances", namespace, "tracing_on"),
856 | []byte("1"), os.FileMode(0600)); err != nil {
857 | return nil, err
858 | }
859 |
860 | // Attempt to open the trace pipe of the manager.
861 | fd, err := syscall.Open(filepath.Join(
862 | root, "instances", namespace, "trace_pipe"),
863 | syscall.O_RDONLY|syscall.O_NONBLOCK, 0400)
864 | if err != nil {
865 | return nil, err
866 | }
867 | tracePipe := os.NewFile(uintptr(fd), "trace_pipe")
868 | defer func() {
869 | if !hasCreated {
870 | _ = tracePipe.Close()
871 | }
872 | }()
873 |
874 | // Attempt to create splice pipe for reading.
875 | var spliceFd [2]int
876 | if err := syscall.Pipe2(spliceFd[:],
877 | syscall.O_NONBLOCK|syscall.O_CLOEXEC); err != nil {
878 | return nil, err
879 | }
880 | spliceIn, spliceOut := spliceFd[0], spliceFd[1]
881 | defer func() {
882 | if !hasCreated {
883 | _ = syscall.Close(spliceIn)
884 | _ = syscall.Close(spliceOut)
885 | }
886 | }()
887 |
888 | // Attempt update the capacity of the trace pipe to
889 | // increase the capacity of the event tracing.
890 | //
891 | // However the program still works without this
892 | // setup, it is just an optional optimization.
893 | _, _ = unix.FcntlInt(uintptr(spliceOut),
894 | unix.F_SETPIPE_SZ, maxReadPacketSize)
895 |
896 | // Start the new trace manager and return.
897 | receiveCh := make(chan []byte)
898 | syncCh := make(chan *synchronizeRegistryRequest)
899 | manager := &traceManager{
900 | rootCtx: ctx,
901 | createCh: make(chan *traceCreateRequest),
902 | fetchCh: make(chan *fetchWriterStateRequest),
903 | }
904 | group.Go(func() error {
905 | return manager.runMasterThread(
906 | tracePipe, spliceIn, spliceOut,
907 | root, namespace, logger, syncCh)
908 | })
909 | group.Go(func() error {
910 | return manager.runReaderThread(
911 | tracePipe, spliceIn, spliceOut, receiveCh)
912 | })
913 | group.Go(func() error {
914 | return manager.runWriterThread(
915 | syncCh, receiveCh, option.limitInterval, logger)
916 | })
917 | hasCreated = true
918 | return manager, nil
919 | }
920 |
921 | type calibrateEvent struct {
922 | ProbeEvent
923 | Condition `tracing:"Name ~ \"/proc/self/calibrate/*\"`
924 |
925 | Name StringAddr `tracing:"$arg2"`
926 | }
927 |
928 | // New will create an instance of the manager.
929 | func New(
930 | ctx context.Context, group *errgroup.Group, options ...Option,
931 | ) (Manager, error) {
932 | // The implementation will be splitted into two steps,
933 | // first we create the uncalibrated manager, then we
934 | // attempt to calibrate it and return it to caller.
935 | calibrated := false
936 | cancelCtx, cancel := context.WithCancel(ctx)
937 | innerGroup, innerCtx := errgroup.WithContext(cancelCtx)
938 | defer func() {
939 | if !calibrated {
940 | cancel()
941 | _ = innerGroup.Wait()
942 | }
943 | }()
944 | manager, err := newInternal(innerCtx, innerGroup, options...)
945 | if err != nil {
946 | return nil, err
947 | }
948 | state, err := manager.fetchWriterState()
949 | if err != nil {
950 | return nil, err
951 | }
952 | calibrateDone := make(chan struct{})
953 | var calibrateOnce sync.Once
954 | symbols := []string{"vfs_fstatat", "vfs_statx"}
955 | var calibrateErr error
956 | registered := false
957 | for _, symbol := range symbols {
958 | calibrate, _, err := manager.TraceKProbe(symbol, func(
959 | event calibrateEvent,
960 | ) {
961 | str := filepath.Base(event.Name.String)
962 | unixNano, err := strconv.ParseUint(str, 16, 64)
963 | if err != nil {
964 | return
965 | }
966 | baseTime := time.Unix(0, int64(unixNano))
967 | if !state.baseTime.IsZero() {
968 | startNew := baseTime.Add(-event.epoch)
969 | startOld := state.baseTime.Add(-state.baseEpoch)
970 | if startOld.Sub(startNew) > 500*time.Millisecond {
971 | return
972 | }
973 | }
974 | state.baseTime = baseTime
975 | state.baseEpoch = event.epoch
976 | calibrateOnce.Do(func() {
977 | close(calibrateDone)
978 | })
979 | })
980 | if err != nil {
981 | calibrateErr = err
982 | continue
983 | }
984 | registered = true
985 | defer calibrate.Close()
986 | calibrate.SetEnabled(true)
987 | }
988 | if !registered {
989 | return nil, calibrateErr
990 | }
991 | var stat unix.Stat_t
992 | unixNano := time.Now().UnixNano()
993 | _ = unix.Fstatat(unix.AT_FDCWD, fmt.Sprintf(
994 | "/proc/self/calibrate/%x", unixNano), &stat, 0)
995 | select {
996 | case <-ctx.Done():
997 | return nil, ctx.Err()
998 | case <-time.After(5 * time.Second):
999 | return nil, errors.New("calibration timed out")
1000 | case <-calibrateDone:
1001 | }
1002 | group.Go(func() error {
1003 | defer cancel()
1004 | <-innerCtx.Done()
1005 | return innerGroup.Wait()
1006 | })
1007 | calibrated = true
1008 | return manager, nil
1009 | }
1010 |
--------------------------------------------------------------------------------