├── .gitignore ├── bcd_sys_unsupported.go ├── README.md ├── bcd_sys_linux.go ├── LICENSE ├── bcd_test.go ├── tracer_darwin_stub.go ├── examples └── main.go ├── tracer.go └── bcd.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.out 3 | *.swp 4 | *.btt 5 | *.kdev4 6 | examples/main 7 | examples/examples 8 | examples/tracelog 9 | examples/tracedir/* 10 | -------------------------------------------------------------------------------- /bcd_sys_unsupported.go: -------------------------------------------------------------------------------- 1 | // +build !linux arm 2 | 3 | package bcd 4 | 5 | import ( 6 | "errors" 7 | ) 8 | 9 | func gettid() (int, error) { 10 | return 0, errors.New("Gettid() is unsupported on this system") 11 | } 12 | 13 | // Call this function to allow other (non-parent) processes to trace this one. 14 | // 15 | // This is a Linux-specific utility function and is stubbed out on other 16 | // operating systems. 17 | func EnableTracing() error { 18 | return nil 19 | } 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bcd 2 | -- 3 | import "github.com/backtrace-labs/go-bcd" 4 | 5 | Package bcd provides integration with out of process tracers. Using the provided 6 | Tracer interface, applications may invoke tracer execution on demand. Panic and 7 | signal handling integrations are provided. 8 | 9 | The Tracer interface is generic and will support any out of process tracer 10 | implementing it. A default Tracer implementation, which uses the Backtrace I/O 11 | platform, is provided. 12 | 13 | ## Usage 14 | 15 | See the [godoc page](https://godoc.org/github.com/backtrace-labs/go-bcd) for 16 | current documentation; 17 | see [this](https://github.com/backtrace-labs/go-bcd/blob/master/examples/main.go) 18 | for an example application. 19 | -------------------------------------------------------------------------------- /bcd_sys_linux.go: -------------------------------------------------------------------------------- 1 | // +build !arm 2 | 3 | package bcd 4 | 5 | import ( 6 | sys "golang.org/x/sys/unix" 7 | ) 8 | 9 | func gettid() (int, error) { 10 | return sys.Gettid(), nil 11 | } 12 | 13 | // Call this function to allow other (non-parent) processes to trace this one. 14 | // Alternatively, set kernel.yama.ptrace_scope = 0 in 15 | // /etc/sysctl.d/10-ptrace.conf. 16 | // 17 | // This is a Linux-specific utility function. 18 | func EnableTracing() error { 19 | // PR_SET_PTRACER_ANY may be a negative integer constant on some 20 | // systems, so we need to store it in a separate variable to bypass 21 | // Go's const conversion restrictions. 22 | var flag uint64 23 | flag = sys.PR_SET_PTRACER_ANY 24 | 25 | return sys.Prctl(sys.PR_SET_PTRACER, uintptr(flag), 0, 0, 0) 26 | } 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Backtrace I/O, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /bcd_test.go: -------------------------------------------------------------------------------- 1 | package bcd 2 | 3 | import ( 4 | "errors" 5 | "os/exec" 6 | "runtime" 7 | "strconv" 8 | "strings" 9 | "sync" 10 | "sync/atomic" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | type TestTracer struct { 16 | tracer *exec.Cmd 17 | options []string 18 | sleepDuration int 19 | m sync.Mutex 20 | } 21 | 22 | func (t *TestTracer) AddOptions(options []string, v ...string) []string { 23 | if options != nil { 24 | return append(options, v...) 25 | } 26 | 27 | t.m.Lock() 28 | defer t.m.Unlock() 29 | 30 | t.options = append(t.options, v...) 31 | return nil 32 | } 33 | 34 | func (t *TestTracer) AddKV(options []string, key, val string) []string { 35 | return t.AddOptions(options, key+":"+val) 36 | } 37 | 38 | func (t *TestTracer) AddThreadFilter(options []string, tid int) []string { 39 | return t.AddOptions(options, "filter:"+strconv.Itoa(tid)) 40 | } 41 | 42 | func (t *TestTracer) AddFaultedThread(options []string, tid int) []string { 43 | return t.AddOptions(options, "fault:"+strconv.Itoa(tid)) 44 | } 45 | 46 | func (t *TestTracer) AddClassifier(options []string, classifier string) []string { 47 | return t.AddOptions(options, classifier) 48 | } 49 | 50 | func (t *TestTracer) Options() []string { 51 | return t.options 52 | } 53 | 54 | func (t *TestTracer) ClearOptions() { 55 | t.options = nil 56 | } 57 | 58 | func (t *TestTracer) DefaultTraceOptions() *TraceOptions { 59 | return &TraceOptions{ 60 | Faulted: true, 61 | CallerOnly: false, 62 | ErrClassification: true, 63 | Timeout: time.Second * 3} 64 | } 65 | 66 | func (t *TestTracer) Finalize(options []string) *exec.Cmd { 67 | t.m.Lock() 68 | defer t.m.Unlock() 69 | 70 | t.tracer = exec.Command("/bin/sleep", strconv.Itoa(t.sleepDuration)) 71 | return t.tracer 72 | } 73 | 74 | func (t *TestTracer) Put(snapshot []byte) error { 75 | return nil 76 | } 77 | 78 | func (t *TestTracer) PutOnTrace() bool { 79 | return false 80 | } 81 | 82 | func (t *TestTracer) Logf(level LogPriority, format string, v ...interface{}) { 83 | } 84 | 85 | func (t *TestTracer) SetLogLevel(level LogPriority) { 86 | } 87 | 88 | func (t *TestTracer) String() string { 89 | return "TestTracer" 90 | } 91 | 92 | func TestConcurrentRateLimit(t *testing.T) { 93 | tracer := &TestTracer{} 94 | count := new(int64) 95 | var wg sync.WaitGroup 96 | var rateLimit time.Duration = 3 97 | 98 | UpdateConfig(GlobalConfig{ 99 | PanicOnKillFailure: true, 100 | RateLimit: time.Second * rateLimit}) 101 | 102 | ng := 4 103 | timeout := time.After(time.Second * 9) 104 | done := make(chan struct{}, ng) 105 | start := time.Now() 106 | 107 | for i := 0; i < ng; i++ { 108 | wg.Add(1) 109 | 110 | go func() { 111 | for { 112 | select { 113 | case <-done: 114 | wg.Done() 115 | return 116 | default: 117 | } 118 | 119 | if Trace(tracer, nil, nil) == nil { 120 | atomic.AddInt64(count, 1) 121 | } 122 | } 123 | }() 124 | } 125 | 126 | <-timeout 127 | close(done) 128 | wg.Wait() 129 | end := time.Now() 130 | expected := end.Sub(start) / rateLimit 131 | 132 | if *count > int64(expected) { 133 | t.Fatalf("Rate limit exceeded: actual %v, expected %v\n", 134 | *count, expected) 135 | } 136 | } 137 | 138 | func TestTimeout(t *testing.T) { 139 | UpdateConfig(GlobalConfig{RateLimit: 0}) 140 | 141 | tracer := &TestTracer{sleepDuration: 5} 142 | 143 | traceErr := Trace(tracer, nil, &TraceOptions{ 144 | Timeout: time.Second * 4}) 145 | if traceErr == nil { 146 | t.Fatal("Trace timeout failed") 147 | } else if strings.Contains(traceErr.Error(), "execution timed out") == false { 148 | t.Fatalf("Tracer failure not due to timeout (%v)\n", traceErr) 149 | } 150 | 151 | // Use the default tracer timeout (3 seconds -- see above). 152 | traceErr = Trace(tracer, nil, nil) 153 | if traceErr == nil { 154 | t.Fatal("Trace timeout failed") 155 | } else if strings.Contains(traceErr.Error(), "execution timed out") == false { 156 | t.Fatal("Tracer failure not due to timeout:", traceErr) 157 | } 158 | 159 | // We shouldn't timeout with a negative timeout. 160 | traceErr = Trace(tracer, nil, &TraceOptions{ 161 | Timeout: time.Second * -1}) 162 | if traceErr != nil { 163 | t.Fatal("Tracer failed:", traceErr) 164 | } 165 | } 166 | 167 | func TestTrace(t *testing.T) { 168 | var tracer TestTracer 169 | 170 | runtime.LockOSThread() 171 | defer runtime.UnlockOSThread() 172 | 173 | err := errors.New("Trace error") 174 | traceErr := Trace(&tracer, err, &TraceOptions{ 175 | Faulted: true, 176 | CallerOnly: true, 177 | Timeout: time.Second * 30, 178 | ErrClassification: true, 179 | Classifications: []string{"classifier1", "classifier2"}}) 180 | 181 | if traceErr != nil { 182 | t.Fatal("Failed to trace:", traceErr) 183 | } 184 | 185 | if !tracer.tracer.ProcessState.Success() { 186 | t.Fatal("Failed to execute tracer successfully") 187 | } 188 | 189 | expectedSet := map[string]bool{ 190 | "error:"+err.Error(): false, 191 | "*errors.errorString": false, 192 | "classifier1": false, 193 | "classifier2": false} 194 | 195 | if tid, err := gettid(); err == nil { 196 | expectedSet["fault:"+strconv.Itoa(tid)] = false 197 | expectedSet["filter:"+strconv.Itoa(tid)] = false 198 | } 199 | 200 | opns := tracer.Options() 201 | for _, s := range opns { 202 | delete(expectedSet, s) 203 | } 204 | 205 | if len(expectedSet) != 0 { 206 | t.Fatal("Expected options not set:", expectedSet) 207 | } 208 | } 209 | 210 | func pan(t *TestTracer) { 211 | defer Recover(t, false, &TraceOptions{ 212 | CallerOnly: false, 213 | Timeout: time.Second * 30}) 214 | 215 | panic("panic") 216 | } 217 | 218 | func TestRecover(t *testing.T) { 219 | var tracer TestTracer 220 | pan(&tracer) 221 | if !tracer.tracer.ProcessState.Success() { 222 | t.Fatal("Failed to recover from panic and trace") 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /tracer_darwin_stub.go: -------------------------------------------------------------------------------- 1 | // +build darwin 2 | 3 | package bcd 4 | 5 | import ( 6 | "fmt" 7 | "io" 8 | "log" 9 | "net/http" 10 | "os" 11 | "os/exec" 12 | "path/filepath" 13 | "sync" 14 | ) 15 | 16 | type pipes struct { 17 | stdin io.Reader 18 | stderr io.Writer 19 | } 20 | 21 | type uploader struct { 22 | endpoint string 23 | options PutOptions 24 | } 25 | 26 | type BTTracer struct { 27 | // Path to the tracer to invoke. 28 | cmd string 29 | 30 | // Output directory for generated snapshots. 31 | outputDir string 32 | 33 | // Generic options to pass to the tracer. 34 | options []string 35 | 36 | // Prefix for key-value options. 37 | kvp string 38 | 39 | // Delimeter between key and value for key-value options. 40 | kvd string 41 | 42 | // Channel which receives signal notifications. 43 | sigs chan os.Signal 44 | 45 | // The set of signals the tracer will monitor. 46 | ss []os.Signal 47 | 48 | // The pipes to use for tracer I/O. 49 | p pipes 50 | 51 | // Protects tracer state modification. 52 | m sync.RWMutex 53 | 54 | // Logs tracer execution status messages. 55 | logger Log 56 | 57 | // Default trace options to use if none are specified to bcd.Trace(). 58 | defaultTraceOptions TraceOptions 59 | 60 | // The connection information and options used during Put operations. 61 | put uploader 62 | } 63 | 64 | type defaultLogger struct { 65 | logger *log.Logger 66 | level LogPriority 67 | } 68 | 69 | func (d *defaultLogger) Logf(level LogPriority, format string, v ...interface{}) { 70 | } 71 | 72 | func (d *defaultLogger) SetLogLevel(level LogPriority) { 73 | } 74 | 75 | type NewOptions struct { 76 | // If false, system goroutines (i.e. those started and used by the Go 77 | // runtime) are excluded. 78 | IncludeSystemGs bool 79 | } 80 | 81 | // Returns a new object implementing the bcd.Tracer and bcd.TracerSig interfaces 82 | // using the Backtrace debugging platform. 83 | func New(options NewOptions) *BTTracer { 84 | return &BTTracer{} 85 | } 86 | 87 | type PutOptions struct { 88 | // If set to true, tracer results (i.e. generated snapshot files) 89 | // will be unlinked from the filesystem after successful puts. 90 | Unlink bool 91 | 92 | // The http.Client to use for uploading. The default will be used 93 | // if left unspecified. 94 | Client http.Client 95 | 96 | // If set to true, tracer results will be uploaded after each 97 | // successful Trace request. 98 | OnTrace bool 99 | } 100 | 101 | func (t *BTTracer) ConfigurePut(endpoint, token string, options PutOptions) error { 102 | return nil 103 | } 104 | 105 | // See bcd.Tracer.PutOnTrace(). 106 | func (t *BTTracer) PutOnTrace() bool { 107 | return t.put.options.OnTrace 108 | } 109 | 110 | // See bcd.Tracer.Put(). 111 | func (t *BTTracer) Put(snapshot []byte) error { 112 | return nil 113 | } 114 | 115 | // Synchronously uploads snapshots contained in the specified directory. 116 | func (t *BTTracer) PutDir(path string) error { 117 | return nil 118 | } 119 | 120 | func putDirWalk(t *BTTracer) filepath.WalkFunc { 121 | return nil 122 | } 123 | 124 | func (t *BTTracer) putSnapshotFile(path string) error { 125 | return nil 126 | } 127 | 128 | // Sets the executable path for the tracer. 129 | func (t *BTTracer) SetTracerPath(path string) { 130 | } 131 | 132 | // Sets the output path for generated snapshots. 133 | func (t *BTTracer) SetOutputPath(path string, perm os.FileMode) error { 134 | return nil 135 | } 136 | 137 | // Sets the input and output pipes for the tracer. 138 | // Stdout is not redirected; it is instead passed to the 139 | // tracer's Put command. 140 | func (t *BTTracer) SetPipes(stdin io.Reader, stderr io.Writer) { 141 | } 142 | 143 | // Sets the logger for the tracer. 144 | func (t *BTTracer) SetLogger(logger Log) { 145 | } 146 | 147 | // See bcd.Tracer.AddOptions(). 148 | func (t *BTTracer) AddOptions(options []string, v ...string) []string { 149 | return nil 150 | } 151 | 152 | // See bcd.Tracer.AddKV(). 153 | func (t *BTTracer) AddKV(options []string, key, val string) []string { 154 | return options 155 | } 156 | 157 | // See bcd.Tracer.AddThreadFilter(). 158 | func (t *BTTracer) AddThreadFilter(options []string, tid int) []string { 159 | return options 160 | } 161 | 162 | // See bcd.Tracer.AddFaultedThread(). 163 | func (t *BTTracer) AddFaultedThread(options []string, tid int) []string { 164 | return options 165 | } 166 | 167 | // See bcd.Tracer.AddCallerGo(). 168 | func (t *BTTracer) AddCallerGo(options []string, goid int) []string { 169 | return options 170 | } 171 | 172 | // See bcd.Tracer.AddClassifier(). 173 | func (t *BTTracer) AddClassifier(options []string, classifier string) []string { 174 | return options 175 | } 176 | 177 | // See bcd.Tracer.Options(). 178 | func (t *BTTracer) Options() []string { 179 | return t.options 180 | } 181 | 182 | // See bcd.Tracer.ClearOptions(). 183 | func (t *BTTracer) ClearOptions() { 184 | } 185 | 186 | // See bcd.Tracer.DefaultTraceOptions(). 187 | func (t *BTTracer) DefaultTraceOptions() *TraceOptions { 188 | return &t.defaultTraceOptions 189 | } 190 | 191 | // See bcd.Tracer.Finalize(). 192 | func (t *BTTracer) Finalize(options []string) *exec.Cmd { 193 | return nil 194 | } 195 | 196 | func (t *BTTracer) Logf(level LogPriority, format string, v ...interface{}) { 197 | } 198 | 199 | func (t *BTTracer) SetLogLevel(level LogPriority) { 200 | } 201 | 202 | func (t *BTTracer) String() string { 203 | t.m.RLock() 204 | defer t.m.RUnlock() 205 | 206 | return fmt.Sprintf("Command: %s, Options: %v", t.cmd, t.options) 207 | } 208 | 209 | // See bcd.TracerSig.SetSigset(). 210 | func (t *BTTracer) SetSigset(sigs ...os.Signal) { 211 | } 212 | 213 | // See bcd.TracerSig.Sigset(). 214 | func (t *BTTracer) Sigset() []os.Signal { 215 | return []os.Signal(nil) 216 | } 217 | 218 | // See bcd.TracerSig.SetSigchan(). 219 | func (t *BTTracer) SetSigchan(sc chan os.Signal) { 220 | } 221 | 222 | // See bcd.TracerSig.Sigchan(). 223 | func (t *BTTracer) Sigchan() chan os.Signal { 224 | return t.sigs 225 | } 226 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | // +build linux freebsd 2 | 3 | package main 4 | 5 | import ( 6 | "github.com/backtrace-labs/go-bcd" 7 | 8 | "errors" 9 | "fmt" 10 | "os" 11 | "strconv" 12 | "sync" 13 | "syscall" 14 | "time" 15 | ) 16 | 17 | const ( 18 | max_recurse = 2 19 | ) 20 | 21 | var ( 22 | tracer *bcd.BTTracer 23 | wg sync.WaitGroup 24 | ) 25 | 26 | func pan() { 27 | defer bcd.Recover(tracer, false, &bcd.TraceOptions{ 28 | Faulted: true, 29 | CallerOnly: true, 30 | ErrClassification: true, 31 | SpawnedGs: &wg}) 32 | 33 | panic("panic error") 34 | } 35 | 36 | func sig() { 37 | p, err := os.FindProcess(os.Getpid()) 38 | if err != nil { 39 | fmt.Println("error: failed to find process object") 40 | return 41 | } 42 | 43 | p.Signal(syscall.SIGSEGV) 44 | } 45 | 46 | func recurse(depth int, s1 fishface) { 47 | if depth == 0 { 48 | fmt.Println("Sending signal...") 49 | sig() 50 | fmt.Println("Signal recovered successfully") 51 | 52 | fmt.Println("Panicking...") 53 | pan() 54 | fmt.Println("Panic recovered successfully") 55 | 56 | return 57 | } 58 | 59 | a := 10 60 | b := "foo" 61 | var h string 62 | i := "" 63 | 64 | f := make(chan string, 3) 65 | f <- "this" 66 | f <- "is" 67 | f <- "Go" 68 | b = <-f 69 | 70 | c := []int{3, 4, 5} 71 | var d [3]int 72 | g := [3]int{7, 8, 9} 73 | m := [300]string{"test"} 74 | 75 | k := &sarlmons{a: 3, b: 4, c: 5, d: "fish"} 76 | 77 | j := map[string]int{} 78 | for z := 0; z < 300; z++ { 79 | j[strconv.Itoa(z)] = z 80 | } 81 | e := map[string]int{"a": 10, "b": 5} 82 | l := map[sarlmons]string{ 83 | sarlmons{3, 4, 5, "fish"}: "what", 84 | sarlmons{4, 5, 6, "fush"}: "the", 85 | sarlmons{5, 6, 7, "fisheded"}: "chicken", 86 | } 87 | 88 | _, _, _, _, _, _, _, _, _, _, _, _ = a, b, c, d, e, f, g, h, i, k, l, m 89 | 90 | wg.Add(1) 91 | go func() { 92 | defer wg.Done() 93 | 94 | fmt.Println("Requesting trace...") 95 | 96 | err := errors.New("trace-request") 97 | 98 | // Request a trace. TraceOptions are optional -- see pan() 99 | // for an example of use with the default options. 100 | traceErr := bcd.Trace(tracer, err, &bcd.TraceOptions{ 101 | // Note: no (unlimited) timeout. 102 | // Faulted and CallerOnly options don't make sense 103 | // for asynchronous trace requests. See below for a 104 | // synchronous request. 105 | Faulted: false, 106 | CallerOnly: false, 107 | ErrClassification: true, 108 | Classifications: []string{ 109 | "these", "are", "test", "classifiers"}}) 110 | if traceErr != nil { 111 | fmt.Println("Failed to trace: %v", traceErr) 112 | } 113 | 114 | fmt.Println("Done") 115 | }() 116 | 117 | wg.Add(1) 118 | go func() { 119 | defer wg.Done() 120 | 121 | f, err := os.Create("/tmp/dat1") 122 | if err != nil { 123 | panic(err) 124 | } 125 | defer f.Close() 126 | 127 | x := 0 128 | y := map[sarlmons]string{sarlmons{4, 5, 6, "what"}: "stuff"} 129 | 130 | for { 131 | x += 1 132 | f.WriteString(fmt.Sprintf("%d", x)) 133 | f.WriteString(y[sarlmons{4, 5, 6, "what"}]) 134 | f.Sync() 135 | 136 | if x >= 1000 { 137 | rf, err := os.OpenFile( 138 | "/home/someone/rdonlyfile", 139 | os.O_RDWR, 0644) 140 | 141 | if err != nil { 142 | fmt.Println("Requesting trace") 143 | bcd.Trace(tracer, err, &bcd.TraceOptions{ 144 | Faulted: true, 145 | CallerOnly: true, 146 | Timeout: time.Second * 30, 147 | ErrClassification: true}) 148 | break 149 | } 150 | 151 | fmt.Println("Writing to rf") 152 | rf.WriteString("File opened\n") 153 | rf.Sync() 154 | rf.Close() 155 | } 156 | } 157 | 158 | fmt.Println("Done") 159 | }() 160 | 161 | recurse(depth-1, s1) 162 | } 163 | 164 | func start() { 165 | x := &sarlmons{a: 3, b: 4, c: 5, d: "fish"} 166 | x.b += 3 167 | 168 | recurse(max_recurse, x) 169 | } 170 | 171 | func main() { 172 | // On kernels with specific security settings enabled, this call 173 | // allows a non-parent tracer to run against this process. 174 | // It is not necessary to call this in the absence of these security 175 | // settings. 176 | if err := bcd.EnableTracing(); err != nil { 177 | fmt.Printf("Warning: failed to enable tracing permission: %v\n", 178 | err) 179 | } 180 | 181 | bcd.UpdateConfig(bcd.GlobalConfig{ 182 | PanicOnKillFailure: true, 183 | ResendSignal: true, 184 | RateLimit: time.Second * 5, 185 | SynchronousPut: false}) 186 | 187 | // Use the default tracer implementation. 188 | tracer = bcd.New(bcd.NewOptions{IncludeSystemGs: false}) 189 | 190 | // Enable WARNING log output from the tracer. 191 | tracer.AddOptions(nil, "-L", "WARNING") 192 | 193 | if err := tracer.SetOutputPath("./tracedir", 0755); err != nil { 194 | fmt.Printf("Warning: failed to set output path: %v.\n" + 195 | "Generated snapshots will be stored in cwd.\n", err) 196 | } 197 | 198 | tracer.AddKV(nil, "version", "1.2.3") 199 | 200 | // Tracer I/O is directed to os.DevNull by default. 201 | // Note: this does not affect the generated output file (unless the 202 | // tracer can only print to stderr). 203 | f, err := os.Create("./tracelog") 204 | if err != nil { 205 | panic(err) 206 | } 207 | defer f.Close() 208 | tracer.SetPipes(nil, f) 209 | 210 | tracer.SetLogLevel(bcd.LogMax) 211 | 212 | if err := tracer.ConfigurePut("https://fakeserver.fakecompany.com:6098", 213 | "fakeprojecttoken", 214 | bcd.PutOptions{Unlink: true, OnTrace: true}); err != nil { 215 | fmt.Printf("Failed to enable put: %v\n", err) 216 | } 217 | 218 | wg.Add(1) 219 | go func() { 220 | defer wg.Done() 221 | 222 | // Uploads all snapshots contained in the specified directory. 223 | // 224 | // Generally, one should use either BTTracer.PutDir or set the 225 | // OnTrace option to true when calling BTTracer.ConfigurePut. 226 | if err := tracer.PutDir("./tracedir"); err != nil { 227 | fmt.Printf("Failed to Put from directory: %v\n", err) 228 | } 229 | }() 230 | 231 | // Register for signal handling using the tracer's default signal set. 232 | bcd.Register(tracer) 233 | 234 | start() 235 | 236 | wg.Wait() 237 | } 238 | 239 | type sarlmons struct { 240 | a, b, c int 241 | d string 242 | } 243 | 244 | type fish map[sarlmons]string 245 | 246 | type moop struct { 247 | a, b, c int 248 | d string 249 | e, f map[string]int 250 | } 251 | 252 | type fntest struct { 253 | a, b int 254 | f func(a, b int) int 255 | } 256 | 257 | func intfn(a, b int) int { 258 | return a + b 259 | } 260 | 261 | type fishface interface { 262 | Plip(i, y int) (result int, err error) 263 | Plop(i []int) (result int, err error) 264 | Dunk(i, y string) (result string, err error) 265 | } 266 | 267 | func (p *sarlmons) Plip(i, y int) (result int, err error) { 268 | return i + y, nil 269 | } 270 | 271 | func (p *sarlmons) Plop(i []int) (result int, err error) { 272 | r := 0 273 | 274 | for _, z := range i { 275 | r += z 276 | } 277 | 278 | return r, nil 279 | } 280 | 281 | func (p *sarlmons) Dunk(i, y string) (result string, err error) { 282 | return i + y, nil 283 | } 284 | -------------------------------------------------------------------------------- /tracer.go: -------------------------------------------------------------------------------- 1 | // +build linux freebsd 2 | 3 | package bcd 4 | 5 | import ( 6 | "bytes" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "log" 11 | "net/http" 12 | "net/url" 13 | "os" 14 | "os/exec" 15 | "path/filepath" 16 | "strconv" 17 | "strings" 18 | "sync" 19 | "syscall" 20 | "time" 21 | ) 22 | 23 | type pipes struct { 24 | stdin io.Reader 25 | stderr io.Writer 26 | } 27 | 28 | type uploader struct { 29 | endpoint string 30 | options PutOptions 31 | } 32 | 33 | type BTTracer struct { 34 | // Path to the tracer to invoke. 35 | cmd string 36 | 37 | // Output directory for generated snapshots. 38 | outputDir string 39 | 40 | // Generic options to pass to the tracer. 41 | options []string 42 | 43 | // Prefix for key-value options. 44 | kvp string 45 | 46 | // Delimeter between key and value for key-value options. 47 | kvd string 48 | 49 | // Channel which receives signal notifications. 50 | sigs chan os.Signal 51 | 52 | // The set of signals the tracer will monitor. 53 | ss []os.Signal 54 | 55 | // The pipes to use for tracer I/O. 56 | p pipes 57 | 58 | // Protects tracer state modification. 59 | m sync.RWMutex 60 | 61 | // Logs tracer execution status messages. 62 | logger Log 63 | 64 | // Default trace options to use if none are specified to bcd.Trace(). 65 | defaultTraceOptions TraceOptions 66 | 67 | // The connection information and options used during Put operations. 68 | put uploader 69 | } 70 | 71 | type defaultLogger struct { 72 | logger *log.Logger 73 | level LogPriority 74 | } 75 | 76 | func (d *defaultLogger) Logf(level LogPriority, format string, v ...interface{}) { 77 | if (d.level & level) == 0 { 78 | return 79 | } 80 | 81 | d.logger.Printf(format, v...) 82 | } 83 | 84 | func (d *defaultLogger) SetLogLevel(level LogPriority) { 85 | d.level = level 86 | } 87 | 88 | type NewOptions struct { 89 | // If false, system goroutines (i.e. those started and used by the Go 90 | // runtime) are excluded. 91 | IncludeSystemGs bool 92 | } 93 | 94 | // Returns a new object implementing the bcd.Tracer and bcd.TracerSig interfaces 95 | // using the Backtrace debugging platform. Currently, only Linux and FreeBSD 96 | // are supported. 97 | // 98 | // Relevant default values: 99 | // 100 | // Tracer path: /opt/backtrace/bin/ptrace. 101 | // 102 | // Output directory: Current working directory of process. 103 | // 104 | // Signal set: ABRT, FPE, SEGV, ILL, BUS. Note: Go converts BUS, FPE, and 105 | // SEGV arising from process execution into run-time panics, which cannot be 106 | // handled by signal handlers. These signals are caught when sent from 107 | // os.Process.Kill or similar. 108 | // 109 | // The default logger prints to stderr. 110 | // 111 | // DefaultTraceOptions: 112 | // 113 | // Faulted: true 114 | // 115 | // CallerOnly: false 116 | // 117 | // ErrClassification: true 118 | // 119 | // Timeout: 120s 120 | func New(options NewOptions) *BTTracer { 121 | moduleOpt := "--module=go:enable,true" 122 | if !options.IncludeSystemGs { 123 | moduleOpt += ",filter,user" 124 | } 125 | 126 | return &BTTracer{ 127 | cmd: "/opt/backtrace/bin/ptrace", 128 | kvp: "--kv", 129 | kvd: ":", 130 | options: []string{ 131 | "--load=", 132 | moduleOpt, 133 | "--faulted", 134 | strconv.Itoa(os.Getpid())}, 135 | ss: []os.Signal{ 136 | syscall.SIGABRT, 137 | syscall.SIGFPE, 138 | syscall.SIGSEGV, 139 | syscall.SIGILL, 140 | syscall.SIGBUS}, 141 | logger: &defaultLogger{ 142 | logger: log.New(os.Stderr, "[bcd] ", log.LstdFlags), 143 | level: LogError}, 144 | defaultTraceOptions: TraceOptions{ 145 | Faulted: true, 146 | CallerOnly: false, 147 | ErrClassification: true, 148 | Timeout: time.Second * 120}} 149 | } 150 | 151 | const ( 152 | defaultCoronerScheme = "https" 153 | defaultCoronerPort = "6098" 154 | ) 155 | 156 | type PutOptions struct { 157 | // If set to true, tracer results (i.e. generated snapshot files) 158 | // will be unlinked from the filesystem after successful puts. 159 | Unlink bool 160 | 161 | // The http.Client to use for uploading. The default will be used 162 | // if left unspecified. 163 | Client http.Client 164 | 165 | // If set to true, tracer results will be uploaded after each 166 | // successful Trace request. 167 | OnTrace bool 168 | } 169 | 170 | // Configures the uploading of a generated snapshot file to a remote Backtrace 171 | // coronerd object store. 172 | // 173 | // Uploads use simple one-shot semantics and won't retry on failures. For 174 | // more robust snapshot uploading and directory monitoring, consider using 175 | // coroner daemon, as described at 176 | // https://documentation.backtrace.io/snapshot/#daemon. 177 | // 178 | // endpoint: The URL of the server. It must be a valid HTTP endpoint as 179 | // according to url.Parse() (which is based on RFC 3986). The default scheme 180 | // and port are https and 6098, respectively, and are used if left unspecified. 181 | // 182 | // token: The hash associated with the coronerd project to which this 183 | // application belongs; see 184 | // https://documentation.backtrace.io/coronerd_setup/#authentication-tokens 185 | // for more details. 186 | // 187 | // options: Modifies behavior of the Put action; see PutOptions documentation 188 | // for more details. 189 | func (t *BTTracer) ConfigurePut(endpoint, token string, options PutOptions) error { 190 | if endpoint == "" || token == "" { 191 | return errors.New("Endpoint must be non-empty") 192 | } 193 | 194 | url, err := url.Parse(endpoint) 195 | if err != nil { 196 | return err 197 | } 198 | 199 | // Endpoints without the scheme prefix (or at the very least a '//` 200 | // prefix) are interpreted as remote server paths. Handle the 201 | // (unlikely) case of an unspecified scheme. We won't allow other 202 | // cases, like a port specified without a scheme, though, as per 203 | // RFC 3986. 204 | if url.Host == "" { 205 | if url.Path == "" { 206 | return errors.New("invalid URL specification: host " + 207 | "or path must be non-empty") 208 | } 209 | 210 | url.Host = url.Path 211 | } 212 | 213 | if url.Scheme == "" { 214 | url.Scheme = defaultCoronerScheme 215 | } 216 | 217 | if strings.IndexAny(url.Host, ":") == -1 { 218 | url.Host += ":" + defaultCoronerPort 219 | } 220 | 221 | url.Path = "post" 222 | url.RawQuery = fmt.Sprintf("token=%s", token) 223 | 224 | t.put.endpoint = url.String() 225 | t.put.options = options 226 | 227 | t.Logf(LogDebug, "Put enabled (endpoint: %s, unlink: %v)\n", 228 | t.put.endpoint, 229 | t.put.options.Unlink) 230 | 231 | return nil 232 | } 233 | 234 | // See bcd.Tracer.PutOnTrace(). 235 | func (t *BTTracer) PutOnTrace() bool { 236 | return t.put.options.OnTrace 237 | } 238 | 239 | // See bcd.Tracer.Put(). 240 | func (t *BTTracer) Put(snapshot []byte) error { 241 | end := bytes.IndexByte(snapshot, 0) 242 | if end == -1 { 243 | end = len(snapshot) 244 | } 245 | path := strings.TrimSpace(string(snapshot[:end])) 246 | 247 | return t.putSnapshotFile(path) 248 | } 249 | 250 | // Synchronously uploads snapshots contained in the specified directory. 251 | // It is safe to spawn a goroutine to run BTTracer.PutDir(). 252 | // 253 | // ConfigurePut should have returned successfully before calling 254 | // BTTracer.PutDir(). 255 | // 256 | // Only files with the '.btt' suffix will be uploaded. 257 | // 258 | // The first error encountered terminates the directory walk, thus 259 | // skipping snapshots which would have been processed later in the walk. 260 | func (t *BTTracer) PutDir(path string) error { 261 | t.Logf(LogDebug, "Uploading snapshots from %s...\n", path) 262 | return filepath.Walk(path, putDirWalk(t)) 263 | } 264 | 265 | func putDirWalk(t *BTTracer) filepath.WalkFunc { 266 | return func(path string, info os.FileInfo, err error) error { 267 | if err != nil { 268 | t.Logf(LogError, "Failed to walk put directory: %v\n", 269 | err) 270 | return err 271 | } 272 | 273 | if info.IsDir() { 274 | return nil 275 | } 276 | 277 | if !strings.HasSuffix(info.Name(), ".btt") { 278 | t.Logf(LogDebug, "Ignoring file %s: suffix '.btt' " + 279 | "is required\n", info.Name()) 280 | return nil 281 | } 282 | 283 | return t.putSnapshotFile(path) 284 | } 285 | } 286 | 287 | func (t *BTTracer) putSnapshotFile(path string) error { 288 | t.Logf(LogDebug, "Attempting to upload snapshot %s...\n", path) 289 | 290 | body, err := os.Open(path) 291 | if err != nil { 292 | return err 293 | } 294 | defer body.Close() 295 | 296 | // The file is automatically closed by the Post request after 297 | // completion. 298 | 299 | resp, err := t.put.options.Client.Post( 300 | t.put.endpoint, 301 | "application/octet-stream", 302 | body) 303 | if err != nil { 304 | return err 305 | } 306 | defer resp.Body.Close() 307 | 308 | if resp.StatusCode != 200 { 309 | return fmt.Errorf("failed to upload: %s", resp.Status) 310 | } 311 | 312 | if t.put.options.Unlink { 313 | t.Logf(LogDebug, "Unlinking snapshot...\n") 314 | 315 | if err := os.Remove(path); err != nil { 316 | t.Logf(LogWarning, 317 | "Failed to unlink snapshot: %v\n", 318 | err) 319 | 320 | // This does not mean the put itself failed, 321 | // so we don't return this error here. 322 | } else { 323 | t.Logf(LogDebug, "Unlinked snapshot\n") 324 | } 325 | } 326 | 327 | t.Logf(LogDebug, "Uploaded snapshot\n") 328 | 329 | return nil 330 | } 331 | 332 | // Sets the executable path for the tracer. 333 | func (t *BTTracer) SetTracerPath(path string) { 334 | t.m.Lock() 335 | defer t.m.Unlock() 336 | 337 | t.cmd = path 338 | } 339 | 340 | // Sets the output path for generated snapshots. The directory will be 341 | // created with the specified permission bits if it does not already 342 | // exist. 343 | // 344 | // If perm is 0, a default of 0755 will be used. 345 | func (t *BTTracer) SetOutputPath(path string, perm os.FileMode) error { 346 | if perm == 0 { 347 | perm = 0755 348 | } 349 | 350 | if err := os.MkdirAll(path, perm); err != nil { 351 | t.Logf(LogError, "Failed to create output directory: %v\n", err) 352 | return err 353 | } 354 | 355 | t.m.Lock() 356 | defer t.m.Unlock() 357 | 358 | t.outputDir = path 359 | 360 | return nil 361 | } 362 | 363 | // Sets the input and output pipes for the tracer. 364 | // Stdout is not redirected; it is instead passed to the 365 | // tracer's Put command. 366 | func (t *BTTracer) SetPipes(stdin io.Reader, stderr io.Writer) { 367 | t.m.Lock() 368 | defer t.m.Unlock() 369 | 370 | t.p.stdin = stdin 371 | t.p.stderr = stderr 372 | } 373 | 374 | // Sets the logger for the tracer. 375 | func (t *BTTracer) SetLogger(logger Log) { 376 | t.logger = logger 377 | } 378 | 379 | // See bcd.Tracer.AddOptions(). 380 | func (t *BTTracer) AddOptions(options []string, v ...string) []string { 381 | if options != nil { 382 | return append(options, v...) 383 | } 384 | 385 | t.m.Lock() 386 | defer t.m.Unlock() 387 | 388 | t.options = append(t.options, v...) 389 | return nil 390 | } 391 | 392 | // Append to an option with given prefix 393 | func AppendOptionWithPrefix(options []string, prefix string, v string) []string { 394 | for i, opt := range options { 395 | if strings.HasPrefix(opt, prefix) == true { 396 | new_opt := opt + "," + v 397 | options[i] = new_opt 398 | return options 399 | } 400 | } 401 | return append(options, prefix + v) 402 | } 403 | 404 | func (t *BTTracer) AppendOptionWithPrefix(options []string, prefix string, v string) []string { 405 | if options != nil { 406 | return AppendOptionWithPrefix(options, prefix, v) 407 | } 408 | 409 | t.m.Lock() 410 | defer t.m.Unlock() 411 | 412 | t.options = AppendOptionWithPrefix(t.options, prefix, v) 413 | return nil 414 | } 415 | 416 | // See bcd.Tracer.AddKV(). 417 | func (t *BTTracer) AddKV(options []string, key, val string) []string { 418 | return t.AddOptions(options, t.kvp, key+t.kvd+val) 419 | } 420 | 421 | // See bcd.Tracer.AddThreadFilter(). 422 | func (t *BTTracer) AddThreadFilter(options []string, tid int) []string { 423 | return t.AddOptions(options, "--thread", strconv.Itoa(tid)) 424 | } 425 | 426 | // See bcd.Tracer.AddFaultedThread(). 427 | func (t *BTTracer) AddFaultedThread(options []string, tid int) []string { 428 | return t.AddOptions(options, "--fault-thread", strconv.Itoa(tid)) 429 | } 430 | 431 | // See bcd.Tracer.AddCallerGo(). 432 | func (t *BTTracer) AddCallerGo(options []string, goid int) []string { 433 | moduleOpt := "goid," + strconv.Itoa(goid) 434 | return t.AppendOptionWithPrefix(options, "--module=go:", moduleOpt) 435 | } 436 | 437 | // See bcd.Tracer.AddClassifier(). 438 | func (t *BTTracer) AddClassifier(options []string, classifier string) []string { 439 | return t.AddOptions(options, "--classifier", classifier) 440 | } 441 | 442 | // See bcd.Tracer.Options(). 443 | func (t *BTTracer) Options() []string { 444 | t.m.RLock() 445 | defer t.m.RUnlock() 446 | 447 | return append([]string(nil), t.options...) 448 | } 449 | 450 | // See bcd.Tracer.ClearOptions(). 451 | func (t *BTTracer) ClearOptions() { 452 | t.m.Lock() 453 | defer t.m.Unlock() 454 | 455 | t.options = nil 456 | } 457 | 458 | // See bcd.Tracer.DefaultTraceOptions(). 459 | func (t *BTTracer) DefaultTraceOptions() *TraceOptions { 460 | return &t.defaultTraceOptions 461 | } 462 | 463 | // See bcd.Tracer.Finalize(). 464 | func (t *BTTracer) Finalize(options []string) *exec.Cmd { 465 | t.m.RLock() 466 | defer t.m.RUnlock() 467 | 468 | tracer := exec.Command(t.cmd, options...) 469 | tracer.Dir = t.outputDir 470 | tracer.Stdin = t.p.stdin 471 | tracer.Stderr = t.p.stderr 472 | 473 | t.Logf(LogDebug, "Command: %v\n", tracer) 474 | 475 | return tracer 476 | } 477 | 478 | func (t *BTTracer) Logf(level LogPriority, format string, v ...interface{}) { 479 | t.m.RLock() 480 | defer t.m.RUnlock() 481 | 482 | t.logger.Logf(level, format, v...) 483 | } 484 | 485 | func (t *BTTracer) SetLogLevel(level LogPriority) { 486 | t.m.RLock() 487 | defer t.m.RUnlock() 488 | 489 | t.logger.SetLogLevel(level) 490 | } 491 | 492 | func (t *BTTracer) String() string { 493 | t.m.RLock() 494 | defer t.m.RUnlock() 495 | 496 | return fmt.Sprintf("Command: %s, Options: %v", t.cmd, t.options) 497 | } 498 | 499 | // See bcd.TracerSig.SetSigset(). 500 | func (t *BTTracer) SetSigset(sigs ...os.Signal) { 501 | t.ss = append([]os.Signal(nil), sigs...) 502 | } 503 | 504 | // See bcd.TracerSig.Sigset(). 505 | func (t *BTTracer) Sigset() []os.Signal { 506 | return append([]os.Signal(nil), t.ss...) 507 | } 508 | 509 | // See bcd.TracerSig.SetSigchan(). 510 | func (t *BTTracer) SetSigchan(sc chan os.Signal) { 511 | t.sigs = sc 512 | } 513 | 514 | // See bcd.TracerSig.Sigchan(). 515 | func (t *BTTracer) Sigchan() chan os.Signal { 516 | return t.sigs 517 | } 518 | -------------------------------------------------------------------------------- /bcd.go: -------------------------------------------------------------------------------- 1 | // Package bcd provides integration with out-of-process tracers. Using the 2 | // provided Tracer interface, applications may invoke tracer execution on 3 | // demand. Panic and signal handling integrations are provided. 4 | // 5 | // The Tracer interface is generic and will support any out-of-process tracer 6 | // implementing it. A default Tracer implementation, which uses the 7 | // Backtrace I/O platform, is provided. 8 | package bcd 9 | 10 | import ( 11 | "errors" 12 | "fmt" 13 | "os" 14 | "os/exec" 15 | "os/signal" 16 | "reflect" 17 | "runtime" 18 | "strconv" 19 | "strings" 20 | "sync" 21 | "time" 22 | ) 23 | 24 | var ( 25 | // Only one tracer should be running on a process at any time; this is 26 | // global to all created tracers. 27 | traceLock chan struct{} 28 | 29 | // Configuration applicable to all tracer invocations. 30 | state globalState 31 | ) 32 | 33 | type globalState struct { 34 | c GlobalConfig 35 | m sync.RWMutex 36 | } 37 | 38 | type GlobalConfig struct { 39 | // If the tracer's timeout expires and the tracer cannot be killed, 40 | // generate a run-time panic. 41 | // 42 | // Defaults to true. 43 | PanicOnKillFailure bool 44 | 45 | // Upon receipt of a signal and execution of the tracer, re-sends the 46 | // signal to the default Go signal handler for the signal and stops 47 | // listening for the signal. 48 | // Note: this will call signal.Reset(signal) on the received signal, 49 | // which undoes the effect of any signal.Notify() calls for the signal. 50 | // 51 | // Defaults to true. 52 | ResendSignal bool 53 | 54 | // Length of time to wait after completion of a tracer's 55 | // execution before allowing the next tracer to run. 56 | // 57 | // Defaults to 3 seconds. 58 | RateLimit time.Duration 59 | 60 | // If bcd.Trace() has been configured to attempt an upload immediately, 61 | // wait for the Tracer to finish uploading its results (instead of 62 | // asynchronously uploading in a new goroutine) before returning 63 | // from bcd.Trace(). 64 | // 65 | // Defaults to true. 66 | SynchronousPut bool 67 | } 68 | 69 | func init() { 70 | // Tracer execution timeouts are supported, which is why we use a 71 | // channel here instead of sync.Mutex. 72 | traceLock = make(chan struct{}, 1) 73 | traceLock <- struct{}{} 74 | 75 | state = globalState{ 76 | c: GlobalConfig{ 77 | PanicOnKillFailure: true, 78 | ResendSignal: true, 79 | RateLimit: time.Second * 3, 80 | SynchronousPut: true}} 81 | } 82 | 83 | // Update global Tracer configuration. 84 | func UpdateConfig(c GlobalConfig) { 85 | state.m.Lock() 86 | defer state.m.Unlock() 87 | 88 | state.c = c 89 | } 90 | 91 | // A generic out-of-process tracer interface. 92 | // 93 | // This is used primarily by the top-level functions of the bcd package, 94 | // like bcd.Trace, to handle execution and synchronization of various 95 | // generic tracers. 96 | // 97 | // Tracers are not limited to this interface and may provide additional 98 | // utility methods; see specific tracer implementation (e.g. BTTracer) 99 | // documentation for details. 100 | // 101 | // The methods in this interface are expected to be goroutine safe; multiple 102 | // trace requests (which ultimately call into these methods) from different 103 | // goroutines may run concurrently. 104 | type Tracer interface { 105 | // Store the options provided by v. 106 | // 107 | // If the options slice is non-nil, the provided options should be 108 | // stored in it; otherwise, the options are added to the Tracer's 109 | // base set of options. 110 | // Returns the final options slice if the provided options slice is 111 | // non-nil. 112 | AddOptions(options []string, v ...string) []string 113 | 114 | // Add a key-value attribute. 115 | // 116 | // See AddOptions for rules regarding the specified options slice and 117 | // the return value. 118 | AddKV(options []string, key, val string) []string 119 | 120 | // Add a thread filter option using the specified tid. If any thread 121 | // filter options are added, all non-matching threads and goroutines 122 | // are expected to be excluded from the generated snapshot. 123 | // 124 | // See AddOptions for rules regarding the specified options slice and 125 | // the return value. 126 | AddThreadFilter(options []string, tid int) []string 127 | 128 | // Add a faulted thread option using the specified tid. Threads and 129 | // goroutines matching any faulted thread options are marked as faulted 130 | // and subject to analysis and grouping. 131 | // 132 | // See AddOptions for rules regarding the specified options slice and 133 | // the return value. 134 | AddFaultedThread(options []string, tid int) []string 135 | 136 | // Add a caller goroutine option using the specified goid. 137 | // 138 | // See AddOptions for rules regarding the specified options slice and 139 | // the return value. 140 | AddCallerGo(options []string, goid int) []string 141 | 142 | // Add a classification to the generated snapshot. 143 | // 144 | // See AddOptions for rules regarding the specified options slice and 145 | // the return value. 146 | AddClassifier(options []string, classifier string) []string 147 | 148 | // Returns a copy of the base set of options for the Tracer. 149 | Options() []string 150 | 151 | // Clears the base set of options for the Tracer. 152 | ClearOptions() 153 | 154 | // Returns the default TraceOptions used in bcd.Trace() if an override 155 | // is not specified as an argument to it. 156 | DefaultTraceOptions() *TraceOptions 157 | 158 | // Accepts a final set of options and returns a Command object 159 | // representing a tracer that is ready to run. This will be executed 160 | // on the current process. 161 | Finalize(options []string) *exec.Cmd 162 | 163 | // Determines when and to what the Tracer will log. 164 | Log 165 | 166 | // String representation of a Tracer. 167 | fmt.Stringer 168 | 169 | // Returns whether the Tracer should upload its results to a remote 170 | // server after successful tracer execution. 171 | PutOnTrace() bool 172 | 173 | // Uploads Tracer results given by the snapshot argument, which is 174 | // the stdout of the Tracer process, to the configured remote server. 175 | // 176 | // As this is part of the generic Tracer interface, callers know 177 | // nothing about the contents of the output; thus, it is passed 178 | // unfiltered to the specific underlying implementation. 179 | Put(snapshot []byte) error 180 | } 181 | 182 | // Options determining actions taken during Tracer execution. 183 | type TraceOptions struct { 184 | // If true, the calling thread/goroutine will be marked as faulted 185 | // (i.e. the cause of the error or trace request). 186 | // 187 | // This is a Linux-specific option; it results in a noop on other 188 | // systems. 189 | Faulted bool 190 | 191 | // If true, only the calling thread/goroutine will be traced; all others 192 | // will be excluded from the generated snapshot. 193 | // 194 | // This is a Linux-specific option; it results in a noop on other 195 | // systems. 196 | CallerOnly bool 197 | 198 | // If true and a non-nil error object is passed to bcd.Trace(), a 199 | // classifier will be added based on the specified error's type. 200 | ErrClassification bool 201 | 202 | // If non-nil, all contained strings will be added as classifiers to 203 | // the generated snapshot. 204 | Classifications []string 205 | 206 | // Amount of time to wait for the tracer to finish execution. 207 | // If 0 is specified, Tracer.DefaultTraceOptions()'s timeout will be 208 | // used. If <0 is specified, no timeout will be used; the Tracer command 209 | // will run until it exits. 210 | Timeout time.Duration 211 | 212 | // If non-nil, any goroutines spawned during the Trace() request will 213 | // be added to the wait group. This facilitates waiting for things like 214 | // asynchronous snapshot uploads to complete before exiting the 215 | // application. 216 | SpawnedGs *sync.WaitGroup 217 | } 218 | 219 | type Log interface { 220 | // Logs the specified message if the specified log level is enabled. 221 | Logf(level LogPriority, format string, v ...interface{}) 222 | 223 | // Sets the log level to the specified bitmask of LogPriorities; all 224 | // priorities excluded from the mask are ignored. 225 | SetLogLevel(level LogPriority) 226 | } 227 | 228 | type LogPriority int 229 | 230 | const ( 231 | LogDebug = 1 << iota 232 | LogWarning 233 | LogError 234 | LogMax = (1 << iota) - 1 235 | ) 236 | 237 | // This is a superset of the generic Tracer interface for those that wish 238 | // to support signal handling. The methods unique to this interface are 239 | // not expected to be goroutine-safe. 240 | type TracerSig interface { 241 | Tracer 242 | 243 | // Sets the desired set of signals for which to invoke the Tracer upon 244 | // receipt of the signal. 245 | SetSigset(sigs ...os.Signal) 246 | 247 | // Returns the desired signal set. 248 | Sigset() []os.Signal 249 | 250 | // Sets the channel through which the Tracer will respond to signals. 251 | SetSigchan(sc chan os.Signal) 252 | 253 | // Returns the channel through which the Tracer will respond to signals. 254 | Sigchan() chan os.Signal 255 | } 256 | 257 | // Create a unique error to pass to a Trace request. 258 | type signalError struct { 259 | s os.Signal 260 | } 261 | 262 | func (s *signalError) Error() string { 263 | return s.s.String() 264 | } 265 | 266 | // Registers a signal handler to execute the specified Tracer upon receipt of 267 | // any signal in the set specified by TracerSig.Sigset(). 268 | // If the GlobalConfiguration value ResendSignal is true, then when a signal is 269 | // received through this handler, all handlers for that signal will be reset 270 | // with signal.Reset(s) after tracer execution completes. The signal will then 271 | // be resent to the default Go handler for that signal. 272 | func Register(t TracerSig) { 273 | ss := t.Sigset() 274 | if ss == nil || len(ss) == 0 { 275 | t.Logf(LogError, "Failed to register signal handler: empty "+ 276 | "sigset\n") 277 | return 278 | } 279 | 280 | c := t.Sigchan() 281 | if c != nil { 282 | unregisterInternal(t, c) 283 | } 284 | 285 | c = make(chan os.Signal, len(ss)) 286 | t.SetSigchan(c) 287 | 288 | signal.Notify(c, ss...) 289 | 290 | t.Logf(LogDebug, "Registered tracer %s (signal set: %v)\n", t, ss) 291 | 292 | state.m.RLock() 293 | rs := state.c.ResendSignal 294 | state.m.RUnlock() 295 | 296 | go func(t TracerSig) { 297 | for s := range c { 298 | t.Logf(LogDebug, "Received %v; executing tracer\n", s) 299 | 300 | Trace(t, &signalError{s}, nil) 301 | 302 | if !rs { 303 | continue 304 | } 305 | 306 | t.Logf(LogDebug, "Resending %v to default handler\n", s) 307 | 308 | // Re-handle the signal with the default Go behavior. 309 | signal.Reset(s) 310 | p, err := os.FindProcess(os.Getpid()) 311 | if err != nil { 312 | t.Logf(LogError, "Failed to resend signal: "+ 313 | "cannot find process object") 314 | return 315 | } 316 | 317 | p.Signal(s) 318 | } 319 | 320 | t.Logf(LogDebug, "Signal channel closed; exiting goroutine\n") 321 | }(t) 322 | 323 | return 324 | } 325 | 326 | // Stops the specified TracerSig from handling any signals it was previously 327 | // registered to handle via bcd.Register(). 328 | func Unregister(t TracerSig) { 329 | c := t.Sigchan() 330 | if c == nil { 331 | return 332 | } 333 | 334 | unregisterInternal(t, c) 335 | } 336 | 337 | func unregisterInternal(t TracerSig, c chan os.Signal) { 338 | t.Logf(LogDebug, "Stopping signal channel...\n") 339 | signal.Stop(c) 340 | 341 | t.Logf(LogDebug, "Closing signal channel...\n") 342 | close(c) 343 | 344 | t.SetSigchan(nil) 345 | t.Logf(LogDebug, "Tracer unregistered\n") 346 | } 347 | 348 | type tracerResult struct { 349 | stdOut []byte 350 | err error 351 | } 352 | 353 | // Executes the specified Tracer on the current process. 354 | // 355 | // If e is non-nil, it will be used to augment the trace according to the 356 | // TraceOptions. 357 | // If traceOptions is non-nil, it will be used instead of the Tracer's 358 | // DefaultTraceOptions(). See TraceOptions for details on the various options. 359 | // 360 | // This is goroutine-safe; multiple goroutines may share the same Tracer and 361 | // execute Trace() concurrently. Only one tracer will be allowed to run at 362 | // any point; others will wait to acquire resources (locks) or timeout (if 363 | // timeouts are not disabled). Trace execution will be rate-limited according 364 | // to the GlobalConfig settings. 365 | // 366 | // This may also be called in a new goroutine via go Trace(...). In that case, 367 | // ensure TraceOptions.CallerOnly is false (you will likely also want to set 368 | // TraceOptions.Faulted to false); otherwise, only the newly spawned goroutine 369 | // will be traced. 370 | // 371 | // Output of specific Tracer execution depends on the implementation; most 372 | // Tracers will have options for specifying output paths. 373 | func Trace(t Tracer, e error, traceOptions *TraceOptions) (err error) { 374 | if traceOptions == nil { 375 | traceOptions = t.DefaultTraceOptions() 376 | } 377 | 378 | // If no timeouts are specified, the timeout channel will block 379 | // forever (i.e. it will return only after the tracer exits). 380 | // We create the timer first to account for the work below, but 381 | // we won't wrap setup in a timeout as it's unlikely to be 382 | // a bottleneck. 383 | var timeout <-chan time.Time 384 | 385 | if traceOptions.Timeout == 0 { 386 | to := t.DefaultTraceOptions().Timeout 387 | timeout = time.After(to) 388 | t.Logf(LogDebug, "Tracer timeout: %v\n", to) 389 | } else if traceOptions.Timeout > 0 { 390 | timeout = time.After(traceOptions.Timeout) 391 | t.Logf(LogDebug, "Tracer timeout: %v\n", traceOptions.Timeout) 392 | } 393 | 394 | // We create a new options slice to avoid modifying the base 395 | // set of tracer options just for this particular trace 396 | // invocation. 397 | options := t.Options() 398 | 399 | // If the caller has requested a trace with thread-specific options, 400 | // then add the relevant thread specifications to the options list. 401 | if traceOptions.CallerOnly || traceOptions.Faulted { 402 | runtime.LockOSThread() 403 | defer runtime.UnlockOSThread() 404 | 405 | if tid, err := gettid(); err == nil { 406 | t.Logf(LogDebug, "Retrieved tid: %v\n", tid) 407 | 408 | if traceOptions.CallerOnly { 409 | options = t.AddThreadFilter(options, tid) 410 | } 411 | 412 | if traceOptions.Faulted { 413 | options = t.AddFaultedThread(options, tid) 414 | } 415 | } else { 416 | t.Logf(LogWarning, "Failed to retrieve tid: %v\n", err) 417 | } 418 | } 419 | 420 | // Report caller's goid 421 | var buf [64]byte 422 | n := runtime.Stack(buf[:], false) 423 | idField := strings.Fields(strings.TrimPrefix(string(buf[:n]), "goroutine "))[0] 424 | if goid, err := strconv.Atoi(idField); err == nil { 425 | t.Logf(LogDebug, "Retrieved goid: %v\n", goid) 426 | options = t.AddCallerGo(options, goid) 427 | } else { 428 | t.Logf(LogWarning, "Failed to retrieve goid: %v\n", err) 429 | } 430 | 431 | if e != nil { 432 | options = t.AddKV(options, "error", e.Error()) 433 | if traceOptions.ErrClassification { 434 | options = t.AddClassifier(options, 435 | reflect.TypeOf(e).String()) 436 | } 437 | } 438 | 439 | for _, c := range traceOptions.Classifications { 440 | options = t.AddClassifier(options, c) 441 | } 442 | 443 | state.m.RLock() 444 | kfPanic := state.c.PanicOnKillFailure 445 | rl := state.c.RateLimit 446 | synchronousPut := state.c.SynchronousPut 447 | state.m.RUnlock() 448 | 449 | select { 450 | case <-timeout: 451 | err = errors.New("Tracer lock acquisition timed out") 452 | t.Logf(LogError, "%v\n", err) 453 | 454 | return 455 | case <-traceLock: 456 | break 457 | } 458 | 459 | // We now hold the trace lock. 460 | // Allow another tracer to execute (i.e. by re-populating the 461 | // traceLock channel) as long as the current tracer has 462 | // exited. 463 | defer func() { 464 | go traceUnlockRL(t, rl) 465 | }() 466 | 467 | done := make(chan tracerResult, 1) 468 | tracer := t.Finalize(options) 469 | 470 | if traceOptions.SpawnedGs != nil { 471 | traceOptions.SpawnedGs.Add(1) 472 | } 473 | 474 | go func() { 475 | if traceOptions.SpawnedGs != nil { 476 | defer traceOptions.SpawnedGs.Done() 477 | } 478 | 479 | t.Logf(LogDebug, "Starting tracer %v\n", tracer) 480 | 481 | var res tracerResult 482 | 483 | res.stdOut, res.err = tracer.Output() 484 | done <- res 485 | 486 | t.Logf(LogDebug, "Tracer finished execution\n") 487 | }() 488 | 489 | t.Logf(LogDebug, "Waiting for tracer completion...\n") 490 | 491 | var res tracerResult 492 | 493 | select { 494 | case <-timeout: 495 | if err = tracer.Process.Kill(); err != nil { 496 | t.Logf(LogError, 497 | "Failed to kill tracer upon timeout: %v\n", 498 | err) 499 | 500 | if kfPanic { 501 | t.Logf(LogWarning, 502 | "PanicOnKillFailure set; "+ 503 | "panicking\n") 504 | panic(err) 505 | } 506 | } 507 | 508 | err = errors.New("Tracer execution timed out") 509 | t.Logf(LogError, "%v; process killed\n", err) 510 | 511 | return 512 | case res = <-done: 513 | break 514 | } 515 | 516 | // Tracer execution has completed by this point. 517 | if res.err != nil { 518 | t.Logf(LogError, "Tracer failed to run: %v\n", 519 | res.err) 520 | err = res.err 521 | 522 | return 523 | } 524 | 525 | if t.PutOnTrace() == false { 526 | t.Logf(LogDebug, "Trace request complete\n") 527 | 528 | return 529 | } 530 | 531 | putFn := func() error { 532 | t.Logf(LogDebug, "Uploading snapshot...") 533 | 534 | if err := t.Put(res.stdOut); err != nil { 535 | t.Logf(LogError, "Failed to upload snapshot: %s", 536 | err) 537 | 538 | return err 539 | } 540 | 541 | t.Logf(LogDebug, "Successfully uploaded snapshot\n") 542 | 543 | return nil 544 | } 545 | 546 | if synchronousPut { 547 | err = putFn() 548 | } else { 549 | t.Logf(LogDebug, "Starting asynchronous put...\n") 550 | 551 | if traceOptions.SpawnedGs != nil { 552 | traceOptions.SpawnedGs.Add(1) 553 | } 554 | 555 | go func() { 556 | if traceOptions.SpawnedGs != nil { 557 | defer traceOptions.SpawnedGs.Done() 558 | } 559 | 560 | putFn() 561 | }() 562 | } 563 | 564 | t.Logf(LogDebug, "Trace request complete\n") 565 | 566 | return 567 | } 568 | 569 | func traceUnlockRL(t Tracer, rl time.Duration) { 570 | t.Logf(LogDebug, "Waiting for ratelimit (%v)\n", rl) 571 | <-time.After(rl) 572 | t.Logf(LogDebug, "Unlocking traceLock\n") 573 | traceLock <- struct{}{} 574 | } 575 | 576 | // Create a unique error type to use during panic recovery. 577 | type panicError struct { 578 | v interface{} 579 | } 580 | 581 | func (p *panicError) Error() string { 582 | return fmt.Sprintf("%v", p.v) 583 | } 584 | 585 | // Establishes a panic handler that will execute the specified Tracer in 586 | // response. If repanic is true, this will repanic again after Tracer execution 587 | // completes (with the original value returned by recover()). 588 | // This must be used with Go's defer, panic, and recover pattern; see 589 | // https://blog.golang.org/defer-panic-and-recover. 590 | func Recover(t Tracer, repanic bool, options *TraceOptions) { 591 | if r := recover(); r != nil { 592 | err, ok := r.(error) 593 | if !ok { 594 | // We use the runtime type of the error object for 595 | // classification (and thus potential grouping); 596 | // *bcd.PanicError is a more descriptive classifier 597 | // than something like *errors.errorString. 598 | err = &panicError{r} 599 | } 600 | 601 | Trace(t, err, options) 602 | 603 | if repanic { 604 | panic(r) 605 | } 606 | } 607 | } 608 | --------------------------------------------------------------------------------