├── go.mod ├── go.sum ├── LICENSE ├── doc.go ├── README.md ├── group.go ├── perf_example_test.go ├── group_test.go ├── record_amd64_test.go ├── count.go ├── perf_test.go ├── count_test.go ├── record_test.go ├── perf.go └── record.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/elastic/go-perf 2 | 3 | go 1.18 4 | 5 | require golang.org/x/sys v0.26.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 2 | golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package perf provides access to the Linux perf API. 7 | 8 | Counting events 9 | 10 | A Group represents a set of perf events measured together. 11 | 12 | var g perf.Group 13 | g.Add(perf.Instructions, perf.CPUCycles) 14 | 15 | hw, err := g.Open(targetpid, perf.AnyCPU) 16 | // ... 17 | gc, err := hw.MeasureGroup(func() { ... }) 18 | 19 | Attr configures an individual event. 20 | 21 | fa := &perf.Attr{ 22 | CountFormat: perf.CountFormat{ 23 | Running: true, 24 | ID: true, 25 | }, 26 | } 27 | perf.PageFaults.Configure(fa) 28 | 29 | faults, err := perf.Open(fa, perf.CallingThread, perf.AnyCPU, nil) 30 | // ... 31 | c, err := faults.Measure(func() { ... 
})
32 | 
33 | Sampling events
34 | 
35 | Overflow records are available once the MapRing method on Event is called:
36 | 
37 | 	var ev perf.Event // initialized previously
38 | 
39 | 	ev.MapRing()
40 | 
41 | 	ev.Enable()
42 | 
43 | 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
44 | 	defer cancel()
45 | 
46 | 	for {
47 | 		rec, err := ev.ReadRecord(ctx)
48 | 		// process rec
49 | 	}
50 | 
51 | Tracepoints are also supported:
52 | 
53 | 	wa := &perf.Attr{
54 | 		SampleFormat: perf.SampleFormat{
55 | 			Pid: true,
56 | 			Tid: true,
57 | 			IP:  true,
58 | 		},
59 | 	}
60 | 	wa.SetSamplePeriod(1)
61 | 	wa.SetWakeupEvents(1)
62 | 	wtp := perf.Tracepoint("syscalls", "sys_enter_write")
63 | 	wtp.Configure(wa)
64 | 
65 | 	writes, err := perf.Open(wa, targetpid, perf.AnyCPU, nil)
66 | 	// ...
67 | 	c, err := writes.Measure(func() { ... })
68 | 	// ...
69 | 	fmt.Printf("saw %d writes\n", c.Value)
70 | 
71 | 	rec, err := writes.ReadRecord(ctx)
72 | 	// ...
73 | 	sr, ok := rec.(*perf.SampleRecord)
74 | 	// ...
75 | 	fmt.Printf("pid = %d, tid = %d\n", sr.Pid, sr.Tid)
76 | 
77 | For more detailed information, see the examples and man 2 perf_event_open.
78 | 
79 | NOTE: this package is experimental and does not yet offer compatibility
80 | guarantees.
81 | */
82 | package perf
83 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ----
2 | 
3 | This is a clone of the `golang.org/x/sys/unix/linux/perf` package
4 | submitted by [acln0](https://github.com/acln0) for review at
5 | https://go-review.googlesource.com/c/sys/+/168059
6 | 
7 | An alternative working tree for this package can also be found
8 | at https://github.com/acln0/perf
9 | 
10 | This Elastic fork contains bugfixes and features necessary for
11 | our KProbes implementation.
12 | 
13 | ----
14 | 
15 | `perf` API client package for Linux. See `man 2 perf_event_open` and
16 | `include/uapi/linux/perf_event.h`.
17 | 
18 | This package is in its early stages. The API is still under discussion:
19 | it may change at any moment, without prior notice, and this document
20 | may not always be completely up to date.
21 | 
22 | 
23 | Testing
24 | =======
25 | 
26 | Many of the things package perf does require elevated privileges on
27 | most systems, but we would very much like the tests not to require
28 | root to run. Because of this, we use a fairly specific testing model,
29 | described next.
30 | 
31 | If the host kernel does not support `perf_event_open(2)` (i.e. if
32 | the `/proc/sys/kernel/perf_event_paranoid` file is not present),
33 | then tests fail immediately with an error message.
34 | 
35 | Tests are designed in such a way that they are skipped if their
36 | requirements are not met by the underlying system: we want the test
37 | suite to degrade gracefully in such circumstances.
38 | 
39 | For example, when running Linux in a virtualized environment, various
40 | hardware PMUs might not be available. In such situations, we would like
41 | the test suite to continue running. For this purpose, we introduce the
42 | mechanism described next.
43 | 
44 | Requirements for a test are specified by invoking the `requires`
45 | function at the beginning of a test function. All tests that call
46 | `perf_event_open` must specify requirements this way. Currently,
47 | we use three kinds of requirements:
48 | 
49 | * `perf_event_paranoid` values
50 | 
51 | * the existence of various PMUs (e.g. "cpu", "software", "tracepoint")
"cpu", "software", "tracepoint") 52 | 53 | * tracefs is mounted, and readable 54 | 55 | Today, setting `perf_event_paranoid=1` and having a readable tracefs 56 | mounted at `/sys/kernel/debug/tracing` enables most of the tests. 57 | A select few require `perf_event_paranoid=0`. If the test process 58 | is running with `CAP_SYS_ADMIN`, `perf_event_paranoid` requirements 59 | are ignored, since they are considered fulfilled. The test process 60 | does not attempt to see if it is running as root, it only checks 61 | `CAP_SYS_ADMIN`. 62 | 63 | If you find a test that, when ran without elevated privileges, 64 | fails with something akin to a permissions error, then it means the 65 | requirements for the test were not specified precisely. Please file 66 | a bug. Extending the test suite and making these requirements more 67 | precise is an ongoing process. 68 | -------------------------------------------------------------------------------- /group.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "errors" 11 | "fmt" 12 | ) 13 | 14 | // Group configures a group of events. 15 | type Group struct { 16 | // CountFormat configures the format of counts read from the event 17 | // leader. The Group option is set automatically. 18 | CountFormat CountFormat 19 | 20 | // Options configures options for all events in the group. 21 | Options Options 22 | 23 | // ClockID configures the clock for samples in the group. 24 | ClockID int32 25 | 26 | err error // sticky configuration error 27 | attrs []*Attr 28 | leaderNeedsRing bool 29 | } 30 | 31 | // TODO(acln): what other fields belong on Group? SampleFormat perhaps? 32 | 33 | // Add adds events to the group, as configured by cfgs. 34 | // 35 | // For each Configurator, a new *Attr is created, the group-specific settings 36 | // are applied, then Configure is called on the *Attr to produce the final 37 | // event attributes. 38 | func (g *Group) Add(cfgs ...Configurator) { 39 | for _, cfg := range cfgs { 40 | g.add(cfg) 41 | } 42 | } 43 | 44 | func (g *Group) add(cfg Configurator) { 45 | if g.err != nil { 46 | return 47 | } 48 | a := new(Attr) 49 | a.CountFormat = g.CountFormat 50 | a.Options = g.Options 51 | a.ClockID = g.ClockID 52 | err := cfg.Configure(a) 53 | if err != nil { 54 | g.err = err 55 | return 56 | } 57 | if a.Sample != 0 { 58 | g.leaderNeedsRing = true 59 | } 60 | g.attrs = append(g.attrs, a) 61 | } 62 | 63 | // Open opens all the events in the group, and returns their leader. 64 | // 65 | // The returned Event controls the entire group. Callers must use the 66 | // ReadGroupCount method when reading counters from it. Closing it closes 67 | // the entire group. 
68 | func (g *Group) Open(pid int, cpu int) (*Event, error) {
69 | 	if len(g.attrs) == 0 {
70 | 		return nil, errors.New("perf: empty event group")
71 | 	}
72 | 	if g.err != nil {
73 | 		return nil, fmt.Errorf("perf: configuration error: %v", g.err)
74 | 	}
75 | 	leaderattr := g.attrs[0]
76 | 	leaderattr.CountFormat.Group = true
77 | 	leader, err := Open(leaderattr, pid, cpu, nil)
78 | 	if err != nil {
79 | 		return nil, fmt.Errorf("perf: failed to open event leader: %v", err)
80 | 	}
81 | 	if len(g.attrs) < 2 {
82 | 		return leader, nil
83 | 	}
84 | 	if g.leaderNeedsRing {
85 | 		if err := leader.MapRing(); err != nil {
86 | 			leader.Close()
87 | 			return nil, fmt.Errorf("perf: failed to map leader ring: %v", err)
88 | 		}
89 | 	}
90 | 	for idx, attr := range g.attrs[1:] {
91 | 		follower, err := Open(attr, pid, cpu, leader)
92 | 		if err != nil {
93 | 			leader.Close()
94 | 			return nil, fmt.Errorf("perf: failed to open group event #%d (%q): %v", idx+1, attr.Label, err)
95 | 		}
96 | 		leader.owned = append(leader.owned, follower)
97 | 		if attr.Sample != 0 {
98 | 			if err := follower.SetOutput(leader); err != nil {
99 | 				leader.Close()
100 | 				return nil, fmt.Errorf("perf: failed to route follower %q output to leader %q (pid %d on CPU %d): %v", attr.Label, leaderattr.Label, pid, cpu, err)
101 | 			}
102 | 		}
103 | 	}
104 | 	return leader, nil
105 | }
106 | 
107 | // A Configurator configures event attributes. Implementations should only
108 | // set the fields they need. See (*Group).Add for more details.
109 | type Configurator interface {
110 | 	Configure(attr *Attr) error
111 | }
112 | 
113 | type configuratorFunc func(attr *Attr) error
114 | 
115 | func (cf configuratorFunc) Configure(attr *Attr) error { return cf(attr) }
116 | 
--------------------------------------------------------------------------------
/perf_example_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "fmt" 12 | "log" 13 | "runtime" 14 | 15 | "golang.org/x/sys/unix" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func ExampleHardwareCounter_iPC() { 21 | g := perf.Group{ 22 | CountFormat: perf.CountFormat{ 23 | Running: true, 24 | }, 25 | } 26 | g.Add(perf.Instructions, perf.CPUCycles) 27 | 28 | runtime.LockOSThread() 29 | defer runtime.UnlockOSThread() 30 | 31 | ipc, err := g.Open(perf.CallingThread, perf.AnyCPU) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | defer ipc.Close() 36 | 37 | sum := 0 38 | gc, err := ipc.MeasureGroup(func() { 39 | for i := 0; i < 100000; i++ { 40 | sum += i 41 | } 42 | }) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | 47 | insns, cycles := gc.Values[0].Value, gc.Values[1].Value 48 | 49 | fmt.Printf("got sum = %d in %v: %d instructions, %d CPU cycles: %f IPC", 50 | sum, gc.Running, insns, cycles, float64(insns)/float64(cycles)) 51 | } 52 | 53 | func ExampleSoftwareCounter_pageFaults() { 54 | pfa := new(perf.Attr) 55 | perf.PageFaults.Configure(pfa) 56 | 57 | runtime.LockOSThread() 58 | defer runtime.UnlockOSThread() 59 | 60 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | defer faults.Close() 65 | 66 | var mem []byte 67 | const ( 68 | size = 64 * 1024 * 1024 69 | pos = 63 * 1024 * 1024 70 | ) 71 | c, err := faults.Measure(func() { 72 | mem = make([]byte, size) 73 | mem[pos] = 42 74 | }) 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | fmt.Printf("saw %d page faults, wrote value %d", c.Value, mem[pos]) 79 | } 80 | 81 | func ExampleTracepoint_getpid() { 82 | ga := new(perf.Attr) 83 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 84 | if err := gtp.Configure(ga); err != nil { 85 | log.Fatal(err) 86 | } 87 | 88 | runtime.LockOSThread() 89 | defer runtime.UnlockOSThread() 90 | 91 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 92 | if err != nil { 93 | log.Fatal(err) 94 | } 95 | defer getpid.Close() 96 | 97 | unix.Getpid() // does not count towards the measurement 98 | 99 | c, err := getpid.Measure(func() { 100 | unix.Getpid() 101 | unix.Getpid() 102 | unix.Getpid() 103 | }) 104 | if err != nil { 105 | log.Fatal(err) 106 | } 107 | 108 | fmt.Printf("saw getpid %d times\n", c.Value) // should print 3 109 | } 110 | 111 | func ExampleMmapRecord_plugin() { 112 | var targetpid int // pid of the monitored process 113 | 114 | da := &perf.Attr{ 115 | Options: perf.Options{ 116 | Mmap: true, 117 | }, 118 | } 119 | da.SetSamplePeriod(1) 120 | da.SetWakeupEvents(1) 121 | perf.Dummy.Configure(da) // configure a dummy event, so we can Open it 122 | 123 | mmap, err := perf.Open(da, targetpid, perf.AnyCPU, nil) 124 | if err != nil { 125 | log.Fatal(err) 126 | } 127 | if err := mmap.MapRing(); err != nil { 128 | log.Fatal(err) 129 | } 130 | 131 | // Monitor the target process, wait for it to load something like 132 | // a plugin, or a shared library, which requires a PROT_EXEC mapping. 
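	// Note (added explanation): ReadRecord blocks until the kernel
	// publishes a record to the ring, or until the context is cancelled.
	// Record types other than *perf.MmapRecord that may appear here
	// (for example, lost-record notifications) are skipped by the type
	// assertion below, so the loop reports only PROT_EXEC mappings.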
133 | 
134 | 	for {
135 | 		rec, err := mmap.ReadRecord(context.Background())
136 | 		if err != nil {
137 | 			log.Fatal(err)
138 | 		}
139 | 		mr, ok := rec.(*perf.MmapRecord)
140 | 		if !ok {
141 | 			continue
142 | 		}
143 | 		fmt.Printf("pid %d created a PROT_EXEC mapping at %#x: %s",
144 | 			mr.Pid, mr.Addr, mr.Filename)
145 | 	}
146 | }
147 | 
--------------------------------------------------------------------------------
/group_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package perf_test
6 | 
7 | import (
8 | 	"context"
9 | 	"runtime"
10 | 	"testing"
11 | 	"time"
12 | 
13 | 	"github.com/elastic/go-perf"
14 | )
15 | 
16 | func TestGroup(t *testing.T) {
17 | 	t.Run("Count", testGroupCount)
18 | 	t.Run("Record", testGroupRecord)
19 | }
20 | 
21 | func testGroupCount(t *testing.T) {
22 | 	requires(t, paranoid(1), hardwarePMU, softwarePMU)
23 | 
24 | 	da := new(perf.Attr)
25 | 	perf.Dummy.Configure(da)
26 | 
27 | 	g := perf.Group{
28 | 		CountFormat: perf.CountFormat{
29 | 			Enabled: true,
30 | 			Running: true,
31 | 		},
32 | 	}
33 | 	g.Add(perf.CPUCycles, perf.Instructions, da)
34 | 
35 | 	runtime.LockOSThread()
36 | 	defer runtime.UnlockOSThread()
37 | 
38 | 	ev, err := g.Open(perf.CallingThread, perf.AnyCPU)
39 | 	if err != nil {
40 | 		t.Fatalf("Open: %v", err)
41 | 	}
42 | 	defer ev.Close()
43 | 	sum := int64(0)
44 | 	gc, err := ev.MeasureGroup(func() {
45 | 		for i := int64(0); i < 50000; i++ {
46 | 			sum += i
47 | 		}
48 | 	})
49 | 	if err != nil {
50 | 		t.Fatalf("MeasureGroup: %v", err)
51 | 	}
52 | 
53 | 	t.Logf("got sum %d in %d %s and %d %s", sum, gc.Values[0].Value, gc.Values[0].Label, gc.Values[1].Value, gc.Values[1].Label)
54 | }
55 | 
56 | func testGroupRecord(t *testing.T) {
57 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
58 | 
59 | 	ga := &perf.Attr{
60 | 		Options: perf.Options{
61 | 			Disabled: true,
62 | 		},
63 | 		SampleFormat: perf.SampleFormat{
64 | 			Tid:      true,
65 | 			Time:     true,
66 | 			CPU:      true,
67 | 			IP:       true,
68 | 			StreamID: true,
69 | 		},
70 | 	}
71 | 	ga.SetSamplePeriod(1)
72 | 	ga.SetWakeupEvents(1)
73 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
74 | 	if err := gtp.Configure(ga); err != nil {
75 | 		t.Fatal(err)
76 | 	}
77 | 
78 | 	wa := &perf.Attr{
79 | 		SampleFormat: perf.SampleFormat{
80 | 			Tid:      true,
81 | 			Time:     true,
82 | 			CPU:      true,
83 | 			IP:       true,
84 | 			StreamID: true,
85 | 		},
86 | 	}
87 | 	wa.SetSamplePeriod(1)
88 | 	wa.SetWakeupEvents(1)
89 | 	wtp := perf.Tracepoint("syscalls", "sys_enter_write")
90 | 	if err := wtp.Configure(wa); err != nil {
91 | 		t.Fatal(err)
92 | 	}
93 | 
94 | 	g := perf.Group{
95 | 		CountFormat: perf.CountFormat{
96 | 			Enabled: true,
97 | 			Running: true,
98 | 		},
99 | 	}
100 | 	g.Add(ga, wa)
101 | 
102 | 	runtime.LockOSThread()
103 | 	defer runtime.UnlockOSThread()
104 | 
105 | 	ev, err := g.Open(perf.CallingThread, perf.AnyCPU)
106 | 	if err != nil {
107 | 		t.Fatal(err)
108 | 	}
109 | 	defer ev.Close()
110 | 
111 | 	gc, err := ev.MeasureGroup(func() {
112 | 		getpidTrigger()
113 | 		writeTrigger()
114 | 	})
115 | 	if err != nil {
116 | 		t.Fatal(err)
117 | 	}
118 | 	for _, got := range gc.Values {
119 | 		if got.Value != 1 {
120 | 			t.Fatalf("want 1 hit for %q, got %d", got.Label, got.Value)
121 | 		}
122 | 	}
123 | 
124 | 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
125 | 	defer cancel()
126 | 
127 | 	grec, err := ev.ReadRecord(ctx)
128 | 	if err != nil {
129 | 		t.Fatal(err)
130 | 	}
131 | 	gsr, ok :=
grec.(*perf.SampleGroupRecord) 132 | if !ok { 133 | t.Fatalf("got %T, want *perf.SampleGroupRecord", grec) 134 | } 135 | 136 | wrec, err := ev.ReadRecord(ctx) 137 | if err != nil { 138 | t.Fatal(err) 139 | } 140 | wsr, ok := wrec.(*perf.SampleGroupRecord) 141 | if !ok { 142 | t.Fatalf("got %T, want *perf.SampleGroupRecord", wrec) 143 | } 144 | 145 | if gip, wip := gsr.IP, wsr.IP; gip == wip { 146 | t.Fatalf("equal IP 0x%x for samples of different events", wip) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /record_amd64_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "os" 12 | "runtime" 13 | "testing" 14 | "time" 15 | "unsafe" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func TestSampleUserRegisters(t *testing.T) { 21 | requires(t, paranoid(1), tracepointPMU, tracefs) 22 | 23 | wea := &perf.Attr{ 24 | CountFormat: perf.CountFormat{ 25 | Group: true, 26 | }, 27 | SampleFormat: perf.SampleFormat{ 28 | StreamID: true, 29 | UserRegisters: true, 30 | }, 31 | Options: perf.Options{ 32 | SampleIDAll: true, 33 | }, 34 | // RDI, RSI, RDX. See arch/x86/include/uapi/asm/perf_regs.h. 35 | SampleRegistersUser: 0x38, 36 | } 37 | wea.SetSamplePeriod(1) 38 | wea.SetWakeupEvents(1) 39 | wetp := perf.Tracepoint("syscalls", "sys_enter_write") 40 | if err := wetp.Configure(wea); err != nil { 41 | t.Fatal(err) 42 | } 43 | 44 | wxa := &perf.Attr{ 45 | SampleFormat: perf.SampleFormat{ 46 | StreamID: true, 47 | UserRegisters: true, 48 | }, 49 | Options: perf.Options{ 50 | SampleIDAll: true, 51 | }, 52 | // RAX. See arch/x86/include/uapi/asm/perf_regs.h. 
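		// Note (added explanation): each bit of SampleRegistersUser
		// selects one register by its index in perf_regs.h (AX=0, BX=1,
		// CX=2, DX=3, SI=4, DI=5, ...), and the sampled values are
		// delivered in ascending bit order. That is why the 0x38 mask
		// above yields UserRegisters[0]=RDX (bit 3), [1]=RSI (bit 4),
		// and [2]=RDI (bit 5), while 0x1 here selects just RAX (bit 0).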
53 | 		SampleRegistersUser: 0x1,
54 | 	}
55 | 	wxa.SetSamplePeriod(1)
56 | 	wxa.SetWakeupEvents(1)
57 | 	wxtp := perf.Tracepoint("syscalls", "sys_exit_write")
58 | 	if err := wxtp.Configure(wxa); err != nil {
59 | 		t.Fatal(err)
60 | 	}
61 | 
62 | 	var g perf.Group
63 | 	g.Add(wea, wxa)
64 | 
65 | 	runtime.LockOSThread()
66 | 	defer runtime.UnlockOSThread()
67 | 
68 | 	write, err := g.Open(perf.CallingThread, perf.AnyCPU)
69 | 	if err != nil {
70 | 		t.Fatal(err)
71 | 	}
72 | 	defer write.Close()
73 | 	null, err := os.OpenFile("/dev/null", os.O_WRONLY, 0200)
74 | 	if err != nil {
75 | 		t.Fatal(err)
76 | 	}
77 | 	defer null.Close()
78 | 
79 | 	buf := make([]byte, 8)
80 | 
81 | 	var n int
82 | 	var werr error
83 | 	gc, err := write.MeasureGroup(func() {
84 | 		n, werr = null.Write(buf)
85 | 	})
86 | 	if err != nil {
87 | 		t.Fatal(err)
88 | 	}
89 | 	if werr != nil {
90 | 		t.Fatal(werr)
91 | 	}
92 | 	if entry := gc.Values[0].Value; entry != 1 {
93 | 		t.Fatalf("got %d hits for write at entry, want 1", entry)
94 | 	}
95 | 	if exit := gc.Values[1].Value; exit != 1 {
96 | 		t.Fatalf("got %d hits for write at exit, want 1", exit)
97 | 	}
98 | 
99 | 	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
100 | 	defer cancel()
101 | 
102 | 	entryrec, err := write.ReadRecord(ctx)
103 | 	if err != nil {
104 | 		t.Fatalf("got %v, want a valid record", err)
105 | 	}
106 | 	entrysr, ok := entryrec.(*perf.SampleGroupRecord)
107 | 	if !ok {
108 | 		t.Fatalf("got %T, want *perf.SampleGroupRecord", entryrec)
109 | 	}
110 | 	if nregs := len(entrysr.UserRegisters); nregs != 3 {
111 | 		t.Fatalf("got %d registers, want 3", nregs)
112 | 	}
113 | 
114 | 	var (
115 | 		rdi = entrysr.UserRegisters[2]
116 | 		rsi = entrysr.UserRegisters[1]
117 | 		rdx = entrysr.UserRegisters[0]
118 | 
119 | 		nullfd  = uint64(null.Fd())
120 | 		bufp    = uint64(uintptr(unsafe.Pointer(&buf[0])))
121 | 		bufsize = uint64(len(buf))
122 | 	)
123 | 
124 | 	if rdi != nullfd {
125 | 		t.Errorf("fd: rdi = %d, want %d", rdi, nullfd)
126 | 	}
127 | 	if rsi != bufp {
128 | 		t.Errorf("buf: rsi = %#x, want %#x", rsi, bufp)
129 | 	}
130 | 	if rdx != bufsize {
131 | 		t.Errorf("count: rdx = %d, want %d", rdx, bufsize)
132 | 	}
133 | 
134 | 	exitrec, err := write.ReadRecord(ctx)
135 | 	if err != nil {
136 | 		t.Fatalf("got %v, want a valid record", err)
137 | 	}
138 | 	exitsr, ok := exitrec.(*perf.SampleGroupRecord)
139 | 	if !ok {
140 | 		t.Fatalf("got %T, want *perf.SampleGroupRecord", exitrec)
141 | 	}
142 | 	if nregs := len(exitsr.UserRegisters); nregs != 1 {
143 | 		t.Fatalf("got %d registers, want 1", nregs)
144 | 	}
145 | 
146 | 	rax := exitsr.UserRegisters[0]
147 | 	if uint64(n) != rax {
148 | 		t.Fatalf("return: rax = %d, want %d", rax, n)
149 | 	}
150 | }
151 | 
--------------------------------------------------------------------------------
/count.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | // +build linux
6 | 
7 | package perf
8 | 
9 | import (
10 | 	"errors"
11 | 	"fmt"
12 | 	"io"
13 | 	"os"
14 | 	"text/tabwriter"
15 | 	"time"
16 | 
17 | 	"golang.org/x/sys/unix"
18 | )
19 | 
20 | // Count is a measurement taken by an Event.
21 | //
22 | // The Value field is always present and populated.
23 | //
24 | // The Enabled field is populated if CountFormat.Enabled is set on the Event
25 | // the Count was read from. Ditto for Running and ID.
26 | //
27 | // Label is set based on the Label field of the Attr associated with the
28 | // event. See the documentation there for more details.
29 | type Count struct {
30 | 	Value   uint64
31 | 	Enabled time.Duration
32 | 	Running time.Duration
33 | 	ID      uint64
34 | 	Label   string
35 | }
36 | 
37 | func (c Count) String() string {
38 | 	if c.Label != "" {
39 | 		return fmt.Sprintf("%s = %d", c.Label, c.Value)
40 | 	}
41 | 	return fmt.Sprint(c.Value)
42 | }
43 | 
44 | var errGroup = errors.New("calling ReadCount on group Event")
45 | 
46 | // ReadCount reads the measurement associated with ev. If the Event was
47 | // configured with CountFormat.Group, ReadCount returns an error.
48 | func (ev *Event) ReadCount() (Count, error) {
49 | 	var c Count
50 | 	if err := ev.ok(); err != nil {
51 | 		return c, err
52 | 	}
53 | 	if ev.a.CountFormat.Group {
54 | 		return c, errGroup
55 | 	}
56 | 
57 | 	// TODO(acln): on x86, the rdpmc instruction can be used here,
58 | 	// instead of read(2), to reduce the number of system calls, and
59 | 	// improve the accuracy of measurements.
60 | 	//
61 | 	// Investigate this. It seems like this functionality may not always
62 | 	// be available, even on x86, but we can check for it explicitly
63 | 	// if the ring associated with ev is mapped into memory: see
64 | 	// cap_user_rdpmc on perf_event_mmap_page.
65 | 	buf := make([]byte, ev.a.CountFormat.readSize())
66 | 	_, err := unix.Read(ev.perffd, buf)
67 | 	if err != nil {
68 | 		return c, os.NewSyscallError("read", err)
69 | 	}
70 | 
71 | 	f := fields(buf)
72 | 	f.count(&c, ev.a.CountFormat)
73 | 	c.Label = ev.a.Label
74 | 
75 | 	return c, nil
76 | }
77 | 
78 | // GroupCount is a group of measurements taken by an Event group.
79 | //
80 | // Fields are populated as described in the Count documentation.
81 | type GroupCount struct {
82 | 	Enabled time.Duration
83 | 	Running time.Duration
84 | 	Values  []struct {
85 | 		Value uint64
86 | 		ID    uint64
87 | 		Label string
88 | 	}
89 | }
90 | 
91 | type errWriter struct {
92 | 	w   io.Writer
93 | 	err error // sticky
94 | }
95 | 
96 | func (ew *errWriter) Write(b []byte) (int, error) {
97 | 	if ew.err != nil {
98 | 		return 0, ew.err
99 | 	}
100 | 	n, err := ew.w.Write(b)
101 | 	ew.err = err
102 | 	return n, err
103 | }
104 | 
105 | // PrintValues prints a table of gc.Values to w.
106 | func (gc GroupCount) PrintValues(w io.Writer) error {
107 | 	ew := &errWriter{w: w}
108 | 
109 | 	tw := new(tabwriter.Writer)
110 | 	tw.Init(ew, 0, 8, 1, ' ', 0)
111 | 
112 | 	if gc.Values[0].ID != 0 {
113 | 		fmt.Fprintln(tw, "label\tvalue\tID")
114 | 	} else {
115 | 		fmt.Fprintln(tw, "label\tvalue")
116 | 	}
117 | 
118 | 	for _, v := range gc.Values {
119 | 		if v.ID != 0 {
120 | 			fmt.Fprintf(tw, "%s\t%d\t%d\n", v.Label, v.Value, v.ID)
121 | 		} else {
122 | 			fmt.Fprintf(tw, "%s\t%d\n", v.Label, v.Value)
123 | 		}
124 | 	}
125 | 
126 | 	tw.Flush()
127 | 	return ew.err
128 | }
129 | 
130 | var errNotGroup = errors.New("calling ReadGroupCount on non-group Event")
131 | 
132 | // ReadGroupCount reads the measurements associated with ev. If the Event
133 | // was not configured with CountFormat.Group, ReadGroupCount returns an error.
134 | func (ev *Event) ReadGroupCount() (GroupCount, error) {
135 | 	var gc GroupCount
136 | 	if err := ev.ok(); err != nil {
137 | 		return gc, err
138 | 	}
139 | 	if !ev.a.CountFormat.Group {
140 | 		return gc, errNotGroup
141 | 	}
142 | 
143 | 	size := ev.a.CountFormat.groupReadSize(1 + len(ev.group))
144 | 	buf := make([]byte, size)
145 | 	_, err := unix.Read(ev.perffd, buf)
146 | 	if err != nil {
147 | 		return gc, os.NewSyscallError("read", err)
148 | 	}
149 | 
150 | 	f := fields(buf)
151 | 	f.groupCount(&gc, ev.a.CountFormat)
152 | 	gc.Values[0].Label = ev.a.Label
153 | 	for i := 0; i < len(ev.group); i++ {
154 | 		gc.Values[i+1].Label = ev.group[i].a.Label
155 | 	}
156 | 
157 | 	return gc, nil
158 | }
159 | 
160 | // CountFormat configures the format of Count or GroupCount measurements.
161 | //
162 | // Enabled and Running configure the Event to include time enabled and
163 | // time running measurements in the counts. Usually, these two values are
164 | // equal. They may differ when events are multiplexed.
165 | //
166 | // If ID is set, a unique ID is assigned to the associated event. For a
167 | // given event, this ID matches the ID reported by the (*Event).ID method.
168 | //
169 | // If Group is set, the Event measures a group of events together: callers
170 | // must use ReadGroupCount. If Group is not set, the Event measures a single
171 | // counter: callers must use ReadCount.
172 | type CountFormat struct {
173 | 	Enabled bool
174 | 	Running bool
175 | 	ID      bool
176 | 	Group   bool
177 | }
178 | 
179 | // readSize returns the buffer size required for a Count read. Assumes
180 | // f.Group is not set.
181 | func (f CountFormat) readSize() int {
182 | 	size := 8 // value is always set
183 | 	if f.Enabled {
184 | 		size += 8
185 | 	}
186 | 	if f.Running {
187 | 		size += 8
188 | 	}
189 | 	if f.ID {
190 | 		size += 8
191 | 	}
192 | 	return size
193 | }
194 | 
195 | // groupReadSize returns the buffer size required for a GroupCount read.
196 | // Assumes f.Group is set.
197 | func (f CountFormat) groupReadSize(events int) int {
198 | 	hsize := 8 // the number of events is always set
199 | 	if f.Enabled {
200 | 		hsize += 8
201 | 	}
202 | 	if f.Running {
203 | 		hsize += 8
204 | 	}
205 | 	vsize := 8 // each event contains at least a value
206 | 	if f.ID {
207 | 		vsize += 8
208 | 	}
209 | 	return hsize + events*vsize
210 | }
211 | 
212 | // marshal marshals the CountFormat into a uint64.
213 | func (f CountFormat) marshal() uint64 {
214 | 	// Always keep this in sync with the type definition above.
215 | 	fields := []bool{
216 | 		f.Enabled,
217 | 		f.Running,
218 | 		f.ID,
219 | 		f.Group,
220 | 	}
221 | 	return marshalBitwiseUint64(fields)
222 | }
223 | 
--------------------------------------------------------------------------------
/perf_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | // +build linux
6 | 
7 | package perf_test
8 | 
9 | import (
10 | 	"errors"
11 | 	"fmt"
12 | 	"io/ioutil"
13 | 	"os"
14 | 	"runtime"
15 | 	"strconv"
16 | 	"strings"
17 | 	"sync"
18 | 	"testing"
19 | 	"unsafe"
20 | 
21 | 	"golang.org/x/sys/unix"
22 | 
23 | 	"github.com/elastic/go-perf"
24 | )
25 | 
26 | func TestOpen(t *testing.T) {
27 | 	t.Run("BadGroup", testOpenBadGroup)
28 | 	t.Run("BadAttrType", testOpenBadAttrType)
29 | 	t.Run("PopulatesLabel", testOpenPopulatesLabel)
30 | 	t.Run("EventIDsDifferentByCPU", testEventIDsDifferentByCPU)
31 | }
32 | 
33 | func testOpenBadGroup(t *testing.T) {
34 | 	requires(t, paranoid(1), hardwarePMU)
35 | 
36 | 	ca := new(perf.Attr)
37 | 	perf.CPUCycles.Configure(ca)
38 | 	ca.CountFormat.Group = true
39 | 
40 | 	runtime.LockOSThread()
41 | 	defer runtime.UnlockOSThread()
42 | 
43 | 	cycles, err := perf.Open(ca, perf.CallingThread, perf.AnyCPU, nil)
44 | 	if err != nil {
45 | 		t.Fatal(err)
46 | 	}
47 | 	cycles.Close()
48 | 
49 | 	_, err = perf.Open(ca, perf.CallingThread, perf.AnyCPU, cycles)
50 | 	if err == nil {
51 | 		t.Fatal("successful Open with closed group *Event")
52 | 	}
53 | 
54 | 	cycles = new(perf.Event) // uninitialized
55 | 	_, err = perf.Open(ca, perf.CallingThread, perf.AnyCPU, cycles)
56 | 	if err == nil {
57 | 		t.Fatal("successful Open with uninitialized group *Event")
58 | 	}
59 | }
60 | 
61 | func testOpenBadAttrType(t *testing.T) {
62 | 	a := &perf.Attr{
63 | 		Type: 42,
64 | 	}
65 | 
66 | 	_, err := perf.Open(a, perf.CallingThread, perf.AnyCPU, nil)
67 | 	if err == nil {
68 | 		t.Fatal("got a valid *Event for bad Attr.Type 42")
69 | 	}
70 | }
71 | 
72 | func testOpenPopulatesLabel(t *testing.T) {
73 | 	// TODO(acln): extend when we implement general label lookup
74 | 	requires(t, paranoid(1), hardwarePMU)
75 | 
76 | 	runtime.LockOSThread()
77 | 	defer runtime.UnlockOSThread()
78 | 
79 | 	ca := &perf.Attr{
80 | 		Type:   perf.HardwareEvent,
81 | 		Config: uint64(perf.CPUCycles),
82 | 	}
83 | 
84 | 	cycles, err := perf.Open(ca, perf.CallingThread, perf.AnyCPU, nil)
85 | 	if err != nil {
86 | 		t.Fatal(err)
87 | 	}
88 | 	defer cycles.Close()
89 | 
90 | 	c, err := cycles.Measure(getpidTrigger)
91 | 	if err != nil {
92 | 		t.Fatal(err)
93 | 	}
94 | 	if c.Label == "" {
95 | 		t.Fatal("Open did not set label on *Attr")
96 | 	}
97 | }
98 | 
99 | func testEventIDsDifferentByCPU(t *testing.T) {
100 | 	requires(t, paranoid(1), hardwarePMU)
101 | 
102 | 	if runtime.NumCPU() == 1 {
103 | 		t.Skip("only one CPU")
104 | 	}
105 | 
106 | 	ca := new(perf.Attr)
107 | 	perf.CPUCycles.Configure(ca)
108 | 
109 | 	cycles0, err := perf.Open(ca, perf.CallingThread, 0, nil)
110 | 	if err != nil {
111 | 		t.Fatal(err)
112 | 	}
113 | 	defer cycles0.Close()
114 | 
115 | 	cycles1, err := perf.Open(ca, perf.CallingThread, 1, nil)
116 | 	if err != nil {
117 | 		t.Fatal(err)
118 | 	}
119 | 	defer cycles1.Close()
120 | 
121 | 	id0, err := cycles0.ID()
122 | 	if err != nil {
123 | 		t.Fatal(err)
124 | 	}
125 | 
126 | 	id1, err := cycles1.ID()
127 | 	if err != nil {
128 | 		t.Fatal(err)
129 | 	}
130 | 
131 | 	if id0 == id1 {
132 | 		t.Fatal("event has the same ID on different CPUs")
133 | 	}
134 | }
135 | 
136 | func TestMain(m *testing.M) {
137 | 	if !perf.Supported() {
138 | 		fmt.Fprintln(os.Stderr, "perf_event_open not supported")
139 | 		os.Exit(2)
140 | 	}
141 | 	os.Exit(m.Run())
142 | }
143 | 
144 | // perfTestEnv holds and caches information about the testing environment
145 | // for package perf.
146 | type perfTestEnv struct {
147 | 	cap struct {
148 | 		sync.Once
149 | 		sysadmin bool
150 | 	}
151 | 
152 | 	paranoid struct {
153 | 		sync.Once
154 | 		value int
155 | 	}
156 | 
157 | 	tracefs struct {
158 | 		sync.Once
159 | 		mounted  bool
160 | 		readable bool
161 | 		readErr  error
162 | 	}
163 | 
164 | 	pmu struct {
165 | 		sync.Mutex
166 | 		ok      map[string]struct{}
167 | 		missing map[string]error
168 | 	}
169 | }
170 | 
171 | func (env *perfTestEnv) capSysAdmin() bool {
172 | 	env.cap.Once.Do(env.initCap)
173 | 	return env.cap.sysadmin
174 | }
175 | 
176 | type capHeader struct {
177 | 	version uint32
178 | 	pid     int32
179 | }
180 | 
181 | type capData struct {
182 | 	effective uint32
183 | 	_         uint32 // permitted
184 | 	_         uint32 // inheritable
185 | }
186 | 
187 | // constants from uapi/linux/capability.h
188 | const (
189 | 	capSysAdmin = 21
190 | 	capV3       = 0x20080522
191 | )
192 | 
193 | func (env *perfTestEnv) initCap() {
194 | 	header := &capHeader{
195 | 		version: capV3,
196 | 		pid:     int32(unix.Getpid()),
197 | 	}
198 | 	data := make([]capData, 2)
199 | 	_, _, e := unix.Syscall(unix.SYS_CAPGET, uintptr(unsafe.Pointer(header)), uintptr(unsafe.Pointer(&data[0])), 0)
200 | 	if e != 0 {
201 | 		return
202 | 	}
203 | 	if data[0].effective&(1<<capSysAdmin) != 0 {
204 | 		env.cap.sysadmin = true
205 | 	}
206 | }
293 | 	if have > want {
294 | 		return fmt.Errorf("want perf_event_paranoid <= %d, have %d", want, have)
295 | 	}
296 | 	return nil
297 | }
298 | 
299 | // tracefsreq specifies a tracefs requirement for a test: tracefs must be
300 | // mounted at /sys/kernel/debug/tracing, and it must be readable.
301 | type tracefsreq struct{}
302 | 
303 | func (tracefsreq) Evaluate() error {
304 | 	if !testenv.tracefsMounted() {
305 | 		return errors.New("tracefs is not mounted at /sys/kernel/debug/tracing")
306 | 	}
307 | 	if ok, err := testenv.tracefsReadable(); !ok {
308 | 		return fmt.Errorf("tracefs is not readable: %v", err)
309 | 	}
310 | 	return nil
311 | }
312 | 
313 | var tracefs = tracefsreq{}
314 | 
315 | // pmu specifies a PMU requirement for a test.
316 | type pmu string
317 | 
318 | var (
319 | 	hardwarePMU   = pmu("hardware")
320 | 	softwarePMU   = pmu("software")
321 | 	tracepointPMU = pmu("tracepoint")
322 | )
323 | 
324 | func (u pmu) Evaluate() error {
325 | 	device := string(u)
326 | 	if device == "hardware" {
327 | 		device = "cpu" // TODO(acln): investigate
328 | 	}
329 | 	if ok, err := testenv.havePMU(device); !ok {
330 | 		return fmt.Errorf("%s PMU not supported: %v", device, err)
331 | 	}
332 | 	return nil
333 | }
334 | 
335 | type testRequirement interface {
336 | 	Evaluate() error
337 | }
338 | 
339 | func requires(t *testing.T, reqs ...testRequirement) {
340 | 	t.Helper()
341 | 
342 | 	sb := new(strings.Builder)
343 | 	unmet := 0
344 | 
345 | 	for _, req := range reqs {
346 | 		if err := req.Evaluate(); err != nil {
347 | 			if unmet > 0 {
348 | 				sb.WriteString("; ")
349 | 			}
350 | 			fmt.Fprint(sb, err)
351 | 			unmet++
352 | 		}
353 | 	}
354 | 
355 | 	switch unmet {
356 | 	case 0:
357 | 		return
358 | 	case 1:
359 | 		t.Skipf("unmet requirement: %s", sb.String())
360 | 	default:
361 | 		t.Skipf("unmet requirements: %s", sb.String())
362 | 	}
363 | }
364 | 
--------------------------------------------------------------------------------
/count_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 5 | package perf_test 6 | 7 | import ( 8 | "math/rand" 9 | "os" 10 | "runtime" 11 | "runtime/debug" 12 | "testing" 13 | "time" 14 | 15 | "golang.org/x/sys/unix" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func TestCount(t *testing.T) { 21 | t.Run("Hardware", testHardwareCounters) 22 | t.Run("Software", testSoftwareCounters) 23 | t.Run("HardwareCache", testHardwareCacheCounters) 24 | t.Run("Tracepoint", testSingleTracepoint) 25 | t.Run("IoctlAndCountIDsMatch", testIoctlAndCountIDsMatch) 26 | } 27 | 28 | func testHardwareCounters(t *testing.T) { 29 | requires(t, paranoid(1), hardwarePMU) 30 | 31 | t.Run("IPC", testIPC) 32 | } 33 | 34 | func testIPC(t *testing.T) { 35 | g := perf.Group{ 36 | CountFormat: perf.CountFormat{ 37 | ID: true, 38 | }, 39 | } 40 | g.Add(perf.Instructions, perf.CPUCycles) 41 | 42 | runtime.LockOSThread() 43 | defer runtime.UnlockOSThread() 44 | 45 | hw, err := g.Open(perf.CallingThread, perf.AnyCPU) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | defer hw.Close() 50 | 51 | var sum int64 52 | gc, err := hw.MeasureGroup(func() { 53 | for i := int64(0); i < 1000000; i++ { 54 | sum += i 55 | } 56 | }) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | for _, c := range gc.Values { 61 | if c.Value == 0 { 62 | t.Fatalf("didn't count %q", c.Label) 63 | } 64 | } 65 | insns := gc.Values[0].Value 66 | cycles := gc.Values[1].Value 67 | ipc := float64(insns) / float64(cycles) 68 | t.Logf("got %d instructions, %d cycles: %f IPC", insns, cycles, ipc) 69 | } 70 | 71 | func testSoftwareCounters(t *testing.T) { 72 | requires(t, paranoid(1), softwarePMU) 73 | 74 | t.Run("PageFaults", testPageFaults) 75 | } 76 | 77 | var fault []byte 78 | 79 | func testPageFaults(t *testing.T) { 80 | // TODO(acln): this test starts failing when run with -count > 4-5, 81 | // even though we're calling debug.FreeOSMemory. Why? 82 | pfa := &perf.Attr{ 83 | CountFormat: perf.CountFormat{ 84 | Running: true, 85 | Enabled: true, 86 | }, 87 | } 88 | perf.PageFaults.Configure(pfa) 89 | 90 | runtime.LockOSThread() 91 | defer runtime.UnlockOSThread() 92 | 93 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 94 | if err != nil { 95 | t.Fatal(err) 96 | } 97 | defer faults.Close() 98 | 99 | debug.FreeOSMemory() 100 | 101 | c, err := faults.Measure(func() { 102 | fault = make([]byte, 64*1024*1024) 103 | fault[0] = 1 104 | fault[63*1024*1024] = 1 105 | }) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | if c.Value == 0 { 110 | t.Fatal("didn't see a page fault") 111 | } 112 | t.Logf("saw %v: enabled: %v, running: %v", c, c.Enabled, c.Running) 113 | } 114 | 115 | func testHardwareCacheCounters(t *testing.T) { 116 | // TODO(acln): add PMU requirement? but how? 117 | // 118 | // $ ls /sys/bus/event_source/devices/*/type | xargs cat 119 | // 120 | // does not contain a 3, which is the ABI-specified value of 121 | // perf.HardwareCacheEvent. Maybe it's under the "cpu" PMU 122 | // somewhere. Investigate. 
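	// For reference, perf_event_open(2) encodes a hardware cache event as
	// config = id | (op << 8) | (result << 16) under type
	// PERF_TYPE_HW_CACHE (3); HardwareCacheCounter.Configure presumably
	// fills in that encoding from the Cache, Op, and Result fields.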
123 | requires(t, paranoid(1)) 124 | 125 | t.Run("L1DataMissesBadLocality", testL1DataMissesBadLocality) 126 | t.Run("L1DataMissesGoodLocality", testL1DataMissesGoodLocality) 127 | t.Run("L1Group", testL1Group) 128 | } 129 | 130 | func testL1DataMissesBadLocality(t *testing.T) { 131 | hwca := new(perf.Attr) 132 | hwcc := perf.HardwareCacheCounter{ 133 | Cache: perf.L1D, 134 | Op: perf.Read, 135 | Result: perf.Miss, 136 | } 137 | hwcc.Configure(hwca) 138 | 139 | runtime.LockOSThread() 140 | defer runtime.UnlockOSThread() 141 | 142 | l1dmisses, err := perf.Open(hwca, perf.CallingThread, perf.AnyCPU, nil) 143 | if err != nil { 144 | t.Fatal(err) 145 | } 146 | defer l1dmisses.Close() 147 | 148 | rng := rand.New(rand.NewSource(time.Now().Unix())) 149 | 150 | max := 1000 151 | 152 | var bad []interface{} 153 | for i := 0; i < 10000; i++ { 154 | bad = append(bad, rng.Intn(max)) 155 | } 156 | 157 | sink := 0 158 | c, err := l1dmisses.Measure(func() { 159 | for _, v := range bad { 160 | if v.(int) < max/2 { 161 | sink++ 162 | } 163 | } 164 | }) 165 | if err != nil { 166 | t.Fatal(err) 167 | } 168 | if c.Value == 0 { 169 | t.Fatalf("recorded no L1 data cache misses") 170 | } 171 | 172 | t.Logf("bad locality: got %d L1 data cache misses", c.Value) 173 | } 174 | 175 | func testL1DataMissesGoodLocality(t *testing.T) { 176 | hwca := new(perf.Attr) 177 | hwcc := perf.HardwareCacheCounter{ 178 | Cache: perf.L1D, 179 | Op: perf.Read, 180 | Result: perf.Miss, 181 | } 182 | hwcc.Configure(hwca) 183 | 184 | runtime.LockOSThread() 185 | defer runtime.UnlockOSThread() 186 | 187 | l1dmisses, err := perf.Open(hwca, perf.CallingThread, perf.AnyCPU, nil) 188 | if err != nil { 189 | t.Fatal(err) 190 | } 191 | defer l1dmisses.Close() 192 | 193 | rng := rand.New(rand.NewSource(time.Now().Unix())) 194 | 195 | max := 1000 196 | 197 | var contiguous []int 198 | for i := 0; i < 10000; i++ { 199 | contiguous = append(contiguous, rng.Intn(max)) 200 | } 201 | 202 | sink := 0 203 | c, err := l1dmisses.Measure(func() { 204 | for _, v := range contiguous { 205 | if v < max/2 { 206 | sink++ 207 | } 208 | } 209 | }) 210 | if err != nil { 211 | t.Fatal(err) 212 | } 213 | if c.Value == 0 { 214 | t.Fatalf("recorded no L1 data cache misses") 215 | } 216 | 217 | t.Logf("good locality: got %d L1 data cache misses", c.Value) 218 | } 219 | 220 | type l1testIdentity int 221 | 222 | func (v l1testIdentity) value() int { return int(v) } 223 | 224 | type l1testSquare int 225 | 226 | func (v l1testSquare) value() int { return int(v * v) } 227 | 228 | type l1testCube int 229 | 230 | func (v l1testCube) value() int { return int(v * v * v) } 231 | 232 | type valuer interface { 233 | value() int 234 | } 235 | 236 | func newValuer(n int) valuer { 237 | switch n % 3 { 238 | case 0: 239 | return l1testIdentity(n) 240 | case 1: 241 | return l1testSquare(n) 242 | default: 243 | return l1testCube(n) 244 | } 245 | } 246 | 247 | func testL1Group(t *testing.T) { 248 | caches := []perf.Cache{perf.L1D, perf.L1I} 249 | ops := []perf.CacheOp{perf.Read} 250 | results := []perf.CacheOpResult{perf.Miss} 251 | 252 | var g perf.Group 253 | g.Add(perf.HardwareCacheCounters(caches, ops, results)...) 
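	// Note (added explanation): HardwareCacheCounters builds one counter
	// per (cache, op, result) combination, so the slices above produce two
	// counters, L1D read misses and L1I read misses, which are read back
	// below as gc.Values[0] and gc.Values[1].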
254 | 255 | runtime.LockOSThread() 256 | defer runtime.UnlockOSThread() 257 | 258 | l1, err := g.Open(perf.CallingThread, perf.AnyCPU) 259 | if err != nil { 260 | t.Fatal(err) 261 | } 262 | defer l1.Close() 263 | 264 | const n = 100000 265 | 266 | valuers := make([]valuer, 0, n) 267 | for i := 0; i < n; i++ { 268 | valuers = append(valuers, newValuer(i)) 269 | } 270 | 271 | sum := 0 272 | gc, err := l1.MeasureGroup(func() { 273 | for i := 0; i < n; i++ { 274 | sum += valuers[i].value() 275 | } 276 | }) 277 | if err != nil { 278 | t.Fatal(err) 279 | } 280 | 281 | t.Logf("got %d L1 data cache misses, %d L1 instruction cache misses", 282 | gc.Values[0].Value, gc.Values[1].Value) 283 | } 284 | 285 | func testSingleTracepoint(t *testing.T) { 286 | requires(t, paranoid(1), tracepointPMU, tracefs) 287 | 288 | tests := []singleTracepointTest{ 289 | { 290 | category: "syscalls", 291 | event: "sys_enter_getpid", 292 | trigger: getpidTrigger, 293 | }, 294 | { 295 | category: "syscalls", 296 | event: "sys_enter_read", 297 | trigger: readTrigger, 298 | }, 299 | { 300 | category: "syscalls", 301 | event: "sys_enter_write", 302 | trigger: writeTrigger, 303 | }, 304 | } 305 | for _, tt := range tests { 306 | t.Run(tt.String(), tt.run) 307 | } 308 | } 309 | 310 | type singleTracepointTest struct { 311 | category string 312 | event string 313 | trigger func() 314 | } 315 | 316 | func (tt singleTracepointTest) run(t *testing.T) { 317 | tp := perf.Tracepoint(tt.category, tt.event) 318 | attr := new(perf.Attr) 319 | if err := tp.Configure(attr); err != nil { 320 | t.Fatal(err) 321 | } 322 | 323 | runtime.LockOSThread() 324 | defer runtime.UnlockOSThread() 325 | 326 | ev, err := perf.Open(attr, perf.CallingThread, perf.AnyCPU, nil) 327 | if err != nil { 328 | t.Fatal(err) 329 | } 330 | defer ev.Close() 331 | 332 | c, err := ev.Measure(func() { 333 | tt.trigger() 334 | }) 335 | if err != nil { 336 | t.Fatal(err) 337 | } 338 | if c.Value != 1 { 339 | t.Fatalf("got %d hits for %q, want 1 hit", c.Value, c.Label) 340 | } 341 | } 342 | 343 | func (tt singleTracepointTest) String() string { 344 | return tt.category + ":" + tt.event 345 | } 346 | 347 | func testIoctlAndCountIDsMatch(t *testing.T) { 348 | requires(t, paranoid(1), softwarePMU) 349 | 350 | pfa := new(perf.Attr) 351 | perf.PageFaults.Configure(pfa) 352 | pfa.CountFormat.ID = true 353 | 354 | runtime.LockOSThread() 355 | defer runtime.UnlockOSThread() 356 | 357 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 358 | if err != nil { 359 | t.Fatal(err) 360 | } 361 | 362 | runtime.GC() 363 | 364 | c, err := faults.Measure(func() { 365 | fault = make([]byte, 64*1024*1024) 366 | fault[0] = 1 367 | fault[63*1024*1024] = 1 368 | }) 369 | if err != nil { 370 | t.Fatal(err) 371 | } 372 | if c.Value == 0 { 373 | t.Fatal("didn't see a page fault") 374 | } 375 | id, err := faults.ID() 376 | if err != nil { 377 | t.Fatal(err) 378 | } 379 | if id != c.ID { 380 | t.Fatalf("got ID %d from ioctl, but %d from count read", id, c.ID) 381 | } 382 | } 383 | 384 | func getpidTrigger() { 385 | unix.Getpid() 386 | } 387 | 388 | func readTrigger() { 389 | zero, err := os.Open("/dev/zero") 390 | if err != nil { 391 | panic(err) 392 | } 393 | buf := make([]byte, 8) 394 | if _, err := zero.Read(buf); err != nil { 395 | panic(err) 396 | } 397 | } 398 | 399 | func writeTrigger() { 400 | null, err := os.OpenFile("/dev/null", os.O_WRONLY, 0200) 401 | if err != nil { 402 | panic(err) 403 | } 404 | if _, err := null.Write([]byte("big data")); err != nil { 405 | panic(err) 
406 | } 407 | } 408 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "fmt" 12 | "os" 13 | "os/exec" 14 | "runtime" 15 | "sync" 16 | "testing" 17 | "time" 18 | "unsafe" 19 | 20 | "golang.org/x/sys/unix" 21 | 22 | "github.com/elastic/go-perf" 23 | ) 24 | 25 | func TestPoll(t *testing.T) { 26 | t.Run("Timeout", testPollTimeout) 27 | t.Run("Cancel", testPollCancel) 28 | t.Run("Expired", testPollExpired) 29 | t.Run("DisabledExplicitly", testPollDisabledExplicitly) 30 | t.Run("DisabledByRefresh", testPollDisabledByRefresh) 31 | t.Run("DisabledByExit", testPollDisabledByExit) 32 | } 33 | 34 | func TestReadRecord(t *testing.T) { 35 | t.Run("Comm", testComm) 36 | t.Run("Exit", testExit) 37 | t.Run("CPUWideSwitch", testCPUWideSwitch) 38 | t.Run("SampleGetpid", testSampleGetpid) 39 | t.Run("SampleGetpidConcurrent", testSampleGetpidConcurrent) 40 | t.Run("SampleTracepointStack", testSampleTracepointStack) 41 | t.Run("RedirectedOutput", testRedirectedOutput) 42 | 43 | // TODO(acln): a test for the case when a record straddles the head 44 | // of the ring is missing. See readRawRecordNonblock. 45 | } 46 | 47 | func testPollTimeout(t *testing.T) { 48 | requires(t, paranoid(1), tracepointPMU, tracefs) 49 | 50 | ga := new(perf.Attr) 51 | ga.SetSamplePeriod(1) 52 | ga.SetWakeupEvents(1) 53 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 54 | if err := gtp.Configure(ga); err != nil { 55 | t.Fatal(err) 56 | } 57 | 58 | runtime.LockOSThread() 59 | defer runtime.UnlockOSThread() 60 | 61 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | defer getpid.Close() 66 | if err := getpid.MapRing(); err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | errch := make(chan error) 71 | timeout := 20 * time.Millisecond 72 | 73 | go func() { 74 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 75 | defer cancel() 76 | 77 | for i := 0; i < 2; i++ { 78 | _, err := getpid.ReadRecord(ctx) 79 | errch <- err 80 | } 81 | }() 82 | 83 | c, err := getpid.Measure(getpidTrigger) 84 | if err != nil { 85 | t.Fatal(err) 86 | } 87 | if c.Value != 1 { 88 | t.Fatalf("got %d hits for %q, want 1", c.Value, c.Label) 89 | } 90 | 91 | // For the first event, we should get a valid sample immediately. 92 | select { 93 | case <-time.After(10 * time.Millisecond): 94 | t.Fatalf("didn't get the first sample: timeout") 95 | case err := <-errch: 96 | if err != nil { 97 | t.Fatalf("got %v, want valid first sample", err) 98 | } 99 | } 100 | 101 | // Now, we should get a timeout. 
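	// Only one getpid fired, so the ring holds no further records: the
	// second ReadRecord call has nothing to return and should block until
	// the context deadline expires, surfacing context.DeadlineExceeded.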
102 | select { 103 | case <-time.After(2 * timeout): 104 | t.Logf("didn't time out, waiting") 105 | err := <-errch 106 | t.Fatalf("got %v", err) 107 | case err := <-errch: 108 | if err != context.DeadlineExceeded { 109 | t.Fatalf("got %v, want context.DeadlineExceeded", err) 110 | } 111 | } 112 | } 113 | 114 | func testPollCancel(t *testing.T) { 115 | requires(t, paranoid(1), tracepointPMU, tracefs) 116 | 117 | ga := new(perf.Attr) 118 | ga.SetSamplePeriod(1) 119 | ga.SetWakeupEvents(1) 120 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 121 | if err := gtp.Configure(ga); err != nil { 122 | t.Fatal(err) 123 | } 124 | 125 | runtime.LockOSThread() 126 | defer runtime.UnlockOSThread() 127 | 128 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 129 | if err != nil { 130 | t.Fatal(err) 131 | } 132 | defer getpid.Close() 133 | if err := getpid.MapRing(); err != nil { 134 | t.Fatal(err) 135 | } 136 | 137 | ctx, cancel := context.WithCancel(context.Background()) 138 | defer cancel() 139 | 140 | errch := make(chan error) 141 | 142 | go func() { 143 | for i := 0; i < 2; i++ { 144 | _, err := getpid.ReadRecord(ctx) 145 | errch <- err 146 | } 147 | }() 148 | 149 | c, err := getpid.Measure(getpidTrigger) 150 | if err != nil { 151 | t.Fatal(err) 152 | } 153 | if c.Value != 1 { 154 | t.Fatalf("got %d hits for %q, want 1", c.Value, c.Label) 155 | } 156 | 157 | // For the first event, we should get a valid sample. 158 | select { 159 | case <-time.After(10 * time.Millisecond): 160 | t.Fatalf("didn't get the first sample: timeout") 161 | case err := <-errch: 162 | if err != nil { 163 | t.Fatalf("got %v, want valid first sample", err) 164 | } 165 | } 166 | 167 | // The goroutine reading the records is now blocked in ReadRecord. 168 | // Cancel the context and observe the results. We should see 169 | // context.Canceled quite quickly. 170 | cancel() 171 | 172 | select { 173 | case <-time.After(10 * time.Millisecond): 174 | t.Fatalf("context cancel didn't unblock ReadRecord") 175 | case err := <-errch: 176 | if err != context.Canceled { 177 | t.Fatalf("got %v, want %v", err, context.Canceled) 178 | } 179 | } 180 | } 181 | 182 | func testPollExpired(t *testing.T) { 183 | requires(t, paranoid(1), softwarePMU) 184 | 185 | da := new(perf.Attr) 186 | perf.Dummy.Configure(da) 187 | 188 | runtime.LockOSThread() 189 | defer runtime.UnlockOSThread() 190 | 191 | dummy, err := perf.Open(da, perf.CallingThread, perf.AnyCPU, nil) 192 | if err != nil { 193 | t.Fatal(err) 194 | } 195 | defer dummy.Close() 196 | if err := dummy.MapRing(); err != nil { 197 | t.Fatal(err) 198 | } 199 | 200 | timeout := 1 * time.Millisecond 201 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 202 | defer cancel() 203 | 204 | // Wait until the deadline is in the past. 205 | time.Sleep(2 * timeout) 206 | 207 | rec, err := dummy.ReadRecord(ctx) 208 | if err == nil { 209 | t.Fatalf("got nil error and record %#v", rec) 210 | } 211 | if err != context.DeadlineExceeded { 212 | t.Fatalf("got %v, want context.DeadlineExceeded", err) 213 | } 214 | } 215 | 216 | const errDisabledTestEnv = "PERF_TEST_ERR_DISABLED" 217 | 218 | func init() { 219 | // In child process of testErrDisabledProcessExist. 220 | if os.Getenv(errDisabledTestEnv) != "1" { 221 | return 222 | } 223 | 224 | readyevfd := 3 225 | startevfd := 4 226 | 227 | // Signal to the parent that we can start. 
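	// (evsig and evwait are this file's eventfd helpers; descriptors 3
	// and 4 correspond to the first and second entries of cmd.ExtraFiles
	// in the parent, since a child's extra files start at fd 3.)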
228 | evsig(readyevfd) 229 | 230 | // Wait for the parent to tell us that they have set up performance 231 | // monitoring, and are ready to observe the event. 232 | evwait(startevfd) 233 | 234 | // Call getpid, then exit. Parent will see POLLIN for getpid, then 235 | // POLLHUP because we exited. 236 | unix.Getpid() 237 | os.Exit(0) 238 | } 239 | 240 | func testPollDisabledByExit(t *testing.T) { 241 | requires(t, paranoid(1), tracepointPMU, tracefs) 242 | 243 | // Re-exec ourselves with PERF_TEST_ERR_DISABLED=1. 244 | self, err := os.Executable() 245 | if err != nil { 246 | t.Fatal(err) 247 | } 248 | 249 | readyevfd, err := unix.Eventfd(0, 0) 250 | if err != nil { 251 | t.Fatal(err) 252 | } 253 | defer unix.Close(readyevfd) 254 | 255 | startevfd, err := unix.Eventfd(0, 0) 256 | if err != nil { 257 | t.Fatal(err) 258 | } 259 | defer unix.Close(startevfd) 260 | 261 | cmd := exec.Command(self) 262 | cmd.Env = append(os.Environ(), errDisabledTestEnv+"=1") 263 | cmd.ExtraFiles = []*os.File{ 264 | os.NewFile(uintptr(readyevfd), "readyevfd"), 265 | os.NewFile(uintptr(startevfd), "startevfd"), 266 | } 267 | if err := cmd.Start(); err != nil { 268 | t.Fatal(err) 269 | } 270 | 271 | // Set up performance monitoring for the child process. 272 | ga := &perf.Attr{ 273 | Options: perf.Options{ 274 | Disabled: true, 275 | }, 276 | SampleFormat: perf.SampleFormat{ 277 | Tid: true, 278 | }, 279 | } 280 | ga.SetSamplePeriod(1) 281 | ga.SetWakeupEvents(1) 282 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 283 | if err := gtp.Configure(ga); err != nil { 284 | t.Fatal(err) 285 | } 286 | 287 | runtime.LockOSThread() 288 | defer runtime.UnlockOSThread() 289 | 290 | getpid, err := perf.Open(ga, cmd.Process.Pid, perf.AnyCPU, nil) 291 | if err != nil { 292 | t.Fatal(err) 293 | } 294 | defer getpid.Close() 295 | if err := getpid.MapRing(); err != nil { 296 | t.Fatal(err) 297 | } 298 | 299 | // Wait for the child process to be ready. 300 | evwait(readyevfd) 301 | 302 | // Now that it is, enable the event. 303 | if err := getpid.Enable(); err != nil { 304 | t.Fatal(err) 305 | } 306 | 307 | // Signal to the child that it should call getpid now. 308 | // It will call getpid, then exit. 309 | evsig(startevfd) 310 | if err := cmd.Wait(); err != nil { 311 | t.Fatal(err) 312 | } 313 | 314 | // Read two records. The first one should be valid, 315 | // the second one should not, and the second error 316 | // should be ErrDisabled. 
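	// (The getpid sample is buffered in the ring before the child exits;
	// once the child is gone, the kernel marks the event disabled and
	// poll reports POLLHUP, which ReadRecord surfaces as perf.ErrDisabled
	// after the buffered record has been drained.)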
317 | 	timeout := 100 * time.Millisecond
318 | 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
319 | 	defer cancel()
320 | 	rec1, err1 := getpid.ReadRecord(ctx)
321 | 	rec2, err2 := getpid.ReadRecord(ctx)
322 | 
323 | 	if err1 != nil {
324 | 		t.Errorf("first error was %v, want nil", err1)
325 | 	}
326 | 	sr, ok := rec1.(*perf.SampleRecord)
327 | 	if !ok {
328 | 		t.Errorf("first record: got %T, want *perf.SampleRecord", rec1)
329 | 	}
330 | 	if ok && int(sr.Pid) != cmd.Process.Pid {
331 | 		t.Errorf("first record: got pid %d in the sample, want %d",
332 | 			sr.Pid, cmd.Process.Pid)
333 | 	}
334 | 
335 | 	if err2 != perf.ErrDisabled {
336 | 		t.Errorf("second record: error was %v, want ErrDisabled", err2)
337 | 	}
338 | 	if rec2 != nil {
339 | 		t.Errorf("second record: got %#v, want nil", rec2)
340 | 	}
341 | }
342 | 
343 | func testPollDisabledExplicitly(t *testing.T) {
344 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
345 | 
346 | 	ga := &perf.Attr{
347 | 		SampleFormat: perf.SampleFormat{
348 | 			Tid: true,
349 | 		},
350 | 		Options: perf.Options{
351 | 			Disabled: true,
352 | 		},
353 | 	}
354 | 	ga.SetSamplePeriod(1)
355 | 	ga.SetWakeupEvents(1)
356 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
357 | 	if err := gtp.Configure(ga); err != nil {
358 | 		t.Fatal(err)
359 | 	}
360 | 
361 | 	runtime.LockOSThread()
362 | 	defer runtime.UnlockOSThread()
363 | 
364 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
365 | 	if err != nil {
366 | 		t.Fatal(err)
367 | 	}
368 | 	defer getpid.Close()
369 | 	if err := getpid.MapRing(); err != nil {
370 | 		t.Fatal(err)
371 | 	}
372 | 
373 | 	const n = 3
374 | 
375 | 	ctx, cancel := context.WithCancel(context.Background())
376 | 	defer cancel()
377 | 
378 | 	done := make(chan struct{})
379 | 	seen := 0
380 | 
381 | 	go func() {
382 | 		for i := 0; i < 2*n; i++ {
383 | 			_, err := getpid.ReadRecord(ctx)
384 | 			if err == nil {
385 | 				seen++
386 | 			}
387 | 		}
388 | 		close(done)
389 | 	}()
390 | 
391 | 	if err := getpid.Enable(); err != nil {
392 | 		t.Fatal(err)
393 | 	}
394 | 
395 | 	for i := 0; i < n; i++ {
396 | 		getpidTrigger()
397 | 	}
398 | 
399 | 	if err := getpid.Disable(); err != nil {
400 | 		t.Fatal(err)
401 | 	}
402 | 
403 | 	for i := 0; i < n; i++ {
404 | 		getpidTrigger()
405 | 	}
406 | 
407 | 	cancel()
408 | 	<-done
409 | 
410 | 	if seen != n {
411 | 		t.Fatalf("saw %d events, want %d", seen, n)
412 | 	}
413 | }
414 | 
415 | func testPollDisabledByRefresh(t *testing.T) {
416 | 	// TODO(acln): investigate the following: the man page says that
417 | 	// POLLHUP should be indicated on the file descriptor when the counter
418 | 	// associated with a call to Refresh reaches zero. I have not been
419 | 	// able to observe this. When the counter reaches zero, the event
420 | 	// is disabled (which is what this test shows), but POLLHUP doesn't
421 | 	// seem to be indicated on the file descriptor.
422 | 	//
423 | 	// If we ever figure out how to observe a HUP there, we should
424 | 	// make ReadRawRecord return ErrDisabled. In the meantime, leave
425 | 	// things as-is.
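	//
	// Refresh(n) arms the event for n more overflows, after which the
	// kernel disables it on its own. The goroutine below therefore sees
	// samples for the first n getpid calls only, even though 2n calls
	// are made in total.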
426 | requires(t, paranoid(1), tracepointPMU, tracefs) 427 | 428 | ga := &perf.Attr{ 429 | SampleFormat: perf.SampleFormat{ 430 | Tid: true, 431 | }, 432 | Options: perf.Options{ 433 | Disabled: true, 434 | }, 435 | } 436 | ga.SetSamplePeriod(1) 437 | ga.SetWakeupEvents(1) 438 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 439 | if err := gtp.Configure(ga); err != nil { 440 | t.Fatal(err) 441 | } 442 | 443 | runtime.LockOSThread() 444 | defer runtime.UnlockOSThread() 445 | 446 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 447 | if err != nil { 448 | t.Fatal(err) 449 | } 450 | defer getpid.Close() 451 | if err := getpid.MapRing(); err != nil { 452 | t.Fatal(err) 453 | } 454 | 455 | const n = 3 456 | 457 | ctx, cancel := context.WithCancel(context.Background()) 458 | defer cancel() 459 | 460 | done := make(chan struct{}) 461 | seen := 0 462 | 463 | go func() { 464 | for i := 0; i < 2*n; i++ { 465 | _, err := getpid.ReadRecord(ctx) 466 | if err == nil { 467 | seen++ 468 | } 469 | } 470 | close(done) 471 | }() 472 | 473 | if err := getpid.Refresh(n); err != nil { 474 | t.Fatal(err) 475 | } 476 | 477 | for i := 0; i < n; i++ { 478 | getpidTrigger() 479 | } 480 | 481 | for i := 0; i < n; i++ { 482 | getpidTrigger() 483 | } 484 | 485 | cancel() 486 | <-done 487 | 488 | if seen != n { 489 | t.Fatalf("saw %d events, want %d", seen, n) 490 | } 491 | } 492 | 493 | const ( 494 | commTestEnv = "PERF_TEST_COMM" 495 | commTestName = "commtest" 496 | ) 497 | 498 | func init() { 499 | // In child process of testComm. 500 | if os.Getenv(commTestEnv) != "1" { 501 | return 502 | } 503 | 504 | readyevfd := 3 505 | startevfd := 4 506 | sawcommevfd := 5 507 | 508 | // Signal to the parent that we can start. 509 | evsig(readyevfd) 510 | 511 | // Wait for the parent to tell us that they have set up performance 512 | // monitoring, and are ready to observe the event. 513 | evwait(startevfd) 514 | 515 | // Change our name. 516 | b := make([]byte, len(commTestName)+1) 517 | copy(b, commTestName) 518 | err := unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&b[0])), 0, 0, 0) 519 | runtime.KeepAlive(&b[0]) 520 | if err != nil { 521 | fmt.Fprint(os.Stderr, err) 522 | os.Exit(2) 523 | } 524 | 525 | // TODO(acln): investigate the legitimacy of the following crutch. 526 | // 527 | // Wait for the parent to see that we changed our name, then exit. 528 | // 529 | // If we do not wait here, there is a terrible race condition waiting 530 | // to happen: If we PR_SET_NAME in the child, then immediately exit, 531 | // the other side may not see POLLIN on the comm record: it may see 532 | // POLLHUP directly, even though a comm record was actually written 533 | // to the ring in the meantime. Why we get POLLHUP directly, and not 534 | // POLLIN before it, is unclear. The machinery to deal with this 535 | // eventuality in the poller does not exist yet, and at the time 536 | // when this comment was written, I have found no good solutions to 537 | // this conundrum. 538 | // 539 | // So we live with it, but still try to make our test pass. 540 | evwait(sawcommevfd) 541 | os.Exit(0) 542 | } 543 | 544 | func testComm(t *testing.T) { 545 | t.Skip("flaky. TODO(acln): investigate") 546 | 547 | requires(t, paranoid(1), softwarePMU) 548 | 549 | // Re-exec ourselves with PERF_TEST_COMM=1. 
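	// The re-exec'd child sees three eventfds inherited through
	// ExtraFiles, at file descriptors 3 (ready), 4 (start), and
	// 5 (saw-comm), matching the init function above.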
550 | self, err := os.Executable() 551 | if err != nil { 552 | t.Fatal(err) 553 | } 554 | 555 | readyevfd, err := unix.Eventfd(0, 0) 556 | if err != nil { 557 | t.Fatal(err) 558 | } 559 | defer unix.Close(readyevfd) 560 | 561 | startevfd, err := unix.Eventfd(0, 0) 562 | if err != nil { 563 | t.Fatal(err) 564 | } 565 | defer unix.Close(startevfd) 566 | 567 | sawcommevfd, err := unix.Eventfd(0, 0) 568 | if err != nil { 569 | t.Fatal(err) 570 | } 571 | defer unix.Close(sawcommevfd) 572 | 573 | cmd := exec.Command(self) 574 | cmd.Env = append(os.Environ(), commTestEnv+"=1") 575 | cmd.ExtraFiles = []*os.File{ 576 | os.NewFile(uintptr(readyevfd), "readyevfd"), 577 | os.NewFile(uintptr(startevfd), "startevfd"), 578 | os.NewFile(uintptr(sawcommevfd), "sawcommevfd"), 579 | } 580 | if err := cmd.Start(); err != nil { 581 | t.Fatal(err) 582 | } 583 | 584 | // Set up performance monitoring for the child process. 585 | ca := &perf.Attr{ 586 | Options: perf.Options{ 587 | Disabled: true, 588 | Comm: true, 589 | }, 590 | SampleFormat: perf.SampleFormat{ 591 | Tid: true, 592 | }, 593 | } 594 | ca.SetSamplePeriod(1) 595 | ca.SetWakeupEvents(1) 596 | perf.Dummy.Configure(ca) 597 | 598 | runtime.LockOSThread() 599 | defer runtime.UnlockOSThread() 600 | 601 | comm, err := perf.Open(ca, cmd.Process.Pid, perf.AnyCPU, nil) 602 | if err != nil { 603 | t.Fatal(err) 604 | } 605 | defer comm.Close() 606 | if err := comm.MapRing(); err != nil { 607 | t.Fatal(err) 608 | } 609 | 610 | // Wait for the child process to be ready. 611 | evwait(readyevfd) 612 | 613 | // Now that it is, enable the event. 614 | if err := comm.Enable(); err != nil { 615 | t.Fatal(err) 616 | } 617 | 618 | // Signal to the child that it should change its name. 619 | evsig(startevfd) 620 | 621 | // Read the CommRecord. 622 | ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) 623 | defer cancel() 624 | rec, rerr := comm.ReadRecord(ctx) 625 | 626 | // Signal to the child that it should exit, and wait for it to do so. 627 | evsig(sawcommevfd) 628 | if err := cmd.Wait(); err != nil { 629 | t.Fatal(err) 630 | } 631 | 632 | // Observe the CommRecord. 633 | if rerr != nil { 634 | t.Fatalf("got %v, want valid record", rerr) 635 | } 636 | cr, ok := rec.(*perf.CommRecord) 637 | if !ok { 638 | t.Fatalf("got %T, want *perf.CommRecord", rec) 639 | } 640 | if int(cr.Pid) != cmd.Process.Pid { 641 | t.Errorf("got pid %d, want %d", cr.Pid, cmd.Process.Pid) 642 | } 643 | if cr.NewName != commTestName { 644 | t.Errorf("new name = %q, want %q", cr.NewName, commTestName) 645 | } 646 | if cr.WasExec() { 647 | t.Error("got WasExec() == true, want false") 648 | } 649 | } 650 | 651 | const ( 652 | exitTestEnv = "PERF_TEST_EXIT" 653 | exitTestCode = 42 654 | ) 655 | 656 | func init() { 657 | // In the child process of testExit. 658 | if os.Getenv("PERF_TEST_EXIT") != "1" { 659 | return 660 | } 661 | 662 | readyevfd := 3 663 | startevfd := 4 664 | 665 | // Signal to the parent that we can start. 666 | evsig(readyevfd) 667 | 668 | // Wait for the parent to tell us that they have set up performance 669 | // monitoring, and are ready to observe the event. 670 | evwait(startevfd) 671 | 672 | os.Exit(exitTestCode) 673 | } 674 | 675 | func testExit(t *testing.T) { 676 | requires(t, paranoid(1), softwarePMU) 677 | 678 | // Re-exec ourselves with PERF_TEST_EXIT=1. 
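	// The re-exec'd child sees two eventfds inherited through
	// ExtraFiles, at file descriptors 3 (ready) and 4 (start),
	// matching the init function above.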
679 | self, err := os.Executable() 680 | if err != nil { 681 | t.Fatal(err) 682 | } 683 | 684 | readyevfd, err := unix.Eventfd(0, 0) 685 | if err != nil { 686 | t.Fatal(err) 687 | } 688 | defer unix.Close(readyevfd) 689 | 690 | startevfd, err := unix.Eventfd(0, 0) 691 | if err != nil { 692 | t.Fatal(err) 693 | } 694 | defer unix.Close(startevfd) 695 | 696 | cmd := exec.Command(self) 697 | cmd.Env = append(os.Environ(), exitTestEnv+"=1") 698 | cmd.ExtraFiles = []*os.File{ 699 | os.NewFile(uintptr(readyevfd), "readyevfd"), 700 | os.NewFile(uintptr(startevfd), "startevfd"), 701 | } 702 | if err := cmd.Start(); err != nil { 703 | t.Fatal(err) 704 | } 705 | pid := cmd.Process.Pid 706 | 707 | // Set up performance monitoring for the child process. 708 | ca := &perf.Attr{ 709 | Options: perf.Options{ 710 | Disabled: true, 711 | Task: true, 712 | }, 713 | SampleFormat: perf.SampleFormat{ 714 | Tid: true, 715 | }, 716 | } 717 | ca.SetSamplePeriod(1) 718 | ca.SetWakeupEvents(1) 719 | perf.Dummy.Configure(ca) 720 | 721 | runtime.LockOSThread() 722 | defer runtime.UnlockOSThread() 723 | 724 | comm, err := perf.Open(ca, pid, perf.AnyCPU, nil) 725 | if err != nil { 726 | t.Fatal(err) 727 | } 728 | defer comm.Close() 729 | if err := comm.MapRing(); err != nil { 730 | t.Fatal(err) 731 | } 732 | 733 | // Wait for the child process to be ready. 734 | evwait(readyevfd) 735 | 736 | // Now that it is, enable the event. 737 | if err := comm.Enable(); err != nil { 738 | t.Fatal(err) 739 | } 740 | 741 | // Signal to the child that it should exit now. 742 | evsig(startevfd) 743 | 744 | // Observe the exit code from os/exec first. 745 | err = cmd.Wait() 746 | if err == nil { 747 | t.Fatal("child exited with code 0") 748 | } 749 | ee, ok := err.(*exec.ExitError) 750 | if !ok { 751 | t.Fatalf("got %T, want *exec.ExitError", err) 752 | } 753 | if got := ee.ExitCode(); got != exitTestCode { 754 | t.Fatalf("got exit code %d, want %d", got, exitTestCode) 755 | } 756 | 757 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 758 | defer cancel() 759 | rec, err := comm.ReadRecord(ctx) 760 | if err != nil { 761 | t.Fatalf("got %v, want valid record", err) 762 | } 763 | er, ok := rec.(*perf.ExitRecord) 764 | if !ok { 765 | t.Fatalf("got %T, want *perf.ExitRecord", rec) 766 | } 767 | if int(er.Pid) != pid { 768 | t.Errorf("got pid %d, want %d", er.Pid, pid) 769 | } 770 | // Unfortunately, no er.Ppid and er.Ptid test. The Go runtime 771 | // interferes with us. 
772 | } 773 | 774 | func testCPUWideSwitch(t *testing.T) { 775 | requires(t, paranoid(0), softwarePMU) 776 | 777 | var wg sync.WaitGroup 778 | ready := make(chan error) 779 | start := make(chan struct{}) 780 | pingpong := make(chan struct{}) 781 | var recvtid, sendtid int 782 | 783 | const numpingpongs = 4 784 | const cpu = 0 785 | 786 | fn := func(recv bool) { 787 | defer wg.Done() 788 | 789 | runtime.LockOSThread() 790 | defer runtime.UnlockOSThread() 791 | 792 | var cpuset unix.CPUSet 793 | cpuset.Set(cpu) 794 | if err := unix.SchedSetaffinity(0, &cpuset); err != nil { 795 | ready <- err 796 | return 797 | } 798 | 799 | if !recv { 800 | sendtid = unix.Gettid() 801 | ready <- nil 802 | <-start 803 | for i := 0; i < numpingpongs; i++ { 804 | pingpong <- struct{}{} 805 | <-pingpong 806 | } 807 | } else { 808 | recvtid = unix.Gettid() 809 | ready <- nil 810 | <-start 811 | for i := 0; i < numpingpongs; i++ { 812 | <-pingpong 813 | pingpong <- struct{}{} 814 | } 815 | } 816 | } 817 | 818 | wg.Add(2) 819 | 820 | go fn(true) 821 | go fn(false) 822 | 823 | if err := <-ready; err != nil { 824 | t.Fatal(err) 825 | } 826 | if err := <-ready; err != nil { 827 | t.Fatal(err) 828 | } 829 | 830 | sa := &perf.Attr{ 831 | Options: perf.Options{ 832 | ExcludeKernel: true, 833 | Disabled: true, 834 | ContextSwitch: true, 835 | }, 836 | } 837 | sa.SetSamplePeriod(1) 838 | sa.SetWakeupEvents(1) 839 | perf.ContextSwitches.Configure(sa) 840 | 841 | switches, err := perf.Open(sa, perf.AllThreads, cpu, nil) 842 | if err != nil { 843 | t.Fatal(err) 844 | } 845 | defer switches.Close() 846 | if err := switches.MapRing(); err != nil { 847 | t.Fatal(err) 848 | } 849 | 850 | if err := switches.Enable(); err != nil { 851 | t.Fatal(err) 852 | } 853 | 854 | // Run the ping-pong game. 
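	// Each ping-pong forces context switches on CPU 0 between the two
	// pinned threads, so the ring should contain switch-in and
	// switch-out records for both tids, plus scheduler activity on tid 0.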
855 | 	close(start)
856 | 	wg.Wait()
857 | 
858 | 	intorecv, outofrecv := 0, 0
859 | 	intosend, outofsend := 0, 0
860 | 	intosched, outofsched := 0, 0
861 | 
862 | 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
863 | 	defer cancel()
864 | 
865 | 	var rerr error
866 | 
867 | 	for {
868 | 		sawinto := intorecv >= numpingpongs && intosend >= numpingpongs
869 | 		sawoutof := outofrecv >= numpingpongs && outofsend >= numpingpongs
870 | 		if sawinto && sawoutof {
871 | 			break
872 | 		}
873 | 		rec, err := switches.ReadRecord(ctx)
874 | 		if err != nil {
875 | 			rerr = err
876 | 			break
877 | 		}
878 | 		sr, ok := rec.(*perf.SwitchCPUWideRecord)
879 | 		if !ok {
880 | 			t.Fatalf("got %T, want *perf.SwitchCPUWideRecord", rec)
881 | 		}
882 | 		switch int(sr.Tid) {
883 | 		case 0:
884 | 			if sr.Out() {
885 | 				outofsched++
886 | 			} else {
887 | 				intosched++
888 | 			}
889 | 		case recvtid:
890 | 			if sr.Out() {
891 | 				outofrecv++
892 | 			} else {
893 | 				intorecv++
894 | 			}
895 | 		case sendtid:
896 | 			if sr.Out() {
897 | 				outofsend++
898 | 			} else {
899 | 				intosend++
900 | 			}
901 | 		}
902 | 	}
903 | 
904 | 	if rerr != nil {
905 | 		t.Fatal(rerr)
906 | 	}
907 | 
908 | 	t.Logf("%d ping-pongs", numpingpongs)
909 | 	t.Logf("recv switches: %d in, %d out", intorecv, outofrecv)
910 | 	t.Logf("send switches: %d in, %d out", intosend, outofsend)
911 | 	t.Logf("scheduler switches: %d in, %d out", intosched, outofsched)
912 | }
913 | 
914 | func testSampleGetpid(t *testing.T) {
915 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
916 | 
917 | 	ga := &perf.Attr{
918 | 		SampleFormat: perf.SampleFormat{
919 | 			Tid: true,
920 | 		},
921 | 	}
922 | 	ga.SetSamplePeriod(1)
923 | 	ga.SetWakeupEvents(1)
924 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
925 | 	if err := gtp.Configure(ga); err != nil {
926 | 		t.Fatal(err)
927 | 	}
928 | 
929 | 	runtime.LockOSThread()
930 | 	defer runtime.UnlockOSThread()
931 | 
932 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
933 | 	if err != nil {
934 | 		t.Fatal(err)
935 | 	}
936 | 	defer getpid.Close()
937 | 	if err := getpid.MapRing(); err != nil {
938 | 		t.Fatal(err)
939 | 	}
940 | 
941 | 	c, err := getpid.Measure(getpidTrigger)
942 | 	if err != nil {
943 | 		t.Fatal(err)
944 | 	}
945 | 	if c.Value != 1 {
946 | 		t.Fatalf("got %d hits for %q, want 1 hit", c.Value, c.Label)
947 | 	}
948 | 
949 | 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond)
950 | 	defer cancel()
951 | 	rec, err := getpid.ReadRecord(ctx)
952 | 	if err != nil {
953 | 		t.Fatalf("got %v, want a valid sample record", err)
954 | 	}
955 | 	sr, ok := rec.(*perf.SampleRecord)
956 | 	if !ok {
957 | 		t.Fatalf("got a %T, want a SampleRecord", rec)
958 | 	}
959 | 	pid, tid := unix.Getpid(), unix.Gettid()
960 | 	if int(sr.Pid) != pid || int(sr.Tid) != tid {
961 | 		t.Fatalf("got pid=%d tid=%d, want pid=%d tid=%d", sr.Pid, sr.Tid, pid, tid)
962 | 	}
963 | }
964 | 
965 | func testSampleGetpidConcurrent(t *testing.T) {
966 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
967 | 
968 | 	ga := &perf.Attr{
969 | 		SampleFormat: perf.SampleFormat{
970 | 			Tid: true,
971 | 		},
972 | 	}
973 | 	ga.SetSamplePeriod(1)
974 | 	ga.SetWakeupEvents(1)
975 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
976 | 	if err := gtp.Configure(ga); err != nil {
977 | 		t.Fatal(err)
978 | 	}
979 | 
980 | 	runtime.LockOSThread()
981 | 	defer runtime.UnlockOSThread()
982 | 
983 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
984 | 	if err != nil {
985 | 		t.Fatal(err)
986 | 	}
987 | 	defer getpid.Close()
988 | 	if err := getpid.MapRing(); err != nil {
989 | 		t.Fatal(err)
990 | 	}
991 | 
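	// Read samples from a separate goroutine while this thread triggers
	// getpid, exercising ReadRecord concurrently with Measure.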
992 | const n = 6 993 | sawSample := make(chan bool) 994 | 995 | go func() { 996 | for i := 0; i < n; i++ { 997 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 998 | defer cancel() 999 | rec, err := getpid.ReadRecord(ctx) 1000 | _, isSample := rec.(*perf.SampleRecord) 1001 | if err == nil && isSample { 1002 | sawSample <- true 1003 | } else { 1004 | sawSample <- false 1005 | } 1006 | } 1007 | }() 1008 | 1009 | seen := 0 1010 | 1011 | c, err := getpid.Measure(func() { 1012 | for i := 0; i < n; i++ { 1013 | getpidTrigger() 1014 | if ok := <-sawSample; ok { 1015 | seen++ 1016 | } 1017 | } 1018 | }) 1019 | if err != nil { 1020 | t.Fatal(err) 1021 | } 1022 | if c.Value != n { 1023 | t.Fatalf("got %d hits for %q, want %d", c.Value, c.Label, n) 1024 | } 1025 | if seen != n { 1026 | t.Fatalf("saw %d samples, want %d", seen, n) 1027 | } 1028 | } 1029 | 1030 | func testSampleTracepointStack(t *testing.T) { 1031 | requires(t, paranoid(1), tracepointPMU, tracefs) 1032 | 1033 | ga := &perf.Attr{ 1034 | Options: perf.Options{ 1035 | Disabled: true, 1036 | }, 1037 | SampleFormat: perf.SampleFormat{ 1038 | Tid: true, 1039 | Time: true, 1040 | CPU: true, 1041 | IP: true, 1042 | Callchain: true, 1043 | }, 1044 | } 1045 | ga.SetSamplePeriod(1) 1046 | ga.SetWakeupEvents(1) 1047 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 1048 | if err := gtp.Configure(ga); err != nil { 1049 | t.Fatal(err) 1050 | } 1051 | 1052 | runtime.LockOSThread() 1053 | defer runtime.UnlockOSThread() 1054 | 1055 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 1056 | if err != nil { 1057 | t.Fatal(err) 1058 | } 1059 | defer getpid.Close() 1060 | if err := getpid.MapRing(); err != nil { 1061 | t.Fatal(err) 1062 | } 1063 | 1064 | pcs := make([]uintptr, 10) 1065 | var n int 1066 | 1067 | c, err := getpid.Measure(func() { 1068 | n = runtime.Callers(2, pcs) 1069 | getpidTrigger() 1070 | }) 1071 | if err != nil { 1072 | t.Fatal(err) 1073 | } 1074 | if c.Value != 1 { 1075 | t.Fatalf("want 1 hit for %q, got %d", c.Label, c.Value) 1076 | } 1077 | 1078 | pcs = pcs[:n] 1079 | 1080 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 1081 | defer cancel() 1082 | rec, err := getpid.ReadRecord(ctx) 1083 | if err != nil { 1084 | t.Fatal(err) 1085 | } 1086 | getpidsample, ok := rec.(*perf.SampleRecord) 1087 | if !ok { 1088 | t.Fatalf("got a %T, want a *SampleRecord", rec) 1089 | } 1090 | 1091 | i := len(pcs) - 1 1092 | j := len(getpidsample.Callchain) - 1 1093 | 1094 | for i >= 0 && j >= 0 { 1095 | gopc := pcs[i] 1096 | kpc := getpidsample.Callchain[j] 1097 | if gopc != uintptr(kpc) { 1098 | t.Fatalf("Go (%#x) and kernel (%#x) PC differ", gopc, kpc) 1099 | } 1100 | i-- 1101 | j-- 1102 | } 1103 | 1104 | logFrame := func(pc uintptr) { 1105 | fn := runtime.FuncForPC(pc) 1106 | if fn == nil { 1107 | t.Logf("%#x ", pc) 1108 | } else { 1109 | file, line := fn.FileLine(pc) 1110 | t.Logf("%#x %s:%d %s", pc, file, line, fn.Name()) 1111 | } 1112 | } 1113 | 1114 | t.Log("kernel callchain:") 1115 | for _, kpc := range getpidsample.Callchain { 1116 | logFrame(uintptr(kpc)) 1117 | } 1118 | 1119 | t.Log() 1120 | 1121 | t.Logf("Go stack:") 1122 | for _, gopc := range pcs { 1123 | logFrame(gopc) 1124 | } 1125 | } 1126 | 1127 | func testRedirectedOutput(t *testing.T) { 1128 | requires(t, paranoid(1), tracepointPMU, tracefs) 1129 | 1130 | ga := &perf.Attr{ 1131 | SampleFormat: perf.SampleFormat{ 1132 | Tid: true, 1133 | Time: true, 1134 | CPU: true, 1135 | Addr: true, 1136 | 
StreamID: true, 1137 | }, 1138 | CountFormat: perf.CountFormat{ 1139 | Group: true, 1140 | }, 1141 | Options: perf.Options{ 1142 | Disabled: true, 1143 | }, 1144 | } 1145 | ga.SetSamplePeriod(1) 1146 | ga.SetWakeupEvents(1) 1147 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 1148 | if err := gtp.Configure(ga); err != nil { 1149 | t.Fatalf("Configure: %v", err) 1150 | } 1151 | 1152 | runtime.LockOSThread() 1153 | defer runtime.UnlockOSThread() 1154 | 1155 | leader, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 1156 | if err != nil { 1157 | t.Fatal(err) 1158 | } 1159 | defer leader.Close() 1160 | if err := leader.MapRing(); err != nil { 1161 | t.Fatal(err) 1162 | } 1163 | 1164 | wa := &perf.Attr{ 1165 | SampleFormat: perf.SampleFormat{ 1166 | Tid: true, 1167 | Time: true, 1168 | CPU: true, 1169 | Addr: true, 1170 | StreamID: true, 1171 | }, 1172 | } 1173 | wa.SetSamplePeriod(1) 1174 | wa.SetWakeupEvents(1) 1175 | wtp := perf.Tracepoint("syscalls", "sys_enter_write") 1176 | if err := wtp.Configure(wa); err != nil { 1177 | t.Fatal(err) 1178 | } 1179 | 1180 | follower, err := perf.Open(wa, perf.CallingThread, perf.AnyCPU, leader) 1181 | if err != nil { 1182 | t.Fatal(err) 1183 | } 1184 | defer follower.Close() 1185 | if err := follower.SetOutput(leader); err != nil { 1186 | t.Fatal(err) 1187 | } 1188 | 1189 | errch := make(chan error) 1190 | go func() { 1191 | for i := 0; i < 2; i++ { 1192 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 1193 | defer cancel() 1194 | _, err := leader.ReadRecord(ctx) 1195 | errch <- err 1196 | } 1197 | }() 1198 | 1199 | gc, err := leader.MeasureGroup(func() { 1200 | getpidTrigger() 1201 | writeTrigger() 1202 | }) 1203 | if err != nil { 1204 | t.Fatal(err) 1205 | } 1206 | 1207 | if got := gc.Values[0]; got.Value != 1 { 1208 | t.Fatalf("got %d hits for %q, want 1 hit", got.Value, got.Label) 1209 | } 1210 | if got := gc.Values[1]; got.Value != 1 { 1211 | t.Fatalf("got %d hits for %q, want 1 hit", got.Value, got.Label) 1212 | } 1213 | 1214 | for i := 0; i < 2; i++ { 1215 | select { 1216 | case <-time.After(10 * time.Millisecond): 1217 | t.Errorf("did not get sample record: timeout") 1218 | case err := <-errch: 1219 | if err != nil { 1220 | t.Fatalf("did not get sample record: %v", err) 1221 | } 1222 | } 1223 | } 1224 | } 1225 | 1226 | func evsig(fd int) { 1227 | val := uint64(1) 1228 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 1229 | unix.Write(fd, buf) 1230 | } 1231 | 1232 | func evwait(fd int) { 1233 | var val uint64 1234 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 1235 | unix.Read(fd, buf) 1236 | } 1237 | -------------------------------------------------------------------------------- /perf.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "bytes" 11 | "fmt" 12 | "io/ioutil" 13 | "os" 14 | "path/filepath" 15 | "strconv" 16 | "sync" 17 | "sync/atomic" 18 | "syscall" 19 | "time" 20 | "unsafe" 21 | 22 | "golang.org/x/sys/unix" 23 | ) 24 | 25 | // Special pid values for Open. 26 | const ( 27 | // CallingThread configures the event to measure the calling thread. 28 | CallingThread = 0 29 | 30 | // AllThreads configures the event to measure all threads on the 31 | // specified CPU. 
32 | AllThreads = -1 33 | ) 34 | 35 | // AnyCPU configures the specified process/thread to be measured on any CPU. 36 | const AnyCPU = -1 37 | 38 | // Event states. 39 | const ( 40 | eventStateUninitialized = 0 41 | eventStateOK = 1 42 | eventStateClosed = 2 43 | ) 44 | 45 | // Event is an active perf event. 46 | type Event struct { 47 | // state is the state of the event. See eventState* constants. 48 | state int32 49 | 50 | // perffd is the perf event file descriptor. 51 | perffd int 52 | 53 | // id is the unique event ID. 54 | id uint64 55 | 56 | // group contains other events in the event group, if this event is 57 | // an event group leader. The order is the order in which the events 58 | // were added to the group. 59 | group []*Event 60 | 61 | // groupByID maps group event IDs to the events themselves. The 62 | // reason why this mapping is needed is explained in ReadRecord. 63 | groupByID map[uint64]*Event 64 | 65 | // owned contains other events in the event group, which the caller 66 | // has no access to. The Event owns them all, Close closes them all. 67 | owned []*Event 68 | 69 | // a is the set of attributes the Event was configured with. It is 70 | // a clone of the original, save for the Label field, which may have 71 | // been set, if the original *Attr didn't set it. 72 | a *Attr 73 | 74 | // noReadRecord is true if ReadRecord is disabled for the event. 75 | // See SetOutput and ReadRecord. 76 | noReadRecord bool 77 | 78 | // ring is the (entire) memory mapped ring buffer. 79 | ring []byte 80 | 81 | // ringdata is the data region of the ring buffer. 82 | ringdata []byte 83 | 84 | // meta is the metadata page: &ring[0]. 85 | meta *unix.PerfEventMmapPage 86 | 87 | // wakeupfd is an event file descriptor (see eventfd(2)). It is used to 88 | // unblock calls to ReadRawRecord when the associated context expires. 89 | wakeupfd int 90 | 91 | // pollreq communicates requests from ReadRawRecord to the poll goroutine 92 | // associated with the ring. 93 | pollreq chan pollreq 94 | 95 | // pollresp receives responses from the poll goroutine associated 96 | // with the ring, back to ReadRawRecord. 97 | pollresp chan pollresp 98 | 99 | // recordBuffer is used as storage for records returned by ReadRecord 100 | // and ReadRawRecord. This means memory for records returned from those 101 | // methods will be overwritten by successive calls. 102 | recordBuffer []byte 103 | } 104 | 105 | // Open opens the event configured by attr. 106 | // 107 | // The pid and cpu parameters specify which thread and CPU to monitor: 108 | // 109 | // * if pid == CallingThread and cpu == AnyCPU, the event measures 110 | // the calling thread on any CPU 111 | // 112 | // * if pid == CallingThread and cpu >= 0, the event measures 113 | // the calling thread only when running on the specified CPU 114 | // 115 | // * if pid > 0 and cpu == AnyCPU, the event measures the specified 116 | // thread on any CPU 117 | // 118 | // * if pid > 0 and cpu >= 0, the event measures the specified thread 119 | // only when running on the specified CPU 120 | // 121 | // * if pid == AllThreads and cpu >= 0, the event measures all threads 122 | // on the specified CPU 123 | // 124 | // * finally, the pid == AllThreads and cpu == AnyCPU setting is invalid 125 | // 126 | // If group is non-nil, the returned Event is made part of the group 127 | // associated with the specified group Event. 
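//
// A minimal sketch of two of the combinations above (error handling
// elided; the choice of counter is illustrative only):
//
//	fa := new(perf.Attr)
//	perf.CPUClock.Configure(fa)
//
//	// the calling thread, on any CPU:
//	ev, err := perf.Open(fa, perf.CallingThread, perf.AnyCPU, nil)
//
//	// all threads, on CPU 0 (usually requires elevated privileges):
//	evAll, err := perf.Open(fa, perf.AllThreads, 0, nil)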
128 | func Open(a *Attr, pid, cpu int, group *Event) (*Event, error) {
129 | 	return open(a, pid, cpu, group, 0)
130 | }
131 | 
132 | // OpenWithFlags is like Open but allows the caller to specify additional
133 | // flags to be passed to perf_event_open(2).
134 | func OpenWithFlags(a *Attr, pid, cpu int, group *Event, flags int) (*Event, error) {
135 | 	return open(a, pid, cpu, group, flags)
136 | }
137 | 
138 | // OpenCGroup is like Open, but activates per-container system-wide
139 | // monitoring. If cgroupfs is mounted on /dev/cgroup, and the group to
140 | // monitor is called "test", then cgroupfd must be a file descriptor opened
141 | // on /dev/cgroup/test.
142 | func OpenCGroup(a *Attr, cgroupfd, cpu int, group *Event) (*Event, error) {
143 | 	return open(a, cgroupfd, cpu, group, unix.PERF_FLAG_PID_CGROUP)
144 | }
145 | 
146 | func open(a *Attr, pid, cpu int, group *Event, flags int) (*Event, error) {
147 | 	groupfd := -1
148 | 	if group != nil {
149 | 		if err := group.ok(); err != nil {
150 | 			return nil, err
151 | 		}
152 | 		groupfd = group.perffd
153 | 	}
154 | 
155 | 	fd, err := perfEventOpen(a, pid, cpu, groupfd, flags)
156 | 	if err != nil {
157 | 		return nil, os.NewSyscallError("perf_event_open", err)
158 | 	}
159 | 	if err := unix.SetNonblock(fd, true); err != nil {
160 | 		unix.Close(fd)
161 | 		return nil, os.NewSyscallError("setnonblock", err)
162 | 	}
163 | 
164 | 	// Clone the *Attr so the caller can't change it from under our feet.
165 | 
166 | 	ac := new(Attr)
167 | 	*ac = *a // ok to copy since no slices
168 | 	if ac.Label == "" {
169 | 		evID := eventID{
170 | 			Type:   uint64(a.Type),
171 | 			Config: uint64(a.Config),
172 | 		}
173 | 		ac.Label = lookupLabel(evID).Name
174 | 	}
175 | 
176 | 	ev := &Event{
177 | 		state:  eventStateOK,
178 | 		perffd: fd,
179 | 		a:      ac,
180 | 	}
181 | 	id, err := ev.ID()
182 | 	if err != nil {
183 | 		return nil, err
184 | 	}
185 | 	ev.id = id
186 | 	if group != nil {
187 | 		if group.groupByID == nil {
188 | 			group.groupByID = map[uint64]*Event{}
189 | 		}
190 | 		group.group = append(group.group, ev)
191 | 		group.groupByID[id] = ev
192 | 	}
193 | 
194 | 	return ev, nil
195 | }
196 | 
197 | // perfEventOpen wraps the perf_event_open system call with some additional
198 | // logic around ensuring that file descriptors are marked close-on-exec.
199 | func perfEventOpen(a *Attr, pid, cpu, groupfd, flags int) (fd int, err error) {
200 | 	sysAttr := a.sysAttr()
201 | 	cloexecFlags := flags | unix.PERF_FLAG_FD_CLOEXEC
202 | 
203 | 	fd, err = unix.PerfEventOpen(sysAttr, pid, cpu, groupfd, cloexecFlags)
204 | 	switch err {
205 | 	case nil:
206 | 		return fd, nil
207 | 	case unix.EINVAL:
208 | 		// PERF_FLAG_FD_CLOEXEC is only available in Linux 3.14
209 | 		// and up, or in older kernels patched by distributions
210 | 		// with backported perf updates. If we got EINVAL, try again
211 | 		// without the flag, while holding syscall.ForkLock, following
212 | 		// the standard library pattern in net/sock_cloexec.go.
213 | 		syscall.ForkLock.RLock()
214 | 		defer syscall.ForkLock.RUnlock()
215 | 
216 | 		fd, err = unix.PerfEventOpen(sysAttr, pid, cpu, groupfd, flags)
217 | 		if err == nil {
218 | 			unix.CloseOnExec(fd)
219 | 		}
220 | 		return fd, err
221 | 	default:
222 | 		return -1, err
223 | 	}
224 | }
225 | 
226 | // DefaultNumPages is the number of pages used by MapRing. There is no
227 | // fundamental logic to this number. We use it because that is what the perf
228 | // tool does.
229 | const DefaultNumPages = 128
230 | 
231 | // MapRing maps the ring buffer attached to the event into memory.
232 | //
233 | // This enables reading records via ReadRecord / ReadRawRecord.
234 | func (ev *Event) MapRing() error {
235 | 	return ev.MapRingNumPages(DefaultNumPages)
236 | }
237 | 
238 | // MapRingNumPages is like MapRing, but allows the caller to specify the
239 | // size of the ring. The data portion of the ring is num pages. The total
240 | // size of the ring is num+1 pages, because an additional metadata page
241 | // is mapped before the data portion of the ring.
242 | func (ev *Event) MapRingNumPages(num int) error {
243 | 	if err := ev.ok(); err != nil {
244 | 		return err
245 | 	}
246 | 	if ev.ring != nil {
247 | 		return nil
248 | 	}
249 | 
250 | 	pgSize := unix.Getpagesize()
251 | 	size := (1 + num) * pgSize
252 | 	const prot = unix.PROT_READ | unix.PROT_WRITE
253 | 	const flags = unix.MAP_SHARED
254 | 	ring, err := unix.Mmap(ev.perffd, 0, size, prot, flags)
255 | 	if err != nil {
256 | 		return os.NewSyscallError("mmap", err)
257 | 	}
258 | 
259 | 	meta := (*unix.PerfEventMmapPage)(unsafe.Pointer(&ring[0]))
260 | 
261 | 	// Some systems do not fill in the data_offset and data_size fields
262 | 	// of the metadata page correctly: CentOS 6.9 and Debian 8 have been
263 | 	// observed to do this. Try to detect this condition, and adjust
264 | 	// the values accordingly.
265 | 	if meta.Data_offset == 0 && meta.Data_size == 0 {
266 | 		atomic.StoreUint64(&meta.Data_offset, uint64(pgSize))
267 | 		atomic.StoreUint64(&meta.Data_size, uint64(num*pgSize))
268 | 	}
269 | 
270 | 	ringdata := ring[meta.Data_offset:]
271 | 
272 | 	wakeupfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC|unix.EFD_NONBLOCK)
273 | 	if err != nil {
274 | 		return os.NewSyscallError("eventfd", err)
275 | 	}
276 | 
277 | 	ev.ring = ring
278 | 	ev.meta = meta
279 | 	ev.ringdata = ringdata
280 | 	ev.wakeupfd = wakeupfd
281 | 	ev.pollreq = make(chan pollreq)
282 | 	ev.pollresp = make(chan pollresp)
283 | 
284 | 	go ev.poll()
285 | 
286 | 	return nil
287 | }
288 | 
289 | func (ev *Event) ok() error {
290 | 	if ev == nil {
291 | 		return os.ErrInvalid
292 | 	}
293 | 
294 | 	switch ev.state {
295 | 	case eventStateUninitialized:
296 | 		return os.ErrInvalid
297 | 	case eventStateOK:
298 | 		return nil
299 | 	default: // eventStateClosed
300 | 		return os.ErrClosed
301 | 	}
302 | }
303 | 
304 | // FD returns the file descriptor associated with the event.
305 | func (ev *Event) FD() (int, error) {
306 | 	if err := ev.ok(); err != nil {
307 | 		return -1, err
308 | 	}
309 | 	return ev.perffd, nil
310 | }
311 | 
312 | // Measure disables the event, resets it, enables it, runs f, disables it again,
313 | // then reads the Count associated with the event.
314 | func (ev *Event) Measure(f func()) (Count, error) {
315 | 	if err := ev.Disable(); err != nil {
316 | 		return Count{}, err
317 | 	}
318 | 	if err := ev.Reset(); err != nil {
319 | 		return Count{}, err
320 | 	}
321 | 	if err := ev.Enable(); err != nil {
322 | 		return Count{}, err
323 | 	}
324 | 
325 | 	f()
326 | 
327 | 	if err := ev.Disable(); err != nil {
328 | 		return Count{}, err
329 | 	}
330 | 	return ev.ReadCount()
331 | }
332 | 
333 | // MeasureGroup is like Measure, but for event groups.
334 | func (ev *Event) MeasureGroup(f func()) (GroupCount, error) {
335 | 	if err := ev.Disable(); err != nil {
336 | 		return GroupCount{}, err
337 | 	}
338 | 	if err := ev.Reset(); err != nil {
339 | 		return GroupCount{}, err
340 | 	}
341 | 	if err := ev.Enable(); err != nil {
342 | 		return GroupCount{}, err
343 | 	}
344 | 
345 | 	f()
346 | 
347 | 	if err := ev.Disable(); err != nil {
348 | 		return GroupCount{}, err
349 | 	}
350 | 	return ev.ReadGroupCount()
351 | }
352 | 
353 | // Enable enables the event.
354 | func (ev *Event) Enable() error { 355 | if err := ev.ok(); err != nil { 356 | return err 357 | } 358 | err := ev.ioctlNoArg(unix.PERF_EVENT_IOC_ENABLE) 359 | return wrapIoctlError("PERF_EVENT_IOC_ENABLE", err) 360 | } 361 | 362 | // Disable disables the event. If ev is a group leader, Disable disables 363 | // all events in the group. 364 | func (ev *Event) Disable() error { 365 | if err := ev.ok(); err != nil { 366 | return err 367 | } 368 | err := ev.ioctlInt(unix.PERF_EVENT_IOC_DISABLE, 0) 369 | return wrapIoctlError("PERF_EVENT_IOC_DISABLE", err) 370 | } 371 | 372 | // TODO(acln): add support for PERF_IOC_FLAG_GROUP and for event followers 373 | // to disable the entire group? 374 | 375 | // Refresh adds delta to a counter associated with the event. This counter 376 | // decrements every time the event overflows. Once the counter reaches zero, 377 | // the event is disabled. Calling Refresh with delta == 0 is considered 378 | // undefined behavior. 379 | func (ev *Event) Refresh(delta int) error { 380 | if err := ev.ok(); err != nil { 381 | return err 382 | } 383 | err := ev.ioctlInt(unix.PERF_EVENT_IOC_REFRESH, uintptr(delta)) 384 | return wrapIoctlError("PERF_EVENT_IOC_REFRESH", err) 385 | } 386 | 387 | // Reset resets the counters associated with the event. 388 | func (ev *Event) Reset() error { 389 | if err := ev.ok(); err != nil { 390 | return err 391 | } 392 | err := ev.ioctlNoArg(unix.PERF_EVENT_IOC_RESET) 393 | return wrapIoctlError("PERF_EVENT_IOC_RESET", err) 394 | } 395 | 396 | // UpdatePeriod updates the overflow period for the event. On older kernels, 397 | // the new period does not take effect until after the next overflow. 398 | func (ev *Event) UpdatePeriod(p uint64) error { 399 | if err := ev.ok(); err != nil { 400 | return err 401 | } 402 | err := ev.ioctlPointer(unix.PERF_EVENT_IOC_PERIOD, unsafe.Pointer(&p)) 403 | return wrapIoctlError("PERF_EVENT_IOC_PERIOD", err) 404 | } 405 | 406 | // SetOutput tells the kernel to send records to the specified 407 | // target Event rather than ev. 408 | // 409 | // If target is nil, output from ev is ignored. 410 | // 411 | // Some restrictions apply: 412 | // 413 | // 1) Calling SetOutput on an *Event will fail with EINVAL if MapRing was 414 | // called on that event previously. 2) If ev and target are not CPU-wide 415 | // events, they must be on the same CPU. 3) If ev and target are CPU-wide 416 | // events, they must refer to the same task. 4) ev and target must use the 417 | // same clock. 418 | // 419 | // An additional restriction of the Go API also applies: 420 | // 421 | // In order to use ReadRecord on the target Event, the following settings on 422 | // ev and target must match: Options.SampleIDAll, SampleFormat.Identifier, 423 | // SampleFormat.IP, SampleFormat.Tid, SampleFormat.Time, SampleFormat.Addr, 424 | // SampleFormat.ID, SampleFormat.StreamID. Furthermore, SampleFormat.StreamID 425 | // must be set. SetOutput nevertheless succeeds even if this condition is 426 | // not met, because callers can still use ReadRawRecord instead of ReadRecord. 
427 | func (ev *Event) SetOutput(target *Event) error {
428 | 	if err := ev.ok(); err != nil {
429 | 		return err
430 | 	}
431 | 	var targetfd int
432 | 	if target == nil {
433 | 		targetfd = -1
434 | 	} else {
435 | 		if err := target.ok(); err != nil {
436 | 			return err
437 | 		}
438 | 		if !target.canReadRecordFrom(ev) {
439 | 			target.noReadRecord = true
440 | 		}
441 | 		targetfd = target.perffd
442 | 	}
443 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_SET_OUTPUT, uintptr(targetfd))
444 | 	return wrapIoctlError("PERF_EVENT_IOC_SET_OUTPUT", err)
445 | }
446 | 
447 | // canReadRecordFrom returns a boolean indicating whether ev, as a leader,
448 | // can read records produced by f, a follower.
449 | func (ev *Event) canReadRecordFrom(f *Event) bool {
450 | 	lf := ev.a.SampleFormat
451 | 	ff := f.a.SampleFormat
452 | 
453 | 	return lf.Identifier == ff.Identifier &&
454 | 		lf.IP == ff.IP &&
455 | 		lf.Tid == ff.Tid &&
456 | 		lf.Time == ff.Time &&
457 | 		lf.Addr == ff.Addr &&
458 | 		lf.ID == ff.ID &&
459 | 		lf.StreamID == ff.StreamID &&
460 | 		ff.StreamID
461 | }
462 | 
463 | // BUG(acln): PERF_EVENT_IOC_SET_FILTER is not implemented
464 | 
465 | // ID returns the unique event ID value for ev.
466 | func (ev *Event) ID() (uint64, error) {
467 | 	if err := ev.ok(); err != nil {
468 | 		return 0, err
469 | 	}
470 | 	var val uint64
471 | 	err := ev.ioctlPointer(unix.PERF_EVENT_IOC_ID, unsafe.Pointer(&val))
472 | 	return val, wrapIoctlError("PERF_EVENT_IOC_ID", err)
473 | }
474 | 
475 | // SetBPF attaches a BPF program to ev, which must be a kprobe tracepoint
476 | // event. progfd is the file descriptor associated with the BPF program.
477 | func (ev *Event) SetBPF(progfd uint32) error {
478 | 	if err := ev.ok(); err != nil {
479 | 		return err
480 | 	}
481 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_SET_BPF, uintptr(progfd))
482 | 	return wrapIoctlError("PERF_EVENT_IOC_SET_BPF", err)
483 | }
484 | 
485 | // PauseOutput pauses the output from ev.
486 | func (ev *Event) PauseOutput() error {
487 | 	if err := ev.ok(); err != nil {
488 | 		return err
489 | 	}
490 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_PAUSE_OUTPUT, 1)
491 | 	return wrapIoctlError("PERF_EVENT_IOC_PAUSE_OUTPUT", err)
492 | }
493 | 
494 | // ResumeOutput resumes output from ev.
495 | func (ev *Event) ResumeOutput() error {
496 | 	if err := ev.ok(); err != nil {
497 | 		return err
498 | 	}
499 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_PAUSE_OUTPUT, 0)
500 | 	return wrapIoctlError("PERF_EVENT_IOC_PAUSE_OUTPUT", err)
501 | }
502 | 
503 | // QueryBPF queries the event for BPF program file descriptors attached to
504 | // the same tracepoint as ev. max is the maximum number of file descriptors
505 | // to return.
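//
// A minimal sketch (the limit of 16 is arbitrary; ev must be a tracepoint
// event with BPF programs attached):
//
//	fds, err := ev.QueryBPF(16)
//	// fds now holds the descriptors of up to 16 attached programs.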
506 | func (ev *Event) QueryBPF(max uint32) ([]uint32, error) {
507 | 	if err := ev.ok(); err != nil {
508 | 		return nil, err
509 | 	}
510 | 	buf := make([]uint32, 2+max)
511 | 	buf[0] = max
512 | 	err := ev.ioctlPointer(unix.PERF_EVENT_IOC_QUERY_BPF, unsafe.Pointer(&buf[0]))
513 | 	if err != nil {
514 | 		return nil, wrapIoctlError("PERF_EVENT_IOC_QUERY_BPF", err)
515 | 	}
516 | 	count := buf[1]
517 | 	fds := make([]uint32, count)
518 | 	copy(fds, buf[2:2+count])
519 | 	return fds, nil
520 | }
521 | 
522 | // BUG(acln): PERF_EVENT_IOC_MODIFY_ATTRIBUTES is not implemented
523 | 
524 | func (ev *Event) ioctlNoArg(number int64) error {
525 | 	return ev.ioctlInt(number, 0)
526 | }
527 | 
528 | func (ev *Event) ioctlInt(number int64, arg uintptr) error {
529 | 	_, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(ev.perffd), uintptr(number), arg)
530 | 	if e != 0 {
531 | 		return e
532 | 	}
533 | 	return nil
534 | }
535 | 
536 | func (ev *Event) ioctlPointer(number uintptr, arg unsafe.Pointer) error {
537 | 	_, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(ev.perffd), number, uintptr(arg))
538 | 	if e != 0 {
539 | 		return e
540 | 	}
541 | 	return nil
542 | }
543 | 
544 | func wrapIoctlError(ioctl string, err error) error {
545 | 	if err == nil {
546 | 		return nil
547 | 	}
548 | 	return &ioctlError{ioctl: ioctl, err: err}
549 | }
550 | 
551 | type ioctlError struct {
552 | 	ioctl string
553 | 	err   error
554 | }
555 | 
556 | func (e *ioctlError) Error() string {
557 | 	return fmt.Sprintf("%s: %v", e.ioctl, e.err)
558 | }
559 | 
560 | func (e *ioctlError) Unwrap() error { return e.err }
561 | 
562 | // Close closes the event. Close must not be called concurrently with any
563 | // other methods on the Event.
564 | func (ev *Event) Close() error {
565 | 	if ev.ring != nil {
566 | 		close(ev.pollreq)
567 | 		<-ev.pollresp
568 | 		unix.Munmap(ev.ring)
569 | 		unix.Close(ev.wakeupfd)
570 | 	}
571 | 
572 | 	for _, ev := range ev.owned {
573 | 		ev.Close()
574 | 	}
575 | 
576 | 	ev.state = eventStateClosed
577 | 	return unix.Close(ev.perffd)
578 | }
579 | 
580 | // Attr configures a perf event.
581 | type Attr struct {
582 | 	// Label is a human-readable label associated with the event.
583 | 	// For convenience, the Label is included in Count and GroupCount
584 | 	// measurements read from events.
585 | 	//
586 | 	// When an event is opened, if Label is the empty string, then a
587 | 	// Label is computed (if possible) based on the Type and Config
588 | 	// fields. Otherwise, if the Label is user-defined (not the empty
589 | 	// string), it is included verbatim.
590 | 	//
591 | 	// For most events, the computed Label matches the label specified by
592 | 	// ``perf list'' for the same event (but see Bugs).
593 | 	Label string
594 | 
595 | 	// Type is the major type of the event.
596 | 	Type EventType
597 | 
598 | 	// Config is the type-specific event configuration.
599 | 	Config uint64
600 | 
601 | 	// Sample configures the sample period or sample frequency for
602 | 	// overflow packets, based on Options.Freq: if Options.Freq is set,
603 | 	// Sample is interpreted as "sample frequency", otherwise it is
604 | 	// interpreted as "sample period".
605 | 	//
606 | 	// See also SetSample{Period,Freq}.
607 | 	Sample uint64
608 | 
609 | 	// SampleFormat configures information requested in sample records,
610 | 	// on the memory mapped ring buffer.
611 | 	SampleFormat SampleFormat
612 | 
613 | 	// CountFormat specifies the format of counts read from the
614 | 	// Event using ReadCount or ReadGroupCount. See the CountFormat
615 | 	// documentation for more details.
616 | 	CountFormat CountFormat
617 | 
618 | 	// Options contains more fine-grained event configuration.
619 | 	Options Options
620 | 
621 | 	// Wakeup configures wakeups on the ring buffer associated with the
622 | 	// event. If Options.Watermark is set, Wakeup is interpreted as the
623 | 	// number of bytes before wakeup. Otherwise, it is interpreted as
624 | 	// "wake up every N events".
625 | 	//
626 | 	// See also SetWakeup{Events,Watermark}.
627 | 	Wakeup uint32
628 | 
629 | 	// BreakpointType is the breakpoint type, if Type == BreakpointEvent.
630 | 	BreakpointType uint32
631 | 
632 | 	// Config1 is used for events that need an extra register or otherwise
633 | 	// do not fit in the regular config field.
634 | 	//
635 | 	// For breakpoint events, Config1 is the breakpoint address.
636 | 	// For kprobes, it is the kprobe function. For uprobes, it is the
637 | 	// uprobe path.
638 | 	Config1 uint64
639 | 
640 | 	// Config2 is a further extension of the Config1 field.
641 | 	//
642 | 	// For breakpoint events, it is the length of the breakpoint.
643 | 	// For kprobes, when the kprobe function is NULL, it is the address of
644 | 	// the kprobe. For both kprobes and uprobes, it is the probe offset.
645 | 	Config2 uint64
646 | 
647 | 	// BranchSampleFormat specifies what branches to include in the
648 | 	// branch record, if SampleFormat.BranchStack is set.
649 | 	BranchSampleFormat BranchSampleFormat
650 | 
651 | 	// SampleRegistersUser is the set of user registers to dump on samples.
652 | 	SampleRegistersUser uint64
653 | 
654 | 	// SampleStackUser is the size of the user stack to dump on samples.
655 | 	SampleStackUser uint32
656 | 
657 | 	// ClockID is the clock ID to use with samples, if Options.UseClockID
658 | 	// is set.
659 | 	//
660 | 	// TODO(acln): What are the values for this? CLOCK_MONOTONIC and such?
661 | 	// Investigate. Can we choose a clock that can be compared to Go's
662 | 	// clock in a meaningful way? If so, should we add special support
663 | 	// for that?
664 | 	ClockID int32
665 | 
666 | 	// SampleRegistersIntr is the set of registers to dump for each sample.
667 | 	// See asm/perf_regs.h for details.
668 | 	SampleRegistersIntr uint64
669 | 
670 | 	// AuxWatermark is the watermark for the aux area.
671 | 	AuxWatermark uint32
672 | 
673 | 	// SampleMaxStack is the maximum number of frame pointers in a
674 | 	// callchain. The value must be < MaxStack().
675 | 	SampleMaxStack uint16
676 | }
677 | 
678 | func (a Attr) sysAttr() *unix.PerfEventAttr {
679 | 	return &unix.PerfEventAttr{
680 | 		Type:               uint32(a.Type),
681 | 		Size:               uint32(unsafe.Sizeof(unix.PerfEventAttr{})),
682 | 		Config:             a.Config,
683 | 		Sample:             a.Sample,
684 | 		Sample_type:        a.SampleFormat.marshal(),
685 | 		Read_format:        a.CountFormat.marshal(),
686 | 		Bits:               a.Options.marshal(),
687 | 		Wakeup:             a.Wakeup,
688 | 		Bp_type:            a.BreakpointType,
689 | 		Ext1:               a.Config1,
690 | 		Ext2:               a.Config2,
691 | 		Branch_sample_type: a.BranchSampleFormat.marshal(),
692 | 		Sample_regs_user:   a.SampleRegistersUser,
693 | 		Sample_stack_user:  a.SampleStackUser,
694 | 		Clockid:            a.ClockID,
695 | 		Sample_regs_intr:   a.SampleRegistersIntr,
696 | 		Aux_watermark:      a.AuxWatermark,
697 | 		Sample_max_stack:   a.SampleMaxStack,
698 | 	}
699 | }
700 | 
701 | // Configure implements the Configurator interface. It overwrites target
702 | // with a. See also (*Group).Add.
703 | func (a *Attr) Configure(target *Attr) error {
704 | 	*target = *a
705 | 	return nil
706 | }
707 | 
708 | // SetSamplePeriod configures the sampling period for the event.
709 | //
710 | // It sets a.Sample to p and disables a.Options.Freq.
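//
// For example (the values are illustrative only):
//
//	a.SetSamplePeriod(1000) // one sample every 1000 events; Freq is cleared
//	a.SetSampleFreq(97)     // about 97 samples per second; Freq is set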
711 | func (a *Attr) SetSamplePeriod(p uint64) {
712 | 	a.Sample = p
713 | 	a.Options.Freq = false
714 | }
715 | 
716 | // SetSampleFreq configures the sampling frequency for the event.
717 | //
718 | // It sets a.Sample to f and enables a.Options.Freq.
719 | func (a *Attr) SetSampleFreq(f uint64) {
720 | 	a.Sample = f
721 | 	a.Options.Freq = true
722 | }
723 | 
724 | // SetWakeupEvents configures the event to wake up every n events.
725 | //
726 | // It sets a.Wakeup to n and disables a.Options.Watermark.
727 | func (a *Attr) SetWakeupEvents(n uint32) {
728 | 	a.Wakeup = n
729 | 	a.Options.Watermark = false
730 | }
731 | 
732 | // SetWakeupWatermark configures the number of bytes in overflow records
733 | // before wakeup.
734 | //
735 | // It sets a.Wakeup to n and enables a.Options.Watermark.
736 | func (a *Attr) SetWakeupWatermark(n uint32) {
737 | 	a.Wakeup = n
738 | 	a.Options.Watermark = true
739 | }
740 | 
741 | // LookupEventType probes /sys/bus/event_source/devices/<pmu>/type
742 | // for the EventType value associated with the specified PMU.
743 | func LookupEventType(pmu string) (EventType, error) {
744 | 	path := filepath.Join("/sys/bus/event_source/devices", pmu, "type")
745 | 	et, err := readUint(path, 32)
746 | 	return EventType(et), err
747 | }
748 | 
749 | // EventType is the overall type of a performance event.
750 | type EventType uint32
751 | 
752 | // Supported event types.
753 | const (
754 | 	HardwareEvent      EventType = unix.PERF_TYPE_HARDWARE
755 | 	SoftwareEvent      EventType = unix.PERF_TYPE_SOFTWARE
756 | 	TracepointEvent    EventType = unix.PERF_TYPE_TRACEPOINT
757 | 	HardwareCacheEvent EventType = unix.PERF_TYPE_HW_CACHE
758 | 	RawEvent           EventType = unix.PERF_TYPE_RAW
759 | 	BreakpointEvent    EventType = unix.PERF_TYPE_BREAKPOINT
760 | )
761 | 
762 | // HardwareCounter is a hardware performance counter.
763 | type HardwareCounter uint64
764 | 
765 | // Hardware performance counters.
766 | const (
767 | 	CPUCycles             HardwareCounter = unix.PERF_COUNT_HW_CPU_CYCLES
768 | 	Instructions          HardwareCounter = unix.PERF_COUNT_HW_INSTRUCTIONS
769 | 	CacheReferences       HardwareCounter = unix.PERF_COUNT_HW_CACHE_REFERENCES
770 | 	CacheMisses           HardwareCounter = unix.PERF_COUNT_HW_CACHE_MISSES
771 | 	BranchInstructions    HardwareCounter = unix.PERF_COUNT_HW_BRANCH_INSTRUCTIONS
772 | 	BranchMisses          HardwareCounter = unix.PERF_COUNT_HW_BRANCH_MISSES
773 | 	BusCycles             HardwareCounter = unix.PERF_COUNT_HW_BUS_CYCLES
774 | 	StalledCyclesFrontend HardwareCounter = unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
775 | 	StalledCyclesBackend  HardwareCounter = unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND
776 | 	RefCPUCycles          HardwareCounter = unix.PERF_COUNT_HW_REF_CPU_CYCLES
777 | )
778 | 
779 | var hardwareLabels = map[HardwareCounter]eventLabel{
780 | 	CPUCycles:             {Name: "cpu-cycles", Alias: "cycles"},
781 | 	Instructions:          {Name: "instructions"},
782 | 	CacheReferences:       {Name: "cache-references"},
783 | 	CacheMisses:           {Name: "cache-misses"},
784 | 	BranchInstructions:    {Name: "branch-instructions", Alias: "branches"},
785 | 	BranchMisses:          {Name: "branch-misses"},
786 | 	BusCycles:             {Name: "bus-cycles"},
787 | 	StalledCyclesFrontend: {Name: "stalled-cycles-frontend", Alias: "idle-cycles-frontend"},
788 | 	StalledCyclesBackend:  {Name: "stalled-cycles-backend", Alias: "idle-cycles-backend"},
789 | 	RefCPUCycles:          {Name: "ref-cycles"},
790 | }
791 | 
792 | func (hwc HardwareCounter) String() string {
793 | 	return hwc.eventLabel().Name
794 | }
795 | 
796 | func (hwc HardwareCounter) eventLabel() eventLabel {
797 | 	return hardwareLabels[hwc]
798 | }
799 | 
800 | // Configure configures attr to measure hwc. It sets the Label, Type, and
801 | // Config fields on attr.
802 | func (hwc HardwareCounter) Configure(attr *Attr) error {
803 | 	attr.Label = hwc.String()
804 | 	attr.Type = HardwareEvent
805 | 	attr.Config = uint64(hwc)
806 | 	return nil
807 | }
808 | 
809 | // AllHardwareCounters returns a slice of all known hardware counters.
810 | func AllHardwareCounters() []Configurator {
811 | 	return []Configurator{
812 | 		CPUCycles,
813 | 		Instructions,
814 | 		CacheReferences,
815 | 		CacheMisses,
816 | 		BranchInstructions,
817 | 		BranchMisses,
818 | 		BusCycles,
819 | 		StalledCyclesFrontend,
820 | 		StalledCyclesBackend,
821 | 		RefCPUCycles,
822 | 	}
823 | }
824 | 
825 | // SoftwareCounter is a software performance counter.
826 | type SoftwareCounter uint64
827 | 
828 | // Software performance counters.
829 | const ( 830 | CPUClock SoftwareCounter = unix.PERF_COUNT_SW_CPU_CLOCK 831 | TaskClock SoftwareCounter = unix.PERF_COUNT_SW_TASK_CLOCK 832 | PageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS 833 | ContextSwitches SoftwareCounter = unix.PERF_COUNT_SW_CONTEXT_SWITCHES 834 | CPUMigrations SoftwareCounter = unix.PERF_COUNT_SW_CPU_MIGRATIONS 835 | MinorPageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS_MIN 836 | MajorPageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ 837 | AlignmentFaults SoftwareCounter = unix.PERF_COUNT_SW_ALIGNMENT_FAULTS 838 | EmulationFaults SoftwareCounter = unix.PERF_COUNT_SW_EMULATION_FAULTS 839 | Dummy SoftwareCounter = unix.PERF_COUNT_SW_DUMMY 840 | BPFOutput SoftwareCounter = unix.PERF_COUNT_SW_BPF_OUTPUT 841 | ) 842 | 843 | var softwareLabels = map[SoftwareCounter]eventLabel{ 844 | CPUClock: {Name: "cpu-clock"}, 845 | TaskClock: {Name: "task-clock"}, 846 | PageFaults: {Name: "page-faults", Alias: "faults"}, 847 | ContextSwitches: {Name: "context-switches", Alias: "cs"}, 848 | CPUMigrations: {Name: "cpu-migrations", Alias: "migrations"}, 849 | MinorPageFaults: {Name: "minor-faults"}, 850 | MajorPageFaults: {Name: "major-faults"}, 851 | AlignmentFaults: {Name: "alignment-faults"}, 852 | EmulationFaults: {Name: "emulation-faults"}, 853 | Dummy: {Name: "dummy"}, 854 | BPFOutput: {Name: "bpf-output"}, 855 | } 856 | 857 | func (swc SoftwareCounter) String() string { 858 | return swc.eventLabel().Name 859 | } 860 | 861 | func (swc SoftwareCounter) eventLabel() eventLabel { 862 | return softwareLabels[swc] 863 | } 864 | 865 | // Configure configures attr to measure swc. It sets attr.Type and attr.Config. 866 | func (swc SoftwareCounter) Configure(attr *Attr) error { 867 | attr.Label = swc.eventLabel().Name 868 | attr.Type = SoftwareEvent 869 | attr.Config = uint64(swc) 870 | return nil 871 | } 872 | 873 | // AllSoftwareCounters returns a slice of all known software counters. 874 | func AllSoftwareCounters() []Configurator { 875 | return []Configurator{ 876 | CPUClock, 877 | TaskClock, 878 | PageFaults, 879 | ContextSwitches, 880 | CPUMigrations, 881 | MinorPageFaults, 882 | MajorPageFaults, 883 | AlignmentFaults, 884 | EmulationFaults, 885 | Dummy, 886 | BPFOutput, 887 | } 888 | } 889 | 890 | // Cache identifies a cache. 891 | type Cache uint64 892 | 893 | // Caches. 894 | const ( 895 | L1D Cache = unix.PERF_COUNT_HW_CACHE_L1D 896 | L1I Cache = unix.PERF_COUNT_HW_CACHE_L1I 897 | LL Cache = unix.PERF_COUNT_HW_CACHE_LL 898 | DTLB Cache = unix.PERF_COUNT_HW_CACHE_DTLB 899 | ITLB Cache = unix.PERF_COUNT_HW_CACHE_ITLB 900 | BPU Cache = unix.PERF_COUNT_HW_CACHE_BPU 901 | NODE Cache = unix.PERF_COUNT_HW_CACHE_NODE 902 | ) 903 | 904 | // AllCaches returns a slice of all known cache types. 905 | func AllCaches() []Cache { 906 | return []Cache{L1D, L1I, LL, DTLB, ITLB, BPU, NODE} 907 | } 908 | 909 | // CacheOp is a cache operation. 910 | type CacheOp uint64 911 | 912 | // Cache operations. 913 | const ( 914 | Read CacheOp = unix.PERF_COUNT_HW_CACHE_OP_READ 915 | Write CacheOp = unix.PERF_COUNT_HW_CACHE_OP_WRITE 916 | Prefetch CacheOp = unix.PERF_COUNT_HW_CACHE_OP_PREFETCH 917 | ) 918 | 919 | // AllCacheOps returns a slice of all known cache operations. 920 | func AllCacheOps() []CacheOp { 921 | return []CacheOp{Read, Write, Prefetch} 922 | } 923 | 924 | // CacheOpResult is the result of a cache operation. 925 | type CacheOpResult uint64 926 | 927 | // Cache operation results. 
928 | const (
929 | 	Access CacheOpResult = unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
930 | 	Miss   CacheOpResult = unix.PERF_COUNT_HW_CACHE_RESULT_MISS
931 | )
932 | 
933 | // AllCacheOpResults returns a slice of all known cache operation results.
934 | func AllCacheOpResults() []CacheOpResult {
935 | 	return []CacheOpResult{Access, Miss}
936 | }
937 | 
938 | // A HardwareCacheCounter groups a cache, a cache operation, and an operation
939 | // result. It measures the number of results for the specified op, on the
940 | // specified cache.
941 | type HardwareCacheCounter struct {
942 | 	Cache  Cache
943 | 	Op     CacheOp
944 | 	Result CacheOpResult
945 | }
946 | 
947 | // Configure configures attr to measure hwcc. It sets attr.Type and attr.Config.
948 | func (hwcc HardwareCacheCounter) Configure(attr *Attr) error {
949 | 	attr.Type = HardwareCacheEvent
950 | 	attr.Config = uint64(hwcc.Cache) | uint64(hwcc.Op<<8) | uint64(hwcc.Result<<16)
951 | 	return nil
952 | }
953 | 
954 | // HardwareCacheCounters returns cache counters which measure the cartesian
955 | // product of the specified caches, operations and results.
956 | func HardwareCacheCounters(caches []Cache, ops []CacheOp, results []CacheOpResult) []Configurator {
957 | 	counters := make([]Configurator, 0, len(caches)*len(ops)*len(results))
958 | 	for _, cache := range caches {
959 | 		for _, op := range ops {
960 | 			for _, result := range results {
961 | 				c := HardwareCacheCounter{
962 | 					Cache:  cache,
963 | 					Op:     op,
964 | 					Result: result,
965 | 				}
966 | 				counters = append(counters, c)
967 | 			}
968 | 		}
969 | 	}
970 | 	return counters
971 | }
972 | 
973 | // Tracepoint returns a Configurator for the specified category and event.
974 | // The returned Configurator sets attr.Label, attr.Type, and attr.Config.
975 | func Tracepoint(category, event string) Configurator {
976 | 	return configuratorFunc(func(attr *Attr) error {
977 | 		cfg, err := LookupTracepointConfig(category, event)
978 | 		if err != nil {
979 | 			return err
980 | 		}
981 | 
982 | 		attr.Label = fmt.Sprintf("%s:%s", category, event)
983 | 		attr.Type = TracepointEvent
984 | 		attr.Config = cfg
985 | 
986 | 		return nil
987 | 	})
988 | }
989 | 
990 | // LookupTracepointConfig probes
991 | // /sys/kernel/debug/tracing/events/<category>/<event>/id for the Attr.Config
992 | // value associated with the specified category and event.
993 | func LookupTracepointConfig(category, event string) (uint64, error) {
994 | 	p := filepath.Join("/sys/kernel/debug/tracing/events", category, event, "id")
995 | 	return readUint(p, 64)
996 | }
997 | 
998 | // Breakpoint returns a Configurator for a breakpoint event.
999 | //
1000 | // typ is the type of the breakpoint.
1001 | //
1002 | // addr is the address of the breakpoint. For execution breakpoints, this
1003 | // is the memory address of the instruction of interest; for read and write
1004 | // breakpoints, it is the memory address of the memory location of interest.
1005 | //
1006 | // length is the length of the breakpoint being measured.
1007 | //
1008 | // The returned Configurator sets the Type, BreakpointType, Config1, and
1009 | // Config2 fields on attr.
1010 | func Breakpoint(typ BreakpointType, addr uint64, length BreakpointLength) Configurator {
1011 | 	return configuratorFunc(func(attr *Attr) error {
1012 | 		attr.Type = BreakpointEvent
1013 | 		attr.BreakpointType = uint32(typ)
1014 | 		attr.Config1 = addr
1015 | 		attr.Config2 = uint64(length)
1016 | 
1017 | 		return nil
1018 | 	})
1019 | }
1020 | 
1021 | // BreakpointType is the type of a breakpoint.
1022 | type BreakpointType uint32
1023 | 
1024 | // Breakpoint types. Values are |-ed together. The combination of
1025 | // BreakpointTypeR or BreakpointTypeW with BreakpointTypeX is invalid.
1026 | const (
1027 | 	BreakpointTypeEmpty BreakpointType = 0x0
1028 | 	BreakpointTypeR     BreakpointType = 0x1
1029 | 	BreakpointTypeW     BreakpointType = 0x2
1030 | 	BreakpointTypeRW    BreakpointType = BreakpointTypeR | BreakpointTypeW
1031 | 	BreakpointTypeX     BreakpointType = 0x4
1032 | )
1033 | 
1034 | // BreakpointLength is the length of the breakpoint being measured.
1035 | type BreakpointLength uint64
1036 | 
1037 | // Breakpoint length values.
1038 | const (
1039 | 	BreakpointLength1 BreakpointLength = 1
1040 | 	BreakpointLength2 BreakpointLength = 2
1041 | 	BreakpointLength4 BreakpointLength = 4
1042 | 	BreakpointLength8 BreakpointLength = 8
1043 | )
1044 | 
1045 | // ExecutionBreakpointLength returns the length of an execution breakpoint.
1046 | func ExecutionBreakpointLength() BreakpointLength {
1047 | 	// TODO(acln): is this correct? The man page says to set this to
1048 | 	// sizeof(long). Is sizeof(C long) == sizeof(Go uintptr) on all
1049 | 	// platforms of interest?
1050 | 	var x uintptr
1051 | 	return BreakpointLength(unsafe.Sizeof(x))
1052 | }
1053 | 
1054 | // ExecutionBreakpoint returns a Configurator for an execution breakpoint
1055 | // at the specified address.
1056 | func ExecutionBreakpoint(addr uint64) Configurator {
1057 | 	return Breakpoint(BreakpointTypeX, addr, ExecutionBreakpointLength())
1058 | }
1059 | 
1060 | // Options contains low-level event configuration options.
1061 | type Options struct {
1062 | 	// Disabled disables the event by default. If the event is in a
1063 | 	// group, but not a group leader, this option has no effect, since
1064 | 	// the group leader controls when events are enabled or disabled.
1065 | 	Disabled bool
1066 | 
1067 | 	// Inherit specifies that this counter should count events of child
1068 | 	// tasks as well as the specified task. This only applies to new
1069 | 	// children, not to any existing children at the time the counter
1070 | 	// is created (nor to any new children of existing children).
1071 | 	//
1072 | 	// Inherit does not work with some combinations of CountFormat options,
1073 | 	// such as CountFormat.Group.
1074 | 	Inherit bool
1075 | 
1076 | 	// Pinned specifies that the counter should always be on the CPU if
1077 | 	// possible. This bit applies only to hardware counters, and only
1078 | 	// to group leaders. If a pinned counter cannot be put onto the CPU,
1079 | 	// then the counter goes into an error state, where reads return EOF,
1080 | 	// until it is subsequently enabled or disabled.
1081 | 	Pinned bool
1082 | 
1083 | 	// Exclusive specifies that when this counter's group is on the CPU,
1084 | 	// it should be the only group using the CPU's counters.
1085 | 	Exclusive bool
1086 | 
1087 | 	// ExcludeUser excludes events that happen in user space.
1088 | 	ExcludeUser bool
1089 | 
1090 | 	// ExcludeKernel excludes events that happen in kernel space.
1091 | 	ExcludeKernel bool
1092 | 
1093 | 	// ExcludeHypervisor excludes events that happen in the hypervisor.
1094 | 	ExcludeHypervisor bool
1095 | 
1096 | 	// ExcludeIdle disables counting while the CPU is idle.
1097 | 	ExcludeIdle bool
1098 | 
1099 | 	// The mmap bit enables generation of MmapRecord records for every
1100 | 	// mmap(2) call that has PROT_EXEC set.
1101 | 	Mmap bool
1102 | 
1103 | 	// Comm enables tracking of process command name, as modified by
1104 | 	// exec(2), prctl(PR_SET_NAME), as well as writing to /proc/self/comm.
1105 | // If CommExec is also set, then the CommRecord records produced 1106 | // can be queried using the WasExec method, to differentiate exec(2) 1107 | // from the other cases. 1108 | Comm bool 1109 | 1110 | // Freq configures the event to use sample frequency, rather than 1111 | // sample period. See also Attr.Sample. 1112 | Freq bool 1113 | 1114 | // InheritStat enables saving of event counts on context switch for 1115 | // inherited tasks. InheritStat is only meaningful if Inherit is 1116 | // also set. 1117 | InheritStat bool 1118 | 1119 | // EnableOnExec configures the counter to be enabled automatically 1120 | // after a call to exec(2). 1121 | EnableOnExec bool 1122 | 1123 | // Task configures the event to include fork/exit notifications in 1124 | // the ring buffer. 1125 | Task bool 1126 | 1127 | // Watermark configures the ring buffer to issue an overflow 1128 | // notification when the Wakeup boundary is crossed. If not set, 1129 | // notifications happen after Wakeup samples. See also Attr.Wakeup. 1130 | Watermark bool 1131 | 1132 | // PreciseIP controls the number of instructions between an event of 1133 | // interest happening and the kernel being able to stop and record 1134 | // the event. 1135 | PreciseIP Skid 1136 | 1137 | // MmapData is the counterpart to Mmap. It enables generation of 1138 | // MmapRecord records for mmap(2) calls that do not have PROT_EXEC 1139 | // set. 1140 | MmapData bool 1141 | 1142 | // SampleIDAll configures Tid, Time, ID, StreamID and CPU samples 1143 | // to be included in non-Sample records. 1144 | SampleIDAll bool 1145 | 1146 | // ExcludeHost configures only events happening inside a guest 1147 | // instance (one that has executed a KVM_RUN ioctl) to be measured. 1148 | ExcludeHost bool 1149 | 1150 | // ExcludeGuest is the opposite of ExcludeHost: it configures only 1151 | // events outside a guest instance to be measured. 1152 | ExcludeGuest bool 1153 | 1154 | // ExcludeKernelCallchain excludes kernel callchains. 1155 | ExcludeKernelCallchain bool 1156 | 1157 | // ExcludeUserCallchain excludes user callchains. 1158 | ExcludeUserCallchain bool 1159 | 1160 | // Mmap2 configures mmap(2) events to include inode data. 1161 | Mmap2 bool 1162 | 1163 | // CommExec allows the distinction between process renaming 1164 | // via exec(2) or via other means. See also Comm, and 1165 | // (*CommRecord).WasExec. 1166 | CommExec bool 1167 | 1168 | // UseClockID allows selecting which internal Linux clock to use 1169 | // when generating timestamps via the ClockID field. 1170 | UseClockID bool 1171 | 1172 | // ContextSwitch enables the generation of SwitchRecord records, 1173 | // and SwitchCPUWideRecord records when sampling in CPU-wide mode. 1174 | ContextSwitch bool 1175 | 1176 | // writeBackward configures the kernel to write to the memory 1177 | // mapped ring buffer backwards. This option is not supported by 1178 | // package perf at the moment. 1179 | writeBackward bool 1180 | 1181 | // Namespaces enables the generation of NamespacesRecord records.
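//
// As a hedged illustration of how these options combine (Attr.Options is
// assumed to be where an Options value attaches to an event, as its use
// elsewhere in this package, e.g. ev.a.Options.SampleIDAll, suggests):
//
//	a := new(perf.Attr)
//	a.Options = perf.Options{
//		Disabled:    true, // create the event disabled; enable it explicitly
//		ExcludeIdle: true, // do not count while the CPU is idle
//		Namespaces:  true, // emit NamespacesRecord records
//	}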
1182 | Namespaces bool 1183 | } 1184 | 1185 | func (opt Options) marshal() uint64 { 1186 | fields := []bool{ 1187 | opt.Disabled, 1188 | opt.Inherit, 1189 | opt.Pinned, 1190 | opt.Exclusive, 1191 | opt.ExcludeUser, 1192 | opt.ExcludeKernel, 1193 | opt.ExcludeHypervisor, 1194 | opt.ExcludeIdle, 1195 | opt.Mmap, 1196 | opt.Comm, 1197 | opt.Freq, 1198 | opt.InheritStat, 1199 | opt.EnableOnExec, 1200 | opt.Task, 1201 | opt.Watermark, 1202 | false, false, // 2 bits for skid constraint 1203 | opt.MmapData, 1204 | opt.SampleIDAll, 1205 | opt.ExcludeHost, 1206 | opt.ExcludeGuest, 1207 | opt.ExcludeKernelCallchain, 1208 | opt.ExcludeUserCallchain, 1209 | opt.Mmap2, 1210 | opt.CommExec, 1211 | opt.UseClockID, 1212 | opt.ContextSwitch, 1213 | opt.writeBackward, 1214 | opt.Namespaces, 1215 | } 1216 | val := marshalBitwiseUint64(fields) 1217 | 1218 | const ( 1219 | skidlsb = 15 1220 | skidmsb = 16 1221 | ) 1222 | if opt.PreciseIP&0x01 != 0 { 1223 | val |= 1 << skidlsb 1224 | } 1225 | if opt.PreciseIP&0x02 != 0 { // the skid constraint is a 2-bit field: test bit 1, not 0x10 1226 | val |= 1 << skidmsb 1227 | } 1228 | 1229 | return val 1230 | } 1231 | 1232 | // Supported returns a boolean indicating whether the host kernel supports 1233 | // the perf_event_open system call, which is a prerequisite for the operations 1234 | // of this package. 1235 | // 1236 | // Supported checks for the existence of a /proc/sys/kernel/perf_event_paranoid 1237 | // file, which is the canonical method for determining if a kernel supports 1238 | // perf_event_open(2). 1239 | func Supported() bool { 1240 | _, err := os.Stat("/proc/sys/kernel/perf_event_paranoid") 1241 | return err == nil 1242 | } 1243 | 1244 | // MaxStack returns the maximum number of frame pointers in a recorded 1245 | // callchain. It reads the value from /proc/sys/kernel/perf_event_max_stack. 1246 | func MaxStack() (uint16, error) { 1247 | max, err := readUint("/proc/sys/kernel/perf_event_max_stack", 16) 1248 | return uint16(max), err 1249 | } 1250 | 1251 | // fields is a collection of 32-bit or 64-bit fields. 1252 | type fields []byte 1253 | 1254 | // uint64 decodes the next 64 bit field into v. 1255 | func (f *fields) uint64(v *uint64) { 1256 | *v = *(*uint64)(unsafe.Pointer(&(*f)[0])) 1257 | f.advance(8) 1258 | } 1259 | 1260 | // uint64Cond decodes the next 64 bit field into v, if cond is true. 1261 | func (f *fields) uint64Cond(cond bool, v *uint64) { 1262 | if cond { 1263 | f.uint64(v) 1264 | } 1265 | } 1266 | 1267 | // uint32 decodes a pair of uint32s into a and b. 1268 | func (f *fields) uint32(a, b *uint32) { 1269 | *a = *(*uint32)(unsafe.Pointer(&(*f)[0])) 1270 | *b = *(*uint32)(unsafe.Pointer(&(*f)[4])) 1271 | f.advance(8) 1272 | } 1273 | 1274 | // uint32Cond decodes a pair of uint32s into a and b, if cond is true. 1275 | func (f *fields) uint32Cond(cond bool, a, b *uint32) { 1276 | if cond { 1277 | f.uint32(a, b) 1278 | } 1279 | } 1280 | 1281 | func (f *fields) uint32sizeBytes(b *[]byte) { 1282 | size := *(*uint32)(unsafe.Pointer(&(*f)[0])) 1283 | f.advance(4) 1284 | data := make([]byte, size) 1285 | copy(data, *f) 1286 | f.advance(int(size)) 1287 | *b = data 1288 | } 1289 | 1290 | func (f *fields) uint64sizeBytes(b *[]byte) { 1291 | size := *(*uint64)(unsafe.Pointer(&(*f)[0])) 1292 | f.advance(8) 1293 | data := make([]byte, size) 1294 | copy(data, *f) 1295 | f.advance(int(size)) 1296 | *b = data 1297 | } 1298 | 1299 | // duration decodes a duration into d.
1300 | func (f *fields) duration(d *time.Duration) { 1301 | *d = *(*time.Duration)(unsafe.Pointer(&(*f)[0])) 1302 | f.advance(8) 1303 | } 1304 | 1305 | // string decodes a null-terminated string into s. The null terminator 1306 | // is not included in the string written to s. 1307 | func (f *fields) string(s *string) { 1308 | for i := 0; i < len(*f); i++ { 1309 | if (*f)[i] == 0 { 1310 | *s = string((*f)[:i]) 1311 | if i+1 <= len(*f) { 1312 | f.advance(i + 1) 1313 | } 1314 | return 1315 | } 1316 | } 1317 | } 1318 | 1319 | // idCond decodes a SampleID based on the SampleFormat the event was 1320 | // configured with, if cond is true. 1321 | func (f *fields) idCond(cond bool, id *SampleID, sfmt SampleFormat) { 1322 | if !cond { 1323 | return 1324 | } 1325 | f.uint32Cond(sfmt.Tid, &id.Pid, &id.Tid) 1326 | f.uint64Cond(sfmt.Time, &id.Time) 1327 | f.uint64Cond(sfmt.ID, &id.ID) 1328 | f.uint64Cond(sfmt.StreamID, &id.StreamID) 1329 | var reserved uint32 1330 | f.uint32Cond(sfmt.CPU, &id.CPU, &reserved) 1331 | f.uint64Cond(sfmt.Identifier, &id.Identifier) 1332 | } 1333 | 1334 | // count decodes a Count into c. 1335 | func (f *fields) count(c *Count, cfmt CountFormat) { 1336 | f.uint64(&c.Value) 1337 | if cfmt.Enabled { 1338 | f.duration(&c.Enabled) 1339 | } 1340 | if cfmt.Running { 1341 | f.duration(&c.Running) 1342 | } 1343 | f.uint64Cond(cfmt.ID, &c.ID) 1344 | } 1345 | 1346 | // groupCount decodes a GroupCount into gc. 1347 | func (f *fields) groupCount(gc *GroupCount, cfmt CountFormat) { 1348 | var nr uint64 1349 | f.uint64(&nr) 1350 | if cfmt.Enabled { 1351 | f.duration(&gc.Enabled) 1352 | } 1353 | if cfmt.Running { 1354 | f.duration(&gc.Running) 1355 | } 1356 | gc.Values = make([]struct { 1357 | Value, ID uint64 1358 | Label string 1359 | }, nr) 1360 | for i := 0; i < int(nr); i++ { 1361 | f.uint64(&gc.Values[i].Value) 1362 | f.uint64Cond(cfmt.ID, &gc.Values[i].ID) 1363 | } 1364 | } 1365 | 1366 | // advance advances through the fields by n bytes. 1367 | func (f *fields) advance(n int) { 1368 | *f = (*f)[n:] 1369 | } 1370 | 1371 | // marshalBitwiseUint64 marshals a set of bitwise flags into a 1372 | // uint64, LSB first. 1373 | func marshalBitwiseUint64(fields []bool) uint64 { 1374 | var res uint64 1375 | for shift, set := range fields { 1376 | if set { 1377 | res |= 1 << uint(shift) 1378 | } 1379 | } 1380 | return res 1381 | } 1382 | 1383 | // readUint reads an unsigned integer from the specified sys file. 1384 | // If readUint does not return an error, the returned integer is 1385 | // guaranteed to fit in the specified number of bits. 1386 | func readUint(sysfile string, bits int) (uint64, error) { 1387 | content, err := ioutil.ReadFile(sysfile) 1388 | if err != nil { 1389 | return 0, err 1390 | } 1391 | content = bytes.TrimSpace(content) 1392 | return strconv.ParseUint(string(content), 10, bits) 1393 | } 1394 | 1395 | type eventLabel struct { 1396 | Name, Alias string 1397 | } 1398 | 1399 | func (el eventLabel) String() string { 1400 | if el.Name == "" { 1401 | return "unknown" 1402 | } 1403 | if el.Alias != "" { 1404 | return fmt.Sprintf("%s OR %s", el.Name, el.Alias) 1405 | } 1406 | return el.Name 1407 | } 1408 | 1409 | type eventID struct { 1410 | Type, Config uint64 1411 | } 1412 | 1413 | var eventLabels sync.Map // of eventID to eventLabel 1414 | 1415 | func init() { 1416 | type labeler interface { 1417 | eventLabel() eventLabel 1418 | } 1419 | 1420 | var events []Configurator 1421 | events = append(events, AllHardwareCounters()...) 1422 | events = append(events, AllSoftwareCounters()...)
1423 | 1424 | for _, cfg := range events { 1425 | if l, ok := cfg.(labeler); ok { 1426 | var a Attr 1427 | cfg.Configure(&a) 1428 | id := eventID{Type: uint64(a.Type), Config: a.Config} 1429 | label := l.eventLabel() 1430 | eventLabels.Store(id, label) 1431 | } 1432 | } 1433 | } 1434 | 1435 | func lookupLabel(id eventID) eventLabel { 1436 | v, ok := eventLabels.Load(id) 1437 | if ok { 1438 | return v.(eventLabel) 1439 | } 1440 | label := lookupLabelInSysfs(id) 1441 | eventLabels.Store(id, label) 1442 | return label 1443 | } 1444 | 1445 | func lookupLabelInSysfs(id eventID) eventLabel { 1446 | return eventLabel{} 1447 | } 1448 | 1449 | // BUG(acln): generic Attr.Label lookup is not implemented 1450 | -------------------------------------------------------------------------------- /record.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "context" 11 | "errors" 12 | "fmt" 13 | "math/bits" 14 | "os" 15 | "sync/atomic" 16 | "time" 17 | "unsafe" 18 | 19 | "golang.org/x/sys/unix" 20 | ) 21 | 22 | // ErrDisabled is returned from ReadRecord and ReadRawRecord if the event 23 | // being monitored is attached to a different process, and that process 24 | // exits. (since Linux 3.18) 25 | var ErrDisabled = errors.New("perf: event disabled") 26 | 27 | // ErrNoReadRecord is returned by ReadRecord when it is disabled on a 28 | // group event, due to different configurations of the leader and follower 29 | // events. See also (*Event).SetOutput. 30 | var ErrNoReadRecord = errors.New("perf: ReadRecord disabled") 31 | 32 | // ErrBadRecord is returned by ReadRecord when a read record can't be decoded. 33 | var ErrBadRecord = errors.New("bad record received") 34 | 35 | // ReadRecord reads and decodes a record from the ring buffer associated 36 | // with ev. 37 | // 38 | // ReadRecord may be called concurrently with ReadCount or ReadGroupCount, 39 | // but not concurrently with itself, ReadRawRecord, Close, or any other 40 | // Event method. 41 | // 42 | // If another event's records were routed to ev via SetOutput, and the 43 | // two events did not have compatible SampleFormat Options settings (see 44 | // SetOutput documentation), ReadRecord returns ErrNoReadRecord. 45 | func (ev *Event) ReadRecord(ctx context.Context) (Record, error) { 46 | if err := ev.ok(); err != nil { 47 | return nil, err 48 | } 49 | if ev.noReadRecord { 50 | return nil, ErrNoReadRecord 51 | } 52 | var raw RawRecord 53 | if err := ev.ReadRawRecord(ctx, &raw); err != nil { 54 | return nil, err 55 | } 56 | rec, err := newRecord(ev, raw.Header.Type) 57 | if err != nil { 58 | return nil, err 59 | } 60 | if err := rec.DecodeFrom(&raw, ev); err != nil { 61 | return nil, err 62 | } 63 | return rec, nil 64 | } 65 | 66 | // ReadRawRecord reads and decodes a raw record from the ring buffer 67 | // associated with ev into rec. Callers must not retain rec.Data. 68 | // 69 | // ReadRawRecord may be called concurrently with ReadCount or ReadGroupCount, 70 | // but not concurrently with itself, ReadRecord, Close or any other Event 71 | // method. 
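//
// A hedged usage sketch (error handling elided; ev is assumed to be an
// Event configured for sampling, enabled, and mapped with MapRing):
//
//	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
//	defer cancel()
//
//	var raw perf.RawRecord
//	if err := ev.ReadRawRecord(ctx, &raw); err == nil {
//		fmt.Printf("record type %d, %d byte(s) of data\n",
//			raw.Header.Type, len(raw.Data))
//	}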
72 | func (ev *Event) ReadRawRecord(ctx context.Context, raw *RawRecord) error { 73 | if err := ev.ok(); err != nil { 74 | return err 75 | } 76 | if ev.ring == nil { 77 | return errors.New("perf: event ring not mapped") 78 | } 79 | 80 | // Fast path: try reading from the ring buffer first. If there is 81 | // a record there, we are done. 82 | if ev.readRawRecordNonblock(raw) { 83 | return nil 84 | } 85 | 86 | // If the context has a deadline, and that deadline is in the future, 87 | // use it to compute a timeout for ppoll(2). If the context is 88 | // expired, bail out immediately. Otherwise, the timeout is zero, 89 | // which means no timeout. 90 | var timeout time.Duration 91 | deadline, ok := ctx.Deadline() 92 | if ok { 93 | timeout = time.Until(deadline) 94 | if timeout <= 0 { 95 | <-ctx.Done() 96 | return ctx.Err() 97 | } 98 | } 99 | 100 | // Start a round of polling, then await results. Only one request 101 | // can be in flight at a time, and the whole request-response cycle 102 | // is owned by the current invocation of ReadRawRecord. 103 | again: 104 | ev.pollreq <- pollreq{timeout: timeout} 105 | select { 106 | case <-ctx.Done(): 107 | active := false 108 | err := ctx.Err() 109 | if err == context.Canceled { 110 | // Initiate active wakeup on ev.wakeupfd, and wait for 111 | // doPoll to return. doPoll might miss this signal, 112 | // but that's okay: see below. 113 | val := uint64(1) 114 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 115 | unix.Write(ev.wakeupfd, buf) 116 | active = true 117 | } 118 | <-ev.pollresp 119 | 120 | // We don't know if doPoll woke up due to our active wakeup 121 | // or because it timed out. It doesn't make a difference. 122 | // The important detail here is that doPoll does not touch 123 | // ev.wakeupfd (besides polling it for readiness). If we 124 | // initiated active wakeup, we must restore the event file 125 | // descriptor to quiescent state ourselves, in order to avoid 126 | // a spurious wakeup during the next round of polling. 127 | if active { 128 | var buf [8]byte 129 | unix.Read(ev.wakeupfd, buf[:]) 130 | } 131 | return err 132 | case resp := <-ev.pollresp: 133 | if resp.err != nil { 134 | // Polling failed. Nothing to do but report the error. 135 | return resp.err 136 | } 137 | if resp.perfhup { 138 | // Saw POLLHUP on ev.perffd. See also the 139 | // documentation for ErrDisabled. 140 | return ErrDisabled 141 | } 142 | if !resp.perfready { 143 | // Here, we have not touched ev.wakeupfd, there 144 | // was no polling error, and ev.perffd is not 145 | // ready. Therefore, ppoll(2) must have timed out. 146 | // 147 | // The reason we are here is the following: doPoll 148 | // woke up, and immediately sent us a pollresp, which 149 | // won the race with <-ctx.Done(), such that this 150 | // select case fired. In any case, ctx is expired, 151 | // because we wouldn't be here otherwise. 152 | <-ctx.Done() 153 | return ctx.Err() 154 | } 155 | if !ev.readRawRecordNonblock(raw) { 156 | // It might happen that an overflow notification was 157 | // generated on the file descriptor, we observed it 158 | // as POLLIN, but there is still nothing new for us 159 | // to read in the ring buffer. 160 | // 161 | // This is because the notification is raised based 162 | // on the Attr.Wakeup and Attr.Options.Watermark 163 | // settings, rather than based on what events we've 164 | // seen already. 
165 | // 166 | // For example, for an event with Attr.Wakeup == 1, 167 | // POLLIN will be indicated on the file descriptor 168 | // after the first event, regardless of whether we 169 | // have consumed it from the ring buffer or not. 170 | // 171 | // If we happen to see POLLIN with an empty ring 172 | // buffer, the only thing to do is to wait again. 173 | // 174 | // See also https://github.com/acln0/perfwakeup. 175 | goto again 176 | } 177 | return nil 178 | } 179 | } 180 | 181 | // HasRecord reports whether there is a record available to be read from the ring. 182 | func (ev *Event) HasRecord() bool { 183 | return atomic.LoadUint64(&ev.meta.Data_head) != atomic.LoadUint64(&ev.meta.Data_tail) 184 | } 185 | 186 | // resetRing advances the read pointer to the write pointer to discard all the 187 | // data in the ring. This is done when bogus data is read from the ring. 188 | func (ev *Event) resetRing() { 189 | atomic.StoreUint64(&ev.meta.Data_tail, atomic.LoadUint64(&ev.meta.Data_head)) 190 | } 191 | 192 | // readRawRecordNonblock reads a raw record into rec, if one is available. 193 | // Callers must not retain rec.Data. The boolean return value signals whether 194 | // a record was actually found / written to rec. 195 | func (ev *Event) readRawRecordNonblock(raw *RawRecord) bool { 196 | head := atomic.LoadUint64(&ev.meta.Data_head) 197 | tail := atomic.LoadUint64(&ev.meta.Data_tail) 198 | if head == tail { 199 | return false 200 | } 201 | 202 | // Make sure there is enough space to read a record header. Otherwise 203 | // consider the ring to be corrupted. 204 | const headerSize = uint64(unsafe.Sizeof(RecordHeader{})) 205 | avail := head - tail 206 | if avail < headerSize { 207 | ev.resetRing() 208 | return false 209 | } 210 | 211 | // Head and tail values only ever grow, so we must take their value 212 | // modulo the size of the data segment of the ring. 213 | start := tail % uint64(len(ev.ringdata)) 214 | raw.Header = *(*RecordHeader)(unsafe.Pointer(&ev.ringdata[start])) 215 | end := (tail + uint64(raw.Header.Size)) % uint64(len(ev.ringdata)) 216 | 217 | // Make sure there is enough space available to read the whole record. 218 | // Otherwise treat the ring as corrupted. 219 | msgLen := uint64(raw.Header.Size) 220 | if avail < msgLen || msgLen < headerSize { 221 | ev.resetRing() 222 | return false 223 | } 224 | 225 | // Reserve space to store this record out of the ring. 226 | if uint64(len(ev.recordBuffer)) < msgLen { 227 | ev.recordBuffer = make([]byte, msgLen) 228 | } 229 | // If the record wraps around the ring, copy it out in two steps, 230 | // so that we can return a contiguous area of memory to the caller. 231 | if end < start { 232 | n := copy(ev.recordBuffer, ev.ringdata[start:]) 233 | copy(ev.recordBuffer[n:], ev.ringdata[:int(raw.Header.Size)-n]) 234 | } else { 235 | copy(ev.recordBuffer, ev.ringdata[start:end]) 236 | } 237 | raw.Data = ev.recordBuffer[unsafe.Sizeof(raw.Header):msgLen] 238 | 239 | // Notify the kernel of the last record we've seen. 240 | atomic.AddUint64(&ev.meta.Data_tail, msgLen) 241 | return true 242 | } 243 | 244 | // poll services requests from ev.pollreq and sends responses on ev.pollresp. 245 | func (ev *Event) poll() { 246 | defer close(ev.pollresp) 247 | 248 | for req := range ev.pollreq { 249 | ev.pollresp <- ev.doPoll(req) 250 | } 251 | } 252 | 253 | // doPoll executes one round of polling on ev.perffd and ev.wakeupfd. 254 | // 255 | // A req.timeout value of zero is interpreted as "no timeout". req.timeout 256 | // must not be negative.
257 | func (ev *Event) doPoll(req pollreq) pollresp { 258 | var timeout *unix.Timespec 259 | if req.timeout > 0 { 260 | ts := unix.NsecToTimespec(req.timeout.Nanoseconds()) 261 | timeout = &ts 262 | } 263 | 264 | pollfds := []unix.PollFd{ 265 | {Fd: int32(ev.perffd), Events: unix.POLLIN}, 266 | {Fd: int32(ev.wakeupfd), Events: unix.POLLIN}, 267 | } 268 | 269 | again: 270 | _, err := unix.Ppoll(pollfds, timeout, nil) 271 | // TODO(acln): do we need to do this business at all? See #20400. 272 | if err == unix.EINTR { 273 | goto again 274 | } 275 | 276 | // If we are here and we have successfully woken up, it is for one 277 | // of four reasons: we got POLLIN on ev.perffd, we got POLLHUP on 278 | // ev.perffd (see ErrDisabled), the ppoll(2) timeout fired, or we 279 | // got POLLIN on ev.wakeupfd. 280 | // 281 | // Report if the perf fd is ready, if we saw POLLHUP, and any 282 | // errors except EINTR. The machinery is documented in more detail 283 | // in ReadRawRecord. 284 | return pollresp{ 285 | perfready: pollfds[0].Revents&unix.POLLIN != 0, 286 | perfhup: pollfds[0].Revents&unix.POLLHUP != 0, 287 | err: os.NewSyscallError("ppoll", err), 288 | } 289 | } 290 | 291 | type pollreq struct { 292 | // timeout is the timeout for ppoll(2): zero means no timeout 293 | timeout time.Duration 294 | } 295 | 296 | type pollresp struct { 297 | // perfready indicates whether the perf FD (ev.perffd) is ready. 298 | perfready bool 299 | 300 | // perfhup indicates whether POLLHUP was observed on ev.perffd. 301 | perfhup bool 302 | 303 | // err is the *os.SyscallError from ppoll(2). 304 | err error 305 | } 306 | 307 | // SampleFormat configures information requested in overflow packets. 308 | type SampleFormat struct { 309 | // IP records the instruction pointer. 310 | IP bool 311 | 312 | // Tid records process and thread IDs. 313 | Tid bool 314 | 315 | // Time records a hardware timestamp. 316 | Time bool 317 | 318 | // Addr records an address, if applicable. 319 | Addr bool 320 | 321 | // Count records counter values for all events in a group, not just 322 | // the group leader. 323 | Count bool 324 | 325 | // Callchain records the stack backtrace. 326 | Callchain bool 327 | 328 | // ID records a unique ID for the opened event's group leader. 329 | ID bool 330 | 331 | // CPU records the CPU number. 332 | CPU bool 333 | 334 | // Period records the current sampling period. 335 | Period bool 336 | 337 | // StreamID records a unique ID for the opened event. Unlike ID, 338 | // the actual ID is recorded, not the group ID. 339 | StreamID bool 340 | 341 | // Raw records additional data, if applicable. Usually returned by 342 | // tracepoint events. 343 | Raw bool 344 | 345 | // BranchStack provides a record of recent branches, as provided by 346 | // CPU branch sampling hardware. See also Attr.BranchSampleFormat. 347 | BranchStack bool 348 | 349 | // UserRegisters records the current user-level CPU state (the 350 | // values in the process before the kernel was called). See also 351 | // Attr.SampleRegistersUser. 352 | UserRegisters bool 353 | 354 | // UserStack records the user level stack, allowing stack unwinding. 355 | UserStack bool 356 | 357 | // Weight records a hardware provided weight value that expresses 358 | // how costly the sampled event was. 359 | Weight bool 360 | 361 | // DataSource records the data source: where in the memory hierarchy 362 | // the data associated with the sampled instruction came from. 363 | DataSource bool 364 | 365 | // Identifier places the ID value in a fixed position in the record.
366 | Identifier bool 367 | 368 | // Transaction records reasons for transactional memory abort events. 369 | Transaction bool 370 | 371 | // IntrRegisters records a subset of the current CPU register state. 372 | // Unlike UserRegisters, the registers reflect kernel register 373 | // state if the overflow happened while kernel code was running. See 374 | // also Attr.SampleRegistersIntr. 375 | IntrRegisters bool 376 | 377 | PhysicalAddress bool 378 | } 379 | 380 | // TODO(acln): document SampleFormat.PhysicalAddress 381 | 382 | // marshal packs the SampleFormat into a uint64. 383 | func (sf SampleFormat) marshal() uint64 { 384 | // Always keep this in sync with the type definition above. 385 | fields := []bool{ 386 | sf.IP, 387 | sf.Tid, 388 | sf.Time, 389 | sf.Addr, 390 | sf.Count, 391 | sf.Callchain, 392 | sf.ID, 393 | sf.CPU, 394 | sf.Period, 395 | sf.StreamID, 396 | sf.Raw, 397 | sf.BranchStack, 398 | sf.UserRegisters, 399 | sf.UserStack, 400 | sf.Weight, 401 | sf.DataSource, 402 | sf.Identifier, 403 | sf.Transaction, 404 | sf.IntrRegisters, 405 | sf.PhysicalAddress, 406 | } 407 | return marshalBitwiseUint64(fields) 408 | } 409 | 410 | // SampleID contains identifiers for when and where a record was collected. 411 | // 412 | // A SampleID is included in a Record if Options.SampleIDAll is set on the 413 | // associated event. Fields are set according to SampleFormat options. 414 | type SampleID struct { 415 | Pid uint32 416 | Tid uint32 417 | Time uint64 418 | ID uint64 419 | StreamID uint64 420 | CPU uint32 421 | _ uint32 // reserved 422 | Identifier uint64 423 | } 424 | 425 | // Record is the interface implemented by all record types. 426 | type Record interface { 427 | Header() RecordHeader 428 | DecodeFrom(*RawRecord, *Event) error 429 | } 430 | 431 | // RecordType is the type of an overflow record. 432 | type RecordType uint32 433 | 434 | // Known record types. 435 | const ( 436 | RecordTypeMmap RecordType = unix.PERF_RECORD_MMAP 437 | RecordTypeLost RecordType = unix.PERF_RECORD_LOST 438 | RecordTypeComm RecordType = unix.PERF_RECORD_COMM 439 | RecordTypeExit RecordType = unix.PERF_RECORD_EXIT 440 | RecordTypeThrottle RecordType = unix.PERF_RECORD_THROTTLE 441 | RecordTypeUnthrottle RecordType = unix.PERF_RECORD_UNTHROTTLE 442 | RecordTypeFork RecordType = unix.PERF_RECORD_FORK 443 | RecordTypeRead RecordType = unix.PERF_RECORD_READ 444 | RecordTypeSample RecordType = unix.PERF_RECORD_SAMPLE 445 | RecordTypeMmap2 RecordType = unix.PERF_RECORD_MMAP2 446 | RecordTypeAux RecordType = unix.PERF_RECORD_AUX 447 | RecordTypeItraceStart RecordType = unix.PERF_RECORD_ITRACE_START 448 | RecordTypeLostSamples RecordType = unix.PERF_RECORD_LOST_SAMPLES 449 | RecordTypeSwitch RecordType = unix.PERF_RECORD_SWITCH 450 | RecordTypeSwitchCPUWide RecordType = unix.PERF_RECORD_SWITCH_CPU_WIDE 451 | RecordTypeNamespaces RecordType = unix.PERF_RECORD_NAMESPACES 452 | ) 453 | 454 | func (rt RecordType) known() bool { 455 | return rt >= RecordTypeMmap && rt <= RecordTypeNamespaces 456 | } 457 | 458 | // RecordHeader is the header present in every overflow record. 459 | type RecordHeader struct { 460 | Type RecordType 461 | Misc uint16 462 | Size uint16 463 | } 464 | 465 | // Header returns rh itself, so that types which embed a RecordHeader 466 | // automatically implement a part of the Record interface. 467 | func (rh RecordHeader) Header() RecordHeader { return rh } 468 | 469 | // CPUMode returns the CPU mode in use when the sample happened.
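//
// For example (a sketch; rec is assumed to be a Record previously
// returned by (*Event).ReadRecord):
//
//	if rec.Header().CPUMode() == perf.KernelMode {
//		// the sample was taken while the CPU was in kernel mode
//	}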
470 | func (rh RecordHeader) CPUMode() CPUMode { 471 | return CPUMode(rh.Misc & cpuModeMask) 472 | } 473 | 474 | // CPUMode is a CPU operation mode. 475 | type CPUMode uint8 476 | 477 | const cpuModeMask = 7 478 | 479 | // Known CPU modes. 480 | const ( 481 | UnknownMode CPUMode = iota 482 | KernelMode 483 | UserMode 484 | HypervisorMode 485 | GuestKernelMode 486 | GuestUserMode 487 | ) 488 | 489 | // RawRecord is a raw overflow record, read from the memory mapped ring 490 | // buffer associated with an Event. 491 | // 492 | // Header is the 8 byte record header. Data contains the rest of the record. 493 | type RawRecord struct { 494 | Header RecordHeader 495 | Data []byte 496 | } 497 | 498 | func (raw RawRecord) fields() fields { return fields(raw.Data) } 499 | 500 | var newRecordFuncs = [...]func(ev *Event) Record{ 501 | RecordTypeMmap: func(_ *Event) Record { return &MmapRecord{} }, 502 | RecordTypeLost: func(_ *Event) Record { return &LostRecord{} }, 503 | RecordTypeComm: func(_ *Event) Record { return &CommRecord{} }, 504 | RecordTypeExit: func(_ *Event) Record { return &ExitRecord{} }, 505 | RecordTypeThrottle: func(_ *Event) Record { return &ThrottleRecord{} }, 506 | RecordTypeUnthrottle: func(_ *Event) Record { return &UnthrottleRecord{} }, 507 | RecordTypeFork: func(_ *Event) Record { return &ForkRecord{} }, 508 | RecordTypeRead: newReadRecord, 509 | RecordTypeSample: newSampleRecord, 510 | RecordTypeMmap2: func(_ *Event) Record { return &Mmap2Record{} }, 511 | RecordTypeAux: func(_ *Event) Record { return &AuxRecord{} }, 512 | RecordTypeItraceStart: func(_ *Event) Record { return &ItraceStartRecord{} }, 513 | RecordTypeLostSamples: func(_ *Event) Record { return &LostSamplesRecord{} }, 514 | RecordTypeSwitch: func(_ *Event) Record { return &SwitchRecord{} }, 515 | RecordTypeSwitchCPUWide: func(_ *Event) Record { return &SwitchCPUWideRecord{} }, 516 | RecordTypeNamespaces: func(_ *Event) Record { return &NamespacesRecord{} }, 517 | } 518 | 519 | func newReadRecord(ev *Event) Record { 520 | if ev.a.CountFormat.Group { 521 | return &ReadGroupRecord{} 522 | } 523 | return &ReadRecord{} 524 | } 525 | 526 | func newSampleRecord(ev *Event) Record { 527 | if ev.a.CountFormat.Group { 528 | return &SampleGroupRecord{} 529 | } 530 | return &SampleRecord{} 531 | } 532 | 533 | // newRecord returns an empty Record of the given type, tailored for the 534 | // specified Event. 535 | func newRecord(ev *Event, rt RecordType) (Record, error) { 536 | if !rt.known() { 537 | return nil, fmt.Errorf("unknown record type %d", rt) 538 | } 539 | return newRecordFuncs[rt](ev), nil 540 | } 541 | 542 | // mmapDataBit is PERF_RECORD_MISC_MMAP_DATA 543 | const mmapDataBit = 1 << 13 544 | 545 | // MmapRecord (PERF_RECORD_MMAP) records PROT_EXEC mappings such that 546 | // user-space IPs can be correlated to code. 547 | type MmapRecord struct { 548 | RecordHeader 549 | Pid uint32 // process ID 550 | Tid uint32 // thread ID 551 | Addr uint64 // address of the allocated memory 552 | Len uint64 // length of the allocated memory 553 | PageOffset uint64 // page offset of the allocated memory 554 | Filename string // describes backing of allocated memory 555 | SampleID 556 | } 557 | 558 | // DecodeFrom implements the Record.DecodeFrom method. 
559 | func (mr *MmapRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 560 | mr.RecordHeader = raw.Header 561 | f := raw.fields() 562 | f.uint32(&mr.Pid, &mr.Tid) 563 | f.uint64(&mr.Addr) 564 | f.uint64(&mr.Len) 565 | f.uint64(&mr.PageOffset) 566 | f.string(&mr.Filename) 567 | f.idCond(ev.a.Options.SampleIDAll, &mr.SampleID, ev.a.SampleFormat) 568 | return nil 569 | } 570 | 571 | // Executable returns a boolean indicating whether the mapping is executable. 572 | func (mr *MmapRecord) Executable() bool { 573 | // The data bit is set when the mapping is _not_ executable. 574 | return mr.RecordHeader.Misc&mmapDataBit == 0 575 | } 576 | 577 | // LostRecord (PERF_RECORD_LOST) indicates when events are lost. 578 | type LostRecord struct { 579 | RecordHeader 580 | ID uint64 // the unique ID for the lost events 581 | Lost uint64 // the number of lost events 582 | SampleID 583 | } 584 | 585 | // DecodeFrom implements the Record.DecodeFrom method. 586 | func (lr *LostRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 587 | lr.RecordHeader = raw.Header 588 | f := raw.fields() 589 | f.uint64(&lr.ID) 590 | f.uint64(&lr.Lost) 591 | f.idCond(ev.a.Options.SampleIDAll, &lr.SampleID, ev.a.SampleFormat) 592 | return nil 593 | } 594 | 595 | // CommRecord (PERF_RECORD_COMM) indicates a change in the process name. 596 | type CommRecord struct { 597 | RecordHeader 598 | Pid uint32 // process ID 599 | Tid uint32 // thread ID 600 | NewName string // the new name of the process 601 | SampleID 602 | } 603 | 604 | // DecodeFrom implements the Record.DecodeFrom method. 605 | func (cr *CommRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 606 | cr.RecordHeader = raw.Header 607 | f := raw.fields() 608 | f.uint32(&cr.Pid, &cr.Tid) 609 | f.string(&cr.NewName) 610 | f.idCond(ev.a.Options.SampleIDAll, &cr.SampleID, ev.a.SampleFormat) 611 | return nil 612 | } 613 | 614 | // commExecBit is PERF_RECORD_MISC_COMM_EXEC 615 | const commExecBit = 1 << 13 616 | 617 | // WasExec returns a boolean indicating whether a process name change 618 | // was caused by an exec(2) system call. 619 | func (cr *CommRecord) WasExec() bool { 620 | return cr.RecordHeader.Misc&(commExecBit) != 0 621 | } 622 | 623 | // ExitRecord (PERF_RECORD_EXIT) indicates a process exit event. 624 | type ExitRecord struct { 625 | RecordHeader 626 | Pid uint32 // process ID 627 | Ppid uint32 // parent process ID 628 | Tid uint32 // thread ID 629 | Ptid uint32 // parent thread ID 630 | Time uint64 // time when the process exited 631 | SampleID 632 | } 633 | 634 | // DecodeFrom implements the Record.DecodeFrom method. 635 | func (er *ExitRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 636 | er.RecordHeader = raw.Header 637 | f := raw.fields() 638 | f.uint32(&er.Pid, &er.Ppid) 639 | f.uint32(&er.Tid, &er.Ptid) 640 | f.uint64(&er.Time) 641 | f.idCond(ev.a.Options.SampleIDAll, &er.SampleID, ev.a.SampleFormat) 642 | return nil 643 | } 644 | 645 | // ThrottleRecord (PERF_RECORD_THROTTLE) indicates a throttle event. 646 | type ThrottleRecord struct { 647 | RecordHeader 648 | Time uint64 649 | ID uint64 650 | StreamID uint64 651 | SampleID 652 | } 653 | 654 | // DecodeFrom implements the Record.DecodeFrom method.
655 | func (tr *ThrottleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 656 | tr.RecordHeader = raw.Header 657 | f := raw.fields() 658 | f.uint64(&tr.Time) 659 | f.uint64(&tr.ID) 660 | f.uint64(&tr.StreamID) 661 | f.idCond(ev.a.Options.SampleIDAll, &tr.SampleID, ev.a.SampleFormat) 662 | return nil 663 | } 664 | 665 | // UnthrottleRecord (PERF_RECORD_UNTHROTTLE) indicates an unthrottle event. 666 | type UnthrottleRecord struct { 667 | RecordHeader 668 | Time uint64 669 | ID uint64 670 | StreamID uint64 671 | SampleID 672 | } 673 | 674 | // DecodeFrom implements the Record.DecodeFrom method. 675 | func (ur *UnthrottleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 676 | ur.RecordHeader = raw.Header 677 | f := raw.fields() 678 | f.uint64(&ur.Time) 679 | f.uint64(&ur.ID) 680 | f.uint64(&ur.StreamID) 681 | f.idCond(ev.a.Options.SampleIDAll, &ur.SampleID, ev.a.SampleFormat) 682 | return nil 683 | } 684 | 685 | // ForkRecord (PERF_RECORD_FORK) indicates a fork event. 686 | type ForkRecord struct { 687 | RecordHeader 688 | Pid uint32 // process ID 689 | Ppid uint32 // parent process ID 690 | Tid uint32 // thread ID 691 | Ptid uint32 // parent thread ID 692 | Time uint64 // time when the fork occurred 693 | SampleID 694 | } 695 | 696 | // DecodeFrom implements the Record.DecodeFrom method. 697 | func (fr *ForkRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 698 | fr.RecordHeader = raw.Header 699 | f := raw.fields() 700 | f.uint32(&fr.Pid, &fr.Ppid) 701 | f.uint32(&fr.Tid, &fr.Ptid) 702 | f.uint64(&fr.Time) 703 | f.idCond(ev.a.Options.SampleIDAll, &fr.SampleID, ev.a.SampleFormat) 704 | return nil 705 | } 706 | 707 | // ReadRecord (PERF_RECORD_READ) indicates a read event. 708 | type ReadRecord struct { 709 | RecordHeader 710 | Pid uint32 // process ID 711 | Tid uint32 // thread ID 712 | Count Count // count value 713 | SampleID 714 | } 715 | 716 | // DecodeFrom implements the Record.DecodeFrom method. 717 | func (rr *ReadRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 718 | rr.RecordHeader = raw.Header 719 | f := raw.fields() 720 | f.uint32(&rr.Pid, &rr.Tid) 721 | f.count(&rr.Count, ev.a.CountFormat) 722 | f.idCond(ev.a.Options.SampleIDAll, &rr.SampleID, ev.a.SampleFormat) 723 | return nil 724 | } 725 | 726 | // ReadGroupRecord (PERF_RECORD_READ) indicates a read event on a group event. 727 | type ReadGroupRecord struct { 728 | RecordHeader 729 | Pid uint32 // process ID 730 | Tid uint32 // thread ID 731 | GroupCount GroupCount // group count values 732 | SampleID 733 | } 734 | 735 | // DecodeFrom implements the Record.DecodeFrom method. 736 | func (rr *ReadGroupRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 737 | rr.RecordHeader = raw.Header 738 | f := raw.fields() 739 | f.uint32(&rr.Pid, &rr.Tid) 740 | f.groupCount(&rr.GroupCount, ev.a.CountFormat) 741 | f.idCond(ev.a.Options.SampleIDAll, &rr.SampleID, ev.a.SampleFormat) 742 | return nil 743 | } 744 | 745 | // SampleRecord indicates a sample. 746 | // 747 | // All the fields up to and including Callchain represent ABI bits. All the 748 | // fields starting with Raw are non-ABI and have no compatibility guarantees. 749 | // 750 | // Fields on SampleRecord are set according to the SampleFormat the event 751 | // was configured with. A boolean flag in SampleFormat typically enables 752 | // the homonymous field in a SampleRecord.
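//
// As a hedged illustration, an event opened with
//
//	a := new(perf.Attr)
//	a.SampleFormat = perf.SampleFormat{IP: true, Tid: true, Time: true}
//	a.SetSamplePeriod(1000)
//
// would produce SampleRecord values with IP, Pid, Tid and Time set, and
// the remaining fields left at their zero values.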
753 | type SampleRecord struct { 754 | RecordHeader 755 | Identifier uint64 756 | IP uint64 757 | Pid uint32 758 | Tid uint32 759 | Time uint64 760 | Addr uint64 761 | ID uint64 762 | StreamID uint64 763 | CPU uint32 764 | _ uint32 // reserved 765 | Period uint64 766 | Count Count 767 | Callchain []uint64 768 | 769 | Raw []byte 770 | BranchStack []BranchEntry 771 | UserRegisterABI uint64 772 | UserRegisters []uint64 773 | UserStack []byte 774 | UserStackDynamicSize uint64 775 | Weight uint64 776 | DataSource DataSource 777 | Transaction Transaction 778 | IntrRegisterABI uint64 779 | IntrRegisters []uint64 780 | PhysicalAddress uint64 781 | } 782 | 783 | // DecodeFrom implements the Record.DecodeFrom method. 784 | func (sr *SampleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 785 | sr.RecordHeader = raw.Header 786 | f := raw.fields() 787 | f.uint64Cond(ev.a.SampleFormat.Identifier, &sr.Identifier) 788 | f.uint64Cond(ev.a.SampleFormat.IP, &sr.IP) 789 | f.uint32Cond(ev.a.SampleFormat.Tid, &sr.Pid, &sr.Tid) 790 | f.uint64Cond(ev.a.SampleFormat.Time, &sr.Time) 791 | f.uint64Cond(ev.a.SampleFormat.Addr, &sr.Addr) 792 | f.uint64Cond(ev.a.SampleFormat.ID, &sr.ID) 793 | f.uint64Cond(ev.a.SampleFormat.StreamID, &sr.StreamID) 794 | 795 | // If we have a StreamID and it is different from our 796 | // own ID, then the output from the event we're interested 797 | // in was redirected to ev. We must switch to that event 798 | // in order to decode the sample. 799 | if ev.a.SampleFormat.StreamID { 800 | if sr.StreamID != ev.id { 801 | newev := ev.groupByID[sr.StreamID] 802 | if newev == nil { 803 | ev.resetRing() 804 | return ErrBadRecord 805 | } 806 | ev = newev 807 | } 808 | } 809 | 810 | var reserved uint32 811 | f.uint32Cond(ev.a.SampleFormat.CPU, &sr.CPU, &reserved) 812 | f.uint64Cond(ev.a.SampleFormat.Period, &sr.Period) 813 | if ev.a.SampleFormat.Count { 814 | f.count(&sr.Count, ev.a.CountFormat) 815 | } 816 | if ev.a.SampleFormat.Callchain { 817 | var nr uint64 818 | f.uint64(&nr) 819 | sr.Callchain = make([]uint64, nr) 820 | for i := 0; i < len(sr.Callchain); i++ { 821 | f.uint64(&sr.Callchain[i]) 822 | } 823 | } 824 | if ev.a.SampleFormat.Raw { 825 | f.uint32sizeBytes(&sr.Raw) 826 | } 827 | if ev.a.SampleFormat.BranchStack { 828 | var nr uint64 829 | f.uint64(&nr) 830 | sr.BranchStack = make([]BranchEntry, nr) 831 | for i := 0; i < len(sr.BranchStack); i++ { 832 | var from, to, entry uint64 833 | f.uint64(&from) 834 | f.uint64(&to) 835 | f.uint64(&entry) 836 | sr.BranchStack[i].decode(from, to, entry) 837 | } 838 | } 839 | if ev.a.SampleFormat.UserRegisters { 840 | f.uint64(&sr.UserRegisterABI) 841 | num := bits.OnesCount64(ev.a.SampleRegistersUser) 842 | sr.UserRegisters = make([]uint64, num) 843 | for i := 0; i < len(sr.UserRegisters); i++ { 844 | f.uint64(&sr.UserRegisters[i]) 845 | } 846 | } 847 | if ev.a.SampleFormat.UserStack { 848 | f.uint64sizeBytes(&sr.UserStack) 849 | if len(sr.UserStack) > 0 { 850 | f.uint64(&sr.UserStackDynamicSize) 851 | } 852 | } 853 | f.uint64Cond(ev.a.SampleFormat.Weight, &sr.Weight) 854 | if ev.a.SampleFormat.DataSource { 855 | var ds uint64 856 | f.uint64(&ds) 857 | sr.DataSource = DataSource(ds) 858 | } 859 | if ev.a.SampleFormat.Transaction { 860 | var tx uint64 861 | f.uint64(&tx) 862 | sr.Transaction = Transaction(tx) 863 | } 864 | if ev.a.SampleFormat.IntrRegisters { 865 | f.uint64(&sr.IntrRegisterABI) 866 | num := bits.OnesCount64(ev.a.SampleRegistersIntr) 867 | sr.IntrRegisters = make([]uint64, num) 868 | for i := 0; i < len(sr.IntrRegisters); i++ 
{ 869 | f.uint64(&sr.IntrRegisters[i]) 870 | } 871 | } 872 | f.uint64Cond(ev.a.SampleFormat.PhysicalAddress, &sr.PhysicalAddress) 873 | return nil 874 | } 875 | 876 | // exactIPBit is PERF_RECORD_MISC_EXACT_IP 877 | const exactIPBit = 1 << 14 878 | 879 | // ExactIP indicates that sr.IP points to the actual instruction that 880 | // triggered the event. See also Options.PreciseIP. 881 | func (sr *SampleRecord) ExactIP() bool { 882 | return sr.RecordHeader.Misc&exactIPBit != 0 883 | } 884 | 885 | // SampleGroupRecord indicates a sample from an event group. 886 | // 887 | // All the fields up to and including Callchain represent ABI bits. All the 888 | // fields starting with Raw are non-ABI and have no compatibility guarantees. 889 | // 890 | // Fields on SampleGroupRecord are set according to the SampleFormat the event 891 | // was configured with. A boolean flag in SampleFormat typically enables the 892 | // homonymous field in SampleGroupRecord. 893 | type SampleGroupRecord struct { 894 | RecordHeader 895 | Identifier uint64 896 | IP uint64 897 | Pid uint32 898 | Tid uint32 899 | Time uint64 900 | Addr uint64 901 | ID uint64 902 | StreamID uint64 903 | CPU uint32 904 | _ uint32 905 | Period uint64 906 | Count GroupCount 907 | Callchain []uint64 908 | 909 | Raw []byte 910 | BranchStack []BranchEntry 911 | UserRegisterABI uint64 912 | UserRegisters []uint64 913 | UserStack []byte 914 | UserStackDynamicSize uint64 915 | Weight uint64 916 | DataSource DataSource 917 | Transaction Transaction 918 | IntrRegisterABI uint64 919 | IntrRegisters []uint64 920 | PhysicalAddress uint64 921 | } 922 | 923 | // DecodeFrom implements the Record.DecodeFrom method. 924 | func (sr *SampleGroupRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 925 | sr.RecordHeader = raw.Header 926 | f := raw.fields() 927 | f.uint64Cond(ev.a.SampleFormat.Identifier, &sr.Identifier) 928 | f.uint64Cond(ev.a.SampleFormat.IP, &sr.IP) 929 | f.uint32Cond(ev.a.SampleFormat.Tid, &sr.Pid, &sr.Tid) 930 | f.uint64Cond(ev.a.SampleFormat.Time, &sr.Time) 931 | f.uint64Cond(ev.a.SampleFormat.Addr, &sr.Addr) 932 | f.uint64Cond(ev.a.SampleFormat.ID, &sr.ID) 933 | f.uint64Cond(ev.a.SampleFormat.StreamID, &sr.StreamID) 934 | 935 | // If we have a StreamID and it is different from our 936 | // own ID, then the output from the event we're interested 937 | // in was redirected to ev. We must switch to that event 938 | // in order to decode the sample.
939 | if ev.a.SampleFormat.StreamID && sr.StreamID != ev.id { 940 | newev := ev.groupByID[sr.StreamID] // mirror SampleRecord.DecodeFrom 941 | if newev == nil { ev.resetRing(); return ErrBadRecord } 942 | ev = newev 943 | } 944 | 945 | var reserved uint32 946 | f.uint32Cond(ev.a.SampleFormat.CPU, &sr.CPU, &reserved) 947 | f.uint64Cond(ev.a.SampleFormat.Period, &sr.Period) 948 | if ev.a.SampleFormat.Count { 949 | f.groupCount(&sr.Count, ev.a.CountFormat) 950 | } 951 | if ev.a.SampleFormat.Callchain { 952 | var nr uint64 953 | f.uint64(&nr) 954 | sr.Callchain = make([]uint64, nr) 955 | for i := 0; i < len(sr.Callchain); i++ { 956 | f.uint64(&sr.Callchain[i]) 957 | } 958 | } 959 | if ev.a.SampleFormat.Raw { 960 | f.uint32sizeBytes(&sr.Raw) 961 | } 962 | if ev.a.SampleFormat.BranchStack { 963 | var nr uint64 964 | f.uint64(&nr) 965 | sr.BranchStack = make([]BranchEntry, nr) 966 | for i := 0; i < len(sr.BranchStack); i++ { 967 | var from, to, entry uint64 968 | f.uint64(&from) 969 | f.uint64(&to) 970 | f.uint64(&entry) 971 | sr.BranchStack[i].decode(from, to, entry) 972 | } 973 | } 974 | if ev.a.SampleFormat.UserRegisters { 975 | f.uint64(&sr.UserRegisterABI) 976 | num := bits.OnesCount64(ev.a.SampleRegistersUser) 977 | sr.UserRegisters = make([]uint64, num) 978 | for i := 0; i < len(sr.UserRegisters); i++ { 979 | f.uint64(&sr.UserRegisters[i]) 980 | } 981 | } 982 | if ev.a.SampleFormat.UserStack { 983 | f.uint64sizeBytes(&sr.UserStack) 984 | if len(sr.UserStack) > 0 { 985 | f.uint64(&sr.UserStackDynamicSize) 986 | } 987 | } 988 | f.uint64Cond(ev.a.SampleFormat.Weight, &sr.Weight) 989 | if ev.a.SampleFormat.DataSource { 990 | var ds uint64 991 | f.uint64(&ds) 992 | sr.DataSource = DataSource(ds) 993 | } 994 | if ev.a.SampleFormat.Transaction { 995 | var tx uint64 996 | f.uint64(&tx) 997 | sr.Transaction = Transaction(tx) 998 | } 999 | if ev.a.SampleFormat.IntrRegisters { 1000 | f.uint64(&sr.IntrRegisterABI) 1001 | num := bits.OnesCount64(ev.a.SampleRegistersIntr) 1002 | sr.IntrRegisters = make([]uint64, num) 1003 | for i := 0; i < len(sr.IntrRegisters); i++ { 1004 | f.uint64(&sr.IntrRegisters[i]) 1005 | } 1006 | } 1007 | f.uint64Cond(ev.a.SampleFormat.PhysicalAddress, &sr.PhysicalAddress) 1008 | return nil 1009 | } 1010 | 1011 | // ExactIP indicates that sr.IP points to the actual instruction that 1012 | // triggered the event. See also Options.PreciseIP. 1013 | func (sr *SampleGroupRecord) ExactIP() bool { 1014 | return sr.RecordHeader.Misc&exactIPBit != 0 1015 | } 1016 | 1017 | // BranchEntry is a sampled branch. 1018 | type BranchEntry struct { 1019 | From uint64 1020 | To uint64 1021 | Mispredicted bool 1022 | Predicted bool 1023 | InTransaction bool 1024 | TransactionAbort bool 1025 | Cycles uint16 1026 | BranchType BranchType 1027 | } 1028 | 1029 | func (be *BranchEntry) decode(from, to, entry uint64) { 1030 | *be = BranchEntry{ 1031 | From: from, 1032 | To: to, 1033 | Mispredicted: entry&(1<<0) != 0, 1034 | Predicted: entry&(1<<1) != 0, 1035 | InTransaction: entry&(1<<2) != 0, 1036 | TransactionAbort: entry&(1<<3) != 0, 1037 | Cycles: uint16((entry << 44) >> 48), 1038 | BranchType: BranchType((entry << 40) >> 60), // the 4-bit type field occupies bits 20-23 1039 | } 1040 | } 1041 | 1042 | // BranchType classifies a BranchEntry. 1043 | type BranchType uint8 1044 | 1045 | // Branch classifications.
1046 | const ( 1047 | BranchTypeUnknown BranchType = iota 1048 | BranchTypeConditional 1049 | BranchTypeUnconditional 1050 | BranchTypeIndirect 1051 | BranchTypeCall 1052 | BranchTypeIndirectCall 1053 | BranchTypeReturn 1054 | BranchTypeSyscall 1055 | BranchTypeSyscallReturn 1056 | BranchTypeConditionalCall 1057 | BranchTypeConditionalReturn 1058 | ) 1059 | 1060 | // Mmap2Record (PERF_RECORD_MMAP2) includes extended information on mmap(2) 1061 | // calls returning executable mappings. It is similar to MmapRecord, but 1062 | // includes extra values, allowing unique identification of shared mappings. 1063 | type Mmap2Record struct { 1064 | RecordHeader 1065 | Pid uint32 // process ID 1066 | Tid uint32 // thread ID 1067 | Addr uint64 // address of the allocated memory 1068 | Len uint64 // length of the allocated memory 1069 | PageOffset uint64 // page offset of the allocated memory 1070 | MajorID uint32 // major ID of the underlying device 1071 | MinorID uint32 // minor ID of the underlying device 1072 | Inode uint64 // inode number 1073 | InodeGeneration uint64 // inode generation 1074 | Prot uint32 // protection information 1075 | Flags uint32 // flags information 1076 | Filename string // describes the backing of the allocated memory 1077 | SampleID 1078 | } 1079 | 1080 | // DecodeFrom implements the Record.DecodeFrom method. 1081 | func (mr *Mmap2Record) DecodeFrom(raw *RawRecord, ev *Event) error { 1082 | mr.RecordHeader = raw.Header 1083 | f := raw.fields() 1084 | f.uint32(&mr.Pid, &mr.Tid) 1085 | f.uint64(&mr.Addr) 1086 | f.uint64(&mr.Len) 1087 | f.uint64(&mr.PageOffset) 1088 | f.uint32(&mr.MajorID, &mr.MinorID) 1089 | f.uint64(&mr.Inode) 1090 | f.uint64(&mr.InodeGeneration) 1091 | f.uint32(&mr.Prot, &mr.Flags) 1092 | f.string(&mr.Filename) 1093 | f.idCond(ev.a.Options.SampleIDAll, &mr.SampleID, ev.a.SampleFormat) 1094 | return nil 1095 | } 1096 | 1097 | // Executable returns a boolean indicating whether the mapping is executable. 1098 | func (mr *Mmap2Record) Executable() bool { 1099 | // The data bit is set when the mapping is _not_ executable. 1100 | return mr.RecordHeader.Misc&mmapDataBit == 0 1101 | } 1102 | 1103 | // AuxRecord (PERF_RECORD_AUX) reports that new data is available in the 1104 | // AUX buffer region. 1105 | type AuxRecord struct { 1106 | RecordHeader 1107 | Offset uint64 // offset in the AUX mmap region where the new data begins 1108 | Size uint64 // size of data made available 1109 | Flags AuxFlag // describes the update 1110 | SampleID 1111 | } 1112 | 1113 | // AuxFlag describes an update to a record in the AUX buffer region. 1114 | type AuxFlag uint64 1115 | 1116 | // AuxFlag bits. 1117 | const ( 1118 | AuxTruncated AuxFlag = 0x01 // record was truncated to fit 1119 | AuxOverwrite AuxFlag = 0x02 // snapshot from overwrite mode 1120 | AuxPartial AuxFlag = 0x04 // record contains gaps 1121 | AuxCollision AuxFlag = 0x08 // sample collided with another 1122 | ) 1123 | 1124 | // DecodeFrom implements the Record.DecodeFrom method. 
1125 | func (ar *AuxRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1126 | ar.RecordHeader = raw.Header 1127 | f := raw.fields() 1128 | f.uint64(&ar.Offset) 1129 | f.uint64(&ar.Size) 1130 | var flag uint64 1131 | f.uint64(&flag) 1132 | ar.Flags = AuxFlag(flag) 1133 | f.idCond(ev.a.Options.SampleIDAll, &ar.SampleID, ev.a.SampleFormat) 1134 | return nil 1135 | } 1136 | 1137 | // ItraceStartRecord (PERF_RECORD_ITRACE_START) indicates which process 1138 | // has initiated an instruction trace event, allowing tools to correlate 1139 | // instruction addresses in the AUX buffer with the proper executable. 1140 | type ItraceStartRecord struct { 1141 | RecordHeader 1142 | Pid uint32 // process ID of the thread starting an instruction trace 1143 | Tid uint32 // thread ID of the thread starting an instruction trace 1144 | SampleID 1145 | } 1146 | 1147 | // DecodeFrom implements the Record.DecodeFrom method. 1148 | func (ir *ItraceStartRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1149 | ir.RecordHeader = raw.Header 1150 | f := raw.fields() 1151 | f.uint32(&ir.Pid, &ir.Tid) 1152 | f.idCond(ev.a.Options.SampleIDAll, &ir.SampleID, ev.a.SampleFormat) 1153 | return nil 1154 | } 1155 | 1156 | // LostSamplesRecord (PERF_RECORD_LOST_SAMPLES) indicates some number of 1157 | // samples that may have been lost, when using hardware sampling such as 1158 | // Intel PEBS. 1159 | type LostSamplesRecord struct { 1160 | RecordHeader 1161 | Lost uint64 // the number of potentially lost samples 1162 | SampleID 1163 | } 1164 | 1165 | // DecodeFrom implements the Record.DecodeFrom method. 1166 | func (lr *LostSamplesRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1167 | lr.RecordHeader = raw.Header 1168 | f := raw.fields() 1169 | f.uint64(&lr.Lost) 1170 | f.idCond(ev.a.Options.SampleIDAll, &lr.SampleID, ev.a.SampleFormat) 1171 | return nil 1172 | } 1173 | 1174 | // SwitchRecord (PERF_RECORD_SWITCH) indicates that a context switch has 1175 | // happened. 1176 | type SwitchRecord struct { 1177 | RecordHeader 1178 | SampleID 1179 | } 1180 | 1181 | // DecodeFrom implements the Record.DecodeFrom method. 1182 | func (sr *SwitchRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1183 | sr.RecordHeader = raw.Header 1184 | f := raw.fields() 1185 | f.idCond(ev.a.Options.SampleIDAll, &sr.SampleID, ev.a.SampleFormat) 1186 | return nil 1187 | } 1188 | 1189 | // switchOutBit is PERF_RECORD_MISC_SWITCH_OUT 1190 | const switchOutBit = 1 << 13 1191 | 1192 | // switchOutPreemptBit is PERF_RECORD_MISC_SWITCH_OUT_PREEMPT 1193 | const switchOutPreemptBit = 1 << 14 1194 | 1195 | // Out returns a boolean indicating whether the context switch was 1196 | // out of the current process, or into the current process. 1197 | func (sr *SwitchRecord) Out() bool { 1198 | return sr.RecordHeader.Misc&switchOutBit != 0 1199 | } 1200 | 1201 | // Preempted indicates whether the thread was preempted in TASK_RUNNING state. 1202 | func (sr *SwitchRecord) Preempted() bool { 1203 | return sr.RecordHeader.Misc&switchOutPreemptBit != 0 1204 | } 1205 | 1206 | // SwitchCPUWideRecord (PERF_RECORD_SWITCH_CPU_WIDE) indicates a context 1207 | // switch, but only occurs when sampling in CPU-wide mode. It provides 1208 | // information on the process being switched to / from. 1209 | type SwitchCPUWideRecord struct { 1210 | RecordHeader 1211 | Pid uint32 1212 | Tid uint32 1213 | SampleID 1214 | } 1215 | 1216 | // DecodeFrom implements the Record.DecodeFrom method. 
1217 | func (sr *SwitchCPUWideRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1218 | sr.RecordHeader = raw.Header 1219 | f := raw.fields() 1220 | f.uint32(&sr.Pid, &sr.Tid) 1221 | f.idCond(ev.a.Options.SampleIDAll, &sr.SampleID, ev.a.SampleFormat) 1222 | return nil 1223 | } 1224 | 1225 | // Out returns a boolean indicating whether the context switch was 1226 | // out of the current process, or into the current process. 1227 | func (sr *SwitchCPUWideRecord) Out() bool { 1228 | return sr.RecordHeader.Misc&switchOutBit != 0 1229 | } 1230 | 1231 | // Preempted indicates whether the thread was preempted in TASK_RUNNING state. 1232 | func (sr *SwitchCPUWideRecord) Preempted() bool { 1233 | return sr.RecordHeader.Misc&switchOutPreemptBit != 0 1234 | } 1235 | 1236 | // NamespacesRecord (PERF_RECORD_NAMESPACES) describes the namespaces of a 1237 | // process when it is created. 1238 | type NamespacesRecord struct { 1239 | RecordHeader 1240 | Pid uint32 1241 | Tid uint32 1242 | Namespaces []struct { 1243 | Dev uint64 1244 | Inode uint64 1245 | } 1246 | SampleID 1247 | } 1248 | 1249 | // TODO(acln): check out *_NS_INDEX in perf_event.h 1250 | 1251 | // DecodeFrom implements the Record.DecodeFrom method. 1252 | func (nr *NamespacesRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1253 | nr.RecordHeader = raw.Header 1254 | f := raw.fields() 1255 | f.uint32(&nr.Pid, &nr.Tid) 1256 | var num uint64 1257 | f.uint64(&num) 1258 | nr.Namespaces = make([]struct{ Dev, Inode uint64 }, num) 1259 | for i := 0; i < int(num); i++ { 1260 | f.uint64(&nr.Namespaces[i].Dev) 1261 | f.uint64(&nr.Namespaces[i].Inode) 1262 | } 1263 | f.idCond(ev.a.Options.SampleIDAll, &nr.SampleID, ev.a.SampleFormat) 1264 | return nil 1265 | } 1266 | 1267 | // Skid is an instruction pointer skid constraint. 1268 | type Skid int 1269 | 1270 | // Supported Skid settings. 1271 | const ( 1272 | CanHaveArbitrarySkid Skid = 0 1273 | MustHaveConstantSkid Skid = 1 1274 | RequestedZeroSkid Skid = 2 1275 | MustHaveZeroSkid Skid = 3 1276 | ) 1277 | 1278 | // BranchSampleFormat specifies what branches to include in a branch record. 1279 | type BranchSampleFormat struct { 1280 | Privilege BranchSamplePrivilege 1281 | Sample BranchSample 1282 | } 1283 | 1284 | func (b BranchSampleFormat) marshal() uint64 { 1285 | return uint64(b.Privilege) | uint64(b.Sample) 1286 | } 1287 | 1288 | // BranchSamplePrivilege specifies a branch sample privilege level. If a 1289 | // level is not set explicitly, the kernel will use the event's privilege 1290 | // level. Event and branch privilege levels do not have to match. 1291 | type BranchSamplePrivilege uint64 1292 | 1293 | // Branch sample privilege values. Values should be |-ed together. 1294 | const ( 1295 | BranchPrivilegeUser BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_USER 1296 | BranchPrivilegeKernel BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_KERNEL 1297 | BranchPrivilegeHypervisor BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_HV 1298 | ) 1299 | 1300 | // BranchSample specifies a type of branch to sample. 1301 | type BranchSample uint64 1302 | 1303 | // Branch sample bits. Values should be |-ed together.
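//
// A hedged sketch of requesting branch records on an event (SampleFormat
// BranchStack must also be set; Attr.BranchSampleFormat is the field the
// SampleFormat.BranchStack documentation points to):
//
//	a.SampleFormat.BranchStack = true
//	a.BranchSampleFormat = perf.BranchSampleFormat{
//		Privilege: perf.BranchPrivilegeUser,
//		Sample:    perf.BranchSampleAnyCall | perf.BranchSampleCond,
//	}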
1304 | const ( 1305 | BranchSampleAny BranchSample = unix.PERF_SAMPLE_BRANCH_ANY 1306 | BranchSampleAnyCall BranchSample = unix.PERF_SAMPLE_BRANCH_ANY_CALL 1307 | BranchSampleAnyReturn BranchSample = unix.PERF_SAMPLE_BRANCH_ANY_RETURN 1308 | BranchSampleIndirectCall BranchSample = unix.PERF_SAMPLE_BRANCH_IND_CALL 1309 | BranchSampleAbortTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_ABORT_TX 1310 | BranchSampleInTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_IN_TX 1311 | BranchSampleNoTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_NO_TX 1312 | BranchSampleCond BranchSample = unix.PERF_SAMPLE_BRANCH_COND 1313 | BranchSampleCallStack BranchSample = unix.PERF_SAMPLE_BRANCH_CALL_STACK 1314 | BranchSampleIndirectJump BranchSample = unix.PERF_SAMPLE_BRANCH_IND_JUMP 1315 | BranchSampleCall BranchSample = unix.PERF_SAMPLE_BRANCH_CALL 1316 | BranchSampleNoFlags BranchSample = unix.PERF_SAMPLE_BRANCH_NO_FLAGS 1317 | BranchSampleNoCycles BranchSample = unix.PERF_SAMPLE_BRANCH_NO_CYCLES 1318 | BranchSampleSave BranchSample = unix.PERF_SAMPLE_BRANCH_TYPE_SAVE 1319 | ) 1320 | 1321 | // DataSource records where in the memory hierarchy the data associated with 1322 | // a sampled instruction came from. 1323 | type DataSource uint64 1324 | 1325 | // MemOp returns the recorded memory operation. 1326 | func (ds DataSource) MemOp() MemOp { 1327 | return MemOp(ds >> memOpShift) 1328 | } 1329 | 1330 | // MemLevel returns the recorded memory level. 1331 | func (ds DataSource) MemLevel() MemLevel { 1332 | return MemLevel(ds >> memLevelShift) 1333 | } 1334 | 1335 | // MemRemote returns the recorded remote bit. 1336 | func (ds DataSource) MemRemote() MemRemote { 1337 | return MemRemote(ds >> memRemoteShift) 1338 | } 1339 | 1340 | // MemLevelNumber returns the recorded memory level number. 1341 | func (ds DataSource) MemLevelNumber() MemLevelNumber { 1342 | return MemLevelNumber(ds >> memLevelNumberShift) 1343 | } 1344 | 1345 | // MemSnoopMode returns the recorded memory snoop mode. 1346 | func (ds DataSource) MemSnoopMode() MemSnoopMode { 1347 | return MemSnoopMode(ds >> memSnoopModeShift) 1348 | } 1349 | 1350 | // MemSnoopModeX returns the recorded extended memory snoop mode. 1351 | func (ds DataSource) MemSnoopModeX() MemSnoopModeX { 1352 | return MemSnoopModeX(ds >> memSnoopModeXShift) 1353 | } 1354 | 1355 | // MemLock returns the recorded memory lock mode. 1356 | func (ds DataSource) MemLock() MemLock { 1357 | return MemLock(ds >> memLockShift) 1358 | } 1359 | 1360 | // MemTLB returns the recorded TLB access mode. 1361 | func (ds DataSource) MemTLB() MemTLB { 1362 | return MemTLB(ds >> memTLBShift) 1363 | } 1364 | 1365 | // MemOp is a memory operation. 1366 | type MemOp uint8 1367 | 1368 | // MemOp flag bits. 1369 | const ( 1370 | MemOpNA MemOp = 1 << iota 1371 | MemOpLoad 1372 | MemOpStore 1373 | MemOpPrefetch 1374 | MemOpExec 1375 | 1376 | memOpShift = 0 1377 | ) 1378 | 1379 | // MemLevel is a memory level. 1380 | type MemLevel uint32 1381 | 1382 | // MemLevel flag bits. 1383 | const ( 1384 | MemLevelNA MemLevel = 1 << iota 1385 | MemLevelHit 1386 | MemLevelMiss 1387 | MemLevelL1 1388 | MemLevelLFB 1389 | MemLevelL2 1390 | MemLevelL3 1391 | MemLevelLocalDRAM 1392 | MemLevelRemoteDRAM1 1393 | MemLevelRemoteDRAM2 1394 | MemLevelRemoteCache1 1395 | MemLevelRemoteCache2 1396 | MemLevelIO 1397 | MemLevelUncached 1398 | 1399 | memLevelShift = 5 1400 | ) 1401 | 1402 | // MemRemote indicates whether remote memory was accessed. 1403 | type MemRemote uint8 1404 | 1405 | // MemRemote flag bits. 
const (
	MemRemoteRemote MemRemote = 1 << iota

	memRemoteShift = 37
)

// MemLevelNumber is a memory level number.
type MemLevelNumber uint8

// MemLevelNumber values. Unlike most other DataSource sub-fields, these
// are enumerated values (PERF_MEM_LVLNUM_*), not flag bits.
const (
	MemLevelNumberL1 MemLevelNumber = iota + 0x01 // 0x01
	MemLevelNumberL2                              // 0x02
	MemLevelNumberL3                              // 0x03
	MemLevelNumberL4                              // 0x04

	MemLevelNumberAnyCache MemLevelNumber = iota + 0x07 // 0x0b
	MemLevelNumberLFB                                   // 0x0c
	MemLevelNumberRAM                                   // 0x0d
	MemLevelNumberPMem                                  // 0x0e
	MemLevelNumberNA                                    // 0x0f

	memLevelNumberShift = 33
)

// MemSnoopMode is a memory snoop mode.
type MemSnoopMode uint8

// MemSnoopMode flag bits.
const (
	MemSnoopModeNA MemSnoopMode = 1 << iota
	MemSnoopModeNone
	MemSnoopModeHit
	MemSnoopModeMiss
	MemSnoopModeHitModified

	memSnoopModeShift = 19
)

// MemSnoopModeX is an extended memory snoop mode.
type MemSnoopModeX uint8

// MemSnoopModeX flag bits.
const (
	MemSnoopModeXForward MemSnoopModeX = 0x01 // forward

	// PERF_MEM_SNOOPX_SHIFT. Note that older kernel headers wrongly
	// defined this as 37, clashing with PERF_MEM_REMOTE_SHIFT; the
	// field actually sits at bit 38.
	memSnoopModeXShift = 38
)

// MemLock is a memory locking mode.
type MemLock uint8

// MemLock flag bits.
const (
	MemLockNA     MemLock = 1 << iota // not available
	MemLockLocked                     // locked transaction

	memLockShift = 24
)

// MemTLB is a TLB access mode.
type MemTLB uint8

// MemTLB flag bits.
const (
	MemTLBNA   MemTLB = 1 << iota // not available
	MemTLBHit                     // hit level
	MemTLBMiss                    // miss level
	MemTLBL1
	MemTLBL2
	MemTLBWK // Hardware Walker
	MemTLBOS // OS fault handler

	memTLBShift = 26
)

// Transaction describes a transactional memory abort.
type Transaction uint64

// Transaction flag bits. Test for them by AND-ing them with a Transaction
// value.
const (
	// TransactionElision indicates an abort from an elision type
	// transaction (Intel CPU specific).
	TransactionElision Transaction = 1 << iota

	// TransactionGeneric indicates an abort from a generic transaction.
	TransactionGeneric

	// TransactionSync indicates a synchronous abort (related to the
	// reported instruction).
	TransactionSync

	// TransactionAsync indicates an asynchronous abort (unrelated to
	// the reported instruction).
	TransactionAsync

	// TransactionRetryable indicates whether retrying the transaction
	// may have succeeded.
	TransactionRetryable

	// TransactionConflict indicates an abort due to memory conflicts
	// with other threads.
	TransactionConflict

	// TransactionWriteCapacity indicates an abort due to write capacity
	// overflow.
	TransactionWriteCapacity

	// TransactionReadCapacity indicates an abort due to read capacity
	// overflow.
	TransactionReadCapacity
)

// txnAbortMask is PERF_TXN_ABORT_MASK.
const txnAbortMask = 0xffffffff

// txnAbortShift is PERF_TXN_ABORT_SHIFT.
const txnAbortShift = 32

// UserAbortCode returns the user-specified abort code associated with
// the transaction.
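// For example (a sketch; txn is a perf.Transaction taken from a decoded
// sample record):
//
//	if txn&perf.TransactionRetryable != 0 {
//		fmt.Printf("retryable abort, user code %#x\n", txn.UserAbortCode())
//	}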
func (txn Transaction) UserAbortCode() uint32 {
	return uint32((txn >> txnAbortShift) & txnAbortMask)
}

// TODO(acln): the latter part of this file is full of constants added
// ad-hoc, which use iota. These should probably be added to x/sys/unix
// instead, and used from there.
--------------------------------------------------------------------------------