├── go.mod ├── go.sum ├── LICENSE ├── doc.go ├── README.md ├── group.go ├── perf_example_test.go ├── group_test.go ├── record_amd64_test.go ├── count.go ├── perf_test.go ├── count_test.go ├── record_test.go ├── perf.go └── record.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/elastic/go-perf 2 | 3 | go 1.18 4 | 5 | require golang.org/x/sys v0.26.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 2 | golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package perf provides access to the Linux perf API. 7 | 8 | Counting events 9 | 10 | A Group represents a set of perf events measured together. 11 | 12 | var g perf.Group 13 | g.Add(perf.Instructions, perf.CPUCycles) 14 | 15 | hw, err := g.Open(targetpid, perf.AnyCPU) 16 | // ... 17 | gc, err := hw.MeasureGroup(func() { ... }) 18 | 19 | Attr configures an individual event. 20 | 21 | fa := &perf.Attr{ 22 | CountFormat: perf.CountFormat{ 23 | Running: true, 24 | ID: true, 25 | }, 26 | } 27 | perf.PageFaults.Configure(fa) 28 | 29 | faults, err := perf.Open(fa, perf.CallingThread, perf.AnyCPU, nil) 30 | // ... 31 | c, err := faults.Measure(func() { ... 
})
32 | 
33 | Sampling events
34 | 
35 | Overflow records are available once the MapRing method on Event is called:
36 | 
37 | 	var ev perf.Event // initialized previously
38 | 
39 | 	ev.MapRing()
40 | 
41 | 	ev.Enable()
42 | 
43 | 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
44 | 	defer cancel()
45 | 
46 | 	for {
47 | 		rec, err := ev.ReadRecord(ctx)
48 | 		// process rec
49 | 	}
50 | 
51 | Tracepoints are also supported:
52 | 
53 | 	wa := &perf.Attr{
54 | 		SampleFormat: perf.SampleFormat{
55 | 			Pid: true,
56 | 			Tid: true,
57 | 			IP:  true,
58 | 		},
59 | 	}
60 | 	wa.SetSamplePeriod(1)
61 | 	wa.SetWakeupEvents(1)
62 | 	wtp := perf.Tracepoint("syscalls", "sys_enter_write")
63 | 	wtp.Configure(wa)
64 | 
65 | 	writes, err := perf.Open(wa, targetpid, perf.AnyCPU, nil)
66 | 	// ...
67 | 	c, err := writes.Measure(func() { ... })
68 | 	// ...
69 | 	fmt.Printf("saw %d writes\n", c.Value)
70 | 
71 | 	rec, err := writes.ReadRecord(ctx)
72 | 	// ...
73 | 	sr, ok := rec.(*perf.SampleRecord)
74 | 	// ...
75 | 	fmt.Printf("pid = %d, tid = %d\n", sr.Pid, sr.Tid)
76 | 
77 | For more detailed information, see the examples and man 2 perf_event_open.
78 | 
79 | NOTE: this package is experimental and does not yet offer compatibility
80 | guarantees.
81 | */
82 | package perf
83 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ----
2 | 
3 | This is a clone of the `golang.org/x/sys/unix/linux/perf` package
4 | submitted by [acln0](https://github.com/acln0) for review at
5 | https://go-review.googlesource.com/c/sys/+/168059
6 | 
7 | An alternative working tree for this package can also be found
8 | at https://github.com/acln0/perf
9 | 
10 | This Elastic fork contains bugfixes and features necessary for
11 | our KProbes implementation.
12 | 
13 | ----
14 | 
15 | `perf` API client package for Linux. See `man 2 perf_event_open` and
16 | `include/uapi/linux/perf_event.h`.
17 | 
18 | This package is in its early stages. The API is still under discussion:
19 | it may change at any moment, without prior notice, and this document
20 | may not always be completely up to date.
21 | 
22 | 
23 | Testing
24 | =======
25 | 
26 | Many of the things package perf does require elevated privileges on
27 | most systems, but we would very much like the tests not to require
28 | root to run. Because of this, we use a fairly specific testing model,
29 | described next.
30 | 
31 | If the host kernel does not support `perf_event_open(2)` (i.e. if
32 | the `/proc/sys/kernel/perf_event_paranoid` file is not present),
33 | then tests fail immediately with an error message.
34 | 
35 | Tests are designed in such a way that they are skipped if their
36 | requirements are not met by the underlying system: we want the test
37 | suite to degrade gracefully in such circumstances.
38 | 
39 | For example, when running Linux in a virtualized environment, various
40 | hardware PMUs might not be available. In such situations, we would like
41 | the test suite to continue running. For this purpose, we introduce the
42 | mechanism described next.
43 | 
44 | Requirements for a test are specified by invoking the `requires`
45 | function at the beginning of a test function. All tests that call
46 | `perf_event_open` must specify requirements this way. Currently,
47 | we use three kinds of requirements:
48 | 
49 | * `perf_event_paranoid` values
50 | 
51 | * the existence of various PMUs (e.g. "cpu", "software", "tracepoint")
"cpu", "software", "tracepoint") 52 | 53 | * tracefs is mounted, and readable 54 | 55 | Today, setting `perf_event_paranoid=1` and having a readable tracefs 56 | mounted at `/sys/kernel/debug/tracing` enables most of the tests. 57 | A select few require `perf_event_paranoid=0`. If the test process 58 | is running with `CAP_SYS_ADMIN`, `perf_event_paranoid` requirements 59 | are ignored, since they are considered fulfilled. The test process 60 | does not attempt to see if it is running as root, it only checks 61 | `CAP_SYS_ADMIN`. 62 | 63 | If you find a test that, when ran without elevated privileges, 64 | fails with something akin to a permissions error, then it means the 65 | requirements for the test were not specified precisely. Please file 66 | a bug. Extending the test suite and making these requirements more 67 | precise is an ongoing process. 68 | -------------------------------------------------------------------------------- /group.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "errors" 11 | "fmt" 12 | ) 13 | 14 | // Group configures a group of events. 15 | type Group struct { 16 | // CountFormat configures the format of counts read from the event 17 | // leader. The Group option is set automatically. 18 | CountFormat CountFormat 19 | 20 | // Options configures options for all events in the group. 21 | Options Options 22 | 23 | // ClockID configures the clock for samples in the group. 24 | ClockID int32 25 | 26 | err error // sticky configuration error 27 | attrs []*Attr 28 | leaderNeedsRing bool 29 | } 30 | 31 | // TODO(acln): what other fields belong on Group? SampleFormat perhaps? 32 | 33 | // Add adds events to the group, as configured by cfgs. 34 | // 35 | // For each Configurator, a new *Attr is created, the group-specific settings 36 | // are applied, then Configure is called on the *Attr to produce the final 37 | // event attributes. 38 | func (g *Group) Add(cfgs ...Configurator) { 39 | for _, cfg := range cfgs { 40 | g.add(cfg) 41 | } 42 | } 43 | 44 | func (g *Group) add(cfg Configurator) { 45 | if g.err != nil { 46 | return 47 | } 48 | a := new(Attr) 49 | a.CountFormat = g.CountFormat 50 | a.Options = g.Options 51 | a.ClockID = g.ClockID 52 | err := cfg.Configure(a) 53 | if err != nil { 54 | g.err = err 55 | return 56 | } 57 | if a.Sample != 0 { 58 | g.leaderNeedsRing = true 59 | } 60 | g.attrs = append(g.attrs, a) 61 | } 62 | 63 | // Open opens all the events in the group, and returns their leader. 64 | // 65 | // The returned Event controls the entire group. Callers must use the 66 | // ReadGroupCount method when reading counters from it. Closing it closes 67 | // the entire group. 
68 | func (g *Group) Open(pid int, cpu int) (*Event, error) {
69 | 	if len(g.attrs) == 0 {
70 | 		return nil, errors.New("perf: empty event group")
71 | 	}
72 | 	if g.err != nil {
73 | 		return nil, fmt.Errorf("perf: configuration error: %v", g.err)
74 | 	}
75 | 	leaderattr := g.attrs[0]
76 | 	leaderattr.CountFormat.Group = true
77 | 	leader, err := Open(leaderattr, pid, cpu, nil)
78 | 	if err != nil {
79 | 		return nil, fmt.Errorf("perf: failed to open event leader: %v", err)
80 | 	}
81 | 	if len(g.attrs) < 2 {
82 | 		return leader, nil
83 | 	}
84 | 	if g.leaderNeedsRing {
85 | 		if err := leader.MapRing(); err != nil {
86 | 			leader.Close()
87 | 			return nil, fmt.Errorf("perf: failed to map leader ring: %v", err)
88 | 		}
89 | 	}
90 | 	for idx, attr := range g.attrs[1:] {
91 | 		follower, err := Open(attr, pid, cpu, leader)
92 | 		if err != nil {
93 | 			leader.Close()
94 | 			return nil, fmt.Errorf("perf: failed to open group event #%d (%q): %v", idx+1, attr.Label, err)
95 | 		}
96 | 		leader.owned = append(leader.owned, follower)
97 | 		if attr.Sample != 0 {
98 | 			if err := follower.SetOutput(leader); err != nil {
99 | 				leader.Close()
100 | 				return nil, fmt.Errorf("perf: failed to route follower %q output to leader %q (pid %d on CPU %d): %v", attr.Label, leaderattr.Label, pid, cpu, err)
101 | 			}
102 | 		}
103 | 	}
104 | 	return leader, nil
105 | }
106 | 
107 | // A Configurator configures event attributes. Implementations should only
108 | // set the fields they need. See (*Group).Add for more details.
109 | type Configurator interface {
110 | 	Configure(attr *Attr) error
111 | }
112 | 
113 | type configuratorFunc func(attr *Attr) error
114 | 
115 | func (cf configuratorFunc) Configure(attr *Attr) error { return cf(attr) }
116 | 
--------------------------------------------------------------------------------
/perf_example_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "fmt" 12 | "log" 13 | "runtime" 14 | 15 | "golang.org/x/sys/unix" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func ExampleHardwareCounter_iPC() { 21 | g := perf.Group{ 22 | CountFormat: perf.CountFormat{ 23 | Running: true, 24 | }, 25 | } 26 | g.Add(perf.Instructions, perf.CPUCycles) 27 | 28 | runtime.LockOSThread() 29 | defer runtime.UnlockOSThread() 30 | 31 | ipc, err := g.Open(perf.CallingThread, perf.AnyCPU) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | defer ipc.Close() 36 | 37 | sum := 0 38 | gc, err := ipc.MeasureGroup(func() { 39 | for i := 0; i < 100000; i++ { 40 | sum += i 41 | } 42 | }) 43 | if err != nil { 44 | log.Fatal(err) 45 | } 46 | 47 | insns, cycles := gc.Values[0].Value, gc.Values[1].Value 48 | 49 | fmt.Printf("got sum = %d in %v: %d instructions, %d CPU cycles: %f IPC", 50 | sum, gc.Running, insns, cycles, float64(insns)/float64(cycles)) 51 | } 52 | 53 | func ExampleSoftwareCounter_pageFaults() { 54 | pfa := new(perf.Attr) 55 | perf.PageFaults.Configure(pfa) 56 | 57 | runtime.LockOSThread() 58 | defer runtime.UnlockOSThread() 59 | 60 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | defer faults.Close() 65 | 66 | var mem []byte 67 | const ( 68 | size = 64 * 1024 * 1024 69 | pos = 63 * 1024 * 1024 70 | ) 71 | c, err := faults.Measure(func() { 72 | mem = make([]byte, size) 73 | mem[pos] = 42 74 | }) 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | fmt.Printf("saw %d page faults, wrote value %d", c.Value, mem[pos]) 79 | } 80 | 81 | func ExampleTracepoint_getpid() { 82 | ga := new(perf.Attr) 83 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 84 | if err := gtp.Configure(ga); err != nil { 85 | log.Fatal(err) 86 | } 87 | 88 | runtime.LockOSThread() 89 | defer runtime.UnlockOSThread() 90 | 91 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 92 | if err != nil { 93 | log.Fatal(err) 94 | } 95 | defer getpid.Close() 96 | 97 | unix.Getpid() // does not count towards the measurement 98 | 99 | c, err := getpid.Measure(func() { 100 | unix.Getpid() 101 | unix.Getpid() 102 | unix.Getpid() 103 | }) 104 | if err != nil { 105 | log.Fatal(err) 106 | } 107 | 108 | fmt.Printf("saw getpid %d times\n", c.Value) // should print 3 109 | } 110 | 111 | func ExampleMmapRecord_plugin() { 112 | var targetpid int // pid of the monitored process 113 | 114 | da := &perf.Attr{ 115 | Options: perf.Options{ 116 | Mmap: true, 117 | }, 118 | } 119 | da.SetSamplePeriod(1) 120 | da.SetWakeupEvents(1) 121 | perf.Dummy.Configure(da) // configure a dummy event, so we can Open it 122 | 123 | mmap, err := perf.Open(da, targetpid, perf.AnyCPU, nil) 124 | if err != nil { 125 | log.Fatal(err) 126 | } 127 | if err := mmap.MapRing(); err != nil { 128 | log.Fatal(err) 129 | } 130 | 131 | // Monitor the target process, wait for it to load something like 132 | // a plugin, or a shared library, which requires a PROT_EXEC mapping. 
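	// Note (added explanation): ReadRecord blocks until the kernel
	// publishes a record to the ring, or until the context is cancelled.
	// Record types other than *perf.MmapRecord that may appear here
	// (for example, lost-record notifications) are skipped by the type
	// assertion below, so the loop reports only PROT_EXEC mappings.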
133 | 
134 | 	for {
135 | 		rec, err := mmap.ReadRecord(context.Background())
136 | 		if err != nil {
137 | 			log.Fatal(err)
138 | 		}
139 | 		mr, ok := rec.(*perf.MmapRecord)
140 | 		if !ok {
141 | 			continue
142 | 		}
143 | 		fmt.Printf("pid %d created a PROT_EXEC mapping at %#x: %s",
144 | 			mr.Pid, mr.Addr, mr.Filename)
145 | 	}
146 | }
147 | 
--------------------------------------------------------------------------------
/group_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package perf_test
6 | 
7 | import (
8 | 	"context"
9 | 	"runtime"
10 | 	"testing"
11 | 	"time"
12 | 
13 | 	"github.com/elastic/go-perf"
14 | )
15 | 
16 | func TestGroup(t *testing.T) {
17 | 	t.Run("Count", testGroupCount)
18 | 	t.Run("Record", testGroupRecord)
19 | }
20 | 
21 | func testGroupCount(t *testing.T) {
22 | 	requires(t, paranoid(1), hardwarePMU, softwarePMU)
23 | 
24 | 	da := new(perf.Attr)
25 | 	perf.Dummy.Configure(da)
26 | 
27 | 	g := perf.Group{
28 | 		CountFormat: perf.CountFormat{
29 | 			Enabled: true,
30 | 			Running: true,
31 | 		},
32 | 	}
33 | 	g.Add(perf.CPUCycles, perf.Instructions, da)
34 | 
35 | 	runtime.LockOSThread()
36 | 	defer runtime.UnlockOSThread()
37 | 
38 | 	ev, err := g.Open(perf.CallingThread, perf.AnyCPU)
39 | 	if err != nil {
40 | 		t.Fatalf("Open: %v", err)
41 | 	}
42 | 	defer ev.Close()
43 | 	sum := int64(0)
44 | 	gc, err := ev.MeasureGroup(func() {
45 | 		for i := int64(0); i < 50000; i++ {
46 | 			sum += i
47 | 		}
48 | 	})
49 | 	if err != nil {
50 | 		t.Fatalf("MeasureGroup: %v", err)
51 | 	}
52 | 
53 | 	t.Logf("got sum %d in %d %s and %d %s", sum, gc.Values[0].Value, gc.Values[0].Label, gc.Values[1].Value, gc.Values[1].Label)
54 | }
55 | 
56 | func testGroupRecord(t *testing.T) {
57 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
58 | 
59 | 	ga := &perf.Attr{
60 | 		Options: perf.Options{
61 | 			Disabled: true,
62 | 		},
63 | 		SampleFormat: perf.SampleFormat{
64 | 			Tid:      true,
65 | 			Time:     true,
66 | 			CPU:      true,
67 | 			IP:       true,
68 | 			StreamID: true,
69 | 		},
70 | 	}
71 | 	ga.SetSamplePeriod(1)
72 | 	ga.SetWakeupEvents(1)
73 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
74 | 	if err := gtp.Configure(ga); err != nil {
75 | 		t.Fatal(err)
76 | 	}
77 | 
78 | 	wa := &perf.Attr{
79 | 		SampleFormat: perf.SampleFormat{
80 | 			Tid:      true,
81 | 			Time:     true,
82 | 			CPU:      true,
83 | 			IP:       true,
84 | 			StreamID: true,
85 | 		},
86 | 	}
87 | 	wa.SetSamplePeriod(1)
88 | 	wa.SetWakeupEvents(1)
89 | 	wtp := perf.Tracepoint("syscalls", "sys_enter_write")
90 | 	if err := wtp.Configure(wa); err != nil {
91 | 		t.Fatal(err)
92 | 	}
93 | 
94 | 	g := perf.Group{
95 | 		CountFormat: perf.CountFormat{
96 | 			Enabled: true,
97 | 			Running: true,
98 | 		},
99 | 	}
100 | 	g.Add(ga, wa)
101 | 
102 | 	runtime.LockOSThread()
103 | 	defer runtime.UnlockOSThread()
104 | 
105 | 	ev, err := g.Open(perf.CallingThread, perf.AnyCPU)
106 | 	if err != nil {
107 | 		t.Fatal(err)
108 | 	}
109 | 	defer ev.Close()
110 | 
111 | 	gc, err := ev.MeasureGroup(func() {
112 | 		getpidTrigger()
113 | 		writeTrigger()
114 | 	})
115 | 	if err != nil {
116 | 		t.Fatal(err)
117 | 	}
118 | 	for _, got := range gc.Values {
119 | 		if got.Value != 1 {
120 | 			t.Fatalf("want 1 hit for %q, got %d", got.Label, got.Value)
121 | 		}
122 | 	}
123 | 
124 | 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
125 | 	defer cancel()
126 | 
127 | 	grec, err := ev.ReadRecord(ctx)
128 | 	if err != nil {
129 | 		t.Fatal(err)
130 | 	}
131 | 	gsr, ok :=
grec.(*perf.SampleGroupRecord) 132 | if !ok { 133 | t.Fatalf("got %T, want *perf.SampleGroupRecord", grec) 134 | } 135 | 136 | wrec, err := ev.ReadRecord(ctx) 137 | if err != nil { 138 | t.Fatal(err) 139 | } 140 | wsr, ok := wrec.(*perf.SampleGroupRecord) 141 | if !ok { 142 | t.Fatalf("got %T, want *perf.SampleGroupRecord", wrec) 143 | } 144 | 145 | if gip, wip := gsr.IP, wsr.IP; gip == wip { 146 | t.Fatalf("equal IP 0x%x for samples of different events", wip) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /record_amd64_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "os" 12 | "runtime" 13 | "testing" 14 | "time" 15 | "unsafe" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func TestSampleUserRegisters(t *testing.T) { 21 | requires(t, paranoid(1), tracepointPMU, tracefs) 22 | 23 | wea := &perf.Attr{ 24 | CountFormat: perf.CountFormat{ 25 | Group: true, 26 | }, 27 | SampleFormat: perf.SampleFormat{ 28 | StreamID: true, 29 | UserRegisters: true, 30 | }, 31 | Options: perf.Options{ 32 | SampleIDAll: true, 33 | }, 34 | // RDI, RSI, RDX. See arch/x86/include/uapi/asm/perf_regs.h. 35 | SampleRegistersUser: 0x38, 36 | } 37 | wea.SetSamplePeriod(1) 38 | wea.SetWakeupEvents(1) 39 | wetp := perf.Tracepoint("syscalls", "sys_enter_write") 40 | if err := wetp.Configure(wea); err != nil { 41 | t.Fatal(err) 42 | } 43 | 44 | wxa := &perf.Attr{ 45 | SampleFormat: perf.SampleFormat{ 46 | StreamID: true, 47 | UserRegisters: true, 48 | }, 49 | Options: perf.Options{ 50 | SampleIDAll: true, 51 | }, 52 | // RAX. See arch/x86/include/uapi/asm/perf_regs.h. 
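		// Note (added explanation): each bit of SampleRegistersUser
		// selects one register by its index in perf_regs.h (AX=0, BX=1,
		// CX=2, DX=3, SI=4, DI=5, ...), and the sampled values are
		// delivered in ascending bit order. That is why the 0x38 mask
		// above yields UserRegisters[0]=RDX (bit 3), [1]=RSI (bit 4),
		// and [2]=RDI (bit 5), while 0x1 here selects just RAX (bit 0).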
53 | 		SampleRegistersUser: 0x1,
54 | 	}
55 | 	wxa.SetSamplePeriod(1)
56 | 	wxa.SetWakeupEvents(1)
57 | 	wxtp := perf.Tracepoint("syscalls", "sys_exit_write")
58 | 	if err := wxtp.Configure(wxa); err != nil {
59 | 		t.Fatal(err)
60 | 	}
61 | 
62 | 	var g perf.Group
63 | 	g.Add(wea, wxa)
64 | 
65 | 	runtime.LockOSThread()
66 | 	defer runtime.UnlockOSThread()
67 | 
68 | 	write, err := g.Open(perf.CallingThread, perf.AnyCPU)
69 | 	if err != nil {
70 | 		t.Fatal(err)
71 | 	}
72 | 	defer write.Close()
73 | 	null, err := os.OpenFile("/dev/null", os.O_WRONLY, 0200)
74 | 	if err != nil {
75 | 		t.Fatal(err)
76 | 	}
77 | 	defer null.Close()
78 | 
79 | 	buf := make([]byte, 8)
80 | 
81 | 	var n int
82 | 	var werr error
83 | 	gc, err := write.MeasureGroup(func() {
84 | 		n, werr = null.Write(buf)
85 | 	})
86 | 	if err != nil {
87 | 		t.Fatal(err)
88 | 	}
89 | 	if werr != nil {
90 | 		t.Fatal(werr)
91 | 	}
92 | 	if entry := gc.Values[0].Value; entry != 1 {
93 | 		t.Fatalf("got %d hits for write at entry, want 1", entry)
94 | 	}
95 | 	if exit := gc.Values[1].Value; exit != 1 {
96 | 		t.Fatalf("got %d hits for write at exit, want 1", exit)
97 | 	}
98 | 
99 | 	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
100 | 	defer cancel()
101 | 
102 | 	entryrec, err := write.ReadRecord(ctx)
103 | 	if err != nil {
104 | 		t.Fatalf("got %v, want a valid record", err)
105 | 	}
106 | 	entrysr, ok := entryrec.(*perf.SampleGroupRecord)
107 | 	if !ok {
108 | 		t.Fatalf("got %T, want *perf.SampleGroupRecord", entryrec)
109 | 	}
110 | 	if nregs := len(entrysr.UserRegisters); nregs != 3 {
111 | 		t.Fatalf("got %d registers, want 3", nregs)
112 | 	}
113 | 
114 | 	var (
115 | 		rdi = entrysr.UserRegisters[2]
116 | 		rsi = entrysr.UserRegisters[1]
117 | 		rdx = entrysr.UserRegisters[0]
118 | 
119 | 		nullfd  = uint64(null.Fd())
120 | 		bufp    = uint64(uintptr(unsafe.Pointer(&buf[0])))
121 | 		bufsize = uint64(len(buf))
122 | 	)
123 | 
124 | 	if rdi != nullfd {
125 | 		t.Errorf("fd: rdi = %d, want %d", rdi, nullfd)
126 | 	}
127 | 	if rsi != bufp {
128 | 		t.Errorf("buf: rsi = %#x, want %#x", rsi, bufp)
129 | 	}
130 | 	if rdx != bufsize {
131 | 		t.Errorf("count: rdx = %d, want %d", rdx, bufsize)
132 | 	}
133 | 
134 | 	exitrec, err := write.ReadRecord(ctx)
135 | 	if err != nil {
136 | 		t.Fatalf("got %v, want a valid record", err)
137 | 	}
138 | 	exitsr, ok := exitrec.(*perf.SampleGroupRecord)
139 | 	if !ok {
140 | 		t.Fatalf("got %T, want *perf.SampleGroupRecord", exitrec)
141 | 	}
142 | 	if nregs := len(exitsr.UserRegisters); nregs != 1 {
143 | 		t.Fatalf("got %d registers, want 1", nregs)
144 | 	}
145 | 
146 | 	rax := exitsr.UserRegisters[0]
147 | 	if uint64(n) != rax {
148 | 		t.Fatalf("return: rax = %d, want %d", rax, n)
149 | 	}
150 | }
151 | 
--------------------------------------------------------------------------------
/count.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | // +build linux
6 | 
7 | package perf
8 | 
9 | import (
10 | 	"errors"
11 | 	"fmt"
12 | 	"io"
13 | 	"os"
14 | 	"text/tabwriter"
15 | 	"time"
16 | 
17 | 	"golang.org/x/sys/unix"
18 | )
19 | 
20 | // Count is a measurement taken by an Event.
21 | //
22 | // The Value field is always present and populated.
23 | //
24 | // The Enabled field is populated if CountFormat.Enabled is set on the Event
25 | // the Count was read from. Ditto for Running and ID.
26 | //
27 | // Label is set based on the Label field of the Attr associated with the
28 | // event. See the documentation there for more details.
29 | type Count struct {
30 | 	Value   uint64
31 | 	Enabled time.Duration
32 | 	Running time.Duration
33 | 	ID      uint64
34 | 	Label   string
35 | }
36 | 
37 | func (c Count) String() string {
38 | 	if c.Label != "" {
39 | 		return fmt.Sprintf("%s = %d", c.Label, c.Value)
40 | 	}
41 | 	return fmt.Sprint(c.Value)
42 | }
43 | 
44 | var errGroup = errors.New("calling ReadCount on group Event")
45 | 
46 | // ReadCount reads the measurement associated with ev. If the Event was
47 | // configured with CountFormat.Group, ReadCount returns an error.
48 | func (ev *Event) ReadCount() (Count, error) {
49 | 	var c Count
50 | 	if err := ev.ok(); err != nil {
51 | 		return c, err
52 | 	}
53 | 	if ev.a.CountFormat.Group {
54 | 		return c, errGroup
55 | 	}
56 | 
57 | 	// TODO(acln): on x86, the rdpmc instruction can be used here,
58 | 	// instead of read(2), to reduce the number of system calls, and
59 | 	// improve the accuracy of measurements.
60 | 	//
61 | 	// Investigate this. It seems like this functionality may not always
62 | 	// be available, even on x86, but we can check for it explicitly
63 | 	// if the ring associated with ev is mapped into memory: see
64 | 	// cap_user_rdpmc on perf_event_mmap_page.
65 | 	buf := make([]byte, ev.a.CountFormat.readSize())
66 | 	_, err := unix.Read(ev.perffd, buf)
67 | 	if err != nil {
68 | 		return c, os.NewSyscallError("read", err)
69 | 	}
70 | 
71 | 	f := fields(buf)
72 | 	f.count(&c, ev.a.CountFormat)
73 | 	c.Label = ev.a.Label
74 | 
75 | 	return c, nil
76 | }
77 | 
78 | // GroupCount is a group of measurements taken by an Event group.
79 | //
80 | // Fields are populated as described in the Count documentation.
81 | type GroupCount struct {
82 | 	Enabled time.Duration
83 | 	Running time.Duration
84 | 	Values  []struct {
85 | 		Value uint64
86 | 		ID    uint64
87 | 		Label string
88 | 	}
89 | }
90 | 
91 | type errWriter struct {
92 | 	w   io.Writer
93 | 	err error // sticky
94 | }
95 | 
96 | func (ew *errWriter) Write(b []byte) (int, error) {
97 | 	if ew.err != nil {
98 | 		return 0, ew.err
99 | 	}
100 | 	n, err := ew.w.Write(b)
101 | 	ew.err = err
102 | 	return n, err
103 | }
104 | 
105 | // PrintValues prints a table of gc.Values to w.
106 | func (gc GroupCount) PrintValues(w io.Writer) error {
107 | 	ew := &errWriter{w: w}
108 | 
109 | 	tw := new(tabwriter.Writer)
110 | 	tw.Init(ew, 0, 8, 1, ' ', 0)
111 | 
112 | 	if gc.Values[0].ID != 0 {
113 | 		fmt.Fprintln(tw, "label\tvalue\tID")
114 | 	} else {
115 | 		fmt.Fprintln(tw, "label\tvalue")
116 | 	}
117 | 
118 | 	for _, v := range gc.Values {
119 | 		if v.ID != 0 {
120 | 			fmt.Fprintf(tw, "%s\t%d\t%d\n", v.Label, v.Value, v.ID)
121 | 		} else {
122 | 			fmt.Fprintf(tw, "%s\t%d\n", v.Label, v.Value)
123 | 		}
124 | 	}
125 | 
126 | 	tw.Flush()
127 | 	return ew.err
128 | }
129 | 
130 | var errNotGroup = errors.New("calling ReadGroupCount on non-group Event")
131 | 
132 | // ReadGroupCount reads the measurements associated with ev. If the Event
133 | // was not configured with CountFormat.Group, ReadGroupCount returns an error.
134 | func (ev *Event) ReadGroupCount() (GroupCount, error) {
135 | 	var gc GroupCount
136 | 	if err := ev.ok(); err != nil {
137 | 		return gc, err
138 | 	}
139 | 	if !ev.a.CountFormat.Group {
140 | 		return gc, errNotGroup
141 | 	}
142 | 
143 | 	size := ev.a.CountFormat.groupReadSize(1 + len(ev.group))
144 | 	buf := make([]byte, size)
145 | 	_, err := unix.Read(ev.perffd, buf)
146 | 	if err != nil {
147 | 		return gc, os.NewSyscallError("read", err)
148 | 	}
149 | 
150 | 	f := fields(buf)
151 | 	f.groupCount(&gc, ev.a.CountFormat)
152 | 	gc.Values[0].Label = ev.a.Label
153 | 	for i := 0; i < len(ev.group); i++ {
154 | 		gc.Values[i+1].Label = ev.group[i].a.Label
155 | 	}
156 | 
157 | 	return gc, nil
158 | }
159 | 
160 | // CountFormat configures the format of Count or GroupCount measurements.
161 | //
162 | // Enabled and Running configure the Event to include time enabled and
163 | // time running measurements in the counts. Usually, these two values are
164 | // equal. They may differ when events are multiplexed.
165 | //
166 | // If ID is set, a unique ID is assigned to the associated event. For a
167 | // given event, this ID matches the ID reported by the (*Event).ID method.
168 | //
169 | // If Group is set, the Event measures a group of events together: callers
170 | // must use ReadGroupCount. If Group is not set, the Event measures a single
171 | // counter: callers must use ReadCount.
172 | type CountFormat struct {
173 | 	Enabled bool
174 | 	Running bool
175 | 	ID      bool
176 | 	Group   bool
177 | }
178 | 
179 | // readSize returns the buffer size required for a Count read. Assumes
180 | // f.Group is not set.
181 | func (f CountFormat) readSize() int {
182 | 	size := 8 // value is always set
183 | 	if f.Enabled {
184 | 		size += 8
185 | 	}
186 | 	if f.Running {
187 | 		size += 8
188 | 	}
189 | 	if f.ID {
190 | 		size += 8
191 | 	}
192 | 	return size
193 | }
194 | 
195 | // groupReadSize returns the buffer size required for a GroupCount read.
196 | // Assumes f.Group is set.
197 | func (f CountFormat) groupReadSize(events int) int {
198 | 	hsize := 8 // the number of events is always set
199 | 	if f.Enabled {
200 | 		hsize += 8
201 | 	}
202 | 	if f.Running {
203 | 		hsize += 8
204 | 	}
205 | 	vsize := 8 // each event contains at least a value
206 | 	if f.ID {
207 | 		vsize += 8
208 | 	}
209 | 	return hsize + events*vsize
210 | }
211 | 
212 | // marshal marshals the CountFormat into a uint64.
213 | func (f CountFormat) marshal() uint64 {
214 | 	// Always keep this in sync with the type definition above.
215 | 	fields := []bool{
216 | 		f.Enabled,
217 | 		f.Running,
218 | 		f.ID,
219 | 		f.Group,
220 | 	}
221 | 	return marshalBitwiseUint64(fields)
222 | }
223 | 
--------------------------------------------------------------------------------
/perf_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | // +build linux
6 | 
7 | package perf_test
8 | 
9 | import (
10 | 	"errors"
11 | 	"fmt"
12 | 	"io/ioutil"
13 | 	"os"
14 | 	"runtime"
15 | 	"strconv"
16 | 	"strings"
17 | 	"sync"
18 | 	"testing"
19 | 	"unsafe"
20 | 
21 | 	"golang.org/x/sys/unix"
22 | 
23 | 	"github.com/elastic/go-perf"
24 | )
25 | 
26 | func TestOpen(t *testing.T) {
27 | 	t.Run("BadGroup", testOpenBadGroup)
28 | 	t.Run("BadAttrType", testOpenBadAttrType)
29 | 	t.Run("PopulatesLabel", testOpenPopulatesLabel)
30 | 	t.Run("EventIDsDifferentByCPU", testEventIDsDifferentByCPU)
31 | }
32 | 
33 | func testOpenBadGroup(t *testing.T) {
34 | 	requires(t, paranoid(1), hardwarePMU)
35 | 
36 | 	ca := new(perf.Attr)
37 | 	perf.CPUCycles.Configure(ca)
38 | 	ca.CountFormat.Group = true
39 | 
40 | 	runtime.LockOSThread()
41 | 	defer runtime.UnlockOSThread()
42 | 
43 | 	cycles, err := perf.Open(ca, perf.CallingThread, perf.AnyCPU, nil)
44 | 	if err != nil {
45 | 		t.Fatal(err)
46 | 	}
47 | 	cycles.Close()
48 | 
49 | 	_, err = perf.Open(ca, perf.CallingThread, perf.AnyCPU, cycles)
50 | 	if err == nil {
51 | 		t.Fatal("successful Open with closed group *Event")
52 | 	}
53 | 
54 | 	cycles = new(perf.Event) // uninitialized
55 | 	_, err = perf.Open(ca, perf.CallingThread, perf.AnyCPU, cycles)
56 | 	if err == nil {
57 | 		t.Fatal("successful Open with uninitialized group *Event")
58 | 	}
59 | }
60 | 
61 | func testOpenBadAttrType(t *testing.T) {
62 | 	a := &perf.Attr{
63 | 		Type: 42,
64 | 	}
65 | 
66 | 	_, err := perf.Open(a, perf.CallingThread, perf.AnyCPU, nil)
67 | 	if err == nil {
68 | 		t.Fatal("got a valid *Event for bad Attr.Type 42")
69 | 	}
70 | }
71 | 
72 | func testOpenPopulatesLabel(t *testing.T) {
73 | 	// TODO(acln): extend when we implement general label lookup
74 | 	requires(t, paranoid(1), hardwarePMU)
75 | 
76 | 	runtime.LockOSThread()
77 | 	defer runtime.UnlockOSThread()
78 | 
79 | 	ca := &perf.Attr{
80 | 		Type:   perf.HardwareEvent,
81 | 		Config: uint64(perf.CPUCycles),
82 | 	}
83 | 
84 | 	cycles, err := perf.Open(ca, perf.CallingThread, perf.AnyCPU, nil)
85 | 	if err != nil {
86 | 		t.Fatal(err)
87 | 	}
88 | 	defer cycles.Close()
89 | 
90 | 	c, err := cycles.Measure(getpidTrigger)
91 | 	if err != nil {
92 | 		t.Fatal(err)
93 | 	}
94 | 	if c.Label == "" {
95 | 		t.Fatal("Open did not set label on *Attr")
96 | 	}
97 | }
98 | 
99 | func testEventIDsDifferentByCPU(t *testing.T) {
100 | 	requires(t, paranoid(1), hardwarePMU)
101 | 
102 | 	if runtime.NumCPU() == 1 {
103 | 		t.Skip("only one CPU")
104 | 	}
105 | 
106 | 	ca := new(perf.Attr)
107 | 	perf.CPUCycles.Configure(ca)
108 | 
109 | 	cycles0, err := perf.Open(ca, perf.CallingThread, 0, nil)
110 | 	if err != nil {
111 | 		t.Fatal(err)
112 | 	}
113 | 	defer cycles0.Close()
114 | 
115 | 	cycles1, err := perf.Open(ca, perf.CallingThread, 1, nil)
116 | 	if err != nil {
117 | 		t.Fatal(err)
118 | 	}
119 | 	defer cycles1.Close()
120 | 
121 | 	id0, err := cycles0.ID()
122 | 	if err != nil {
123 | 		t.Fatal(err)
124 | 	}
125 | 
126 | 	id1, err := cycles1.ID()
127 | 	if err != nil {
128 | 		t.Fatal(err)
129 | 	}
130 | 
131 | 	if id0 == id1 {
132 | 		t.Fatal("event has the same ID on different CPUs")
133 | 	}
134 | }
135 | 
136 | func TestMain(m *testing.M) {
137 | 	if !perf.Supported() {
138 | 		fmt.Fprintln(os.Stderr, "perf_event_open not supported")
139 | 		os.Exit(2)
140 | 	}
141 | 	os.Exit(m.Run())
142 | }
143 | 
144 | // perfTestEnv holds and caches information about the testing environment
145 | // for package perf.
146 | type perfTestEnv struct {
147 | 	cap struct {
148 | 		sync.Once
149 | 		sysadmin bool
150 | 	}
151 | 
152 | 	paranoid struct {
153 | 		sync.Once
154 | 		value int
155 | 	}
156 | 
157 | 	tracefs struct {
158 | 		sync.Once
159 | 		mounted  bool
160 | 		readable bool
161 | 		readErr  error
162 | 	}
163 | 
164 | 	pmu struct {
165 | 		sync.Mutex
166 | 		ok      map[string]struct{}
167 | 		missing map[string]error
168 | 	}
169 | }
170 | 
171 | func (env *perfTestEnv) capSysAdmin() bool {
172 | 	env.cap.Once.Do(env.initCap)
173 | 	return env.cap.sysadmin
174 | }
175 | 
176 | type capHeader struct {
177 | 	version uint32
178 | 	pid     int32
179 | }
180 | 
181 | type capData struct {
182 | 	effective uint32
183 | 	_         uint32 // permitted
184 | 	_         uint32 // inheritable
185 | }
186 | 
187 | // constants from uapi/linux/capability.h
188 | const (
189 | 	capSysAdmin = 21
190 | 	capV3       = 0x20080522
191 | )
192 | 
193 | func (env *perfTestEnv) initCap() {
194 | 	header := &capHeader{
195 | 		version: capV3,
196 | 		pid:     int32(unix.Getpid()),
197 | 	}
198 | 	data := make([]capData, 2)
199 | 	_, _, e := unix.Syscall(unix.SYS_CAPGET, uintptr(unsafe.Pointer(header)), uintptr(unsafe.Pointer(&data[0])), 0)
200 | 	if e != 0 {
201 | 		return
202 | 	}
203 | 	if data[0].effective&(1<<capSysAdmin) != 0 {
204 | 		env.cap.sysadmin = true
205 | 	}
206 | }
293 | 	if have > want {
294 | 		return fmt.Errorf("want perf_event_paranoid <= %d, have %d", want, have)
295 | 	}
296 | 	return nil
297 | }
298 | 
299 | // tracefsreq specifies a tracefs requirement for a test: tracefs must be
300 | // mounted at /sys/kernel/debug/tracing, and it must be readable.
301 | type tracefsreq struct{}
302 | 
303 | func (tracefsreq) Evaluate() error {
304 | 	if !testenv.tracefsMounted() {
305 | 		return errors.New("tracefs is not mounted at /sys/kernel/debug/tracing")
306 | 	}
307 | 	if ok, err := testenv.tracefsReadable(); !ok {
308 | 		return fmt.Errorf("tracefs is not readable: %v", err)
309 | 	}
310 | 	return nil
311 | }
312 | 
313 | var tracefs = tracefsreq{}
314 | 
315 | // pmu specifies a PMU requirement for a test.
316 | type pmu string
317 | 
318 | var (
319 | 	hardwarePMU   = pmu("hardware")
320 | 	softwarePMU   = pmu("software")
321 | 	tracepointPMU = pmu("tracepoint")
322 | )
323 | 
324 | func (u pmu) Evaluate() error {
325 | 	device := string(u)
326 | 	if device == "hardware" {
327 | 		device = "cpu" // TODO(acln): investigate
328 | 	}
329 | 	if ok, err := testenv.havePMU(device); !ok {
330 | 		return fmt.Errorf("%s PMU not supported: %v", device, err)
331 | 	}
332 | 	return nil
333 | }
334 | 
335 | type testRequirement interface {
336 | 	Evaluate() error
337 | }
338 | 
339 | func requires(t *testing.T, reqs ...testRequirement) {
340 | 	t.Helper()
341 | 
342 | 	sb := new(strings.Builder)
343 | 	unmet := 0
344 | 
345 | 	for _, req := range reqs {
346 | 		if err := req.Evaluate(); err != nil {
347 | 			if unmet > 0 {
348 | 				sb.WriteString("; ")
349 | 			}
350 | 			fmt.Fprint(sb, err)
351 | 			unmet++
352 | 		}
353 | 	}
354 | 
355 | 	switch unmet {
356 | 	case 0:
357 | 		return
358 | 	case 1:
359 | 		t.Skipf("unmet requirement: %s", sb.String())
360 | 	default:
361 | 		t.Skipf("unmet requirements: %s", sb.String())
362 | 	}
363 | }
364 | 
--------------------------------------------------------------------------------
/count_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 5 | package perf_test 6 | 7 | import ( 8 | "math/rand" 9 | "os" 10 | "runtime" 11 | "runtime/debug" 12 | "testing" 13 | "time" 14 | 15 | "golang.org/x/sys/unix" 16 | 17 | "github.com/elastic/go-perf" 18 | ) 19 | 20 | func TestCount(t *testing.T) { 21 | t.Run("Hardware", testHardwareCounters) 22 | t.Run("Software", testSoftwareCounters) 23 | t.Run("HardwareCache", testHardwareCacheCounters) 24 | t.Run("Tracepoint", testSingleTracepoint) 25 | t.Run("IoctlAndCountIDsMatch", testIoctlAndCountIDsMatch) 26 | } 27 | 28 | func testHardwareCounters(t *testing.T) { 29 | requires(t, paranoid(1), hardwarePMU) 30 | 31 | t.Run("IPC", testIPC) 32 | } 33 | 34 | func testIPC(t *testing.T) { 35 | g := perf.Group{ 36 | CountFormat: perf.CountFormat{ 37 | ID: true, 38 | }, 39 | } 40 | g.Add(perf.Instructions, perf.CPUCycles) 41 | 42 | runtime.LockOSThread() 43 | defer runtime.UnlockOSThread() 44 | 45 | hw, err := g.Open(perf.CallingThread, perf.AnyCPU) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | defer hw.Close() 50 | 51 | var sum int64 52 | gc, err := hw.MeasureGroup(func() { 53 | for i := int64(0); i < 1000000; i++ { 54 | sum += i 55 | } 56 | }) 57 | if err != nil { 58 | t.Fatal(err) 59 | } 60 | for _, c := range gc.Values { 61 | if c.Value == 0 { 62 | t.Fatalf("didn't count %q", c.Label) 63 | } 64 | } 65 | insns := gc.Values[0].Value 66 | cycles := gc.Values[1].Value 67 | ipc := float64(insns) / float64(cycles) 68 | t.Logf("got %d instructions, %d cycles: %f IPC", insns, cycles, ipc) 69 | } 70 | 71 | func testSoftwareCounters(t *testing.T) { 72 | requires(t, paranoid(1), softwarePMU) 73 | 74 | t.Run("PageFaults", testPageFaults) 75 | } 76 | 77 | var fault []byte 78 | 79 | func testPageFaults(t *testing.T) { 80 | // TODO(acln): this test starts failing when run with -count > 4-5, 81 | // even though we're calling debug.FreeOSMemory. Why? 82 | pfa := &perf.Attr{ 83 | CountFormat: perf.CountFormat{ 84 | Running: true, 85 | Enabled: true, 86 | }, 87 | } 88 | perf.PageFaults.Configure(pfa) 89 | 90 | runtime.LockOSThread() 91 | defer runtime.UnlockOSThread() 92 | 93 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 94 | if err != nil { 95 | t.Fatal(err) 96 | } 97 | defer faults.Close() 98 | 99 | debug.FreeOSMemory() 100 | 101 | c, err := faults.Measure(func() { 102 | fault = make([]byte, 64*1024*1024) 103 | fault[0] = 1 104 | fault[63*1024*1024] = 1 105 | }) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | if c.Value == 0 { 110 | t.Fatal("didn't see a page fault") 111 | } 112 | t.Logf("saw %v: enabled: %v, running: %v", c, c.Enabled, c.Running) 113 | } 114 | 115 | func testHardwareCacheCounters(t *testing.T) { 116 | // TODO(acln): add PMU requirement? but how? 117 | // 118 | // $ ls /sys/bus/event_source/devices/*/type | xargs cat 119 | // 120 | // does not contain a 3, which is the ABI-specified value of 121 | // perf.HardwareCacheEvent. Maybe it's under the "cpu" PMU 122 | // somewhere. Investigate. 
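	// For reference, perf_event_open(2) encodes a hardware cache event as
	// config = id | (op << 8) | (result << 16) under type
	// PERF_TYPE_HW_CACHE (3); HardwareCacheCounter.Configure presumably
	// fills in that encoding from the Cache, Op, and Result fields.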
123 | requires(t, paranoid(1)) 124 | 125 | t.Run("L1DataMissesBadLocality", testL1DataMissesBadLocality) 126 | t.Run("L1DataMissesGoodLocality", testL1DataMissesGoodLocality) 127 | t.Run("L1Group", testL1Group) 128 | } 129 | 130 | func testL1DataMissesBadLocality(t *testing.T) { 131 | hwca := new(perf.Attr) 132 | hwcc := perf.HardwareCacheCounter{ 133 | Cache: perf.L1D, 134 | Op: perf.Read, 135 | Result: perf.Miss, 136 | } 137 | hwcc.Configure(hwca) 138 | 139 | runtime.LockOSThread() 140 | defer runtime.UnlockOSThread() 141 | 142 | l1dmisses, err := perf.Open(hwca, perf.CallingThread, perf.AnyCPU, nil) 143 | if err != nil { 144 | t.Fatal(err) 145 | } 146 | defer l1dmisses.Close() 147 | 148 | rng := rand.New(rand.NewSource(time.Now().Unix())) 149 | 150 | max := 1000 151 | 152 | var bad []interface{} 153 | for i := 0; i < 10000; i++ { 154 | bad = append(bad, rng.Intn(max)) 155 | } 156 | 157 | sink := 0 158 | c, err := l1dmisses.Measure(func() { 159 | for _, v := range bad { 160 | if v.(int) < max/2 { 161 | sink++ 162 | } 163 | } 164 | }) 165 | if err != nil { 166 | t.Fatal(err) 167 | } 168 | if c.Value == 0 { 169 | t.Fatalf("recorded no L1 data cache misses") 170 | } 171 | 172 | t.Logf("bad locality: got %d L1 data cache misses", c.Value) 173 | } 174 | 175 | func testL1DataMissesGoodLocality(t *testing.T) { 176 | hwca := new(perf.Attr) 177 | hwcc := perf.HardwareCacheCounter{ 178 | Cache: perf.L1D, 179 | Op: perf.Read, 180 | Result: perf.Miss, 181 | } 182 | hwcc.Configure(hwca) 183 | 184 | runtime.LockOSThread() 185 | defer runtime.UnlockOSThread() 186 | 187 | l1dmisses, err := perf.Open(hwca, perf.CallingThread, perf.AnyCPU, nil) 188 | if err != nil { 189 | t.Fatal(err) 190 | } 191 | defer l1dmisses.Close() 192 | 193 | rng := rand.New(rand.NewSource(time.Now().Unix())) 194 | 195 | max := 1000 196 | 197 | var contiguous []int 198 | for i := 0; i < 10000; i++ { 199 | contiguous = append(contiguous, rng.Intn(max)) 200 | } 201 | 202 | sink := 0 203 | c, err := l1dmisses.Measure(func() { 204 | for _, v := range contiguous { 205 | if v < max/2 { 206 | sink++ 207 | } 208 | } 209 | }) 210 | if err != nil { 211 | t.Fatal(err) 212 | } 213 | if c.Value == 0 { 214 | t.Fatalf("recorded no L1 data cache misses") 215 | } 216 | 217 | t.Logf("good locality: got %d L1 data cache misses", c.Value) 218 | } 219 | 220 | type l1testIdentity int 221 | 222 | func (v l1testIdentity) value() int { return int(v) } 223 | 224 | type l1testSquare int 225 | 226 | func (v l1testSquare) value() int { return int(v * v) } 227 | 228 | type l1testCube int 229 | 230 | func (v l1testCube) value() int { return int(v * v * v) } 231 | 232 | type valuer interface { 233 | value() int 234 | } 235 | 236 | func newValuer(n int) valuer { 237 | switch n % 3 { 238 | case 0: 239 | return l1testIdentity(n) 240 | case 1: 241 | return l1testSquare(n) 242 | default: 243 | return l1testCube(n) 244 | } 245 | } 246 | 247 | func testL1Group(t *testing.T) { 248 | caches := []perf.Cache{perf.L1D, perf.L1I} 249 | ops := []perf.CacheOp{perf.Read} 250 | results := []perf.CacheOpResult{perf.Miss} 251 | 252 | var g perf.Group 253 | g.Add(perf.HardwareCacheCounters(caches, ops, results)...) 
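	// Note (added explanation): HardwareCacheCounters builds one counter
	// per (cache, op, result) combination, so the slices above produce two
	// counters, L1D read misses and L1I read misses, which are read back
	// below as gc.Values[0] and gc.Values[1].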
254 | 255 | runtime.LockOSThread() 256 | defer runtime.UnlockOSThread() 257 | 258 | l1, err := g.Open(perf.CallingThread, perf.AnyCPU) 259 | if err != nil { 260 | t.Fatal(err) 261 | } 262 | defer l1.Close() 263 | 264 | const n = 100000 265 | 266 | valuers := make([]valuer, 0, n) 267 | for i := 0; i < n; i++ { 268 | valuers = append(valuers, newValuer(i)) 269 | } 270 | 271 | sum := 0 272 | gc, err := l1.MeasureGroup(func() { 273 | for i := 0; i < n; i++ { 274 | sum += valuers[i].value() 275 | } 276 | }) 277 | if err != nil { 278 | t.Fatal(err) 279 | } 280 | 281 | t.Logf("got %d L1 data cache misses, %d L1 instruction cache misses", 282 | gc.Values[0].Value, gc.Values[1].Value) 283 | } 284 | 285 | func testSingleTracepoint(t *testing.T) { 286 | requires(t, paranoid(1), tracepointPMU, tracefs) 287 | 288 | tests := []singleTracepointTest{ 289 | { 290 | category: "syscalls", 291 | event: "sys_enter_getpid", 292 | trigger: getpidTrigger, 293 | }, 294 | { 295 | category: "syscalls", 296 | event: "sys_enter_read", 297 | trigger: readTrigger, 298 | }, 299 | { 300 | category: "syscalls", 301 | event: "sys_enter_write", 302 | trigger: writeTrigger, 303 | }, 304 | } 305 | for _, tt := range tests { 306 | t.Run(tt.String(), tt.run) 307 | } 308 | } 309 | 310 | type singleTracepointTest struct { 311 | category string 312 | event string 313 | trigger func() 314 | } 315 | 316 | func (tt singleTracepointTest) run(t *testing.T) { 317 | tp := perf.Tracepoint(tt.category, tt.event) 318 | attr := new(perf.Attr) 319 | if err := tp.Configure(attr); err != nil { 320 | t.Fatal(err) 321 | } 322 | 323 | runtime.LockOSThread() 324 | defer runtime.UnlockOSThread() 325 | 326 | ev, err := perf.Open(attr, perf.CallingThread, perf.AnyCPU, nil) 327 | if err != nil { 328 | t.Fatal(err) 329 | } 330 | defer ev.Close() 331 | 332 | c, err := ev.Measure(func() { 333 | tt.trigger() 334 | }) 335 | if err != nil { 336 | t.Fatal(err) 337 | } 338 | if c.Value != 1 { 339 | t.Fatalf("got %d hits for %q, want 1 hit", c.Value, c.Label) 340 | } 341 | } 342 | 343 | func (tt singleTracepointTest) String() string { 344 | return tt.category + ":" + tt.event 345 | } 346 | 347 | func testIoctlAndCountIDsMatch(t *testing.T) { 348 | requires(t, paranoid(1), softwarePMU) 349 | 350 | pfa := new(perf.Attr) 351 | perf.PageFaults.Configure(pfa) 352 | pfa.CountFormat.ID = true 353 | 354 | runtime.LockOSThread() 355 | defer runtime.UnlockOSThread() 356 | 357 | faults, err := perf.Open(pfa, perf.CallingThread, perf.AnyCPU, nil) 358 | if err != nil { 359 | t.Fatal(err) 360 | } 361 | 362 | runtime.GC() 363 | 364 | c, err := faults.Measure(func() { 365 | fault = make([]byte, 64*1024*1024) 366 | fault[0] = 1 367 | fault[63*1024*1024] = 1 368 | }) 369 | if err != nil { 370 | t.Fatal(err) 371 | } 372 | if c.Value == 0 { 373 | t.Fatal("didn't see a page fault") 374 | } 375 | id, err := faults.ID() 376 | if err != nil { 377 | t.Fatal(err) 378 | } 379 | if id != c.ID { 380 | t.Fatalf("got ID %d from ioctl, but %d from count read", id, c.ID) 381 | } 382 | } 383 | 384 | func getpidTrigger() { 385 | unix.Getpid() 386 | } 387 | 388 | func readTrigger() { 389 | zero, err := os.Open("/dev/zero") 390 | if err != nil { 391 | panic(err) 392 | } 393 | buf := make([]byte, 8) 394 | if _, err := zero.Read(buf); err != nil { 395 | panic(err) 396 | } 397 | } 398 | 399 | func writeTrigger() { 400 | null, err := os.OpenFile("/dev/null", os.O_WRONLY, 0200) 401 | if err != nil { 402 | panic(err) 403 | } 404 | if _, err := null.Write([]byte("big data")); err != nil { 405 | panic(err) 
406 | } 407 | } 408 | -------------------------------------------------------------------------------- /record_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf_test 8 | 9 | import ( 10 | "context" 11 | "fmt" 12 | "os" 13 | "os/exec" 14 | "runtime" 15 | "sync" 16 | "testing" 17 | "time" 18 | "unsafe" 19 | 20 | "golang.org/x/sys/unix" 21 | 22 | "github.com/elastic/go-perf" 23 | ) 24 | 25 | func TestPoll(t *testing.T) { 26 | t.Run("Timeout", testPollTimeout) 27 | t.Run("Cancel", testPollCancel) 28 | t.Run("Expired", testPollExpired) 29 | t.Run("DisabledExplicitly", testPollDisabledExplicitly) 30 | t.Run("DisabledByRefresh", testPollDisabledByRefresh) 31 | t.Run("DisabledByExit", testPollDisabledByExit) 32 | } 33 | 34 | func TestReadRecord(t *testing.T) { 35 | t.Run("Comm", testComm) 36 | t.Run("Exit", testExit) 37 | t.Run("CPUWideSwitch", testCPUWideSwitch) 38 | t.Run("SampleGetpid", testSampleGetpid) 39 | t.Run("SampleGetpidConcurrent", testSampleGetpidConcurrent) 40 | t.Run("SampleTracepointStack", testSampleTracepointStack) 41 | t.Run("RedirectedOutput", testRedirectedOutput) 42 | 43 | // TODO(acln): a test for the case when a record straddles the head 44 | // of the ring is missing. See readRawRecordNonblock. 45 | } 46 | 47 | func testPollTimeout(t *testing.T) { 48 | requires(t, paranoid(1), tracepointPMU, tracefs) 49 | 50 | ga := new(perf.Attr) 51 | ga.SetSamplePeriod(1) 52 | ga.SetWakeupEvents(1) 53 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 54 | if err := gtp.Configure(ga); err != nil { 55 | t.Fatal(err) 56 | } 57 | 58 | runtime.LockOSThread() 59 | defer runtime.UnlockOSThread() 60 | 61 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | defer getpid.Close() 66 | if err := getpid.MapRing(); err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | errch := make(chan error) 71 | timeout := 20 * time.Millisecond 72 | 73 | go func() { 74 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 75 | defer cancel() 76 | 77 | for i := 0; i < 2; i++ { 78 | _, err := getpid.ReadRecord(ctx) 79 | errch <- err 80 | } 81 | }() 82 | 83 | c, err := getpid.Measure(getpidTrigger) 84 | if err != nil { 85 | t.Fatal(err) 86 | } 87 | if c.Value != 1 { 88 | t.Fatalf("got %d hits for %q, want 1", c.Value, c.Label) 89 | } 90 | 91 | // For the first event, we should get a valid sample immediately. 92 | select { 93 | case <-time.After(10 * time.Millisecond): 94 | t.Fatalf("didn't get the first sample: timeout") 95 | case err := <-errch: 96 | if err != nil { 97 | t.Fatalf("got %v, want valid first sample", err) 98 | } 99 | } 100 | 101 | // Now, we should get a timeout. 
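	// Only one getpid fired, so the ring holds no further records: the
	// second ReadRecord call has nothing to return and should block until
	// the context deadline expires, surfacing context.DeadlineExceeded.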
102 | select { 103 | case <-time.After(2 * timeout): 104 | t.Logf("didn't time out, waiting") 105 | err := <-errch 106 | t.Fatalf("got %v", err) 107 | case err := <-errch: 108 | if err != context.DeadlineExceeded { 109 | t.Fatalf("got %v, want context.DeadlineExceeded", err) 110 | } 111 | } 112 | } 113 | 114 | func testPollCancel(t *testing.T) { 115 | requires(t, paranoid(1), tracepointPMU, tracefs) 116 | 117 | ga := new(perf.Attr) 118 | ga.SetSamplePeriod(1) 119 | ga.SetWakeupEvents(1) 120 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 121 | if err := gtp.Configure(ga); err != nil { 122 | t.Fatal(err) 123 | } 124 | 125 | runtime.LockOSThread() 126 | defer runtime.UnlockOSThread() 127 | 128 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 129 | if err != nil { 130 | t.Fatal(err) 131 | } 132 | defer getpid.Close() 133 | if err := getpid.MapRing(); err != nil { 134 | t.Fatal(err) 135 | } 136 | 137 | ctx, cancel := context.WithCancel(context.Background()) 138 | defer cancel() 139 | 140 | errch := make(chan error) 141 | 142 | go func() { 143 | for i := 0; i < 2; i++ { 144 | _, err := getpid.ReadRecord(ctx) 145 | errch <- err 146 | } 147 | }() 148 | 149 | c, err := getpid.Measure(getpidTrigger) 150 | if err != nil { 151 | t.Fatal(err) 152 | } 153 | if c.Value != 1 { 154 | t.Fatalf("got %d hits for %q, want 1", c.Value, c.Label) 155 | } 156 | 157 | // For the first event, we should get a valid sample. 158 | select { 159 | case <-time.After(10 * time.Millisecond): 160 | t.Fatalf("didn't get the first sample: timeout") 161 | case err := <-errch: 162 | if err != nil { 163 | t.Fatalf("got %v, want valid first sample", err) 164 | } 165 | } 166 | 167 | // The goroutine reading the records is now blocked in ReadRecord. 168 | // Cancel the context and observe the results. We should see 169 | // context.Canceled quite quickly. 170 | cancel() 171 | 172 | select { 173 | case <-time.After(10 * time.Millisecond): 174 | t.Fatalf("context cancel didn't unblock ReadRecord") 175 | case err := <-errch: 176 | if err != context.Canceled { 177 | t.Fatalf("got %v, want %v", err, context.Canceled) 178 | } 179 | } 180 | } 181 | 182 | func testPollExpired(t *testing.T) { 183 | requires(t, paranoid(1), softwarePMU) 184 | 185 | da := new(perf.Attr) 186 | perf.Dummy.Configure(da) 187 | 188 | runtime.LockOSThread() 189 | defer runtime.UnlockOSThread() 190 | 191 | dummy, err := perf.Open(da, perf.CallingThread, perf.AnyCPU, nil) 192 | if err != nil { 193 | t.Fatal(err) 194 | } 195 | defer dummy.Close() 196 | if err := dummy.MapRing(); err != nil { 197 | t.Fatal(err) 198 | } 199 | 200 | timeout := 1 * time.Millisecond 201 | ctx, cancel := context.WithTimeout(context.Background(), timeout) 202 | defer cancel() 203 | 204 | // Wait until the deadline is in the past. 205 | time.Sleep(2 * timeout) 206 | 207 | rec, err := dummy.ReadRecord(ctx) 208 | if err == nil { 209 | t.Fatalf("got nil error and record %#v", rec) 210 | } 211 | if err != context.DeadlineExceeded { 212 | t.Fatalf("got %v, want context.DeadlineExceeded", err) 213 | } 214 | } 215 | 216 | const errDisabledTestEnv = "PERF_TEST_ERR_DISABLED" 217 | 218 | func init() { 219 | // In child process of testErrDisabledProcessExist. 220 | if os.Getenv(errDisabledTestEnv) != "1" { 221 | return 222 | } 223 | 224 | readyevfd := 3 225 | startevfd := 4 226 | 227 | // Signal to the parent that we can start. 
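	// (evsig and evwait are this file's eventfd helpers; descriptors 3
	// and 4 correspond to the first and second entries of cmd.ExtraFiles
	// in the parent, since a child's extra files start at fd 3.)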
228 | evsig(readyevfd) 229 | 230 | // Wait for the parent to tell us that they have set up performance 231 | // monitoring, and are ready to observe the event. 232 | evwait(startevfd) 233 | 234 | // Call getpid, then exit. Parent will see POLLIN for getpid, then 235 | // POLLHUP because we exited. 236 | unix.Getpid() 237 | os.Exit(0) 238 | } 239 | 240 | func testPollDisabledByExit(t *testing.T) { 241 | requires(t, paranoid(1), tracepointPMU, tracefs) 242 | 243 | // Re-exec ourselves with PERF_TEST_ERR_DISABLED=1. 244 | self, err := os.Executable() 245 | if err != nil { 246 | t.Fatal(err) 247 | } 248 | 249 | readyevfd, err := unix.Eventfd(0, 0) 250 | if err != nil { 251 | t.Fatal(err) 252 | } 253 | defer unix.Close(readyevfd) 254 | 255 | startevfd, err := unix.Eventfd(0, 0) 256 | if err != nil { 257 | t.Fatal(err) 258 | } 259 | defer unix.Close(startevfd) 260 | 261 | cmd := exec.Command(self) 262 | cmd.Env = append(os.Environ(), errDisabledTestEnv+"=1") 263 | cmd.ExtraFiles = []*os.File{ 264 | os.NewFile(uintptr(readyevfd), "readyevfd"), 265 | os.NewFile(uintptr(startevfd), "startevfd"), 266 | } 267 | if err := cmd.Start(); err != nil { 268 | t.Fatal(err) 269 | } 270 | 271 | // Set up performance monitoring for the child process. 272 | ga := &perf.Attr{ 273 | Options: perf.Options{ 274 | Disabled: true, 275 | }, 276 | SampleFormat: perf.SampleFormat{ 277 | Tid: true, 278 | }, 279 | } 280 | ga.SetSamplePeriod(1) 281 | ga.SetWakeupEvents(1) 282 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 283 | if err := gtp.Configure(ga); err != nil { 284 | t.Fatal(err) 285 | } 286 | 287 | runtime.LockOSThread() 288 | defer runtime.UnlockOSThread() 289 | 290 | getpid, err := perf.Open(ga, cmd.Process.Pid, perf.AnyCPU, nil) 291 | if err != nil { 292 | t.Fatal(err) 293 | } 294 | defer getpid.Close() 295 | if err := getpid.MapRing(); err != nil { 296 | t.Fatal(err) 297 | } 298 | 299 | // Wait for the child process to be ready. 300 | evwait(readyevfd) 301 | 302 | // Now that it is, enable the event. 303 | if err := getpid.Enable(); err != nil { 304 | t.Fatal(err) 305 | } 306 | 307 | // Signal to the child that it should call getpid now. 308 | // It will call getpid, then exit. 309 | evsig(startevfd) 310 | if err := cmd.Wait(); err != nil { 311 | t.Fatal(err) 312 | } 313 | 314 | // Read two records. The first one should be valid, 315 | // the second one should not, and the second error 316 | // should be ErrDisabled. 
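	// (The getpid sample is buffered in the ring before the child exits;
	// once the child is gone, the kernel marks the event disabled and
	// poll reports POLLHUP, which ReadRecord surfaces as perf.ErrDisabled
	// after the buffered record has been drained.)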
317 | 	timeout := 100 * time.Millisecond
318 | 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
319 | 	defer cancel()
320 | 	rec1, err1 := getpid.ReadRecord(ctx)
321 | 	rec2, err2 := getpid.ReadRecord(ctx)
322 | 
323 | 	if err1 != nil {
324 | 		t.Errorf("first error was %v, want nil", err1)
325 | 	}
326 | 	sr, ok := rec1.(*perf.SampleRecord)
327 | 	if !ok {
328 | 		t.Errorf("first record: got %T, want *perf.SampleRecord", rec1)
329 | 	}
330 | 	if ok && int(sr.Pid) != cmd.Process.Pid {
331 | 		t.Errorf("first record: got pid %d in the sample, want %d",
332 | 			sr.Pid, cmd.Process.Pid)
333 | 	}
334 | 
335 | 	if err2 != perf.ErrDisabled {
336 | 		t.Errorf("second record: error was %v, want ErrDisabled", err2)
337 | 	}
338 | 	if rec2 != nil {
339 | 		t.Errorf("second record: got %#v, want nil", rec2)
340 | 	}
341 | }
342 | 
343 | func testPollDisabledExplicitly(t *testing.T) {
344 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
345 | 
346 | 	ga := &perf.Attr{
347 | 		SampleFormat: perf.SampleFormat{
348 | 			Tid: true,
349 | 		},
350 | 		Options: perf.Options{
351 | 			Disabled: true,
352 | 		},
353 | 	}
354 | 	ga.SetSamplePeriod(1)
355 | 	ga.SetWakeupEvents(1)
356 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
357 | 	if err := gtp.Configure(ga); err != nil {
358 | 		t.Fatal(err)
359 | 	}
360 | 
361 | 	runtime.LockOSThread()
362 | 	defer runtime.UnlockOSThread()
363 | 
364 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
365 | 	if err != nil {
366 | 		t.Fatal(err)
367 | 	}
368 | 	defer getpid.Close()
369 | 	if err := getpid.MapRing(); err != nil {
370 | 		t.Fatal(err)
371 | 	}
372 | 
373 | 	const n = 3
374 | 
375 | 	ctx, cancel := context.WithCancel(context.Background())
376 | 	defer cancel()
377 | 
378 | 	done := make(chan struct{})
379 | 	seen := 0
380 | 
381 | 	go func() {
382 | 		for i := 0; i < 2*n; i++ {
383 | 			_, err := getpid.ReadRecord(ctx)
384 | 			if err == nil {
385 | 				seen++
386 | 			}
387 | 		}
388 | 		close(done)
389 | 	}()
390 | 
391 | 	if err := getpid.Enable(); err != nil {
392 | 		t.Fatal(err)
393 | 	}
394 | 
395 | 	for i := 0; i < n; i++ {
396 | 		getpidTrigger()
397 | 	}
398 | 
399 | 	if err := getpid.Disable(); err != nil {
400 | 		t.Fatal(err)
401 | 	}
402 | 
403 | 	for i := 0; i < n; i++ {
404 | 		getpidTrigger()
405 | 	}
406 | 
407 | 	cancel()
408 | 	<-done
409 | 
410 | 	if seen != n {
411 | 		t.Fatalf("saw %d events, want %d", seen, n)
412 | 	}
413 | }
414 | 
415 | func testPollDisabledByRefresh(t *testing.T) {
416 | 	// TODO(acln): investigate the following: the man page says that
417 | 	// POLLHUP should be indicated on the file descriptor when the counter
418 | 	// associated with a call to Refresh reaches zero. I have not been
419 | 	// able to observe this. When the counter reaches zero, the event
420 | 	// is disabled (which is what this test shows), but POLLHUP doesn't
421 | 	// seem to be indicated on the file descriptor.
422 | 	//
423 | 	// If we ever figure out how to observe a HUP there, we should
424 | 	// make ReadRawRecord return ErrDisabled. In the meantime, leave
425 | 	// things as-is.
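	//
	// Refresh(n) arms the event for n more overflows, after which the
	// kernel disables it on its own. The goroutine below therefore sees
	// samples for the first n getpid calls only, even though 2n calls
	// are made in total.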
426 | requires(t, paranoid(1), tracepointPMU, tracefs) 427 | 428 | ga := &perf.Attr{ 429 | SampleFormat: perf.SampleFormat{ 430 | Tid: true, 431 | }, 432 | Options: perf.Options{ 433 | Disabled: true, 434 | }, 435 | } 436 | ga.SetSamplePeriod(1) 437 | ga.SetWakeupEvents(1) 438 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 439 | if err := gtp.Configure(ga); err != nil { 440 | t.Fatal(err) 441 | } 442 | 443 | runtime.LockOSThread() 444 | defer runtime.UnlockOSThread() 445 | 446 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 447 | if err != nil { 448 | t.Fatal(err) 449 | } 450 | defer getpid.Close() 451 | if err := getpid.MapRing(); err != nil { 452 | t.Fatal(err) 453 | } 454 | 455 | const n = 3 456 | 457 | ctx, cancel := context.WithCancel(context.Background()) 458 | defer cancel() 459 | 460 | done := make(chan struct{}) 461 | seen := 0 462 | 463 | go func() { 464 | for i := 0; i < 2*n; i++ { 465 | _, err := getpid.ReadRecord(ctx) 466 | if err == nil { 467 | seen++ 468 | } 469 | } 470 | close(done) 471 | }() 472 | 473 | if err := getpid.Refresh(n); err != nil { 474 | t.Fatal(err) 475 | } 476 | 477 | for i := 0; i < n; i++ { 478 | getpidTrigger() 479 | } 480 | 481 | for i := 0; i < n; i++ { 482 | getpidTrigger() 483 | } 484 | 485 | cancel() 486 | <-done 487 | 488 | if seen != n { 489 | t.Fatalf("saw %d events, want %d", seen, n) 490 | } 491 | } 492 | 493 | const ( 494 | commTestEnv = "PERF_TEST_COMM" 495 | commTestName = "commtest" 496 | ) 497 | 498 | func init() { 499 | // In child process of testComm. 500 | if os.Getenv(commTestEnv) != "1" { 501 | return 502 | } 503 | 504 | readyevfd := 3 505 | startevfd := 4 506 | sawcommevfd := 5 507 | 508 | // Signal to the parent that we can start. 509 | evsig(readyevfd) 510 | 511 | // Wait for the parent to tell us that they have set up performance 512 | // monitoring, and are ready to observe the event. 513 | evwait(startevfd) 514 | 515 | // Change our name. 516 | b := make([]byte, len(commTestName)+1) 517 | copy(b, commTestName) 518 | err := unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&b[0])), 0, 0, 0) 519 | runtime.KeepAlive(&b[0]) 520 | if err != nil { 521 | fmt.Fprint(os.Stderr, err) 522 | os.Exit(2) 523 | } 524 | 525 | // TODO(acln): investigate the legitimacy of the following crutch. 526 | // 527 | // Wait for the parent to see that we changed our name, then exit. 528 | // 529 | // If we do not wait here, there is a terrible race condition waiting 530 | // to happen: If we PR_SET_NAME in the child, then immediately exit, 531 | // the other side may not see POLLIN on the comm record: it may see 532 | // POLLHUP directly, even though a comm record was actually written 533 | // to the ring in the meantime. Why we get POLLHUP directly, and not 534 | // POLLIN before it, is unclear. The machinery to deal with this 535 | // eventuality in the poller does not exist yet, and at the time 536 | // when this comment was written, I have found no good solutions to 537 | // this conundrum. 538 | // 539 | // So we live with it, but still try to make our test pass. 540 | evwait(sawcommevfd) 541 | os.Exit(0) 542 | } 543 | 544 | func testComm(t *testing.T) { 545 | t.Skip("flaky. TODO(acln): investigate") 546 | 547 | requires(t, paranoid(1), softwarePMU) 548 | 549 | // Re-exec ourselves with PERF_TEST_COMM=1. 
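	// The re-exec'd child sees three eventfds inherited through
	// ExtraFiles, at file descriptors 3 (ready), 4 (start), and
	// 5 (saw-comm), matching the init function above.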
550 | self, err := os.Executable() 551 | if err != nil { 552 | t.Fatal(err) 553 | } 554 | 555 | readyevfd, err := unix.Eventfd(0, 0) 556 | if err != nil { 557 | t.Fatal(err) 558 | } 559 | defer unix.Close(readyevfd) 560 | 561 | startevfd, err := unix.Eventfd(0, 0) 562 | if err != nil { 563 | t.Fatal(err) 564 | } 565 | defer unix.Close(startevfd) 566 | 567 | sawcommevfd, err := unix.Eventfd(0, 0) 568 | if err != nil { 569 | t.Fatal(err) 570 | } 571 | defer unix.Close(sawcommevfd) 572 | 573 | cmd := exec.Command(self) 574 | cmd.Env = append(os.Environ(), commTestEnv+"=1") 575 | cmd.ExtraFiles = []*os.File{ 576 | os.NewFile(uintptr(readyevfd), "readyevfd"), 577 | os.NewFile(uintptr(startevfd), "startevfd"), 578 | os.NewFile(uintptr(sawcommevfd), "sawcommevfd"), 579 | } 580 | if err := cmd.Start(); err != nil { 581 | t.Fatal(err) 582 | } 583 | 584 | // Set up performance monitoring for the child process. 585 | ca := &perf.Attr{ 586 | Options: perf.Options{ 587 | Disabled: true, 588 | Comm: true, 589 | }, 590 | SampleFormat: perf.SampleFormat{ 591 | Tid: true, 592 | }, 593 | } 594 | ca.SetSamplePeriod(1) 595 | ca.SetWakeupEvents(1) 596 | perf.Dummy.Configure(ca) 597 | 598 | runtime.LockOSThread() 599 | defer runtime.UnlockOSThread() 600 | 601 | comm, err := perf.Open(ca, cmd.Process.Pid, perf.AnyCPU, nil) 602 | if err != nil { 603 | t.Fatal(err) 604 | } 605 | defer comm.Close() 606 | if err := comm.MapRing(); err != nil { 607 | t.Fatal(err) 608 | } 609 | 610 | // Wait for the child process to be ready. 611 | evwait(readyevfd) 612 | 613 | // Now that it is, enable the event. 614 | if err := comm.Enable(); err != nil { 615 | t.Fatal(err) 616 | } 617 | 618 | // Signal to the child that it should change its name. 619 | evsig(startevfd) 620 | 621 | // Read the CommRecord. 622 | ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) 623 | defer cancel() 624 | rec, rerr := comm.ReadRecord(ctx) 625 | 626 | // Signal to the child that it should exit, and wait for it to do so. 627 | evsig(sawcommevfd) 628 | if err := cmd.Wait(); err != nil { 629 | t.Fatal(err) 630 | } 631 | 632 | // Observe the CommRecord. 633 | if rerr != nil { 634 | t.Fatalf("got %v, want valid record", rerr) 635 | } 636 | cr, ok := rec.(*perf.CommRecord) 637 | if !ok { 638 | t.Fatalf("got %T, want *perf.CommRecord", rec) 639 | } 640 | if int(cr.Pid) != cmd.Process.Pid { 641 | t.Errorf("got pid %d, want %d", cr.Pid, cmd.Process.Pid) 642 | } 643 | if cr.NewName != commTestName { 644 | t.Errorf("new name = %q, want %q", cr.NewName, commTestName) 645 | } 646 | if cr.WasExec() { 647 | t.Error("got WasExec() == true, want false") 648 | } 649 | } 650 | 651 | const ( 652 | exitTestEnv = "PERF_TEST_EXIT" 653 | exitTestCode = 42 654 | ) 655 | 656 | func init() { 657 | // In the child process of testExit. 658 | if os.Getenv("PERF_TEST_EXIT") != "1" { 659 | return 660 | } 661 | 662 | readyevfd := 3 663 | startevfd := 4 664 | 665 | // Signal to the parent that we can start. 666 | evsig(readyevfd) 667 | 668 | // Wait for the parent to tell us that they have set up performance 669 | // monitoring, and are ready to observe the event. 670 | evwait(startevfd) 671 | 672 | os.Exit(exitTestCode) 673 | } 674 | 675 | func testExit(t *testing.T) { 676 | requires(t, paranoid(1), softwarePMU) 677 | 678 | // Re-exec ourselves with PERF_TEST_EXIT=1. 
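	// The re-exec'd child sees two eventfds inherited through
	// ExtraFiles, at file descriptors 3 (ready) and 4 (start),
	// matching the init function above.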
679 | self, err := os.Executable() 680 | if err != nil { 681 | t.Fatal(err) 682 | } 683 | 684 | readyevfd, err := unix.Eventfd(0, 0) 685 | if err != nil { 686 | t.Fatal(err) 687 | } 688 | defer unix.Close(readyevfd) 689 | 690 | startevfd, err := unix.Eventfd(0, 0) 691 | if err != nil { 692 | t.Fatal(err) 693 | } 694 | defer unix.Close(startevfd) 695 | 696 | cmd := exec.Command(self) 697 | cmd.Env = append(os.Environ(), exitTestEnv+"=1") 698 | cmd.ExtraFiles = []*os.File{ 699 | os.NewFile(uintptr(readyevfd), "readyevfd"), 700 | os.NewFile(uintptr(startevfd), "startevfd"), 701 | } 702 | if err := cmd.Start(); err != nil { 703 | t.Fatal(err) 704 | } 705 | pid := cmd.Process.Pid 706 | 707 | // Set up performance monitoring for the child process. 708 | ca := &perf.Attr{ 709 | Options: perf.Options{ 710 | Disabled: true, 711 | Task: true, 712 | }, 713 | SampleFormat: perf.SampleFormat{ 714 | Tid: true, 715 | }, 716 | } 717 | ca.SetSamplePeriod(1) 718 | ca.SetWakeupEvents(1) 719 | perf.Dummy.Configure(ca) 720 | 721 | runtime.LockOSThread() 722 | defer runtime.UnlockOSThread() 723 | 724 | comm, err := perf.Open(ca, pid, perf.AnyCPU, nil) 725 | if err != nil { 726 | t.Fatal(err) 727 | } 728 | defer comm.Close() 729 | if err := comm.MapRing(); err != nil { 730 | t.Fatal(err) 731 | } 732 | 733 | // Wait for the child process to be ready. 734 | evwait(readyevfd) 735 | 736 | // Now that it is, enable the event. 737 | if err := comm.Enable(); err != nil { 738 | t.Fatal(err) 739 | } 740 | 741 | // Signal to the child that it should exit now. 742 | evsig(startevfd) 743 | 744 | // Observe the exit code from os/exec first. 745 | err = cmd.Wait() 746 | if err == nil { 747 | t.Fatal("child exited with code 0") 748 | } 749 | ee, ok := err.(*exec.ExitError) 750 | if !ok { 751 | t.Fatalf("got %T, want *exec.ExitError", err) 752 | } 753 | if got := ee.ExitCode(); got != exitTestCode { 754 | t.Fatalf("got exit code %d, want %d", got, exitTestCode) 755 | } 756 | 757 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 758 | defer cancel() 759 | rec, err := comm.ReadRecord(ctx) 760 | if err != nil { 761 | t.Fatalf("got %v, want valid record", err) 762 | } 763 | er, ok := rec.(*perf.ExitRecord) 764 | if !ok { 765 | t.Fatalf("got %T, want *perf.ExitRecord", rec) 766 | } 767 | if int(er.Pid) != pid { 768 | t.Errorf("got pid %d, want %d", er.Pid, pid) 769 | } 770 | // Unfortunately, no er.Ppid and er.Ptid test. The Go runtime 771 | // interferes with us. 
772 | } 773 | 774 | func testCPUWideSwitch(t *testing.T) { 775 | requires(t, paranoid(0), softwarePMU) 776 | 777 | var wg sync.WaitGroup 778 | ready := make(chan error) 779 | start := make(chan struct{}) 780 | pingpong := make(chan struct{}) 781 | var recvtid, sendtid int 782 | 783 | const numpingpongs = 4 784 | const cpu = 0 785 | 786 | fn := func(recv bool) { 787 | defer wg.Done() 788 | 789 | runtime.LockOSThread() 790 | defer runtime.UnlockOSThread() 791 | 792 | var cpuset unix.CPUSet 793 | cpuset.Set(cpu) 794 | if err := unix.SchedSetaffinity(0, &cpuset); err != nil { 795 | ready <- err 796 | return 797 | } 798 | 799 | if !recv { 800 | sendtid = unix.Gettid() 801 | ready <- nil 802 | <-start 803 | for i := 0; i < numpingpongs; i++ { 804 | pingpong <- struct{}{} 805 | <-pingpong 806 | } 807 | } else { 808 | recvtid = unix.Gettid() 809 | ready <- nil 810 | <-start 811 | for i := 0; i < numpingpongs; i++ { 812 | <-pingpong 813 | pingpong <- struct{}{} 814 | } 815 | } 816 | } 817 | 818 | wg.Add(2) 819 | 820 | go fn(true) 821 | go fn(false) 822 | 823 | if err := <-ready; err != nil { 824 | t.Fatal(err) 825 | } 826 | if err := <-ready; err != nil { 827 | t.Fatal(err) 828 | } 829 | 830 | sa := &perf.Attr{ 831 | Options: perf.Options{ 832 | ExcludeKernel: true, 833 | Disabled: true, 834 | ContextSwitch: true, 835 | }, 836 | } 837 | sa.SetSamplePeriod(1) 838 | sa.SetWakeupEvents(1) 839 | perf.ContextSwitches.Configure(sa) 840 | 841 | switches, err := perf.Open(sa, perf.AllThreads, cpu, nil) 842 | if err != nil { 843 | t.Fatal(err) 844 | } 845 | defer switches.Close() 846 | if err := switches.MapRing(); err != nil { 847 | t.Fatal(err) 848 | } 849 | 850 | if err := switches.Enable(); err != nil { 851 | t.Fatal(err) 852 | } 853 | 854 | // Run the ping-pong game. 
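	// Each ping-pong forces context switches on CPU 0 between the two
	// pinned threads, so the ring should contain switch-in and
	// switch-out records for both tids, plus scheduler activity on tid 0.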
855 | 	close(start)
856 | 	wg.Wait()
857 | 
858 | 	intorecv, outofrecv := 0, 0
859 | 	intosend, outofsend := 0, 0
860 | 	intosched, outofsched := 0, 0
861 | 
862 | 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
863 | 	defer cancel()
864 | 
865 | 	var rerr error
866 | 
867 | 	for {
868 | 		sawinto := intorecv >= numpingpongs && intosend >= numpingpongs
869 | 		sawoutof := outofrecv >= numpingpongs && outofsend >= numpingpongs
870 | 		if sawinto && sawoutof {
871 | 			break
872 | 		}
873 | 		rec, err := switches.ReadRecord(ctx)
874 | 		if err != nil {
875 | 			rerr = err
876 | 			break
877 | 		}
878 | 		sr, ok := rec.(*perf.SwitchCPUWideRecord)
879 | 		if !ok {
880 | 			t.Fatalf("got %T, want *perf.SwitchCPUWideRecord", rec)
881 | 		}
882 | 		switch int(sr.Tid) {
883 | 		case 0:
884 | 			if sr.Out() {
885 | 				outofsched++
886 | 			} else {
887 | 				intosched++
888 | 			}
889 | 		case recvtid:
890 | 			if sr.Out() {
891 | 				outofrecv++
892 | 			} else {
893 | 				intorecv++
894 | 			}
895 | 		case sendtid:
896 | 			if sr.Out() {
897 | 				outofsend++
898 | 			} else {
899 | 				intosend++
900 | 			}
901 | 		}
902 | 	}
903 | 
904 | 	if rerr != nil {
905 | 		t.Fatal(rerr)
906 | 	}
907 | 
908 | 	t.Logf("%d ping-pongs", numpingpongs)
909 | 	t.Logf("recv switches: %d in, %d out", intorecv, outofrecv)
910 | 	t.Logf("send switches: %d in, %d out", intosend, outofsend)
911 | 	t.Logf("scheduler switches: %d in, %d out", intosched, outofsched)
912 | }
913 | 
914 | func testSampleGetpid(t *testing.T) {
915 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
916 | 
917 | 	ga := &perf.Attr{
918 | 		SampleFormat: perf.SampleFormat{
919 | 			Tid: true,
920 | 		},
921 | 	}
922 | 	ga.SetSamplePeriod(1)
923 | 	ga.SetWakeupEvents(1)
924 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
925 | 	if err := gtp.Configure(ga); err != nil {
926 | 		t.Fatal(err)
927 | 	}
928 | 
929 | 	runtime.LockOSThread()
930 | 	defer runtime.UnlockOSThread()
931 | 
932 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
933 | 	if err != nil {
934 | 		t.Fatal(err)
935 | 	}
936 | 	defer getpid.Close()
937 | 	if err := getpid.MapRing(); err != nil {
938 | 		t.Fatal(err)
939 | 	}
940 | 
941 | 	c, err := getpid.Measure(getpidTrigger)
942 | 	if err != nil {
943 | 		t.Fatal(err)
944 | 	}
945 | 	if c.Value != 1 {
946 | 		t.Fatalf("got %d hits for %q, want 1 hit", c.Value, c.Label)
947 | 	}
948 | 
949 | 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond)
950 | 	defer cancel()
951 | 	rec, err := getpid.ReadRecord(ctx)
952 | 	if err != nil {
953 | 		t.Fatalf("got %v, want a valid sample record", err)
954 | 	}
955 | 	sr, ok := rec.(*perf.SampleRecord)
956 | 	if !ok {
957 | 		t.Fatalf("got a %T, want a SampleRecord", rec)
958 | 	}
959 | 	pid, tid := unix.Getpid(), unix.Gettid()
960 | 	if int(sr.Pid) != pid || int(sr.Tid) != tid {
961 | 		t.Fatalf("got pid=%d tid=%d, want pid=%d tid=%d", sr.Pid, sr.Tid, pid, tid)
962 | 	}
963 | }
964 | 
965 | func testSampleGetpidConcurrent(t *testing.T) {
966 | 	requires(t, paranoid(1), tracepointPMU, tracefs)
967 | 
968 | 	ga := &perf.Attr{
969 | 		SampleFormat: perf.SampleFormat{
970 | 			Tid: true,
971 | 		},
972 | 	}
973 | 	ga.SetSamplePeriod(1)
974 | 	ga.SetWakeupEvents(1)
975 | 	gtp := perf.Tracepoint("syscalls", "sys_enter_getpid")
976 | 	if err := gtp.Configure(ga); err != nil {
977 | 		t.Fatal(err)
978 | 	}
979 | 
980 | 	runtime.LockOSThread()
981 | 	defer runtime.UnlockOSThread()
982 | 
983 | 	getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil)
984 | 	if err != nil {
985 | 		t.Fatal(err)
986 | 	}
987 | 	defer getpid.Close()
988 | 	if err := getpid.MapRing(); err != nil {
989 | 		t.Fatal(err)
990 | 	}
991 | 
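	// Read samples from a separate goroutine while this thread triggers
	// getpid, exercising ReadRecord concurrently with Measure.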
992 | const n = 6 993 | sawSample := make(chan bool) 994 | 995 | go func() { 996 | for i := 0; i < n; i++ { 997 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 998 | defer cancel() 999 | rec, err := getpid.ReadRecord(ctx) 1000 | _, isSample := rec.(*perf.SampleRecord) 1001 | if err == nil && isSample { 1002 | sawSample <- true 1003 | } else { 1004 | sawSample <- false 1005 | } 1006 | } 1007 | }() 1008 | 1009 | seen := 0 1010 | 1011 | c, err := getpid.Measure(func() { 1012 | for i := 0; i < n; i++ { 1013 | getpidTrigger() 1014 | if ok := <-sawSample; ok { 1015 | seen++ 1016 | } 1017 | } 1018 | }) 1019 | if err != nil { 1020 | t.Fatal(err) 1021 | } 1022 | if c.Value != n { 1023 | t.Fatalf("got %d hits for %q, want %d", c.Value, c.Label, n) 1024 | } 1025 | if seen != n { 1026 | t.Fatalf("saw %d samples, want %d", seen, n) 1027 | } 1028 | } 1029 | 1030 | func testSampleTracepointStack(t *testing.T) { 1031 | requires(t, paranoid(1), tracepointPMU, tracefs) 1032 | 1033 | ga := &perf.Attr{ 1034 | Options: perf.Options{ 1035 | Disabled: true, 1036 | }, 1037 | SampleFormat: perf.SampleFormat{ 1038 | Tid: true, 1039 | Time: true, 1040 | CPU: true, 1041 | IP: true, 1042 | Callchain: true, 1043 | }, 1044 | } 1045 | ga.SetSamplePeriod(1) 1046 | ga.SetWakeupEvents(1) 1047 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 1048 | if err := gtp.Configure(ga); err != nil { 1049 | t.Fatal(err) 1050 | } 1051 | 1052 | runtime.LockOSThread() 1053 | defer runtime.UnlockOSThread() 1054 | 1055 | getpid, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 1056 | if err != nil { 1057 | t.Fatal(err) 1058 | } 1059 | defer getpid.Close() 1060 | if err := getpid.MapRing(); err != nil { 1061 | t.Fatal(err) 1062 | } 1063 | 1064 | pcs := make([]uintptr, 10) 1065 | var n int 1066 | 1067 | c, err := getpid.Measure(func() { 1068 | n = runtime.Callers(2, pcs) 1069 | getpidTrigger() 1070 | }) 1071 | if err != nil { 1072 | t.Fatal(err) 1073 | } 1074 | if c.Value != 1 { 1075 | t.Fatalf("want 1 hit for %q, got %d", c.Label, c.Value) 1076 | } 1077 | 1078 | pcs = pcs[:n] 1079 | 1080 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 1081 | defer cancel() 1082 | rec, err := getpid.ReadRecord(ctx) 1083 | if err != nil { 1084 | t.Fatal(err) 1085 | } 1086 | getpidsample, ok := rec.(*perf.SampleRecord) 1087 | if !ok { 1088 | t.Fatalf("got a %T, want a *SampleRecord", rec) 1089 | } 1090 | 1091 | i := len(pcs) - 1 1092 | j := len(getpidsample.Callchain) - 1 1093 | 1094 | for i >= 0 && j >= 0 { 1095 | gopc := pcs[i] 1096 | kpc := getpidsample.Callchain[j] 1097 | if gopc != uintptr(kpc) { 1098 | t.Fatalf("Go (%#x) and kernel (%#x) PC differ", gopc, kpc) 1099 | } 1100 | i-- 1101 | j-- 1102 | } 1103 | 1104 | logFrame := func(pc uintptr) { 1105 | fn := runtime.FuncForPC(pc) 1106 | if fn == nil { 1107 | t.Logf("%#x ", pc) 1108 | } else { 1109 | file, line := fn.FileLine(pc) 1110 | t.Logf("%#x %s:%d %s", pc, file, line, fn.Name()) 1111 | } 1112 | } 1113 | 1114 | t.Log("kernel callchain:") 1115 | for _, kpc := range getpidsample.Callchain { 1116 | logFrame(uintptr(kpc)) 1117 | } 1118 | 1119 | t.Log() 1120 | 1121 | t.Logf("Go stack:") 1122 | for _, gopc := range pcs { 1123 | logFrame(gopc) 1124 | } 1125 | } 1126 | 1127 | func testRedirectedOutput(t *testing.T) { 1128 | requires(t, paranoid(1), tracepointPMU, tracefs) 1129 | 1130 | ga := &perf.Attr{ 1131 | SampleFormat: perf.SampleFormat{ 1132 | Tid: true, 1133 | Time: true, 1134 | CPU: true, 1135 | Addr: true, 1136 | 
StreamID: true, 1137 | }, 1138 | CountFormat: perf.CountFormat{ 1139 | Group: true, 1140 | }, 1141 | Options: perf.Options{ 1142 | Disabled: true, 1143 | }, 1144 | } 1145 | ga.SetSamplePeriod(1) 1146 | ga.SetWakeupEvents(1) 1147 | gtp := perf.Tracepoint("syscalls", "sys_enter_getpid") 1148 | if err := gtp.Configure(ga); err != nil { 1149 | t.Fatalf("Configure: %v", err) 1150 | } 1151 | 1152 | runtime.LockOSThread() 1153 | defer runtime.UnlockOSThread() 1154 | 1155 | leader, err := perf.Open(ga, perf.CallingThread, perf.AnyCPU, nil) 1156 | if err != nil { 1157 | t.Fatal(err) 1158 | } 1159 | defer leader.Close() 1160 | if err := leader.MapRing(); err != nil { 1161 | t.Fatal(err) 1162 | } 1163 | 1164 | wa := &perf.Attr{ 1165 | SampleFormat: perf.SampleFormat{ 1166 | Tid: true, 1167 | Time: true, 1168 | CPU: true, 1169 | Addr: true, 1170 | StreamID: true, 1171 | }, 1172 | } 1173 | wa.SetSamplePeriod(1) 1174 | wa.SetWakeupEvents(1) 1175 | wtp := perf.Tracepoint("syscalls", "sys_enter_write") 1176 | if err := wtp.Configure(wa); err != nil { 1177 | t.Fatal(err) 1178 | } 1179 | 1180 | follower, err := perf.Open(wa, perf.CallingThread, perf.AnyCPU, leader) 1181 | if err != nil { 1182 | t.Fatal(err) 1183 | } 1184 | defer follower.Close() 1185 | if err := follower.SetOutput(leader); err != nil { 1186 | t.Fatal(err) 1187 | } 1188 | 1189 | errch := make(chan error) 1190 | go func() { 1191 | for i := 0; i < 2; i++ { 1192 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) 1193 | defer cancel() 1194 | _, err := leader.ReadRecord(ctx) 1195 | errch <- err 1196 | } 1197 | }() 1198 | 1199 | gc, err := leader.MeasureGroup(func() { 1200 | getpidTrigger() 1201 | writeTrigger() 1202 | }) 1203 | if err != nil { 1204 | t.Fatal(err) 1205 | } 1206 | 1207 | if got := gc.Values[0]; got.Value != 1 { 1208 | t.Fatalf("got %d hits for %q, want 1 hit", got.Value, got.Label) 1209 | } 1210 | if got := gc.Values[1]; got.Value != 1 { 1211 | t.Fatalf("got %d hits for %q, want 1 hit", got.Value, got.Label) 1212 | } 1213 | 1214 | for i := 0; i < 2; i++ { 1215 | select { 1216 | case <-time.After(10 * time.Millisecond): 1217 | t.Errorf("did not get sample record: timeout") 1218 | case err := <-errch: 1219 | if err != nil { 1220 | t.Fatalf("did not get sample record: %v", err) 1221 | } 1222 | } 1223 | } 1224 | } 1225 | 1226 | func evsig(fd int) { 1227 | val := uint64(1) 1228 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 1229 | unix.Write(fd, buf) 1230 | } 1231 | 1232 | func evwait(fd int) { 1233 | var val uint64 1234 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 1235 | unix.Read(fd, buf) 1236 | } 1237 | -------------------------------------------------------------------------------- /perf.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "bytes" 11 | "fmt" 12 | "io/ioutil" 13 | "os" 14 | "path/filepath" 15 | "strconv" 16 | "sync" 17 | "sync/atomic" 18 | "syscall" 19 | "time" 20 | "unsafe" 21 | 22 | "golang.org/x/sys/unix" 23 | ) 24 | 25 | // Special pid values for Open. 26 | const ( 27 | // CallingThread configures the event to measure the calling thread. 28 | CallingThread = 0 29 | 30 | // AllThreads configures the event to measure all threads on the 31 | // specified CPU. 
32 | AllThreads = -1 33 | ) 34 | 35 | // AnyCPU configures the specified process/thread to be measured on any CPU. 36 | const AnyCPU = -1 37 | 38 | // Event states. 39 | const ( 40 | eventStateUninitialized = 0 41 | eventStateOK = 1 42 | eventStateClosed = 2 43 | ) 44 | 45 | // Event is an active perf event. 46 | type Event struct { 47 | // state is the state of the event. See eventState* constants. 48 | state int32 49 | 50 | // perffd is the perf event file descriptor. 51 | perffd int 52 | 53 | // id is the unique event ID. 54 | id uint64 55 | 56 | // group contains other events in the event group, if this event is 57 | // an event group leader. The order is the order in which the events 58 | // were added to the group. 59 | group []*Event 60 | 61 | // groupByID maps group event IDs to the events themselves. The 62 | // reason why this mapping is needed is explained in ReadRecord. 63 | groupByID map[uint64]*Event 64 | 65 | // owned contains other events in the event group, which the caller 66 | // has no access to. The Event owns them all, Close closes them all. 67 | owned []*Event 68 | 69 | // a is the set of attributes the Event was configured with. It is 70 | // a clone of the original, save for the Label field, which may have 71 | // been set, if the original *Attr didn't set it. 72 | a *Attr 73 | 74 | // noReadRecord is true if ReadRecord is disabled for the event. 75 | // See SetOutput and ReadRecord. 76 | noReadRecord bool 77 | 78 | // ring is the (entire) memory mapped ring buffer. 79 | ring []byte 80 | 81 | // ringdata is the data region of the ring buffer. 82 | ringdata []byte 83 | 84 | // meta is the metadata page: &ring[0]. 85 | meta *unix.PerfEventMmapPage 86 | 87 | // wakeupfd is an event file descriptor (see eventfd(2)). It is used to 88 | // unblock calls to ReadRawRecord when the associated context expires. 89 | wakeupfd int 90 | 91 | // pollreq communicates requests from ReadRawRecord to the poll goroutine 92 | // associated with the ring. 93 | pollreq chan pollreq 94 | 95 | // pollresp receives responses from the poll goroutine associated 96 | // with the ring, back to ReadRawRecord. 97 | pollresp chan pollresp 98 | 99 | // recordBuffer is used as storage for records returned by ReadRecord 100 | // and ReadRawRecord. This means memory for records returned from those 101 | // methods will be overwritten by successive calls. 102 | recordBuffer []byte 103 | } 104 | 105 | // Open opens the event configured by attr. 106 | // 107 | // The pid and cpu parameters specify which thread and CPU to monitor: 108 | // 109 | // * if pid == CallingThread and cpu == AnyCPU, the event measures 110 | // the calling thread on any CPU 111 | // 112 | // * if pid == CallingThread and cpu >= 0, the event measures 113 | // the calling thread only when running on the specified CPU 114 | // 115 | // * if pid > 0 and cpu == AnyCPU, the event measures the specified 116 | // thread on any CPU 117 | // 118 | // * if pid > 0 and cpu >= 0, the event measures the specified thread 119 | // only when running on the specified CPU 120 | // 121 | // * if pid == AllThreads and cpu >= 0, the event measures all threads 122 | // on the specified CPU 123 | // 124 | // * finally, the pid == AllThreads and cpu == AnyCPU setting is invalid 125 | // 126 | // If group is non-nil, the returned Event is made part of the group 127 | // associated with the specified group Event. 
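//
// A minimal sketch of two of the combinations above (error handling
// elided; the choice of counter is illustrative only):
//
//	fa := new(perf.Attr)
//	perf.CPUClock.Configure(fa)
//
//	// the calling thread, on any CPU:
//	ev, err := perf.Open(fa, perf.CallingThread, perf.AnyCPU, nil)
//
//	// all threads, on CPU 0 (usually requires elevated privileges):
//	evAll, err := perf.Open(fa, perf.AllThreads, 0, nil)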
128 | func Open(a *Attr, pid, cpu int, group *Event) (*Event, error) {
129 | 	return open(a, pid, cpu, group, 0)
130 | }
131 | 
132 | // OpenWithFlags is like Open but allows the caller to specify additional
133 | // flags to be passed to perf_event_open(2).
134 | func OpenWithFlags(a *Attr, pid, cpu int, group *Event, flags int) (*Event, error) {
135 | 	return open(a, pid, cpu, group, flags)
136 | }
137 | 
138 | // OpenCGroup is like Open, but activates per-container system-wide
139 | // monitoring. If cgroupfs is mounted on /dev/cgroup, and the group to
140 | // monitor is called "test", then cgroupfd must be a file descriptor opened
141 | // on /dev/cgroup/test.
142 | func OpenCGroup(a *Attr, cgroupfd, cpu int, group *Event) (*Event, error) {
143 | 	return open(a, cgroupfd, cpu, group, unix.PERF_FLAG_PID_CGROUP)
144 | }
145 | 
146 | func open(a *Attr, pid, cpu int, group *Event, flags int) (*Event, error) {
147 | 	groupfd := -1
148 | 	if group != nil {
149 | 		if err := group.ok(); err != nil {
150 | 			return nil, err
151 | 		}
152 | 		groupfd = group.perffd
153 | 	}
154 | 
155 | 	fd, err := perfEventOpen(a, pid, cpu, groupfd, flags)
156 | 	if err != nil {
157 | 		return nil, os.NewSyscallError("perf_event_open", err)
158 | 	}
159 | 	if err := unix.SetNonblock(fd, true); err != nil {
160 | 		unix.Close(fd)
161 | 		return nil, os.NewSyscallError("setnonblock", err)
162 | 	}
163 | 
164 | 	// Clone the *Attr so the caller can't change it from under our feet.
165 | 
166 | 	ac := new(Attr)
167 | 	*ac = *a // ok to copy since no slices
168 | 	if ac.Label == "" {
169 | 		evID := eventID{
170 | 			Type:   uint64(a.Type),
171 | 			Config: uint64(a.Config),
172 | 		}
173 | 		ac.Label = lookupLabel(evID).Name
174 | 	}
175 | 
176 | 	ev := &Event{
177 | 		state:  eventStateOK,
178 | 		perffd: fd,
179 | 		a:      ac,
180 | 	}
181 | 	id, err := ev.ID()
182 | 	if err != nil {
183 | 		return nil, err
184 | 	}
185 | 	ev.id = id
186 | 	if group != nil {
187 | 		if group.groupByID == nil {
188 | 			group.groupByID = map[uint64]*Event{}
189 | 		}
190 | 		group.group = append(group.group, ev)
191 | 		group.groupByID[id] = ev
192 | 	}
193 | 
194 | 	return ev, nil
195 | }
196 | 
197 | // perfEventOpen wraps the perf_event_open system call with some additional
198 | // logic around ensuring that file descriptors are marked close-on-exec.
199 | func perfEventOpen(a *Attr, pid, cpu, groupfd, flags int) (fd int, err error) {
200 | 	sysAttr := a.sysAttr()
201 | 	cloexecFlags := flags | unix.PERF_FLAG_FD_CLOEXEC
202 | 
203 | 	fd, err = unix.PerfEventOpen(sysAttr, pid, cpu, groupfd, cloexecFlags)
204 | 	switch err {
205 | 	case nil:
206 | 		return fd, nil
207 | 	case unix.EINVAL:
208 | 		// PERF_FLAG_FD_CLOEXEC is only available in Linux 3.14
209 | 		// and up, or in older kernels patched by distributions
210 | 		// with backported perf updates. If we got EINVAL, try again
211 | 		// without the flag, while holding syscall.ForkLock, following
212 | 		// the standard library pattern in net/sock_cloexec.go.
213 | 		syscall.ForkLock.RLock()
214 | 		defer syscall.ForkLock.RUnlock()
215 | 
216 | 		fd, err = unix.PerfEventOpen(sysAttr, pid, cpu, groupfd, flags)
217 | 		if err == nil {
218 | 			unix.CloseOnExec(fd)
219 | 		}
220 | 		return fd, err
221 | 	default:
222 | 		return -1, err
223 | 	}
224 | }
225 | 
226 | // DefaultNumPages is the number of pages used by MapRing. There is no
227 | // fundamental logic to this number. We use it because that is what the perf
228 | // tool does.
229 | const DefaultNumPages = 128
230 | 
231 | // MapRing maps the ring buffer attached to the event into memory.
232 | //
233 | // This enables reading records via ReadRecord / ReadRawRecord.
234 | func (ev *Event) MapRing() error {
235 | 	return ev.MapRingNumPages(DefaultNumPages)
236 | }
237 | 
238 | // MapRingNumPages is like MapRing, but allows the caller to specify the
239 | // size of the ring. The data portion of the ring is num pages. The total
240 | // size of the ring is num+1 pages, because an additional metadata page
241 | // is mapped before the data portion of the ring.
242 | func (ev *Event) MapRingNumPages(num int) error {
243 | 	if err := ev.ok(); err != nil {
244 | 		return err
245 | 	}
246 | 	if ev.ring != nil {
247 | 		return nil
248 | 	}
249 | 
250 | 	pgSize := unix.Getpagesize()
251 | 	size := (1 + num) * pgSize
252 | 	const prot = unix.PROT_READ | unix.PROT_WRITE
253 | 	const flags = unix.MAP_SHARED
254 | 	ring, err := unix.Mmap(ev.perffd, 0, size, prot, flags)
255 | 	if err != nil {
256 | 		return os.NewSyscallError("mmap", err)
257 | 	}
258 | 
259 | 	meta := (*unix.PerfEventMmapPage)(unsafe.Pointer(&ring[0]))
260 | 
261 | 	// Some systems do not fill in the data_offset and data_size fields
262 | 	// of the metadata page correctly: CentOS 6.9 and Debian 8 have been
263 | 	// observed to do this. Try to detect this condition, and adjust
264 | 	// the values accordingly.
265 | 	if meta.Data_offset == 0 && meta.Data_size == 0 {
266 | 		atomic.StoreUint64(&meta.Data_offset, uint64(pgSize))
267 | 		atomic.StoreUint64(&meta.Data_size, uint64(num*pgSize))
268 | 	}
269 | 
270 | 	ringdata := ring[meta.Data_offset:]
271 | 
272 | 	wakeupfd, err := unix.Eventfd(0, unix.EFD_CLOEXEC|unix.EFD_NONBLOCK)
273 | 	if err != nil {
274 | 		return os.NewSyscallError("eventfd", err)
275 | 	}
276 | 
277 | 	ev.ring = ring
278 | 	ev.meta = meta
279 | 	ev.ringdata = ringdata
280 | 	ev.wakeupfd = wakeupfd
281 | 	ev.pollreq = make(chan pollreq)
282 | 	ev.pollresp = make(chan pollresp)
283 | 
284 | 	go ev.poll()
285 | 
286 | 	return nil
287 | }
288 | 
289 | func (ev *Event) ok() error {
290 | 	if ev == nil {
291 | 		return os.ErrInvalid
292 | 	}
293 | 
294 | 	switch ev.state {
295 | 	case eventStateUninitialized:
296 | 		return os.ErrInvalid
297 | 	case eventStateOK:
298 | 		return nil
299 | 	default: // eventStateClosed
300 | 		return os.ErrClosed
301 | 	}
302 | }
303 | 
304 | // FD returns the file descriptor associated with the event.
305 | func (ev *Event) FD() (int, error) {
306 | 	if err := ev.ok(); err != nil {
307 | 		return -1, err
308 | 	}
309 | 	return ev.perffd, nil
310 | }
311 | 
312 | // Measure disables the event, resets it, enables it, runs f, disables it again,
313 | // then reads the Count associated with the event.
314 | func (ev *Event) Measure(f func()) (Count, error) {
315 | 	if err := ev.Disable(); err != nil {
316 | 		return Count{}, err
317 | 	}
318 | 	if err := ev.Reset(); err != nil {
319 | 		return Count{}, err
320 | 	}
321 | 	if err := ev.Enable(); err != nil {
322 | 		return Count{}, err
323 | 	}
324 | 
325 | 	f()
326 | 
327 | 	if err := ev.Disable(); err != nil {
328 | 		return Count{}, err
329 | 	}
330 | 	return ev.ReadCount()
331 | }
332 | 
333 | // MeasureGroup is like Measure, but for event groups.
334 | func (ev *Event) MeasureGroup(f func()) (GroupCount, error) {
335 | 	if err := ev.Disable(); err != nil {
336 | 		return GroupCount{}, err
337 | 	}
338 | 	if err := ev.Reset(); err != nil {
339 | 		return GroupCount{}, err
340 | 	}
341 | 	if err := ev.Enable(); err != nil {
342 | 		return GroupCount{}, err
343 | 	}
344 | 
345 | 	f()
346 | 
347 | 	if err := ev.Disable(); err != nil {
348 | 		return GroupCount{}, err
349 | 	}
350 | 	return ev.ReadGroupCount()
351 | }
352 | 
353 | // Enable enables the event.
354 | func (ev *Event) Enable() error { 355 | if err := ev.ok(); err != nil { 356 | return err 357 | } 358 | err := ev.ioctlNoArg(unix.PERF_EVENT_IOC_ENABLE) 359 | return wrapIoctlError("PERF_EVENT_IOC_ENABLE", err) 360 | } 361 | 362 | // Disable disables the event. If ev is a group leader, Disable disables 363 | // all events in the group. 364 | func (ev *Event) Disable() error { 365 | if err := ev.ok(); err != nil { 366 | return err 367 | } 368 | err := ev.ioctlInt(unix.PERF_EVENT_IOC_DISABLE, 0) 369 | return wrapIoctlError("PERF_EVENT_IOC_DISABLE", err) 370 | } 371 | 372 | // TODO(acln): add support for PERF_IOC_FLAG_GROUP and for event followers 373 | // to disable the entire group? 374 | 375 | // Refresh adds delta to a counter associated with the event. This counter 376 | // decrements every time the event overflows. Once the counter reaches zero, 377 | // the event is disabled. Calling Refresh with delta == 0 is considered 378 | // undefined behavior. 379 | func (ev *Event) Refresh(delta int) error { 380 | if err := ev.ok(); err != nil { 381 | return err 382 | } 383 | err := ev.ioctlInt(unix.PERF_EVENT_IOC_REFRESH, uintptr(delta)) 384 | return wrapIoctlError("PERF_EVENT_IOC_REFRESH", err) 385 | } 386 | 387 | // Reset resets the counters associated with the event. 388 | func (ev *Event) Reset() error { 389 | if err := ev.ok(); err != nil { 390 | return err 391 | } 392 | err := ev.ioctlNoArg(unix.PERF_EVENT_IOC_RESET) 393 | return wrapIoctlError("PERF_EVENT_IOC_RESET", err) 394 | } 395 | 396 | // UpdatePeriod updates the overflow period for the event. On older kernels, 397 | // the new period does not take effect until after the next overflow. 398 | func (ev *Event) UpdatePeriod(p uint64) error { 399 | if err := ev.ok(); err != nil { 400 | return err 401 | } 402 | err := ev.ioctlPointer(unix.PERF_EVENT_IOC_PERIOD, unsafe.Pointer(&p)) 403 | return wrapIoctlError("PERF_EVENT_IOC_PERIOD", err) 404 | } 405 | 406 | // SetOutput tells the kernel to send records to the specified 407 | // target Event rather than ev. 408 | // 409 | // If target is nil, output from ev is ignored. 410 | // 411 | // Some restrictions apply: 412 | // 413 | // 1) Calling SetOutput on an *Event will fail with EINVAL if MapRing was 414 | // called on that event previously. 2) If ev and target are not CPU-wide 415 | // events, they must be on the same CPU. 3) If ev and target are CPU-wide 416 | // events, they must refer to the same task. 4) ev and target must use the 417 | // same clock. 418 | // 419 | // An additional restriction of the Go API also applies: 420 | // 421 | // In order to use ReadRecord on the target Event, the following settings on 422 | // ev and target must match: Options.SampleIDAll, SampleFormat.Identifier, 423 | // SampleFormat.IP, SampleFormat.Tid, SampleFormat.Time, SampleFormat.Addr, 424 | // SampleFormat.ID, SampleFormat.StreamID. Furthermore, SampleFormat.StreamID 425 | // must be set. SetOutput nevertheless succeeds even if this condition is 426 | // not met, because callers can still use ReadRawRecord instead of ReadRecord. 
427 | func (ev *Event) SetOutput(target *Event) error {
428 | 	if err := ev.ok(); err != nil {
429 | 		return err
430 | 	}
431 | 	var targetfd int
432 | 	if target == nil {
433 | 		targetfd = -1
434 | 	} else {
435 | 		if err := target.ok(); err != nil {
436 | 			return err
437 | 		}
438 | 		if !target.canReadRecordFrom(ev) {
439 | 			target.noReadRecord = true
440 | 		}
441 | 		targetfd = target.perffd
442 | 	}
443 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_SET_OUTPUT, uintptr(targetfd))
444 | 	return wrapIoctlError("PERF_EVENT_IOC_SET_OUTPUT", err)
445 | }
446 | 
447 | // canReadRecordFrom returns a boolean indicating whether ev, as a leader,
448 | // can read records produced by f, a follower.
449 | func (ev *Event) canReadRecordFrom(f *Event) bool {
450 | 	lf := ev.a.SampleFormat
451 | 	ff := f.a.SampleFormat
452 | 
453 | 	return lf.Identifier == ff.Identifier &&
454 | 		lf.IP == ff.IP &&
455 | 		lf.Tid == ff.Tid &&
456 | 		lf.Time == ff.Time &&
457 | 		lf.Addr == ff.Addr &&
458 | 		lf.ID == ff.ID &&
459 | 		lf.StreamID == ff.StreamID &&
460 | 		ff.StreamID
461 | }
462 | 
463 | // BUG(acln): PERF_EVENT_IOC_SET_FILTER is not implemented
464 | 
465 | // ID returns the unique event ID value for ev.
466 | func (ev *Event) ID() (uint64, error) {
467 | 	if err := ev.ok(); err != nil {
468 | 		return 0, err
469 | 	}
470 | 	var val uint64
471 | 	err := ev.ioctlPointer(unix.PERF_EVENT_IOC_ID, unsafe.Pointer(&val))
472 | 	return val, wrapIoctlError("PERF_EVENT_IOC_ID", err)
473 | }
474 | 
475 | // SetBPF attaches a BPF program to ev, which must be a kprobe tracepoint
476 | // event. progfd is the file descriptor associated with the BPF program.
477 | func (ev *Event) SetBPF(progfd uint32) error {
478 | 	if err := ev.ok(); err != nil {
479 | 		return err
480 | 	}
481 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_SET_BPF, uintptr(progfd))
482 | 	return wrapIoctlError("PERF_EVENT_IOC_SET_BPF", err)
483 | }
484 | 
485 | // PauseOutput pauses the output from ev.
486 | func (ev *Event) PauseOutput() error {
487 | 	if err := ev.ok(); err != nil {
488 | 		return err
489 | 	}
490 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_PAUSE_OUTPUT, 1)
491 | 	return wrapIoctlError("PERF_EVENT_IOC_PAUSE_OUTPUT", err)
492 | }
493 | 
494 | // ResumeOutput resumes output from ev.
495 | func (ev *Event) ResumeOutput() error {
496 | 	if err := ev.ok(); err != nil {
497 | 		return err
498 | 	}
499 | 	err := ev.ioctlInt(unix.PERF_EVENT_IOC_PAUSE_OUTPUT, 0)
500 | 	return wrapIoctlError("PERF_EVENT_IOC_PAUSE_OUTPUT", err)
501 | }
502 | 
503 | // QueryBPF queries the event for BPF program file descriptors attached to
504 | // the same tracepoint as ev. max is the maximum number of file descriptors
505 | // to return.
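//
// A minimal sketch (the limit of 16 is arbitrary; ev must be a tracepoint
// event with BPF programs attached):
//
//	fds, err := ev.QueryBPF(16)
//	// fds now holds the descriptors of up to 16 attached programs.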
506 | func (ev *Event) QueryBPF(max uint32) ([]uint32, error) {
507 | 	if err := ev.ok(); err != nil {
508 | 		return nil, err
509 | 	}
510 | 	buf := make([]uint32, 2+max)
511 | 	buf[0] = max
512 | 	err := ev.ioctlPointer(unix.PERF_EVENT_IOC_QUERY_BPF, unsafe.Pointer(&buf[0]))
513 | 	if err != nil {
514 | 		return nil, wrapIoctlError("PERF_EVENT_IOC_QUERY_BPF", err)
515 | 	}
516 | 	count := buf[1]
517 | 	fds := make([]uint32, count)
518 | 	copy(fds, buf[2:2+count])
519 | 	return fds, nil
520 | }
521 | 
522 | // BUG(acln): PERF_EVENT_IOC_MODIFY_ATTRIBUTES is not implemented
523 | 
524 | func (ev *Event) ioctlNoArg(number int64) error {
525 | 	return ev.ioctlInt(number, 0)
526 | }
527 | 
528 | func (ev *Event) ioctlInt(number int64, arg uintptr) error {
529 | 	_, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(ev.perffd), uintptr(number), arg)
530 | 	if e != 0 {
531 | 		return e
532 | 	}
533 | 	return nil
534 | }
535 | 
536 | func (ev *Event) ioctlPointer(number uintptr, arg unsafe.Pointer) error {
537 | 	_, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(ev.perffd), number, uintptr(arg))
538 | 	if e != 0 {
539 | 		return e
540 | 	}
541 | 	return nil
542 | }
543 | 
544 | func wrapIoctlError(ioctl string, err error) error {
545 | 	if err == nil {
546 | 		return nil
547 | 	}
548 | 	return &ioctlError{ioctl: ioctl, err: err}
549 | }
550 | 
551 | type ioctlError struct {
552 | 	ioctl string
553 | 	err   error
554 | }
555 | 
556 | func (e *ioctlError) Error() string {
557 | 	return fmt.Sprintf("%s: %v", e.ioctl, e.err)
558 | }
559 | 
560 | func (e *ioctlError) Unwrap() error { return e.err }
561 | 
562 | // Close closes the event. Close must not be called concurrently with any
563 | // other methods on the Event.
564 | func (ev *Event) Close() error {
565 | 	if ev.ring != nil {
566 | 		close(ev.pollreq)
567 | 		<-ev.pollresp
568 | 		unix.Munmap(ev.ring)
569 | 		unix.Close(ev.wakeupfd)
570 | 	}
571 | 
572 | 	for _, ev := range ev.owned {
573 | 		ev.Close()
574 | 	}
575 | 
576 | 	ev.state = eventStateClosed
577 | 	return unix.Close(ev.perffd)
578 | }
579 | 
580 | // Attr configures a perf event.
581 | type Attr struct {
582 | 	// Label is a human-readable label associated with the event.
583 | 	// For convenience, the Label is included in Count and GroupCount
584 | 	// measurements read from events.
585 | 	//
586 | 	// When an event is opened, if Label is the empty string, then a
587 | 	// Label is computed (if possible) based on the Type and Config
588 | 	// fields. Otherwise, if the Label is user-defined (not the empty
589 | 	// string), it is included verbatim.
590 | 	//
591 | 	// For most events, the computed Label matches the label specified by
592 | 	// ``perf list'' for the same event (but see Bugs).
593 | 	Label string
594 | 
595 | 	// Type is the major type of the event.
596 | 	Type EventType
597 | 
598 | 	// Config is the type-specific event configuration.
599 | 	Config uint64
600 | 
601 | 	// Sample configures the sample period or sample frequency for
602 | 	// overflow packets, based on Options.Freq: if Options.Freq is set,
603 | 	// Sample is interpreted as "sample frequency", otherwise it is
604 | 	// interpreted as "sample period".
605 | 	//
606 | 	// See also SetSample{Period,Freq}.
607 | 	Sample uint64
608 | 
609 | 	// SampleFormat configures information requested in sample records,
610 | 	// on the memory mapped ring buffer.
611 | 	SampleFormat SampleFormat
612 | 
613 | 	// CountFormat specifies the format of counts read from the
614 | 	// Event using ReadCount or ReadGroupCount. See the CountFormat
615 | 	// documentation for more details.
616 | 	CountFormat CountFormat
617 | 
618 | 	// Options contains more fine-grained event configuration.
619 | 	Options Options
620 | 
621 | 	// Wakeup configures wakeups on the ring buffer associated with the
622 | 	// event. If Options.Watermark is set, Wakeup is interpreted as the
623 | 	// number of bytes before wakeup. Otherwise, it is interpreted as
624 | 	// "wake up every N events".
625 | 	//
626 | 	// See also SetWakeup{Events,Watermark}.
627 | 	Wakeup uint32
628 | 
629 | 	// BreakpointType is the breakpoint type, if Type == BreakpointEvent.
630 | 	BreakpointType uint32
631 | 
632 | 	// Config1 is used for events that need an extra register or otherwise
633 | 	// do not fit in the regular config field.
634 | 	//
635 | 	// For breakpoint events, Config1 is the breakpoint address.
636 | 	// For kprobes, it is the kprobe function. For uprobes, it is the
637 | 	// uprobe path.
638 | 	Config1 uint64
639 | 
640 | 	// Config2 is a further extension of the Config1 field.
641 | 	//
642 | 	// For breakpoint events, it is the length of the breakpoint.
643 | 	// For kprobes, when the kprobe function is NULL, it is the address of
644 | 	// the kprobe. For both kprobes and uprobes, it is the probe offset.
645 | 	Config2 uint64
646 | 
647 | 	// BranchSampleFormat specifies what branches to include in the
648 | 	// branch record, if SampleFormat.BranchStack is set.
649 | 	BranchSampleFormat BranchSampleFormat
650 | 
651 | 	// SampleRegistersUser is the set of user registers to dump on samples.
652 | 	SampleRegistersUser uint64
653 | 
654 | 	// SampleStackUser is the size of the user stack to dump on samples.
655 | 	SampleStackUser uint32
656 | 
657 | 	// ClockID is the clock ID to use with samples, if Options.UseClockID
658 | 	// is set.
659 | 	//
660 | 	// TODO(acln): What are the values for this? CLOCK_MONOTONIC and such?
661 | 	// Investigate. Can we choose a clock that can be compared to Go's
662 | 	// clock in a meaningful way? If so, should we add special support
663 | 	// for that?
664 | 	ClockID int32
665 | 
666 | 	// SampleRegistersIntr is the set of registers to dump for each sample.
667 | 	// See asm/perf_regs.h for details.
668 | 	SampleRegistersIntr uint64
669 | 
670 | 	// AuxWatermark is the watermark for the aux area.
671 | 	AuxWatermark uint32
672 | 
673 | 	// SampleMaxStack is the maximum number of frame pointers in a
674 | 	// callchain. The value must be < MaxStack().
675 | 	SampleMaxStack uint16
676 | }
677 | 
678 | func (a Attr) sysAttr() *unix.PerfEventAttr {
679 | 	return &unix.PerfEventAttr{
680 | 		Type:               uint32(a.Type),
681 | 		Size:               uint32(unsafe.Sizeof(unix.PerfEventAttr{})),
682 | 		Config:             a.Config,
683 | 		Sample:             a.Sample,
684 | 		Sample_type:        a.SampleFormat.marshal(),
685 | 		Read_format:        a.CountFormat.marshal(),
686 | 		Bits:               a.Options.marshal(),
687 | 		Wakeup:             a.Wakeup,
688 | 		Bp_type:            a.BreakpointType,
689 | 		Ext1:               a.Config1,
690 | 		Ext2:               a.Config2,
691 | 		Branch_sample_type: a.BranchSampleFormat.marshal(),
692 | 		Sample_regs_user:   a.SampleRegistersUser,
693 | 		Sample_stack_user:  a.SampleStackUser,
694 | 		Clockid:            a.ClockID,
695 | 		Sample_regs_intr:   a.SampleRegistersIntr,
696 | 		Aux_watermark:      a.AuxWatermark,
697 | 		Sample_max_stack:   a.SampleMaxStack,
698 | 	}
699 | }
700 | 
701 | // Configure implements the Configurator interface. It overwrites target
702 | // with a. See also (*Group).Add.
703 | func (a *Attr) Configure(target *Attr) error {
704 | 	*target = *a
705 | 	return nil
706 | }
707 | 
708 | // SetSamplePeriod configures the sampling period for the event.
709 | //
710 | // It sets a.Sample to p and disables a.Options.Freq.
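//
// For example (the values are illustrative only):
//
//	a.SetSamplePeriod(1000) // one sample every 1000 events; Freq is cleared
//	a.SetSampleFreq(97)     // about 97 samples per second; Freq is set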
711 | func (a *Attr) SetSamplePeriod(p uint64) {
712 | 	a.Sample = p
713 | 	a.Options.Freq = false
714 | }
715 | 
716 | // SetSampleFreq configures the sampling frequency for the event.
717 | //
718 | // It sets a.Sample to f and enables a.Options.Freq.
719 | func (a *Attr) SetSampleFreq(f uint64) {
720 | 	a.Sample = f
721 | 	a.Options.Freq = true
722 | }
723 | 
724 | // SetWakeupEvents configures the event to wake up every n events.
725 | //
726 | // It sets a.Wakeup to n and disables a.Options.Watermark.
727 | func (a *Attr) SetWakeupEvents(n uint32) {
728 | 	a.Wakeup = n
729 | 	a.Options.Watermark = false
730 | }
731 | 
732 | // SetWakeupWatermark configures the number of bytes in overflow records
733 | // before wakeup.
734 | //
735 | // It sets a.Wakeup to n and enables a.Options.Watermark.
736 | func (a *Attr) SetWakeupWatermark(n uint32) {
737 | 	a.Wakeup = n
738 | 	a.Options.Watermark = true
739 | }
740 | 
741 | // LookupEventType probes /sys/bus/event_source/devices/<pmu>/type
742 | // for the EventType value associated with the specified PMU.
743 | func LookupEventType(pmu string) (EventType, error) {
744 | 	path := filepath.Join("/sys/bus/event_source/devices", pmu, "type")
745 | 	et, err := readUint(path, 32)
746 | 	return EventType(et), err
747 | }
748 | 
749 | // EventType is the overall type of a performance event.
750 | type EventType uint32
751 | 
752 | // Supported event types.
753 | const (
754 | 	HardwareEvent      EventType = unix.PERF_TYPE_HARDWARE
755 | 	SoftwareEvent      EventType = unix.PERF_TYPE_SOFTWARE
756 | 	TracepointEvent    EventType = unix.PERF_TYPE_TRACEPOINT
757 | 	HardwareCacheEvent EventType = unix.PERF_TYPE_HW_CACHE
758 | 	RawEvent           EventType = unix.PERF_TYPE_RAW
759 | 	BreakpointEvent    EventType = unix.PERF_TYPE_BREAKPOINT
760 | )
761 | 
762 | // HardwareCounter is a hardware performance counter.
763 | type HardwareCounter uint64
764 | 
765 | // Hardware performance counters.
766 | const (
767 | 	CPUCycles             HardwareCounter = unix.PERF_COUNT_HW_CPU_CYCLES
768 | 	Instructions          HardwareCounter = unix.PERF_COUNT_HW_INSTRUCTIONS
769 | 	CacheReferences       HardwareCounter = unix.PERF_COUNT_HW_CACHE_REFERENCES
770 | 	CacheMisses           HardwareCounter = unix.PERF_COUNT_HW_CACHE_MISSES
771 | 	BranchInstructions    HardwareCounter = unix.PERF_COUNT_HW_BRANCH_INSTRUCTIONS
772 | 	BranchMisses          HardwareCounter = unix.PERF_COUNT_HW_BRANCH_MISSES
773 | 	BusCycles             HardwareCounter = unix.PERF_COUNT_HW_BUS_CYCLES
774 | 	StalledCyclesFrontend HardwareCounter = unix.PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
775 | 	StalledCyclesBackend  HardwareCounter = unix.PERF_COUNT_HW_STALLED_CYCLES_BACKEND
776 | 	RefCPUCycles          HardwareCounter = unix.PERF_COUNT_HW_REF_CPU_CYCLES
777 | )
778 | 
779 | var hardwareLabels = map[HardwareCounter]eventLabel{
780 | 	CPUCycles:             {Name: "cpu-cycles", Alias: "cycles"},
781 | 	Instructions:          {Name: "instructions"},
782 | 	CacheReferences:       {Name: "cache-references"},
783 | 	CacheMisses:           {Name: "cache-misses"},
784 | 	BranchInstructions:    {Name: "branch-instructions", Alias: "branches"},
785 | 	BranchMisses:          {Name: "branch-misses"},
786 | 	BusCycles:             {Name: "bus-cycles"},
787 | 	StalledCyclesFrontend: {Name: "stalled-cycles-frontend", Alias: "idle-cycles-frontend"},
788 | 	StalledCyclesBackend:  {Name: "stalled-cycles-backend", Alias: "idle-cycles-backend"},
789 | 	RefCPUCycles:          {Name: "ref-cycles"},
790 | }
791 | 
792 | func (hwc HardwareCounter) String() string {
793 | 	return hwc.eventLabel().Name
794 | }
795 | 
796 | func (hwc HardwareCounter) eventLabel() eventLabel {
797 | 	return hardwareLabels[hwc]
798 | }
799 | 
800 | // Configure configures attr to measure hwc. It sets the Label, Type, and
801 | // Config fields on attr.
802 | func (hwc HardwareCounter) Configure(attr *Attr) error {
803 | 	attr.Label = hwc.String()
804 | 	attr.Type = HardwareEvent
805 | 	attr.Config = uint64(hwc)
806 | 	return nil
807 | }
808 | 
809 | // AllHardwareCounters returns a slice of all known hardware counters.
810 | func AllHardwareCounters() []Configurator {
811 | 	return []Configurator{
812 | 		CPUCycles,
813 | 		Instructions,
814 | 		CacheReferences,
815 | 		CacheMisses,
816 | 		BranchInstructions,
817 | 		BranchMisses,
818 | 		BusCycles,
819 | 		StalledCyclesFrontend,
820 | 		StalledCyclesBackend,
821 | 		RefCPUCycles,
822 | 	}
823 | }
824 | 
825 | // SoftwareCounter is a software performance counter.
826 | type SoftwareCounter uint64
827 | 
828 | // Software performance counters.
829 | const ( 830 | CPUClock SoftwareCounter = unix.PERF_COUNT_SW_CPU_CLOCK 831 | TaskClock SoftwareCounter = unix.PERF_COUNT_SW_TASK_CLOCK 832 | PageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS 833 | ContextSwitches SoftwareCounter = unix.PERF_COUNT_SW_CONTEXT_SWITCHES 834 | CPUMigrations SoftwareCounter = unix.PERF_COUNT_SW_CPU_MIGRATIONS 835 | MinorPageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS_MIN 836 | MajorPageFaults SoftwareCounter = unix.PERF_COUNT_SW_PAGE_FAULTS_MAJ 837 | AlignmentFaults SoftwareCounter = unix.PERF_COUNT_SW_ALIGNMENT_FAULTS 838 | EmulationFaults SoftwareCounter = unix.PERF_COUNT_SW_EMULATION_FAULTS 839 | Dummy SoftwareCounter = unix.PERF_COUNT_SW_DUMMY 840 | BPFOutput SoftwareCounter = unix.PERF_COUNT_SW_BPF_OUTPUT 841 | ) 842 | 843 | var softwareLabels = map[SoftwareCounter]eventLabel{ 844 | CPUClock: {Name: "cpu-clock"}, 845 | TaskClock: {Name: "task-clock"}, 846 | PageFaults: {Name: "page-faults", Alias: "faults"}, 847 | ContextSwitches: {Name: "context-switches", Alias: "cs"}, 848 | CPUMigrations: {Name: "cpu-migrations", Alias: "migrations"}, 849 | MinorPageFaults: {Name: "minor-faults"}, 850 | MajorPageFaults: {Name: "major-faults"}, 851 | AlignmentFaults: {Name: "alignment-faults"}, 852 | EmulationFaults: {Name: "emulation-faults"}, 853 | Dummy: {Name: "dummy"}, 854 | BPFOutput: {Name: "bpf-output"}, 855 | } 856 | 857 | func (swc SoftwareCounter) String() string { 858 | return swc.eventLabel().Name 859 | } 860 | 861 | func (swc SoftwareCounter) eventLabel() eventLabel { 862 | return softwareLabels[swc] 863 | } 864 | 865 | // Configure configures attr to measure swc. It sets attr.Type and attr.Config. 866 | func (swc SoftwareCounter) Configure(attr *Attr) error { 867 | attr.Label = swc.eventLabel().Name 868 | attr.Type = SoftwareEvent 869 | attr.Config = uint64(swc) 870 | return nil 871 | } 872 | 873 | // AllSoftwareCounters returns a slice of all known software counters. 874 | func AllSoftwareCounters() []Configurator { 875 | return []Configurator{ 876 | CPUClock, 877 | TaskClock, 878 | PageFaults, 879 | ContextSwitches, 880 | CPUMigrations, 881 | MinorPageFaults, 882 | MajorPageFaults, 883 | AlignmentFaults, 884 | EmulationFaults, 885 | Dummy, 886 | BPFOutput, 887 | } 888 | } 889 | 890 | // Cache identifies a cache. 891 | type Cache uint64 892 | 893 | // Caches. 894 | const ( 895 | L1D Cache = unix.PERF_COUNT_HW_CACHE_L1D 896 | L1I Cache = unix.PERF_COUNT_HW_CACHE_L1I 897 | LL Cache = unix.PERF_COUNT_HW_CACHE_LL 898 | DTLB Cache = unix.PERF_COUNT_HW_CACHE_DTLB 899 | ITLB Cache = unix.PERF_COUNT_HW_CACHE_ITLB 900 | BPU Cache = unix.PERF_COUNT_HW_CACHE_BPU 901 | NODE Cache = unix.PERF_COUNT_HW_CACHE_NODE 902 | ) 903 | 904 | // AllCaches returns a slice of all known cache types. 905 | func AllCaches() []Cache { 906 | return []Cache{L1D, L1I, LL, DTLB, ITLB, BPU, NODE} 907 | } 908 | 909 | // CacheOp is a cache operation. 910 | type CacheOp uint64 911 | 912 | // Cache operations. 913 | const ( 914 | Read CacheOp = unix.PERF_COUNT_HW_CACHE_OP_READ 915 | Write CacheOp = unix.PERF_COUNT_HW_CACHE_OP_WRITE 916 | Prefetch CacheOp = unix.PERF_COUNT_HW_CACHE_OP_PREFETCH 917 | ) 918 | 919 | // AllCacheOps returns a slice of all known cache operations. 920 | func AllCacheOps() []CacheOp { 921 | return []CacheOp{Read, Write, Prefetch} 922 | } 923 | 924 | // CacheOpResult is the result of a cache operation. 925 | type CacheOpResult uint64 926 | 927 | // Cache operation results. 
928 | const (
929 | 	Access CacheOpResult = unix.PERF_COUNT_HW_CACHE_RESULT_ACCESS
930 | 	Miss   CacheOpResult = unix.PERF_COUNT_HW_CACHE_RESULT_MISS
931 | )
932 | 
933 | // AllCacheOpResults returns a slice of all known cache operation results.
934 | func AllCacheOpResults() []CacheOpResult {
935 | 	return []CacheOpResult{Access, Miss}
936 | }
937 | 
938 | // A HardwareCacheCounter groups a cache, a cache operation, and an operation
939 | // result. It measures the number of results for the specified op, on the
940 | // specified cache.
941 | type HardwareCacheCounter struct {
942 | 	Cache  Cache
943 | 	Op     CacheOp
944 | 	Result CacheOpResult
945 | }
946 | 
947 | // Configure configures attr to measure hwcc. It sets attr.Type and attr.Config.
948 | func (hwcc HardwareCacheCounter) Configure(attr *Attr) error {
949 | 	attr.Type = HardwareCacheEvent
950 | 	attr.Config = uint64(hwcc.Cache) | uint64(hwcc.Op<<8) | uint64(hwcc.Result<<16)
951 | 	return nil
952 | }
953 | 
954 | // HardwareCacheCounters returns cache counters which measure the cartesian
955 | // product of the specified caches, operations and results.
956 | func HardwareCacheCounters(caches []Cache, ops []CacheOp, results []CacheOpResult) []Configurator {
957 | 	counters := make([]Configurator, 0, len(caches)*len(ops)*len(results))
958 | 	for _, cache := range caches {
959 | 		for _, op := range ops {
960 | 			for _, result := range results {
961 | 				c := HardwareCacheCounter{
962 | 					Cache:  cache,
963 | 					Op:     op,
964 | 					Result: result,
965 | 				}
966 | 				counters = append(counters, c)
967 | 			}
968 | 		}
969 | 	}
970 | 	return counters
971 | }
972 | 
973 | // Tracepoint returns a Configurator for the specified category and event.
974 | // The returned Configurator sets attr.Label, attr.Type, and attr.Config.
975 | func Tracepoint(category, event string) Configurator {
976 | 	return configuratorFunc(func(attr *Attr) error {
977 | 		cfg, err := LookupTracepointConfig(category, event)
978 | 		if err != nil {
979 | 			return err
980 | 		}
981 | 
982 | 		attr.Label = fmt.Sprintf("%s:%s", category, event)
983 | 		attr.Type = TracepointEvent
984 | 		attr.Config = cfg
985 | 
986 | 		return nil
987 | 	})
988 | }
989 | 
990 | // LookupTracepointConfig probes
991 | // /sys/kernel/debug/tracing/events/<category>/<event>/id for the Attr.Config
992 | // value associated with the specified category and event.
993 | func LookupTracepointConfig(category, event string) (uint64, error) {
994 | 	p := filepath.Join("/sys/kernel/debug/tracing/events", category, event, "id")
995 | 	return readUint(p, 64)
996 | }
997 | 
998 | // Breakpoint returns a Configurator for a breakpoint event.
999 | //
1000 | // typ is the type of the breakpoint.
1001 | //
1002 | // addr is the address of the breakpoint. For execution breakpoints, this
1003 | // is the memory address of the instruction of interest; for read and write
1004 | // breakpoints, it is the memory address of the memory location of interest.
1005 | //
1006 | // length is the length of the breakpoint being measured.
1007 | //
1008 | // The returned Configurator sets the Type, BreakpointType, Config1, and
1009 | // Config2 fields on attr.
1010 | func Breakpoint(typ BreakpointType, addr uint64, length BreakpointLength) Configurator {
1011 | 	return configuratorFunc(func(attr *Attr) error {
1012 | 		attr.Type = BreakpointEvent
1013 | 		attr.BreakpointType = uint32(typ)
1014 | 		attr.Config1 = addr
1015 | 		attr.Config2 = uint64(length)
1016 | 
1017 | 		return nil
1018 | 	})
1019 | }
1020 | 
1021 | // BreakpointType is the type of a breakpoint.
1022 | type BreakpointType uint32
1023 | 
1024 | // Breakpoint types. Values are |-ed together. The combination of
1025 | // BreakpointTypeR or BreakpointTypeW with BreakpointTypeX is invalid.
1026 | const (
1027 | 	BreakpointTypeEmpty BreakpointType = 0x0
1028 | 	BreakpointTypeR     BreakpointType = 0x1
1029 | 	BreakpointTypeW     BreakpointType = 0x2
1030 | 	BreakpointTypeRW    BreakpointType = BreakpointTypeR | BreakpointTypeW
1031 | 	BreakpointTypeX     BreakpointType = 0x4
1032 | )
1033 | 
1034 | // BreakpointLength is the length of the breakpoint being measured.
1035 | type BreakpointLength uint64
1036 | 
1037 | // Breakpoint length values.
1038 | const (
1039 | 	BreakpointLength1 BreakpointLength = 1
1040 | 	BreakpointLength2 BreakpointLength = 2
1041 | 	BreakpointLength4 BreakpointLength = 4
1042 | 	BreakpointLength8 BreakpointLength = 8
1043 | )
1044 | 
1045 | // ExecutionBreakpointLength returns the length of an execution breakpoint.
1046 | func ExecutionBreakpointLength() BreakpointLength {
1047 | 	// TODO(acln): is this correct? The man page says to set this to
1048 | 	// sizeof(long). Is sizeof(C long) == sizeof(Go uintptr) on all
1049 | 	// platforms of interest?
1050 | 	var x uintptr
1051 | 	return BreakpointLength(unsafe.Sizeof(x))
1052 | }
1053 | 
1054 | // ExecutionBreakpoint returns a Configurator for an execution breakpoint
1055 | // at the specified address.
1056 | func ExecutionBreakpoint(addr uint64) Configurator {
1057 | 	return Breakpoint(BreakpointTypeX, addr, ExecutionBreakpointLength())
1058 | }
1059 | 
1060 | // Options contains low-level event configuration options.
1061 | type Options struct {
1062 | 	// Disabled disables the event by default. If the event is in a
1063 | 	// group, but not a group leader, this option has no effect, since
1064 | 	// the group leader controls when events are enabled or disabled.
1065 | 	Disabled bool
1066 | 
1067 | 	// Inherit specifies that this counter should count events of child
1068 | 	// tasks as well as the specified task. This only applies to new
1069 | 	// children, not to any existing children at the time the counter
1070 | 	// is created (nor to any new children of existing children).
1071 | 	//
1072 | 	// Inherit does not work with some combinations of CountFormat options,
1073 | 	// such as CountFormat.Group.
1074 | 	Inherit bool
1075 | 
1076 | 	// Pinned specifies that the counter should always be on the CPU if
1077 | 	// possible. This bit applies only to hardware counters, and only
1078 | 	// to group leaders. If a pinned counter cannot be put onto the CPU,
1079 | 	// then the counter goes into an error state, where reads return EOF,
1080 | 	// until it is subsequently enabled or disabled.
1081 | 	Pinned bool
1082 | 
1083 | 	// Exclusive specifies that when this counter's group is on the CPU,
1084 | 	// it should be the only group using the CPU's counters.
1085 | 	Exclusive bool
1086 | 
1087 | 	// ExcludeUser excludes events that happen in user space.
1088 | 	ExcludeUser bool
1089 | 
1090 | 	// ExcludeKernel excludes events that happen in kernel space.
1091 | 	ExcludeKernel bool
1092 | 
1093 | 	// ExcludeHypervisor excludes events that happen in the hypervisor.
1094 | 	ExcludeHypervisor bool
1095 | 
1096 | 	// ExcludeIdle disables counting while the CPU is idle.
1097 | 	ExcludeIdle bool
1098 | 
1099 | 	// The mmap bit enables generation of MmapRecord records for every
1100 | 	// mmap(2) call that has PROT_EXEC set.
1101 | 	Mmap bool
1102 | 
1103 | 	// Comm enables tracking of process command name, as modified by
1104 | 	// exec(2), prctl(PR_SET_NAME), as well as writing to /proc/self/comm.
1105 | // If CommExec is also set, then the CommRecord records produced 1106 | // can be queried using the WasExec method, to differentiate exec(2) 1107 | // from the other cases. 1108 | Comm bool 1109 | 1110 | // Freq configures the event to use sample frequency, rather than 1111 | // sample period. See also Attr.Sample. 1112 | Freq bool 1113 | 1114 | // InheritStat enables saving of event counts on context switch for 1115 | // inherited tasks. InheritStat is only meaningful if Inherit is 1116 | // also set. 1117 | InheritStat bool 1118 | 1119 | // EnableOnExec configures the counter to be enabled automatically 1120 | // after a call to exec(2). 1121 | EnableOnExec bool 1122 | 1123 | // Task configures the event to include fork/exit notifications in 1124 | // the ring buffer. 1125 | Task bool 1126 | 1127 | // Watermark configures the ring buffer to issue an overflow 1128 | // notification when the Wakeup boundary is crossed. If not set, 1129 | // notifications happen after Wakeup samples. See also Attr.Wakeup. 1130 | Watermark bool 1131 | 1132 | // PreciseIP controls the number of instructions between an event of 1133 | // interest happening and the kernel being able to stop and record 1134 | // the event. 1135 | PreciseIP Skid 1136 | 1137 | // MmapData is the counterpart to Mmap. It enables generation of 1138 | // MmapRecord records for mmap(2) calls that do not have PROT_EXEC 1139 | // set. 1140 | MmapData bool 1141 | 1142 | // SampleIDAll configures Tid, Time, ID, StreamID and CPU samples 1143 | // to be included in non-Sample records. 1144 | SampleIDAll bool 1145 | 1146 | // ExcludeHost configures only events happening inside a guest 1147 | // instance (one that has executed a KVM_RUN ioctl) to be measured. 1148 | ExcludeHost bool 1149 | 1150 | // ExcludeGuest is the opposite of ExcludeHost: it configures only 1151 | // events outside a guest instance to be measured. 1152 | ExcludeGuest bool 1153 | 1154 | // ExcludeKernelCallchain excludes kernel callchains. 1155 | ExcludeKernelCallchain bool 1156 | 1157 | // ExcludeUserCallchain excludes user callchains. 1158 | ExcludeUserCallchain bool 1159 | 1160 | // Mmap2 configures mmap(2) events to include inode data. 1161 | Mmap2 bool 1162 | 1163 | // CommExec allows the distinction between process renaming 1164 | // via exec(2) or via other means. See also Comm, and 1165 | // (*CommRecord).WasExec. 1166 | CommExec bool 1167 | 1168 | // UseClockID allows selecting which internal Linux clock to use 1169 | // when generating timestamps via the ClockID field. 1170 | UseClockID bool 1171 | 1172 | // ContextSwitch enables the generation of SwitchRecord records, 1173 | // and SwitchCPUWideRecord records when sampling in CPU-wide mode. 1174 | ContextSwitch bool 1175 | 1176 | // writeBackward configures the kernel to write to the memory 1177 | // mapped ring buffer backwards. This option is not supported by 1178 | // package perf at the moment. 1179 | writeBackward bool 1180 | 1181 | // Namespaces enables the generation of NamespacesRecord records.
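//
// As a hedged illustration of how these options combine (Attr.Options is
// assumed to be where an Options value attaches to an event, as its use
// elsewhere in this package, e.g. ev.a.Options.SampleIDAll, suggests):
//
//	a := new(perf.Attr)
//	a.Options = perf.Options{
//		Disabled:    true, // create the event disabled; enable it explicitly
//		ExcludeIdle: true, // do not count while the CPU is idle
//		Namespaces:  true, // emit NamespacesRecord records
//	}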
1182 | Namespaces bool 1183 | } 1184 | 1185 | func (opt Options) marshal() uint64 { 1186 | fields := []bool{ 1187 | opt.Disabled, 1188 | opt.Inherit, 1189 | opt.Pinned, 1190 | opt.Exclusive, 1191 | opt.ExcludeUser, 1192 | opt.ExcludeKernel, 1193 | opt.ExcludeHypervisor, 1194 | opt.ExcludeIdle, 1195 | opt.Mmap, 1196 | opt.Comm, 1197 | opt.Freq, 1198 | opt.InheritStat, 1199 | opt.EnableOnExec, 1200 | opt.Task, 1201 | opt.Watermark, 1202 | false, false, // 2 bits for skid constraint 1203 | opt.MmapData, 1204 | opt.SampleIDAll, 1205 | opt.ExcludeHost, 1206 | opt.ExcludeGuest, 1207 | opt.ExcludeKernelCallchain, 1208 | opt.ExcludeUserCallchain, 1209 | opt.Mmap2, 1210 | opt.CommExec, 1211 | opt.UseClockID, 1212 | opt.ContextSwitch, 1213 | opt.writeBackward, 1214 | opt.Namespaces, 1215 | } 1216 | val := marshalBitwiseUint64(fields) 1217 | 1218 | const ( 1219 | skidlsb = 15 1220 | skidmsb = 16 1221 | ) 1222 | if opt.PreciseIP&0x01 != 0 { 1223 | val |= 1 << skidlsb 1224 | } 1225 | if opt.PreciseIP&0x02 != 0 { // the skid constraint is a 2-bit field: test bit 1, not 0x10 1226 | val |= 1 << skidmsb 1227 | } 1228 | 1229 | return val 1230 | } 1231 | 1232 | // Supported returns a boolean indicating whether the host kernel supports 1233 | // the perf_event_open system call, which is a prerequisite for the operations 1234 | // of this package. 1235 | // 1236 | // Supported checks for the existence of a /proc/sys/kernel/perf_event_paranoid 1237 | // file, which is the canonical method for determining if a kernel supports 1238 | // perf_event_open(2). 1239 | func Supported() bool { 1240 | _, err := os.Stat("/proc/sys/kernel/perf_event_paranoid") 1241 | return err == nil 1242 | } 1243 | 1244 | // MaxStack returns the maximum number of frame pointers in a recorded 1245 | // callchain. It reads the value from /proc/sys/kernel/perf_event_max_stack. 1246 | func MaxStack() (uint16, error) { 1247 | max, err := readUint("/proc/sys/kernel/perf_event_max_stack", 16) 1248 | return uint16(max), err 1249 | } 1250 | 1251 | // fields is a collection of 32-bit or 64-bit fields. 1252 | type fields []byte 1253 | 1254 | // uint64 decodes the next 64 bit field into v. 1255 | func (f *fields) uint64(v *uint64) { 1256 | *v = *(*uint64)(unsafe.Pointer(&(*f)[0])) 1257 | f.advance(8) 1258 | } 1259 | 1260 | // uint64Cond decodes the next 64 bit field into v, if cond is true. 1261 | func (f *fields) uint64Cond(cond bool, v *uint64) { 1262 | if cond { 1263 | f.uint64(v) 1264 | } 1265 | } 1266 | 1267 | // uint32 decodes a pair of uint32s into a and b. 1268 | func (f *fields) uint32(a, b *uint32) { 1269 | *a = *(*uint32)(unsafe.Pointer(&(*f)[0])) 1270 | *b = *(*uint32)(unsafe.Pointer(&(*f)[4])) 1271 | f.advance(8) 1272 | } 1273 | 1274 | // uint32Cond decodes a pair of uint32s into a and b, if cond is true. 1275 | func (f *fields) uint32Cond(cond bool, a, b *uint32) { 1276 | if cond { 1277 | f.uint32(a, b) 1278 | } 1279 | } 1280 | 1281 | func (f *fields) uint32sizeBytes(b *[]byte) { 1282 | size := *(*uint32)(unsafe.Pointer(&(*f)[0])) 1283 | f.advance(4) 1284 | data := make([]byte, size) 1285 | copy(data, *f) 1286 | f.advance(int(size)) 1287 | *b = data 1288 | } 1289 | 1290 | func (f *fields) uint64sizeBytes(b *[]byte) { 1291 | size := *(*uint64)(unsafe.Pointer(&(*f)[0])) 1292 | f.advance(8) 1293 | data := make([]byte, size) 1294 | copy(data, *f) 1295 | f.advance(int(size)) 1296 | *b = data 1297 | } 1298 | 1299 | // duration decodes a duration into d.
1300 | func (f *fields) duration(d *time.Duration) { 1301 | *d = *(*time.Duration)(unsafe.Pointer(&(*f)[0])) 1302 | f.advance(8) 1303 | } 1304 | 1305 | // string decodes a null-terminated string into s. The null terminator 1306 | // is not included in the string written to s. 1307 | func (f *fields) string(s *string) { 1308 | for i := 0; i < len(*f); i++ { 1309 | if (*f)[i] == 0 { 1310 | *s = string((*f)[:i]) 1311 | if i+1 <= len(*f) { 1312 | f.advance(i + 1) 1313 | } 1314 | return 1315 | } 1316 | } 1317 | } 1318 | 1319 | // idCond decodes a SampleID based on the SampleFormat the event was 1320 | // configured with, if cond is true. 1321 | func (f *fields) idCond(cond bool, id *SampleID, sfmt SampleFormat) { 1322 | if !cond { 1323 | return 1324 | } 1325 | f.uint32Cond(sfmt.Tid, &id.Pid, &id.Tid) 1326 | f.uint64Cond(sfmt.Time, &id.Time) 1327 | f.uint64Cond(sfmt.ID, &id.ID) 1328 | f.uint64Cond(sfmt.StreamID, &id.StreamID) 1329 | var reserved uint32 1330 | f.uint32Cond(sfmt.CPU, &id.CPU, &reserved) 1331 | f.uint64Cond(sfmt.Identifier, &id.Identifier) 1332 | } 1333 | 1334 | // count decodes a Count into c. 1335 | func (f *fields) count(c *Count, cfmt CountFormat) { 1336 | f.uint64(&c.Value) 1337 | if cfmt.Enabled { 1338 | f.duration(&c.Enabled) 1339 | } 1340 | if cfmt.Running { 1341 | f.duration(&c.Running) 1342 | } 1343 | f.uint64Cond(cfmt.ID, &c.ID) 1344 | } 1345 | 1346 | // groupCount decodes a GroupCount into gc. 1347 | func (f *fields) groupCount(gc *GroupCount, cfmt CountFormat) { 1348 | var nr uint64 1349 | f.uint64(&nr) 1350 | if cfmt.Enabled { 1351 | f.duration(&gc.Enabled) 1352 | } 1353 | if cfmt.Running { 1354 | f.duration(&gc.Running) 1355 | } 1356 | gc.Values = make([]struct { 1357 | Value, ID uint64 1358 | Label string 1359 | }, nr) 1360 | for i := 0; i < int(nr); i++ { 1361 | f.uint64(&gc.Values[i].Value) 1362 | f.uint64Cond(cfmt.ID, &gc.Values[i].ID) 1363 | } 1364 | } 1365 | 1366 | // advance advances through the fields by n bytes. 1367 | func (f *fields) advance(n int) { 1368 | *f = (*f)[n:] 1369 | } 1370 | 1371 | // marshalBitwiseUint64 marshals a set of bitwise flags into a 1372 | // uint64, LSB first. 1373 | func marshalBitwiseUint64(fields []bool) uint64 { 1374 | var res uint64 1375 | for shift, set := range fields { 1376 | if set { 1377 | res |= 1 << uint(shift) 1378 | } 1379 | } 1380 | return res 1381 | } 1382 | 1383 | // readUint reads an unsigned integer from the specified sys file. 1384 | // If readUint does not return an error, the returned integer is 1385 | // guaranteed to fit in the specified number of bits. 1386 | func readUint(sysfile string, bits int) (uint64, error) { 1387 | content, err := ioutil.ReadFile(sysfile) 1388 | if err != nil { 1389 | return 0, err 1390 | } 1391 | content = bytes.TrimSpace(content) 1392 | return strconv.ParseUint(string(content), 10, bits) 1393 | } 1394 | 1395 | type eventLabel struct { 1396 | Name, Alias string 1397 | } 1398 | 1399 | func (el eventLabel) String() string { 1400 | if el.Name == "" { 1401 | return "unknown" 1402 | } 1403 | if el.Alias != "" { 1404 | return fmt.Sprintf("%s OR %s", el.Name, el.Alias) 1405 | } 1406 | return el.Name 1407 | } 1408 | 1409 | type eventID struct { 1410 | Type, Config uint64 1411 | } 1412 | 1413 | var eventLabels sync.Map // of eventID to eventLabel 1414 | 1415 | func init() { 1416 | type labeler interface { 1417 | eventLabel() eventLabel 1418 | } 1419 | 1420 | var events []Configurator 1421 | events = append(events, AllHardwareCounters()...) 1422 | events = append(events, AllSoftwareCounters()...)
1423 | 1424 | for _, cfg := range events { 1425 | if l, ok := cfg.(labeler); ok { 1426 | var a Attr 1427 | cfg.Configure(&a) 1428 | id := eventID{Type: uint64(a.Type), Config: a.Config} 1429 | label := l.eventLabel() 1430 | eventLabels.Store(id, label) 1431 | } 1432 | } 1433 | } 1434 | 1435 | func lookupLabel(id eventID) eventLabel { 1436 | v, ok := eventLabels.Load(id) 1437 | if ok { 1438 | return v.(eventLabel) 1439 | } 1440 | label := lookupLabelInSysfs(id) 1441 | eventLabels.Store(id, label) 1442 | return label 1443 | } 1444 | 1445 | func lookupLabelInSysfs(id eventID) eventLabel { 1446 | return eventLabel{} 1447 | } 1448 | 1449 | // BUG(acln): generic Attr.Label lookup is not implemented 1450 | -------------------------------------------------------------------------------- /record.go: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build linux 6 | 7 | package perf 8 | 9 | import ( 10 | "context" 11 | "errors" 12 | "fmt" 13 | "math/bits" 14 | "os" 15 | "sync/atomic" 16 | "time" 17 | "unsafe" 18 | 19 | "golang.org/x/sys/unix" 20 | ) 21 | 22 | // ErrDisabled is returned from ReadRecord and ReadRawRecord if the event 23 | // being monitored is attached to a different process, and that process 24 | // exits. (since Linux 3.18) 25 | var ErrDisabled = errors.New("perf: event disabled") 26 | 27 | // ErrNoReadRecord is returned by ReadRecord when it is disabled on a 28 | // group event, due to different configurations of the leader and follower 29 | // events. See also (*Event).SetOutput. 30 | var ErrNoReadRecord = errors.New("perf: ReadRecord disabled") 31 | 32 | // ErrBadRecord is returned by ReadRecord when a read record can't be decoded. 33 | var ErrBadRecord = errors.New("bad record received") 34 | 35 | // ReadRecord reads and decodes a record from the ring buffer associated 36 | // with ev. 37 | // 38 | // ReadRecord may be called concurrently with ReadCount or ReadGroupCount, 39 | // but not concurrently with itself, ReadRawRecord, Close, or any other 40 | // Event method. 41 | // 42 | // If another event's records were routed to ev via SetOutput, and the 43 | // two events did not have compatible SampleFormat Options settings (see 44 | // SetOutput documentation), ReadRecord returns ErrNoReadRecord. 45 | func (ev *Event) ReadRecord(ctx context.Context) (Record, error) { 46 | if err := ev.ok(); err != nil { 47 | return nil, err 48 | } 49 | if ev.noReadRecord { 50 | return nil, ErrNoReadRecord 51 | } 52 | var raw RawRecord 53 | if err := ev.ReadRawRecord(ctx, &raw); err != nil { 54 | return nil, err 55 | } 56 | rec, err := newRecord(ev, raw.Header.Type) 57 | if err != nil { 58 | return nil, err 59 | } 60 | if err := rec.DecodeFrom(&raw, ev); err != nil { 61 | return nil, err 62 | } 63 | return rec, nil 64 | } 65 | 66 | // ReadRawRecord reads and decodes a raw record from the ring buffer 67 | // associated with ev into rec. Callers must not retain rec.Data. 68 | // 69 | // ReadRawRecord may be called concurrently with ReadCount or ReadGroupCount, 70 | // but not concurrently with itself, ReadRecord, Close or any other Event 71 | // method. 
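//
// A hedged usage sketch (error handling elided; ev is assumed to be an
// Event configured for sampling, enabled, and mapped with MapRing):
//
//	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
//	defer cancel()
//
//	var raw perf.RawRecord
//	if err := ev.ReadRawRecord(ctx, &raw); err == nil {
//		fmt.Printf("record type %d, %d byte(s) of data\n",
//			raw.Header.Type, len(raw.Data))
//	}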
72 | func (ev *Event) ReadRawRecord(ctx context.Context, raw *RawRecord) error { 73 | if err := ev.ok(); err != nil { 74 | return err 75 | } 76 | if ev.ring == nil { 77 | return errors.New("perf: event ring not mapped") 78 | } 79 | 80 | // Fast path: try reading from the ring buffer first. If there is 81 | // a record there, we are done. 82 | if ev.readRawRecordNonblock(raw) { 83 | return nil 84 | } 85 | 86 | // If the context has a deadline, and that deadline is in the future, 87 | // use it to compute a timeout for ppoll(2). If the context is 88 | // expired, bail out immediately. Otherwise, the timeout is zero, 89 | // which means no timeout. 90 | var timeout time.Duration 91 | deadline, ok := ctx.Deadline() 92 | if ok { 93 | timeout = time.Until(deadline) 94 | if timeout <= 0 { 95 | <-ctx.Done() 96 | return ctx.Err() 97 | } 98 | } 99 | 100 | // Start a round of polling, then await results. Only one request 101 | // can be in flight at a time, and the whole request-response cycle 102 | // is owned by the current invocation of ReadRawRecord. 103 | again: 104 | ev.pollreq <- pollreq{timeout: timeout} 105 | select { 106 | case <-ctx.Done(): 107 | active := false 108 | err := ctx.Err() 109 | if err == context.Canceled { 110 | // Initiate active wakeup on ev.wakeupfd, and wait for 111 | // doPoll to return. doPoll might miss this signal, 112 | // but that's okay: see below. 113 | val := uint64(1) 114 | buf := (*[8]byte)(unsafe.Pointer(&val))[:] 115 | unix.Write(ev.wakeupfd, buf) 116 | active = true 117 | } 118 | <-ev.pollresp 119 | 120 | // We don't know if doPoll woke up due to our active wakeup 121 | // or because it timed out. It doesn't make a difference. 122 | // The important detail here is that doPoll does not touch 123 | // ev.wakeupfd (besides polling it for readiness). If we 124 | // initiated active wakeup, we must restore the event file 125 | // descriptor to quiescent state ourselves, in order to avoid 126 | // a spurious wakeup during the next round of polling. 127 | if active { 128 | var buf [8]byte 129 | unix.Read(ev.wakeupfd, buf[:]) 130 | } 131 | return err 132 | case resp := <-ev.pollresp: 133 | if resp.err != nil { 134 | // Polling failed. Nothing to do but report the error. 135 | return resp.err 136 | } 137 | if resp.perfhup { 138 | // Saw POLLHUP on ev.perffd. See also the 139 | // documentation for ErrDisabled. 140 | return ErrDisabled 141 | } 142 | if !resp.perfready { 143 | // Here, we have not touched ev.wakeupfd, there 144 | // was no polling error, and ev.perffd is not 145 | // ready. Therefore, ppoll(2) must have timed out. 146 | // 147 | // The reason we are here is the following: doPoll 148 | // woke up, and immediately sent us a pollresp, which 149 | // won the race with <-ctx.Done(), such that this 150 | // select case fired. In any case, ctx is expired, 151 | // because we wouldn't be here otherwise. 152 | <-ctx.Done() 153 | return ctx.Err() 154 | } 155 | if !ev.readRawRecordNonblock(raw) { 156 | // It might happen that an overflow notification was 157 | // generated on the file descriptor, we observed it 158 | // as POLLIN, but there is still nothing new for us 159 | // to read in the ring buffer. 160 | // 161 | // This is because the notification is raised based 162 | // on the Attr.Wakeup and Attr.Options.Watermark 163 | // settings, rather than based on what events we've 164 | // seen already. 
165 | // 166 | // For example, for an event with Attr.Wakeup == 1, 167 | // POLLIN will be indicated on the file descriptor 168 | // after the first event, regardless of whether we 169 | // have consumed it from the ring buffer or not. 170 | // 171 | // If we happen to see POLLIN with an empty ring 172 | // buffer, the only thing to do is to wait again. 173 | // 174 | // See also https://github.com/acln0/perfwakeup. 175 | goto again 176 | } 177 | return nil 178 | } 179 | } 180 | 181 | // HasRecord reports whether there is a record available to be read from the ring. 182 | func (ev *Event) HasRecord() bool { 183 | return atomic.LoadUint64(&ev.meta.Data_head) != atomic.LoadUint64(&ev.meta.Data_tail) 184 | } 185 | 186 | // resetRing advances the read pointer to the write pointer to discard all the 187 | // data in the ring. This is done when bogus data is read from the ring. 188 | func (ev *Event) resetRing() { 189 | atomic.StoreUint64(&ev.meta.Data_tail, atomic.LoadUint64(&ev.meta.Data_head)) 190 | } 191 | 192 | // readRawRecordNonblock reads a raw record into rec, if one is available. 193 | // Callers must not retain rec.Data. The boolean return value signals whether 194 | // a record was actually found / written to rec. 195 | func (ev *Event) readRawRecordNonblock(raw *RawRecord) bool { 196 | head := atomic.LoadUint64(&ev.meta.Data_head) 197 | tail := atomic.LoadUint64(&ev.meta.Data_tail) 198 | if head == tail { 199 | return false 200 | } 201 | 202 | // Make sure there is enough space to read a record header. Otherwise 203 | // consider the ring to be corrupted. 204 | const headerSize = uint64(unsafe.Sizeof(RecordHeader{})) 205 | avail := head - tail 206 | if avail < headerSize { 207 | ev.resetRing() 208 | return false 209 | } 210 | 211 | // Head and tail values only ever grow, so we must take their value 212 | // modulo the size of the data segment of the ring. 213 | start := tail % uint64(len(ev.ringdata)) 214 | raw.Header = *(*RecordHeader)(unsafe.Pointer(&ev.ringdata[start])) 215 | end := (tail + uint64(raw.Header.Size)) % uint64(len(ev.ringdata)) 216 | 217 | // Make sure there is enough space available to read the whole record. 218 | // Otherwise treat the ring as corrupted. 219 | msgLen := uint64(raw.Header.Size) 220 | if avail < msgLen || msgLen < headerSize { 221 | ev.resetRing() 222 | return false 223 | } 224 | 225 | // Reserve space to store this record out of the ring. 226 | if uint64(len(ev.recordBuffer)) < msgLen { 227 | ev.recordBuffer = make([]byte, msgLen) 228 | } 229 | // If the record wraps around the ring, copy it out in two steps, 230 | // so that we can return a contiguous area of memory to the caller. 231 | if end < start { 232 | n := copy(ev.recordBuffer, ev.ringdata[start:]) 233 | copy(ev.recordBuffer[n:], ev.ringdata[:int(raw.Header.Size)-n]) 234 | } else { 235 | copy(ev.recordBuffer, ev.ringdata[start:end]) 236 | } 237 | raw.Data = ev.recordBuffer[unsafe.Sizeof(raw.Header):msgLen] 238 | 239 | // Notify the kernel of the last record we've seen. 240 | atomic.AddUint64(&ev.meta.Data_tail, msgLen) 241 | return true 242 | } 243 | 244 | // poll services requests from ev.pollreq and sends responses on ev.pollresp. 245 | func (ev *Event) poll() { 246 | defer close(ev.pollresp) 247 | 248 | for req := range ev.pollreq { 249 | ev.pollresp <- ev.doPoll(req) 250 | } 251 | } 252 | 253 | // doPoll executes one round of polling on ev.perffd and ev.wakeupfd. 254 | // 255 | // A req.timeout value of zero is interpreted as "no timeout". req.timeout 256 | // must not be negative.
257 | func (ev *Event) doPoll(req pollreq) pollresp { 258 | var timeout *unix.Timespec 259 | if req.timeout > 0 { 260 | ts := unix.NsecToTimespec(req.timeout.Nanoseconds()) 261 | timeout = &ts 262 | } 263 | 264 | pollfds := []unix.PollFd{ 265 | {Fd: int32(ev.perffd), Events: unix.POLLIN}, 266 | {Fd: int32(ev.wakeupfd), Events: unix.POLLIN}, 267 | } 268 | 269 | again: 270 | _, err := unix.Ppoll(pollfds, timeout, nil) 271 | // TODO(acln): do we need to do this business at all? See #20400. 272 | if err == unix.EINTR { 273 | goto again 274 | } 275 | 276 | // If we are here and we have successfully woken up, it is for one 277 | // of four reasons: we got POLLIN on ev.perffd, we got POLLHUP on 278 | // ev.perffd (see ErrDisabled), the ppoll(2) timeout fired, or we 279 | // got POLLIN on ev.wakeupfd. 280 | // 281 | // Report if the perf fd is ready, if we saw POLLHUP, and any 282 | // errors except EINTR. The machinery is documented in more detail 283 | // in ReadRawRecord. 284 | return pollresp{ 285 | perfready: pollfds[0].Revents&unix.POLLIN != 0, 286 | perfhup: pollfds[0].Revents&unix.POLLHUP != 0, 287 | err: os.NewSyscallError("ppoll", err), 288 | } 289 | } 290 | 291 | type pollreq struct { 292 | // timeout is the timeout for ppoll(2): zero means no timeout 293 | timeout time.Duration 294 | } 295 | 296 | type pollresp struct { 297 | // perfready indicates whether the perf FD (ev.perffd) is ready. 298 | perfready bool 299 | 300 | // perfhup indicates whether POLLHUP was observed on ev.perffd. 301 | perfhup bool 302 | 303 | // err is the *os.SyscallError from ppoll(2). 304 | err error 305 | } 306 | 307 | // SampleFormat configures information requested in overflow packets. 308 | type SampleFormat struct { 309 | // IP records the instruction pointer. 310 | IP bool 311 | 312 | // Tid records process and thread IDs. 313 | Tid bool 314 | 315 | // Time records a hardware timestamp. 316 | Time bool 317 | 318 | // Addr records an address, if applicable. 319 | Addr bool 320 | 321 | // Count records counter values for all events in a group, not just 322 | // the group leader. 323 | Count bool 324 | 325 | // Callchain records the stack backtrace. 326 | Callchain bool 327 | 328 | // ID records a unique ID for the opened event's group leader. 329 | ID bool 330 | 331 | // CPU records the CPU number. 332 | CPU bool 333 | 334 | // Period records the current sampling period. 335 | Period bool 336 | 337 | // StreamID records a unique ID for the opened event. Unlike ID, 338 | // the actual ID is recorded, not the group ID. 339 | StreamID bool 340 | 341 | // Raw records additional data, if applicable. Usually returned by 342 | // tracepoint events. 343 | Raw bool 344 | 345 | // BranchStack provides a record of recent branches, as provided by 346 | // CPU branch sampling hardware. See also Attr.BranchSampleFormat. 347 | BranchStack bool 348 | 349 | // UserRegisters records the current user-level CPU state (the 350 | // values in the process before the kernel was called). See also 351 | // Attr.SampleRegistersUser. 352 | UserRegisters bool 353 | 354 | // UserStack records the user level stack, allowing stack unwinding. 355 | UserStack bool 356 | 357 | // Weight records a hardware provided weight value that expresses 358 | // how costly the sampled event was. 359 | Weight bool 360 | 361 | // DataSource records the data source: where in the memory hierarchy 362 | // the data associated with the sampled instruction came from. 363 | DataSource bool 364 | 365 | // Identifier places the ID value in a fixed position in the record.
366 | Identifier bool 367 | 368 | // Transaction records reasons for transactional memory abort events. 369 | Transaction bool 370 | 371 | // IntrRegisters records a subset of the current CPU register state. 372 | // Unlike UserRegisters, the registers reflect kernel register 373 | // state if the overflow happened while kernel code was running. See 374 | // also Attr.SampleRegistersIntr. 375 | IntrRegisters bool 376 | 377 | PhysicalAddress bool 378 | } 379 | 380 | // TODO(acln): document SampleFormat.PhysicalAddress 381 | 382 | // marshal packs the SampleFormat into a uint64. 383 | func (sf SampleFormat) marshal() uint64 { 384 | // Always keep this in sync with the type definition above. 385 | fields := []bool{ 386 | sf.IP, 387 | sf.Tid, 388 | sf.Time, 389 | sf.Addr, 390 | sf.Count, 391 | sf.Callchain, 392 | sf.ID, 393 | sf.CPU, 394 | sf.Period, 395 | sf.StreamID, 396 | sf.Raw, 397 | sf.BranchStack, 398 | sf.UserRegisters, 399 | sf.UserStack, 400 | sf.Weight, 401 | sf.DataSource, 402 | sf.Identifier, 403 | sf.Transaction, 404 | sf.IntrRegisters, 405 | sf.PhysicalAddress, 406 | } 407 | return marshalBitwiseUint64(fields) 408 | } 409 | 410 | // SampleID contains identifiers for when and where a record was collected. 411 | // 412 | // A SampleID is included in a Record if Options.SampleIDAll is set on the 413 | // associated event. Fields are set according to SampleFormat options. 414 | type SampleID struct { 415 | Pid uint32 416 | Tid uint32 417 | Time uint64 418 | ID uint64 419 | StreamID uint64 420 | CPU uint32 421 | _ uint32 // reserved 422 | Identifier uint64 423 | } 424 | 425 | // Record is the interface implemented by all record types. 426 | type Record interface { 427 | Header() RecordHeader 428 | DecodeFrom(*RawRecord, *Event) error 429 | } 430 | 431 | // RecordType is the type of an overflow record. 432 | type RecordType uint32 433 | 434 | // Known record types. 435 | const ( 436 | RecordTypeMmap RecordType = unix.PERF_RECORD_MMAP 437 | RecordTypeLost RecordType = unix.PERF_RECORD_LOST 438 | RecordTypeComm RecordType = unix.PERF_RECORD_COMM 439 | RecordTypeExit RecordType = unix.PERF_RECORD_EXIT 440 | RecordTypeThrottle RecordType = unix.PERF_RECORD_THROTTLE 441 | RecordTypeUnthrottle RecordType = unix.PERF_RECORD_UNTHROTTLE 442 | RecordTypeFork RecordType = unix.PERF_RECORD_FORK 443 | RecordTypeRead RecordType = unix.PERF_RECORD_READ 444 | RecordTypeSample RecordType = unix.PERF_RECORD_SAMPLE 445 | RecordTypeMmap2 RecordType = unix.PERF_RECORD_MMAP2 446 | RecordTypeAux RecordType = unix.PERF_RECORD_AUX 447 | RecordTypeItraceStart RecordType = unix.PERF_RECORD_ITRACE_START 448 | RecordTypeLostSamples RecordType = unix.PERF_RECORD_LOST_SAMPLES 449 | RecordTypeSwitch RecordType = unix.PERF_RECORD_SWITCH 450 | RecordTypeSwitchCPUWide RecordType = unix.PERF_RECORD_SWITCH_CPU_WIDE 451 | RecordTypeNamespaces RecordType = unix.PERF_RECORD_NAMESPACES 452 | ) 453 | 454 | func (rt RecordType) known() bool { 455 | return rt >= RecordTypeMmap && rt <= RecordTypeNamespaces 456 | } 457 | 458 | // RecordHeader is the header present in every overflow record. 459 | type RecordHeader struct { 460 | Type RecordType 461 | Misc uint16 462 | Size uint16 463 | } 464 | 465 | // Header returns rh itself, so that types which embed a RecordHeader 466 | // automatically implement a part of the Record interface. 467 | func (rh RecordHeader) Header() RecordHeader { return rh } 468 | 469 | // CPUMode returns the CPU mode in use when the sample happened.
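//
// For example (a sketch; rec is assumed to be a Record previously
// returned by (*Event).ReadRecord):
//
//	if rec.Header().CPUMode() == perf.KernelMode {
//		// the sample was taken while the CPU was in kernel mode
//	}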
470 | func (rh RecordHeader) CPUMode() CPUMode { 471 | return CPUMode(rh.Misc & cpuModeMask) 472 | } 473 | 474 | // CPUMode is a CPU operation mode. 475 | type CPUMode uint8 476 | 477 | const cpuModeMask = 7 478 | 479 | // Known CPU modes. 480 | const ( 481 | UnknownMode CPUMode = iota 482 | KernelMode 483 | UserMode 484 | HypervisorMode 485 | GuestKernelMode 486 | GuestUserMode 487 | ) 488 | 489 | // RawRecord is a raw overflow record, read from the memory mapped ring 490 | // buffer associated with an Event. 491 | // 492 | // Header is the 8 byte record header. Data contains the rest of the record. 493 | type RawRecord struct { 494 | Header RecordHeader 495 | Data []byte 496 | } 497 | 498 | func (raw RawRecord) fields() fields { return fields(raw.Data) } 499 | 500 | var newRecordFuncs = [...]func(ev *Event) Record{ 501 | RecordTypeMmap: func(_ *Event) Record { return &MmapRecord{} }, 502 | RecordTypeLost: func(_ *Event) Record { return &LostRecord{} }, 503 | RecordTypeComm: func(_ *Event) Record { return &CommRecord{} }, 504 | RecordTypeExit: func(_ *Event) Record { return &ExitRecord{} }, 505 | RecordTypeThrottle: func(_ *Event) Record { return &ThrottleRecord{} }, 506 | RecordTypeUnthrottle: func(_ *Event) Record { return &UnthrottleRecord{} }, 507 | RecordTypeFork: func(_ *Event) Record { return &ForkRecord{} }, 508 | RecordTypeRead: newReadRecord, 509 | RecordTypeSample: newSampleRecord, 510 | RecordTypeMmap2: func(_ *Event) Record { return &Mmap2Record{} }, 511 | RecordTypeAux: func(_ *Event) Record { return &AuxRecord{} }, 512 | RecordTypeItraceStart: func(_ *Event) Record { return &ItraceStartRecord{} }, 513 | RecordTypeLostSamples: func(_ *Event) Record { return &LostSamplesRecord{} }, 514 | RecordTypeSwitch: func(_ *Event) Record { return &SwitchRecord{} }, 515 | RecordTypeSwitchCPUWide: func(_ *Event) Record { return &SwitchCPUWideRecord{} }, 516 | RecordTypeNamespaces: func(_ *Event) Record { return &NamespacesRecord{} }, 517 | } 518 | 519 | func newReadRecord(ev *Event) Record { 520 | if ev.a.CountFormat.Group { 521 | return &ReadGroupRecord{} 522 | } 523 | return &ReadRecord{} 524 | } 525 | 526 | func newSampleRecord(ev *Event) Record { 527 | if ev.a.CountFormat.Group { 528 | return &SampleGroupRecord{} 529 | } 530 | return &SampleRecord{} 531 | } 532 | 533 | // newRecord returns an empty Record of the given type, tailored for the 534 | // specified Event. 535 | func newRecord(ev *Event, rt RecordType) (Record, error) { 536 | if !rt.known() { 537 | return nil, fmt.Errorf("unknown record type %d", rt) 538 | } 539 | return newRecordFuncs[rt](ev), nil 540 | } 541 | 542 | // mmapDataBit is PERF_RECORD_MISC_MMAP_DATA 543 | const mmapDataBit = 1 << 13 544 | 545 | // MmapRecord (PERF_RECORD_MMAP) records PROT_EXEC mappings such that 546 | // user-space IPs can be correlated to code. 547 | type MmapRecord struct { 548 | RecordHeader 549 | Pid uint32 // process ID 550 | Tid uint32 // thread ID 551 | Addr uint64 // address of the allocated memory 552 | Len uint64 // length of the allocated memory 553 | PageOffset uint64 // page offset of the allocated memory 554 | Filename string // describes backing of allocated memory 555 | SampleID 556 | } 557 | 558 | // DecodeFrom implements the Record.DecodeFrom method. 
559 | func (mr *MmapRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 560 | mr.RecordHeader = raw.Header 561 | f := raw.fields() 562 | f.uint32(&mr.Pid, &mr.Tid) 563 | f.uint64(&mr.Addr) 564 | f.uint64(&mr.Len) 565 | f.uint64(&mr.PageOffset) 566 | f.string(&mr.Filename) 567 | f.idCond(ev.a.Options.SampleIDAll, &mr.SampleID, ev.a.SampleFormat) 568 | return nil 569 | } 570 | 571 | // Executable returns a boolean indicating whether the mapping is executable. 572 | func (mr *MmapRecord) Executable() bool { 573 | // The data bit is set when the mapping is _not_ executable. 574 | return mr.RecordHeader.Misc&mmapDataBit == 0 575 | } 576 | 577 | // LostRecord (PERF_RECORD_LOST) indicates when events are lost. 578 | type LostRecord struct { 579 | RecordHeader 580 | ID uint64 // the unique ID for the lost events 581 | Lost uint64 // the number of lost events 582 | SampleID 583 | } 584 | 585 | // DecodeFrom implements the Record.DecodeFrom method. 586 | func (lr *LostRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 587 | lr.RecordHeader = raw.Header 588 | f := raw.fields() 589 | f.uint64(&lr.ID) 590 | f.uint64(&lr.Lost) 591 | f.idCond(ev.a.Options.SampleIDAll, &lr.SampleID, ev.a.SampleFormat) 592 | return nil 593 | } 594 | 595 | // CommRecord (PERF_RECORD_COMM) indicates a change in the process name. 596 | type CommRecord struct { 597 | RecordHeader 598 | Pid uint32 // process ID 599 | Tid uint32 // thread ID 600 | NewName string // the new name of the process 601 | SampleID 602 | } 603 | 604 | // DecodeFrom implements the Record.DecodeFrom method. 605 | func (cr *CommRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 606 | cr.RecordHeader = raw.Header 607 | f := raw.fields() 608 | f.uint32(&cr.Pid, &cr.Tid) 609 | f.string(&cr.NewName) 610 | f.idCond(ev.a.Options.SampleIDAll, &cr.SampleID, ev.a.SampleFormat) 611 | return nil 612 | } 613 | 614 | // commExecBit is PERF_RECORD_MISC_COMM_EXEC 615 | const commExecBit = 1 << 13 616 | 617 | // WasExec returns a boolean indicating whether a process name change 618 | // was caused by an exec(2) system call. 619 | func (cr *CommRecord) WasExec() bool { 620 | return cr.RecordHeader.Misc&(commExecBit) != 0 621 | } 622 | 623 | // ExitRecord (PERF_RECORD_EXIT) indicates a process exit event. 624 | type ExitRecord struct { 625 | RecordHeader 626 | Pid uint32 // process ID 627 | Ppid uint32 // parent process ID 628 | Tid uint32 // thread ID 629 | Ptid uint32 // parent thread ID 630 | Time uint64 // time when the process exited 631 | SampleID 632 | } 633 | 634 | // DecodeFrom implements the Record.DecodeFrom method. 635 | func (er *ExitRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 636 | er.RecordHeader = raw.Header 637 | f := raw.fields() 638 | f.uint32(&er.Pid, &er.Ppid) 639 | f.uint32(&er.Tid, &er.Ptid) 640 | f.uint64(&er.Time) 641 | f.idCond(ev.a.Options.SampleIDAll, &er.SampleID, ev.a.SampleFormat) 642 | return nil 643 | } 644 | 645 | // ThrottleRecord (PERF_RECORD_THROTTLE) indicates a throttle event. 646 | type ThrottleRecord struct { 647 | RecordHeader 648 | Time uint64 649 | ID uint64 650 | StreamID uint64 651 | SampleID 652 | } 653 | 654 | // DecodeFrom implements the Record.DecodeFrom method.
655 | func (tr *ThrottleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 656 | tr.RecordHeader = raw.Header 657 | f := raw.fields() 658 | f.uint64(&tr.Time) 659 | f.uint64(&tr.ID) 660 | f.uint64(&tr.StreamID) 661 | f.idCond(ev.a.Options.SampleIDAll, &tr.SampleID, ev.a.SampleFormat) 662 | return nil 663 | } 664 | 665 | // UnthrottleRecord (PERF_RECORD_UNTHROTTLE) indicates an unthrottle event. 666 | type UnthrottleRecord struct { 667 | RecordHeader 668 | Time uint64 669 | ID uint64 670 | StreamID uint64 671 | SampleID 672 | } 673 | 674 | // DecodeFrom implements the Record.DecodeFrom method. 675 | func (ur *UnthrottleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 676 | ur.RecordHeader = raw.Header 677 | f := raw.fields() 678 | f.uint64(&ur.Time) 679 | f.uint64(&ur.ID) 680 | f.uint64(&ur.StreamID) 681 | f.idCond(ev.a.Options.SampleIDAll, &ur.SampleID, ev.a.SampleFormat) 682 | return nil 683 | } 684 | 685 | // ForkRecord (PERF_RECORD_FORK) indicates a fork event. 686 | type ForkRecord struct { 687 | RecordHeader 688 | Pid uint32 // process ID 689 | Ppid uint32 // parent process ID 690 | Tid uint32 // thread ID 691 | Ptid uint32 // parent thread ID 692 | Time uint64 // time when the fork occurred 693 | SampleID 694 | } 695 | 696 | // DecodeFrom implements the Record.DecodeFrom method. 697 | func (fr *ForkRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 698 | fr.RecordHeader = raw.Header 699 | f := raw.fields() 700 | f.uint32(&fr.Pid, &fr.Ppid) 701 | f.uint32(&fr.Tid, &fr.Ptid) 702 | f.uint64(&fr.Time) 703 | f.idCond(ev.a.Options.SampleIDAll, &fr.SampleID, ev.a.SampleFormat) 704 | return nil 705 | } 706 | 707 | // ReadRecord (PERF_RECORD_READ) indicates a read event. 708 | type ReadRecord struct { 709 | RecordHeader 710 | Pid uint32 // process ID 711 | Tid uint32 // thread ID 712 | Count Count // count value 713 | SampleID 714 | } 715 | 716 | // DecodeFrom implements the Record.DecodeFrom method. 717 | func (rr *ReadRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 718 | rr.RecordHeader = raw.Header 719 | f := raw.fields() 720 | f.uint32(&rr.Pid, &rr.Tid) 721 | f.count(&rr.Count, ev.a.CountFormat) 722 | f.idCond(ev.a.Options.SampleIDAll, &rr.SampleID, ev.a.SampleFormat) 723 | return nil 724 | } 725 | 726 | // ReadGroupRecord (PERF_RECORD_READ) indicates a read event on a group event. 727 | type ReadGroupRecord struct { 728 | RecordHeader 729 | Pid uint32 // process ID 730 | Tid uint32 // thread ID 731 | GroupCount GroupCount // group count values 732 | SampleID 733 | } 734 | 735 | // DecodeFrom implements the Record.DecodeFrom method. 736 | func (rr *ReadGroupRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 737 | rr.RecordHeader = raw.Header 738 | f := raw.fields() 739 | f.uint32(&rr.Pid, &rr.Tid) 740 | f.groupCount(&rr.GroupCount, ev.a.CountFormat) 741 | f.idCond(ev.a.Options.SampleIDAll, &rr.SampleID, ev.a.SampleFormat) 742 | return nil 743 | } 744 | 745 | // SampleRecord indicates a sample. 746 | // 747 | // All the fields up to and including Callchain represent ABI bits. All the 748 | // fields starting with Raw are non-ABI and have no compatibility guarantees. 749 | // 750 | // Fields on SampleRecord are set according to the SampleFormat the event 751 | // was configured with. A boolean flag in SampleFormat typically enables 752 | // the homonymous field in a SampleRecord.
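//
// As a hedged illustration, an event opened with
//
//	a := new(perf.Attr)
//	a.SampleFormat = perf.SampleFormat{IP: true, Tid: true, Time: true}
//	a.SetSamplePeriod(1000)
//
// would produce SampleRecord values with IP, Pid, Tid and Time set, and
// the remaining fields left at their zero values.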
753 | type SampleRecord struct { 754 | RecordHeader 755 | Identifier uint64 756 | IP uint64 757 | Pid uint32 758 | Tid uint32 759 | Time uint64 760 | Addr uint64 761 | ID uint64 762 | StreamID uint64 763 | CPU uint32 764 | _ uint32 // reserved 765 | Period uint64 766 | Count Count 767 | Callchain []uint64 768 | 769 | Raw []byte 770 | BranchStack []BranchEntry 771 | UserRegisterABI uint64 772 | UserRegisters []uint64 773 | UserStack []byte 774 | UserStackDynamicSize uint64 775 | Weight uint64 776 | DataSource DataSource 777 | Transaction Transaction 778 | IntrRegisterABI uint64 779 | IntrRegisters []uint64 780 | PhysicalAddress uint64 781 | } 782 | 783 | // DecodeFrom implements the Record.DecodeFrom method. 784 | func (sr *SampleRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 785 | sr.RecordHeader = raw.Header 786 | f := raw.fields() 787 | f.uint64Cond(ev.a.SampleFormat.Identifier, &sr.Identifier) 788 | f.uint64Cond(ev.a.SampleFormat.IP, &sr.IP) 789 | f.uint32Cond(ev.a.SampleFormat.Tid, &sr.Pid, &sr.Tid) 790 | f.uint64Cond(ev.a.SampleFormat.Time, &sr.Time) 791 | f.uint64Cond(ev.a.SampleFormat.Addr, &sr.Addr) 792 | f.uint64Cond(ev.a.SampleFormat.ID, &sr.ID) 793 | f.uint64Cond(ev.a.SampleFormat.StreamID, &sr.StreamID) 794 | 795 | // If we have a StreamID and it is different from our 796 | // own ID, then the output from the event we're interested 797 | // in was redirected to ev. We must switch to that event 798 | // in order to decode the sample. 799 | if ev.a.SampleFormat.StreamID { 800 | if sr.StreamID != ev.id { 801 | newev := ev.groupByID[sr.StreamID] 802 | if newev == nil { 803 | ev.resetRing() 804 | return ErrBadRecord 805 | } 806 | ev = newev 807 | } 808 | } 809 | 810 | var reserved uint32 811 | f.uint32Cond(ev.a.SampleFormat.CPU, &sr.CPU, &reserved) 812 | f.uint64Cond(ev.a.SampleFormat.Period, &sr.Period) 813 | if ev.a.SampleFormat.Count { 814 | f.count(&sr.Count, ev.a.CountFormat) 815 | } 816 | if ev.a.SampleFormat.Callchain { 817 | var nr uint64 818 | f.uint64(&nr) 819 | sr.Callchain = make([]uint64, nr) 820 | for i := 0; i < len(sr.Callchain); i++ { 821 | f.uint64(&sr.Callchain[i]) 822 | } 823 | } 824 | if ev.a.SampleFormat.Raw { 825 | f.uint32sizeBytes(&sr.Raw) 826 | } 827 | if ev.a.SampleFormat.BranchStack { 828 | var nr uint64 829 | f.uint64(&nr) 830 | sr.BranchStack = make([]BranchEntry, nr) 831 | for i := 0; i < len(sr.BranchStack); i++ { 832 | var from, to, entry uint64 833 | f.uint64(&from) 834 | f.uint64(&to) 835 | f.uint64(&entry) 836 | sr.BranchStack[i].decode(from, to, entry) 837 | } 838 | } 839 | if ev.a.SampleFormat.UserRegisters { 840 | f.uint64(&sr.UserRegisterABI) 841 | num := bits.OnesCount64(ev.a.SampleRegistersUser) 842 | sr.UserRegisters = make([]uint64, num) 843 | for i := 0; i < len(sr.UserRegisters); i++ { 844 | f.uint64(&sr.UserRegisters[i]) 845 | } 846 | } 847 | if ev.a.SampleFormat.UserStack { 848 | f.uint64sizeBytes(&sr.UserStack) 849 | if len(sr.UserStack) > 0 { 850 | f.uint64(&sr.UserStackDynamicSize) 851 | } 852 | } 853 | f.uint64Cond(ev.a.SampleFormat.Weight, &sr.Weight) 854 | if ev.a.SampleFormat.DataSource { 855 | var ds uint64 856 | f.uint64(&ds) 857 | sr.DataSource = DataSource(ds) 858 | } 859 | if ev.a.SampleFormat.Transaction { 860 | var tx uint64 861 | f.uint64(&tx) 862 | sr.Transaction = Transaction(tx) 863 | } 864 | if ev.a.SampleFormat.IntrRegisters { 865 | f.uint64(&sr.IntrRegisterABI) 866 | num := bits.OnesCount64(ev.a.SampleRegistersIntr) 867 | sr.IntrRegisters = make([]uint64, num) 868 | for i := 0; i < len(sr.IntrRegisters); i++ 
{ 869 | f.uint64(&sr.IntrRegisters[i]) 870 | } 871 | } 872 | f.uint64Cond(ev.a.SampleFormat.PhysicalAddress, &sr.PhysicalAddress) 873 | return nil 874 | } 875 | 876 | // exactIPBit is PERF_RECORD_MISC_EXACT_IP 877 | const exactIPBit = 1 << 14 878 | 879 | // ExactIP indicates that sr.IP points to the actual instruction that 880 | // triggered the event. See also Options.PreciseIP. 881 | func (sr *SampleRecord) ExactIP() bool { 882 | return sr.RecordHeader.Misc&exactIPBit != 0 883 | } 884 | 885 | // SampleGroupRecord indicates a sample from an event group. 886 | // 887 | // All the fields up to and including Callchain represent ABI bits. All the 888 | // fields starting with Raw are non-ABI and have no compatibility guarantees. 889 | // 890 | // Fields on SampleGroupRecord are set according to the SampleFormat the event 891 | // was configured with. A boolean flag in SampleFormat typically enables the 892 | // homonymous field in SampleGroupRecord. 893 | type SampleGroupRecord struct { 894 | RecordHeader 895 | Identifier uint64 896 | IP uint64 897 | Pid uint32 898 | Tid uint32 899 | Time uint64 900 | Addr uint64 901 | ID uint64 902 | StreamID uint64 903 | CPU uint32 904 | _ uint32 905 | Period uint64 906 | Count GroupCount 907 | Callchain []uint64 908 | 909 | Raw []byte 910 | BranchStack []BranchEntry 911 | UserRegisterABI uint64 912 | UserRegisters []uint64 913 | UserStack []byte 914 | UserStackDynamicSize uint64 915 | Weight uint64 916 | DataSource DataSource 917 | Transaction Transaction 918 | IntrRegisterABI uint64 919 | IntrRegisters []uint64 920 | PhysicalAddress uint64 921 | } 922 | 923 | // DecodeFrom implements the Record.DecodeFrom method. 924 | func (sr *SampleGroupRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 925 | sr.RecordHeader = raw.Header 926 | f := raw.fields() 927 | f.uint64Cond(ev.a.SampleFormat.Identifier, &sr.Identifier) 928 | f.uint64Cond(ev.a.SampleFormat.IP, &sr.IP) 929 | f.uint32Cond(ev.a.SampleFormat.Tid, &sr.Pid, &sr.Tid) 930 | f.uint64Cond(ev.a.SampleFormat.Time, &sr.Time) 931 | f.uint64Cond(ev.a.SampleFormat.Addr, &sr.Addr) 932 | f.uint64Cond(ev.a.SampleFormat.ID, &sr.ID) 933 | f.uint64Cond(ev.a.SampleFormat.StreamID, &sr.StreamID) 934 | 935 | // If we have a StreamID and it is different from our 936 | // own ID, then the output from the event we're interested 937 | // in was redirected to ev. We must switch to that event 938 | // in order to decode the sample.
939 | if ev.a.SampleFormat.StreamID && sr.StreamID != ev.id { 940 | newev := ev.groupByID[sr.StreamID] // mirror SampleRecord.DecodeFrom 941 | if newev == nil { ev.resetRing(); return ErrBadRecord } 942 | ev = newev 943 | } 944 | 945 | var reserved uint32 946 | f.uint32Cond(ev.a.SampleFormat.CPU, &sr.CPU, &reserved) 947 | f.uint64Cond(ev.a.SampleFormat.Period, &sr.Period) 948 | if ev.a.SampleFormat.Count { 949 | f.groupCount(&sr.Count, ev.a.CountFormat) 950 | } 951 | if ev.a.SampleFormat.Callchain { 952 | var nr uint64 953 | f.uint64(&nr) 954 | sr.Callchain = make([]uint64, nr) 955 | for i := 0; i < len(sr.Callchain); i++ { 956 | f.uint64(&sr.Callchain[i]) 957 | } 958 | } 959 | if ev.a.SampleFormat.Raw { 960 | f.uint32sizeBytes(&sr.Raw) 961 | } 962 | if ev.a.SampleFormat.BranchStack { 963 | var nr uint64 964 | f.uint64(&nr) 965 | sr.BranchStack = make([]BranchEntry, nr) 966 | for i := 0; i < len(sr.BranchStack); i++ { 967 | var from, to, entry uint64 968 | f.uint64(&from) 969 | f.uint64(&to) 970 | f.uint64(&entry) 971 | sr.BranchStack[i].decode(from, to, entry) 972 | } 973 | } 974 | if ev.a.SampleFormat.UserRegisters { 975 | f.uint64(&sr.UserRegisterABI) 976 | num := bits.OnesCount64(ev.a.SampleRegistersUser) 977 | sr.UserRegisters = make([]uint64, num) 978 | for i := 0; i < len(sr.UserRegisters); i++ { 979 | f.uint64(&sr.UserRegisters[i]) 980 | } 981 | } 982 | if ev.a.SampleFormat.UserStack { 983 | f.uint64sizeBytes(&sr.UserStack) 984 | if len(sr.UserStack) > 0 { 985 | f.uint64(&sr.UserStackDynamicSize) 986 | } 987 | } 988 | f.uint64Cond(ev.a.SampleFormat.Weight, &sr.Weight) 989 | if ev.a.SampleFormat.DataSource { 990 | var ds uint64 991 | f.uint64(&ds) 992 | sr.DataSource = DataSource(ds) 993 | } 994 | if ev.a.SampleFormat.Transaction { 995 | var tx uint64 996 | f.uint64(&tx) 997 | sr.Transaction = Transaction(tx) 998 | } 999 | if ev.a.SampleFormat.IntrRegisters { 1000 | f.uint64(&sr.IntrRegisterABI) 1001 | num := bits.OnesCount64(ev.a.SampleRegistersIntr) 1002 | sr.IntrRegisters = make([]uint64, num) 1003 | for i := 0; i < len(sr.IntrRegisters); i++ { 1004 | f.uint64(&sr.IntrRegisters[i]) 1005 | } 1006 | } 1007 | f.uint64Cond(ev.a.SampleFormat.PhysicalAddress, &sr.PhysicalAddress) 1008 | return nil 1009 | } 1010 | 1011 | // ExactIP indicates that sr.IP points to the actual instruction that 1012 | // triggered the event. See also Options.PreciseIP. 1013 | func (sr *SampleGroupRecord) ExactIP() bool { 1014 | return sr.RecordHeader.Misc&exactIPBit != 0 1015 | } 1016 | 1017 | // BranchEntry is a sampled branch. 1018 | type BranchEntry struct { 1019 | From uint64 1020 | To uint64 1021 | Mispredicted bool 1022 | Predicted bool 1023 | InTransaction bool 1024 | TransactionAbort bool 1025 | Cycles uint16 1026 | BranchType BranchType 1027 | } 1028 | 1029 | func (be *BranchEntry) decode(from, to, entry uint64) { 1030 | *be = BranchEntry{ 1031 | From: from, 1032 | To: to, 1033 | Mispredicted: entry&(1<<0) != 0, 1034 | Predicted: entry&(1<<1) != 0, 1035 | InTransaction: entry&(1<<2) != 0, 1036 | TransactionAbort: entry&(1<<3) != 0, 1037 | Cycles: uint16((entry << 44) >> 48), 1038 | BranchType: BranchType((entry << 40) >> 60), // the 4-bit type field occupies bits 20-23 1039 | } 1040 | } 1041 | 1042 | // BranchType classifies a BranchEntry. 1043 | type BranchType uint8 1044 | 1045 | // Branch classifications.
1046 | const ( 1047 | BranchTypeUnknown BranchType = iota 1048 | BranchTypeConditional 1049 | BranchTypeUnconditional 1050 | BranchTypeIndirect 1051 | BranchTypeCall 1052 | BranchTypeIndirectCall 1053 | BranchTypeReturn 1054 | BranchTypeSyscall 1055 | BranchTypeSyscallReturn 1056 | BranchTypeConditionalCall 1057 | BranchTypeConditionalReturn 1058 | ) 1059 | 1060 | // Mmap2Record (PERF_RECORD_MMAP2) includes extended information on mmap(2) 1061 | // calls returning executable mappings. It is similar to MmapRecord, but 1062 | // includes extra values, allowing unique identification of shared mappings. 1063 | type Mmap2Record struct { 1064 | RecordHeader 1065 | Pid uint32 // process ID 1066 | Tid uint32 // thread ID 1067 | Addr uint64 // address of the allocated memory 1068 | Len uint64 // length of the allocated memory 1069 | PageOffset uint64 // page offset of the allocated memory 1070 | MajorID uint32 // major ID of the underlying device 1071 | MinorID uint32 // minor ID of the underlying device 1072 | Inode uint64 // inode number 1073 | InodeGeneration uint64 // inode generation 1074 | Prot uint32 // protection information 1075 | Flags uint32 // flags information 1076 | Filename string // describes the backing of the allocated memory 1077 | SampleID 1078 | } 1079 | 1080 | // DecodeFrom implements the Record.DecodeFrom method. 1081 | func (mr *Mmap2Record) DecodeFrom(raw *RawRecord, ev *Event) error { 1082 | mr.RecordHeader = raw.Header 1083 | f := raw.fields() 1084 | f.uint32(&mr.Pid, &mr.Tid) 1085 | f.uint64(&mr.Addr) 1086 | f.uint64(&mr.Len) 1087 | f.uint64(&mr.PageOffset) 1088 | f.uint32(&mr.MajorID, &mr.MinorID) 1089 | f.uint64(&mr.Inode) 1090 | f.uint64(&mr.InodeGeneration) 1091 | f.uint32(&mr.Prot, &mr.Flags) 1092 | f.string(&mr.Filename) 1093 | f.idCond(ev.a.Options.SampleIDAll, &mr.SampleID, ev.a.SampleFormat) 1094 | return nil 1095 | } 1096 | 1097 | // Executable returns a boolean indicating whether the mapping is executable. 1098 | func (mr *Mmap2Record) Executable() bool { 1099 | // The data bit is set when the mapping is _not_ executable. 1100 | return mr.RecordHeader.Misc&mmapDataBit == 0 1101 | } 1102 | 1103 | // AuxRecord (PERF_RECORD_AUX) reports that new data is available in the 1104 | // AUX buffer region. 1105 | type AuxRecord struct { 1106 | RecordHeader 1107 | Offset uint64 // offset in the AUX mmap region where the new data begins 1108 | Size uint64 // size of data made available 1109 | Flags AuxFlag // describes the update 1110 | SampleID 1111 | } 1112 | 1113 | // AuxFlag describes an update to a record in the AUX buffer region. 1114 | type AuxFlag uint64 1115 | 1116 | // AuxFlag bits. 1117 | const ( 1118 | AuxTruncated AuxFlag = 0x01 // record was truncated to fit 1119 | AuxOverwrite AuxFlag = 0x02 // snapshot from overwrite mode 1120 | AuxPartial AuxFlag = 0x04 // record contains gaps 1121 | AuxCollision AuxFlag = 0x08 // sample collided with another 1122 | ) 1123 | 1124 | // DecodeFrom implements the Record.DecodeFrom method. 
1125 | func (ar *AuxRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1126 | ar.RecordHeader = raw.Header 1127 | f := raw.fields() 1128 | f.uint64(&ar.Offset) 1129 | f.uint64(&ar.Size) 1130 | var flag uint64 1131 | f.uint64(&flag) 1132 | ar.Flags = AuxFlag(flag) 1133 | f.idCond(ev.a.Options.SampleIDAll, &ar.SampleID, ev.a.SampleFormat) 1134 | return nil 1135 | } 1136 | 1137 | // ItraceStartRecord (PERF_RECORD_ITRACE_START) indicates which process 1138 | // has initiated an instruction trace event, allowing tools to correlate 1139 | // instruction addresses in the AUX buffer with the proper executable. 1140 | type ItraceStartRecord struct { 1141 | RecordHeader 1142 | Pid uint32 // process ID of the thread starting an instruction trace 1143 | Tid uint32 // thread ID of the thread starting an instruction trace 1144 | SampleID 1145 | } 1146 | 1147 | // DecodeFrom implements the Record.DecodeFrom method. 1148 | func (ir *ItraceStartRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1149 | ir.RecordHeader = raw.Header 1150 | f := raw.fields() 1151 | f.uint32(&ir.Pid, &ir.Tid) 1152 | f.idCond(ev.a.Options.SampleIDAll, &ir.SampleID, ev.a.SampleFormat) 1153 | return nil 1154 | } 1155 | 1156 | // LostSamplesRecord (PERF_RECORD_LOST_SAMPLES) indicates some number of 1157 | // samples that may have been lost, when using hardware sampling such as 1158 | // Intel PEBS. 1159 | type LostSamplesRecord struct { 1160 | RecordHeader 1161 | Lost uint64 // the number of potentially lost samples 1162 | SampleID 1163 | } 1164 | 1165 | // DecodeFrom implements the Record.DecodeFrom method. 1166 | func (lr *LostSamplesRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1167 | lr.RecordHeader = raw.Header 1168 | f := raw.fields() 1169 | f.uint64(&lr.Lost) 1170 | f.idCond(ev.a.Options.SampleIDAll, &lr.SampleID, ev.a.SampleFormat) 1171 | return nil 1172 | } 1173 | 1174 | // SwitchRecord (PERF_RECORD_SWITCH) indicates that a context switch has 1175 | // happened. 1176 | type SwitchRecord struct { 1177 | RecordHeader 1178 | SampleID 1179 | } 1180 | 1181 | // DecodeFrom implements the Record.DecodeFrom method. 1182 | func (sr *SwitchRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1183 | sr.RecordHeader = raw.Header 1184 | f := raw.fields() 1185 | f.idCond(ev.a.Options.SampleIDAll, &sr.SampleID, ev.a.SampleFormat) 1186 | return nil 1187 | } 1188 | 1189 | // switchOutBit is PERF_RECORD_MISC_SWITCH_OUT 1190 | const switchOutBit = 1 << 13 1191 | 1192 | // switchOutPreemptBit is PERF_RECORD_MISC_SWITCH_OUT_PREEMPT 1193 | const switchOutPreemptBit = 1 << 14 1194 | 1195 | // Out returns a boolean indicating whether the context switch was 1196 | // out of the current process, or into the current process. 1197 | func (sr *SwitchRecord) Out() bool { 1198 | return sr.RecordHeader.Misc&switchOutBit != 0 1199 | } 1200 | 1201 | // Preempted indicates whether the thread was preempted in TASK_RUNNING state. 1202 | func (sr *SwitchRecord) Preempted() bool { 1203 | return sr.RecordHeader.Misc&switchOutPreemptBit != 0 1204 | } 1205 | 1206 | // SwitchCPUWideRecord (PERF_RECORD_SWITCH_CPU_WIDE) indicates a context 1207 | // switch, but only occurs when sampling in CPU-wide mode. It provides 1208 | // information on the process being switched to / from. 1209 | type SwitchCPUWideRecord struct { 1210 | RecordHeader 1211 | Pid uint32 1212 | Tid uint32 1213 | SampleID 1214 | } 1215 | 1216 | // DecodeFrom implements the Record.DecodeFrom method. 
1217 | func (sr *SwitchCPUWideRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1218 | sr.RecordHeader = raw.Header 1219 | f := raw.fields() 1220 | f.uint32(&sr.Pid, &sr.Tid) 1221 | f.idCond(ev.a.Options.SampleIDAll, &sr.SampleID, ev.a.SampleFormat) 1222 | return nil 1223 | } 1224 | 1225 | // Out returns a boolean indicating whether the context switch was 1226 | // out of the current process, or into the current process. 1227 | func (sr *SwitchCPUWideRecord) Out() bool { 1228 | return sr.RecordHeader.Misc&switchOutBit != 0 1229 | } 1230 | 1231 | // Preempted indicates whether the thread was preempted in TASK_RUNNING state. 1232 | func (sr *SwitchCPUWideRecord) Preempted() bool { 1233 | return sr.RecordHeader.Misc&switchOutPreemptBit != 0 1234 | } 1235 | 1236 | // NamespacesRecord (PERF_RECORD_NAMESPACES) describes the namespaces of a 1237 | // process when it is created. 1238 | type NamespacesRecord struct { 1239 | RecordHeader 1240 | Pid uint32 1241 | Tid uint32 1242 | Namespaces []struct { 1243 | Dev uint64 1244 | Inode uint64 1245 | } 1246 | SampleID 1247 | } 1248 | 1249 | // TODO(acln): check out *_NS_INDEX in perf_event.h 1250 | 1251 | // DecodeFrom implements the Record.DecodeFrom method. 1252 | func (nr *NamespacesRecord) DecodeFrom(raw *RawRecord, ev *Event) error { 1253 | nr.RecordHeader = raw.Header 1254 | f := raw.fields() 1255 | f.uint32(&nr.Pid, &nr.Tid) 1256 | var num uint64 1257 | f.uint64(&num) 1258 | nr.Namespaces = make([]struct{ Dev, Inode uint64 }, num) 1259 | for i := 0; i < int(num); i++ { 1260 | f.uint64(&nr.Namespaces[i].Dev) 1261 | f.uint64(&nr.Namespaces[i].Inode) 1262 | } 1263 | f.idCond(ev.a.Options.SampleIDAll, &nr.SampleID, ev.a.SampleFormat) 1264 | return nil 1265 | } 1266 | 1267 | // Skid is an instruction pointer skid constraint. 1268 | type Skid int 1269 | 1270 | // Supported Skid settings. 1271 | const ( 1272 | CanHaveArbitrarySkid Skid = 0 1273 | MustHaveConstantSkid Skid = 1 1274 | RequestedZeroSkid Skid = 2 1275 | MustHaveZeroSkid Skid = 3 1276 | ) 1277 | 1278 | // BranchSampleFormat specifies what branches to include in a branch record. 1279 | type BranchSampleFormat struct { 1280 | Privilege BranchSamplePrivilege 1281 | Sample BranchSample 1282 | } 1283 | 1284 | func (b BranchSampleFormat) marshal() uint64 { 1285 | return uint64(b.Privilege) | uint64(b.Sample) 1286 | } 1287 | 1288 | // BranchSamplePrivilege specifies a branch sample privilege level. If a 1289 | // level is not set explicitly, the kernel will use the event's privilege 1290 | // level. Event and branch privilege levels do not have to match. 1291 | type BranchSamplePrivilege uint64 1292 | 1293 | // Branch sample privilege values. Values should be |-ed together. 1294 | const ( 1295 | BranchPrivilegeUser BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_USER 1296 | BranchPrivilegeKernel BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_KERNEL 1297 | BranchPrivilegeHypervisor BranchSamplePrivilege = unix.PERF_SAMPLE_BRANCH_HV 1298 | ) 1299 | 1300 | // BranchSample specifies a type of branch to sample. 1301 | type BranchSample uint64 1302 | 1303 | // Branch sample bits. Values should be |-ed together.
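//
// A hedged sketch of requesting branch records on an event (SampleFormat
// BranchStack must also be set; Attr.BranchSampleFormat is the field the
// SampleFormat.BranchStack documentation points to):
//
//	a.SampleFormat.BranchStack = true
//	a.BranchSampleFormat = perf.BranchSampleFormat{
//		Privilege: perf.BranchPrivilegeUser,
//		Sample:    perf.BranchSampleAnyCall | perf.BranchSampleCond,
//	}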
1304 | const ( 1305 | BranchSampleAny BranchSample = unix.PERF_SAMPLE_BRANCH_ANY 1306 | BranchSampleAnyCall BranchSample = unix.PERF_SAMPLE_BRANCH_ANY_CALL 1307 | BranchSampleAnyReturn BranchSample = unix.PERF_SAMPLE_BRANCH_ANY_RETURN 1308 | BranchSampleIndirectCall BranchSample = unix.PERF_SAMPLE_BRANCH_IND_CALL 1309 | BranchSampleAbortTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_ABORT_TX 1310 | BranchSampleInTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_IN_TX 1311 | BranchSampleNoTransaction BranchSample = unix.PERF_SAMPLE_BRANCH_NO_TX 1312 | BranchSampleCond BranchSample = unix.PERF_SAMPLE_BRANCH_COND 1313 | BranchSampleCallStack BranchSample = unix.PERF_SAMPLE_BRANCH_CALL_STACK 1314 | BranchSampleIndirectJump BranchSample = unix.PERF_SAMPLE_BRANCH_IND_JUMP 1315 | BranchSampleCall BranchSample = unix.PERF_SAMPLE_BRANCH_CALL 1316 | BranchSampleNoFlags BranchSample = unix.PERF_SAMPLE_BRANCH_NO_FLAGS 1317 | BranchSampleNoCycles BranchSample = unix.PERF_SAMPLE_BRANCH_NO_CYCLES 1318 | BranchSampleSave BranchSample = unix.PERF_SAMPLE_BRANCH_TYPE_SAVE 1319 | ) 1320 | 1321 | // DataSource records where in the memory hierarchy the data associated with 1322 | // a sampled instruction came from. 1323 | type DataSource uint64 1324 | 1325 | // MemOp returns the recorded memory operation. 1326 | func (ds DataSource) MemOp() MemOp { 1327 | return MemOp(ds >> memOpShift) 1328 | } 1329 | 1330 | // MemLevel returns the recorded memory level. 1331 | func (ds DataSource) MemLevel() MemLevel { 1332 | return MemLevel(ds >> memLevelShift) 1333 | } 1334 | 1335 | // MemRemote returns the recorded remote bit. 1336 | func (ds DataSource) MemRemote() MemRemote { 1337 | return MemRemote(ds >> memRemoteShift) 1338 | } 1339 | 1340 | // MemLevelNumber returns the recorded memory level number. 1341 | func (ds DataSource) MemLevelNumber() MemLevelNumber { 1342 | return MemLevelNumber(ds >> memLevelNumberShift) 1343 | } 1344 | 1345 | // MemSnoopMode returns the recorded memory snoop mode. 1346 | func (ds DataSource) MemSnoopMode() MemSnoopMode { 1347 | return MemSnoopMode(ds >> memSnoopModeShift) 1348 | } 1349 | 1350 | // MemSnoopModeX returns the recorded extended memory snoop mode. 1351 | func (ds DataSource) MemSnoopModeX() MemSnoopModeX { 1352 | return MemSnoopModeX(ds >> memSnoopModeXShift) 1353 | } 1354 | 1355 | // MemLock returns the recorded memory lock mode. 1356 | func (ds DataSource) MemLock() MemLock { 1357 | return MemLock(ds >> memLockShift) 1358 | } 1359 | 1360 | // MemTLB returns the recorded TLB access mode. 1361 | func (ds DataSource) MemTLB() MemTLB { 1362 | return MemTLB(ds >> memTLBShift) 1363 | } 1364 | 1365 | // MemOp is a memory operation. 1366 | type MemOp uint8 1367 | 1368 | // MemOp flag bits. 1369 | const ( 1370 | MemOpNA MemOp = 1 << iota 1371 | MemOpLoad 1372 | MemOpStore 1373 | MemOpPrefetch 1374 | MemOpExec 1375 | 1376 | memOpShift = 0 1377 | ) 1378 | 1379 | // MemLevel is a memory level. 1380 | type MemLevel uint32 1381 | 1382 | // MemLevel flag bits. 1383 | const ( 1384 | MemLevelNA MemLevel = 1 << iota 1385 | MemLevelHit 1386 | MemLevelMiss 1387 | MemLevelL1 1388 | MemLevelLFB 1389 | MemLevelL2 1390 | MemLevelL3 1391 | MemLevelLocalDRAM 1392 | MemLevelRemoteDRAM1 1393 | MemLevelRemoteDRAM2 1394 | MemLevelRemoteCache1 1395 | MemLevelRemoteCache2 1396 | MemLevelIO 1397 | MemLevelUncached 1398 | 1399 | memLevelShift = 5 1400 | ) 1401 | 1402 | // MemRemote indicates whether remote memory was accessed. 1403 | type MemRemote uint8 1404 | 1405 | // MemRemote flag bits. 
const (
	MemRemoteRemote MemRemote = 1 << iota

	memRemoteShift = 37
)

// MemLevelNumber is a memory level number.
type MemLevelNumber uint8

// MemLevelNumber values. Unlike most other DataSource sub-fields, these
// are enumerated values (PERF_MEM_LVLNUM_*), not flag bits.
const (
	MemLevelNumberL1 MemLevelNumber = iota + 0x01 // 0x01
	MemLevelNumberL2                              // 0x02
	MemLevelNumberL3                              // 0x03
	MemLevelNumberL4                              // 0x04

	MemLevelNumberAnyCache MemLevelNumber = iota + 0x07 // 0x0b
	MemLevelNumberLFB                                   // 0x0c
	MemLevelNumberRAM                                   // 0x0d
	MemLevelNumberPMem                                  // 0x0e
	MemLevelNumberNA                                    // 0x0f

	memLevelNumberShift = 33
)

// MemSnoopMode is a memory snoop mode.
type MemSnoopMode uint8

// MemSnoopMode flag bits.
const (
	MemSnoopModeNA MemSnoopMode = 1 << iota
	MemSnoopModeNone
	MemSnoopModeHit
	MemSnoopModeMiss
	MemSnoopModeHitModified

	memSnoopModeShift = 19
)

// MemSnoopModeX is an extended memory snoop mode.
type MemSnoopModeX uint8

// MemSnoopModeX flag bits.
const (
	MemSnoopModeXForward MemSnoopModeX = 0x01 // forward

	// PERF_MEM_SNOOPX_SHIFT. Note that older kernel headers wrongly
	// defined this as 37, clashing with PERF_MEM_REMOTE_SHIFT; the
	// field actually sits at bit 38.
	memSnoopModeXShift = 38
)

// MemLock is a memory locking mode.
type MemLock uint8

// MemLock flag bits.
const (
	MemLockNA     MemLock = 1 << iota // not available
	MemLockLocked                     // locked transaction

	memLockShift = 24
)

// MemTLB is a TLB access mode.
type MemTLB uint8

// MemTLB flag bits.
const (
	MemTLBNA   MemTLB = 1 << iota // not available
	MemTLBHit                     // hit level
	MemTLBMiss                    // miss level
	MemTLBL1
	MemTLBL2
	MemTLBWK // Hardware Walker
	MemTLBOS // OS fault handler

	memTLBShift = 26
)

// Transaction describes a transactional memory abort.
type Transaction uint64

// Transaction flag bits. Test for them by AND-ing them with a Transaction
// value.
const (
	// TransactionElision indicates an abort from an elision type
	// transaction (Intel CPU specific).
	TransactionElision Transaction = 1 << iota

	// TransactionGeneric indicates an abort from a generic transaction.
	TransactionGeneric

	// TransactionSync indicates a synchronous abort (related to the
	// reported instruction).
	TransactionSync

	// TransactionAsync indicates an asynchronous abort (unrelated to
	// the reported instruction).
	TransactionAsync

	// TransactionRetryable indicates whether retrying the transaction
	// may have succeeded.
	TransactionRetryable

	// TransactionConflict indicates an abort due to memory conflicts
	// with other threads.
	TransactionConflict

	// TransactionWriteCapacity indicates an abort due to write capacity
	// overflow.
	TransactionWriteCapacity

	// TransactionReadCapacity indicates an abort due to read capacity
	// overflow.
	TransactionReadCapacity
)

// txnAbortMask is PERF_TXN_ABORT_MASK.
const txnAbortMask = 0xffffffff

// txnAbortShift is PERF_TXN_ABORT_SHIFT.
const txnAbortShift = 32

// UserAbortCode returns the user-specified abort code associated with
// the transaction.
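// For example (a sketch; txn is a perf.Transaction taken from a decoded
// sample record):
//
//	if txn&perf.TransactionRetryable != 0 {
//		fmt.Printf("retryable abort, user code %#x\n", txn.UserAbortCode())
//	}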
func (txn Transaction) UserAbortCode() uint32 {
	return uint32((txn >> txnAbortShift) & txnAbortMask)
}

// TODO(acln): the latter part of this file is full of constants added
// ad-hoc, which use iota. These should probably be added to x/sys/unix
// instead, and used from there.
--------------------------------------------------------------------------------