├── .gitignore
├── Makefile
├── README.md
├── algo-common.hpp
├── basic-impls.cpp
├── basic-impls.hpp
├── catch.hpp
├── clock
    └── clock.h
├── common-cxx.hpp
├── cpuid.cpp
├── cpuid.hpp
├── cycle-timer.c
├── cycle-timer.h
├── dbg.h
├── env.hpp
├── exact-int
    └── exact-int.h
├── generate-event-code.py
├── hedley.h
├── impl-list.cpp
├── impl-list.hpp
├── jevents
    ├── Makefile
    ├── README.md
    ├── cache.c
    ├── cpustr.c
    ├── event-rmap.c
    ├── examples
    │   ├── Makefile
    │   ├── addr.c
    │   ├── cpu.c
    │   ├── cpu.h
    │   ├── hist.cc
    │   ├── hist.h
    │   ├── jestat.c
    │   ├── rtest.c
    │   ├── rtest2.c
    │   └── rtest3.c
    ├── interrupts.c
    ├── interrupts.h
    ├── jevents-internal.h
    ├── jevents.c
    ├── jevents.h
    ├── jsession.h
    ├── jsmn.c
    ├── jsmn.h
    ├── json.c
    ├── json.h
    ├── libjevents.spec
    ├── listevents.c
    ├── measure.c
    ├── measure.h
    ├── perf-iter.c
    ├── perf-iter.h
    ├── perf_event_open.c
    ├── rawevent.c
    ├── rdpmc.c
    ├── rdpmc.h
    ├── resolve.c
    ├── session.c
    ├── showevent.c
    └── util.h
├── main-test.cpp
├── main.cpp
├── misc.cpp
├── misc.hpp
├── msr-access.c
├── msr-access.h
├── nonstd
    ├── LICENSE.txt
    └── span.hpp
├── opt-control.h
├── perf-timer-events.cpp
├── perf-timer-events.hpp
├── perf-timer.cpp
├── perf-timer.hpp
├── scripts
    ├── common.sh
    ├── data.sh
    ├── plot-csv.py
    └── plots.sh
├── tsc-support.cpp
├── tsc-support.hpp
├── unit-test.cpp
├── util.hpp
└── voltmon.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # by default exclude anything in jevents without an extension since it's
 2 | # probably a binary file
 3 | /jevents/**
 4 | !/jevents/**/
 5 | !/jevents/**/*.*
 6 | !/jevents/**/Makefile
 7 | 
 8 | /.*
 9 | !.gitignore
10 | *.o
11 | *.a
12 | *.log
13 | __pycache__
14 | /tmp/*
15 | *.d
16 | /out*
17 | 
18 | # exes
19 | /test
20 | /bench
21 | /voltmon
22 | 
23 | perf.data
24 | perf.data.*
25 | 
26 | /results
27 | /temp
28 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | STD ?= c++14
 2 | ARCH := haswell
 3 | 
 4 | ifeq ($(DEBUG), 1)
 5 | OPT  := -Og
 6 | else
 7 | CPPFLAGS += -DNDEBUG
 8 | OPT := -O2
 9 | endif
10 | 
11 | CPPFLAGS += -MMD -Wall -Wextra -Werror $(OPT) -g -march=$(ARCH) -Wno-unused-parameter $(CPPEXTRA) \
12 |     -Wno-error=unused-variable \
13 | 	-Wno-unknown-pragmas \
14 | #	-funroll-loops
15 | 
16 | CXXFLAGS += -std=$(STD) -DENABLE_TIMER=1
17 | 
18 | # uncomment to use the fast gold linker
19 | # LDFLAGS = -fuse-ld=gold
20 | 
21 | 
22 | TARGETS := bench test voltmon
23 | MAINOS  := main.o main-test.o voltmon.o
24 | 
25 | TESTSRCS:= $(wildcard *-test.c *-test.cpp)
26 | TESTOBJS:= $(patsubst %.c,%.o,$(TESTSRCS))
27 | TESTOBJS:= $(patsubst %.cpp,%.o,$(TESTOBJS))
28 | 
29 | SRCS    := $(wildcard *.c *.cpp xxHash/xxhash.c)
30 | OBJECTS := $(patsubst %.c,%.o,$(SRCS))
31 | OBJECTS := $(patsubst %.cpp,%.o,$(OBJECTS))
32 | ALLOBJS := $(OBJECTS)
33 | OBJECTS := $(filter-out $(MAINOS) $(TESTOBJS), $(OBJECTS))
34 | 
35 | DEPS    := $(ALLOBJS:%.o=%.d)
36 | 
37 | # $(info $$DEPS is [${DEPS}])
38 | # $(info $$TESTOBJS is [${TESTOBJS}])
39 | # $(info $$OBJECTS is [${OBJECTS}])
40 | 
41 | JE_LIB := jevents/libjevents.a
42 | JE_SRC := $(wildcard jevents/*.c jevents/*.h)
43 | 
44 | LDFLAGS := -lm
45 | 
46 | ifneq ($(LD), ld)
47 | LDFLAGS += $(if $(LD),-fuse-ld=$(LD))
48 | endif
49 | 
50 | ifeq ($(ASAN), 1)
51 | CPPFLAGS += -fsanitize=address -fno-omit-frame-pointer
52 | LDFLAGS  += -lasan
53 | endif
54 | 
55 | default: $(TARGETS)
56 | .PHONY: default clean  # command targets, not files: run them even if a file with that name exists
57 | -include $(DEPS)
58 | 
59 | bench : $(OBJECTS) main.o
60 | 
61 | voltmon : voltmon.o msr-access.o
62 | 
63 | test  : $(OBJECTS) $(TESTOBJS)
64 | 
65 | $(TARGETS) : $(JE_LIB)
66 | 	$(CXX) $(CFLAGS) $(CPPFLAGS) $^ $(LDFLAGS) -o $@ $(JE_LIB)
67 | 
68 | %.o: %.c
69 | 	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
70 | 
71 | %.o: %.cpp
72 | 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CXXEXTRA) -c -o $@ $<
73 | 
74 | $(JE_LIB): $(JE_SRC)
75 | 	cd jevents && $(MAKE) MAKEFLAGS=
76 | 
77 | clean:
78 | 	rm -f $(TARGETS)
79 | 	rm -f *.o *.d
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This is the supporting benchmark for the blog post [_Gathering Intel on Intel AVX-512 Transitions_](https://travisdowns.github.io/blog/2020/01/17/avxfreq1.html).
 2 | 
 3 | This benchmark relies heavily on the [`perf_events`](http://man7.org/linux/man-pages/man2/perf_event_open.2.html) subsystem and so only runs on Linux.
 4 | 
 5 | ## Build
 6 | 
 7 | This project provides a makefile and building should be as simple as:
 8 | 
 9 |     make
10 | 
11 | ## Checking perf_event_paranoid
12 | 
13 | To run these tests you need a `/proc/sys/kernel/perf_event_paranoid` setting of 1 or less, or to be running as root. Many modern systems set this to 3, a very conservative setting. If the value on your system is greater than 1, you can either set it (until reboot) to 1 like so:
14 | 
15 |     echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoid
16 | 
17 | Or you can run the tests as root (e.g., `sudo ./bench`).
18 | 
19 | ## Running
20 | 
21 | The benchmark includes several tests. You can run them all like this (this will crash pretty quickly on platforms without AVX-512):
22 | 
23 |     ./bench
24 | 
25 | You can run a specific test by providing its name:
26 | 
27 |     ./bench vporymm
28 | 
29 | You can list the available tests:
30 | 
31 |     ./bench list tests
32 | 
33 | 
34 | ## Generating Results
35 | 
36 | You can run all the tests required for generating the data used in the post using the `scripts/data.sh` script. First you should set your TSC (time stamp counter as read by `rdtsc`) frequency as the `MHZ` variable in the environment. The benchmark application itself detects the TSC frequency; you can check the value on your system by running:
37 | 
38 |     ./bench dummy |& grep 'tsc freq'
39 | 
40 | Set that value as `MHZ` in your environment, like so:
41 | 
42 |     export MHZ=3200
43 | 
44 | Next, choose a prefix for your result files to identify them, this defaults to `test` if you don't set any:
45 | 
46 |     export PREFIX=blah
47 |     # this second prefix is used specifically for the voltage related tests
48 |     export PREFIXV=blah
49 | 
50 | Then you can collect most of the data like this:
51 | 
52 |     scripts/data.sh
53 | 
54 | This generates the results in the `./results` dir by default. This generates all the results _except_ for those related to voltage. To generate those, you need to jump through a few hoops, because voltage readings require MSR access. First, you must have `msr-tools` installed, however that works on your platform (e.g., `apt install msr-tools` on Ubuntu). Then you can either run data.sh as root, like so:
55 | 
56 |     sudo MHZ=$MHZ DO_VOLTS=1 scripts/data.sh
57 | 
58 | The `DO_VOLTS=1` indicates to the script that it should run only the voltage tests. This causes the files written to `./results` to be owned by root (unless the files already existed with different ownership), so you should `chown` them to your current user after. You can also run this as a non-root user but you have to jump through some hoops (this is probably not entirely safe, certainly not on any host with untrusted users or code):
59 | 
60 | ~~~
61 | # set the special msr files world readable
62 | sudo chmod a+r /dev/cpu/*/msr
63 | # give the bench binary the caps needed to read the msr files (not needed on all systems)
64 | sudo setcap cap_sys_rawio=ep ./bench
65 | # run the test as non-root
66 | DO_VOLTS=1 scripts/data.sh
67 | ~~~
68 | 
69 | ## Generating Plots
70 | 
71 | Once the results are generated, you can generate the plots with:
72 | 
73 |     scripts/plots.sh
74 | 
75 | That will show the plots, one by one, in matplotlib's interactive viewer. To write them out to SVG files, just provide the `OUTDIR` environment variable:
76 | 
77 | ~~~
78 | mkdir plots
79 | OUTDIR=./plots scripts/plots.sh
80 | ~~~
81 | 
82 | The script expects all the result files to exist, feel free to comment out some lines if you've just generated partial results. You can also pass the script a regex and it will only generate plots whose title matches the regex. For example, the following generates only the 3 voltage plots:
83 | 
84 |     ./scripts/plots.sh Voltage
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 |     
92 | 


--------------------------------------------------------------------------------
/algo-common.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef ALGO_COMMON_H_
 2 | #define ALGO_COMMON_H_
 3 | 
 4 | #include "hedley.h"
 5 | 
 6 | #include <stdlib.h>
 7 | 
 8 | #endif
 9 | 
10 | 


--------------------------------------------------------------------------------
/basic-impls.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Implementation of various very basic algorithms, mostly as a litmus test for the more complicated ones.
  3 |  */
  4 | 
  5 | #include "basic-impls.hpp"
  6 | #include "algo-common.hpp"
  7 | 
  8 | #include <assert.h>
  9 | #include <inttypes.h>
 10 | #include <string.h>
 11 | #include <numeric>
 12 | 
 13 | void vporxmm(bench_args args) {
 14 |     asm volatile ("vpor %xmm0, %xmm0, %xmm0\n");
 15 | }
 16 | 
 17 | void vporymm(bench_args args) {
 18 |     asm volatile ("vpor %ymm0, %ymm0, %ymm0\n");
 19 | }
 20 | 
 21 | void vporzmm(bench_args args) {
 22 |     asm volatile ("vpord %zmm0, %zmm0, %zmm0\n");
 23 | }
 24 | 
 25 | void vporxmm_vz(bench_args args) {
 26 |     asm volatile (
 27 |         "vpor %xmm0, %xmm0, %xmm0\n"
 28 |         "vzeroupper\n"
 29 |     );
 30 | }
 31 | 
 32 | void vporymm_vz(bench_args args) {
 33 |     asm volatile (
 34 |         "vpor %ymm0, %ymm0, %ymm0\n"
 35 |         "vzeroupper\n"
 36 |     );
 37 | }
 38 | 
 39 | void vporzmm_vz(bench_args args) {
 40 |     asm volatile (
 41 |         "vpord %zmm0, %zmm0, %zmm0\n"
 42 |         "vzeroupper\n"
 43 |     );
 44 | }
 45 | 
 46 | /**
 47 |  * 1000 copies of instr
 48 |  */
 49 | #define MAKE_MANY(name,instr,regd,regs) \
 50 | void name##_vz100(bench_args args) {  \
 51 |     asm volatile (                     \
 52 |         ".rept 1000\n\t"               \
 53 |         #instr " %" #regs ", %" #regs ", %" #regd "\n\t" \
 54 |         ".endr\n\t"                    \
 55 |         "vzeroupper\n\t"               \
 56 |     );                                 \
 57 | }
 58 | 
 59 | // latency
 60 | MAKE_MANY(vporxmm, vpor,  xmm0, xmm0)
 61 | MAKE_MANY(vporymm, vpor,  ymm0, ymm0)
 62 | MAKE_MANY(vporzmm, vpord, zmm0, zmm0)
 63 | 
 64 | // throughput
 65 | MAKE_MANY(vporxmm_tput, vpor,  xmm0, xmm1)
 66 | MAKE_MANY(vporymm_tput, vpor,  ymm0, ymm1)
 67 | MAKE_MANY(vporzmm_tput, vpord, zmm0, zmm1)
 68 | 
 69 | // vpermd
 70 | MAKE_MANY(vpermdzmm     , vpermd, zmm0, zmm0)
 71 | MAKE_MANY(vpermdzmm_tput, vpermd, zmm0, zmm1)
 72 | 
 73 | /**
 74 |  * 500 repetitions of one instr1 followed by three instr2 (2000 instructions), then vzeroupper
 75 |  */
 76 | #define MAKE_MANY250(name,instr1,instr2,regd1,regs1,regd2,regs2) \
 77 | void name(bench_args args) {  \
 78 |     asm volatile (                     \
 79 |         ".rept 500\n\t"               \
 80 |         #instr1 " %" #regs1 ", %" #regs1 ", %" #regd1 "\n\t" \
 81 |         #instr2 " %" #regs2 ", %" #regs2 ", %" #regd2 "\n\t" \
 82 |         #instr2 " %" #regs2 ", %" #regs2 ", %" #regd2 "\n\t" \
 83 |         #instr2 " %" #regs2 ", %" #regs2 ", %" #regd2 "\n\t" \
 84 |         ".endr\n\t"                    \
 85 |         "vzeroupper\n\t"               \
 86 |     );                                 \
 87 | }
 88 | 
 89 | // MAKE_MANY250(vporxymm250, vpor , vpor, ymm0, ymm0, xmm0, xmm0);
 90 | // MAKE_MANY250(vporyzmm250, vpord, vpor, zmm0, zmm0, ymm0, ymm0);
 91 | 
 92 | #define MAKE_MANY3(name,instr1,instr2,instr3) \
 93 | void name(bench_args args) {  \
 94 |     asm volatile (                     \
 95 |         ".rept 250\n\t"                \
 96 |         instr1                         \
 97 |         instr2                         \
 98 |         instr3                         \
 99 |         ".endr\n\t"                    \
100 |     );                                 \
101 | }
102 | 
103 | MAKE_MANY3(vporxymm250, \
104 |     "vpor %ymm0, %ymm0, %ymm0\n\t",
105 |     "vmovd %xmm0, %eax\n\t",
106 |     "vmovd %eax, %xmm0\n\t");
107 | 
108 | MAKE_MANY3(vporyzmm250, \
109 |     "vpor %xmm0, %xmm0, %xmm0\n\t",
110 |     "vmovd %xmm0, %eax\n\t",
111 |     "vmovd %eax, %xmm0\n\t");
112 | 
113 | #define MAKE250(name,rep,instr) \
114 | void name##_##rep(bench_args args) {           \
115 |     asm volatile (                            \
116 |         "vzeroupper\n\t"                      \
117 |         ".rept 10\n\t"                        \
118 |         "vpor %ymm0, %ymm0, %ymm0\n\t"        \
119 |         ".rept " #rep "\n\t"                        \
120 |         instr                  \
121 |         ".endr\n\t"                         \
122 |         ".endr\n\t"                           \
123 |     );                                        \
124 | }                                             \
125 | 
126 | #define MAKE250ADD(rep) MAKE250(vporxymm250, rep, "addl  $0, %eax\n\t")
127 | 
128 | ALL_RATIOS_X(MAKE250ADD)
129 | 
130 | MAKE250(mulxymm250, 10, "imull  $0, %eax, %eax\n\t")
131 | 
132 | 
133 | void dummy(bench_args args) {}
134 | 


--------------------------------------------------------------------------------
/basic-impls.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef BASIC_IMPLS_H_
 2 | #define BASIC_IMPLS_H_
 3 | 
 4 | #include "common-cxx.hpp"
 5 | 
 6 | bench_fn dummy;
 7 | 
 8 | bench_fn vporxmm;
 9 | bench_fn vporymm;
10 | bench_fn vporzmm;
11 | 
12 | bench_fn vporxmm_vz;
13 | bench_fn vporymm_vz;
14 | bench_fn vporzmm_vz;
15 | 
16 | bench_fn vporxmm_vz100;
17 | bench_fn vporymm_vz100;
18 | bench_fn vporzmm_vz100;
19 | 
20 | bench_fn vporxmm_tput_vz100;
21 | bench_fn vporymm_tput_vz100;
22 | bench_fn vporzmm_tput_vz100;
23 | 
24 | bench_fn vpermdxmm_vz100;
25 | bench_fn vpermdymm_vz100;
26 | bench_fn vpermdzmm_vz100;
27 | 
28 | bench_fn vpermdxmm_tput_vz100;
29 | bench_fn vpermdymm_tput_vz100;
30 | bench_fn vpermdzmm_tput_vz100;
31 | 
32 | bench_fn vporxymm250;
33 | bench_fn vporyzmm250;
34 | 
35 | #define DEFINE250(rep) bench_fn vporxymm250_##rep;
36 | 
37 | ALL_RATIOS_X(DEFINE250)
38 | 
39 | bench_fn mulxymm250_10;
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/common-cxx.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SI_COMMON_CXX_H_
 2 | #define SI_COMMON_CXX_H_
 3 | 
 4 | /* to align with C++20 std::span */
 5 | #define span_CONFIG_INDEX_TYPE size_t
 6 | 
 7 | #include <stdexcept> // needed by span.hpp
 8 | 
 9 | #include "hedley.h"
10 | #include "inttypes.h"
11 | #include "nonstd/span.hpp"
12 | 
13 | #include <stdlib.h>
14 | 
15 | using char_span = nonstd::span<char>;
16 | 
17 | /**
18 |  * Bundles all the arguments.
19 |  */
20 | struct bench_args {};
21 | 
22 | using bench_fn = void (bench_args args);
23 | 
24 | #define ALL_RATIOS_X(f) \
25 |     f(1) \
26 |     f(2) \
27 |     f(3) \
28 |     f(4) \
29 |     f(5) \
30 |     f(6) \
31 |     f(7) \
32 |     f(8) \
33 |     f(9) \
34 |     f(10) \
35 |     f(20) \
36 |     f(30) \
37 |     f(40) \
38 |     f(50) \
39 |     f(60) \
40 |     f(70) \
41 |     f(80) \
42 |     f(90) \
43 |     f(100) \
44 |     f(120) \
45 |     f(140) \
46 |     f(160) \
47 |     f(180) \
48 |     f(200) \
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/cpuid.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * cpuid.cpp
  3 |  */
  4 | 
  5 | #include "cpuid.hpp"
  6 | 
  7 | #include <string.h>
  8 | 
  9 | using std::uint8_t;
 10 | using std::uint32_t;
 11 | 
 12 | 
 13 | std::string cpuid_result::to_string() {
 14 |     std::string s;
 15 |     s += "eax = " + std::to_string(eax) + ", ";
 16 |     s += "ebx = " + std::to_string(ebx) + ", ";
 17 |     s += "ecx = " + std::to_string(ecx) + ", ";
 18 |     s += "edx = " + std::to_string(edx);
 19 |     return s;
 20 | }
 21 | 
 22 | uint32_t cpuid_highest_leaf_inner() {
 23 |     return cpuid(0).eax;
 24 | }
 25 | 
 26 | uint32_t cpuid_highest_leaf() {
 27 |     static uint32_t cached = cpuid_highest_leaf_inner();
 28 |     return cached;
 29 | }
 30 | 
 31 | cpuid_result cpuid(int leaf, int subleaf) {
 32 |     cpuid_result ret = {};
 33 |     asm ("cpuid"
 34 |             :
 35 |             "=a" (ret.eax),
 36 |             "=b" (ret.ebx),
 37 |             "=c" (ret.ecx),
 38 |             "=d" (ret.edx)
 39 |             :
 40 |             "a" (leaf),
 41 |             "c" (subleaf)
 42 |     );
 43 |     return ret;
 44 | }
 45 | 
 46 | cpuid_result cpuid(int leaf) {
 47 |     return cpuid(leaf, 0);
 48 | }
 49 | 
 50 | family_model gfm_inner() {  // decode family / model / stepping from EAX of CPUID leaf 1
 51 |     auto cpuid1 = cpuid(1);
 52 |     family_model ret;
 53 |     ret.family   = (cpuid1.eax >> 8) & 0xF;   // base family,  bits [11:8]
 54 |     ret.model    = (cpuid1.eax >> 4) & 0xF;   // base model,   bits [7:4]
 55 |     ret.stepping = (cpuid1.eax     ) & 0xF;   // stepping,     bits [3:0]
 56 |     if (ret.family == 15 || ret.family == 6) {  // must test the *base* family, i.e. before the extended family is added below
 57 |         ret.model += ((cpuid1.eax >> 16) & 0xF) << 4; // extended model
 58 |     }
 59 |     if (ret.family == 15) {
 60 |         ret.family += (cpuid1.eax >> 20) & 0xFF;  // extended family
 61 |     }
 62 |     return ret;
 63 | }
 64 | 
 65 | family_model get_family_model() {
 66 |     static family_model cached_family_model = gfm_inner();
 67 |     return cached_family_model;
 68 | }
 69 | 
 70 | std::string get_brand_string() {
 71 |     auto check = cpuid(0x80000000);
 72 |     if (check.eax < 0x80000004) {
 73 |         return std::string("unkown (eax =") + std::to_string(check.eax) +")";
 74 |     }
 75 |     std::string ret;
 76 |     for (uint32_t eax : {0x80000002, 0x80000003, 0x80000004}) {
 77 |         char buf[17];
 78 |         auto fourchars = cpuid(eax);
 79 |         memcpy(buf +  0, &fourchars.eax, 4);
 80 |         memcpy(buf +  4, &fourchars.ebx, 4);
 81 |         memcpy(buf +  8, &fourchars.ecx, 4);
 82 |         memcpy(buf + 12, &fourchars.edx, 4);
 83 |         buf[16] = '\0';
 84 |         ret += buf;
 85 |     }
 86 |     return ret;
 87 | }
 88 | 
 89 | /* get bits [start:end] inclusive of the given value */
 90 | uint32_t get_bits(uint32_t value, int start, int end) {
 91 |     value >>= start;  // move bit `start` down to bit 0
 92 |     uint32_t mask = ((uint64_t)-1) << (end - start + 1);  // shifted in 64 bits so a full 32-bit width is still a valid shift count; then truncated to 32 bits
 93 |     return value & ~mask;  // keep only the low (end - start + 1) bits
 94 | }
 95 | 
 96 | /**
 97 |  * Get the shift amount for unique physical core IDs (CPUID leaf 0xb, level type 1), or -1 if not available
 98 |  */
 99 | int get_smt_shift()
100 | {
101 |     if (cpuid_highest_leaf() < 0xb) {
102 |         return -1;
103 |     }
104 |     uint32_t smtShift = -1u;  // sentinel: no type-1 level seen yet (converts to -1 on return)
105 |     for (uint32_t subleaf = 0; ; subleaf++) {
106 |         cpuid_result leafb = cpuid(0xb, subleaf);
107 |         uint32_t type  = get_bits(leafb.ecx, 8 ,15);
108 |         if (!get_bits(leafb.ebx,0,15) || type == 0) {
109 |             // done
110 |             break;
111 |         }
112 |         if (type == 1) {
113 |             // here's the value we are after: make sure we don't have more than one entry for
114 |             // this type though!
115 |             if (smtShift != -1u) {
116 |                 fprintf(stderr, "Warning: more than one level of type 1 in the x2APIC hierarchy\n");
117 |             }
118 |             smtShift = get_bits(leafb.eax, 0, 4);
119 |         }
120 |     }
121 |     return smtShift;
122 | }
123 | 
124 | 


--------------------------------------------------------------------------------
/cpuid.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cpuid.hpp
 3 |  */
 4 | 
 5 | #ifndef CPUID_HPP_
 6 | #define CPUID_HPP_
 7 | 
 8 | #include <cinttypes>
 9 | #include <string>
10 | 
11 | struct cpuid_result {
12 |     std::uint32_t eax, ebx, ecx, edx;
13 |     std::string to_string();
14 | };
15 | 
16 | struct family_model {
17 |     uint8_t family;
18 |     uint8_t model;
19 |     uint8_t stepping;
20 |     std::string to_string() {
21 |         std::string s;
22 |         s += "family = " + std::to_string(family) + ", ";
23 |         s += "model = " + std::to_string(model) + ", ";
24 |         s += "stepping = " + std::to_string(stepping);
25 |         return s;
26 |     }
27 | };
28 | 
29 | 
30 | /** the highest supported leaf value */
31 | uint32_t cpuid_highest_leaf();
32 | 
33 | /* return the CPUID result for querying the given leaf (EAX) and no subleaf (ECX=0) */
34 | cpuid_result cpuid(int leaf);
35 | 
36 | /* return the CPUID result for querying the given leaf (EAX) and subleaf (ECX) */
37 | cpuid_result cpuid(int leaf, int subleaf);
38 | 
39 | family_model get_family_model();
40 | 
41 | std::string get_brand_string();
42 | 
43 | int get_smt_shift();
44 | 
45 | /* get bits [start:end] inclusive of the given value */
46 | uint32_t get_bits(uint32_t value, int start, int end);
47 | 
48 | #endif /* CPUID_HPP_ */
49 | 


--------------------------------------------------------------------------------
/cycle-timer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * cycle-timer.c
  3 |  *
  4 |  * Implementation for cycle-timer.h
  5 |  */
  6 | 
  7 | #include "clock/clock.h"
  8 | 
  9 | #include "cycle-timer.h"
 10 | #include "hedley.h"
 11 | 
 12 | #include <stdlib.h>
 13 | #include <stdio.h>
 14 | 
 15 | 
 16 | const size_t ITERS = 10000;
 17 | const size_t TRIES = 11;
 18 | const size_t WARMUP = 1000;
 19 | 
 20 | volatile size_t sink;
 21 | /**
 22 |  * Calibration loop that relies on store throughput being exactly 1 per cycle
 23 |  * on all modern x86 chips, and the loop overhead running totally in parallel.
 24 |  */
 25 | HEDLEY_NEVER_INLINE
 26 | __attribute__((aligned(32)))
 27 | void store_calibration(size_t iters) {
 28 |     do {
 29 |         sink = iters;
 30 |     } while (--iters > 0);
 31 | }
 32 | 
 33 | int intcompare(const void *l_, const void *r_) {
 34 |     int64_t l = *(const uint64_t *)l_;
 35 |     int64_t r = *(const uint64_t *)r_;
 36 |     return (l > r) - (l < r);
 37 | }
 38 | 
 39 | /*
 40 |  * Calculate the frequency of the CPU based on timing a tight loop that we expect to
 41 |  * take one iteration per cycle.
 42 |  *
 43 |  * ITERS is the base number of iterations to use: the calibration routine is actually
 44 |  * run twice, once with ITERS iterations and once with 2*ITERS, and a delta is used to
 45 |  * remove measurement overhead.
 46 |  */
 47 | HEDLEY_NEVER_INLINE
 48 | static double get_ghz(bool print) {
 49 | 
 50 |     const char *force = getenv("CYCLE_TIMER_FORCE_MHZ");
 51 |     if (force) {
 52 |         int mhz = atoi(force);
 53 |         if (mhz) {
 54 |             double ghz = mhz / 1000.;
 55 |             if (print) fprintf(stderr, "Forced CPU speed (CYCLE_TIMER_FORCE_MHZ): %5.2f GHz\n", ghz);
 56 |             return ghz;
 57 |         } else {
 58 |             if (print) fprintf(stderr, "Bad value for CYCLE_TIMER_FORCE_MHZ: '%s' (falling back to cal loop)\n", force);
 59 |         }
 60 |     }
 61 | 
 62 |     int64_t results[TRIES];
 63 | 
 64 |     for (size_t w = 0; w < WARMUP + 1; w++) {
 65 |         for (size_t r = 0; r < TRIES; r++) {
 66 |             cl_timepoint t0 = cl_now();
 67 |             store_calibration(ITERS);
 68 |             cl_timepoint t1 = cl_now();
 69 |             store_calibration(ITERS * 2);
 70 |             cl_timepoint t2 = cl_now();
 71 |             results[r] = cl_delta(t1, t2).nanos - cl_delta(t0, t1).nanos;
 72 |         }
 73 |     }
 74 | 
 75 |     // return the median value
 76 |     qsort(results, TRIES, sizeof(results[0]), intcompare);
 77 |     double ghz = ((double)ITERS / results[TRIES/2]);
 78 |     if (print) fprintf(stderr, "Estimated CPU speed: %5.2f GHz\n", ghz);
 79 |     return ghz;
 80 | }
 81 | 
 82 | static bool is_init = false;
 83 | double ghz;
 84 | 
 85 | void cl_init(bool print) {  /* only the first call runs the calibration; later calls do nothing */
 86 |     if (HEDLEY_UNLIKELY(!is_init)) {
 87 |         ghz = get_ghz(print);  /* print controls whether get_ghz reports to stderr */
 88 |         is_init = true;
 89 |     }
 90 | }
 91 | 
 92 | cl_timepoint cl_now() {
 93 |     struct PsnipClockTimespec spec;
 94 |     if (psnip_clock_monotonic_get_time(&spec)) {
 95 |         return (cl_timepoint){0};
 96 |     } else {
 97 |         return (cl_timepoint){spec.seconds * 1000000000ll + spec.nanoseconds};
 98 |     }
 99 | }
100 | 
101 | /*
102 |  * Take an interval value and convert it to cycles based on the
103 |  * detected frequency of this host.
104 |  */
105 | double cl_to_cycles(cl_interval interval) {
106 |     cl_init(false);
107 |     return interval.nanos * ghz;
108 | }
109 | 
110 | /*
111 |  * Take an interval value and "convert" it to nanos.
112 |  */
113 | double cl_to_nanos(cl_interval interval) {
114 |     return interval.nanos;
115 | }
116 | 


--------------------------------------------------------------------------------
/cycle-timer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * cycle-timer.h
 3 |  *
 4 |  * A timer that returns results in CPU cycles in addition to nanoseconds.
 5 |  * It measures cycles indirectly by measuring the wall-time, and then converting
 6 |  * that to a cycle count based on a calibration loop performed once at startup.
 7 |  */
 8 | 
 9 | #ifndef CYCLE_TIMER_H_
10 | #define CYCLE_TIMER_H_
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
16 | #include <inttypes.h>
17 | #include <stdbool.h>
18 | 
19 | /**
20 |  * A point in time, or an interval when subtracted. You should probably
21 |  * treat this as an opaque struct, in case I change the implementation
22 |  * someday.
23 |  */
24 | struct cl_timepoint_ {
25 |     int64_t nanos;
26 | };
27 | typedef struct cl_timepoint_ cl_timepoint;
28 | 
29 | /**
30 |  * An interval created by subtracting two points in time, measured
31 |  * in nanoseconds.
32 |  */
33 | struct cl_interval_ {
34 |     int64_t nanos;
35 | };
36 | typedef struct cl_interval_ cl_interval;
37 | 
38 | /* return the current moment in time as a cl_timepoint */
39 | cl_timepoint cl_now();
40 | 
41 | /*
42 |  * Return the interval between timepoints first and second.
43 |  * This value is positive iff second occurs after first.
44 |  */
45 | static inline cl_interval cl_delta(cl_timepoint first, cl_timepoint second) {
46 |     return (cl_interval){second.nanos - first.nanos};
47 | }
48 | 
49 | /*
50 |  * Take an interval value and convert it to cycles based on the
51 |  * detected frequency of this host.
52 |  */
53 | double cl_to_cycles(cl_interval interval);
54 | 
55 | double cl_to_nanos(cl_interval interval);
56 | 
57 | /*
58 |  * Initialize the cycletimer infrastructure. Mostly this just means calculating
59 |  * the cycle to nanoseconds value (i.e., the CPU frequency). You never *need* to
60 |  * use this function: if you haven't called it, it will happen automatically when
61 |  * init is necessary (usually lazily - when calling cl_to_cycles),
62 |  * but may be lengthy, so this method is offered so that the user can trigger
63 |  * it at a time of their choosing (and allowing the user to elect whether to
64 |  * print out diagnostic information about the calibration).
65 |  *
66 |  * If you pass true for print, diagnostic information like the detected CPU
67 |  * frequency is printed to stderr.
68 |  */
69 | void cl_init(bool print);
70 | 
71 | #ifdef __cplusplus
72 | }
73 | #endif
74 | 
75 | #endif /* CYCLE_TIMER_H_ */
76 | 


--------------------------------------------------------------------------------
/dbg.h:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************
  2 | 
  3 |                                 dbg(...) macro
  4 | 
  5 | License (MIT):
  6 | 
  7 |   Copyright (c) 2019 David Peter <mail@david-peter.de>
  8 | 
  9 |   Permission is hereby granted, free of charge, to any person obtaining a copy
 10 |   of this software and associated documentation files (the "Software"), to
 11 |   deal in the Software without restriction, including without limitation the
 12 |   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 13 |   sell copies of the Software, and to permit persons to whom the Software is
 14 |   furnished to do so, subject to the following conditions:
 15 | 
 16 |   The above copyright notice and this permission notice shall be included in
 17 |   all copies or substantial portions of the Software.
 18 | 
 19 |   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 20 |   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 21 |   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 22 |   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 23 |   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 24 |   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 25 |   SOFTWARE.
 26 | 
 27 | *****************************************************************************/
 28 | 
 29 | #ifndef DBG_MACRO_DBG_H
 30 | #define DBG_MACRO_DBG_H
 31 | 
 32 | #pragma message("WARNING: the 'dbg.h' header is included in your code base")
 33 | 
 34 | #include <ios>
 35 | #include <iostream>
 36 | #include <sstream>
 37 | #include <string>
 38 | #include <type_traits>
 39 | #include <vector>
 40 | 
 41 | #include <unistd.h>
 42 | 
 43 | #if __cplusplus >= 201703L
 44 | #include <optional>
 45 | #endif
 46 | 
 47 | namespace dbg_macro {
 48 | 
 49 | namespace pretty_function {
 50 | 
 51 | // Compiler-agnostic version of __PRETTY_FUNCTION__ and constants to
 52 | // extract the template argument in `type_name_impl`
 53 | 
 54 | #if defined(__clang__)
 55 | #define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__
 56 | static constexpr size_t PREFIX_LENGTH =
 57 |     sizeof("const char *dbg_macro::type_name_impl() [T = ") - 1;
 58 | static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1;
 59 | #elif defined(__GNUC__) && !defined(__clang__)
 60 | #define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__
 61 | static constexpr size_t PREFIX_LENGTH =
 62 |     sizeof("const char* dbg_macro::type_name_impl() [with T = ") - 1;
 63 | static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1;
 64 | #elif defined(_MSC_VER)
 65 | #define DBG_MACRO_PRETTY_FUNCTION __FUNCSIG__
 66 | static constexpr size_t PREFIX_LENGTH =
 67 |     sizeof("const char *__cdecl dbg_macro::type_name_impl<") - 1;
 68 | static constexpr size_t SUFFIX_LENGTH = sizeof(">(void)") - 1;
 69 | #else
 70 | #error "This compiler is currently not supported by dbg_macro."
 71 | #endif
 72 | 
 73 | }  // namespace pretty_function
 74 | 
 75 | // Implementation of 'type_name<T>()'
 76 | 
 77 | template <typename T>
 78 | const char* type_name_impl() {
 79 |   return DBG_MACRO_PRETTY_FUNCTION;
 80 | }
 81 | 
 82 | template <typename T>
 83 | struct type_tag {};
 84 | 
 85 | template <int&... ExplicitArgumentBarrier, typename T>
 86 | std::string get_type_name(type_tag<T>) {
 87 |   namespace pf = pretty_function;
 88 | 
 89 |   std::string type = type_name_impl<T>();
 90 |   return type.substr(pf::PREFIX_LENGTH,
 91 |                      type.size() - pf::PREFIX_LENGTH - pf::SUFFIX_LENGTH);
 92 | }
 93 | 
 94 | template <typename T>
 95 | std::string type_name() {
 96 |   if (std::is_volatile<T>::value) {
 97 |     if (std::is_pointer<T>::value) {
 98 |       return type_name<typename std::remove_volatile<T>::type>() + " volatile";
 99 |     } else {
100 |       return "volatile " + type_name<typename std::remove_volatile<T>::type>();
101 |     }
102 |   }
103 |   if (std::is_const<T>::value) {
104 |     if (std::is_pointer<T>::value) {
105 |       return type_name<typename std::remove_const<T>::type>() + " const";
106 |     } else {
107 |       return "const " + type_name<typename std::remove_const<T>::type>();
108 |     }
109 |   }
110 |   if (std::is_pointer<T>::value) {
111 |     return type_name<typename std::remove_pointer<T>::type>() + "*";
112 |   }
113 |   if (std::is_lvalue_reference<T>::value) {
114 |     return type_name<typename std::remove_reference<T>::type>() + "&";
115 |   }
116 |   if (std::is_rvalue_reference<T>::value) {
117 |     return type_name<typename std::remove_reference<T>::type>() + "&&";
118 |   }
119 |   return get_type_name(type_tag<T>{});
120 | }
121 | 
122 | inline std::string get_type_name(type_tag<short>) {
123 |   return "short";
124 | }
125 | 
126 | inline std::string get_type_name(type_tag<unsigned short>) {
127 |   return "unsigned short";
128 | }
129 | 
130 | inline std::string get_type_name(type_tag<long>) {
131 |   return "long";
132 | }
133 | 
134 | inline std::string get_type_name(type_tag<unsigned long>) {
135 |   return "unsigned long";
136 | }
137 | 
138 | inline std::string get_type_name(type_tag<std::string>) {
139 |   return "std::string";
140 | }
141 | 
142 | template <typename T>
143 | std::string get_type_name(type_tag<std::vector<T, std::allocator<T>>>) {
144 |   return "std::vector<" + type_name<T>() + ">";
145 | }
146 | 
147 | // Implementation of 'is_detected' to specialize for container-like types
148 | 
namespace detail_detector {

// Placeholder type that can be neither constructed, destroyed, copied nor
// assigned; used as the `Default` argument of `detector`.
struct nonesuch {
  nonesuch() = delete;
  nonesuch(nonesuch const&) = delete;
  ~nonesuch() = delete;
  void operator=(nonesuch const&) = delete;
};

// Maps any list of well-formed types to void.
template <typename...>
using void_t = void;

// Primary template: selected when Op<Args...> is ill-formed (or when the
// second argument is not void). Reports "not detected" and yields Fallback.
template <class Fallback,
          class AlwaysVoid,
          template <class...> class Op,
          class... Args>
struct detector {
  using value_t = std::false_type;
  using type = Fallback;
};

// Partial specialization: selected only when Op<Args...> is a valid type.
template <class Fallback, template <class...> class Op, class... Args>
struct detector<Fallback, void_t<Op<Args...>>, Op, Args...> {
  using value_t = std::true_type;
  using type = Op<Args...>;
};

}  // namespace detail_detector
178 | 
179 | template <template <class...> class Op, class... Args>
180 | using is_detected = typename detail_detector::
181 |     detector<detail_detector::nonesuch, void, Op, Args...>::value_t;
182 | 
183 | template <typename T>
184 | using detect_begin_t = decltype(begin(std::declval<T>()));
185 | 
186 | template <typename T>
187 | using detect_end_t = decltype(end(std::declval<T>()));
188 | 
189 | template <typename T>
190 | using detect_size_t = decltype(std::declval<T>().size());
191 | 
192 | template <typename T>
193 | struct has_begin_end_size {
194 |   static constexpr bool value = is_detected<detect_begin_t, T>::value &&
195 |                                 is_detected<detect_end_t, T>::value &&
196 |                                 is_detected<detect_size_t, T>::value;
197 | };
198 | 
199 | // Specializations of "pretty_print"
200 | 
201 | template <typename T>
202 | typename std::enable_if<!has_begin_end_size<T>::value &&
203 |                             !std::is_enum<T>::value,
204 |                         bool>::type
205 | pretty_print(std::ostream& stream, const T& value) {
206 |   stream << value;
207 |   return true;
208 | }
209 | 
210 | template <>
211 | inline bool pretty_print(std::ostream& stream, const bool& value) {
212 |   stream << std::boolalpha << value;
213 |   return true;
214 | }
215 | 
216 | template <>
217 | inline bool pretty_print(std::ostream& stream, const char& value) {
218 |   stream << "'" << value << "'";
219 |   return true;
220 | }
221 | 
// Pointers: a null pointer is written as the word "nullptr"; any other
// pointer is handed to the stream's own operator<<. Always returns true.
template <typename P>
bool pretty_print(std::ostream& stream, P* const& value) {
  if (value != nullptr) {
    stream << value;
    return true;
  }
  stream << "nullptr";
  return true;
}
231 | 
// Character arrays (e.g. string literals) are written verbatim, without
// quotes. Returns false, which tells the caller to print only the text and to
// omit the expression and the type.
template <size_t Length>
bool pretty_print(std::ostream& stream, const char (&value)[Length]) {
  const bool print_expr_and_type = false;
  stream << value;
  return print_expr_and_type;
}
237 | 
238 | template <>
239 | inline bool pretty_print(std::ostream& stream, const char* const& value) {
240 |   stream << '"' << value << '"';
241 |   return true;
242 | }
243 | 
244 | #if __cplusplus >= 201703L
245 | 
// std::optional: an engaged optional is written as "{value}" using the
// contained type's operator<<, an empty one as "nullopt".
template <typename T>
bool pretty_print(std::ostream& stream, const std::optional<T>& value) {
  if (!value) {
    stream << "nullopt";
    return true;
  }

  stream << '{' << *value << '}';
  return true;
}
256 | 
257 | #endif  // __cplusplus >= 201703L
258 | 
259 | template <typename Container>
260 | typename std::enable_if<has_begin_end_size<Container>::value, bool>::type
261 | pretty_print(std::ostream& stream, Container const& value) {
262 |   stream << "{";
263 |   const size_t size = value.size();
264 |   const size_t n = std::min(size_t{16}, size);
265 |   size_t i = 0;
266 |   for (auto it = begin(value); it != end(value) && i < n; ++it, ++i) {
267 |     pretty_print(stream, *it);
268 |     if (i != n - 1) {
269 |       stream << ", ";
270 |     }
271 |   }
272 | 
273 |   if (size > n) {
274 |     stream << ", ...";
275 |     stream << " size:" << size;
276 |   }
277 | 
278 |   stream << "}";
279 |   return true;
280 | }
281 | 
// Enumerations are written as the value of their underlying integer type.
template <typename Enum>
typename std::enable_if<std::is_enum<Enum>::value, bool>::type pretty_print(
    std::ostream& stream,
    Enum const& value) {
  typedef typename std::underlying_type<Enum>::type Raw;
  const Raw raw = static_cast<Raw>(value);
  stream << raw;

  return true;
}
291 | 
292 | template <>
293 | inline bool pretty_print(std::ostream& stream, const std::string& value) {
294 |   stream << '"' << value << '"';
295 |   return true;
296 | }
297 | 
// Formats and emits one dbg(...) line on std::cerr:
//   [file:line (function)] expression = value (type)
// ANSI colour codes are emitted only when stderr is a terminal.
class DebugOutput {
 public:
  // Captures the call site. File paths longer than MAX_PATH_LENGTH are
  // shortened to ".." followed by their last MAX_PATH_LENGTH characters.
  DebugOutput(const char* filepath,
              int line,
              const char* function_name,
              const char* expression)
      : m_stderr_is_a_tty(isatty(fileno(stderr))),
        m_filepath(filepath),
        m_line(line),
        m_function_name(function_name),
        m_expression(expression) {
    const int full_length = m_filepath.length();
    const int excess = full_length - MAX_PATH_LENGTH;
    if (excess > 0) {
      m_filepath.replace(0, excess, "..");
    }
  }

  // Prints `value` (rendered through pretty_print) and forwards it back to
  // the caller unchanged, so dbg(...) can be used inside expressions.
  // When pretty_print returns false, only the location and the rendered
  // value are printed; expression and type are left out.
  template <typename T>
  T&& print(const std::string& type, T&& value) const {
    const T& printable = value;
    std::stringstream rendered;
    const bool with_expr_and_type = pretty_print(rendered, printable);

    std::ostream& out = std::cerr;
    out << ansi(ANSI_DEBUG) << "[" << m_filepath << ":" << m_line << " ("
        << m_function_name << ")] " << ansi(ANSI_RESET);
    if (with_expr_and_type) {
      out << ansi(ANSI_EXPRESSION) << m_expression << ansi(ANSI_RESET);
      out << " = ";
    }
    out << ansi(ANSI_VALUE) << rendered.str() << ansi(ANSI_RESET);
    if (with_expr_and_type) {
      out << " (" << ansi(ANSI_TYPE) << type << ansi(ANSI_RESET) << ")";
    }
    out << std::endl;

    return std::forward<T>(value);
  }

 private:
  // Returns `code` when colouring is enabled, an empty string otherwise.
  const char* ansi(const char* code) const {
    return m_stderr_is_a_tty ? code : ANSI_EMPTY;
  }

  const bool m_stderr_is_a_tty;

  std::string m_filepath;
  const int m_line;
  const std::string m_function_name;
  const std::string m_expression;

  static constexpr int MAX_PATH_LENGTH = 20;

  static constexpr const char* const ANSI_EMPTY = "";
  static constexpr const char* const ANSI_DEBUG = "\x1b[02m";
  static constexpr const char* const ANSI_EXPRESSION = "\x1b[36m";
  static constexpr const char* const ANSI_VALUE = "\x1b[01m";
  static constexpr const char* const ANSI_TYPE = "\x1b[32m";
  static constexpr const char* const ANSI_RESET = "\x1b[0m";
};
362 | 
363 | }  // namespace dbg_macro
364 | 
#ifndef DBG_MACRO_DISABLE
// dbg(expr) prints the call site, the expression text, its value and its
// type, and evaluates to the expression itself.
// We use a variadic macro to support commas inside expressions (e.g.
// initializer lists):
#define dbg(...)                                                     \
  dbg_macro::DebugOutput(__FILE__, __LINE__, __func__, #__VA_ARGS__) \
      .print(dbg_macro::type_name<decltype(__VA_ARGS__)>(), (__VA_ARGS__))
#else
// Disabled: dbg(expr) is just the expression. It is parenthesized exactly as
// in the enabled form above, so operator precedence (`2 * dbg(a + b)`) and
// top-level commas behave the same whether or not debugging is compiled in.
#define dbg(...) (__VA_ARGS__)
#endif  // DBG_MACRO_DISABLE
374 | 
375 | #endif  // DBG_MACRO_DBG_H
376 | 


--------------------------------------------------------------------------------
/env.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Miscellaneous functions for dealing with environment variables.
 3 |  */
 4 | 
 5 | #ifndef ENV_H_
 6 | #define ENV_H_
 7 | 
 8 | #include <exception>
 9 | #include <string>
10 | #include <sstream>
11 | #include <ios>
12 | 
13 | #include <string.h>
14 | 
15 | namespace env {
16 | 
17 | namespace detail {
/**
 * Converts the text in str to a T using stream extraction (operator>>).
 *
 * The result is value-initialized first: when extraction cannot even start
 * (for example on an empty string) the stream leaves the target untouched,
 * and returning an uninitialized scalar would be undefined behaviour. In
 * that case the value-initialized T (0, false, empty string, ...) is
 * returned.
 */
template <typename T>
T parse_from_string(const std::string& str) {
   std::istringstream ss(str);
   T result{};
   ss >> result;
   return result;
}
25 | }
26 | 
/**
 * Thrown by getenv_generic when the requested environment variable is not
 * set; the message is passed through to std::runtime_error.
 */
struct envvar_not_found : public std::runtime_error {
    envvar_not_found(std::string msg)
        : std::runtime_error(std::move(msg))
    {}
};
30 | 
31 | /**
32 |  * Gets the value of the given environment variable, throws an
33 |  * exception if not found. Usually you'll want the version that
34 |  * takes a default value.
35 |  */
36 | template <typename T>
37 | T getenv_generic(const std::string &name) {
38 |     const char *val = getenv(name.c_str());
39 |     if (val) {
40 |         return detail::parse_from_string<T>(val);
41 |     } else {
42 |         throw envvar_not_found{"env var " + name + " not found "};
43 |     }
44 | }
45 | 
46 | /**
47 |  * Gets the converted value of the environment variable with the
48 |  * given name, or returns the given default_value value if not found.
49 |  */
50 | template <typename T>
51 | T getenv_generic(const std::string &name, const T& default_value) {
52 |     try {
53 |         return getenv_generic<T>(name);
54 |     } catch (const envvar_not_found &) {
55 |         return default_value;
56 |     }
57 | }
58 | 
/**
 * Returns the environment variable `var` converted with atoi, or `def` when
 * the variable is not set.
 *
 * Declared inline because it is defined in a header: without it, including
 * this file from more than one translation unit produces duplicate symbols
 * at link time.
 */
inline int getenv_int(const char *var, int def) {
    const char *val = getenv(var);
    return val ? atoi(val) : def;
}
63 | 
/**
 * Returns the environment variable `var` converted with atoll, or `def` when
 * the variable is not set.
 *
 * Declared inline because it is defined in a header: without it, including
 * this file from more than one translation unit produces duplicate symbols
 * at link time.
 */
inline long long getenv_longlong(const char *var, long long def) {
    const char *val = getenv(var);
    return val ? atoll(val) : def;
}
68 | 
69 | bool getenv_bool(const char *var) {
70 |     return getenv_generic<bool>(var, false);
71 | }
72 | 
73 | }
74 | 
75 | #endif
76 | 


--------------------------------------------------------------------------------
/exact-int/exact-int.h:
--------------------------------------------------------------------------------
  1 | /* Exact-width integer types
  2 |  * Portable Snippets - https://github.com/nemequ/portable-snippets
  3 |  * Created by Evan Nemerson <evan@nemerson.com>
  4 |  *
  5 |  *   To the extent possible under law, the authors have waived all
  6 |  *   copyright and related or neighboring rights to this code.  For
  7 |  *   details, see the Creative Commons Zero 1.0 Universal license at
  8 |  *   https://creativecommons.org/publicdomain/zero/1.0/
  9 |  *
 10 |  * This header tries to define psnip_(u)int(8|16|32|64)_t to
 11 |  * appropriate types given your system.  For most systems this means
 12 |  * including <stdint.h> and adding a few preprocessor definitions.
 13 |  *
 14 |  * If you prefer, you can define any necessary types yourself.
 15 |  * Snippets in this repository which rely on these types will not
 16 |  * attempt to include this header if you have already defined the
 17 |  * types it uses.
 18 |  */
 19 | 
 20 | #if !defined(PSNIP_EXACT_INT_H)
 21 | #  define PSNIP_EXACT_INT_H
 22 | #  if !defined(PSNIP_EXACT_INT_HAVE_STDINT)
 23 | #    if defined(_STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
 24 | #      define PSNIP_EXACT_INT_HAVE_STDINT
 25 | #    elif defined(__has_include)
 26 | #      if __has_include(<stdint.h>)
 27 | #        define PSNIP_EXACT_INT_HAVE_STDINT
 28 | #      endif
 29 | #    elif \
 30 |       defined(HAVE_STDINT_H) || \
 31 |       defined(_STDINT_H_INCLUDED) || \
 32 |       defined(_STDINT_H) || \
 33 |       defined(_STDINT_H_)
 34 | #      define PSNIP_EXACT_INT_HAVE_STDINT
 35 | #    elif \
 36 |       (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || \
 37 |       (defined(_MSC_VER) && (_MSC_VER >= 1600)) || \
 38 |       (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x570)) || \
 39 |       (defined(__WATCOMC__) && (__WATCOMC__ >= 1250))
 40 | #      define PSNIP_EXACT_INT_HAVE_STDINT
 41 | #    endif
 42 | #  endif
 43 | 
/* Select the psnip_(u)intN_t definitions. Strategies are tried in order:
 *   1. the compiler's predefined __INTn_TYPE__ / __UINTn_TYPE__ macros,
 *   2. <stdint.h>, when PSNIP_EXACT_INT_HAVE_STDINT is defined,
 *   3. the MSVC __intN keywords,
 *   4. probing the <limits.h> ranges of the basic integer types.
 * Strategies 2-4 leave alone any psnip_* name that is already defined. */
#  if \
  defined(__INT8_TYPE__) && defined(__INT16_TYPE__) && defined(__INT32_TYPE__) && defined(__INT64_TYPE__) && \
  defined(__UINT8_TYPE__) && defined(__UINT16_TYPE__) && defined(__UINT32_TYPE__) && defined(__UINT64_TYPE__)
#    define psnip_int8_t   __INT8_TYPE__
#    define psnip_int16_t  __INT16_TYPE__
#    define psnip_int32_t  __INT32_TYPE__
#    define psnip_int64_t  __INT64_TYPE__
#    define psnip_uint8_t  __UINT8_TYPE__
#    define psnip_uint16_t __UINT16_TYPE__
#    define psnip_uint32_t __UINT32_TYPE__
#    define psnip_uint64_t __UINT64_TYPE__
/* Strategy 2: map onto the <stdint.h> typedefs. */
#  elif defined(PSNIP_EXACT_INT_HAVE_STDINT)
#    include <stdint.h>
#    if !defined(psnip_int8_t)
#      define psnip_int8_t int8_t
#    endif
#    if !defined(psnip_uint8_t)
#      define psnip_uint8_t uint8_t
#    endif
#    if !defined(psnip_int16_t)
#      define psnip_int16_t int16_t
#    endif
#    if !defined(psnip_uint16_t)
#      define psnip_uint16_t uint16_t
#    endif
#    if !defined(psnip_int32_t)
#      define psnip_int32_t int32_t
#    endif
#    if !defined(psnip_uint32_t)
#      define psnip_uint32_t uint32_t
#    endif
#    if !defined(psnip_int64_t)
#      define psnip_int64_t int64_t
#    endif
#    if !defined(psnip_uint64_t)
#      define psnip_uint64_t uint64_t
#    endif
/* Strategy 3: MSVC without <stdint.h>; use the sized __intN keywords. */
#  elif defined(_MSC_VER)
#    if !defined(psnip_int8_t)
#      define psnip_int8_t __int8
#    endif
#    if !defined(psnip_uint8_t)
#      define psnip_uint8_t unsigned __int8
#    endif
#    if !defined(psnip_int16_t)
#      define psnip_int16_t __int16
#    endif
#    if !defined(psnip_uint16_t)
#      define psnip_uint16_t unsigned __int16
#    endif
#    if !defined(psnip_int32_t)
#      define psnip_int32_t __int32
#    endif
#    if !defined(psnip_uint32_t)
#      define psnip_uint32_t unsigned __int32
#    endif
#    if !defined(psnip_int64_t)
#      define psnip_int64_t __int64
#    endif
#    if !defined(psnip_uint64_t)
#      define psnip_uint64_t unsigned __int64
#    endif
/* Strategy 4: for each width, pick the first basic type (char, short, int,
 * long, long long) whose <limits.h> range matches exactly; #error if none
 * does. */
#  else
#    include <limits.h>
#    if !defined(psnip_int8_t)
#      if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-127-1)) && (CHAR_MAX == 127)
#        define psnip_int8_t char
#      elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-127-1)) && (SHRT_MAX == 127)
#        define psnip_int8_t short
#      elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-127-1)) && (INT_MAX == 127)
#        define psnip_int8_t int
#      elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-127-1)) && (LONG_MAX == 127)
#        define psnip_int8_t long
#      elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-127-1)) && (LLONG_MAX == 127)
#        define psnip_int8_t long long
#      else
#        error Unable to locate 8-bit signed integer type.
#      endif
#    endif
#    if !defined(psnip_uint8_t)
#      if defined(UCHAR_MAX) && (UCHAR_MAX == 255)
#        define psnip_uint8_t unsigned char
#      elif defined(USHRT_MAX) && (USHRT_MAX == 255)
#        define psnip_uint8_t unsigned short
#      elif defined(UINT_MAX) && (UINT_MAX == 255)
#        define psnip_uint8_t unsigned int
#      elif defined(ULONG_MAX) && (ULONG_MAX == 255)
#        define psnip_uint8_t unsigned long
#      elif defined(ULLONG_MAX) && (ULLONG_MAX == 255)
#        define psnip_uint8_t unsigned long long
#      else
#        error Unable to locate 8-bit unsigned integer type.
#      endif
#    endif
#    if !defined(psnip_int16_t)
#      if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-32767-1)) && (CHAR_MAX == 32767)
#        define psnip_int16_t char
#      elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-32767-1)) && (SHRT_MAX == 32767)
#        define psnip_int16_t short
#      elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-32767-1)) && (INT_MAX == 32767)
#        define psnip_int16_t int
#      elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-32767-1)) && (LONG_MAX == 32767)
#        define psnip_int16_t long
#      elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-32767-1)) && (LLONG_MAX == 32767)
#        define psnip_int16_t long long
#      else
#        error Unable to locate 16-bit signed integer type.
#      endif
#    endif
#    if !defined(psnip_uint16_t)
#      if defined(UCHAR_MAX) && (UCHAR_MAX == 65535)
#        define psnip_uint16_t unsigned char
#      elif defined(USHRT_MAX) && (USHRT_MAX == 65535)
#        define psnip_uint16_t unsigned short
#      elif defined(UINT_MAX) && (UINT_MAX == 65535)
#        define psnip_uint16_t unsigned int
#      elif defined(ULONG_MAX) && (ULONG_MAX == 65535)
#        define psnip_uint16_t unsigned long
#      elif defined(ULLONG_MAX) && (ULLONG_MAX == 65535)
#        define psnip_uint16_t unsigned long long
#      else
#        error Unable to locate 16-bit unsigned integer type.
#      endif
#    endif
#    if !defined(psnip_int32_t)
#      if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-2147483647-1)) && (CHAR_MAX == 2147483647)
#        define psnip_int32_t char
#      elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-2147483647-1)) && (SHRT_MAX == 2147483647)
#        define psnip_int32_t short
#      elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-2147483647-1)) && (INT_MAX == 2147483647)
#        define psnip_int32_t int
#      elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-2147483647-1)) && (LONG_MAX == 2147483647)
#        define psnip_int32_t long
#      elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-2147483647-1)) && (LLONG_MAX == 2147483647)
#        define psnip_int32_t long long
#      else
#        error Unable to locate 32-bit signed integer type.
#      endif
#    endif
#    if !defined(psnip_uint32_t)
#      if defined(UCHAR_MAX) && (UCHAR_MAX == 4294967295)
#        define psnip_uint32_t unsigned char
#      elif defined(USHRT_MAX) && (USHRT_MAX == 4294967295)
#        define psnip_uint32_t unsigned short
#      elif defined(UINT_MAX) && (UINT_MAX == 4294967295)
#        define psnip_uint32_t unsigned int
#      elif defined(ULONG_MAX) && (ULONG_MAX == 4294967295)
#        define psnip_uint32_t unsigned long
#      elif defined(ULLONG_MAX) && (ULLONG_MAX == 4294967295)
#        define psnip_uint32_t unsigned long long
#      else
#        error Unable to locate 32-bit unsigned integer type.
#      endif
#    endif
#    if !defined(psnip_int64_t)
#      if defined(CHAR_MIN) && defined(CHAR_MAX) && (CHAR_MIN == (-9223372036854775807LL-1)) && (CHAR_MAX == 9223372036854775807LL)
#        define psnip_int64_t char
#      elif defined(SHRT_MIN) && defined(SHRT_MAX) && (SHRT_MIN == (-9223372036854775807LL-1)) && (SHRT_MAX == 9223372036854775807LL)
#        define psnip_int64_t short
#      elif defined(INT_MIN) && defined(INT_MAX) && (INT_MIN == (-9223372036854775807LL-1)) && (INT_MAX == 9223372036854775807LL)
#        define psnip_int64_t int
#      elif defined(LONG_MIN) && defined(LONG_MAX) && (LONG_MIN == (-9223372036854775807LL-1)) && (LONG_MAX == 9223372036854775807LL)
#        define psnip_int64_t long
#      elif defined(LLONG_MIN) && defined(LLONG_MAX) && (LLONG_MIN == (-9223372036854775807LL-1)) && (LLONG_MAX == 9223372036854775807LL)
#        define psnip_int64_t long long
#      else
#        error Unable to locate 64-bit signed integer type.
#      endif
#    endif
#    if !defined(psnip_uint64_t)
#      if defined(UCHAR_MAX) && (UCHAR_MAX == 18446744073709551615ULL)
#        define psnip_uint64_t unsigned char
#      elif defined(USHRT_MAX) && (USHRT_MAX == 18446744073709551615ULL)
#        define psnip_uint64_t unsigned short
#      elif defined(UINT_MAX) && (UINT_MAX == 18446744073709551615ULL)
#        define psnip_uint64_t unsigned int
#      elif defined(ULONG_MAX) && (ULONG_MAX == 18446744073709551615ULL)
#        define psnip_uint64_t unsigned long
#      elif defined(ULLONG_MAX) && (ULLONG_MAX == 18446744073709551615ULL)
#        define psnip_uint64_t unsigned long long
#      else
#        error Unable to locate 64-bit unsigned integer type.
#      endif
#    endif
#  endif
229 | #endif
230 | 


--------------------------------------------------------------------------------
/generate-event-code.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python2
 2 | # travis: generate events code for perf-event-timer
 3 | import os
 4 | import sys
 5 | import re
 6 | 
 7 | ocperf_dir = os.environ.get('OCPERF_DIR')
 8 | if not ocperf_dir:
 9 |     sys.exit('pmu-tools not found: set OCPERF_DIR env var to the path to pmu-tools')
10 | if not os.path.isdir(ocperf_dir):
11 |     sys.exit('OCPERF_DIR ({}) not found'.format(ocperf_dir))
12 | 
13 | sys.path.insert(0, ocperf_dir)
14 | import ocperf
15 | 
16 | pattern = ('.*('
17 |     'CPU_CLK_UNHALTED.*'
18 |     '|INST_RETIRED_ANY'
19 |     '|HW_IN'
20 |     '|L1D_'
21 |     '|MEM_(LOAD|INST)_RET'
22 |     '|L2_RQ'
23 |     '|UOPS_ISSUED_ANY'
24 |     '|UOPS_DISPATCHED_PORT)')
25 | 
26 | emap = ocperf.find_emap()
27 | if not emap:
28 |     sys.exit("Unknown CPU or cannot find event table")
29 | 
30 | # header file has one const object for each event
31 | header = open("perf-timer-events.hpp", "w")
32 | header.write("#include \"perf-timer.hpp\"\n\n")
33 | header.write("std::vector<PerfEvent> get_all_events();\n\n")
34 | 
35 | # cpp file has a function to return an array of all events
36 | cpp = open("perf-timer-events.cpp", "w")
37 | cpp.write('''
38 | #include \"perf-timer-events.hpp\"\n\n
39 | std::vector<PerfEvent> get_all_events() {
40 |     static std::vector<PerfEvent> ALL = {
41 | ''')
42 | 
43 | for j in sorted(emap.events):
44 |     varname = j.replace('.', '_').upper()
45 |     if (re.match(pattern, varname)):
46 |         header.write('const PerfEvent {:30} = PerfEvent( "{}", "{}" );\n'.format(varname, j, emap.events[j].output(noname=True)))
47 |         cpp.write('        {:34},\n'.format(varname))
48 | 
49 | header.write('const PerfEvent NoEvent = {"",""};\n')
50 | cpp.write('''
51 |     };
52 |     return ALL;
53 | }''')
54 | 


--------------------------------------------------------------------------------
/impl-list.cpp:
--------------------------------------------------------------------------------
 1 | #include "impl-list.hpp"
 2 | #include "basic-impls.hpp"
 3 | #include "common-cxx.hpp"
 4 | #include "misc.hpp"
 5 | 
// Expands to one table entry for the function vporxymm250_<rep>, named
// "vporxymm250_<rep>" and flagged NONE. It is applied through ALL_RATIOS_X
// below, which is defined outside this file.
#define MAKE250_ENTRY(rep) {"vporxymm250_" #rep,  vporxymm250_ ## rep,  "250x xyayaya ratio " #rep, NONE},

// Master table of benchmarks. Each entry initializes a test_description in
// field order: name, function, description, flags.
const test_description all_funcs[] = {
    {"vporxmm",        vporxmm,        "vpor xmm", NO_VZ },
    {"vporymm",        vporymm,        "vpor ymm", NO_VZ },
    {"vporzmm",        vporzmm,        "vpor zmm", NO_VZ },
    {"vporxmm_vz",     vporxmm_vz,     "vpor xmm w/ vzeroupper", NONE},
    {"vporymm_vz",     vporymm_vz,     "vpor ymm w/ vzeroupper", NONE},
    {"vporzmm_vz",     vporzmm_vz,     "vpor zmm w/ vzeroupper", NONE},
    {"vporxmm_vz100",  vporxmm_vz100,  "100x vpor lat xmm w/ vzero", NONE},
    {"vporymm_vz100",  vporymm_vz100,  "100x vpor lat ymm w/ vzero", NONE},
    {"vporzmm_vz100",  vporzmm_vz100,  "100x vpor lat zmm w/ vzero", NONE},
    {"vpermdzmm_vz100",  vpermdzmm_vz100,  "100x vpermd lat zmm w/ vzero", NONE},
    {"vporxmm_tput_vz100",  vporxmm_tput_vz100,  "100x vpor tput xmm w/ vzero", NONE},
    {"vporymm_tput_vz100",  vporymm_tput_vz100,  "100x vpor tput ymm w/ vzero", NONE},
    {"vporzmm_tput_vz100",  vporzmm_tput_vz100,  "100x vpor tput zmm w/ vzero", NONE},
    {"vporxymm250",  vporxymm250,  "250x yxxx lat w/ vzero", NONE},
    {"vporyzmm250",  vporyzmm250,  "250x zyyy lat w/ vzero", NONE},
    ALL_RATIOS_X(MAKE250_ENTRY)
    {"mulxymm250_10",  mulxymm250_10,  "1x vpor ymm 10x imul", NONE},

    // {"vpermdxmm_tput_vz100",  vpermdxmm_tput_vz100,  "100x vpermd tput xmm w/ vzero", NONE},
    // {"vpermdymm_tput_vz100",  vpermdymm_tput_vz100,  "100x vpermd tput ymm w/ vzero", NONE},
    {"vpermdzmm_tput_vz100",  vpermdzmm_tput_vz100,  "100x vpermd tput zmm w/ vzero", NONE},
    {"dummy",          dummy,          "empty function", NONE},
};
32 | 
33 | auto b() -> decltype(get_all().begin()) {
34 |     return get_all().begin();
35 | }
36 | 
37 | auto e() -> decltype(get_all().end()) {
38 |     return get_all().end();
39 | }
40 | 
41 | const test_description* get_by_name(const std::string& name) {
42 |     auto it = std::find_if(b(), e(), [&](auto d) { return name == d.name; });
43 |     return it == e() ? nullptr : &*it;
44 | }
45 | 
46 | std::vector<test_description> get_by_list(const std::string& list) {
47 |     std::vector<test_description> ret;
48 |     for (auto& name : split(list, ",")) {
49 |         auto t = get_by_name(name);
50 |         if (!t) {
51 |             throw std::runtime_error("no test named " + name);
52 |         }
53 |         ret.push_back(*t);
54 |     }
55 |     return ret;
56 | }
57 | 
58 | const std::vector<test_description>& get_all() {
59 |     static std::vector<test_description> all =
60 |             std::vector<test_description>(all_funcs, all_funcs + COUNT_OF(all_funcs));
61 |     return all;
62 | }
63 | 


--------------------------------------------------------------------------------
/impl-list.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef IMPL_LIST_H_
 2 | #define IMPL_LIST_H_
 3 | 
 4 | #include "common-cxx.hpp"
 5 | 
 6 | #include <vector>
 7 | #include <string>
 8 | 
 9 | /**
10 |  * Offers access to all the intersection implementations.
11 |  */
12 | 
/**
 * Flags attached to each test_description. Every non-zero value is a
 * distinct single bit.
 */
enum AlgoFlags {
    /** no flags set */
    NONE         = 0,
    /** algo is too slow to run at default array sizes */
    SLOW         = 1 << 0,
    /** algo doesn't return the right result (e.g., because it is a dummy for testing) */
    INCORRECT    = 1 << 1,
    /** NOTE(review): presumably marks tests that do not issue vzeroupper - confirm against the code that reads this flag */
    NO_VZ        = 1 << 2,
};
21 | 
/**
 * Describes one benchmark: how to look it up, how to run it and how to
 * present it.
 */
struct test_description {
    /** name used for lookup by get_by_name / get_by_list */
    const char *name;
    /** the benchmark function itself */
    bench_fn *f;
    /** human-readable description */
    const char *desc;
    /** flags for this benchmark, see AlgoFlags */
    AlgoFlags flags;

    /** Runs the benchmark function with the given arguments. */
    void call_f(const bench_args& args) const {
        f(args);
    }
};
32 | 
33 | /**
34 |  * Return the benchmark exactly matching the given name, or nullptr
35 |  * if not found.
36 |  */
37 | const test_description* get_by_name(const std::string& name);
38 | 
39 | /**
40 |  * Given a comma separated list of test names, return a list of all the
41 |  * tests, or throw if one isn't found.
42 |  */
43 | std::vector<test_description> get_by_list(const std::string& list);
44 | 
45 | /**
46 |  * Return all test descriptors.
47 |  */
48 | const std::vector<test_description>& get_all();
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/jevents/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY = all clean-examples all-examples install clean html man
 2 | PREFIX=$(DESTDIR)/usr/local
 3 | LIB=$(PREFIX)/lib64
 4 | BIN=$(PREFIX)/bin
 5 | INCLUDE=$(PREFIX)/include
 6 | CFLAGS := -g -fPIC -Wall -O2 -Wno-unused-result
 7 | OBJ := json.o jsmn.o jevents.o resolve.o cache.o cpustr.o rawevent.o \
 8 |        perf-iter.o interrupts.o rdpmc.o measure.o perf_event_open.o \
 9 |        session.o
10 | KDOC = /usr/src/linux/scripts/kernel-doc
11 | 
12 | all: libjevents.a showevent listevents event-rmap all-examples
13 | 
14 | clean-examples:
15 | 	make -C examples clean
16 | 
17 | all-examples: libjevents.a
18 | 	make -C examples
19 | 
20 | install: libjevents.a listevents showevent event-rmap
21 | 	install -d ${BIN}
22 | 	install -d ${LIB}
23 | 	install -d ${INCLUDE}
24 | 	install -m 755 listevents showevent event-rmap ${BIN}
25 | 	install -m 644 libjevents.a ${LIB}
26 | 	install -m 644 rdpmc.h jevents.h measure.h perf-iter.h jsession.h ${INCLUDE}
27 | 	# xxx install man page
28 | 
29 | libjevents.a: ${OBJ}
30 | 	rm -f libjevents.a
31 | 	ar q libjevents.a $^
32 | 	ranlib libjevents.a
33 | 
34 | clean: clean-examples
35 | 	rm -f ${OBJ} libjevents.a resolve showevent listfiles jevents.html rmap event-rmap.o event-rmap \
36 | 		listevents resolve-test showevent.o listevents.o
37 | 
38 | resolve: resolve.c
39 | 	$(CC) $(CFLAGS) -DTEST=1 -o $@ $^
40 | 
41 | showevent: showevent.o libjevents.a
42 | 
43 | listevents: listevents.o libjevents.a
44 | 
45 | event-rmap: event-rmap.o libjevents.a
46 | 
47 | DOCFILES := cache.c jevents.c cpustr.c rawevent.c interrupts.c measure.c rdpmc.c \
48 | 	    session.c
49 | 
50 | html: jevents.html
51 | 
52 | man: jeventstmp.man
53 | 	perl -ne 's/Kernel Hacker.s Manual/jevents/; open(F,">" . $$1 . ".man") if /^\.TH "(.*?)"/; print F $$_' jevents.man
54 | 
55 | jeventstmp.man: $(DOCFILES)
56 | 	${KDOC} -man ${DOCFILES} > $@
57 | 
58 | jevents.html: $(DOCFILES)
59 | 	${KDOC} -html ${DOCFILES} > $@
60 | 


--------------------------------------------------------------------------------
/jevents/README.md:
--------------------------------------------------------------------------------
 1 | # jevents
 2 | 
 3 | jevents is a C library that makes the Linux kernel perf interface easier to use from C programs.
 4 | It also includes some examples to use the library.
 5 | 
 6 | ## Features
 7 | 
 8 | * Resolving symbolic event names using downloaded event files
 9 | * Reading performance counters from ring 3 (user space) in C programs
10 | * Handling the perf ring buffer (for example to read memory addresses)
11 | 
12 | For more details see the [API reference](http://halobates.de/jevents.html) 
13 | 
14 | ## Building
15 | 
16 | 	cd jevents
17 | 	make
18 | 	sudo make install
19 | 
20 | ## Downloading event lists
21 | 
22 | Before using event lists they need to be downloaded. Use the pmu-tools
23 | event_download.py script for this.
24 | 
25 | 	% event_download.py
26 | 
27 | ## Examples
28 | 
29 | * listevents: List all named perf and JSON events
30 | * showevent: Convert JSON name or perf alias to perf format and test with perf
31 | * event-rmap: Map low level perf event to named high-level event
32 | * addr: Profile a loadable test kernel with address profiling
* jestat: Simple perf stat like tool with JSON event resolution.
34 | 
35 | ## Initialization/Multithreading
36 | 
37 | Functions accessing the JSON event data load the JSON file lazily when first
38 | used. This might result in data races when multiple threads call jevent
39 | functions. In such cases the event list can be loaded from the main thread by
40 | `read_events(NULL);`.
41 | 
42 | ## self profiling 
43 | 
44 | Reading performance counters directly in the program without entering
45 | the kernel.
46 | 
47 | This is very simplified, for a real benchmark you almost certainly
48 | want some warmup, multiple iterations, possibly context switch
49 | filtering and some filler code to avoid cache effects.
50 | 
51 | ```C
52 | 	#include "rdpmc.h"
53 | 
54 | 	struct rdpmc_ctx ctx;
55 | 	unsigned long long start, end;
56 | 
57 | 	if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0) ... error ...
58 | 	start = rdpmc_read(&ctx);
59 | 	... your workload ...
60 | 	end = rdpmc_read(&ctx);
61 | ```
62 | 
63 | /sys/devices/cpu/rdpmc must be 1.
64 | 
65 | http://halobates.de/modern-pmus-yokohama.pdf provides some
66 | additional general information on cycle counting. The techniques used
67 | with simple-pmu described there can be used with jevents too.
68 | 
69 | ## Resolving named events
70 | 
Resolve a named event to a perf event and set up reading from the perf ring buffer.
72 | 
73 | First run event_download.py to download a current event list for your CPU.
74 | 
75 | ```C
76 | 	#include "jevents.h"
77 | 	#include "perf-iter.h"
78 | 	#include <linux/perf_event.h>
79 | 	#include <sys/syscall.h>
80 | 	#include <unistd.h>
81 | 
82 | 	struct perf_event_attr attr;
83 | 	if (resolve_event("cpu_clk_thread_unhalted.ref_xclk", &attr) < 0) {
84 | 		... error ...
85 | 	}
86 | 
87 | 	/* You can change attr, see the perf_event_open man page for details */
88 | 
```
90 | 


--------------------------------------------------------------------------------
/jevents/cache.c:
--------------------------------------------------------------------------------
  1 | /* Caching layer to resolve events without re-reading them */
  2 | 
  3 | /*
  4 |  * Copyright (c) 2014, Intel Corporation
  5 |  * Author: Andi Kleen
  6 |  * All rights reserved.
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions are met:
 10 |  *
 11 |  * 1. Redistributions of source code must retain the above copyright notice,
 12 |  * this list of conditions and the following disclaimer.
 13 |  *
 14 |  * 2. Redistributions in binary form must reproduce the above copyright
 15 |  * notice, this list of conditions and the following disclaimer in the
 16 |  * documentation and/or other materials provided with the distribution.
 17 |  *
 18 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 21 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 22 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 23 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 24 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 25 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 26 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 27 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 28 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 29 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 30 | */
 31 | 
 32 | #define _GNU_SOURCE 1
 33 | #include "jevents.h"
 34 | #include <stdlib.h>
 35 | #include <stdbool.h>
 36 | #include <string.h>
 37 | #include <errno.h>
 38 | #include <stdio.h>
 39 | #include <ctype.h>
 40 | #include <linux/perf_event.h>
 41 | 
 42 | /**
 43 |  * DOC: Resolve named Intel performance events to perf
 44 |  *
 45 |  * This library allows to resolve named Intel performance counter events
 46 |  * (for example INST_RETIRED.ANY)
 47 |  * by name and turn them into perf_event_attr attributes. It also
 48 |  * supports listing all events and resolving numeric events back to names.
 49 |  *
 50 |  * The standard workflow is the user calling "event_download.py"
 51 |  * to download the current list, and then
 52 |  * these functions can resolve or walk names. Alternatively
 53 |  * a JSON event file from https://download.01.org/perfmon
 54 |  * can be specified through the EVENTMAP= environment variable.
 55 |  */
 56 |  
/* One entry of the in-memory event list; entries are chained per hash bucket. */
struct event {
	struct event *next;	/* next entry in the same hash bucket */
	char *name;		/* event name; the (case-insensitive) lookup key */
	char *desc;		/* event description */
	char *event;		/* event encoding text, scanned for "event=" / "umask=" */
	char *pmu;		/* PMU name; used as the prefix in "pmu/event/" */
};
 64 | 
 65 | #define HASHSZ 37
 66 | 
 67 | static struct event *eventlist[HASHSZ];
 68 | static bool eventlist_init;
 69 | 
 70 | /* Weinberg's identifier hash */
 71 | static unsigned hashfn(const char *s)
 72 | {
 73 | 	unsigned h = 0;
 74 | 	while (*s) {
 75 | 		int c = tolower(*s);
 76 | 		s++;
 77 | 		h = h * 67 + (c - 113);
 78 | 	}
 79 | 	return h % HASHSZ;
 80 | }
 81 | 
 82 | static int collect_events(void *data, char *name, char *event, char *desc,
 83 | 			  char *pmu)
 84 | {
 85 | 	unsigned h = hashfn(name);
 86 | 	struct event *e = malloc(sizeof(struct event));
 87 | 	if (!e)
 88 | 		exit(ENOMEM);
 89 | 	e->next = eventlist[h];
 90 | 	eventlist[h] = e;
 91 | 	e->name = strdup(name);
 92 | 	e->desc = strdup(desc);
 93 | 	e->event = strdup(event);
 94 | 	e->pmu = strdup(pmu);
 95 | 	return 0;
 96 | }
 97 | 
 98 | static void free_events(void)
 99 | {
100 | 	struct event *e, *next;
101 | 	int i;
102 | 	for (i = 0; i < HASHSZ; i++) {
103 | 		for (e = eventlist[i]; e; e = next) {
104 | 			next = e->next;
105 | 			free(e->name);
106 | 			free(e->desc);
107 | 			free(e->event);
108 | 			free(e->pmu);
109 | 			free(e);
110 | 		}
111 | 		eventlist[i] = NULL;
112 | 	}
113 | 	eventlist_init = false;
114 | }
115 | 
116 | /**
117 |  * read_events - Read JSON performance counter event list
118 |  * @fn: File name to read. NULL to chose default location.
119 |  *
120 |  * Read the JSON event list fn. The other functions in the library
121 |  * automatically read the default event list for the current CPU,
122 |  * but calling this explicitly is useful to chose a specific one.
123 |  *
124 |  * This function is not thread safe and should not be called
125 |  * from multiple threads in parallel. However once it is called
126 |  * once all other functions are thread-safe. So for multi-threaded
127 |  * use the main thread should call it once before other threads.
128 |  *
129 |  * Return: -1 on failure, otherwise 0.
130 |  */
131 | int read_events(const char *fn)
132 | {
133 | 	if (eventlist_init) {
134 | 		// treat subsequent read_events calls after the first as replacing the
135 | 		// event list
136 | 		free_events();
137 | 	}
138 | 	eventlist_init = true;
139 | 	/* ??? free on error */
140 | 	return json_events(fn, collect_events, NULL);
141 | }
142 | 
/*
 * Events whose encoding in the JSON files differs from what perf expects;
 * the table ends with an all-NULL sentinel entry.
 */
static struct fixed {
	char *name;
	char *event;
} fixed[] = {
	{ "inst_retired.any", "event=0xc0" },
	{ "cpu_clk_unhalted.thread", "event=0x3c" },
	{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
	{ NULL, NULL },
};

/*
 * Handle different fixed counter encodings between JSON and perf:
 * return the override for @name (matched ignoring case) when the table has
 * one, otherwise hand back @event unchanged.
 */
static char *real_event(char *name, char *event)
{
	struct fixed *entry;

	for (entry = fixed; entry->name != NULL; entry++) {
		if (strcasecmp(name, entry->name) == 0)
			return entry->event;
	}
	return event;
}
164 | 
165 | /**
166 |  * resolve_event - Resolve named performance counter event
167 |  * @name: Name of performance counter event (case in-sensitive)
168 |  * @attr: perf_event_attr to initialize with name.
169 |  *
170 |  * The attr structure is cleared initially.
171 |  * The user typically has to set up attr->sample_type/read_format
172 |  * _after_ this call.
173 |  * Note this function is only thread-safe when read_events() has
174 |  * been called first single-threaded.
175 |  * Return: -1 on failure, otherwise 0.
176 |  */
177 | 
178 | int resolve_event(const char *name, struct perf_event_attr *attr)
179 | {
180 | 	struct event *e;
181 | 	char *buf;
182 | 	int ret;
183 | 	unsigned h = hashfn(name);
184 | 
185 | 	if (!eventlist_init) {
186 | 		if (read_events(NULL) < 0)
187 | 			return -1;
188 | 	}
189 | 	for (e = eventlist[h]; e; e = e->next) {
190 | 		if (!strcasecmp(e->name, name)) {
191 | 			char *event = real_event(e->name, e->event);
192 | 			asprintf(&buf, "%s/%s/", e->pmu, event);
193 | 			ret = jevent_name_to_attr(buf, attr);
194 | 			free(buf);
195 | 			return ret;
196 | 		}
197 | 	}
198 | 	/* Try a perf style event */
199 | 	if (jevent_name_to_attr(name, attr) == 0)
200 | 		return 0;
201 | 	asprintf(&buf, "cpu/%s/", name);
202 | 	ret = jevent_name_to_attr(buf, attr);
203 | 	free(buf);
204 | 	if (ret == 0)
205 | 		return ret;
206 | 	return -1;
207 | }
208 | 
209 | /**
210 |  * walk_events - Walk all the available performance counter events
211 |  * @func: Callback to call on each event.
212 |  * @data: Abstract data pointer to pass to callback.
213 |  *
214 |  * The callback gets passed the data argument, the name of the 
215 |  * event, the translated event in perf form (cpu/.../) and a 
216 |  * description of the event.
217 |  *
218 |  * Return: -1 on failure, otherwise 0.
219 |  */
220 | 
221 | int walk_events(int (*func)(void *data, char *name, char *event, char *desc),
222 | 		void *data)
223 | {
224 | 	struct event *e;
225 | 	if (!eventlist_init) {
226 | 		if (read_events(NULL) < 0)
227 | 			return -1;
228 | 	}
229 | 	int i;
230 | 	for (i = 0; i < HASHSZ; i++) {
231 | 		for (e = eventlist[i]; e; e = e->next) {
232 | 			char *buf;
233 | 			asprintf(&buf, "%s/%s/", e->pmu, e->event);
234 | 			int ret = func(data, e->name, buf, e->desc);
235 | 			free(buf);
236 | 			if (ret)
237 | 				return ret;
238 | 		}
239 | 	}
240 | 	return 0;
241 | }
242 | 
243 | /**
244 |  * rmap_event - Map numeric event back to name and description.
245 |  * @target:  Event code to match (umask + event).
246 |  * @name: Put pointer to event name into this. No need to free.
247 |  * @desc: Put pointer to description into this. No need to free. Can be NULL.
248 |  *
249 |  * Offcore matrix events are not fully supported.
250 |  * Ignores bits other than umask/event for now, so some events using cmask,inv
251 |  * may be misidentified. May be slow.
252 |  * Return: -1 on failure, otherwise 0.
253 |  */
254 | 
255 | int rmap_event(unsigned target, char **name, char **desc)
256 | {
257 | 	struct event *e;
258 | 	if (!eventlist_init) {
259 | 		if (read_events(NULL) < 0)
260 | 			return -1;
261 | 	}
262 | 	int i;
263 | 	for (i = 0; i < HASHSZ; i++) {
264 | 		for (e = eventlist[i]; e; e = e->next) {
265 | 			// XXX should cache the numeric value
266 | 			char *s;
267 | 			unsigned event = 0, umask = 0;
268 | 			s = strstr(e->event, "event=");
269 | 			if (s)
270 | 				sscanf(s, "event=%x", &event);
271 | 			s = strstr(e->event, "umask=");
272 | 			if (s)
273 | 				sscanf(s, "umask=%x", &umask);
274 | 			if ((event | (umask << 8)) == (target & 0xffff)) {
275 | 				*name = e->name;
276 | 				if (desc)
277 | 					*desc = e->desc;
278 | 				return 0;
279 | 			}
280 | 		}
281 | 	}
282 | 	return -1;
283 | 
284 | }
285 | 


--------------------------------------------------------------------------------
/jevents/cpustr.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2014, Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  * All rights reserved.
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that the following conditions are met:
 8 |  *
 9 |  * 1. Redistributions of source code must retain the above copyright notice,
10 |  * this list of conditions and the following disclaimer.
11 |  *
12 |  * 2. Redistributions in binary form must reproduce the above copyright
13 |  * notice, this list of conditions and the following disclaimer in the
14 |  * documentation and/or other materials provided with the distribution.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
28 | */
29 | 
30 | #define _GNU_SOURCE 1
31 | #include <stdio.h>
32 | #include <stdlib.h>
33 | #include "jevents.h"
34 | 
/**
 * get_cpu_str - Return the "-core" identification string of the current CPU.
 *
 * Shorthand for get_cpu_str_type("-core", NULL). The result is NULL on
 * failure and otherwise has to be freed by the caller.
 *
 * Used to store JSON event lists in the cache directory.
 */
char *get_cpu_str(void)
{
	char *core_id = get_cpu_str_type("-core", NULL);

	return core_id;
}
45 | 
/**
 * get_cpu_str_type - Return string describing the current CPU for type or NULL.
 * @type: "-core" or "-uncore"
 * @idstr_step: if non NULL write idstr with stepping to pointer.
 *
 * The information is parsed from /proc/cpuinfo. The result has the form
 * "vendor-family-MODEL<type>" and the idstr with stepping
 * "vendor-family-MODEL-STEP<type>", with model and stepping in upper
 * case hex.
 *
 * *idstr_step is always written when idstr_step is non NULL: it is set
 * to NULL whenever no string could be produced, so the caller can free
 * it unconditionally.
 * Both result and idstr_step (if non NULL) need to be freed by
 * caller.
 */
char *get_cpu_str_type(char *type, char **idstr_step)
{
	char *line = NULL;
	size_t llen = 0;
	int found = 0, n;
	char vendor[30];
	int model = 0, fam = 0, step = 0;
	char *res = NULL;
	FILE *f;

	/* Give the out parameter a defined value on every path. */
	if (idstr_step)
		*idstr_step = NULL;

	f = fopen("/proc/cpuinfo", "r");
	if (!f)
		return NULL;
	while (getline(&line, &llen, f) > 0) {
		if (sscanf(line, "vendor_id : %29s", vendor) == 1)
			found++;
		else if (sscanf(line, "model : %d", &model) == 1)
			found++;
		else if (sscanf(line, "cpu family : %d", &fam) == 1)
			found++;
		else if (sscanf(line, "stepping : %d", &step) == 1)
			found++;
		if (found == 4) {
			/* asprintf leaves its target undefined on failure */
			if (idstr_step &&
			    asprintf(idstr_step, "%s-%d-%X-%X%s", vendor, fam,
				     model, step, type) < 0)
				*idstr_step = NULL;
			n = asprintf(&res, "%s-%d-%X%s", vendor, fam, model,
					type);
			if (n < 0)
				res = NULL;
			break;
		}
	}
	free(line);
	fclose(f);
	return res;
}
89 | 


--------------------------------------------------------------------------------
/jevents/event-rmap.c:
--------------------------------------------------------------------------------
 1 | #include "jevents.h"
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | 
 5 | int main(int ac, char **av)
 6 | {
 7 | 	while (*++av) {
 8 | 		unsigned event = strtoul(*av, NULL, 0);
 9 | 		char *name, *desc;
10 | 		if (rmap_event(event, &name, &desc) == 0)
11 | 			printf("%x: %s : %s\n", event, name, desc);
12 | 		else
13 | 			printf("%x not found\n", event);
14 | 	}
15 | 	return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/jevents/examples/Makefile:
--------------------------------------------------------------------------------
 1 | # build jevents first
 2 | CFLAGS := -g -Wall -O2  -I .. -Wno-unused-result
 3 | CXXFLAGS := -g -Wall  -O2  -fPIC
 4 | LDFLAGS := -L ..
 5 | LDLIBS = -ljevents
 6 | 
 7 | all: addr rtest rtest2 rtest3 jestat
 8 | 
 9 | # no deps on the includes
10 | 
11 | ADDR_OBJ := addr.o hist.o cpu.o
12 | 
13 | addr: ${ADDR_OBJ} ../libjevents.a
14 | 
15 | addr: LDLIBS += -lstdc++ -ldl
16 | 
17 | rtest2: LDLIBS += -lm
18 | 
19 | rtest: rtest.o ../libjevents.a
20 | 
21 | rtest2: rtest2.o ../libjevents.a
22 | 
23 | rtest3: rtest3.o ../libjevents.a
24 | 
25 | jestat: jestat.o ../libjevents.a
26 | 
27 | clean:
28 | 	rm -f addr ${ADDR_OBJ} jestat jestat.o
29 | 	rm -f rtest3 rtest3.o rtest2 rtest2.o rtest rtest.o
30 | 


--------------------------------------------------------------------------------
/jevents/examples/addr.c:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * perf address sampling self profiling demo.
  3 |  * Requires a 3.10+ kernel with PERF_SAMPLE_ADDR support and a supported Intel CPU.
  4 |  *
  5 |  * Copyright (c) 2013 Intel Corporation
  6 |  * Author: Andi Kleen
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that: (1) source code distributions
 10 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 11 |  * distributions including binary code include the above copyright notice and
 12 |  * this paragraph in its entirety in the documentation or other materials
 13 |  * provided with the distribution
 14 |  *
 15 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 16 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 17 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 18 |  */
 19 | #include <linux/perf_event.h>
 20 | #include <stdio.h>
 21 | #include <stdlib.h>
 22 | #include <string.h>
 23 | #include <stdint.h>
 24 | #include <unistd.h>
 25 | #include <cpuid.h>
 26 | #include <stdbool.h>
 27 | #include <assert.h>
 28 | #include <dlfcn.h>
 29 | 
 30 | #include "hist.h"
 31 | #include "perf-iter.h"
 32 | #include "util.h"
 33 | #include "cpu.h"
 34 | 
/* 2^n size of event ring buffer (in pages) */
#define BUF_SIZE_SHIFT 8

/* Number of floats in each test array; MB is not defined in this file (presumably util.h, making this 100 MB per array — confirm). */
#define SIZE ((100*MB)/sizeof(float))

/* Test arrays: allocated and filled by simple_test_init(), multiplied by simple_test_load(). */
float *x, *y;
 41 | 
 42 | void simple_test_init(void)
 43 | {
 44 | 	y = calloc(SIZE, sizeof(float));
 45 |         x = calloc(SIZE, sizeof(float));
 46 |         int i;
 47 |         for (i = 0; i < SIZE; i++) {
 48 |                 y[i] = 1.0;
 49 |                 x[i] = 2.0;
 50 |         }
 51 |  
 52 | 	printf("test area %p-%p, %p-%p\n", x, x+SIZE, y, y+SIZE);
 53 | }
 54 | 
 55 | void simple_test_load(void)
 56 | {
 57 |         int i;
 58 |         int j;
 59 |         for (j = 0; j < 20; j++) {
 60 |         	for (i = 0; i < SIZE; i++) {
 61 |                 	y[i] = y[i] * x[i];
 62 | 			mb();	/* Don't optimize the loop away */
 63 | 		}
 64 | 	}
 65 | }
 66 | 
/*
 * Workload hooks.  They default to the built-in simple test; main() replaces
 * them with the "test_init"/"test_load" symbols of a shared object when one
 * is given on the command line.
 */
void (*test_init)(void) = simple_test_init;
void (*test_load)(void) = simple_test_load;
 69 | 
 70 | void gen_hist(char *name, struct perf_fd *pfd)
 71 | {
 72 | 	struct perf_iter iter;
 73 | 	struct hist *h = init_hist();
 74 | 
 75 | 	perf_iter_init(&iter, pfd);
 76 | 	int samples = 0, others = 0, throttled = 0, skipped = 0;
 77 | 	u64 lost = 0;
 78 | 	while (!perf_iter_finished(&iter)) {
 79 | 		char buffer[64];
 80 | 		struct perf_event_header *hdr = perf_buffer_read(&iter, buffer, 64);
 81 | 
 82 | 		if (!hdr) {
 83 | 			skipped++;
 84 | 			continue;
 85 | 		}
 86 | 
 87 | 		if (hdr->type != PERF_RECORD_SAMPLE) {
 88 | 			if (hdr->type == PERF_RECORD_THROTTLE)
 89 | 				throttled++;
 90 | 			else if (hdr->type == PERF_RECORD_LOST)
 91 | 				lost += perf_hdr_payload(hdr)[1];
 92 | 			else
 93 | 				others++;
 94 | 			continue;
 95 | 		}
 96 | 		samples++;
 97 | 		if (hdr->size != 16) {
 98 | 			printf("unexpected sample size %d\n", hdr->size);
 99 | 			continue;
100 | 		}
101 | 
102 | 		u64 val = perf_hdr_payload(hdr)[0];
103 | 		/* Filter out kernel samples, which can happen due to OOO skid */
104 | 		if ((long long)val < 0)
105 | 			continue;
106 | 		hist_add(h, val);
107 | 	}
108 | 	perf_iter_continue(&iter);
109 | 
110 | 	printf("%s: %d samples, %d others, %llu lost, %d throttled, %d skipped\n",
111 | 				name,
112 | 				samples,
113 | 				others,
114 | 				lost,
115 | 				throttled,
116 | 				skipped);
117 | 	hist_print(h, 0.001);
118 | 	free_hist(h);
119 | }
120 | 
121 | int main(int ac, char **av)
122 | {
123 | 	bool cycles_only = false;
124 | 
125 | 	/* Set up perf for loads */
126 | 	struct perf_event_attr attr = {
127 | 		.type = PERF_TYPE_RAW,
128 | 		.size = PERF_ATTR_SIZE_VER0,
129 | 		.sample_type = PERF_SAMPLE_ADDR,
130 | 		.sample_period = 10000,		/* Period */
131 | 		.exclude_kernel = 1,
132 | 		.precise_ip = 1,		/* Enable PEBS */
133 | 		.config1 = 3,			/* Load Latency threshold */
134 | 		.config = mem_loads_event(),	/* Event */
135 | 		.disabled = 1,
136 | 	};
137 | 
138 | 	if (attr.config == -1) {
139 | 		printf("Unknown CPU model\n");
140 | 		exit(1);
141 | 	}
142 | 
143 | 	if (av[1] && !strcmp(av[1], "cycles")) {
144 | 		attr.sample_type = PERF_SAMPLE_IP;
145 | 		attr.precise_ip = 0;
146 | 		attr.config = 0x3c;
147 | 		cycles_only = true;
148 | 		av--;
149 | 	}
150 | 
151 | 	if (av[1]) { 
152 | 		void *test_obj;
153 | 		test_obj = dlopen(av[1], RTLD_NOW);
154 | 		if (!test_obj) { 
155 | 			fprintf(stderr, "Cannot load %s: %s\n", av[1], dlerror());
156 | 			exit(1);
157 | 		}
158 | 		test_init = dlsym(test_obj, "test_init");
159 | 		test_load = dlsym(test_obj, "test_load");
160 | 		if (!test_init || !test_load) {
161 | 			fprintf(stderr, "%s missing test_init or test_load symbols: %s\n",
162 | 					av[1], dlerror());
163 | 			exit(1);
164 | 		}
165 | 	}
166 | 
167 | 	struct perf_fd loads, stores;
168 | 	if (perf_fd_open(&loads, &attr, BUF_SIZE_SHIFT) < 0)
169 | 		err("perf event init loads");
170 | 	printf("loads event %llx\n", attr.config);
171 | 
172 | 	bool have_stores = false;
173 | 	if (0 && !cycles_only) {
174 | 		attr.config = mem_stores_event();
175 | 		attr.config1 = 0;
176 | 		if (perf_fd_open(&stores, &attr, BUF_SIZE_SHIFT) < 0)
177 | 			err("perf event init stores");
178 | 		printf("stores event %llx\n", attr.config);
179 | 		have_stores = true;
180 | 	}
181 | 
182 | 	test_init();
183 | 
184 | 	/* Run measurement */
185 | 
186 | 	if (perf_enable(&loads) < 0)
187 | 		err("PERF_EVENT_IOC_ENABLE");
188 | 	if (0)
189 | 		perf_enable(&stores);
190 | 
191 | 	test_load();
192 | 
193 | 	if (perf_disable(&loads) < 0)
194 | 		err("PERF_EVENT_IOC_DISABLE");
195 | 	if (0)
196 | 		perf_disable(&stores);
197 | 
198 | 	gen_hist("loads", &loads);
199 | 	perf_fd_close(&loads);
200 | 	if (have_stores) {
201 |        		gen_hist("stores", &stores);	
202 | 		perf_fd_close(&stores);
203 | 	}
204 | 
205 | 	return 0;
206 | }
207 | 


--------------------------------------------------------------------------------
/jevents/examples/cpu.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013 Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that: (1) source code distributions
  7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
  8 |  * distributions including binary code include the above copyright notice and
  9 |  * this paragraph in its entirety in the documentation or other materials
 10 |  * provided with the distribution
 11 |  *
 12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 15 |  */
 16 | 
 17 | /* CPU detection and event tables */
 18 | #include <cpuid.h>
 19 | #include <stdio.h>
 20 | #include <stdlib.h>
 21 | #include <stdbool.h>
 22 | 
 23 | #include "cpu.h"
 24 | 
/* Raw memory sampling events for one group of CPU models. */
struct cpu_events {
	int *models;		/* 0-terminated list of model numbers */
	unsigned mem_stores;	/* raw event for precise stores */
	unsigned mem_loads;	/* raw event for precise loads */
};

#define MEM_LOADS_SNB 0x1cd	/* MEM_TRANS_RETIRED.LOAD_LATENCY */
#define MEM_STORES_SNB 0x2cd 	/* MEM_TRANS_RETIRED.PRECISE_STORES */
static int snb_models[] = { 42, 45, 58, 62, 0 };

#define MEM_LOADS_HSW MEM_LOADS_SNB
#define MEM_STORES_HSW 0x82d0
static int hsw_models[] = { 60, 70, 71, 63, 61, 0 };

/* Nehalem and Westmere */
#define MEM_LOADS_NHM 0x100b	/* MEM_INST_RETIRED.LOAD_LATENCY */
#define MEM_STORES_NHM -1	/* not supported */

static int nhm_models[] = { 26, 30, 46, 37, 44, 47, 0 };

/*
 * Event table, ended by an all-zero entry.  The initializers are positional:
 * { models, mem_stores, mem_loads } -- stores come before loads.
 */
struct cpu_events events[] = { 
	{ snb_models, MEM_STORES_SNB, MEM_LOADS_SNB },
	{ nhm_models, MEM_STORES_NHM, MEM_LOADS_NHM },
	{ hsw_models, MEM_STORES_HSW, MEM_LOADS_HSW },
	{}
};
 51 | 
 52 | static unsigned get_cpu_model(void)
 53 | {
 54 | 	unsigned sig;
 55 | 	if (__get_cpuid_max(0, &sig) >= 1 && sig == *(int *)"Genu") {
 56 | 		unsigned a, b, c, d;
 57 | 		__cpuid(1, a, b, c, d);
 58 | 		unsigned family = (a >> 8) & 0xf;
 59 | 		if (family == 6)
 60 | 			return ((a >> 4) & 0xf) + (((a >> 16) & 0xf) << 4);
 61 | 	}
 62 | 	return 0;
 63 | }
 64 | 
 65 | static bool match_cpu_model(int mod, int *models)
 66 | {
 67 | 	int i;
 68 | 	for (i = 0; models[i]; i++)
 69 | 		if (models[i] == mod)
 70 | 			return true;
 71 | 	return false;
 72 | }
 73 | 
 74 | /**  
 75 |  * mem_stores_event - Return precise mem load event for current CPU.
 76 |  * This is an event which supports load address monitoring.
 77 |  * Return: raw event, can be put int perf_event_attr->config. 
 78 |  * -1 or error.
 79 |  */
 80 | 
 81 | unsigned mem_loads_event(void)
 82 | {
 83 | 	int mod = get_cpu_model();
 84 | 	int i;
 85 | 	for (i = 0; events[i].models; i++)
 86 | 		if (match_cpu_model(mod, events[i].models))
 87 | 			return events[i].mem_loads;	
 88 | 	return -1;
 89 | }
 90 | 
 91 | /**  
 92 |  * mem_stores_event - Return precise mem stores event for current CPU.
 93 |  * This is an event which supports load address monitoring.
 94 |  * Return: raw event, can be put int perf_event_attr->config. 
 95 |  * -1 or error.
 96 |  */
 97 | unsigned mem_stores_event(void)
 98 | {
 99 | 	int mod = get_cpu_model();
100 | 	int i;
101 | 	for (i = 0; events[i].models; i++)
102 | 		if (match_cpu_model(mod, events[i].models))
103 | 			return events[i].mem_stores;
104 | 	return -1;
105 | }
106 | 


--------------------------------------------------------------------------------
/jevents/examples/cpu.h:
--------------------------------------------------------------------------------
/* Raw precise memory load event for the running CPU; -1 (as unsigned) if the model is unknown. */
unsigned mem_loads_event(void);
/* Raw precise memory store event for the running CPU; -1 (as unsigned) if the model is unknown. */
unsigned mem_stores_event(void);
3 | 


--------------------------------------------------------------------------------
/jevents/examples/hist.cc:
--------------------------------------------------------------------------------
 1 | // STL based histogram
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <map>
 5 | #include <queue>
 6 | #include "hist.h"
 7 | 
 8 | using namespace std;
 9 | 
10 | extern "C" { 
11 | 
12 | typedef map<uint64_t, uint64_t> hist_type;
13 | 
14 | struct hist {
15 | 	hist_type hist;
16 | 	uint64_t total;
17 | };
18 | 
19 | hist *init_hist()
20 | {
21 | 	struct hist *h = new hist;
22 | 	h->total = 0;
23 | 	return h;
24 | }
25 | 
26 | void hist_add(hist *h, uint64_t val)
27 | {
28 | 	h->hist[val]++;
29 | 	h->total++;
30 | }
31 | 
32 | void hist_print(hist *h, double min_percent) 
33 | {
34 | 	unsigned long long below_thresh = 0;
35 | 	typedef pair<uint64_t, uint64_t> val_pair;
36 | 	priority_queue<val_pair> q;
37 | 
38 | 	for (hist_type::iterator it = h->hist.begin(); it != h->hist.end(); it++) { 
39 | 		double percent = (double)(it->second) / (double)h->total;
40 | 		if (percent >= min_percent) {
41 | 			val_pair p(it->second, it->first);
42 | 			q.push(p);
43 | 		} else
44 | 			below_thresh += it->second;
45 | 	}
46 | 	printf("%11s %16s %16s\n", "PERCENT", "ADDR", "SAMPLES");
47 | 	while (!q.empty()) { 
48 | 		val_pair p = q.top();
49 | 		printf("%10.2f%% %16llx %16llu\n", 
50 | 				(p.first / (double)h->total) * 100.0,
51 | 				(unsigned long long)p.second,
52 | 				(unsigned long long)p.first);
53 | 		q.pop();
54 | 	}
55 | 	printf("%llu below threshold\n", below_thresh);
56 | }
57 | 
58 | void free_hist(hist *h)
59 | {
60 | 	delete h;
61 | }
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/jevents/examples/hist.h:
--------------------------------------------------------------------------------
 1 | 
/* C interface to the histogram implemented in hist.cc. */
#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

/* Opaque histogram handle. */
struct hist;

/* Create an empty histogram. */
struct hist *init_hist(void);
/* Count one occurrence of the given value. */
void hist_add(struct hist *h, uint64_t);
/* Print the values whose share of all samples is at least min_percent. */
void hist_print(struct hist *h, double min_percent);
/* Destroy a histogram returned by init_hist(). */
void free_hist(struct hist *);

#ifdef __cplusplus
}
#endif
18 | 


--------------------------------------------------------------------------------
/jevents/examples/jestat.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2015, Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  * All rights reserved.
  5 |  *
  6 |  * Redistribution and use in source and binary forms, with or without
  7 |  * modification, are permitted provided that the following conditions are met:
  8 |  *
  9 |  * 1. Redistributions of source code must retain the above copyright notice,
 10 |  * this list of conditions and the following disclaimer.
 11 |  *
 12 |  * 2. Redistributions in binary form must reproduce the above copyright
 13 |  * notice, this list of conditions and the following disclaimer in the
 14 |  * documentation and/or other materials provided with the distribution.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 19 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 20 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 21 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 22 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 23 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 24 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 25 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 26 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 27 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 28 |  */
 29 | 
 30 | /* Poor man's perf stat using jevents */
 31 | /* jstat [-a] [-p pid] [-e events] program */
 32 | /* Supports named events if downloaded first (w/ event_download.py) */
 33 | /* Run listevents to show the available events */
 34 | 
 35 | #include <stdio.h>
 36 | #include <unistd.h>
 37 | #include <stdlib.h>
 38 | #include <stdint.h>
 39 | #include <string.h>
 40 | #include <getopt.h>
 41 | #include <signal.h>
 42 | #include <locale.h>
 43 | #include <sys/wait.h>
 44 | #include <sys/fcntl.h>
 45 | #include "jevents.h"
 46 | #include "jsession.h"
 47 | 
/* Print the errno message for x and exit with status 1. */
#define err(x) perror(x), exit(1)
/* Expand a string literal to "literal, length" (length excludes the NUL). */
#define PAIR(x) x, sizeof(x) - 1
 50 | 
 51 | void print_data(struct eventlist *el)
 52 | {
 53 | 	struct event *e;
 54 | 	int i;
 55 | 
 56 | 	for (e = el->eventlist; e; e = e->next) {
 57 | 		uint64_t v = 0;
 58 | 		for (i = 0; i < el->num_cpus; i++)
 59 | 			v += event_scaled_value(e, i);
 60 | 		printf("%-30s %'10lu\n", e->event, v);
 61 | 	}
 62 | }
 63 | 
/* Long-option table passed to getopt_long(); each entry maps a long name
 * to the short option letter handled in main(). */
static struct option opts[] = {
	{ "all-cpus", no_argument, 0, 'a' },
	{ "events", required_argument, 0, 'e'},
	{},	/* empty entry ends the table */
};
 69 | 
 70 | void usage(void)
 71 | {
 72 | 	fprintf(stderr, "Usage: jstat [-a] [-e events] program\n"
 73 | 			"--all -a  Measure global system\n"
 74 | 			"-e --events list  Comma separate list of events to measure. Use {} for groups\n"
 75 | 			"Run event_download.py once first to use symbolic events\n");
 76 | 	exit(1);
 77 | }
 78 | 
 79 | void sigint(int sig) {}
 80 | 
 81 | int main(int ac, char **av)
 82 | {
 83 | 	char *events = "instructions,cpu-cycles,cache-misses,cache-references";
 84 | 	int opt;
 85 | 	int child_pipe[2];
 86 | 	struct eventlist *el;
 87 | 	bool measure_all = false;
 88 | 	int measure_pid = -1;
 89 | 	int child_pid;
 90 | 
 91 | 	setlocale(LC_NUMERIC, "");
 92 | 	el = alloc_eventlist();
 93 | 
 94 | 	while ((opt = getopt_long(ac, av, "ae:p:", opts, NULL)) != -1) {
 95 | 		switch (opt) {
 96 | 		case 'e':
 97 | 			if (parse_events(el, optarg) < 0)
 98 | 				exit(1);
 99 | 			events = NULL;
100 | 			break;
101 | 		case 'a':
102 | 			measure_all = true;
103 | 			break;
104 | 		default:
105 | 			usage();
106 | 		}
107 | 	}
108 | 	if (av[optind] == NULL && !measure_all) {
109 | 		fprintf(stderr, "Specify command or -a\n");
110 | 		exit(1);
111 | 	}
112 | 	if (events && parse_events(el, events) < 0)
113 | 		exit(1);
114 | 	pipe(child_pipe);
115 | 	signal(SIGCHLD, SIG_IGN);
116 | 	child_pid = measure_pid = fork();
117 | 	if (measure_pid < 0)
118 | 		err("fork");
119 | 	if (measure_pid == 0) {
120 | 		char buf;
121 | 		/* Wait for events to be set up */
122 | 		read(child_pipe[0], &buf, 1);
123 | 		if (av[optind] == NULL) {
124 | 			pause();
125 | 			_exit(0);
126 | 		}
127 | 		execvp(av[optind], av + optind);
128 | 		write(2, PAIR("Cannot execute program\n"));
129 | 		_exit(1);
130 | 	}
131 | 	if (setup_events(el, measure_all, measure_pid) < 0)
132 | 		exit(1);
133 | 	signal(SIGINT, sigint);
134 | 	if (child_pid >= 0) {
135 | 		write(child_pipe[1], "x", 1);
136 | 		waitpid(measure_pid, NULL, 0);
137 | 	} else {
138 | 		pause();
139 | 	}
140 | 	read_all_events(el);
141 | 	print_data(el);
142 | 	return 0;
143 | }
144 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest.c:
--------------------------------------------------------------------------------
 1 | /* Demonstrate self profiling for context switches */
 2 | #include <sys/time.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include "rdpmc.h"
 6 | 
 7 | #define HW_INTERRUPTS 0x1cb
 8 | 
 9 | typedef unsigned long long u64;
10 | 
11 | u64 get_time(void)
12 | {
13 | 	struct timeval tv;
14 | 	gettimeofday(&tv, NULL);
15 | 	return (u64)tv.tv_sec * 1000000 + tv.tv_usec;
16 | }
17 | 
18 | int main(int ac, char **av)
19 | {
20 | 	int i;
21 | 	int cswitch = 0;
22 | 	struct rdpmc_ctx ctx;
23 | 	int iter = 10000;
24 | 
25 | 	if (av[1])
26 | 		iter = atoi(av[1]);
27 | 	
28 | 	if (rdpmc_open(HW_INTERRUPTS, &ctx) < 0)
29 | 		exit(1);
30 | 
31 | 	u64 t0 = get_time();
32 | 	u64 prev = rdpmc_read(&ctx);
33 | 	for (i = 0; i < iter; i++) {
34 | 		u64 n = rdpmc_read(&ctx);
35 | 		if (n != prev) {
36 | 			cswitch++;
37 | 			prev = n;
38 | 		}
39 | 	}
40 | 			
41 | 	u64 t1 = get_time();
42 | 	
43 | 	printf("%d interrupts, %llu usec duration\n", cswitch, t1-t0);
44 | 
45 | 	rdpmc_close(&ctx);
46 | 	return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest2.c:
--------------------------------------------------------------------------------
 1 | /* Measure a thousand sins */
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <linux/perf_event.h>
 5 | #include <math.h>
 6 | #include "interrupts.h"
 7 | #include "rdpmc.h"
 8 | 
 9 | /* Requires a Intel Sandy or Ivy Bridge CPU for the interrupt test,
10 |    On others it may loop forever, unless you disable the interrupt test.
11 |    This is not a realistic test of real performance because it's too
12 |    predictable for cache and branch predictors,
13 |    see http://halobates.de/blog/p/227 */
14 | 
15 | #define ITER 1000
16 | typedef unsigned long long u64;
17 | 
18 | volatile double var = 10.0;
19 | volatile double var2;
20 | 
21 | int main(void)
22 | {
23 | 	struct rdpmc_ctx ctx;
24 | 	int warmup = 0;
25 | 		
26 | 	if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0)
27 | 		exit(1);
28 | 	interrupts_init();
29 | 	for (;;) {
30 | 		int i;
31 | 	        u64 start_int;
32 | 		u64 a, b;
33 | 
34 | 		start_int = get_interrupts();		
35 | 		a = rdpmc_read(&ctx);
36 | 		for (i = 0; i < ITER; i++)
37 | 			var2 += sin(var);
38 | 		b = rdpmc_read(&ctx);
39 | 		if (get_interrupts() == start_int && warmup > 0) {
40 | 			printf("%u sin() took %llu cycles avg\n", ITER, (b-a)/ITER);
41 | 			break;
42 | 		}
43 | 		warmup++;
44 | 	}
45 | 	interrupts_exit();	
46 | 	rdpmc_close(&ctx);
47 | 	return 0;
48 | }
49 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest3.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <sys/time.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <signal.h>
 6 | #include "rdpmc.h"
 7 | 
 8 | typedef unsigned long long u64;
 9 | typedef long long s64;
10 | 
11 | u64 get_time(void)
12 | {
13 | 	struct timeval tv;
14 | 	gettimeofday(&tv, NULL);
15 | 	return (u64)tv.tv_sec * 1000000 + tv.tv_usec;
16 | }
17 | 
/*
 * Set to 1 by the signal handler and polled by the loop in main().
 * sig_atomic_t is the object type that may be written from a signal
 * handler without undefined behavior.
 */
volatile sig_atomic_t interrupted;

/* Signal handler: ask the measurement loop to terminate. */
void stop(int sig)
{
	(void)sig;
	interrupted = 1;
}
24 | 
25 | int main(int ac, char **av)
26 | {
27 | 	int i;
28 | 	struct rdpmc_ctx ctx;
29 | 	int thresh = 10000;
30 | 
31 | 	if (av[1])
32 | 		thresh = atoi(av[1]);
33 | 	
34 | 	if (rdpmc_open(0, &ctx) < 0)
35 | 		exit(1);
36 | 
37 | 	signal(SIGINT, stop);
38 | 
39 | 	printf("Press Ctrl-C to stop\n");
40 | 
41 | 	u64 prev = rdpmc_read(&ctx);
42 | 
43 | 	i = 0;
44 | 	while (!interrupted) { 
45 | 		u64 next = rdpmc_read(&ctx);
46 | 		s64 delta = next - prev;
47 | 
48 | 		if (delta > thresh)
49 | 			printf("%d: %lld\n", i, delta);
50 | 
51 | 		prev = next;
52 | 		i++;
53 | 	}
54 | 			
55 | 	rdpmc_close(&ctx);
56 | 	return 0;
57 | }
58 | 


--------------------------------------------------------------------------------
/jevents/interrupts.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | /** DOC: Account for interrupts on Intel Core/Xeon systems
18 |  *
19 |  * This is useful for micro benchmarks to filter out measurement
20 |  * samples that are disturbed by a context switch caused by OS
21 |  * noise.
22 |  *
23 |  * Requires a Linux 3.3+ kernel
24 |  */
25 | #include "rdpmc.h"
26 | #include "interrupts.h"
27 | 
28 | /* Intel Sandy Bridge */
29 | #define HW_INTERRUPTS 0x1cb
30 | 
31 | static __thread int int_ok = -1;
32 | static __thread struct rdpmc_ctx int_ctx;
33 | 
34 | /**
35 |  * interrupts_init - Initialize interrupt counter per thread
36 |  *
37 |  * Must be called for each application thread.
38 |  */
39 | void interrupts_init(void)
40 | {
41 | 	int_ok = rdpmc_open(HW_INTERRUPTS, &int_ctx);
42 | }
43 | 
44 | /**
45 |  * interrupts_exit - Free interrupt counter per thread.
46 |  *
47 |  * Must be called for each application thread.
48 |  */
49 | void interrupts_exit(void)
50 | {
51 | 	if (int_ok >= 0)
52 | 		rdpmc_close(&int_ctx);
53 | }
54 | 
55 | /**
56 |  * get_interrupts - get current interrupt counter.
57 |  *
58 |  * Get the current hardware interrupt count. When the number changed
59 |  * for a measurement period you had some sort of context switch.
60 |  * The sample for this period should be discarded.
61 |  * This returns absolute numbers.
62 |  */
63 | unsigned long long get_interrupts(void)
64 | {
65 | 	if (int_ok >= 0)
66 | 		return rdpmc_read(&int_ctx);
67 | 	return 0;
68 | }
69 | 


--------------------------------------------------------------------------------
/jevents/interrupts.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * Copyright (c) 2012,2013 Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that: (1) source code distributions
 8 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 9 |  * distributions including binary code include the above copyright notice and
10 |  * this paragraph in its entirety in the documentation or other materials
11 |  * provided with the distribution
12 |  *
13 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
14 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
15 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 |  */
17 | 
18 | #ifndef INTERRUPTS_H
19 | #define INTERRUPTS_H 1
20 | 
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 | 
25 | void interrupts_init(void);
26 | void interrupts_exit(void);
27 | unsigned long long get_interrupts(void);
28 | 
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/jevents/jevents-internal.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * jevents-internal.h
 3 |  *
 4 |  * Things that jevents internal implementation should call, but that you probably
 5 |  * shouldn't mess with.
 6 |  */
 7 | 
 8 | #ifndef JEVENTS_INTERNAL_H_
 9 | #define JEVENTS_INTERNAL_H_
10 | 
11 | 
12 | void set_last_error(const char *format, ...);
13 | 
14 | 
15 | #endif /* JEVENTS_INTERNAL_H_ */
16 | 


--------------------------------------------------------------------------------
/jevents/jevents.h:
--------------------------------------------------------------------------------
 1 | #ifndef JEVENTS_H
 2 | #define JEVENTS_H 1
 3 | 
 4 | #include <sys/types.h>
 5 | #include <stdbool.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | int json_events(const char *fn,
12 | 		int (*func)(void *data, char *name, char *event, char *desc,
13 | 			    char *pmu),
14 | 		void *data);
15 | char *get_cpu_str(void);
16 | char *get_cpu_str_type(char *type, char **idstr_step);
17 | 
18 | struct perf_event_attr;
19 | 
20 | int jevent_name_to_attr(const char *str, struct perf_event_attr *attr);
21 | int resolve_event(const char *name, struct perf_event_attr *attr);
22 | int read_events(const char *fn);
23 | int walk_events(int (*func)(void *data, char *name, char *event, char *desc),
24 | 		                void *data);
25 | int walk_perf_events(int (*func)(void *data, char *name, char *event, char *desc),
26 | 		     void *data);
27 | char *format_raw_event(struct perf_event_attr *attr, char *name);
28 | int rmap_event(unsigned event, char **name, char **desc);
29 | 
30 | int perf_event_open(struct perf_event_attr *attr, pid_t pid,
31 | 		    int cpu, int group_fd, unsigned long flags);
32 | char *resolve_pmu(int type);
33 | bool jevent_pmu_uncore(const char *str);
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 
/* Error codes used by jevents; 0 means success (see below). */
enum jevents_error {
    JEV_GENERIC_ERROR = -1,
    JEV_NO_PMU_EVENTS_FILE = -2,
};

/*
 * These functions are implemented in C, so C++ callers must see them with
 * C linkage just like the declarations earlier in this header.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Returns a string describing the given error_code. Any code in the jevents_error
 * enum is supported, in addition to 0, which returns "Success". Any other code
 * returns "Unknown error".
 */
const char* jevent_error_to_string(int error_code);

/*
 * When a function returns an error code, this function may return additional details.
 */
const char* jevent_get_error_details(void);

#ifdef __cplusplus
}
#endif
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/jevents/jsession.h:
--------------------------------------------------------------------------------
 1 | #ifndef JSESSION_H
 2 | #define JSESSION_H 1
 3 | 
 4 | #include <linux/perf_event.h>
 5 | #include <stdbool.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | struct event {
12 | 	struct event *next;
13 | 	struct perf_event_attr attr;
14 | 	char *event;
15 | 	bool end_group, group_leader;
16 | 	bool uncore;
17 | 	struct efd {
18 | 		int fd;
19 | 		uint64_t val[3];
20 | 	} efd[0]; /* num_cpus */
21 | };
22 | 
23 | struct eventlist {
24 | 	struct event *eventlist;
25 | 	struct event *eventlist_last;
26 | 	int num_cpus;
27 | };
28 | 
29 | int parse_events(struct eventlist *el, char *events);
30 | int setup_events(struct eventlist *el, bool measure_all, int measure_pid);
31 | int setup_event(struct event *e, int cpu, struct event *leader, bool measure_all, int measure_pid);
32 | int read_event(struct event *e, int cpu);
33 | int read_all_events(struct eventlist *el);
34 | struct eventlist *alloc_eventlist(void);
35 | uint64_t event_scaled_value(struct event *e, int cpu);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/jevents/jsmn.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2010 Serge A. Zaitsev
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to deal
  6 |  * in the Software without restriction, including without limitation the rights
  7 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 |  * copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 |  * THE SOFTWARE.
 21 |  *
 22 |  * Slightly modified by AK to not assume 0 terminated input.
 23 |  */
 24 | 
 25 | #include <stdlib.h>
 26 | #include "jsmn.h"
 27 | 
 28 | /*
 29 |  * Allocates a fresh unused token from the token pull.
 30 |  */
 31 | static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser,
 32 | 				   jsmntok_t *tokens, size_t num_tokens)
 33 | {
 34 | 	jsmntok_t *tok;
 35 | 
 36 | 	if ((unsigned)parser->toknext >= num_tokens)
 37 | 		return NULL;
 38 | 	tok = &tokens[parser->toknext++];
 39 | 	tok->start = tok->end = -1;
 40 | 	tok->size = 0;
 41 | 	return tok;
 42 | }
 43 | 
 44 | /*
 45 |  * Fills token type and boundaries.
 46 |  */
 47 | static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type,
 48 | 			    int start, int end)
 49 | {
 50 | 	token->type = type;
 51 | 	token->start = start;
 52 | 	token->end = end;
 53 | 	token->size = 0;
 54 | }
 55 | 
/*
 * Fills next available token with JSON primitive.
 *
 * Scans from parser->pos up to the first delimiter (or, in non-strict
 * mode, the end of the input) and records the span as a JSMN_PRIMITIVE
 * token. On any error parser->pos is restored to where the scan started.
 * Returns JSMN_SUCCESS, JSMN_ERROR_INVAL for a character outside the
 * printable range 32..126, JSMN_ERROR_NOMEM when no token slot is left,
 * or (strict mode only) JSMN_ERROR_PART when the input ends first.
 */
static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js,
				      size_t len,
				      jsmntok_t *tokens, size_t num_tokens)
{
	jsmntok_t *token;
	int start;

	start = parser->pos;

	for (; parser->pos < len; parser->pos++) {
		switch (js[parser->pos]) {
#ifndef JSMN_STRICT
		/*
		 * In strict mode primitive must be followed by ","
		 * or "}" or "]"
		 */
		case ':':
#endif
		case '\t':
		case '\r':
		case '\n':
		case ' ':
		case ',':
		case ']':
		case '}':
			goto found;
		default:
			break;
		}
		/* reject anything that is not a printable character */
		if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
			parser->pos = start;
			return JSMN_ERROR_INVAL;
		}
	}
#ifdef JSMN_STRICT
	/*
	 * In strict mode primitive must be followed by a
	 * comma/object/array.
	 */
	parser->pos = start;
	return JSMN_ERROR_PART;
#endif

found:
	token = jsmn_alloc_token(parser, tokens, num_tokens);
	if (token == NULL) {
		parser->pos = start;
		return JSMN_ERROR_NOMEM;
	}
	jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
	/* step back so the caller's loop increment lands on the delimiter */
	parser->pos--;
	return JSMN_SUCCESS;
}
112 | 
113 | /*
114 |  * Fills next token with JSON string.
115 |  */
116 | static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js,
117 | 				   size_t len,
118 | 				   jsmntok_t *tokens, size_t num_tokens)
119 | {
120 | 	jsmntok_t *token;
121 | 	int start = parser->pos;
122 | 
123 | 	parser->pos++;
124 | 
125 | 	/* Skip starting quote */
126 | 	for (; parser->pos < len; parser->pos++) {
127 | 		char c = js[parser->pos];
128 | 
129 | 		/* Quote: end of string */
130 | 		if (c == '\"') {
131 | 			token = jsmn_alloc_token(parser, tokens, num_tokens);
132 | 			if (token == NULL) {
133 | 				parser->pos = start;
134 | 				return JSMN_ERROR_NOMEM;
135 | 			}
136 | 			jsmn_fill_token(token, JSMN_STRING, start+1,
137 | 					parser->pos);
138 | 			return JSMN_SUCCESS;
139 | 		}
140 | 
141 | 		/* Backslash: Quoted symbol expected */
142 | 		if (c == '\\') {
143 | 			parser->pos++;
144 | 			switch (js[parser->pos]) {
145 | 				/* Allowed escaped symbols */
146 | 			case '\"':
147 | 			case '/':
148 | 			case '\\':
149 | 			case 'b':
150 | 			case 'f':
151 | 			case 'r':
152 | 			case 'n':
153 | 			case 't':
154 | 				break;
155 | 				/* Allows escaped symbol \uXXXX */
156 | 			case 'u':
157 | 				/* TODO */
158 | 				break;
159 | 				/* Unexpected symbol */
160 | 			default:
161 | 				parser->pos = start;
162 | 				return JSMN_ERROR_INVAL;
163 | 			}
164 | 		}
165 | 	}
166 | 	parser->pos = start;
167 | 	return JSMN_ERROR_PART;
168 | }
169 | 
/*
 * Parse JSON string and fill tokens.
 *
 * Walks js from parser->pos to len one character at a time and appends
 * tokens to the caller-supplied array of num_tokens entries.
 * parser->toksuper tracks the innermost still-open object/array (-1 for
 * none); each new child increments that token's size.
 *
 * Returns JSMN_SUCCESS, or a negative jsmnerr_t: JSMN_ERROR_NOMEM when
 * the token array is full, JSMN_ERROR_INVAL for a mismatched or unmatched
 * closing bracket (and, in strict mode, an unexpected character),
 * JSMN_ERROR_PART when an object/array is still open at the end of the
 * input, or whatever error the string/primitive helpers report.
 */
jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
		     jsmntok_t *tokens,
		     unsigned int num_tokens)
{
	jsmnerr_t r;
	int i;
	jsmntok_t *token;

	for (; parser->pos < len; parser->pos++) {
		char c;
		jsmntype_t type;

		c = js[parser->pos];
		switch (c) {
		case '{':
		case '[':
			/* open a container; its end stays -1 until it is closed */
			token = jsmn_alloc_token(parser, tokens, num_tokens);
			if (token == NULL)
				return JSMN_ERROR_NOMEM;
			if (parser->toksuper != -1)
				tokens[parser->toksuper].size++;
			token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
			token->start = parser->pos;
			/* the new container becomes the parent of what follows */
			parser->toksuper = parser->toknext - 1;
			break;
		case '}':
		case ']':
			type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
			/* find the most recently opened token that is still open */
			for (i = parser->toknext - 1; i >= 0; i--) {
				token = &tokens[i];
				if (token->start != -1 && token->end == -1) {
					if (token->type != type)
						return JSMN_ERROR_INVAL;
					parser->toksuper = -1;
					token->end = parser->pos + 1;
					break;
				}
			}
			/* Error if unmatched closing bracket */
			if (i == -1)
				return JSMN_ERROR_INVAL;
			/* keep searching backwards for the enclosing open container */
			for (; i >= 0; i--) {
				token = &tokens[i];
				if (token->start != -1 && token->end == -1) {
					parser->toksuper = i;
					break;
				}
			}
			break;
		case '\"':
			r = jsmn_parse_string(parser, js, len, tokens,
					      num_tokens);
			if (r < 0)
				return r;
			if (parser->toksuper != -1)
				tokens[parser->toksuper].size++;
			break;
		/* whitespace and separators produce no token */
		case '\t':
		case '\r':
		case '\n':
		case ':':
		case ',':
		case ' ':
			break;
#ifdef JSMN_STRICT
			/*
			 * In strict mode primitives are:
			 * numbers and booleans.
			 */
		case '-':
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case 't':
		case 'f':
		case 'n':
#else
			/*
			 * In non-strict mode every unquoted value
			 * is a primitive.
			 */
		default:
#endif
			r = jsmn_parse_primitive(parser, js, len, tokens,
						 num_tokens);
			if (r < 0)
				return r;
			if (parser->toksuper != -1)
				tokens[parser->toksuper].size++;
			break;

#ifdef JSMN_STRICT
			/* Unexpected char in strict mode */
		default:
			return JSMN_ERROR_INVAL;
#endif
		}
	}

	for (i = parser->toknext - 1; i >= 0; i--) {
		/* Unmatched opened object or array */
		if (tokens[i].start != -1 && tokens[i].end == -1)
			return JSMN_ERROR_PART;
	}

	return JSMN_SUCCESS;
}
287 | 
288 | /*
289 |  * Creates a new parser based over a given  buffer with an array of tokens
290 |  * available.
291 |  */
292 | void jsmn_init(jsmn_parser *parser)
293 | {
294 | 	parser->pos = 0;
295 | 	parser->toknext = 0;
296 | 	parser->toksuper = -1;
297 | }
298 | 


--------------------------------------------------------------------------------
/jevents/jsmn.h:
--------------------------------------------------------------------------------
 1 | #ifndef __JSMN_H_
 2 | #define __JSMN_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /*
 9 |  * JSON type identifier. Basic types are:
10 |  *	o Object
11 |  *	o Array
12 |  *	o String
13 |  *	o Other primitive: number, boolean (true/false) or null
14 |  */
15 | typedef enum {
16 | 	JSMN_PRIMITIVE = 0,
17 | 	JSMN_OBJECT = 1,
18 | 	JSMN_ARRAY = 2,
19 | 	JSMN_STRING = 3
20 | } jsmntype_t;
21 | 
22 | typedef enum {
23 | 	/* Not enough tokens were provided */
24 | 	JSMN_ERROR_NOMEM = -1,
25 | 	/* Invalid character inside JSON string */
26 | 	JSMN_ERROR_INVAL = -2,
27 | 	/* The string is not a full JSON packet, more bytes expected */
28 | 	JSMN_ERROR_PART = -3,
29 | 	/* Everything was fine */
30 | 	JSMN_SUCCESS = 0
31 | } jsmnerr_t;
32 | 
/*
 * JSON token description.
 * @param		type	type (object, array, string etc.)
 * @param		start	start position in JSON data string
 * @param		end		end position in JSON data string
 * @param		size	number of tokens added directly under this
 *				token; jsmn_parse() increments it on the
 *				enclosing object/array for each new child
 */
typedef struct {
	jsmntype_t type;
	int start;
	int end;
	int size;
} jsmntok_t;
45 | 
46 | /*
47 |  * JSON parser. Contains an array of token blocks available. Also stores
48 |  * the string being parsed now and current position in that string
49 |  */
50 | typedef struct {
51 | 	unsigned int pos; /* offset in the JSON string */
52 | 	int toknext; /* next token to allocate */
53 | 	int toksuper; /* superior token node, e.g parent object or array */
54 | } jsmn_parser;
55 | 
56 | /*
57 |  * Create JSON parser over an array of tokens
58 |  */
59 | void jsmn_init(jsmn_parser *parser);
60 | 
61 | /*
62 |  * Run JSON parser. It parses a JSON data string into and array of tokens,
63 |  * each describing a single JSON object.
64 |  */
65 | jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
66 | 		     size_t len,
67 | 		     jsmntok_t *tokens, unsigned int num_tokens);
68 | 
69 | #ifdef __cplusplus
70 | }
71 | #endif
72 | 
73 | #endif /* __JSMN_H_ */
74 | 


--------------------------------------------------------------------------------
/jevents/json.c:
--------------------------------------------------------------------------------
  1 | /* Parse JSON files using the JSMN parser. */
  2 | 
  3 | /*
  4 |  * Copyright (c) 2014, Intel Corporation
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *
 10 |  * 1. Redistributions of source code must retain the above copyright notice,
 11 |  * this list of conditions and the following disclaimer.
 12 |  *
 13 |  * 2. Redistributions in binary form must reproduce the above copyright
 14 |  * notice, this list of conditions and the following disclaimer in the
 15 |  * documentation and/or other materials provided with the distribution.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | */
 30 | 
 31 | #include <stdlib.h>
 32 | #include <string.h>
 33 | #include <sys/mman.h>
 34 | #include <sys/stat.h>
 35 | #include <sys/fcntl.h>
 36 | #include <stdio.h>
 37 | #include <unistd.h>
 38 | #include <linux/kernel.h>
 39 | #include "jsmn.h"
 40 | #include "json.h"
 41 | #include "jevents-internal.h"
 42 | 
 43 | static char *mapfile(const char *fn, size_t *size)
 44 | {
 45 | 	struct stat st;
 46 | 	char *map = NULL;
 47 | 	int err;
 48 | 	int fd = open(fn, O_RDONLY);
 49 | 
 50 | 	if (fd < 0)
 51 | 		return NULL;
 52 | 	err = fstat(fd, &st);
 53 | 	if (err < 0)
 54 | 		goto out;
 55 | 	*size = st.st_size;
 56 | 	map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
 57 | 	if (map == (char *)MAP_FAILED)
 58 | 		map = NULL;
 59 | out:
 60 | 	close(fd);
 61 | 	return map;
 62 | }
 63 | 
 64 | static void unmapfile(char *map, size_t size)
 65 | {
 66 | 	munmap(map, size);
 67 | }
 68 | 
 69 | /*
 70 |  * Parse json file using jsmn. Return array of tokens,
 71 |  * and mapped file. Caller needs to free array.
 72 |  */
 73 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len)
 74 | {
 75 | 	jsmn_parser parser;
 76 | 	jsmntok_t *tokens;
 77 | 	jsmnerr_t res;
 78 | 	unsigned sz;
 79 | 
 80 | 	*map = mapfile(fn, size);
 81 | 	if (!*map) {
 82 | 	    set_last_error("Failed to open event file %s", fn);
 83 | 		return NULL;
 84 | 	}
 85 | 	/* Heuristic */
 86 | 	sz = *size * 16;
 87 | 	tokens = malloc(sz);
 88 | 	if (!tokens) {
 89 | 	    set_last_error("malloc failed");
 90 | 		goto error;
 91 | 	}
 92 | 	jsmn_init(&parser);
 93 | 	res = jsmn_parse(&parser, *map, *size, tokens,
 94 | 			 sz / sizeof(jsmntok_t));
 95 | 	if (res != JSMN_SUCCESS) {
 96 | 	    set_last_error("Failed while parsing event file %s, error: %d", fn, res);
 97 | 		fprintf(stderr, "%s: json error %d\n", fn, res);
 98 | 		goto error_free;
 99 | 	}
100 | 	if (len)
101 | 		*len = parser.toknext;
102 | 	return tokens;
103 | error_free:
104 | 	free(tokens);
105 | error:
106 | 	unmapfile(*map, *size);
107 | 	return NULL;
108 | }
109 | 
110 | void free_json(char *map, size_t size, jsmntok_t *tokens)
111 | {
112 | 	free(tokens);
113 | 	unmapfile(map, size);
114 | }
115 | 
/* Count how many of the first end bytes of map are equal to c. */
static int countchar(char *map, char c, int end)
{
	const char *p = map;
	int hits = 0;
	int left;

	for (left = end; left > 0; left--, p++)
		hits += (*p == c);
	return hits;
}
125 | 
126 | /* Return line number of a jsmn token */
127 | int json_line(char *map, jsmntok_t *t)
128 | {
129 | 	return countchar(map, '\n', t->start) + 1;
130 | }
131 | 
132 | static const char *jsmn_types[] = {
133 | 	[JSMN_PRIMITIVE] = "primitive",
134 | 	[JSMN_ARRAY] = "array",
135 | 	[JSMN_OBJECT] = "object",
136 | 	[JSMN_STRING] = "string"
137 | };
138 | 
139 | #define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?")
140 | 
141 | /* Return type name of a jsmn token */
142 | const char *json_name(jsmntok_t *t)
143 | {
144 | 	return LOOKUP(jsmn_types, t->type);
145 | }
146 | 
147 | int json_len(jsmntok_t *t)
148 | {
149 | 	return t->end - t->start;
150 | }
151 | 
152 | /* Is string t equal to s? */
153 | int json_streq(char *map, jsmntok_t *t, const char *s)
154 | {
155 | 	unsigned len = t->end - t->start;
156 | 	return len == strlen(s) && !strncasecmp(map + t->start, s, len);
157 | }
158 | 


--------------------------------------------------------------------------------
/jevents/json.h:
--------------------------------------------------------------------------------
 1 | #ifndef JSON_H
 2 | #define JSON_H 1
 3 | 
 4 | #include "jsmn.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len);
11 | void free_json(char *map, size_t size, jsmntok_t *tokens);
12 | int json_line(char *map, jsmntok_t *t);
13 | const char *json_name(jsmntok_t *t);
14 | int json_streq(char *map, jsmntok_t *t, const char *s);
15 | int json_len(jsmntok_t *t);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/jevents/libjevents.spec:
--------------------------------------------------------------------------------
 1 | Name:		libjevents
 2 | Version:	1
 3 | Release:	1%{?dist}
 4 | Summary:	libjevents shared library from pmu-tools
 5 | 
 6 | License:	BSD
 7 | URL:		https://github.com/andikleen/pmu-tools/jevents
 8 | # git clone https://github.com/andikleen/pmu-tools.git pmu-tools
 9 | # cd pmu-tools && tar czf jevents.tar.gz jevents/
10 | Source0:	jevents.tar.gz
11 | 
12 | %description
13 | jevents library from pmu-tools.
14 | 
15 | %prep
16 | %setup -q -n jevents
17 | 
18 | 
19 | %build
20 | %make_build PREFIX=%{buildroot}/usr
21 | 
22 | %install
23 | %make_install PREFIX=%{buildroot}/usr
24 | 
25 | %files
26 | /usr/bin/event-rmap
27 | /usr/bin/listevents
28 | /usr/bin/showevent
29 | /usr/include/*
30 | /usr/lib64/libjevents.a
31 | 
32 | %changelog
33 | 
34 | * Sat Mar 3 2018 Pablo Llopis <pablo.llopis@gmail.com> 1-1
35 | - Initial specfile version
36 | 


--------------------------------------------------------------------------------
/jevents/listevents.c:
--------------------------------------------------------------------------------
 1 | /* List all events */
 2 | /* -v print descriptions */
 3 | /* pattern  print only events matching shell pattern */
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | #include <fnmatch.h>
 8 | #include <assert.h>
 9 | #include "jevents.h"
10 | 
11 | int verbose = 0;
12 | 
/* One listed event. All three strings are strdup() copies made by store_event(). */
struct event {
	char *name;	/* event name; sort key in cmp_events() and first printed column */
	char *event;	/* event string printed after the name */
	char *desc;	/* description, printed only with -v and when non-empty */
};
18 | 
/* State shared by the two walk passes (count_event, then store_event). */
struct walk_data {
	int count;		/* number of matching events, filled by count_event() */
	int ind;		/* next free slot in events[], advanced by store_event() */
	char *match;		/* fnmatch() pattern for event names, or NULL for all */
	struct event *events;	/* array of count entries allocated in main() */
};
25 | 
26 | static int count_event(void *data, char *name, char *event, char *desc)
27 | {
28 | 	struct walk_data *wd = data;
29 | 	if (wd->match && fnmatch(wd->match, name, 0))
30 | 		return 0;
31 | 	wd->count++;
32 | 	return 0;
33 | }
34 | 
35 | static int store_event(void *data, char *name, char *event, char *desc)
36 | {
37 | 	struct walk_data *wd = data;
38 | 
39 | 	if (wd->match && fnmatch(wd->match, name, 0))
40 | 		return 0;
41 | 	assert(wd->ind < wd->count);
42 | 	struct event *e = &wd->events[wd->ind++];
43 | 	e->name = strdup(name);
44 | 	e->event = strdup(event);
45 | 	e->desc = strdup(desc);
46 | 	return 0;
47 | }
48 | 
49 | static int cmp_events(const void *ap, const void *bp)
50 | {
51 | 	const struct event *a = ap;
52 | 	const struct event *b = bp;
53 | 	return strcmp(a->name, b->name);
54 | }
55 | 
56 | int main(int ac, char **av)
57 | {
58 | 	if (av[1] && !strcmp(av[1], "-v")) {
59 | 		av++;
60 | 		verbose = 1;
61 | 	}
62 | 
63 | 	read_events(NULL);
64 | 	struct walk_data wd = { .match = av[1] };
65 | 	walk_events(count_event, &wd);
66 | 	walk_perf_events(count_event, &wd);
67 | 	wd.events = calloc(sizeof(struct event), wd.count);
68 | 	walk_events(store_event, &wd);
69 | 	walk_perf_events(store_event, &wd);
70 | 	qsort(wd.events, wd.count, sizeof(struct event), cmp_events);
71 | 	int i;
72 | 	for (i = 0; i < wd.count; i++) {
73 | 		struct event *e = &wd.events[i];
74 | 		printf("%-40s ", e->name);
75 | 		printf("%s\n", e->event);
76 | 		if (verbose && e->desc[0])
77 | 			printf("\t%s\n", e->desc); /* XXX word wrap */
78 | 	}
79 | 	return 0;
80 | }
81 | 


--------------------------------------------------------------------------------
/jevents/measure.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2012,2013 Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that: (1) source code distributions
  7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
  8 |  * distributions including binary code include the above copyright notice and
  9 |  * this paragraph in its entirety in the documentation or other materials
 10 |  * provided with the distribution
 11 |  *
 12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 15 |  */
 16 | 
 17 | #include <stdlib.h>
 18 | #include <pthread.h>
 19 | #include <string.h>
 20 | #include <stdio.h>
 21 | #include "measure.h"
 22 | #include "rdpmc.h"
 23 | 
 24 | /**
 25 |  * DOC: Measuring of predefined counter groups in a process
 26 |  *
 27 |  * Higher level interface to measure CPU performance counters in process
 28 |  * context. The program calls the appropriate functions around
 29 |  * code that should be measured in individual threads.
 30 |  *
 31 |  * The data is accumulated globally and printed
 32 |  */
 33 | 
/* One result record, created by alloc_res() for each measure_group_init() call. */
struct res {
	struct res *next;			/* link in the all_res list */
	unsigned long long start[N_COUNTER];	/* counter values sampled by measure_group_start() */
	unsigned long long count[N_COUNTER];	/* deltas accumulated by measure_group_stop() */
	char *name;				/* strdup() copy of the name ("" when NULL was passed) */
	struct measure *measure;		/* group table handed to measure_group_init() */
};
 41 | 
/* Head of the list of all result records; alloc_res() pushes new ones here. */
static struct res *all_res;
/* Held by alloc_res() while it links a record into all_res. */
static pthread_mutex_t all_res_lock = PTHREAD_MUTEX_INITIALIZER;
/* Per-thread counter contexts, one per slot, opened by measure_group_init(). */
static __thread struct rdpmc_ctx ctx[N_COUNTER];
/* Per-thread record that measure_group_start()/measure_group_stop() update. */
static __thread struct res *cur_res;
 46 | 
 47 | static struct res *alloc_res(char *name, struct measure *measure)
 48 | {
 49 | 	struct res *r = calloc(sizeof(struct res), 1);
 50 | 	if (!name)
 51 | 		name = "";
 52 | 	r->name = strdup(name);
 53 | 	r->measure = measure;
 54 | 	pthread_mutex_lock(&all_res_lock);
 55 | 	r->next = all_res;
 56 | 	all_res = r;
 57 | 	pthread_mutex_unlock(&all_res_lock);
 58 | 	return r;
 59 | }
 60 | 
 61 | /**
 62 |  * measure_group_init - Initialize a measurement group
 63 |  * @g: measurement group (usually predefined)
 64 |  * @name: name of measurements or NULL
 65 |  *
 66 |  * Initialize a measurement group and allocate the counters.
 67 |  * All measurements with the same name are printed together (so multiple
 68 |  * names can be used to measure different parts of the program)
 69 |  * Exits when the counters cannot be allocated.
 70 |  * Has to be freed in the same thread with measure_group_finish()
 71 |  * Only one measurement group per thread can be active at a time.
 72 |  */
 73 | void measure_group_init(struct measure *g, char *name)
 74 | {
 75 | 	struct res *r = alloc_res(name, g);
 76 | 	cur_res = r;
 77 | 
 78 | 	int i;
 79 | 	struct rdpmc_ctx *leader = NULL;
 80 | 	for (i = 0; i < N_COUNTER; i++) {
 81 | 		struct perf_event_attr attr = {
 82 | 			.type = PERF_TYPE_RAW,
 83 | 			.size = sizeof(struct perf_event_attr),
 84 | 			.config = g[i].counter,
 85 | 			.sample_type = PERF_SAMPLE_READ,
 86 | 			.exclude_kernel = 1,
 87 | 		};
 88 | 		if (rdpmc_open_attr(&attr, &ctx[i], leader) < 0)
 89 | 			exit(1);
 90 | 		if (!leader)
 91 | 			leader = &ctx[i];
 92 | 	}
 93 | }
 94 | 
 95 | /**	
 96 |  * measure_group_start - Start measuring in a measurement group.
 97 |  * 
 98 |  * Start a measurement period for the current group in this thread.
 99 |  * Multiple measurement periods are accumulated.
100 |  */
101 | void measure_group_start(void)
102 | {
103 | 	int i;
104 | 	for (i = 0; i < N_COUNTER; i++)
105 | 		cur_res->start[i] = rdpmc_read(&ctx[i]);
106 | }
107 | 
108 | /**
109 |  * measure_group_stop - Stop measuring a measurement group
110 |  *
111 |  * Stop the measurement for the current measurement group.
112 |  */
113 | void measure_group_stop(void)
114 | {
115 | 	unsigned long long end[N_COUNTER];
116 | 	int i;
117 | 	for (i = 0; i < N_COUNTER; i++)
118 | 		end[i] = rdpmc_read(&ctx[i]);
119 | 	for (i = 0; i < N_COUNTER; i++)
120 | 		cur_res->count[i] += end[i] - cur_res->start[i];
121 | }
122 | 
123 | /**
124 |  * measurement_group_finish - Free the counter resources of a group
125 |  *
126 |  * Has to be called in the thread that executed measure_group_init()
127 |  */
128 | void measure_group_finish(void)
129 | {
130 | 	cur_res = NULL;
131 | 	int i;
132 | 	for (i = 0; i < N_COUNTER; i++)
133 | 		rdpmc_close(&ctx[i]);	      
134 | }
135 | 
136 | static int cmp_res(const void *a, const void *b)
137 | {
138 | 	struct res **ra = (struct res **)a;
139 | 	struct res **rb = (struct res **)b;
140 | 	return strcmp((*ra)->name, (*rb)->name);
141 | }
142 | 
143 | static struct res **sort_results(int *lenp)
144 | {
145 | 	struct res *r;
146 | 	int len = 0;
147 | 	for (r = all_res; r; r = r->next)
148 | 		len++;
149 | 	struct res **sr = malloc(len * sizeof(struct res *));
150 | 	int j = 0;
151 | 	for (r = all_res; r; r = r->next)
152 | 		sr[j++] = r;
153 | 	qsort(sr, len, sizeof(struct res *), cmp_res);
154 | 	*lenp = len;
155 | 	return sr;
156 | }
157 | 
158 | static void print_counters(FILE *fh, struct measure *m, 
159 | 		           unsigned long long total[N_COUNTER])
160 | {
161 | 	int i;
162 | 	for (i = 0; i < N_COUNTER; i++) {
163 | 		if (m[i].name == NULL)
164 | 			continue;
165 | 		if (m[i].func)
166 | 			total[i] = m[i].func(m, total, i);
167 | 		printf("%20s\t%8llu ", m[i].name, total[i]);
168 | 		if (m[i].ratio_to >= 0)
169 | 			printf("(%.2f%%)", 
170 | 			       100.0 * (total[m[i].ratio_to] * (double)total[i]));
171 | 		putchar('\n');
172 | 	}
173 | }
174 | 
175 | /**
176 |  * measure_print_all - Print the accumulated data for all measurement groups
177 |  * @fh:		stdio file descriptor to output data
178 |  */
179 | void measure_print_all(FILE *fh)
180 | {
181 | 	unsigned long long total[N_COUNTER];
182 | 	int len;
183 | 	struct res **sr = sort_results(&len);
184 | 	int i, j;
185 | 
186 | 	for (j = 0; j < len; j++) {
187 | 		if (j == 0 || strcmp(sr[j - 1]->name, sr[j]->name)) {
188 | 			if (j > 0) {
189 | 				printf("%s:\n", sr[j]->name);
190 | 				print_counters(fh, sr[j]->measure, total);
191 | 			}
192 | 			memset(total, 0, sizeof(unsigned long long) * N_COUNTER);
193 | 		}				
194 | 		for (i = 0; i < N_COUNTER; i++)
195 | 			total[i] += sr[j]->count[i];
196 | 	}
197 | 	free(sr);       
198 | }
199 | 
200 | /**
201 |  * measure_free_all - Free the accumulated data from past measurements
202 |  */
203 | void measure_free_all(void)
204 | {
205 | 	struct res *r, *next;
206 | 	for (r = all_res; r; r = next) {
207 | 		next = r->next;
208 | 		free(r);
209 | 	}
210 | 	all_res = NULL;
211 | }
212 | 


--------------------------------------------------------------------------------
/jevents/measure.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | 
18 | #ifndef MEASURE_H
19 | #define MEASURE_H 1
20 | 
21 | #include <stdio.h>
22 | 
23 | #ifdef __cplusplus
24 | extern "C" {
25 | #endif
26 | 
27 | #define N_COUNTER 4
28 | 
/*
 * One slot of a measurement group. measure_group_init() indexes the table
 * it is given from 0 to N_COUNTER - 1.
 */
struct measure {
	char *name;			/* label printed for the slot; slots with a NULL name are not printed */
	unsigned long long counter;	/* used as perf_event_attr.config of a PERF_TYPE_RAW event */
	int ratio_to; /* or -1 */	/* index of the slot a percentage is computed against */
	/* optional: when set, its return value replaces total[i] before printing */
	unsigned long long (*func)(struct measure *m, 
			           unsigned long long total[N_COUNTER], int i);
};
36 | 
/*
 * Initializer helpers for struct measure tables, available only when the
 * includer defines EVENT_MACROS. The stringified argument becomes the name
 * and the argument itself the counter value.
 */
#ifdef EVENT_MACROS
/* name #x, counter x, ratio_to y */
#define ETO(x,y) { #x, x, y }
/* like ETO() with ratio_to slot 0 */
#define ETO0(x) ETO(x, 0)
/* no ratio (ratio_to = -1) */
#define E(x) { #x, x, -1 }
/* like ETO() plus the func callback f */
#define EFUNC(x,y, f) { #x, x, y, f }
#endif
43 | 
44 | void measure_group_init(struct measure *g, char *name);
45 | void measure_group_start(void);
46 | void measure_group_stop(void);
47 | void measure_group_finish(void);
48 | void measure_print_all(FILE *fh);
49 | void measure_free_all(void);
50 | 
51 | #ifdef __cplusplus
52 | }
53 | #endif
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/jevents/perf-iter.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013 Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that: (1) source code distributions
  7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
  8 |  * distributions including binary code include the above copyright notice and
  9 |  * this paragraph in its entirety in the documentation or other materials
 10 |  * provided with the distribution
 11 |  *
 12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 15 |  */
 16 | 
 17 | /**
 18 |  * DOC: A simple perf library to manage the perf ring buffer
 19 |  *
 20 |  * This library provides a simple wrapping layer for the perf 
 21 |  * mmap ring buffer. This allows to access perf events in 
 22 |  * zero-copy from a user program.
 23 |  */
 24 | 
 25 | #include <linux/perf_event.h>
 26 | #include <unistd.h>
 27 | #include <string.h>
 28 | #include <sys/mman.h>
 29 | #include <sys/ioctl.h>
 30 | #include "jevents.h"
 31 | 
 32 | #include "util.h"
 33 | #include "perf-iter.h"
 34 | 
/**
 * perf_iter_init - Initialize iterator for perf ring buffer
 * @iter: Iterator to initialize.
 * @pfd: perf_fd from perf_fd_open() to use with the iterator.
 *
 * Needs to be called first to start walking a perf buffer.
 * Takes a snapshot of data_head/data_tail from the mmap'ed control page
 * and derives the read position, the number of available bytes and the
 * start of the data area from it.
 */

void perf_iter_init(struct perf_iter *iter, struct perf_fd *pfd)
{
	int pagesize = sysconf(_SC_PAGESIZE);
	/* log2 of the page size (ffs() is 1-based) */
	int page_shift = ffs(pagesize) - 1;

	iter->mpage = pfd->mpage;
	/* data area is 2^buf_size_shift pages, expressed in bytes */
	iter->bufsize = (1ULL << (pfd->buf_size_shift + page_shift));
	iter->ring_buffer_mask = iter->bufsize - 1;
	/* start reading at the tail, reduced to an offset inside the data area */
	iter->cur = iter->mpage->data_tail & iter->ring_buffer_mask;
	/* Kernel only changes head */
	iter->raw_head = iter->mpage->data_head;
	/* bytes between tail and the head snapshot */
	iter->avail = iter->raw_head - iter->mpage->data_tail;
        iter->head = iter->raw_head & iter->ring_buffer_mask;
	/* barrier after sampling head; mb() is defined outside this file (util.h is included above) */
	mb();
	/* the data area begins one page after the start of the mapping */
	iter->data = (char *)(iter->mpage) + pagesize;
}
 59 | 
 60 | /**
 61 |  * perf_buffer_read - Access data in perf ring iterator.
 62 |  * @iter: Iterator to copy data from
 63 |  * @buffer: Temporary buffer to use for wrapped events
 64 |  * @bufsize: Size of buffer
 65 |  *
 66 |  * Return the next available perf_event_header in the ring buffer.
 67 |  * This normally does zero copy, but for wrapped events
 68 |  * they are copied into the temporary buffer supplied and a
 69 |  * pointer into that is returned.
 70 |  *
 71 |  * Return: NULL when nothing available, otherwise perf_event_header.
 72 |  */
 73 | 
 74 | struct perf_event_header *perf_buffer_read(struct perf_iter *iter, void *buffer, int bufsize)
 75 | {
 76 | 	struct perf_event_header *hdr = (struct perf_event_header *)(iter->data + iter->cur);
 77 | 	u64 left = iter->bufsize - iter->cur;
 78 | 
 79 | 	if (left >= sizeof(hdr->size) && hdr->size <= left) {
 80 | 		iter->cur += hdr->size;
 81 | 		iter->avail -= hdr->size;
 82 | 		/* Copy less fast path */
 83 | 		return hdr;
 84 | 	} else {
 85 | 		/*
 86 | 		 * Buffer wraps. This case is untested in this example.
 87 | 		 * Assumes hdr->size is always continuous by itself.
 88 | 		 */
 89 | 		if (left) {
 90 | 			if (hdr->size > bufsize)
 91 | 				return NULL;
 92 | 			memcpy(buffer, hdr, left);
 93 | 		} else {
 94 | 			hdr = (struct perf_event_header *)iter->data;
 95 | 			if (hdr->size > bufsize)
 96 | 				return NULL;
 97 | 		}
 98 | 		memcpy(buffer + left, iter->data, hdr->size - left);
 99 | 		iter->cur = hdr->size - left;
100 | 		iter->avail -= hdr->size;
101 | 		return buffer;
102 | 	}
103 | }
104 | 
105 | /**
106 |  * perf_iter_continue - Allow the kernel to log over our data.
107 |  * @iter: Iterator.
108 |  * Tell the kernel we are finished with the data and it can
109 |  * continue logging.
110 |  */
111 | 
112 | void perf_iter_continue(struct perf_iter *iter)
113 | {
114 | 	iter->mpage->data_tail = iter->raw_head;
115 | 	mb();
116 | }
117 | 
/*
 * Size in bytes of the perf mapping for a buffer of 2^buf_size_shift data
 * pages: the data pages plus one extra page.
 */
static unsigned perf_mmap_size(int buf_size_shift)
{
	unsigned data_pages = 1U << buf_size_shift;

	return (data_pages + 1) * sysconf(_SC_PAGESIZE);
}
122 | 
/**
 * perf_fd_open - Open a perf event with ring buffer for the current thread
 * @p: perf_fd to initialize
 * @attr: perf event attribute to use
 * @buf_size_shift: log2 of buffer size.
 *
 * Shorthand for perf_fd_open_other() with pid 0 and cpu -1.
 * Return: -1 on error, otherwise 0.
 */
int perf_fd_open(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift)
{
	const int current_pid = 0;
	const int any_cpu = -1;

	return perf_fd_open_other(p, attr, buf_size_shift, current_pid, any_cpu);
}
134 | 
135 | /**
136 |  * perf_fd_open_other - Open a perf event with ring buffer for other thread or cpu
137 |  * @p: perf_fd to initialize
138 |  * @attr: perf event attribute to use
139 |  * @buf_size_shift: log2 of buffer size.
140 |  * @pid: pid/tid to trace, or 0 for current, or -1 for any
141 |  * @cpu: cpu to trace, or -1 for any.
142 |  * Return: -1 on error, otherwise 0.
143 |  */
144 | int perf_fd_open_other(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift,
145 | 		       int pid, int cpu)
146 | {
147 | 	p->pfd = perf_event_open(attr, pid, cpu, -1, 0);
148 | 	if (p->pfd < 0)
149 | 		return -1;
150 | 
151 | 	struct perf_event_mmap_page *mpage;
152 | 	mpage = mmap(NULL,  perf_mmap_size(buf_size_shift),
153 | 		    PROT_READ|PROT_WRITE, MAP_SHARED,
154 | 		   p->pfd, 0);
155 | 	if (mpage == (struct perf_event_mmap_page *)-1L) {
156 | 		close(p->pfd);
157 | 		return -1;
158 | 	}
159 | 	p->mpage = mpage;
160 | 	p->buf_size_shift = buf_size_shift;
161 | 	return 0;
162 | }
163 | 
164 | /**
165 |  * perf_fd_close - Close perf_fd
166 |  * @p: pfd to close.
167 |  */
168 | 
169 | void perf_fd_close(struct perf_fd *p)
170 | {
171 | 	munmap(p->mpage, perf_mmap_size(p->buf_size_shift));
172 | 	close(p->pfd);
173 | 	p->mpage = NULL;
174 | }
175 | 
176 | /**
177 |  * perf_enable - Start perf collection on pfd
178 |  * @p: perf fd
179 |  * Return: -1 for error, otherwise 0.
180 |  */
181 | 
182 | int perf_enable(struct perf_fd *p)
183 | {
184 | 	return ioctl(p->pfd, PERF_EVENT_IOC_ENABLE, 0);
185 | }
186 | 
187 | /**
188 |  * perf_enable - Stop perf collection on pfd
189 |  * @p: perf fd
190 |  * Return: -1 for error, otherwise 0.
191 |  */
192 | int perf_disable(struct perf_fd *p)
193 | {
194 | 	return ioctl(p->pfd, PERF_EVENT_IOC_DISABLE, 0);
195 | }
196 | 


--------------------------------------------------------------------------------
/jevents/perf-iter.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PERF_ITER_H
 2 | #define _PERF_ITER_H 1
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
/*
 * Forward declarations so this header can be included without (or before)
 * <linux/perf_event.h>. struct perf_event_attr is named in the prototypes
 * below; without a file-scope declaration it would be scoped to each
 * prototype and not match the real type.
 */
struct perf_event_mmap_page;
struct perf_event_header;
struct perf_event_attr;
12 | 
/* Iterator for perf ring buffer; every field is set up by perf_iter_init() */

struct perf_iter {
	uint64_t ring_buffer_mask;	/* bufsize - 1 */
	/*
	 * head:     raw_head reduced to an offset inside the data area
	 * cur:      offset of the next record to read
	 * raw_head: data_head as sampled at init time
	 * bufsize:  size of the data area in bytes
	 */
	uint64_t head, cur, raw_head, bufsize;
	int64_t avail;			/* bytes not yet consumed; <= 0 means finished */
	char *data;			/* start of the data area, one page into the mapping */
	struct perf_event_mmap_page *mpage;	/* start of the mapping */
};
22 | 
/* An open perf event together with its mapped buffer. */
struct perf_fd { 
	int pfd;				/* descriptor returned by perf_event_open() */
	struct perf_event_mmap_page *mpage;	/* mapping of pfd; NULL after perf_fd_close() */
	int buf_size_shift;			/* log2 of the buffer size, as passed to perf_fd_open() */
};
28 | 
29 | int perf_fd_open(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift);
30 | int perf_fd_open_other(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift,
31 | 		       int pid, int cpu);
32 | void perf_fd_close(struct perf_fd *p);
33 | void perf_iter_continue(struct perf_iter *iter);
34 | struct perf_event_header *perf_buffer_read(struct perf_iter *iter, void *buffer, int bufsize);
35 | void perf_iter_init(struct perf_iter *iter, struct perf_fd *pfd);
36 | int perf_enable(struct perf_fd *p);
37 | int perf_disable(struct perf_fd *p);
38 | 
39 | static inline int perf_iter_finished(struct perf_iter *iter)
40 | {
41 | 	return iter->avail <= 0;
42 | }
43 | 
44 | static inline uint64_t *perf_hdr_payload(struct perf_event_header *hdr)
45 | {
46 | 	return (uint64_t *)(hdr + 1);
47 | }
48 | 
49 | #ifdef __cplusplus
50 | }
51 | #endif
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/jevents/perf_event_open.c:
--------------------------------------------------------------------------------
 1 | /* Until glibc provides a proper stub ... */
 2 | #include <linux/perf_event.h>
 3 | #include <unistd.h>
 4 | #include <sys/syscall.h>
 5 | 
 6 | /* If someone else has a better one we use that */
 7 | 
 8 | __attribute__((weak))
 9 | int perf_event_open(struct perf_event_attr *attr, pid_t pid,
10 | 		    int cpu, int group_fd, unsigned long flags)
11 | {
12 | 	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
13 | }
14 | 


--------------------------------------------------------------------------------
/jevents/rawevent.c:
--------------------------------------------------------------------------------
 1 | /* Output raw events in perf form. */
 2 | /*
 3 |  * Copyright (c) 2014, Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *
10 |  * 1. Redistributions of source code must retain the above copyright notice,
11 |  * this list of conditions and the following disclaimer.
12 |  *
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
29 | */
30 | 
31 | #include <linux/perf_event.h>
32 | #include <stdio.h>
33 | #include <string.h>
34 | #include <stdlib.h>
35 | #include "jevents.h"
36 | 
37 | #define BUFS 1024
38 | 
39 | /** 
40 |  * format_raw_event - Format a resolved event for perf's command line tool
41 |  * @attr: Previously resolved perf_event_attr.
42 |  * @name: Name to add to the event or NULL.
43 |  * Return a string of the formatted event. The caller must free string.
44 |  */
45 | 
46 | char *format_raw_event(struct perf_event_attr *attr, char *name)
47 | {
48 | 	char buf[BUFS];
49 | 	int off = 0;
50 | 	char *pmu;
51 | 
52 | 	pmu = resolve_pmu(attr->type);
53 | 	if (!pmu)
54 | 		return NULL;
55 | 	off = snprintf(buf, BUFS, "%s/config=%#llx", pmu, attr->config);
56 | 	free(pmu);
57 | 	if (attr->config1)
58 | 		off += sprintf(buf + off, ",config1=%#llx", attr->config1);
59 | 	if (attr->config2)
60 | 		off += sprintf(buf + off, ",config2=%#llx", attr->config2);
61 | 	if (name)
62 | 		off += snprintf(buf + off, BUFS - off, ",name=%s", name);
63 | 	off += snprintf(buf + off, BUFS - off, "/");
64 | 	return strdup(buf);
65 | }
66 | 


--------------------------------------------------------------------------------
/jevents/rdpmc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2012,2013 Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that: (1) source code distributions
  7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
  8 |  * distributions including binary code include the above copyright notice and
  9 |  * this paragraph in its entirety in the documentation or other materials
 10 |  * provided with the distribution
 11 |  *
 12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 15 |  */
 16 | 
 17 | /* Ring 3 RDPMC support */
 18 | #include <unistd.h>
 19 | #include <stdio.h>
 20 | #include <sys/mman.h>
 21 | #include <sys/fcntl.h>
 22 | #include <linux/perf_event.h>
 23 | #include <stdint.h>
 24 | #include <stdlib.h>
 25 | #include "jevents.h"
 26 | 
 27 | #if defined(__ICC) || defined(__INTEL_COMPILER)
 28 | #include "immintrin.h"
 29 | #endif
 30 | 
 31 | /**
 32 |  * DOC: Ring 3 counting for CPU performance counters
 33 |  *
 34 |  * This library allows accessing CPU performance counters from ring 3
 35 |  * using the perf_events subsystem. This is useful to measure specific
 36 |  * parts of programs (e.g. excluding initialization code)
 37 |  *
 38 |  * Requires a Linux 3.3+ kernel
 39 |  */
 40 | 
 41 | #include "rdpmc.h"
 42 | 
 43 | typedef unsigned long long u64;
 44 | 
 45 | #define rmb() asm volatile("" ::: "memory")
 46 | 
 47 | /**
 48 |  * rdpmc_open - initialize a simple ring 3 readable performance counter
 49 |  * @counter: Raw event descriptor (UUEE UU unit mask EE event)
 50 |  * @ctx:     Pointer to struct &rdpmc_ctx that is initialized
 51 |  *
 52 |  * The counter will be set up to count CPU events excluding the kernel.
 53 |  * Must be called for each thread using the counter.
 54 |  * The caller must make sure counter is suitable for the running CPU.
 55 |  * Only works in 3.3+ kernels.
 56 |  * Must be closed with rdpmc_close()
 57 |  */
 58 | 
 59 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx)
 60 | {
 61 | 	struct perf_event_attr attr = {
 62 | 		.type = counter > 10 ? PERF_TYPE_RAW : PERF_TYPE_HARDWARE,
 63 | 		.size = PERF_ATTR_SIZE_VER0,
 64 | 		.config = counter,
 65 | 		.sample_type = PERF_SAMPLE_READ,
 66 | 		.exclude_kernel = 1,
 67 | 	};
 68 | 	return rdpmc_open_attr(&attr, ctx, NULL);
 69 | }
 70 | 
 71 | /**
 72 |  * rdpmc_open_attr - initialize a raw ring 3 readable performance counter
 73 |  * @attr: perf struct %perf_event_attr for the counter
 74 |  * @ctx:  Pointer to struct %rdpmc_ctx that is initialized.
 75 |  * @leader_ctx: context of group leader or NULL
 76 |  *
 77 |  * This allows more flexible setup with a custom &perf_event_attr.
 78 |  * For simple uses rdpmc_open() should be used instead.
 79 |  * Must be called for each thread using the counter.
 80 |  * Must be closed with rdpmc_close()
 81 |  */
 82 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx,
 83 | 		    struct rdpmc_ctx *leader_ctx)
 84 | {
 85 | 	ctx->fd = perf_event_open(attr, 0, -1,
 86 | 			  leader_ctx ? leader_ctx->fd : -1, 0);
 87 | 	if (ctx->fd < 0) {
 88 | 		perror("perf_event_open");
 89 | 		return -1;
 90 | 	}
 91 | 	ctx->buf = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, ctx->fd, 0);
 92 | 	if (ctx->buf == MAP_FAILED) {
 93 | 		close(ctx->fd);
 94 | 		perror("mmap on perf fd");
 95 | 		return -1;
 96 | 	}
 97 | 	return 0;
 98 | }
 99 | 
100 | /**
101 |  * rdpmc_close - free a ring 3 readable performance counter
102 |  * @ctx: Pointer to &rdpmc_ctx context.
103 |  *
104 |  * Must be called by each thread for each context it initialized.
105 |  */
106 | void rdpmc_close(struct rdpmc_ctx *ctx)
107 | {
108 | 	close(ctx->fd);
109 | 	munmap(ctx->buf, sysconf(_SC_PAGESIZE));
110 | }
111 | 
/**
 * rdpmc_read - read a ring 3 readable performance counter
 * @ctx: Pointer to initialized &rdpmc_ctx structure.
 *
 * Read the current value of a running performance counter.
 * This should only be called from the same thread/process as opened
 * the context. For new threads please create a new context.
 *
 * Return: the hardware counter value plus the offset published in the
 * mapped page, or the offset alone when the page reports index 0.
 */
unsigned long long rdpmc_read(struct rdpmc_ctx *ctx)
{
	u64 val;
	unsigned seq;
	u64 offset; 
	typeof (ctx->buf) buf = ctx->buf;
	unsigned index;

	/*
	 * Retry until the lock word is unchanged across the whole read, so
	 * index, offset and the counter value belong to the same snapshot.
	 */
	do {
		seq = buf->lock;
		rmb();	/* compiler barrier: keep the loads below after the lock read */
		index = buf->index;
		offset = buf->offset;
		if (index == 0) /* rdpmc not allowed */
			return offset;
		/* index is 1-based; the rdpmc instruction takes index - 1 */
#if defined(__ICC) || defined(__INTEL_COMPILER)
		val = _rdpmc(index - 1);
#else
		val = __builtin_ia32_rdpmc(index - 1);
#endif
		rmb();	/* compiler barrier: keep the re-check of the lock last */
	} while (buf->lock != seq);
	return val + offset;
}
144 | 
145 | 


--------------------------------------------------------------------------------
/jevents/rdpmc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | #ifndef RDPMC_H
18 | #define RDPMC_H 1
19 | 
20 | #include <linux/perf_event.h>
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
/* State of one ring 3 readable counter, filled in by rdpmc_open_attr(). */
struct rdpmc_ctx {
	int fd;					/* descriptor returned by perf_event_open() */
	struct perf_event_mmap_page *buf;	/* one-page read-only mapping of fd */
};
30 | 
31 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx);
32 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx, 
33 | 		    struct rdpmc_ctx *leader_ctx);
34 | void rdpmc_close(struct rdpmc_ctx *ctx);
35 | unsigned long long rdpmc_read(struct rdpmc_ctx *ctx);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/jevents/resolve.c:
--------------------------------------------------------------------------------
  1 | /* Resolve perf style event descriptions to attr */
  2 | /*
  3 |  * Copyright (c) 2014, Intel Corporation
  4 |  * Author: Andi Kleen
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *
 10 |  * 1. Redistributions of source code must retain the above copyright notice,
 11 |  * this list of conditions and the following disclaimer.
 12 |  *
 13 |  * 2. Redistributions in binary form must reproduce the above copyright
 14 |  * notice, this list of conditions and the following disclaimer in the
 15 |  * documentation and/or other materials provided with the distribution.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | */
 30 | 
 31 | #define _GNU_SOURCE 1
 32 | #include "jevents.h"
 33 | #include <linux/perf_event.h>
 34 | #include <stdio.h>
 35 | #include <string.h>
 36 | #include <stdarg.h>
 37 | #include <stdlib.h>
 38 | #include <stdbool.h>
 39 | #include <unistd.h>
 40 | #include <sys/fcntl.h>
 41 | #include <glob.h>
 42 | #include <assert.h>
 43 | 
 44 | #ifndef PERF_ATTR_SIZE_VER1
 45 | #define PERF_ATTR_SIZE_VER1	72
 46 | #endif
 47 | 
 48 | #define MAXFILE 4096
 49 | 
 50 | static int read_file(char **val, const char *fmt, ...)
 51 | {
 52 | 	char *fn;
 53 | 	va_list ap;
 54 | 	int fd;
 55 | 	int ret = -1;
 56 | 	int len;
 57 | 
 58 | 	*val = malloc(MAXFILE);	
 59 | 	va_start(ap, fmt);
 60 | 	vasprintf(&fn, fmt, ap);
 61 | 	va_end(ap);
 62 | 	fd = open(fn, O_RDONLY);
 63 | 	free(fn);
 64 | 	if (fd >= 0) {
 65 | 		if ((len = read(fd, *val, MAXFILE - 1)) > 0) {
 66 | 			ret = 0;
 67 | 			(*val)[len] = 0;
 68 | 		}
 69 | 		close(fd);
 70 | 	}
 71 | 	if (ret < 0) {
 72 | 		free(*val);
 73 | 		*val = NULL;
 74 | 	}
 75 | 	return ret;
 76 | }
 77 | 
 78 | #define BITS(x) ((x) == 64 ? -1ULL : (1ULL << (x)) - 1)
 79 | 
 80 | static bool try_parse(char *format, char *fmt, __u64 val, __u64 *config)
 81 | {
 82 | 	int start, end;
 83 | 	int n = sscanf(format, fmt, &start, &end);
 84 | 	if (n == 1)
 85 | 		end = start + 1;
 86 | 	if (n == 0)
 87 | 		return false;
 88 | 	*config |= (val & BITS(end - start + 1)) << start;
 89 | 	return true;
 90 | }
 91 | 
 92 | static int read_qual(const char *qual, struct perf_event_attr *attr,
 93 | 		const char *str)
 94 | {
 95 | 	while (*qual) {
 96 | 		switch (*qual) { 
 97 | 		case 'p':
 98 | 			attr->precise_ip++;
 99 | 			break;
100 | 		case 'k':
101 | 			attr->exclude_user = 1;
102 | 			break;
103 | 		case 'u':
104 | 			attr->exclude_kernel = 1;
105 | 			break;
106 | 		case 'h':
107 | 			attr->exclude_guest = 1;
108 | 			break;
109 | 		/* XXX more */
110 | 		default:
111 | 			fprintf(stderr, "Unknown modifier %c at end for %s\n", *qual, str);
112 | 			return -1;
113 | 		}
114 | 		qual++;
115 | 	}
116 | 	return 0;
117 | }
118 | 
/*
 * special_attr - handle terms that map directly onto perf_event_attr fields
 * instead of going through the PMU's sysfs format description.
 * @name: term name (left-hand side of "name=value").
 * @val:  term value; taken as a full 64-bit quantity so large config values
 *        are not truncated.
 * @attr: attr to update.
 *
 * Returns true if @name was one of the recognised terms (period, freq,
 * config, config1, config2, name), false otherwise.
 */
static bool special_attr(char *name, __u64 val, struct perf_event_attr *attr)
{
	if (!strcmp(name, "period")) {
		attr->sample_period = val;
		return true;
	}
	if (!strcmp(name, "freq")) {
		attr->sample_freq = val;
		attr->freq = 1;
		return true;
	}
	if (!strcmp(name, "config")) {
		attr->config = val;
		return true;
	}
	if (!strcmp(name, "config1")) {
		attr->config1 = val;
		return true;
	}
	if (!strcmp(name, "config2")) {
		attr->config2 = val;
		/* Same as parse_terms: writing config2 needs the larger attr size. */
		if (attr->size < PERF_ATTR_SIZE_VER1)
			attr->size = PERF_ATTR_SIZE_VER1;
		return true;
	}
	if (!strcmp(name, "name")) {
		// we accept the name attribute, but don't have anywhere to put it inside
		// perf_event_attr, so we just drop it but at least avoid an unhandled attr error
		return true;
	}
	return false;
}
149 | 
/*
 * parse_terms - parse the comma separated "name=value" terms of an event.
 * @pmu:    PMU directory name under /sys/devices.
 * @config: term list; modified in place (cut at the first newline and split
 *          by strsep).
 * @attr:   attr to fill in.
 * @recur:  0 for the user's string, 1 while expanding a kernel event alias;
 *          aliases are only expanded at level 0.
 *
 * Each term is first offered to special_attr(); otherwise it is looked up in
 * /sys/devices/<pmu>/format/<name>, and failing that (at level 0) expanded as
 * an alias from /sys/devices/<pmu>/events/<name>. A term without "=value"
 * gets the value 1.
 *
 * Returns 0 if every term was consumed, -1 otherwise.
 */
static int parse_terms(char *pmu, char *config, struct perf_event_attr *attr, int recur)
{
	char *format = NULL;
	char *term;

	char *newl = strchr(config, '\n');
	if (newl)
		*newl = 0;

	while ((term = strsep(&config, ",")) != NULL) {
		/* %30[^=] stores up to 30 characters plus the terminating NUL. */
		char name[31];
		int n;
		/* %lli expects a signed long long; it also accepts 0x... input. */
		long long val = 1;

		n = sscanf(term, "%30[^=]=%lli", name, &val);
		if (n < 1)
			break;
		if (special_attr(name, val, attr))
			continue;
		free(format);
		if (read_file(&format, "/sys/devices/%s/format/%s", pmu, name) < 0) {
			char *alias = NULL;

			if (recur == 0 &&
			    read_file(&alias, "/sys/devices/%s/events/%s", pmu, name) == 0) {
				if (parse_terms(pmu, alias, attr, 1) < 0) {
					free(alias);
					fprintf(stderr, "Cannot parse kernel event alias %s for %s\n", name,
							term);
					break;
				}
				free(alias);
				continue;
			}
			fprintf(stderr, "Cannot parse qualifier %s for %s\n", name, term);
			break;
		}
		bool ok = try_parse(format, "config:%d-%d", val, &attr->config) ||
			try_parse(format, "config:%d", val, &attr->config) ||
			try_parse(format, "config1:%d-%d", val, &attr->config1) ||
			try_parse(format, "config1:%d", val, &attr->config1);
		bool ok2 = try_parse(format, "config2:%d-%d", val, &attr->config2) ||
			try_parse(format, "config2:%d", val, &attr->config2);
		if (!ok && !ok2) {
			fprintf(stderr, "Cannot parse kernel format %s: %s for %s\n",
					name, format, term);
			break;
		}
		/* config2 is only honoured with the larger attr size. */
		if (ok2)
			attr->size = PERF_ATTR_SIZE_VER1;
	}
	free(format);
	/* term is non-NULL only if the loop was left through a break. */
	if (term)
		return -1;
	return 0;
}
206 | 
/*
 * try_pmu_type - probe one candidate spelling of a PMU name.
 * @type: receives the content of /sys/devices/<candidate>/type (see
 *        read_file() for ownership).
 * @fmt:  printf pattern with one %s that turns @pmu into the candidate name.
 * @pmu:  PMU name as given by the user; overwritten with the candidate name
 *        when the probe succeeds.
 *
 * Returns the result of read_file(): negative if the candidate has no
 * readable type file.
 */
static int try_pmu_type(char **type, char *fmt, char *pmu)
{
	char candidate[30];
	int rc;

	snprintf(candidate, sizeof(candidate), fmt, pmu);
	rc = read_file(type, "/sys/devices/%s/type", candidate);
	if (rc < 0)
		return rc;
	strcpy(pmu, candidate);
	return rc;
}
216 | 
/**
 * jevent_pmu_uncore - Is perf event string for an uncore PMU.
 * @str: perf style event string (pmu/terms/)
 *
 * The PMU name is everything before the first '/'. The PMU counts as uncore
 * when /sys/devices/<pmu>/cpumask exists and starts with the number 0.
 * Return true if yes, false if not or unparseable.
 */
bool jevent_pmu_uncore(const char *str)
{
	char *cpumask;
	int cpus;
	/* %30[^/] stores up to 30 characters plus the terminating NUL. */
	char pmu[31];

	if (!strchr(str, '/'))
		return false;
	if (sscanf(str, "%30[^/]", pmu) < 1)
		return false;
	int ret = read_file(&cpumask, "/sys/devices/%s/cpumask", pmu);
	if (ret < 0)
		return false;
	bool isuncore = sscanf(cpumask, "%d", &cpus) == 1 && cpus == 0;
	free(cpumask);
	return isuncore;
}
239 | 
/**
 * jevent_name_to_attr - Resolve perf style event to perf_attr
 * @str: perf style event (e.g. cpu/event=1/)
 * @attr: perf_attr to fill in.
 *
 * Resolve perf new style event descriptor to perf ATTR. User must initialize
 * attr->sample_type and attr->read_format as needed after this call,
 * and possibly other fields. Returns 0 when succeeded.
 *
 * Two forms are accepted:
 *   rNNNN[:quals]       raw event, NNNN in hex
 *   pmu/terms/[quals]   PMU event; the PMU name is also tried with the
 *                       "uncore_" prefix and "_0"/"_1" suffixes
 */
int jevent_name_to_attr(const char *str, struct perf_event_attr *attr)
{
	/*
	 * One byte larger than the sscanf field widths used below: a width
	 * of N lets sscanf store N characters plus the terminating NUL.
	 */
	char pmu[31], config[201];
	int qual_off = -1;

	memset(attr, 0, sizeof(struct perf_event_attr));
	attr->size = PERF_ATTR_SIZE_VER0;
	attr->type = PERF_TYPE_RAW;

	if (sscanf(str, "r%llx%n", &attr->config, &qual_off) == 1) {
		assert(qual_off != -1);
		if (str[qual_off] == 0)
			return 0;
		/* Qualifiers follow the ':'; read_qual must not see the ':' itself. */
		if (str[qual_off] == ':' &&
		    read_qual(str + qual_off + 1, attr, str) == 0)
			return 0;
		return -1;
	}
	if (sscanf(str, "%30[^/]/%200[^/]/%n", pmu, config, &qual_off) < 2)
		return -1;
	char *type = NULL;
	/* FIXME need interface for multiple outputs and try more instances */
	if (try_pmu_type(&type, "%s", pmu) < 0 &&
	    try_pmu_type(&type, "uncore_%s", pmu) < 0 &&
	    try_pmu_type(&type, "uncore_%s_0", pmu) < 0 &&
	    try_pmu_type(&type, "uncore_%s_1", pmu) < 0)
		return -1;
	attr->type = atoi(type);
	free(type);
	if (parse_terms(pmu, config, attr, 0) < 0)
		return -1;
	/* qual_off stays -1 when the closing '/' is missing. */
	if (qual_off != -1 && read_qual(str + qual_off, attr, str) < 0)
		return -1;
	return 0;
}
283 | 
/**
 * walk_perf_events - walk all kernel supplied perf events
 * @func: Callback function to call for each event.
 * @data: data pointer to pass to func.
 *
 * For every /sys/devices/<pmu>/events/<event> file whose event name has no
 * '.' in it, @func is called with name "<pmu>/<event>/", event
 * "<pmu>/<file content up to the first newline>/" and an empty description.
 * Both strings are freed after the callback returns.
 *
 * Returns -1 if the glob finds nothing or memory runs out; otherwise the
 * first non-zero value returned by @func (which stops the walk), or 0.
 */
int walk_perf_events(int (*func)(void *data, char *name, char *event, char *desc),
		     void *data)
{
	int ret = 0;
	glob_t g;
	size_t i;

	if (glob("/sys/devices/*/events/*", 0, NULL, &g) != 0)
		return -1;
	for (i = 0; i < g.gl_pathc; i++) {
		char pmu[32], event[32];
		char *path = g.gl_pathv[i];
		char *val, *val2, *buf;
		char *s;

		if (sscanf(path, "/sys/devices/%30[^/]/events/%30s",
			   pmu, event) != 2) {
			fprintf(stderr, "No match on %s\n", path);
			continue;
		}
		if (strchr(event, '.'))
			continue;

		/* Pass the path as an argument, never as the format itself. */
		if (read_file(&val, "%s", path)) {
			fprintf(stderr, "Cannot read %s\n", path);
			continue;
		}
		for (s = val; *s; s++) {
			if (*s == '\n')
				*s = 0;
		}
		/* asprintf leaves its output undefined on failure. */
		if (asprintf(&val2, "%s/%s/", pmu, val) < 0)
			val2 = NULL;
		free(val);
		if (asprintf(&buf, "%s/%s/", pmu, event) < 0)
			buf = NULL;
		if (!val2 || !buf) {
			free(val2);
			free(buf);
			ret = -1;
			break;
		}

		ret = func(data, buf, val2, "");
		free(val2);
		free(buf);
		if (ret)
			break;
	}
	globfree(&g);
	return ret;
}
334 | 
/*
 * resolve_pmu - map a perf PMU type number back to its sysfs name.
 * @type: value to look for in the /sys/devices/<pmu>/type files.
 *
 * Returns a strdup'ed PMU name, or NULL if no PMU has that type (or on
 * allocation failure). Type files that cannot be read or parsed are skipped.
 *
 * Should cache pmus. Caller must free return value.
 */
char *resolve_pmu(int type)
{
	glob_t g;
	size_t i;
	char *pmun = NULL;

	if (glob("/sys/devices/*/type", 0, NULL, &g))
		return NULL;
	for (i = 0; i < g.gl_pathc; i++) {
		/* %30[^/] stores up to 30 characters plus the terminating NUL. */
		char pmu[31];
		char *numbuf;
		int num;
		bool found;

		if (sscanf(g.gl_pathv[i], "/sys/devices/%30[^/]/type", pmu) != 1)
			continue;
		/* Pass the path as an argument, never as the format itself. */
		if (read_file(&numbuf, "%s", g.gl_pathv[i]) < 0)
			continue;
		found = sscanf(numbuf, "%d", &num) == 1 && num == type;
		free(numbuf);
		if (found) {
			pmun = strdup(pmu);
			break;
		}
	}
	globfree(&g);
	return pmun;
}
360 | 
#ifdef TEST
#include "jevents.h"
/*
 * Self-test driver (built only with -DTEST): parses every argument as an
 * event, prints the resulting config words and tries to open the event.
 * Exits with 0 if at least one event could be opened, 1 otherwise.
 */
int main(int ac, char **av)
{
	struct perf_event_attr attr =  { 0 };
	int ret = 1;

	if (!av[1]) {
		printf("Usage: ... perf-event-to-parse\n");
		exit(1);
	}
	while (*++av) {
		int fd;

		/* Do not try to open an attr that could not be parsed. */
		if (jevent_name_to_attr(*av, &attr) < 0) {
			printf("cannot parse %s\n", *av);
			continue;
		}
		printf("config %llx config1 %llx\n", attr.config, attr.config1);
		fd = perf_event_open(&attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
		} else {
			ret = 0;
			/* Only a valid descriptor may be closed. */
			close(fd);
		}
	}
	return ret;
}
#endif
386 | 


--------------------------------------------------------------------------------
/jevents/session.c:
--------------------------------------------------------------------------------
  1 | /* Simple session layer for multiple perf events. */
  2 | /*
  3 |  * Copyright (c) 2015, Intel Corporation
  4 |  * Author: Andi Kleen
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *
 10 |  * 1. Redistributions of source code must retain the above copyright notice,
 11 |  * this list of conditions and the following disclaimer.
 12 |  *
 13 |  * 2. Redistributions in binary form must reproduce the above copyright
 14 |  * notice, this list of conditions and the following disclaimer in the
 15 |  * documentation and/or other materials provided with the distribution.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 29 |  */
 30 | 
 31 | #include <string.h>
 32 | #include <unistd.h>
 33 | #include <linux/perf_event.h>
 34 | #include <stdlib.h>
 35 | #include <stdio.h>
 36 | #include <stdint.h>
 37 | #include <errno.h>
 38 | #include <sys/fcntl.h>
 39 | #include <stdbool.h>
 40 | #include "jevents.h"
 41 | #include "jsession.h"
 42 | 
 43 | /**
 44 |  * alloc_eventlist - Alloc a list of events.
 45 |  */
 46 | 
 47 | struct eventlist *alloc_eventlist(void)
 48 | {
 49 | 	struct eventlist *el = calloc(sizeof(struct eventlist), 1);
 50 | 	if (!el)
 51 | 		return NULL;
 52 | 	el->num_cpus = sysconf(_SC_NPROCESSORS_CONF);
 53 | 	return el;
 54 | }
 55 | 
 56 | static struct event *new_event(struct eventlist *el, char *s)
 57 | {
 58 | 	struct event *e = calloc(sizeof(struct event) +
 59 | 				 sizeof(struct efd) * el->num_cpus, 1);
 60 | 	e->next = NULL;
 61 | 	if (!el->eventlist)
 62 | 		el->eventlist = e;
 63 | 	if (el->eventlist_last)
 64 | 		el->eventlist_last->next = e;
 65 | 	el->eventlist_last = e;
 66 | 	e->event = strdup(s);
 67 | 	return e;
 68 | }
 69 | 
 70 | /**
 71 |  * parse_events - parse a perf style string with events
 72 |  * @el: List of events allocated earlier
 73 |  * @events: Comma separated lists of events. {} style groups are legal.
 74 |  *
 75 |  * JSON events are supported, if the event lists are downloaded first.
 76 |  */
 77 | int parse_events(struct eventlist *el, char *events)
 78 | {
 79 | 	char *s, *tmp;
 80 | 
 81 | 	events = strdup(events);
 82 | 	if (! events) return -1;
 83 | 	for (s = strtok_r(events, ",", &tmp);
 84 | 	     s;
 85 | 	     s = strtok_r(NULL, ",", &tmp)) {
 86 | 		bool group_leader = false, end_group = false;
 87 | 		int len;
 88 | 
 89 | 		if (s[0] == '{') {
 90 | 			s++;
 91 | 			group_leader = true;
 92 | 		} else if (len = strlen(s), len > 0 && s[len - 1] == '}') {
 93 | 			s[len - 1] = 0;
 94 | 			end_group = true;
 95 | 		}
 96 | 
 97 | 		struct event *e = new_event(el, s);
 98 | 		e->uncore = jevent_pmu_uncore(s);
 99 | 		e->group_leader = group_leader;
100 | 		e->end_group = end_group;
101 | 		if (resolve_event(s, &e->attr) < 0) {
102 | 			fprintf(stderr, "Cannot resolve %s\n", e->event);
103 | 			return -1;
104 | 		}
105 | 	}
106 | 	free(events);
107 | 	return 0;
108 | }
109 | 
/*
 * cpu_online - report whether sysfs marks CPU @i as online.
 *
 * True only if /sys/devices/system/cpu/cpu<i>/online can be opened and its
 * content starts with '1'; a missing or unreadable file yields false.
 */
static bool cpu_online(int i)
{
	char path[100];
	char state[128];
	int fd;
	int got;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/online", i);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return false;
	got = read(fd, state, sizeof(state));
	close(fd);
	return got > 0 && state[0] == '1';
}
125 | 
126 | /**
127 |  * setup_event - Create perf descriptor for a single event.
128 |  * @e: Event to measure.
129 |  * @cpu: CPU to measure.
130 |  * @leader: Leader event to define a group.
131 |  * @measure_all: If true measure all processes (may need root)
132 |  * @measure_pid: If not -1 measure specific process.
133 |  *
134 |  * This is a low level function. Normally setup_events() should be used.
135 |  * Return -1 on failure.
136 |  */
137 | 
138 | int setup_event(struct event *e, int cpu, struct event *leader,
139 | 		bool measure_all, int measure_pid)
140 | {
141 | 	e->attr.inherit = 1;
142 | 	if (!measure_all) {
143 | 		e->attr.disabled = 1;
144 | 		e->attr.enable_on_exec = 1;
145 | 	}
146 | 	e->attr.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED |
147 | 				PERF_FORMAT_TOTAL_TIME_RUNNING;
148 | 
149 | 	e->efd[cpu].fd = perf_event_open(&e->attr,
150 | 			measure_all ? -1 : measure_pid,
151 | 			cpu,
152 | 			leader ? leader->efd[cpu].fd : -1,
153 | 			0);
154 | 
155 | 	if (e->efd[cpu].fd < 0) {
156 | 		/* Handle offline CPU */
157 | 		if (errno == EINVAL && !cpu_online(cpu))
158 | 			return 0;
159 | 
160 | 		fprintf(stderr, "Cannot open perf event for %s/%d: %s\n",
161 | 				e->event, cpu, strerror(errno));
162 | 		return -1;
163 | 	}
164 | 	return 0;
165 | }
166 | 
167 | /**
168 |  * setup_events - Set up perf events for a event list.
169 |  * @el: List of events, allocated and parsed earlier.
170 |  * @measure_all: If true measure all of system (may need root)
171 |  * @measure_pid: If not -1 measure pid.
172 |  *
173 |  * Return -1 on failure, otherwise 0.
174 |  */
175 | 
176 | int setup_events(struct eventlist *el, bool measure_all, int measure_pid)
177 | {
178 | 	struct event *e, *leader = NULL;
179 | 	int i;
180 | 	int err = 0;
181 | 	int ret;
182 | 
183 | 	for (e = el->eventlist; e; e = e->next) {
184 | 		if (e->uncore) {
185 | 			/* XXX for every socket. for now just 0. */
186 | 			ret = setup_event(e, 0, leader, measure_all, measure_pid);
187 | 			if (ret < 0) {
188 | 				err = ret;
189 | 				continue;
190 | 			}
191 | 			for (i = 1; i < el->num_cpus; i++)
192 | 				e->efd[i].fd = -1;
193 | 		} else {
194 | 			for (i = 0; i < el->num_cpus; i++) {
195 | 				ret = setup_event(e, i, leader,
196 | 						measure_all,
197 | 						measure_pid);
198 | 				if (ret < 0) {
199 | 					err = ret;
200 | 					continue;
201 | 				}
202 | 			}
203 | 		}
204 | 		if (e->group_leader)
205 | 			leader = e;
206 | 		if (e->end_group)
207 | 			leader = NULL;
208 | 	}
209 | 	return err;
210 | }
211 | 
212 | /**
213 |  * read_event - Read the value of a single event for one CPU.
214 |  * @e: event to read
215 |  * @cpu: cpu number to read
216 |  * Returns -1 on failure, otherwise 0.
217 |  * The value read can be retrieved later with event_scaled_value.
218 |  */
219 | 
220 | int read_event(struct event *e, int cpu)
221 | {
222 | 	int n = read(e->efd[cpu].fd, &e->efd[cpu].val, 3 * 8);
223 | 	if (n < 0) {
224 | 		fprintf(stderr, "Error reading from %s/%d: %s\n",
225 | 				e->event, cpu, strerror(errno));
226 | 		return -1;
227 | 	}
228 | 	return 0;
229 | }
230 | 
231 | /**
232 |  * read_event - Read value of all events on all CPUs.
233 |  * @el: eventlist. Must be allocated, parsed, set up earlier.
234 |  * Returns -1 on failure, otherwise 0.
235 |  */
236 | 
237 | int read_all_events(struct eventlist *el)
238 | {
239 | 	struct event *e;
240 | 	int i;
241 | 
242 | 	for (e = el->eventlist; e; e = e->next) {
243 | 		if (e->uncore) {
244 | 			/* XXX all sockets */
245 | 			if (e->efd[0].fd < 0)
246 | 				continue;
247 | 			if (read_event(e, 0) < 0)
248 | 				return -1;
249 | 		}
250 | 		for (i = 0; i < el->num_cpus; i++) {
251 | 			if (e->efd[i].fd < 0)
252 | 				continue;
253 | 			if (read_event(e, i) < 0)
254 | 				return -1;
255 | 		}
256 | 	}
257 | 	return 0;
258 | }
259 | 
260 | /**
261 |  * event_scaled_value - Retrieve a read value for a cpu
262 |  * @e: Event
263 |  * @cpu: CPU number
264 |  * Return scaled value read earlier.
265 |  */
266 | uint64_t event_scaled_value(struct event *e, int cpu)
267 | {
268 | 	uint64_t *val = e->efd[cpu].val;
269 | 	if (val[1] != val[2] && val[2])
270 | 		return val[0] * (double)val[1] / (double)val[2];
271 | 	return val[0];
272 | }
273 | 


--------------------------------------------------------------------------------
/jevents/showevent.c:
--------------------------------------------------------------------------------
 1 | /* Resolve perf event descriptions with symbolic names to raw perf descriptions */
 2 | #include "jevents.h"
 3 | #include <linux/perf_event.h>
 4 | #include <stdio.h>
 5 | #include <string.h>
 6 | #include <unistd.h>
 7 | #include <stdlib.h>
 8 | 
 9 | int main(int ac, char **av)
10 | {
11 | 	int test = 0;
12 | 	int ret = 0;
13 | 
14 | 	while (*++av) {
15 | 		if (!strcmp(*av, "--test")) {
16 | 			test = 1;
17 | 			continue;
18 | 		}
19 | 
20 | 		struct perf_event_attr attr;
21 | 		if (resolve_event(*av, &attr) < 0) {
22 | 			fprintf(stderr, "Cannot resolve %s\n", *av); 
23 | 			ret = 1;
24 | 			continue;
25 | 		}
26 | 		char *ev = format_raw_event(&attr, *av);
27 | 		printf("%s\n", ev);
28 | 		free(ev);
29 | 		if (test) {
30 | 			if (perf_event_open(&attr, 0, -1, -1, 0) < 0)
31 | 				perror("perf_event_open");
32 | 		}
33 | 	}
34 | 	return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/jevents/util.h:
--------------------------------------------------------------------------------
/* Small shared helpers: error exit, compiler barrier, size constant and
 * fixed-width integer shorthands. Usable from both C and C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Report x through perror() and terminate with exit status 1.
 * Expands to an unparenthesised comma expression. */
#define err(x) perror(x), exit(1)
/* Empty asm statement with a "memory" clobber: emits no instruction but
 * stops the compiler from caching or reordering memory accesses across it. */
#define mb() asm volatile("" ::: "memory")
/* 1024 * 1024, i.e. the number of bytes in a mebibyte. */
#define MB (1024*1024)
/* Shorthands for the widest standard integer types. */
typedef unsigned long long u64;
typedef long long s64;

#ifdef __cplusplus
}
#endif
14 | 


--------------------------------------------------------------------------------
/main-test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * The implementation and main() function for the Catch2 unit tests.
 3 |  *
 * main-test.cpp
 5 |  */
 6 | // #define CATCH_CONFIG_NO_POSIX_SIGNALS
 7 | // This tells Catch to provide a main() - only do this in one cpp file
 8 | #define CATCH_CONFIG_MAIN
 9 | #include "catch.hpp"
10 | 


--------------------------------------------------------------------------------
/misc.cpp:
--------------------------------------------------------------------------------
 1 | #include "misc.hpp"
 2 | 
 3 | #include <immintrin.h>
 4 | #include <x86intrin.h> // this is needed to have _mm_clflush
 5 | 
 6 | void clflush(const void *storage, size_t size) {
 7 |     for (char *p = (char *)storage, *e = p + size; p < e; p += 64) {
 8 |         _mm_clflush(p);
 9 |     }
10 | 
11 |     _mm_mfence();
12 | }
13 | 


--------------------------------------------------------------------------------
/misc.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef MISC_H_
  2 | #define MISC_H_
  3 | 
  4 | #include <algorithm>
  5 | #include <memory>
  6 | #include <cstdio>
  7 | #include <cctype>
  8 | #include <cassert>
  9 | #include <vector>
 10 | #include <ostream>
 11 | #include <functional>
 12 | #include <iterator>
 13 | 
 14 | /* miscellaneous stuff that's useful for modern (ha?) C++ */
 15 | 
 16 | /**
 17 |  * Sometimes it's useful to have a functor object rather than a template
 18 |  * function since this can be passed to a function which may apply it to d
 19 |  * different object types.
 20 |  */
 21 | struct min_functor {
 22 |     template <typename T>
 23 |     const T& operator()(const T& l, const T& r) const { return std::min(l, r); }
 24 | };
 25 | 
 26 | struct max_functor {
 27 |     template <typename T>
 28 |     const T& operator()(const T& l, const T& r) const { return std::max(l, r); }
 29 | };
 30 | 
 31 | /*
 32 |  * Given a printf-style format and args, return the formatted string as a std::string.
 33 |  *
 34 |  * See https://stackoverflow.com/a/26221725/149138.
 35 |  */
 36 | template<typename ... Args>
 37 | std::string string_format(const std::string& format, Args ... args) {
 38 |     size_t size = std::snprintf( nullptr, 0, format.c_str(), args ..., 0 ) + 1; // Extra space for '\0'
 39 |     std::unique_ptr<char[]> buf( new char[ size ] );
 40 |     std::snprintf( buf.get(), size, format.c_str(), args ..., 0 ); // 0 is a dirty hack to avoid warning about no args
 41 |     return buf.get();
 42 | }
 43 | 
 44 | /*
 45 |  * Split a string delimited by sep.
 46 |  *
 47 |  * See https://stackoverflow.com/a/7408245/149138
 48 |  */
 49 | static inline std::vector<std::string> split(const std::string &text, const std::string &sep) {
 50 |   std::vector<std::string> tokens;
 51 |   std::size_t start = 0, end = 0;
 52 |   while ((end = text.find(sep, start)) != std::string::npos) {
 53 |     tokens.push_back(text.substr(start, end - start));
 54 |     start = end + sep.length();
 55 |   }
 56 |   tokens.push_back(text.substr(start));
 57 |   return tokens;
 58 | }
 59 | 
 60 | static inline std::string string_toupper(std::string str) {
 61 |     std::transform(str.begin(), str.end(), str.begin(), ::toupper);
 62 |     return str;
 63 | }
 64 | 
 65 | /**
 66 |  * Holds a pointer to the start of an an array and its size, so
 67 |  * it can implement the begin()/end() contract that various
 68 |  * algorithms want.
 69 |  */
 70 | template <typename T>
 71 | struct array_holder {
 72 |     T* p;
 73 |     size_t size_;
 74 | 
 75 |     array_holder(T* p, size_t size) : p{p}, size_{size} {}
 76 | 
 77 |     const T& operator[](size_t i) const {
 78 |         assert(i < size_);
 79 |         return p[i];
 80 |     }
 81 | 
 82 |     size_t size() const { return size_; }
 83 | };
 84 | 
// https://stackoverflow.com/a/4415646/149138
// Number of elements in the array x: sizeof(x) divided by the size of its
// first element (0[x] is the built-in subscript written backwards, i.e. x[0]).
// The second factor is 1 when sizeof(x) is an exact multiple of the element
// size and 0 otherwise, so a mismatch turns into a division by zero.
// NOTE(review): per the linked answer this is meant to reject some non-array
// arguments; it cannot catch every pointer - verify before relying on it.
#define COUNT_OF(x) ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
 87 | 
 88 | template <typename T>
 89 | std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
 90 |     os << "[";
 91 |     bool first = true;
 92 |     for (auto&& val : vec) {
 93 |         if (!first) os << ",";
 94 |         os << val;
 95 |         first = false;
 96 |     }
 97 |     os << "]";
 98 |     return os;
 99 | }
100 | 
101 | void clflush(const void *storage, size_t size);
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/msr-access.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * msr-access.c
  3 |  */
  4 | 
  5 | // for pread() and sched_getcpu()
  6 | #define _GNU_SOURCE
  7 | 
  8 | #include "msr-access.h"
  9 | 
 10 | #include <sys/types.h>
 11 | #include <sys/stat.h>
 12 | #include <fcntl.h>
 13 | #include <errno.h>
 14 | #include <inttypes.h>
 15 | #include <assert.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <stdio.h>
 19 | #include <unistd.h>
 20 | #include <sched.h>
 21 | 
 22 | /** if there are this many CPUs or less, we'll never allocate memory */
 23 | #define STATIC_ARRAY_SIZE 32
 24 | 
 25 | #ifndef MSR_USE_PTHREADS
 26 | // thread-safe by default
 27 | #define MSR_USE_PTHREADS 1
 28 | #endif
 29 | 
 30 | #if MSR_USE_PTHREADS
 31 | #include <pthread.h>
 32 | static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 33 | void lock() {
 34 |     pthread_mutex_lock(&mutex);
 35 | }
 36 | void unlock() {
 37 |     pthread_mutex_unlock(&mutex);
 38 | }
 39 | #else
 40 | void lock() {}
 41 | void unlock(){}
 42 | #endif
 43 | 
 44 | 
 45 | 
 46 | /* size of the rfile array */
 47 | int  rfile_static[STATIC_ARRAY_SIZE] = {};
 48 | int  rfile_size  = STATIC_ARRAY_SIZE;
 49 | int *rfile_array = rfile_static;
 50 | //int rfile_error;
 51 | 
 52 | /** get the read-only file associated with the given cpu */
 53 | int get_rfile(int cpu) {
 54 |     assert(cpu >= 0);
 55 | 
 56 |     lock();
 57 | 
 58 |     if (cpu >= rfile_size) {
 59 |         // expand array
 60 |         size_t new_size = rfile_size * 2 > cpu ? rfile_size * 2 : cpu;
 61 |         int *new_array = calloc(new_size, sizeof(int));
 62 |         memcpy(new_array, rfile_array, rfile_size  * sizeof(int));
 63 |         if (rfile_array != rfile_static) {
 64 |             free(rfile_array);
 65 |         }
 66 |         rfile_array = new_array;
 67 |         rfile_size  = new_size;
 68 |     }
 69 | 
 70 |     if (rfile_array[cpu] == 0) {
 71 |         printf("opening file for cpu %d\n", cpu);
 72 |         char filename[64] = {};
 73 |         int ret = snprintf(filename, 64, "/dev/cpu/%d/msr", cpu);
 74 |         if (ret == 0) {
 75 |             rfile_array[cpu] = -1;
 76 |         } else {
 77 |             rfile_array[cpu] = open(filename, O_RDONLY);
 78 |             if (rfile_array[cpu] == -1) {
 79 |                 rfile_array[cpu] = -errno;
 80 |             }
 81 |         }
 82 |     }
 83 | 
 84 |     int ret = rfile_array[cpu];
 85 | 
 86 |     unlock();
 87 | 
 88 |     return ret;
 89 | }
 90 | 
 91 | int read_msr(int cpu, uint32_t msr_index, uint64_t* value) {
 92 |     int file = get_rfile(cpu);
 93 |     assert(file);
 94 |     if (file < 0) {
 95 |         // file open failes are stored as negative errno
 96 |         return file;
 97 |     }
 98 |     int read = pread(file, value, 8, msr_index);
 99 |     return read == -1 ? errno : 0;
100 | }
101 | 
/* read_msr() on whichever cpu sched_getcpu() reports for the calling thread. */
int read_msr_cur_cpu(uint32_t msr_index, uint64_t* value) {
    const int cpu = sched_getcpu();

    return read_msr(cpu, msr_index, value);
}
105 | 
106 | 
// rename this to main to build an exe that can be run as ./a.out CPU MSR
// to read MSR from CPU (like a really simple rdmsr)
//
// Both arguments are parsed with atoi, so they are decimal. The MSR is read
// once from the given cpu and once from the current cpu; each result is
// printed either as "error <code>" or as the value in hex.
int test(int argc, char** argv) {
    assert(argc == 3);
    int cpu      = atoi(argv[1]);
    uint32_t msr = atoi(argv[2]);
    printf("reading msr %u from cpu %d\n", msr, cpu);
    uint64_t value = -1;

    int res = read_msr(cpu, msr, &value);
    if (res) {
        printf("error %d\n", res);
    } else {
        // PRIx64 is the conversion that matches uint64_t on every platform
        printf("value %" PRIx64 "\n", value);
    }

    res = read_msr_cur_cpu(msr, &value);
    if (res) {
        printf("error %d\n", res);
    } else {
        printf("value %" PRIx64 "\n", value);
    }

    return EXIT_SUCCESS;
}
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/msr-access.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * msr-access.h
 3 |  *
 * Simple API to access the x86 MSR registers exposed on Linux through the /dev/cpu/N/msr file system.
 5 |  *
 6 |  * Unless you've changed the msr permissions, only root can do this. The msr filesystem may not exist until
 7 |  * 'modprobe msr' is executed to load the msr module.
 8 |  */
 9 | 
10 | #ifndef MSR_ACCESS_H_
11 | #define MSR_ACCESS_H_
12 | 
13 | #include <inttypes.h>
14 | // you could get the MSR index values from the following header, although it isn't exported to user-space
15 | // in kernels after 4.12, but you can grab it from the linux source
16 | // #include <asm/msr-index.h>
17 | 
18 | #ifdef __cplusplus
19 | extern "C" {
20 | #endif
21 | 
22 | 
/**
 * Read the MSR given by msr_index on the given cpu, storing the result into
 * value, which must point to at least 8 bytes of storage.
 *
 * Returns zero on success, non-zero on failure.
 *
 * Negative values indicate errors
 * opening the underlying MSR file: the value returned is the negative of the errno
 * returned by the kernel when trying to open the file. These file errors are cached,
 * so once a negative value has been returned for a given cpu, subsequent calls will
 * always return the same value.
 *
 * Positive values indicate failures during the pread call performed to actually read
 * the msr from the open file. The value is the errno returned by the kernel after the
 * read. The most common value is 5 (EIO) which indicates that you can't read that MSR
 * on this hardware (e.g., it may not exist).
 */

int read_msr(int cpu, uint32_t msr_index, uint64_t* value);
42 | 
43 | /**
44 |  * Reads the given MSR on the current CPU. This is just a shortcut for calling
45 |  * read_msr(sched_getcpu(), ...), and the result and error handling is the same as that function.
46 |  *
47 |  * Of course, unless the thread affinity has been restricted for the current thread,
48 |  * the result doesn't help the calling code know the true value on the current CPU since
49 |  * a context switch can happen at any time (the same caveat applies to getcpu()).
50 |  */
51 | int read_msr_cur_cpu(uint32_t msr_index, uint64_t* value);
52 | 
53 | 
54 | #ifdef __cplusplus
55 | } // extern "C" {
56 | #endif
57 | 
#endif // #ifndef MSR_ACCESS_H_
59 | 


--------------------------------------------------------------------------------
/nonstd/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Boost Software License - Version 1.0 - August 17th, 2003
 2 | 
 3 | Permission is hereby granted, free of charge, to any person or organization
 4 | obtaining a copy of the software and accompanying documentation covered by
 5 | this license (the "Software") to use, reproduce, display, distribute,
 6 | execute, and transmit the Software, and to prepare derivative works of the
 7 | Software, and to permit third-parties to whom the Software is furnished to
 8 | do so, all subject to the following:
 9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/opt-control.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * "sinking" a value instructs the compiler to calculate it, i.e.,
 4 |  * makes the compiler believe that the value is necessary and hence
 5 |  * must the calculated. The actual sink implementation is empty and
 6 |  * so usually leaves no trace in the generated code except that the
 7 |  * value will be calculated.
 8 |  */
 9 | static inline void sink(int x) {
10 |     __asm__ volatile ("" :: "r"(x) :);
11 | }
12 | 
13 | /*
14 |  * Similar to sink except that it sinks the content pointed to 
15 |  * by the pointer, so the compiler will materialize in memory
16 |  * anything pointed to by the pointer.
17 |  */
18 | static inline void sink_ptr(void *p) {
19 |     __asm__ volatile ("" :: "r"(p) : "memory");
20 | }
21 | 


--------------------------------------------------------------------------------
/perf-timer-events.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "perf-timer-events.hpp"
 3 | 
 4 | 
 5 | // Returns a fresh copy of the table of predefined events from perf-timer-events.hpp (NoEvent is not part of it).
 6 | std::vector<PerfEvent> get_all_events() {
 7 |     static const std::vector<PerfEvent> all_events{
 8 |         CPU_CLK_UNHALTED_ONE_THREAD_ACTIVE,
 9 |         CPU_CLK_UNHALTED_REF_TSC,
10 |         CPU_CLK_UNHALTED_REF_XCLK,
11 |         CPU_CLK_UNHALTED_REF_XCLK_ANY,
12 |         CPU_CLK_UNHALTED_RING0_TRANS,
13 |         CPU_CLK_UNHALTED_THREAD,
14 |         CPU_CLK_UNHALTED_THREAD_ANY,
15 |         CPU_CLK_UNHALTED_THREAD_P,
16 |         CPU_CLK_UNHALTED_THREAD_P_ANY,
17 |         CYCLE_ACTIVITY_CYCLES_L1D_MISS,
18 |         CYCLE_ACTIVITY_STALLS_L1D_MISS,
19 |         HW_INTERRUPTS_RECEIVED,
20 |         INST_RETIRED_ANY,
21 |         INST_RETIRED_ANY_P,
22 |         L1D_REPLACEMENT,
23 |         L1D_PEND_MISS_FB_FULL,
24 |         L1D_PEND_MISS_PENDING,
25 |         L1D_PEND_MISS_PENDING_CYCLES,
26 |         L1D_PEND_MISS_PENDING_CYCLES_ANY,
27 |         L2_RQSTS_ALL_CODE_RD,
28 |         L2_RQSTS_ALL_DEMAND_DATA_RD,
29 |         L2_RQSTS_ALL_DEMAND_MISS,
30 |         L2_RQSTS_ALL_DEMAND_REFERENCES,
31 |         L2_RQSTS_ALL_PF,
32 |         L2_RQSTS_ALL_RFO,
33 |         L2_RQSTS_CODE_RD_HIT,
34 |         L2_RQSTS_CODE_RD_MISS,
35 |         L2_RQSTS_DEMAND_DATA_RD_HIT,
36 |         L2_RQSTS_DEMAND_DATA_RD_MISS,
37 |         L2_RQSTS_MISS,
38 |         L2_RQSTS_PF_HIT,
39 |         L2_RQSTS_PF_MISS,
40 |         L2_RQSTS_REFERENCES,
41 |         L2_RQSTS_RFO_HIT,
42 |         L2_RQSTS_RFO_MISS,
43 |         MEM_INST_RETIRED_ALL_LOADS,
44 |         MEM_INST_RETIRED_ALL_STORES,
45 |         MEM_INST_RETIRED_LOCK_LOADS,
46 |         MEM_INST_RETIRED_SPLIT_LOADS,
47 |         MEM_INST_RETIRED_SPLIT_STORES,
48 |         MEM_INST_RETIRED_STLB_MISS_LOADS,
49 |         MEM_INST_RETIRED_STLB_MISS_STORES,
50 |         MEM_LOAD_RETIRED_FB_HIT,
51 |         MEM_LOAD_RETIRED_L1_HIT,
52 |         MEM_LOAD_RETIRED_L1_MISS,
53 |         MEM_LOAD_RETIRED_L2_HIT,
54 |         MEM_LOAD_RETIRED_L2_MISS,
55 |         MEM_LOAD_RETIRED_L3_HIT,
56 |         MEM_LOAD_RETIRED_L3_MISS,
57 |         UOPS_DISPATCHED_PORT_PORT_0,
58 |         UOPS_DISPATCHED_PORT_PORT_1,
59 |         UOPS_DISPATCHED_PORT_PORT_2,
60 |         UOPS_DISPATCHED_PORT_PORT_3,
61 |         UOPS_DISPATCHED_PORT_PORT_4,
62 |         UOPS_DISPATCHED_PORT_PORT_5,
63 |         UOPS_DISPATCHED_PORT_PORT_6,
64 |         UOPS_DISPATCHED_PORT_PORT_7,
65 |         UOPS_ISSUED_ANY,
66 |     };
67 |     return all_events;
68 | }


--------------------------------------------------------------------------------
/perf-timer-events.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef PERF_TIMER_EVENTS_H_
 2 | #define PERF_TIMER_EVENTS_H_
 3 | 
 4 | #include "perf-timer.hpp"
 5 | 
 6 | /* returns a copy of the list of the named events below, NoEvent excluded (see perf-timer-events.cpp) */
 7 | std::vector<PerfEvent> get_all_events();
 8 | 
 9 | /*
10 |  * Predefined events: each pairs a display name with the event string that
11 |  * perf-timer.cpp passes to jevent_name_to_attr() in order to resolve it.
12 |  * NOTE(review): presumably produced by generate-event-code.py - confirm before hand-editing.
13 |  */
14 | const PerfEvent CPU_CLK_UNHALTED_ONE_THREAD_ACTIVE = PerfEvent( "cpu_clk_unhalted.one_thread_active", "cpu/event=0x3c,umask=0x2/" );
15 | const PerfEvent CPU_CLK_UNHALTED_REF_TSC       = PerfEvent( "cpu_clk_unhalted.ref_tsc", "cpu/event=0x0,umask=0x3/" );
16 | const PerfEvent CPU_CLK_UNHALTED_REF_XCLK      = PerfEvent( "cpu_clk_unhalted.ref_xclk", "cpu/event=0x3c,umask=0x1/" );
17 | const PerfEvent CPU_CLK_UNHALTED_REF_XCLK_ANY  = PerfEvent( "cpu_clk_unhalted.ref_xclk_any", "cpu/event=0x3c,umask=0x1,any=1/" );
18 | const PerfEvent CPU_CLK_UNHALTED_RING0_TRANS   = PerfEvent( "cpu_clk_unhalted.ring0_trans", "cpu/event=0x3c,umask=0x0,edge=1,cmask=1/" );
19 | const PerfEvent CPU_CLK_UNHALTED_THREAD        = PerfEvent( "cpu_clk_unhalted.thread", "cpu/event=0x3c,umask=0x0/" );
20 | const PerfEvent CPU_CLK_UNHALTED_THREAD_ANY    = PerfEvent( "cpu_clk_unhalted.thread_any", "cpu/event=0x3c,umask=0x0,any=1/" );
21 | const PerfEvent CPU_CLK_UNHALTED_THREAD_P      = PerfEvent( "cpu_clk_unhalted.thread_p", "cpu/event=0x3c,umask=0x0/" );
22 | const PerfEvent CPU_CLK_UNHALTED_THREAD_P_ANY  = PerfEvent( "cpu_clk_unhalted.thread_p_any", "cpu/event=0x3c,umask=0x0,any=1/" );
23 | const PerfEvent CYCLE_ACTIVITY_CYCLES_L1D_MISS = PerfEvent( "cycle_activity.cycles_l1d_miss", "cpu/event=0xa3,umask=0x8,cmask=8/" );
24 | const PerfEvent CYCLE_ACTIVITY_STALLS_L1D_MISS = PerfEvent( "cycle_activity.stalls_l1d_miss", "cpu/event=0xa3,umask=0xc,cmask=12/" );
25 | const PerfEvent HW_INTERRUPTS_RECEIVED         = PerfEvent( "hw_interrupts.received", "cpu/event=0xcb,umask=0x1/" );
26 | const PerfEvent INST_RETIRED_ANY               = PerfEvent( "inst_retired.any", "cpu/event=0xc0,umask=0x0/" );
27 | const PerfEvent INST_RETIRED_ANY_P             = PerfEvent( "inst_retired.any_p", "cpu/event=0xc0,umask=0x0/" );
28 | const PerfEvent L1D_REPLACEMENT                = PerfEvent( "l1d.replacement", "cpu/event=0x51,umask=0x1/" );
29 | const PerfEvent L1D_PEND_MISS_FB_FULL          = PerfEvent( "l1d_pend_miss.fb_full", "cpu/event=0x48,umask=0x2/" );
30 | const PerfEvent L1D_PEND_MISS_PENDING          = PerfEvent( "l1d_pend_miss.pending", "cpu/event=0x48,umask=0x1/" );
31 | const PerfEvent L1D_PEND_MISS_PENDING_CYCLES   = PerfEvent( "l1d_pend_miss.pending_cycles", "cpu/event=0x48,umask=0x1,cmask=1/" );
32 | const PerfEvent L1D_PEND_MISS_PENDING_CYCLES_ANY = PerfEvent( "l1d_pend_miss.pending_cycles_any", "cpu/event=0x48,umask=0x1,any=1,cmask=1/" );
33 | const PerfEvent L2_RQSTS_ALL_CODE_RD           = PerfEvent( "l2_rqsts.all_code_rd", "cpu/event=0x24,umask=0xe4/" );
34 | const PerfEvent L2_RQSTS_ALL_DEMAND_DATA_RD    = PerfEvent( "l2_rqsts.all_demand_data_rd", "cpu/event=0x24,umask=0xe1/" );
35 | const PerfEvent L2_RQSTS_ALL_DEMAND_MISS       = PerfEvent( "l2_rqsts.all_demand_miss", "cpu/event=0x24,umask=0x27/" );
36 | const PerfEvent L2_RQSTS_ALL_DEMAND_REFERENCES = PerfEvent( "l2_rqsts.all_demand_references", "cpu/event=0x24,umask=0xe7/" );
37 | const PerfEvent L2_RQSTS_ALL_PF                = PerfEvent( "l2_rqsts.all_pf", "cpu/event=0x24,umask=0xf8/" );
38 | const PerfEvent L2_RQSTS_ALL_RFO               = PerfEvent( "l2_rqsts.all_rfo", "cpu/event=0x24,umask=0xe2/" );
39 | const PerfEvent L2_RQSTS_CODE_RD_HIT           = PerfEvent( "l2_rqsts.code_rd_hit", "cpu/event=0x24,umask=0xc4/" );
40 | const PerfEvent L2_RQSTS_CODE_RD_MISS          = PerfEvent( "l2_rqsts.code_rd_miss", "cpu/event=0x24,umask=0x24/" );
41 | const PerfEvent L2_RQSTS_DEMAND_DATA_RD_HIT    = PerfEvent( "l2_rqsts.demand_data_rd_hit", "cpu/event=0x24,umask=0xc1/" );
42 | const PerfEvent L2_RQSTS_DEMAND_DATA_RD_MISS   = PerfEvent( "l2_rqsts.demand_data_rd_miss", "cpu/event=0x24,umask=0x21/" );
43 | const PerfEvent L2_RQSTS_MISS                  = PerfEvent( "l2_rqsts.miss", "cpu/event=0x24,umask=0x3f/" );
44 | const PerfEvent L2_RQSTS_PF_HIT                = PerfEvent( "l2_rqsts.pf_hit", "cpu/event=0x24,umask=0xd8/" );
45 | const PerfEvent L2_RQSTS_PF_MISS               = PerfEvent( "l2_rqsts.pf_miss", "cpu/event=0x24,umask=0x38/" );
46 | const PerfEvent L2_RQSTS_REFERENCES            = PerfEvent( "l2_rqsts.references", "cpu/event=0x24,umask=0xff/" );
47 | const PerfEvent L2_RQSTS_RFO_HIT               = PerfEvent( "l2_rqsts.rfo_hit", "cpu/event=0x24,umask=0xc2/" );
48 | const PerfEvent L2_RQSTS_RFO_MISS              = PerfEvent( "l2_rqsts.rfo_miss", "cpu/event=0x24,umask=0x22/" );
49 | const PerfEvent MEM_INST_RETIRED_ALL_LOADS     = PerfEvent( "mem_inst_retired.all_loads", "cpu/event=0xd0,umask=0x81/" );
50 | const PerfEvent MEM_INST_RETIRED_ALL_STORES    = PerfEvent( "mem_inst_retired.all_stores", "cpu/event=0xd0,umask=0x82/" );
51 | const PerfEvent MEM_INST_RETIRED_LOCK_LOADS    = PerfEvent( "mem_inst_retired.lock_loads", "cpu/event=0xd0,umask=0x21/" );
52 | const PerfEvent MEM_INST_RETIRED_SPLIT_LOADS   = PerfEvent( "mem_inst_retired.split_loads", "cpu/event=0xd0,umask=0x41/" );
53 | const PerfEvent MEM_INST_RETIRED_SPLIT_STORES  = PerfEvent( "mem_inst_retired.split_stores", "cpu/event=0xd0,umask=0x42/" );
54 | const PerfEvent MEM_INST_RETIRED_STLB_MISS_LOADS = PerfEvent( "mem_inst_retired.stlb_miss_loads", "cpu/event=0xd0,umask=0x11/" );
55 | const PerfEvent MEM_INST_RETIRED_STLB_MISS_STORES = PerfEvent( "mem_inst_retired.stlb_miss_stores", "cpu/event=0xd0,umask=0x12/" );
56 | const PerfEvent MEM_LOAD_RETIRED_FB_HIT        = PerfEvent( "mem_load_retired.fb_hit", "cpu/event=0xd1,umask=0x40/" );
57 | const PerfEvent MEM_LOAD_RETIRED_L1_HIT        = PerfEvent( "mem_load_retired.l1_hit", "cpu/event=0xd1,umask=0x1/" );
58 | const PerfEvent MEM_LOAD_RETIRED_L1_MISS       = PerfEvent( "mem_load_retired.l1_miss", "cpu/event=0xd1,umask=0x8/" );
59 | const PerfEvent MEM_LOAD_RETIRED_L2_HIT        = PerfEvent( "mem_load_retired.l2_hit", "cpu/event=0xd1,umask=0x2/" );
60 | const PerfEvent MEM_LOAD_RETIRED_L2_MISS       = PerfEvent( "mem_load_retired.l2_miss", "cpu/event=0xd1,umask=0x10/" );
61 | const PerfEvent MEM_LOAD_RETIRED_L3_HIT        = PerfEvent( "mem_load_retired.l3_hit", "cpu/event=0xd1,umask=0x4/" );
62 | const PerfEvent MEM_LOAD_RETIRED_L3_MISS       = PerfEvent( "mem_load_retired.l3_miss", "cpu/event=0xd1,umask=0x20/" );
63 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_0    = PerfEvent( "uops_dispatched_port.port_0", "cpu/event=0xa1,umask=0x1/" );
64 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_1    = PerfEvent( "uops_dispatched_port.port_1", "cpu/event=0xa1,umask=0x2/" );
65 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_2    = PerfEvent( "uops_dispatched_port.port_2", "cpu/event=0xa1,umask=0x4/" );
66 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_3    = PerfEvent( "uops_dispatched_port.port_3", "cpu/event=0xa1,umask=0x8/" );
67 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_4    = PerfEvent( "uops_dispatched_port.port_4", "cpu/event=0xa1,umask=0x10/" );
68 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_5    = PerfEvent( "uops_dispatched_port.port_5", "cpu/event=0xa1,umask=0x20/" );
69 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_6    = PerfEvent( "uops_dispatched_port.port_6", "cpu/event=0xa1,umask=0x40/" );
70 | const PerfEvent UOPS_DISPATCHED_PORT_PORT_7    = PerfEvent( "uops_dispatched_port.port_7", "cpu/event=0xa1,umask=0x80/" );
71 | const PerfEvent UOPS_ISSUED_ANY                = PerfEvent( "uops_issued.any", "cpu/event=0xe,umask=0x1/" );
72 | /* placeholder event with an empty name and an empty event string */
73 | const PerfEvent NoEvent = {"",""};
74 | 
75 | #endif // #ifndef PERF_TIMER_EVENTS_H_
76 | 


--------------------------------------------------------------------------------
/perf-timer.cpp:
--------------------------------------------------------------------------------
  1 | #include "tsc-support.hpp"
  2 | #include "perf-timer.hpp"
  3 | #include "misc.hpp"
  4 | #include "perf-timer-events.hpp"
  5 | 
  6 | extern "C" {
  7 | #include "jevents/rdpmc.h"
  8 | #include "jevents/jevents.h"
  9 | }
 10 | 
 11 | #include <stdio.h>
 12 | #include <string.h>
 13 | #include <linux/perf_event.h>
 14 | #include <assert.h>
 15 | 
 16 | #include <vector>
 17 | 
 18 | static bool verbose;
 19 | static bool debug; // lots of output
 20 | 
 21 | /*
 22 |  * Everything kept for one counter that setup_counters() opened successfully.
 23 |  */
 24 | struct event_ctx {
 25 |     PerfEvent event;                // the associated event
 26 |     struct perf_event_attr attr;    // the perf_event_attr structure that was used to open the event
 27 |     struct rdpmc_ctx jevent_ctx;    // the jevents context object
 28 | 
 29 |     event_ctx(PerfEvent ev, struct perf_event_attr opened_attr, struct rdpmc_ctx opened_ctx) :
 30 |             event{ev}, attr{opened_attr}, jevent_ctx{opened_ctx} {}
 31 | };
 32 | 
 33 | std::vector<event_ctx> contexts;
 34 | 
 35 | /**
 36 |  * Turn verbose mode on or off: while it is on, the vprint() calls in this file
 37 |  * echo debugging info about the perf timer operation to stderr. Defaults to false.
 38 |  */
 39 | void set_verbose(bool v) {
 40 |     verbose = v;
 41 | }
 42 | 
 43 | #define vprint(...) do { if (verbose) fprintf(stderr, __VA_ARGS__ ); } while(false)
 44 | 
 45 | /**
 46 |  * Write a perf-style text form of attr to f, e.g. "cpu/config=0x3c,config1=0x1/":
 47 |  * the PMU name ("???" if it cannot be resolved) followed by the non-zero fields.
 48 |  */
 49 | void printf_perf_attr(FILE *f, const struct perf_event_attr* attr) {
 50 |     // NOTE(review): resolve_pmu() may hand back heap memory that should be freed - confirm in jevents
 51 |     char* pmu = resolve_pmu(attr->type);
 52 |     fputs(pmu ? pmu : "???", f);
 53 |     bool comma = false;
 54 | 
 55 | #define APPEND_IF_NZ1(field) APPEND_IF_NZ2(field,field)
 56 | #define APPEND_IF_NZ2(name, field) if (attr->field) { \
 57 |         fprintf(f, "%s" #name "=0x%lx", comma ? "," : "/", (unsigned long)attr->field); \
 58 |         comma = true; \
 59 |     }
 60 |     // the first field printed is introduced by '/', every later one by ','
 61 |     APPEND_IF_NZ1(config);
 62 |     APPEND_IF_NZ1(config1);
 63 |     APPEND_IF_NZ1(config2);
 64 |     APPEND_IF_NZ2(period, sample_period);
 65 |     APPEND_IF_NZ1(sample_type);
 66 |     APPEND_IF_NZ1(read_format);
 67 | 
 68 |     fprintf(f, "/");
 69 | }
 70 | 
 71 | // Print the capability bits, index and a few raw fields of ctx's mmap page, followed by the current rdtsc() value.
 72 | void print_caps(FILE *f, const struct rdpmc_ctx *ctx) {
 73 |     const auto *page = ctx->buf;
 74 |     auto put = [f](const char *label, long unsigned v) { fprintf(f, " %s=0x%lx", label, v); };
 75 | 
 76 |     fprintf(f, "R%d UT%d ZT%d index: 0x%x",
 77 |         (int)page->cap_user_rdpmc, (int)page->cap_user_time, (int)page->cap_user_time_zero, page->index);
 78 |     put("pmc_width",    (long unsigned)page->pmc_width);
 79 |     put("offset",       (long unsigned)page->offset);
 80 |     put("time_enabled", (long unsigned)page->time_enabled);
 81 |     put("time_running", (long unsigned)page->time_running);
 82 |     put("rdtsc",        (long unsigned)rdtsc());
 83 | }
 84 | 
 85 | /* list the events in markdown format: a one-column table whose header row is "Name" */
 86 | void list_events() {
 87 |     const char *fmt = "| %-27s |\n";
 88 |     printf(fmt, "Name");
 89 |     printf(fmt, "-------------------------");  // fmt has a single %s, so pass a single argument
 90 |     for (const auto& e : get_all_events()) {
 91 |         printf(fmt, e.name);
 92 |     }
 93 | }
 94 | 
 95 | // Tries to program each event as a pinned counter; element i of the result is true iff events[i] succeeded.
 96 | std::vector<bool> setup_counters(const std::vector<PerfEvent>& events) {
 97 |     std::vector<bool> results;
 98 |     for (auto& e : events) {
 99 |         bool ok = false;
100 | 
101 |         if (contexts.size() == MAX_COUNTERS) {
102 |             fprintf(stderr, "Unable to program event %s, MAX_COUNTERS (%zu) reached\n", e.name, MAX_COUNTERS);
103 |         } else {
104 |             struct perf_event_attr attr = {};
105 |             int err = jevent_name_to_attr(e.event_string, &attr);
106 |             if (err) {
107 |                 fprintf(stderr, "Unable to resolve event '%s' - report this as a bug along with your CPU model string\n", e.name);
108 |                 fprintf(stderr, "jevents error %2d: %s\n", err, jevent_error_to_string(err));
109 |                 fprintf(stderr, "jevents details : %s\n", jevent_get_error_details());
110 |             } else {
111 |                 struct rdpmc_ctx ctx = {};
112 |                 attr.sample_period = 0;
113 |                 // pinned makes the counter stay on the CPU and fail fast if it can't be allocated: we
114 |                 // can check right away if index == 0 which means failure
115 |                 attr.pinned = 1;
116 |                 int ret;
117 |                 if ((ret = rdpmc_open_attr(&attr, &ctx, 0)) || ctx.buf->index == 0) {
118 |                     fprintf(stderr, "Failed to program event '%s' (reason: %s). \n\tResolved to: ", e.name,
119 |                             ret ? "rdpmc_open_attr failed" : "no index, probably too many or incompatible events");
120 |                     printf_perf_attr(stderr, &attr);
121 |                     fprintf(stderr, "\n");
122 |                     if (ret == 0) {
123 |                         // the open itself worked, so release the resources of the unusable counter
124 |                         rdpmc_close(&ctx);
125 |                     }
126 |                 } else {
127 |                     contexts.emplace_back(e, attr, ctx);
128 |                     ok = true;
129 |                 }
130 |             }
131 |         }
132 |         results.push_back(ok);
133 |     }
134 | 
135 |     // output all the event details after all have been programmed since later events with constraints might
136 |     // change the index for earlier ones
137 |     if (verbose) {
138 |         for (const event_ctx& ec : contexts) {
139 |             vprint("Resolved and programmed event '%s' to ", ec.event.name);
140 |             printf_perf_attr(stderr, &ec.attr);
141 |             vprint("\n\t");
142 |             print_caps(stderr, &ec.jevent_ctx);
143 |             vprint("\n");
144 |         }
145 |     }
146 | 
147 |     assert(results.size() == events.size());
148 |     return results;
149 | }
150 | 
151 | /**
152 |  * rdpmc_readx - read a ring 3 readable performance counter
153 |  * @ctx: Pointer to the event_ctx whose jevent_ctx was opened for the counter.
154 |  *
155 |  * Returns val + offset truncated to the low pmc_width bits, where val is the
156 |  * rdpmc result, or 0 when index == 0. Only call this from the thread/process
157 |  * that opened the context. For new threads please create a new context.
158 |  */
159 | unsigned long long rdpmc_readx(event_ctx *ctx)
160 | {
161 |     typedef uint64_t u64;
162 | #define rmb() asm volatile("" ::: "memory")
163 | 
164 | 	u64 val;
165 | 	unsigned seq;
166 | 	u64 offset, time_running, time_enabled;
167 | 	struct perf_event_mmap_page *buf = ctx->jevent_ctx.buf;
168 | 	unsigned index;
169 |     bool lockok = true; // only reported in the debug output below
170 | 
171 | 	do { // repeat until buf->lock is unchanged across the reads of the page
172 | 		seq = buf->lock;
173 | 		rmb();
174 | 		index = buf->index;
175 | 		offset = buf->offset;
176 |         time_enabled = buf->time_enabled;
177 |         time_running = buf->time_running;
178 | 		if (index == 0) { /* rdpmc not allowed */
179 |             val = 0;
180 |             rmb();
181 |             lockok = (buf->lock == seq); // this path does not retry; it only records whether the lock moved
182 | 			break;
183 |         }
184 | #if defined(__ICC) || defined(__INTEL_COMPILER)
185 | 		val = _rdpmc(index - 1);
186 | #else
187 | 		val = __builtin_ia32_rdpmc(index - 1);
188 | #endif
189 | 		rmb();
190 | 	} while (buf->lock != seq);
191 | 
192 |     u64 res  = val + offset;
193 |     u64 res2 = (res << (64 - buf->pmc_width)) >> (64 - buf->pmc_width); // keep only the low pmc_width bits
194 | 
195 |     if (debug) { // debug is never assigned in this file, so this block only runs if that changes
196 |         vprint("read counter %-30s ", ctx->event.name);
197 | #define APPEND_LOCAL(local, fmt) fprintf(stderr, " " #local "=0x%" #fmt "lx", (long unsigned)local);
198 |         APPEND_LOCAL(lockok, 1);
199 |         APPEND_LOCAL(val, 013);
200 |         APPEND_LOCAL(offset, 013);
201 |         // APPEND_LOCAL(res, 013);
202 |         APPEND_LOCAL(res2, 012);
203 |         APPEND_LOCAL(time_enabled, 08);
204 |         APPEND_LOCAL(time_running, 08);
205 |         APPEND_LOCAL(index, );
206 |         vprint("\n");
207 |     }
208 |     return res2;
209 | }
210 | 
211 | 
212 | event_counts read_counters() {
213 |     event_counts snapshot{uninit_tag{}};
214 |     uint64_t *slot = snapshot.counts;
215 |     // one slot per programmed counter, in setup order; the remaining slots stay uninitialized
216 |     for (event_ctx& ec : contexts) *slot++ = rdpmc_readx(&ec);
217 |     return snapshot;
218 | }
219 | 
220 | size_t num_counters() {
221 |     return contexts.size(); // setup_counters() appends one entry per counter it programmed successfully
222 | }
223 | 
224 | event_counts calc_delta(event_counts before, event_counts after, size_t max_event) {
225 |     const size_t count = max_event < MAX_COUNTERS ? max_event : MAX_COUNTERS;
226 |     event_counts delta(uninit_tag{});  // slots at index >= count are left uninitialized
227 |     for (size_t slot = 0; slot != count; ++slot) {
228 |         delta.counts[slot] = after.counts[slot] - before.counts[slot];
229 |     }
230 |     return delta;
231 | }
232 | 


--------------------------------------------------------------------------------
/perf-timer.hpp:
--------------------------------------------------------------------------------
 1 | /* simple PMU counting capabilities */
 2 | #ifndef PERF_TIMER_H_
 3 | #define PERF_TIMER_H_
 4 | 
 5 | #include <cinttypes>
 6 | #include <cstddef>
 7 | #include <cstring>
 8 | #include <string>
 9 | #include <vector>
10 | 
11 | constexpr size_t MAX_COUNTERS = 8;
12 | 
13 | /* A named PMU event; equality and ordering look at the name only, never at event_string. */
14 | struct PerfEvent {
15 |     const char *name;
16 |     const char *event_string;
17 |     PerfEvent(const char* name, const char* event_string) : name(name), event_string(event_string) {}
18 | 
19 |     bool operator==(const PerfEvent& rhs) const { return !std::strcmp(name, rhs.name); }
20 |     bool operator!=(const PerfEvent& rhs) const { return std::strcmp(name, rhs.name) != 0; }
21 |     bool operator< (const PerfEvent& rhs) const { return 0 > std::strcmp(name, rhs.name); }
22 | };
23 | 
24 | static inline std::string to_string(const PerfEvent& e) {
25 |     return std::string("event[name=").append(e.name).append(",event_string=").append(e.event_string).append("]");
26 | }
27 | 
28 | struct uninit_tag{};
29 | 
30 | struct event_counts {
31 |     uint64_t counts[MAX_COUNTERS];
32 | 
33 |     event_counts() : counts{} {}        // zero-fills every slot
34 |     event_counts(uninit_tag) {}         // leaves the slots uninitialized
35 |     /** element-wise f(l.counts[i], r.counts[i]) over all MAX_COUNTERS slots, returned as a new event_counts */
36 |     template<typename F>
37 |     static event_counts apply(const event_counts& l, const event_counts& r, const F& f) {
38 |         event_counts combined;
39 |         size_t i = 0;
40 |         for (uint64_t& slot : combined.counts) {
41 |             slot = f(l.counts[i], r.counts[i]);
42 |             ++i;
43 |         }
44 |         return combined;
45 |     }
46 | };
47 | 
48 | void set_verbose(bool verbose);
49 | 
50 | void list_events();
51 | 
52 | /**
53 |  * Sets up the PMU to record the given events. Currently doesn't remove
54 |  * any events set up earlier, so the list will keep growing (often you
55 |  * just set up counters once for the lifetime of the process).
56 |  *
57 |  * Returns one bool per passed event, in order: true if that event was
58 |  * programmed. The successfully programmed events occupy consecutive slots
59 |  * of event_counts, in the order in which they were set up.
60 |  */
61 | std::vector<bool> setup_counters(const std::vector<PerfEvent>& events);
62 | 
63 | event_counts read_counters();
64 | 
65 | /* number of successfully programmed counters */
66 | size_t num_counters();
67 | 
68 | /**
69 |  * Calculate the delta between two event sets, up to max_event if specified.
70 |  *
71 |  * The values of counts between max_event and MAX_COUNTERS are unspecified.
72 |  */
73 | event_counts calc_delta(event_counts before, event_counts after, size_t max_event = MAX_COUNTERS);
74 | 
75 | std::vector<PerfEvent> get_all_events();
76 | 
77 | #endif // #ifndef PERF_TIMER_H_
78 | 


--------------------------------------------------------------------------------
/scripts/common.sh:
--------------------------------------------------------------------------------
1 | # period values swept by data.sh, which multiplies each one by MHZ: 600 to 800 in steps of 20
2 | period_list=$(seq 600 20 800)
3 | 
4 | # the positional arguments visible when this file is sourced, joined into one string;
5 | # data.sh and plots.sh use it to filter which tests / plots are run
6 | argstr="$@"
7 | 


--------------------------------------------------------------------------------
/scripts/data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | # This poorly named variable should be set to the
 5 | # TSC (time stamp counter) frequency of your system.
 6 | # You can determine this by running:
 7 | # ./bench dummy > /dev/null
 8 | # and looking at the 'tsc freq' line. Use that value
 9 | # as MHZ.
10 | export MHZ=${MHZ:=3192}
11 | 
12 | # directory of this script (https://stackoverflow.com/a/12694189); $PWD when invoked without a path
13 | SCRIPTDIR="${BASH_SOURCE%/*}"
14 | if [[ ! -d "$SCRIPTDIR" ]]; then SCRIPTDIR="$PWD"; fi
15 | 
16 | . "$SCRIPTDIR/common.sh"
17 | 
18 | RESULTDIR="$SCRIPTDIR/../results"
19 | : ${PREFIX:=test}
20 | 
21 | : ${TEMPDIR:=$SCRIPTDIR/../tmp}
22 | 
23 | mkdir -p "$RESULTDIR"
24 | mkdir -p "$TEMPDIR"
25 | 
26 | echo "Collecting data and writing to $RESULTDIR/$PREFIX-*.csv"
27 | 
28 | PER_US=5000
29 | # the values exported below are microsecond counts multiplied by MHZ
30 | export TEST_PER=$(($PER_US * $MHZ))
31 | export TEST_RES=$((1 * $MHZ))
32 | extra=$((100 * $MHZ)) # 100 us
33 | # run_one TEST [COLS [SUFFIX]]: runs ./bench TEST and writes $RESULTDIR/$PREFIX-TEST<SUFFIX>-{0,1,2}.csv
34 | function run_one {
35 |     export COLS=${2:-Cycles,Unhalt_GHz,tscg,retries}
36 |     local test_name=$1
37 |     echo ">>>>>>>>> Running $test_name with TEST_PER=$TEST_PER TEST_RES=$TEST_RES COLS=$COLS"
38 |     if [[ $argstr && ! " $argstr " =~ " $test_name " ]]; then
39 |         echo "Skipping $1 because it is not in $argstr"
40 |         return
41 |     fi
42 |     if [[ -n $SKIP_ZMM && $test_name =~ .*zmm.* ]]; then
43 |         echo "Skipping $test_name because SKIP_ZMM is set"
44 |         return
45 |     fi
46 |     ./bench "$test_name" > "$TEMPDIR/temp.csv"
47 |     for i in {0..2}; do  # file <i> gets the rows starting with "<i>," plus the line before each (-B1)
48 |         grep -E -B1 "^${i}," "$TEMPDIR/temp.csv" > "$RESULTDIR/$PREFIX-$test_name${3}-${i}.csv"
49 |     done
50 | }
51 | 
52 | echo "argstr: $argstr"
53 | 
54 | # Tests that include volts. These are gated behind DO_VOLTS=1 because they will usually fail unless you
55 | # are running as root, or have set up the msr device files for non-root access.
56 | # For volts to work at all you need to install the msr-tools package; it will then work as root.
57 | # Here's how to set up non-root msr reads:
58 | # sudo chmod a+r /dev/cpu/*/msr
59 | # sudo setcap cap_sys_rawio=ep ./bench
60 | if [[ "$DO_VOLTS" -eq 1 ]]; then
61 |     echo "Doing VOLTS data collection"
62 |     TEST_RES=$((2 * $MHZ)) TEST_EXTRA=$extra NO_WARM=1 run_one vporxmm_vz100 "Cycles,Unhalt_GHz,IPC,volts" "-volts"
63 |     TEST_RES=$((2 * $MHZ)) TEST_EXTRA=$extra NO_WARM=1 run_one vporymm_vz100 "Cycles,Unhalt_GHz,IPC,volts" "-volts"
64 |     TEST_RES=$((2 * $MHZ)) TEST_EXTRA=$extra NO_WARM=1 run_one vporzmm_vz100 "Cycles,Unhalt_GHz,IPC,volts" "-volts"
65 | else
66 |     echo "Doing non-VOLTS data collection"
67 |     run_one vporxmm_vz
68 |     run_one vporymm_vz
69 |     run_one vporzmm_vz
70 |     TEST_EXTRA=$extra run_one vporxmm_vz100        "Cycles,Unhalt_GHz,IPC"
71 |     TEST_EXTRA=$extra run_one vporymm_vz100        "Cycles,Unhalt_GHz,IPC"
72 |     TEST_EXTRA=$extra run_one vporzmm_vz100        "Cycles,Unhalt_GHz,IPC"
73 |     TEST_EXTRA=$extra TEST_RES=$((8 * $MHZ)) run_one vporzmm_vz100        "Cycles,Unhalt_GHz,IPC" "-8us"
74 |     TEST_EXTRA=$extra run_one vporxmm_tput_vz100   "Cycles,Unhalt_GHz,IPC"
75 |     TEST_EXTRA=$extra run_one vporymm_tput_vz100   "Cycles,Unhalt_GHz,IPC"
76 |     TEST_EXTRA=$extra run_one vporzmm_tput_vz100   "Cycles,Unhalt_GHz,IPC"
77 |     TEST_EXTRA=$extra run_one vpermdzmm_vz100      "Cycles,Unhalt_GHz,IPC"
78 |     TEST_EXTRA=$extra run_one vpermdzmm_tput_vz100 "Cycles,Unhalt_GHz,IPC"
79 |     TEST_EXTRA=$extra run_one vporxymm250          "Cycles,Unhalt_GHz,IPC"
80 |     TEST_EXTRA=$extra run_one vporyzmm250          "Cycles,Unhalt_GHz,IPC"
81 |     run_one vporymm
82 |     run_one vporzmm
83 | 
84 |     for r in 1 2 3 4 5 6 7 8 9 10 20 30 40 50 60 70 80 90 100 120 140 160 180 200; do
85 |         TEST_EXTRA=$extra run_one "vporxymm250_$r"            "Cycles,Unhalt_GHz"
86 |     done
87 |     TEST_EXTRA=$extra run_one "mulxymm250_10"            "Cycles,Unhalt_GHz"
88 | 
89 |     for p in $period_list; do  # period_list is defined in common.sh
90 |         TEST_EXTRA=$extra TEST_PER=$(($p * $MHZ)) run_one vporzmm_vz100 "Cycles,Unhalt_GHz,IPC" "-period$p"
91 |     done
92 | fi
93 | 


--------------------------------------------------------------------------------
/scripts/plots.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | set -e
  3 | 
  4 | # directory of this script (https://stackoverflow.com/a/12694189); $PWD when invoked without a path
  5 | SCRIPTDIR="${BASH_SOURCE%/*}"
  6 | if [[ ! -d "$SCRIPTDIR" ]]; then SCRIPTDIR="$PWD"; fi
  7 | 
  8 | . "$SCRIPTDIR/common.sh"
  9 | 
 10 | PLOTPY="$SCRIPTDIR/plot-csv.py"
 11 | RESULTDIR="$SCRIPTDIR/../results"
 12 | 
 13 | # $1 input file(s), relative to $RESULTDIR; may be a brace pattern, which is expanded via eval
 14 | # $2 output file name without extension (empty for default); only used when OUTDIR is set
 15 | # $3 ylabel
 16 | # $4 xlabel
 17 | # $5 title; the plot is skipped when argstr is set and does not match the title
 18 | # $6... extra arguments passed through to plot-csv.py
 19 | function plot {
 20 |     local title=$5
 21 |     if [[ $argstr && ! "$title" =~ "$argstr" ]]; then
 22 |         echo "Skipping $title because it is not in $argstr"
 23 |         return
 24 |     fi
 25 |     eval input=( $1 )
 26 |     IFS=',' command eval 'echo input="${input[*]}"'
 27 |     if [ -z "$OUTDIR" ]; then
 28 |         local OUT=()
 29 |     else
 30 |         mkdir -p "$OUTDIR"
 31 |         if [ -z "$2" ]; then
 32 |             local OUTNAME=${1%.*}.svg
 33 |         else
 34 |             local OUTNAME=$2.svg
 35 |         fi
 36 |         local OUT=("--out" "$OUTDIR/$OUTNAME")
 37 |     fi
 38 |     input=("${input[@]/#/$RESULTDIR/}")  # quoted so that a RESULTDIR containing spaces is not word-split
 39 |     echo "INPUT: ${input[@]} OUTPUT: $OUTNAME"
 40 |     "$PLOTPY" "${input[@]}" "${OUT[@]}" --tight --ylabel "$3" --xlabel "$4" --title "$title" "${@:6}"
 41 | }
 42 | 
 43 | : ${PREFIX:=test}  # usually skx
 44 | : ${PREFIXV:=test} # usually skl only for "volts" plots
 45 | 
 46 | echo "Using PREFIX=$PREFIX, PREFIXV=$PREFIXV"
 47 | 
 48 | xcols_arg="--xcols-by-name us,us_1,us_2"
 49 | ycols_arg="--cols-by-name Unhalt_GHz,Unhalt_GHz_1,Unhalt_GHz_2"
 50 | 
 51 | plot "$PREFIX-vporymm_vz-{0..2}.csv" "fig-vporvz256" "Frequency (GHz)" "Time (us)" "256-bit VPOR Frequency Transitions" \
 52 |    $xcols_arg $ycols_arg --ylim 0 4 --alpha 0.6
 53 | 
 54 | plot "$PREFIX-vporzmm_vz-{0..2}.csv" "fig-vporvz512" "Frequency (GHz)" "Time (us)" "512-bit VPOR Frequency Transitions" \
 55 |    $xcols_arg $ycols_arg --ylim 0 4 --alpha 0.6
 56 | 
 57 | plot "$PREFIX-vporzmm_vz-{0..2}.csv" "fig-vpor-zoomed" "Frequency (GHz)" "Time (us)" "512-bit VPOR Transition Closeup" --marker=. \
 58 |    --patches \
 59 | '[{ "xy" : [15000, 0],  "width" : 9,"height" : 4,"color" : "thistle"},'\
 60 | '{ "xy" : [15009, 0], "width" : 11,"height" : 4,"color" : "peachpuff"} ]'\
 61 |     $xcols_arg $ycols_arg --ylim 2.7 3.3 --xlim 14950 15050
 62 | 
 63 | plot "$PREFIX-vporymm-{0..2}.csv" "fig-vpor256" "Frequency (GHz)" "Time (us)" "256-bit VPOR Transitions (no vzeroupper)" \
 64 |     $xcols_arg $ycols_arg --ylim 0 4 --alpha 0.6
 65 | 
 66 | plot "$PREFIX-vporzmm-{0..2}.csv" "fig-vpor512" "Frequency (GHz)" "Time (us)" "512-bit VPOR Transitions (no vzeroupper)" \
 67 |     $xcols_arg $ycols_arg --ylim 0 4 --alpha 0.6
 68 | 
 69 | plot "$PREFIX-vporzmm_vz100-{0..2}.csv" "fig-vporvz512-ipc" "Frequency (GHz)" "Time (us)" "512-bit VPOR Frequency Transitions" \
 70 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 0 4 --ylabel2 IPC --alpha 0.6
 71 | 
 72 | plot "$PREFIX-vporxmm_vz100-{0..2}.csv" "fig-ipc-zoomed-xmm" "Frequency (GHz)" "Time (us)" "128-bit VPOR Transition Closeup" \
 73 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 1.2 \
 74 |     --marker=. --marker2=. --legend-loc='upper right'
 75 | 
 76 | plot "$PREFIX-vporymm_vz100-{0..2}.csv" "fig-ipc-zoomed-ymm" "Frequency (GHz)" "Time (us)" "256-bit VPOR Transition Closeup" \
 77 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 1.2 \
 78 |     --marker=. --marker2=. --legend-loc='upper right' --patches \
 79 |     '[{ "xy": [15000, 0], "width" : 9, "height" : 4,"color" : "thistle"}]'
 80 | 
 81 | plot "$PREFIX-vporzmm_vz100-{0..2}.csv" "fig-ipc-zoomed-zmm" "Frequency (GHz)" "Time (us)" "512-bit VPOR Transition IPC" \
 82 |     $xcols_arg $ycols_arg --cols2-by-name "IPC_1" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 1.2 \
 83 |     --marker=. --marker2=. --legend-loc='upper right' --patches \
 84 |     '[{ "xy": [15000, 0], "width" : 9, "height" : 4,"color" : "thistle"},
 85 |     { "xy"  : [15009, 0], "width" :11, "height" : 4,"color" : "peachpuff"},
 86 |     { "xy"  : [15020, 0], "width" :80, "height" : 4,"color" : "darkturquoise"} ]'
 87 | 
 88 | # 8 us version of zmm lat
 89 | plot "$PREFIX-vporzmm_vz100-8us-{0..2}.csv" "fig-ipc-zoomed-zmm-8us" "Frequency (GHz)" "Time (us)" "512-bit VPOR Transition (8 us sampling)" \
 90 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 1.2 \
 91 |     --marker=. --marker2=. --legend-loc='upper right'
 92 | 
 93 | plot "$PREFIX-vporymm_tput_vz100-{0..2}.csv" "fig-ipc-zoomed-ymm-tput" "Frequency (GHz)" "Time (us)" "256-bit VPOR Transition w/ IPC" \
 94 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 2 \
 95 |     --marker=. --marker2=. --legend-loc='upper right'
 96 | 
 97 | plot "$PREFIX-vporzmm_tput_vz100-{0..2}.csv" "fig-ipc-zoomed-zmm-tput" "Frequency (GHz)" "Time (us)" "512-bit VPOR Transition w/ IPC" \
 98 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 2 \
 99 |     --marker=. --marker2=. --legend-loc='upper right'
100 | 
101 | plot "$PREFIX-vpermdzmm_vz100-{0..2}.csv" "fig-vpermd-ipc-zoomed-tput" "Frequency (GHz)" "Time (us)" "512-bit VPERMD Transition IPC Closeup" \
102 |     $xcols_arg $ycols_arg --cols2-by-name "IPC" --ylim 2.7 3.3 --xlim 14950 15150 --ylabel2 IPC --ylim2 0 2 \
103 |     --marker=. --marker2=. --legend-loc='upper right' --patches \
104 |     '[{ "xy": [15000, 0], "width" : 9, "height" : 4,"color" : "thistle"},
105 |     { "xy"  : [15009, 0], "width" : 11, "height" : 4,"color" : "peachpuff"},
106 |     { "xy"  : [15020, 0], "width" : 80, "height" : 4,"color" : "darkturquoise"} ]'
107 | 
108 | #for p in $period_list; do
109 | for p in 760; do
110 |     plot "$PREFIX-vporzmm_vz100-period$p-{0..2}.csv" "fig-vporvz512-ipc-p$p" "Frequency (GHz)" "Time (us)" "Transition Closeup: $p μs Period" \
111 |         $xcols_arg $ycols_arg --cols2-by-name "IPC" --xlim 7550 7700 --ylim 2.7 3.3 --ylabel2 IPC --ylim2 0 1.2 --marker=. --marker2=.
112 | done
113 | 
114 | # spread spectrum clocking zoom
115 | plot "$PREFIX-vporymm_vz-1.csv" "fig-ssc" "Frequency (GHz)" "Time (us)" "Spread Spectrum Clocking Closeup" \
116 |    --xcols-by-name us --cols-by-name "Unhalt_GHz" --ylim 3.15 3.25 --xlim 3600 4000
117 | 
118 | for i in 0 1 2; do
119 | plot "$PREFIXV-vporymm_vz100-volts-$i.csv" "fig-volts256-$i" "Frequency (GHz)" "Time (us)" "Voltage Transition (256-bit)" \
120 |     --xcols-by-name "us" --cols-by-name "paytime" --cols2-by-name "volts" --ylabel "Payload Time (cycles)" --ylabel2 "Volts (V)" \
121 |     --xlim 14990 15150 --ylim2 0.95 0.97 --marker=. --marker2=. --patches \
122 |     '[{ "xy": [15000, -1000], "width" : 9.7, "height" : 6000,"color" : "thistle"}]'
123 | done
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/tsc-support.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * tsc-support.cpp
  3 |  */
  4 | 
  5 | #include "tsc-support.hpp"
  6 | #include "cpuid.hpp"
  7 | 
  8 | #include <cinttypes>
  9 | #include <string>
 10 | #include <cstdio>
 11 | #include <cassert>
 12 | #include <array>
 13 | #include <algorithm>
 14 | #include <numeric>
 15 | 
 16 | #include <error.h>
 17 | #include <time.h>
 18 | 
 19 | using std::uint32_t;
 20 | 
 21 | 
 22 | 
 23 | uint64_t get_tsc_from_cpuid_inner() {
 24 |     if (cpuid_highest_leaf() < 0x15) {
 25 |         std::printf("CPUID doesn't support leaf 0x15, falling back to manual TSC calibration.\n");
 26 |         return 0;
 27 |     }
 28 | 
 29 |     auto cpuid15 = cpuid(0x15);
 30 |     // std::printf("cpuid = %s\n", cpuid15.to_string().c_str());
 31 | 
 32 |     if (cpuid15.ecx) {
 33 |         // the crystal frequency was present in ECX
 34 |         return (uint64_t)cpuid15.ecx * cpuid15.ebx / cpuid15.eax;
 35 |     }
 36 | 
 37 |     // ecx == 0 means we have to use a hard-coded frequency based on the model and table provided by Intel
 38 |     // in 18.7.3
 39 |     auto family = get_family_model();
 40 |     // std::printf("cpu: %s\n", family.to_string().c_str());
 41 | 
 42 | 
 43 |     if (family.family == 6) {
 44 |         if (family.model == 0x4E || family.model == 0x5E || family.model == 0x8E || family.model == 0x9E) {
 45 |             // skylake client or kabylake
 46 |             return (int64_t)24000000 * cpuid15.ebx / cpuid15.eax; // 24 MHz crystal clock
 47 |         }
 48 |     } else {
 49 |         std::printf("CPU family not 6 (perhaps AMD or old Intel), falling back to manual TSC calibration.\n");
 50 |     }
 51 | 
 52 |     return 0;
 53 | }
 54 | 
 55 | uint64_t get_tsc_from_cpuid() {
 56 |     static auto cached = get_tsc_from_cpuid_inner();
 57 |     return cached;
 58 | }
 59 | 
 60 | 
 61 | namespace Clock {
 62 |     static inline uint64_t nanos() {
 63 |         struct timespec ts;
 64 |         clock_gettime(CLOCK_MONOTONIC, &ts);
 65 |         return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
 66 |     }
 67 | }
 68 | 
 69 | constexpr size_t SAMPLES = 101;
 70 | constexpr uint64_t DELAY_NANOS = 10000; // nanos 1us
 71 | 
 72 | uint64_t do_sample() {
 73 |     _mm_lfence();
 74 |     uint64_t  nsbefore = Clock::nanos();
 75 |     uint64_t tscbefore = rdtsc();
 76 |     while (nsbefore + DELAY_NANOS > Clock::nanos())
 77 |         ;
 78 |     uint64_t  nsafter = Clock::nanos();
 79 |     uint64_t tscafter = rdtsc();
 80 |     return (tscafter - tscbefore) * 1000000000u / (nsafter - nsbefore);
 81 | }
 82 | 
 83 | uint64_t tsc_from_cal() {
 84 |     std::array<uint64_t, SAMPLES * 2> samples;
 85 | 
 86 |     for (size_t s = 0; s < SAMPLES * 2; s++) {
 87 |         samples[s] = do_sample();
 88 |     }
 89 | 
 90 |     // throw out the first half of samples as a warmup
 91 |     std::array<uint64_t, SAMPLES> second_half;
 92 |     std::copy(samples.begin() + SAMPLES, samples.end(), second_half.begin());
 93 |     std::sort(second_half.begin(), second_half.end());
 94 | 
 95 |     // average the middle quintile
 96 |     auto third_quintile = second_half.begin() + 2 * SAMPLES/5;
 97 |     uint64_t sum = std::accumulate(third_quintile, third_quintile + SAMPLES/5, (uint64_t)0);
 98 | 
 99 |     return sum / (SAMPLES/5);
100 | }
101 | 
102 | /**
103 |  * TSC frequency detection is described in
104 |  * Intel SDM Vol3 18.7.3: Determining the Processor Base Frequency
105 |  *
106 |  * Nominal TSC frequency = ( CPUID.15H.ECX[31:0] * CPUID.15H.EBX[31:0] ) ÷ CPUID.15H.EAX[31:0]
107 |  */
108 | uint64_t get_tsc_freq(bool force_calibrate) {
109 |     uint64_t tsc;
110 |     if (!force_calibrate && (tsc = get_tsc_from_cpuid())) {
111 |         return tsc;
112 |     }
113 | 
114 |     return tsc_from_cal();
115 | }
116 | 
117 | 
118 | const char* get_tsc_cal_info(bool force_calibrate) {
119 |     if (!force_calibrate && get_tsc_from_cpuid()) {
120 |         return "from cpuid leaf 0x15";
121 |     } else {
122 |         return "from calibration loop";
123 |     }
124 | 
125 | }
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 


--------------------------------------------------------------------------------
/tsc-support.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef TSC_SUPPORT_H_
 2 | #define TSC_SUPPORT_H_
 3 | 
 4 | #include <inttypes.h>
 5 | #include <stdbool.h>
 6 | 
 7 | #ifdef _MSC_VER
 8 | #include <intrin.h>
 9 | #else
10 | #include <x86intrin.h>
11 | #endif
12 | 
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 | 
17 | 
/** Read the time-stamp counter through the compiler's __rdtsc intrinsic. */
static inline uint64_t rdtsc() {
    const uint64_t ticks = __rdtsc();
    return ticks;
}
21 | 
/**
 * Read the time-stamp counter with the rdtscp instruction and return the 64-bit value
 * assembled from EDX:EAX. The value the instruction places in ECX is discarded.
 *
 * Inline assembly is used instead of the __rdtscp(&cpu) intrinsic because gcc likes to
 * actually perform the dead store to the cpu variable.
 */
static inline uint64_t rdtscp() {
    uint64_t hi, lo;
    // volatile is required: an asm statement with outputs and no inputs is otherwise
    // treated as a pure computation, so the compiler may merge two reads into one or
    // hoist/drop them, which defeats the purpose of reading a timestamp.
    __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi) : : "rcx");
    return (hi << 32) | lo;
}
32 | 
33 | /**
34 |  * Get the TSC frequency.
35 |  *
36 |  * By default, this tries to read the TSC frequency directly from cpuid leaf 0x15,
37 |  * if it is on a supported architecture, otherwise it falls back to using a calibration
38 |  * loop. If force_calibrate is true, it always uses the calibration loop and never reads
39 |  * from cpuid.
40 |  */
41 | uint64_t get_tsc_freq(bool force_calibrate);
42 | 
43 | /** return a string describing how the TSC frequency was determined */
44 | const char* get_tsc_cal_info(bool force_calibrate);
45 | 
46 | #ifdef __cplusplus
47 | }
48 | #endif
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/unit-test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * unit-test.cpp
 3 |  */
 4 | 
 5 | #include "misc.hpp"
 6 | 
 7 | #include "catch.hpp"
 8 | 
// Checks string_format (presumably declared in misc.hpp -- the only project header
// included here) against two printf-style patterns. REQUIRE stops the test case at the
// first failing comparison.
TEST_CASE( "string_format", "[util]" ) {
    // integer argument substituted through %d
    REQUIRE( string_format("foo %d", 42) == "foo 42" );
    // two string arguments substituted through %s
    REQUIRE( string_format("%s %s", "foo", "bar") == "foo bar" );
}
13 | 
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/util.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SI_UTIL_H_
 2 | #define SI_UTIL_H_
 3 | 
 4 | #ifndef DEBUG
 5 | #define DEBUG 0
 6 | #endif
 7 | 
 8 | /*
 9 |  * dassert is like assert but only enabled if DEBUG is 1
10 |  */
11 | #if DEBUG
12 | #define dassert assert
13 | #else
14 | #define dassert(...)
15 | #endif
16 | 
17 | #endif // #ifndef SI_UTIL_H_
18 | 


--------------------------------------------------------------------------------
/voltmon.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include <sched.h>
  4 | #include <unistd.h>
  5 | #include <time.h>
  6 | #include <string.h>
  7 | #include <assert.h>
  8 | #include <err.h>
  9 | 
 10 | #include "msr-access.h"
 11 | 
// Outcome of one MSR read as produced by read_voltage(): on failure `error` is non-zero
// and `value` is 0. The struct is built positionally as result{value, error}, so the
// order of the two fields must not change.
struct result {
    uint64_t value; // extracted bit-field of the MSR; meaningful only when error == 0
    int error;      // status returned by the MSR read helper; 0 means success
};
 16 | 
 17 | template <class T>
 18 | T extract_bits(T val, size_t start, size_t stop) {
 19 |     assert(stop >= start);
 20 |     assert(start <= sizeof(T) * 8);
 21 |     T width = stop - start + 1;
 22 |     if (width == sizeof(T) * 8) {
 23 |         return val;
 24 |     } else {
 25 |         return (val >> (T)start) & ((1 << width) - 1);
 26 |     }
 27 | }
 28 | 
 29 | result read_voltage(int cpu) {
 30 |     uint64_t value = 0;
 31 |     int err = cpu == -1 ? read_msr_cur_cpu(0x198, &value) : read_msr(cpu, 0x198, &value);
 32 |     return err ? result{0, err} : result{extract_bits(value, 32, 47), 0};
 33 | }
 34 | 
 35 | void bench(int iters, bool thiscpu) {
 36 |     int i = iters;
 37 |     clock_t start = clock();
 38 |     while (i-- > 0) {
 39 |         result r = read_voltage(thiscpu ? -1 : 0);
 40 |         if (r.error) {
 41 |             err(1, "call failed in bench with %d", r.error);
 42 |         }
 43 |     }
 44 |     clock_t stop = clock();
 45 |     double seconds = (double)(stop - start) / CLOCKS_PER_SEC;
 46 |     printf("(%4s CPU) %d calls in %5.2f ms: %5.2f us per call\n",
 47 |             thiscpu ? "same" : "other", iters, seconds * 1000., seconds * 1000000. / iters);
 48 | }
 49 | 
 50 | void pinToCpu(int cpu) {
 51 |     cpu_set_t set;
 52 |     CPU_ZERO(&set);
 53 |     CPU_SET(cpu, &set);
 54 |     if (sched_setaffinity(0, sizeof(set), &set)) {
 55 |         assert("pinning failed" && false);
 56 |     }
 57 | }
 58 | 
 59 | int main(int argc, char** argv) {
 60 | 
 61 |     pinToCpu(1);
 62 | 
 63 |     if (argc >= 2 && strcmp("--bench", argv[1]) == 0) {
 64 |         int iters = (argc == 3 ? atoi(argv[2]) : 10000);
 65 |         printf("Running benchmarks with %d iterations\n", iters);
 66 |         bench(iters, false);
 67 |         bench(iters, false);
 68 |         bench(iters, true);
 69 |         bench(iters, true);
 70 |         bench(iters, false);
 71 |         bench(iters, false);
 72 |         bench(iters, true);
 73 |         bench(iters, true);
 74 |         exit(0);
 75 |     }
 76 | 
 77 |     int maxcpu = 0;
 78 |     for (; maxcpu < CPU_SETSIZE; maxcpu++) {
 79 |         result r = read_voltage(maxcpu);
 80 |         if (r.error)
 81 |             break;
 82 |     }
 83 | 
 84 |     if (maxcpu == 0) {
 85 |         printf("Wasn't able to read MSR on any CPUs, boo (try running with sudo?)! Exiting...");
 86 |         exit(1);
 87 |     }
 88 | 
 89 | 
 90 |     printf("Detected %d CPUs, monitoring votage...\n", maxcpu);
 91 |     while (true) {
 92 |         for (int cpu = 0; cpu < maxcpu; cpu++) {
 93 |             result r = read_voltage(cpu);
 94 |             if (r.error) {  // weird because we already read it before
 95 |                 printf("\nFAILED!\n");
 96 |                 exit(1);
 97 |             }
 98 |             printf("CPU %d: %4.2f ", cpu, r.value / 8192.);
 99 |         }
100 |         fflush(stdout);
101 |         usleep(100000u);
102 |         printf("\r");
103 |     }
104 |     return 0;
105 | }
106 | 


--------------------------------------------------------------------------------