├── .github
    └── workflows
    │   ├── codeql-analysis.yml
    │   ├── jevents.yml
    │   ├── python-old.yml
    │   └── python.yml
├── .gitignore
├── CHANGES.md
├── COPYING
├── FLAKE8
├── MYPY
├── Makefile
├── PYLINT
├── README.md
├── RERUN
├── TOOLS.md
├── adl_glc_ratios.py
├── adl_grt_ratios.py
├── all-tester
├── arl-retlat.json
├── bdw-cpuinfo
├── bdw_client_ratios.py
├── bdx_server_ratios.py
├── cleanlogs
├── clx_server_ratios.py
├── counterdiff.py
├── cpumap.sh
├── cputop
├── cputop.py
├── csv_formats.py
├── dummyarith.py
├── ehl_ratios.py
├── event-rmap.py
├── event-translate.py
├── event_download
├── event_download.py
├── fake-perf.py
├── frequency.py
├── gen-dot.py
├── gen_level.py
├── genretlat
├── genretlat.py
├── gnr-retlat.json
├── gnr_server_ratios.py
├── hsw_client_ratios.py
├── hsx_server_ratios.py
├── icl_client_ratios.py
├── icx_server_ratios.py
├── interval-merge
├── interval-merge.py
├── interval-normalize
├── interval-normalize.py
├── interval-plot
├── interval-plot.py
├── ivb_client_ratios.py
├── ivb_server_ratios.py
├── jevents
    ├── Makefile
    ├── README.md
    ├── cache.c
    ├── cpustr.c
    ├── csv.c
    ├── event-rmap.c
    ├── examples
    │   ├── Makefile
    │   ├── addr.c
    │   ├── cpu.c
    │   ├── cpu.h
    │   ├── hist.cc
    │   ├── hist.h
    │   ├── jestat.c
    │   ├── ptself.c
    │   ├── rtest.c
    │   ├── rtest2.c
    │   └── rtest3.c
    ├── interrupts.c
    ├── interrupts.h
    ├── jevents.c
    ├── jevents.h
    ├── jsession.h
    ├── jsmn.c
    ├── jsmn.h
    ├── json.c
    ├── json.h
    ├── libjevents.spec
    ├── listevents.c
    ├── measure.c
    ├── measure.h
    ├── perf-aux.c
    ├── perf-iter.c
    ├── perf-iter.h
    ├── perf-record.h
    ├── perf_event_open.c
    ├── print.c
    ├── rawevent.c
    ├── rdpmc.c
    ├── rdpmc.h
    ├── resolve.c
    ├── session.c
    ├── showevent.c
    ├── tester
    └── util.h
├── jkt_server_ratios.py
├── knl_ratios.py
├── latego.py
├── linux_metrics.py
├── list-events.py
├── listutils.py
├── lnl-retlat.json
├── lnl_lnc_ratios.py
├── lnl_skt_ratios.py
├── metrics.py
├── msr
├── msr.py
├── mtl-retlat.json
├── mtl_cmt_ratios.py
├── mtl_rwc_ratios.py
├── node.py
├── objutils.py
├── oc-all-events
├── ocperf
├── ocperf.py
├── other-tester
├── parallel-tester
├── parser
    ├── elf.py
    ├── hist.py
    ├── kernel.py
    ├── mmap.py
    ├── perfdata.py
    ├── perfpd.py
    ├── pfeat.py
    ├── tester
    └── util.py
├── pci.py
├── pebs-grabber
    ├── Makefile
    ├── pebs-grabber.c
    └── pebs.h
├── perf_metrics.py
├── plot-normalized.py
├── pmudef.py
├── pmumon.py
├── power_metrics.py
├── requirements.txt
├── simple-pebs
    ├── Makefile
    ├── README
    ├── compat.h
    ├── dump-util.c
    ├── dump-util.h
    ├── dumper.c
    ├── elf.c
    ├── elf.h
    ├── histogram.c
    ├── map.c
    ├── map.h
    ├── samples.c
    ├── simple-pebs.c
    ├── simple-pebs.h
    ├── symtab.c
    ├── symtab.h
    └── toperf.c
├── simple_ratios.py
├── skl_client_ratios.py
├── skx_server_ratios.py
├── slm_ratios.py
├── snb_client_ratios.py
├── spr_max_server_ratios.py
├── spr_server_ratios.py
├── srf_ratios.py
├── test-uncore.json
├── tester
├── tl-barplot.py
├── tl-serve
├── tl-serve.py
├── tl-tester
├── tl-xlsx.py
├── tl_cpu.py
├── tl_io.py
├── tl_output.py
├── tl_stat.py
├── tl_uval.py
├── tldata.py
├── topdown-csv
    ├── mock.py
    └── topdown-csv.py
├── toplev
├── toplev.ico
├── toplev.man
├── toplev.py
├── topology
├── tsx_metrics.py
├── ucevent
    ├── CHECK-ALL
    ├── CHECK-DERIVED
    ├── JKT
    ├── MOCK-ALL
    ├── Makefile
    ├── README.md
    ├── RUN-ALL
    ├── SANITY-ALL
    ├── bdx_extra.py
    ├── bdx_uc.py
    ├── bdxde_extra.py
    ├── bdxde_uc.py
    ├── dygraph-out.html
    ├── hsx_extra.py
    ├── hsx_uc.py
    ├── icx_extra.py
    ├── icx_uc.py
    ├── ivt_extra.py
    ├── ivt_uc.py
    ├── jkt_extra.py
    ├── jkt_uc.py
    ├── md2hman.py
    ├── patches-3.10
    │   ├── 0001-perf-Use-hrtimers-for-event-multiplexing.patch
    │   ├── 0002-perf-Add-sysfs-entry-to-adjust-multiplexing-interval.patch
    │   ├── 0003-perf-uncore-qpi-filter.patch
    │   ├── 0004-per-socket-fix.patch
    │   ├── 0005-support-pcu-extsel.patch
    │   ├── 0006-add-masks.patch
    │   └── README
    ├── patches-3.16
    │   ├── 0001-perf-x86-intel-Add-Haswell-EP-uncore-support.patch
    │   ├── 0002-perf-x86-uncore-register-the-PMU-only-if-the-uncore-.patch
    │   ├── 0004-perf-x86-uncore-Add-missing-cbox-filter-flags-on-Ivy.patch
    │   ├── 0005-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy-Ivy-H.patch
    │   └── 0006-fixup-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy.patch
    ├── sanity-run.py
    ├── skx_extra.py
    ├── skx_uc.py
    ├── ucaux.py
    ├── ucevent.man
    ├── ucevent.py
    ├── ucexpr.py
    ├── ucmsg.py
    ├── uctester
    └── uctopy.pl
├── utilized.py
├── wl-bottlenecks
└── workloads
    ├── BC1s
    ├── BC2s
    ├── CALC10s
    ├── CALC1s
    ├── CLANG10s
    ├── CLTRAMP3D
    ├── COMPILE10s
    ├── EMACS1s
    ├── GCCTRAMP3D
    ├── GITGREP
    ├── GREP
    ├── GUILE1s
    ├── GZIP
    ├── MEMHOG
    ├── PERL1s
    ├── PHP1s
    ├── PYTHON1s
    ├── XZ
    └── ZSTD


/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | name: "CodeQL"
 7 | 
 8 | on:
 9 |   push:
10 |     branches: [master]
11 |   pull_request:
12 |     # The branches below must be a subset of the branches above
13 |     branches: [master]
14 |   schedule:
15 |     - cron: '0 6 * * 5'
16 | 
17 | jobs:
18 |   analyze:
19 |     name: Analyze
20 |     runs-on: ubuntu-latest
21 | 
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         # Override automatic language detection by changing the below list
26 |         # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
27 |         language: ['python', 'cpp']
28 |         # Learn more...
29 |         # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection
30 | 
31 |     steps:
32 |     - name: Checkout repository
33 |       uses: actions/checkout@v4
34 |       with:
35 |         # We must fetch at least the immediate parents so that if this is
36 |         # a pull request then we can checkout the head.
37 |         fetch-depth: 2
38 | 
39 |     # Initializes the CodeQL tools for scanning.
40 |     - name: Initialize CodeQL
41 |       uses: github/codeql-action/init@v3
42 |       with:
43 |         languages: ${{ matrix.language }}
44 |         # If you wish to specify custom queries, you can do so here or in a config file.
45 |         # By default, queries listed here will override any specified in a config file.
46 |         # Prefix the list here with "+" to use these queries and those in the config file.
47 |         # queries: ./path/to/local/query, your-org/your-repo/queries@main
48 | 
49 |     # Manual build used in place of the CodeQL Autobuild action: the run
50 |     # step below executes make in jevents between init and analyze.
51 | 
52 |     # Add more commands to that run step if other compiled code should be
53 |     # built before the analysis.
54 |     # Shell syntax available in run steps: https://git.io/JvXDl
55 | 
56 |     - run: |
57 |        cd jevents
58 |        make
59 | 
60 |     - name: Perform CodeQL Analysis
61 |       uses: github/codeql-action/analyze@v3
62 | 


--------------------------------------------------------------------------------
/.github/workflows/jevents.yml:
--------------------------------------------------------------------------------
 1 | name: jevents build and test
 2 | # Build jevents with make, then run jevents/tester with a downloaded event list.
 3 | on:
 4 |   push:
 5 |     branches: [ master, test* ]
 6 |   pull_request:
 7 |     branches: [ master, test* ]
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v4
16 |     - name: make
17 |       run: |
18 |         cd jevents ; make
19 |     - name: tester
20 |       run: |
21 |         ./event_download.py GenuineIntel-6-55-4
22 |         cd jevents ; EVENTMAP=GenuineIntel-6-55-4 ./tester
23 | 


--------------------------------------------------------------------------------
/.github/workflows/python-old.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Old Python linting and testing
 5 | on: 
 6 |   push:
 7 |     branches: [ master, test* ]
 8 |   pull_request:
 9 |     branches: [ master, test* ]
10 | 
11 | jobs:
12 |   build:
13 | 
14 |     runs-on: ubuntu-latest  # host image only; the steps run inside the container below
15 |     container:
16 |       image: python:2.7.18-buster
17 | 
18 |     steps:
19 |     - uses: actions/checkout@v4
20 |     - name: Set up Python ${{ matrix.python-version }}
21 |       uses: actions/setup-python@v4
22 |       with:
23 |         python-version: ${{ matrix.python-version }}
24 |     - name: Install dependencies
25 |       run: |
26 |         python -m pip install --upgrade pip
27 |         pip install flake8 pytest
28 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29 |     - name: Lint with flake8
30 |       run: |
31 |         # stop the build if there are Python syntax errors or undefined names
32 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
33 |         flake8 --exclude '*_uc.py,*_ratios.py'  --ignore=E302,E128,E741,E251,E305,E221,E127,E126,E501,E401,E231,E201,E261,E731,E122,E131,E226,W504,E402,E301,W503,E265,E306,E227,E306,W391,E202,E241
34 |     - name: Run toplev tests
35 |       run: |
36 |         if ! grep -q GenuineIntel /proc/cpuinfo ; then export NATIVE_ARGS="--force-cpu skl" ; fi
37 |         WRAP=python PERF=./fake-perf.py NORES=1 ./tl-tester
38 |     - name: Run ocperf tests
39 |       run: |
40 |         python ./event_download.py -a
41 |         WRAP=python PERF=./fake-perf.py NORES=1 ./tester
42 |     - name: Run other tests
43 |       run: |
44 |         pip install matplotlib
45 |         WRAP=python PERF=./fake-perf.py NORES=1 ./other-tester
46 |     - name: Run ucevent tests
47 |       run: |
48 |         set -e
49 |         cd ucevent
50 |         for i in jkt ivt hsx bdxde bdx skx ; do FORCECPU=$i WRAP=python MOCK=1 NORES=1 ./uctester ; done
51 |         # XXX WRAP=python MOCK=1 NORES=1 ./RUN-ALL # need to avoid GNU parallel
52 |         #
53 |     - name: Run parser tests
54 |       run: |
55 |         cd parser
56 |         ./tester
57 | 
58 |          
59 | 


--------------------------------------------------------------------------------
/.github/workflows/python.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python linting and testing
 5 | on:
 6 |   push:
 7 |     branches: [ master, test* ]
 8 |   pull_request:
 9 |     branches: [ master, test* ]
10 | 
11 | jobs:
12 |   build:
13 | 
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       matrix:
17 |         python-version: ["3.8", "3.9", "3.10.8", "3.11", "3.12", "3.13"]  # quoted: versions are strings, not floats
18 | 
19 |     steps:
20 |     - uses: actions/checkout@v4
21 |     - name: Set up Python ${{ matrix.python-version }}
22 |       uses: actions/setup-python@v4
23 |       with:
24 |         python-version: ${{ matrix.python-version }}
25 |     - name: Install dependencies
26 |       run: |
27 |         python -m pip install --upgrade pip
28 |         pip install flake8 pytest mypy
29 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30 |         sudo apt install -y shellcheck
31 |     - name: Lint with flake8
32 |       run: |
33 |         ./FLAKE8
34 |     - name: Type check with mypy
35 |       run: |
36 |         ./MYPY
37 |     - name: Run shell check
38 |       run: |
39 |         shellcheck  -e SC2086,SC2012 -x tl-tester tester other-tester parallel-tester all-tester jevents/tester  ucevent/uctester
40 |     - name: Run toplev tests
41 |       run: |
42 |         if ! grep -q GenuineIntel /proc/cpuinfo ; then export NATIVE_ARGS="--force-cpu skl" ; fi
43 |         WRAP=python PERF=./fake-perf.py NORES=1 ./tl-tester
44 |     - name: Run ocperf tests
45 |       run: |
46 |         python ./event_download.py -a
47 |         WRAP=python PERF=./fake-perf.py NORES=1 ./tester
48 |     - name: Run other tests
49 |       run: |
50 |         WRAP=python PERF=./fake-perf.py NORES=1 ./other-tester
51 |     - name: Run ucevent tests
52 |       run: |
53 |         set -e
54 |         cd ucevent
55 |         for i in jkt ivt hsx bdxde bdx skx ; do FORCECPU=$i WRAP=python MOCK=1 NORES=1 ./uctester ; done
56 |         # XXX WRAP=python MOCK=1 NORES=1 ./RUN-ALL # need to avoid GNU parallel
57 |         #
58 |     - name: Run parser tests
59 |       run: |
60 |         cd parser
61 |         ./tester
62 | 
63 |          
64 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.o
 3 | jevents/event-rmap
 4 | jevents/examples/addr
 5 | jevents/examples/jestat
 6 | jevents/examples/rtest
 7 | jevents/examples/rtest2
 8 | jevents/examples/rtest3
 9 | jevents/examples/ptself
10 | jevents/libjevents.a
11 | jevents/listevents
12 | jevents/showevent
13 | perf.data
14 | perf.data.old
15 | 


--------------------------------------------------------------------------------
/FLAKE8:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | export PATH="$PATH:$HOME/.local/bin"
4 | # stop the build if there are Python syntax errors or undefined names
5 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
6 | flake8 --exclude '*_uc.py,*_ratios.py'  --ignore=E302,E128,E741,E251,E305,E221,E127,E126,E501,E401,E231,E201,E261,E731,E122,E131,E226,W504,E402,E301,W503,E265,E306,E227,W391,E202,E241
7 | 


--------------------------------------------------------------------------------
/MYPY:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Run mypy on the toplev modules listed below; extra arguments are passed through.
3 | EXTRA="--follow-imports skip" # for now; expanded unquoted below so it splits into two arguments
4 | 
5 | export PATH="$PATH:$HOME/.local/bin"
6 | # XXX ocperf, models, interval*, tl-*
7 | exec mypy --check-untyped-defs $EXTRA toplev.py tl_output.py tl_io.py tl_cpu.py tl_stat.py tl_uval.py "$@"
8 | 
9 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | all:
 3 | 	@echo "Nothing to compile here."
 4 | 	
 5 | graphs: ivb-model.svg simple-model.svg
 6 | 
 7 | ivb.dot: gen-dot.py ivb_client_ratios.py
 8 | 	./gen-dot.py > $@
 9 | simple.dot: gen-dot.py simple_ratios.py
10 | 	./gen-dot.py simple > $@
11 | 
12 | ivb-model.svg: ivb.dot
13 | 	dot -Tsvg $^ > $@
14 | ivb-model.png: ivb.dot
15 | 	dot -Tpng $^ > $@
16 | simple-model.svg: simple.dot
17 | 	dot -Tsvg $^ > $@
18 | simple-model.png: simple.dot
19 | 	dot -Tpng $^ > $@
20 | 
21 | man: toplev.man
22 | 
23 | toplev.man: toplev.py
24 | 	help2man -N ./toplev.py > toplev.man
25 | 
26 | clean:  # remove every file the graph rules above can generate
27 | 	rm -f simple.dot ivb.dot ivb-model.svg ivb-model.png simple-model.svg simple-model.png
28 | 
29 | 


--------------------------------------------------------------------------------
/PYLINT:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | pylint -d too-many-arguments,too-many-instance-attributes,import-outside-toplevel,consider-using-f-string,consider-using-with,global-statement,redundant-u-string-prefix,C0301,C0116,invalid-name,multiple-imports,missing-module-docstring,missing-class-docstring,redefined-outer-name,too-few-public-methods,attribute-defined-outside-init,multiple-statements $(ls *.py | grep -v ratios)
3 | 


--------------------------------------------------------------------------------
/RERUN:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # rerun last toplev line from tl-tester log, optionally with a prefix (like python -mpdb)
 3 | # tl-tester >& log
 4 | # RERUN log python -mpdb
 5 | if [ "x$1" = "x--notltester" ] ; then
 6 | 	shift
 7 | else
 8 | 	export TL_TESTER=1
 9 | fi
10 | L="$1"  # log file to take the last toplev line from
11 | shift
12 | T=$(grep 'Line.*toplev' "$L"  | tail -1 | sed 's/Line [0-9]* //')  # pattern quoted so the shell cannot glob it
13 | echo "$@" $T
14 | eval "$@" $T
15 | 


--------------------------------------------------------------------------------
/all-tester:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # one stop shop to test all pmu-tools test suites
 3 | # to run it faster use parallel-tester
 4 | # each line must be self contained!
 5 | 
 6 | set -e
 7 | set -x
 8 | 
 9 | # test ocperf
10 | ./tester
11 | WRAP=python3 ./tester
12 | 
13 | # test toplev
14 | ./tl-tester
15 | WRAP=python3 ./tl-tester
16 | MATCH="icl" DCPU="icl --no-group" NOGROUP=1 NORES=1 LOAD=./workloads/BC1s ./tl-tester
17 | MATCH=snb DCPU=snb ./tl-tester
18 | MATCH=hsw DCPU=hsw ./tl-tester
19 | MATCH=slm DCPU=slm NORES=1 NOGROUP=1 LOAD=./workloads/BC1s ./tl-tester
20 | MATCH=knl DCPU=knl NORES=1 NOGROUP=1 LOAD=./workloads/BC1s ./tl-tester
21 | 
22 | # test other tools
23 | ./other-tester
24 | WRAP=python3 ./other-tester
25 | 
26 | # test jevents (in a subshell, so that a failing make also stops the run under set -e)
27 | (cd jevents && make && ./tester)
28 | 
29 | # test perf.data parser
30 | # this requires construct and pandas and some other libraries
31 | # XXX check for missing dependencies
32 | #cd parser
33 | #tester
34 | #cd ..
35 | 


--------------------------------------------------------------------------------
/cleanlogs:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # clean up tl-tester logs after failures
 3 | rm -f log[0-9]*
 4 | rm -f log-[0-9]*
 5 | rm -f logsum[0-9]*
 6 | rm -f log-all*
 7 | rm -f log-combined*
 8 | rm -f val[0-9]*.csv
 9 | rm -f x*.png
10 | rm -f x*.csv
11 | rm -f y*.csv
12 | rm -f y*.out
13 | rm -f x*.csv.gz
14 | rm -f y*.out.gz
15 | rm -f x*.csv.xz
16 | rm -f j*.json
17 | rm -f out*-core out*-global out*-socket out*-thread
18 | rm -f perf.data.*.old
19 | rm -f tlog[0-9]*
20 | rm -f tltest*_*
21 | rm -f perf.data*
22 | rm -f perf*.data.*.old
23 | rm -f perf[osv]*.csv
24 | rm -f val.[0-9]*.csv
25 | rm -f x*.xlsx
26 | rm -f script[0-9]*
27 | rm -f toplev[ospv]*.csv toplevm[0-9]*
28 | rm -f nflog[0-9]*
29 | rm -f toplevs*-cpu*
30 | 


--------------------------------------------------------------------------------
/counterdiff.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # counterdiff.py < plog program ..      (or general perf arguments)
 3 | # verify plog.* output from toplev by running event one by one
 4 | # this can be used to estimate multiplexing measurement errors
 5 | from __future__ import print_function
 6 | import sys, os
 7 | 
 8 | def run(x):  # echo the shell command line x, then execute it with os.system
 9 |     print(x)
10 |     os.system(x)
11 | 
12 | for l in sys.stdin:
13 |     if l.find(",") < 0:
14 |         continue
15 |     n = l.strip().split(",")
16 |     run("perf stat --output l -x, -e %s %s" %
17 |         (n[1], " ".join(sys.argv[1:])))
18 |     f = open("l", "r")
19 |     for i in f:
20 |         if i.find(",") < 0:
21 |             continue
22 |         j = i.strip().split(",")
23 |         break
24 |     f.close()
25 |     if float(n[0]) > 0:
26 |         delta = (float(j[0]) - float(n[0])) / float(n[0])
27 |     else:
28 |         delta = 0
29 |     print(n[1], j[0], n[0], "%.2f" % (delta * 100.0))
30 | 


--------------------------------------------------------------------------------
/cpumap.sh:
--------------------------------------------------------------------------------
 1 | declare -A cpus
 2 | cpus[nhm-ex]=GenuineIntel-6-2E
 3 | cpus[nhm-ep]=GenuineIntel-6-1E  # NOTE(review): same key as the next line, so this value is overwritten
 4 | cpus[nhm-ep]=GenuineIntel-6-1A
 5 | cpus[wsm-ex]=GenuineIntel-6-2F
 6 | cpus[wsm-sp]=GenuineIntel-6-25
 7 | cpus[wsm-dp]=GenuineIntel-6-2C
 8 | cpus[snb]=GenuineIntel-6-2A
 9 | cpus[jkt]=GenuineIntel-6-2D
10 | cpus[ivt]=GenuineIntel-6-3E
11 | cpus[ivb]=GenuineIntel-6-3A
12 | cpus[hsw]=GenuineIntel-6-45
13 | cpus[slm]=GenuineIntel-6-37
14 | cpus[bnl]=GenuineIntel-6-35
15 | cpus[bdw]=GenuineIntel-6-3D
16 | cpus[hsx]=GenuineIntel-6-3F
17 | cpus[skl]=GenuineIntel-6-5E
18 | cpus[bdw-de]=GenuineIntel-6-56
19 | cpus[bdx]=GenuineIntel-6-4F
20 | cpus[knl]=GenuineIntel-6-57
21 | cpus[skx]=GenuineIntel-6-55-4
22 | cpus[clx]=GenuineIntel-6-55-5
23 | cpus[icl]=GenuineIntel-6-7E
24 | cpus[icx]=GenuineIntel-6-6A
25 | cpus[adl]=GenuineIntel-6-9A
26 | cpus[spr]=GenuineIntel-6-8F
27 | cpus[tgl]=GenuineIntel-6-8C
28 | 


--------------------------------------------------------------------------------
/cputop:
--------------------------------------------------------------------------------
1 | cputop.py


--------------------------------------------------------------------------------
/cputop.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # query cpu topology and print all matching cpu numbers
  3 | # cputop "query" ["format"]
  4 | # query is a python expression, using variables:
  5 | # socket, core, thread, type, cpu
  6 | # or "offline" to query all offline cpus
  7 | # or "atom" or "core" to select core types
  8 | # type can be "atom" or "core"
  9 | # cpu is the cpu number
 10 | # format is a printf format with %d
 11 | # %d will be replaced with the cpu number
 12 | # format can be offline to offline the cpu or online to online
 13 | # Author: Andi Kleen
 14 | from __future__ import print_function
 15 | import sys
 16 | import os
 17 | import re
 18 | import argparse
 19 | import glob
 20 | 
 21 | def numfile(fn):
 22 |     f = open(fn, "r")
 23 |     v = int(f.read())
 24 |     f.close()
 25 |     return v
 26 | 
 27 | outstr = ""
 28 | 
 29 | def output(p, fmt):
 30 |     if fmt:
 31 |         if fmt == "taskset":
 32 |             global outstr
 33 |             if outstr:
 34 |                 outstr += ","
 35 |             else:
 36 |                 outstr += "taskset -c "
 37 |             outstr += "%d" % p
 38 |         else:
 39 |             print(fmt % (p,))
 40 |     else:
 41 |         print(p)
 42 | 
 43 | ap = argparse.ArgumentParser(description='''
 44 | query cpu topology and print all matching cpu numbers
 45 | cputop "query" ["format"]
 46 | query is a python expression, using variables:
 47 | socket, core, thread, type, cpu
 48 | type is "core" or "atom" on a hybrid system
 49 | cpu is the cpu number
 50 | or "offline" to query all offline cpus
 51 | format is a printf format with %d
 52 | %d will be replaced with the cpu number, or online/offline
 53 | to generate online/offline commands, or taskset to generate taskset command line''',
 54 | epilog='''
 55 | Examples:
 56 | print all cores on socket 0
 57 | cputop "socket == 0"
 58 | 
 59 | print all first threads in each core on socket 0
 60 | cputop "thread == 0 and socket == 0"
 61 | 
 62 | disable all second threads (disable hyper threading)
 63 | cputop "thread == 1" offline
 64 | 
 65 | reenable all offlined cpus
 66 | cputop offline online
 67 | 
 68 | print all online cpus
 69 | cputop True ''', formatter_class=argparse.RawTextHelpFormatter)
 70 | ap.add_argument('expr', help='python expression with socket/core/thread')
 71 | ap.add_argument('fmt', help='Output format string with %%d, or online/offline', nargs='?')
 72 | args = ap.parse_args()
 73 | 
 74 | if args.expr == "atom":
 75 |     args.expr = 'type == "atom"'
 76 | if args.expr == "core":
 77 |     args.expr = 'type == "core"'
 78 | 
 79 | special = {
 80 |     "offline": "echo 0 > /sys/devices/system/cpu/cpu%d/online",
 81 |     "online": "echo 1 > /sys/devices/system/cpu/cpu%d/online",
 82 | }
 83 | 
 84 | if args.fmt in special:
 85 |     args.fmt = special[args.fmt]
 86 | 
 87 | types = dict()
 88 | for fn in glob.glob("/sys/bus/event_source/devices/cpu_*/cpus"):
 89 |     typ = os.path.basename(fn.replace("/cpus", "")).replace("cpu_","")
 90 |     cpus = open(fn).read()
 91 |     for j in cpus.split(","):
 92 |         m = re.match(r'(\d+)(-\d+)?', j)
 93 |         if m is None:
 94 |             continue
 95 |         if m.group(2):
 96 |             for k in range(int(m.group(1)), int(m.group(2)[1:])+1):
 97 |                 types[k] = typ
 98 |         else:
 99 |             types[int(m.group(1))] = typ
100 | 
101 | base = "/sys/devices/system/cpu/"
102 | p = {}
103 | l = os.listdir(base)
104 | for d in l:
105 |     m = re.match(r"cpu([0-9]+)", d)
106 |     if not m:
107 |         continue
108 |     proc = int(m.group(1))
109 |     top = base + d + "/topology"
110 |     if not os.path.exists(top):
111 |         if args.expr == "offline":
112 |             output(proc, args.fmt)
113 |         continue
114 |     socket = numfile(top + "/physical_package_id")
115 |     core = numfile(top + "/core_id")
116 |     n = 0
117 |     while (socket, core, n) in p:
118 |         n += 1
119 |     p[(socket, core, n)] = proc
120 | 
121 | if args.expr == "offline":
122 |     sys.exit(0)
123 | 
124 | for j in sorted(p.keys()):
125 |     socket, core, thread = j  # these names are what the query expression refers to
126 |     cpu = p[j]
127 |     type = "any"  # default when the cpu is in no cpu_* PMU cpus list
128 |     if cpu in types:
129 |         type = types[cpu]
130 |     if eval(args.expr):  # NOTE(review): runs the command line query as Python code
131 |         output(p[j], args.fmt)
132 | 
133 | if outstr:
134 |     print(outstr)
135 | 


--------------------------------------------------------------------------------
/csv_formats.py:
--------------------------------------------------------------------------------
  1 | # distinguish the bewildering variety of perf/toplev CSV formats
  2 | from __future__ import print_function
  3 | import sys
  4 | import re
  5 | from collections import namedtuple
  6 | 
  7 | def is_val(n):
  8 |     return re.match(r'-?[0-9.]+%?|<.*>', n) is not None
  9 | 
 10 | def is_cpu(n):
 11 |     return re.match(r'(CPU)|(S\d+(-C\d+)?)|C\d+|all', n) is not None
 12 | 
 13 | def is_socket(n):
 14 |     return re.match(r'S\d+', n) is not None
 15 | 
 16 | def is_event(n):
 17 |     return re.match(r'[a-zA-Z.-]+', n) is not None
 18 | 
 19 | def is_number(n):
 20 |     return re.match(r'\s*[0-9]+', n) is not None
 21 | 
 22 | def is_ts(n):
 23 |     return re.match(r'\s*[0-9.]+', n) is not None or n == "SUMMARY"
 24 | 
 25 | def is_unit(n):
 26 |     return re.match(r'(% )?[a-zA-Z]*( <)?', n) is not None
 27 | 
 28 | def is_running(n):
 29 |     return is_number(n)
 30 | 
 31 | def is_enabled(n):
 32 |     return is_number(n)
 33 | 
 34 | formats = (
 35 | # 0.100997872;CPU0;4612809;;inst_retired_any_0;3491526;2.88     new perf
 36 |         (is_ts, is_cpu, is_val, is_unit, is_event, is_enabled, is_running),
 37 | # 1.354075473,0,cpu-migrations                            old perf w/o cpu
 38 |         (is_ts, is_val, is_event),
 39 | # 1.354075473,CPU0,0,cpu-migrations                          old perf w/ cpu
 40 |         (is_ts, is_cpu, is_val, is_event),
 41 | # 0.799553738,137765150,,branches                              new perf with unit
 42 |         (is_ts, is_val, is_unit, is_event),
 43 | # 0.799553738,CPU1,137765150,,branches                  new perf with unit and cpu
 44 |         (is_ts, is_cpu, is_val, is_unit, is_event),
 45 | # 0.100879059,402.603109,,task-clock,402596410,100.00    new perf with unit without cpu and stats
 46 |         (is_ts, is_val, is_unit, is_event, is_running, is_enabled),
 47 | # 1.001131873,S0,Backend_Bound.Memory_Bound,13.3,% Slots <,,,0.0,3.0,,
 48 | # 0.200584389,0,FrontendBound.Branch Resteers,15.87%,above,"",  toplev w/ cpu
 49 |         (is_ts, is_cpu, is_event, is_val, is_unit),
 50 | # 1.001365014,CPU2,1819888,,instructions,93286388,100.00      new perf w/ unit w/ cpu and stats
 51 |         (is_ts, is_cpu, is_val, is_unit, is_event, is_running, is_enabled),
 52 | # 0.609113353,S0,4,405.454531,,task-clock,405454468,100.00      perf --per-socket with cores
 53 |         (is_ts, is_socket, is_number, is_val, is_unit, is_event, is_running, is_enabled),
 54 | # 0.806231582,S0,4,812751,,instructions                  older perf --per-socket w/ cores w/o stats
 55 |         (is_ts, is_socket, is_number, is_val, is_unit, is_event),
 56 | # 0.936482669,C1-T0,Frontend_Bound.Frontend_Latency.ITLB_Misses,0.39,%below,,itlb_misses.walk_completed,,
 57 | # 0.301553743,C1,Retiring,31.81,%,,,,
 58 | # 0.200584389,FrontendBound.Branch Resteers,15.87%,above,"",    toplev single thread
 59 |         (is_ts, is_event, is_val),
 60 | )
 61 | 
 62 | fmtmaps = {
 63 |     is_ts: 0,
 64 |     is_cpu: 1,
 65 |     is_event: 2,
 66 |     is_val: 3,
 67 |     is_enabled: 4,
 68 |     is_running: 5,
 69 |     is_unit: 6
 70 | }
 71 | 
 72 | Row = namedtuple('Row', ['ts', 'cpu', 'ev', 'val', 'enabled', 'running', 'unit'])
 73 | 
 74 | def check_format(fmt, row):
 75 |     if all([x(n.strip()) for (x, n) in zip(fmt, row)]):
 76 |         vals = [None] * 7
 77 |         for i, j in enumerate(fmt):
 78 |             if j in fmtmaps:
 79 |                 vals[fmtmaps[j]] = row[i]
 80 |         r = Row._make(vals)
 81 |         return r
 82 |     return False
 83 | 
 84 | fmt_cache = formats[0]
 85 | 
 86 | def parse_csv_row(row, error_exit=False):  # row: list of CSV fields; returns a Row or None
 87 |     if len(row) == 0:
 88 |         return None
 89 |     global fmt_cache
 90 |     r = check_format(fmt_cache, row)  # try the format that matched last time first
 91 |     if r:
 92 |         return r
 93 |     for fmt in formats:
 94 |         r = check_format(fmt, row)
 95 |         if r:
 96 |             fmt_cache = fmt  # remember the match for the following rows
 97 |             return r
 98 |     if row[0].startswith("#"):    # comment
 99 |         return None
100 |     if ".csv" in row[0]:          # fake-perf output
101 |         return None
102 |     if "Timestamp" in row[0]:
103 |         return None
104 |     print("PARSE-ERROR", row, file=sys.stderr)  # no format matched and none of the skipped line kinds
105 |     if error_exit:
106 |         sys.exit(1)
107 |     return None
108 | 
109 | if __name__ == '__main__':
110 |     def check(l, fields):
111 |         n = l.split(",")
112 |         r = parse_csv_row(n)
113 |         assert r is not None
114 |         rd = r._asdict()
115 |         for a, v in fields.items():
116 |             assert rd[a] == n[v]
117 | 
118 |     check('1.001131873,S0,Backend_Bound.Memory_Bound,13.3,% Slots <,,,0.0,3.0,,', {
119 |             "ts": 0,
120 |             "cpu": 1,
121 |             "ev": 2,
122 |             "val": 3,
123 |             "unit": 4 })
124 | 


--------------------------------------------------------------------------------
/dummyarith.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # dummy arithmetic type without any errors, for collecting
 3 | # the events from the model. Otherwise divisions by zero cause
 4 | # early exits
 5 | 
 6 | class DummyArith(object):
 7 |     def __add__(self, o):
 8 |         return self
 9 |     __sub__ = __add__
10 |     __mul__ = __add__
11 |     __div__ = __add__
12 |     __truediv__ = __add__
13 |     __rsub__ = __add__
14 |     __radd__ = __add__
15 |     __rmul__ = __add__
16 |     __rdiv__ = __add__
17 |     __rtruediv__ = __add__
18 |     def __lt__(self, o):
19 |         return True
20 |     __eq__ = __lt__
21 |     __ne__ = __lt__
22 |     __gt__ = __lt__
23 |     __ge__ = __lt__
24 |     __or__ = __add__
25 |     __and__ = __add__
26 |     __min__ = __add__
27 |     __max__ = __add__
28 | 


--------------------------------------------------------------------------------
/event-rmap.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # print currently running events on cpu (default 0)
 3 | # event-rmap [cpu-num]
 4 | # xxx no extra modi for now, racy with multi plexing
 5 | from __future__ import print_function
 6 | import sys
 7 | import msr
 8 | import ocperf
 9 | from pmudef import (MSR_PEBS_ENABLE, MSR_EVNTSEL, EVENTSEL_ENABLE, EVMASK,
10 |                     EVENTSEL_CMASK,
11 |                     EVENTSEL_EDGE, EVENTSEL_ANY, EVENTSEL_INV, EVENTSEL_PC,
12 |                     MSR_IA32_FIXED_CTR_CTRL)
13 | 
# Names of the fixed counters, indexed by fixed counter number.
fixednames = (
    "inst_retired.any",
    "cpu_clk_unhalted.thread",
    "cpu_clk_unhalted.ref_tsc"
)

# CPU whose counters are inspected; optional first command line argument.
cpu = 0
if len(sys.argv) > 1:
    cpu = int(sys.argv[1])

emap = ocperf.find_emap()
if not emap:
    print("Unknown CPU or cannot find CPU event table")
    # Normalize, so the decoding below consistently falls back to raw codes.
    emap = None

found = 0
try:
    pebs_enable = msr.readmsr(MSR_PEBS_ENABLE, cpu)
except OSError:
    pebs_enable = 0

# Walk the event select registers until one cannot be read.
for i in range(0, 8):
    try:
        evsel = msr.readmsr(MSR_EVNTSEL + i, cpu)
    except OSError:
        break
    found += 1
    if not (evsel & EVENTSEL_ENABLE):
        continue
    print("%d: %016x: " % (i, evsel), end="")
    evsel &= EVMASK
    if emap is None:
        # No event table: show the raw event/umask code.
        print("r%04x" % (evsel & 0xffff), end=" ")
    elif evsel in emap.codes:
        ev = emap.codes[evsel]
        if ev.msr:
            # The event is qualified by an extra MSR; read it on the same CPU.
            try:
                extra = msr.readmsr(ev.msr, cpu)
            except OSError:
                print("Cannot read extra MSR %x for %s" % (ev.msr, ev.name))
                continue
            if extra == ev.msrvalue:
                print(ev.name, "msr:%x" % (extra), end=" ")
            else:
                print("no exact match for %s, msr %x value %x" % (ev.name,
                                                                  ev.msr, ev.msrvalue), end=" ")
        else:
            print(ev.name, end=" ")
    else:
        # No exact match: list all events sharing the low 8 bits of the code.
        name = ""
        for j in emap.codes.keys():
            if j & 0xff == evsel & 0xff:
                name += "%s[%x] " % (emap.codes[j].name, j)
        if name:
            print("[no exact match] " + name, end=" ")
        else:
            print("r%x" % (evsel), end=" ")
    if evsel & EVENTSEL_CMASK:
        print("cmask=%x" % (evsel >> 24), end=" ")
    if evsel & EVENTSEL_EDGE:
        print("edge=1", end=" ")
    if evsel & EVENTSEL_ANY:
        print("any=1", end=" ")
    if evsel & EVENTSEL_INV:
        print("inv=1", end=" ")
    if evsel & EVENTSEL_PC:
        print("pc=1", end=" ")
    if pebs_enable & (1 << i):
        print("precise=1", end=" ")
    print()
if found == 0:
    print("Cannot read any MSRs")

try:
    fixed = msr.readmsr(MSR_IA32_FIXED_CTR_CTRL, cpu)
except OSError:
    print("Cannot read fixed counter MSR")
    fixed = 0
# Each fixed counter owns a 4 bit field; its low two bits enable counting
# in ring 0 and in user mode respectively, so either one means "running".
for i in range(len(fixednames)):
    if fixed & (0x3 << (i*4)):
        print("fixed %d: %s" % (i, fixednames[i]))
93 | 


--------------------------------------------------------------------------------
/event-translate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # translate raw events to names
 3 | # event-translate rXXX ...
 4 | from __future__ import print_function
 5 | import re
 6 | import sys
 7 | import ocperf
 8 | from pmudef import EVMASK, EVENTSEL_EVENT, EVENTSEL_UMASK, extra_flags
 9 | 
emap = ocperf.find_emap()
if not emap:
    sys.exit("Unknown CPU or cannot find event table")

# Translate every rXXXX[:...] argument on the command line.
for arg in sys.argv[1:]:
    match = re.match(r'r([0-9a-f]+)(:.*)?', arg)
    if not match:
        # XXX implement offcore new style events
        print("cannot parse", arg)
        continue
    hexcode = match.group(1)
    print(hexcode)
    evsel = int(hexcode, 16)
    print("%s:" % (arg))
    full_code = evsel & EVMASK
    base_code = evsel & (EVENTSEL_EVENT|EVENTSEL_UMASK)
    if full_code in emap.codes:
        print(emap.codes[full_code].name)
    elif base_code in emap.codes:
        # Only event+umask are known: print the name followed by each
        # extra flag field that is set in the raw code.
        print(emap.codes[base_code].name, end='')
        for flag in extra_flags:
            if not (evsel & flag[0]):
                continue
            # shift mask and value down until the field starts at bit 0
            mask = flag[0]
            value = evsel
            while (mask & 1) == 0:
                mask >>= 1
                value >>= 1
            print("%s=%d" % (flag[1], value & mask),end='')
        print()
    else:
        print("cannot find", hexcode)
37 | 


--------------------------------------------------------------------------------
/event_download:
--------------------------------------------------------------------------------
1 | event_download.py


--------------------------------------------------------------------------------
/fake-perf.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # mock perf for limited test environments
 3 | from __future__ import print_function
 4 | import sys
 5 | 
 6 | out = sys.stderr
 7 | av = sys.argv
 8 | if av[-1] == "true":
 9 |     sys.exit(0)
10 | j = 1
11 | process = True
12 | while j < len(sys.argv):
13 |     if av[j] == "--version":
14 |         print("perf version 5.6.8", end='')
15 |         break
16 |     elif av[j] == "-o" and process:
17 |         j += 1
18 |         out = open(av[j], "w")
19 |     elif av[j] == "--":
20 |         process = False
21 |     j += 1
22 | out.write("\n")
23 | 


--------------------------------------------------------------------------------
/frequency.py:
--------------------------------------------------------------------------------
 1 | nominal_freq = 1.0
 2 | 
 3 | class Frequency:
 4 |     name = "Frequency"
 5 |     desc = " Frequency ratio"
 6 |     subplot = "Frequency"
 7 |     domain = "CoreMetric"
 8 | 
 9 |     def compute(self, EV):
10 |         try:
11 |             self.val = (EV("cycles", 1) / EV("CPU_CLK_UNHALTED.REF_TSC", 1)) * nominal_freq
12 |         except ZeroDivisionError:
13 |             self.val = 0
14 | 
class SetupCPU:
    """Registers the Frequency metric with the runner `r`."""
    def __init__(self, r, cpu):
        global nominal_freq
        # keep the current value when the cpu object reports no frequency
        nominal_freq = cpu.freq if cpu.freq else nominal_freq
        metric = Frequency()
        r.force_metric(metric)
21 | 


--------------------------------------------------------------------------------
/gen-dot.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # generate dot diagram of top down tree from module
 3 | from __future__ import print_function
 4 | import sys
 5 | 
 6 | max_level = 5
 7 | first = 1
 8 | if sys.argv[1:] and sys.argv[1][:2] == "-l":
 9 |     max_level = int(sys.argv[1][2:])
10 |     first += 1
11 | if len(sys.argv) > first and sys.argv[first] == "simple":
12 |     import simple_ratios
13 |     m = simple_ratios
14 | else:
15 |     import adl_glc_ratios
16 |     m = adl_glc_ratios
17 | 
def has(obj, name):
    """Return True when `name` is defined directly in the class of `obj` (inherited names do not count)."""
    return name in vars(obj.__class__)
20 | 
class Runner:
    """Collects the nodes handed over by the model and prints them as dot statements."""
    def __init__(self):
        self.olist = []

    def run(self, n):
        # only keep nodes up to the requested level
        if n.level <= max_level:
            self.olist.append(n)

    def metric(self, n):
        # metrics are not part of the graph
        pass

    def finish(self):
        """Print one dot statement per collected node."""
        for node in self.olist:
            if node.level == 1:
                # level 1 nodes are emitted on their own
                stmt = '"%s";' % (node.name)
            elif node.parent:
                stmt = '"%s" -> "%s";' % (node.parent.name, node.name)
            else:
                continue
            print(stmt)
            #if node.sibling:
            #    print('"%s" -> "%s";' % (node.name, node.sibling.name))
40 | 
# let the model feed its nodes into the runner, then wrap them in a digraph
runner = Runner()
m.Setup(runner)
print(runner.olist, file=sys.stderr)
for header in ("digraph {", "fontname=\"Courier\";"):
    print(header)
runner.finish()
print("}")
48 | 


--------------------------------------------------------------------------------
/gen_level.py:
--------------------------------------------------------------------------------
 1 | # generate levels for events from the model
 2 | # utility module for other tools
 3 | l1 = set(("Frontend_Bound", "Backend_Bound", "Retiring", "Bad_Speculation"))
 4 | 
 5 | def get_level(name):
 6 |     is_node = name in l1 or "." in name
 7 |     level = name.count(".") + 1
 8 |     if is_node:
 9 |         return level
10 |     return 0
11 | 
def is_metric(name):
    """True when get_level() assigns `name` no level, i.e. it is not a tree node."""
    level = get_level(name)
    return level == 0
14 | 
def level_name(name):
    """Return the group name for `name`.

    Dotted names map to everything before the last dot, metrics to
    "CPU-METRIC" and the remaining names to "TopLevel". Spaces in the
    result are replaced by underscores.
    """
    if "." in name:
        group = name.rsplit(".", 1)[0]
    elif is_metric(name):
        return "CPU-METRIC" # XXX split
    else:
        group = "TopLevel"
    return group.replace(" ", "_")
25 | 


--------------------------------------------------------------------------------
/genretlat:
--------------------------------------------------------------------------------
1 | genretlat.py


--------------------------------------------------------------------------------
/interval-merge:
--------------------------------------------------------------------------------
1 | interval-merge.py


--------------------------------------------------------------------------------
/interval-merge.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # merge multiple --perf-output files. requires header
 3 | from __future__ import print_function
 4 | import csv
 5 | import argparse
 6 | from collections import OrderedDict, Counter
 7 | import sys
 8 | 
 9 | ap = argparse.ArgumentParser()
10 | ap.add_argument('csvfiles', nargs='+', type=argparse.FileType('r'))
11 | args = ap.parse_args()
12 | 
def genkey(c, hdr, count):
    """Build the merge key for row `c`.

    The key is (count, [timestamp], [location], event); timestamp and
    location are only included when the header `hdr` has those columns.
    """
    optional = [c[hdr[col]] for col in ('Timestamp', 'Location') if col in hdr]
    return tuple([count] + optional + [c[hdr['Event']]])
21 | 
d = OrderedDict()   # merge key -> merged row, in first seen order
hdr = None          # column name -> column index, from the first header line
hdrl = None         # the first header line itself, echoed to the output
prev = Counter()    # how often (file, timestamp, event) was seen so far
for fh in args.csvfiles:
    csvf = csv.reader(fh, delimiter=';')
    for c in csvf:
        if not c:
            # blank line, nothing to merge
            continue
        if hdr is None:
            hdrl = c
            hdr = dict([(y,x) for x,y in enumerate(c)])
            continue
        if c[0] in ("Timestamp", "Location", "Value"):
            # header line of a later file
            continue
        pkey = (fh, c[hdr['Timestamp']] if 'Timestamp' in hdr else None, c[hdr['Event']])
        prev[pkey] += 1
        key = genkey(c, hdr, prev[pkey])
        try:
            # Convert all fields first, so that a malformed field can
            # neither leave a row half updated nor store unconverted strings.
            runtime = float(c[hdr['Run-Time']])
            enabled = float(c[hdr['Enabled']])
            value = float(c[hdr['Value']])
        except ValueError as e:
            print("cannot parse", c, e, file=sys.stderr)
            continue
        if key in d:
            o = d[key]
            o[hdr['Run-Time']] += runtime
            # average the stored value with the one from the new row
            o[hdr['Enabled']] = (o[hdr['Enabled']] + enabled) / 2
            o[hdr['Value']] += value
        else:
            c[hdr['Value']] = value
            c[hdr['Enabled']] = enabled
            c[hdr['Run-Time']] = runtime
            d[key] = c

csvf = csv.writer(sys.stdout, delimiter=';')
# without any input line there is no header to echo
if hdrl is not None:
    csvf.writerow(hdrl)
for j in d.values():
    csvf.writerow(j)
57 | 


--------------------------------------------------------------------------------
/interval-normalize:
--------------------------------------------------------------------------------
1 | interval-normalize.py


--------------------------------------------------------------------------------
/interval-normalize.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # convert perf stat -Ixxx -x, / toplev -Ixxx -x, output to normalized output
  3 | # this version buffers all data in memory, so it can use a lot of memory.
  4 | # t1,ev1,num1
  5 | # t1,ev2,num1
  6 | # t2,ev1,num3
  7 | # ->
  8 | # timestamp,ev1,ev2
  9 | # t1,num1,num2
 10 | # t2,num3,,
 11 | # when the input has CPU generate separate lines for each CPU (may need post filtering)
 12 | from __future__ import print_function
 13 | import sys
 14 | import csv
 15 | import argparse
 16 | import collections
 17 | import csv_formats
 18 | 
 19 | ap = argparse.ArgumentParser(description=
 20 | 'Normalize CSV data from perf or toplev. All values are printed on a single line.')
 21 | ap.add_argument('inputfile', type=argparse.FileType('r'), default=sys.stdin, nargs='?')
 22 | ap.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, nargs='?')
 23 | ap.add_argument('--cpu', nargs='?', help='Only output for this cpu')
 24 | ap.add_argument('--na', nargs='?', help='Value to use if data is not available', default="")
 25 | ap.add_argument('--error-exit', action='store_true', help='Force error exit on parse error')
 26 | ap.add_argument('--normalize-cpu', action='store_true', help='Normalize CPUs into unique columns too')
 27 | args = ap.parse_args()
 28 | 
 29 | printed_header = False
 30 | timestamp = None
 31 | 
 32 | events = collections.OrderedDict()
 33 | out = []
 34 | times = []
 35 | cpus = []
 36 | rc = csv.reader(args.inputfile)
 37 | res = []
 38 | writer = csv.writer(args.output, lineterminator='\n')
 39 | lastcpu = None
 40 | cpu = None
 41 | lineno = 1
 42 | for row in rc:
 43 |     if len(row) > 0 and (row[0] == "Timestamp" or row[0].startswith("#")):
 44 |         lineno += 1
 45 |         continue
 46 |     r = csv_formats.parse_csv_row(row, error_exit=args.error_exit)
 47 |     if r is None:
 48 |         print("at line %d" % lineno, file=sys.stderr)
 49 |         lineno += 1
 50 |         continue
 51 |     ts, cpu, ev, val = r.ts, r.cpu, r.ev, r.val
 52 | 
 53 |     if ts != timestamp or (cpu != lastcpu and not args.normalize_cpu):
 54 |         if timestamp:
 55 |             if args.cpu and cpu != args.cpu:
 56 |                 continue
 57 |             # delay in case we didn't see all headers
 58 |             # only need to do that for toplev, directly output for perf?
 59 |             # could limit buffering to save memory?
 60 |             out.append(res)
 61 |             times.append(timestamp)
 62 |             cpus.append(cpu)
 63 |             res = []
 64 |         timestamp = ts
 65 |         lastcpu = cpu
 66 | 
 67 |     if cpu is not None and args.normalize_cpu:
 68 |         ev = cpu + " " + ev
 69 | 
 70 |     # use a list for row storage to keep memory requirements down
 71 |     if ev not in events:
 72 |         events[ev] = len(res)
 73 |     ind = events[ev]
 74 |     if ind >= len(res):
 75 |         res += [None] * ((ind + 1) - len(res))
 76 |     res[ind] = val
 77 |     lineno += 1
 78 | if res and not (args.cpu and cpu != args.cpu):
 79 |     out.append(res)
 80 |     times.append(timestamp)
 81 |     cpus.append(cpu)
 82 | 
 83 | def resolve(row, ind):
 84 |     if ind >= len(row):
 85 |         return args.na
 86 |     v = row[ind]
 87 |     if v is None:
 88 |         return args.na
 89 |     return v
 90 | 
 91 | def cpulist():
 92 |     if args.normalize_cpu:
 93 |         return []
 94 |     if cpu is not None:
 95 |         return ["CPU"]
 96 |     return []
 97 | 
 98 | keys = events.keys()
 99 | writer.writerow(["Timestamp"] + cpulist() + list(keys))
100 | for row, ts, cpunum in zip(out, times, cpus):
101 |     writer.writerow([ts] +
102 |                 ([cpunum] if (cpu is not None and not args.normalize_cpu) else []) +
103 |                 ([resolve(row, events[x]) for x in keys]))
104 | 


--------------------------------------------------------------------------------
/interval-plot:
--------------------------------------------------------------------------------
1 | interval-plot.py


--------------------------------------------------------------------------------
/jevents/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY = all clean-examples all-examples install clean html man
 2 | PREFIX=$(DESTDIR)/usr/local
 3 | LIB=$(PREFIX)/lib64
 4 | BIN=$(PREFIX)/bin
 5 | INCLUDE=$(PREFIX)/include
 6 | CFLAGS := -g -fPIC -Wall -O2 -Wno-unused-result
 7 | OBJ := json.o jsmn.o jevents.o resolve.o cache.o cpustr.o rawevent.o \
 8 |        perf-iter.o interrupts.o rdpmc.o measure.o perf_event_open.o \
 9 |        session.o perf-aux.o csv.o print.o
10 | KDOC = /usr/src/linux/scripts/kernel-doc
11 | 
12 | all: libjevents.a showevent listevents event-rmap all-examples
13 | 
14 | clean-examples:
15 | 	make -C examples clean
16 | 
17 | all-examples: libjevents.a
18 | 	make -C examples
19 | 
20 | install: libjevents.a listevents showevent event-rmap
21 | 	install -d ${BIN}
22 | 	install -d ${LIB}
23 | 	install -d ${INCLUDE}
24 | 	install -m 755 listevents showevent event-rmap ${BIN}
25 | 	install -m 644 libjevents.a ${LIB}
26 | 	install -m 644 rdpmc.h jevents.h measure.h perf-iter.h jsession.h perf-record.h ${INCLUDE}
27 | 	# xxx install man page
28 | 
29 | libjevents.a: ${OBJ}
30 | 	rm -f libjevents.a
31 | 	ar q libjevents.a $^
32 | 	ranlib libjevents.a
33 | 
34 | clean: clean-examples
35 | 	rm -f ${OBJ} libjevents.a resolve showevent listfiles jevents.html rmap event-rmap.o event-rmap \
36 | 		listevents resolve-test showevent.o listevents.o
37 | 
38 | resolve: resolve.c
39 | 	$(CC) $(CFLAGS) -DTEST=1 -o $@ $^
40 | 
41 | showevent: showevent.o libjevents.a
42 | 
43 | listevents: listevents.o libjevents.a
44 | 
45 | event-rmap: event-rmap.o libjevents.a
46 | 
47 | DOCFILES := cache.c jevents.c cpustr.c rawevent.c interrupts.c measure.c rdpmc.c \
48 | 	    session.c perf-aux.c csv.c print.o jsession.h
49 | 
50 | html: jevents.html
51 | 
52 | man: jeventstmp.man
53 | 	perl -ne 's/Kernel Hacker.s Manual/jevents/; open(F,">" . $$1 . ".man") if /^\.TH "(.*?)"/; print F $$_' jeventstmp.man
54 | 
55 | jeventstmp.man: $(DOCFILES)
56 | 	${KDOC} -man ${DOCFILES} > $@
57 | 
58 | jevents.html: $(DOCFILES)
59 | 	${KDOC} -html ${DOCFILES} > $@
60 | 
61 | coverage:
62 | 	${MAKE} CFLAGS="-g --coverage" LDFLAGS="-g --coverage"
63 | 


--------------------------------------------------------------------------------
/jevents/README.md:
--------------------------------------------------------------------------------
 1 | # jevents
 2 | 
jevents is a C library that makes it easier for C programs to access the Linux kernel perf interface.
 4 | It also includes some examples to use the library.
 5 | 
 6 | ## Features
 7 | 
 8 | * Resolving symbolic event names using downloaded event files
* Reading performance counters from ring 3 in C programs
10 | * Handling the perf ring buffer (for example to read memory addresses)
11 | 
12 | For more details see the [API reference](http://halobates.de/jevents.html) 
13 | 
14 | ## Building
15 | 
16 | 	cd jevents
17 | 	make
18 | 	sudo make install
19 | 
20 | ## Downloading event lists
21 | 
22 | Before using event lists they need to be downloaded. Use the pmu-tools
23 | event_download.py script for this.
24 | 
25 | 	% event_download.py
26 | 
27 | ## Examples
28 | 
29 | * listevents: List all named perf and JSON events
30 | * showevent: Convert JSON name or perf alias to perf format and test with perf
31 | * event-rmap: Map low level perf event to named high-level event
32 | * addr: Profile a loadable test kernel with address profiling
* jestat: Simple perf stat like tool with JSON event resolution.
34 | 
35 | ## Initialization/Multithreading
36 | 
37 | Functions accessing the JSON event data load the JSON file lazily when first
38 | used. This might result in data races when multiple threads call jevent
39 | functions. In such cases the event list can be loaded from the main thread by
40 | `read_events(NULL);`.
41 | 
42 | ## self profiling 
43 | 
44 | Reading performance counters directly in the program without entering
45 | the kernel.
46 | 
47 | This is very simplified, for a real benchmark you almost certainly
48 | want some warmup, multiple iterations, possibly context switch
49 | filtering and some filler code to avoid cache effects.
50 | 
51 | ```C
52 | 	#include "rdpmc.h"
53 | 
54 | 	struct rdpmc_ctx ctx;
55 | 	unsigned long long start, end;
56 | 
57 | 	if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0) ... error ...
58 | 	start = rdpmc_read(&ctx);
59 | 	... your workload ...
60 | 	end = rdpmc_read(&ctx);
61 | ```
62 | 
63 | /sys/devices/cpu/rdpmc must be 1.
64 | 
65 | http://halobates.de/modern-pmus-yokohama.pdf provides some
66 | additional general information on cycle counting. The techniques used
67 | with simple-pmu described there can be used with jevents too.
68 | 
69 | ## Resolving named events
70 | 
71 | Resolving named events to a perf event and set up reading from the perf ring buffer.
72 | 
73 | First run event_download.py to download a current event list for your CPU.
74 | 
75 | ```C
76 | 	#include "jevents.h"
77 | 	#include "rdpmc.h"
78 | 	#include <linux/perf_event.h>
79 | 
80 | 	struct perf_event_attr attr;
81 | 	if (resolve_event("cpu_clk_thread_unhalted.ref_xclk", &attr) < 0) {
82 | 		... error ...
83 | 	}
84 | 
85 | 	/* You can change attr, see the perf_event_open man page for details */
86 | 
87 | 	struct rdpmc_ctx ctx;
88 | 	if (rdpmc_open_attr(PERF_COUNT_HW_CPU_CYCLES, &ctx, &attr) < 0) 
89 | 		... error ...
90 | 
91 | 
```
93 | 
94 | Or alternatively use the resolve attr for sampling, set up the sampling attributes in attr, and use perf_fd_open / perf_iter_*. See examples/addr.c
95 | 


--------------------------------------------------------------------------------
/jevents/cpustr.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2014, Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  * All rights reserved.
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that the following conditions are met:
 8 |  *
 9 |  * 1. Redistributions of source code must retain the above copyright notice,
10 |  * this list of conditions and the following disclaimer.
11 |  *
12 |  * 2. Redistributions in binary form must reproduce the above copyright
13 |  * notice, this list of conditions and the following disclaimer in the
14 |  * documentation and/or other materials provided with the distribution.
15 |  *
16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
28 | */
29 | 
30 | #define _GNU_SOURCE 1
31 | #include <stdio.h>
32 | #include <stdlib.h>
33 | #include <cpuid.h>
34 | #include "jevents.h"
35 | 
/**
 * get_cpu_str - Return string describing the current CPU or NULL.
 * Needs to be freed by caller.
 *
 * Used to store JSON event lists in the cache directory.
 * This is get_cpu_str_type() for the "-core" type without the
 * stepping string.
 */
char *get_cpu_str(void)
{
	char *cpustr = get_cpu_str_type("-core", NULL);

	return cpustr;
}
46 | 
/**
 * get_cpu_str_type - Return string describing the current CPU for type or NULL.
 * @type: "-core" or "-uncore"
 * @idstr_step: if non NULL write idstr with stepping to pointer.
 * Both result and idstr_step (if non NULL) need to be freed by
 * caller. When the stepping string cannot be allocated *idstr_step
 * is set to NULL.
 */
char *get_cpu_str_type(char *type, char **idstr_step)
{
	char *res;
	/* The three vendor registers overlay the 12 character vendor string. */
	union {
		struct {
			unsigned b, c, d;
		} f;
		char str[13];
	} vendor;
	unsigned a, b, c, d;
	unsigned stepping, family, model;

	vendor.str[12] = 0;
	/* Leaf 0: store ebx, edx, ecx in this order to spell the vendor. */
	__cpuid(0, a, vendor.f.b, vendor.f.d, vendor.f.c);
	/* Leaf 1: eax holds stepping, model and family. */
	__cpuid(1, a, b, c, d);
	stepping = a & 0xf;
	model = (a >> 4) & 0xf;
	family = (a >> 8) & 0xf;
	if (family == 0xf)
		family += (a >> 20) & 0xff;
	if (family == 6 || family == 0xf)
		model += ((a >> 16) & 0xf) << 4;
	/* asprintf leaves the pointer undefined on failure, so reset it. */
	if (idstr_step &&
	    asprintf(idstr_step, "%s-%u-%X-%X%s", vendor.str, family,
		     model, stepping, type) < 0)
		*idstr_step = NULL;
	if (asprintf(&res, "%s-%u-%X%s", vendor.str, family, model, type) < 0)
		res = NULL;
	return res;
}
85 | 


--------------------------------------------------------------------------------
/jevents/csv.c:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: BSD-3-Clause
 2 | // Copyright 2021 Intel Corporation.
 3 | // Author: Andi Kleen
 4 | #include <stdint.h>
 5 | #include "jevents.h"
 6 | #include "jsession.h"
 7 | 
 8 | /**
 9 |  * session_print_csv - Print event list values to CSV file in perf stat format
10 |  * @outfh: File descriptor to print to.
11 |  * @el: Event list to print. It must have been measured before.
12 |  * @arg: Arguments. sep can be set there, as well as prefix.
13 |  */
14 | 
15 | void session_print_csv(FILE *outfh, struct eventlist *el, struct session_print *arg)
16 | {
17 | 	struct event *e;
18 | 	int i;
19 | 	char *sep = arg->sep ? arg->sep : ";";
20 | 
21 | 	for (e = el->eventlist; e; e = e->next) {
22 | 		uint64_t v;
23 | 		for (i = 0; i < el->num_cpus; i++) {
24 | 			if (e->efd[i].fd < 0)
25 | 				continue;
26 | 			if (arg->merge && e->orig)
27 | 				continue;
28 | 			v = event_scaled_value(e, i);
29 | 			fprintf(outfh, "%s%3d%s%s%s%lu%s%lu%s%lu\n",
30 | 				arg->prefix ? arg->prefix : "",
31 | 				i, sep,
32 | 				e->extra.name ? e->extra.name : e->event, sep,
33 | 				v, sep,
34 | 				e->efd[i].val[1], sep,
35 | 				e->efd[i].val[2]);
36 | 		}
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/jevents/event-rmap.c:
--------------------------------------------------------------------------------
 1 | #include "jevents.h"
 2 | #include <stdlib.h>
 3 | #include <stdio.h>
 4 | 
 5 | int main(int ac, char **av)
 6 | {
 7 | 	while (*++av) {
 8 | 		unsigned event = strtoul(*av, NULL, 0);
 9 | 		char *name, *desc;
10 | 		if (rmap_event(event, &name, &desc) == 0)
11 | 			printf("%x: %s : %s\n", event, name, desc);
12 | 		else
13 | 			printf("%x not found\n", event);
14 | 	}
15 | 	return 0;
16 | }
17 | 


--------------------------------------------------------------------------------
/jevents/examples/Makefile:
--------------------------------------------------------------------------------
 1 | # build jevents first
 2 | CFLAGS := -g -Wall -O2 -Wno-unused-result
 3 | CXXFLAGS := -g -Wall  -O2  -fPIC
 4 | override CFLAGS += -I ..
 5 | override LDFLAGS += -L ..
 6 | LDLIBS = -ljevents
 7 | 
 8 | all: addr rtest rtest2 rtest3 jestat ptself
 9 | 
10 | # no deps on the includes
11 | 
12 | ADDR_OBJ := addr.o hist.o cpu.o
13 | 
14 | addr: ${ADDR_OBJ} ../libjevents.a
15 | 
16 | addr: LDLIBS += -lstdc++ -ldl
17 | 
18 | rtest2: LDLIBS += -lm
19 | 
20 | rtest: rtest.o ../libjevents.a
21 | 
22 | rtest2: rtest2.o ../libjevents.a
23 | 
24 | rtest3: rtest3.o ../libjevents.a
25 | 
26 | jestat: jestat.o ../libjevents.a
27 | 
28 | clean:
29 | 	rm -f addr ${ADDR_OBJ} jestat jestat.o
30 | 	rm -f rtest3 rtest3.o rtest2 rtest2.o rtest rtest.o
31 | 


--------------------------------------------------------------------------------
/jevents/examples/cpu.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | #include <cpuid.h>
18 | #include <stdio.h>
19 | #include <stdlib.h>
20 | #include <stdbool.h>
21 | #include <linux/perf_event.h>
22 | 
23 | #include "cpu.h"
24 | #include "jevents.h"
25 | 
/**
 * mem_loads_event - Return precise mem load event for current CPU.
 * This is an event which supports load address monitoring.
 * Return: raw event, can be put into perf_event_attr->config,
 * or -1 on error.
 */

unsigned mem_loads_event(void)
{
	struct perf_event_attr attr;

	/* Try the candidate event names in order, the first one that resolves wins. */
	if (resolve_event("MEM_INST_RETIRED.LOAD_LATENCY_ABOVE_THRESHOLD_0", &attr) == 0)
		return attr.config;
	if (resolve_event("MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", &attr) == 0)
		return attr.config;
	return -1;
}
42 | 
/**
 * mem_stores_event - Return precise mem stores event for current CPU.
 * Return: raw event, can be put into perf_event_attr->config,
 * or -1 on error.
 */
unsigned mem_stores_event(void)
{
	struct perf_event_attr attr;

	/* Try the candidate event names in order, the first one that resolves wins. */
	if (resolve_event("MEM_INST_RETIRED.ALL_STORES", &attr) == 0)
		return attr.config;
	if (resolve_event("MEM_UOPS_RETIRED.ALL_STORES", &attr) == 0)
		return attr.config;
	return -1;
}
58 | 


--------------------------------------------------------------------------------
/jevents/examples/cpu.h:
--------------------------------------------------------------------------------
/* Raw config of a resolvable memory load event, or -1 (as unsigned) if none resolves. */
unsigned mem_loads_event(void);
/* Raw config of a resolvable memory store event, or -1 (as unsigned) if none resolves. */
unsigned mem_stores_event(void);
3 | 


--------------------------------------------------------------------------------
/jevents/examples/hist.cc:
--------------------------------------------------------------------------------
 1 | // STL based histogram
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <map>
 5 | #include <queue>
 6 | #include "hist.h"
 7 | 
 8 | using namespace std;
 9 | 
extern "C" {

/* value -> number of times it was added */
typedef std::map<uint64_t, uint64_t> hist_type;

struct hist {
	hist_type hist;
	uint64_t total;		/* number of hist_add() calls */
};

/* Allocate an empty histogram. Release it with free_hist(). */
hist *init_hist()
{
	struct hist *fresh = new hist;

	fresh->total = 0;
	return fresh;
}

/* Count one more occurrence of val. */
void hist_add(hist *h, uint64_t val)
{
	++h->hist[val];
	++h->total;
}

/*
 * Print all values whose share of the total is at least min_percent
 * (compared as a fraction of 1), highest count first, followed by the
 * number of samples that stayed below that threshold.
 */
void hist_print(hist *h, double min_percent)
{
	/* (count, value), so the queue orders by count */
	typedef std::pair<uint64_t, uint64_t> val_pair;
	std::priority_queue<val_pair> by_count;
	unsigned long long below_thresh = 0;
	hist_type::iterator it;

	for (it = h->hist.begin(); it != h->hist.end(); ++it) {
		double fraction = (double)(it->second) / (double)h->total;

		if (fraction >= min_percent)
			by_count.push(val_pair(it->second, it->first));
		else
			below_thresh += it->second;
	}
	printf("%11s %16s %16s\n", "PERCENT", "ADDR", "SAMPLES");
	for (; !by_count.empty(); by_count.pop()) {
		val_pair p = by_count.top();

		printf("%10.2f%% %16llx %16llu\n",
				(p.first / (double)h->total) * 100.0,
				(unsigned long long)p.second,
				(unsigned long long)p.first);
	}
	printf("%llu below threshold\n", below_thresh);
}

/* Release a histogram allocated by init_hist(). */
void free_hist(hist *h)
{
	delete h;
}

}
64 | 


--------------------------------------------------------------------------------
/jevents/examples/hist.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifdef __cplusplus
 3 | extern "C" {
 4 | #endif
 5 | 
 6 | #include <stdint.h>
 7 | 
 8 | struct hist;
 9 | 
10 | struct hist *init_hist(void);
11 | void hist_add(struct hist *h, uint64_t);
12 | void hist_print(struct hist *h, double min_percent);
13 | void free_hist(struct hist *);
14 | 
15 | #ifdef __cplusplus
16 | }
17 | #endif
18 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest.c:
--------------------------------------------------------------------------------
 1 | /* Demonstrate self profiling for context switches */
 2 | #include <sys/time.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include "rdpmc.h"
 6 | 
 7 | #define HW_INTERRUPTS 0x1cb
 8 | 
 9 | typedef unsigned long long u64;
10 | 
11 | u64 get_time(void)
12 | {
13 | 	struct timeval tv;
14 | 	gettimeofday(&tv, NULL);
15 | 	return (u64)tv.tv_sec * 1000000 + tv.tv_usec;
16 | }
17 | 
18 | int main(int ac, char **av)
19 | {
20 | 	int i;
21 | 	int cswitch = 0;
22 | 	struct rdpmc_ctx ctx;
23 | 	int iter = 10000;
24 | 
25 | 	if (av[1])
26 | 		iter = atoi(av[1]);
27 | 	
28 | 	if (rdpmc_open(HW_INTERRUPTS, &ctx) < 0)
29 | 		exit(1);
30 | 
31 | 	u64 t0 = get_time();
32 | 	u64 prev = rdpmc_read(&ctx);
33 | 	for (i = 0; i < iter; i++) {
34 | 		u64 n = rdpmc_read(&ctx);
35 | 		if (n != prev) {
36 | 			cswitch++;
37 | 			prev = n;
38 | 		}
39 | 	}
40 | 			
41 | 	u64 t1 = get_time();
42 | 	
43 | 	printf("%d interrupts, %llu usec duration\n", cswitch, t1-t0);
44 | 
45 | 	rdpmc_close(&ctx);
46 | 	return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest2.c:
--------------------------------------------------------------------------------
 1 | /* Measure a thousand sins */
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <linux/perf_event.h>
 5 | #include <math.h>
 6 | #include "interrupts.h"
 7 | #include "rdpmc.h"
 8 | 
 9 | /* Requires an Intel Sandy or Ivy Bridge CPU for the interrupt test,
10 |    On others it may loop forever, unless you disable the interrupt test.
11 |    This is not a realistic test of real performance because it's too
12 |    predictable for cache and branch predictors,
13 |    see http://halobates.de/blog/p/227 */
14 | 
15 | #define ITER 1000
16 | typedef unsigned long long u64;
17 | 
18 | volatile double var = 10.0;
19 | volatile double var2;
20 | 
21 | int main(void)
22 | {
23 | 	struct rdpmc_ctx ctx;
24 | 	int warmup = 0;
25 | 		
26 | 	if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0)
27 | 		exit(1);
28 | 	interrupts_init();
29 | 	for (;;) {
30 | 		int i;
31 | 	        u64 start_int;
32 | 		u64 a, b;
33 | 
34 | 		start_int = get_interrupts();		
35 | 		a = rdpmc_read(&ctx);
36 | 		for (i = 0; i < ITER; i++)
37 | 			var2 += sin(var);
38 | 		b = rdpmc_read(&ctx);
39 | 		if (get_interrupts() == start_int && warmup > 0) {
40 | 			printf("%u sin() took %llu cycles avg\n", ITER, (b-a)/ITER);
41 | 			break;
42 | 		}
43 | 		warmup++;
44 | 	}
45 | 	interrupts_exit();	
46 | 	rdpmc_close(&ctx);
47 | 	return 0;
48 | }
49 | 


--------------------------------------------------------------------------------
/jevents/examples/rtest3.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <sys/time.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <signal.h>
 6 | #include "rdpmc.h"
 7 | 
 8 | typedef unsigned long long u64;
 9 | typedef long long s64;
10 | 
11 | u64 get_time(void)
12 | {
13 | 	struct timeval tv;
14 | 	gettimeofday(&tv, NULL);
15 | 	return (u64)tv.tv_sec * 1000000 + tv.tv_usec;
16 | }
17 | 
18 | volatile int interrupted;
19 | 
20 | void stop(int sig)
21 | {
22 | 	interrupted = 1;
23 | }
24 | 
25 | int main(int ac, char **av)
26 | {
27 | 	int i;
28 | 	struct rdpmc_ctx ctx;
29 | 	int thresh = 10000;
30 | 
31 | 	if (av[1])
32 | 		thresh = atoi(av[1]);
33 | 	
34 | 	if (rdpmc_open(0, &ctx) < 0)
35 | 		exit(1);
36 | 
37 | 	signal(SIGINT, stop);
38 | 
39 | 	printf("Press Ctrl-C to stop\n");
40 | 
41 | 	u64 prev = rdpmc_read(&ctx);
42 | 
43 | 	i = 0;
44 | 	while (!interrupted) { 
45 | 		u64 next = rdpmc_read(&ctx);
46 | 		s64 delta = next - prev;
47 | 
48 | 		if (delta > thresh)
49 | 			printf("%d: %lld\n", i, delta);
50 | 
51 | 		prev = next;
52 | 		i++;
53 | 	}
54 | 			
55 | 	rdpmc_close(&ctx);
56 | 	return 0;
57 | }
58 | 


--------------------------------------------------------------------------------
/jevents/interrupts.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | /** DOC: Account for interrupts on Intel Core/Xeon systems
18 |  *
19 |  * This is useful for micro benchmarks to filter out measurement
20 |  * samples that are disturbed by a context switch caused by OS
21 |  * noise.
22 |  *
23 |  * Requires a Linux 3.3+ kernel
24 |  */
25 | #include "rdpmc.h"
26 | #include "interrupts.h"
27 | 
28 | /* Intel Sandy Bridge */
29 | #define HW_INTERRUPTS 0x1cb
30 | 
31 | static __thread int int_ok = -1;
32 | static __thread struct rdpmc_ctx int_ctx;
33 | 
34 | /**
35 |  * interrupts_init - Initialize interrupt counter per thread
36 |  *
37 |  * Must be called for each application thread.
38 |  */
39 | void interrupts_init(void)
40 | {
41 | 	int_ok = rdpmc_open(HW_INTERRUPTS, &int_ctx);
42 | }
43 | 
44 | /**
45 |  * interrupts_exit - Free interrupt counter per thread.
46 |  *
47 |  * Must be called for each application thread. Safe to call more than once.
48 |  */
49 | void interrupts_exit(void)
50 | {
51 | 	if (int_ok >= 0) rdpmc_close(&int_ctx);
52 | 	int_ok = -1;	/* mark closed: get_interrupts() now returns 0 and a repeated exit does not close again */
53 | }
54 | 
55 | /**
56 |  * get_interrupts - get current interrupt counter.
57 |  *
58 |  * Get the current hardware interrupt count. If the number changed
59 |  * during a measurement period there was some sort of context switch
60 |  * and the sample for this period should be discarded.
61 |  * This returns absolute numbers, or 0 if the counter is not open.
62 |  */
63 | unsigned long long get_interrupts(void)
64 | {
65 | 	if (int_ok >= 0)
66 | 		return rdpmc_read(&int_ctx);
67 | 	return 0;
68 | }
69 | 


--------------------------------------------------------------------------------
/jevents/interrupts.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /*
 3 |  * Copyright (c) 2012,2013 Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that: (1) source code distributions
 8 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 9 |  * distributions including binary code include the above copyright notice and
10 |  * this paragraph in its entirety in the documentation or other materials
11 |  * provided with the distribution
12 |  *
13 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
14 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
15 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 |  */
17 | 
18 | #ifndef INTERRUPTS_H
19 | #define INTERRUPTS_H 1
20 | 
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 | 
25 | void interrupts_init(void);
26 | void interrupts_exit(void);
27 | unsigned long long get_interrupts(void);
28 | 
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/jevents/jevents.h:
--------------------------------------------------------------------------------
 1 | #ifndef JEVENTS_H
 2 | #define JEVENTS_H 1
 3 | 
 4 | #include <sys/types.h>
 5 | #include <stdbool.h>
 6 | #include <glob.h>
 7 | #include <stdio.h>
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | int json_events(const char *fn,
14 | 		int (*func)(void *data, char *name, char *event, char *desc,
15 | 			    char *pmu),
16 | 		void *data);
17 | char *get_cpu_str(void);
18 | char *get_cpu_str_type(char *type, char **idstr_step);
19 | 
20 | struct perf_event_attr;
21 | 
22 | struct jevent_extra {
23 | 	char *name;			/* output name */
24 | 	char *decoded;			/* decoded name */
25 | 	bool multi_pmu;			/* needs multiple pmus */
26 | 	glob_t pmus;			/* glob_t with all pmus */
27 | 	int next_pmu;			/* next pmu number */
28 | };
29 | 
30 | void jevent_free_extra(struct jevent_extra *extra);
31 | void jevent_copy_extra(struct jevent_extra *dst, struct jevent_extra *src);
32 | int jevent_next_pmu(struct jevent_extra *extra, struct perf_event_attr *attr);
33 | int jevent_name_to_attr(const char *str, struct perf_event_attr *attr);
34 | int jevent_name_to_attr_extra(const char *str, struct perf_event_attr *attr,
35 | 			      struct jevent_extra *extra);
36 | char *jevent_pmu_name(struct jevent_extra *extra, int num, int *next_num);
37 | int resolve_event(const char *name, struct perf_event_attr *attr);
38 | int resolve_event_extra(const char *name, struct perf_event_attr *attr,
39 | 			struct jevent_extra *extra);
40 | int read_events(const char *fn);
41 | int jevents_update_qual(const char *qual, struct perf_event_attr *attr,
42 | 			const char *str);
43 | int walk_events(int (*func)(void *data, char *name, char *event, char *desc),
44 | 		                void *data);
45 | int walk_perf_events(int (*func)(void *data, char *name, char *event, char *desc),
46 | 		     void *data);
47 | char *format_raw_event(struct perf_event_attr *attr, char *name);
48 | int rmap_event(unsigned event, char **name, char **desc);
49 | 
50 | int perf_event_open(struct perf_event_attr *attr, pid_t pid,
51 | 		    int cpu, int group_fd, unsigned long flags);
52 | char *resolve_pmu(int type);
53 | bool jevent_pmu_uncore(const char *str);
54 | int jevents_socket_cpus(int *lenp, int **socket_cpus);
55 | void jevent_print_attr(FILE *f, struct perf_event_attr *attr);
56 | 
57 | #ifdef __cplusplus
58 | }
59 | #endif
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/jevents/jsession.h:
--------------------------------------------------------------------------------
 1 | #ifndef JSESSION_H
 2 | #define JSESSION_H 1
 3 | 
 4 | #include <linux/perf_event.h>
 5 | #include <stdbool.h>
 6 | #include <stdint.h>
 7 | #include "jevents.h"
 8 | 
 9 | #ifdef __cplusplus
10 | extern "C" {
11 | #endif
12 | 
13 | struct event {
14 | 	struct event *next;
15 | 	struct perf_event_attr attr;
16 | 	char *event;
17 | 	bool end_group, group_leader, ingroup;
18 | 	bool uncore;
19 | 	struct event *orig;	/* Original event if cloned */
20 | 	int num_clones;		/* number of clones for this event */
21 | 	struct jevent_extra extra;
22 | 	struct efd {
23 | 		int fd;
24 | 		uint64_t val[3];
25 | 	} efd[0]; /* num_cpus */
26 | };
27 | 
28 | struct eventlist {
29 | 	struct event *eventlist;
30 | 	struct event *eventlist_last;
31 | 	int num_cpus;
32 | 	int num_sockets;
33 | 	int *socket_cpus;
34 | };
35 | 
36 | int parse_events(struct eventlist *el, char *events);
37 | int setup_events(struct eventlist *el, bool measure_all, int measure_pid);
38 | int setup_events_cpumask(struct eventlist *el, int measure_pid,
39 | 			 char *cpumask, int flags);
40 | int setup_event(struct event *e, int cpu, struct event *leader, bool measure_all,
41 | 		int measure_pid);
42 | int setup_event_flags(struct event *e, int cpu, struct event *leader, int measure_pid,
43 | 		      int flags);
44 | #define SE_ENABLE_ON_EXEC (1 << 0)
45 | #define SE_MEASURE_ALL    (1 << 1)
46 | 
47 | int read_event(struct event *e, int cpu);
48 | int read_all_events(struct eventlist *el);
49 | struct eventlist *alloc_eventlist(void);
50 | uint64_t event_scaled_value(struct event *e, int cpu);
51 | uint64_t event_scaled_value_sum(struct event *e, int cpu);
52 | void free_eventlist(struct eventlist *el);
53 | void print_event_list_attr(struct eventlist *el, FILE *f);
54 | 
55 | /**
56 |  * struct session_print - Arguments for printing eventlists
57 |  * @size:	size of session_print or 0 (for compatibility)
58 |  * @sep:	separator string. Only used for CSV mode. Or NULL. Default ;
59 |  * @prefix:	String prefix to print before output (e.g. timestamp).
60 |  *		Needs to include separators. Or NULL.
61 |  * @merge:	Merge identical events
62 |  */
63 | struct session_print {
64 | 	int size;	/* 0 or size for binary compatibility */
65 | 	char *sep;
66 | 	char *prefix;
67 | 	bool merge;
68 | };
69 | 
70 | void session_print_csv(FILE *outfh, struct eventlist *el, struct session_print *arg);
71 | void session_print_aggr(FILE *outfh, struct eventlist *el, struct session_print *arg);
72 | void session_print(FILE *outfh, struct eventlist *el, struct session_print *arg);
73 | void session_print_timestamp(char *buf, int bufs, double ts);
74 | #define SESSION_TIMESTAMP_LEN 30
75 | 
76 | #ifdef __cplusplus
77 | }
78 | #endif
79 | 
80 | #endif
81 | 


--------------------------------------------------------------------------------
/jevents/jsmn.h:
--------------------------------------------------------------------------------
 1 | #ifndef __JSMN_H_
 2 | #define __JSMN_H_
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | /*
 9 |  * JSON type identifier. Basic types are:
10 |  *	o Object
11 |  *	o Array
12 |  *	o String
13 |  *	o Other primitive: number, boolean (true/false) or null
14 |  */
15 | typedef enum {
16 | 	JSMN_PRIMITIVE = 0,
17 | 	JSMN_OBJECT = 1,
18 | 	JSMN_ARRAY = 2,
19 | 	JSMN_STRING = 3
20 | } jsmntype_t;
21 | 
22 | typedef enum {
23 | 	/* Not enough tokens were provided */
24 | 	JSMN_ERROR_NOMEM = -1,
25 | 	/* Invalid character inside JSON string */
26 | 	JSMN_ERROR_INVAL = -2,
27 | 	/* The string is not a full JSON packet, more bytes expected */
28 | 	JSMN_ERROR_PART = -3,
29 | 	/* Everything was fine */
30 | 	JSMN_SUCCESS = 0
31 | } jsmnerr_t;
32 | 
33 | /*
34 |  * JSON token description.
35 |  * @param		type	type (object, array, string etc.)
36 |  * @param		start	start position in JSON data string
37 |  * @param		end		end position in JSON data string
38 |  */
39 | typedef struct {
40 | 	jsmntype_t type;
41 | 	int start;
42 | 	int end;
43 | 	int size;
44 | } jsmntok_t;
45 | 
46 | /*
47 |  * JSON parser. Contains an array of token blocks available. Also stores
48 |  * the string being parsed now and current position in that string
49 |  */
50 | typedef struct {
51 | 	unsigned int pos; /* offset in the JSON string */
52 | 	int toknext; /* next token to allocate */
53 | 	int toksuper; /* superior token node, e.g parent object or array */
54 | } jsmn_parser;
55 | 
56 | /*
57 |  * Create JSON parser over an array of tokens
58 |  */
59 | void jsmn_init(jsmn_parser *parser);
60 | 
61 | /*
62 |  * Run JSON parser. It parses a JSON data string into an array of tokens,
63 |  * each describing a single JSON object.
64 |  */
65 | jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
66 | 		     size_t len,
67 | 		     jsmntok_t *tokens, unsigned int num_tokens);
68 | 
69 | #ifdef __cplusplus
70 | }
71 | #endif
72 | 
73 | #endif /* __JSMN_H_ */
74 | 


--------------------------------------------------------------------------------
/jevents/json.c:
--------------------------------------------------------------------------------
  1 | /* Parse JSON files using the JSMN parser. */
  2 | 
  3 | /*
  4 |  * Copyright (c) 2014, Intel Corporation
  5 |  * All rights reserved.
  6 |  *
  7 |  * Redistribution and use in source and binary forms, with or without
  8 |  * modification, are permitted provided that the following conditions are met:
  9 |  *
 10 |  * 1. Redistributions of source code must retain the above copyright notice,
 11 |  * this list of conditions and the following disclaimer.
 12 |  *
 13 |  * 2. Redistributions in binary form must reproduce the above copyright
 14 |  * notice, this list of conditions and the following disclaimer in the
 15 |  * documentation and/or other materials provided with the distribution.
 16 |  *
 17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | */
 30 | 
 31 | #include <stdlib.h>
 32 | #include <string.h>
 33 | #include <sys/mman.h>
 34 | #include <sys/stat.h>
 35 | #include <sys/fcntl.h>
 36 | #include <stdio.h>
 37 | #include <unistd.h>
 38 | #include "jsmn.h"
 39 | #include "json.h"
 40 | #include <linux/kernel.h>
 41 | 
 42 | static char *mapfile(const char *fn, size_t *size)
 43 | {
 44 | 	struct stat st;
 45 | 	char *map = NULL;
 46 | 	int err;
 47 | 	int fd = open(fn, O_RDONLY);
 48 | 
 49 | 	if (fd < 0)
 50 | 		return NULL;
 51 | 	err = fstat(fd, &st);
 52 | 	if (err < 0)
 53 | 		goto out;
 54 | 	*size = st.st_size;
 55 | 	map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
 56 | 	if (map == (char *)MAP_FAILED)
 57 | 		map = NULL;
 58 | out:
 59 | 	close(fd);
 60 | 	return map;
 61 | }
 62 | 
 63 | static void unmapfile(char *map, size_t size)
 64 | {
 65 | 	munmap(map, size);
 66 | }
 67 | 
 68 | /*
 69 |  * Parse json file fn using jsmn. Return array of tokens (count in *len if len
 70 |  * is set) and mapped file in *map/*size, or NULL on error. Caller frees both with free_json().
 71 |  */
 72 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len)
 73 | {
 74 | 	jsmn_parser parser;
 75 | 	jsmntok_t *tokens;
 76 | 	jsmnerr_t res;
 77 | 	unsigned sz;
 78 | 
 79 | 	*map = mapfile(fn, size);
 80 | 	if (!*map)
 81 | 		return NULL;
 82 | 	/* Heuristic */
 83 | 	sz = *size * 16;
 84 | 	tokens = calloc(1, sz);
 85 | 	if (!tokens)
 86 | 		goto error;
 87 | 	jsmn_init(&parser);
 88 | 	res = jsmn_parse(&parser, *map, *size, tokens,
 89 | 			 sz / sizeof(jsmntok_t));
 90 | 	if (res != JSMN_SUCCESS) {
 91 | 		fprintf(stderr, "%s: json error %d\n", fn, res);
 92 | 		goto error_free;
 93 | 	}
 94 | 	if (len)
 95 | 		*len = parser.toknext;
 96 | 	return tokens;
 97 | error_free:
 98 | 	free(tokens);
 99 | error:
100 | 	unmapfile(*map, *size);
101 | 	return NULL;
102 | }
103 | 
104 | void free_json(char *map, size_t size, jsmntok_t *tokens)
105 | {
106 | 	free(tokens);
107 | 	unmapfile(map, size);
108 | }
109 | 
110 | static int countchar(char *map, char c, int end)
111 | {
112 | 	int i;
113 | 	int count = 0;
114 | 	for (i = 0; i < end; i++)
115 | 		if (map[i] == c)
116 | 			count++;
117 | 	return count;
118 | }
119 | 
120 | /* Return line number of a jsmn token */
121 | int json_line(char *map, jsmntok_t *t)
122 | {
123 | 	return countchar(map, '\n', t->start) + 1;
124 | }
125 | 
126 | static const char *jsmn_types[] = {
127 | 	[JSMN_PRIMITIVE] = "primitive",
128 | 	[JSMN_ARRAY] = "array",
129 | 	[JSMN_OBJECT] = "object",
130 | 	[JSMN_STRING] = "string"
131 | };
132 | 
133 | #define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?")
134 | 
135 | /* Return type name of a jsmn token */
136 | const char *json_name(jsmntok_t *t)
137 | {
138 | 	return LOOKUP(jsmn_types, t->type);
139 | }
140 | 
141 | int json_len(jsmntok_t *t)
142 | {
143 | 	return t->end - t->start;
144 | }
145 | 
146 | /* Is string t equal to s (compared case-insensitively)? */
147 | int json_streq(char *map, jsmntok_t *t, const char *s)
148 | {
149 | 	unsigned len = t->end - t->start;
150 | 	return len == strlen(s) && !strncasecmp(map + t->start, s, len);
151 | }
152 | 


--------------------------------------------------------------------------------
/jevents/json.h:
--------------------------------------------------------------------------------
 1 | #ifndef JSON_H
 2 | #define JSON_H 1
 3 | 
 4 | #include "jsmn.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len);
11 | void free_json(char *map, size_t size, jsmntok_t *tokens);
12 | int json_line(char *map, jsmntok_t *t);
13 | const char *json_name(jsmntok_t *t);
14 | int json_streq(char *map, jsmntok_t *t, const char *s);
15 | int json_len(jsmntok_t *t);
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/jevents/libjevents.spec:
--------------------------------------------------------------------------------
 1 | Name:		libjevents
 2 | Version:	1
 3 | Release:	1%{?dist}
 4 | Summary:	libjevents shared library from pmu-tools
 5 | 
 6 | License:	BSD
 7 | URL:		https://github.com/andikleen/pmu-tools/jevents
 8 | # git clone https://github.com/andikleen/pmu-tools.git pmu-tools
 9 | # cd pmu-tools && tar czf jevents.tar.gz jevents/
10 | Source0:	jevents.tar.gz
11 | 
12 | %description
13 | jevents library from pmu-tools.
14 | 
15 | %prep
16 | %setup -q -n jevents
17 | 
18 | 
19 | %build
20 | %make_build PREFIX=%{buildroot}/usr
21 | 
22 | %install
23 | %make_install PREFIX=%{buildroot}/usr
24 | 
25 | %files
26 | /usr/bin/event-rmap
27 | /usr/bin/listevents
28 | /usr/bin/showevent
29 | /usr/include/*
30 | /usr/lib64/libjevents.a
31 | 
32 | %changelog
33 | 
34 | * Sat Mar 3 2018 Pablo Llopis <pablo.llopis@gmail.com> 1-1
35 | - Initial specfile version
36 | 


--------------------------------------------------------------------------------
/jevents/listevents.c:
--------------------------------------------------------------------------------
 1 | /* List all events */
 2 | /* -v print descriptions */
 3 | /* pattern  print only events matching shell pattern */
 4 | #include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | #include <fnmatch.h>
 8 | #include <errno.h>
 9 | #include <assert.h>
10 | #include "jevents.h"
11 | 
12 | int verbose = 0;
13 | 
14 | struct event {
15 | 	char *name;
16 | 	char *event;
17 | 	char *desc;
18 | };
19 | 
20 | struct walk_data {
21 | 	int count;
22 | 	int ind;
23 | 	char *match;
24 | 	struct event *events;
25 | };
26 | 
27 | static int count_event(void *data, char *name, char *event, char *desc)
28 | {
29 | 	struct walk_data *wd = data;
30 | 	if (wd->match && fnmatch(wd->match, name, 0))
31 | 		return 0;
32 | 	wd->count++;
33 | 	return 0;
34 | }
35 | 
36 | static int store_event(void *data, char *name, char *event, char *desc)
37 | {
38 | 	struct walk_data *wd = data;
39 | 
40 | 	if (wd->match && fnmatch(wd->match, name, 0))
41 | 		return 0;
42 | 	assert(wd->ind < wd->count);
43 | 	struct event *e = &wd->events[wd->ind++];
44 | 	e->name = strdup(name);
45 | 	e->event = strdup(event);
46 | 	e->desc = strdup(desc);
47 | 	return 0;
48 | }
49 | 
50 | static int cmp_events(const void *ap, const void *bp)
51 | {
52 | 	const struct event *a = ap;
53 | 	const struct event *b = bp;
54 | 	return strcmp(a->name, b->name);
55 | }
56 | 
57 | int main(int ac, char **av)
58 | {
59 | 	int err;
60 | 
61 | 	if (av[1] && !strcmp(av[1], "-v")) {
62 | 		av++;
63 | 		verbose = 1;
64 | 	}
65 | 
66 | 	err = read_events(NULL);
67 | 	if (err < 0) {
68 | 		fprintf(stderr, "Error reading JSON data: %s\n", strerror(errno));
69 | 		exit(1);
70 | 	}
71 | 	struct walk_data wd = { .match = av[1] };	/* optional shell pattern; NULL matches every event */
72 | 	walk_events(count_event, &wd);	/* pass 1: count matching events */
73 | 	walk_perf_events(count_event, &wd);
74 | 	wd.events = calloc(wd.count, sizeof(struct event));
75 | 	walk_events(store_event, &wd);	/* pass 2: copy them into the array */
76 | 	err = walk_perf_events(store_event, &wd);
77 | 	if (err < 0) {
78 | 		fprintf(stderr, "Error reading perf events: %s\n", strerror(errno)); /* err is negative here, so it is not a usable errno value */
79 | 		exit(1);
80 | 	}
81 | 	qsort(wd.events, wd.count, sizeof(struct event), cmp_events);	/* sort by event name */
82 | 	int i;
83 | 	for (i = 0; i < wd.count; i++) {
84 | 		struct event *e = &wd.events[i];
85 | 		printf("%-40s ", e->name);
86 | 		printf("%s\n", e->event);
87 | 		if (verbose && e->desc[0])
88 | 			printf("\t%s\n", e->desc); /* XXX word wrap */
89 | 	}
90 | 	return 0;
91 | }
92 | 


--------------------------------------------------------------------------------
/jevents/measure.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | 
18 | #ifndef MEASURE_H
19 | #define MEASURE_H 1
20 | 
21 | #include <stdio.h>
22 | 
23 | #ifdef __cplusplus
24 | extern "C" {
25 | #endif
26 | 
27 | #define N_COUNTER 4
28 | 
29 | struct measure {
30 | 	char *name;
31 | 	unsigned long long counter;
32 | 	int ratio_to; /* or -1 */
33 | 	unsigned long long (*func)(struct measure *m, 
34 | 			           unsigned long long total[N_COUNTER], int i);
35 | };
36 | 
37 | #ifdef EVENT_MACROS
38 | #define ETO(x,y) { #x, x, y }
39 | #define ETO0(x) ETO(x, 0)
40 | #define E(x) { #x, x, -1 }
41 | #define EFUNC(x,y, f) { #x, x, y, f }
42 | #endif
43 | 
44 | void measure_group_init(struct measure *g, char *name);
45 | void measure_group_start(void);
46 | void measure_group_stop(void);
47 | void measure_group_finish(void);
48 | void measure_print_all(FILE *fh);
49 | void measure_free_all(void);
50 | 
51 | #ifdef __cplusplus
52 | }
53 | #endif
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/jevents/perf-aux.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Support for mapping the AUX buffer, e.g. for reading Intel Processor Trace
 3 |  * Copyright (c) 2020 Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  *
 6 |  * Redistribution and use in source and binary forms, with or without
 7 |  * modification, are permitted provided that: (1) source code distributions
 8 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 9 |  * distributions including binary code include the above copyright notice and
10 |  * this paragraph in its entirety in the documentation or other materials
11 |  * provided with the distribution
12 |  *
13 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
14 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
15 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 |  */
17 | 
18 | #include <linux/perf_event.h>
19 | #include "perf-iter.h"
20 | #include <sys/mman.h>
21 | #include <stdlib.h>
22 | #include <unistd.h>
23 | 
24 | /**
25 |  * perf_aux_map - Map AUX buffer for an open perf_fd.
26 |  * @pfd: Already opened perf_fd on PMU supporting aux.
27 |  * @aux: perf_aux structure to store the mapping.
28 |  * @aux_size_shift: log 2 of mapped buffer size in pages.
29 |  * @snapshot: When true the aux buffer will run in continuous ring buffer mode and not stop on overflow.
30 |  *
31 |  * Some perf event PMUs, such as intel_pt, support an extra aux buffer to
32 |  * report raw data from the hardware. Map the AUX buffer for an already
33 |  * mapped perf_fd
34 |  *
35 |  * The aux buffer size is limited by the mlock rlimit, as well as
36 |  * /proc/sys/kernel/perf_event_mlock_kb.
37 |  *
38 |  * Returns -1 if the mapping failed, otherwise 0.
39 |  */
40 | int perf_aux_map(struct perf_fd *pfd, struct perf_aux_map *aux, int aux_size_shift,
41 | 		  bool snapshot)
42 | {
43 | 	struct perf_event_mmap_page *mp = pfd->mpage;
44 | 
45 | 	mp->aux_offset = perf_mmap_size(pfd->buf_size_shift);
46 | 	mp->aux_size = sysconf(_SC_PAGE_SIZE) << aux_size_shift;
47 | 	aux->aux_map = mmap(NULL, mp->aux_size,
48 | 			    PROT_READ | (snapshot ? 0 : PROT_WRITE),
49 | 			    MAP_SHARED,
50 | 			    pfd->pfd,
51 | 			    mp->aux_offset);
52 | 	return aux->aux_map == (void*)-1L ? -1 : 0;
53 | }
54 | 
55 | /**
56 |  * perf_aux_unmap - Unmap an aux buffer.
57 |  * @pfd: perf_fd passed to perf_aux_map.
58 |  * @aux: Aux structure to unmap.
59 |  */
60 | void perf_aux_unmap(struct perf_fd *pfd, struct perf_aux_map *aux)
61 | {
62 | 	munmap(aux->aux_map, pfd->mpage->aux_size);
63 | }
64 | 


--------------------------------------------------------------------------------
/jevents/perf-iter.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PERF_ITER_H
 2 | #define _PERF_ITER_H 1
 3 | 
 4 | #include <stdint.h>
 5 | #include <stdbool.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | struct perf_event_mmap_page;
12 | struct perf_event_header;
13 | 
14 | /* Iterator for perf ring buffer */
15 | 
16 | struct perf_iter {
17 | 	uint64_t ring_buffer_mask;
18 | 	uint64_t head, cur, raw_head, bufsize;
19 | 	int64_t avail;
20 | 	char *data;
21 | 	struct perf_event_mmap_page *mpage;
22 | };
23 | 
24 | struct perf_fd { 
25 | 	int pfd;
26 | 	struct perf_event_mmap_page *mpage;
27 | 	int buf_size_shift;
28 | };
29 | 
30 | struct perf_aux_map {
31 | 	void *aux_map;
32 | };
33 | 
34 | int perf_fd_open(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift);
35 | int perf_fd_open_other(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift,
36 | 		       int pid, int cpu);
37 | void perf_fd_close(struct perf_fd *p);
38 | void perf_iter_continue(struct perf_iter *iter);
39 | struct perf_event_header *perf_buffer_read(struct perf_iter *iter, void *buffer, int bufsize);
40 | void perf_iter_init(struct perf_iter *iter, struct perf_fd *pfd);
41 | int perf_enable(struct perf_fd *p);
42 | int perf_disable(struct perf_fd *p);
43 | 
44 | unsigned perf_mmap_size(int buf_size_shift);
45 | 
46 | int perf_aux_map(struct perf_fd *pfd, struct perf_aux_map *aux, int size, bool snapshot);
47 | void perf_aux_unmap(struct perf_fd *pfd, struct perf_aux_map *aux);
48 | 
49 | static inline int perf_iter_finished(struct perf_iter *iter)
50 | {
51 | 	return iter->avail <= 0;
52 | }
53 | 
54 | static inline uint64_t *perf_hdr_payload(struct perf_event_header *hdr)
55 | {
56 | 	return (uint64_t *)(hdr + 1);
57 | }
58 | 
59 | #ifdef __cplusplus
60 | }
61 | #endif
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/jevents/perf_event_open.c:
--------------------------------------------------------------------------------
 1 | /* Until glibc provides a proper stub ... */
 2 | #include <linux/perf_event.h>
 3 | #include <unistd.h>
 4 | #include <sys/syscall.h>
 5 | 
 6 | /* If someone else has a better one we use that */
 7 | 
 8 | __attribute__((weak))
 9 | int perf_event_open(struct perf_event_attr *attr, pid_t pid,
10 | 		    int cpu, int group_fd, unsigned long flags)
11 | {
12 | 	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
13 | }
14 | 


--------------------------------------------------------------------------------
/jevents/print.c:
--------------------------------------------------------------------------------
 1 | // SPDX-License-Identifier: BSD-3-Clause
 2 | // Copyright 2021 Intel Corporation.
 3 | // Author: Andi Kleen
 4 | #include "jevents.h"
 5 | #include "jsession.h"
 6 | 
 7 | static void print_runtime(FILE *outfh, uint64_t *val)
 8 | {
 9 | 	if (val[1] != val[2])
10 | 		fprintf(outfh, " [%2.2f%%]", ((double)val[2] / val[1]) * 100.);
11 | }
12 | 
/**
 * session_print_timestamp - Format a perf stat style timestamp
 * @buf: Destination string buffer. Should be SESSION_TIMESTAMP_LEN sized.
 * @bufs: Size of @buf in bytes; output is truncated to fit.
 * @ts: Timestamp value, printed with nine decimals and a trailing tab.
 */
void session_print_timestamp(char *buf, int bufs, double ts)
{
	(void)snprintf(buf, (size_t)bufs, "% 12.9f\t", ts);
}
23 | 
24 | /**
25 |  * session_print_aggr - Print event list values in perf stat like output aggregated
26 |  * @outfh: File descriptor to print to.
27 |  * @el: Event list to print. It must have been measured before.
28 |  * @arg: Argument. Used prefix and merge.
29 |  *
30 |  * This version aggregates values over all CPUs.
31 |  */
32 | 
33 | void session_print_aggr(FILE *outfh, struct eventlist *el, struct session_print *arg)
34 | {
35 | 	struct event *e;
36 | 	int i;
37 | 
38 | 	for (e = el->eventlist; e; e = e->next) {
39 | 		if (arg->merge && e->orig)
40 | 			continue;
41 | 
42 | 		uint64_t v = 0, val[3] = { 0, 0, 0 };
43 | 		for (i = 0; i < el->num_cpus; i++) {
44 | 			v += event_scaled_value(e, i);
45 | 			// assumes all are scaled the same way
46 | 			if (e->efd[i].val[2]) {
47 | 				val[1] += e->efd[i].val[1];
48 | 				val[2] += e->efd[i].val[2];
49 | 			}
50 | 		}
51 | 		if (val[1] == 0 && el->num_cpus > 0) {
52 | 			val[1] = e->efd[0].val[1];
53 | 			val[2] = e->efd[0].val[2];
54 | 		}
55 | 
56 | 		fprintf(outfh, "%s%-30s %'15lu", arg->prefix ? arg->prefix : "",
57 | 				e->extra.name ? e->extra.name : e->event, v);
58 | 		print_runtime(outfh, val);
59 | 		putc('\n', outfh);
60 | 	}
61 | }
62 | 
63 | /**
64 |  * session_print - Print event list values in perf stat like output.
65 |  * @outfh: File descriptor to print to.
66 |  * @el: Event list to print. It must have been measured before.
67 |  * @arg: Argument. Used prefix and merge.
68 |  *
69 |  * This version prints each CPU individually (like perf stat -A)
70 |  */
71 | void session_print(FILE *outfh, struct eventlist *el, struct session_print *arg)
72 | {
73 | 	struct event *e;
74 | 	int i;
75 | 
76 | 	for (e = el->eventlist; e; e = e->next) {
77 | 		uint64_t v;
78 | 		for (i = 0; i < el->num_cpus; i++) {
79 | 			if (e->efd[i].fd < 0)
80 | 				continue;
81 | 			if (arg->merge && e->orig)
82 | 				continue;
83 | 			v = event_scaled_value(e, i);
84 | 			fprintf(outfh, "%s%3d %-30s %'15lu", arg->prefix ? arg->prefix : "",
85 | 					i,
86 | 					e->extra.name ? e->extra.name : e->event, v);
87 | 			print_runtime(outfh, e->efd[i].val);
88 | 			putc('\n', outfh);
89 | 		}
90 | 	}
91 | }
92 | 


--------------------------------------------------------------------------------
/jevents/rawevent.c:
--------------------------------------------------------------------------------
 1 | /* Output raw events in perf form. */
 2 | /*
 3 |  * Copyright (c) 2014, Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *
10 |  * 1. Redistributions of source code must retain the above copyright notice,
11 |  * this list of conditions and the following disclaimer.
12 |  *
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
29 | */
30 | 
31 | #include <linux/perf_event.h>
32 | #include <stdio.h>
33 | #include <string.h>
34 | #include <stdlib.h>
35 | #include "jevents.h"
36 | 
37 | #define BUFS 1024
38 | 
39 | /** 
40 |  * format_raw_event - Format a resolved event for perf's command line tool
41 |  * @attr: Previously resolved perf_event_attr.
42 |  * @name: Name to add to the event or NULL.
43 |  * Return a string of the formatted event. The caller must free string.
44 |  */
45 | 
46 | char *format_raw_event(struct perf_event_attr *attr, char *name)
47 | {
48 | 	char buf[BUFS];
49 | 	int off = 0;
50 | 	char *pmu;
51 | 
52 | 	pmu = resolve_pmu(attr->type);
53 | 	if (!pmu)
54 | 		return NULL;
55 | 	off = snprintf(buf, BUFS, "%s/config=%#llx", pmu, attr->config);
56 | 	free(pmu);
57 | 	if (attr->config1)
58 | 		off += sprintf(buf + off, ",config1=%#llx", attr->config1);
59 | 	if (attr->config2)
60 | 		off += sprintf(buf + off, ",config2=%#llx", attr->config2);
61 | 	if (name)
62 | 		off += snprintf(buf + off, BUFS - off, ",name=%s", name);
63 | 	off += snprintf(buf + off, BUFS - off, "/");
64 | 	return strdup(buf);
65 | }
66 | 


--------------------------------------------------------------------------------
/jevents/rdpmc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2012,2013 Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that: (1) source code distributions
  7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
  8 |  * distributions including binary code include the above copyright notice and
  9 |  * this paragraph in its entirety in the documentation or other materials
 10 |  * provided with the distribution
 11 |  *
 12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 15 |  */
 16 | 
 17 | /* Ring 3 RDPMC support */
 18 | #include <unistd.h>
 19 | #include <stdio.h>
 20 | #include <sys/mman.h>
 21 | #include <sys/fcntl.h>
 22 | #include <linux/perf_event.h>
 23 | #include <stdint.h>
 24 | #include <stdlib.h>
 25 | #include <x86intrin.h>
 26 | #include "jevents.h"
 27 | 
 28 | /**
 29 |  * DOC: Ring 3 counting for CPU performance counters
 30 |  *
 31 |  * This library allows accessing CPU performance counters from ring 3
 32 |  * using the perf_events subsystem. This is useful to measure specific
 33 |  * parts of programs (e.g. excluding initialization code)
 34 |  *
 35 |  * Requires a Linux 3.3+ kernel
 36 |  */
 37 | 
 38 | #include "rdpmc.h"
 39 | 
 40 | typedef unsigned long long u64;
 41 | 
 42 | #define rmb() asm volatile("" ::: "memory")
 43 | 
 44 | /**
 45 |  * rdpmc_open - initialize a simple ring 3 readable performance counter
 46 |  * @counter: Raw event descriptor (UUEE UU unit mask EE event)
 47 |  * @ctx:     Pointer to struct &rdpmc_ctx that is initialized
 48 |  *
 49 |  * The counter will be set up to count CPU events excluding the kernel.
 50 |  * Must be called for each thread using the counter.
 51 |  * The caller must make sure counter is suitable for the running CPU.
 52 |  * Only works in 3.3+ kernels.
 53 |  * Must be closed with rdpmc_close()
 54 |  */
 55 | 
 56 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx)
 57 | {
 58 | 	struct perf_event_attr attr = {
 59 | 		.type = counter > 10 ? PERF_TYPE_RAW : PERF_TYPE_HARDWARE,
 60 | 		.size = PERF_ATTR_SIZE_VER0,
 61 | 		.config = counter,
 62 | 		.sample_type = PERF_SAMPLE_READ,
 63 | 		.exclude_kernel = 1,
 64 | 	};
 65 | 	return rdpmc_open_attr(&attr, ctx, NULL);
 66 | }
 67 | 
 68 | /**
 69 |  * rdpmc_open_attr - initialize a raw ring 3 readable performance counter
 70 |  * @attr: perf struct %perf_event_attr for the counter
 71 |  * @ctx:  Pointer to struct %rdpmc_ctx that is initialized.
 72 |  * @leader_ctx: context of group leader or NULL
 73 |  *
 74 |  * This allows more flexible setup with a custom &perf_event_attr.
 75 |  * For simple uses rdpmc_open() should be used instead.
 76 |  * Must be called for each thread using the counter.
 77 |  * Must be closed with rdpmc_close()
 78 |  */
 79 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx,
 80 | 		    struct rdpmc_ctx *leader_ctx)
 81 | {
 82 | 	ctx->fd = perf_event_open(attr, 0, -1,
 83 | 			  leader_ctx ? leader_ctx->fd : -1, 0);
 84 | 	if (ctx->fd < 0) {
 85 | 		perror("perf_event_open");
 86 | 		return -1;
 87 | 	}
 88 | 	ctx->buf = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, ctx->fd, 0);
 89 | 	if (ctx->buf == MAP_FAILED) {
 90 | 		close(ctx->fd);
 91 | 		perror("mmap on perf fd");
 92 | 		return -1;
 93 | 	}
 94 | 	return 0;
 95 | }
 96 | 
 97 | /**
 98 |  * rdpmc_close - free a ring 3 readable performance counter
 99 |  * @ctx: Pointer to &rdpmc_ctx context.
100 |  *
101 |  * Must be called by each thread for each context it initialized.
102 |  */
103 | void rdpmc_close(struct rdpmc_ctx *ctx)
104 | {
105 | 	close(ctx->fd);
106 | 	munmap(ctx->buf, sysconf(_SC_PAGESIZE));
107 | }
108 | 
/**
 * rdpmc_read - read a ring 3 readable performance counter
 * @ctx: Pointer to initialized &rdpmc_ctx structure.
 *
 * Read the current value of a running performance counter.
 * This should only be called from the same thread/process as opened
 * the context. For new threads please create a new context.
 *
 * Returns the counter value plus the offset from the mmap page,
 * truncated to the low 48 bits. When the page reports index 0 the
 * hardware counter is not read and only the offset contributes.
 */
unsigned long long rdpmc_read(struct rdpmc_ctx *ctx)
{
	u64 val;
	unsigned seq;
	u64 offset; 
	typeof (ctx->buf) buf = ctx->buf;
	unsigned index;

	/*
	 * Retry loop: sample buf->lock, read index/offset and the counter,
	 * then start over if buf->lock changed in the meantime.
	 * rmb() in this file is a compiler barrier only.
	 */
	do {
		seq = buf->lock;
		rmb();
		index = buf->index;
		offset = buf->offset;
		if (index == 0) { /* rdpmc not allowed */
			val = 0;
			break;
		}
		/* the page's index is one-based; _rdpmc() is given index - 1 */
		val = _rdpmc(index - 1);
		rmb();
	} while (buf->lock != seq);
	/* keep the low 48 bits of the sum */
	return (val + offset) & 0xffffffffffff;
}
139 | 
140 | 


--------------------------------------------------------------------------------
/jevents/rdpmc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2012,2013 Intel Corporation
 3 |  * Author: Andi Kleen
 4 |  *
 5 |  * Redistribution and use in source and binary forms, with or without
 6 |  * modification, are permitted provided that: (1) source code distributions
 7 |  * retain the above copyright notice and this paragraph in its entirety, (2)
 8 |  * distributions including binary code include the above copyright notice and
 9 |  * this paragraph in its entirety in the documentation or other materials
10 |  * provided with the distribution
11 |  *
12 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
13 |  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
15 |  */
16 | 
17 | #ifndef RDPMC_H
18 | #define RDPMC_H 1
19 | 
20 | #include <linux/perf_event.h>
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
/*
 * State of one ring 3 readable counter. Initialized by rdpmc_open() or
 * rdpmc_open_attr() and released by rdpmc_close().
 */
struct rdpmc_ctx {
	int fd;					/* descriptor returned by perf_event_open() */
	struct perf_event_mmap_page *buf;	/* read-only mapping of the first page of fd */
};
30 | 
31 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx);
32 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx, 
33 | 		    struct rdpmc_ctx *leader_ctx);
34 | void rdpmc_close(struct rdpmc_ctx *ctx);
35 | unsigned long long rdpmc_read(struct rdpmc_ctx *ctx);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/jevents/showevent.c:
--------------------------------------------------------------------------------
 1 | /* Resolve perf event descriptions with symbolic names to raw perf descriptions */
 2 | #include "jevents.h"
 3 | #include <linux/perf_event.h>
 4 | #include <stdio.h>
 5 | #include <string.h>
 6 | #include <unistd.h>
 7 | #include <stdlib.h>
 8 | 
 9 | int main(int ac, char **av)
10 | {
11 | 	int test = 0;
12 | 	int ret = 0;
13 | 
14 | 	while (*++av) {
15 | 		if (!strcmp(*av, "--test")) {
16 | 			test = 1;
17 | 			continue;
18 | 		}
19 | 
20 | 		struct perf_event_attr attr;
21 | 		if (resolve_event(*av, &attr) < 0) {
22 | 			fprintf(stderr, "Cannot resolve %s\n", *av); 
23 | 			ret = 1;
24 | 			continue;
25 | 		}
26 | 		char *ev = format_raw_event(&attr, *av);
27 | 		printf("%s\n", ev);
28 | 		free(ev);
29 | 		if (test) {
30 | 			if (perf_event_open(&attr, 0, -1, -1, 0) < 0)
31 | 				perror("perf_event_open");
32 | 		}
33 | 	}
34 | 	return ret;
35 | }
36 | 


--------------------------------------------------------------------------------
/jevents/tester:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # tests for jevents
 3 | # may need executing event_download.py first to get event list for this cpu
 4 | set -e 
 5 | set -x
 6 | 
 7 | failed() {
 8 |         echo FAILED
 9 | }
10 | trap failed ERR 0
11 | 
12 | PATH=.:./examples:$PATH
13 | 
14 | 
15 | $V listevents > l$$
16 | [ "$(wc -l < l$$)" -gt 50 ]
17 | grep -q offcore_response l$$
18 | 
19 | if grep -q br_misp_retired.taken l$$ ; then
20 | 	E=br_misp_retired.taken
21 | elif grep -q br_misp_retired.near_taken l$$ ; then
22 | 	E=br_misp_retired.near_taken
23 | else
24 | 	E=instructions
25 | fi
26 | 
27 | if [ "$(ls /sys/bus/event_source/devices/cpu*/events/instructions | wc -l)" -gt 0 ] ; then
28 | $V jestat true
29 | $V jestat -e cpu-cycles,cpu_clk_unhalted.ref_tsc,$E true
30 | $V jestat -e "{cpu-cycles,cpu_clk_unhalted.ref_tsc},{$E,cache-references}" -a sleep 1
31 | $V jestat -a sleep 1
32 | $V jestat -a -e "uops_executed.thread:k" sleep 1
33 | OCR=$(grep -E '^(offcore_response|ocr)\.' l$$ | head -1  | cut -d ' ' -f 1)
34 | $V jestat -a -e "$OCR:config1=0x1" sleep 1
35 | $V jestat -A -a -I 500 cycles sleep 2
36 | 
37 | # test all events
38 | LEN=$(wc -l l$$ | awk ' { print $1 }')
39 | INC=20
40 | 
41 | # skip i915/vcs-* which often returns ENODEV for no good reason
42 | SKIP="i915-vcs"
43 | if [ ! -d /sys/bus/event_source/devices/uncore_upi_0 ] ; then
44 | 	SKIP="$SKIP|upi_"
45 | fi
46 | 
47 | for ((i = 1; i <= LEN; i += INC)) ; do
48 | 	# shellcheck disable=SC2046
49 | 	$V jestat $(nl l$$ |
50 | 		    grep -E -v "$SKIP" |
51 | 		    awk -v v=$i -v inc=$INC '$1 >= v && $1 <= v+inc { print "-e " $2 } ') -a true
52 | done
53 | 
54 | $V showevent $E
55 | 
56 | fi
57 | 
58 | $V event-rmap $E
59 | 
60 | if [ "$(ls /sys/bus/event_source/devices/cpu*/events/instructions | wc -l)" -gt 0 ] ; then
61 | $V examples/addr
62 | examples/rtest
63 | examples/rtest2
64 | fi
65 | 
66 | rm l$$
67 | 
68 | trap "" ERR 0
69 | 
70 | echo SUCCEEDED
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/jevents/util.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
/* Print x via perror() and terminate with status 1; users need <stdio.h> and <stdlib.h>. */
#define err(x) perror(x), exit(1)
/* Empty asm statement with a "memory" clobber: a compiler-level barrier. */
#define mb() asm volatile("" ::: "memory")
/* One mebibyte in bytes. */
#define MB (1024*1024)
/* Short names for 64-bit unsigned/signed integers. */
typedef unsigned long long u64;
typedef long long s64;
10 | 
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 | 


--------------------------------------------------------------------------------
/knl_ratios.py:
--------------------------------------------------------------------------------
 1 | import metrics
 2 | import node
 3 | import slm_ratios as slm
 4 | 
 5 | version = "1.0"
 6 | 
 7 | slm.set_clks_event_name("CPU_CLK_UNHALTED.THREAD")
 8 | 
 9 | smt_enabled = False
10 | 
# The classes below re-export the slm_ratios nodes under this module's
# namespace. Only ICacheMisses changes behavior; the rest inherit everything.
class CyclesPerUop(slm.CyclesPerUop):
    pass

# LEVEL 1
class FrontendBound(slm.FrontendBound):
    pass

class BackendBound(slm.BackendBound):
    pass

class BadSpeculation(slm.BadSpeculation):
    pass

class Retiring(slm.Retiring):
    pass

# LEVEL 2
class FrontendLatency(slm.FrontendLatency):
    pass

# LEVEL 3
class ICacheMisses(slm.ICacheMisses):
    # Override _compute(), since KNL does not have
    # the DECODE_RESTRICTION.PDCACHE_WRONG event
    def _compute(self, ev):
        # Uses only the icache line fetch cost helper from slm_ratios.
        return slm.icache_line_fetch_cost(ev, self.level)

class ITLBMisses(slm.ITLBMisses):
    pass

class MSSwitches(slm.MSSwitches):
    pass
43 | 
class Setup(object):
    """Builds the KNL node tree and registers it with the given runner."""

    def __init__(self, runner):
        # Instantiate nodes as required to be able to specify their
        # references

        # L3 objects
        icache_misses = ICacheMisses()
        itlb_misses = ITLBMisses()
        ms_cost = MSSwitches()

        # L1 objects
        frontend = FrontendBound()
        bad_speculation = BadSpeculation()
        retiring = Retiring()
        backend = BackendBound(retiring=retiring,
                               bad_speculation=bad_speculation,
                               frontend=frontend)


        # L2 objects
        frontend_latency = FrontendLatency(icache_misses=icache_misses,
                                           itlb=itlb_misses,
                                           ms_cost=ms_cost,
                                           frontend=frontend
                                           )

        # Set parents
        node.set_parent(None, [frontend, bad_speculation, retiring, backend])
        node.set_parent(frontend, [frontend_latency])
        node.set_parent(frontend_latency,
                        [icache_misses, itlb_misses, ms_cost])

        # User visible metrics
        user_metrics = [slm.Metric_IPC(), slm.Metric_CPI(),
                        slm.Metric_TurboUtilization(),
                        slm.Metric_CLKS(), slm.Metric_Time(),
                        slm.CyclesPerUop()]

        # Collect the nodes by scanning every local variable of this
        # method: anything that is a metrics.MetricBase instance with
        # level > 0 is picked up. Any new local of that kind added above
        # would therefore be registered as well.
        nodes = [obj for obj in locals().values()
                 if issubclass(obj.__class__, metrics.MetricBase) and
                 obj.level > 0]

        # Lower levels are handed to the runner first.
        nodes = sorted(nodes, key=lambda n: n.level)

        # Pass to runner
        list(map(runner.run, nodes))
        list(map(runner.metric, user_metrics))
91 | 


--------------------------------------------------------------------------------
/latego.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # configure latego workaround on Sandy Bridge EP
  3 | # can be run as a standalone tool or used as module
  4 | # latego enable|disable hexevent
  5 | from __future__ import print_function
  6 | import signal
  7 | import struct
  8 | import re
  9 | import os
 10 | import msr
 11 | import pci
 12 | 
 13 | busses = (0x3f, 0x7f, 0xbf, 0xff)
 14 | 
 15 | def local_direct2core(val):
 16 |     c = 0
 17 |     for b in busses:
 18 |         if pci.probe(b, 14, 0):
 19 |             pci.changebit(b, 14, 0, 0x84, 1, val)
 20 |             c += 1
 21 |     if c == 0:
 22 |         print("no local devices found")
 23 | 
 24 | def remote_direct2core(val):
 25 |     c = 0
 26 |     for b in busses:
 27 |         if pci.probe(b, 8, 0):
 28 |             pci.changebit(b, 8, 0, 0x80, 1, val)
 29 |             pci.changebit(b, 9, 0, 0x80, 1, val)
 30 |             c += 1
 31 |     if c == 0:
 32 |         print("no remote devices found")
 33 | 
 34 | def direct2core(val):
 35 |     # make sure all cores are awake when we do that
 36 |     f = os.open("/dev/cpu_dma_latency", os.O_WRONLY)
 37 |     os.write(f, struct.pack("I", 0))
 38 |     local_direct2core(val)
 39 |     remote_direct2core(val)
 40 |     os.close(f)
 41 | 
def set_bypass(val):
    # Set (val true) or clear (val false) bit 0 of MSR 0x39c through msr.changebit.
    msr.changebit(0x39c, 0, val)
 44 | 
 45 | bypass  = 1 << 0
 46 | d2c     = 1 << 1
 47 | latego_events = {
 48 |     0x04d1: bypass,
 49 |     0x20d1: bypass|d2c,
 50 |     0x01d3: bypass|d2c,
 51 |     0x04d3: bypass|d2c,
 52 |     0x01d2: bypass,
 53 |     0x02d2: bypass,
 54 |     0x04d2: bypass,
 55 |     0x08d2: bypass,
 56 |     0x01cd: bypass|d2c
 57 | }
 58 | 
 59 | latego_names = {
 60 |     "mem_load_uops_retired.llc_hit": 0x04d1,
 61 |     "mem_load_uops_retired.llc_miss": 0x20d1,
 62 |     "mem_load_uops_llc_miss_retired.local_dram": 0x01d3,
 63 |     "mem_load_uops_llc_miss_retired.remote_dram": 0x04d3,
 64 |     "mem_load_uops_llc_hit_retired.xsnp_miss": 0x01d2,
 65 |     "mem_load_uops_llc_hit_retired.xsnp_hit": 0x02d2,
 66 |     "mem_load_uops_llc_hit_retired.xsnp_hitm": 0x04d2,
 67 |     "mem_load_uops_llc_hit_retired.xsnp_none": 0x08d2,
 68 |     "mem_trans_retired.load_latency": 0x01cd
 69 | }
 70 | 
 71 | signal_setup = False
 72 | enabled = 0
 73 | 
 74 | def cleanup():
 75 |     if enabled & bypass:
 76 |         set_bypass(0)
 77 |     if enabled & d2c:
 78 |         direct2core(0)
 79 | 
 80 | def get_event(e):
 81 |     if re.match(r"[0-9]+", e):
 82 |         return int(e, 16)
 83 |     if e in latego_names:
 84 |         return latego_names[e]
 85 |     return e
 86 | 
 87 | def setup_event(event, val):
 88 |     global signal_setup
 89 |     global enabled
 90 |     action = ("Disabling", "Enabling")[val]
 91 |     if val and not signal_setup:
 92 |         signal.signal(signal.SIGINT, cleanup)
 93 |         signal.signal(signal.SIGPIPE, cleanup)
 94 |         signal_setup = True
 95 |     if event in latego_events:
 96 |         v = latego_events[event]
 97 |         if v & d2c:
 98 |             print("%s direct2core" % (action))
 99 |             direct2core(val)
100 |         if v & bypass:
101 |             print("%s bypass" % (action))
102 |             set_bypass(val)
103 |         if val:
104 |             enabled = v
105 |         else:
106 |             enabled = 0
107 | 
if __name__ == '__main__':
    # Command line front end: "enable|disable EVENT" or "list".
    import sys
    argv = sys.argv
    command = argv[1] if len(argv) > 1 else None
    if len(argv) == 3 and command in ("enable", "disable"):
        setup_event(get_event(argv[2]), 1 if command == "enable" else 0)
    elif len(argv) == 2 and command == "list":
        print("%-45s %04s" % ("name", "hex"))
        for name, code in latego_names.items():
            print("%-45s %04x" % (name, code, ))
    else:
        print("Usage: latego enable|disable hexevent|namedevent")
        print("       latego list")
        sys.exit(1)
122 | 


--------------------------------------------------------------------------------
/list-events.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # print all events in a eventmap
 3 | from __future__ import print_function
 4 | import sys
 5 | import ocperf
# Look up the event map via ocperf; a false result means no table was found.
emap = ocperf.find_emap()
if not emap:
    # sys.exit with a string prints it to stderr and exits with status 1
    sys.exit("Unknown CPU or cannot find event table")
# One entry of emap.events per line, in sorted order.
for j in sorted(emap.events):
    print(j)
11 | 


--------------------------------------------------------------------------------
/listutils.py:
--------------------------------------------------------------------------------
 1 | # generic utilities for lists
 2 | import sys
 3 | from itertools import chain
 4 | 
 5 | if sys.version_info.major == 3:
 6 |     from itertools import zip_longest
 7 | else:
 8 |     from itertools import izip_longest
 9 |     zip_longest = izip_longest
10 | 
def flatten(x):
    """Return one list with the items of every iterable in x (one level deep)."""
    return list(chain.from_iterable(x))
13 | 
def filternot(p, l):
    """Return the items of l for which predicate p is false, as a list."""
    return [item for item in l if not p(item)]
16 | 
# return a followed by the items of b that are not in a
# (a is not modified; repeats inside b are kept)
def cat_unique(a, b):
    known = frozenset(a)
    extra = list(filter(lambda item: item not in known, b))
    return a + extra
22 | 
# drop repeated items, keeping the first occurrence of each in place
def dedup(a):
    seen = set()
    unique = []
    for item in a:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique
32 | 
def not_list(l):
    """Return a list holding the logical negation of every item of l."""
    return list(map(lambda item: not item, l))
35 | 
# merge b into a in place: values of shared keys are extended with +=,
# keys only present in b are added to a
def append_dict(a, b):
    for key, extra in b.items():
        if key not in a:
            a[key] = extra
        else:
            a[key] += extra
43 | 
# build a dict with the keys of a, where each value is a list of val
# repeated as many times as the matching list in a is long
def dummy_dict(a, val=0.0):
    return dict((key, [val] * len(items)) for key, items in a.items())
47 | 
def padlist(l, length, val=0.0):
    """Return l extended with val up to the given length.

    A list that is already long enough is returned as is (same object).
    """
    missing = length - len(l)
    if missing <= 0:
        return l
    return l + [val] * missing
52 | 
def findprefix(l, prefix, stop=None):
    """Return the index of the first item of l starting with prefix.

    The search gives up when an item equal to stop is reached.
    Returns -1 when nothing was found.
    """
    for pos, item in enumerate(l):
        if item == stop:
            return -1
        if item.startswith(prefix):
            return pos
    return -1
60 | 


--------------------------------------------------------------------------------
/msr:
--------------------------------------------------------------------------------
1 | msr.py


--------------------------------------------------------------------------------
/msr.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # library and tool to access Intel MSRs (model specific registers)
 3 | # Author: Andi Kleen
 4 | from __future__ import print_function
 5 | import glob
 6 | import struct
 7 | import os
 8 | 
 9 | def writemsr(msr, val):
10 |     n = glob.glob('/dev/cpu/[0-9]*/msr')
11 |     for c in n:
12 |         f = os.open(c, os.O_WRONLY)
13 |         os.lseek(f, msr, os.SEEK_SET)
14 |         os.write(f, struct.pack('Q', val))
15 |         os.close(f)
16 |     if not n:
17 |         raise OSError("msr module not loaded (run modprobe msr)")
18 | 
def readmsr(msr, cpu = 0):
    """Return the 64-bit value of register msr as read on the given cpu.

    The value is read from /dev/cpu/<cpu>/msr at file offset msr. The
    descriptor is closed even when the seek, read or unpack fails.
    """
    f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_RDONLY)
    try:
        os.lseek(f, msr, os.SEEK_SET)
        return struct.unpack('Q', os.read(f, 8))[0]
    finally:
        os.close(f)
25 | 
def changebit(msr, bit, val):
    """Set (val true) or clear (val false) one bit of register msr on every CPU.

    Each /dev/cpu/N/msr device is read, modified and written back. The
    descriptor is closed even when one of those steps fails.

    Raises OSError when no msr device exists, or on any failing system call.
    """
    n = glob.glob('/dev/cpu/[0-9]*/msr')
    if not n:
        raise OSError("msr module not loaded (run modprobe msr)")
    mask = 1 << bit
    for c in n:
        f = os.open(c, os.O_RDWR)
        try:
            os.lseek(f, msr, os.SEEK_SET)
            v = struct.unpack('Q', os.read(f, 8))[0]
            if val:
                v = v | mask
            else:
                v = v & ~mask
            # the read advanced the offset; seek back before writing
            os.lseek(f, msr, os.SEEK_SET)
            os.write(f, struct.pack('Q', v))
        finally:
            os.close(f)
41 | 
if __name__ == '__main__':
    # Command line tool: read, write, or change single bits of an MSR.
    import argparse

    def parse_hex(s):
        """argparse type callback: parse s as a hexadecimal integer."""
        try:
            return int(s, 16)
        except ValueError:
            # ArgumentTypeError is the exception argparse expects from a
            # type callback; it becomes a regular usage error message.
            raise argparse.ArgumentTypeError("Bad hex number %s" % (s))

    # try to load the msr driver when its device node is missing
    if not os.path.exists("/dev/cpu/0/msr"):
        os.system("/sbin/modprobe msr")

    p = argparse.ArgumentParser(description='Access x86 model specific registers.')
    p.add_argument('msr', type=parse_hex, help='number of the MSR to access')
    p.add_argument('value', nargs='?', type=parse_hex, help='value to write (if not specified read)')
    p.add_argument('--setbit', type=int, help='Bit number to set')
    p.add_argument('--clearbit', type=int, help='Bit number to clear')
    p.add_argument('--cpu', type=int, default=0, help='CPU to read on (writes always change all)')
    args = p.parse_args()
    # Compare against None: bit number 0 is a valid request but is falsy.
    if args.setbit is not None:
        changebit(args.msr, args.setbit, 1)
    elif args.clearbit is not None:
        changebit(args.msr, args.clearbit, 0)
    elif args.value is None:
        print("%x" % (readmsr(args.msr, args.cpu)))
    else:
        writemsr(args.msr, args.value)
69 | 


--------------------------------------------------------------------------------
/node.py:
--------------------------------------------------------------------------------
 1 | # Helper classes and functions for nodes
 2 | 
 3 | # Decorator class to declare reference dependecies between classes
 4 | class requires(object):
 5 |     """Decorator to mark required references. These references will
 6 |     be added to the object as instance attributes. Example:
 7 | 
 8 |     @requires("ref1", "ref2")
 9 |     class SomeClass(object):
10 |         def some_method(self):
11 |             return self.ref1 + self.ref2
12 | 
13 |     """
14 |     def __init__(self, *required_refs):
15 |         self.required_refs = required_refs
16 | 
17 |     def __call__(self, cls):
18 |         setattr(cls, "required_refs", self.required_refs)
19 |         return cls
20 | 
def set_parent(parent, nodes):
    """Assign parent to the `parent` attribute of every object in nodes."""
    for child in nodes:
        setattr(child, "parent", parent)
24 | 
# Verify the required references before running the wrapped method
def check_refs(fn):
    """Method decorator that validates an object's required references.

    Before fn runs, the object must have a `required_refs` attribute and
    an attribute for every name listed in it; otherwise an Exception is
    raised. Example:

    @requires("retiring", "bad_speculation", "frontend_bound")
    class BackendBound(object):
        @check_refs
        def _compute(self, ev):
            # checks if required refs are set before executing

    """
    def wrapped(self, *args, **kwargs):
        if not hasattr(self, "required_refs"):
            raise Exception("Missing required_refs object")
        missing_refs = []
        for ref in self.required_refs:
            if not hasattr(self, ref):
                missing_refs.append(ref)
        if len(missing_refs) > 0:
            raise Exception("Missing references: {0}".format(missing_refs))

        return fn(self, *args, **kwargs)

    # keep the wrapped method's name visible on the wrapper
    wrapped.__name__ = fn.__name__
    return wrapped
50 | 
def add_references(node, **refs):
    """Store every keyword argument as an attribute of the same name on node.
    Example:

    ...
    backend = BackendBound()
    add_references(backend, retiring=retiring, frontend_bound=frontend,
                   bad_speculation=bad_speculation)

    """
    for name in refs:
        setattr(node, name, refs[name])
63 | 


--------------------------------------------------------------------------------
/objutils.py:
--------------------------------------------------------------------------------
 1 | # generic utilities for objects
 2 | 
 3 | def has(obj, name):
 4 |     return name in obj.__class__.__dict__
 5 | 
 6 | def safe_ref(obj, name):
 7 |     if has(obj, name):
 8 |         return obj.__class__.__dict__[name]
 9 |     return None
10 | 
def ref_or(obj, name, alt):
    """Like ``safe_ref`` but return ``alt`` when the class of ``obj`` does
    not define ``name`` directly."""
    return obj.__class__.__dict__.get(name, alt)
15 | 
def map_fields(obj, fields):
    """Return a list with, for each name in ``fields``, the attribute the
    class of ``obj`` defines directly under that name, or ``None``."""
    return [obj.__class__.__dict__.get(field) for field in fields]
20 | 


--------------------------------------------------------------------------------
/oc-all-events:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # convert all events for testing
 3 | # needs GNU parallel
 4 | set -e
 5 | 
 6 | . ./cpumap.sh
 7 | 
 8 | cpu()
 9 | {
10 | 	export EVENTMAP=${cpus[$1]}
11 | 	./list-events.py | parallel -n1 ./ocperf.py --print stat -e  > /dev/null
12 | }
13 | 
14 | if [ "$1" != "" ] ; then
15 | 	cpu $1
16 | 	exit 0
17 | fi
18 | 
19 | cpu bnl
20 | cpu hsw
21 | cpu ivb
22 | cpu ivt
23 | cpu nhm-ep
24 | cpu nhm-ex
25 | cpu snb
26 | cpu snb-ep
27 | cpu wsm-dp
28 | cpu wsm-sp
29 | 
30 | 


--------------------------------------------------------------------------------
/ocperf:
--------------------------------------------------------------------------------
1 | ocperf.py


--------------------------------------------------------------------------------
/other-tester:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # tester for other programs in pmu-tools
  3 | # PERF=... override perf binary
  4 | # NORES=1 don't check measurement results
  5 | 
  6 | PYTHON=${PYTHON:-python3}
  7 | 
  8 | . ./cpumap.sh
  9 | set -e
 10 | PERF=${PERF:-perf}
 11 | failed() {
 12 | 	echo FAILED
 13 | }
 14 | PATH=$PATH:.
 15 | trap failed ERR 0
 16 | set -x
 17 | 
 18 | # XXX cgroup untested
 19 | for args in "" "-A" "--per-socket" "--per-core" "-r2" ; do
 20 | 
 21 | # interval-normalize.py
 22 | 
 23 | ${PERF} stat -e cycles,branches,instructions,branch-misses,context-switches,page-faults -I100 $args -a -x, -o x$$.csv sleep 1
 24 | 
 25 | $WRAP interval-normalize.py --error-exit x$$.csv
 26 | $WRAP interval-normalize.py --error-exit x$$.csv > y$$.csv
 27 | 
 28 | grep -vq PARSE-ERROR y$$.csv
 29 | if [ -z "$NORES" ] ; then
 30 | for i in branch-misses branches context-switches cycles instructions page-faults ; do
 31 | 	grep -q $i y$$.csv
 32 | done
 33 | fi
 34 | 
 35 | grep -vq PARSE-ERROR x$$.csv
 36 | 
 37 | # plot-normalized.py
 38 | $WRAP plot-normalized.py -o x$$-2.png y$$.csv
 39 | 
 40 | # interval-plot.py
 41 | $WRAP interval-plot.py x$$.csv -o x$$.png
 42 | 
 43 | done
 44 | 
 45 | DYGRAPHS=""
 46 | 
 47 | # original url http://dygraphs.com/1.0.1/dygraph-combined.js disappeared
 48 | if [ ! -r dygraph-combined.js ] && wget https://cdnjs.cloudflare.com/ajax/libs/dygraph/1.0.1/dygraph-combined.js ; then
 49 | DYGRAPHS=1
 50 | fi
 51 | 
 52 | for args in "-l2" "--all -v" "-l3 --single-thread" "--all -a -A"; do
 53 | 
 54 | FORCEHT=1 $WRAP toplev.py -v --force-cpu ${DCPU:-hsw} --nodes +CPU_Utilization -I 100 $args -o x$$.csv -x, ./workloads/BC1s
 55 | $WRAP toplev.py -v --force-cpu ${DCPU:-hsw} --nodes +CPU_Utilization -I 100 $args -o xn$$.csv -x, ./workloads/BC1s
 56 | $WRAP interval-normalize.py --error-exit < x$$.csv
 57 | $WRAP interval-normalize.py --error-exit < x$$.csv > y$$.csv
 58 | 
 59 | grep -vq PARSE-ERROR y$$.csv
 60 | [ -z "$NORES" ] && grep Frontend y$$.csv
 61 | 
 62 | $WRAP interval-normalize.py --normalize-cpu --error-exit < x$$.csv > yc$$.csv
 63 | [ -z "$NORES" ] && grep Frontend yc$$.csv
 64 | 
 65 | $WRAP interval-normalize.py --normalize-cpu --error-exit < xn$$.csv > yc$$.csv
 66 | [ -z "$NORES" ] && grep Frontend yc$$.csv
 67 | 
 68 | if grep -q CPUs x$$.csv ; then
 69 | 
 70 | $WRAP utilized.py x$$.csv -o y$$.csv
 71 | [ -z "$NORES" ] && grep Frontend y$$.csv
 72 | 
 73 | fi
 74 | 
 75 | if grep -q CPUs xn$$.csv ; then
 76 | 
 77 | $WRAP utilized.py xn$$.csv -o y$$.csv
 78 | [ -z "$NORES" ] && grep Frontend y$$.csv
 79 | 
 80 | fi
 81 | 
 82 | $WRAP interval-plot.py x$$.csv -o x$$.png
 83 | 
 84 | # plot-normalized.py
 85 | $WRAP plot-normalized.py -o x$$-2.png y$$.csv
 86 | 
 87 | # tl-serve.py
 88 | if [ -n "$DYGRAPHS" ] ; then
 89 | 	$WRAP tl-serve.py --gen tls$$ x$$.csv
 90 | 	rm -rf tls$$
 91 | fi
 92 | 
 93 | # tl-barplot.py
 94 | $WRAP tl-barplot.py x$$.csv -o x$$.png
 95 | 
 96 | rm x$$.png x$$-2.png
 97 | 
 98 | done
 99 | 
100 | $WRAP tl-serve.py x$$.csv &
101 | sleep 1
102 | unset http_proxy
103 | curl http://localhost:9001 > /dev/null
104 | kill %1
105 | sleep 1
106 | wait %1
107 | 
108 | $PYTHON csv_formats.py
109 | 
110 | rm x$$.csv xn$$.csv
111 | 
112 | # cputop.py
113 | 
114 | $WRAP cputop.py "socket == 0"
115 | $WRAP cputop.py "thread == 0 and socket == 0"
116 | $WRAP cputop.py "thread == 1" offline
117 | $WRAP cputop.py offline online
118 | [ "$($WRAP cputop.py True | wc -l | cut -d ' ' -f 1)" -eq "$(getconf _NPROCESSORS_ONLN)" ]
119 | 
120 | # list-events.py
121 | 
122 | EVENTMAP=${cpus[hsw]} $WRAP list-events.py > x$$.lst
123 | [ "$(wc -l x$$.lst | cut -d ' ' -f 1)" -gt 20 ]
124 | grep -qi rtm_retired.aborted x$$.lst
125 | rm x$$.lst
126 | 
127 | # event-translate.py
128 | EVENTMAP=${cpus[hsw]} $WRAP event-translate.py r4c9 | grep -q rtm_retired.aborted
129 | 
130 | $WRAP gen-dot.py simple > /dev/null
131 | $WRAP gen-dot.py ivb_client_ratios > /dev/null
132 | 
133 | # untested: counterdiff.py
134 | 
135 | # may need network:
136 | # untested: event_download.py
137 | 
138 | # need root:
139 | # untested: msr.py
140 | # untested: pci.py
141 | # untested: event-rmap.py
142 | 
143 | trap "" ERR 0
144 | 
145 | echo SUCCEEDED
146 | 


--------------------------------------------------------------------------------
/parallel-tester:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # run all test suites in parallel
3 | # requires GNU parallel
4 | exec parallel --halt now,fail=1 < all-tester
5 | 


--------------------------------------------------------------------------------
/parser/elf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # resolve ELF and DWARF symbol tables using elftools
  3 | #
  4 | # Copyright (c) 2013-2014, Intel Corporation
  5 | # Author: Andi Kleen
  6 | #
  7 | # This program is free software; you can redistribute it and/or modify it
  8 | # under the terms and conditions of the GNU General Public License,
  9 | # version 2, as published by the Free Software Foundation.
 10 | #
 11 | # This program is distributed in the hope it will be useful, but WITHOUT
 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13 | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 14 | # more details.
 15 | from __future__ import print_function
 16 | from elftools.common.py3compat import bytes2str
 17 | from elftools.elf.elffile import ELFFile
 18 | from elftools.elf.sections import SymbolTableSection
 19 | import elftools.common.exceptions
 20 | import util
 21 | import kernel
 22 | 
 23 | # global caches
 24 | open_files = dict()
 25 | resolved = dict()
 26 | symtables = dict()
 27 | lines = dict()
 28 | 
 29 | def build_line_table(dwarfinfo):
 30 |     lines = []
 31 |     for CU in dwarfinfo.iter_CUs():
 32 |         lp = dwarfinfo.line_program_for_CU(CU)
 33 |         prevstate = None
 34 |         for entry in lp.get_entries():
 35 |             if entry.state is None or entry.state.end_sequence:
 36 |                 continue
 37 |             if prevstate:
 38 |                 lines.append((prevstate.address,
 39 |                               entry.state.address,
 40 |                               lp['file_entry'][prevstate.file - 1].name,
 41 |                               prevstate.line))
 42 |             prevstate = entry.state
 43 |     lines.sort()
 44 |     return lines
 45 | 
 46 | def build_symtab(elffile):
 47 |     syms = []
 48 |     for section in elffile.iter_sections():
 49 |         if isinstance(section, SymbolTableSection):
 50 |             for nsym, sym in enumerate(section.iter_symbols()):
 51 |                 name = bytes2str(sym.name)
 52 |                 if not name:
 53 |                     continue
 54 |                 if sym.entry.st_info.type != 'STT_FUNC':
 55 |                     continue
 56 |                 end = sym['st_value'] + sym['st_size']
 57 |                 syms.append((sym['st_value'], end,
 58 |                              bytes2str(sym.name)))
 59 |     syms.sort()
 60 |     return syms
 61 | 
 62 | reported = set()
 63 | 
 64 | def find_elf_file(fn):
 65 |     if fn.startswith("//"):
 66 |         return None
 67 |     if fn in open_files:
 68 |         elffile = open_files[fn]
 69 |     else:
 70 |         try:
 71 |             f = open(fn, 'rb')
 72 |             elffile = ELFFile(f)
 73 |             open_files[fn] = elffile
 74 |         except (IOError, elftools.common.exceptions.ELFError):
 75 |             if fn not in reported:
 76 |                 print("Cannot open", fn)
 77 |             reported.add(fn)
 78 |             return None
 79 | 
 80 |     return elffile
 81 | 
 82 | def resolve_line(fn, ip):
 83 |     elffile = find_elf_file(fn)
 84 |     if elffile is None:
 85 |         return "?"
 86 |     if fn not in lines and elffile.has_dwarf_info():
 87 |         lines[fn] = build_line_table(elffile.get_dwarf_info())
 88 | 
 89 |     src = None
 90 |     if resolve_line and fn in lines:
 91 |         pos = util.find_le(lines[fn], ip)
 92 |         if pos:
 93 |             src = "%s:%d" % (pos[2], pos[3])
 94 |     return src
 95 | 
 96 | # global one hit cache
 97 | # helps a lot for LBR decoding
 98 | # tbd use a small list with LRU?
 99 | last_sym = None
100 | 
def resolve_sym(fn, ip):
    """Resolve address ``ip`` in ELF file ``fn`` to (symbol name, offset).

    The symbol table of each file is built on first use and cached in
    ``symtables``. Returns ("?", 0) when the file cannot be opened or an
    ELFError occurs, and (None, None) when no symbol starts at or below
    ``ip``.
    """
    elffile = find_elf_file(fn)
    if elffile is None:
        return "?", 0
    global last_sym

    try:
        if fn not in symtables:
            symtables[fn] = build_symtab(elffile)

        # NOTE(review): last_sym is declared global and read here, but this
        # function never assigns it, so unless other code sets it the
        # one-hit cache never hits. It is also not keyed by fn.
        if last_sym and last_sym[0] <= ip <= last_sym[1]:
            return last_sym[2], ip - last_sym[0]

        loc = None
        offset = None
        if fn in symtables:
            # nearest symbol starting at or below ip; its end address
            # (sym[1]) is not checked here
            sym = util.find_le(symtables[fn], ip)
            if sym:
                loc, offset = sym[2], ip - sym[0]
    except elftools.common.exceptions.ELFError:
        return "?", 0

    return loc, offset
124 | 
def resolve_ip(filename, foffset, ip, need_line):
    """Resolve an address to (symbol, offset in symbol, source line).

    For absolute file names the symbol is looked up by file offset
    ``foffset`` first and by ``ip`` as fallback; the line is resolved from
    ``ip`` only when ``need_line`` is set. Anything else is treated as a
    kernel address and resolved through ``kernel.resolve_kernel``.

    Returns (None, 0, None) for a kernel address that cannot be resolved.
    """
    sym, soffset, line = None, 0, None
    if filename and filename.startswith("/"):
        sym, soffset = resolve_sym(filename, foffset)
        if not sym:
            sym, soffset = resolve_sym(filename, ip)
        if need_line:
            line = resolve_line(filename, ip)
    else:
        # resolve_kernel returns a bare None (not a pair) on a miss, so it
        # must not be unpacked unconditionally.
        ksym = kernel.resolve_kernel(ip)
        if ksym:
            sym, soffset = ksym
    return sym, soffset, line
136 | 
if __name__ == '__main__':
    # Debug helper: elf.py FILE HEXADDR
    import sys
    addr = int(sys.argv[2], 16)
    # resolve_ip takes (filename, foffset, ip, need_line); the address from
    # the command line is used both as file offset and as ip. The line is
    # printed separately below, so it is not requested here.
    print(resolve_ip(sys.argv[1], addr, addr, False))
    print(resolve_line(sys.argv[1], addr))
141 | 


--------------------------------------------------------------------------------
/parser/hist.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # print histogram for perf.data
 3 | from __future__ import print_function
 4 | import perfpd
 5 | import pfeat
 6 | import argparse
 7 | 
 8 | p = argparse.ArgumentParser(description='Print histogram for perf.data')
 9 | p.add_argument('datafiles', nargs='*',
10 |                help='perf.data files (default perf.data)',
11 |                default=['perf.data'])
12 | p.add_argument('--sort', help='field to sort on (symbol, line)',
13 |                default='symbol')
14 | p.add_argument('--min-percent', help='Minimum percent to print', default=1.0)
15 | args = p.parse_args()
16 | 
# padding added after the longest name, and upper bound for the column width
COLUMN_PAD = 5
MAX_COLUMN = 70

def compute_cols(names):
    """Return the width of the name column for ``names``.

    The width is the longest name plus COLUMN_PAD, capped at MAX_COLUMN.
    An empty collection of names yields just the padding instead of raising
    ValueError from max().
    """
    longest = 0
    for n in names:
        longest = max(longest, len(n))
    return min(longest + COLUMN_PAD, MAX_COLUMN)
22 | 
# --min-percent is given in percent; the histogram holds fractions
min_percent = float(args.min_percent) / 100.0
for d in args.datafiles:
    df, et, feat = perfpd.read_samples(d, (args.sort == 'line'))
    pfeat.print_feat(feat)

    # xxx split by event
    if 'period' in df:
        # weight each sort key by its summed sample period
        total = float(df['period'].sum())
        g = df.groupby(args.sort)
        # Series.sort() has been removed from pandas; sort_values returns
        # the sorted series instead of sorting in place.
        h = g.period.sum().sort_values(ascending=False)
        h = h.apply(lambda x: x / total)
    else:
        # no periods: plain sample counts, already normalized and sorted
        h = df[args.sort].value_counts(normalize=True)
    h = h[h >= min_percent]

    # nothing reaches the threshold: nothing to print or to size columns for
    if len(h) == 0:
        continue

    cols = compute_cols(h.index)
    for s, v in zip(h.index, h.values):
        print("%-*s %.2f%%" % (cols, s, v * 100.0))
42 | 


--------------------------------------------------------------------------------
/parser/kernel.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # resolve kernel symbols through kallsyms (when no vmlinux is available)
 3 | #
 4 | # Copyright (c) 2014, Intel Corporation
 5 | # Author: Andi Kleen
 6 | #
 7 | # This program is free software; you can redistribute it and/or modify it
 8 | # under the terms and conditions of the GNU General Public License,
 9 | # version 2, as published by the Free Software Foundation.
10 | #
11 | # This program is distributed in the hope it will be useful, but WITHOUT
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14 | # more details.
15 | 
16 | import util
17 | 
18 | kernel = []
19 | 
def parse_kernel():
    """Read /proc/kallsyms into the global ``kernel`` table.

    Each entry is an ``(address, symbol name)`` tuple. The table is sorted
    afterwards because lookups bisect it and the file does not have to be
    in address order. Lines with fewer than three fields are ignored.
    """
    with open("/proc/kallsyms", 'r') as f:
        for l in f:
            n = l.split()
            if len(n) < 3:
                continue
            kernel.append((int(n[0], 16), n[2]))
    kernel.sort()
26 | 
def resolve_kernel(ip):
    """Resolve kernel address ``ip`` to ``(symbol name, offset)``.

    The symbol table is loaded on first use. Returns None when no symbol
    starts at or below ``ip``.
    """
    if not kernel:
        parse_kernel()
    match = util.find_le(kernel, ip)
    if not match:
        return None
    return match[1], ip - match[0]
34 | 


--------------------------------------------------------------------------------
/parser/mmap.py:
--------------------------------------------------------------------------------
 1 | # track mmap updates in a perf stream and allow lookup of symbols
 2 | #
 3 | # Copyright (c) 2013-2014, Intel Corporation
 4 | # Author: Andi Kleen
 5 | #
 6 | # This program is free software; you can redistribute it and/or modify it
 7 | # under the terms and conditions of the GNU General Public License,
 8 | # version 2, as published by the Free Software Foundation.
 9 | #
10 | # This program is distributed in the hope it will be useful, but WITHOUT
11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 | # more details.
14 | 
15 | from collections import defaultdict
16 | import bisect
17 | 
18 | # max reorder window for MMAP updates
19 | LOOKAHEAD_WINDOW = 1024
20 | 
def lookup(m, ip):
    """Find the entry of sorted tuple list ``m`` whose first element is the
    largest one not above ``ip``.

    Returns ``(entry, ip - entry[0])``, or ``(None, 0)`` when every entry
    starts above ``ip`` (or ``m`` is empty).
    """
    idx = bisect.bisect_left(m, (ip,))
    exact = idx < len(m) and m[idx][0] == ip
    if not exact:
        if idx == 0:
            return None, 0
        # step back to the closest entry starting below ip
        idx -= 1
    region = m[idx]
    return region, ip - region[0]
30 | 
class MmapTracker:
    """Track mmap updates in a perf stream and allow lookup of symbols.

    State:
      maps      -- per-pid sorted list of (addr, len, filename) mappings;
                   pid -1 holds the mappings treated as kernel
      pnames    -- per-pid process name taken from COMM events
      lookahead -- index up to which the event list was already scanned
      updates   -- pending (time2, event) updates, kept sorted
    """

    def __init__(self):
        self.maps = defaultdict(list)
        self.pnames = defaultdict(str)
        self.lookahead = 0
        self.updates = []

    # look ahead for out of order mmap updates
    # ev is the event list and n the index of the current event. Scanning
    # only happens when n has reached the end of the previous window; it
    # then covers the next LOOKAHEAD_WINDOW events (or up to the end).
    def lookahead_mmap(self, ev, n):
        if n - self.lookahead == 0:
            self.lookahead = min(n + LOOKAHEAD_WINDOW, len(ev))
            for l in range(n, self.lookahead):
                j = ev[l]
                # no time stamp: assume it's synthesized and kernel
                # (these are entered into the maps immediately)
                if j.type in ('MMAP','MMAP2') and j.pid == -1 and j.tid == 0:
                    bisect.insort(self.maps[j.pid],
                                  (j.addr, j.len, j.filename))
                elif j.type in ('COMM','MMAP','MMAP2'):
                    # everything else is queued, ordered by time2, until a
                    # sample at or after that time is processed
                    bisect.insort(self.updates, (j.time2, j))

    # process pending updates for a sample
    # Applies, in order, every queued update whose time stamp is not later
    # than the time of sample j.
    def update_sample(self, j):
        updates = self.updates
        while len(updates) > 0 and j.time >= updates[0][0]:
            u = updates[0][1]
            del updates[0]
            if u.type in ('MMAP', 'MMAP2'):
                pid = u.pid
                bisect.insort(self.maps[pid], (u.addr, u.len, u.filename))
            elif u.type == 'COMM':
                # a COMM event discards the pid's mappings and records the
                # new name (presumably it marks an exec -- TODO confirm)
                self.maps[u.pid] = []
                self.pnames[u.pid] = u.comm

    # look up tables with current state
    # Returns (filename, mapping start address, offset into mapping), or
    # (None, None, 0) when ip is in no known mapping.
    def resolve(self, pid, ip):
        if not self.maps[pid]:
            # xxx kernel
            return None, None, 0
        m, offset = lookup(self.maps[pid], ip)
        if not m or offset >= m[1]:
            # look up kernel
            m, offset = lookup(self.maps[-1], ip)
            if not m or offset >= m[1]:
                return None, None, 0
        assert m[0] <= ip <= m[0] + m[1]
        return m[2], m[0], offset
79 | 


--------------------------------------------------------------------------------
/parser/pfeat.py:
--------------------------------------------------------------------------------
 1 | # print perf headers
 2 | 
 3 | def print_feat(feat):
 4 |     print("# Measured on %s (%s)" % (
 5 |             feat.hostname.hostname,
 6 |             feat.osrelease.osrelease))
 7 |     print("# %s, %s" % (
 8 |             feat.cpudesc.cpudesc,
 9 |             feat.cpuid.cpuid))
10 |     print("# %s" % (" ".join(map(lambda x: x.cmdline, feat.cmdline.cmdline))))
11 | 


--------------------------------------------------------------------------------
/parser/tester:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # test different variants of the perf format
 3 | 
 4 | PERF=${PERF:-perf}
 5 | WRAP=${WRAP:-python}
 6 | 
 7 | failed() {
 8 | 	echo FAILED
 9 | 	exit 1
10 | }
11 | trap failed ERR 0
12 | 
13 | set -e
14 | set -x
15 | set -o pipefail
16 | set -E
17 | 
18 | CMD=${1:-bash -c /bin/true}
19 | 
20 | [ ! -d data ] && mkdir data
21 | 
# Record $CMD with the given perf record options, then compare the number
# of samples seen by perfdata.py with the number reported by perf itself.
# Returns early when perf cannot record with these options or produced an
# empty file.
check() {
	echo -- testing "$@"
	FN=data/perf.$(echo -- "$@" | tr -d '{} ')
	FN=${FN/--/}
	rm -f "$FN"
	# $CMD is deliberately unquoted: it holds a whole command line
	if ! $PERF record -o "$FN" "$@" -- $CMD ; then
		return
	fi
	if [ "$(stat -c %s "$FN")" == 0 ] ; then
		return
	fi
	$WRAP perfdata.py "$FN" > pdata.txt
	$WRAP perfpd.py "$FN" > ppd.txt

	# XXX check more fields
	AS=$(grep -c SAMPLE pdata.txt)
	BS=$($PERF report -i "$FN" -D | grep -c "PERF_RECORD_SAMPLE")
	# quoted so that an empty count is a failed comparison, not a syntax error
	[ "$AS" -eq "$BS" ]
}
41 | 
42 | check
43 | check -c 1000
44 | check -g -c 1000
45 | check -b -c 1000
46 | if ! check -b -g fp -c 1000 ; then
47 | 	check -b -g -c 1000
48 | fi
49 | check -P -c 10000
50 | check -T -c 10003
51 | #check -e cycles:S 
52 | # seems to be broken in perf
53 | #check -e '{cycles,branches}:S'
54 | 
55 | set +e 
56 | #check -d
57 | check --group -e cycles,branches,branch-misses -c 1000
58 | check -e '{cycles,branches},{branch-misses,cache-misses}'  -c 1000
59 | check -e cycles,branches,branch-misses -c 1000
60 | 
61 | # new kernel
62 | #check -g dwarf
63 | 
64 | # XXX sw trace points
65 | 
66 | trap "" ERR 0
67 | 
68 | echo SUCCEEDED
69 | 


--------------------------------------------------------------------------------
/parser/util.py:
--------------------------------------------------------------------------------
 1 | # utility functions
 2 | 
 3 | import bisect
 4 | 
 5 | def find_le(f, key):
 6 |     pos = bisect.bisect_left(f, (key,))
 7 |     if pos < len(f) and f[pos][0] == key:
 8 |         return f[pos]
 9 |     if pos == 0:
10 |         return None
11 |     return f[pos - 1]
12 | 


--------------------------------------------------------------------------------
/pci.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # library and tool to access PCI config space
 3 | import os
 4 | import struct
 5 | 
 6 | # no multiple domains, controllers so far
 7 | 
 8 | def probe(bus, dev, func):
 9 |     fn = "/sys/devices/pci0000:%02x/0000:%02x:%02x.%01x/config" % (bus, bus, dev, func)
10 |     return os.path.isfile(fn)
11 | 
def openpci(bus, dev, func, offset, mode):
    """Open the sysfs config space file of PCI device ``bus:dev.func``.

    ``mode`` is passed to os.open() (e.g. os.O_RDONLY). The returned file
    descriptor is positioned at ``offset``; the caller must close it.
    """
    fn = "/sys/devices/pci0000:%02x/0000:%02x:%02x.%01x/config" % (bus, bus, dev, func)
    f = os.open(fn, mode)
    try:
        os.lseek(f, offset, os.SEEK_SET)
    except Exception:
        # don't leak the descriptor when positioning fails
        os.close(f)
        raise
    return f
17 | 
18 | sizes = {8: "Q", 4: "I", 2: "H", 1: "B"}
19 | 
def writepci(bus, device, func, offset, size, val):
    """Write ``val`` as a ``size`` byte value (1, 2, 4 or 8) at ``offset``
    in the config space of PCI device ``bus:device.func``.

    Raises KeyError for an unsupported ``size``.
    """
    f = openpci(bus, device, func, offset, os.O_WRONLY)
    try:
        os.write(f, struct.pack(sizes[size], val))
    finally:
        # close even when packing or writing fails
        os.close(f)
24 | 
def readpci(bus, device, func, offset, size):
    """Read and return the ``size`` byte value (1, 2, 4 or 8) at ``offset``
    in the config space of PCI device ``bus:device.func``.

    Raises KeyError for an unsupported ``size`` and struct.error on a
    short read.
    """
    f = openpci(bus, device, func, offset, os.O_RDONLY)
    try:
        return struct.unpack(sizes[size], os.read(f, size))[0]
    finally:
        # close even when reading or unpacking fails
        os.close(f)
30 | 
def changebit(bus, device, func, offset, bit, val):
    """Set (``val`` true) or clear (``val`` false) bit number ``bit`` of
    the 32-bit value at ``offset`` in the config space of PCI device
    ``bus:device.func``, using a read-modify-write."""
    f = openpci(bus, device, func, offset, os.O_RDWR)
    try:
        v = struct.unpack("I", os.read(f, 4))[0]
        if val:
            v = v | (1 << bit)
        else:
            v = v & ~(1 << bit)
        # the read advanced the file position; go back before writing
        os.lseek(f, offset, os.SEEK_SET)
        os.write(f, struct.pack('I', v))
    finally:
        # close even when one of the steps above fails
        os.close(f)
41 | 


--------------------------------------------------------------------------------
/pebs-grabber/Makefile:
--------------------------------------------------------------------------------
 1 | KDIR := /lib/modules/`uname -r`/build
 2 | 
 3 | CFLAGS_pebs-grabber.o := -DTRACE_INCLUDE_PATH=${M}
 4 | 
 5 | obj-m := pebs-grabber.o
 6 | 
 7 | all:
 8 | 	make -C ${KDIR} M=`pwd`
 9 | 
10 | install:
11 | 	make -C ${KDIR} M=`pwd` install
12 | 
13 | clean:
14 | 	make -C ${KDIR} M=`pwd` clean
15 | 


--------------------------------------------------------------------------------
/pebs-grabber/pebs.h:
--------------------------------------------------------------------------------
  1 | #undef TRACE_SYSTEM
  2 | #define TRACE_SYSTEM pebs
  3 | 
  4 | #if !defined(_TRACE_PEBS_H) || defined(TRACE_HEADER_MULTI_READ)
  5 | #define _TRACE_PEBS_H
  6 | 
  7 | #include <linux/tracepoint.h>
  8 | 
  9 | /* PEBS trace points. These always follow on each other */
 10 | 
 11 | TRACE_EVENT(pebs_v1, 
 12 | 	    TP_PROTO(u64 ip,
 13 | 		     u64 status,
 14 | 		     u64 dla,
 15 | 		     u64 dse,
 16 | 		     u64 lat),
 17 | 	    TP_ARGS(ip, status, dla, dse, lat),
 18 | 	    TP_STRUCT__entry(
 19 | 		    __field(u64, ip)
 20 | 		    __field(u64, status)
 21 | 		    __field(u64, dla)
 22 | 		    __field(u64, dse)
 23 | 		    __field(u64, lat)
 24 | 		    ),
 25 | 	    TP_fast_assign(
 26 | 		    __entry->ip = ip;
 27 | 		    __entry->status = status;
 28 | 		    __entry->dla = dla;
 29 | 		    __entry->dse = dse;
 30 | 		    __entry->lat = lat;
 31 | 		    ),
 32 | 	    TP_printk("ip=%llx status=%llx dla=%llx dse=%llx lat=%llx\n",
 33 | 		      __entry->ip,
 34 | 		      __entry->status,
 35 | 		      __entry->dla,
 36 | 		      __entry->dse,
 37 | 		      __entry->lat)
 38 | 	);
 39 | 
 40 | TRACE_EVENT(pebs_v2, 
 41 | 	    TP_PROTO(u64 eventingip,
 42 | 		     u64 tsx_tuning,
 43 | 		     u64 ax),
 44 | 	    TP_ARGS(eventingip, tsx_tuning, ax),
 45 | 	    TP_STRUCT__entry(
 46 | 		    __field(u64, eventingip)
 47 | 		    __field(u64, tsx_tuning)
 48 | 		    __field(u64, ax)
 49 | 		    ),
 50 | 	    TP_fast_assign(
 51 | 		    __entry->eventingip = eventingip;
 52 | 		    __entry->tsx_tuning = tsx_tuning;
 53 | 		    __entry->ax = ax;
 54 | 		    ),
 55 | 	    TP_printk("eventingip=%llx tsx_tuning=%llx ax=%llx\n",
 56 | 		      __entry->eventingip,
 57 | 		      __entry->tsx_tuning,
 58 | 		      __entry->ax)
 59 | 	);
 60 | 
 61 | TRACE_EVENT(pebs_v3,
 62 | 	    TP_PROTO(u64 tsc),
 63 | 	    TP_ARGS(tsc),
 64 | 	    TP_STRUCT__entry(
 65 | 		    __field(u64, tsc)
 66 | 		    ),
 67 | 	    TP_fast_assign(
 68 | 		    __entry->tsc = tsc;
 69 | 		    ),
 70 | 	    TP_printk("tsc=%llx\n", __entry->tsc)
 71 | 	);
 72 | 
 73 | TRACE_EVENT(pebs_regs, 
 74 | 	    TP_PROTO(u64 flags, u64 *regs),
 75 | 	    TP_ARGS(flags, regs),
 76 | 	    TP_STRUCT__entry(
 77 | 		    __field(u64, flags)
 78 | 		    __field(u64, regs[16])
 79 | 		    ),
 80 | 	    TP_fast_assign(
 81 | 		    __entry->flags = flags;
 82 | 		    memcpy(__entry->regs, regs, sizeof(u64) * 16);
 83 | 		    ),
 84 | 	    TP_printk("flags=%llx\n"
 85 | 		      "ax=%llx bx=%0llx cx=%llx dx=%llx si=%llx di=%llx bp=%llx sp=%llx\n"
 86 | 		      "r8=%llx r9=%llx r10=%llx r11=%llx r12=%llx r13=%llx r14=%llx r15=%llx\n",
 87 | 		      __entry->flags,
 88 | 		      __entry->regs[0],
 89 | 		      __entry->regs[1],
 90 | 		      __entry->regs[2],
 91 | 		      __entry->regs[3],
 92 | 		      __entry->regs[4],
 93 | 		      __entry->regs[5],
 94 | 		      __entry->regs[6],
 95 | 		      __entry->regs[7],
 96 | 		      __entry->regs[8],
 97 | 		      __entry->regs[9],
 98 | 		      __entry->regs[10],
 99 | 		      __entry->regs[11],
100 | 		      __entry->regs[12],
101 | 		      __entry->regs[13],
102 | 		      __entry->regs[14],
103 | 		      __entry->regs[15])
104 | 	);
105 | 
106 | #endif
107 | 
108 | #include <trace/define_trace.h>
109 | 	    
110 | 
111 | 		   
112 | 


--------------------------------------------------------------------------------
/perf_metrics.py:
--------------------------------------------------------------------------------
 1 | class CPU_Utilization:
 2 |     name = "CPU utilization"
 3 |     desc = """
 4 | Number of CPUs used. The top down CPU metrics are only meaningful
 5 | when a CPU thread is executing.  The percentage are always relative to
 6 | the executing time. When the utilization is low the workload may
 7 | actually not be CPU bound, but IO (network, block) IO bound
 8 | instead. Check the scheduler and IO metrics below. Or it may be CPU
 9 | bound, but not use enough parallelism, if the number of CPUs is less
10 | than the number of cores."""
11 |     nogroup = True
12 |     subplot = "CPU Utilization"
13 |     unit = "CPUs"
14 |     def compute(self, EV):
15 |         try:
16 |             # interval-ns is not a perf event, but handled by toplev internally.
17 |             self.val = (EV("task-clock", 1) * 1e6) / EV("interval-ns", 1)
18 |         except ZeroDivisionError:
19 |             self.val = 0
20 | 
class MUX:
    # Metric exposing the "mux" value in percent; the threshold is met for
    # values strictly between 0 and 100.
    name = "MUX"
    desc = """
PerfMon Event Multiplexing accuracy indicator"""
    unit = "%"
    maxval = 100.0
    errcount = 0

    def compute(self, EV):
        mux = EV("mux", 0)
        self.val = mux
        self.thresh = 0 < mux < 100.0
32 | 
class Setup:
    # Registers this file's metrics with the runner object ``r`` through
    # its force_metric() method; only MUX is currently enabled.
    def __init__(self, r):
        #r.force_metric(CPU_Utilization())
        mux_metric = MUX()
        r.force_metric(mux_metric)
37 | 


--------------------------------------------------------------------------------
/plot-normalized.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # plot already normalized data
 3 | # first column is time stamp
 4 | import sys
 5 | import argparse
 6 | import os
 7 | import csv
 8 | import matplotlib
 9 | if os.getenv("DISPLAY") is None:
10 |     matplotlib.use('Agg')
11 | import matplotlib.pyplot as plt
12 | 
13 | 
# The text describes the tool, so it is passed as ``description``. Given as
# ``usage`` it replaced the generated synopsis of the arguments in --help
# output and error messages.
ap = argparse.ArgumentParser(description='Plot already normalized CSV data')
ap.add_argument('--output', '-o', help='Output to file. Otherwise show.',
                nargs='?')
# without a file argument the CSV data is read from stdin
ap.add_argument('inf', nargs='?', default=sys.stdin, type=argparse.FileType('r'),
                help='input CSV file')
args = ap.parse_args()
20 | 
21 | inf = args.inf
22 | 
23 | rc = csv.reader(inf)
24 | 
25 | num = 0
26 | timestamps = []
27 | columns = {}
28 | for r in rc:
29 |     num += 1
30 |     if num == 1:
31 |         for j in r[1:]:
32 |             columns[j] = []
33 |         continue
34 |     timestamps.append(r[0])
35 |     c = 1
36 |     for j in columns:
37 |         try:
38 |             columns[j].append(float(r[c]))
39 |         except ValueError:
40 |             columns[j].append(float('nan'))
41 |         c += 1
42 | 
43 | for j in columns:
44 |     plt.plot(timestamps, columns[j], label=j)
45 | leg = plt.legend()
46 | leg.get_frame().set_alpha(0.5)
47 | if args.output:
48 |     plt.savefig(args.output)
49 | else:
50 |     plt.show()
51 | 


--------------------------------------------------------------------------------
/pmudef.py:
--------------------------------------------------------------------------------
 1 | EVENTSEL_EVENT = 0x00ff
 2 | EVENTSEL_UMASK = 0xff00
 3 | EVENTSEL_UMASK2 = 0xff00000000
 4 | EVENTSEL_EDGE  = 1<<18
 5 | EVENTSEL_PC    = 1<<19
 6 | EVENTSEL_ANY   = 1<<21
 7 | EVENTSEL_INV   = 1<<23
 8 | EVENTSEL_EQ    = 1<<36
 9 | EVENTSEL_INTX  = 1<<32
10 | EVENTSEL_INTX_CP = 1<<33
11 | EVENTSEL_CMASK = 0xff000000
12 | 
13 | EVMASK = (EVENTSEL_EVENT | EVENTSEL_UMASK | EVENTSEL_EDGE | EVENTSEL_PC | EVENTSEL_ANY |
14 |           EVENTSEL_INV | EVENTSEL_CMASK | EVENTSEL_UMASK2 | EVENTSEL_EQ | EVENTSEL_INTX |
15 |           EVENTSEL_INTX_CP)
16 | 
17 | EVENTSEL_ENABLE = 1<<22
18 | 
19 | MSR_EVNTSEL = 0x186
20 | MSR_IA32_FIXED_CTR_CTRL = 0x38d
21 | MSR_PEBS_ENABLE = 0x3f1
22 | MSR_PERFCTR = 0xc1
23 | MSR_PMC = 0x4c1
24 | MSR_FIXED_CTR = 0x309
25 | MSR_FIXED_CTR_CTL = 0x38d
26 | MSR_GLOBAL_STATUS = 0x38e
27 | MSR_GLOBAL_CTRL = 0x38f
28 | MSR_GLOBAL_OVF_CTRL = 0x390
29 | 
30 | extra_flags = (
31 |         (EVENTSEL_EDGE, "edge"),
32 |         (EVENTSEL_PC, "pc"),
33 |         (EVENTSEL_ANY, "any"),
34 |         (EVENTSEL_INV, "inv"),
35 |         (EVENTSEL_CMASK, "cmask"))
36 | 


--------------------------------------------------------------------------------
/pmumon.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # standalone simple pmu configuration tool
 3 | # allows counting an event without using perf
 4 | # will conflict with any parallel perf (and other profiler)
 5 | # usage.
 6 | # Author: Andi Kleen
 7 | #
 8 | from __future__ import print_function
 9 | import os
10 | import struct
11 | import sys
12 | 
def writemsr(msr, val, cpu):
    """Write the 64-bit value ``val`` to MSR number ``msr`` on CPU ``cpu``
    through /dev/cpu/<cpu>/msr (the MSR number is the file offset)."""
    f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_WRONLY)
    try:
        os.lseek(f, msr, os.SEEK_SET)
        os.write(f, struct.pack('Q', val))
    finally:
        # close even when the seek or write fails
        os.close(f)
18 | 
def readmsr(msr, cpu):
    """Return the 64-bit value of MSR number ``msr`` on CPU ``cpu``, read
    through /dev/cpu/<cpu>/msr (the MSR number is the file offset)."""
    f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_RDONLY)
    try:
        os.lseek(f, msr, os.SEEK_SET)
        return struct.unpack('Q', os.read(f, 8))[0]
    finally:
        # close even when the seek or read fails
        os.close(f)
25 | 
# Command line is either "pmumon cpu" (read back) or "pmumon cpu event"
# (program the counter); anything else prints the usage text.
if len(sys.argv) not in (2, 3):
    for usage_line in (
            "Usage: pmumon cpu [event]",
            "When no event is specified read+clear event on cpu, otherwise start it",
            "event == 0 clears. event is in hex",
            "perf/oprofile/etc. must not be active. no parallel users"):
        print(usage_line)
    sys.exit(1)

# The "+ 1" picks the register directly after the base event-select /
# counter MSR, i.e. the second counter.
MSR_EVNTSEL = 0x186 + 1
MSR_PERFCTR = 0xc1 + 1

cpu = int(sys.argv[1])
if len(sys.argv) == 2:
    # no event given: show the current event select and counter value
    print("%x = %d" % (readmsr(MSR_EVNTSEL, cpu), readmsr(MSR_PERFCTR, cpu),))
else:
    event = int(sys.argv[2], 16)
    writemsr(MSR_EVNTSEL, 0, cpu) # disable first
    writemsr(MSR_PERFCTR, 0, cpu)
    writemsr(MSR_EVNTSEL, event, cpu)
    #print("global status %x" % (readmsr(0x38f, cpu),))
45 | 


--------------------------------------------------------------------------------
/power_metrics.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # perf power metrics for toplev
 3 | #
 4 | 
 5 | import os
 6 | 
 7 | class EnergyPackage:
 8 |     name = "Package Energy"
 9 |     desc = """
10 | Package Energy over measurement period in Joules"""
11 |     unit = "Joules"
12 |     nogroup = True
13 |     subplot = "Power"
14 |     domain = "Package"
15 |     def compute(self, EV):
16 |         self.val = EV("power/energy-pkg/", 1)
17 |         self.thresh = self.val > 0
18 | 
# Metric backed by the "power/energy-cores/" perf event.
class EnergyCores:
    domain = "Package"
    subplot = "Power"
    nogroup = True
    unit = "Joules"
    name = "Cores Energy"
    desc = """
Cores Energy over measurement period in Joules"""

    def compute(self, EV):
        # EV is the evaluator callback; store its reading and flag the
        # metric whenever the reading is positive.
        reading = EV("power/energy-cores/", 1)
        self.val = reading
        self.thresh = reading > 0
30 | 
# Metric backed by the "power/energy-ram/" perf event.
class EnergyRAM:
    domain = "Package"
    subplot = "Power"
    nogroup = True
    unit = "Joules"
    name = "RAM Energy"
    desc = """
RAM Energy over measurement period in Joules"""

    def compute(self, EV):
        # EV is the evaluator callback; store its reading and flag the
        # metric whenever the reading is positive.
        reading = EV("power/energy-ram/", 1)
        self.val = reading
        self.thresh = reading > 0
42 | 
# Metric backed by the "power/energy-gpu/" perf event.
class EnergyGPU:
    name = "GPU Energy"
    desc = """
GPU Energy over measurement period in Joules"""
    unit = "Joules"
    nogroup = True
    subplot = "Power"
    domain = "Package"
    def compute(self, EV):
        # EV is the evaluator callback; store its reading.
        self.val = EV("power/energy-gpu/", 1)
        # Any positive reading counts, same rule as the other energy
        # metrics in this file (values between 0 and 1 Joule included).
        self.thresh = self.val > 0
54 | 
# Registers the energy metrics with the runner object `r`.
class Setup:
    def __init__(self, r):
        sysfs_events = "/sys/bus/event_source/devices/power/events/"

        # Cores energy goes first when its sysfs event exists; package
        # energy is registered without any check.
        if os.path.exists(sysfs_events + "energy-cores"):
            r.force_metric(EnergyCores())
        r.force_metric(EnergyPackage())

        # The remaining metrics are optional and keyed on their sysfs event.
        for event, metric in (("energy-ram", EnergyRAM),
                              ("energy-gpu", EnergyGPU)):
            if os.path.exists(sysfs_events + event):
                r.force_metric(metric())
64 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | brewer2mpl
3 | pandas
4 | xlsxwriter
5 | 


--------------------------------------------------------------------------------
/simple-pebs/Makefile:
--------------------------------------------------------------------------------
 1 | USER_CFLAGS := -g -Wall -std=c89
 2 | KDIR = /lib/modules/`uname -r`/build
 3 | 
 4 | USER_EXE := dumper samples histogram toperf
 5 | USER_OBJ := dumper.o dump-util.o elf.o symtab.o samples.o map.o \
 6 | 	    histogram.o toperf.o
 7 | 
 8 | obj-m := simple-pebs.o
 9 | M := make -C ${KDIR} M=`pwd`
10 | 
11 | all:
12 | 	${M} modules
13 | 
14 | install:
15 | 	${M} modules_install
16 | 
# Remove the kernel module build products, then the user-space tools and
# their objects (the object list variable is USER_OBJ).
clean:
	${M} clean
	rm -rf ${USER_EXE} ${USER_OBJ}
20 | 
21 | user: ${USER_EXE}
22 | ${USER_OBJ} ${USER_EXE}: CFLAGS := ${USER_CFLAGS}
23 | 
24 | dumper: dump-util.o dumper.o
25 | 
26 | samples: LDLIBS := -lelf
27 | samples: samples.o elf.o symtab.o dump-util.o
28 | 
29 | histogram: LDLIBS := -lelf
30 | histogram: histogram.o elf.o symtab.o map.o
31 | 
32 | toperf: toperf.o map.o
33 | 


--------------------------------------------------------------------------------
/simple-pebs/README:
--------------------------------------------------------------------------------
 1 | 
 2 | # simple standalone reference pebs driver
 3 | 
 4 | ##
 5 | ## Note: this is just a reference driver for PEBS on Linux, mainly as a reference
 6 | ## for writing new experimental drivers and for porting PEBS code to other
 7 | ## operating systems. See it as a code example.
 8 | ##
 9 | ## The code is written in a way that it should be easy to adapt to other OS.
10 | ##
11 | ## If you just want to use PEBS on Linux the builtin Linux perf
12 | ## support is nearly always a better choice. All functionality
13 | ## supported by simple pebs is supported by Linux perf in a better way.
14 | ## You may need a recent enough kernel to support your CPU.
15 | ##
16 | ## You'll need to disable Kernel page table isolation with the "nopti" kernel boot
17 | ## option. Otherwise the system will hard crash randomly on module load.
18 | ##
19 | ## CPU hotplug and suspend to ram are not supported on newer kernels.
20 | ##
21 | ## On recent CPUs:
22 | ## Should work on CPUs before Icelake, but may need to add more model numbers
23 | ## to the initialization checker.
24 | ##
25 | 
26 | PEBS "Precise Event Based Sampling" is a profiling technology in Intel CPUs,
27 | that uses microcode to do (mostly) precise event samples.
28 | 
29 | The driver is "free running" and minimizes interrupts, to allow a maximum 
30 | PEBS frequency. Interrupts are only triggered when the PEBS buffer is full.
31 | 
32 | This will take over the PEBS hardware from perf and may cause conflicts.
33 | 
34 | To build user tools use "make user"
35 | 
36 | sample file format:
37 | {8 bytes 	ip}
38 | 
39 | 
40 | simple-pebs.c	Linux simple pebs driver
41 | dumper.c	Dump samples from the running Linux driver. -b to write binary sample file
42 | 		Most other tools require running dumper first to dump the samples.
43 | 
44 | samples.c	Decode (ELF symbols) sample from running Linux driver.
45 | 
46 | histogram.c	Generate histogram from sample file
47 | 
48 | toperf.c	Generate perf.data from sample file
49 | 
50 | Equivalent Linux perf command lines:
51 | 
52 | insmod simple-pebs.ko / dumper -b
53 | 		-> perf record -c 100003 -a r1c2:pp sleep X
54 | 		   (or other event code, see also perf list)
55 | 		   To dump addresses add -d
56 | 
57 | samples ->	perf script
58 | 
59 | histogram ->	perf report --stdio
60 | 


--------------------------------------------------------------------------------
/simple-pebs/compat.h:
--------------------------------------------------------------------------------
 1 | /* Deal with Gleixnerfication */
 2 | #include <linux/version.h>
 3 | 
/*
 * Kernels 4.8 and later: provide empty stand-ins for the CPU notifier
 * calls so that code using them still builds. Nothing gets registered.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)

/* No CPU hotplug / suspend with the mess in newer kernels. */

/* No-op: the notifier block is ignored. */
static inline void register_cpu_notifier(struct notifier_block *n) {}
/* No-op counterpart of register_cpu_notifier(). */
static inline void unregister_cpu_notifier(struct notifier_block *n) {}

/*
 * Stand-in notifier action codes. NOTE(review): presumably they only have
 * to be distinct because the notifier is never invoked here -- confirm
 * against the driver source.
 */
#define CPU_STARTING 0
#define CPU_DYING 1

#endif
15 | 


--------------------------------------------------------------------------------
/simple-pebs/dump-util.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h>
 2 | #include <sys/mman.h>
 3 | #include <sys/fcntl.h>
 4 | #include <sys/ioctl.h>
 5 | #include <stdio.h>
 6 | #include <poll.h>
 7 | #include <stdlib.h>
 8 | #include <stdint.h>
 9 | 
10 | #include "simple-pebs.h"
11 | #include "dump-util.h"
12 | 
13 | #define err(x) perror(x), exit(1)
14 | 
/*
 * Open /dev/simple-pebs read-only and return the descriptor.
 * Prints the error and exits the process when the open fails.
 */
int device_open(void)
{
	int fd;

	fd = open("/dev/simple-pebs", O_RDONLY);
	if (fd >= 0)
		return fd;

	err("/dev/simple-pebs open");
	return fd;	/* not reached: err() exits */
}
22 | 
23 | int get_size(void)
24 | {
25 | 	int fd = device_open();
26 | 	int size;
27 | 
28 | 	if (ioctl(fd, SIMPLE_PEBS_GET_SIZE, &size) < 0)
29 | 		err("SIMPLE_PEBS_GET_SIZE");
30 | 	close(fd);
31 | 	printf("size %d\n", size);
32 | 	return size;
33 | }
34 | 
35 | void open_cpu(void **mapp, int cnum, struct pollfd *pfd, int size)
36 | {
37 | 	int fd = device_open();
38 | 	if (ioctl(fd, SIMPLE_PEBS_SET_CPU, cnum) < 0)
39 | 		err("SIMPLE_PEBS_SET_CPU");
40 | 	if (ioctl(fd, SIMPLE_PEBS_START, 0) < 0)
41 | 		err("SIMPLE_PEBS_START");
42 | 	void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
43 | 	if (map == (void *)-1)
44 | 		err("mmap");
45 | 	*mapp = map;
46 | 	pfd->fd = fd;
47 | 	pfd->events = POLLIN;
48 | }
49 | 


--------------------------------------------------------------------------------
/simple-pebs/dump-util.h:
--------------------------------------------------------------------------------
1 | struct pollfd;
2 | 
3 | typedef uint64_t u64;
4 | 
5 | int device_open(void);
6 | int get_size(void);
7 | void open_cpu(void **mapp, int cnum, struct pollfd *pfd, int size);
8 | 


--------------------------------------------------------------------------------
/simple-pebs/dumper.c:
--------------------------------------------------------------------------------
 1 | /* Dump simple PEBS data from kernel driver */
 2 | #include <unistd.h>
 3 | #include <sys/mman.h>
 4 | #include <sys/fcntl.h>
 5 | #include <sys/ioctl.h>
 6 | #include <stdio.h>
 7 | #include <poll.h>
 8 | #include <stdlib.h>
 9 | #include <stdint.h>
10 | #include <stdbool.h>
11 | #include <getopt.h>
12 | 
13 | typedef uint64_t u64;
14 | 
15 | #include "simple-pebs.h"
16 | #include "dump-util.h"
17 | 
18 | #define err(x) perror(x), exit(1)
19 | 
20 | void dump_data(int cpunum, u64 *map, int num)
21 | {
22 | 	int i;
23 | 	printf("dump %d\n", num);
24 | 	for (i = 0; i < num; i++)
25 | 		printf("%d: %lx\n", cpunum, map[i]);
26 | }
27 | 
/* Print the command line help on stderr and exit with status 1. */
static void usage(void)
{
	fputs("Usage: dumper [-b]\n"
	      "-b binary dump\n", stderr);
	exit(1);
}
34 | 
35 | int main(int ac, char **av)
36 | {
37 | 	int size = get_size();
38 | 	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
39 | 	void *map[ncpus];
40 | 	struct pollfd pfd[ncpus];
41 | 	int opt;
42 | 	bool binary = false;
43 | 
44 | 	while ((opt = getopt(ac, av, "b")) != -1) {
45 | 		switch (opt) {
46 | 		case 'b':
47 | 			binary = true;
48 | 			break;
49 | 		default:
50 | 			usage();
51 | 		}
52 | 	}
53 | 
54 | 	int i;
55 | 	for (i = 0; i < ncpus; i++)
56 | 		open_cpu(&map[i], i, &pfd[i], size);
57 | 	
58 | 	for (;;) {
59 | 		if (poll(pfd, ncpus, -1) < 0)
60 | 			perror("poll");
61 | 		for (i = 0; i < ncpus; i++) {
62 | 			if (pfd[i].revents & POLLIN) {
63 | 				int len;
64 | 
65 | 				if (ioctl(pfd[i].fd, SIMPLE_PEBS_GET_OFFSET, &len) < 0) {
66 | 					perror("SIMPLE_PEBS_GET_OFFSET");
67 | 					continue;
68 | 				}
69 | 
70 | 				/* copy out data */
71 | 				if (binary)
72 | 					write(1, map[i], len);
73 | 				else
74 | 					dump_data(i, map[i], len / sizeof(u64));
75 | 
76 | 				if (ioctl(pfd[i].fd, SIMPLE_PEBS_RESET, 0) < 0) {
77 | 					perror("SIMPLE_PEBS_RESET");
78 | 					continue;
79 | 				}
80 | 			}
81 | 		}
82 | 	}
83 | 	return 0;
84 | }
85 | 


--------------------------------------------------------------------------------
/simple-pebs/elf.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2015, Intel Corporation
  3 |  * Author: Andi Kleen
  4 |  * All rights reserved.
  5 |  *
  6 |  * Redistribution and use in source and binary forms, with or without
  7 |  * modification, are permitted provided that the following conditions are met:
  8 |  *
  9 |  * 1. Redistributions of source code must retain the above copyright notice,
 10 |  * this list of conditions and the following disclaimer.
 11 |  *
 12 |  * 2. Redistributions in binary form must reproduce the above copyright
 13 |  * notice, this list of conditions and the following disclaimer in the
 14 |  * documentation and/or other materials provided with the distribution.
 15 |  *
 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 19 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 20 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 21 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 22 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 23 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 24 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 25 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 26 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 27 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 28 |  */
 29 | 
 30 | #include <gelf.h>
 31 | #include <unistd.h>
 32 | #include <sys/fcntl.h>
 33 | #include <string.h>
 34 | #include <stdio.h>
 35 | #include <errno.h>
 36 | #include <stdlib.h>
 37 | #include "symtab.h"
 38 | #include "elf.h"
 39 | 
 40 | static char *my_strdup(char *s)
 41 | {
 42 | 	char *p = malloc(strlen(s) + 1);
 43 | 	if (p)
 44 | 		strcpy(p, s);
 45 | 	return p;
 46 | }
 47 | 
 48 | void read_symtab(Elf *elf)
 49 | {
 50 | 	Elf_Scn *section = NULL;
 51 | 
 52 | 	while ((section = elf_nextscn(elf, section)) != 0) {
 53 | 		GElf_Shdr shdr, *sh;
 54 | 		sh = gelf_getshdr(section, &shdr);
 55 | 
 56 | 		if (sh->sh_type == SHT_SYMTAB || sh->sh_type == SHT_DYNSYM) {
 57 | 			Elf_Data *data = elf_getdata(section, NULL);
 58 | 			GElf_Sym *sym, symbol;
 59 | 			int j;
 60 | 
 61 | 			unsigned numsym = sh->sh_size / sh->sh_entsize;
 62 | 			struct symtab *st = add_symtab(numsym);
 63 | 			for (j = 0; j < numsym; j++) {
 64 | 				struct sym *s;
 65 | 				sym = gelf_getsymshndx(data, NULL, j, &symbol, NULL);
 66 | 				s = &st->syms[j];
 67 | 				s->name = my_strdup(elf_strptr(elf, shdr.sh_link, sym->st_name));
 68 | 				s->val = sym->st_value;
 69 | 				s->size = sym->st_size;
 70 | 				s->hits = 0;
 71 | 			}
 72 | 			sort_symtab(st);
 73 | 		}
 74 | 	}
 75 | }
 76 | 
 77 | 
 78 | static Elf *elf_open(char *fn, int *fd)
 79 | {
 80 | 	*fd = open(fn, O_RDONLY);
 81 | 	if (*fd < 0) {
 82 | 		perror(fn);
 83 | 		return NULL;
 84 | 	}
 85 | 	Elf *elf = elf_begin(*fd, ELF_C_READ, NULL);
 86 | 	if (!elf) {
 87 | 		fprintf(stderr, "elf_begin failed for %s: %s\n",
 88 | 				fn, elf_errmsg(-1));
 89 | 		close(*fd);
 90 | 	}
 91 | 	return elf;
 92 | }
 93 | 
/* End the libelf session elf and close the file descriptor fd. */
static void elf_close(Elf *elf, int fd)
{
	elf_end(elf);
	close(fd);
}
 99 | 
100 | int read_elf(char *fn)
101 | {
102 | 	elf_version(EV_CURRENT);
103 | 
104 | 	int fd;
105 | 	Elf *elf = elf_open(fn, &fd);
106 | 	if (elf == NULL)
107 | 		return -1;
108 | 	read_symtab(elf);
109 | 	elf_close(elf, fd);
110 | 	return 0;
111 | }
112 | 


--------------------------------------------------------------------------------
/simple-pebs/elf.h:
--------------------------------------------------------------------------------
1 | int read_elf(char *fn);
2 | 


--------------------------------------------------------------------------------
/simple-pebs/histogram.c:
--------------------------------------------------------------------------------
 1 | /* Print histograms from simple-pebs output. */
 2 | 
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | #include <stdlib.h>
 6 | #include "map.h"
 7 | #include "elf.h"
 8 | #include "symtab.h"
 9 | 
10 | typedef unsigned long long u64;
11 | 
12 | #define err(x) perror(x), exit(1)
13 | 
14 | static int cmp_sym_hits(const void *ap, const void *bp)
15 | {
16 | 	const struct sym *a = ap;
17 | 	const struct sym *b = bp;
18 | 	return a->hits - b->hits;
19 | }
20 | 
21 | double min_percent = 1.0;
22 | 
23 | void print_histogram(u64 *map, int num)
24 | {
25 | 	int i;
26 | 	unsigned long total = 0, unknown = 0;
27 | 	struct sym *ref_next = NULL;
28 | 	struct sym *referenced = NULL;
29 | 
30 | 	int num_referenced = 0;
31 | 
32 | 	for (i = 0; i < num; i++) {
33 | 		struct sym *sym = findsym(map[i]);
34 | 		if (sym) {
35 | 			if (sym->hits == 0) {
36 | 				if (!referenced) {
37 | 					referenced = sym;
38 | 					ref_next = sym;
39 | 				}
40 | 				ref_next->link = sym;
41 | 				num_referenced++;
42 | 			}
43 | 			sym->hits++;
44 | 		} else
45 | 			unknown++;
46 | 		total++;
47 | 	}
48 | 	if (total == 0) {
49 | 		printf("no samples found\n");
50 | 		return;
51 | 	}
52 | 
53 | 	struct sym **ref = malloc(num_referenced * sizeof(struct sym *));
54 | 	struct sym *link;
55 | 	i = 0;
56 | 	for (link = referenced; link; link = link->link, i++)
57 | 		ref[i] = link;
58 | 	assert(i == num_referenced);
59 | 
60 | 	qsort(ref, num_referenced, sizeof(struct sym *), cmp_sym_hits);
61 | 
62 | 	printf("%5s %8s  %s\n", "PCT", "HITS", "NAME");
63 | 	printf("%5.2f%% %8lu unknown hits\n", 100. * ((double)unknown / total),
64 | 			unknown);
65 | 	for (i = 0; i < num_referenced; i++) {
66 | 		struct sym *sym = ref[i];
67 | 		double pct = 100. * ((double)sym->hits / total);
68 | 		if (pct <= min_percent)
69 | 			break;
70 | 		printf("%5.2f%% %8lu %s\n", pct, sym->hits, sym->name);
71 | 	}
72 | }
73 | 
74 | 
/* Print the command line help on stderr and exit with status 1. */
void usage(void)
{
	fprintf(stderr, "Usage: histogram file elf ...\n");
	/* the caller has no arguments to work with, so do not return */
	exit(1);
}
80 | 
81 | int main(int ac, char **av)
82 | {
83 | 	char *file = *++av;
84 | 
85 | 	if (!file)
86 | 		usage();
87 | 	while (*++av)
88 | 		read_elf(*av);
89 | 
90 | 	size_t fsize;
91 | 	u64 *fmap = mapfile(file, &fsize);
92 | 
93 | 	if (!fmap)
94 | 		err(file);
95 | 	print_histogram(fmap,  fsize / 8);
96 | 	return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/simple-pebs/map.c:
--------------------------------------------------------------------------------
 1 | #include "map.h"
 2 | 
 3 | #ifdef __linux__
 4 | 
 5 | #include <sys/mman.h>
 6 | #include <sys/fcntl.h>
 7 | #include <unistd.h>
 8 | #include <stddef.h>
 9 | #include <sys/stat.h>
10 | 
11 | #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
12 | 
13 | static int pagesize;
14 | 
/* Constructor: store the system page size in pagesize when the program is loaded. */
static void __attribute__((constructor)) init_ps(void)
{
	pagesize = sysconf(_SC_PAGESIZE);
}
19 | 
20 | void *mapfile(char *fn, size_t *size)
21 | {
22 | 	int fd = open(fn, O_RDWR);
23 | 	if (fd < 0)
24 | 		return NULL;
25 | 	struct stat st;
26 | 	void *map = (void *)-1L;
27 | 	if (fstat(fd, &st) >= 0) {
28 | 		*size = st.st_size;
29 | 		map = mmap(NULL, round_up(st.st_size, pagesize),
30 | 			   PROT_READ|PROT_WRITE,
31 | 			   MAP_PRIVATE, fd, 0);
32 | 	}
33 | 	close(fd);
34 | 	return map != (void *)-1L ? map : NULL;
35 | }
36 | 
/*
 * Unmap map; size is rounded up to a multiple of pagesize, the same
 * rounding mapfile() applies to the mapping length.
 */
void unmapfile(void *map, size_t size)
{
	munmap(map, round_up(size, pagesize));
}
41 | 
42 | #else
43 | /* Fallback based on stdio */
44 | #include <stdio.h>
45 | #include <stdlib.h>
46 | 
/*
 * stdio fallback: read the whole file fn into a malloc'ed buffer.
 * Returns the buffer and stores the file size in *size, or returns NULL
 * when the file cannot be opened, sized, allocated or read completely.
 */
void *mapfile(char *fn, size_t *size)
{
	/* binary mode: the data must not go through newline translation */
	FILE *f = fopen(fn, "rb");
	char *map = NULL;
	long len;
	int ok = 0;

	if (!f)
		return NULL;
	/* ftell() returns -1 on error; do not use that as a size */
	if (fseek(f, 0, SEEK_END) == 0 && (len = ftell(f)) >= 0) {
		*size = len;
		rewind(f);
		map = malloc(*size);
		ok = map && fread(map, 1, *size, f) == *size;
	}
	fclose(f);
	if (!ok) {
		free(map);
		map = NULL;
	}
	return map;
}
66 | 
/* Free a buffer returned by the stdio mapfile(); size is not used. */
void unmapfile(void *map, size_t size)
{
	free(map);
}
71 | 
72 | #endif
73 | 


--------------------------------------------------------------------------------
/simple-pebs/map.h:
--------------------------------------------------------------------------------
1 | #include <stddef.h>
2 | void *mapfile(char *fn, size_t *size);
3 | void unmapfile(void *map, size_t size);
4 | 


--------------------------------------------------------------------------------
/simple-pebs/samples.c:
--------------------------------------------------------------------------------
 1 | /* Dump sample data from linux kernel driver and resolve IPs */
 2 | #include <unistd.h>
 3 | #include <sys/mman.h>
 4 | #include <sys/fcntl.h>
 5 | #include <sys/ioctl.h>
 6 | #include <stdio.h>
 7 | #include <poll.h>
 8 | #include <stdlib.h>
 9 | #include <stdint.h>
10 | 
11 | #include "simple-pebs.h"
12 | #include "dump-util.h"
13 | #include "elf.h"
14 | #include "symtab.h"
15 | 
16 | #define err(x) perror(x), exit(1)
17 | 
18 | static void print_ip(uint64_t ip)
19 | {
20 | 	struct sym *sym = findsym(ip);
21 | 	if (sym) {
22 | 		printf("%s", sym->name);
23 | 		if (ip - sym->val > 0)
24 | 			printf("+%ld", ip - sym->val);
25 | 	} else
26 | 		printf("%lx", ip);
27 | }
28 | 
29 | void dump_data(int cpunum, u64 *map, int num)
30 | {
31 | 	int i;
32 | 	printf("dump %d\n", num);
33 | 	for (i = 0; i < num; i++) {
34 | 		printf("%d: %lx ", cpunum, map[i]);
35 | 		print_ip(map[i]);
36 | 		putchar('\n');
37 | 	}
38 | }
39 | 
40 | int main(int ac, char **av)
41 | {
42 | 	int size = get_size();
43 | 	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
44 | 	void *map[ncpus];
45 | 	struct pollfd pfd[ncpus];
46 | 
47 | 	while (*++av) {
48 | 		printf("reading %s\n", *av);
49 | 		read_elf(*av);
50 | 	}
51 | 
52 | 	int i;
53 | 	for (i = 0; i < ncpus; i++)
54 | 		open_cpu(&map[i], i, &pfd[i], size);
55 | 
56 | 	for (;;) {
57 | 		if (poll(pfd, ncpus, -1) < 0)
58 | 			perror("poll");
59 | 		for (i = 0; i < ncpus; i++) {
60 | 			if (pfd[i].revents & POLLIN) {
61 | 				int len;
62 | 
63 | 				if (ioctl(pfd[i].fd, SIMPLE_PEBS_GET_OFFSET, &len) < 0) {
64 | 					perror("SIMPLE_PEBS_GET_OFFSET");
65 | 					continue;
66 | 				}
67 | 
68 | 				/* copy out data */
69 | 				dump_data(i, map[i], len / sizeof(u64));
70 | 
71 | 				if (ioctl(pfd[i].fd, SIMPLE_PEBS_RESET, 0) < 0) {
72 | 					perror("SIMPLE_PEBS_RESET");
73 | 					continue;
74 | 				}
75 | 			}
76 | 		}
77 | 	}
78 | 	return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/simple-pebs/simple-pebs.h:
--------------------------------------------------------------------------------
1 | #define SIMPLE_PEBS_BASE 	0x7000
2 | #define SIMPLE_PEBS_SET_CPU    	(SIMPLE_PEBS_BASE + 1)
3 | #define SIMPLE_PEBS_GET_SIZE   	(SIMPLE_PEBS_BASE + 2)
4 | #define SIMPLE_PEBS_GET_OFFSET 	(SIMPLE_PEBS_BASE + 3)
5 | #define SIMPLE_PEBS_START	(SIMPLE_PEBS_BASE + 4)
6 | #define SIMPLE_PEBS_STOP	(SIMPLE_PEBS_BASE + 5)
7 | #define SIMPLE_PEBS_RESET	(SIMPLE_PEBS_BASE + 6)
8 | 


--------------------------------------------------------------------------------
/simple-pebs/symtab.c:
--------------------------------------------------------------------------------
 1 | /* Symtabs for simple-pebs */
 2 | /*
 3 |  * Copyright (c) 2015, Intel Corporation
 4 |  * Author: Andi Kleen
 5 |  * All rights reserved.
 6 |  *
 7 |  * Redistribution and use in source and binary forms, with or without
 8 |  * modification, are permitted provided that the following conditions are met:
 9 |  *
10 |  * 1. Redistributions of source code must retain the above copyright notice,
11 |  * this list of conditions and the following disclaimer.
12 |  *
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  * notice, this list of conditions and the following disclaimer in the
15 |  * documentation and/or other materials provided with the distribution.
16 |  *
17 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
20 |  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 |  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
22 |  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 |  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 |  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 |  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
28 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
29 |  */
30 | 
31 | 
32 | #include <stdlib.h>
33 | #include <stdio.h>
34 | #include <errno.h>
35 | #include "symtab.h"
36 | 
37 | struct symtab *symtabs;
38 | 
39 | struct symtab *add_symtab(unsigned num)
40 | {
41 | 	struct symtab *st = malloc(sizeof(struct symtab));
42 | 	if (!st)
43 | 		exit(ENOMEM);
44 | 	st->num = num;
45 | 	st->next = symtabs;
46 | 	st->syms = calloc(num * sizeof(struct sym), 1);
47 | 	if (!st->syms)
48 | 		exit(ENOMEM);
49 | 	symtabs = st;
50 | 	return st;
51 | }
52 | 
53 | int cmp_sym(const void *ap, const void *bp)
54 | {
55 | 	const struct sym *a = ap;
56 | 	const struct sym *b = bp;
57 | 	if (a->val >= b->val && a->val < b->val + b->size)
58 | 		return 0;
59 | 	if (b->val >= a->val && b->val < a->val + a->size)
60 | 		return 0;
61 | 	return a->val - b->val;
62 | }
63 | 
64 | struct sym *findsym(unsigned long val)
65 | {
66 | 	struct symtab *st;
67 | 	struct sym search = { .val = val }, *s;
68 | 	for (st = symtabs; st; st = st->next) {
69 | 		s = bsearch(&search, st->syms,  st->num, sizeof(struct sym), cmp_sym);
70 | 		if (s)
71 | 			return s;
72 | 	}
73 | 	return NULL;
74 | }
75 | 
76 | void dump_symtab(struct symtab *st)
77 | {
78 | 	int j;
79 | 	for (j = 0; j < st->num; j++) {
80 | 		struct sym *s = &st->syms[j];
81 | 		if (s->val && s->name[0])
82 | 			printf("%lx %s\n", s->val, s->name);
83 | 	}
84 | }
85 | 
/* Sort the symbols of st in place with cmp_sym, the comparator findsym() searches with. */
void sort_symtab(struct symtab *st)
{
	qsort(st->syms, st->num, sizeof(struct sym), cmp_sym);
}
90 | 


--------------------------------------------------------------------------------
/simple-pebs/symtab.h:
--------------------------------------------------------------------------------
/* One symbol of a loaded symbol table. */
struct sym {
	char *name;		/* symbol name; the ELF reader stores a heap copy */
	unsigned long val;	/* symbol value (start address) */
	unsigned long size;	/* symbol size in bytes */
	unsigned long hits;	/* number of samples counted for this symbol */
	struct sym *link;	/* chains the symbols that received hits (histogram tool) */
};
 8 | 
/* A table of symbols; all tables are chained through next. */
struct symtab {
	struct symtab *next;	/* next table in the global symtabs list */
	unsigned num;		/* number of entries in syms */
	struct sym *syms;	/* array of num symbols */
};
14 | 
15 | extern struct symtab *symtabs;
16 | 
17 | struct sym *findsym(unsigned long val);
18 | struct symtab *add_symtab(unsigned num);
19 | void dump_symtab(struct symtab *st);
20 | void sort_symtab(struct symtab *st);
21 | 


--------------------------------------------------------------------------------
/test-uncore.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "Unit": "CBO",
 4 |     "EventCode": "0x34",
 5 |     "UMask": "0x9",
 6 |     "EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP",
 7 |     "Description": "tbd",
 8 |     "Counter": "0,1",
 9 |     "MSRValue": "0",
10 |     "Filter": "CBoFilter[22:18]",
11 |     "Internal": "0"
12 |   },
13 |   {
14 |     "Unit": "HA",
15 |     "EventCode": "0xc",
16 |     "UMask": "0x2",
17 |     "EventName": "UNC_H_DIRECTORY_LOOKUP.NO_SNP",
18 |     "Description": "tbd",
19 |     "Counter": "0,1,2,3",
20 |     "MSRValue": "0",
21 |     "Filter": "null",
22 |     "Internal": "0"
23 |   },
24 |   {
25 |     "Unit": "iMC",
26 |     "EventCode": "0x5",
27 |     "UMask": "0x4",
28 |     "EventName": "UNC_M_DRAM_REFRESH.HIGH",
29 |     "Description": "tbd",
30 |     "Counter": "0,1,2,3",
31 |     "MSRValue": "0",
32 |     "Filter": "null",
33 |     "Internal": "0"
34 |   },
35 |   {
36 |     "Unit": "UBOX",
37 |     "EventCode": "0x0",
38 |     "UMask": "0x0",
39 |     "EventName": "UNC_U_CLOCKTICKS",
40 |     "Description": "tbd",
41 |     "Counter": "0",
42 |     "MSRValue": "0",
43 |     "Filter": "null",
44 |     "Internal": "0"
45 |   }
46 | ]
47 | 


--------------------------------------------------------------------------------
/tl-serve:
--------------------------------------------------------------------------------
1 | tl-serve.py


--------------------------------------------------------------------------------
/tl_io.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2020, Intel Corporation
  2 | # Author: Andi Kleen
  3 | #
  4 | # This program is free software; you can redistribute it and/or modify it
  5 | # under the terms and conditions of the GNU General Public License,
  6 | # version 2, as published by the Free Software Foundation.
  7 | #
  8 | # This program is distributed in the hope it will be useful, but WITHOUT
  9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 10 | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 11 | # more details.
 12 | from __future__ import print_function
 13 | import sys
 14 | import subprocess
 15 | import os
 16 | import argparse
 17 | if sys.version_info.major == 3:
 18 |     from typing import Set # noqa
 19 | 
 20 | if sys.version_info.major == 3:
 21 |     popentext = dict(universal_newlines=True)
 22 | else:
 23 |     popentext = {}
 24 | 
 25 | def popen_stdout(cmd):
 26 |     return subprocess.Popen(cmd, stdout=subprocess.PIPE, **popentext) # type: ignore
 27 | 
 28 | def popen_stdinout(cmd, f):
 29 |     return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=f, **popentext) # type: ignore
 30 | 
 31 | def flex_open_r(fn):
 32 |     if fn.endswith(".xz"):
 33 |         xz = popen_stdout(["xz", "-d", "--stdout", fn])
 34 |         return xz.stdout
 35 |     if fn.endswith(".gz"):
 36 |         gzip = popen_stdout(["gzip", "-d", "-c", fn])
 37 |         return gzip.stdout
 38 |     if fn.endswith(".zst"):
 39 |         return popen_stdout(["zstd", "-d", "--stdout", fn]) .stdout
 40 |     return open(fn, 'r')
 41 | 
 42 | def flex_open_w(fn):
 43 |     f = open(fn, "w")
 44 |     if fn.endswith(".xz"):
 45 |         xz = popen_stdinout(["xz", "-z", "--stdout"], f)
 46 |         return xz.stdin
 47 |     if fn.endswith(".gz"):
 48 |         gzip = popen_stdinout(["gzip", "-c"], f)
 49 |         return gzip.stdin
 50 |     if fn.endswith(".zst"):
 51 |         return popen_stdinout(["zstd", "--stdout"], f).stdin
 52 |     return f
 53 | 
 54 | tl_tester = os.getenv("TL_TESTER")
 55 | test_mode = tl_tester and tl_tester != "0"
 56 | 
 57 | args = argparse.Namespace()
 58 | 
def set_args(a):
    # Replace the module-wide args object that the warning and debug
    # helpers in this module consult (quiet, debug, dfilter).
    global args
    args = a
 62 | 
 63 | def warn_no_assert(msg):
 64 |     if not args.quiet:
 65 |         print("warning: " + msg, file=sys.stderr)
 66 | 
 67 | def warn_test(msg):
 68 |     if test_mode:
 69 |         warn_no_assert(msg)
 70 | 
 71 | def warn(msg):
 72 |     warn_no_assert(msg)
 73 |     if test_mode:
 74 |         assert 0, msg
 75 | 
 76 | warned = set() # type: Set[str]
 77 | 
 78 | def warn_once_no_assert(msg):
 79 |     if msg not in warned and not args.quiet:
 80 |         print("warning: " + msg, file=sys.stderr)
 81 |         warned.add(msg)
 82 | 
 83 | def warn_once(msg):
 84 |     warn_once_no_assert(msg)
 85 |     if test_mode:
 86 |         assert 0, msg
 87 | 
 88 | def print_once(msg):
 89 |     if msg not in warned and not args.quiet:
 90 |         print(msg)
 91 |         warned.add(msg)
 92 | 
 93 | def inform(msg):
 94 |     if not args.quiet:
 95 |         print(msg)
 96 | 
 97 | def debug_print(x):
 98 |     if args.debug:
 99 |         print(x, file=sys.stderr)
100 | 
def obj_debug_print(obj, x):
    # Print x on stderr when debug is set, or when a debug filter is set
    # and contains the name of obj.
    show = args.debug
    if not show:
        show = args.dfilter and obj.name in args.dfilter
    if show:
        print(x, file=sys.stderr)
104 | 
105 | def test_debug_print(x):
106 |     if args.debug or test_mode:
107 |         print(x, file=sys.stderr)
108 | 


--------------------------------------------------------------------------------
/tl_stat.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2012-2020, Intel Corporation
 2 | # Author: Andi Kleen
 3 | #
 4 | # This program is free software; you can redistribute it and/or modify it
 5 | # under the terms and conditions of the GNU General Public License,
 6 | # version 2, as published by the Free Software Foundation.
 7 | #
 8 | # This program is distributed in the hope it will be useful, but WITHOUT
 9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11 | # more details.
12 | #
13 | # Maintain error data on perf measurements
14 | from __future__ import print_function
15 | import math
16 | from collections import namedtuple
17 | from tl_io import warn, warn_test, inform
18 | 
19 | ValStat = namedtuple('ValStat', ['stddev', 'multiplex'])
20 | 
def geoadd(l):
    """Return the square root of the sum of squares of the values in l."""
    squares = [x**2 for x in l]
    return math.sqrt(sum(squares))
23 | 
# Combine several ValStat: the stddevs are merged with geoadd() and the
# smallest multiplex ratio is kept.
# XXX better way to combine multiplex ratios?
def combine_valstat(l):
    """Merge the ValStat entries in l into one; an empty l yields []."""
    if l:
        stddevs = [s.stddev for s in l]
        ratios = [s.multiplex for s in l]
        return ValStat(geoadd(stddevs), min(ratios))
    return []
30 | 
class ComputeStat:
    """Maintain statistics on measurement data.

    Attributes are filled in by the code doing the measurements:
    referenced   indexes of results that were referenced
    errcount     number of nodes with zero counts since the last report
    errors       names of nodes with zero counts since the last report
    mismeasured  names of nodes with out of bound values
    """
    def __init__(self, quiet):
        self.referenced = set()
        self.already_warned = set()
        self.errcount = 0
        self.errors = set()
        self.prev_errors = set()
        self.mismeasured = set()
        self.prev_mismeasured = set()
        self.quiet = quiet

    def referenced_check(self, res, evnum):
        """Sanity check that all results in res were referenced.

        res maps keys to result lists, evnum is the list of events that
        were measured. Only looks at the first entry of res. Entries in
        self.referenced that were already checked before are ignored.
        """
        referenced = self.referenced - self.already_warned
        if not referenced:
            return
        self.already_warned |= referenced

        # sanity check: did we reference all results?
        if len(res) == 0:
            return
        r = next(iter(res.values()))
        if len(r) != len(evnum):
            warn("results len %d does not match event len %d" % (len(r), len(evnum)))
            return
        if len(referenced) != len(r):
            # "dummy" events are placeholders and never referenced
            dummies = {i for i, d in enumerate(evnum) if d == "dummy"}
            notr = set(range(len(r))) - referenced - dummies
            if notr:
                warn_test("%d results not referenced: " % (len(notr)) +
                      " ".join(["%d" % x for x in sorted(notr)]))

    def compute_errors(self):
        """Report zero count and mismeasured nodes that were not reported yet."""
        if self.errcount > 0 and self.errors != self.prev_errors:
            inform(("%d nodes had zero counts: " % (self.errcount)) +
                           " ".join(sorted(self.errors)))
            self.errcount = 0
            self.prev_errors = self.errors
            self.errors = set()
        if self.mismeasured and self.mismeasured > self.prev_mismeasured:
            inform("Mismeasured (out of bound values): " +
                    " ".join(sorted(self.mismeasured)))
            # Keep a copy: aliasing the live set would make the superset
            # check above always fail once nodes are added in place.
            self.prev_mismeasured = set(self.mismeasured)


--------------------------------------------------------------------------------
/tldata.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import csv
 3 | import re
 4 | from collections import defaultdict
 5 | import gen_level
 6 | 
 7 | class TLData:
 8 |     """Read a toplev output CSV file.
 9 | 
10 |    Exported:
11 |     times[n] All time stamps
12 |     vals[n]  All values, as dicts mapping (name, cpu)->float
13 |     levels{name} All levels (includes metrics), name->list of fields
14 |     units{name}  All units, name->unit
15 |     headers(set) All headers (including metrics)
16 |     metrics(set) All metrics
17 |     helptxt[col] All help texts.
18 |     cpus(set)    All CPUs
19 |     """
20 | 
21 |     def __init__(self, fn, verbose=False):
22 |         self.times = []
23 |         self.vals = []
24 |         self.fn = fn
25 |         self.levels = defaultdict(set)
26 |         self.metrics = set()
27 |         self.headers = set()
28 |         self.mtime = None
29 |         self.helptxt = {}
30 |         self.cpus = set()
31 |         self.verbose = verbose
32 |         self.units = {}
33 | 
34 |     def update(self):
35 |         mtime = os.path.getmtime(self.fn)
36 |         if self.mtime == mtime:
37 |             return
38 |         self.mtime = mtime
39 |         csvf = csv.reader(open(self.fn, 'r'))
40 |         prevts = None
41 |         val = {}
42 |         for r in csvf:
43 |             if r[0].strip().startswith("#"):
44 |                 continue
45 |             if r[0] == "Timestamp" or r[0] == "CPUs":
46 |                 continue
47 |             # 1.001088024,C1,Frontend_Bound,42.9,% Slots,,frontend_retired.latency_ge_4:pp,0.0,100.0,<==,Y
48 |             if re.match(r'[CS]?\d+.*', r[1]):
49 |                 ts, cpu, name, pct, unit, helptxt = r[0], r[1], r[2], r[3], r[4], r[5]
50 |             else:
51 |                 ts, name, pct, unit, helptxt = r[0], r[1], r[2], r[3], r[4]
52 |                 cpu = None
53 |             key = (name, cpu)
54 |             ts, pct = float(ts), float(pct.replace("%", ""))
55 |             if name not in self.helptxt or self.helptxt[name] == "":
56 |                 self.helptxt[name] = helptxt
57 |             if unit.endswith("<"):
58 |                 unit = unit[:-2]
59 |                 if not self.verbose:
60 |                     continue
61 |             self.units[name] = unit
62 |             if prevts and ts != prevts:
63 |                 self.times.append(prevts)
64 |                 self.vals.append(val)
65 |                 val = {}
66 |             val[key] = pct
67 |             n = gen_level.level_name(name)
68 |             if cpu:
69 |                 self.cpus.add(cpu)
70 |             self.headers.add(name)
71 |             if gen_level.is_metric(name):
72 |                 self.metrics.add(n)
73 |             self.levels[n].add(name)
74 |             prevts = ts
75 |         if len(val.keys()) > 0:
76 |             self.times.append(prevts)
77 |             self.vals.append(val)
78 | 
# Levels that are always plotted first, in this order.
early_plots = ["TopLevel", "CPU utilization", "Power", "Frequency", "CPU-METRIC"]

def sort_key(i, data):
    """Return the sort rank of level i for plotting.

    Levels in early_plots rank by their position there, metrics rank 30,
    everything else ranks by its position among the keys of data.levels.
    """
    try:
        return early_plots.index(i)
    except ValueError:
        pass
    if i in data.metrics:
        return 30
    all_levels = list(data.levels.keys())
    return all_levels.index(i)
87 | 
def level_order(data):
    """Return plot order of all levels."""
    def plot_rank(level):
        return sort_key(level, data)
    return sorted(data.levels.keys(), key=plot_rank)
91 | 


--------------------------------------------------------------------------------
/topdown-csv/mock.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Do basic python sanity check of translation output
 3 | import sys
 4 | sys.path.append(".")
 5 | import t
# Objects handed to R.run() are collected in l
l = []
# Objects handed to R.metric() are collected in m
m = []
 8 | 
 9 | def pev(e):
10 |     print("\t",e)
11 |     return 1
12 | 
class R:
    """Runner stand-in that only records what gets registered with it."""

    def run(self, p):
        """Remember p in the module level list l."""
        l.append(p)

    def metric(self, p):
        """Remember p in the module level list m."""
        m.append(p)
19 | 
# Let the translated model register everything with the recording runner.
t.Setup(R())

# Set thresh on every object before any of them is computed.
for node in l:
    node.thresh = True

def _ev(e, level):
    # Event callback passed to compute(): print the event, ignore the level.
    return pev(e)

for node in l:
    print(node.name)
    node.compute(_ev)
    if node.sample:
        print("    Sample:", " ".join(node.sample))
    if node.sibling:
        print("    Siblings:", " ".join([o.name for o in node.sibling]))

for metric in m:
    print(metric.name)
    metric.compute(_ev)
34 | 
35 | 


--------------------------------------------------------------------------------
/toplev:
--------------------------------------------------------------------------------
1 | toplev.py


--------------------------------------------------------------------------------
/toplev.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andikleen/pmu-tools/65e4504d384aadc24ddfe4aabb50eaf62b7f294d/toplev.ico


--------------------------------------------------------------------------------
/topology:
--------------------------------------------------------------------------------
1 | /sys/bus/event_source/devices/uncore_ha
2 | /sys/bus/event_source/devices/uncore_ha/format/umask
3 | 


--------------------------------------------------------------------------------
/tsx_metrics.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # TSX metrics
 3 | #
 4 | 
 5 | # XXX force all these into a single group
 6 | # XXX: force % in caller
 7 | 
 8 | import os
 9 | 
def TXCycles(EV, level):
    """Return the cpu/cycles-t/ count divided by the cycles count at level."""
    in_tx = EV("cpu/cycles-t/", level)
    total = EV("cycles", level)
    return in_tx / total
12 | 
# Metric: percentage of all cycles that were spent inside a transaction.
class TransactionalCycles:
    name = "Transactional cycles"
    desc = """
Percent cycles spent in a transaction. When low or zero either the program
does not use locks (or other transactions), or the locks are not enabled with lock elision."""
    subplot = "TSX"
    unit = "%"
    sample = ["mem_uops_retired.lock_loads"]
    server = True
    def compute(self, EV):
        """Set val to the percentage and thresh to whether it reaches 0.01.

        A zero cycle count gives val 0 and thresh False.
        """
        try:
            percent = TXCycles(EV, 1) * 100.
        except ZeroDivisionError:
            self.val = 0
            self.thresh = False
        else:
            self.val = percent
            self.thresh = (percent >= 0.01)
29 | 
# Metric: percentage of all cycles that were transactional but not committed.
class AbortedCycles:
    name = "Aborted cycles"
    desc = """
Percent cycles wasted in transaction aborts. When a significant part of the transactional cycles
start sampling for abort causes."""
    subplot = "TSX"
    unit = "%"
    sample = ["cpu/tx-abort/pp", "cpu/hle-abort/pp"]
    server = True
    def compute(self, EV):
        """Set val to the percentage and thresh to whether it reaches 0.01.

        A zero cycle count gives val 0 and thresh False.
        """
        try:
            in_tx = EV("cpu/cycles-t/", 1)
            committed = EV("cpu/cycles-ct/", 1)
            total = EV("cycles", 1)
            percent = ((in_tx - committed) / total) * 100.
        except ZeroDivisionError:
            self.val = 0
            self.thresh = False
        else:
            self.val = percent
            self.thresh = (percent >= 0.01)
46 | 
# Metric: transactional cycles per started RTM transaction.
class AverageRTM:
    name = "Average RTM transaction length"
    desc = """
Average RTM transaction length. Assumes most transactions are RTM.
When low consider increasing the size of the critical sections to lower overhead."""
    subplot = "TSX Latencies"
    unit = "cycles"
    server = True
    def compute(self, EV):
        """Set val to cycles-t / RTM_RETIRED.START.

        thresh is set when TXCycles reaches 0.01 and val is positive.
        Any division by zero gives val 0 and thresh False.
        """
        try:
            self.val = EV("cpu/cycles-t/", 1) / EV("RTM_RETIRED.START", 1)
            enough_tx = TXCycles(EV, 1) >= 0.01
            self.thresh = enough_tx and self.val > 0
        except ZeroDivisionError:
            self.val, self.thresh = 0, False
62 | 
# Metric: transactional cycles per started HLE transaction.
class AverageHLE:
    name = "Average HLE transaction length"
    desc = """
Average HLE transaction length. Assumes most transactions are HLE.
When low consider increasing the size of the critical sections to lower overhead."""
    subplot = "TSX Latencies"
    unit = "cycles"
    def compute(self, EV):
        """Set val to cycles-t / HLE_RETIRED.START.

        thresh is set when TXCycles reaches 0.01 and val is positive.
        Any division by zero gives val 0 and thresh False.
        """
        try:
            self.val = EV("cpu/cycles-t/", 1) / EV("HLE_RETIRED.START", 1)
            enough_tx = TXCycles(EV, 1) >= 0.01
            self.thresh = enough_tx and self.val > 0
        except ZeroDivisionError:
            self.val, self.thresh = 0, False
77 | 
# Registers the TSX metrics with r when the kernel exposes the cycles-t event.
class Setup:
    def __init__(self, r):
        # XXX allow override
        if not os.path.exists("/sys/bus/event_source/devices/cpu/events/cycles-t"):
            return
        # AverageHLE is deliberately not registered
        for metric in (TransactionalCycles, AbortedCycles, AverageRTM):
            r.force_metric(metric())
86 | 


--------------------------------------------------------------------------------
/ucevent/CHECK-ALL:
--------------------------------------------------------------------------------
#!/bin/sh
# Run all events (including unsupported and broken ones) for 2 seconds each.
# Requires GNU parallel (http://ftp.gnu.org/gnu/parallel/)
# Arguments are passed on to parallel; $WRAP, if set, is put in front of ucevent.py.
./ucevent.py --unsupported --broken --name-only > all-events
parallel --no-notice --halt 1 "$@" -n 1 -k < all-events sh -c "echo EVENT {} ; $WRAP ./ucevent.py -v {} sleep 2.2"
6 | 


--------------------------------------------------------------------------------
/ucevent/CHECK-DERIVED:
--------------------------------------------------------------------------------
#!/bin/sh
# Run every event listed by "ucevent.py --name-only" for 2 seconds each.
# Requires GNU parallel (http://ftp.gnu.org/gnu/parallel/)
# Arguments are passed on to parallel; $WRAP, if set, is put in front of ucevent.py.
./ucevent.py --name-only > derived-events
parallel --no-notice --halt 1 "$@" -n 1 -k < derived-events sh -c "echo EVENT {} ; $WRAP ./ucevent.py -v {} sleep 2.2"
5 | 


--------------------------------------------------------------------------------
/ucevent/JKT:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | FORCECPU=jkt ./ucevent.py --mock "$@"
3 | 


--------------------------------------------------------------------------------
/ucevent/MOCK-ALL:
--------------------------------------------------------------------------------
#!/bin/sh
# Run all events (including unsupported and broken ones) through ucevent.py --mock.
# FORCECPU defaults to hsx when not set by the caller.
# Arguments are passed on to parallel; $WRAP, if set, is put in front of ucevent.py.
export FORCECPU=${FORCECPU:-hsx}
./ucevent.py --mock --unsupported --broken --name-only > all-events
parallel --halt 1 --no-notice "$@" -n 1 -k < all-events sh -c "echo EVENT {} ; $WRAP ./ucevent.py --mock -v {}"
5 | 
6 | 


--------------------------------------------------------------------------------
/ucevent/Makefile:
--------------------------------------------------------------------------------
# clean is a command, not a file: keep it working even if a file named
# "clean" exists in this directory.
.PHONY: clean

# Manpage generated from the --help output plus the sections in ucevent.s
ucevent.man: ucevent.py ucevent.s
	FORCECPU=jkt help2man --version-string=" " -i ucevent.s -N ./ucevent.py > ucevent.man

# help2man include file generated from the README
ucevent.s: README.md
	./md2hman.py < README.md > ucevent.s

clean:
	rm -f ucevent.s
9 | 


--------------------------------------------------------------------------------
/ucevent/RUN-ALL:
--------------------------------------------------------------------------------
#!/bin/sh
# Run the test scripts for each CPU in CPULIST.
# MOCK=1 don't run perf
# WRAP=... python wrapper
# CPULIST=... CPUs to test (default: all listed below)

# NOTE(review): with set -e a failing ./$i ends the script before
# "echo STATUS" runs, so STATUS can only ever print 0 - confirm this is intended.
set -e

CPULIST="${CPULIST:-jkt ivt hsx bdxde bdx skx icx}"

for cpu in $CPULIST ; do

export FORCECPU=$cpu
echo $cpu

# The CHECK-* scripts are only added when MOCK is not set
EXTRA=""
[ -z "$MOCK" ] && EXTRA=CHECK-*

for i in $EXTRA SANITY-ALL MOCK-ALL ; do
	echo $i 
	./$i
	echo STATUS $?
done

./uctester


done
27 | 


--------------------------------------------------------------------------------
/ucevent/SANITY-ALL:
--------------------------------------------------------------------------------
#!/bin/sh
# Run sanity-run.py on all events (including unsupported and broken ones)
# for 2 seconds each. With MOCK set, --mock is passed to sanity-run.py.
# Requires GNU parallel (http://ftp.gnu.org/gnu/parallel/)
# Arguments are passed on to parallel; $WRAP, if set, is put in front of sanity-run.py.
EXTRA=""
[ -n "$MOCK" ] && EXTRA=--mock
./ucevent.py --unsupported --broken --name-only > all-events
parallel --no-notice --halt 1 "$@" -n 1 -k < all-events sh -c "echo EVENT {} ; $WRAP ./sanity-run.py $EXTRA -v {} sleep 2.2"
8 | 


--------------------------------------------------------------------------------
/ucevent/bdx_extra.py:
--------------------------------------------------------------------------------
1 | jkt_extra.py


--------------------------------------------------------------------------------
/ucevent/bdxde_extra.py:
--------------------------------------------------------------------------------
1 | # empty for now
2 | 
3 | extra_derived = {}
4 | 


--------------------------------------------------------------------------------
/ucevent/dygraph-out.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | <title>ucevent output</title>
 4 | <script type="text/javascript"
 5 |   src="http://dygraphs.com/dygraph-combined.js"></script>
 6 | </head>
 7 | <body>
 8 | <div id="graphdiv2"
 9 |   style="width:1000px; height:500px;"></div>
10 | 
<!-- XXX auto generate the labels with right names/numbers -->
<!-- The id of each checkbox is passed to g2.setVisibility() by metricchange(),
     so it has to be unique and has to match the "for" of its label. -->
<input type=checkbox id=0 onClick="metricchange(this)" checked>
<label for="0"> Metric 1 </label>
<input type=checkbox id=1 onClick="metricchange(this)" checked>
<label for="1"> Metric 2 </label>
<input type=checkbox id=2 onClick="metricchange(this)" checked>
<label for="2"> Metric 3 </label>
<input type=checkbox id=3 onClick="metricchange(this)" checked>
<label for="3"> Metric 4 </label>
<input type=checkbox id=4 onClick="metricchange(this)" checked>
<label for="4"> Metric 5 </label>
<input type=checkbox id=5 onClick="metricchange(this)" checked>
<label for="5"> Metric 6 </label>
<input type=checkbox id=6 onClick="metricchange(this)" checked>
<label for="6"> Metric 7 </label>
26 | 
27 | <script type="text/javascript">
28 |   g2 = new Dygraph(
29 |     document.getElementById("graphdiv2"),
30 |     "out.csv",
31 |     { 
32 |     }
33 |   );
34 | 
35 | function metricchange(el) {
36 |   g2.setVisibility(el.id, el.checked);
37 | }
38 | 
39 | </script>
40 | </body>
41 | </html>
42 | 


--------------------------------------------------------------------------------
/ucevent/hsx_extra.py:
--------------------------------------------------------------------------------
1 | # empty for now
2 | 
3 | extra_derived = {}
4 | 


--------------------------------------------------------------------------------
/ucevent/icx_extra.py:
--------------------------------------------------------------------------------
1 | extra_derived = { }
2 | 


--------------------------------------------------------------------------------
/ucevent/ivt_extra.py:
--------------------------------------------------------------------------------
1 | jkt_extra.py


--------------------------------------------------------------------------------
/ucevent/md2hman.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # convert README.md to include files for help2man
 3 | from __future__ import print_function
 4 | import sys
 5 | import re
 6 | 
 7 | skip_sections = ( "Command Line options reference", "Debugging and testing", "Support",
 8 |           "Author", "Other Projects providing uncore monitoring")
 9 | 
10 | tabmode = False
11 | skip = False
12 | for l in sys.stdin:
13 |     after = ""
14 |     l = l.rstrip()
15 |     if l and l[0] == '#':
16 |         skip = False
17 |         if l[2:] in skip_sections:
18 |             skip = True
19 |             continue
20 |         print("[%s]" % (l[2:]))
21 |         continue
22 |     elif l == "" and not skip:
23 |         print(".PP")
24 |         tabmode = False
25 |         continue
26 |     if skip:
27 |         continue
28 |     if l and l[0] == '\t' and l[1:]:
29 |         if not tabmode:
30 |             print(".nf\n.sp")
31 |             tabmode = True
32 |         #print(".I ", end=" ")
33 |     elif tabmode:
34 |         after = ".fi"
35 |         tabmode = False
36 |     if l and l[0] == '-':
37 |         print(".TP")
38 |         l = l[2:]
39 |     if l and l[0:2] == "**":
40 |         print(".B ", end=" ")
41 |         l = l.replace("**","")
42 |     if l and l[0] == '[':
43 |         m = re.match(r"\[(.*)\]\s*\((.*)\)(.*)", l)
44 |         #l = '.URL "%s" "%s"\n%s' % (m.group(2), m.group(1), m.group(3))
45 |         l = m.group(2) + " " + m.group(1) + " " + m.group(3)
46 |     print(l)
47 |     if after:
48 |         print(after)
49 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.10/0002-perf-Add-sysfs-entry-to-adjust-multiplexing-interval.patch:
--------------------------------------------------------------------------------
  1 | From 4341acbf37e5eac1607ab4aa59e6ec941f9ee7b7 Mon Sep 17 00:00:00 2001
  2 | From: Stephane Eranian <eranian@google.com>
  3 | Date: Wed, 3 Apr 2013 14:21:34 +0200
  4 | Subject: [PATCH 2/5] perf: Add sysfs entry to adjust multiplexing interval
  5 |  per PMU
  6 | 
  7 | This patch adds /sys/device/xxx/perf_event_mux_interval_ms to ajust
  8 | the multiplexing interval per PMU. The unit is milliseconds. Value has
  9 | to be >= 1.
 10 | 
 11 | In the 4th version, we renamed the sysfs file to be more consistent
 12 | with the other /proc/sys/kernel entries for perf_events.
 13 | 
 14 | In the 5th version, we handle the reprogramming of the hrtimer using
 15 | hrtimer_forward_now(). That way, we sync up to new timer value quickly
 16 | (suggested by Jiri Olsa).
 17 | 
 18 | Signed-off-by: Stephane Eranian <eranian@google.com>
 19 | Signed-off-by: Peter Zijlstra <peterz@infradead.org>
 20 | Cc: Frederic Weisbecker <fweisbec@gmail.com>
 21 | Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
 22 | Link: http://lkml.kernel.org/r/1364991694-5876-3-git-send-email-eranian@google.com
 23 | Signed-off-by: Ingo Molnar <mingo@kernel.org>
 24 | ---
 25 |  include/linux/perf_event.h |    1 +
 26 |  kernel/events/core.c       |   63 +++++++++++++++++++++++++++++++++++++++++---
 27 |  2 files changed, 60 insertions(+), 4 deletions(-)
 28 | 
 29 | diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
 30 | index 3012c54..f42e8fb 100644
 31 | --- a/include/linux/perf_event.h
 32 | +++ b/include/linux/perf_event.h
 33 | @@ -194,6 +194,7 @@ struct pmu {
 34 |  	int * __percpu			pmu_disable_count;
 35 |  	struct perf_cpu_context * __percpu pmu_cpu_context;
 36 |  	int				task_ctx_nr;
 37 | +	int				hrtimer_interval_ms;
 38 |  
 39 |  	/*
 40 |  	 * Fully disable/enable this PMU, can be used to protect from the PMI
 41 | diff --git a/kernel/events/core.c b/kernel/events/core.c
 42 | index 71a39cc..5218771 100644
 43 | --- a/kernel/events/core.c
 44 | +++ b/kernel/events/core.c
 45 | @@ -720,13 +720,21 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 46 |  {
 47 |  	struct hrtimer *hr = &cpuctx->hrtimer;
 48 |  	struct pmu *pmu = cpuctx->ctx.pmu;
 49 | +	int timer;
 50 |  
 51 |  	/* no multiplexing needed for SW PMU */
 52 |  	if (pmu->task_ctx_nr == perf_sw_context)
 53 |  		return;
 54 |  
 55 | -	cpuctx->hrtimer_interval =
 56 | -		ns_to_ktime(NSEC_PER_MSEC * PERF_CPU_HRTIMER);
 57 | +	/*
 58 | +	 * check default is sane, if not set then force to
 59 | +	 * default interval (1/tick)
 60 | +	 */
 61 | +	timer = pmu->hrtimer_interval_ms;
 62 | +	if (timer < 1)
 63 | +		timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
 64 | +
 65 | +	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 66 |  
 67 |  	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 68 |  	hr->function = perf_cpu_hrtimer_handler;
 69 | @@ -6076,9 +6084,56 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
 70 |  	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
 71 |  }
 72 |  
 73 | +static ssize_t
 74 | +perf_event_mux_interval_ms_show(struct device *dev,
 75 | +				struct device_attribute *attr,
 76 | +				char *page)
 77 | +{
 78 | +	struct pmu *pmu = dev_get_drvdata(dev);
 79 | +
 80 | +	return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
 81 | +}
 82 | +
 83 | +static ssize_t
 84 | +perf_event_mux_interval_ms_store(struct device *dev,
 85 | +				 struct device_attribute *attr,
 86 | +				 const char *buf, size_t count)
 87 | +{
 88 | +	struct pmu *pmu = dev_get_drvdata(dev);
 89 | +	int timer, cpu, ret;
 90 | +
 91 | +	ret = kstrtoint(buf, 0, &timer);
 92 | +	if (ret)
 93 | +		return ret;
 94 | +
 95 | +	if (timer < 1)
 96 | +		return -EINVAL;
 97 | +
 98 | +	/* same value, noting to do */
 99 | +	if (timer == pmu->hrtimer_interval_ms)
100 | +		return count;
101 | +
102 | +	pmu->hrtimer_interval_ms = timer;
103 | +
104 | +	/* update all cpuctx for this PMU */
105 | +	for_each_possible_cpu(cpu) {
106 | +		struct perf_cpu_context *cpuctx;
107 | +		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
108 | +		cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
109 | +
110 | +		if (hrtimer_active(&cpuctx->hrtimer))
111 | +			hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
112 | +	}
113 | +
114 | +	return count;
115 | +}
116 | +
117 | +#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
118 | +
119 |  static struct device_attribute pmu_dev_attrs[] = {
120 | -       __ATTR_RO(type),
121 | -       __ATTR_NULL,
122 | +	__ATTR_RO(type),
123 | +	__ATTR_RW(perf_event_mux_interval_ms),
124 | +	__ATTR_NULL,
125 |  };
126 |  
127 |  static int pmu_bus_running;
128 | -- 
129 | 1.7.7.6
130 | 
131 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.10/0004-per-socket-fix.patch:
--------------------------------------------------------------------------------
 1 | From 525c62bc4417f988aeb3b941ac0dfad04fec39ee Mon Sep 17 00:00:00 2001
 2 | From: Stephane Eranian <eranian@google.com>
 3 | Date: Tue, 9 Jul 2013 12:03:47 -0700
 4 | Subject: [PATCH 4/5] --per-socket fix
 5 | 
 6 | ---
 7 |  tools/perf/builtin-stat.c |    7 ++++---
 8 |  1 files changed, 4 insertions(+), 3 deletions(-)
 9 | 
10 | diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
11 | index 7e910ba..2adf8cf 100644
12 | --- a/tools/perf/builtin-stat.c
13 | +++ b/tools/perf/builtin-stat.c
14 | @@ -924,7 +924,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
15 |  static void print_aggr(char *prefix)
16 |  {
17 |  	struct perf_evsel *counter;
18 | -	int cpu, s, s2, id, nr;
19 | +	int cpu, cpu2, s, s2, id, nr;
20 |  	u64 ena, run, val;
21 |  
22 |  	if (!(aggr_map || aggr_get_id))
23 | @@ -936,7 +936,8 @@ static void print_aggr(char *prefix)
24 |  			val = ena = run = 0;
25 |  			nr = 0;
26 |  			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
27 | -				s2 = aggr_get_id(evsel_list->cpus, cpu);
28 | +				cpu2 = perf_evsel__cpus(counter)->map[cpu];
29 | +				s2 = aggr_get_id(evsel_list->cpus, cpu2);
30 |  				if (s2 != id)
31 |  					continue;
32 |  				val += counter->counts->cpu[cpu].val;
33 | @@ -948,7 +949,7 @@ static void print_aggr(char *prefix)
34 |  				fprintf(output, "%s", prefix);
35 |  
36 |  			if (run == 0 || ena == 0) {
37 | -				aggr_printout(counter, cpu, nr);
38 | +				aggr_printout(counter, id, nr);
39 |  
40 |  				fprintf(output, "%*s%s%*s",
41 |  					csv_output ? 0 : 18,
42 | -- 
43 | 1.7.7.6
44 | 
45 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.10/0005-support-pcu-extsel.patch:
--------------------------------------------------------------------------------
 1 | From 485a1208f27bad121685388f966856c5fe45849d Mon Sep 17 00:00:00 2001
 2 | From: "Yan, Zheng" <zheng.z.yan@intel.com>
 3 | Date: Fri, 12 Jul 2013 19:36:03 -0700
 4 | Subject: [PATCH] support pcu extsel
 5 | 
 6 | ---
 7 |  arch/x86/kernel/cpu/perf_event_intel_uncore.c |    2 +-
 8 |  arch/x86/kernel/cpu/perf_event_intel_uncore.h |    1 +
 9 |  2 files changed, 2 insertions(+), 1 deletions(-)
10 | 
11 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
12 | index 19f0cb4..036ccfd 100644
13 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
14 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
15 | @@ -285,7 +285,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
16 |  };
17 |  
18 |  static struct attribute *snbep_uncore_pcu_formats_attr[] = {
19 | -	&format_attr_event.attr,
20 | +	&format_attr_event_ext.attr,
21 |  	&format_attr_occ_sel.attr,
22 |  	&format_attr_edge.attr,
23 |  	&format_attr_inv.attr,
24 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
25 | index ac77a7b..669fcc5 100644
26 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
27 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
28 | @@ -110,6 +110,7 @@
29 |  				(SNBEP_PMON_CTL_EV_SEL_MASK | \
30 |  				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
31 |  				 SNBEP_PMON_CTL_EDGE_DET | \
32 | +				 SNBEP_PMON_CTL_EV_SEL_EXT | \
33 |  				 SNBEP_PMON_CTL_INVERT | \
34 |  				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
35 |  				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
36 | -- 
37 | 1.7.7.6
38 | 
39 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.10/0006-add-masks.patch:
--------------------------------------------------------------------------------
 1 | commit 3840e75a2385a71cf9f0916fe69db2ea936f9cdc
 2 | Author: Andi Kleen <ak@linux.intel.com>
 3 | Date:   Mon Jul 15 15:00:08 2013 -0700
 4 | 
 5 |     Add mask{0,1}, mask{0,1}
 6 | 
 7 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
 8 | index 6329563..5122385 100644
 9 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
10 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
11 | @@ -54,6 +54,8 @@ DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
12 |  DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
13 |  DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
14 |  DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
15 | +DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
16 | +DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
17 |  DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
18 |  DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
19 |  DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
20 | @@ -61,7 +63,8 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
21 |  DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
22 |  DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
23 |  DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
24 | -
25 | +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
26 | +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
27 |  
28 |  static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
29 |  {
30 | @@ -325,6 +328,8 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
31 |  	&format_attr_match_mc.attr,
32 |  	&format_attr_match_opc.attr,
33 |  	&format_attr_match_vnw.attr,
34 | +	&format_attr_match0.attr,
35 | +	&format_attr_match1.attr,
36 |  	&format_attr_mask_rds.attr,
37 |  	&format_attr_mask_rnid30.attr,
38 |  	&format_attr_mask_rnid4.attr,
39 | @@ -332,6 +337,8 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
40 |  	&format_attr_mask_mc.attr,
41 |  	&format_attr_mask_opc.attr,
42 |  	&format_attr_mask_vnw.attr,
43 | +	&format_attr_mask0.attr,
44 | +	&format_attr_mask1.attr,
45 |  	NULL,
46 |  };
47 |  
48 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.10/README:
--------------------------------------------------------------------------------
 1 | 
 2 | The following patches are needed for ucperf on top of Linux 3.10
 3 | Newer kernels may have these patches already integrated.
 4 | 
 5 | The patches affect both the perf binary and perf kernel code.
 6 | 
 7 | To apply
 8 | 
 9 | cd linux-3.10
10 | for i in PATCHDIR/00* ; do 
11 |     patch -p1 < $i 
12 | done
13 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.16/0002-perf-x86-uncore-register-the-PMU-only-if-the-uncore-.patch:
--------------------------------------------------------------------------------
 1 | From 26eadfb58bfb130254c04bff30fe2e2db775fd08 Mon Sep 17 00:00:00 2001
 2 | From: "Yan, Zheng" <zheng.z.yan@intel.com>
 3 | Date: Wed, 20 Aug 2014 09:50:58 +0800
 4 | Subject: [PATCH 2/2] perf/x86/uncore: register the PMU only if the uncore pci
 5 |  device exists
 6 | 
 7 | Current code registers PMUs for all possible uncore pci devices.
 8 | This is not good because, on some machines, one or more uncore pci
 9 | devices can be missing. The missing pci device make corresponding
10 | PMU unusable. Register the PMU only if the uncore device exists.
11 | 
12 | Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
13 | Signed-off-by: Andi Kleen <ak@linux.intel.com>
14 | 
15 | Conflicts:
16 | 	arch/x86/kernel/cpu/perf_event_intel_uncore.c
17 | ---
18 |  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 19 +++++++++++--------
19 |  1 file changed, 11 insertions(+), 8 deletions(-)
20 | 
21 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
22 | index f538bcf..ab7c597 100644
23 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
24 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
25 | @@ -4382,6 +4382,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
26 |  	struct intel_uncore_box *box;
27 |  	struct intel_uncore_type *type;
28 |  	int phys_id;
29 | +	bool first_box = false;
30 |  
31 |  	phys_id = pcibus_to_physid[pdev->bus->number];
32 |  	if (phys_id < 0)
33 | @@ -4415,9 +4416,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
34 |  	pci_set_drvdata(pdev, box);
35 |  
36 |  	raw_spin_lock(&uncore_box_lock);
37 | +	if (list_empty(&pmu->box_list))
38 | +		first_box = true;
39 |  	list_add_tail(&box->list, &pmu->box_list);
40 |  	raw_spin_unlock(&uncore_box_lock);
41 |  
42 | +	if (first_box)
43 | +		uncore_pmu_register(pmu);
44 |  	return 0;
45 |  }
46 |  
47 | @@ -4426,6 +4431,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
48 |  	struct intel_uncore_box *box = pci_get_drvdata(pdev);
49 |  	struct intel_uncore_pmu *pmu;
50 |  	int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number];
51 | +	bool last_box = false;
52 |  
53 |  	box = pci_get_drvdata(pdev);
54 |  	if (!box) {
55 | @@ -4447,6 +4453,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
56 |  
57 |  	raw_spin_lock(&uncore_box_lock);
58 |  	list_del(&box->list);
59 | +	if (list_empty(&pmu->box_list))
60 | +		last_box = true;
61 |  	raw_spin_unlock(&uncore_box_lock);
62 |  
63 |  	for_each_possible_cpu(cpu) {
64 | @@ -4458,6 +4466,9 @@ static void uncore_pci_remove(struct pci_dev *pdev)
65 |  
66 |  	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
67 |  	kfree(box);
68 | +
69 | +	if (last_box)
70 | +		perf_pmu_unregister(&pmu->pmu);
71 |  }
72 |  
73 |  static int __init uncore_pci_init(void)
74 | @@ -4838,14 +4849,6 @@ static int __init uncore_pmus_register(void)
75 |  		}
76 |  	}
77 |  
78 | -	for (i = 0; pci_uncores[i]; i++) {
79 | -		type = pci_uncores[i];
80 | -		for (j = 0; j < type->num_boxes; j++) {
81 | -			pmu = &type->pmus[j];
82 | -			uncore_pmu_register(pmu);
83 | -		}
84 | -	}
85 | -
86 |  	return 0;
87 |  }
88 |  
89 | -- 
90 | 1.8.4.5
91 | 
92 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.16/0004-perf-x86-uncore-Add-missing-cbox-filter-flags-on-Ivy.patch:
--------------------------------------------------------------------------------
 1 | From 84082496a8e87327e4c6c486742436db5304b470 Mon Sep 17 00:00:00 2001
 2 | From: Andi Kleen <ak@linux.intel.com>
 3 | Date: Fri, 5 Sep 2014 07:17:41 -0700
 4 | Subject: [PATCH 1/3] perf, x86, uncore: Add missing cbox filter flags on
 5 |  IvyBridge-EP uncore driver
 6 | 
 7 | The IvyBridge-EP uncore driver was missing three filter flags:
 8 | NC, ISOC, C6 which are useful in some cases. Support them in the same way
 9 | as the Haswell EP driver, by allowing to set them and exposing
10 | them in the sysfs formats.
11 | 
12 | Also fix a typo in a define.
13 | 
14 | Relies on the Haswell EP driver to be applied earlier.
15 | 
16 | Signed-off-by: Andi Kleen <ak@linux.intel.com>
17 | ---
18 |  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 9 ++++++++-
19 |  1 file changed, 8 insertions(+), 1 deletion(-)
20 | 
21 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
22 | index ab7c597..8de876a 100644
23 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
24 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
25 | @@ -1191,6 +1191,9 @@ static struct attribute *ivt_uncore_cbox_formats_attr[] = {
26 |  	&format_attr_filter_state2.attr,
27 |  	&format_attr_filter_nid2.attr,
28 |  	&format_attr_filter_opc2.attr,
29 | +	&format_attr_filter_nc.attr,
30 | +	&format_attr_filter_c6.attr,
31 | +	&format_attr_filter_isoc.attr,
32 |  	NULL,
33 |  };
34 |  
35 | @@ -1328,8 +1331,12 @@ static u64 ivt_cbox_filter_mask(int fields)
36 |  		mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE;
37 |  	if (fields & 0x8)
38 |  		mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID;
39 | -	if (fields & 0x10)
40 | +	if (fields & 0x10) {
41 |  		mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC;
42 | +		mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
43 | +		mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
44 | +		mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
45 | +	}
46 |  
47 |  	return mask;
48 |  }
49 | -- 
50 | 1.9.3
51 | 
52 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.16/0005-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy-Ivy-H.patch:
--------------------------------------------------------------------------------
 1 | From 9eb5f0f4827b37c8fb0b4a75df0987f88edc5232 Mon Sep 17 00:00:00 2001
 2 | From: Andi Kleen <ak@linux.intel.com>
 3 | Date: Fri, 5 Sep 2014 07:18:51 -0700
 4 | Subject: [PATCH 2/3] perf, x86, uncore: Fix PCU filter setup for
 5 |  Sandy/Ivy/Haswell EP
 6 | 
 7 | The PCU frequency band filters use 8 bit each in a register.
 8 | When setting up the value the shift value was not correctly
 9 | scaled, which resulted in all filters except for band 0 to
10 | be zero. Fix the scaling.
11 | 
12 | This allows to correctly monitor multiple uncore frequency bands.
13 | 
14 | Signed-off-by: Andi Kleen <ak@linux.intel.com>
15 | ---
16 |  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
17 |  1 file changed, 1 insertion(+), 1 deletion(-)
18 | 
19 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
20 | index 8de876a..f5d6f95 100644
21 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
22 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
23 | @@ -835,7 +835,7 @@ static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *
24 |  	if (ev_sel >= 0xb && ev_sel <= 0xe) {
25 |  		reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
26 |  		reg1->idx = ev_sel - 0xb;
27 | -		reg1->config = event->attr.config1 & (0xff << reg1->idx);
28 | +		reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
29 |  	}
30 |  	return 0;
31 |  }
32 | -- 
33 | 1.9.3
34 | 
35 | 


--------------------------------------------------------------------------------
/ucevent/patches-3.16/0006-fixup-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy.patch:
--------------------------------------------------------------------------------
 1 | From 8074b652feb213fa32d45b27ba5b2fa44e548f64 Mon Sep 17 00:00:00 2001
 2 | From: Andi Kleen <ak@linux.intel.com>
 3 | Date: Fri, 5 Sep 2014 07:19:41 -0700
 4 | Subject: [PATCH 3/3] fixup! perf, x86, uncore: Fix PCU filter setup for
 5 |  Sandy/Ivy/Haswell EP
 6 | 
 7 | ---
 8 |  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 9 |  1 file changed, 1 insertion(+), 1 deletion(-)
10 | 
11 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
12 | index f5d6f95..c41cb1c 100644
13 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
14 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
15 | @@ -1919,7 +1919,7 @@ static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *
16 |  	if (ev_sel >= 0xb && ev_sel <= 0xe) {
17 |  		reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
18 |  		reg1->idx = ev_sel - 0xb;
19 | -		reg1->config = event->attr.config1 & (0xff << reg1->idx);
20 | +		reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
21 |  	}
22 |  	return 0;
23 |  }
24 | -- 
25 | 1.9.3
26 | 
27 | 


--------------------------------------------------------------------------------
/ucevent/sanity-run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # sanity check an event
 3 | # percent between 0 and 100%
 4 | # nothing negative
 5 | from __future__ import print_function
 6 | import sys
 7 | import os
 8 | # thanks python 3.13!
 9 | try:
10 |     from shlex import quote as cmd_quote
11 | except ImportError:
12 |     from pipes import quote as cmd_quote
13 | 
14 | logfile = "slog.%d" % (os.getpid())
15 | 
16 | s = "./ucevent.py -x, -o " + logfile + " " + " ".join(map(cmd_quote, sys.argv[1:]))
17 | w = os.getenv("WRAP")
18 | if w:
19 |     s = w + " " + s
20 | print(s)
21 | r = os.system(s)
22 | if r != 0:
23 |     print("ucevent failed", r)
24 |     sys.exit(1)
25 | 
26 | f = open(logfile, "r")
27 | fields = f.readline().strip().split(",")
28 | for l in f:
29 |     vals = l.strip().split(",")
30 |     for v, h in zip(vals, fields):
31 |         if fields == "timestamp":
32 |             continue
33 |         try:
34 |             num = float(v)
35 |         except ValueError:
36 |             print(h,v)
37 |             continue
38 |         if num < 0:
39 |             print(h,"negative value",v)
40 |         if h.find("_PCT") >= 0 or h.find("PCT_") >= 0:
41 |             if num < 0 or num > 1.01:
42 |                 print(h,"percent out of bound", v)
43 | 
44 | os.remove(logfile)
45 | 


--------------------------------------------------------------------------------
/ucevent/skx_extra.py:
--------------------------------------------------------------------------------
1 | extra_derived = { }
2 | 
3 | 


--------------------------------------------------------------------------------
/ucevent/ucaux.py:
--------------------------------------------------------------------------------
 1 | class Aux:  # container for static lookup tables; defines no methods
 2 |     limited_counters = { "r3qpi": 3, "ubox": 2 }  # box name -> int; presumably a per-box counter count overriding DEFAULT_COUNTERS (TODO confirm)
 3 |     filters = ("filter_nid", "mask", "match", "filter_opc", "filter_state")  # qualifier names; how they are consumed is not visible here
 4 |     DEFAULT_COUNTERS = 4
 5 |     MAX_RANK = 8
 6 | 
 7 |     acronyms = {  # acronym -> human-readable expansion
 8 |         "TOR": "Table of Requests, pending transactions",
 9 |         "FLIT": "80-bit QPI packet",
10 |         "RPQ": "Read Queue",
11 |         "WPQ": "Write Queue",
12 |         "CBO": "Last Level Cache Slice",
13 |         "PCU": "Power Control Unit",
14 |         "iMC": "Memory Controller",
15 |         "HA": "Home Agent",
16 |         "QPI_LL": "QPI Link Layer",
17 |     }
18 | 
19 |     qual_alias = {  # qualifier spelling (short name or register-field text) -> format/filter name
20 |         "nid": "filter_nid",
21 |         "opc": "filter_opc",
22 |         "all_opc": "filter_all_op",
23 |         "nm": "filter_nm",
24 |         "not_nm": "filter_not_nm",
25 |         "opc0": "filter_opc0",
26 |         "opc1": "filter_opc1",
27 |         "loc": "filter_loc",
28 |         "rem": "filter_rem",
29 |         "nc": "filter_nc",
30 |         "Q_Py_PCI_PMON_PKT_MATCH0[12:00]": "match0",
31 |         "Q_Py_PCI_PMON_PKT_MATCH1[19:16]": "match_rds",
32 |         "Q_Py_PCI_PMON_PKT_MASK0[12:0]": "mask0",
33 |         "Q_Py_PCI_PMON_PKT_MASK0[17:0]": "mask0",   # why both?
34 |         "Q_Py_PCI_PMON_PKT_MASK1[19:16]": "mask_rds",
35 |         "Q_Py_PCI_PMON_PKT_MATCH0": "match0",
36 |         "Q_Py_PCI_PMON_PKT_z_MATCH0": "match0",
37 |         "Q_Py_PCI_PMON_PKT_z_MASK0[12:0]": "mask0", # XXX correct?
38 |         "Q_Py_PCI_PMON_PKT_z_MASK0[17:0]": "mask0", # XXX correct?
39 |         "edge_det": "edge",
40 |         "Cn_MSR_PMON_BOX_FILTER.opc": "filter_opc",
41 |         "Cn_MSR_PMON_BOX_FILTER0.opc": "filter_opc",
42 |         "Cn_MSR_PMON_BOX_FILTER1.opc": "filter_opc",
43 |         "Cn_MSR_PMON_BOX_FILTER.state": "filter_state",
44 |         "Cn_MSR_PMON_BOX_FILTER0.state": "filter_state",
45 |         "Cn_MSR_PMON_BOX_FILTER0.tid": "filter_tid",
46 |         "Cn_MSR_PMON_BOX_FILTER0.nc": "filter_nc",
47 |         "Cn_MSR_PMON_BOX_FILTER0.nm": "filter_nm",
48 |         "Cn_MSR_PMON_BOX_FILTER0.all_opc": "filter_all_op", # XXX
49 |         "Cn_MSR_PMON_BOX_FILTER0.opc1": "filter_opc1",
50 |         "Cn_MSR_PMON_BOX_FILTER0.opc0": "filter_opc0",
51 |         "Cn_MSR_PMON_BOX_FILTER0.loc": "filter_loc",
52 |         "Cn_MSR_PMON_BOX_FILTER0.not_nm": "filter_not_nm",
53 |         "Cn_MSR_PMON_BOX_FILTER1.nm": "filter_nm",
54 |         "Cn_MSR_PMON_BOX_FILTER1.all_opc": "filter_all_op", # XXX
55 |         "Cn_MSR_PMON_BOX_FILTER1.opc1": "filter_opc1",
56 |         "Cn_MSR_PMON_BOX_FILTER1.opc0": "filter_opc0",
57 |         "Cn_MSR_PMON_BOX_FILTER1.loc": "filter_loc",
58 |         "Cn_MSR_PMON_BOX_FILTER1.not_nm": "filter_not_nm",
59 |         "Q_Py_PCI_PMON_PKT_MATCH0.dnid": "match_dnid",
60 |         "Q_Py_PCI_PMON_PKT_z_MATCH0.dnid": "match_dnid", # XXX
61 |         "Q_Py_PCI_PMON_PKT_z_MATCH1": "match1",
62 |         "Q_Py_PCI_PMON_PKT_z_MASK1": "mask1",
63 |         "PCUFilter[7:0]": "filter_band0",
64 |         "PCUFilter[15:8]": "filter_band1",
65 |         "PCUFilter[23:16]": "filter_band2",
66 |         "PCUFilter[31:24]": "filter_band3",
67 |         "CBoFilter[31:23]": "filter_opc",
68 |         "CBoFilter[17:10]": "filter_nid",
69 |         "QPIMatch0[17:0]": "match0",
70 |         "QPIMask0[17:0]": "mask0",
71 |         "QPIMatch0[12:0]": "match0",
72 |         "QPIMask0[12:0]": "mask0",
73 |         "QPIMask1[19:16]": "mask_rds",
74 |         "QPIMatch1[19:16]": "match_rds",
75 |         "CBoFilter[22:18]": "filter_state",
76 |     }
77 | 
78 |     qual_display_alias = {  # register-field text -> comma-separated names; presumably used when displaying qualifiers (TODO confirm)
79 |         "QPIMask0[12:0]": "mask_mc, match_opc, match_vnw",
80 |         "QPIMatch0[12:0]": "match_mc, match_opc, match_vnw",
81 |         "QPIMatch0[17:0]": "match_mc, match_opc, match_vnw, match_dnid",
82 |     }
83 | 
84 |     alias_events = {  # event name -> perf-style event string; INDEX looks like a placeholder filled in elsewhere (TODO confirm)
85 |         "MC_Chy_PCI_PMON_CTR_FIXED": "uncore_imc_INDEX/clockticks/"
86 |     }
87 | 
88 |     clockticks = (  # regular expressions; presumably matched against perf event strings to recognize clock events (TODO confirm)
89 |         r"uncore_(cbox|ha|pcu)_?\d*/event=0x0/",
90 |         r".*/clockticks/",
91 |         r"uncore_(r2pcie|r3qpi)_?\d*/event=0x1/",
92 |         r"uncore_qpi(_\d+)?/event=0x14/"
93 |     )
94 | 


--------------------------------------------------------------------------------
/ucevent/ucmsg.py:
--------------------------------------------------------------------------------
 1 | # Handle warnings and errors
 2 | # Separate module to avoid circular imports
 3 | from __future__ import print_function
 4 | import sys
 5 | import fnmatch
 6 | 
 7 | quiet = False
 8 | debug = None
 9 | 
10 | def debug_msg(x, y):
11 |     if debug and any(map(lambda p: fnmatch.fnmatch(x, p), debug.split(","))):
12 |         print("debug:", x + ": " + str(y), file=sys.stderr)
13 | 
14 | def warning(x):
15 |     if not quiet:
16 |         print("WARNING:", x, file=sys.stderr)
17 | 


--------------------------------------------------------------------------------
/ucevent/uctester:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e  # abort on the first failing command
 3 | set -x  # trace every command as it runs
 4 | # runs sanity-run.py / ucevent.py over a few event expressions; $WRAP, when set, prefixes every command
 5 | EXTRA=""
 6 | [ -n "$MOCK" ] && EXTRA=--mock  # pass --mock through when MOCK is set in the environment
 7 | 
 8 | $WRAP ./sanity-run.py $EXTRA "PCU.*" sleep 3
 9 | 
10 | $WRAP ./sanity-run.py $EXTRA "PCU.CLOCKTICKS / KILO" sleep 3
11 | 
12 | $WRAP ./sanity-run.py $EXTRA -S0 "PCU.*" sleep 3
13 | 
14 | if [ "$(lscpu | awk '/Socket/ { print $2 }' )" -gt 1 ] ; then  # the -S1 run only happens when lscpu reports more than one socket
15 | 
16 | $WRAP ./sanity-run.py $EXTRA -S1 "PCU.*" sleep 3
17 | 
18 | fi
19 | 
20 | $WRAP ./sanity-run.py $EXTRA --cpu 0 "PCU.*" sleep 3
21 | 
22 | # not valid on SKX
23 | #$WRAP ./sanity-run.py $EXTRA --no-sum "CBO.RING_THRU_*_BYTES" sleep 3
24 | 
25 | $WRAP ./sanity-run.py $EXTRA '{' PCU.CLOCKTICKS iMC.MEM_BW_READS '}' iMC.CAS_COUNT sleep 3
26 | 
27 | set +e # XXX grep exits non-zero when nothing matches, so stop aborting on failure from here on
28 | $WRAP ./ucevent.py $EXTRA --parse-all | grep -i error
29 | $WRAP ./ucevent.py --mock --parse-all | grep -E -i '#EVAL|#DIVZ|error'
30 | 
31 | #./ucevent.py --check-events
32 | 
33 | # qualifiers
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/ucevent/uctopy.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | # generate python data files from perl input
  3 | # uctopy.pl CPU-ACRONYM events.pl derived.pl >cpu_uc.py
  4 | use File::Basename;
  5 | 
  6 | $cpu = $ARGV[0];
  7 | shift(@ARGV);
  8 | foreach (@ARGV) {
  9 | 	do $_;	# execute each input file; presumably they define the %<CPU>_UC... hashes referenced below
 10 | }
 11 | 
 12 | $code = <<END;
 13 | \$aliases = \\%CPU_UCFilterAliases;
 14 | \$events = \\%CPU_UCEventList;
 15 | \$derived = \\%CPU_UCDerivedList;
 16 | END
 17 | $code =~ s/CPU/$cpu/g;	# substitute the CPU acronym into the hash names
 18 | eval($code);	# bind $aliases/$events/$derived to references to those hashes
 19 | 
 20 | print "# $cpu ";
 21 | foreach $j (@ARGV) { 
 22 | 	$f = basename($j);
 23 | 	$f =~ s/\.pl//;
 24 | 	print "$f ";
 25 | }
 26 | print "\n\n";
 27 | 
 28 | %categories = ();	# was "= {}", which stores a hash reference as a bogus key
 29 | @catlist = ();	# filled by print_event (was misspelled @catlit)
 30 | %global = ();	# was "= {}", see above
 31 | 
 32 | $indent = "     ";
 33 | $quote = "\"";
 34 | $nquote = "\"\"\"";
 35 | 
 36 | sub addquote($) {	# wrap the text in $nquote when it contains a newline, otherwise in $quote
 37 | 	my ($text) = @_;
 38 | 	my $q = ($text =~ /\n/) ? $nquote : $quote;
 39 | 	return $q . $text . $q;
 40 | }
 41 | 
 42 | print "# aliases\n";
 43 | print "aliases = {\n";	# emit the alias table as a Python dict literal
 44 | foreach $i (keys(%{ $aliases } )) {	# hash order, i.e. entries are not sorted
 45 | 	print $indent,$quote,$i,$quote,": ",addquote($aliases->{$i}),",\n";
 46 | }
 47 | print "}\n\n";
 48 | 
 49 | sub format_data($) {	# render one field value as a Python literal
 50 | 	my($data) = (@_);
 51 | 	return $data if ($data =~ /^[0-9]+$/ || $data =~ /^0x[0-9a-fA-F]+$/);	# decimal and 0x-hex numbers pass through unquoted
 52 | 	$data =~ s/"/\\"/g;	# escape embedded double quotes; NOTE(review): backslashes are not escaped
 53 | 	return addquote($data);
 54 | }
 55 | 
 56 | sub to_list($) {
 57 | 	my($l) = (@_);
 58 | 	# The counter specification (the caller only passes values that
 59 | 	# contain a "-") is returned unchanged.  A range-expansion loop used
 60 | 	# to follow an unconditional return here; it could never run and
 61 | 	# used numeric += where string concatenation was needed, so it has
 62 | 	# been removed.  The emitted output is exactly what it was before.
 63 | 	return $l;
 64 | }
 65 | 
 66 | 
 67 | sub print_event($$) { 	# print one event as a Python dict entry:  "name": { "field": value, ... },
 68 | 	my($name, $ev) = (@_);
 69 | 
 70 | 	#return if $ev->{'Public'} ne "Y";
 71 | 
 72 | 	push(@catlist, $ev->{"Category"});	# collected for the categories tuple printed at the end of the script
 73 | 
 74 | 	print $indent,$quote,$name,$quote,": {\n";
 75 | 	foreach $w (sort(keys(%{$ev}))) {	# fields are emitted in sorted key order
 76 | 		next if $w =~ /Sub[cC]at/;	# sub-event containers are not fields
 77 | 		next if $w eq "Subevents";
 78 | 		next if $ev->{$w} eq "" && $w ne "Category";	# drop empty fields, except Category
 79 | 		next if $w eq "OrigName";
 80 | 		next if $w =~ /([A-Z]+)Status/;
 81 | 		next if $w eq "RTLSignal";
 82 | 		next if $w eq "Public";
 83 | 		if ($w eq "Internal") { 	# the Internal field is emitted under the name ExtSel
 84 | 			$w = "ExtSel";
 85 | 		}
 86 | 
 87 | 		$val = $ev->{$w};	# NOTE(review): after the rename above this reads $ev->{"ExtSel"}, not the Internal value - confirm this is intended
 88 | 		next if $w eq "MaxIncCyc" && ($val == "1" || $val == "0");	# numeric comparison: MaxIncCyc of 0 or 1 is omitted
 89 | 		next if $w eq "SubCtr" && $val == "0";
 90 | 
 91 | 		$val = to_list($val) if $w eq "Counters" && $val =~ /-/;
 92 | 
 93 | 		print $indent,$indent,
 94 | 		      addquote($w),": ",format_data($val),",\n";
 95 | 	}
 96 | 	print $indent,"},\n";
 97 | }
 98 | 
 99 | sub print_sub($$$) {
100 | 	my($box, $j, $sub) = (@_);	# $sub: sub-event name -> field hash; parent fields come from the global $ev set by print_list
101 | 	foreach my $k (keys(%{$sub})) {
102 | 		my $subev = $sub->{$k};
103 | 		# put all the fields from the parent into the sub event to
104 | 		# normalize, without overwriting fields the sub event defines itself
105 | 		foreach my $o (keys(%{$ev})) {
106 | 			next if defined($subev->{$o});	# was $sub->{$o}: that tested the container of sub events, not this sub event
107 | 			$subev->{$o} = $ev->{$o};
108 | 		}
109 | 		print_event("$box.$j.$k", $subev);
110 | 	}
111 | }
112 | 
113 | sub print_list($$) {	# print every box's events (and their sub events) as a Python dict named $name
114 | 	my($name, $evl) = (@_);
115 | 	print "$name = {\n";
116 | 	foreach $box (keys(%{$evl})) {	# boxes come out in hash order
117 | 		$evlist = $evl->{$box};
118 | 		$box =~ s/ Box Events//;	# normalize the box label: drop " Box Events", then spaces -> underscores
119 | 		$box =~ s/ /_/g;
120 | 		print $indent,"\n# $box:\n";
121 | 
122 | 		foreach $j (sort(keys(%{$evlist}))) {
123 | 			$ev = $evlist->{$j};	# global on purpose: print_sub reads the parent event through $ev
124 | 			$ev->{"Box"} = $box;
125 | 			$ev->{"Category"} = $box . " " . $ev->{"Category"};	# prefix the category with the box name (modifies the input hash)
126 | 			print_event("$box.$j", $ev);
127 | 			print_sub($box, $j, $ev->{"Subcat"});	# sub events may live under any of these three keys
128 | 			print_sub($box, $j, $ev->{"SubCat"});
129 | 			print_sub($box, $j, $ev->{"Subevents"});
130 | 		}
131 | 	}
132 | 	print "}\n";
133 | }
134 | 
135 | print_list("events", $events);
136 | print_list("derived", $derived);
137 | 
138 | print "categories = (\n";	# sorted, de-duplicated categories gathered by print_event
139 | $prev = "";
140 | foreach $i (sort @catlist) {
141 | 	next if $i eq $prev;	# equal entries are adjacent after sorting
142 | 	$prev = $i;
143 | 	print $indent,addquote($i),",\n";
144 | }
145 | print ");\n";
146 | 


--------------------------------------------------------------------------------
/utilized.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # extract utilized CPUs out of toplev CSV output
 3 | # toplev ... -I 1000 --node +CPU_Utilization -x, -o x.csv ...
 4 | # utilized.py < x.csv
 5 | # note it duplicates the core output
 6 | from __future__ import print_function
 7 | import argparse
 8 | import csv
 9 | import sys
10 | import re
11 | import collections
12 | 
13 | ap = argparse.ArgumentParser()
14 | ap.add_argument('--min-util', default=10., type=float)
15 | ap.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
16 | ap.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout)
17 | args = ap.parse_args()
18 | 
19 | key = None
20 | 
21 | c = csv.reader(args.file)
22 | wr = csv.writer(args.output)
23 | 
24 | fields = collections.OrderedDict()
25 | util = collections.defaultdict(list)
26 | 
27 | for t in c:
28 |     if len(t) < 3 or t[0].startswith("#"):
29 |         continue
30 |     if t[0] == "Timestamp":
31 |         wr.writerow(t)
32 |     key = t[1] # XXX handle no -I
33 |     if key in fields:
34 |         fields[key].append(t)
35 |     else:
36 |         fields[key] = [t]
37 |     if t[2] == "CPU_Utilization":
38 |         util[key].append(float(t[3]))
39 | 
40 | final = []
41 | skipped = []
42 | for j in fields.keys():
43 |     if "-T" not in j and not j.startswith("CPU"):
44 |         if "S" in j:
45 |             final.append(j)
46 |         continue
47 |     core = re.sub(r'-T\d+', '', j)
48 |     utilization = 100
49 |     if len(util[j]) > 0:
50 |         utilization = (sum(util[j]) / len(util[j])) * 100.
51 |     if utilization >= float(args.min_util):
52 |         for k in fields[core] + fields[j]:
53 |             wr.writerow(k)
54 |     else:
55 |         skipped.append(j)
56 | for j in final:
57 |     for k in fields[j]:
58 |         wr.writerow(k)
59 | print("skipped", " ".join(skipped), file=sys.stderr)
60 | 


--------------------------------------------------------------------------------
/wl-bottlenecks:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # print bottlenecks for all tests in workloads/*
3 | # arguments: to pass to toplev
4 | for i in workloads/* ; do
5 | 	echo "$(basename "$i"):"
6 | 	./toplev "$@" --quiet --single-thread --no-version "$i"
7 | done
8 | 


--------------------------------------------------------------------------------
/workloads/BC1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | echo '3^415312' | bc > /dev/null  # bc evaluates a large power; the result is discarded
3 | 


--------------------------------------------------------------------------------
/workloads/BC2s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | echo '3^615312' | bc > /dev/null  # bc evaluates a large power; the result is discarded
3 | 


--------------------------------------------------------------------------------
/workloads/CALC10s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | exec calc '3^2421212' > /dev/null  # calc evaluates a large power; the result is discarded
3 | 


--------------------------------------------------------------------------------
/workloads/CALC1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | exec calc '3^721212' > /dev/null  # calc evaluates a large power; the result is discarded
3 | 


--------------------------------------------------------------------------------
/workloads/CLANG10s:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # generate a C function with 200000 calls to f2() and compile it with clang -O3; the object file is discarded
 3 | awk 'BEGIN {
 4 | 	print "extern int f2(void); int func(void) { int i; "
 5 | 	for(i = 0; i < 200000; i++) {
 6 | 		print "i += f2();"
 7 | 	}
 8 | 	print "return i;"
 9 | 	print "}" } ' |
10 | clang -O3 -x c -c -o /dev/null -
11 | 


--------------------------------------------------------------------------------
/workloads/CLTRAMP3D:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # compile tramp3d-v4 with clang++, downloading and patching the source on first use
 3 | set -e
 4 | if [ ! -f tramp3d-v4.cpp ] ; then
 5 | 	echo "Downloading tramp3d-v4"
 6 | 	# Download and patch in separate steps via temporary files: inside a
 7 | 	# pipeline set -e only sees sed's status, so a failed curl used to
 8 | 	# leave a truncated tramp3d-v4.cpp that later runs would reuse.
 9 | 	curl -f -o tramp3d-v4.cpp.orig https://raw.githubusercontent.com/microsoft/checkedc-llvm-test-suite/master/MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp
10 | 	sed -e 's/mutable Element_t/Element_t/' \
11 | 	    -e 's/mutable Pooma::Iterate_t/Pooma::Iterate_t/' tramp3d-v4.cpp.orig > tramp3d-v4.cpp.tmp
12 | 	mv tramp3d-v4.cpp.tmp tramp3d-v4.cpp
13 | 	rm -f tramp3d-v4.cpp.orig
14 | fi
15 | clang++ -w -std=gnu++11 tramp3d-v4.cpp
16 | 


--------------------------------------------------------------------------------
/workloads/COMPILE10s:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # generate a C function with 11500 calls to f2() and compile it with gcc -O3; the object file is discarded
 3 | awk 'BEGIN {
 4 | 	print "extern int f2(void); int func(void) { int i; "
 5 | 	for(i = 0; i < 11500; i++) {
 6 | 		print "i += f2();" 
 7 | 	}
 8 | 	print "return i;"
 9 | 	print "}" } ' |
10 | gcc -O3 -x c -c -o /dev/null -
11 | 


--------------------------------------------------------------------------------
/workloads/EMACS1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | emacs --batch -q --eval '(dotimes (i 9000000) 0)' 2> /dev/null  # empty elisp loop in batch mode; stderr discarded
3 | 


--------------------------------------------------------------------------------
/workloads/GCCTRAMP3D:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # compile tramp3d-v4 with g++, downloading and patching the source on first use
 3 | set -e
 4 | if [ ! -f tramp3d-v4.cpp ] ; then
 5 | 	echo "Downloading tramp3d-v4"
 6 | 	# Download and patch in separate steps via temporary files: inside a
 7 | 	# pipeline set -e only sees sed's status, so a failed curl used to
 8 | 	# leave a truncated tramp3d-v4.cpp that later runs would reuse.
 9 | 	curl -f -o tramp3d-v4.cpp.orig https://raw.githubusercontent.com/microsoft/checkedc-llvm-test-suite/master/MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp
10 | 	sed -e 's/mutable Element_t/Element_t/' \
11 | 	    -e 's/mutable Pooma::Iterate_t/Pooma::Iterate_t/' tramp3d-v4.cpp.orig > tramp3d-v4.cpp.tmp
12 | 	mv tramp3d-v4.cpp.tmp tramp3d-v4.cpp
13 | 	rm -f tramp3d-v4.cpp.orig
14 | fi
15 | g++ -w -std=gnu++11 tramp3d-v4.cpp
16 | 


--------------------------------------------------------------------------------
/workloads/GITGREP:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | git log -G foo > /dev/null  # search the current repository's history for changes matching "foo"; output discarded
3 | 


--------------------------------------------------------------------------------
/workloads/GREP:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | grep -rq foobarfoobar /usr/bin /usr/sbin 2>/dev/null  # recursive search; the pattern presumably never matches, so both trees are read in full
3 | 


--------------------------------------------------------------------------------
/workloads/GUILE1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # guile evaluates a large integer power; nothing is printed
3 | guile -c '(expt 3 55131231)'
4 | 


--------------------------------------------------------------------------------
/workloads/GZIP:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # gzip four copies of the perf binary, discarding the output
3 | # stop when perf is missing: without file arguments cat would block on stdin
4 | P=$(command -v perf) || { echo "perf not found" >&2; exit 1; }
5 | cat "$P" "$P" "$P" "$P" | gzip -c > /dev/null
6 | 


--------------------------------------------------------------------------------
/workloads/MEMHOG:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # backend bound. requires numactl (which provides memhog)
3 | memhog 1G >/dev/null
4 | 


--------------------------------------------------------------------------------
/workloads/PERL1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | perl -e 'for($i=0;$i<40000000;$i++) {}'  # empty counting loop in the perl interpreter
3 | 


--------------------------------------------------------------------------------
/workloads/PHP1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | php -r 'for($i=0;$i<150000000;$i++) { ; } '  # empty counting loop in the php interpreter
3 | 


--------------------------------------------------------------------------------
/workloads/PYTHON1s:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # empty counting loop in the python3 interpreter
3 | python3 -c 'for x in range(40000000): pass'
4 | 


--------------------------------------------------------------------------------
/workloads/XZ:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # xz-compress the perf binary, discarding the output
3 | # stop when perf is missing: without file arguments cat would block on stdin
4 | P=$(command -v perf) || { echo "perf not found" >&2; exit 1; }
5 | cat "$P" | xz -c > /dev/null
6 | 


--------------------------------------------------------------------------------
/workloads/ZSTD:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # zstd-compress fifteen copies of the perf binary, discarding the output
3 | # stop when perf is missing: without file arguments cat would block on stdin
4 | P=$(command -v perf) || { echo "perf not found" >&2; exit 1; }
5 | cat "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" "$P" | zstd -z --single-thread -9 > /dev/null
6 | 


--------------------------------------------------------------------------------