├── .github └── workflows │ ├── codeql-analysis.yml │ ├── jevents.yml │ ├── python-old.yml │ └── python.yml ├── .gitignore ├── CHANGES.md ├── COPYING ├── FLAKE8 ├── MYPY ├── Makefile ├── PYLINT ├── README.md ├── RERUN ├── TOOLS.md ├── adl_glc_ratios.py ├── adl_grt_ratios.py ├── all-tester ├── arl-retlat.json ├── bdw-cpuinfo ├── bdw_client_ratios.py ├── bdx_server_ratios.py ├── cleanlogs ├── clx_server_ratios.py ├── counterdiff.py ├── cpumap.sh ├── cputop ├── cputop.py ├── csv_formats.py ├── dummyarith.py ├── ehl_ratios.py ├── event-rmap.py ├── event-translate.py ├── event_download ├── event_download.py ├── fake-perf.py ├── frequency.py ├── gen-dot.py ├── gen_level.py ├── genretlat ├── genretlat.py ├── gnr-retlat.json ├── gnr_server_ratios.py ├── hsw_client_ratios.py ├── hsx_server_ratios.py ├── icl_client_ratios.py ├── icx_server_ratios.py ├── interval-merge ├── interval-merge.py ├── interval-normalize ├── interval-normalize.py ├── interval-plot ├── interval-plot.py ├── ivb_client_ratios.py ├── ivb_server_ratios.py ├── jevents ├── Makefile ├── README.md ├── cache.c ├── cpustr.c ├── csv.c ├── event-rmap.c ├── examples │ ├── Makefile │ ├── addr.c │ ├── cpu.c │ ├── cpu.h │ ├── hist.cc │ ├── hist.h │ ├── jestat.c │ ├── ptself.c │ ├── rtest.c │ ├── rtest2.c │ └── rtest3.c ├── interrupts.c ├── interrupts.h ├── jevents.c ├── jevents.h ├── jsession.h ├── jsmn.c ├── jsmn.h ├── json.c ├── json.h ├── libjevents.spec ├── listevents.c ├── measure.c ├── measure.h ├── perf-aux.c ├── perf-iter.c ├── perf-iter.h ├── perf-record.h ├── perf_event_open.c ├── print.c ├── rawevent.c ├── rdpmc.c ├── rdpmc.h ├── resolve.c ├── session.c ├── showevent.c ├── tester └── util.h ├── jkt_server_ratios.py ├── knl_ratios.py ├── latego.py ├── linux_metrics.py ├── list-events.py ├── listutils.py ├── lnl-retlat.json ├── lnl_lnc_ratios.py ├── lnl_skt_ratios.py ├── metrics.py ├── msr ├── msr.py ├── mtl-retlat.json ├── mtl_cmt_ratios.py ├── mtl_rwc_ratios.py ├── node.py ├── objutils.py ├── oc-all-events ├── ocperf ├── ocperf.py ├── other-tester ├── parallel-tester ├── parser ├── elf.py ├── hist.py ├── kernel.py ├── mmap.py ├── perfdata.py ├── perfpd.py ├── pfeat.py ├── tester └── util.py ├── pci.py ├── pebs-grabber ├── Makefile ├── pebs-grabber.c └── pebs.h ├── perf_metrics.py ├── plot-normalized.py ├── pmudef.py ├── pmumon.py ├── power_metrics.py ├── requirements.txt ├── simple-pebs ├── Makefile ├── README ├── compat.h ├── dump-util.c ├── dump-util.h ├── dumper.c ├── elf.c ├── elf.h ├── histogram.c ├── map.c ├── map.h ├── samples.c ├── simple-pebs.c ├── simple-pebs.h ├── symtab.c ├── symtab.h └── toperf.c ├── simple_ratios.py ├── skl_client_ratios.py ├── skx_server_ratios.py ├── slm_ratios.py ├── snb_client_ratios.py ├── spr_max_server_ratios.py ├── spr_server_ratios.py ├── srf_ratios.py ├── test-uncore.json ├── tester ├── tl-barplot.py ├── tl-serve ├── tl-serve.py ├── tl-tester ├── tl-xlsx.py ├── tl_cpu.py ├── tl_io.py ├── tl_output.py ├── tl_stat.py ├── tl_uval.py ├── tldata.py ├── topdown-csv ├── mock.py └── topdown-csv.py ├── toplev ├── toplev.ico ├── toplev.man ├── toplev.py ├── topology ├── tsx_metrics.py ├── ucevent ├── CHECK-ALL ├── CHECK-DERIVED ├── JKT ├── MOCK-ALL ├── Makefile ├── README.md ├── RUN-ALL ├── SANITY-ALL ├── bdx_extra.py ├── bdx_uc.py ├── bdxde_extra.py ├── bdxde_uc.py ├── dygraph-out.html ├── hsx_extra.py ├── hsx_uc.py ├── icx_extra.py ├── icx_uc.py ├── ivt_extra.py ├── ivt_uc.py ├── jkt_extra.py ├── jkt_uc.py ├── md2hman.py ├── patches-3.10 │ ├── 0001-perf-Use-hrtimers-for-event-multiplexing.patch │ ├── 0002-perf-Add-sysfs-entry-to-adjust-multiplexing-interval.patch │ ├── 0003-perf-uncore-qpi-filter.patch │ ├── 0004-per-socket-fix.patch │ ├── 0005-support-pcu-extsel.patch │ ├── 0006-add-masks.patch │ └── README ├── patches-3.16 │ ├── 0001-perf-x86-intel-Add-Haswell-EP-uncore-support.patch │ ├── 0002-perf-x86-uncore-register-the-PMU-only-if-the-uncore-.patch │ ├── 0004-perf-x86-uncore-Add-missing-cbox-filter-flags-on-Ivy.patch │ ├── 0005-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy-Ivy-H.patch │ └── 0006-fixup-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy.patch ├── sanity-run.py ├── skx_extra.py ├── skx_uc.py ├── ucaux.py ├── ucevent.man ├── ucevent.py ├── ucexpr.py ├── ucmsg.py ├── uctester └── uctopy.pl ├── utilized.py ├── wl-bottlenecks └── workloads ├── BC1s ├── BC2s ├── CALC10s ├── CALC1s ├── CLANG10s ├── CLTRAMP3D ├── COMPILE10s ├── EMACS1s ├── GCCTRAMP3D ├── GITGREP ├── GREP ├── GUILE1s ├── GZIP ├── MEMHOG ├── PERL1s ├── PHP1s ├── PYTHON1s ├── XZ └── ZSTD /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [master] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [master] 14 | schedule: 15 | - cron: '0 6 * * 5' 16 | 17 | jobs: 18 | analyze: 19 | name: Analyze 20 | runs-on: ubuntu-latest 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | # Override automatic language detection by changing the below list 26 | # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] 27 | language: ['python', 'cpp'] 28 | # Learn more... 29 | # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection 30 | 31 | steps: 32 | - name: Checkout repository 33 | uses: actions/checkout@v2 34 | with: 35 | # We must fetch at least the immediate parents so that if this is 36 | # a pull request then we can checkout the head. 37 | fetch-depth: 2 38 | 39 | # Initializes the CodeQL tools for scanning. 40 | - name: Initialize CodeQL 41 | uses: github/codeql-action/init@v2 42 | with: 43 | languages: ${{ matrix.language }} 44 | # If you wish to specify custom queries, you can do so here or in a config file. 45 | # By default, queries listed here will override any specified in a config file. 46 | # Prefix the list here with "+" to use these queries and those in the config file. 47 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 48 | 49 | # ℹ️ Command-line programs to run using the OS shell. 50 | # 📚 https://git.io/JvXDl 51 | 52 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 53 | # and modify them (or add more) to build your code if your project 54 | # uses a compiled language 55 | 56 | - run: | 57 | cd jevents 58 | make 59 | 60 | - name: Perform CodeQL Analysis 61 | uses: github/codeql-action/analyze@v2 62 | -------------------------------------------------------------------------------- /.github/workflows/jevents.yml: -------------------------------------------------------------------------------- 1 | name: jevents build and test 2 | 3 | on: 4 | push: 5 | branches: [ master, test* ] 6 | pull_request: 7 | branches: [ master, test* ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: make 17 | run: | 18 | cd jevents ; make 19 | - name: tester 20 | run: | 21 | ./event_download.py GenuineIntel-6-55-4 22 | cd jevents ; EVENTMAP=GenuineIntel-6-55-4 ./tester 23 | -------------------------------------------------------------------------------- /.github/workflows/python-old.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Old Python linting and testing 5 | on: 6 | push: 7 | branches: [ master, test* ] 8 | pull_request: 9 | branches: [ master, test* ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-20.04 15 | container: 16 | image: python:2.7.18-buster 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install flake8 pytest 28 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 29 | - name: Lint with flake8 30 | run: | 31 | # stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | flake8 --exclude '*_uc.py,*_ratios.py' --ignore=E302,E128,E741,E251,E305,E221,E127,E126,E501,E401,E231,E201,E261,E731,E122,E131,E226,W504,E402,E301,W503,E265,E306,E227,E306,W391,E202,E241 34 | - name: Run toplev tests 35 | run: | 36 | if ! grep -q GenuineIntel /proc/cpuinfo ; then export NATIVE_ARGS="--force-cpu skl" ; fi 37 | WRAP=python PERF=./fake-perf.py NORES=1 ./tl-tester 38 | - name: Run ocperf tests 39 | run: | 40 | python ./event_download.py -a 41 | WRAP=python PERF=./fake-perf.py NORES=1 ./tester 42 | - name: Run other tests 43 | run: | 44 | pip install matplotlib 45 | WRAP=python PERF=./fake-perf.py NORES=1 ./other-tester 46 | - name: Run ucevent tests 47 | run: | 48 | set -e 49 | cd ucevent 50 | for i in jkt ivt hsx bdxde bdx skx ; do FORCECPU=$i WRAP=python MOCK=1 NORES=1 ./uctester ; done 51 | # XXX WRAP=python MOCK=1 NORES=1 ./RUN-ALL # need to avoid GNU parallel 52 | # 53 | - name: Run parser tests 54 | run: | 55 | cd parser 56 | ./tester 57 | 58 | 59 | -------------------------------------------------------------------------------- /.github/workflows/python.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python linting and testing 5 | on: 6 | push: 7 | branches: [ master, test* ] 8 | pull_request: 9 | branches: [ master, test* ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: [3.8, 3.9, 3.10.8, 3.11, 3.12, 3.13] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install flake8 pytest mypy 29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 30 | sudo apt install -y shellcheck 31 | - name: Lint with flake8 32 | run: | 33 | ./FLAKE8 34 | - name: Type check with mypy 35 | run: | 36 | ./MYPY 37 | - name: Run shell check 38 | run: | 39 | shellcheck -e SC2086,SC2012 -x tl-tester tester other-tester parallel-tester all-tester jevents/tester ucevent/uctester 40 | - name: Run toplev tests 41 | run: | 42 | if ! grep -q GenuineIntel /proc/cpuinfo ; then export NATIVE_ARGS="--force-cpu skl" ; fi 43 | WRAP=python PERF=./fake-perf.py NORES=1 ./tl-tester 44 | - name: Run ocperf tests 45 | run: | 46 | python ./event_download.py -a 47 | WRAP=python PERF=./fake-perf.py NORES=1 ./tester 48 | - name: Run other tests 49 | run: | 50 | WRAP=python PERF=./fake-perf.py NORES=1 ./other-tester 51 | - name: Run ucevent tests 52 | run: | 53 | set -e 54 | cd ucevent 55 | for i in jkt ivt hsx bdxde bdx skx ; do FORCECPU=$i WRAP=python MOCK=1 NORES=1 ./uctester ; done 56 | # XXX WRAP=python MOCK=1 NORES=1 ./RUN-ALL # need to avoid GNU parallel 57 | # 58 | - name: Run parser tests 59 | run: | 60 | cd parser 61 | ./tester 62 | 63 | 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | jevents/event-rmap 4 | jevents/examples/addr 5 | jevents/examples/jestat 6 | jevents/examples/rtest 7 | jevents/examples/rtest2 8 | jevents/examples/rtest3 9 | jevents/examples/ptself 10 | jevents/libjevents.a 11 | jevents/listevents 12 | jevents/showevent 13 | perf.data 14 | perf.data.old 15 | -------------------------------------------------------------------------------- /FLAKE8: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | export PATH=$PATH:~/.local/bin 4 | # stop the build if there are Python syntax errors or undefined names 5 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 6 | flake8 --exclude '*_uc.py,*_ratios.py' --ignore=E302,E128,E741,E251,E305,E221,E127,E126,E501,E401,E231,E201,E261,E731,E122,E131,E226,W504,E402,E301,W503,E265,E306,E227,E306,W391,E202,E241 7 | -------------------------------------------------------------------------------- /MYPY: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | EXTRA="--follow-imports skip" # for now 4 | 5 | export PATH=$PATH:~/.local/bin 6 | # XXX ocperf, models, interval*, tl-* 7 | exec mypy --check-untyped-defs $EXTRA toplev.py tl_output.py tl_io.py tl_cpu.py tl_stat.py tl_uval.py "$@" 8 | 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: 3 | @echo "Nothing to compile here." 4 | 5 | graphs: ivb-model.svg simple-model.svg 6 | 7 | ivb.dot: gen-dot.py ivb_client_ratios.py 8 | ./gen-dot.py > $@ 9 | simple.dot: gen-dot.py simple_ratios.py 10 | ./gen-dot.py simple > $@ 11 | 12 | ivb-model.svg: ivb.dot 13 | dot -Tsvg $^ > $@ 14 | ivb-model.png: ivb.dot 15 | dot -Tpng $^ > $@ 16 | simple-model.svg: simple.dot 17 | dot -Tsvg $^ > $@ 18 | simple-model.png: simple.dot 19 | dot -Tpng $^ > $@ 20 | 21 | man: toplev.man 22 | 23 | toplev.man: toplev.py 24 | help2man -N ./toplev.py > toplev.man 25 | 26 | clean: 27 | rm -f simple.dot ivb.dot ivb-model.svg simple-model.svg simple.png ivb-model.png 28 | 29 | -------------------------------------------------------------------------------- /PYLINT: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pylint -d too-many-arguments,too-many-instance-attributes,import-outside-toplevel,consider-using-f-string,consider-using-with,global-statement,redundant-u-string-prefix,C0301,C0116,invalid-name,multiple-imports,missing-module-docstring,missing-class-docstring,redefined-outer-name,too-few-public-methods,attribute-defined-outside-init,multiple-statements $(ls *.py | grep -v ratios) 3 | -------------------------------------------------------------------------------- /RERUN: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # rerun last toplev line from tl-tester log, optionally with a prefix (like python -mpdb) 3 | # tl-tester >& log 4 | # RERUN log python -mpdb 5 | if [ "x$1" = "x--notltester" ] ; then 6 | shift 7 | else 8 | export TL_TESTER=1 9 | fi 10 | L="$1" 11 | shift 12 | T=$(grep Line.*toplev "$L" | tail -1 | sed 's/Line [0-9]* //') 13 | echo "$@" $T 14 | eval "$@" $T 15 | -------------------------------------------------------------------------------- /all-tester: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # one stop shop to test all pmu-tools test suites 3 | # to run it faster use parallel-tester 4 | # each line must be self contained! 5 | 6 | set -e 7 | set -x 8 | 9 | # test ocperf 10 | ./tester 11 | WRAP=python3 ./tester 12 | 13 | # test toplev 14 | ./tl-tester 15 | WRAP=python3 ./tl-tester 16 | MATCH="icl" DCPU="icl --no-group" NOGROUP=1 NORES=1 LOAD=./workloads/BC1s ./tl-tester 17 | MATCH=snb DCPU=snb ./tl-tester 18 | MATCH=hsw DCPU=hsw ./tl-tester 19 | MATCH=slm DCPU=slm NORES=1 NOGROUP=1 LOAD=./workloads/BC1s ./tl-tester 20 | MATCH=knl DCPU=knl NORES=1 NOGROUP=1 LOAD=./workloads/BC1s ./tl-tester 21 | 22 | # test other tools 23 | ./other-tester 24 | WRAP=python3 ./other-tester 25 | 26 | # test jevents 27 | cd jevents ; make && ./tester ; cd .. 28 | 29 | # test perf.data parser 30 | # this requires construct and pandas and some other libraries 31 | # XXX check for missing dependencies 32 | #cd parser 33 | #tester 34 | #cd .. 35 | -------------------------------------------------------------------------------- /cleanlogs: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # clean up tl-tester logs after failures 3 | rm -f log[0-9]* 4 | rm -f log-[0-9]* 5 | rm -f logsum[0-9]* 6 | rm -f log-all* 7 | rm -f log-combined* 8 | rm -f val[0-9]*.csv 9 | rm -f x*.png 10 | rm -f x*.csv 11 | rm -f y*.csv 12 | rm -f y*.out 13 | rm -f x*.csv.gz 14 | rm -f y*.out.gz 15 | rm -f x*.csv.xz 16 | rm -f j*.json 17 | rm -f out*-{core,global,socket,thread} 18 | rm -f perf.data.*.old 19 | rm -f tlog[0-9]* 20 | rm -f tltest*_* 21 | rm -f perf.data* 22 | rm -f perf*.data.*.old 23 | rm -f perf[osv]*.csv 24 | rm -f val.[0-9]*.csv 25 | rm -f x*.xlsx 26 | rm -f script[0-9]* 27 | rm -f toplev[ospv]*.csv toplevm[0-9]* 28 | rm -f nflog[0-9]* 29 | rm -f toplevs*-cpu* 30 | -------------------------------------------------------------------------------- /counterdiff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # counterdiff.py < plog program .. (or general perf arguments) 3 | # verify plog.* output from toplev by running event one by one 4 | # this can be used to estimate multiplexing measurement errors 5 | from __future__ import print_function 6 | import sys, os 7 | 8 | def run(x): 9 | print(x) 10 | os.system(x) 11 | 12 | for l in sys.stdin: 13 | if l.find(",") < 0: 14 | continue 15 | n = l.strip().split(",") 16 | run("perf stat --output l -x, -e %s %s" % 17 | (n[1], " ".join(sys.argv[1:]))) 18 | f = open("l", "r") 19 | for i in f: 20 | if i.find(",") < 0: 21 | continue 22 | j = i.strip().split(",") 23 | break 24 | f.close() 25 | if float(n[0]) > 0: 26 | delta = (float(j[0]) - float(n[0])) / float(n[0]) 27 | else: 28 | delta = 0 29 | print(n[1], j[0], n[0], "%.2f" % (delta * 100.0)) 30 | -------------------------------------------------------------------------------- /cpumap.sh: -------------------------------------------------------------------------------- 1 | declare -A cpus 2 | cpus[nhm-ex]=GenuineIntel-6-2E 3 | cpus[nhm-ep]=GenuineIntel-6-1E 4 | cpus[nhm-ep]=GenuineIntel-6-1A 5 | cpus[wsm-ex]=GenuineIntel-6-2F 6 | cpus[wsm-sp]=GenuineIntel-6-25 7 | cpus[wsm-dp]=GenuineIntel-6-2C 8 | cpus[snb]=GenuineIntel-6-2A 9 | cpus[jkt]=GenuineIntel-6-2D 10 | cpus[ivt]=GenuineIntel-6-3E 11 | cpus[ivb]=GenuineIntel-6-3A 12 | cpus[hsw]=GenuineIntel-6-45 13 | cpus[slm]=GenuineIntel-6-37 14 | cpus[bnl]=GenuineIntel-6-35 15 | cpus[bdw]=GenuineIntel-6-3D 16 | cpus[hsx]=GenuineIntel-6-3F 17 | cpus[skl]=GenuineIntel-6-5E 18 | cpus[bdw-de]=GenuineIntel-6-56 19 | cpus[bdx]=GenuineIntel-6-4F 20 | cpus[knl]=GenuineIntel-6-57 21 | cpus[skx]=GenuineIntel-6-55-4 22 | cpus[clx]=GenuineIntel-6-55-5 23 | cpus[icl]=GenuineIntel-6-7E 24 | cpus[icx]=GenuineIntel-6-6A 25 | cpus[adl]=GenuineIntel-6-9A 26 | cpus[spr]=GenuineIntel-6-8F 27 | cpus[tgl]=GenuineIntel-6-8C 28 | -------------------------------------------------------------------------------- /cputop: -------------------------------------------------------------------------------- 1 | cputop.py -------------------------------------------------------------------------------- /cputop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # query cpu topology and print all matching cpu numbers 3 | # cputop "query" ["format"] 4 | # query is a python expression, using variables: 5 | # socket, core, thread, type, cpu 6 | # or "offline" to query all offline cpus 7 | # or "atom" or "core" to select core types 8 | # type can be "atom" or "core" 9 | # cpu is the cpu number 10 | # format is a printf format with %d 11 | # %d will be replaced with the cpu number 12 | # format can be offline to offline the cpu or online to online 13 | # Author: Andi Kleen 14 | from __future__ import print_function 15 | import sys 16 | import os 17 | import re 18 | import argparse 19 | import glob 20 | 21 | def numfile(fn): 22 | f = open(fn, "r") 23 | v = int(f.read()) 24 | f.close() 25 | return v 26 | 27 | outstr = "" 28 | 29 | def output(p, fmt): 30 | if fmt: 31 | if fmt == "taskset": 32 | global outstr 33 | if outstr: 34 | outstr += "," 35 | else: 36 | outstr += "taskset -c " 37 | outstr += "%d" % p 38 | else: 39 | print(fmt % (p,)) 40 | else: 41 | print(p) 42 | 43 | ap = argparse.ArgumentParser(description=''' 44 | query cpu topology and print all matching cpu numbers 45 | cputop "query" ["format"] 46 | query is a python expression, using variables: 47 | socket, core, thread, type, cpu 48 | type is "core" or "atom" on a hybrid system 49 | cpu is the cpu number 50 | or "offline" to query all offline cpus 51 | format is a printf format with %d 52 | %d will be replaced with the cpu number, or online/offline 53 | to generate online/offline commands, or taskset to generate taskset command line''', 54 | epilog=''' 55 | Examples: 56 | print all cores on socket 0 57 | cputop "socket == 0" 58 | 59 | print all first threads in each core on socket 0 60 | cputop "thread == 0 and socket == 0" 61 | 62 | disable all second threads (disable hyper threading) 63 | cputop "thread == 1" offline 64 | 65 | reenable all offlined cpus 66 | cputop offline online 67 | 68 | print all online cpus 69 | cputop True ''', formatter_class=argparse.RawTextHelpFormatter) 70 | ap.add_argument('expr', help='python expression with socket/core/thread') 71 | ap.add_argument('fmt', help='Output format string with %%d, or online/offline', nargs='?') 72 | args = ap.parse_args() 73 | 74 | if args.expr == "atom": 75 | args.expr = 'type == "atom"' 76 | if args.expr == "core": 77 | args.expr = 'type == "core"' 78 | 79 | special = { 80 | "offline": "echo 0 > /sys/devices/system/cpu/cpu%d/online", 81 | "online": "echo 1 > /sys/devices/system/cpu/cpu%d/online", 82 | } 83 | 84 | if args.fmt in special: 85 | args.fmt = special[args.fmt] 86 | 87 | types = dict() 88 | for fn in glob.glob("/sys/bus/event_source/devices/cpu_*/cpus"): 89 | typ = os.path.basename(fn.replace("/cpus", "")).replace("cpu_","") 90 | cpus = open(fn).read() 91 | for j in cpus.split(","): 92 | m = re.match(r'(\d+)(-\d+)?', j) 93 | if m is None: 94 | continue 95 | if m.group(2): 96 | for k in range(int(m.group(1)), int(m.group(2)[1:])+1): 97 | types[k] = typ 98 | else: 99 | types[int(m.group(1))] = typ 100 | 101 | base = "/sys/devices/system/cpu/" 102 | p = {} 103 | l = os.listdir(base) 104 | for d in l: 105 | m = re.match(r"cpu([0-9]+)", d) 106 | if not m: 107 | continue 108 | proc = int(m.group(1)) 109 | top = base + d + "/topology" 110 | if not os.path.exists(top): 111 | if args.expr == "offline": 112 | output(proc, args.fmt) 113 | continue 114 | socket = numfile(top + "/physical_package_id") 115 | core = numfile(top + "/core_id") 116 | n = 0 117 | while (socket, core, n) in p: 118 | n += 1 119 | p[(socket, core, n)] = proc 120 | 121 | if args.expr == "offline": 122 | sys.exit(0) 123 | 124 | for j in sorted(p.keys()): 125 | socket, core, thread = j 126 | cpu = p[j] 127 | type = "any" 128 | if cpu in types: 129 | type = types[cpu] 130 | if eval(args.expr): 131 | output(p[j], args.fmt) 132 | 133 | if outstr: 134 | print(outstr) 135 | -------------------------------------------------------------------------------- /csv_formats.py: -------------------------------------------------------------------------------- 1 | # distinguish the bewildering variety of perf/toplev CSV formats 2 | from __future__ import print_function 3 | import sys 4 | import re 5 | from collections import namedtuple 6 | 7 | def is_val(n): 8 | return re.match(r'-?[0-9.]+%?|<.*>', n) is not None 9 | 10 | def is_cpu(n): 11 | return re.match(r'(CPU)|(S\d+(-C\d+)?)|C\d+|all', n) is not None 12 | 13 | def is_socket(n): 14 | return re.match(r'S\d+', n) is not None 15 | 16 | def is_event(n): 17 | return re.match(r'[a-zA-Z.-]+', n) is not None 18 | 19 | def is_number(n): 20 | return re.match(r'\s*[0-9]+', n) is not None 21 | 22 | def is_ts(n): 23 | return re.match(r'\s*[0-9.]+', n) is not None or n == "SUMMARY" 24 | 25 | def is_unit(n): 26 | return re.match(r'(% )?[a-zA-Z]*( <)?', n) is not None 27 | 28 | def is_running(n): 29 | return is_number(n) 30 | 31 | def is_enabled(n): 32 | return is_number(n) 33 | 34 | formats = ( 35 | # 0.100997872;CPU0;4612809;;inst_retired_any_0;3491526;2.88 new perf 36 | (is_ts, is_cpu, is_val, is_unit, is_event, is_enabled, is_running), 37 | # 1.354075473,0,cpu-migrations old perf w/o cpu 38 | (is_ts, is_val, is_event), 39 | # 1.354075473,CPU0,0,cpu-migrations old perf w/ cpu 40 | (is_ts, is_cpu, is_val, is_event), 41 | # 0.799553738,137765150,,branches new perf with unit 42 | (is_ts, is_val, is_unit, is_event), 43 | # 0.799553738,CPU1,137765150,,branches new perf with unit and cpu 44 | (is_ts, is_cpu, is_val, is_unit, is_event), 45 | # 0.100879059,402.603109,,task-clock,402596410,100.00 new perf with unit without cpu and stats 46 | (is_ts, is_val, is_unit, is_event, is_running, is_enabled), 47 | # 1.001131873,S0,Backend_Bound.Memory_Bound,13.3,% Slots <,,,0.0,3.0,, 48 | # 0.200584389,0,FrontendBound.Branch Resteers,15.87%,above,"", toplev w/ cpu 49 | (is_ts, is_cpu, is_event, is_val, is_unit), 50 | # 1.001365014,CPU2,1819888,,instructions,93286388,100.00 new perf w/ unit w/ cpu and stats 51 | (is_ts, is_cpu, is_val, is_unit, is_event, is_running, is_enabled), 52 | # 0.609113353,S0,4,405.454531,,task-clock,405454468,100.00 perf --per-socket with cores 53 | (is_ts, is_socket, is_number, is_val, is_unit, is_event, is_running, is_enabled), 54 | # 0.806231582,S0,4,812751,,instructions older perf --per-socket w/ cores w/o stats 55 | (is_ts, is_socket, is_number, is_val, is_unit, is_event), 56 | # 0.936482669,C1-T0,Frontend_Bound.Frontend_Latency.ITLB_Misses,0.39,%below,,itlb_misses.walk_completed,, 57 | # 0.301553743,C1,Retiring,31.81,%,,,, 58 | # 0.200584389,FrontendBound.Branch Resteers,15.87%,above,"", toplev single thread 59 | (is_ts, is_event, is_val), 60 | ) 61 | 62 | fmtmaps = { 63 | is_ts: 0, 64 | is_cpu: 1, 65 | is_event: 2, 66 | is_val: 3, 67 | is_enabled: 4, 68 | is_running: 5, 69 | is_unit: 6 70 | } 71 | 72 | Row = namedtuple('Row', ['ts', 'cpu', 'ev', 'val', 'enabled', 'running', 'unit']) 73 | 74 | def check_format(fmt, row): 75 | if all([x(n.strip()) for (x, n) in zip(fmt, row)]): 76 | vals = [None] * 7 77 | for i, j in enumerate(fmt): 78 | if j in fmtmaps: 79 | vals[fmtmaps[j]] = row[i] 80 | r = Row._make(vals) 81 | return r 82 | return False 83 | 84 | fmt_cache = formats[0] 85 | 86 | def parse_csv_row(row, error_exit=False): 87 | if len(row) == 0: 88 | return None 89 | global fmt_cache 90 | r = check_format(fmt_cache, row) 91 | if r: 92 | return r 93 | for fmt in formats: 94 | r = check_format(fmt, row) 95 | if r: 96 | fmt_cache = fmt 97 | return r 98 | if row[0].startswith("#"): # comment 99 | return None 100 | if ".csv" in row[0]: # fake-perf output 101 | return None 102 | if "Timestamp" in row[0]: 103 | return None 104 | print("PARSE-ERROR", row, file=sys.stderr) 105 | if error_exit: 106 | sys.exit(1) 107 | return None 108 | 109 | if __name__ == '__main__': 110 | def check(l, fields): 111 | n = l.split(",") 112 | r = parse_csv_row(n) 113 | assert r is not None 114 | rd = r._asdict() 115 | for a, v in fields.items(): 116 | assert rd[a] == n[v] 117 | 118 | check('1.001131873,S0,Backend_Bound.Memory_Bound,13.3,% Slots <,,,0.0,3.0,,', { 119 | "ts": 0, 120 | "cpu": 1, 121 | "ev": 2, 122 | "val": 3, 123 | "unit": 4 }) 124 | -------------------------------------------------------------------------------- /dummyarith.py: -------------------------------------------------------------------------------- 1 | 2 | # dummy arithmetic type without any errors, for collecting 3 | # the events from the model. Otherwise divisions by zero cause 4 | # early exits 5 | 6 | class DummyArith(object): 7 | def __add__(self, o): 8 | return self 9 | __sub__ = __add__ 10 | __mul__ = __add__ 11 | __div__ = __add__ 12 | __truediv__ = __add__ 13 | __rsub__ = __add__ 14 | __radd__ = __add__ 15 | __rmul__ = __add__ 16 | __rdiv__ = __add__ 17 | __rtruediv__ = __add__ 18 | def __lt__(self, o): 19 | return True 20 | __eq__ = __lt__ 21 | __ne__ = __lt__ 22 | __gt__ = __lt__ 23 | __ge__ = __lt__ 24 | __or__ = __add__ 25 | __and__ = __add__ 26 | __min__ = __add__ 27 | __max__ = __add__ 28 | -------------------------------------------------------------------------------- /event-rmap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # print currently running events on cpu (default 0) 3 | # event-rmap [cpu-num] 4 | # xxx no extra modi for now, racy with multi plexing 5 | from __future__ import print_function 6 | import sys 7 | import msr 8 | import ocperf 9 | from pmudef import (MSR_PEBS_ENABLE, MSR_EVNTSEL, EVENTSEL_ENABLE, EVMASK, 10 | EVENTSEL_CMASK, 11 | EVENTSEL_EDGE, EVENTSEL_ANY, EVENTSEL_INV, EVENTSEL_PC, 12 | MSR_IA32_FIXED_CTR_CTRL) 13 | 14 | fixednames = ( 15 | "inst_retired.any", 16 | "cpu_clk_unhalted.thread", 17 | "cpu_clk_unhalted.ref_tsc" 18 | ) 19 | 20 | cpu = 0 21 | if len(sys.argv) > 1: 22 | cpu = int(sys.argv[1]) 23 | 24 | emap = ocperf.find_emap() 25 | if not emap: 26 | print("Unknown CPU or cannot find CPU event table") 27 | found = 0 28 | try: 29 | pebs_enable = msr.readmsr(MSR_PEBS_ENABLE, cpu) 30 | except OSError: 31 | pebs_enable = 0 32 | for i in range(0, 8): 33 | try: 34 | evsel = msr.readmsr(MSR_EVNTSEL + i, cpu) 35 | except OSError: 36 | break 37 | found += 1 38 | if evsel & EVENTSEL_ENABLE: 39 | print("%d: %016x: " % (i, evsel), end="") 40 | evsel &= EVMASK 41 | if emap is None: 42 | name = "r%04x", evsel & 0xffff 43 | elif evsel in emap.codes: 44 | ev = emap.codes[evsel] 45 | if ev.msr: 46 | try: 47 | extra = msr.readmsr(ev.msr) 48 | except OSError: 49 | print("Cannot read extra MSR %x for %s" % (ev.msr, ev.name)) 50 | continue 51 | for j in emap.codes.keys(): 52 | if j == evsel and extra == emap.codes[j].msrvalue: 53 | print(j.name, "msr:%x" % (extra), end="") 54 | break 55 | else: 56 | print("no exact match for %s, msr %x value %x" % (ev.name, 57 | ev.msr, ev.msrvalue), end="") 58 | else: 59 | print(ev.name, end="") 60 | else: 61 | name = "" 62 | for j in emap.codes.keys(): 63 | if j & 0xff == evsel & 0xff: 64 | name += "%s[%x] " % (emap.codes[j].name, j) 65 | if name: 66 | print("[no exact match] " + name, end=" ") 67 | else: 68 | print("r%x" % (evsel), end=" ") 69 | if evsel & EVENTSEL_CMASK: 70 | print("cmask=%x" % (evsel >> 24), end=" ") 71 | if evsel & EVENTSEL_EDGE: 72 | print("edge=1", end=" ") 73 | if evsel & EVENTSEL_ANY: 74 | print("any=1", end=" ") 75 | if evsel & EVENTSEL_INV: 76 | print("inv=1", end=" ") 77 | if evsel & EVENTSEL_PC: 78 | print("pc=1", end=" ") 79 | if pebs_enable & (1 << i): 80 | print("precise=1", end=" ") 81 | print() 82 | if found == 0: 83 | print("Cannot read any MSRs") 84 | 85 | try: 86 | fixed = msr.readmsr(MSR_IA32_FIXED_CTR_CTRL) 87 | except OSError: 88 | print("Cannot read fixed counter MSR") 89 | fixed = 0 90 | for i in range(0, 2): 91 | if fixed & (1 << (i*4)): 92 | print("fixed %d: %s" % (i, fixednames[i])) 93 | -------------------------------------------------------------------------------- /event-translate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # translate raw events to names 3 | # event-translate rXXX ... 4 | from __future__ import print_function 5 | import re 6 | import sys 7 | import ocperf 8 | from pmudef import EVMASK, EVENTSEL_EVENT, EVENTSEL_UMASK, extra_flags 9 | 10 | emap = ocperf.find_emap() 11 | if not emap: 12 | sys.exit("Unknown CPU or cannot find event table") 13 | for j in sys.argv[1:]: 14 | m = re.match(r'r([0-9a-f]+)(:.*)?', j) 15 | if m: 16 | print(m.group(1)) 17 | evsel = int(m.group(1), 16) 18 | print("%s:" % (j)) 19 | if evsel & EVMASK in emap.codes: 20 | print(emap.codes[evsel & EVMASK].name) 21 | elif (evsel & (EVENTSEL_EVENT|EVENTSEL_UMASK)) in emap.codes: 22 | print(emap.codes[evsel & (EVENTSEL_EVENT|EVENTSEL_UMASK)].name, end='') 23 | for k in extra_flags: 24 | if evsel & k[0]: 25 | m = k[0] 26 | en = evsel 27 | while (m & 1) == 0: 28 | m >>= 1 29 | en >>= 1 30 | print("%s=%d" % (k[1], en & m),end='') 31 | print() 32 | else: 33 | print("cannot find", m.group(1)) 34 | else: 35 | # XXX implement offcore new style events 36 | print("cannot parse", j) 37 | -------------------------------------------------------------------------------- /event_download: -------------------------------------------------------------------------------- 1 | event_download.py -------------------------------------------------------------------------------- /fake-perf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # mock perf for limited test environments 3 | from __future__ import print_function 4 | import sys 5 | 6 | out = sys.stderr 7 | av = sys.argv 8 | if av[-1] == "true": 9 | sys.exit(0) 10 | j = 1 11 | process = True 12 | while j < len(sys.argv): 13 | if av[j] == "--version": 14 | print("perf version 5.6.8", end='') 15 | break 16 | elif av[j] == "-o" and process: 17 | j += 1 18 | out = open(av[j], "w") 19 | elif av[j] == "--": 20 | process = False 21 | j += 1 22 | out.write("\n") 23 | -------------------------------------------------------------------------------- /frequency.py: -------------------------------------------------------------------------------- 1 | nominal_freq = 1.0 2 | 3 | class Frequency: 4 | name = "Frequency" 5 | desc = " Frequency ratio" 6 | subplot = "Frequency" 7 | domain = "CoreMetric" 8 | 9 | def compute(self, EV): 10 | try: 11 | self.val = (EV("cycles", 1) / EV("CPU_CLK_UNHALTED.REF_TSC", 1)) * nominal_freq 12 | except ZeroDivisionError: 13 | self.val = 0 14 | 15 | class SetupCPU: 16 | def __init__(self, r, cpu): 17 | global nominal_freq 18 | if cpu.freq: 19 | nominal_freq = cpu.freq 20 | r.force_metric(Frequency()) 21 | -------------------------------------------------------------------------------- /gen-dot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # generate dot diagram of top down tree from module 3 | from __future__ import print_function 4 | import sys 5 | 6 | max_level = 5 7 | first = 1 8 | if sys.argv[1:] and sys.argv[1][:2] == "-l": 9 | max_level = int(sys.argv[1][2:]) 10 | first += 1 11 | if len(sys.argv) > first and sys.argv[first] == "simple": 12 | import simple_ratios 13 | m = simple_ratios 14 | else: 15 | import adl_glc_ratios 16 | m = adl_glc_ratios 17 | 18 | def has(obj, name): 19 | return name in obj.__class__.__dict__ 20 | 21 | class Runner: 22 | def __init__(self): 23 | self.olist = [] 24 | 25 | def run(self, n): 26 | if n.level <= max_level: 27 | self.olist.append(n) 28 | 29 | def metric(self, n): 30 | pass 31 | 32 | def finish(self): 33 | for n in self.olist: 34 | if n.level == 1: 35 | print('"%s";' % (n.name)) 36 | elif n.parent: 37 | print('"%s" -> "%s";' % (n.parent.name, n.name)) 38 | #if n.sibling: 39 | # print('"%s" -> "%s";' % (n.name, n.sibling.name)) 40 | 41 | runner = Runner() 42 | m.Setup(runner) 43 | print(runner.olist, file=sys.stderr) 44 | print("digraph {") 45 | print("fontname=\"Courier\";") 46 | runner.finish() 47 | print("}") 48 | -------------------------------------------------------------------------------- /gen_level.py: -------------------------------------------------------------------------------- 1 | # generate levels for events from the model 2 | # utility module for other tools 3 | l1 = set(("Frontend_Bound", "Backend_Bound", "Retiring", "Bad_Speculation")) 4 | 5 | def get_level(name): 6 | is_node = name in l1 or "." in name 7 | level = name.count(".") + 1 8 | if is_node: 9 | return level 10 | return 0 11 | 12 | def is_metric(name): 13 | return get_level(name) == 0 14 | 15 | def level_name(name): 16 | if name.count(".") > 0: 17 | f = name.split(".")[:-1] 18 | n = ".".join(f) 19 | elif is_metric(name): 20 | return "CPU-METRIC" # XXX split 21 | else: 22 | n = "TopLevel" 23 | n = n.replace(" ", "_") 24 | return n 25 | -------------------------------------------------------------------------------- /genretlat: -------------------------------------------------------------------------------- 1 | genretlat.py -------------------------------------------------------------------------------- /interval-merge: -------------------------------------------------------------------------------- 1 | interval-merge.py -------------------------------------------------------------------------------- /interval-merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # merge multiple --perf-output files. requires header 3 | from __future__ import print_function 4 | import csv 5 | import argparse 6 | from collections import OrderedDict, Counter 7 | import sys 8 | 9 | ap = argparse.ArgumentParser() 10 | ap.add_argument('csvfiles', nargs='+', type=argparse.FileType('r')) 11 | args = ap.parse_args() 12 | 13 | def genkey(c, hdr, count): 14 | k = [count] 15 | if 'Timestamp' in hdr: 16 | k.append(c[hdr['Timestamp']]) 17 | if 'Location' in hdr: 18 | k.append(c[hdr['Location']]) 19 | k.append(c[hdr['Event']]) 20 | return tuple(k) 21 | 22 | d = OrderedDict() 23 | hdr = None 24 | hdrl = None 25 | prev = Counter() 26 | for fh in args.csvfiles: 27 | csvf = csv.reader(fh, delimiter=';') 28 | for c in csvf: 29 | if hdr is None: 30 | hdrl = c 31 | hdr = dict([(y,x) for x,y in enumerate(c)]) 32 | continue 33 | if c[0] in ("Timestamp", "Location", "Value"): 34 | continue 35 | pkey = (fh, c[hdr['Timestamp']] if 'Timestamp' in hdr else None, c[hdr['Event']]) 36 | prev[pkey] += 1 37 | key = genkey(c, hdr, prev[pkey]) 38 | try: 39 | if key in d: 40 | o = d[key] 41 | o[hdr['Run-Time']] += float(c[hdr['Run-Time']]) 42 | o[hdr['Enabled']] = (float(o[hdr['Enabled']]) + o[hdr['Enabled']]) / 2 43 | o[hdr['Value']] += float(c[hdr['Value']]) 44 | else: 45 | d[key] = c 46 | o = d[key] 47 | o[hdr['Value']] = float(c[hdr['Value']]) 48 | o[hdr['Enabled']] = float(c[hdr['Enabled']]) 49 | o[hdr['Run-Time']] = float(c[hdr['Run-Time']]) 50 | except ValueError as e: 51 | print("cannot parse", c, e, file=sys.stderr) 52 | 53 | csvf = csv.writer(sys.stdout, delimiter=';') 54 | csvf.writerow(hdrl) 55 | for j in d.values(): 56 | csvf.writerow(j) 57 | -------------------------------------------------------------------------------- /interval-normalize: -------------------------------------------------------------------------------- 1 | interval-normalize.py -------------------------------------------------------------------------------- /interval-normalize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # convert perf stat -Ixxx -x, / toplev -Ixxx -x, output to normalized output 3 | # this version buffers all data in memory, so it can use a lot of memory. 4 | # t1,ev1,num1 5 | # t1,ev2,num1 6 | # t2,ev1,num3 7 | # -> 8 | # timestamp,ev1,ev2 9 | # t1,num1,num2 10 | # t2,num3,, 11 | # when the input has CPU generate separate lines for each CPU (may need post filtering) 12 | from __future__ import print_function 13 | import sys 14 | import csv 15 | import argparse 16 | import collections 17 | import csv_formats 18 | 19 | ap = argparse.ArgumentParser(description= 20 | 'Normalize CSV data from perf or toplev. All values are printed on a single line.') 21 | ap.add_argument('inputfile', type=argparse.FileType('r'), default=sys.stdin, nargs='?') 22 | ap.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout, nargs='?') 23 | ap.add_argument('--cpu', nargs='?', help='Only output for this cpu') 24 | ap.add_argument('--na', nargs='?', help='Value to use if data is not available', default="") 25 | ap.add_argument('--error-exit', action='store_true', help='Force error exit on parse error') 26 | ap.add_argument('--normalize-cpu', action='store_true', help='Normalize CPUs into unique columns too') 27 | args = ap.parse_args() 28 | 29 | printed_header = False 30 | timestamp = None 31 | 32 | events = collections.OrderedDict() 33 | out = [] 34 | times = [] 35 | cpus = [] 36 | rc = csv.reader(args.inputfile) 37 | res = [] 38 | writer = csv.writer(args.output, lineterminator='\n') 39 | lastcpu = None 40 | cpu = None 41 | lineno = 1 42 | for row in rc: 43 | if len(row) > 0 and (row[0] == "Timestamp" or row[0].startswith("#")): 44 | lineno += 1 45 | continue 46 | r = csv_formats.parse_csv_row(row, error_exit=args.error_exit) 47 | if r is None: 48 | print("at line %d" % lineno, file=sys.stderr) 49 | lineno += 1 50 | continue 51 | ts, cpu, ev, val = r.ts, r.cpu, r.ev, r.val 52 | 53 | if ts != timestamp or (cpu != lastcpu and not args.normalize_cpu): 54 | if timestamp: 55 | if args.cpu and cpu != args.cpu: 56 | continue 57 | # delay in case we didn't see all headers 58 | # only need to do that for toplev, directly output for perf? 59 | # could limit buffering to save memory? 60 | out.append(res) 61 | times.append(timestamp) 62 | cpus.append(cpu) 63 | res = [] 64 | timestamp = ts 65 | lastcpu = cpu 66 | 67 | if cpu is not None and args.normalize_cpu: 68 | ev = cpu + " " + ev 69 | 70 | # use a list for row storage to keep memory requirements down 71 | if ev not in events: 72 | events[ev] = len(res) 73 | ind = events[ev] 74 | if ind >= len(res): 75 | res += [None] * ((ind + 1) - len(res)) 76 | res[ind] = val 77 | lineno += 1 78 | if res and not (args.cpu and cpu != args.cpu): 79 | out.append(res) 80 | times.append(timestamp) 81 | cpus.append(cpu) 82 | 83 | def resolve(row, ind): 84 | if ind >= len(row): 85 | return args.na 86 | v = row[ind] 87 | if v is None: 88 | return args.na 89 | return v 90 | 91 | def cpulist(): 92 | if args.normalize_cpu: 93 | return [] 94 | if cpu is not None: 95 | return ["CPU"] 96 | return [] 97 | 98 | keys = events.keys() 99 | writer.writerow(["Timestamp"] + cpulist() + list(keys)) 100 | for row, ts, cpunum in zip(out, times, cpus): 101 | writer.writerow([ts] + 102 | ([cpunum] if (cpu is not None and not args.normalize_cpu) else []) + 103 | ([resolve(row, events[x]) for x in keys])) 104 | -------------------------------------------------------------------------------- /interval-plot: -------------------------------------------------------------------------------- 1 | interval-plot.py -------------------------------------------------------------------------------- /jevents/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY = all clean-examples all-examples install clean html man 2 | PREFIX=$(DESTDIR)/usr/local 3 | LIB=$(PREFIX)/lib64 4 | BIN=$(PREFIX)/bin 5 | INCLUDE=$(PREFIX)/include 6 | CFLAGS := -g -fPIC -Wall -O2 -Wno-unused-result 7 | OBJ := json.o jsmn.o jevents.o resolve.o cache.o cpustr.o rawevent.o \ 8 | perf-iter.o interrupts.o rdpmc.o measure.o perf_event_open.o \ 9 | session.o perf-aux.o csv.o print.o 10 | KDOC = /usr/src/linux/scripts/kernel-doc 11 | 12 | all: libjevents.a showevent listevents event-rmap all-examples 13 | 14 | clean-examples: 15 | make -C examples clean 16 | 17 | all-examples: libjevents.a 18 | make -C examples 19 | 20 | install: libjevents.a listevents showevent event-rmap 21 | install -d ${BIN} 22 | install -d ${LIB} 23 | install -d ${INCLUDE} 24 | install -m 755 listevents showevent event-rmap ${BIN} 25 | install -m 644 libjevents.a ${LIB} 26 | install -m 644 rdpmc.h jevents.h measure.h perf-iter.h jsession.h perf-record.h ${INCLUDE} 27 | # xxx install man page 28 | 29 | libjevents.a: ${OBJ} 30 | rm -f libjevents.a 31 | ar q libjevents.a $^ 32 | ranlib libjevents.a 33 | 34 | clean: clean-examples 35 | rm -f ${OBJ} libjevents.a resolve showevent listfiles jevents.html rmap event-rmap.o event-rmap \ 36 | listevents resolve-test showevent.o listevents.o 37 | 38 | resolve: resolve.c 39 | $(CC) $(CFLAGS) -DTEST=1 -o $@ $^ 40 | 41 | showevent: showevent.o libjevents.a 42 | 43 | listevents: listevents.o libjevents.a 44 | 45 | event-rmap: event-rmap.o libjevents.a 46 | 47 | DOCFILES := cache.c jevents.c cpustr.c rawevent.c interrupts.c measure.c rdpmc.c \ 48 | session.c perf-aux.c csv.c print.o jsession.h 49 | 50 | html: jevents.html 51 | 52 | man: jeventstmp.man 53 | perl -ne 's/Kernel Hacker.s Manual/jevents/; open(F,">" . $$1 . ".man") if /^\.TH "(.*?)"/; print F $$_' jeventstmp.man 54 | 55 | jeventstmp.man: $(DOCFILES) 56 | ${KDOC} -man ${DOCFILES} > $@ 57 | 58 | jevents.html: $(DOCFILES) 59 | ${KDOC} -html ${DOCFILES} > $@ 60 | 61 | coverage: 62 | ${MAKE} CFLAGS="-g --coverage" LDFLAGS="-g --coverage" 63 | -------------------------------------------------------------------------------- /jevents/README.md: -------------------------------------------------------------------------------- 1 | # jevents 2 | 3 | jevents is a C library to use from C programs to make access to the kernel Linux perf interface easier. 4 | It also includes some examples to use the library. 5 | 6 | ## Features 7 | 8 | * Resolving symbolic event names using downloaded event files 9 | * Reading performance counters from ring 3 in C programs, 10 | * Handling the perf ring buffer (for example to read memory addresses) 11 | 12 | For more details see the [API reference](http://halobates.de/jevents.html) 13 | 14 | ## Building 15 | 16 | cd jevents 17 | make 18 | sudo make install 19 | 20 | ## Downloading event lists 21 | 22 | Before using event lists they need to be downloaded. Use the pmu-tools 23 | event_download.py script for this. 24 | 25 | % event_download.py 26 | 27 | ## Examples 28 | 29 | * listevents: List all named perf and JSON events 30 | * showevent: Convert JSON name or perf alias to perf format and test with perf 31 | * event-rmap: Map low level perf event to named high-level event 32 | * addr: Profile a loadable test kernel with address profiling 33 | * jstat: Simple perf stat like tool with JSON event resolution. 34 | 35 | ## Initialization/Multithreading 36 | 37 | Functions accessing the JSON event data load the JSON file lazily when first 38 | used. This might result in data races when multiple threads call jevent 39 | functions. In such cases the event list can be loaded from the main thread by 40 | `read_events(NULL);`. 41 | 42 | ## self profiling 43 | 44 | Reading performance counters directly in the program without entering 45 | the kernel. 46 | 47 | This is very simplified, for a real benchmark you almost certainly 48 | want some warmup, multiple iterations, possibly context switch 49 | filtering and some filler code to avoid cache effects. 50 | 51 | ```C 52 | #include "rdpmc.h" 53 | 54 | struct rdpmc_ctx ctx; 55 | unsigned long long start, end; 56 | 57 | if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0) ... error ... 58 | start = rdpmc_read(&ctx); 59 | ... your workload ... 60 | end = rdpmc_read(&ctx); 61 | ``` 62 | 63 | /sys/devices/cpu/rdpmc must be 1. 64 | 65 | http://halobates.de/modern-pmus-yokohama.pdf provides some 66 | additional general information on cycle counting. The techniques used 67 | with simple-pmu described there can be used with jevents too. 68 | 69 | ## Resolving named events 70 | 71 | Resolving named events to a perf event and set up reading from the perf ring buffer. 72 | 73 | First run event_download.py to download a current event list for your CPU. 74 | 75 | ```C 76 | #include "jevents.h" 77 | #include "rdpmc.h" 78 | #include 79 | 80 | struct perf_event_attr attr; 81 | if (resolve_event("cpu_clk_thread_unhalted.ref_xclk", &attr) < 0) { 82 | ... error ... 83 | } 84 | 85 | /* You can change attr, see the perf_event_open man page for details */ 86 | 87 | struct rdpmc_ctx ctx; 88 | if (rdpmc_open_attr(PERF_COUNT_HW_CPU_CYCLES, &ctx, &attr) < 0) 89 | ... error ... 90 | 91 | 92 | ''' 93 | 94 | Or alternatively use the resolve attr for sampling, set up the sampling attributes in attr, and use perf_fd_open / perf_iter_*. See examples/addr.c 95 | -------------------------------------------------------------------------------- /jevents/cpustr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Intel Corporation 3 | * Author: Andi Kleen 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 21 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 | * OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #define _GNU_SOURCE 1 31 | #include 32 | #include 33 | #include 34 | #include "jevents.h" 35 | 36 | /** 37 | * get_cpu_str - Return string describing the current CPU or NULL. 38 | * Needs to be freed by caller. 39 | * 40 | * Used to store JSON event lists in the cache directory. 41 | */ 42 | char *get_cpu_str(void) 43 | { 44 | return get_cpu_str_type("-core", NULL); 45 | } 46 | 47 | /** 48 | * get_cpu_str - Return string describing the current CPU for type or NULL. 49 | * @type: "-core" or "-uncore" 50 | * @idstr_step: if non NULL write idstr with stepping to pointer. 51 | * Both result and idstr_step (if non NULL) need to be freed by 52 | * caller. 53 | */ 54 | char *get_cpu_str_type(char *type, char **idstr_step) 55 | { 56 | char *res; 57 | union { 58 | struct { 59 | unsigned b, c, d; 60 | } f; 61 | char str[13]; 62 | } vendor; 63 | unsigned a, b, c, d; 64 | unsigned stepping, family, model; 65 | int n; 66 | 67 | vendor.str[12] = 0; 68 | __cpuid(0, a, vendor.f.b, vendor.f.d, vendor.f.c); 69 | __cpuid(1, a, b, c, d); 70 | stepping = a & 0xf; 71 | model = (a >> 4) & 0xf; 72 | family = (a >> 8) & 0xf; 73 | if (family == 0xf) 74 | family += (a >> 20) & 0xff; 75 | if (family == 6 || family == 0xf) 76 | model += ((a >> 16) & 0xf) << 4; 77 | if (idstr_step) 78 | asprintf(idstr_step, "%s-%d-%X-%X%s", vendor.str, family, 79 | model, stepping, type); 80 | n = asprintf(&res, "%s-%d-%X%s", vendor.str, family, model, type); 81 | if (n < 0) 82 | res = NULL; 83 | return res; 84 | } 85 | -------------------------------------------------------------------------------- /jevents/csv.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: BSD-3-Clause 2 | // Copyright 2021 Intel Corporation. 3 | // Author: Andi Kleen 4 | #include 5 | #include "jevents.h" 6 | #include "jsession.h" 7 | 8 | /** 9 | * session_print_csv - Print event list values to CSV file in perf stat format 10 | * @outfh: File descriptor to print to. 11 | * @el: Event list to print. It must have been measured before. 12 | * @arg: Arguments. sep can be set there, as well as prefix. 13 | */ 14 | 15 | void session_print_csv(FILE *outfh, struct eventlist *el, struct session_print *arg) 16 | { 17 | struct event *e; 18 | int i; 19 | char *sep = arg->sep ? arg->sep : ";"; 20 | 21 | for (e = el->eventlist; e; e = e->next) { 22 | uint64_t v; 23 | for (i = 0; i < el->num_cpus; i++) { 24 | if (e->efd[i].fd < 0) 25 | continue; 26 | if (arg->merge && e->orig) 27 | continue; 28 | v = event_scaled_value(e, i); 29 | fprintf(outfh, "%s%3d%s%s%s%lu%s%lu%s%lu\n", 30 | arg->prefix ? arg->prefix : "", 31 | i, sep, 32 | e->extra.name ? e->extra.name : e->event, sep, 33 | v, sep, 34 | e->efd[i].val[1], sep, 35 | e->efd[i].val[2]); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /jevents/event-rmap.c: -------------------------------------------------------------------------------- 1 | #include "jevents.h" 2 | #include 3 | #include 4 | 5 | int main(int ac, char **av) 6 | { 7 | while (*++av) { 8 | unsigned event = strtoul(*av, NULL, 0); 9 | char *name, *desc; 10 | if (rmap_event(event, &name, &desc) == 0) 11 | printf("%x: %s : %s\n", event, name, desc); 12 | else 13 | printf("%x not found\n", event); 14 | } 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /jevents/examples/Makefile: -------------------------------------------------------------------------------- 1 | # build jevents first 2 | CFLAGS := -g -Wall -O2 -Wno-unused-result 3 | CXXFLAGS := -g -Wall -O2 -fPIC 4 | override CFLAGS += -I .. 5 | override LDFLAGS += -L .. 6 | LDLIBS = -ljevents 7 | 8 | all: addr rtest rtest2 rtest3 jestat ptself 9 | 10 | # no deps on the includes 11 | 12 | ADDR_OBJ := addr.o hist.o cpu.o 13 | 14 | addr: ${ADDR_OBJ} ../libjevents.a 15 | 16 | addr: LDLIBS += -lstdc++ -ldl 17 | 18 | rtest2: LDLIBS += -lm 19 | 20 | rtest: rtest.o ../libjevents.a 21 | 22 | rtest2: rtest2.o ../libjevents.a 23 | 24 | rtest3: rtest3.o ../libjevents.a 25 | 26 | jestat: jestat.o ../libjevents.a 27 | 28 | clean: 29 | rm -f addr ${ADDR_OBJ} jestat jestat.o 30 | rm -f rtest3 rtest3.o rtest2 rtest2.o rtest rtest.o 31 | -------------------------------------------------------------------------------- /jevents/examples/cpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 Intel Corporation 3 | * Author: Andi Kleen 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that: (1) source code distributions 7 | * retain the above copyright notice and this paragraph in its entirety, (2) 8 | * distributions including binary code include the above copyright notice and 9 | * this paragraph in its entirety in the documentation or other materials 10 | * provided with the distribution 11 | * 12 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 13 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "cpu.h" 24 | #include "jevents.h" 25 | 26 | /** 27 | * mem_stores_event - Return precise mem load event for current CPU. 28 | * This is an event which supports load address monitoring. 29 | * Return: raw event, can be put int perf_event_attr->config. 30 | * -1 or error. 31 | */ 32 | 33 | unsigned mem_loads_event(void) 34 | { 35 | struct perf_event_attr attr; 36 | 37 | if (!resolve_event("MEM_INST_RETIRED.LOAD_LATENCY_ABOVE_THRESHOLD_0", &attr) || 38 | !resolve_event("MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", &attr)) 39 | return attr.config; 40 | return -1; 41 | } 42 | 43 | /** 44 | * mem_stores_event - Return precise mem stores event for current CPU. 45 | * This is an event which supports load address monitoring. 46 | * Return: raw event, can be put int perf_event_attr->config. 47 | * -1 or error. 48 | */ 49 | unsigned mem_stores_event(void) 50 | { 51 | struct perf_event_attr attr; 52 | 53 | if (!resolve_event("MEM_INST_RETIRED.ALL_STORES", &attr) || 54 | !resolve_event("MEM_UOPS_RETIRED.ALL_STORES", &attr)) 55 | return attr.config; 56 | return -1; 57 | } 58 | -------------------------------------------------------------------------------- /jevents/examples/cpu.h: -------------------------------------------------------------------------------- 1 | unsigned mem_loads_event(void); 2 | unsigned mem_stores_event(void); 3 | -------------------------------------------------------------------------------- /jevents/examples/hist.cc: -------------------------------------------------------------------------------- 1 | // STL based histogram 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "hist.h" 7 | 8 | using namespace std; 9 | 10 | extern "C" { 11 | 12 | typedef map hist_type; 13 | 14 | struct hist { 15 | hist_type hist; 16 | uint64_t total; 17 | }; 18 | 19 | hist *init_hist() 20 | { 21 | struct hist *h = new hist; 22 | h->total = 0; 23 | return h; 24 | } 25 | 26 | void hist_add(hist *h, uint64_t val) 27 | { 28 | h->hist[val]++; 29 | h->total++; 30 | } 31 | 32 | void hist_print(hist *h, double min_percent) 33 | { 34 | unsigned long long below_thresh = 0; 35 | typedef pair val_pair; 36 | priority_queue q; 37 | 38 | for (hist_type::iterator it = h->hist.begin(); it != h->hist.end(); it++) { 39 | double percent = (double)(it->second) / (double)h->total; 40 | if (percent >= min_percent) { 41 | val_pair p(it->second, it->first); 42 | q.push(p); 43 | } else 44 | below_thresh += it->second; 45 | } 46 | printf("%11s %16s %16s\n", "PERCENT", "ADDR", "SAMPLES"); 47 | while (!q.empty()) { 48 | val_pair p = q.top(); 49 | printf("%10.2f%% %16llx %16llu\n", 50 | (p.first / (double)h->total) * 100.0, 51 | (unsigned long long)p.second, 52 | (unsigned long long)p.first); 53 | q.pop(); 54 | } 55 | printf("%llu below threshold\n", below_thresh); 56 | } 57 | 58 | void free_hist(hist *h) 59 | { 60 | delete h; 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /jevents/examples/hist.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" { 4 | #endif 5 | 6 | #include 7 | 8 | struct hist; 9 | 10 | struct hist *init_hist(void); 11 | void hist_add(struct hist *h, uint64_t); 12 | void hist_print(struct hist *h, double min_percent); 13 | void free_hist(struct hist *); 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | -------------------------------------------------------------------------------- /jevents/examples/rtest.c: -------------------------------------------------------------------------------- 1 | /* Demonstrate self profiling for context switches */ 2 | #include 3 | #include 4 | #include 5 | #include "rdpmc.h" 6 | 7 | #define HW_INTERRUPTS 0x1cb 8 | 9 | typedef unsigned long long u64; 10 | 11 | u64 get_time(void) 12 | { 13 | struct timeval tv; 14 | gettimeofday(&tv, NULL); 15 | return (u64)tv.tv_sec * 1000000 + tv.tv_usec; 16 | } 17 | 18 | int main(int ac, char **av) 19 | { 20 | int i; 21 | int cswitch = 0; 22 | struct rdpmc_ctx ctx; 23 | int iter = 10000; 24 | 25 | if (av[1]) 26 | iter = atoi(av[1]); 27 | 28 | if (rdpmc_open(HW_INTERRUPTS, &ctx) < 0) 29 | exit(1); 30 | 31 | u64 t0 = get_time(); 32 | u64 prev = rdpmc_read(&ctx); 33 | for (i = 0; i < iter; i++) { 34 | u64 n = rdpmc_read(&ctx); 35 | if (n != prev) { 36 | cswitch++; 37 | prev = n; 38 | } 39 | } 40 | 41 | u64 t1 = get_time(); 42 | 43 | printf("%d interrupts, %llu usec duration\n", cswitch, t1-t0); 44 | 45 | rdpmc_close(&ctx); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /jevents/examples/rtest2.c: -------------------------------------------------------------------------------- 1 | /* Measure a thousand sins */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "interrupts.h" 7 | #include "rdpmc.h" 8 | 9 | /* Requires a Intel Sandy or Ivy Bridge CPU for the interrupt test, 10 | On others it may loop forever, unless you disable the interrupt test. 11 | This is not a realistic test of real performance because it's too 12 | predictable for cache and branch predictors, 13 | see http://halobates.de/blog/p/227 */ 14 | 15 | #define ITER 1000 16 | typedef unsigned long long u64; 17 | 18 | volatile double var = 10.0; 19 | volatile double var2; 20 | 21 | int main(void) 22 | { 23 | struct rdpmc_ctx ctx; 24 | int warmup = 0; 25 | 26 | if (rdpmc_open(PERF_COUNT_HW_CPU_CYCLES, &ctx) < 0) 27 | exit(1); 28 | interrupts_init(); 29 | for (;;) { 30 | int i; 31 | u64 start_int; 32 | u64 a, b; 33 | 34 | start_int = get_interrupts(); 35 | a = rdpmc_read(&ctx); 36 | for (i = 0; i < ITER; i++) 37 | var2 += sin(var); 38 | b = rdpmc_read(&ctx); 39 | if (get_interrupts() == start_int && warmup > 0) { 40 | printf("%u sin() took %llu cycles avg\n", ITER, (b-a)/ITER); 41 | break; 42 | } 43 | warmup++; 44 | } 45 | interrupts_exit(); 46 | rdpmc_close(&ctx); 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /jevents/examples/rtest3.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "rdpmc.h" 7 | 8 | typedef unsigned long long u64; 9 | typedef long long s64; 10 | 11 | u64 get_time(void) 12 | { 13 | struct timeval tv; 14 | gettimeofday(&tv, NULL); 15 | return (u64)tv.tv_sec * 1000000 + tv.tv_usec; 16 | } 17 | 18 | volatile int interrupted; 19 | 20 | void stop(int sig) 21 | { 22 | interrupted = 1; 23 | } 24 | 25 | int main(int ac, char **av) 26 | { 27 | int i; 28 | struct rdpmc_ctx ctx; 29 | int thresh = 10000; 30 | 31 | if (av[1]) 32 | thresh = atoi(av[1]); 33 | 34 | if (rdpmc_open(0, &ctx) < 0) 35 | exit(1); 36 | 37 | signal(SIGINT, stop); 38 | 39 | printf("Press Ctrl-C to stop\n"); 40 | 41 | u64 prev = rdpmc_read(&ctx); 42 | 43 | i = 0; 44 | while (!interrupted) { 45 | u64 next = rdpmc_read(&ctx); 46 | s64 delta = next - prev; 47 | 48 | if (delta > thresh) 49 | printf("%d: %lld\n", i, delta); 50 | 51 | prev = next; 52 | i++; 53 | } 54 | 55 | rdpmc_close(&ctx); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /jevents/interrupts.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012,2013 Intel Corporation 3 | * Author: Andi Kleen 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that: (1) source code distributions 7 | * retain the above copyright notice and this paragraph in its entirety, (2) 8 | * distributions including binary code include the above copyright notice and 9 | * this paragraph in its entirety in the documentation or other materials 10 | * provided with the distribution 11 | * 12 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 13 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | */ 16 | 17 | /** DOC: Account for interrupts on Intel Core/Xeon systems 18 | * 19 | * This is useful for micro benchmarks to filter out measurement 20 | * samples that are disturbed by a context switch caused by OS 21 | * noise. 22 | * 23 | * Requires a Linux 3.3+ kernel 24 | */ 25 | #include "rdpmc.h" 26 | #include "interrupts.h" 27 | 28 | /* Intel Sandy Bridge */ 29 | #define HW_INTERRUPTS 0x1cb 30 | 31 | static __thread int int_ok = -1; 32 | static __thread struct rdpmc_ctx int_ctx; 33 | 34 | /** 35 | * interrupts_init - Initialize interrupt counter per thread 36 | * 37 | * Must be called for each application thread. 38 | */ 39 | void interrupts_init(void) 40 | { 41 | int_ok = rdpmc_open(HW_INTERRUPTS, &int_ctx); 42 | } 43 | 44 | /** 45 | * interrupts_exit - Free interrupt counter per thread. 46 | * 47 | * Must be called for each application thread. 48 | */ 49 | void interrupts_exit(void) 50 | { 51 | if (int_ok >= 0) 52 | rdpmc_close(&int_ctx); 53 | } 54 | 55 | /** 56 | * get_interrupts - get current interrupt counter. 57 | * 58 | * Get the current hardware interrupt count. When the number changed 59 | * for a measurement period you had some sort of context switch. 60 | * The sample for this period should be discarded. 61 | * This returns absolute numbers. 62 | */ 63 | unsigned long long get_interrupts(void) 64 | { 65 | if (int_ok >= 0) 66 | return rdpmc_read(&int_ctx); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /jevents/interrupts.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2012,2013 Intel Corporation 4 | * Author: Andi Kleen 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that: (1) source code distributions 8 | * retain the above copyright notice and this paragraph in its entirety, (2) 9 | * distributions including binary code include the above copyright notice and 10 | * this paragraph in its entirety in the documentation or other materials 11 | * provided with the distribution 12 | * 13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 14 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 15 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 | */ 17 | 18 | #ifndef INTERRUPTS_H 19 | #define INTERRUPTS_H 1 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | void interrupts_init(void); 26 | void interrupts_exit(void); 27 | unsigned long long get_interrupts(void); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /jevents/jevents.h: -------------------------------------------------------------------------------- 1 | #ifndef JEVENTS_H 2 | #define JEVENTS_H 1 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | int json_events(const char *fn, 14 | int (*func)(void *data, char *name, char *event, char *desc, 15 | char *pmu), 16 | void *data); 17 | char *get_cpu_str(void); 18 | char *get_cpu_str_type(char *type, char **idstr_step); 19 | 20 | struct perf_event_attr; 21 | 22 | struct jevent_extra { 23 | char *name; /* output name */ 24 | char *decoded; /* decoded name */ 25 | bool multi_pmu; /* needs multiple pmus */ 26 | glob_t pmus; /* glob_t with all pmus */ 27 | int next_pmu; /* next pmu number */ 28 | }; 29 | 30 | void jevent_free_extra(struct jevent_extra *extra); 31 | void jevent_copy_extra(struct jevent_extra *dst, struct jevent_extra *src); 32 | int jevent_next_pmu(struct jevent_extra *extra, struct perf_event_attr *attr); 33 | int jevent_name_to_attr(const char *str, struct perf_event_attr *attr); 34 | int jevent_name_to_attr_extra(const char *str, struct perf_event_attr *attr, 35 | struct jevent_extra *extra); 36 | char *jevent_pmu_name(struct jevent_extra *extra, int num, int *next_num); 37 | int resolve_event(const char *name, struct perf_event_attr *attr); 38 | int resolve_event_extra(const char *name, struct perf_event_attr *attr, 39 | struct jevent_extra *extra); 40 | int read_events(const char *fn); 41 | int jevents_update_qual(const char *qual, struct perf_event_attr *attr, 42 | const char *str); 43 | int walk_events(int (*func)(void *data, char *name, char *event, char *desc), 44 | void *data); 45 | int walk_perf_events(int (*func)(void *data, char *name, char *event, char *desc), 46 | void *data); 47 | char *format_raw_event(struct perf_event_attr *attr, char *name); 48 | int rmap_event(unsigned event, char **name, char **desc); 49 | 50 | int perf_event_open(struct perf_event_attr *attr, pid_t pid, 51 | int cpu, int group_fd, unsigned long flags); 52 | char *resolve_pmu(int type); 53 | bool jevent_pmu_uncore(const char *str); 54 | int jevents_socket_cpus(int *lenp, int **socket_cpus); 55 | void jevent_print_attr(FILE *f, struct perf_event_attr *attr); 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /jevents/jsession.h: -------------------------------------------------------------------------------- 1 | #ifndef JSESSION_H 2 | #define JSESSION_H 1 3 | 4 | #include 5 | #include 6 | #include 7 | #include "jevents.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | struct event { 14 | struct event *next; 15 | struct perf_event_attr attr; 16 | char *event; 17 | bool end_group, group_leader, ingroup; 18 | bool uncore; 19 | struct event *orig; /* Original event if cloned */ 20 | int num_clones; /* number of clones for this event */ 21 | struct jevent_extra extra; 22 | struct efd { 23 | int fd; 24 | uint64_t val[3]; 25 | } efd[0]; /* num_cpus */ 26 | }; 27 | 28 | struct eventlist { 29 | struct event *eventlist; 30 | struct event *eventlist_last; 31 | int num_cpus; 32 | int num_sockets; 33 | int *socket_cpus; 34 | }; 35 | 36 | int parse_events(struct eventlist *el, char *events); 37 | int setup_events(struct eventlist *el, bool measure_all, int measure_pid); 38 | int setup_events_cpumask(struct eventlist *el, int measure_pid, 39 | char *cpumask, int flags); 40 | int setup_event(struct event *e, int cpu, struct event *leader, bool measure_all, 41 | int measure_pid); 42 | int setup_event_flags(struct event *e, int cpu, struct event *leader, int measure_pid, 43 | int flags); 44 | #define SE_ENABLE_ON_EXEC (1 << 0) 45 | #define SE_MEASURE_ALL (1 << 1) 46 | 47 | int read_event(struct event *e, int cpu); 48 | int read_all_events(struct eventlist *el); 49 | struct eventlist *alloc_eventlist(void); 50 | uint64_t event_scaled_value(struct event *e, int cpu); 51 | uint64_t event_scaled_value_sum(struct event *e, int cpu); 52 | void free_eventlist(struct eventlist *el); 53 | void print_event_list_attr(struct eventlist *el, FILE *f); 54 | 55 | /** 56 | * struct session_print - Arguments for printing eventlists 57 | * @size: size of session_print or 0 (for compatibility) 58 | * @sep: separator string. Only used for CSV mode. Or NULL. Default ; 59 | * @prefix: String prefix to print before output (e.g. timestamp). 60 | * Needs to include separators. Or NULL. 61 | * @merge: Merge identical events 62 | */ 63 | struct session_print { 64 | int size; /* 0 or size for binary compatibility */ 65 | char *sep; 66 | char *prefix; 67 | bool merge; 68 | }; 69 | 70 | void session_print_csv(FILE *outfh, struct eventlist *el, struct session_print *arg); 71 | void session_print_aggr(FILE *outfh, struct eventlist *el, struct session_print *arg); 72 | void session_print(FILE *outfh, struct eventlist *el, struct session_print *arg); 73 | void session_print_timestamp(char *buf, int bufs, double ts); 74 | #define SESSION_TIMESTAMP_LEN 30 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /jevents/jsmn.h: -------------------------------------------------------------------------------- 1 | #ifndef __JSMN_H_ 2 | #define __JSMN_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /* 9 | * JSON type identifier. Basic types are: 10 | * o Object 11 | * o Array 12 | * o String 13 | * o Other primitive: number, boolean (true/false) or null 14 | */ 15 | typedef enum { 16 | JSMN_PRIMITIVE = 0, 17 | JSMN_OBJECT = 1, 18 | JSMN_ARRAY = 2, 19 | JSMN_STRING = 3 20 | } jsmntype_t; 21 | 22 | typedef enum { 23 | /* Not enough tokens were provided */ 24 | JSMN_ERROR_NOMEM = -1, 25 | /* Invalid character inside JSON string */ 26 | JSMN_ERROR_INVAL = -2, 27 | /* The string is not a full JSON packet, more bytes expected */ 28 | JSMN_ERROR_PART = -3, 29 | /* Everything was fine */ 30 | JSMN_SUCCESS = 0 31 | } jsmnerr_t; 32 | 33 | /* 34 | * JSON token description. 35 | * @param type type (object, array, string etc.) 36 | * @param start start position in JSON data string 37 | * @param end end position in JSON data string 38 | */ 39 | typedef struct { 40 | jsmntype_t type; 41 | int start; 42 | int end; 43 | int size; 44 | } jsmntok_t; 45 | 46 | /* 47 | * JSON parser. Contains an array of token blocks available. Also stores 48 | * the string being parsed now and current position in that string 49 | */ 50 | typedef struct { 51 | unsigned int pos; /* offset in the JSON string */ 52 | int toknext; /* next token to allocate */ 53 | int toksuper; /* superior token node, e.g parent object or array */ 54 | } jsmn_parser; 55 | 56 | /* 57 | * Create JSON parser over an array of tokens 58 | */ 59 | void jsmn_init(jsmn_parser *parser); 60 | 61 | /* 62 | * Run JSON parser. It parses a JSON data string into and array of tokens, 63 | * each describing a single JSON object. 64 | */ 65 | jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, 66 | size_t len, 67 | jsmntok_t *tokens, unsigned int num_tokens); 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* __JSMN_H_ */ 74 | -------------------------------------------------------------------------------- /jevents/json.c: -------------------------------------------------------------------------------- 1 | /* Parse JSON files using the JSMN parser. */ 2 | 3 | /* 4 | * Copyright (c) 2014, Intel Corporation 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, 11 | * this list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 22 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 28 | * OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include "jsmn.h" 39 | #include "json.h" 40 | #include 41 | 42 | static char *mapfile(const char *fn, size_t *size) 43 | { 44 | struct stat st; 45 | char *map = NULL; 46 | int err; 47 | int fd = open(fn, O_RDONLY); 48 | 49 | if (fd < 0) 50 | return NULL; 51 | err = fstat(fd, &st); 52 | if (err < 0) 53 | goto out; 54 | *size = st.st_size; 55 | map = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); 56 | if (map == (char *)MAP_FAILED) 57 | map = NULL; 58 | out: 59 | close(fd); 60 | return map; 61 | } 62 | 63 | static void unmapfile(char *map, size_t size) 64 | { 65 | munmap(map, size); 66 | } 67 | 68 | /* 69 | * Parse json file using jsmn. Return array of tokens, 70 | * and mapped file. Caller needs to free array. 71 | */ 72 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len) 73 | { 74 | jsmn_parser parser; 75 | jsmntok_t *tokens; 76 | jsmnerr_t res; 77 | unsigned sz; 78 | 79 | *map = mapfile(fn, size); 80 | if (!*map) 81 | return NULL; 82 | /* Heuristic */ 83 | sz = *size * 16; 84 | tokens = calloc(1, sz); 85 | if (!tokens) 86 | goto error; 87 | jsmn_init(&parser); 88 | res = jsmn_parse(&parser, *map, *size, tokens, 89 | sz / sizeof(jsmntok_t)); 90 | if (res != JSMN_SUCCESS) { 91 | fprintf(stderr, "%s: json error %d\n", fn, res); 92 | goto error_free; 93 | } 94 | if (len) 95 | *len = parser.toknext; 96 | return tokens; 97 | error_free: 98 | free(tokens); 99 | error: 100 | unmapfile(*map, *size); 101 | return NULL; 102 | } 103 | 104 | void free_json(char *map, size_t size, jsmntok_t *tokens) 105 | { 106 | free(tokens); 107 | unmapfile(map, size); 108 | } 109 | 110 | static int countchar(char *map, char c, int end) 111 | { 112 | int i; 113 | int count = 0; 114 | for (i = 0; i < end; i++) 115 | if (map[i] == c) 116 | count++; 117 | return count; 118 | } 119 | 120 | /* Return line number of a jsmn token */ 121 | int json_line(char *map, jsmntok_t *t) 122 | { 123 | return countchar(map, '\n', t->start) + 1; 124 | } 125 | 126 | static const char *jsmn_types[] = { 127 | [JSMN_PRIMITIVE] = "primitive", 128 | [JSMN_ARRAY] = "array", 129 | [JSMN_OBJECT] = "object", 130 | [JSMN_STRING] = "string" 131 | }; 132 | 133 | #define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?") 134 | 135 | /* Return type name of a jsmn token */ 136 | const char *json_name(jsmntok_t *t) 137 | { 138 | return LOOKUP(jsmn_types, t->type); 139 | } 140 | 141 | int json_len(jsmntok_t *t) 142 | { 143 | return t->end - t->start; 144 | } 145 | 146 | /* Is string t equal to s? */ 147 | int json_streq(char *map, jsmntok_t *t, const char *s) 148 | { 149 | unsigned len = t->end - t->start; 150 | return len == strlen(s) && !strncasecmp(map + t->start, s, len); 151 | } 152 | -------------------------------------------------------------------------------- /jevents/json.h: -------------------------------------------------------------------------------- 1 | #ifndef JSON_H 2 | #define JSON_H 1 3 | 4 | #include "jsmn.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len); 11 | void free_json(char *map, size_t size, jsmntok_t *tokens); 12 | int json_line(char *map, jsmntok_t *t); 13 | const char *json_name(jsmntok_t *t); 14 | int json_streq(char *map, jsmntok_t *t, const char *s); 15 | int json_len(jsmntok_t *t); 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /jevents/libjevents.spec: -------------------------------------------------------------------------------- 1 | Name: libjevents 2 | Version: 1 3 | Release: 1%{?dist} 4 | Summary: libjevents shared library from pmu-tools 5 | 6 | License: BSD 7 | URL: https://github.com/andikleen/pmu-tools/jevents 8 | # git clone https://github.com/andikleen/pmu-tools.git pmu-tools 9 | # cd pmu-tools && tar czf jevents.tar.gz jevents/ 10 | Source0: jevents.tar.gz 11 | 12 | %description 13 | jevents library from pmu-tools. 14 | 15 | %prep 16 | %setup -q -n jevents 17 | 18 | 19 | %build 20 | %make_build PREFIX=%{buildroot}/usr 21 | 22 | %install 23 | %make_install PREFIX=%{buildroot}/usr 24 | 25 | %files 26 | /usr/bin/event-rmap 27 | /usr/bin/listevents 28 | /usr/bin/showevent 29 | /usr/include/* 30 | /usr/lib64/libjevents.a 31 | 32 | %changelog 33 | 34 | * Sat Mar 3 2018 Pablo Llopis 1-1 35 | - Initial specfile version 36 | -------------------------------------------------------------------------------- /jevents/listevents.c: -------------------------------------------------------------------------------- 1 | /* List all events */ 2 | /* -v print descriptions */ 3 | /* pattern print only events matching shell pattern */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "jevents.h" 11 | 12 | int verbose = 0; 13 | 14 | struct event { 15 | char *name; 16 | char *event; 17 | char *desc; 18 | }; 19 | 20 | struct walk_data { 21 | int count; 22 | int ind; 23 | char *match; 24 | struct event *events; 25 | }; 26 | 27 | static int count_event(void *data, char *name, char *event, char *desc) 28 | { 29 | struct walk_data *wd = data; 30 | if (wd->match && fnmatch(wd->match, name, 0)) 31 | return 0; 32 | wd->count++; 33 | return 0; 34 | } 35 | 36 | static int store_event(void *data, char *name, char *event, char *desc) 37 | { 38 | struct walk_data *wd = data; 39 | 40 | if (wd->match && fnmatch(wd->match, name, 0)) 41 | return 0; 42 | assert(wd->ind < wd->count); 43 | struct event *e = &wd->events[wd->ind++]; 44 | e->name = strdup(name); 45 | e->event = strdup(event); 46 | e->desc = strdup(desc); 47 | return 0; 48 | } 49 | 50 | static int cmp_events(const void *ap, const void *bp) 51 | { 52 | const struct event *a = ap; 53 | const struct event *b = bp; 54 | return strcmp(a->name, b->name); 55 | } 56 | 57 | int main(int ac, char **av) 58 | { 59 | int err; 60 | 61 | if (av[1] && !strcmp(av[1], "-v")) { 62 | av++; 63 | verbose = 1; 64 | } 65 | 66 | err = read_events(NULL); 67 | if (err < 0) { 68 | fprintf(stderr, "Error reading JSON data: %s\n", strerror(errno)); 69 | exit(1); 70 | } 71 | struct walk_data wd = { .match = av[1] }; 72 | walk_events(count_event, &wd); 73 | walk_perf_events(count_event, &wd); 74 | wd.events = calloc(sizeof(struct event), wd.count); 75 | walk_events(store_event, &wd); 76 | err = walk_perf_events(store_event, &wd); 77 | if (err < 0) { 78 | fprintf(stderr, "Error reading perf events: %s\n", strerror(err)); 79 | exit(1); 80 | } 81 | qsort(wd.events, wd.count, sizeof(struct event), cmp_events); 82 | int i; 83 | for (i = 0; i < wd.count; i++) { 84 | struct event *e = &wd.events[i]; 85 | printf("%-40s ", e->name); 86 | printf("%s\n", e->event); 87 | if (verbose && e->desc[0]) 88 | printf("\t%s\n", e->desc); /* XXX word wrap */ 89 | } 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /jevents/measure.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012,2013 Intel Corporation 3 | * Author: Andi Kleen 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that: (1) source code distributions 7 | * retain the above copyright notice and this paragraph in its entirety, (2) 8 | * distributions including binary code include the above copyright notice and 9 | * this paragraph in its entirety in the documentation or other materials 10 | * provided with the distribution 11 | * 12 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 13 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | */ 16 | 17 | 18 | #ifndef MEASURE_H 19 | #define MEASURE_H 1 20 | 21 | #include 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | #define N_COUNTER 4 28 | 29 | struct measure { 30 | char *name; 31 | unsigned long long counter; 32 | int ratio_to; /* or -1 */ 33 | unsigned long long (*func)(struct measure *m, 34 | unsigned long long total[N_COUNTER], int i); 35 | }; 36 | 37 | #ifdef EVENT_MACROS 38 | #define ETO(x,y) { #x, x, y } 39 | #define ETO0(x) ETO(x, 0) 40 | #define E(x) { #x, x, -1 } 41 | #define EFUNC(x,y, f) { #x, x, y, f } 42 | #endif 43 | 44 | void measure_group_init(struct measure *g, char *name); 45 | void measure_group_start(void); 46 | void measure_group_stop(void); 47 | void measure_group_finish(void); 48 | void measure_print_all(FILE *fh); 49 | void measure_free_all(void); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /jevents/perf-aux.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Support for mapping the AUX buffer, e.g. for reading Intel Processor Trace 3 | * Copyright (c) 2020 Intel Corporation 4 | * Author: Andi Kleen 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that: (1) source code distributions 8 | * retain the above copyright notice and this paragraph in its entirety, (2) 9 | * distributions including binary code include the above copyright notice and 10 | * this paragraph in its entirety in the documentation or other materials 11 | * provided with the distribution 12 | * 13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 14 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 15 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 | */ 17 | 18 | #include 19 | #include "perf-iter.h" 20 | #include 21 | #include 22 | #include 23 | 24 | /** 25 | * perf_aux_map - Map AUX buffer for an open perf_fd. 26 | * @pfd: Already opened perf_fd on PMU supporting aux. 27 | * @aux: perf_aux structure to store the mapping. 28 | * @aux_size_shift: log 2 of mapped buffer size in pages. 29 | * @snapshot: When true the aux buffer will run in continuous ring buffer mode and not stop on overflow. 30 | * 31 | * Some perf event PMUs, such as intel_pt, support an extra aux buffer to 32 | * report raw data from the hardware. Map the AUX buffer for an already 33 | * mapped perf_fd 34 | * 35 | * The aux buffer size is limited by the mlock rlimit, as well as 36 | * /proc/sys/kernel/perf_event_mlock_kb. 37 | * 38 | * Returns -1 if the mapping failed, otherwise 0. 39 | */ 40 | int perf_aux_map(struct perf_fd *pfd, struct perf_aux_map *aux, int aux_size_shift, 41 | bool snapshot) 42 | { 43 | struct perf_event_mmap_page *mp = pfd->mpage; 44 | 45 | mp->aux_offset = perf_mmap_size(pfd->buf_size_shift); 46 | mp->aux_size = sysconf(_SC_PAGE_SIZE) << aux_size_shift; 47 | aux->aux_map = mmap(NULL, mp->aux_size, 48 | PROT_READ | (snapshot ? 0 : PROT_WRITE), 49 | MAP_SHARED, 50 | pfd->pfd, 51 | mp->aux_offset); 52 | return aux->aux_map == (void*)-1L ? -1 : 0; 53 | } 54 | 55 | /** 56 | * perf_aux_unmap - Unmap an aux buffer. 57 | * @pfd: perf_fd passed to perf_aux_map. 58 | * @aux: Aux structure to unmap. 59 | */ 60 | void perf_aux_unmap(struct perf_fd *pfd, struct perf_aux_map *aux) 61 | { 62 | munmap(aux->aux_map, pfd->mpage->aux_size); 63 | } 64 | -------------------------------------------------------------------------------- /jevents/perf-iter.h: -------------------------------------------------------------------------------- 1 | #ifndef _PERF_ITER_H 2 | #define _PERF_ITER_H 1 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | struct perf_event_mmap_page; 12 | struct perf_event_header; 13 | 14 | /* Iterator for perf ring buffer */ 15 | 16 | struct perf_iter { 17 | uint64_t ring_buffer_mask; 18 | uint64_t head, cur, raw_head, bufsize; 19 | int64_t avail; 20 | char *data; 21 | struct perf_event_mmap_page *mpage; 22 | }; 23 | 24 | struct perf_fd { 25 | int pfd; 26 | struct perf_event_mmap_page *mpage; 27 | int buf_size_shift; 28 | }; 29 | 30 | struct perf_aux_map { 31 | void *aux_map; 32 | }; 33 | 34 | int perf_fd_open(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift); 35 | int perf_fd_open_other(struct perf_fd *p, struct perf_event_attr *attr, int buf_size_shift, 36 | int pid, int cpu); 37 | void perf_fd_close(struct perf_fd *p); 38 | void perf_iter_continue(struct perf_iter *iter); 39 | struct perf_event_header *perf_buffer_read(struct perf_iter *iter, void *buffer, int bufsize); 40 | void perf_iter_init(struct perf_iter *iter, struct perf_fd *pfd); 41 | int perf_enable(struct perf_fd *p); 42 | int perf_disable(struct perf_fd *p); 43 | 44 | unsigned perf_mmap_size(int buf_size_shift); 45 | 46 | int perf_aux_map(struct perf_fd *pfd, struct perf_aux_map *aux, int size, bool snapshot); 47 | void perf_aux_unmap(struct perf_fd *pfd, struct perf_aux_map *aux); 48 | 49 | static inline int perf_iter_finished(struct perf_iter *iter) 50 | { 51 | return iter->avail <= 0; 52 | } 53 | 54 | static inline uint64_t *perf_hdr_payload(struct perf_event_header *hdr) 55 | { 56 | return (uint64_t *)(hdr + 1); 57 | } 58 | 59 | #ifdef __cplusplus 60 | } 61 | #endif 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /jevents/perf_event_open.c: -------------------------------------------------------------------------------- 1 | /* Until glibc provides a proper stub ... */ 2 | #include 3 | #include 4 | #include 5 | 6 | /* If someone else has a better one we use that */ 7 | 8 | __attribute__((weak)) 9 | int perf_event_open(struct perf_event_attr *attr, pid_t pid, 10 | int cpu, int group_fd, unsigned long flags) 11 | { 12 | return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); 13 | } 14 | -------------------------------------------------------------------------------- /jevents/print.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: BSD-3-Clause 2 | // Copyright 2021 Intel Corporation. 3 | // Author: Andi Kleen 4 | #include "jevents.h" 5 | #include "jsession.h" 6 | 7 | static void print_runtime(FILE *outfh, uint64_t *val) 8 | { 9 | if (val[1] != val[2]) 10 | fprintf(outfh, " [%2.2f%%]", ((double)val[2] / val[1]) * 100.); 11 | } 12 | 13 | /** 14 | * session_print_timestamp - Print perf stat style timestamp into buffer 15 | * @buf: String buffer. Should be SESSION_TIMESTAMP_LEN sized. 16 | * @bufs: Buffer size. 17 | * @ts: Timestamp 18 | */ 19 | void session_print_timestamp(char *buf, int bufs, double ts) 20 | { 21 | snprintf(buf, bufs, "% 12.9f\t", ts); 22 | } 23 | 24 | /** 25 | * session_print_aggr - Print event list values in perf stat like output aggregated 26 | * @outfh: File descriptor to print to. 27 | * @el: Event list to print. It must have been measured before. 28 | * @arg: Argument. Used prefix and merge. 29 | * 30 | * This version aggregates values over all CPUs. 31 | */ 32 | 33 | void session_print_aggr(FILE *outfh, struct eventlist *el, struct session_print *arg) 34 | { 35 | struct event *e; 36 | int i; 37 | 38 | for (e = el->eventlist; e; e = e->next) { 39 | if (arg->merge && e->orig) 40 | continue; 41 | 42 | uint64_t v = 0, val[3] = { 0, 0, 0 }; 43 | for (i = 0; i < el->num_cpus; i++) { 44 | v += event_scaled_value(e, i); 45 | // assumes all are scaled the same way 46 | if (e->efd[i].val[2]) { 47 | val[1] += e->efd[i].val[1]; 48 | val[2] += e->efd[i].val[2]; 49 | } 50 | } 51 | if (val[1] == 0 && el->num_cpus > 0) { 52 | val[1] = e->efd[0].val[1]; 53 | val[2] = e->efd[0].val[2]; 54 | } 55 | 56 | fprintf(outfh, "%s%-30s %'15lu", arg->prefix ? arg->prefix : "", 57 | e->extra.name ? e->extra.name : e->event, v); 58 | print_runtime(outfh, val); 59 | putc('\n', outfh); 60 | } 61 | } 62 | 63 | /** 64 | * session_print - Print event list values in perf stat like output. 65 | * @outfh: File descriptor to print to. 66 | * @el: Event list to print. It must have been measured before. 67 | * @arg: Argument. Used prefix and merge. 68 | * 69 | * This version prints each CPU individually (like perf stat -A) 70 | */ 71 | void session_print(FILE *outfh, struct eventlist *el, struct session_print *arg) 72 | { 73 | struct event *e; 74 | int i; 75 | 76 | for (e = el->eventlist; e; e = e->next) { 77 | uint64_t v; 78 | for (i = 0; i < el->num_cpus; i++) { 79 | if (e->efd[i].fd < 0) 80 | continue; 81 | if (arg->merge && e->orig) 82 | continue; 83 | v = event_scaled_value(e, i); 84 | fprintf(outfh, "%s%3d %-30s %'15lu", arg->prefix ? arg->prefix : "", 85 | i, 86 | e->extra.name ? e->extra.name : e->event, v); 87 | print_runtime(outfh, e->efd[i].val); 88 | putc('\n', outfh); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /jevents/rawevent.c: -------------------------------------------------------------------------------- 1 | /* Output raw events in perf form. */ 2 | /* 3 | * Copyright (c) 2014, Intel Corporation 4 | * Author: Andi Kleen 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, 11 | * this list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 22 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 28 | * OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "jevents.h" 36 | 37 | #define BUFS 1024 38 | 39 | /** 40 | * format_raw_event - Format a resolved event for perf's command line tool 41 | * @attr: Previously resolved perf_event_attr. 42 | * @name: Name to add to the event or NULL. 43 | * Return a string of the formatted event. The caller must free string. 44 | */ 45 | 46 | char *format_raw_event(struct perf_event_attr *attr, char *name) 47 | { 48 | char buf[BUFS]; 49 | int off = 0; 50 | char *pmu; 51 | 52 | pmu = resolve_pmu(attr->type); 53 | if (!pmu) 54 | return NULL; 55 | off = snprintf(buf, BUFS, "%s/config=%#llx", pmu, attr->config); 56 | free(pmu); 57 | if (attr->config1) 58 | off += sprintf(buf + off, ",config1=%#llx", attr->config1); 59 | if (attr->config2) 60 | off += sprintf(buf + off, ",config2=%#llx", attr->config2); 61 | if (name) 62 | off += snprintf(buf + off, BUFS - off, ",name=%s", name); 63 | off += snprintf(buf + off, BUFS - off, "/"); 64 | return strdup(buf); 65 | } 66 | -------------------------------------------------------------------------------- /jevents/rdpmc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012,2013 Intel Corporation 3 | * Author: Andi Kleen 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that: (1) source code distributions 7 | * retain the above copyright notice and this paragraph in its entirety, (2) 8 | * distributions including binary code include the above copyright notice and 9 | * this paragraph in its entirety in the documentation or other materials 10 | * provided with the distribution 11 | * 12 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 13 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | */ 16 | 17 | /* Ring 3 RDPMC support */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "jevents.h" 27 | 28 | /** 29 | * DOC: Ring 3 counting for CPU performance counters 30 | * 31 | * This library allows accessing CPU performance counters from ring 3 32 | * using the perf_events subsystem. This is useful to measure specific 33 | * parts of programs (e.g. excluding initialization code) 34 | * 35 | * Requires a Linux 3.3+ kernel 36 | */ 37 | 38 | #include "rdpmc.h" 39 | 40 | typedef unsigned long long u64; 41 | 42 | #define rmb() asm volatile("" ::: "memory") 43 | 44 | /** 45 | * rdpmc_open - initialize a simple ring 3 readable performance counter 46 | * @counter: Raw event descriptor (UUEE UU unit mask EE event) 47 | * @ctx: Pointer to struct &rdpmc_ctx that is initialized 48 | * 49 | * The counter will be set up to count CPU events excluding the kernel. 50 | * Must be called for each thread using the counter. 51 | * The caller must make sure counter is suitable for the running CPU. 52 | * Only works in 3.3+ kernels. 53 | * Must be closed with rdpmc_close() 54 | */ 55 | 56 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx) 57 | { 58 | struct perf_event_attr attr = { 59 | .type = counter > 10 ? PERF_TYPE_RAW : PERF_TYPE_HARDWARE, 60 | .size = PERF_ATTR_SIZE_VER0, 61 | .config = counter, 62 | .sample_type = PERF_SAMPLE_READ, 63 | .exclude_kernel = 1, 64 | }; 65 | return rdpmc_open_attr(&attr, ctx, NULL); 66 | } 67 | 68 | /** 69 | * rdpmc_open_attr - initialize a raw ring 3 readable performance counter 70 | * @attr: perf struct %perf_event_attr for the counter 71 | * @ctx: Pointer to struct %rdpmc_ctx that is initialized. 72 | * @leader_ctx: context of group leader or NULL 73 | * 74 | * This allows more flexible setup with a custom &perf_event_attr. 75 | * For simple uses rdpmc_open() should be used instead. 76 | * Must be called for each thread using the counter. 77 | * Must be closed with rdpmc_close() 78 | */ 79 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx, 80 | struct rdpmc_ctx *leader_ctx) 81 | { 82 | ctx->fd = perf_event_open(attr, 0, -1, 83 | leader_ctx ? leader_ctx->fd : -1, 0); 84 | if (ctx->fd < 0) { 85 | perror("perf_event_open"); 86 | return -1; 87 | } 88 | ctx->buf = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, ctx->fd, 0); 89 | if (ctx->buf == MAP_FAILED) { 90 | close(ctx->fd); 91 | perror("mmap on perf fd"); 92 | return -1; 93 | } 94 | return 0; 95 | } 96 | 97 | /** 98 | * rdpmc_close - free a ring 3 readable performance counter 99 | * @ctx: Pointer to &rdpmc_ctx context. 100 | * 101 | * Must be called by each thread for each context it initialized. 102 | */ 103 | void rdpmc_close(struct rdpmc_ctx *ctx) 104 | { 105 | close(ctx->fd); 106 | munmap(ctx->buf, sysconf(_SC_PAGESIZE)); 107 | } 108 | 109 | /** 110 | * rdpmc_read - read a ring 3 readable performance counter 111 | * @ctx: Pointer to initialized &rdpmc_ctx structure. 112 | * 113 | * Read the current value of a running performance counter. 114 | * This should only be called from the same thread/process as opened 115 | * the context. For new threads please create a new context. 116 | */ 117 | unsigned long long rdpmc_read(struct rdpmc_ctx *ctx) 118 | { 119 | u64 val; 120 | unsigned seq; 121 | u64 offset; 122 | typeof (ctx->buf) buf = ctx->buf; 123 | unsigned index; 124 | 125 | do { 126 | seq = buf->lock; 127 | rmb(); 128 | index = buf->index; 129 | offset = buf->offset; 130 | if (index == 0) { /* rdpmc not allowed */ 131 | val = 0; 132 | break; 133 | } 134 | val = _rdpmc(index - 1); 135 | rmb(); 136 | } while (buf->lock != seq); 137 | return (val + offset) & 0xffffffffffff; 138 | } 139 | 140 | -------------------------------------------------------------------------------- /jevents/rdpmc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012,2013 Intel Corporation 3 | * Author: Andi Kleen 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that: (1) source code distributions 7 | * retain the above copyright notice and this paragraph in its entirety, (2) 8 | * distributions including binary code include the above copyright notice and 9 | * this paragraph in its entirety in the documentation or other materials 10 | * provided with the distribution 11 | * 12 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 13 | * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 14 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | */ 16 | 17 | #ifndef RDPMC_H 18 | #define RDPMC_H 1 19 | 20 | #include 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | struct rdpmc_ctx { 27 | int fd; 28 | struct perf_event_mmap_page *buf; 29 | }; 30 | 31 | int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx); 32 | int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx, 33 | struct rdpmc_ctx *leader_ctx); 34 | void rdpmc_close(struct rdpmc_ctx *ctx); 35 | unsigned long long rdpmc_read(struct rdpmc_ctx *ctx); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /jevents/showevent.c: -------------------------------------------------------------------------------- 1 | /* Resolve perf event descriptions with symbolic names to raw perf descriptions */ 2 | #include "jevents.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int main(int ac, char **av) 10 | { 11 | int test = 0; 12 | int ret = 0; 13 | 14 | while (*++av) { 15 | if (!strcmp(*av, "--test")) { 16 | test = 1; 17 | continue; 18 | } 19 | 20 | struct perf_event_attr attr; 21 | if (resolve_event(*av, &attr) < 0) { 22 | fprintf(stderr, "Cannot resolve %s\n", *av); 23 | ret = 1; 24 | continue; 25 | } 26 | char *ev = format_raw_event(&attr, *av); 27 | printf("%s\n", ev); 28 | free(ev); 29 | if (test) { 30 | if (perf_event_open(&attr, 0, -1, -1, 0) < 0) 31 | perror("perf_event_open"); 32 | } 33 | } 34 | return ret; 35 | } 36 | -------------------------------------------------------------------------------- /jevents/tester: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tests for jevents 3 | # may need executing event_download.py first to get event list for this cpu 4 | set -e 5 | set -x 6 | 7 | failed() { 8 | echo FAILED 9 | } 10 | trap failed ERR 0 11 | 12 | PATH=.:./examples:$PATH 13 | 14 | 15 | $V listevents > l$$ 16 | [ "$(wc -l < l$$)" -gt 50 ] 17 | grep -q offcore_response l$$ 18 | 19 | if grep -q br_misp_retired.taken l$$ ; then 20 | E=br_misp_retired.taken 21 | elif grep -q br_misp_retired.near_taken l$$ ; then 22 | E=br_misp_retired.near_taken 23 | else 24 | E=instructions 25 | fi 26 | 27 | if [ "$(ls /sys/bus/event_source/devices/cpu*/events/instructions | wc -l)" -gt 0 ] ; then 28 | $V jestat true 29 | $V jestat -e cpu-cycles,cpu_clk_unhalted.ref_tsc,$E true 30 | $V jestat -e "{cpu-cycles,cpu_clk_unhalted.ref_tsc},{$E,cache-references}" -a sleep 1 31 | $V jestat -a sleep 1 32 | $V jestat -a -e "uops_executed.thread:k" sleep 1 33 | OCR=$(grep -E '^(offcore_response|ocr)\.' l$$ | head -1 | cut -d ' ' -f 1) 34 | $V jestat -a -e "$OCR:config1=0x1" sleep 1 35 | $V jestat -A -a -I 500 cycles sleep 2 36 | 37 | # test all events 38 | LEN=$(wc -l l$$ | awk ' { print $1 }') 39 | INC=20 40 | 41 | # skip i915/vcs-* which often returns ENODEV for no good reason 42 | SKIP="i915-vcs" 43 | if [ ! -d /sys/bus/event_source/devices/uncore_upi_0 ] ; then 44 | SKIP="$SKIP|upi_" 45 | fi 46 | 47 | for ((i = 1; i <= LEN; i += INC)) ; do 48 | # shellcheck disable=SC2046 49 | $V jestat $(nl l$$ | 50 | grep -E -v "$SKIP" | 51 | awk -v v=$i -v inc=$INC '$1 >= v && $1 <= v+inc { print "-e " $2 } ') -a true 52 | done 53 | 54 | $V showevent $E 55 | 56 | fi 57 | 58 | $V event-rmap $E 59 | 60 | if [ "$(ls /sys/bus/event_source/devices/cpu*/events/instructions | wc -l)" -gt 0 ] ; then 61 | $V examples/addr 62 | examples/rtest 63 | examples/rtest2 64 | fi 65 | 66 | rm l$$ 67 | 68 | trap "" ERR 0 69 | 70 | echo SUCCEEDED 71 | 72 | 73 | -------------------------------------------------------------------------------- /jevents/util.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #define err(x) perror(x), exit(1) 6 | #define mb() asm volatile("" ::: "memory") 7 | #define MB (1024*1024) 8 | typedef unsigned long long u64; 9 | typedef long long s64; 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | -------------------------------------------------------------------------------- /knl_ratios.py: -------------------------------------------------------------------------------- 1 | import metrics 2 | import node 3 | import slm_ratios as slm 4 | 5 | version = "1.0" 6 | 7 | slm.set_clks_event_name("CPU_CLK_UNHALTED.THREAD") 8 | 9 | smt_enabled = False 10 | 11 | class CyclesPerUop(slm.CyclesPerUop): 12 | pass 13 | 14 | # LEVEL 1 15 | class FrontendBound(slm.FrontendBound): 16 | pass 17 | 18 | class BackendBound(slm.BackendBound): 19 | pass 20 | 21 | class BadSpeculation(slm.BadSpeculation): 22 | pass 23 | 24 | class Retiring(slm.Retiring): 25 | pass 26 | 27 | # LEVEL 2 28 | class FrontendLatency(slm.FrontendLatency): 29 | pass 30 | 31 | # LEVEL 3 32 | class ICacheMisses(slm.ICacheMisses): 33 | # Override _compute(), since KNL does not have 34 | # the DECODE_RESTRICTION.PDCACHE_WRONG event 35 | def _compute(self, ev): 36 | return slm.icache_line_fetch_cost(ev, self.level) 37 | 38 | class ITLBMisses(slm.ITLBMisses): 39 | pass 40 | 41 | class MSSwitches(slm.MSSwitches): 42 | pass 43 | 44 | class Setup(object): 45 | def __init__(self, runner): 46 | # Instantiate nodes as required to be able to specify their 47 | # references 48 | 49 | # L3 objects 50 | icache_misses = ICacheMisses() 51 | itlb_misses = ITLBMisses() 52 | ms_cost = MSSwitches() 53 | 54 | #L1 objects 55 | frontend = FrontendBound() 56 | bad_speculation = BadSpeculation() 57 | retiring = Retiring() 58 | backend = BackendBound(retiring=retiring, 59 | bad_speculation=bad_speculation, 60 | frontend=frontend) 61 | 62 | 63 | # L2 objects 64 | frontend_latency = FrontendLatency(icache_misses=icache_misses, 65 | itlb=itlb_misses, 66 | ms_cost=ms_cost, 67 | frontend=frontend 68 | ) 69 | 70 | # Set parents 71 | node.set_parent(None, [frontend, bad_speculation, retiring, backend]) 72 | node.set_parent(frontend, [frontend_latency]) 73 | node.set_parent(frontend_latency, 74 | [icache_misses, itlb_misses, ms_cost]) 75 | 76 | # User visible metrics 77 | user_metrics = [slm.Metric_IPC(), slm.Metric_CPI(), 78 | slm.Metric_TurboUtilization(), 79 | slm.Metric_CLKS(), slm.Metric_Time(), 80 | slm.CyclesPerUop()] 81 | 82 | nodes = [obj for obj in locals().values() 83 | if issubclass(obj.__class__, metrics.MetricBase) and 84 | obj.level > 0] 85 | 86 | nodes = sorted(nodes, key=lambda n: n.level) 87 | 88 | # Pass to runner 89 | list(map(runner.run, nodes)) 90 | list(map(runner.metric, user_metrics)) 91 | -------------------------------------------------------------------------------- /latego.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # configure latego workaround on Sandy Bridge EP 3 | # can be run as a standalone tool or used as module 4 | # latego enable|disable hexevent 5 | from __future__ import print_function 6 | import signal 7 | import struct 8 | import re 9 | import os 10 | import msr 11 | import pci 12 | 13 | busses = (0x3f, 0x7f, 0xbf, 0xff) 14 | 15 | def local_direct2core(val): 16 | c = 0 17 | for b in busses: 18 | if pci.probe(b, 14, 0): 19 | pci.changebit(b, 14, 0, 0x84, 1, val) 20 | c += 1 21 | if c == 0: 22 | print("no local devices found") 23 | 24 | def remote_direct2core(val): 25 | c = 0 26 | for b in busses: 27 | if pci.probe(b, 8, 0): 28 | pci.changebit(b, 8, 0, 0x80, 1, val) 29 | pci.changebit(b, 9, 0, 0x80, 1, val) 30 | c += 1 31 | if c == 0: 32 | print("no remote devices found") 33 | 34 | def direct2core(val): 35 | # make sure all cores are awake when we do that 36 | f = os.open("/dev/cpu_dma_latency", os.O_WRONLY) 37 | os.write(f, struct.pack("I", 0)) 38 | local_direct2core(val) 39 | remote_direct2core(val) 40 | os.close(f) 41 | 42 | def set_bypass(val): 43 | msr.changebit(0x39c, 0, val) 44 | 45 | bypass = 1 << 0 46 | d2c = 1 << 1 47 | latego_events = { 48 | 0x04d1: bypass, 49 | 0x20d1: bypass|d2c, 50 | 0x01d3: bypass|d2c, 51 | 0x04d3: bypass|d2c, 52 | 0x01d2: bypass, 53 | 0x02d2: bypass, 54 | 0x04d2: bypass, 55 | 0x08d2: bypass, 56 | 0x01cd: bypass|d2c 57 | } 58 | 59 | latego_names = { 60 | "mem_load_uops_retired.llc_hit": 0x04d1, 61 | "mem_load_uops_retired.llc_miss": 0x20d1, 62 | "mem_load_uops_llc_miss_retired.local_dram": 0x01d3, 63 | "mem_load_uops_llc_miss_retired.remote_dram": 0x04d3, 64 | "mem_load_uops_llc_hit_retired.xsnp_miss": 0x01d2, 65 | "mem_load_uops_llc_hit_retired.xsnp_hit": 0x02d2, 66 | "mem_load_uops_llc_hit_retired.xsnp_hitm": 0x04d2, 67 | "mem_load_uops_llc_hit_retired.xsnp_none": 0x08d2, 68 | "mem_trans_retired.load_latency": 0x01cd 69 | } 70 | 71 | signal_setup = False 72 | enabled = 0 73 | 74 | def cleanup(): 75 | if enabled & bypass: 76 | set_bypass(0) 77 | if enabled & d2c: 78 | direct2core(0) 79 | 80 | def get_event(e): 81 | if re.match(r"[0-9]+", e): 82 | return int(e, 16) 83 | if e in latego_names: 84 | return latego_names[e] 85 | return e 86 | 87 | def setup_event(event, val): 88 | global signal_setup 89 | global enabled 90 | action = ("Disabling", "Enabling")[val] 91 | if val and not signal_setup: 92 | signal.signal(signal.SIGINT, cleanup) 93 | signal.signal(signal.SIGPIPE, cleanup) 94 | signal_setup = True 95 | if event in latego_events: 96 | v = latego_events[event] 97 | if v & d2c: 98 | print("%s direct2core" % (action)) 99 | direct2core(val) 100 | if v & bypass: 101 | print("%s bypass" % (action)) 102 | set_bypass(val) 103 | if val: 104 | enabled = v 105 | else: 106 | enabled = 0 107 | 108 | if __name__ == '__main__': 109 | import sys 110 | if len(sys.argv) == 3 and sys.argv[1] == "enable": 111 | setup_event(get_event(sys.argv[2]), 1) 112 | elif len(sys.argv) == 3 and sys.argv[1] == "disable": 113 | setup_event(get_event(sys.argv[2]), 0) 114 | elif len(sys.argv) == 2 and sys.argv[1] == "list": 115 | print("%-45s %04s" % ("name", "hex")) 116 | for i in latego_names.keys(): 117 | print("%-45s %04x" % (i, latego_names[i], )) 118 | else: 119 | print("Usage: latego enable|disable hexevent|namedevent") 120 | print(" latego list") 121 | sys.exit(1) 122 | -------------------------------------------------------------------------------- /list-events.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # print all events in a eventmap 3 | from __future__ import print_function 4 | import sys 5 | import ocperf 6 | emap = ocperf.find_emap() 7 | if not emap: 8 | sys.exit("Unknown CPU or cannot find event table") 9 | for j in sorted(emap.events): 10 | print(j) 11 | -------------------------------------------------------------------------------- /listutils.py: -------------------------------------------------------------------------------- 1 | # generic utilities for lists 2 | import sys 3 | from itertools import chain 4 | 5 | if sys.version_info.major == 3: 6 | from itertools import zip_longest 7 | else: 8 | from itertools import izip_longest 9 | zip_longest = izip_longest 10 | 11 | def flatten(x): 12 | return list(chain(*x)) 13 | 14 | def filternot(p, l): 15 | return list(filter(lambda x: not p(x), l)) 16 | 17 | # add items from b to a if not already in a 18 | def cat_unique(a, b): 19 | aset = set(a) 20 | add = [x for x in b if x not in aset] 21 | return a + add 22 | 23 | # remove duplicates without reordering 24 | def dedup(a): 25 | l = [] 26 | prev = set() 27 | for j in a: 28 | if j not in prev: 29 | l.append(j) 30 | prev.add(j) 31 | return l 32 | 33 | def not_list(l): 34 | return [not x for x in l] 35 | 36 | # merge two dicts with appending lists 37 | def append_dict(a, b): 38 | for k in b: 39 | if k in a: 40 | a[k] += b[k] 41 | else: 42 | a[k] = b[k] 43 | 44 | # create dict/list with same shape as a, but filled with dummy values 45 | def dummy_dict(a, val=0.0): 46 | return {k: [val] * len(a[k]) for k in a} 47 | 48 | def padlist(l, length, val=0.0): 49 | if len(l) < length: 50 | return l + [val]*(length-len(l)) 51 | return l 52 | 53 | def findprefix(l, prefix, stop=None): 54 | for i, v in enumerate(l): 55 | if v == stop: 56 | break 57 | if v.startswith(prefix): 58 | return i 59 | return -1 60 | -------------------------------------------------------------------------------- /msr: -------------------------------------------------------------------------------- 1 | msr.py -------------------------------------------------------------------------------- /msr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # library and tool to access Intel MSRs (model specific registers) 3 | # Author: Andi Kleen 4 | from __future__ import print_function 5 | import glob 6 | import struct 7 | import os 8 | 9 | def writemsr(msr, val): 10 | n = glob.glob('/dev/cpu/[0-9]*/msr') 11 | for c in n: 12 | f = os.open(c, os.O_WRONLY) 13 | os.lseek(f, msr, os.SEEK_SET) 14 | os.write(f, struct.pack('Q', val)) 15 | os.close(f) 16 | if not n: 17 | raise OSError("msr module not loaded (run modprobe msr)") 18 | 19 | def readmsr(msr, cpu = 0): 20 | f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_RDONLY) 21 | os.lseek(f, msr, os.SEEK_SET) 22 | val = struct.unpack('Q', os.read(f, 8))[0] 23 | os.close(f) 24 | return val 25 | 26 | def changebit(msr, bit, val): 27 | n = glob.glob('/dev/cpu/[0-9]*/msr') 28 | for c in n: 29 | f = os.open(c, os.O_RDWR) 30 | os.lseek(f, msr, os.SEEK_SET) 31 | v = struct.unpack('Q', os.read(f, 8))[0] 32 | if val: 33 | v = v | (1 << bit) 34 | else: 35 | v = v & ~(1 << bit) 36 | os.lseek(f, msr, os.SEEK_SET) 37 | os.write(f, struct.pack('Q', v)) 38 | os.close(f) 39 | if not n: 40 | raise OSError("msr module not loaded (run modprobe msr)") 41 | 42 | if __name__ == '__main__': 43 | import argparse 44 | 45 | def parse_hex(s): 46 | try: 47 | return int(s, 16) 48 | except ValueError: 49 | raise argparse.ArgumentError("Bad hex number %s" % (s)) 50 | 51 | if not os.path.exists("/dev/cpu/0/msr"): 52 | os.system("/sbin/modprobe msr") 53 | 54 | p = argparse.ArgumentParser(description='Access x86 model specific registers.') 55 | p.add_argument('msr', type=parse_hex, help='number of the MSR to access') 56 | p.add_argument('value', nargs='?', type=parse_hex, help='value to write (if not specified read)') 57 | p.add_argument('--setbit', type=int, help='Bit number to set') 58 | p.add_argument('--clearbit', type=int, help='Bit number to clear') 59 | p.add_argument('--cpu', type=int, default=0, help='CPU to read on (writes always change all)') 60 | args = p.parse_args() 61 | if args.value is None and not args.setbit and not args.clearbit: 62 | print("%x" % (readmsr(args.msr, args.cpu))) 63 | elif args.setbit: 64 | changebit(args.msr, args.setbit, 1) 65 | elif args.clearbit: 66 | changebit(args.msr, args.clearbit, 0) 67 | else: 68 | writemsr(args.msr, args.value) 69 | -------------------------------------------------------------------------------- /node.py: -------------------------------------------------------------------------------- 1 | # Helper classes and functions for nodes 2 | 3 | # Decorator class to declare reference dependecies between classes 4 | class requires(object): 5 | """Decorator to mark required references. These references will 6 | be added to the object as instance attributes. Example: 7 | 8 | @requires("ref1", "ref2") 9 | class SomeClass(object): 10 | def some_method(self): 11 | return self.ref1 + self.ref2 12 | 13 | """ 14 | def __init__(self, *required_refs): 15 | self.required_refs = required_refs 16 | 17 | def __call__(self, cls): 18 | setattr(cls, "required_refs", self.required_refs) 19 | return cls 20 | 21 | def set_parent(parent, nodes): 22 | for node in nodes: 23 | node.parent = parent 24 | 25 | # Check that all required references are set 26 | def check_refs(fn): 27 | """Decorator to check if required references for an object 28 | are set. If it finds missing references, it will raise an 29 | exception. Example: 30 | 31 | @requires("retiring", "bad_speculation", "frontend_bound") 32 | class BackendBound(object): 33 | @check_refs 34 | def _compute(self, ev): 35 | # checks if required refs are set before executing 36 | 37 | """ 38 | def wrapped(self, *args, **kwargs): 39 | if not hasattr(self, "required_refs"): 40 | raise Exception("Missing required_refs object") 41 | missing_refs = [ref for ref in self.required_refs 42 | if not hasattr(self, ref)] 43 | if missing_refs: 44 | raise Exception("Missing references: {0}".format(missing_refs)) 45 | 46 | return fn(self, *args, **kwargs) 47 | 48 | wrapped.__name__ = fn.__name__ 49 | return wrapped 50 | 51 | def add_references(node, **refs): 52 | """Adds an attribute to node, as specified in the **refs argument. 53 | Example: 54 | 55 | ... 56 | backend = BackendBound() 57 | add_references(backend, retiring=retiring, frontend_bound=frontend, 58 | bad_speculation=bad_speculation) 59 | 60 | """ 61 | for name, obj in refs.items(): 62 | setattr(node, name, obj) 63 | -------------------------------------------------------------------------------- /objutils.py: -------------------------------------------------------------------------------- 1 | # generic utilities for objects 2 | 3 | def has(obj, name): 4 | return name in obj.__class__.__dict__ 5 | 6 | def safe_ref(obj, name): 7 | if has(obj, name): 8 | return obj.__class__.__dict__[name] 9 | return None 10 | 11 | def ref_or(obj, name, alt): 12 | if has(obj, name): 13 | return obj.__class__.__dict__[name] 14 | return alt 15 | 16 | def map_fields(obj, fields): 17 | def map_field(name): 18 | return safe_ref(obj, name) 19 | return list(map(map_field, fields)) 20 | -------------------------------------------------------------------------------- /oc-all-events: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # convert all events for testing 3 | # needs GNU parallel 4 | set -e 5 | 6 | . ./cpumap.sh 7 | 8 | cpu() 9 | { 10 | export EVENTMAP=${cpus[$1]} 11 | ./list-events.py | parallel -n1 ./ocperf.py --print stat -e > /dev/null 12 | } 13 | 14 | if [ "$1" != "" ] ; then 15 | cpu $1 16 | exit 0 17 | fi 18 | 19 | cpu bnl 20 | cpu hsw 21 | cpu ivb 22 | cpu ivt 23 | cpu nhm-ep 24 | cpu nhm-ex 25 | cpu snb 26 | cpu snb-ep 27 | cpu wsm-dp 28 | cpu wsm-sp 29 | 30 | -------------------------------------------------------------------------------- /ocperf: -------------------------------------------------------------------------------- 1 | ocperf.py -------------------------------------------------------------------------------- /other-tester: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # tester for other programs in pmu-tools 3 | # PERF=... override perf binary 4 | # NORES=1 don't check measurement results 5 | 6 | PYTHON=${PYTHON:-python3} 7 | 8 | . ./cpumap.sh 9 | set -e 10 | PERF=${PERF:-perf} 11 | failed() { 12 | echo FAILED 13 | } 14 | PATH=$PATH:. 15 | trap failed ERR 0 16 | set -x 17 | 18 | # XXX cgroup untested 19 | for args in "" "-A" "--per-socket" "--per-core" "-r2" ; do 20 | 21 | # interval-normalize.py 22 | 23 | ${PERF} stat -e cycles,branches,instructions,branch-misses,context-switches,page-faults -I100 $args -a -x, -o x$$.csv sleep 1 24 | 25 | $WRAP interval-normalize.py --error-exit x$$.csv 26 | $WRAP interval-normalize.py --error-exit x$$.csv > y$$.csv 27 | 28 | grep -vq PARSE-ERROR y$$.csv 29 | if [ -z "$NORES" ] ; then 30 | for i in branch-misses branches context-switches cycles instructions page-faults ; do 31 | grep -q $i y$$.csv 32 | done 33 | fi 34 | 35 | grep -vq PARSE-ERROR x$$.csv 36 | 37 | # plot-normalized.py 38 | $WRAP plot-normalized.py -o x$$-2.png y$$.csv 39 | 40 | # interval-plot.py 41 | $WRAP interval-plot.py x$$.csv -o x$$.png 42 | 43 | done 44 | 45 | DYGRAPHS="" 46 | 47 | # original url http://dygraphs.com/1.0.1/dygraph-combined.js disappeared 48 | if [ ! -r dygraph-combined.js ] && wget https://cdnjs.cloudflare.com/ajax/libs/dygraph/1.0.1/dygraph-combined.js ; then 49 | DYGRAPHS=1 50 | fi 51 | 52 | for args in "-l2" "--all -v" "-l3 --single-thread" "--all -a -A"; do 53 | 54 | FORCEHT=1 $WRAP toplev.py -v --force-cpu ${DCPU:-hsw} --nodes +CPU_Utilization -I 100 $args -o x$$.csv -x, ./workloads/BC1s 55 | $WRAP toplev.py -v --force-cpu ${DCPU:-hsw} --nodes +CPU_Utilization -I 100 $args -o xn$$.csv -x, ./workloads/BC1s 56 | $WRAP interval-normalize.py --error-exit < x$$.csv 57 | $WRAP interval-normalize.py --error-exit < x$$.csv > y$$.csv 58 | 59 | grep -vq PARSE-ERROR y$$.csv 60 | [ -z "$NORES" ] && grep Frontend y$$.csv 61 | 62 | $WRAP interval-normalize.py --normalize-cpu --error-exit < x$$.csv > yc$$.csv 63 | [ -z "$NORES" ] && grep Frontend yc$$.csv 64 | 65 | $WRAP interval-normalize.py --normalize-cpu --error-exit < xn$$.csv > yc$$.csv 66 | [ -z "$NORES" ] && grep Frontend yc$$.csv 67 | 68 | if grep -q CPUs x$$.csv ; then 69 | 70 | $WRAP utilized.py x$$.csv -o y$$.csv 71 | [ -z "$NORES" ] && grep Frontend y$$.csv 72 | 73 | fi 74 | 75 | if grep -q CPUs xn$$.csv ; then 76 | 77 | $WRAP utilized.py xn$$.csv -o y$$.csv 78 | [ -z "$NORES" ] && grep Frontend y$$.csv 79 | 80 | fi 81 | 82 | $WRAP interval-plot.py x$$.csv -o x$$.png 83 | 84 | # plot-normalized.py 85 | $WRAP plot-normalized.py -o x$$-2.png y$$.csv 86 | 87 | # tl-serve.py 88 | if [ -n "$DYGRAPHS" ] ; then 89 | $WRAP tl-serve.py --gen tls$$ x$$.csv 90 | rm -rf tls$$ 91 | fi 92 | 93 | # tl-barplot.py 94 | $WRAP tl-barplot.py x$$.csv -o x$$.png 95 | 96 | rm x$$.png x$$-2.png 97 | 98 | done 99 | 100 | $WRAP tl-serve.py x$$.csv & 101 | sleep 1 102 | unset http_proxy 103 | curl http://localhost:9001 > /dev/null 104 | kill %1 105 | sleep 1 106 | wait %1 107 | 108 | $PYTHON csv_formats.py 109 | 110 | rm x$$.csv xn$$.csv 111 | 112 | # cputop.py 113 | 114 | $WRAP cputop.py "socket == 0" 115 | $WRAP cputop.py "thread == 0 and socket == 0" 116 | $WRAP cputop.py "thread == 1" offline 117 | $WRAP cputop.py offline online 118 | [ "$($WRAP cputop.py True | wc -l | cut -d ' ' -f 1)" -eq "$(getconf _NPROCESSORS_ONLN)" ] 119 | 120 | # list-events.py 121 | 122 | EVENTMAP=${cpus[hsw]} $WRAP list-events.py > x$$.lst 123 | [ "$(wc -l x$$.lst | cut -d ' ' -f 1)" -gt 20 ] 124 | grep -qi rtm_retired.aborted x$$.lst 125 | rm x$$.lst 126 | 127 | # event-translate.py 128 | EVENTMAP=${cpus[hsw]} $WRAP event-translate.py r4c9 | grep -q rtm_retired.aborted 129 | 130 | $WRAP gen-dot.py simple > /dev/null 131 | $WRAP gen-dot.py ivb_client_ratios > /dev/null 132 | 133 | # untested: counterdiff.py 134 | 135 | # may need network: 136 | # untested: event_download.py 137 | 138 | # need root: 139 | # untested: msr.py 140 | # untested: pci.py 141 | # untested: event-rmap.py 142 | 143 | trap "" ERR 0 144 | 145 | echo SUCCEEDED 146 | -------------------------------------------------------------------------------- /parallel-tester: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # run all test suites in parallel 3 | # requires GNU parallel 4 | exec parallel --halt now,fail=1 < all-tester 5 | -------------------------------------------------------------------------------- /parser/elf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # resolve ELF and DWARF symbol tables using elftools 3 | # 4 | # Copyright (c) 2013-2014, Intel Corporation 5 | # Author: Andi Kleen 6 | # 7 | # This program is free software; you can redistribute it and/or modify it 8 | # under the terms and conditions of the GNU General Public License, 9 | # version 2, as published by the Free Software Foundation. 10 | # 11 | # This program is distributed in the hope it will be useful, but WITHOUT 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | # more details. 15 | from __future__ import print_function 16 | from elftools.common.py3compat import bytes2str 17 | from elftools.elf.elffile import ELFFile 18 | from elftools.elf.sections import SymbolTableSection 19 | import elftools.common.exceptions 20 | import util 21 | import kernel 22 | 23 | # global caches 24 | open_files = dict() 25 | resolved = dict() 26 | symtables = dict() 27 | lines = dict() 28 | 29 | def build_line_table(dwarfinfo): 30 | lines = [] 31 | for CU in dwarfinfo.iter_CUs(): 32 | lp = dwarfinfo.line_program_for_CU(CU) 33 | prevstate = None 34 | for entry in lp.get_entries(): 35 | if entry.state is None or entry.state.end_sequence: 36 | continue 37 | if prevstate: 38 | lines.append((prevstate.address, 39 | entry.state.address, 40 | lp['file_entry'][prevstate.file - 1].name, 41 | prevstate.line)) 42 | prevstate = entry.state 43 | lines.sort() 44 | return lines 45 | 46 | def build_symtab(elffile): 47 | syms = [] 48 | for section in elffile.iter_sections(): 49 | if isinstance(section, SymbolTableSection): 50 | for nsym, sym in enumerate(section.iter_symbols()): 51 | name = bytes2str(sym.name) 52 | if not name: 53 | continue 54 | if sym.entry.st_info.type != 'STT_FUNC': 55 | continue 56 | end = sym['st_value'] + sym['st_size'] 57 | syms.append((sym['st_value'], end, 58 | bytes2str(sym.name))) 59 | syms.sort() 60 | return syms 61 | 62 | reported = set() 63 | 64 | def find_elf_file(fn): 65 | if fn.startswith("//"): 66 | return None 67 | if fn in open_files: 68 | elffile = open_files[fn] 69 | else: 70 | try: 71 | f = open(fn, 'rb') 72 | elffile = ELFFile(f) 73 | open_files[fn] = elffile 74 | except (IOError, elftools.common.exceptions.ELFError): 75 | if fn not in reported: 76 | print("Cannot open", fn) 77 | reported.add(fn) 78 | return None 79 | 80 | return elffile 81 | 82 | def resolve_line(fn, ip): 83 | elffile = find_elf_file(fn) 84 | if elffile is None: 85 | return "?" 86 | if fn not in lines and elffile.has_dwarf_info(): 87 | lines[fn] = build_line_table(elffile.get_dwarf_info()) 88 | 89 | src = None 90 | if resolve_line and fn in lines: 91 | pos = util.find_le(lines[fn], ip) 92 | if pos: 93 | src = "%s:%d" % (pos[2], pos[3]) 94 | return src 95 | 96 | # global one hit cache 97 | # helps a lot for LBR decoding 98 | # tbd use a small list with LRU? 99 | last_sym = None 100 | 101 | def resolve_sym(fn, ip): 102 | elffile = find_elf_file(fn) 103 | if elffile is None: 104 | return "?", 0 105 | global last_sym 106 | 107 | try: 108 | if fn not in symtables: 109 | symtables[fn] = build_symtab(elffile) 110 | 111 | if last_sym and last_sym[0] <= ip <= last_sym[1]: 112 | return last_sym[2], ip - last_sym[0] 113 | 114 | loc = None 115 | offset = None 116 | if fn in symtables: 117 | sym = util.find_le(symtables[fn], ip) 118 | if sym: 119 | loc, offset = sym[2], ip - sym[0] 120 | except elftools.common.exceptions.ELFError: 121 | return "?", 0 122 | 123 | return loc, offset 124 | 125 | def resolve_ip(filename, foffset, ip, need_line): 126 | sym, soffset, line = None, 0, None 127 | if filename and filename.startswith("/"): 128 | sym, soffset = resolve_sym(filename, foffset) 129 | if not sym: 130 | sym, soffset = resolve_sym(filename, ip) 131 | if need_line: 132 | line = resolve_line(filename, ip) 133 | else: 134 | sym, soffset = kernel.resolve_kernel(ip) 135 | return sym, soffset, line 136 | 137 | if __name__ == '__main__': 138 | import sys 139 | print(resolve_ip(sys.argv[1], int(sys.argv[2], 16))) 140 | print(resolve_line(sys.argv[1], int(sys.argv[2], 16))) 141 | -------------------------------------------------------------------------------- /parser/hist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # print histogram for perf.data 3 | from __future__ import print_function 4 | import perfpd 5 | import pfeat 6 | import argparse 7 | 8 | p = argparse.ArgumentParser(description='Print histogram for perf.data') 9 | p.add_argument('datafiles', nargs='*', 10 | help='perf.data files (default perf.data)', 11 | default=['perf.data']) 12 | p.add_argument('--sort', help='field to sort on (symbol, line)', 13 | default='symbol') 14 | p.add_argument('--min-percent', help='Minimum percent to print', default=1.0) 15 | args = p.parse_args() 16 | 17 | COLUMN_PAD = 5 18 | MAX_COLUMN = 70 19 | 20 | def compute_cols(names): 21 | return min(max(map(len, names)) + COLUMN_PAD, MAX_COLUMN) 22 | 23 | min_percent = float(args.min_percent) / 100.0 24 | for d in args.datafiles: 25 | df, et, feat = perfpd.read_samples(d, (args.sort == 'line')) 26 | pfeat.print_feat(feat) 27 | 28 | # xxx split by event 29 | if 'period' in df: 30 | total = float(df['period'].sum()) 31 | g = df.groupby(args.sort) 32 | h = g.period.sum() 33 | h.sort(ascending=False) 34 | h = h.apply(lambda x: x / total) 35 | else: 36 | h = df[args.sort].value_counts(normalize=True) 37 | h = h[h >= min_percent] 38 | 39 | cols = compute_cols(h.index) 40 | for s, v in zip(h.index, h.values): 41 | print("%-*s %.2f%%" % (cols, s, v * 100.0)) 42 | -------------------------------------------------------------------------------- /parser/kernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # resolve kernel symbols through kallsyms (when no vmlinux is available) 3 | # 4 | # Copyright (c) 2014, Intel Corporation 5 | # Author: Andi Kleen 6 | # 7 | # This program is free software; you can redistribute it and/or modify it 8 | # under the terms and conditions of the GNU General Public License, 9 | # version 2, as published by the Free Software Foundation. 10 | # 11 | # This program is distributed in the hope it will be useful, but WITHOUT 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | # more details. 15 | 16 | import util 17 | 18 | kernel = [] 19 | 20 | def parse_kernel(): 21 | with open("/proc/kallsyms", 'r') as f: 22 | for l in f: 23 | n = l.split() 24 | addr = int(n[0], 16) 25 | kernel.append((addr, n[2])) 26 | 27 | def resolve_kernel(ip): 28 | if not kernel: 29 | parse_kernel() 30 | n = util.find_le(kernel, ip) 31 | if n: 32 | return n[1], ip - n[0] 33 | return None 34 | -------------------------------------------------------------------------------- /parser/mmap.py: -------------------------------------------------------------------------------- 1 | # track mmap updates in a perf stream and allow lookup of symbols 2 | # 3 | # Copyright (c) 2013-2014, Intel Corporation 4 | # Author: Andi Kleen 5 | # 6 | # This program is free software; you can redistribute it and/or modify it 7 | # under the terms and conditions of the GNU General Public License, 8 | # version 2, as published by the Free Software Foundation. 9 | # 10 | # This program is distributed in the hope it will be useful, but WITHOUT 11 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 | # more details. 14 | 15 | from collections import defaultdict 16 | import bisect 17 | 18 | # max reorder window for MMAP updates 19 | LOOKAHEAD_WINDOW = 1024 20 | 21 | def lookup(m, ip): 22 | i = bisect.bisect_left(m, (ip,)) 23 | if i < len(m) and m[i][0] == ip: 24 | mr = m[i] 25 | elif i == 0: 26 | return None, 0 27 | else: 28 | mr = m[i - 1] 29 | return mr, ip - mr[0] 30 | 31 | class MmapTracker: 32 | """Track mmap updates in a perf stream and allow lookup of symbols.""" 33 | 34 | def __init__(self): 35 | self.maps = defaultdict(list) 36 | self.pnames = defaultdict(str) 37 | self.lookahead = 0 38 | self.updates = [] 39 | 40 | # look ahead for out of order mmap updates 41 | def lookahead_mmap(self, ev, n): 42 | if n - self.lookahead == 0: 43 | self.lookahead = min(n + LOOKAHEAD_WINDOW, len(ev)) 44 | for l in range(n, self.lookahead): 45 | j = ev[l] 46 | # no time stamp: assume it's synthesized and kernel 47 | if j.type in ('MMAP','MMAP2') and j.pid == -1 and j.tid == 0: 48 | bisect.insort(self.maps[j.pid], 49 | (j.addr, j.len, j.filename)) 50 | elif j.type in ('COMM','MMAP','MMAP2'): 51 | bisect.insort(self.updates, (j.time2, j)) 52 | 53 | # process pending updates for a sample 54 | def update_sample(self, j): 55 | updates = self.updates 56 | while len(updates) > 0 and j.time >= updates[0][0]: 57 | u = updates[0][1] 58 | del updates[0] 59 | if u.type in ('MMAP', 'MMAP2'): 60 | pid = u.pid 61 | bisect.insort(self.maps[pid], (u.addr, u.len, u.filename)) 62 | elif u.type == 'COMM': 63 | self.maps[u.pid] = [] 64 | self.pnames[u.pid] = u.comm 65 | 66 | # look up tables with current state 67 | def resolve(self, pid, ip): 68 | if not self.maps[pid]: 69 | # xxx kernel 70 | return None, None, 0 71 | m, offset = lookup(self.maps[pid], ip) 72 | if not m or offset >= m[1]: 73 | # look up kernel 74 | m, offset = lookup(self.maps[-1], ip) 75 | if not m or offset >= m[1]: 76 | return None, None, 0 77 | assert m[0] <= ip <= m[0] + m[1] 78 | return m[2], m[0], offset 79 | -------------------------------------------------------------------------------- /parser/pfeat.py: -------------------------------------------------------------------------------- 1 | # print perf headers 2 | 3 | def print_feat(feat): 4 | print("# Measured on %s (%s)" % ( 5 | feat.hostname.hostname, 6 | feat.osrelease.osrelease)) 7 | print("# %s, %s" % ( 8 | feat.cpudesc.cpudesc, 9 | feat.cpuid.cpuid)) 10 | print("# %s" % (" ".join(map(lambda x: x.cmdline, feat.cmdline.cmdline)))) 11 | -------------------------------------------------------------------------------- /parser/tester: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # test different variants of the perf format 3 | 4 | PERF=${PERF:-perf} 5 | WRAP=${WRAP:-python} 6 | 7 | failed() { 8 | echo FAILED 9 | exit 1 10 | } 11 | trap failed ERR 0 12 | 13 | set -e 14 | set -x 15 | set -o pipefail 16 | set -E 17 | 18 | CMD=${1:-bash -c /bin/true} 19 | 20 | [ ! -d data ] && mkdir data 21 | 22 | check() { 23 | echo -- testing "$@" 24 | FN=data/perf.$(echo -- "$@" | tr -d '{} ') 25 | FN=${FN/--/} 26 | rm -f $FN 27 | if ! $PERF record -o $FN "$@" -- $CMD ; then 28 | return 29 | fi 30 | if [ $(stat -c %s $FN) == 0 ] ; then 31 | return 32 | fi 33 | $WRAP perfdata.py $FN > pdata.txt 34 | $WRAP perfpd.py $FN > ppd.txt 35 | 36 | # XXX check more fields 37 | AS=$(grep -c SAMPLE pdata.txt) 38 | BS=$($PERF report -i $FN -D | grep -c "PERF_RECORD_SAMPLE") 39 | [ $AS -eq $BS ] 40 | } 41 | 42 | check 43 | check -c 1000 44 | check -g -c 1000 45 | check -b -c 1000 46 | if ! check -b -g fp -c 1000 ; then 47 | check -b -g -c 1000 48 | fi 49 | check -P -c 10000 50 | check -T -c 10003 51 | #check -e cycles:S 52 | # seems to be broken in perf 53 | #check -e '{cycles,branches}:S' 54 | 55 | set +e 56 | #check -d 57 | check --group -e cycles,branches,branch-misses -c 1000 58 | check -e '{cycles,branches},{branch-misses,cache-misses}' -c 1000 59 | check -e cycles,branches,branch-misses -c 1000 60 | 61 | # new kernel 62 | #check -g dwarf 63 | 64 | # XXX sw trace points 65 | 66 | trap "" ERR 0 67 | 68 | echo SUCCEEDED 69 | -------------------------------------------------------------------------------- /parser/util.py: -------------------------------------------------------------------------------- 1 | # utility functions 2 | 3 | import bisect 4 | 5 | def find_le(f, key): 6 | pos = bisect.bisect_left(f, (key,)) 7 | if pos < len(f) and f[pos][0] == key: 8 | return f[pos] 9 | if pos == 0: 10 | return None 11 | return f[pos - 1] 12 | -------------------------------------------------------------------------------- /pci.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # library and tool to access PCI config space 3 | import os 4 | import struct 5 | 6 | # no multiple domains, controllers so far 7 | 8 | def probe(bus, dev, func): 9 | fn = "/sys/devices/pci0000:%02x/0000:%02x:%02x.%01x/config" % (bus, bus, dev, func) 10 | return os.path.isfile(fn) 11 | 12 | def openpci(bus, dev, func, offset, mode): 13 | fn = "/sys/devices/pci0000:%02x/0000:%02x:%02x.%01x/config" % (bus, bus, dev, func) 14 | f = os.open(fn, mode) 15 | os.lseek(f, offset, os.SEEK_SET) 16 | return f 17 | 18 | sizes = {8: "Q", 4: "I", 2: "H", 1: "B"} 19 | 20 | def writepci(bus, device, func, offset, size, val): 21 | f = openpci(bus, device, func, offset, os.O_WRONLY) 22 | os.write(f, struct.pack(sizes[size], val)) 23 | os.close(f) 24 | 25 | def readpci(bus, device, func, offset, size): 26 | f = openpci(bus, device, func, offset, os.O_RDONLY) 27 | v = struct.unpack(sizes[size], os.read(f, size))[0] 28 | os.close(f) 29 | return v 30 | 31 | def changebit(bus, device, func, offset, bit, val): 32 | f = openpci(bus, device, func, offset, os.O_RDWR) 33 | v = struct.unpack("I", os.read(f, 4))[0] 34 | if val: 35 | v = v | (1 << bit) 36 | else: 37 | v = v & ~(1 << bit) 38 | os.lseek(f, offset, os.SEEK_SET) 39 | os.write(f, struct.pack('I', v)) 40 | os.close(f) 41 | -------------------------------------------------------------------------------- /pebs-grabber/Makefile: -------------------------------------------------------------------------------- 1 | KDIR := /lib/modules/`uname -r`/build 2 | 3 | CFLAGS_pebs-grabber.o := -DTRACE_INCLUDE_PATH=${M} 4 | 5 | obj-m := pebs-grabber.o 6 | 7 | all: 8 | make -C ${KDIR} M=`pwd` 9 | 10 | install: 11 | make -C ${KDIR} M=`pwd` install 12 | 13 | clean: 14 | make -C ${KDIR} M=`pwd` clean 15 | -------------------------------------------------------------------------------- /pebs-grabber/pebs.h: -------------------------------------------------------------------------------- 1 | #undef TRACE_SYSTEM 2 | #define TRACE_SYSTEM pebs 3 | 4 | #if !defined(_TRACE_PEBS_H) || defined(TRACE_HEADER_MULTI_READ) 5 | #define _TRACE_PEBS_H 6 | 7 | #include 8 | 9 | /* PEBS trace points. These always follow on each other */ 10 | 11 | TRACE_EVENT(pebs_v1, 12 | TP_PROTO(u64 ip, 13 | u64 status, 14 | u64 dla, 15 | u64 dse, 16 | u64 lat), 17 | TP_ARGS(ip, status, dla, dse, lat), 18 | TP_STRUCT__entry( 19 | __field(u64, ip) 20 | __field(u64, status) 21 | __field(u64, dla) 22 | __field(u64, dse) 23 | __field(u64, lat) 24 | ), 25 | TP_fast_assign( 26 | __entry->ip = ip; 27 | __entry->status = status; 28 | __entry->dla = dla; 29 | __entry->dse = dse; 30 | __entry->lat = lat; 31 | ), 32 | TP_printk("ip=%llx status=%llx dla=%llx dse=%llx lat=%llx\n", 33 | __entry->ip, 34 | __entry->status, 35 | __entry->dla, 36 | __entry->dse, 37 | __entry->lat) 38 | ); 39 | 40 | TRACE_EVENT(pebs_v2, 41 | TP_PROTO(u64 eventingip, 42 | u64 tsx_tuning, 43 | u64 ax), 44 | TP_ARGS(eventingip, tsx_tuning, ax), 45 | TP_STRUCT__entry( 46 | __field(u64, eventingip) 47 | __field(u64, tsx_tuning) 48 | __field(u64, ax) 49 | ), 50 | TP_fast_assign( 51 | __entry->eventingip = eventingip; 52 | __entry->tsx_tuning = tsx_tuning; 53 | __entry->ax = ax; 54 | ), 55 | TP_printk("eventingip=%llx tsx_tuning=%llx ax=%llx\n", 56 | __entry->eventingip, 57 | __entry->tsx_tuning, 58 | __entry->ax) 59 | ); 60 | 61 | TRACE_EVENT(pebs_v3, 62 | TP_PROTO(u64 tsc), 63 | TP_ARGS(tsc), 64 | TP_STRUCT__entry( 65 | __field(u64, tsc) 66 | ), 67 | TP_fast_assign( 68 | __entry->tsc = tsc; 69 | ), 70 | TP_printk("tsc=%llx\n", __entry->tsc) 71 | ); 72 | 73 | TRACE_EVENT(pebs_regs, 74 | TP_PROTO(u64 flags, u64 *regs), 75 | TP_ARGS(flags, regs), 76 | TP_STRUCT__entry( 77 | __field(u64, flags) 78 | __field(u64, regs[16]) 79 | ), 80 | TP_fast_assign( 81 | __entry->flags = flags; 82 | memcpy(__entry->regs, regs, sizeof(u64) * 16); 83 | ), 84 | TP_printk("flags=%llx\n" 85 | "ax=%llx bx=%0llx cx=%llx dx=%llx si=%llx di=%llx bp=%llx sp=%llx\n" 86 | "r8=%llx r9=%llx r10=%llx r11=%llx r12=%llx r13=%llx r14=%llx r15=%llx\n", 87 | __entry->flags, 88 | __entry->regs[0], 89 | __entry->regs[1], 90 | __entry->regs[2], 91 | __entry->regs[3], 92 | __entry->regs[4], 93 | __entry->regs[5], 94 | __entry->regs[6], 95 | __entry->regs[7], 96 | __entry->regs[8], 97 | __entry->regs[9], 98 | __entry->regs[10], 99 | __entry->regs[11], 100 | __entry->regs[12], 101 | __entry->regs[13], 102 | __entry->regs[14], 103 | __entry->regs[15]) 104 | ); 105 | 106 | #endif 107 | 108 | #include 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /perf_metrics.py: -------------------------------------------------------------------------------- 1 | class CPU_Utilization: 2 | name = "CPU utilization" 3 | desc = """ 4 | Number of CPUs used. The top down CPU metrics are only meaningful 5 | when a CPU thread is executing. The percentage are always relative to 6 | the executing time. When the utilization is low the workload may 7 | actually not be CPU bound, but IO (network, block) IO bound 8 | instead. Check the scheduler and IO metrics below. Or it may be CPU 9 | bound, but not use enough parallelism, if the number of CPUs is less 10 | than the number of cores.""" 11 | nogroup = True 12 | subplot = "CPU Utilization" 13 | unit = "CPUs" 14 | def compute(self, EV): 15 | try: 16 | # interval-ns is not a perf event, but handled by toplev internally. 17 | self.val = (EV("task-clock", 1) * 1e6) / EV("interval-ns", 1) 18 | except ZeroDivisionError: 19 | self.val = 0 20 | 21 | class MUX: 22 | name = "MUX" 23 | desc = """ 24 | PerfMon Event Multiplexing accuracy indicator""" 25 | unit = "%" 26 | maxval = 100.0 27 | errcount = 0 28 | 29 | def compute(self, EV): 30 | self.val = EV("mux", 0) 31 | self.thresh = 0 < self.val < 100.0 32 | 33 | class Setup: 34 | def __init__(self, r): 35 | #r.force_metric(CPU_Utilization()) 36 | r.force_metric(MUX()) 37 | -------------------------------------------------------------------------------- /plot-normalized.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # plot already normalized data 3 | # first column is time stamp 4 | import sys 5 | import argparse 6 | import os 7 | import csv 8 | import matplotlib 9 | if os.getenv("DISPLAY") is None: 10 | matplotlib.use('Agg') 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | ap = argparse.ArgumentParser(usage='Plot already normalized CSV data') 15 | ap.add_argument('--output', '-o', help='Output to file. Otherwise show.', 16 | nargs='?') 17 | ap.add_argument('inf', nargs='?', default=sys.stdin, type=argparse.FileType('r'), 18 | help='input CSV file') 19 | args = ap.parse_args() 20 | 21 | inf = args.inf 22 | 23 | rc = csv.reader(inf) 24 | 25 | num = 0 26 | timestamps = [] 27 | columns = {} 28 | for r in rc: 29 | num += 1 30 | if num == 1: 31 | for j in r[1:]: 32 | columns[j] = [] 33 | continue 34 | timestamps.append(r[0]) 35 | c = 1 36 | for j in columns: 37 | try: 38 | columns[j].append(float(r[c])) 39 | except ValueError: 40 | columns[j].append(float('nan')) 41 | c += 1 42 | 43 | for j in columns: 44 | plt.plot(timestamps, columns[j], label=j) 45 | leg = plt.legend() 46 | leg.get_frame().set_alpha(0.5) 47 | if args.output: 48 | plt.savefig(args.output) 49 | else: 50 | plt.show() 51 | -------------------------------------------------------------------------------- /pmudef.py: -------------------------------------------------------------------------------- 1 | EVENTSEL_EVENT = 0x00ff 2 | EVENTSEL_UMASK = 0xff00 3 | EVENTSEL_UMASK2 = 0xff00000000 4 | EVENTSEL_EDGE = 1<<18 5 | EVENTSEL_PC = 1<<19 6 | EVENTSEL_ANY = 1<<21 7 | EVENTSEL_INV = 1<<23 8 | EVENTSEL_EQ = 1<<36 9 | EVENTSEL_INTX = 1<<32 10 | EVENTSEL_INTX_CP = 1<<33 11 | EVENTSEL_CMASK = 0xff000000 12 | 13 | EVMASK = (EVENTSEL_EVENT | EVENTSEL_UMASK | EVENTSEL_EDGE | EVENTSEL_PC | EVENTSEL_ANY | 14 | EVENTSEL_INV | EVENTSEL_CMASK | EVENTSEL_UMASK2 | EVENTSEL_EQ | EVENTSEL_INTX | 15 | EVENTSEL_INTX_CP) 16 | 17 | EVENTSEL_ENABLE = 1<<22 18 | 19 | MSR_EVNTSEL = 0x186 20 | MSR_IA32_FIXED_CTR_CTRL = 0x38d 21 | MSR_PEBS_ENABLE = 0x3f1 22 | MSR_PERFCTR = 0xc1 23 | MSR_PMC = 0x4c1 24 | MSR_FIXED_CTR = 0x309 25 | MSR_FIXED_CTR_CTL = 0x38d 26 | MSR_GLOBAL_STATUS = 0x38e 27 | MSR_GLOBAL_CTRL = 0x38f 28 | MSR_GLOBAL_OVF_CTRL = 0x390 29 | 30 | extra_flags = ( 31 | (EVENTSEL_EDGE, "edge"), 32 | (EVENTSEL_PC, "pc"), 33 | (EVENTSEL_ANY, "any"), 34 | (EVENTSEL_INV, "inv"), 35 | (EVENTSEL_CMASK, "cmask")) 36 | -------------------------------------------------------------------------------- /pmumon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # standalone simple pmu configuration tool 3 | # allows to count an even without using perf 4 | # will conflict with any parallel perf (and other profiler) 5 | # usage. 6 | # Author: Andi Kleen 7 | # 8 | from __future__ import print_function 9 | import os 10 | import struct 11 | import sys 12 | 13 | def writemsr(msr, val, cpu): 14 | f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_WRONLY) 15 | os.lseek(f, msr, os.SEEK_SET) 16 | os.write(f, struct.pack('Q', val)) 17 | os.close(f) 18 | 19 | def readmsr(msr, cpu): 20 | f = os.open('/dev/cpu/%d/msr' % (cpu,), os.O_RDONLY) 21 | os.lseek(f, msr, os.SEEK_SET) 22 | val = struct.unpack('Q', os.read(f, 8))[0] 23 | os.close(f) 24 | return val 25 | 26 | if len(sys.argv) != 3 and len(sys.argv) != 2: 27 | print("Usage: pmumon cpu [event]") 28 | print("When no event is specified read+clear event on cpu, otherwise start it") 29 | print("event == 0 clears. event is in hex") 30 | print("perf/oprofile/etc. must not be active. no parallel users") 31 | sys.exit(1) 32 | 33 | MSR_EVNTSEL = 0x186 + 1 34 | MSR_PERFCTR = 0xc1 + 1 35 | 36 | cpu = int(sys.argv[1]) 37 | if len(sys.argv) > 2: 38 | event = int(sys.argv[2], 16) 39 | writemsr(MSR_EVNTSEL, 0, cpu) # disable first 40 | writemsr(MSR_PERFCTR, 0, cpu) 41 | writemsr(MSR_EVNTSEL, event, cpu) 42 | #print("global status %x" % (readmsr(0x38f, cpu),)) 43 | else: 44 | print("%x = %d" % (readmsr(MSR_EVNTSEL, cpu), readmsr(MSR_PERFCTR, cpu),)) 45 | -------------------------------------------------------------------------------- /power_metrics.py: -------------------------------------------------------------------------------- 1 | # 2 | # perf power metrics for toplev 3 | # 4 | 5 | import os 6 | 7 | class EnergyPackage: 8 | name = "Package Energy" 9 | desc = """ 10 | Package Energy over measurement period in Joules""" 11 | unit = "Joules" 12 | nogroup = True 13 | subplot = "Power" 14 | domain = "Package" 15 | def compute(self, EV): 16 | self.val = EV("power/energy-pkg/", 1) 17 | self.thresh = self.val > 0 18 | 19 | class EnergyCores: 20 | name = "Cores Energy" 21 | desc = """ 22 | Cores Energy over measurement period in Joules""" 23 | unit = "Joules" 24 | nogroup = True 25 | subplot = "Power" 26 | domain = "Package" 27 | def compute(self, EV): 28 | self.val = EV("power/energy-cores/", 1) 29 | self.thresh = self.val > 0 30 | 31 | class EnergyRAM: 32 | name = "RAM Energy" 33 | desc = """ 34 | RAM Energy over measurement period in Joules""" 35 | unit = "Joules" 36 | nogroup = True 37 | subplot = "Power" 38 | domain = "Package" 39 | def compute(self, EV): 40 | self.val = EV("power/energy-ram/", 1) 41 | self.thresh = self.val > 0 42 | 43 | class EnergyGPU: 44 | name = "GPU Energy" 45 | desc = """ 46 | GPU Energy over measurement period in Joules""" 47 | unit = "Joules" 48 | nogroup = True 49 | subplot = "Power" 50 | domain = "Package" 51 | def compute(self, EV): 52 | self.val = EV("power/energy-gpu/", 1) 53 | self.thresh = self.val > 1 54 | 55 | class Setup: 56 | def __init__(self, r): 57 | if os.path.exists("/sys/bus/event_source/devices/power/events/energy-cores"): 58 | r.force_metric(EnergyCores()) 59 | r.force_metric(EnergyPackage()) 60 | if os.path.exists("/sys/bus/event_source/devices/power/events/energy-ram"): 61 | r.force_metric(EnergyRAM()) 62 | if os.path.exists("/sys/bus/event_source/devices/power/events/energy-gpu"): 63 | r.force_metric(EnergyGPU()) 64 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | brewer2mpl 3 | pandas 4 | xlsxwriter 5 | -------------------------------------------------------------------------------- /simple-pebs/Makefile: -------------------------------------------------------------------------------- 1 | USER_CFLAGS := -g -Wall -std=c89 2 | KDIR = /lib/modules/`uname -r`/build 3 | 4 | USER_EXE := dumper samples histogram toperf 5 | USER_OBJ := dumper.o dump-util.o elf.o symtab.o samples.o map.o \ 6 | histogram.o toperf.o 7 | 8 | obj-m := simple-pebs.o 9 | M := make -C ${KDIR} M=`pwd` 10 | 11 | all: 12 | ${M} modules 13 | 14 | install: 15 | ${M} modules_install 16 | 17 | clean: 18 | ${M} clean 19 | rm -rf ${USER_EXE} ${USER_OBJS} 20 | 21 | user: ${USER_EXE} 22 | ${USER_OBJ} ${USER_EXE}: CFLAGS := ${USER_CFLAGS} 23 | 24 | dumper: dump-util.o dumper.o 25 | 26 | samples: LDLIBS := -lelf 27 | samples: samples.o elf.o symtab.o dump-util.o 28 | 29 | histogram: LDLIBS := -lelf 30 | histogram: histogram.o elf.o symtab.o map.o 31 | 32 | toperf: toperf.o map.o 33 | -------------------------------------------------------------------------------- /simple-pebs/README: -------------------------------------------------------------------------------- 1 | 2 | # simple standalone reference pebs driver 3 | 4 | ## 5 | ## Note: this is just a reference driver for PEBS on Linux, mainly as a reference 6 | ## for writing new experimential drivers and for porting PEBS code to other 7 | ## operating systems. See it as a code example. 8 | ## 9 | ## The code is written in a way that it should be easy to adapt to other OS. 10 | ## 11 | ## If you just want to use PEBS on Linux the builtin Linux perf 12 | ## support is near always a better choice. All functionality 13 | ## supported by simple pebs is supported by Linux perf in a better way. 14 | ## You may need a recent enough kernel to support your CPU. 15 | ## 16 | ## You'll need to disable Kernel page table isolation with the "nopti" kernel boot 17 | ## option. Otherwise the system will hard crash randomly on module load. 18 | ## 19 | ## CPU hotplug and suspend to ram are not supported on newer kernels. 20 | ## 21 | ## On recent CPUs: 22 | ## Should work on CPUs before Icelake, but may need to add more model numbers 23 | ## to the initialization checker. 24 | ## 25 | 26 | PEBS "Precise Event Based Sampling" is a profiling technology in Intel CPUs, 27 | that uses microcode to do (mostly) precise event samples. 28 | 29 | The driver is "free running" and minimizes interrupts, to allow a maximum 30 | PEBS frequency. Interrupts are only triggered when the PEBS buffer is full. 31 | 32 | This will take over the PEBS hardware from perf and may cause conflicts. 33 | 34 | To build user tools use "make user" 35 | 36 | sample file format: 37 | {8 bytes ip} 38 | 39 | 40 | simple-pebs.c Linux simple pebs driver 41 | dumper.c Dump samples running from running Linux driver. -b to write binary sample file 42 | Most other tools require running dumper first to dump the samples. 43 | 44 | samples.c Decode (ELF symbols) sample from running Linux driver. 45 | 46 | histogram.c Generate histogram from sample file 47 | 48 | toperf.c Generate perf.data from sample file 49 | 50 | Equivalent Linux perf command lines: 51 | 52 | insmod simple-pebs.ko / dumper -b 53 | -> perf record -c 100003 -a r1c2:pp sleep X 54 | (or other event code, see also perf list) 55 | To dump addresses add -d 56 | 57 | samples -> perf script 58 | 59 | histogram -> perf report --stdio 60 | -------------------------------------------------------------------------------- /simple-pebs/compat.h: -------------------------------------------------------------------------------- 1 | /* Deal with Gleixnerfication */ 2 | #include 3 | 4 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) 5 | 6 | /* No CPU hotplug / suspend with the mess in newer kernels. */ 7 | 8 | static inline void register_cpu_notifier(struct notifier_block *n) {} 9 | static inline void unregister_cpu_notifier(struct notifier_block *n) {} 10 | 11 | #define CPU_STARTING 0 12 | #define CPU_DYING 1 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /simple-pebs/dump-util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "simple-pebs.h" 11 | #include "dump-util.h" 12 | 13 | #define err(x) perror(x), exit(1) 14 | 15 | int device_open(void) 16 | { 17 | int fd = open("/dev/simple-pebs", O_RDONLY); 18 | if (fd < 0) 19 | err("/dev/simple-pebs open"); 20 | return fd; 21 | } 22 | 23 | int get_size(void) 24 | { 25 | int fd = device_open(); 26 | int size; 27 | 28 | if (ioctl(fd, SIMPLE_PEBS_GET_SIZE, &size) < 0) 29 | err("SIMPLE_PEBS_GET_SIZE"); 30 | close(fd); 31 | printf("size %d\n", size); 32 | return size; 33 | } 34 | 35 | void open_cpu(void **mapp, int cnum, struct pollfd *pfd, int size) 36 | { 37 | int fd = device_open(); 38 | if (ioctl(fd, SIMPLE_PEBS_SET_CPU, cnum) < 0) 39 | err("SIMPLE_PEBS_SET_CPU"); 40 | if (ioctl(fd, SIMPLE_PEBS_START, 0) < 0) 41 | err("SIMPLE_PEBS_START"); 42 | void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); 43 | if (map == (void *)-1) 44 | err("mmap"); 45 | *mapp = map; 46 | pfd->fd = fd; 47 | pfd->events = POLLIN; 48 | } 49 | -------------------------------------------------------------------------------- /simple-pebs/dump-util.h: -------------------------------------------------------------------------------- 1 | struct pollfd; 2 | 3 | typedef uint64_t u64; 4 | 5 | int device_open(void); 6 | int get_size(void); 7 | void open_cpu(void **mapp, int cnum, struct pollfd *pfd, int size); 8 | -------------------------------------------------------------------------------- /simple-pebs/dumper.c: -------------------------------------------------------------------------------- 1 | /* Dump simple PEBS data from kernel driver */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | typedef uint64_t u64; 14 | 15 | #include "simple-pebs.h" 16 | #include "dump-util.h" 17 | 18 | #define err(x) perror(x), exit(1) 19 | 20 | void dump_data(int cpunum, u64 *map, int num) 21 | { 22 | int i; 23 | printf("dump %d\n", num); 24 | for (i = 0; i < num; i++) 25 | printf("%d: %lx\n", cpunum, map[i]); 26 | } 27 | 28 | static void usage(void) 29 | { 30 | fprintf(stderr, "Usage: dumper [-b]\n" 31 | "-b binary dump\n"); 32 | exit(1); 33 | } 34 | 35 | int main(int ac, char **av) 36 | { 37 | int size = get_size(); 38 | int ncpus = sysconf(_SC_NPROCESSORS_ONLN); 39 | void *map[ncpus]; 40 | struct pollfd pfd[ncpus]; 41 | int opt; 42 | bool binary = false; 43 | 44 | while ((opt = getopt(ac, av, "b")) != -1) { 45 | switch (opt) { 46 | case 'b': 47 | binary = true; 48 | break; 49 | default: 50 | usage(); 51 | } 52 | } 53 | 54 | int i; 55 | for (i = 0; i < ncpus; i++) 56 | open_cpu(&map[i], i, &pfd[i], size); 57 | 58 | for (;;) { 59 | if (poll(pfd, ncpus, -1) < 0) 60 | perror("poll"); 61 | for (i = 0; i < ncpus; i++) { 62 | if (pfd[i].revents & POLLIN) { 63 | int len; 64 | 65 | if (ioctl(pfd[i].fd, SIMPLE_PEBS_GET_OFFSET, &len) < 0) { 66 | perror("SIMPLE_PEBS_GET_OFFSET"); 67 | continue; 68 | } 69 | 70 | /* copy out data */ 71 | if (binary) 72 | write(1, map[i], len); 73 | else 74 | dump_data(i, map[i], len / sizeof(u64)); 75 | 76 | if (ioctl(pfd[i].fd, SIMPLE_PEBS_RESET, 0) < 0) { 77 | perror("SIMPLE_PEBS_RESET"); 78 | continue; 79 | } 80 | } 81 | } 82 | } 83 | return 0; 84 | } 85 | -------------------------------------------------------------------------------- /simple-pebs/elf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015, Intel Corporation 3 | * Author: Andi Kleen 4 | * All rights reserved. 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, 10 | * this list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright 13 | * notice, this list of conditions and the following disclaimer in the 14 | * documentation and/or other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 21 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 27 | * OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "symtab.h" 38 | #include "elf.h" 39 | 40 | static char *my_strdup(char *s) 41 | { 42 | char *p = malloc(strlen(s) + 1); 43 | if (p) 44 | strcpy(p, s); 45 | return p; 46 | } 47 | 48 | void read_symtab(Elf *elf) 49 | { 50 | Elf_Scn *section = NULL; 51 | 52 | while ((section = elf_nextscn(elf, section)) != 0) { 53 | GElf_Shdr shdr, *sh; 54 | sh = gelf_getshdr(section, &shdr); 55 | 56 | if (sh->sh_type == SHT_SYMTAB || sh->sh_type == SHT_DYNSYM) { 57 | Elf_Data *data = elf_getdata(section, NULL); 58 | GElf_Sym *sym, symbol; 59 | int j; 60 | 61 | unsigned numsym = sh->sh_size / sh->sh_entsize; 62 | struct symtab *st = add_symtab(numsym); 63 | for (j = 0; j < numsym; j++) { 64 | struct sym *s; 65 | sym = gelf_getsymshndx(data, NULL, j, &symbol, NULL); 66 | s = &st->syms[j]; 67 | s->name = my_strdup(elf_strptr(elf, shdr.sh_link, sym->st_name)); 68 | s->val = sym->st_value; 69 | s->size = sym->st_size; 70 | s->hits = 0; 71 | } 72 | sort_symtab(st); 73 | } 74 | } 75 | } 76 | 77 | 78 | static Elf *elf_open(char *fn, int *fd) 79 | { 80 | *fd = open(fn, O_RDONLY); 81 | if (*fd < 0) { 82 | perror(fn); 83 | return NULL; 84 | } 85 | Elf *elf = elf_begin(*fd, ELF_C_READ, NULL); 86 | if (!elf) { 87 | fprintf(stderr, "elf_begin failed for %s: %s\n", 88 | fn, elf_errmsg(-1)); 89 | close(*fd); 90 | } 91 | return elf; 92 | } 93 | 94 | static void elf_close(Elf *elf, int fd) 95 | { 96 | elf_end(elf); 97 | close(fd); 98 | } 99 | 100 | int read_elf(char *fn) 101 | { 102 | elf_version(EV_CURRENT); 103 | 104 | int fd; 105 | Elf *elf = elf_open(fn, &fd); 106 | if (elf == NULL) 107 | return -1; 108 | read_symtab(elf); 109 | elf_close(elf, fd); 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /simple-pebs/elf.h: -------------------------------------------------------------------------------- 1 | int read_elf(char *fn); 2 | -------------------------------------------------------------------------------- /simple-pebs/histogram.c: -------------------------------------------------------------------------------- 1 | /* Print histograms from simple-pebs output. */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include "map.h" 7 | #include "elf.h" 8 | #include "symtab.h" 9 | 10 | typedef unsigned long long u64; 11 | 12 | #define err(x) perror(x), exit(1) 13 | 14 | static int cmp_sym_hits(const void *ap, const void *bp) 15 | { 16 | const struct sym *a = ap; 17 | const struct sym *b = bp; 18 | return a->hits - b->hits; 19 | } 20 | 21 | double min_percent = 1.0; 22 | 23 | void print_histogram(u64 *map, int num) 24 | { 25 | int i; 26 | unsigned long total = 0, unknown = 0; 27 | struct sym *ref_next = NULL; 28 | struct sym *referenced = NULL; 29 | 30 | int num_referenced = 0; 31 | 32 | for (i = 0; i < num; i++) { 33 | struct sym *sym = findsym(map[i]); 34 | if (sym) { 35 | if (sym->hits == 0) { 36 | if (!referenced) { 37 | referenced = sym; 38 | ref_next = sym; 39 | } 40 | ref_next->link = sym; 41 | num_referenced++; 42 | } 43 | sym->hits++; 44 | } else 45 | unknown++; 46 | total++; 47 | } 48 | if (total == 0) { 49 | printf("no samples found\n"); 50 | return; 51 | } 52 | 53 | struct sym **ref = malloc(num_referenced * sizeof(struct sym *)); 54 | struct sym *link; 55 | i = 0; 56 | for (link = referenced; link; link = link->link, i++) 57 | ref[i] = link; 58 | assert(i == num_referenced); 59 | 60 | qsort(ref, num_referenced, sizeof(struct sym *), cmp_sym_hits); 61 | 62 | printf("%5s %8s %s\n", "PCT", "HITS", "NAME"); 63 | printf("%5.2f%% %8lu unknown hits\n", 100. * ((double)unknown / total), 64 | unknown); 65 | for (i = 0; i < num_referenced; i++) { 66 | struct sym *sym = ref[i]; 67 | double pct = 100. * ((double)sym->hits / total); 68 | if (pct <= min_percent) 69 | break; 70 | printf("%5.2f%% %8lu %s\n", pct, sym->hits, sym->name); 71 | } 72 | } 73 | 74 | 75 | void usage(void) 76 | { 77 | fprintf(stderr, "Usage: histogram file elf ...\n"); 78 | 79 | } 80 | 81 | int main(int ac, char **av) 82 | { 83 | char *file = *++av; 84 | 85 | if (!file) 86 | usage(); 87 | while (*++av) 88 | read_elf(*av); 89 | 90 | size_t fsize; 91 | u64 *fmap = mapfile(file, &fsize); 92 | 93 | if (!fmap) 94 | err(file); 95 | print_histogram(fmap, fsize / 8); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /simple-pebs/map.c: -------------------------------------------------------------------------------- 1 | #include "map.h" 2 | 3 | #ifdef __linux__ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) 12 | 13 | static int pagesize; 14 | 15 | static void __attribute__((constructor)) init_ps(void) 16 | { 17 | pagesize = sysconf(_SC_PAGESIZE); 18 | } 19 | 20 | void *mapfile(char *fn, size_t *size) 21 | { 22 | int fd = open(fn, O_RDWR); 23 | if (fd < 0) 24 | return NULL; 25 | struct stat st; 26 | void *map = (void *)-1L; 27 | if (fstat(fd, &st) >= 0) { 28 | *size = st.st_size; 29 | map = mmap(NULL, round_up(st.st_size, pagesize), 30 | PROT_READ|PROT_WRITE, 31 | MAP_PRIVATE, fd, 0); 32 | } 33 | close(fd); 34 | return map != (void *)-1L ? map : NULL; 35 | } 36 | 37 | void unmapfile(void *map, size_t size) 38 | { 39 | munmap(map, round_up(size, pagesize)); 40 | } 41 | 42 | #else 43 | /* Fallback based on stdio */ 44 | #include 45 | #include 46 | 47 | void *mapfile(char *fn, size_t *size) 48 | { 49 | FILE *f = fopen(fn, "r"); 50 | char *map; 51 | int ok; 52 | if (!f) 53 | return NULL; 54 | fseek(f, 0, SEEK_END); 55 | *size = ftell(f); 56 | rewind(f); 57 | map = malloc(*size); 58 | ok = map && fread(map, 1, *size, f) == *size; 59 | fclose(f); 60 | if (!ok) { 61 | free(map); 62 | map = NULL; 63 | } 64 | return map; 65 | } 66 | 67 | void unmapfile(void *map, size_t size) 68 | { 69 | free(map); 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /simple-pebs/map.h: -------------------------------------------------------------------------------- 1 | #include 2 | void *mapfile(char *fn, size_t *size); 3 | void unmapfile(void *map, size_t size); 4 | -------------------------------------------------------------------------------- /simple-pebs/samples.c: -------------------------------------------------------------------------------- 1 | /* Dump sample data from linux kernel driver and resolve IPs */ 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "simple-pebs.h" 12 | #include "dump-util.h" 13 | #include "elf.h" 14 | #include "symtab.h" 15 | 16 | #define err(x) perror(x), exit(1) 17 | 18 | static void print_ip(uint64_t ip) 19 | { 20 | struct sym *sym = findsym(ip); 21 | if (sym) { 22 | printf("%s", sym->name); 23 | if (ip - sym->val > 0) 24 | printf("+%ld", ip - sym->val); 25 | } else 26 | printf("%lx", ip); 27 | } 28 | 29 | void dump_data(int cpunum, u64 *map, int num) 30 | { 31 | int i; 32 | printf("dump %d\n", num); 33 | for (i = 0; i < num; i++) { 34 | printf("%d: %lx ", cpunum, map[i]); 35 | print_ip(map[i]); 36 | putchar('\n'); 37 | } 38 | } 39 | 40 | int main(int ac, char **av) 41 | { 42 | int size = get_size(); 43 | int ncpus = sysconf(_SC_NPROCESSORS_ONLN); 44 | void *map[ncpus]; 45 | struct pollfd pfd[ncpus]; 46 | 47 | while (*++av) { 48 | printf("reading %s\n", *av); 49 | read_elf(*av); 50 | } 51 | 52 | int i; 53 | for (i = 0; i < ncpus; i++) 54 | open_cpu(&map[i], i, &pfd[i], size); 55 | 56 | for (;;) { 57 | if (poll(pfd, ncpus, -1) < 0) 58 | perror("poll"); 59 | for (i = 0; i < ncpus; i++) { 60 | if (pfd[i].revents & POLLIN) { 61 | int len; 62 | 63 | if (ioctl(pfd[i].fd, SIMPLE_PEBS_GET_OFFSET, &len) < 0) { 64 | perror("SIMPLE_PEBS_GET_OFFSET"); 65 | continue; 66 | } 67 | 68 | /* copy out data */ 69 | dump_data(i, map[i], len / sizeof(u64)); 70 | 71 | if (ioctl(pfd[i].fd, SIMPLE_PEBS_RESET, 0) < 0) { 72 | perror("SIMPLE_PEBS_RESET"); 73 | continue; 74 | } 75 | } 76 | } 77 | } 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /simple-pebs/simple-pebs.h: -------------------------------------------------------------------------------- 1 | #define SIMPLE_PEBS_BASE 0x7000 2 | #define SIMPLE_PEBS_SET_CPU (SIMPLE_PEBS_BASE + 1) 3 | #define SIMPLE_PEBS_GET_SIZE (SIMPLE_PEBS_BASE + 2) 4 | #define SIMPLE_PEBS_GET_OFFSET (SIMPLE_PEBS_BASE + 3) 5 | #define SIMPLE_PEBS_START (SIMPLE_PEBS_BASE + 4) 6 | #define SIMPLE_PEBS_STOP (SIMPLE_PEBS_BASE + 5) 7 | #define SIMPLE_PEBS_RESET (SIMPLE_PEBS_BASE + 6) 8 | -------------------------------------------------------------------------------- /simple-pebs/symtab.c: -------------------------------------------------------------------------------- 1 | /* Symtabs for simple-pebs */ 2 | /* 3 | * Copyright (c) 2015, Intel Corporation 4 | * Author: Andi Kleen 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, 11 | * this list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 20 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 | * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 22 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 26 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 28 | * OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | 32 | #include 33 | #include 34 | #include 35 | #include "symtab.h" 36 | 37 | struct symtab *symtabs; 38 | 39 | struct symtab *add_symtab(unsigned num) 40 | { 41 | struct symtab *st = malloc(sizeof(struct symtab)); 42 | if (!st) 43 | exit(ENOMEM); 44 | st->num = num; 45 | st->next = symtabs; 46 | st->syms = calloc(num * sizeof(struct sym), 1); 47 | if (!st->syms) 48 | exit(ENOMEM); 49 | symtabs = st; 50 | return st; 51 | } 52 | 53 | int cmp_sym(const void *ap, const void *bp) 54 | { 55 | const struct sym *a = ap; 56 | const struct sym *b = bp; 57 | if (a->val >= b->val && a->val < b->val + b->size) 58 | return 0; 59 | if (b->val >= a->val && b->val < a->val + a->size) 60 | return 0; 61 | return a->val - b->val; 62 | } 63 | 64 | struct sym *findsym(unsigned long val) 65 | { 66 | struct symtab *st; 67 | struct sym search = { .val = val }, *s; 68 | for (st = symtabs; st; st = st->next) { 69 | s = bsearch(&search, st->syms, st->num, sizeof(struct sym), cmp_sym); 70 | if (s) 71 | return s; 72 | } 73 | return NULL; 74 | } 75 | 76 | void dump_symtab(struct symtab *st) 77 | { 78 | int j; 79 | for (j = 0; j < st->num; j++) { 80 | struct sym *s = &st->syms[j]; 81 | if (s->val && s->name[0]) 82 | printf("%lx %s\n", s->val, s->name); 83 | } 84 | } 85 | 86 | void sort_symtab(struct symtab *st) 87 | { 88 | qsort(st->syms, st->num, sizeof(struct sym), cmp_sym); 89 | } 90 | -------------------------------------------------------------------------------- /simple-pebs/symtab.h: -------------------------------------------------------------------------------- 1 | struct sym { 2 | char *name; 3 | unsigned long val; 4 | unsigned long size; 5 | unsigned long hits; 6 | struct sym *link; 7 | }; 8 | 9 | struct symtab { 10 | struct symtab *next; 11 | unsigned num; 12 | struct sym *syms; 13 | }; 14 | 15 | extern struct symtab *symtabs; 16 | 17 | struct sym *findsym(unsigned long val); 18 | struct symtab *add_symtab(unsigned num); 19 | void dump_symtab(struct symtab *st); 20 | void sort_symtab(struct symtab *st); 21 | -------------------------------------------------------------------------------- /test-uncore.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Unit": "CBO", 4 | "EventCode": "0x34", 5 | "UMask": "0x9", 6 | "EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP", 7 | "Description": "tbd", 8 | "Counter": "0,1", 9 | "MSRValue": "0", 10 | "Filter": "CBoFilter[22:18]", 11 | "Internal": "0" 12 | }, 13 | { 14 | "Unit": "HA", 15 | "EventCode": "0xc", 16 | "UMask": "0x2", 17 | "EventName": "UNC_H_DIRECTORY_LOOKUP.NO_SNP", 18 | "Description": "tbd", 19 | "Counter": "0,1,2,3", 20 | "MSRValue": "0", 21 | "Filter": "null", 22 | "Internal": "0" 23 | }, 24 | { 25 | "Unit": "iMC", 26 | "EventCode": "0x5", 27 | "UMask": "0x4", 28 | "EventName": "UNC_M_DRAM_REFRESH.HIGH", 29 | "Description": "tbd", 30 | "Counter": "0,1,2,3", 31 | "MSRValue": "0", 32 | "Filter": "null", 33 | "Internal": "0" 34 | }, 35 | { 36 | "Unit": "UBOX", 37 | "EventCode": "0x0", 38 | "UMask": "0x0", 39 | "EventName": "UNC_U_CLOCKTICKS", 40 | "Description": "tbd", 41 | "Counter": "0", 42 | "MSRValue": "0", 43 | "Filter": "null", 44 | "Internal": "0" 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /tl-serve: -------------------------------------------------------------------------------- 1 | tl-serve.py -------------------------------------------------------------------------------- /tl_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, Intel Corporation 2 | # Author: Andi Kleen 3 | # 4 | # This program is free software; you can redistribute it and/or modify it 5 | # under the terms and conditions of the GNU General Public License, 6 | # version 2, as published by the Free Software Foundation. 7 | # 8 | # This program is distributed in the hope it will be useful, but WITHOUT 9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 | # more details. 12 | from __future__ import print_function 13 | import sys 14 | import subprocess 15 | import os 16 | import argparse 17 | if sys.version_info.major == 3: 18 | from typing import Set # noqa 19 | 20 | if sys.version_info.major == 3: 21 | popentext = dict(universal_newlines=True) 22 | else: 23 | popentext = {} 24 | 25 | def popen_stdout(cmd): 26 | return subprocess.Popen(cmd, stdout=subprocess.PIPE, **popentext) # type: ignore 27 | 28 | def popen_stdinout(cmd, f): 29 | return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=f, **popentext) # type: ignore 30 | 31 | def flex_open_r(fn): 32 | if fn.endswith(".xz"): 33 | xz = popen_stdout(["xz", "-d", "--stdout", fn]) 34 | return xz.stdout 35 | if fn.endswith(".gz"): 36 | gzip = popen_stdout(["gzip", "-d", "-c", fn]) 37 | return gzip.stdout 38 | if fn.endswith(".zst"): 39 | return popen_stdout(["zstd", "-d", "--stdout", fn]) .stdout 40 | return open(fn, 'r') 41 | 42 | def flex_open_w(fn): 43 | f = open(fn, "w") 44 | if fn.endswith(".xz"): 45 | xz = popen_stdinout(["xz", "-z", "--stdout"], f) 46 | return xz.stdin 47 | if fn.endswith(".gz"): 48 | gzip = popen_stdinout(["gzip", "-c"], f) 49 | return gzip.stdin 50 | if fn.endswith(".zst"): 51 | return popen_stdinout(["zstd", "--stdout"], f).stdin 52 | return f 53 | 54 | tl_tester = os.getenv("TL_TESTER") 55 | test_mode = tl_tester and tl_tester != "0" 56 | 57 | args = argparse.Namespace() 58 | 59 | def set_args(a): 60 | global args 61 | args = a 62 | 63 | def warn_no_assert(msg): 64 | if not args.quiet: 65 | print("warning: " + msg, file=sys.stderr) 66 | 67 | def warn_test(msg): 68 | if test_mode: 69 | warn_no_assert(msg) 70 | 71 | def warn(msg): 72 | warn_no_assert(msg) 73 | if test_mode: 74 | assert 0, msg 75 | 76 | warned = set() # type: Set[str] 77 | 78 | def warn_once_no_assert(msg): 79 | if msg not in warned and not args.quiet: 80 | print("warning: " + msg, file=sys.stderr) 81 | warned.add(msg) 82 | 83 | def warn_once(msg): 84 | warn_once_no_assert(msg) 85 | if test_mode: 86 | assert 0, msg 87 | 88 | def print_once(msg): 89 | if msg not in warned and not args.quiet: 90 | print(msg) 91 | warned.add(msg) 92 | 93 | def inform(msg): 94 | if not args.quiet: 95 | print(msg) 96 | 97 | def debug_print(x): 98 | if args.debug: 99 | print(x, file=sys.stderr) 100 | 101 | def obj_debug_print(obj, x): 102 | if args.debug or (args.dfilter and obj.name in args.dfilter): 103 | print(x, file=sys.stderr) 104 | 105 | def test_debug_print(x): 106 | if args.debug or test_mode: 107 | print(x, file=sys.stderr) 108 | -------------------------------------------------------------------------------- /tl_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012-2020, Intel Corporation 2 | # Author: Andi Kleen 3 | # 4 | # This program is free software; you can redistribute it and/or modify it 5 | # under the terms and conditions of the GNU General Public License, 6 | # version 2, as published by the Free Software Foundation. 7 | # 8 | # This program is distributed in the hope it will be useful, but WITHOUT 9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 | # more details. 12 | # 13 | # Maintain error data on perf measurements 14 | from __future__ import print_function 15 | import math 16 | from collections import namedtuple 17 | from tl_io import warn, warn_test, inform 18 | 19 | ValStat = namedtuple('ValStat', ['stddev', 'multiplex']) 20 | 21 | def geoadd(l): 22 | return math.sqrt(sum([x**2 for x in l])) 23 | 24 | # use geomean of stddevs and minimum of multiplex ratios for combining 25 | # XXX better way to combine multiplex ratios? 26 | def combine_valstat(l): 27 | if not l: 28 | return [] 29 | return ValStat(geoadd([x.stddev for x in l]), min([x.multiplex for x in l])) 30 | 31 | class ComputeStat: 32 | """Maintain statistics on measurement data.""" 33 | def __init__(self, quiet): 34 | self.referenced = set() 35 | self.already_warned = set() 36 | self.errcount = 0 37 | self.errors = set() 38 | self.prev_errors = set() 39 | self.mismeasured = set() 40 | self.prev_mismeasured = set() 41 | self.quiet = quiet 42 | 43 | def referenced_check(self, res, evnum): 44 | referenced = self.referenced 45 | referenced = referenced - self.already_warned 46 | if not referenced: 47 | return 48 | self.already_warned |= referenced 49 | 50 | # sanity check: did we reference all results? 51 | if len(res.keys()) > 0: 52 | r = res[list(res.keys())[0]] 53 | if len(r) != len(evnum): 54 | warn("results len %d does not match event len %d" % (len(r), len(evnum))) 55 | return 56 | if len(referenced) != len(r): 57 | dummies = {i for i, d in enumerate(evnum) if d == "dummy"} 58 | notr = set(range(len(r))) - referenced - dummies 59 | if notr: 60 | warn_test("%d results not referenced: " % (len(notr)) + 61 | " ".join(["%d" % x for x in sorted(notr)])) 62 | 63 | def compute_errors(self): 64 | if self.errcount > 0 and self.errors != self.prev_errors: 65 | inform(("%d nodes had zero counts: " % (self.errcount)) + 66 | " ".join(sorted(self.errors))) 67 | self.errcount = 0 68 | self.prev_errors = self.errors 69 | self.errors = set() 70 | if self.mismeasured and self.mismeasured > self.prev_mismeasured: 71 | inform("Mismeasured (out of bound values):" + 72 | " ".join(sorted(self.mismeasured))) 73 | self.prev_mismeasured = self.mismeasured 74 | -------------------------------------------------------------------------------- /tldata.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import re 4 | from collections import defaultdict 5 | import gen_level 6 | 7 | class TLData: 8 | """Read a toplev output CSV file. 9 | 10 | Exported: 11 | times[n] All time stamps 12 | vals[n] All values, as dicts mapping (name, cpu)->float 13 | levels{name} All levels (includes metrics), name->list of fields 14 | units{name} All units, name->unit 15 | headers(set) All headers (including metrics) 16 | metrics(set) All metrics 17 | helptxt[col] All help texts. 18 | cpus(set) All CPUs 19 | """ 20 | 21 | def __init__(self, fn, verbose=False): 22 | self.times = [] 23 | self.vals = [] 24 | self.fn = fn 25 | self.levels = defaultdict(set) 26 | self.metrics = set() 27 | self.headers = set() 28 | self.mtime = None 29 | self.helptxt = {} 30 | self.cpus = set() 31 | self.verbose = verbose 32 | self.units = {} 33 | 34 | def update(self): 35 | mtime = os.path.getmtime(self.fn) 36 | if self.mtime == mtime: 37 | return 38 | self.mtime = mtime 39 | csvf = csv.reader(open(self.fn, 'r')) 40 | prevts = None 41 | val = {} 42 | for r in csvf: 43 | if r[0].strip().startswith("#"): 44 | continue 45 | if r[0] == "Timestamp" or r[0] == "CPUs": 46 | continue 47 | # 1.001088024,C1,Frontend_Bound,42.9,% Slots,,frontend_retired.latency_ge_4:pp,0.0,100.0,<==,Y 48 | if re.match(r'[CS]?\d+.*', r[1]): 49 | ts, cpu, name, pct, unit, helptxt = r[0], r[1], r[2], r[3], r[4], r[5] 50 | else: 51 | ts, name, pct, unit, helptxt = r[0], r[1], r[2], r[3], r[4] 52 | cpu = None 53 | key = (name, cpu) 54 | ts, pct = float(ts), float(pct.replace("%", "")) 55 | if name not in self.helptxt or self.helptxt[name] == "": 56 | self.helptxt[name] = helptxt 57 | if unit.endswith("<"): 58 | unit = unit[:-2] 59 | if not self.verbose: 60 | continue 61 | self.units[name] = unit 62 | if prevts and ts != prevts: 63 | self.times.append(prevts) 64 | self.vals.append(val) 65 | val = {} 66 | val[key] = pct 67 | n = gen_level.level_name(name) 68 | if cpu: 69 | self.cpus.add(cpu) 70 | self.headers.add(name) 71 | if gen_level.is_metric(name): 72 | self.metrics.add(n) 73 | self.levels[n].add(name) 74 | prevts = ts 75 | if len(val.keys()) > 0: 76 | self.times.append(prevts) 77 | self.vals.append(val) 78 | 79 | early_plots = ["TopLevel", "CPU utilization", "Power", "Frequency", "CPU-METRIC"] 80 | 81 | def sort_key(i, data): 82 | if i in early_plots: 83 | return early_plots.index(i) 84 | if i in data.metrics: 85 | return 30 86 | return list(data.levels.keys()).index(i) 87 | 88 | def level_order(data): 89 | """Return plot order of all levels.""" 90 | return sorted(data.levels.keys(), key=lambda a: sort_key(a, data)) 91 | -------------------------------------------------------------------------------- /topdown-csv/mock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Do basic python sanity check of translation output 3 | import sys 4 | sys.path.append(".") 5 | import t 6 | l = [] 7 | m = [] 8 | 9 | def pev(e): 10 | print("\t",e) 11 | return 1 12 | 13 | class R: 14 | def run(self, p): 15 | #print p 16 | l.append(p) 17 | def metric(self, p): 18 | m.append(p) 19 | 20 | t.Setup(R()) 21 | for p in l: 22 | p.thresh = True 23 | for p in l: 24 | print(p.name) 25 | p.compute(lambda e, level: pev(e)) 26 | if p.sample: 27 | print(" Sample:", " ".join(p.sample)) 28 | if p.sibling: 29 | print(" Siblings:", " ".join([o.name for o in p.sibling])) 30 | 31 | for p in m: 32 | print(p.name) 33 | p.compute(lambda e, level: pev(e)) 34 | 35 | -------------------------------------------------------------------------------- /toplev: -------------------------------------------------------------------------------- 1 | toplev.py -------------------------------------------------------------------------------- /toplev.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andikleen/pmu-tools/65e4504d384aadc24ddfe4aabb50eaf62b7f294d/toplev.ico -------------------------------------------------------------------------------- /topology: -------------------------------------------------------------------------------- 1 | /sys/bus/event_source/devices/uncore_ha 2 | /sys/bus/event_source/devices/uncore_ha/format/umask 3 | -------------------------------------------------------------------------------- /tsx_metrics.py: -------------------------------------------------------------------------------- 1 | # 2 | # TSX metrics 3 | # 4 | 5 | # XXX force all these into a single group 6 | # XXX: force % in caller 7 | 8 | import os 9 | 10 | def TXCycles(EV, level): 11 | return EV("cpu/cycles-t/", level) / EV("cycles", level) 12 | 13 | class TransactionalCycles: 14 | name = "Transactional cycles" 15 | desc = """ 16 | Percent cycles spent in a transaction. When low or zero either the program 17 | does not use locks (or other transactions), or the locks are not enabled with lock elision.""" 18 | subplot = "TSX" 19 | unit = "%" 20 | sample = ["mem_uops_retired.lock_loads"] 21 | server = True 22 | def compute(self, EV): 23 | try: 24 | self.val = TXCycles(EV, 1) * 100. 25 | self.thresh = (self.val >= 0.01) 26 | except ZeroDivisionError: 27 | self.val = 0 28 | self.thresh = False 29 | 30 | class AbortedCycles: 31 | name = "Aborted cycles" 32 | desc = """ 33 | Percent cycles wasted in transaction aborts. When a significant part of the transactional cycles 34 | start sampling for abort causes.""" 35 | subplot = "TSX" 36 | unit = "%" 37 | sample = ["cpu/tx-abort/pp", "cpu/hle-abort/pp"] 38 | server = True 39 | def compute(self, EV): 40 | try: 41 | self.val = ((EV("cpu/cycles-t/", 1) - EV("cpu/cycles-ct/", 1)) / EV("cycles", 1)) * 100. 42 | self.thresh = (self.val >= 0.01) 43 | except ZeroDivisionError: 44 | self.val = 0 45 | self.thresh = False 46 | 47 | class AverageRTM: 48 | name = "Average RTM transaction length" 49 | desc = """ 50 | Average RTM transaction length. Assumes most transactions are RTM. 51 | When low consider increasing the size of the critical sections to lower overhead.""" 52 | subplot = "TSX Latencies" 53 | unit = "cycles" 54 | server = True 55 | def compute(self, EV): 56 | try: 57 | self.val = EV("cpu/cycles-t/", 1) / EV("RTM_RETIRED.START", 1) 58 | self.thresh = TXCycles(EV, 1) >= 0.01 and self.val > 0 59 | except ZeroDivisionError: 60 | self.val = 0 61 | self.thresh = False 62 | 63 | class AverageHLE: 64 | name = "Average HLE transaction length" 65 | desc = """ 66 | Average HLE transaction length. Assumes most transactions are HLE. 67 | When low consider increasing the size of the critical sections to lower overhead.""" 68 | subplot = "TSX Latencies" 69 | unit = "cycles" 70 | def compute(self, EV): 71 | try: 72 | self.val = EV("cpu/cycles-t/", 1) / EV("HLE_RETIRED.START", 1) 73 | self.thresh = TXCycles(EV, 1) >= 0.01 and self.val > 0 74 | except ZeroDivisionError: 75 | self.val = 0 76 | self.thresh = False 77 | 78 | class Setup: 79 | def __init__(self, r): 80 | # XXX allow override 81 | if os.path.exists("/sys/bus/event_source/devices/cpu/events/cycles-t"): 82 | r.force_metric(TransactionalCycles()) 83 | r.force_metric(AbortedCycles()) 84 | r.force_metric(AverageRTM()) 85 | #r.force_metric(AverageHLE()) 86 | -------------------------------------------------------------------------------- /ucevent/CHECK-ALL: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # run all events for 2 seconds each 3 | # requires GNU parallel (http://ftp.gnu.org/gnu/parallel/) 4 | ./ucevent.py --unsupported --broken --name-only > all-events 5 | parallel --no-notice --halt 1 "$@" -n 1 -k < all-events sh -c "echo "EVENT" {} ; $WRAP ./ucevent.py -v {} sleep 2.2" 6 | -------------------------------------------------------------------------------- /ucevent/CHECK-DERIVED: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # requires GNU parallel (http://ftp.gnu.org/gnu/parallel/) 3 | ./ucevent.py --name-only > derived-events 4 | parallel --no-notice --halt 1 "$@" -n 1 -k < derived-events sh -c "echo "EVENT" {} ; $WRAP ./ucevent.py -v {} sleep 2.2" 5 | -------------------------------------------------------------------------------- /ucevent/JKT: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | FORCECPU=jkt ./ucevent.py --mock "$@" 3 | -------------------------------------------------------------------------------- /ucevent/MOCK-ALL: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export FORCECPU=${FORCECPU:-hsx} 3 | ./ucevent.py --mock --unsupported --broken --name-only > all-events 4 | parallel --halt 1 --no-notice "$@" -n 1 -k < all-events sh -c "echo "EVENT" {} ; $WRAP ./ucevent.py --mock -v {}" 5 | 6 | -------------------------------------------------------------------------------- /ucevent/Makefile: -------------------------------------------------------------------------------- 1 | ucevent.man: ucevent.py ucevent.s 2 | FORCECPU=jkt help2man --version-string=" " -i ucevent.s -N ./ucevent.py > ucevent.man 3 | 4 | ucevent.s: README.md 5 | ./md2hman.py < README.md > ucevent.s 6 | 7 | clean: 8 | rm -f ucevent.s 9 | -------------------------------------------------------------------------------- /ucevent/RUN-ALL: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # MOCK=1 don't run perf 3 | # WRAP=... python wrapper 4 | 5 | set -e 6 | 7 | CPULIST="${CPULIST:-jkt ivt hsx bdxde bdx skx icx}" 8 | 9 | for cpu in $CPULIST ; do 10 | 11 | export FORCECPU=$cpu 12 | echo $cpu 13 | 14 | EXTRA="" 15 | [ -z "$MOCK" ] && EXTRA=CHECK-* 16 | 17 | for i in $EXTRA SANITY-ALL MOCK-ALL ; do 18 | echo $i 19 | ./$i 20 | echo STATUS $? 21 | done 22 | 23 | ./uctester 24 | 25 | 26 | done 27 | -------------------------------------------------------------------------------- /ucevent/SANITY-ALL: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # run all events for 2 seconds each 3 | # requires GNU parallel (http://ftp.gnu.org/gnu/parallel/) 4 | EXTRA="" 5 | [ -n "$MOCK" ] && EXTRA=--mock 6 | ./ucevent.py --unsupported --broken --name-only > all-events 7 | parallel --no-notice --halt 1 "$@" -n 1 -k < all-events sh -c "echo "EVENT" {} ; $WRAP ./sanity-run.py $EXTRA -v {} sleep 2.2" 8 | -------------------------------------------------------------------------------- /ucevent/bdx_extra.py: -------------------------------------------------------------------------------- 1 | jkt_extra.py -------------------------------------------------------------------------------- /ucevent/bdxde_extra.py: -------------------------------------------------------------------------------- 1 | # empty for now 2 | 3 | extra_derived = {} 4 | -------------------------------------------------------------------------------- /ucevent/dygraph-out.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | ucevent output 4 | 6 | 7 | 8 |
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /ucevent/hsx_extra.py: -------------------------------------------------------------------------------- 1 | # empty for now 2 | 3 | extra_derived = {} 4 | -------------------------------------------------------------------------------- /ucevent/icx_extra.py: -------------------------------------------------------------------------------- 1 | extra_derived = { } 2 | -------------------------------------------------------------------------------- /ucevent/ivt_extra.py: -------------------------------------------------------------------------------- 1 | jkt_extra.py -------------------------------------------------------------------------------- /ucevent/md2hman.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # convert README.md to include files for help2man 3 | from __future__ import print_function 4 | import sys 5 | import re 6 | 7 | skip_sections = ( "Command Line options reference", "Debugging and testing", "Support", 8 | "Author", "Other Projects providing uncore monitoring") 9 | 10 | tabmode = False 11 | skip = False 12 | for l in sys.stdin: 13 | after = "" 14 | l = l.rstrip() 15 | if l and l[0] == '#': 16 | skip = False 17 | if l[2:] in skip_sections: 18 | skip = True 19 | continue 20 | print("[%s]" % (l[2:])) 21 | continue 22 | elif l == "" and not skip: 23 | print(".PP") 24 | tabmode = False 25 | continue 26 | if skip: 27 | continue 28 | if l and l[0] == '\t' and l[1:]: 29 | if not tabmode: 30 | print(".nf\n.sp") 31 | tabmode = True 32 | #print(".I ", end=" ") 33 | elif tabmode: 34 | after = ".fi" 35 | tabmode = False 36 | if l and l[0] == '-': 37 | print(".TP") 38 | l = l[2:] 39 | if l and l[0:2] == "**": 40 | print(".B ", end=" ") 41 | l = l.replace("**","") 42 | if l and l[0] == '[': 43 | m = re.match(r"\[(.*)\]\s*\((.*)\)(.*)", l) 44 | #l = '.URL "%s" "%s"\n%s' % (m.group(2), m.group(1), m.group(3)) 45 | l = m.group(2) + " " + m.group(1) + " " + m.group(3) 46 | print(l) 47 | if after: 48 | print(after) 49 | -------------------------------------------------------------------------------- /ucevent/patches-3.10/0002-perf-Add-sysfs-entry-to-adjust-multiplexing-interval.patch: -------------------------------------------------------------------------------- 1 | From 4341acbf37e5eac1607ab4aa59e6ec941f9ee7b7 Mon Sep 17 00:00:00 2001 2 | From: Stephane Eranian 3 | Date: Wed, 3 Apr 2013 14:21:34 +0200 4 | Subject: [PATCH 2/5] perf: Add sysfs entry to adjust multiplexing interval 5 | per PMU 6 | 7 | This patch adds /sys/device/xxx/perf_event_mux_interval_ms to ajust 8 | the multiplexing interval per PMU. The unit is milliseconds. Value has 9 | to be >= 1. 10 | 11 | In the 4th version, we renamed the sysfs file to be more consistent 12 | with the other /proc/sys/kernel entries for perf_events. 13 | 14 | In the 5th version, we handle the reprogramming of the hrtimer using 15 | hrtimer_forward_now(). That way, we sync up to new timer value quickly 16 | (suggested by Jiri Olsa). 17 | 18 | Signed-off-by: Stephane Eranian 19 | Signed-off-by: Peter Zijlstra 20 | Cc: Frederic Weisbecker 21 | Cc: Arnaldo Carvalho de Melo 22 | Link: http://lkml.kernel.org/r/1364991694-5876-3-git-send-email-eranian@google.com 23 | Signed-off-by: Ingo Molnar 24 | --- 25 | include/linux/perf_event.h | 1 + 26 | kernel/events/core.c | 63 +++++++++++++++++++++++++++++++++++++++++--- 27 | 2 files changed, 60 insertions(+), 4 deletions(-) 28 | 29 | diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h 30 | index 3012c54..f42e8fb 100644 31 | --- a/include/linux/perf_event.h 32 | +++ b/include/linux/perf_event.h 33 | @@ -194,6 +194,7 @@ struct pmu { 34 | int * __percpu pmu_disable_count; 35 | struct perf_cpu_context * __percpu pmu_cpu_context; 36 | int task_ctx_nr; 37 | + int hrtimer_interval_ms; 38 | 39 | /* 40 | * Fully disable/enable this PMU, can be used to protect from the PMI 41 | diff --git a/kernel/events/core.c b/kernel/events/core.c 42 | index 71a39cc..5218771 100644 43 | --- a/kernel/events/core.c 44 | +++ b/kernel/events/core.c 45 | @@ -720,13 +720,21 @@ static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) 46 | { 47 | struct hrtimer *hr = &cpuctx->hrtimer; 48 | struct pmu *pmu = cpuctx->ctx.pmu; 49 | + int timer; 50 | 51 | /* no multiplexing needed for SW PMU */ 52 | if (pmu->task_ctx_nr == perf_sw_context) 53 | return; 54 | 55 | - cpuctx->hrtimer_interval = 56 | - ns_to_ktime(NSEC_PER_MSEC * PERF_CPU_HRTIMER); 57 | + /* 58 | + * check default is sane, if not set then force to 59 | + * default interval (1/tick) 60 | + */ 61 | + timer = pmu->hrtimer_interval_ms; 62 | + if (timer < 1) 63 | + timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER; 64 | + 65 | + cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 66 | 67 | hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); 68 | hr->function = perf_cpu_hrtimer_handler; 69 | @@ -6076,9 +6084,56 @@ type_show(struct device *dev, struct device_attribute *attr, char *page) 70 | return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type); 71 | } 72 | 73 | +static ssize_t 74 | +perf_event_mux_interval_ms_show(struct device *dev, 75 | + struct device_attribute *attr, 76 | + char *page) 77 | +{ 78 | + struct pmu *pmu = dev_get_drvdata(dev); 79 | + 80 | + return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms); 81 | +} 82 | + 83 | +static ssize_t 84 | +perf_event_mux_interval_ms_store(struct device *dev, 85 | + struct device_attribute *attr, 86 | + const char *buf, size_t count) 87 | +{ 88 | + struct pmu *pmu = dev_get_drvdata(dev); 89 | + int timer, cpu, ret; 90 | + 91 | + ret = kstrtoint(buf, 0, &timer); 92 | + if (ret) 93 | + return ret; 94 | + 95 | + if (timer < 1) 96 | + return -EINVAL; 97 | + 98 | + /* same value, noting to do */ 99 | + if (timer == pmu->hrtimer_interval_ms) 100 | + return count; 101 | + 102 | + pmu->hrtimer_interval_ms = timer; 103 | + 104 | + /* update all cpuctx for this PMU */ 105 | + for_each_possible_cpu(cpu) { 106 | + struct perf_cpu_context *cpuctx; 107 | + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 108 | + cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); 109 | + 110 | + if (hrtimer_active(&cpuctx->hrtimer)) 111 | + hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval); 112 | + } 113 | + 114 | + return count; 115 | +} 116 | + 117 | +#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) 118 | + 119 | static struct device_attribute pmu_dev_attrs[] = { 120 | - __ATTR_RO(type), 121 | - __ATTR_NULL, 122 | + __ATTR_RO(type), 123 | + __ATTR_RW(perf_event_mux_interval_ms), 124 | + __ATTR_NULL, 125 | }; 126 | 127 | static int pmu_bus_running; 128 | -- 129 | 1.7.7.6 130 | 131 | -------------------------------------------------------------------------------- /ucevent/patches-3.10/0004-per-socket-fix.patch: -------------------------------------------------------------------------------- 1 | From 525c62bc4417f988aeb3b941ac0dfad04fec39ee Mon Sep 17 00:00:00 2001 2 | From: Stephane Eranian 3 | Date: Tue, 9 Jul 2013 12:03:47 -0700 4 | Subject: [PATCH 4/5] --per-socket fix 5 | 6 | --- 7 | tools/perf/builtin-stat.c | 7 ++++--- 8 | 1 files changed, 4 insertions(+), 3 deletions(-) 9 | 10 | diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c 11 | index 7e910ba..2adf8cf 100644 12 | --- a/tools/perf/builtin-stat.c 13 | +++ b/tools/perf/builtin-stat.c 14 | @@ -924,7 +924,7 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 15 | static void print_aggr(char *prefix) 16 | { 17 | struct perf_evsel *counter; 18 | - int cpu, s, s2, id, nr; 19 | + int cpu, cpu2, s, s2, id, nr; 20 | u64 ena, run, val; 21 | 22 | if (!(aggr_map || aggr_get_id)) 23 | @@ -936,7 +936,8 @@ static void print_aggr(char *prefix) 24 | val = ena = run = 0; 25 | nr = 0; 26 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 27 | - s2 = aggr_get_id(evsel_list->cpus, cpu); 28 | + cpu2 = perf_evsel__cpus(counter)->map[cpu]; 29 | + s2 = aggr_get_id(evsel_list->cpus, cpu2); 30 | if (s2 != id) 31 | continue; 32 | val += counter->counts->cpu[cpu].val; 33 | @@ -948,7 +949,7 @@ static void print_aggr(char *prefix) 34 | fprintf(output, "%s", prefix); 35 | 36 | if (run == 0 || ena == 0) { 37 | - aggr_printout(counter, cpu, nr); 38 | + aggr_printout(counter, id, nr); 39 | 40 | fprintf(output, "%*s%s%*s", 41 | csv_output ? 0 : 18, 42 | -- 43 | 1.7.7.6 44 | 45 | -------------------------------------------------------------------------------- /ucevent/patches-3.10/0005-support-pcu-extsel.patch: -------------------------------------------------------------------------------- 1 | From 485a1208f27bad121685388f966856c5fe45849d Mon Sep 17 00:00:00 2001 2 | From: "Yan, Zheng" 3 | Date: Fri, 12 Jul 2013 19:36:03 -0700 4 | Subject: [PATCH] support pcu extsel 5 | 6 | --- 7 | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 8 | arch/x86/kernel/cpu/perf_event_intel_uncore.h | 1 + 9 | 2 files changed, 2 insertions(+), 1 deletions(-) 10 | 11 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 12 | index 19f0cb4..036ccfd 100644 13 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 14 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 15 | @@ -285,7 +285,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = { 16 | }; 17 | 18 | static struct attribute *snbep_uncore_pcu_formats_attr[] = { 19 | - &format_attr_event.attr, 20 | + &format_attr_event_ext.attr, 21 | &format_attr_occ_sel.attr, 22 | &format_attr_edge.attr, 23 | &format_attr_inv.attr, 24 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h 25 | index ac77a7b..669fcc5 100644 26 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h 27 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h 28 | @@ -110,6 +110,7 @@ 29 | (SNBEP_PMON_CTL_EV_SEL_MASK | \ 30 | SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ 31 | SNBEP_PMON_CTL_EDGE_DET | \ 32 | + SNBEP_PMON_CTL_EV_SEL_EXT | \ 33 | SNBEP_PMON_CTL_INVERT | \ 34 | SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ 35 | SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ 36 | -- 37 | 1.7.7.6 38 | 39 | -------------------------------------------------------------------------------- /ucevent/patches-3.10/0006-add-masks.patch: -------------------------------------------------------------------------------- 1 | commit 3840e75a2385a71cf9f0916fe69db2ea936f9cdc 2 | Author: Andi Kleen 3 | Date: Mon Jul 15 15:00:08 2013 -0700 4 | 5 | Add mask{0,1}, mask{0,1} 6 | 7 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 8 | index 6329563..5122385 100644 9 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 10 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 11 | @@ -54,6 +54,8 @@ DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17"); 12 | DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12"); 13 | DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8"); 14 | DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4"); 15 | +DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31"); 16 | +DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63"); 17 | DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51"); 18 | DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35"); 19 | DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31"); 20 | @@ -61,7 +63,8 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17"); 21 | DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12"); 22 | DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8"); 23 | DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); 24 | - 25 | +DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); 26 | +DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); 27 | 28 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) 29 | { 30 | @@ -325,6 +328,8 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = { 31 | &format_attr_match_mc.attr, 32 | &format_attr_match_opc.attr, 33 | &format_attr_match_vnw.attr, 34 | + &format_attr_match0.attr, 35 | + &format_attr_match1.attr, 36 | &format_attr_mask_rds.attr, 37 | &format_attr_mask_rnid30.attr, 38 | &format_attr_mask_rnid4.attr, 39 | @@ -332,6 +337,8 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = { 40 | &format_attr_mask_mc.attr, 41 | &format_attr_mask_opc.attr, 42 | &format_attr_mask_vnw.attr, 43 | + &format_attr_mask0.attr, 44 | + &format_attr_mask1.attr, 45 | NULL, 46 | }; 47 | 48 | -------------------------------------------------------------------------------- /ucevent/patches-3.10/README: -------------------------------------------------------------------------------- 1 | 2 | The following patches are needed for ucperf on top of Linux 3.10 3 | Newer kernels may have these patches already integrated. 4 | 5 | The patches affect both the perf binary and perf kernel code. 6 | 7 | To apply 8 | 9 | cd linux-3.10 10 | for i in PATCHDIR/00* ; do 11 | patch -p1 < $i 12 | done 13 | -------------------------------------------------------------------------------- /ucevent/patches-3.16/0002-perf-x86-uncore-register-the-PMU-only-if-the-uncore-.patch: -------------------------------------------------------------------------------- 1 | From 26eadfb58bfb130254c04bff30fe2e2db775fd08 Mon Sep 17 00:00:00 2001 2 | From: "Yan, Zheng" 3 | Date: Wed, 20 Aug 2014 09:50:58 +0800 4 | Subject: [PATCH 2/2] perf/x86/uncore: register the PMU only if the uncore pci 5 | device exists 6 | 7 | Current code registers PMUs for all possible uncore pci devices. 8 | This is not good because, on some machines, one or more uncore pci 9 | devices can be missing. The missing pci device make corresponding 10 | PMU unusable. Register the PMU only if the uncore device exists. 11 | 12 | Signed-off-by: Yan, Zheng 13 | Signed-off-by: Andi Kleen 14 | 15 | Conflicts: 16 | arch/x86/kernel/cpu/perf_event_intel_uncore.c 17 | --- 18 | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 19 +++++++++++-------- 19 | 1 file changed, 11 insertions(+), 8 deletions(-) 20 | 21 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 22 | index f538bcf..ab7c597 100644 23 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 24 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 25 | @@ -4382,6 +4382,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id 26 | struct intel_uncore_box *box; 27 | struct intel_uncore_type *type; 28 | int phys_id; 29 | + bool first_box = false; 30 | 31 | phys_id = pcibus_to_physid[pdev->bus->number]; 32 | if (phys_id < 0) 33 | @@ -4415,9 +4416,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id 34 | pci_set_drvdata(pdev, box); 35 | 36 | raw_spin_lock(&uncore_box_lock); 37 | + if (list_empty(&pmu->box_list)) 38 | + first_box = true; 39 | list_add_tail(&box->list, &pmu->box_list); 40 | raw_spin_unlock(&uncore_box_lock); 41 | 42 | + if (first_box) 43 | + uncore_pmu_register(pmu); 44 | return 0; 45 | } 46 | 47 | @@ -4426,6 +4431,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) 48 | struct intel_uncore_box *box = pci_get_drvdata(pdev); 49 | struct intel_uncore_pmu *pmu; 50 | int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number]; 51 | + bool last_box = false; 52 | 53 | box = pci_get_drvdata(pdev); 54 | if (!box) { 55 | @@ -4447,6 +4453,8 @@ static void uncore_pci_remove(struct pci_dev *pdev) 56 | 57 | raw_spin_lock(&uncore_box_lock); 58 | list_del(&box->list); 59 | + if (list_empty(&pmu->box_list)) 60 | + last_box = true; 61 | raw_spin_unlock(&uncore_box_lock); 62 | 63 | for_each_possible_cpu(cpu) { 64 | @@ -4458,6 +4466,9 @@ static void uncore_pci_remove(struct pci_dev *pdev) 65 | 66 | WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); 67 | kfree(box); 68 | + 69 | + if (last_box) 70 | + perf_pmu_unregister(&pmu->pmu); 71 | } 72 | 73 | static int __init uncore_pci_init(void) 74 | @@ -4838,14 +4849,6 @@ static int __init uncore_pmus_register(void) 75 | } 76 | } 77 | 78 | - for (i = 0; pci_uncores[i]; i++) { 79 | - type = pci_uncores[i]; 80 | - for (j = 0; j < type->num_boxes; j++) { 81 | - pmu = &type->pmus[j]; 82 | - uncore_pmu_register(pmu); 83 | - } 84 | - } 85 | - 86 | return 0; 87 | } 88 | 89 | -- 90 | 1.8.4.5 91 | 92 | -------------------------------------------------------------------------------- /ucevent/patches-3.16/0004-perf-x86-uncore-Add-missing-cbox-filter-flags-on-Ivy.patch: -------------------------------------------------------------------------------- 1 | From 84082496a8e87327e4c6c486742436db5304b470 Mon Sep 17 00:00:00 2001 2 | From: Andi Kleen 3 | Date: Fri, 5 Sep 2014 07:17:41 -0700 4 | Subject: [PATCH 1/3] perf, x86, uncore: Add missing cbox filter flags on 5 | IvyBridge-EP uncore driver 6 | 7 | The IvyBridge-EP uncore driver was missing three filter flags: 8 | NC, ISOC, C6 which are useful in some cases. Support them in the same way 9 | as the Haswell EP driver, by allowing to set them and exposing 10 | them in the sysfs formats. 11 | 12 | Also fix a typo in a define. 13 | 14 | Relies on the Haswell EP driver to be applied earlier. 15 | 16 | Signed-off-by: Andi Kleen 17 | --- 18 | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 9 ++++++++- 19 | 1 file changed, 8 insertions(+), 1 deletion(-) 20 | 21 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 22 | index ab7c597..8de876a 100644 23 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 24 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 25 | @@ -1191,6 +1191,9 @@ static struct attribute *ivt_uncore_cbox_formats_attr[] = { 26 | &format_attr_filter_state2.attr, 27 | &format_attr_filter_nid2.attr, 28 | &format_attr_filter_opc2.attr, 29 | + &format_attr_filter_nc.attr, 30 | + &format_attr_filter_c6.attr, 31 | + &format_attr_filter_isoc.attr, 32 | NULL, 33 | }; 34 | 35 | @@ -1328,8 +1331,12 @@ static u64 ivt_cbox_filter_mask(int fields) 36 | mask |= IVT_CB0_MSR_PMON_BOX_FILTER_STATE; 37 | if (fields & 0x8) 38 | mask |= IVT_CB0_MSR_PMON_BOX_FILTER_NID; 39 | - if (fields & 0x10) 40 | + if (fields & 0x10) { 41 | mask |= IVT_CB0_MSR_PMON_BOX_FILTER_OPC; 42 | + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC; 43 | + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6; 44 | + mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC; 45 | + } 46 | 47 | return mask; 48 | } 49 | -- 50 | 1.9.3 51 | 52 | -------------------------------------------------------------------------------- /ucevent/patches-3.16/0005-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy-Ivy-H.patch: -------------------------------------------------------------------------------- 1 | From 9eb5f0f4827b37c8fb0b4a75df0987f88edc5232 Mon Sep 17 00:00:00 2001 2 | From: Andi Kleen 3 | Date: Fri, 5 Sep 2014 07:18:51 -0700 4 | Subject: [PATCH 2/3] perf, x86, uncore: Fix PCU filter setup for 5 | Sandy/Ivy/Haswell EP 6 | 7 | The PCU frequency band filters use 8 bit each in a register. 8 | When setting up the value the shift value was not correctly 9 | scaled, which resulted in all filters except for band 0 to 10 | be zero. Fix the scaling. 11 | 12 | This allows to correctly monitor multiple uncore frequency bands. 13 | 14 | Signed-off-by: Andi Kleen 15 | --- 16 | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 17 | 1 file changed, 1 insertion(+), 1 deletion(-) 18 | 19 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 20 | index 8de876a..f5d6f95 100644 21 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 22 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 23 | @@ -835,7 +835,7 @@ static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event * 24 | if (ev_sel >= 0xb && ev_sel <= 0xe) { 25 | reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; 26 | reg1->idx = ev_sel - 0xb; 27 | - reg1->config = event->attr.config1 & (0xff << reg1->idx); 28 | + reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8)); 29 | } 30 | return 0; 31 | } 32 | -- 33 | 1.9.3 34 | 35 | -------------------------------------------------------------------------------- /ucevent/patches-3.16/0006-fixup-perf-x86-uncore-Fix-PCU-filter-setup-for-Sandy.patch: -------------------------------------------------------------------------------- 1 | From 8074b652feb213fa32d45b27ba5b2fa44e548f64 Mon Sep 17 00:00:00 2001 2 | From: Andi Kleen 3 | Date: Fri, 5 Sep 2014 07:19:41 -0700 4 | Subject: [PATCH 3/3] fixup! perf, x86, uncore: Fix PCU filter setup for 5 | Sandy/Ivy/Haswell EP 6 | 7 | --- 8 | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 12 | index f5d6f95..c41cb1c 100644 13 | --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c 14 | +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c 15 | @@ -1919,7 +1919,7 @@ static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event * 16 | if (ev_sel >= 0xb && ev_sel <= 0xe) { 17 | reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER; 18 | reg1->idx = ev_sel - 0xb; 19 | - reg1->config = event->attr.config1 & (0xff << reg1->idx); 20 | + reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8)); 21 | } 22 | return 0; 23 | } 24 | -- 25 | 1.9.3 26 | 27 | -------------------------------------------------------------------------------- /ucevent/sanity-run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # sanity check an event 3 | # percent between 0 and 100% 4 | # nothing negative 5 | from __future__ import print_function 6 | import sys 7 | import os 8 | # thanks python 3.13! 9 | try: 10 | from shlex import quote as cmd_quote 11 | except ImportError: 12 | from pipes import quote as cmd_quote 13 | 14 | logfile = "slog.%d" % (os.getpid()) 15 | 16 | s = "./ucevent.py -x, -o " + logfile + " " + " ".join(map(cmd_quote, sys.argv[1:])) 17 | w = os.getenv("WRAP") 18 | if w: 19 | s = w + " " + s 20 | print(s) 21 | r = os.system(s) 22 | if r != 0: 23 | print("ucevent failed", r) 24 | sys.exit(1) 25 | 26 | f = open(logfile, "r") 27 | fields = f.readline().strip().split(",") 28 | for l in f: 29 | vals = l.strip().split(",") 30 | for v, h in zip(vals, fields): 31 | if fields == "timestamp": 32 | continue 33 | try: 34 | num = float(v) 35 | except ValueError: 36 | print(h,v) 37 | continue 38 | if num < 0: 39 | print(h,"negative value",v) 40 | if h.find("_PCT") >= 0 or h.find("PCT_") >= 0: 41 | if num < 0 or num > 1.01: 42 | print(h,"percent out of bound", v) 43 | 44 | os.remove(logfile) 45 | -------------------------------------------------------------------------------- /ucevent/skx_extra.py: -------------------------------------------------------------------------------- 1 | extra_derived = { } 2 | 3 | -------------------------------------------------------------------------------- /ucevent/ucaux.py: -------------------------------------------------------------------------------- 1 | class Aux: 2 | limited_counters = { "r3qpi": 3, "ubox": 2 } 3 | filters = ("filter_nid", "mask", "match", "filter_opc", "filter_state") 4 | DEFAULT_COUNTERS = 4 5 | MAX_RANK = 8 6 | 7 | acronyms = { 8 | "TOR": "Table of Requests, pending transactions", 9 | "FLIT": "80-bit QPI packet", 10 | "RPQ": "Read Queue", 11 | "WPQ": "Write Queue", 12 | "CBO": "Last Level Cache Slice", 13 | "PCU": "Power Control Unit", 14 | "iMC": "Memory Controller", 15 | "HA": "Home Agent", 16 | "QPI_LL": "QPI Link Layer", 17 | } 18 | 19 | qual_alias = { 20 | "nid": "filter_nid", 21 | "opc": "filter_opc", 22 | "all_opc": "filter_all_op", 23 | "nm": "filter_nm", 24 | "not_nm": "filter_not_nm", 25 | "opc0": "filter_opc0", 26 | "opc1": "filter_opc1", 27 | "loc": "filter_loc", 28 | "rem": "filter_rem", 29 | "nc": "filter_nc", 30 | "Q_Py_PCI_PMON_PKT_MATCH0[12:00]": "match0", 31 | "Q_Py_PCI_PMON_PKT_MATCH1[19:16]": "match_rds", 32 | "Q_Py_PCI_PMON_PKT_MASK0[12:0]": "mask0", 33 | "Q_Py_PCI_PMON_PKT_MASK0[17:0]": "mask0", # why both? 34 | "Q_Py_PCI_PMON_PKT_MASK1[19:16]": "mask_rds", 35 | "Q_Py_PCI_PMON_PKT_MATCH0": "match0", 36 | "Q_Py_PCI_PMON_PKT_z_MATCH0": "match0", 37 | "Q_Py_PCI_PMON_PKT_z_MASK0[12:0]": "mask0", # XXX correct? 38 | "Q_Py_PCI_PMON_PKT_z_MASK0[17:0]": "mask0", # XXX correct? 39 | "edge_det": "edge", 40 | "Cn_MSR_PMON_BOX_FILTER.opc": "filter_opc", 41 | "Cn_MSR_PMON_BOX_FILTER0.opc": "filter_opc", 42 | "Cn_MSR_PMON_BOX_FILTER1.opc": "filter_opc", 43 | "Cn_MSR_PMON_BOX_FILTER.state": "filter_state", 44 | "Cn_MSR_PMON_BOX_FILTER0.state": "filter_state", 45 | "Cn_MSR_PMON_BOX_FILTER0.tid": "filter_tid", 46 | "Cn_MSR_PMON_BOX_FILTER0.nc": "filter_nc", 47 | "Cn_MSR_PMON_BOX_FILTER0.nm": "filter_nm", 48 | "Cn_MSR_PMON_BOX_FILTER0.all_opc": "filter_all_op", # XXX 49 | "Cn_MSR_PMON_BOX_FILTER0.opc1": "filter_opc1", 50 | "Cn_MSR_PMON_BOX_FILTER0.opc0": "filter_opc0", 51 | "Cn_MSR_PMON_BOX_FILTER0.loc": "filter_loc", 52 | "Cn_MSR_PMON_BOX_FILTER0.not_nm": "filter_not_nm", 53 | "Cn_MSR_PMON_BOX_FILTER1.nm": "filter_nm", 54 | "Cn_MSR_PMON_BOX_FILTER1.all_opc": "filter_all_op", # XXX 55 | "Cn_MSR_PMON_BOX_FILTER1.opc1": "filter_opc1", 56 | "Cn_MSR_PMON_BOX_FILTER1.opc0": "filter_opc0", 57 | "Cn_MSR_PMON_BOX_FILTER1.loc": "filter_loc", 58 | "Cn_MSR_PMON_BOX_FILTER1.not_nm": "filter_not_nm", 59 | "Q_Py_PCI_PMON_PKT_MATCH0.dnid": "match_dnid", 60 | "Q_Py_PCI_PMON_PKT_z_MATCH0.dnid": "match_dnid", # XXX 61 | "Q_Py_PCI_PMON_PKT_z_MATCH1": "match1", 62 | "Q_Py_PCI_PMON_PKT_z_MASK1": "mask1", 63 | "PCUFilter[7:0]": "filter_band0", 64 | "PCUFilter[15:8]": "filter_band1", 65 | "PCUFilter[23:16]": "filter_band2", 66 | "PCUFilter[31:24]": "filter_band3", 67 | "CBoFilter[31:23]": "filter_opc", 68 | "CBoFilter[17:10]": "filter_nid", 69 | "QPIMatch0[17:0]": "match0", 70 | "QPIMask0[17:0]": "mask0", 71 | "QPIMatch0[12:0]": "match0", 72 | "QPIMask0[12:0]": "mask0", 73 | "QPIMask1[19:16]": "mask_rds", 74 | "QPIMatch1[19:16]": "match_rds", 75 | "CBoFilter[22:18]": "filter_state", 76 | } 77 | 78 | qual_display_alias = { 79 | "QPIMask0[12:0]": "mask_mc, match_opc, match_vnw", 80 | "QPIMatch0[12:0]": "match_mc, match_opc, match_vnw", 81 | "QPIMatch0[17:0]": "match_mc, match_opc, match_vnw, match_dnid", 82 | } 83 | 84 | alias_events = { 85 | "MC_Chy_PCI_PMON_CTR_FIXED": "uncore_imc_INDEX/clockticks/" 86 | } 87 | 88 | clockticks = ( 89 | r"uncore_(cbox|ha|pcu)_?\d*/event=0x0/", 90 | r".*/clockticks/", 91 | r"uncore_(r2pcie|r3qpi)_?\d*/event=0x1/", 92 | r"uncore_qpi(_\d+)?/event=0x14/" 93 | ) 94 | -------------------------------------------------------------------------------- /ucevent/ucmsg.py: -------------------------------------------------------------------------------- 1 | # Handle warnings and errors 2 | # Separate module to avoid circular imports 3 | from __future__ import print_function 4 | import sys 5 | import fnmatch 6 | 7 | quiet = False 8 | debug = None 9 | 10 | def debug_msg(x, y): 11 | if debug and any(map(lambda p: fnmatch.fnmatch(x, p), debug.split(","))): 12 | print("debug:", x + ": " + str(y), file=sys.stderr) 13 | 14 | def warning(x): 15 | if not quiet: 16 | print("WARNING:", x, file=sys.stderr) 17 | -------------------------------------------------------------------------------- /ucevent/uctester: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | EXTRA="" 6 | [ -n "$MOCK" ] && EXTRA=--mock 7 | 8 | $WRAP ./sanity-run.py $EXTRA "PCU.*" sleep 3 9 | 10 | $WRAP ./sanity-run.py $EXTRA "PCU.CLOCKTICKS / KILO" sleep 3 11 | 12 | $WRAP ./sanity-run.py $EXTRA -S0 "PCU.*" sleep 3 13 | 14 | if [ "$(lscpu | awk '/Socket/ { print $2 }' )" -gt 1 ] ; then 15 | 16 | $WRAP ./sanity-run.py $EXTRA -S1 "PCU.*" sleep 3 17 | 18 | fi 19 | 20 | $WRAP ./sanity-run.py $EXTRA --cpu 0 "PCU.*" sleep 3 21 | 22 | # not valid on SKX 23 | #$WRAP ./sanity-run.py $EXTRA --no-sum "CBO.RING_THRU_*_BYTES" sleep 3 24 | 25 | $WRAP ./sanity-run.py $EXTRA '{' PCU.CLOCKTICKS iMC.MEM_BW_READS '}' iMC.CAS_COUNT sleep 3 26 | 27 | set +e # XXX 28 | $WRAP ./ucevent.py $EXTRA --parse-all | grep -i error 29 | $WRAP ./ucevent.py --mock --parse-all | grep -E -i '#EVAL|#DIVZ|error' 30 | 31 | #./ucevent.py --check-events 32 | 33 | # qualifiers 34 | 35 | 36 | -------------------------------------------------------------------------------- /ucevent/uctopy.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # generate python data files from perl input 3 | # uctopl.pl CPU-ACRONYM events.pl derived.pl >cpu_uc.py 4 | use File::Basename; 5 | 6 | $cpu = $ARGV[0]; 7 | shift(@ARGV); 8 | foreach (@ARGV) { 9 | do $_; 10 | } 11 | 12 | $code = <{$i}),",\n"; 46 | } 47 | print "}\n\n"; 48 | 49 | sub format_data($) { 50 | my($data) = (@_); 51 | return $data if ($data =~ /^[0-9]+$/ || $data =~ /^0x[0-9a-fA-F]+$/); 52 | $data =~ s/"/\\"/g; 53 | return addquote($data); 54 | } 55 | 56 | sub to_list($) { 57 | my($l) = (@_); 58 | return $l; 59 | ($a, $b) = $l =~ /(\d+)-(\d+)/; 60 | $o = ""; 61 | for (; $a <= $b; $a++) { 62 | $o += "$a,"; 63 | } 64 | return $o; 65 | } 66 | 67 | sub print_event($$) { 68 | my($name, $ev) = (@_); 69 | 70 | #return if $ev->{'Public'} ne "Y"; 71 | 72 | push(@catlist, $ev->{"Category"}); 73 | 74 | print $indent,$quote,$name,$quote,": {\n"; 75 | foreach $w (sort(keys(%{$ev}))) { 76 | next if $w =~ /Sub[cC]at/; 77 | next if $w eq "Subevents"; 78 | next if $ev->{$w} eq "" && $w ne "Category"; 79 | next if $w eq "OrigName"; 80 | next if $w =~ /([A-Z]+)Status/; 81 | next if $w eq "RTLSignal"; 82 | next if $w eq "Public"; 83 | if ($w eq "Internal") { 84 | $w = "ExtSel"; 85 | } 86 | 87 | $val = $ev->{$w}; 88 | next if $w eq "MaxIncCyc" && ($val == "1" || $val == "0"); 89 | next if $w eq "SubCtr" && $val == "0"; 90 | 91 | $val = to_list($val) if $w eq "Counters" && $val =~ /-/; 92 | 93 | print $indent,$indent, 94 | addquote($w),": ",format_data($val),",\n"; 95 | } 96 | print $indent,"},\n"; 97 | } 98 | 99 | sub print_sub($$$) { 100 | my($box, $j, $sub) = (@_); 101 | foreach $k (keys(%{$sub})) { 102 | $subev = $sub->{$k}; 103 | # put all the fields from the parent 104 | # into the sub event to normalize 105 | foreach $o (keys(%{$ev})) { 106 | next if defined($sub->{$o}); 107 | $subev->{$o} = $ev->{$o}; 108 | } 109 | print_event("$box.$j.$k", $subev); 110 | } 111 | } 112 | 113 | sub print_list($$) { 114 | my($name, $evl) = (@_); 115 | print "$name = {\n"; 116 | foreach $box (keys(%{$evl})) { 117 | $evlist = $evl->{$box}; 118 | $box =~ s/ Box Events//; 119 | $box =~ s/ /_/g; 120 | print $indent,"\n# $box:\n"; 121 | 122 | foreach $j (sort(keys(%{$evlist}))) { 123 | $ev = $evlist->{$j}; 124 | $ev->{"Box"} = $box; 125 | $ev->{"Category"} = $box . " " . $ev->{"Category"}; 126 | print_event("$box.$j", $ev); 127 | print_sub($box, $j, $ev->{"Subcat"}); 128 | print_sub($box, $j, $ev->{"SubCat"}); 129 | print_sub($box, $j, $ev->{"Subevents"}); 130 | } 131 | } 132 | print "}\n"; 133 | } 134 | 135 | print_list("events", $events); 136 | print_list("derived", $derived); 137 | 138 | print "categories = (\n"; 139 | $prev = ""; 140 | foreach $i (sort @catlist) { 141 | next if $i eq $prev; 142 | $prev = $i; 143 | print $indent,addquote($i),",\n"; 144 | } 145 | print ");\n"; 146 | -------------------------------------------------------------------------------- /utilized.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # extract utilized CPUs out of toplev CSV output 3 | # toplev ... -I 1000 --node +CPU_Utilization -x, -o x.csv ... 4 | # utilized.py < x.csv 5 | # note it duplicates the core output 6 | from __future__ import print_function 7 | import argparse 8 | import csv 9 | import sys 10 | import re 11 | import collections 12 | 13 | ap = argparse.ArgumentParser() 14 | ap.add_argument('--min-util', default=10., type=float) 15 | ap.add_argument('file', nargs='?', type=argparse.FileType('r'), default=sys.stdin) 16 | ap.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout) 17 | args = ap.parse_args() 18 | 19 | key = None 20 | 21 | c = csv.reader(args.file) 22 | wr = csv.writer(args.output) 23 | 24 | fields = collections.OrderedDict() 25 | util = collections.defaultdict(list) 26 | 27 | for t in c: 28 | if len(t) < 3 or t[0].startswith("#"): 29 | continue 30 | if t[0] == "Timestamp": 31 | wr.writerow(t) 32 | key = t[1] # XXX handle no -I 33 | if key in fields: 34 | fields[key].append(t) 35 | else: 36 | fields[key] = [t] 37 | if t[2] == "CPU_Utilization": 38 | util[key].append(float(t[3])) 39 | 40 | final = [] 41 | skipped = [] 42 | for j in fields.keys(): 43 | if "-T" not in j and not j.startswith("CPU"): 44 | if "S" in j: 45 | final.append(j) 46 | continue 47 | core = re.sub(r'-T\d+', '', j) 48 | utilization = 100 49 | if len(util[j]) > 0: 50 | utilization = (sum(util[j]) / len(util[j])) * 100. 51 | if utilization >= float(args.min_util): 52 | for k in fields[core] + fields[j]: 53 | wr.writerow(k) 54 | else: 55 | skipped.append(j) 56 | for j in final: 57 | for k in fields[j]: 58 | wr.writerow(k) 59 | print("skipped", " ".join(skipped), file=sys.stderr) 60 | -------------------------------------------------------------------------------- /wl-bottlenecks: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # print bottlenecks for all tests in workloads/* 3 | # arguments: to pass to toplev 4 | for i in workloads/* ; do 5 | echo "`basename $i`:" 6 | ./toplev "$@" --quiet --single-thread --no-version $i 7 | done 8 | -------------------------------------------------------------------------------- /workloads/BC1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo '3^415312' | bc > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/BC2s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo '3^615312' | bc > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/CALC10s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exec calc '3^2421212' > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/CALC1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exec calc '3^721212' > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/CLANG10s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | awk 'BEGIN { 4 | print "extern int f2(void); int func(void) { int i; " 5 | for(i = 0; i < 200000; i++) { 6 | print "i += f2();" 7 | } 8 | print "return i;" 9 | print "}" } ' | 10 | clang -O3 -x c -c -o /dev/null - 11 | -------------------------------------------------------------------------------- /workloads/CLTRAMP3D: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ ! -f tramp3d-v4.cpp ] ; then 4 | echo "Downloading tramp3d-v4" 5 | curl https://raw.githubusercontent.com/microsoft/checkedc-llvm-test-suite/master/MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp | 6 | sed -e 's/mutable Element_t/Element_t/' \ 7 | -e 's/mutable Pooma::Iterate_t/Pooma::Iterate_t/' > tramp3d-v4.cpp 8 | fi 9 | clang++ -w -std=gnu++11 tramp3d-v4.cpp 10 | -------------------------------------------------------------------------------- /workloads/COMPILE10s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | awk 'BEGIN { 4 | print "extern int f2(void); int func(void) { int i; " 5 | for(i = 0; i < 11500; i++) { 6 | print "i += f2();" 7 | } 8 | print "return i;" 9 | print "}" } ' | 10 | gcc -O3 -x c -c -o /dev/null - 11 | -------------------------------------------------------------------------------- /workloads/EMACS1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | emacs --batch -q --eval '(dotimes (i 9000000) 0)' 2> /dev/null 3 | -------------------------------------------------------------------------------- /workloads/GCCTRAMP3D: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ ! -f tramp3d-v4.cpp ] ; then 4 | echo "Downloading tramp3d-v4" 5 | curl https://raw.githubusercontent.com/microsoft/checkedc-llvm-test-suite/master/MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp | 6 | sed -e 's/mutable Element_t/Element_t/' \ 7 | -e 's/mutable Pooma::Iterate_t/Pooma::Iterate_t/' > tramp3d-v4.cpp 8 | fi 9 | g++ -w -std=gnu++11 tramp3d-v4.cpp 10 | -------------------------------------------------------------------------------- /workloads/GITGREP: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git log -G foo > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/GREP: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | grep -rq foobarfoobar /usr/bin /usr/sbin 2>/dev/null 3 | -------------------------------------------------------------------------------- /workloads/GUILE1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | guile -c '(expt 3 55131231)' 4 | -------------------------------------------------------------------------------- /workloads/GZIP: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat $(which perf) $(which perf) $(which perf) $(which perf) | gzip -c > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/MEMHOG: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # backend bound. requires numactl 3 | memhog 1G >/dev/null 4 | -------------------------------------------------------------------------------- /workloads/PERL1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | perl -e 'for($i=0;$i<40000000;$i++) {}' 3 | -------------------------------------------------------------------------------- /workloads/PHP1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | php -r 'for($i=0;$i<150000000;$i++) { ; } ' 3 | -------------------------------------------------------------------------------- /workloads/PYTHON1s: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python3 -c 'for x in range(40000000): pass' 4 | -------------------------------------------------------------------------------- /workloads/XZ: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cat $(which perf) | xz -c > /dev/null 3 | -------------------------------------------------------------------------------- /workloads/ZSTD: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | P=`which perf` 3 | cat $P $P $P $P $P $P $P $P $P $P $P $P $P $P $P | zstd -z --single-thread -9 > /dev/null 4 | --------------------------------------------------------------------------------