├── README.md ├── cpucache ├── icache ├── pmcarch ├── pmcipc ├── resstalls └── tlbstat /README.md: -------------------------------------------------------------------------------- 1 | # pmc-cloud-tools 2 | 3 | Linux tools for measuring PMCs (Performance Monitoring Counters) in the cloud. 4 | 5 | These are works-in-progress for analyzing some production issues, so the output may be adjusted as needed and more options added to these tools. 6 | 7 | ## Architectural PMCs 8 | 9 | These are seven PMCs that are enabled on some newer AWS EC2 types, eg, m4.16xl. See The PMCs of EC2. 10 | 11 | ### pmcarch 12 | 13 | Summarizes the architectural PMCs. Eg: 14 | 15 |
 16 | # ./pmcarch
 17 | K_CYCLES   K_INSTR      IPC BR_RETIRED   BR_MISPRED  BMR% LLCREF      LLCMISS     LLC%
 18 | 96163187   87166313    0.91 19730994925  679187299   3.44 656597454   174313799  73.45
 19 | 93988372   87205023    0.93 19669256586  724072315   3.68 666041693   169603955  74.54
 20 | 93863787   86981089    0.93 19548779510  669172769   3.42 649844207   176100680  72.90
 21 | 93739565   86349653    0.92 19339320671  634063527   3.28 642506778   181385553  71.77
 22 | 94495981   86815232    0.92 19482504710  648954409   3.33 628548666   180975066  71.21
 23 | 
24 | 25 | - `K_CYCLES`: CPU Cycles x 1000 26 | - `K_INSTR`: CPU Instructions x 1000 27 | - `IPC`: Instructions-Per-Cycle 28 | - `BMR%`: Branch Misprediction Ratio, as a percentage 29 | - `LLC%`: Last Level Cache hit ratio, as a percentage 30 | 31 | USAGE: 32 | 33 |
 34 | # ./pmcarch -h
 35 | USAGE: pmcarch {-C CPU | -p PID | -c CMD} [interval [duration]]
 36 |                  -C CPU         # measure this CPU only
 37 |                  -p PID         # measure this PID only
 38 |                  -c 'CMD'       # measure this command only (quote it)
 39 |                  interval       # output interval in secs (default 1)
 40 |                  duration       # total seconds (default infinityish)
 41 |   eg,
 42 |        pmcarch                  # show stats across all CPUs
 43 |        pmcarch 5                # show stats every 5 seconds
 44 |        pmcarch -C 0             # measure CPU 0 only
 45 |        pmcarch -p 181           # measure PID 181 only
 46 |        pmcarch -c 'cksum /boot/*'  # measure run and measure this cmd
 47 | 
48 | 49 | ## Extended PMCs 50 | 51 | These are available on some AWS Nitro hypervisor instance types. Eg, c5.9xl. 52 | 53 | ### tlbstat 54 | 55 | Summarizes TLB (Translation Lookaside Buffer) statistics (an MMU cache). Eg: 56 | 57 |
 58 | # ./tlbstat
 59 | K_CYCLES   K_INSTR      IPC DTLB_WALKS ITLB_WALKS K_DTLBCYC  K_ITLBCYC  DTLB% ITLB%
 60 | 93091508   86971957    0.93 135562325  45028565   3053416    1121015     3.28  1.20
 61 | 94354781   88708445    0.94 136898873  49736338   3135383    1199061     3.32  1.27
 62 | 94274360   88372901    0.94 138668282  48503863   3200281    1194388     3.39  1.27
 63 | 92040379   86763153    0.94 133141376  44859310   3060742    1118921     3.33  1.22
 64 | 92152495   87144845    0.95 135446984  50308740   3156780    1200598     3.43  1.30
 65 | 
66 | 67 | - `K_CYCLES`: CPU Cycles x 1000 68 | - `K_INSTR`: CPU Instructions x 1000 69 | - `IPC`: Instructions-Per-Cycle 70 | - `DTLB_WALKS`: Data TLB walks (count) 71 | - `ITLB_WALKS`: Instruction TLB walks (count) 72 | - `K_DTLBCYC`: Cycles in which at least one PMH (page miss handler) is active with data TLB walks x 1000 73 | - `K_ITLBCYC`: Cycles in which at least one PMH is active with instr. TLB walks x 1000 74 | - `DTLB%`: Data TLB active cycles as a percentage of total cycles 75 | - `ITLB%`: Instruction TLB active cycles as a percentage of total cycles 76 | 77 | USAGE: 78 | 79 |
 80 | # ./tlbstat -h
 81 | USAGE: tlbstat {-C CPU | -p PID | -c CMD} [interval [duration]]
 82 |                  -C CPU         # measure this CPU only
 83 |                  -p PID         # measure this PID only
 84 |                  -c 'CMD'       # measure this command only (quote it)
 85 |                  interval       # output interval in secs (default 1)
 86 |                  duration       # total seconds (default infinityish)
 87 |   eg,
 88 |        tlbstat                  # show stats across all CPUs
 89 |        tlbstat 5                # show stats every 5 seconds
 90 |        tlbstat -C 0             # measure CPU 0 only
 91 |        tlbstat -p 181           # measure PID 181 only
 92 |        tlbstat -c 'cksum /boot/*'  # measure run and measure this cmd
 93 | 
94 | 95 | ## ALL PMCs 96 | 97 | These may not work on the cloud yet, and may be works-in-progress. They may also require more recent Linux versions for the PMC aliases. 98 | 99 | ### cpucache 100 | 101 | Summarizes CPU L1/L2/LLC cache hit ratios. Eg: 102 | 103 |
104 | # ./cpucache
105 | All counter columns are x 1000
106 | CYCLES     INSTR        IPC L1DREF    L1DMISS    L1D% L2REF    L2MISS     L2% LLCREF   LLCMISS   LLC%
107 | 55827883   44795258    0.80 11674473  98067     99.16 161026   78908    51.00 97522    31338    67.87
108 | 55841905   44803990    0.80 11685292  98216     99.16 162127   79934    50.70 98810    30975    68.65
109 | 55832518   44838601    0.80 11700599  98181     99.16 160586   78725    50.98 96895    31497    67.49
110 | 55832741   44831064    0.80 11700963  98102     99.16 160849   78377    51.27 97296    31477    67.65
111 | 55832729   44763451    0.80 11672814  98216     99.16 163742   80630    50.76 99961    30679    69.31
112 | 55847526   44815072    0.80 11691652  98299     99.16 162176   79552    50.95 99092    30798    68.92
113 | 55832810   44748131    0.80 11667573  98132     99.16 163209   82497    49.45 101771   31035    69.50
114 | 55832325   44788718    0.80 11670262  98459     99.16 161834   79713    50.74 97504    31057    68.15
115 | 55832453   44795688    0.80 11682541  98069     99.16 162384   77339    52.37 95222    30347    68.13
116 | 55832224   44749672    0.80 11688124  98007     99.16 163076   79868    51.02 100960   30842    69.45
117 | 55837742   44793089    0.80 11689272  98273     99.16 161173   79280    50.81 97667    31776    67.46
118 | [...]
119 | 
120 | 121 | - `CYCLES`: CPU Cycles x 1000 122 | - `INSTR`: CPU Instructions x 1000 123 | - `IPC`: Instructions-Per-Cycle 124 | - `L1DREF`: Level 1 data cache loads x 1000 125 | - `L1DMISS`: Level 1 data cache load misses x 1000 126 | - `L1D%`: Level 1 data cache hit ratio, as a percentage 127 | - `L2REF`: Level 2 requests x 1000 128 | - `L2MISS`: Level 2 misses x 1000 129 | - `L2%`: Level 2 hit ratio, as a percentage 130 | - `LLCREF`: Last Level Cache references x 1000 131 | - `LLCMISS`: Last Level Cache misses x 1000 132 | - `LLC%`: Last Level Cache hit ratio, as a percentage 133 | 134 | USAGE: 135 | 136 |
137 | # ./cpucache -h
138 | USAGE: cpucache {-C CPU | -p PID | -c CMD} [interval [duration]]
139 |                  -C CPU         # measure this CPU only
140 |                  -p PID         # measure this PID only
141 |                  -c 'CMD'       # measure this command only (quote it)
142 |                  interval       # output interval in secs (default 1)
143 |                  duration       # total seconds (default infinityish)
144 |   eg,
145 |        cpucache                 # show stats across all CPUs
146 |        cpucache 5               # show stats every 5 seconds
147 |        cpucache -C 0            # measure CPU 0 only
148 |        cpucache -p 181          # measure PID 181 only
149 |        cpucache -c 'cksum /boot/*' # measure run and measure this cmd
150 | 
151 | -------------------------------------------------------------------------------- /cpucache: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # cpucache - measure and summarize CPU cache references (L1/L2/LLC). 4 | # Uses Linux perf and PMCs. 5 | # 6 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 7 | # 8 | # USAGE: cpucache {-C CPU | -p PID | -c CMD} [interval [duration]] 9 | # 10 | # Columns: 11 | # 12 | # - CYCLES: CPU Cycles x 1000 13 | # - INSTR: CPU Instructions x 1000 14 | # - IPC: Instructions-Per-Cycle 15 | # - L1DREF: Level 1 data cache loads x 1000 16 | # - L1DMISS: Level 1 data cache load misses x 1000 17 | # - L1D%: Level 1 data cache hit ratio, as a percentage 18 | # - L2REF: Level 2 requests x 1000 19 | # - L2MISS: Level 2 misses x 1000 20 | # - L2%: Level 2 hit ratio, as a percentage 21 | # - LLCREF: Last Level Cache references x 1000 22 | # - LLCMISS: Last Level Cache misses x 1000 23 | # - LLC%: Last Level Cache hit ratio, as a percentage 24 | # 25 | # Copyright 2018 Netflix, Inc. 26 | # Licensed under the Apache License, Version 2.0 (the "License") 27 | # 28 | # 12-Jan-2018 Brendan Gregg Created this. 
29 | 30 | function usage { 31 | cat <<-END >&2 32 | USAGE: cpucache {-C CPU | -p PID | -c CMD} [interval [duration]] 33 | -C CPU # measure this CPU only 34 | -p PID # measure this PID only 35 | -c 'CMD' # measure this command only (quote it) 36 | interval # output interval in secs (default 1) 37 | duration # total seconds (default infinityish) 38 | eg, 39 | cpucache # show stats across all CPUs 40 | cpucache 5 # show stats every 5 seconds 41 | cpucache -C 0 # measure CPU 0 only 42 | cpucache -p 181 # measure PID 181 only 43 | cpucache -c 'cksum /boot/*' # measure run and measure this cmd 44 | END 45 | exit 46 | } 47 | 48 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 49 | 50 | while getopts C:p:c:h opt 51 | do 52 | case $opt in 53 | C) opt_cpu=1; cpu=$OPTARG ;; 54 | p) opt_pid=1; pid=$OPTARG ;; 55 | c) opt_cmd=1; cmd=$OPTARG ;; 56 | h|?) usage ;; 57 | esac 58 | done 59 | shift $(( $OPTIND - 1 )) 60 | 61 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 62 | echo >&2 "ERROR: pick one of -C, -p, -c" 63 | usage 64 | fi 65 | secs=${1:-1} # default 1 second 66 | duration=${2:-999999999} # default semi-infinite seconds 67 | hlines=25 # lines to repeat header 68 | target="-a sleep $duration" # all CPUs; the sleep bounds the run to duration 69 | (( opt_cpu )) && target="-C $cpu sleep $duration" 70 | (( opt_pid )) && target="-p $pid sleep $duration" 71 | (( opt_cmd )) && target="$cmd" 72 | 73 | if (( opt_pid )); then 74 | if [ ! -d "/proc/$pid" ]; then 75 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 
76 | exit 1 77 | fi 78 | fi 79 | 80 | # note that instructions is last on purpose, it triggers output 81 | # cycles are twice as a workaround for an issue 82 | # the r4f2e and r412e counters are from the architectural set, so should be stable 83 | echo "All counter columns are x 1000" 84 | perf stat -e cycles -e cycles \ 85 | -e L1-dcache-loads -e L1-dcache-load-misses \ 86 | -e l2_rqsts.references -e l2_rqsts.miss \ 87 | -e r4f2e -e r412e -e instructions \ 88 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 89 | BEGIN { 90 | htxt = sprintf("%-10s %-10s %5s %-9s %-9s %5s %-8s %-8s %5s %-8s %-8s %5s", 91 | "CYCLES", "INSTR", "IPC", "L1DREF", "L1DMISS", "L1D%", 92 | "L2REF", "L2MISS", "L2%", "LLCREF", "LLCMISS", "LLC%"); 93 | print htxt 94 | header = hlines 95 | } 96 | /invalid/ { print $0 } # unsupported event 97 | { gsub(/,/, ""); } 98 | $3 == "cycles" { cycles = $2 } 99 | $3 == "r4f2e" { llcref = $2 } 100 | $3 == "r412e" { llcmiss = $2 } 101 | $3 == "L1-dcache-loads" { l1dload = $2 } 102 | $3 == "L1-dcache-load-misses" { l1dloadmiss = $2 } 103 | $3 == "l2_rqsts.references" { l2ref = $2 } 104 | $3 == "l2_rqsts.miss" { l2miss = $2 } 105 | $3 == "instructions" { 106 | if (--header == 0) { 107 | print htxt 108 | header = hlines 109 | } 110 | 111 | ins = $2 112 | # if we are missing refs, then also reset misses to zero, to avoid a bogus ratio: 113 | if (l1dload == 0) { l1dload = 1; l1dloadmiss = 0 } 114 | if (l2ref == 0) { l2ref = 1; l2miss = 0 } 115 | if (llcref == 0) { llcref = 1; llcmiss = 0 } 116 | if (cycles == 0) { cycles = 1; ins = 0 } # PMCs are broken, or no events 117 | l1dratio = 100 * (l1dload - l1dloadmiss) / l1dload 118 | l2ratio = 100 * (l2ref - l2miss) / l2ref 119 | llcratio = 100 * (llcref - llcmiss) / llcref 120 | 121 | printf("%-10d %-10d %5.2f %-9d %-9d %5.2f %-8d %-8d %5.2f %-8d %-8d %5.2f\n", 122 | cycles / 1000, ins / 1000, ins / cycles, 123 | l1dload / 1000, l1dloadmiss / 1000, l1dratio, 124 | l2ref / 1000, l2miss / 1000, l2ratio, 
llcref / 1000, llcmiss / 1000, llcratio) 125 | } 126 | ' 127 | -------------------------------------------------------------------------------- /icache: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # icache - measure instruction cache usage. 4 | # Uses Linux perf and PMCs. 5 | # 6 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 7 | # 8 | # USAGE: icache {-C CPU | -p PID | -c CMD} [interval [duration]] 9 | # 10 | # Columns: 11 | # 12 | # - INS: Instructions x 1000 13 | # - L1IMISS: Level 1 instruction cache misses x 1000 14 | # - L1I%: Level 1 instruction cache hit ratio percent (L1MISS/INS) 15 | # - L2IHIT: Level 2 instruction cache hits x 1000 16 | # - L2IMISS: Level 2 instruction cache misses x 1000 17 | # - L2I%: Level 2 instruction cache hit ratio percent 18 | # - LLCREF: Last Level Cache references x 1000 19 | # - LLCMISS: Last Level Cache misses x 1000 20 | # - LLC%: Last Level Cache hit ratio percent 21 | # 22 | # Copyright 2020 Netflix, Inc. 23 | # Licensed under the Apache License, Version 2.0 (the "License") 24 | # 25 | # 24-Jan-2020 Brendan Gregg Created this. 
26 | 27 | function usage { 28 | cat <<-END >&2 29 | USAGE: icache {-C CPU | -p PID | -c CMD} [interval [duration]] 30 | -C CPU # measure this CPU only 31 | -p PID # measure this PID only 32 | -c 'CMD' # measure this command only (quote it) 33 | interval # output interval in secs (default 1) 34 | duration # total seconds (default infinityish) 35 | eg, 36 | icache # show stats across all CPUs 37 | icache 5 # show stats every 5 seconds 38 | icache -C 0 # measure CPU 0 only 39 | icache -p 181 # measure PID 181 only 40 | icache -c 'cksum /boot/*' # measure run and measure this cmd 41 | END 42 | exit 43 | } 44 | 45 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 46 | 47 | while getopts C:p:c:h opt 48 | do 49 | case $opt in 50 | C) opt_cpu=1; cpu=$OPTARG ;; 51 | p) opt_pid=1; pid=$OPTARG ;; 52 | c) opt_cmd=1; cmd=$OPTARG ;; 53 | h|?) usage ;; 54 | esac 55 | done 56 | shift $(( $OPTIND - 1 )) 57 | 58 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 59 | echo >&2 "ERROR: pick one of -C, -p, -c" 60 | usage 61 | fi 62 | secs=${1:-1} # default 1 second 63 | duration=${2:-999999999} # default semi-infinite seconds 64 | hlines=25 # lines to repeat header 65 | target="-a sleep $duration" # all CPUs; the sleep bounds the run to duration 66 | (( opt_cpu )) && target="-C $cpu sleep $duration" 67 | (( opt_pid )) && target="-p $pid sleep $duration" 68 | (( opt_cmd )) && target="$cmd" 69 | 70 | if (( opt_pid )); then 71 | if [ ! -d "/proc/$pid" ]; then 72 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 
73 | exit 1 74 | fi 75 | fi 76 | 77 | # note that cache-misses is last on purpose, it triggers output 78 | # r2424 and r4424 are raw L2_RQSTS.CODE_RD_MISS/HIT events; they are not from 79 | # the architectural set, so may not be available on every CPU or instance type 80 | echo "All counter columns are x 1000" 81 | perf stat -e instructions,L1-icache-load-misses,r2424,r4424,cache-references,cache-misses \ 82 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 83 | BEGIN { 84 | htxt = sprintf("%-10s %10s %5s %9s %9s %5s %8s %8s %5s", 85 | "INS", "L1IMISS", "L1I%", "L2IHIT", "L2IMISS", "L2I%", 86 | "LLCREF", "LLCMISS", "LLC%"); 87 | print htxt 88 | header = hlines 89 | } 90 | /invalid/ { print $0 } # unsupported event 91 | { gsub(/,/, ""); } 92 | $3 == "instructions" { ins = $2 } 93 | $3 == "L1-icache-load-misses" { l1im = $2 } 94 | $3 == "r2424" { l2im = $2 } # L2_RQSTS.CODE_RD_MISS 95 | $3 == "r4424" { l2ih = $2 } # L2_RQSTS.CODE_RD_HIT 96 | $3 == "cache-references" { llcr = $2 } 97 | $3 == "cache-misses" { # last one, trigger output 98 | llcm = $2 99 | if (--header == 0) { 100 | print htxt 101 | header = hlines 102 | } 103 | 104 | # if a reference count is missing, force the denominator to 1 and the numerator to 0, to avoid a divide by zero or bogus ratio: 105 | if (ins == 0) { ins = 1; l1im = 0 } 106 | if (l2ih + l2im == 0) { l2im = 1; l2ih = 0 } # only when there were no L2 code reads at all; zero misses with hits is a valid 100% 107 | if (llcr == 0) { llcr = 1; llcm = 0 } 108 | l1iratio = 100 * (ins - l1im) / ins 109 | l2iratio = 100 * l2ih / (l2ih + l2im) 110 | llcratio = 100 * (llcr - llcm) / llcr 111 | 112 | printf("%-10d %10d %5.2f %9d %9d %5.2f %8d %8d %5.2f\n", 113 | ins / 1000, l1im / 1000, l1iratio, l2ih / 1000, l2im / 1000, l2iratio, 114 | llcr / 1000, llcm / 1000, llcratio); 115 | } 116 | ' 117 | -------------------------------------------------------------------------------- /pmcarch: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # pmcarch - measure and summarize architectural PMCs. Uses Linux perf. 
4 | # 5 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 6 | # 7 | # USAGE: pmcarch {-C CPU | -p PID | -c CMD} [interval [duration]] 8 | # 9 | # PMCs are performance monitoring counters. This script uses the architectural 10 | # set to report on: 11 | # 12 | # - K_CYCLES: CPU Cycles x 1000 13 | # - K_INSTR: CPU Instructions x 1000 14 | # - IPC: Instructions-Per-Cycle 15 | # - BMR%: Branch Misprediction Ratio, as a percentage 16 | # - LLC%: Last Level Cache hit ratio, as a percentage 17 | # 18 | # Copyright 2017 Netflix, Inc. 19 | # Licensed under the Apache License, Version 2.0 (the "License") 20 | # 21 | # 07-Apr-2017 Brendan Gregg Created this. 22 | 23 | function usage { 24 | cat <<-END >&2 25 | USAGE: pmcarch {-C CPU | -p PID | -c CMD} [interval [duration]] 26 | -C CPU # measure this CPU only 27 | -p PID # measure this PID only 28 | -c 'CMD' # measure this command only (quote it) 29 | interval # output interval in secs (default 1) 30 | duration # total seconds (default infinityish) 31 | eg, 32 | pmcarch # show stats across all CPUs 33 | pmcarch 5 # show stats every 5 seconds 34 | pmcarch -C 0 # measure CPU 0 only 35 | pmcarch -p 181 # measure PID 181 only 36 | pmcarch -c 'cksum /boot/*' # measure run and measure this cmd 37 | END 38 | exit 39 | } 40 | 41 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 42 | 43 | while getopts C:p:c:h opt 44 | do 45 | case $opt in 46 | C) opt_cpu=1; cpu=$OPTARG ;; 47 | p) opt_pid=1; pid=$OPTARG ;; 48 | c) opt_cmd=1; cmd=$OPTARG ;; 49 | h|?) 
usage ;; 50 | esac 51 | done 52 | shift $(( $OPTIND - 1 )) 53 | 54 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 55 | echo >&2 "ERROR: pick one of -C, -p, -c" 56 | usage 57 | fi 58 | secs=${1:-1} # default 1 second 59 | duration=${2:-999999999} # default semi-infinite seconds 60 | hlines=25 # lines to repeat header 61 | target=-a 62 | (( opt_cpu )) && target="-C $cpu sleep $duration" 63 | (( opt_pid )) && target="-p $pid sleep $duration" 64 | (( opt_cmd )) && target="$cmd" 65 | 66 | if (( opt_pid )); then 67 | if [ ! -d /proc/$pid ]; then 68 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 69 | exit 70 | fi 71 | fi 72 | 73 | # note that instructions is last on purpose, it triggers output 74 | # order of PMCs attempts to workaround a 3.13 issue 75 | # cycles are twice as a workaround for an issue 76 | # the raw counters are from the architectural set, so should be stable 77 | perf stat -e cycles -e cycles \ 78 | -e r4f2e -e r412e \ 79 | -e r00c4 -e r00c5 -e instructions \ 80 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 81 | BEGIN { 82 | htxt = sprintf("%-10s %-10s %5s %-12s %-10s %5s %-11s %-10s %5s", 83 | "K_CYCLES", "K_INSTR", "IPC", "BR_RETIRED", "BR_MISPRED", "BMR%", 84 | "LLCREF", "LLCMISS", "LLC%"); 85 | print htxt 86 | header = hlines 87 | } 88 | /invalid/ { print $0 } # unsupported event 89 | { gsub(/,/, ""); } 90 | $3 == "cycles" { cycles = $2; } 91 | $3 == "r4f2e" { llcref = $2; } 92 | $3 == "r412e" { llcmiss = $2; } 93 | $3 == "r00c4" { brretired = $2; } 94 | $3 == "r00c5" { 95 | brmiss = $2 96 | if (brretired > 0) { 97 | brratio = (100 * brmiss) / brretired; 98 | } else { 99 | brratio = 0; 100 | } 101 | } 102 | $3 == "instructions" { 103 | if (--header == 0) { 104 | print htxt 105 | header = hlines 106 | } 107 | ins = $2 108 | if (llcref == 0) { llcref = 1; llcmiss = 0 } 109 | if (cycles == 0) { cycles = 1 } # PMCs are broken, or no events 110 | 111 | printf("%-10d %-10d %5.2f %-12d %-10d %5.2f %-11d %-10d %5.2f\n", 112 | cycles / 
1000, ins / 1000, ins / cycles, brretired, brmiss, 113 | brratio, llcref, llcmiss, 100 * (llcref - llcmiss) / llcref) 114 | } 115 | ' 116 | -------------------------------------------------------------------------------- /pmcipc: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # pmcipc - measure and summarize instructions per cycle. Uses Linux perf. 4 | # 5 | # This deliberately measures IPC only, for instances where measuring more than 6 | # a few PMCs leads to bogus measurements. Cross-check with pmcarch and 7 | # perf stat. 8 | # 9 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 10 | # 11 | # USAGE: pmcipc {-C CPU | -p PID | -c CMD} [interval [duration]] 12 | # 13 | # PMCs are performance monitoring counters. This tool reports: 14 | # 15 | # - K_CYCLES: CPU Cycles x 1000 16 | # - K_INSTR: CPU Instructions x 1000 17 | # - IPC: Instructions-Per-Cycle 18 | # 19 | # Copyright 2019 Netflix, Inc. 20 | # Licensed under the Apache License, Version 2.0 (the "License") 21 | # 22 | # 04-Oct-2019 Brendan Gregg Created this. 23 | 24 | function usage { 25 | cat <<-END >&2 26 | USAGE: pmcipc {-C CPU | -p PID | -c CMD} [interval [duration]] 27 | -C CPU # measure this CPU only 28 | -p PID # measure this PID only 29 | -c 'CMD' # measure this command only (quote it) 30 | interval # output interval in secs (default 1) 31 | duration # total seconds (default infinityish) 32 | eg, 33 | pmcipc # show stats across all CPUs 34 | pmcipc 5 # show stats every 5 seconds 35 | pmcipc -C 0 # measure CPU 0 only 36 | pmcipc -p 181 # measure PID 181 only 37 | pmcipc -c 'cksum /boot/*' # measure run and measure this cmd 38 | END 39 | exit 40 | } 41 | 42 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 43 | 44 | while getopts C:p:c:h opt 45 | do 46 | case $opt in 47 | C) opt_cpu=1; cpu=$OPTARG ;; 48 | p) opt_pid=1; pid=$OPTARG ;; 49 | c) opt_cmd=1; cmd=$OPTARG ;; 50 | h|?) 
usage ;; 51 | esac 52 | done 53 | shift $(( $OPTIND - 1 )) 54 | 55 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 56 | echo >&2 "ERROR: pick one of -C, -p, -c" 57 | usage 58 | fi 59 | secs=${1:-1} # default 1 second 60 | duration=${2:-999999999} # default semi-infinite seconds 61 | hlines=25 # lines to repeat header 62 | target=-a 63 | (( opt_cpu )) && target="-C $cpu sleep $duration" 64 | (( opt_pid )) && target="-p $pid sleep $duration" 65 | (( opt_cmd )) && target="$cmd" 66 | 67 | if (( opt_pid )); then 68 | if [ ! -d /proc/$pid ]; then 69 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 70 | exit 71 | fi 72 | fi 73 | 74 | # note that instructions is last on purpose, it triggers output 75 | # order of PMCs attempts to workaround a 3.13 issue. 76 | # cycles are twice as a workaround for an issue. 77 | # only three (<=4) PMCs are used to workaround another issue. 78 | # the raw counters are from the architectural set, so should be stable. 79 | perf stat -e cycles -e cycles -e instructions \ 80 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 81 | BEGIN { 82 | htxt = sprintf("%-10s %-10s %5s", "K_CYCLES", "K_INSTR", "IPC"); 83 | print htxt 84 | header = hlines 85 | } 86 | /invalid/ { print $0 } # unsupported event 87 | { gsub(/,/, ""); } 88 | $3 == "cycles" { cycles = $2; } 89 | $3 == "instructions" { 90 | if (--header == 0) { 91 | print htxt 92 | header = hlines 93 | } 94 | ins = $2 95 | if (llcref == 0) { llcref = 1; llcmiss = 0 } 96 | if (cycles == 0) { cycles = 1 } # PMCs are broken, or no events 97 | 98 | printf("%-10d %-10d %5.2f\n", 99 | cycles / 1000, ins / 1000, ins / cycles) 100 | } 101 | ' 102 | -------------------------------------------------------------------------------- /resstalls: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # resstalls - measure and summarize resource stalls. 4 | # Uses Linux perf and PMCs. 
5 | # 6 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 7 | # 8 | # USAGE: resstalls {-C CPU | -p PID | -c CMD} [interval [duration]] 9 | # 10 | # Columns: 11 | # 12 | # - ALL: Resource stalls x 1M 13 | # - LOAD: Load buffer stalls x 1M 14 | # - PIPE: Instruction pipeline reservation station full stalls x 1M 15 | # - STORE: Store related stall x 1M 16 | # - ROB: Re-order buffer full store x 1M 17 | # - FLOAT: Floating point unit control word stall x 1M 18 | # - MXCSR: MXCSR register rename stall x 1M 19 | # - OTHER: Other stall reasons x 1M 20 | # - SUM: Sum of all columns except ALL 21 | # 22 | # Copyright 2020 Netflix, Inc. 23 | # Licensed under the Apache License, Version 2.0 (the "License") 24 | # 25 | # 24-Jan-2020 Brendan Gregg Created this. 26 | 27 | function usage { 28 | cat <<-END >&2 29 | USAGE: resstalls {-C CPU | -p PID | -c CMD} [interval [duration]] 30 | -C CPU # measure this CPU only 31 | -p PID # measure this PID only 32 | -c 'CMD' # measure this command only (quote it) 33 | interval # output interval in secs (default 1) 34 | duration # total seconds (default infinityish) 35 | eg, 36 | resstalls # show stats across all CPUs 37 | resstalls 5 # show stats every 5 seconds 38 | resstalls -C 0 # measure CPU 0 only 39 | resstalls -p 181 # measure PID 181 only 40 | resstalls -c 'cksum /boot/*' # measure run and measure this cmd 41 | END 42 | exit 43 | } 44 | 45 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 46 | 47 | while getopts C:p:c:h opt 48 | do 49 | case $opt in 50 | C) opt_cpu=1; cpu=$OPTARG ;; 51 | p) opt_pid=1; pid=$OPTARG ;; 52 | c) opt_cmd=1; cmd=$OPTARG ;; 53 | h|?) 
usage ;; 54 | esac 55 | done 56 | shift $(( $OPTIND - 1 )) 57 | 58 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 59 | echo >&2 "ERROR: pick one of -C, -p, -c" 60 | usage 61 | fi 62 | secs=${1:-1} # default 1 second 63 | duration=${2:-999999999} # default semi-infinite seconds 64 | hlines=25 # lines to repeat header 65 | target=-a 66 | (( opt_cpu )) && target="-C $cpu sleep $duration" 67 | (( opt_pid )) && target="-p $pid sleep $duration" 68 | (( opt_cmd )) && target="$cmd" 69 | 70 | if (( opt_pid )); then 71 | if [ ! -d /proc/$pid ]; then 72 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 73 | exit 74 | fi 75 | fi 76 | 77 | # note that instructions is last on purpose, it triggers output 78 | # cycles are twice as a workaround for an issue 79 | # the r4f2e and r412e counters are from the architectural set, so should be stable 80 | echo "All counter columns are x 1000000" 81 | perf stat -e resource_stalls.any,r02a2,r04a2,r08a2,r10a2,r20a2,r40a2,r80a2 \ 82 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 83 | BEGIN { 84 | htxt = sprintf("%8s %8s %8s %8s %8s %8s %8s %8s %8s", 85 | "ALL", "LOAD", "PIPE", "STORE", "ROB", "FLOAT", 86 | "MXCSR", "OTHER", "SUM"); 87 | print htxt 88 | header = hlines 89 | } 90 | /invalid/ { print $0 } # unsupported event 91 | { gsub(/,/, ""); } 92 | $3 == "resource_stalls.any" { rsany = $2 } # RESOURCE_STALLS.ANY 93 | $3 == "r02a2" { rsload = $2 } # RESOURCE_STALLS.LOAD 94 | $3 == "r04a2" { rspipe = $2 } # RESOURCE_STALLS.RS_FULL 95 | $3 == "r08a2" { rsstore = $2 } # RESOURCE_STALLS.STORE 96 | $3 == "r10a2" { rsrob = $2 } # RESOURCE_STALLS.ROB_FULL 97 | $3 == "r20a2" { rsfpcw = $2 } # RESOURCE_STALLS.FPCW 98 | $3 == "r40a2" { rsmxcsr = $2 } # RESOURCE_STALLS.MXCSR 99 | $3 == "r80a2" { # last one, trigger output 100 | rsother = $2 # RESOURCE_STALLS.OTHER 101 | if (--header == 0) { 102 | print htxt 103 | header = hlines 104 | } 105 | 106 | rssum = rsload + rspipe + rsstore + rsrob + rsfpcw + rsmxcsr + rsother 107 | 108 | 
printf("%8.1f %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n", 109 | rsany / 1000000, rsload / 1000000, rspipe / 1000000, 110 | rsstore / 1000000, rsrob / 1000000, rsfpcw / 1000000, 111 | rsmxcsr / 1000000, rsother / 1000000, rssum / 1000000) 112 | } 113 | ' 114 | -------------------------------------------------------------------------------- /tlbstat: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # tlbstat - measure and summarize TLBs. Uses Linux perf and PMCs. 4 | # 5 | # FROM: https://github.com/brendangregg/pmc-cloud-tools 6 | # 7 | # TLB: Translation Lookaside Buffer (the MMU page translation cache) 8 | # PMC: Performance Monitoring Counters 9 | # 10 | # USAGE: tlbstat {-C CPU | -p PID | -c CMD} [interval [duration]] 11 | # 12 | # REQUIREMENTS: perf command, TLB PMCs (counters listed below) which may 13 | # only be provided by Linux 4.14-ish and newer. 14 | # 15 | # Columns: 16 | # 17 | # - K_CYCLES: CPU Cycles x 1000 18 | # - K_INSTR: CPU Instructions x 1000 19 | # - IPC: Instructions-Per-Cycle 20 | # - DTLB_WALKS: Data TLB walks (count) 21 | # - ITLB_WALKS: Instruction TLB walks (count) 22 | # - K_DTLBCYC: Cycles at least one PMH is active with data TLB walks x 1000 23 | # - K_ITLBCYC: Cycles at least one PMH is active with instr. TLB walks x 1000 24 | # - DTLB%: Data TLB active cycles as a ratio of total cycles 25 | # - ITLB%: Instruction TLB active cycles as a ratio of total cycles 26 | # 27 | # Copyright 2018 Netflix, Inc. 28 | # Licensed under the Apache License, Version 2.0 (the "License") 29 | # 30 | # 08-Jan-2018 Brendan Gregg Created this. 
31 | 32 | function usage { 33 | cat <<-END >&2 34 | USAGE: tlbstat {-C CPU | -p PID | -c CMD} [interval [duration]] 35 | -C CPU # measure this CPU only 36 | -p PID # measure this PID only 37 | -c 'CMD' # measure this command only (quote it) 38 | interval # output interval in secs (default 1) 39 | duration # total seconds (default infinityish) 40 | eg, 41 | tlbstat # show stats across all CPUs 42 | tlbstat 5 # show stats every 5 seconds 43 | tlbstat -C 0 # measure CPU 0 only 44 | tlbstat -p 181 # measure PID 181 only 45 | tlbstat -c 'cksum /boot/*' # measure run and measure this cmd 46 | END 47 | exit 48 | } 49 | 50 | opt_cpu=0; opt_pid=0; opt_cmd=0; cpu=""; pid=""; cmd="" 51 | 52 | while getopts C:p:c:h opt 53 | do 54 | case $opt in 55 | C) opt_cpu=1; cpu=$OPTARG ;; 56 | p) opt_pid=1; pid=$OPTARG ;; 57 | c) opt_cmd=1; cmd=$OPTARG ;; 58 | h|?) usage ;; 59 | esac 60 | done 61 | shift $(( $OPTIND - 1 )) 62 | 63 | if (( opt_cpu + opt_pid + opt_cmd > 1 )); then 64 | echo >&2 "ERROR: pick one of -C, -p, -c" 65 | usage 66 | fi 67 | secs=${1:-1} # default 1 second (also when given as an empty string) 68 | duration=${2:-999999999} # default semi-infinite seconds 69 | hlines=25 # lines to repeat header 70 | target="-a sleep $duration" # all CPUs; the sleep bounds the run to duration 71 | (( opt_cpu )) && target="-C $cpu sleep $duration" 72 | (( opt_pid )) && target="-p $pid sleep $duration" 73 | (( opt_cmd )) && target="$cmd" 74 | 75 | if (( opt_pid )); then 76 | if [ ! -d "/proc/$pid" ]; then 77 | echo >&2 "ERROR: Can't find PID $pid. Exiting." 
78 | exit 1 79 | fi 80 | fi 81 | 82 | # note that instructions is last on purpose, it triggers output 83 | # cycles are twice as a workaround for an issue 84 | perf stat -e cycles -e cycles \ 85 | -e dtlb_load_misses.miss_causes_a_walk \ 86 | -e dtlb_store_misses.miss_causes_a_walk \ 87 | -e itlb_misses.miss_causes_a_walk \ 88 | -e dtlb_load_misses.walk_active \ 89 | -e dtlb_store_misses.walk_active \ 90 | -e itlb_misses.walk_active \ 91 | -e instructions \ 92 | -I $(( secs * 1000 )) $target 2>&1 | awk -v hlines=$hlines ' 93 | BEGIN { 94 | htxt = sprintf("%-10s %-10s %5s %-10s %-10s %-10s %-10s %5s %5s", 95 | "K_CYCLES", "K_INSTR", "IPC", "DTLB_WALKS", "ITLB_WALKS", 96 | "K_DTLBCYC", "K_ITLBCYC", "DTLB%", "ITLB%"); 97 | print htxt 98 | header = hlines 99 | } 100 | /invalid/ { print $0 } # unsupported event 101 | { gsub(/,/, ""); } 102 | $3 == "cycles" { cycles = $2; } 103 | # counts: 104 | $3 == "dtlb_load_misses.miss_causes_a_walk" { dtlbl = $2; } 105 | $3 == "dtlb_store_misses.miss_causes_a_walk" { dtlbs = $2; } 106 | $3 == "itlb_misses.miss_causes_a_walk" { itlb = $2; } 107 | # cycles in which at least one PMH has a walk active: 108 | $3 == "dtlb_load_misses.walk_active" { dtlblwc = $2; } 109 | $3 == "dtlb_store_misses.walk_active" { dtlbswc = $2; } 110 | $3 == "itlb_misses.walk_active" { itlbwc = $2; } 111 | $3 == "instructions" { 112 | if (--header == 0) { 113 | print htxt 114 | header = hlines 115 | } 116 | ins = $2 117 | if (cycles == 0) { cycles = 1; ins = 0 } # PMCs are broken, or no events; zero ins to avoid a bogus IPC 118 | 119 | printf("%-10d %-10d %5.2f %-10d %-10d %-10d %-10d %5.2f %5.2f\n", 120 | cycles / 1000, ins / 1000, ins / cycles, 121 | dtlbl + dtlbs, itlb, 122 | (dtlblwc + dtlbswc) / 1000, 123 | itlbwc / 1000, 124 | 100 * (dtlblwc + dtlbswc) / cycles, 125 | 100 * (itlbwc) / cycles) 126 | } 127 | ' 128 | --------------------------------------------------------------------------------