├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .travis.yml ├── AUTHORS ├── Cargo.toml ├── LICENSE ├── README.md ├── analyze ├── __init__.py ├── ethplot.mplstyle ├── profile │ ├── __init__.py │ ├── compare_timeseries.py │ ├── correlation.py │ ├── event_detail.py │ ├── stats.py │ └── timeseries.py └── util.py ├── doc ├── correlation_heatmap.png ├── counters_vs_events.png ├── intro.svg ├── manual.md ├── perf_event_plot.png ├── results.csv └── timeseries.csv ├── src ├── aggregate.rs ├── cmd.yml ├── counters.toml ├── main.rs ├── mkgroup.rs ├── pair.rs ├── profile.rs ├── scale.rs ├── search.rs ├── stats.rs └── util.rs └── tests ├── pair └── manifest.toml └── test_readme.sh /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is and what platform you are running on 12 | and of what you expected to happen. 13 | 14 | **To Reproduce** 15 | Steps to reproduce the behavior: 16 | 1. Run autoperf with these arguments (add -vvv for trace debug output). 17 | 2. Invoke script(s) ... 18 | 4. See error 19 | 20 | **Machine (please complete the following information):** 21 | - Linux version: [use uname -a] 22 | - Machine: [cpuid output] 23 | - perf version: [perf --version] 24 | - autoperf version: [autoperf --version] 25 | - output of: ls /sys/bus/event_source/devices/ 26 | 27 | **Additional context** 28 | Add any other context about the problem here. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.pyc 4 | .sync-config.cson 5 | __pycache__ 6 | .ipynb_checkpoints 7 | .vscode 8 | .DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | services: docker 3 | sudo: required 4 | language: rust 5 | rust: 6 | - nightly 7 | 8 | script: 9 | - bash tests/test_readme.sh 10 | 11 | notifications: 12 | email: 13 | on_success: never 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Denny Lin 2 | Besmira Nushi 3 | Gerd Zellweger 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autoperf" 3 | description = "Automate the recording and instrumentation of programs with performance counters." 4 | version = "0.9.5" 5 | authors = [ 6 | "Denny Lin ", 7 | "Besmira Nushi ", 8 | "Gerd Zellweger " 9 | ] 10 | edition = '2018' 11 | license = "MIT" 12 | repository = "https://github.com/gz/autoperf" 13 | documentation = "https://docs.rs/autoperf" 14 | 15 | [dependencies] 16 | pbr = "1.0.1" 17 | log = "0.4" 18 | env_logger = "0.6" 19 | csv = "0.*" 20 | lazy_static = "0.1.*" 21 | perfcnt = "0.7" 22 | toml = "0.2" 23 | nom = "^1.2.3" 24 | libc = "0.2.16" 25 | clap = { version = "2", features = ["yaml"] } 26 | x86 = { version = "0.45", features = ["performance-counter"] } 27 | phf = "0.9.*" 28 | itertools = "0.5" 29 | rustc-serialize = "0.3" 30 | wait-timeout = "0.1" 31 | 32 | [profile.release] 33 | debug = true 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Gerd Zellweger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/gz/autoperf.svg)](https://travis-ci.org/gz/autoperf) 2 | [![Crates.io](https://img.shields.io/crates/v/autoperf.svg)](https://crates.io/crates/autoperf) 3 | [![docs.rs/autoperf](https://docs.rs/autoperf/badge.svg)](https://docs.rs/crate/autoperf/) 4 | 5 | 6 | # autoperf 7 | 8 | autoperf simplifies the instrumentation of programs with performance 9 | counters on Intel machines. Rather than trying to learn how to measure every 10 | event and manually programming event values in counter registers or perf, you 11 | can use autoperf which will repeatedly run your program until it has measured 12 | every single performance event on your machine. autoperf tries to compute a 13 | schedule that maximizes the amount of events measured per run, and 14 | minimizes the total number of runs while avoiding multiplexing of events on 15 | counters. 16 | 17 |

18 | ![autoperf profiling demo](/doc/intro.svg) 19 |
20 | ![Number of available performance events vs. hardware counters across Intel micro-architectures](/doc/counters_vs_events.png) 21 |
22 | 23 | 24 | ## Background 25 | 26 | Performance monitoring units typically distinguish between performance events and counters. 27 | Events refer to observations on the micro-architectural level 28 | (e.g., a TLB miss, a page-walk etc.), whereas counters are hardware registers that 29 | count the occurrence of events. The figure on the right shows the number of different 30 | observable events for different Intel micro-architectures. Note that current systems 31 | provide a very large choice of possible events to monitor. The number of measurable 32 | counters per PMU is limited (typically from two to eight). For example, if the same 33 | events are measured on all PMUs on a SkylakeX (Xeon Gold 5120) machine, we can only 34 | observe a maximum of 48 different events (without sampling). autoperf simplifies the process 35 | of fully measuring and recording every performance event for a given program. 36 | In our screen session above, recorded on a SkylakeX machine with ~3500 distinct events, 37 | we can see how autoperf automatically runs a program 1357 times while measuring and recording 38 | a different set of events in every run. 39 |
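
As a rough back-of-the-envelope (this is only an illustration, not the actual schedule autoperf computes), dividing the number of events by the number of counters that can be used simultaneously gives a lower bound on the required runs; compatibility constraints between events push the real number higher:

```
# Illustrative lower bound only -- not autoperf's real scheduling algorithm.
import math

events_on_machine = 3500  # e.g., the SkylakeX machine from the recording above
events_per_run = 48       # max. events measurable at once without multiplexing

print(math.ceil(events_on_machine / events_per_run))
# => at least 73 runs; grouping constraints (fixed counters, per-unit limits,
#    incompatible events) raise this to the 1357 runs seen above.
```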
40 | 41 | # Installation 42 | 43 | autoperf is known to work with Ubuntu 18.04 on Skylake and 44 | IvyBridge/SandyBridge architectures. All Intel architectures should work, 45 | please file a bug request if it doesn't. autoperf builds on `perf` from the 46 | Linux project and a few other libraries that can be installed using: 47 | 48 | ``` 49 | $ sudo apt-get update 50 | $ sudo apt-get install likwid cpuid hwloc numactl util-linux 51 | ``` 52 | 53 | To run the example analysis scripts, you'll need these python3 libraries: 54 | ``` 55 | $ pip3 install ascii_graph matplotlib pandas argparse numpy 56 | ``` 57 | 58 | You'll also need the *nightly version* of the rust compiler which is 59 | best installed using rustup: 60 | ``` 61 | $ curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly 62 | $ source $HOME/.cargo/env 63 | ``` 64 | 65 | autoperf is published on crates.io, so once you have rust and cargo installed, 66 | you can get it directly from there: 67 | ``` 68 | $ cargo +nightly install autoperf 69 | ``` 70 | 71 | Or alternatively, clone and build the repository yourself: 72 | ``` 73 | $ git clone https://github.com/gz/autoperf.git 74 | $ cd autoperf 75 | $ cargo build --release 76 | $ ./target/release/autoperf --help 77 | ``` 78 | 79 | autoperf uses perf internally to interface with Linux and the performance 80 | counter hardware. perf recommends that the following settings are disabled. 81 | Therefore, autoperf will check the values of those configurations and refuse to 82 | start if they are not set like below: 83 | ``` 84 | sudo sh -c 'echo 0 >> /proc/sys/kernel/kptr_restrict' 85 | sudo sh -c 'echo 0 > /proc/sys/kernel/nmi_watchdog' 86 | sudo sh -c 'echo -1 > /proc/sys/kernel/perf_event_paranoid' 87 | ``` 88 | 89 | # Usage 90 | 91 | autoperf has a few commands, use `--help` to get a better overview of all the 92 | options. 93 | 94 | ## Profiling 95 | 96 | The **profile** command instruments a single program by running it multiple times 97 | until every performance event is measured. For example, 98 | ``` 99 | $ autoperf profile sleep 2 100 | ``` 101 | will repeatedly run `sleep 2` while measuring different performance events 102 | with performance counters every time. Once completed, you will find an `out` 103 | folder with many csv files that contain measurements from individual runs. 104 | 105 | ## Aggregating results 106 | 107 | To combine all those runs into a single CSV result file you can use the 108 | **aggregate** command: 109 | ``` 110 | $ autoperf aggregate ./out 111 | ``` 112 | This will do some sanity checking and produce a `results.csv` 113 | ([reduced example](../master/doc/results.csv)) file which contains 114 | all the measured data. 115 | 116 | ## Analyze results 117 | 118 | Performance events are measured individually on every core (and other 119 | monitoring units). The `timeseries.py` can aggregate events by taking the 120 | average, stddef, min, max etc. and producing a time-series matrix ([see a 121 | reduced example](../master/doc/timeseries.csv)). 122 | 123 | ``` 124 | python3 analyze/profile/timeseries.py ./out 125 | ``` 126 | 127 | Now you have all the data, so you can start asking some questions. 
As an 128 | example, the following script tells you how events were correlated 129 | when your program was running: 130 | 131 | ``` 132 | $ python3 analyze/profile/correlation.py ./out 133 | $ open out/correlation_heatmap.png 134 | ``` 135 | 136 | Event correlation for the `autoperf profile sleep 2` command 137 | above looks like this (every dot represents the correlation of the time series 138 | between two measured performance events; the data is from a Skylake machine with 139 | around 1700 non-zero event measurements): 140 | ![Correlation Heatmap](/doc/correlation_heatmap.png) 141 | 142 | You can look at individual events too: 143 | ``` 144 | python3 analyze/profile/event_detail.py --resultdir ./out --features AVG.OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD 145 | ``` 146 | ![Plot events](/doc/perf_event_plot.png) 147 | 148 | There are more scripts in the `analyze` folder for working with the captured 149 | data sets. Have a look. 150 | 151 | ## What do I use this for? 152 | 153 | autoperf allows you to quickly gather lots of performance (or training) data and 154 | reason about it quantitatively. For example, we initially developed autoperf to 155 | build ML classifiers that the Barrelfish scheduler could use to detect 156 | application slowdown and make better scheduling decisions. autoperf can gather 157 | that data to generate such classifiers without requiring domain knowledge about 158 | events, aside from how to measure them. 159 | 160 | You can read more about our experiments here: 161 | 162 | * https://dl.acm.org/citation.cfm?id=2967360.2967375 163 | * https://www.research-collection.ethz.ch/handle/20.500.11850/155854 164 | 165 | Last but not least, autoperf can potentially be useful in many other scenarios: 166 | * Find out what performance events are relevant for your workload 167 | * Analyze and find performance issues in your code or across different versions of your code 168 | * Generate classifiers to detect hardware exploits (side channels/spectre/meltdown etc.) 169 | * ...
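
## Working with the raw data

The scripts in `analyze/` are only one way to consume the measurements. The `results.csv` file written by `autoperf aggregate` is a plain CSV with the columns `EVENT_NAME, INDEX, TIME, SOCKET, CORE, CPU, NODE, UNIT, SAMPLE_VALUE`, so it is easy to load into your own tooling. A minimal pandas sketch (the paths assume the `./out` directory from the examples above):

```
import pandas as pd

# Aggregated measurements produced by `autoperf aggregate ./out`:
df = pd.read_csv("out/results.csv", skipinitialspace=True)

# Total event count per event name, summed over all cores, sockets and time steps:
totals = df.groupby("EVENT_NAME")["SAMPLE_VALUE"].sum().sort_values(ascending=False)
print(totals.head(20))

# Or use the time-series matrix generated by analyze/profile/timeseries.py:
ts = pd.read_csv("out/timeseries.csv", index_col=0, skipinitialspace=True)
print(ts.filter(like="AVG.").describe())
```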
170 | -------------------------------------------------------------------------------- /analyze/__init__.py: -------------------------------------------------------------------------------- 1 | __name__ = 'analyze' 2 | -------------------------------------------------------------------------------- /analyze/ethplot.mplstyle: -------------------------------------------------------------------------------- 1 | font.size: 18.0 2 | font.family: sans-serif 3 | font.sans-serif: Supria Sans, Lucida Grande, Bitstream Vera Sans, Helvetica Neue LT Pro 4 | font.style: normal 5 | font.variant: normal 6 | 7 | lines.linewidth: 4 8 | lines.solid_capstyle: butt 9 | 10 | # Don't really want a frame but in case we do we want the fancy one: 11 | legend.frameon: false 12 | legend.fancybox: true 13 | 14 | axes.prop_cycle: cycler('color', ['30a2da', 'fc4f30', 'e5ae38', '6d904f', '8b8b8b']) 15 | axes.facecolor: ffffff 16 | axes.edgecolor: cbcbcb 17 | axes.labelsize: large 18 | axes.axisbelow: true 19 | axes.grid: true 20 | axes.linewidth: 2.0 21 | axes.titlesize: x-large 22 | axes.labelweight: light 23 | 24 | # Remove small ticks at the labels, not necessary with grid: 25 | xtick.major.size: 0 26 | ytick.major.size: 0 27 | xtick.minor.size: 0 28 | ytick.minor.size: 0 29 | 30 | # Adds more space between x[0] and y[0] tick labels: 31 | xtick.major.pad: 7 32 | ytick.major.pad: 7 33 | 34 | patch.edgecolor: f0f0f0 35 | patch.linewidth: 0.5 36 | 37 | svg.fonttype: path 38 | 39 | grid.linestyle: - 40 | grid.linewidth: 1.0 41 | grid.color: cbcbcb 42 | 43 | savefig.edgecolor: f0f0f0 44 | savefig.facecolor: ffffff 45 | savefig.dpi: 300 46 | savefig.bbox: tight 47 | savefig.pad_inches: 0.05 48 | 49 | figure.subplot.left: 0.00 50 | figure.subplot.right: 1.0 51 | figure.subplot.bottom: 0.00 52 | figure.subplot.top: 0.9 53 | #figure.subplot.wspace : 0.2 # the amount of width reserved for blank space between subplots 54 | #figure.subplot.hspace : 0.2 # the amount of height reserved for white space between subplots 55 | -------------------------------------------------------------------------------- /analyze/profile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/analyze/profile/__init__.py -------------------------------------------------------------------------------- /analyze/profile/compare_timeseries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compares two timeseries and looks for differences. 4 | 5 | It does that by summing up the maximas observed for every event on a given 6 | slice of the time-series. Which leads to a single value (magnitude) per 7 | observed event. 8 | Then given two timeseries A and B we compare them by calculating a normalized 9 | factor A.event / (A.event + B.event) to find values that predominantly trigger 10 | only in A and B.event / (A.event + B.event) to find values that trigger 11 | predominantly in B. 
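
Usage:
    python3 analyze/profile/compare_timeseries.py <data_input_dir_A> <data_input_dir_B>

Both directories need a results.csv (as produced by `autoperf aggregate`); a
timeseries_avg_nonzero.csv cache is written into each directory on first use.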
12 | """ 13 | 14 | import os 15 | import sys 16 | import pandas as pd 17 | import numpy as np 18 | import matplotlib 19 | 20 | 21 | def timeseries_file(data_directory): 22 | timeseries_file = os.path.join(data_directory, 'timeseries_avg_nonzero.csv') 23 | if os.path.exists(timeseries_file): 24 | return pd.read_csv(timeseries_file, index_col=0, skipinitialspace=True) 25 | else: 26 | print("Generating timeseries_avg_nonzero.csv") 27 | timeseries = util.load_as_X(os.path.join(data_directory, 'results.csv'), 28 | aggregate_samples=['mean'], cut_off_nan=True, remove_zero=True) 29 | timeseries.to_csv(timeseries_file) 30 | return timeseries 31 | 32 | def usage(progname): 33 | print('usage:', progname, '[data_input_dir for A] [data_input_dir for B]') 34 | sys.exit(0) 35 | 36 | if __name__ == '__main__': 37 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 38 | from analyze import util 39 | 40 | if len(sys.argv) > 3: 41 | usage(sys.argv[0]) 42 | 43 | dfA = timeseries_file(sys.argv[1]) 44 | dfA = dfA[-15:].sum() # TODO range is hard-coded, adjust 45 | 46 | dfB = timeseries_file(sys.argv[2]) 47 | dfB = dfB[-15:].sum() # TODO range is hard-coded, adjust 48 | 49 | max_among_both = pd.concat([dfA, dfB]).max(level=0) 50 | 51 | normA = (dfA / (dfA + dfB)).dropna() 52 | normB = (dfB / (dfA + dfB)).dropna() 53 | 54 | fmt_string = "{event}: {fraction:.2f} ({absolute1}-{absolute2}={res})" 55 | 56 | 57 | print("Events that predominantly trigger in {} and not in {}\n".format(sys.argv[2], sys.argv[1])) 58 | print("Event name: Fraction (progB - progA = difference)") 59 | print("=================================================") 60 | for (name, val) in normB.sort_values().iteritems(): 61 | if val > 0.95: 62 | print (fmt_string.format(event=name, fraction=val, absolute1=dfB[name], absolute2=dfA[name], res=dfB[name]-dfA[name])) 63 | 64 | print("") 65 | print("") 66 | 67 | print("Events that predominantly trigger in {} and not in {}\n".format(sys.argv[1], sys.argv[2])) 68 | 69 | print("Event name: Fraction (progA - progB = difference)") 70 | print("=================================================") 71 | for (name, val) in normA.sort_values().iteritems(): 72 | if val > 0.95: 73 | print (fmt_string.format(event=name, fraction=val, absolute1=dfA[name], absolute2=dfB[name], res=dfA[name]-dfB[name])) 74 | 75 | -------------------------------------------------------------------------------- /analyze/profile/correlation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compute the pairwise correlation for all events in results.csv 4 | and stores it in correlation_matrix.csv. 5 | Also generates a heatmap for the computed matrix 6 | and stores it in correlation_heatmap.csv. 
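
Usage:
    python3 analyze/profile/correlation.py <data_input_dir>

Reads <data_input_dir>/results.csv and writes correlation_matrix.csv and
correlation_heatmap.png into the same directory.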
7 | """ 8 | 9 | import os 10 | import sys 11 | import pandas as pd 12 | import numpy as np 13 | import matplotlib 14 | 15 | matplotlib.use('Agg') 16 | from matplotlib import pyplot as plt, font_manager 17 | from matplotlib.colors import LinearSegmentedColormap 18 | 19 | colors = LinearSegmentedColormap.from_list('seismic', 20 | ['#ca0020', '#ffffff', '#2a99d6']) 21 | 22 | if __name__ == "__main__": 23 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 24 | from analyze import util 25 | 26 | def correlation_matrix(data_directory): 27 | df = util.load_as_X(os.path.join(data_directory, 'results.csv'), cut_off_nan=True, remove_zero=True) 28 | correlation_matrix = df.corr() 29 | # Ensure all values in correlation matrix are valid 30 | assert not correlation_matrix.isnull().values.any() 31 | 32 | correlation_file = os.path.join(data_directory, 'correlation_matrix.csv') 33 | correlation_matrix.to_csv(correlation_file) 34 | print("Generated correlation_matrix.csv") 35 | 36 | def correlation_heatmap(data_directory): 37 | data_file = os.path.join(data_directory, 'correlation_matrix.csv') 38 | data = pd.read_csv(data_file, header=0, index_col=0) 39 | def make_heatmap(plot_output_dir, data): 40 | plt.style.use([os.path.join(sys.path[0], "..", 'ethplot.mplstyle')]) 41 | fig, ax = plt.subplots() 42 | 43 | ax.xaxis.set_visible(False) 44 | ax.yaxis.set_visible(False) 45 | plt.xlim(0, data.shape[0]) 46 | plt.ylim(0, data.shape[1]) 47 | 48 | c = plt.pcolor(data.iloc[::-1], cmap=colors, vmin=-1.0, vmax=1.0) 49 | colorbar = plt.colorbar(c, ticks=[-1, 0, 1]) 50 | 51 | #ticks_font = font_manager.FontProperties(family='Decima Mono') 52 | #plt.setp(colorbar.ax.get_yticklabels(), fontproperties=ticks_font) 53 | plt.savefig(os.path.join(plot_output_dir, 'correlation_heatmap.png'), format='png') 54 | print("Generated correlation_heatmap.png") 55 | 56 | make_heatmap(data_directory, data) 57 | 58 | def usage(progname): 59 | print('usage:', progname, '[data_input_dir]') 60 | sys.exit(0) 61 | 62 | if __name__ == '__main__': 63 | if len(sys.argv) > 2: 64 | usage(sys.argv[0]) 65 | correlation_matrix(sys.argv[1]) 66 | correlation_heatmap(sys.argv[1]) 67 | 68 | -------------------------------------------------------------------------------- /analyze/profile/event_detail.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Display information about a single event. 
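
Usage:
    python3 analyze/profile/event_detail.py --resultdir <data_input_dir> \
        --features AVG.<EVENT_NAME> [MAX.<EVENT_NAME> ...]

Prefix each event name with 'AVG.', 'STD.', 'MAX.' or 'MIN.' to pick the
aggregate to plot; the resulting PNG is saved in the result directory.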
5 | """ 6 | 7 | import sys 8 | import os 9 | import re 10 | import argparse 11 | import pandas as pd 12 | import numpy as np 13 | import matplotlib 14 | matplotlib.use('Agg') 15 | 16 | from matplotlib import pyplot as plt, font_manager 17 | plt.style.use([os.path.join(sys.path[0], '..', 'ethplot.mplstyle')]) 18 | 19 | if __name__ == "__main__": 20 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 21 | from analyze import util 22 | 23 | def plot_events(df, features, filename, output_dir, title=None): 24 | fig = plt.figure() 25 | if title: 26 | fig.suptitle(title) 27 | 28 | ax1 = fig.add_subplot(1, 1, 1) 29 | ax1.set_xlabel('Time [s]') 30 | ax1.set_ylabel('Events observed [count]') 31 | ax1.spines['top'].set_visible(False) 32 | ax1.spines['right'].set_visible(False) 33 | ax1.get_xaxis().tick_bottom() 34 | ax1.get_yaxis().tick_left() 35 | 36 | for feature in features: 37 | ax1.plot(df[feature], label=feature) 38 | 39 | ax1.xaxis.set_ticks(np.arange(0, len(df), 4)) 40 | 41 | val, labels = plt.xticks() 42 | plt.xticks(val, ["{}".format(x / 4) for x in val]) 43 | 44 | ax1.set_ylim(ymin=0.0) 45 | ax1.legend(loc='best', prop={'size': 8}) 46 | 47 | plt.savefig(os.path.join(output_dir, filename + ".png"), format='png') 48 | plt.clf() 49 | plt.close() 50 | print("Generated file {}".format(filename + ".png")) 51 | 52 | 53 | def make_plot(from_directory, features): 54 | df = util.load_as_X(os.path.join(from_directory, 'results.csv'), aggregate_samples = ['mean', 'std', 'max', 'min'], cut_off_nan=True) 55 | filename = "perf_event_plot_{}".format("_".join(features)) 56 | plot_events(df, features, filename, from_directory) 57 | 58 | if __name__ == '__main__': 59 | pd.set_option('display.max_rows', 37) 60 | pd.set_option('display.max_columns', 15) 61 | pd.set_option('display.width', 200) 62 | 63 | parser = argparse.ArgumentParser(description="Plot an event counter.") 64 | parser.add_argument('--resultdir', dest='dir', type=str, help="Result directory of the profile run.", required=True) 65 | parser.add_argument('--features', dest='features', nargs='+', type=str, help="Which events to plot (add 'AVG.', 'STD.', 'MAX.' or 'MIN.' in front of the event name)", required=True) 66 | args = parser.parse_args() 67 | 68 | make_plot(args.dir, args.features) 69 | -------------------------------------------------------------------------------- /analyze/profile/stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Gathers and prints some information about the results.csv file from a profile run. 5 | Intended for quick sanity checking of the results. 
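
Usage:
    python3 analyze/profile/stats.py <data_input_dir>

Expects <data_input_dir>/results.csv and prints the events that only reported
zeroes as well as histograms of the recorded sample counts.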
6 | """ 7 | 8 | import sys, os 9 | import pandas as pd 10 | 11 | from ascii_graph import Pyasciigraph 12 | 13 | sys.path.insert(1, os.path.join(os.path.realpath(os.path.split(__file__)[0]), '..', "..")) 14 | from analyze.util import get_all_zero_events 15 | 16 | def histogram(L): 17 | d = {} 18 | for x in L: 19 | if x in d: 20 | d[x] += 1 21 | else: 22 | d[x] = 1 23 | return d 24 | 25 | 26 | def yield_cpu_sample_lengths(df): 27 | for idx in df.index.unique(): 28 | if not idx.startswith("uncore_"): 29 | yield len(df.loc[[idx], 'SAMPLE_VALUE']) 30 | 31 | def yield_uncore_sample_lengths(df): 32 | for idx in df.index.unique(): 33 | if idx.startswith("uncore_"): 34 | yield len(df.loc[[idx], 'SAMPLE_VALUE']) 35 | 36 | def samples_histogram(df, lengths_fn): 37 | lengths = histogram(lengths_fn(df)) 38 | data = [] 39 | for key, value in lengths.items(): 40 | data.append( ("%d samples" % key, value) ) 41 | data = sorted(data, key=lambda x: x[1]) 42 | return data 43 | 44 | if __name__ == '__main__': 45 | data_directory = sys.argv[1] 46 | df = pd.read_csv(os.path.join(data_directory, 'results.csv'), index_col=0, skipinitialspace=True) 47 | 48 | all_events = df.index.unique() 49 | all_zero = get_all_zero_events(df) 50 | 51 | print("Total Events measured:", len(all_events)) 52 | title = "List of event samples that reported only zeroes (%d / %d):" % (len(all_zero), len(all_events)) 53 | print('\n - '.join([title] + all_zero)) 54 | df = df.drop(all_zero) 55 | 56 | # Sample histogram 57 | graph = Pyasciigraph() 58 | for line in graph.graph('Recorded CPU samples histogram:', samples_histogram(df, yield_cpu_sample_lengths)): 59 | print(line) 60 | 61 | graph = Pyasciigraph() 62 | for line in graph.graph('Recorded uncore samples histogram:', samples_histogram(df, yield_uncore_sample_lengths)): 63 | print(line) 64 | 65 | print("The 25 events with fewest samples are:") 66 | for idx in sorted(df.index.unique(), key=lambda x: len(df.loc[[x], 'SAMPLE_VALUE']))[:25]: 67 | print(idx, ":", len(df.loc[[idx], 'SAMPLE_VALUE']), "samples") 68 | -------------------------------------------------------------------------------- /analyze/profile/timeseries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Compute the timeseries data as a CSV file. 
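
Usage:
    python3 analyze/profile/timeseries.py <data_input_dir>

Reads <data_input_dir>/results.csv and writes the aggregated time-series matrix
to <data_input_dir>/timeseries.csv.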
4 | """ 5 | 6 | import os 7 | import sys 8 | import pandas as pd 9 | import numpy as np 10 | import matplotlib 11 | 12 | 13 | if __name__ == "__main__": 14 | sys.path.insert(1, os.path.join(sys.path[0], '..', "..")) 15 | from analyze import util 16 | 17 | def timeseries_file(data_directory): 18 | timeseries = util.load_as_X(os.path.join(data_directory, 'results.csv'), aggregate_samples = ['mean', 'std', 'max', 'min'], cut_off_nan=True) 19 | timeseries_file = os.path.join(data_directory, 'timeseries.csv') 20 | timeseries.to_csv(timeseries_file) 21 | print("Generated timeseries.csv") 22 | 23 | def usage(progname): 24 | print('usage:', progname, '[data_input_dir]') 25 | sys.exit(0) 26 | 27 | if __name__ == '__main__': 28 | if len(sys.argv) > 2: 29 | usage(sys.argv[0]) 30 | timeseries_file(sys.argv[1]) 31 | 32 | -------------------------------------------------------------------------------- /analyze/util.py: -------------------------------------------------------------------------------- 1 | import math 2 | import pandas as pd 3 | import numpy as np 4 | 5 | READ_BANK_EVENTS = ["UNC_M_RD_CAS_RANK{}.BANK{}".format(i,j) for i in range(0,8) for j in range(0,8) ] 6 | WRITE_BANK_EVENTS = ["UNC_M_WR_CAS_RANK{}.BANK{}".format(i,j) for i in range(0,8) for j in range(0,8) ] 7 | 8 | def merge_bank_rank_events(df, minmax=False): 9 | matrix = pd.DataFrame(df) 10 | matrix.reset_index(inplace=True) 11 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 12 | df = pivot_table 13 | 14 | read_rank_banks = pd.DataFrame() 15 | for e in READ_BANK_EVENTS: 16 | read_rank_banks[e] = df[e] 17 | 18 | write_rank_banks = pd.DataFrame() 19 | for e in WRITE_BANK_EVENTS: 20 | write_rank_banks[e] = df[e] 21 | 22 | merged_banks = pd.DataFrame() 23 | merged_banks['SUM.UNC_M_RD_CAS.*'] = read_rank_banks.sum(axis=1) 24 | merged_banks['STD.UNC_M_RD_CAS.*'] = read_rank_banks.std(axis=1, ddof=0) 25 | merged_banks['SUM.UNC_M_WR_CAS.*'] = write_rank_banks.sum(axis=1) 26 | merged_banks['STD.UNC_M_WR_CAS.*'] = write_rank_banks.std(axis=1, ddof=0) 27 | if minmax: 28 | merged_banks['MAX.UNC_M_WR_CAS.*'] = write_rank_banks.max(axis=1) 29 | merged_banks['MIN.UNC_M_WR_CAS.*'] = write_rank_banks.min(axis=1) 30 | merged_banks['MAX.UNC_M_RD_CAS.*'] = read_rank_banks.max(axis=1) 31 | merged_banks['MIN.UNC_M_RD_CAS.*'] = read_rank_banks.min(axis=1) 32 | #print(merged_banks) 33 | return merged_banks 34 | 35 | def add_metrics(df): 36 | matrix = pd.DataFrame(df) 37 | matrix.reset_index(inplace=True) 38 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 39 | df = pivot_table 40 | 41 | metrics = pd.DataFrame() 42 | metrics['ENG.IPC'] = pivot_table['INST_RETIRED.ANY_P'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 43 | metrics['ENG.DSB_SWITCHES'] = pivot_table['DSB2MITE_SWITCHES.PENALTY_CYCLES'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 44 | metrics['ENG.MS_SWITCHES'] = 3 * pivot_table['IDQ.MS_SWITCHES'] / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 45 | metrics['ENG.L2_BOUND'] = (pivot_table['CYCLE_ACTIVITY.STALLS_L1D_PENDING'] - pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 46 | metrics['ENG.L3_HIT_FRACTION'] = pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_HIT'] / (pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_HIT']+7*pivot_table['MEM_LOAD_UOPS_RETIRED.LLC_MISS']) 47 | metrics['ENG.L3_BOUND'] = (metrics['ENG.L3_HIT_FRACTION'] * pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 48 | 
metrics['ENG.MEM_BOUND'] = ((1 - metrics['ENG.L3_HIT_FRACTION']) * pivot_table['CYCLE_ACTIVITY.STALLS_L2_PENDING']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 49 | #metrics['ENG.STALLS_MEM_ANY'] = pd.concat(pivot_table['CPU_CLK_UNHALTED.THREAD'], pivot_table['CYCLE_ACTIVITY.STALLS_L1D_PENDING']).min(axis=1) 50 | #metrics['ENG.STORES_BOUND'] = (pivot_table['RESOURCE_STALLS.SB'] - metrics['ENG.STALLS_MEM_ANY']) / pivot_table['CPU_CLK_UNHALTED.THREAD_P_ANY'] 51 | 52 | return metrics 53 | 54 | def aggregation_matrix(prefix, series, drop_bank_events=False): 55 | matrix = pd.DataFrame(series) 56 | matrix.reset_index(inplace=True) 57 | pivot_table = matrix.pivot(index='INDEX', columns='EVENT_NAME', values='SAMPLE_VALUE') 58 | if drop_bank_events: 59 | pivot_table.drop(READ_BANK_EVENTS, axis=1, inplace=True) 60 | pivot_table.drop(WRITE_BANK_EVENTS, axis=1, inplace=True) 61 | 62 | pivot_table.rename(columns=lambda x: "{}.{}".format(prefix, x), inplace=True) 63 | return pivot_table 64 | 65 | def load_as_X(f, aggregate_samples=['mean'], remove_zero=False, cut_off_nan=True): 66 | """ 67 | Transform CSV file into a matrix X (used for most ML inputs). 68 | The rows will be different times, the columns are the events. 69 | 70 | Keyword arguments: 71 | aggregate_samples -- Aggregate samples from all CPUs at time t. 72 | remove_zero -- Remove features that are all zero. 73 | cut_off_nan -- Remove everything after first NaN value is encountered. 74 | """ 75 | # Parse file 76 | raw_data = pd.read_csv(f, sep=',', skipinitialspace=True) 77 | raw_data.set_index(['EVENT_NAME'], inplace=True) 78 | raw_data.sort_index(inplace=True) 79 | 80 | # Remove events whose deltas are all 0: 81 | if remove_zero: 82 | raw_data = raw_data.drop(get_all_zero_events(raw_data)) 83 | 84 | # Convert time 85 | time_to_ms(raw_data) 86 | 87 | # Aggregate all event samples from the same event at time 88 | aggregates = [] 89 | drop_bank_events = 'rbmerge' in aggregate_samples or 'rbmerge2' in aggregate_samples or 'rbdrop' in aggregate_samples 90 | 91 | start_at = 0 92 | if aggregate_samples: 93 | grouped_df = raw_data.groupby(['EVENT_NAME', 'INDEX']) 94 | grouped_df_multiple = grouped_df.filter(lambda x: len(x) > 1).groupby(['EVENT_NAME', 'INDEX']) 95 | for agg in aggregate_samples: 96 | if agg == 'mean': 97 | series = grouped_df['SAMPLE_VALUE'].mean() 98 | aggregates.append(aggregation_matrix('AVG', series, drop_bank_events=drop_bank_events)) 99 | elif agg == 'std': 100 | series = grouped_df_multiple['SAMPLE_VALUE'].std(ddof=0) 101 | matrix = aggregation_matrix('STD', series, drop_bank_events=drop_bank_events) 102 | aggregates.append(matrix) 103 | elif agg == 'max': 104 | series = grouped_df_multiple['SAMPLE_VALUE'].max() 105 | aggregates.append(aggregation_matrix('MAX', series, drop_bank_events=drop_bank_events)) 106 | elif agg == 'min': 107 | series = grouped_df_multiple['SAMPLE_VALUE'].min() 108 | aggregates.append(aggregation_matrix('MIN', series, drop_bank_events=drop_bank_events)) 109 | elif agg == 'rbmerge': 110 | series = grouped_df['SAMPLE_VALUE'].mean() 111 | aggregates.append(merge_bank_rank_events(series)) 112 | elif agg == 'rbmerge2': 113 | series = grouped_df['SAMPLE_VALUE'].mean() 114 | aggregates.append(merge_bank_rank_events(series, minmax=True)) 115 | elif agg == 'cut1': 116 | start_at = 1 117 | elif agg == 'cut2': 118 | start_at = 2 119 | elif agg == 'cut4': 120 | start_at = 4 121 | elif agg == 'rbdrop': 122 | pass 123 | elif agg == 'metrics': 124 | series = grouped_df['SAMPLE_VALUE'].mean() 125 | 
aggregates.append(add_metrics(series)) 126 | else: 127 | assert "Unknown aggregation: {}. Supported are: [mean, std, max, min, rbmerge, cut1, cut2, cut4].".format(agg) 128 | df = pd.concat(aggregates, axis=1) 129 | 130 | # Cut off everything after first row with a NaN value 131 | if cut_off_nan: 132 | min_idx = minimum_nan_index(df) 133 | throw_away = df.shape[0]-min_idx 134 | if throw_away > df.shape[0] * (0.20): 135 | print("Throwing away {} out of {} samples for {}".format(throw_away, df.shape[0], f)) 136 | df = df[:min_idx] 137 | 138 | if "merge4" in aggregate_samples: 139 | # Aggregate 4 rows to get 1sec sampling time 140 | df.reset_index(inplace=True) 141 | df['MergeLabel'] = pd.Series([ math.ceil(i / 4.0) for i in range(1, len(df)+1) ]) 142 | df = df.groupby(['MergeLabel']).sum() 143 | 144 | if "merge2" in aggregate_samples: 145 | # Aggregate 2 rows to get 0.5sec sampling time 146 | df.reset_index(inplace=True) 147 | df['MergeLabel'] = pd.Series([ math.ceil(i / 2.0) for i in range(1, len(df)+1) ]) 148 | df = df.groupby(['MergeLabel']).sum() 149 | 150 | return df[start_at:] 151 | 152 | 153 | def minimum_nan_index(df): 154 | """ 155 | Return the earliest index that contains NaN over all columns or None 156 | if there are no NaN values in any columns. 157 | 158 | # Example 159 | For the following matrix it returns 1 as (1,1) is NaN: 160 | idx | EVENT1 EVENT2 EVENT3 .... EVENTN 161 | 0 | 12 9 5 12 162 | 1 | 1 NaN 2 5 163 | 2 | 0 NaN 100 12 164 | 3 | 0 NaN 1 99 165 | """ 166 | nans = pd.isnull(df).any(1).to_numpy().nonzero()[0] 167 | if len(nans) == 0: 168 | return df.shape[0] 169 | else: 170 | return min(nans) 171 | 172 | def get_zero_features_in_matrix(df): 173 | """ 174 | Given a pandas DataFrame loaded from a matrix_X*.csv file, 175 | return all columns (features) where the values are always zero. 176 | """ 177 | zero_events = [] 178 | for col in df: 179 | if not df[col].any(): 180 | # col.split(".", 1)[1] for getting event name 181 | zero_events.append(col) 182 | return zero_events 183 | 184 | def get_all_zero_events(df): 185 | """ 186 | Given a pandas DataFrame loaded from a results.csv file, 187 | return all event names where the counts are always 0 188 | """ 189 | event_names = [] 190 | for idx in df.index.unique(): 191 | if df.loc[idx, 'SAMPLE_VALUE'].sum() == 0: 192 | event_names.append(idx) 193 | return event_names 194 | 195 | def time_to_ms(df): 196 | """ 197 | Transforn the perf time (floating point, seconds) 198 | to miliseconds (absolute numbers) 199 | """ 200 | df['TIME'] = df['TIME'].map(lambda x: int(x * 1000)) 201 | -------------------------------------------------------------------------------- /doc/correlation_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/correlation_heatmap.png -------------------------------------------------------------------------------- /doc/counters_vs_events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/counters_vs_events.png -------------------------------------------------------------------------------- /doc/manual.md: -------------------------------------------------------------------------------- 1 | # autoperf 2 | 3 | User manual, currently still under construction. 
4 | 5 | ## profile -- measure all the things 6 | 7 | ## aggregate -- combine results 8 | 9 | ## stats -- generate some stats about all events 10 | 11 | ## search -- finding undocumented events 12 | 13 | ## pair -- profiling pairwise combinations of programs 14 | 15 | A more advanced feature is the pairwise instrumentation of programs. 16 | Say you have a set of programs and you want to study their pairwise 17 | interactions with each other. You would first define a manifest like this: 18 | 19 | ``` 20 | [experiment] 21 | configurations = ["L3-SMT", "L3-SMT-cores"] 22 | 23 | [programA] 24 | name = "gcc" 25 | binary = "gcc" 26 | arguments = ["-j", "4", "out.c", "-o", "out"] 27 | 28 | [programB] 29 | name = "objdump" 30 | binary = "objdump" 31 | arguments = ["--disassemble", "/bin/true"] 32 | 33 | [programC] 34 | name = "cat" 35 | binary = "cat" 36 | arguments = ["/var/log/messages"] 37 | env = { LC_ALL = "C" } 38 | use_watch_repeat = true 39 | ``` 40 | 41 | After saving this as a file called `manifest.toml` in a folder called 42 | `pairings`, you could call `autoperf` with the following arguments: 43 | 44 | ``` 45 | $ autoperf pair ./pairings 46 | ``` 47 | 48 | This essentially does what the profile command does, but for every individual 49 | program defined in the manifest. In addition, it runs even more profile 50 | commands for programA while continuously running programB or programC in the 51 | background (once this is done, it does the same for programB and programC). 52 | 53 | If this is confusing and you want to get first-hand experience of what we would 54 | really be running here, you can also pass the `-d` argument to the pair 55 | sub-command. In this case, autoperf just prints a plan of what it would be 56 | doing, rather than launching any programs. 57 | 58 | ### Manifest settings 59 | 60 | The manifest format has a few configuration parameters. A full manifest file with 61 | all possible configurations and documentation in the comments is shown in 62 | `./tests/pair/manifest.toml`. 63 | 64 | * **configurations** is a list of possible mappings of programs to cores: 65 | * L1-SMT: Programs are placed on a single core, each gets one hyper-thread. 66 | * L3-SMT: Programs are placed on a single socket, each application gets one hyper-thread, interleaved (i.e., cores are shared between apps). 67 | * L3-SMT-cores: Programs are placed on a single socket, applications get a full core (i.e., hyper-threads are not shared between apps). 68 | * L3-cores: Programs are placed on a single socket, using a core per application but leaving the other hyper-thread idle. 69 | * Full-L3: Use the whole machine, each program is allocated an entire L3/socket and program threads allocate an entire core (hyper-threads are left idle). 70 | * Full-SMT-L3: Use the whole machine, programs allocate an entire L3/socket (hyper-threads are used). 71 | * Full-cores: Use the whole machine, programs use cores from all sockets interleaved (hyper-threads are left idle). 72 | * Full-SMT-cores: Use the whole machine, programs use cores from all sockets interleaved (hyper-threads are used).
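
If you have several identical machines, the `pair` sub-command's `--base` and `--step` flags (see `autoperf pair --help`) let you split the configurations of one manifest across them: `--base` skips the first x configurations and `--step` only evaluates every x-th one. A sketch for two machines (the exact indexing is an assumption here, so treat the numbers as an illustration):

```
$ autoperf pair -d ./pairings                  # print the plan without running anything
$ autoperf pair --base 0 --step 2 ./pairings   # machine 1
$ autoperf pair --base 1 --step 2 ./pairings   # machine 2
```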
73 | -------------------------------------------------------------------------------- /doc/perf_event_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/autoperf/17d5615061392e6fc4ca33e57e689485846e59bb/doc/perf_event_plot.png -------------------------------------------------------------------------------- /doc/timeseries.csv: -------------------------------------------------------------------------------- 1 | INDEX,AVG.ARITH.DIVIDER_ACTIVE,AVG.BACLEARS.ANY,AVG.BR_INST_RETIRED.ALL_BRANCHES,AVG.BR_INST_RETIRED.ALL_BRANCHES_PEBS,AVG.BR_MISP_RETIRED.ALL_BRANCHES,AVG.BR_MISP_RETIRED.ALL_BRANCHES_PEBS,AVG.CORE_POWER.LVL0_TURBO_LICENSE,AVG.CORE_POWER.LVL1_TURBO_LICENSE,AVG.CORE_POWER.LVL2_TURBO_LICENSE,AVG.CORE_POWER.THROTTLE,AVG.CORE_SNOOP_RESPONSE.RSP_IFWDFE,AVG.CORE_SNOOP_RESPONSE.RSP_IFWDM,AVG.CORE_SNOOP_RESPONSE.RSP_IHITFSE,AVG.CORE_SNOOP_RESPONSE.RSP_IHITI,AVG.CORE_SNOOP_RESPONSE.RSP_SFWDFE,AVG.CORE_SNOOP_RESPONSE.RSP_SFWDM,AVG.CORE_SNOOP_RESPONSE.RSP_SHITFSE 2 | 0,5727.107143,440.3035714,8074.25,6999.678571,214.0892857,2807.660714,245537.875,0,0,0,25.07142857,43.85714286,41.28571429,84.51785714,4.589285714,19.69642857,0.017857143 3 | 1,248.8392857,489.3214286,5429.589286,112373.8214,299.6607143,249.0714286,758765.2857,0,0,2828.75,16.46428571,37.67857143,39.89285714,328.3035714,0.464285714,15.57142857,0.660714286 4 | 2,349.6964286,1042.678571,4930.071429,6325.285714,267.5178571,241.0714286,420077.875,0,0,575.5,11.78571429,58.64285714,32.69642857,2207.553571,2.75,21.28571429,0 5 | 3,187.5357143,955.8392857,4236.589286,13601.94643,3865.125,299.75,359782.9107,0,0,1371.857143,11.71428571,1783.821429,1780.035714,19.21428571,0.928571429,16.01785714,2.25 6 | 4,279.75,2824.25,114628.8036,4538.732143,206.5892857,349.9285714,387673.6964,0,0,1884.25,147.3035714,36.08928571,27.23214286,17.98214286,364.7321429,1095.642857,0 7 | 5,201.3214286,664.4285714,7428.482143,5686.553571,295.1071429,234.8214286,1428578.696,0,0,2118.392857,763.2678571,1215.232143,26.19642857,10.51785714,0.607142857,21.83928571,0.107142857 8 | 6,4375.625,668.3571429,11369.17857,4829.142857,546.6428571,2416.357143,284930.3929,0,0,1992.5,12.64285714,34.23214286,29.78571429,11.96428571,1.678571429,16.58928571,0 9 | 7,191.5178571,799.375,5276.285714,113554.2857,626.6607143,260.5892857,294178.4107,0,0,2813.214286,14.875,39.78571429,27.64285714,8.071428571,2.517857143,15.69642857,0.071428571 -------------------------------------------------------------------------------- /src/aggregate.rs: -------------------------------------------------------------------------------- 1 | use csv; 2 | use log::*; 3 | use std::collections::HashMap; 4 | use std::collections::HashSet; 5 | use std::fs; 6 | use std::fs::{File, Metadata}; 7 | use std::io; 8 | use std::io::prelude::*; 9 | use std::path::Path; 10 | use std::path::PathBuf; 11 | use std::process; 12 | use std::str::FromStr; 13 | use toml; 14 | 15 | use crate::util::*; 16 | 17 | use perfcnt::linux::perf_file::PerfFile; 18 | use perfcnt::linux::perf_format::{EventData, EventDesc, EventType}; 19 | 20 | // I have no idea if the perf format guarantees that events appear always in the same order :S 21 | fn verify_events_in_order(events: &Vec, values: &Vec<(u64, Option)>) -> bool { 22 | for (idx, v) in values.iter().enumerate() { 23 | // Don't have id's we can't veryify anything 24 | if v.1.is_none() { 25 | warn!( 26 | "Don't have IDs with the sample values, so we can't tell which event a sample \ 27 | belongs to." 
28 | ); 29 | return true; 30 | } 31 | 32 | let id: u64 = v.1.unwrap_or(0); 33 | if !events.get(idx).map_or(false, |ev| ev.ids.contains(&id)) { 34 | return false; 35 | } 36 | } 37 | 38 | return true; 39 | } 40 | 41 | /// Extracts the perf stat file and writes it to a CSV file that looks like this: 42 | /// "EVENT_NAME", "TIME", "SOCKET", "CORE", "CPU", "NODE", "UNIT", "SAMPLE_VALUE" 43 | fn parse_perf_csv_file( 44 | mt: &MachineTopology, 45 | cpus: &Vec<&CpuInfo>, 46 | cpu_filter: Filter, 47 | sockets: &Vec, 48 | breakpoints: &Vec, 49 | path: &Path, 50 | writer: &mut csv::Writer, 51 | ) -> io::Result<()> { 52 | // Check if it's a file: 53 | let meta: Metadata = fs::metadata(path)?; 54 | if !meta.file_type().is_file() { 55 | error!("Not a file {:?}", path); 56 | } 57 | 58 | let mut erronous_events: HashMap = HashMap::new(); 59 | type OutputRow = (String, String, Socket, Core, Cpu, Node, String, u64); 60 | let mut parsed_rows: Vec = Vec::with_capacity(5000); 61 | 62 | // All the sockets this program is running on: 63 | let mut all_sockets: Vec = cpus.iter().map(|c| c.socket).collect(); 64 | all_sockets.sort(); 65 | all_sockets.dedup(); 66 | 67 | // Timestamps for filtering start and end: 68 | let mut start: Option = None; 69 | let mut end: Option = None; 70 | 71 | let mut rdr = csv::Reader::from_file(path) 72 | .unwrap() 73 | .has_headers(false) 74 | .delimiter(b';') 75 | .flexible(true); 76 | for record in rdr.decode() { 77 | if record.is_ok() { 78 | type SourceRow = (f64, String, String, String, String, String, f64); 79 | let (time, cpu, value_string, _, event, _, percent): SourceRow = 80 | record.expect("Should not happen (in is_ok() branch)!"); 81 | 82 | // Perf will just report first CPU on the socket for uncore events, 83 | // so we temporarily encode the location in the event name and 84 | // extract it here again: 85 | let (unit, event_name) = if !event.starts_with("uncore_") { 86 | // Normal case, we just take the regular event and cpu fields from perf stat 87 | (String::from("cpu"), String::from(event.trim())) 88 | } else { 89 | // Uncore events, use first part of the event name as the location 90 | let (unit, name) = event.split_at(event.find(".").unwrap()); 91 | ( 92 | String::from(unit), 93 | String::from(name.trim_start_matches(".").trim()), 94 | ) 95 | }; 96 | 97 | if erronous_events.contains_key(&event_name) { 98 | // Skip already reported, bad events 99 | continue; 100 | } 101 | 102 | if !cpu.starts_with("CPU") { 103 | error!( 104 | "{:?}: Unkown CPU value {}, skipping this row.", 105 | path.as_os_str(), 106 | cpu 107 | ); 108 | continue; 109 | } 110 | 111 | let cpu_nr = match u64::from_str(&cpu[3..].trim()) { 112 | Ok(v) => v, 113 | Err(_e) => { 114 | error!( 115 | "{:?}: CPU value is not a number '{}', skipping this row.", 116 | path.as_os_str(), 117 | cpu 118 | ); 119 | continue; 120 | } 121 | }; 122 | let cpuinfo: &CpuInfo = mt 123 | .cpu(cpu_nr) 124 | .expect("Invalid CPU number (check run.toml or lspcu.csv)"); 125 | 126 | if value_string.trim() == "" { 127 | warn!( 128 | "{:?}: Event '{}' was not counted. This is a bug, please report it!", 129 | path.as_os_str(), 130 | event_name 131 | ); 132 | erronous_events.insert(event_name.clone(), true); 133 | continue; 134 | } 135 | if value_string.trim() == "" { 136 | warn!( 137 | "{:?}: Event '{}' was not measured correctly with perf. 
This is a bug, please report it!", 138 | path.as_os_str(), 139 | event_name 140 | ); 141 | erronous_events.insert(event_name.clone(), true); 142 | continue; 143 | } 144 | if percent < 91.0 { 145 | error!( 146 | "{:?}: has multiplexed event '{}'. This is a bug, please report it!", 147 | path.as_os_str(), 148 | event_name 149 | ); 150 | erronous_events.insert(event_name.clone(), true); 151 | continue; 152 | } 153 | 154 | let value = u64::from_str(value_string.trim()).expect( 155 | format!("Parsed string '{}' should be a value by now!", value_string).as_str(), 156 | ); 157 | 158 | if breakpoints.len() >= 1 159 | && value == 1 160 | && event_name.ends_with(breakpoints[0].as_str()) 161 | && cpus.iter().any(|c| c.cpu == cpu_nr) 162 | { 163 | if start.is_some() { 164 | error!( 165 | "{:?}: Start breakpoint ({:?}) triggered multiple times.", 166 | path.as_os_str(), 167 | breakpoints[0] 168 | ); 169 | } 170 | start = Some(time) 171 | } 172 | if breakpoints.len() >= 2 173 | && value == 1 174 | && event_name.ends_with(breakpoints[1].as_str()) 175 | && cpus.iter().any(|c| c.cpu == cpu_nr) 176 | { 177 | if end.is_some() { 178 | warn!( 179 | "{:?}: End breakpoint ({:?}) triggered multiple times. Update end \ 180 | breakpoint.", 181 | path.as_os_str(), 182 | breakpoints[1] 183 | ); 184 | } 185 | end = Some(time) 186 | } 187 | 188 | parsed_rows.push(( 189 | event_name, 190 | time.to_string(), 191 | cpuinfo.socket, 192 | cpuinfo.core, 193 | cpu_nr, 194 | cpuinfo.node.node, 195 | unit, 196 | value, 197 | )); 198 | } else { 199 | // Ignore lines that start with # (comments) but fail in case another 200 | // line can not be parsed: 201 | match record.unwrap_err() { 202 | csv::Error::Decode(s) => { 203 | if !s.starts_with("Failed converting '#") { 204 | panic!("Can't decode line {}.", s) 205 | } 206 | } 207 | e => panic!("Unrecoverable error {} while decoding.", e), 208 | }; 209 | } 210 | } 211 | if breakpoints.len() >= 1 && start.is_none() { 212 | error!( 213 | "{:?}: We did not find a trigger for start breakpoint ({:?})", 214 | path.as_os_str(), 215 | breakpoints[0] 216 | ); 217 | } 218 | if breakpoints.len() == 2 && end.is_none() { 219 | warn!( 220 | "{:?}: We did not find a trigger for end breakpoint ({:?})", 221 | path.as_os_str(), 222 | breakpoints[1] 223 | ); 224 | } 225 | if breakpoints.len() == 2 && end.is_some() && start.is_some() { 226 | let start_s = start.unwrap_or(0.0); 227 | let end_s = end.unwrap_or(0.0); 228 | if end_s <= start_s { 229 | error!( 230 | "{:?}: End breakpoint is before start breakpoint ({:?} -- {:?})", 231 | path.as_os_str(), 232 | start, 233 | end 234 | ); 235 | } else if (end_s - start_s) < 1.0 { 236 | warn!("Region of interest very short ({} s)", end_s - start_s); 237 | } 238 | } 239 | 240 | let mut current_index = 0; 241 | let mut time_to_index: HashMap = HashMap::new(); 242 | let mut is_recording: bool = start.is_none(); 243 | let start = start.map(|s| s.to_string()); 244 | let end = end.map(|s| s.to_string()); 245 | for r in parsed_rows { 246 | let (event_name, time, socket, core, cpu, node, unit, value): OutputRow = r; 247 | 248 | // Skip all events before we have the breakpoint 249 | is_recording = match start { 250 | Some(ref start_time) => is_recording || time == start_time.as_str(), 251 | None => true, 252 | }; 253 | is_recording = match end { 254 | Some(ref end_time) => is_recording && time != end_time.as_str(), 255 | None => true, 256 | }; 257 | if !is_recording { 258 | continue; 259 | } 260 | 261 | if erronous_events.contains_key(&event_name) { 262 | // We do two 
passes here because we may get an erronous event only 263 | // at a later point in time in the CSV file 264 | // (when we already parsed this event a few times) 265 | continue; 266 | } 267 | 268 | if breakpoints.len() > 2 269 | && (event_name.contains(breakpoints[0].as_str()) 270 | || event_name.contains(breakpoints[1].as_str())) 271 | { 272 | // We don't need to breakpoints in the resulting CSV file 273 | continue; 274 | } 275 | 276 | // Skip all events that we don't want to attribute fully to our program: 277 | let include = if unit.trim() == "cpu" { 278 | match cpu_filter { 279 | Filter::All => true, 280 | Filter::Exclusive => cpus.iter().any(|c| c.cpu == cpu), 281 | Filter::Shared => all_sockets.contains(&socket), 282 | Filter::None => false, 283 | } 284 | } else if unit.starts_with("uncore") { 285 | sockets.contains(&socket) 286 | } else { 287 | error!("Unkown unit '{}', not included!", unit); 288 | false 289 | }; 290 | 291 | if !include { 292 | // Skip this event 293 | continue; 294 | } 295 | 296 | if !time_to_index.contains_key(&time) { 297 | time_to_index.insert(time.clone(), current_index); 298 | current_index += 1; 299 | } 300 | 301 | writer 302 | .encode(&[ 303 | event_name.as_str(), 304 | format!("{}", *time_to_index.get(&time).unwrap()).as_str(), 305 | time.as_str(), 306 | socket.to_string().as_str(), 307 | core.to_string().as_str(), 308 | cpu.to_string().as_str(), 309 | node.to_string().as_str(), 310 | unit.as_str(), 311 | value.to_string().as_str(), 312 | ]) 313 | .unwrap(); 314 | } 315 | 316 | Ok(()) 317 | } 318 | 319 | /// Extracts the data and writes it to a CSV file that looks like this: 320 | /// "EVENT_NAME", "INDEX", "TIME", "SOCKET", "CORE", "CPU", "NODE", "UNIT", "SAMPLE_VALUE" 321 | fn parse_perf_file( 322 | path: &Path, 323 | event_names: Vec<&str>, 324 | writer: &mut csv::Writer, 325 | ) -> io::Result<()> { 326 | // Check if it's a file: 327 | let meta: Metadata = fs::metadata(path)?; 328 | if !meta.file_type().is_file() { 329 | error!("Not a file {:?}", path); 330 | } 331 | // TODO: Should just pass Path to PerfFile 332 | let mut file = File::open(path)?; 333 | let mut buf: Vec = Vec::with_capacity(meta.len() as usize); 334 | file.read_to_end(&mut buf)?; 335 | let pf = PerfFile::new(buf); 336 | 337 | // debug!("GroupDescriptions: {:?}", pf.get_group_descriptions()); 338 | // debug!("EventDescription: {:?}", pf.get_event_description()); 339 | 340 | let event_desc = pf.get_event_description().unwrap(); 341 | let event_info: Vec<(&EventDesc, &&str)> = event_desc.iter().zip(event_names.iter()).collect(); 342 | // debug!("Event Infos: {:?}", event_info); 343 | 344 | for e in pf.data() { 345 | if e.header.event_type != EventType::Sample { 346 | continue; 347 | } 348 | 349 | match e.data { 350 | EventData::Sample(rec) => { 351 | // println!("{:?}", rec); 352 | let time = format!("{}", rec.time.unwrap()); 353 | let ptid = rec.ptid.unwrap(); 354 | let _pid = format!("{}", ptid.pid); 355 | let _tid = format!("{}", ptid.tid); 356 | let cpu = format!("{}", rec.cpu.unwrap().cpu); 357 | // let ip = format!("0x{:x}", rec.ip.unwrap()); 358 | 359 | let v = rec.v.unwrap(); 360 | assert!(verify_events_in_order(&event_desc, &v.values)); 361 | // TODO: verify event names match EventDesc in `event_info`! 
362 | 363 | for reading in v.values.iter() { 364 | let (event_count, maybe_id) = *reading; 365 | let id = maybe_id.unwrap(); 366 | let &(_, name) = event_info.iter().find(|ev| ev.0.ids.contains(&id)).unwrap(); 367 | let sample_value = format!("{}", event_count); 368 | 369 | writer 370 | .encode(&[name, time.as_str(), cpu.as_str(), sample_value.as_str()]) 371 | .unwrap(); 372 | } 373 | } 374 | _ => unreachable!("Should not happen"), 375 | } 376 | } 377 | 378 | Ok(()) 379 | } 380 | 381 | #[derive(Debug, Eq, PartialEq, Clone, Copy)] 382 | enum Filter { 383 | All, 384 | Exclusive, 385 | Shared, 386 | None, 387 | } 388 | 389 | impl Filter { 390 | fn new(what: &str) -> Filter { 391 | match what { 392 | "all" => Filter::All, 393 | "exclusive" => Filter::Exclusive, 394 | "shared" => Filter::Shared, 395 | "none" => Filter::None, 396 | _ => panic!("clap-rs should ensure nothing else is passed..."), 397 | } 398 | } 399 | } 400 | 401 | pub fn aggregate(path: &Path, cpu_filter: &str, uncore_filter: &str, save_to: &Path) { 402 | if !path.exists() { 403 | error!("Input directory does not exist {:?}", path); 404 | process::exit(1); 405 | } 406 | 407 | let mut lscpu_file: PathBuf = path.to_path_buf(); 408 | lscpu_file.push("lscpu.csv"); 409 | let mut numactl_file: PathBuf = path.to_path_buf(); 410 | numactl_file.push("numactl.dat"); 411 | let mt = MachineTopology::from_files(&lscpu_file, &numactl_file); 412 | 413 | let mut run_config: PathBuf = path.to_path_buf(); 414 | run_config.push("run.toml"); 415 | 416 | let (cpus, breakpoints) = if run_config.as_path().exists() { 417 | let mut file = File::open(run_config.as_path()).expect("run.toml file does not exist?"); 418 | let mut run_string = String::new(); 419 | let _ = file.read_to_string(&mut run_string).unwrap(); 420 | let mut parser = toml::Parser::new(run_string.as_str()); 421 | let doc = match parser.parse() { 422 | Some(doc) => doc, 423 | None => { 424 | error!("Can't parse the run.toml file:\n{:?}", parser.errors); 425 | process::exit(3); 426 | } 427 | }; 428 | 429 | let a: &toml::Table = doc["a"] 430 | .as_table() 431 | .expect("run.toml: 'a' should be a table."); 432 | let deployment: &toml::Table = doc 433 | .get("deployment") 434 | .expect("deployment?") 435 | .as_table() 436 | .expect("run.toml: 'a.deployment' should be a table."); 437 | let cpus: Vec = deployment 438 | .get("a") 439 | .expect("deployment.a") 440 | .as_slice() 441 | .expect("run.tom: 'a.deployment.a' should be an array") 442 | .iter() 443 | .map(|c| { 444 | c.as_table().expect("table")["cpu"] 445 | .as_integer() 446 | .expect("int") as u64 447 | }) 448 | .collect(); 449 | let breakpoints: Vec = a 450 | .get("breakpoints") 451 | .expect("no breakpoints?") 452 | .as_slice() 453 | .expect("breakpoints not an array?") 454 | .iter() 455 | .map(|s| s.as_str().expect("breakpoint not a string?").to_string()) 456 | .collect(); 457 | 458 | (cpus, breakpoints) 459 | } else { 460 | debug!("Couldn't find a run.toml, we include counter values from all CPUs and sockets"); 461 | let cpus: Vec = mt.cores(); 462 | // No breakpoints 463 | let breakpoint: Vec = Vec::new(); 464 | (cpus, breakpoint) 465 | }; 466 | 467 | // All the CPUs this program is (exclusively) running on: 468 | let all_cpus: Vec<&CpuInfo> = cpus 469 | .into_iter() 470 | .map(|c| { 471 | mt.cpu(c) 472 | .expect("Invalid CPU in run.toml or wrong lscpu.csv?") 473 | }) 474 | .collect(); 475 | 476 | // All the sockets this program is running on: 477 | let mut all_sockets: Vec = all_cpus.iter().map(|c| c.socket).collect(); 478 | 
all_sockets.sort(); 479 | all_sockets.dedup(); 480 | 481 | let uncore_filter = Filter::new(uncore_filter); 482 | let cpu_filter = Filter::new(cpu_filter); 483 | 484 | let mut considered_sockets: Vec = Vec::new(); 485 | // Find out if we should include the uncore events for every socket that we're running on 486 | match uncore_filter { 487 | Filter::Exclusive => { 488 | for socket in all_sockets.into_iter() { 489 | let socket_set: HashSet = 490 | mt.cpus_on_socket(socket).iter().map(|c| c.cpu).collect(); 491 | let program_set: HashSet = all_cpus.iter().map(|c| c.cpu).collect(); 492 | let diff: Vec = socket_set.difference(&program_set).cloned().collect(); 493 | 494 | if diff.len() == 0 { 495 | debug!( 496 | "Uncore from socket {:?} considered since A uses it exclusively.", 497 | socket 498 | ); 499 | considered_sockets.push(socket); 500 | } 501 | } 502 | } 503 | Filter::All => considered_sockets.append(&mut mt.sockets()), 504 | Filter::Shared => { 505 | debug!( 506 | "Uncore from sockets {:?} added since A uses these sockets at least partially.", 507 | all_sockets 508 | ); 509 | considered_sockets.append(&mut all_sockets); 510 | } 511 | Filter::None => debug!("Ignore all uncore events."), 512 | }; 513 | 514 | // Read perf.csv file: 515 | let mut csv_data: PathBuf = path.to_owned(); 516 | csv_data.push("perf.csv"); 517 | let csv_data_path = csv_data.as_path(); 518 | if !csv_data_path.exists() { 519 | error!("File not found: {:?}", csv_data_path); 520 | return; 521 | } 522 | type Row = (String, String, String, String, String, String); 523 | let mut rdr = csv::Reader::from_file(csv_data_path).unwrap(); 524 | let rows = rdr.decode().collect::>>().unwrap(); 525 | 526 | // Create result.csv file: 527 | let csv_result: PathBuf = save_to.to_owned(); 528 | let mut wrtr = csv::Writer::from_file(csv_result.as_path()).unwrap(); 529 | wrtr.encode(&[ 530 | "EVENT_NAME", 531 | "INDEX", 532 | "TIME", 533 | "SOCKET", 534 | "CORE", 535 | "CPU", 536 | "NODE", 537 | "UNIT", 538 | "SAMPLE_VALUE", 539 | ]) 540 | .unwrap(); 541 | 542 | // Write content in result.csv 543 | for row in rows { 544 | let (_, event_names, _, _, file, _) = row; 545 | let _string_names: Vec<&str> = event_names.split(",").collect(); 546 | 547 | let mut perf_data = path.to_owned(); 548 | perf_data.push(&file); 549 | 550 | let file_ext = perf_data 551 | .extension() 552 | .expect("File does not have an extension"); 553 | match file_ext.to_str().unwrap() { 554 | "data" => parse_perf_file( 555 | perf_data.as_path(), 556 | event_names.split(",").collect(), 557 | &mut wrtr, 558 | ) 559 | .unwrap(), 560 | "csv" => parse_perf_csv_file( 561 | &mt, 562 | &all_cpus, 563 | cpu_filter, 564 | &considered_sockets, 565 | &breakpoints, 566 | perf_data.as_path(), 567 | &mut wrtr, 568 | ) 569 | .unwrap(), 570 | _ => panic!("Unknown file extension, I can't parse this."), 571 | }; 572 | } 573 | 574 | info!("Merging completed"); 575 | } 576 | -------------------------------------------------------------------------------- /src/cmd.yml: -------------------------------------------------------------------------------- 1 | name: autoperf 2 | version: "0.2" 3 | author: Gerd Zellweger 4 | about: Use performance counters with little domain knowledge. 5 | args: 6 | - verbose: 7 | short: v 8 | multiple: true 9 | help: Do more verbose logging (-v = info, -vv = debug, -vvv = trace, default is warn). 10 | subcommands: 11 | - profile: 12 | settings: 13 | - TrailingVarArg 14 | about: Measure all available H/W events for a command using `perf stat`. 
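      # Illustrative usage (the profiled command is a placeholder, not part of this config):
      #   autoperf profile -o out ./my_benchmark
      # The recorded counters can then be merged with `autoperf aggregate out`.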
15 | args: 16 | - dryrun: 17 | short: d 18 | long: dryrun 19 | help: Don't run anything, just print what we would do. 20 | - output: 21 | short: o 22 | long: output 23 | help: Set output directory. 24 | value_name: DIRECTORY 25 | takes_value: true 26 | - COMMAND: 27 | help: Command to execute. 28 | required: true 29 | multiple: true 30 | - pair: 31 | about: Run a set of programs pairwise together on the machine (while measuring all counters). 32 | args: 33 | - dryrun: 34 | short: d 35 | long: dryrun 36 | help: Don't run anything, just print what we would do. 37 | - start: 38 | short: b 39 | long: base 40 | help: Skip the first x configuration (used to evaluate the same manifest simultaneously on multiple identical machines). 41 | takes_value: true 42 | - step: 43 | short: s 44 | long: step 45 | help: Only evaluate every x-th configuration (used to evaluate the same manifest simultaneously on multiple identical machines). 46 | takes_value: true 47 | - directory: 48 | help: Path of manifest directory. 49 | required: true 50 | - aggregate: 51 | about: Merge counter measurements generated with the `profile` or `pair` command in a single CSV file. 52 | args: 53 | - core: 54 | short: c 55 | long: core 56 | takes_value: true 57 | possible_values: [ all, shared, exclusive, none ] 58 | help: Which core events to include (default all, exclusive = only cores where program was running on, none = drop core events). 59 | - uncore: 60 | short: u 61 | long: uncore 62 | takes_value: true 63 | possible_values: [ all, shared, exclusive, none ] 64 | help: Which uncore events to include (default all, exclusive = only uncore units that are exclusively used by program, none = drop uncore events). 65 | - output: 66 | short: o 67 | long: output 68 | help: Set output file (defaults to /results.csv). 69 | value_name: FILENAME 70 | takes_value: true 71 | - directory: 72 | help: Set input directory. 73 | required: true 74 | - stats: 75 | about: Dump statistics about performance events on Intel architectures. 76 | args: 77 | - directory: 78 | help: Where to store the generated output (CSV) files. 79 | required: true 80 | - search: 81 | about: Find performance events potentially not covered in Intel publicly available manuals. 82 | 83 | # 84 | # We don't support these two subcommands at the moment: 85 | # 86 | 87 | # - scale: 88 | # settings: 89 | # - TrailingVarArg 90 | # about: Measure all available H/W events for an application using `perf stat`. Do multiple runs and increase the number of threads. 91 | # args: 92 | # - debug: 93 | # short: d 94 | # long: debug 95 | # help: Print debug information verbosely. 96 | # - dryrun: 97 | # short: r 98 | # long: dryrun 99 | # help: Don't run anything, just print what it would do. 100 | # - directory: 101 | # help: Path of manifest directory. 102 | # required: true 103 | 104 | # - mkgroup: 105 | # settings: 106 | # - TrailingVarArg 107 | # about: Given a machine, form the largest possible group of events from a given ranking input file. 108 | # args: 109 | # - file: 110 | # short: i 111 | # long: input 112 | # help: Input file (weka ranking). 
113 | # value_name: FILE 114 | # takes_value: true 115 | -------------------------------------------------------------------------------- /src/counters.toml: -------------------------------------------------------------------------------- 1 | [broadwell] 2 | family = 6 3 | models = [61, 71] 4 | fixed_counters = { CPU = 3 } 5 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 6 | 7 | [broadwellX] 8 | family = 6 9 | models = [79, 86] 10 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 11 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 12 | 13 | [haswell] 14 | family = 6 15 | models = [60, 69, 70] 16 | fixed_counters = { CPU = 3 } 17 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 18 | 19 | [haswellX] 20 | family = 6 21 | models = [63] 22 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 23 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 24 | 25 | [ivybridge] 26 | family = 6 27 | models = [58] 28 | fixed_counters = { CPU = 3 } 29 | programmable_counters = { CPU = 4, ARB = 2, CBO = 2 } 30 | 31 | [ivybridgeep] 32 | family = 6 33 | models = [62] 34 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 35 | programmable_counters = { CPU = 4, UBO = 2, CBO = 4, HA = 4, IMC = 4, IRP = 4, PCU = 4, QPI = 4, R2PCIE = 4, R3QPI = 3, SBO = 4 } 36 | 37 | [skylake] 38 | family = 6 39 | models = [78, 94, 142, 158] 40 | fixed_counters = { CPU = 3 } 41 | programmable_counters = { CPU = 4, CBO = 2, ARB = 2 } 42 | 43 | [skylakeX] 44 | family = 6 45 | models = [85] 46 | fixed_counters = { CPU = 3, UBO = 1, IMC = 1 } 47 | programmable_counters = { CPU = 4, UPI = 4, CHA = 4, IIO = 4, IMC = 4, IRP = 4, M2M = 4, M3UPI = 4, PCU = 4, UBO = 2, CBO = 4 } 48 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::{load_yaml, App}; 2 | use std::path::{Path, PathBuf}; 3 | use std::str::FromStr; 4 | 5 | mod aggregate; 6 | mod mkgroup; 7 | mod pair; 8 | mod profile; 9 | mod scale; 10 | mod search; 11 | mod stats; 12 | mod util; 13 | use log::*; 14 | 15 | use aggregate::aggregate; 16 | use pair::pair; 17 | use profile::profile; 18 | use stats::stats; 19 | 20 | use mkgroup::mkgroup; 21 | use search::print_unknown_events; 22 | 23 | fn setup_logging(lvl: &str) { 24 | use env_logger::Env; 25 | env_logger::from_env(Env::default().default_filter_or(lvl)).init(); 26 | } 27 | 28 | fn main() { 29 | let yaml = load_yaml!("cmd.yml"); 30 | let matches = App::from_yaml(yaml).get_matches(); 31 | 32 | let level = match matches.occurrences_of("verbose") { 33 | 0 => "warn", 34 | 1 => "info", 35 | 2 => "debug", 36 | 3 => "trace", 37 | _ => "trace", 38 | }; 39 | setup_logging(level); 40 | 41 | if let Some(matches) = matches.subcommand_matches("profile") { 42 | let output_path = Path::new(matches.value_of("output").unwrap_or("out")); 43 | let cmd: Vec = matches 44 | .values_of("COMMAND") 45 | .unwrap() 46 | .map(|s| s.to_string()) 47 | .collect(); 48 | 49 | let dryrun: bool = matches.is_present("dryrun"); 50 | profile( 51 | output_path, 52 | ".", 53 | cmd, 54 | Default::default(), 55 | Default::default(), 56 | false, 57 | None, 58 | dryrun, 59 | ); 60 | } 61 | if let Some(matches) = matches.subcommand_matches("aggregate") { 62 | let input_directory = Path::new(matches.value_of("directory").unwrap_or("out")); 63 | let output_path: PathBuf = 
match matches.value_of("output") { 64 | Some(v) => PathBuf::from(v), 65 | None => { 66 | let mut pb = input_directory.to_path_buf(); 67 | pb.push("results.csv"); 68 | pb 69 | } 70 | }; 71 | let uncore_filter: &str = matches.value_of("uncore").unwrap_or("all"); 72 | let core_filter: &str = matches.value_of("core").unwrap_or("all"); 73 | 74 | aggregate( 75 | input_directory, 76 | core_filter, 77 | uncore_filter, 78 | &output_path.as_path(), 79 | ); 80 | } 81 | if let Some(matches) = matches.subcommand_matches("pair") { 82 | let output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 83 | let start: usize = usize::from_str(matches.value_of("start").unwrap_or("0")).unwrap_or(0); 84 | let stepping: usize = usize::from_str(matches.value_of("step").unwrap_or("1")).unwrap_or(1); 85 | if stepping == 0 { 86 | error!("skip amount must be > 0"); 87 | std::process::exit(1); 88 | } 89 | 90 | let dryrun: bool = matches.is_present("dryrun"); 91 | pair(output_path, dryrun, start, stepping); 92 | } 93 | if let Some(matches) = matches.subcommand_matches("scale") { 94 | let _output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 95 | let _dryrun: bool = matches.is_present("dryrun"); 96 | // scale(output_path, dryrun); 97 | } 98 | if let Some(matches) = matches.subcommand_matches("stats") { 99 | let output_path = Path::new(matches.value_of("directory").unwrap_or("out")); 100 | stats(output_path); 101 | } 102 | if let Some(_matches) = matches.subcommand_matches("search") { 103 | print_unknown_events(); 104 | } 105 | if let Some(matches) = matches.subcommand_matches("mkgroup") { 106 | let ranking_file = Path::new(matches.value_of("file").unwrap_or("notfound")); 107 | mkgroup(ranking_file); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/mkgroup.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use std::path::Path; 4 | 5 | use csv; 6 | use phf::Map; 7 | 8 | use super::profile::{MonitoringUnit, PerfEvent, PerfEventGroup}; 9 | use log::*; 10 | use x86::perfcnt::intel::{events, EventDescription}; 11 | 12 | pub fn mkgroup(ranking_file: &Path) { 13 | let core_counter: &'static Map<&'static str, EventDescription<'static>> = &events().unwrap(); 14 | let uncore_counter: &'static Map<&'static str, EventDescription<'static>> = &events().unwrap(); 15 | 16 | let mut res = HashMap::with_capacity(11); 17 | res.insert(MonitoringUnit::CPU, 4); 18 | res.insert(MonitoringUnit::UBox, 2); 19 | res.insert(MonitoringUnit::CBox, 4); 20 | res.insert(MonitoringUnit::HA, 4); 21 | res.insert(MonitoringUnit::IMC, 4); 22 | res.insert(MonitoringUnit::IRP, 4); 23 | res.insert(MonitoringUnit::PCU, 4); 24 | res.insert(MonitoringUnit::QPI, 4); 25 | res.insert(MonitoringUnit::R2PCIe, 4); 26 | res.insert(MonitoringUnit::R3QPI, 2); // According to the manual this is 3 but then it multiplexes... 27 | res.insert(MonitoringUnit::QPI, 4); // Not in the manual? 
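    // These per-unit counter limits are hard-coded rather than read from counters.toml; they
    // roughly match the EP-class server entries there, with R3QPI capped at 2 so the resulting
    // group never has to multiplex (see the comment above).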
28 | 29 | // Accuracy,Config,Error,Event,F1 score,Precision/Recall,Samples,Samples detail,Test App 30 | // Accuracy,Error,Event,F1 score,Precision,Recall,Samples Test 0,Samples Test 1,Samples Test Total,Samples Training 0,Samples Training 1,Samples Training Total,Tested Application,Training Configs 31 | type OutputRow = ( 32 | f64, 33 | String, 34 | String, 35 | f64, 36 | f64, 37 | f64, 38 | String, 39 | String, 40 | String, 41 | String, 42 | ); 43 | let mut rdr = csv::Reader::from_file(ranking_file) 44 | .unwrap() 45 | .has_headers(true); 46 | let mut events_added = HashMap::with_capacity(25); 47 | 48 | let mut group = PerfEventGroup::new(&res); 49 | 50 | for row in rdr.decode() { 51 | let (_, _, feature_name, _, _, _, _, _, _, _): OutputRow = row.unwrap(); 52 | // println!("{:?}", feature_name); 53 | let splits: Vec<&str> = feature_name.splitn(2, ".").collect(); 54 | let event_name = String::from(splits[1]); 55 | let feature_name = String::from(feature_name.clone()); 56 | 57 | let maybe_e: Option<&'static EventDescription> = core_counter.get(event_name.as_str()); 58 | 59 | // If we already measure the event, just return it (in case a feature shows up with AVG. and 60 | // STD.) 61 | if events_added.contains_key(&event_name) { 62 | println!("{}", feature_name); 63 | } else { 64 | // Otherwise, let's see if we can still add it to the group: 65 | match maybe_e { 66 | Some(event) => match group.add_event(PerfEvent(event)) { 67 | Ok(()) => { 68 | events_added.insert(event_name, true); 69 | println!("{}", feature_name); 70 | } 71 | Err(e) => info!( 72 | "Unable to add event: '{}' to {:?} because of '{}'", 73 | event_name, event.unit, e 74 | ), 75 | }, 76 | None => { 77 | let maybe_ue: Option<&'static EventDescription> = 78 | uncore_counter.get(event_name.as_str()); 79 | match maybe_ue { 80 | Some(uncore_event) => match group.add_event(PerfEvent(uncore_event)) { 81 | Ok(()) => { 82 | events_added.insert(event_name, true); 83 | println!("{}", feature_name); 84 | } 85 | Err(e) => info!( 86 | "Unable to add event: '{}' to {:?} because of '{}'", 87 | event_name, uncore_event.unit, e 88 | ), 89 | }, 90 | None => { 91 | // panic!("Didn't find event {} in data set?", event_name); 92 | } 93 | } 94 | } 95 | }; 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/pair.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::fs::File; 3 | use std::io; 4 | use std::io::prelude::*; 5 | use std::path::Path; 6 | use std::path::PathBuf; 7 | use std::process; 8 | use std::process::{Child, Command, Stdio}; 9 | use std::thread; 10 | 11 | use std::fmt; 12 | 13 | use itertools::{iproduct, Itertools}; 14 | use rustc_serialize::Encodable; 15 | use std::time::Duration; 16 | use wait_timeout::ChildExt; 17 | 18 | use log::*; 19 | use toml; 20 | 21 | use super::profile; 22 | use super::util::*; 23 | 24 | fn get_hostname() -> Option { 25 | use libc::gethostname; 26 | 27 | let mut buf: [i8; 64] = [0; 64]; 28 | let err = unsafe { gethostname(buf.as_mut_ptr(), buf.len()) }; 29 | 30 | if err != 0 { 31 | info!( 32 | "Can't read the hostname with gethostname: {}", 33 | io::Error::last_os_error() 34 | ); 35 | return None; 36 | } 37 | 38 | // find the first 0 byte (i.e. 
just after the data that gethostname wrote) 39 | let actual_len = buf.iter().position(|byte| *byte == 0).unwrap_or(buf.len()); 40 | let c_str: Vec = buf[..actual_len].into_iter().map(|i| *i as u8).collect(); 41 | 42 | Some(String::from_utf8(c_str).unwrap()) 43 | } 44 | 45 | #[derive(Debug, RustcEncodable)] 46 | struct Deployment<'a> { 47 | description: &'static str, 48 | a: Vec<&'a CpuInfo>, 49 | b: Vec<&'a CpuInfo>, 50 | mem: Vec, 51 | } 52 | 53 | impl<'a> Deployment<'a> { 54 | pub fn new( 55 | desc: &'static str, 56 | half_a: Vec<&'a CpuInfo>, 57 | half_b: Vec<&'a CpuInfo>, 58 | mem: Vec, 59 | ) -> Deployment<'a> { 60 | Deployment { 61 | description: desc, 62 | a: half_a, 63 | b: half_b, 64 | mem: mem, 65 | } 66 | } 67 | 68 | /// Split by just simply interleaving everything 69 | /// TODO: this only works because we make assumption on how CpuInfo is ordered.. 70 | pub fn split_interleaved( 71 | desc: &'static str, 72 | possible_groupings: Vec>, 73 | size: u64, 74 | ) -> Deployment<'a> { 75 | let mut cpus = possible_groupings.into_iter().last().unwrap(); 76 | 77 | let cpus_len = cpus.len(); 78 | assert!(cpus_len % 2 == 0); 79 | 80 | let upper_half = cpus.split_off(cpus_len / 2); 81 | let lower_half = cpus; 82 | 83 | let mut node: NodeInfo = lower_half[0].node; 84 | node.memory = size; 85 | 86 | Deployment::new(desc, lower_half, upper_half, vec![node]) 87 | } 88 | 89 | /// Split but makes sure a group shares the SMT threads 90 | pub fn split_smt_aware( 91 | desc: &'static str, 92 | possible_groupings: Vec>, 93 | size: u64, 94 | ) -> Deployment<'a> { 95 | let cpus = possible_groupings.into_iter().last().unwrap(); 96 | let cpus_len = cpus.len(); 97 | assert!(cpus_len % 2 == 0); 98 | 99 | let mut cores: Vec = cpus.iter().map(|c| c.core).collect(); 100 | assert!(cores.len() % 2 == 0); 101 | cores.sort(); 102 | cores.dedup(); 103 | 104 | let mut upper_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 105 | let mut lower_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 106 | 107 | for (i, core) in cores.into_iter().enumerate() { 108 | let cpus_on_core: Vec<&&CpuInfo> = cpus.iter().filter(|c| c.core == core).collect(); 109 | if i % 2 == 0 { 110 | lower_half.extend(cpus_on_core.into_iter()); 111 | } else { 112 | upper_half.extend(cpus_on_core.into_iter()); 113 | } 114 | } 115 | 116 | let mut node: NodeInfo = lower_half[0].node; 117 | node.memory = size; 118 | 119 | Deployment::new(desc, lower_half, upper_half, vec![node]) 120 | } 121 | 122 | /// Split but makes sure a group shares the SMT threads 123 | pub fn split_l3_aware( 124 | desc: &'static str, 125 | possible_groupings: Vec>, 126 | size: u64, 127 | ) -> Deployment<'a> { 128 | let cpus = possible_groupings.into_iter().last().unwrap(); 129 | let cpus_len = cpus.len(); 130 | assert!(cpus_len % 2 == 0); 131 | 132 | let mut l3s: Vec = cpus.iter().map(|c| c.l3).collect(); 133 | assert!(l3s.len() % 2 == 0); 134 | l3s.sort(); 135 | l3s.dedup(); 136 | 137 | let mut upper_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 138 | let mut lower_half: Vec<&CpuInfo> = Vec::with_capacity(cpus_len / 2); 139 | 140 | for (i, l3) in l3s.into_iter().enumerate() { 141 | let cpus_on_l3: Vec<&&CpuInfo> = cpus.iter().filter(|c| c.l3 == l3).collect(); 142 | if i % 2 == 0 { 143 | upper_half.extend(cpus_on_l3.into_iter()); 144 | } else { 145 | lower_half.extend(cpus_on_l3.into_iter()); 146 | } 147 | } 148 | 149 | let mut node: NodeInfo = lower_half[0].node; 150 | node.memory = size; 151 | 152 | Deployment::new(desc, lower_half, upper_half, vec![node]) 
153 | } 154 | } 155 | 156 | impl<'a> fmt::Display for Deployment<'a> { 157 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 158 | let a: Vec = self.a.iter().map(|c| c.cpu).collect(); 159 | let b: Vec = self.b.iter().map(|c| c.cpu).collect(); 160 | 161 | write!(f, "Deployment Plan for {}:\n", self.description)?; 162 | write!(f, "-- Program A cores: {:?}\n", a)?; 163 | write!(f, "-- Program B cores: {:?}\n", b)?; 164 | write!(f, "-- Use memory:\n")?; 165 | for n in self.mem.iter() { 166 | write!(f, " - On node {}: {} Bytes\n", n.node, n.memory)?; 167 | } 168 | Ok(()) 169 | } 170 | } 171 | 172 | #[derive(Debug, RustcEncodable)] 173 | struct Program<'a> { 174 | name: String, 175 | manifest_path: &'a Path, 176 | binary: String, 177 | working_dir: String, 178 | args: Vec, 179 | env: Vec<(String, String)>, 180 | antagonist_args: Vec, 181 | breakpoints: Vec, 182 | checkpoints: Vec, 183 | is_openmp: bool, 184 | is_parsec: bool, 185 | use_watch_repeat: bool, 186 | alone: bool, 187 | } 188 | 189 | impl<'a> Program<'a> { 190 | fn from_toml( 191 | manifest_path: &'a Path, 192 | config: &toml::Table, 193 | alone_default: bool, 194 | ) -> Program<'a> { 195 | let name: String = config["name"] 196 | .as_str() 197 | .expect("program.name not a string") 198 | .to_string(); 199 | let binary: String = config["binary"] 200 | .as_str() 201 | .expect("program.binary not a string") 202 | .to_string(); 203 | 204 | let default_working_dir = String::from(manifest_path.to_str().unwrap()); 205 | let working_dir: String = config 206 | .get("working_dir") 207 | .map_or(default_working_dir.clone(), |v| { 208 | v.as_str() 209 | .expect("program.working_dir not a string") 210 | .to_string() 211 | }) 212 | .replace("$MANIFEST_DIR", default_working_dir.as_str()); 213 | 214 | let openmp: bool = config.get("openmp").map_or(false, |v| { 215 | v.as_bool().expect("'program.openmp' should be boolean") 216 | }); 217 | let parsec: bool = config.get("parsec").map_or(false, |v| { 218 | v.as_bool().expect("'program.parsec' should be boolean") 219 | }); 220 | let watch_repeat: bool = config.get("use_watch_repeat").map_or(false, |v| { 221 | v.as_bool() 222 | .expect("'program.use_watch_repeat' should be boolean") 223 | }); 224 | let alone: bool = config.get("alone").map_or(alone_default, |v| { 225 | v.as_bool().expect("'program.alone' should be boolean") 226 | }); 227 | let args: Vec = config["arguments"] 228 | .as_slice() 229 | .expect("program.arguments not an array?") 230 | .iter() 231 | .map(|s| { 232 | s.as_str() 233 | .expect("program1 argument not a string?") 234 | .to_string() 235 | }) 236 | .collect(); 237 | let antagonist_args: Vec = 238 | config 239 | .get("antagonist_arguments") 240 | .map_or(args.clone(), |v| { 241 | v.as_slice() 242 | .expect("program.antagonist_arguments not an array?") 243 | .iter() 244 | .map(|s| { 245 | s.as_str() 246 | .expect("program2 argument not a string?") 247 | .to_string() 248 | }) 249 | .collect() 250 | }); 251 | let env: Vec<(String, String)> = config.get("env").map_or(Vec::new(), |t| { 252 | t.as_table() 253 | .expect("program.env not a table?") 254 | .iter() 255 | .map(|(k, v)| { 256 | ( 257 | k.as_str().to_string(), 258 | v.as_str() 259 | .expect("env value needs to be a string") 260 | .to_string(), 261 | ) 262 | }) 263 | .collect() 264 | }); 265 | 266 | let breakpoints: Vec = config.get("breakpoints").map_or(Vec::new(), |bs| { 267 | bs.as_slice() 268 | .expect("program.breakpoints not an array?") 269 | .iter() 270 | .map(|s| { 271 | s.as_str() 272 | .expect("program breakpoint not a 
string?") 273 | .to_string() 274 | }) 275 | .collect() 276 | }); 277 | // TODO: this is currently not in use (remove?) 278 | let checkpoints: Vec = config.get("checkpoints").map_or(Vec::new(), |cs| { 279 | cs.as_slice() 280 | .expect("program.checkpoints not an array?") 281 | .iter() 282 | .map(|s| { 283 | s.as_str() 284 | .expect("program checkpoint not a string?") 285 | .to_string() 286 | }) 287 | .collect() 288 | }); 289 | 290 | Program { 291 | name: name, 292 | manifest_path: manifest_path, 293 | binary: binary, 294 | is_openmp: openmp, 295 | is_parsec: parsec, 296 | env: env, 297 | alone: alone, 298 | working_dir: working_dir, 299 | use_watch_repeat: watch_repeat, 300 | args: args, 301 | antagonist_args: antagonist_args, 302 | breakpoints: breakpoints, 303 | // TODO: this is currently not in use (remove?) 304 | checkpoints: checkpoints, 305 | } 306 | } 307 | 308 | fn get_cmd(&self, antagonist: bool, cores: &Vec<&CpuInfo>) -> Vec { 309 | let nthreads = cores.len(); 310 | let mut cmd = vec![&self.binary]; 311 | 312 | if !antagonist { 313 | cmd.extend(self.args.iter()); 314 | } else { 315 | cmd.extend(self.antagonist_args.iter()); 316 | } 317 | 318 | cmd.iter() 319 | .map(|s| s.replace("$NUM_THREADS", format!("{}", nthreads).as_str())) 320 | .map(|s| { 321 | s.replace( 322 | "$MANIFEST_DIR", 323 | format!("{}", self.manifest_path.to_str().unwrap()).as_str(), 324 | ) 325 | }) 326 | .collect() 327 | } 328 | 329 | fn get_env(&self, antagonist: bool, cores: &Vec<&CpuInfo>) -> Vec<(String, String)> { 330 | let mut env: Vec<(String, String)> = Vec::with_capacity(2); 331 | let cpus: Vec = cores.iter().map(|c| format!("{}", c.cpu)).collect(); 332 | // TODO: remove this feature: 333 | if self.is_openmp { 334 | env.push((String::from("OMP_PROC_BIND"), String::from("true"))); 335 | env.push(( 336 | String::from("OMP_PLACES"), 337 | format!("{{{}}}", cpus.join(",")), 338 | )); 339 | } 340 | // TODO: remove this feature: 341 | else if self.is_parsec { 342 | assert!(!self.is_openmp); 343 | env.push(( 344 | String::from("LD_PRELOAD"), 345 | format!( 346 | "{}/bin/libhooks.so.0.0.0", 347 | self.manifest_path.to_str().unwrap() 348 | ), 349 | )); 350 | env.push((String::from("PARSEC_CPU_NUM"), format!("{}", cpus.len()))); 351 | env.push(( 352 | String::from("PARSEC_CPU_BASE"), 353 | format!("{}", cpus.join(",")), 354 | )); 355 | if antagonist { 356 | env.push((String::from("PARSEC_REPEAT"), String::from("1"))); 357 | } 358 | } 359 | 360 | // keep this one: 361 | for (k, v) in self.env.clone() { 362 | env.push((k, v)); 363 | } 364 | 365 | env 366 | } 367 | } 368 | 369 | #[derive(RustcEncodable)] 370 | struct Run<'a> { 371 | manifest_path: &'a Path, 372 | output_path: PathBuf, 373 | a: &'a Program<'a>, 374 | b: Option<&'a Program<'a>>, 375 | deployment: &'a Deployment<'a>, 376 | } 377 | 378 | impl<'a> Run<'a> { 379 | fn new( 380 | manifest_path: &'a Path, 381 | output_path: &'a Path, 382 | a: &'a Program<'a>, 383 | b: Option<&'a Program<'a>>, 384 | deployment: &'a Deployment, 385 | ) -> Run<'a> { 386 | let mut out_dir = output_path.to_path_buf(); 387 | out_dir.push(deployment.description); 388 | mkdir(&out_dir); 389 | match b { 390 | Some(p) => out_dir.push(format!("{}_vs_{}", a.name, p.name)), 391 | None => out_dir.push(a.name.as_str()), 392 | } 393 | 394 | Run { 395 | manifest_path: manifest_path, 396 | output_path: out_dir, 397 | a: a, 398 | b: b, 399 | deployment: deployment, 400 | } 401 | } 402 | 403 | fn profile_a(&self) -> io::Result<()> { 404 | let cmd = self.a.get_cmd(false, &self.deployment.a); 
405 | let env = self.a.get_env(false, &self.deployment.a); 406 | let mut bps: Vec = self.a.breakpoints.iter().map(|s| s.to_string()).collect(); 407 | // TODO: this is currently not in use (remove?) 408 | bps.extend(self.a.checkpoints.iter().map(|s| s.to_string())); 409 | // let cps = self.a.checkpoints.iter().map(|s| s.to_string()).collect(); 410 | 411 | debug!( 412 | "Spawning {:?} with environment {:?} breakpoints {:?}", 413 | cmd, env, bps 414 | ); 415 | profile::profile( 416 | &self.output_path, 417 | self.a.working_dir.as_str(), 418 | cmd, 419 | env, 420 | bps, 421 | false, 422 | None, 423 | false, 424 | ); 425 | Ok(()) 426 | } 427 | 428 | fn start_b(&mut self) -> Option { 429 | self.b.map(|b| { 430 | let mut command_args = b.get_cmd(true, &self.deployment.b); 431 | let env = b.get_env(true, &self.deployment.b); 432 | if b.use_watch_repeat { 433 | command_args.insert(0, String::from("-t")); 434 | command_args.insert(0, String::from("-n0")); 435 | command_args.insert(0, String::from("watch")); 436 | } 437 | 438 | debug!("Spawning {:?} with environment {:?}", command_args, env); 439 | debug!("Working dir for B is: {}", b.working_dir.as_str()); 440 | 441 | let mut cmd = Command::new(&command_args[0]); 442 | let cmd = cmd 443 | .stdout(Stdio::piped()) 444 | .current_dir(b.working_dir.as_str()) 445 | .stderr(Stdio::piped()) 446 | .args(&command_args[1..]); 447 | 448 | // Add the environment: 449 | for (key, value) in env { 450 | cmd.env(key, value); 451 | } 452 | 453 | match cmd.spawn() { 454 | Ok(child) => child, 455 | Err(_) => panic!("Can't spawn program B"), 456 | } 457 | }) 458 | } 459 | 460 | fn save_output(&self, filename: &str, what: &mut T) -> io::Result<()> { 461 | let mut stdout = String::new(); 462 | what.read_to_string(&mut stdout)?; 463 | let mut stdout_path = self.output_path.clone(); 464 | stdout_path.push(filename); 465 | let mut f = File::create(stdout_path.as_path())?; 466 | f.write_all(stdout.as_bytes()) 467 | } 468 | 469 | fn save_run_information(&self) -> io::Result<()> { 470 | let mut run_toml_path = self.output_path.clone(); 471 | run_toml_path.push("run.toml"); 472 | let mut f = File::create(run_toml_path.as_path())?; 473 | let mut e = toml::Encoder::new(); 474 | self.encode(&mut e).unwrap(); 475 | f.write_all(toml::encode_str(&e.toml).as_bytes())?; 476 | 477 | let mut run_txt_path = self.output_path.clone(); 478 | run_txt_path.push("run.txt"); 479 | let mut f = File::create(run_txt_path.as_path())?; 480 | f.write_all(format!("{}", self).as_bytes()) 481 | } 482 | 483 | fn is_completed(&self) -> bool { 484 | // Is this run already done (in case we restart): 485 | let mut completed_file: PathBuf = self.output_path.to_path_buf(); 486 | completed_file.push("completed"); 487 | if completed_file.exists() { 488 | true 489 | } else { 490 | false 491 | } 492 | } 493 | 494 | fn profile(&mut self) -> io::Result<()> { 495 | mkdir(&self.output_path); 496 | if self.is_completed() { 497 | warn!( 498 | "Run {} already completed, skipping.", 499 | self.output_path.to_string_lossy() 500 | ); 501 | return Ok(()); 502 | } 503 | 504 | self.save_run_information()?; 505 | 506 | // Profile together with B 507 | let maybe_app_b: Option = self.start_b(); 508 | if maybe_app_b.is_some() { 509 | debug!("Wait for B to warmup before starting to profile A"); 510 | let one_min = Duration::from_millis(60000); 511 | thread::sleep(one_min); 512 | } 513 | 514 | self.profile_a()?; 515 | 516 | match maybe_app_b { 517 | Some(mut app_b) => { 518 | match 
app_b.wait_timeout(Duration::from_millis(200)).unwrap() { 519 | Some(status) => { 520 | // The Application B has already exited, this means it probably crashed 521 | // while we were profiling (bad). We can't use these results. 522 | app_b 523 | .stdout 524 | .map(|mut c| self.save_output("B_stdout.txt", &mut c)); 525 | app_b 526 | .stderr 527 | .map(|mut c| self.save_output("B_stderr.txt", &mut c)); 528 | 529 | let mut completed_path = self.output_path.clone(); 530 | completed_path.push("completed"); 531 | fs::remove_file(completed_path)?; 532 | 533 | panic!( 534 | "B has crashed during measurements {:?}. This is bad.", 535 | status.code() 536 | ); 537 | // TODO: save error code and continue (?) 538 | } 539 | None => { 540 | app_b.kill()?; 541 | app_b.wait()?; 542 | app_b 543 | .stdout 544 | .map(|mut c| self.save_output("B_stdout.txt", &mut c)); 545 | app_b 546 | .stderr 547 | .map(|mut c| self.save_output("B_stderr.txt", &mut c)); 548 | } 549 | } 550 | } 551 | None => {} 552 | }; 553 | 554 | Ok(()) 555 | } 556 | } 557 | 558 | impl<'a> fmt::Display for Run<'a> { 559 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 560 | write!( 561 | f, 562 | "A: ENV = {:?} CMD = {:?}\n", 563 | self.a.get_env(false, &self.deployment.a), 564 | self.a.get_cmd(false, &self.deployment.a) 565 | )?; 566 | write!(f, "A Breakpoints: {:?}\n", self.a.breakpoints)?; 567 | write!(f, "A Checkpoints: {:?}\n", self.a.checkpoints)?; 568 | match self.b { 569 | Some(b) => { 570 | write!( 571 | f, 572 | "B: {:?} {:?}\n", 573 | b.get_env(true, &self.deployment.b), 574 | b.get_cmd(true, &self.deployment.b) 575 | )?; 576 | write!(f, "{}", &self.deployment)?; 577 | } 578 | None => { 579 | write!(f, "No other program running.")?; 580 | } 581 | } 582 | Ok(()) 583 | } 584 | } 585 | 586 | pub fn pair(manifest_folder: &Path, dryrun: bool, start: usize, stepping: usize) { 587 | let canonical_manifest_path = 588 | fs::canonicalize(&manifest_folder).expect("canonicalize manifest path does not work"); 589 | 590 | let mut out_dir = canonical_manifest_path.to_path_buf(); 591 | let hostname = get_hostname().unwrap_or(String::from("unknown")); 592 | out_dir.push(hostname); 593 | mkdir(&out_dir); 594 | 595 | let mt = MachineTopology::new(); 596 | 597 | let mut manifest: PathBuf = canonical_manifest_path.to_path_buf(); 598 | manifest.push("manifest.toml"); 599 | let mut file = File::open(manifest.as_path()).expect("manifest.toml file does not exist?"); 600 | let mut manifest_string = String::new(); 601 | let _ = file.read_to_string(&mut manifest_string).unwrap(); 602 | let mut parser = toml::Parser::new(manifest_string.as_str()); 603 | let doc = match parser.parse() { 604 | Some(doc) => doc, 605 | None => { 606 | error!("Can't parse the manifest file:\n{:?}", parser.errors); 607 | process::exit(1); 608 | } 609 | }; 610 | let experiment: &toml::Table = doc["experiment"] 611 | .as_table() 612 | .expect("Error in manifest.toml: 'experiment' should be a table."); 613 | let configuration: &[toml::Value] = experiment["configurations"] 614 | .as_slice() 615 | .expect("Error in manifest.toml: 'configuration' attribute should be a list."); 616 | let configs: Vec = configuration 617 | .iter() 618 | .map(|s| { 619 | s.as_str() 620 | .expect("configuration elements should be strings") 621 | .to_string() 622 | }) 623 | .collect(); 624 | let run_alone: bool = experiment 625 | .get("alone") 626 | .map_or(true, |v| v.as_bool().expect("'alone' should be boolean")); 627 | let profile_only: Option> = experiment.get("profile_only_a").map(|progs| { 628 | 
progs 629 | .as_slice() 630 | .expect("Error in manifest.toml: 'profile_only_a' should be a list.") 631 | .into_iter() 632 | .map(|p| { 633 | p.as_str() 634 | .expect("profile_only_a elements should name programs (strings)") 635 | .to_string() 636 | }) 637 | .collect() 638 | }); 639 | let profile_only_b: Option> = experiment.get("profile_only_b").map(|progs| { 640 | progs 641 | .as_slice() 642 | .expect("Error in manifest.toml: 'profile_only_b' should be a list.") 643 | .into_iter() 644 | .map(|p| { 645 | p.as_str() 646 | .expect("profile_only_b elements should name programs (strings)") 647 | .to_string() 648 | }) 649 | .collect() 650 | }); 651 | 652 | let mut programs: Vec = Vec::with_capacity(2); 653 | for (key, _value) in &doc { 654 | if key.starts_with("program") { 655 | let program_desc: &toml::Table = doc[key] 656 | .as_table() 657 | .expect("Error in manifest.toml: 'program' should be a table."); 658 | programs.push(Program::from_toml( 659 | &canonical_manifest_path, 660 | program_desc, 661 | run_alone, 662 | )); 663 | } 664 | } 665 | 666 | let mut deployments: Vec = Vec::with_capacity(4); 667 | for config in configs { 668 | match config.as_str() { 669 | "L1-SMT" => deployments.push(Deployment::split_interleaved( 670 | "L1-SMT", 671 | mt.same_l1(), 672 | mt.l1_size().unwrap_or(0), 673 | )), 674 | "L3-SMT" => deployments.push(Deployment::split_interleaved( 675 | "L3-SMT", 676 | mt.same_l3(), 677 | mt.l3_size().unwrap_or(0), 678 | )), 679 | "L3-SMT-cores" => deployments.push(Deployment::split_smt_aware( 680 | "L3-SMT-cores", 681 | mt.same_l3(), 682 | mt.l3_size().unwrap_or(0), 683 | )), 684 | "L3-cores" => deployments.push(Deployment::split_smt_aware( 685 | "L3-cores", 686 | mt.same_l3_cores(), 687 | mt.l3_size().unwrap_or(0), 688 | )), 689 | "Full-L3" => deployments.push(Deployment::split_l3_aware( 690 | "Full-L3", 691 | mt.whole_machine_cores(), 692 | mt.l3_size().unwrap_or(0), 693 | )), 694 | "Full-SMT-L3" => deployments.push(Deployment::split_l3_aware( 695 | "Full-SMT-L3", 696 | mt.whole_machine(), 697 | mt.l3_size().unwrap_or(0), 698 | )), 699 | "Full-cores" => deployments.push(Deployment::split_interleaved( 700 | "Full-cores", 701 | mt.whole_machine_cores(), 702 | mt.l3_size().unwrap_or(0), 703 | )), 704 | "Full-SMT-cores" => deployments.push(Deployment::split_smt_aware( 705 | "Full-SMT-cores", 706 | mt.whole_machine(), 707 | mt.l3_size().unwrap_or(0), 708 | )), 709 | 710 | _ => error!("Ignored unknown deployment config '{}'.", config), 711 | }; 712 | } 713 | 714 | // Add all possible pairs: 715 | let mut pairs: Vec<(&Program, Option<&Program>)> = Vec::new(); 716 | for p in programs.iter() { 717 | pairs.push((p, None)); 718 | } 719 | for (a, b) in iproduct!(programs.iter(), programs.iter()) { 720 | pairs.push((a, Some(b))); 721 | } 722 | 723 | // Filter out the pairs we do not want to execute: 724 | let mut runs: Vec = Vec::new(); 725 | for (a, b) in pairs.into_iter() { 726 | let profile_a = profile_only 727 | .as_ref() 728 | .map_or(true, |ps| ps.contains(&a.name)); 729 | let profile_b = !b.is_none() 730 | && profile_only_b 731 | .as_ref() 732 | .map_or(profile_a, |ps| ps.contains(&b.unwrap().name)); 733 | if !profile_a && !profile_b { 734 | continue; 735 | } 736 | 737 | for d in deployments.iter() { 738 | if b.is_none() && (!run_alone || !a.alone) { 739 | continue; 740 | } 741 | runs.push(Run::new( 742 | &canonical_manifest_path, 743 | out_dir.as_path(), 744 | a, 745 | b, 746 | d, 747 | )); 748 | } 749 | } 750 | 751 | // Finally, profile the runs we are supposed to execute 
based on the command line args 752 | let mut i = 0; 753 | for run in runs.iter_mut().skip(start).step(stepping) { 754 | if !dryrun { 755 | run.profile().ok(); 756 | } else { 757 | warn!("Dryrun mode: {}", run); 758 | } 759 | i += 1; 760 | } 761 | 762 | info!("{} runs completed.", i); 763 | } 764 | -------------------------------------------------------------------------------- /src/profile.rs: -------------------------------------------------------------------------------- 1 | use std; 2 | use std::collections::HashMap; 3 | 4 | use csv; 5 | use lazy_static::lazy_static; 6 | use pbr::ProgressBar; 7 | use std::error; 8 | use std::error::Error; 9 | use std::fmt; 10 | use std::fs; 11 | use std::fs::File; 12 | use std::io::prelude::*; 13 | use std::path::Path; 14 | use std::path::PathBuf; 15 | use std::process::Command; 16 | use std::str::FromStr; 17 | use x86::cpuid; 18 | use x86::perfcnt::intel::{events, Counter, EventDescription, MSRIndex, PebsType, Tuple}; 19 | 20 | use super::util::*; 21 | use log::*; 22 | 23 | lazy_static! { 24 | 25 | /// Check if HT is enabled on this CPU (if HT is disabled it doubles the amount of available 26 | /// performance counters on a core). 27 | static ref HT_AVAILABLE: bool = { 28 | let cpuid = cpuid::CpuId::new(); 29 | cpuid.get_extended_topology_info().unwrap().any(|t| { 30 | t.level_type() == cpuid::TopologyType::SMT 31 | }) 32 | }; 33 | 34 | /// For every MonitoringUnit try to figure out how many counters we support. 35 | /// This is handled through a config file since Linux doesn't export this information in 36 | /// it's PMU devices (but probably should)... 37 | static ref PMU_COUNTERS: HashMap = { 38 | let cpuid = cpuid::CpuId::new(); 39 | let cpu_counter = cpuid.get_performance_monitoring_info().map_or(0, |info| info.number_of_counters()) as usize; 40 | let mut res = HashMap::with_capacity(11); 41 | res.insert(MonitoringUnit::CPU, cpu_counter); 42 | let (family, model) = cpuid.get_feature_info().map_or((0,0), |fi| (fi.family_id(), ((fi.extended_model_id() as u8) << 4) | fi.model_id() as u8)); 43 | 44 | let ctr_config = include_str!("counters.toml"); 45 | let mut parser = toml::Parser::new(ctr_config); 46 | 47 | let doc = match parser.parse() { 48 | Some(doc) => doc, 49 | None => { 50 | error!("Can't parse the counter configuration file:\n{:?}", parser.errors); 51 | std::process::exit(9); 52 | } 53 | }; 54 | 55 | trace!("Trying to find architecture for family = {:#x} model = {:#x}", family, model); 56 | let mut found: bool = false; 57 | for (name, architecture) in doc { 58 | let architecture = architecture.as_table().expect("counters.toml architectures must be a table"); 59 | let cfamily = &architecture["family"]; 60 | for cmodel in architecture["models"].as_slice().expect("counters.toml models must be a list.") { 61 | let cfamily = cfamily.as_integer().expect("Family must be int.") as u8; 62 | let cmodel = cmodel.as_integer().expect("Model must be int.") as u8; 63 | if family == cfamily && model == cmodel { 64 | trace!("Running on {}, reading MonitoringUnit limits from config", name); 65 | found = true; 66 | 67 | // TODO: We should ideally get both, prgrammable and fixed counters: 68 | for (unit, limit) in architecture["programmable_counters"].as_table().expect("programmable_counters must be a table") { 69 | let unit = MonitoringUnit::new(unit.as_str()); 70 | let limit = limit.as_integer().expect("Counter limit should be an integer"); 71 | res.insert(unit, limit as usize); 72 | } 73 | } 74 | } 75 | } 76 | 77 | if !found { 78 | warn!("Didn't recogize 
this architecture so we can't infer #counters for MonitoringUnit (Please update counters.toml for family = {:#x} model = {:#x})", family, model); 79 | res.insert(MonitoringUnit::UBox, 4); 80 | res.insert(MonitoringUnit::HA, 4); 81 | res.insert(MonitoringUnit::IRP, 4); 82 | res.insert(MonitoringUnit::PCU, 4); 83 | res.insert(MonitoringUnit::R2PCIe, 4); 84 | res.insert(MonitoringUnit::R3QPI, 4); 85 | res.insert(MonitoringUnit::QPI, 4); 86 | res.insert(MonitoringUnit::CBox, 2); 87 | res.insert(MonitoringUnit::IMC, 4); 88 | res.insert(MonitoringUnit::Arb, 2); 89 | res.insert(MonitoringUnit::M2M, 4); 90 | res.insert(MonitoringUnit::CHA, 4); 91 | res.insert(MonitoringUnit::M3UPI, 4); 92 | res.insert(MonitoringUnit::IIO, 4); 93 | res.insert(MonitoringUnit::UPI_LL, 4); 94 | } 95 | 96 | res 97 | }; 98 | 99 | /// Find the linux PMU devices that we need to program through perf 100 | static ref PMU_DEVICES: Vec = { 101 | let paths = fs::read_dir("/sys/bus/event_source/devices/").expect("Can't read devices directory."); 102 | let mut devices = Vec::with_capacity(15); 103 | for p in paths { 104 | let path = p.expect("Is not a path."); 105 | let file_name = path.file_name().into_string().expect("Is valid UTF-8 string."); 106 | devices.push(file_name); 107 | } 108 | 109 | devices 110 | }; 111 | 112 | /// Bogus or clocks that we don't want to measure or tend to break things 113 | static ref IGNORE_EVENTS: HashMap<&'static str, bool> = { 114 | let mut ignored = HashMap::with_capacity(1); 115 | ignored.insert("UNC_CLOCK.SOCKET", true); // Just says 'fixed' and does not name which counter :/ 116 | ignored.insert("UNC_M_CLOCKTICKS_F", true); 117 | ignored.insert("UNC_U_CLOCKTICKS", true); 118 | ignored 119 | }; 120 | 121 | /// Which events should be measured in isolation on this architecture. 122 | static ref ISOLATE_EVENTS: Vec<&'static str> = { 123 | let cpuid = cpuid::CpuId::new(); 124 | let (family, model) = cpuid.get_feature_info().map_or((0,0), |fi| (fi.family_id(), ((fi.extended_model_id() as u8) << 4) | fi.model_id() as u8)); 125 | 126 | // Sometimes the perfmon data is missing the errata information 127 | // as is the case for IvyBridge where MEM_LOAD* things can't be measured 128 | // together with other things. 
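        // (Family 0x6 with model 58 or 62 corresponds to the `ivybridge` and `ivybridgeep`
        // entries in counters.toml, i.e. client Ivy Bridge and Ivy Bridge-EP.)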
129 | if family == 0x6 && (model == 62 || model == 58) { 130 | vec![ "MEM_UOPS_RETIRED.ALL_STORES", 131 | "MEM_LOAD_UOPS_RETIRED.L1_MISS", 132 | "MEM_LOAD_UOPS_RETIRED.HIT_LFB", 133 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", 134 | "MEM_LOAD_UOPS_RETIRED.L2_HIT", 135 | "MEM_UOPS_RETIRED.SPLIT_LOADS", 136 | "MEM_UOPS_RETIRED.ALL_LOADS", 137 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", 138 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", 139 | "MEM_LOAD_UOPS_RETIRED.L1_HIT", 140 | "MEM_UOPS_RETIRED.STLB_MISS_STORES", 141 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", 142 | "MEM_LOAD_UOPS_RETIRED.LLC_MISS", 143 | "MEM_LOAD_UOPS_RETIRED.L2_MISS", 144 | "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", 145 | "MEM_UOPS_RETIRED.STLB_MISS_LOADS", 146 | "MEM_UOPS_RETIRED.LOCK_LOADS", 147 | "MEM_LOAD_UOPS_RETIRED.LLC_HIT", 148 | "MEM_UOPS_RETIRED.SPLIT_STORES", 149 | // Those are IvyBridge-EP events: 150 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM", 151 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM", 152 | "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD"] 153 | } 154 | else { 155 | vec![] 156 | } 157 | }; 158 | } 159 | 160 | fn execute_perf( 161 | perf: &mut Command, 162 | cmd: &Vec, 163 | counters: &Vec, 164 | datafile: &Path, 165 | dryrun: bool, 166 | ) -> (String, String, String) { 167 | assert!(cmd.len() >= 1); 168 | let perf = perf.arg("-o").arg(datafile.as_os_str()); 169 | let events: Vec = counters.iter().map(|c| format!("-e {}", c)).collect(); 170 | 171 | let perf = perf.args(events.as_slice()); 172 | let perf = perf.args(cmd.as_slice()); 173 | let perf_cmd_str: String = format!("{:?}", perf).replace("\"", ""); 174 | 175 | let (stdout, stderr) = if !dryrun { 176 | match perf.output() { 177 | Ok(out) => { 178 | let stdout = 179 | String::from_utf8(out.stdout).unwrap_or(String::from("Unable to read stdout!")); 180 | let stderr = 181 | String::from_utf8(out.stderr).unwrap_or(String::from("Unable to read stderr!")); 182 | 183 | if out.status.success() { 184 | trace!("stdout:\n{:?}", stdout); 185 | trace!("stderr:\n{:?}", stderr); 186 | } else if !out.status.success() { 187 | error!( 188 | "perf command: {} got unknown exit status was: {}", 189 | perf_cmd_str, out.status 190 | ); 191 | debug!("stdout:\n{}", stdout); 192 | debug!("stderr:\n{}", stderr); 193 | } 194 | 195 | if !datafile.exists() { 196 | error!( 197 | "perf command: {} succeeded but did not produce the required file {:?} \ 198 | (you should file a bug report!)", 199 | perf_cmd_str, datafile 200 | ); 201 | } 202 | 203 | (stdout, stderr) 204 | } 205 | Err(err) => { 206 | error!("Executing {} failed : {}", perf_cmd_str, err); 207 | (String::new(), String::new()) 208 | } 209 | } 210 | } else { 211 | warn!("Dry run mode -- would execute: {}", perf_cmd_str); 212 | (String::new(), String::new()) 213 | }; 214 | 215 | (perf_cmd_str, stdout, stderr) 216 | } 217 | 218 | pub fn create_out_directory(out_dir: &Path) { 219 | if !out_dir.exists() { 220 | std::fs::create_dir(out_dir).expect("Can't create `out` directory"); 221 | } 222 | } 223 | 224 | pub fn get_known_events<'a>() -> Vec<&'a EventDescription<'static>> { 225 | events() 226 | .expect("No performance events found?") 227 | .values() 228 | .collect() 229 | } 230 | 231 | #[allow(non_camel_case_types)] 232 | #[derive(Hash, Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)] 233 | pub enum MonitoringUnit { 234 | /// Devices 235 | CPU, 236 | /// Memory stuff 237 | Arb, 238 | /// The CBox manages the interface between the core and the LLC, so 239 | /// the instances of uncore CBox is equal to number of 
cores 240 | CBox, 241 | /// ??? 242 | SBox, 243 | /// ??? 244 | UBox, 245 | /// QPI Stuff 246 | QPI, 247 | /// Ring to QPI 248 | R3QPI, 249 | /// IIO Coherency 250 | IRP, 251 | /// Ring to PCIe 252 | R2PCIe, 253 | /// Memory Controller 254 | IMC, 255 | /// Home Agent 256 | HA, 257 | /// Power Control Unit 258 | PCU, 259 | /// XXX 260 | M2M, 261 | /// XXX 262 | CHA, 263 | /// XXX 264 | M3UPI, 265 | /// XXX 266 | IIO, 267 | /// XXX 268 | UPI_LL, 269 | /// Types we don't know how to handle... 270 | Unknown, 271 | } 272 | 273 | impl fmt::Display for MonitoringUnit { 274 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 275 | match *self { 276 | MonitoringUnit::CPU => write!(f, "CPU"), 277 | MonitoringUnit::Arb => write!(f, "Arb"), 278 | MonitoringUnit::CBox => write!(f, "CBox"), 279 | MonitoringUnit::SBox => write!(f, "SBox"), 280 | MonitoringUnit::UBox => write!(f, "UBox"), 281 | MonitoringUnit::QPI => write!(f, "QPI"), 282 | MonitoringUnit::R3QPI => write!(f, "R3QPI"), 283 | MonitoringUnit::IRP => write!(f, "IRP"), 284 | MonitoringUnit::R2PCIe => write!(f, "R2PCIe"), 285 | MonitoringUnit::IMC => write!(f, "IMC"), 286 | MonitoringUnit::HA => write!(f, "HA"), 287 | MonitoringUnit::PCU => write!(f, "PCU"), 288 | MonitoringUnit::M2M => write!(f, "M2M"), 289 | MonitoringUnit::CHA => write!(f, "CHA"), 290 | MonitoringUnit::M3UPI => write!(f, "M3UPI"), 291 | MonitoringUnit::IIO => write!(f, "IIO"), 292 | MonitoringUnit::UPI_LL => write!(f, "UPI LL"), 293 | MonitoringUnit::Unknown => write!(f, "Unknown"), 294 | } 295 | } 296 | } 297 | 298 | impl MonitoringUnit { 299 | fn new<'a>(unit: &'a str) -> MonitoringUnit { 300 | match unit.to_lowercase().as_str() { 301 | "cpu" => MonitoringUnit::CPU, 302 | "cbo" => MonitoringUnit::CBox, 303 | "qpi_ll" => MonitoringUnit::QPI, 304 | "sbo" => MonitoringUnit::SBox, 305 | "imph-u" => MonitoringUnit::Arb, 306 | "arb" => MonitoringUnit::Arb, 307 | "r3qpi" => MonitoringUnit::R3QPI, 308 | "qpi ll" => MonitoringUnit::QPI, 309 | "irp" => MonitoringUnit::IRP, 310 | "r2pcie" => MonitoringUnit::R2PCIe, 311 | "imc" => MonitoringUnit::IMC, 312 | "ha" => MonitoringUnit::HA, 313 | "pcu" => MonitoringUnit::PCU, 314 | "ubox" => MonitoringUnit::UBox, 315 | "m2m" => MonitoringUnit::M2M, 316 | "cha" => MonitoringUnit::CHA, 317 | "m3upi" => MonitoringUnit::M3UPI, 318 | "iio" => MonitoringUnit::IIO, 319 | "upi ll" => MonitoringUnit::UPI_LL, 320 | "upi" => MonitoringUnit::UPI_LL, 321 | "ubo" => MonitoringUnit::UBox, 322 | "qpi" => MonitoringUnit::QPI, 323 | _ => { 324 | error!("Don't support MonitoringUnit {}", unit); 325 | MonitoringUnit::Unknown 326 | } 327 | } 328 | } 329 | 330 | pub fn to_intel_event_description(&self) -> Option<&'static str> { 331 | match *self { 332 | MonitoringUnit::CPU => None, 333 | MonitoringUnit::CBox => Some("CBO"), 334 | MonitoringUnit::QPI => Some("QPI_LL"), 335 | MonitoringUnit::SBox => Some("SBO"), 336 | MonitoringUnit::Arb => Some("ARB"), 337 | MonitoringUnit::R3QPI => Some("R3QPI"), 338 | MonitoringUnit::IRP => Some("IRP"), 339 | MonitoringUnit::R2PCIe => Some("R2PCIE"), 340 | MonitoringUnit::IMC => Some("IMC"), 341 | MonitoringUnit::HA => Some("HA"), 342 | MonitoringUnit::PCU => Some("PCU"), 343 | MonitoringUnit::UBox => Some("UBOX"), 344 | MonitoringUnit::M2M => Some("M2M"), 345 | MonitoringUnit::CHA => Some("CHA"), 346 | MonitoringUnit::M3UPI => Some("M3UPI"), 347 | MonitoringUnit::IIO => Some("IIO"), 348 | MonitoringUnit::UPI_LL => Some("UPI LL"), 349 | MonitoringUnit::Unknown => None, 350 | } 351 | } 352 | 353 | /// Return the perf prefix 
for selecting the right PMU unit in case of uncore counters. 354 | pub fn to_perf_prefix(&self) -> Option<&'static str> { 355 | let res = match *self { 356 | MonitoringUnit::CPU => Some("cpu"), 357 | MonitoringUnit::CBox => Some("uncore_cbox"), 358 | MonitoringUnit::QPI => Some("uncore_qpi"), 359 | MonitoringUnit::SBox => Some("uncore_sbox"), 360 | MonitoringUnit::Arb => Some("uncore_arb"), 361 | MonitoringUnit::R3QPI => Some("uncore_r3qpi"), // Adds postfix value 362 | MonitoringUnit::IRP => Some("uncore_irp"), // According to libpfm4 (lib/pfmlib_intel_ivbep_unc_irp.c) 363 | MonitoringUnit::R2PCIe => Some("uncore_r2pcie"), 364 | MonitoringUnit::IMC => Some("uncore_imc"), // Adds postfix value 365 | MonitoringUnit::HA => Some("uncore_ha"), // Adds postfix value 366 | MonitoringUnit::PCU => Some("uncore_pcu"), 367 | MonitoringUnit::UBox => Some("uncore_ubox"), 368 | MonitoringUnit::M2M => Some("uncore_m2m"), // Adds postfix value 369 | MonitoringUnit::CHA => Some("uncore_cha"), // Adds postfix value 370 | MonitoringUnit::M3UPI => Some("uncore_m3upi"), // Adds postfix value 371 | MonitoringUnit::IIO => Some("uncore_iio"), // Adds postfix value 372 | MonitoringUnit::UPI_LL => Some("uncore_upi"), // Adds postfix value 373 | MonitoringUnit::Unknown => None, 374 | }; 375 | 376 | // Note: If anything here does not return uncore_ as a prefix, you need to update extract.rs! 377 | res.map(|string| assert!(string.starts_with("uncore_") || string.starts_with("cpu"))); 378 | 379 | res 380 | } 381 | } 382 | 383 | #[derive(Debug)] 384 | pub struct PerfEvent<'a, 'b>(pub &'a EventDescription<'b>) 385 | where 386 | 'b: 'a; 387 | 388 | impl<'a, 'b> PerfEvent<'a, 'b> { 389 | /// Returns all possible configurations of the event. 390 | /// This is a two vector tuple containing devices and configs: 391 | /// 392 | /// * Devices are a subset of the ones listed in `/sys/bus/event_source/devices/` 393 | /// Usually just `cpu` but uncore events can be measured on multiple devices. 394 | /// * Configs are all possible combinations of attributes for this event. 395 | /// Usually one but offcore events have two. 396 | /// 397 | /// # Note 398 | /// The assumption of the return type is that we can always match any 399 | /// device with any config. Let's see how long this assumption will remain valid... 400 | /// 401 | pub fn perf_configs(&self) -> (Vec, Vec>) { 402 | let mut devices = Vec::with_capacity(1); 403 | let mut configs = Vec::with_capacity(2); 404 | 405 | let typ = self.unit(); 406 | 407 | // XXX: Horrible vector transformation: 408 | let matched_devices: Vec = PMU_DEVICES 409 | .iter() 410 | .filter(|d| typ.to_perf_prefix().map_or(false, |t| d.starts_with(t))) 411 | .map(|d| d.clone()) 412 | .collect(); 413 | devices.extend(matched_devices); 414 | 415 | // We can have no devices if we don't understand how to match the unit name to perf names: 416 | if devices.len() == 0 { 417 | debug!( 418 | "Unit {:?} is not available to measure '{}'.", 419 | self.unit(), 420 | self, 421 | ); 422 | } 423 | 424 | for args in self.perf_args() { 425 | configs.push(args); 426 | } 427 | 428 | (devices, configs) 429 | } 430 | 431 | /// Does this event use the passed code? 432 | pub fn uses_event_code(&self, event_code: u8) -> bool { 433 | match self.0.event_code { 434 | Tuple::One(e1) => e1 == event_code, 435 | Tuple::Two(e1, e2) => e1 == event_code || e2 == event_code, 436 | } 437 | } 438 | 439 | /// Does this event use the passed code? 
440 | pub fn uses_umask(&self, umask: u8) -> bool { 441 | match self.0.umask { 442 | Tuple::One(m1) => m1 == umask, 443 | Tuple::Two(m1, m2) => m1 == umask || m2 == umask, 444 | } 445 | } 446 | 447 | /// Is this event an uncore event? 448 | pub fn is_uncore(&self) -> bool { 449 | self.0.unit.is_some() 450 | } 451 | 452 | pub fn unit(&self) -> MonitoringUnit { 453 | self.0 454 | .unit 455 | .map_or(MonitoringUnit::CPU, |u| MonitoringUnit::new(u)) 456 | } 457 | 458 | /// Is this event an offcore event? 459 | pub fn is_offcore(&self) -> bool { 460 | match self.0.event_code { 461 | Tuple::One(_) => { 462 | assert!(!self.0.offcore); 463 | false 464 | } 465 | Tuple::Two(_, _) => { 466 | assert!(self.0.event_name.contains("OFFCORE")); 467 | // The OR is because there is this weird meta-event OFFCORE_RESPONSE 468 | // in the data files. It has offcore == false and is not really a proper event :/ 469 | assert!(self.0.offcore || self.0.event_name == "OFFCORE_RESPONSE"); 470 | true 471 | } 472 | } 473 | } 474 | 475 | /// Get the correct counter mask 476 | pub fn counter(&self) -> Counter { 477 | if *HT_AVAILABLE || self.is_uncore() { 478 | self.0.counter 479 | } else { 480 | self.0.counter_ht_off.expect("A bug in JSON?") // Ideally, all CPU events should have this attribute 481 | } 482 | } 483 | 484 | fn push_arg(configs: &mut Vec>, value: String) { 485 | for config in configs.iter_mut() { 486 | config.push(value.clone()); 487 | } 488 | } 489 | 490 | /// Returns a set of attributes used to build the perf event description. 491 | /// 492 | /// # Arguments 493 | /// * try_alternative: Can give a different event encoding (for offcore events). 494 | fn perf_args(&self) -> Vec> { 495 | // OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1 provide identical functionality. The reason 496 | // that there are two of them is that these events are associated with a separate MSR that is 497 | // used to program the types of requests/responses that you want to count (instead of being 498 | // able to include this information in the Umask field of the PERFEVT_SELx MSR). The 499 | // performance counter event OFFCORE_RESPONSE_0 (Event 0xB7) is associated with MSR 0x1A6, 500 | // while the performance counter event OFFCORE_RESPONSE_1 (Event 0xBB) is associated with MSR 501 | // 0x1A7. 502 | // So having two events (with different associated MSRs) allows you to count two different 503 | // offcore response events at the same time. 
504 | // Source: https://software.intel.com/en-us/forums/software-tuning-performance-optimization-platform-monitoring/topic/559227 505 | 506 | let two_configs: bool = match self.0.event_code { 507 | Tuple::One(_) => false, 508 | Tuple::Two(_, _) => true, 509 | }; 510 | 511 | let mut ret: Vec> = vec![Vec::with_capacity(7)]; 512 | if two_configs { 513 | ret.push(Vec::with_capacity(7)); 514 | } 515 | PerfEvent::push_arg(&mut ret, format!("name={}", self.0.event_name)); 516 | 517 | let is_pcu = self.0.unit.map_or(false, |u| { 518 | return MonitoringUnit::new(u) == MonitoringUnit::PCU; 519 | }); 520 | 521 | match self.0.event_code { 522 | Tuple::One(ev) => { 523 | // PCU events have umasks defined but they're OR'd with event (wtf) 524 | let pcu_umask = if is_pcu { 525 | match self.0.umask { 526 | Tuple::One(mask) => mask, 527 | Tuple::Two(_m1, _m2) => unreachable!(), 528 | } 529 | } else { 530 | 0x0 531 | }; 532 | 533 | ret[0].push(format!("event=0x{:x}", ev | pcu_umask)); 534 | } 535 | Tuple::Two(e1, e2) => { 536 | assert!(two_configs); 537 | assert!(!is_pcu); 538 | ret[0].push(format!("event=0x{:x}", e1)); 539 | ret[1].push(format!("event=0x{:x}", e2)); 540 | } 541 | }; 542 | 543 | if !is_pcu { 544 | match self.0.umask { 545 | Tuple::One(mask) => { 546 | PerfEvent::push_arg(&mut ret, format!("umask=0x{:x}", mask)); 547 | } 548 | Tuple::Two(m1, m2) => { 549 | assert!(two_configs); 550 | ret[0].push(format!("umask=0x{:x}", m1)); 551 | ret[1].push(format!("umask=0x{:x}", m2)); 552 | } 553 | }; 554 | } 555 | 556 | if self.0.counter_mask != 0 { 557 | PerfEvent::push_arg(&mut ret, format!("cmask=0x{:x}", self.0.counter_mask)); 558 | } 559 | 560 | if self.0.fc_mask != 0 { 561 | PerfEvent::push_arg(&mut ret, format!("fc_mask=0x{:x}", self.0.fc_mask)); 562 | } 563 | 564 | if self.0.port_mask != 0 { 565 | PerfEvent::push_arg(&mut ret, format!("ch_mask=0x{:x}", self.0.port_mask)); 566 | } 567 | 568 | if self.0.offcore { 569 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 570 | } else { 571 | match self.0.msr_index { 572 | MSRIndex::One(0x3F6) => { 573 | PerfEvent::push_arg(&mut ret, format!("ldlat=0x{:x}", self.0.msr_value)); 574 | } 575 | MSRIndex::One(0x1A6) => { 576 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 577 | } 578 | MSRIndex::One(0x1A7) => { 579 | PerfEvent::push_arg(&mut ret, format!("offcore_rsp=0x{:x}", self.0.msr_value)); 580 | } 581 | MSRIndex::One(0x3F7) => { 582 | PerfEvent::push_arg(&mut ret, format!("frontend=0x{:x}", self.0.msr_value)); 583 | } 584 | MSRIndex::One(a) => { 585 | unreachable!("Unknown MSR value {}, check linux/latest/source/tools/perf/pmu-events/jevents.c", a) 586 | } 587 | MSRIndex::Two(_, _) => { 588 | unreachable!("Should not have non offcore events with two MSR index values.") 589 | } 590 | MSRIndex::None => { 591 | // ignored, not a load latency event 592 | } 593 | }; 594 | } 595 | 596 | if self.0.invert { 597 | PerfEvent::push_arg(&mut ret, String::from("inv=1")); 598 | } 599 | 600 | if self.0.edge_detect { 601 | PerfEvent::push_arg(&mut ret, String::from("edge=1")); 602 | } 603 | 604 | if self.0.any_thread { 605 | PerfEvent::push_arg(&mut ret, String::from("any=1")); 606 | } 607 | 608 | if self.match_filter("CBoFilter0[23:17]") { 609 | PerfEvent::push_arg(&mut ret, String::from("filter_state=0x1f")); 610 | } 611 | 612 | if self.match_filter("CBoFilter1[15:0]") { 613 | // TODO: Include both sockets by default -- we should probably be smarter... 
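                // (filter_nid is a node-id bitmask; 0x3 sets bits 0 and 1, i.e. NUMA nodes 0
                // and 1, which covers both sockets on a typical two-socket machine.)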
614 | PerfEvent::push_arg(&mut ret, String::from("filter_nid=0x3")); 615 | } 616 | 617 | if self.match_filter("CBoFilter1[28:20]") { 618 | // TOR events requires filter_opc 619 | // Set to: 0x192 PrefData Prefetch Data into LLC but don’t pass to L2. Includes Hints 620 | PerfEvent::push_arg(&mut ret, String::from("filter_opc=0x192")); 621 | } 622 | 623 | ret 624 | } 625 | 626 | pub fn perf_qualifiers(&self) -> String { 627 | let qualifiers = String::from("S"); 628 | if self.0.pebs == PebsType::PebsOrRegular { 629 | // Adding 'p' for PebsOrRegular event doesnt seem to work 630 | // for many events in perf that Intel regards as PEBS capable events 631 | // (see issue #2) 632 | } else if self.0.pebs == PebsType::PebsOnly { 633 | // Adding a 'p' here seems counterproducive (perf won't measure the events then) 634 | // so we do nothing 635 | } 636 | qualifiers 637 | } 638 | 639 | fn filters(&self) -> Vec<&str> { 640 | self.0.filter.map_or(Vec::new(), |value| { 641 | value 642 | .split(",") 643 | .map(|x| x.trim()) 644 | .filter(|x| x.len() > 0) 645 | .collect() 646 | }) 647 | } 648 | 649 | pub fn match_filter(&self, filter: &str) -> bool { 650 | self.filters().contains(&filter) 651 | } 652 | } 653 | 654 | impl<'a, 'b> fmt::Display for PerfEvent<'a, 'b> { 655 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 656 | write!(f, "{}", self.0.event_name) 657 | } 658 | } 659 | 660 | /// Adding a new event to a group of existing events (that can be measured 661 | /// together) can fail for a variety of reasons which are encoded in this type. 662 | #[derive(Debug)] 663 | pub enum AddEventError { 664 | /// We couldn't measure any more offcore events 665 | OffcoreCapacityReached, 666 | /// We don't have more counters left on this monitoring unit 667 | UnitCapacityReached(MonitoringUnit), 668 | /// We have a constraint that we can't measure the new event together with 669 | /// an existing event in the group 670 | CounterConstraintConflict, 671 | /// We have a conflict with filters 672 | FilterConstraintConflict, 673 | /// The errata specifies an issue with this event (we tend to isolate these) 674 | ErrataConflict, 675 | /// This counter must be measured alone 676 | TakenAloneConflict, 677 | /// This is one of these events that we manually specified to be isolated 678 | IsolatedEventConflict, 679 | } 680 | 681 | impl fmt::Display for AddEventError { 682 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 683 | match *self { 684 | AddEventError::OffcoreCapacityReached => write!(f, "Offcore event limit reached."), 685 | AddEventError::UnitCapacityReached(u) => { 686 | write!(f, "Unit '{}' capacity for reached.", u) 687 | } 688 | AddEventError::CounterConstraintConflict => write!(f, "Counter constraints conflict."), 689 | AddEventError::FilterConstraintConflict => write!(f, "Filter constraints conflict."), 690 | AddEventError::ErrataConflict => write!(f, "Errata conflict."), 691 | AddEventError::TakenAloneConflict => write!(f, "Group contains a taken alone counter."), 692 | AddEventError::IsolatedEventConflict => write!(f, "Group contains an isolated event."), 693 | } 694 | } 695 | } 696 | 697 | impl error::Error for AddEventError { 698 | fn description(&self) -> &str { 699 | match *self { 700 | AddEventError::OffcoreCapacityReached => "Offcore event limit reached.", 701 | AddEventError::UnitCapacityReached(_) => "Unit capacity reached.", 702 | AddEventError::CounterConstraintConflict => "Counter constraints conflict.", 703 | AddEventError::FilterConstraintConflict => "Filter constraints 
conflict.", 704 | AddEventError::ErrataConflict => "Errata conflict.", 705 | AddEventError::TakenAloneConflict => "Group contains a taken alone counter.", 706 | AddEventError::IsolatedEventConflict => "Group contains an isolated event.", 707 | } 708 | } 709 | } 710 | 711 | #[derive(Debug)] 712 | pub struct PerfEventGroup<'a, 'b> 713 | where 714 | 'b: 'a, 715 | { 716 | events: Vec>, 717 | limits: &'a HashMap, 718 | } 719 | 720 | impl<'a, 'b> PerfEventGroup<'a, 'b> { 721 | /// Make a new performance event group. 722 | pub fn new(unit_sizes: &'a HashMap) -> PerfEventGroup { 723 | PerfEventGroup { 724 | events: Default::default(), 725 | limits: unit_sizes, 726 | } 727 | } 728 | 729 | /// Returns how many offcore events are in the group. 730 | fn offcore_events(&self) -> usize { 731 | self.events.iter().filter(|e| e.is_offcore()).count() 732 | } 733 | 734 | /// Returns how many uncore events are in the group for a given unit. 735 | fn events_by_unit(&self, unit: MonitoringUnit) -> Vec<&PerfEvent> { 736 | self.events.iter().filter(|e| e.unit() == unit).collect() 737 | } 738 | 739 | /// Backtracking algorithm to find assigment of events to available counters 740 | /// while respecting the counter constraints every event has. 741 | /// The events passed here should all have the same counter type 742 | /// (i.e., either all programmable or all fixed) and the same unit. 743 | /// 744 | /// Returns a possible placement or None if no assignment was possible. 745 | fn find_counter_assignment( 746 | level: usize, 747 | max_level: usize, 748 | events: Vec<&'a PerfEvent<'a, 'b>>, 749 | assignment: Vec<&'a PerfEvent<'a, 'b>>, 750 | ) -> Option>> { 751 | // Are we done yet? 752 | if events.len() == 0 { 753 | return Some(assignment); 754 | } 755 | // Are we too deep? 
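// Aside: the same backtracking search, reduced to plain bitmasks, as a
// self-contained sketch (independent of `PerfEvent`; counter `level`
// corresponds to bit `level` of each event's allowed-counter mask). The
// actual depth check continues right below this block.
//
//     fn feasible(masks: &[usize], level: usize, max: usize) -> bool {
//         if masks.is_empty() { return true; }  // every event placed
//         if level >= max { return false; }     // ran out of counters
//         for (i, &m) in masks.iter().enumerate() {
//             if (m & (1 << level)) != 0 {
//                 // Try placing event `i` on this counter and recurse.
//                 let mut rest = masks.to_vec();
//                 rest.remove(i);
//                 if feasible(&rest, level + 1, max) { return true; }
//             }
//         }
//         // Leave this counter empty and move on.
//         feasible(masks, level + 1, max)
//     }
//
// For example, `feasible(&[0b0001, 0b0010], 0, 4)` succeeds, while
// `feasible(&[0b0001, 0b0001], 0, 4)` fails because both events insist on
// counter 0.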
756 | if level >= max_level { 757 | return None; 758 | } 759 | 760 | for (idx, event) in events.iter().enumerate() { 761 | let mask: usize = match event.counter() { 762 | Counter::Programmable(mask) => mask as usize, 763 | Counter::Fixed(mask) => mask as usize, 764 | }; 765 | 766 | let mut assignment = assignment.clone(); 767 | let mut events = events.clone(); 768 | 769 | // If event supports counter, let's assign it to this counter and go deeper 770 | if (mask & (1 << level)) > 0 { 771 | assignment.push(event); 772 | events.remove(idx); 773 | let ret = PerfEventGroup::find_counter_assignment( 774 | level + 1, 775 | max_level, 776 | events, 777 | assignment, 778 | ); 779 | if ret.is_some() { 780 | return ret; 781 | } 782 | } 783 | // Otherwise let's not assign the event at this level and go deeper (for groups that 784 | // don't use all counters) 785 | else { 786 | let ret = PerfEventGroup::find_counter_assignment( 787 | level + 1, 788 | max_level, 789 | events, 790 | assignment, 791 | ); 792 | if ret.is_some() { 793 | return ret; 794 | } 795 | } 796 | // And finally, just try with the next event in the list 797 | } 798 | 799 | None 800 | } 801 | 802 | /// Check if this event conflicts with the counter requirements 803 | /// of events already in this group 804 | fn has_counter_constraint_conflicts(&self, new_event: &PerfEvent) -> bool { 805 | let unit = new_event.unit(); 806 | let unit_limit = *self.limits.get(&unit).unwrap_or(&0); 807 | //error!("unit = {:?} unit_limit {:?}", unit, unit_limit); 808 | 809 | // Get all the events that share the same counters as new_event: 810 | let mut events: Vec<&PerfEvent> = self 811 | .events_by_unit(unit) 812 | .into_iter() 813 | .filter(|c| match (c.counter(), new_event.counter()) { 814 | (Counter::Programmable(_), Counter::Programmable(_)) => true, 815 | (Counter::Fixed(_), Counter::Fixed(_)) => true, 816 | _ => false, 817 | }) 818 | .collect(); 819 | 820 | events.push(new_event); 821 | PerfEventGroup::find_counter_assignment(0, unit_limit, events, Vec::new()).is_none() 822 | } 823 | 824 | /// Check if this events conflicts with the filter requirements of 825 | /// events already in this group 826 | fn has_filter_constraint_conflicts(&self, new_event: &PerfEvent) -> bool { 827 | let unit = new_event.unit(); 828 | let events: Vec<&PerfEvent> = self.events_by_unit(unit); 829 | 830 | for event in events.iter() { 831 | for filter in event.filters() { 832 | if new_event.filters().contains(&filter) { 833 | return true; 834 | } 835 | } 836 | } 837 | 838 | false 839 | } 840 | 841 | /// Try to add an event to an event group. 842 | /// 843 | /// Returns true if the event can be added to the group, false if we would be Unable 844 | /// to measure the event in the same group (given the PMU limitations). 845 | /// 846 | /// Things we consider correctly right now: 847 | /// * Fixed amount of counters per monitoring unit (so we don't multiplex). 848 | /// * Some events can only use some counters. 849 | /// * Taken alone attribute of the events. 850 | /// 851 | /// Things we consider not entirely correct right now: 852 | /// * Event Erratas this is not complete in the JSON files, and we just run them in isolation 853 | /// 854 | pub fn add_event(&mut self, event: PerfEvent<'a, 'b>) -> Result<(), AddEventError> { 855 | // 1. Can't measure more than two offcore events: 856 | if event.is_offcore() && self.offcore_events() == 2 { 857 | return Err(AddEventError::OffcoreCapacityReached); 858 | } 859 | 860 | // 2. 
Check we don't measure more events than we have counters 861 | // for on the repspective units 862 | let unit = event.unit(); 863 | let unit_limit = *self.limits.get(&unit).unwrap_or(&0); 864 | if self.events_by_unit(unit).len() >= unit_limit { 865 | return Err(AddEventError::UnitCapacityReached(unit)); 866 | } 867 | 868 | // 3. Now, consider the counter <-> event mapping constraints: 869 | // Try to see if there is any event already in the group 870 | // that would conflict when running together with the new `event`: 871 | if self.has_counter_constraint_conflicts(&event) { 872 | return Err(AddEventError::CounterConstraintConflict); 873 | } 874 | 875 | if self.has_filter_constraint_conflicts(&event) { 876 | return Err(AddEventError::FilterConstraintConflict); 877 | } 878 | 879 | // 4. Isolate things that have erratas to not screw other events (see HSW30) 880 | let errata = self.events.iter().any(|cur| cur.0.errata.is_some()); 881 | if errata || event.0.errata.is_some() && self.events.len() != 0 { 882 | return Err(AddEventError::ErrataConflict); 883 | } 884 | 885 | // 5. If an event has the taken alone attribute set it needs to be measured alone 886 | let already_have_taken_alone_event = self.events.iter().any(|cur| cur.0.taken_alone); 887 | if already_have_taken_alone_event || event.0.taken_alone && self.events.len() != 0 { 888 | return Err(AddEventError::TakenAloneConflict); 889 | } 890 | 891 | // 6. If our own isolate event list contains the name we also run them alone: 892 | let already_have_isolated_event = self.events.get(0).map_or(false, |e| { 893 | ISOLATE_EVENTS.iter().any(|cur| *cur == e.0.event_name) 894 | }); 895 | if already_have_isolated_event 896 | || ISOLATE_EVENTS.iter().any(|cur| *cur == event.0.event_name) && self.events.len() != 0 897 | { 898 | return Err(AddEventError::IsolatedEventConflict); 899 | } 900 | 901 | self.events.push(event); 902 | Ok(()) 903 | } 904 | 905 | /// Find the right config to use for every event in the group. 906 | /// 907 | /// * We need to make sure we use the correct config if we have two offcore events in the same group. 908 | pub fn get_perf_config(&self) -> Vec { 909 | let mut event_strings: Vec = Vec::with_capacity(2); 910 | let mut have_one_offcore = false; // Have we already added one offcore event? 
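// For reference, every entry pushed below follows perf's
// `<pmu>/<term>,<term>,.../<qualifiers>` event syntax; with hypothetical
// encodings, a core event and an uncore event would look roughly like:
//
//     cpu/name=INST_RETIRED.ANY_P,event=0xc0,umask=0x0/S
//     uncore_imc_0/name=uncore_imc_0.UNC_M_CAS_COUNT.RD,event=0x4,umask=0x3/S
//
// The first offcore event added to a group gets `configs[0]` and a second one
// gets `configs[1]` (the alternate encoding described in `perf_args` above),
// which is exactly what `have_one_offcore` keeps track of.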
911 | 912 | for event in self.events.iter() { 913 | let (devices, mut configs) = event.perf_configs(); 914 | 915 | if devices.len() == 0 || configs.len() == 0 { 916 | error!( 917 | "Event {} supported by hardware, but your Linux does not allow you to measure it (available PMU devices = {:?})", 918 | event, devices 919 | ); 920 | 921 | continue; 922 | } 923 | 924 | // TODO: handle fixed counters 925 | // fixed_counters = { 926 | // "inst_retired.any": (0xc0, 0, 0), 927 | // "cpu_clk_unhalted.thread": (0x3c, 0, 0), 928 | // "cpu_clk_unhalted.thread_any": (0x3c, 0, 1), 929 | // } 930 | 931 | // Adding offcore event: 932 | if event.is_offcore() { 933 | assert!(devices.len() == 1); 934 | assert!(configs.len() == 2); 935 | assert!(devices[0] == "cpu"); 936 | 937 | let config = match have_one_offcore { 938 | false => configs.get(0).unwrap(), // Ok, always has at least one config 939 | true => configs.get(1).unwrap(), // Ok, as offcore implies two configs 940 | }; 941 | 942 | event_strings.push(format!( 943 | "{}/{}/{}", 944 | devices[0], 945 | config.join(","), 946 | event.perf_qualifiers() 947 | )); 948 | have_one_offcore = true; 949 | } 950 | // Adding uncore event: 951 | else if event.is_uncore() { 952 | assert!(configs.len() == 1); 953 | 954 | // If we have an uncore event we just go ahead and measure it on all possible devices: 955 | for device in devices { 956 | // Patch name in config so we know where this event was running 957 | // `perf stat` just reports CPU 0 for uncore events :-( 958 | configs[0][0] = format!("name={}.{}", device, event.0.event_name); 959 | event_strings.push(format!( 960 | "{}/{}/{}", 961 | device, 962 | configs[0].join(","), 963 | event.perf_qualifiers() 964 | )); 965 | } 966 | } 967 | // Adding normal event: 968 | else { 969 | assert!(devices.len() == 1); 970 | assert!(configs.len() == 1); 971 | assert!(devices[0] == "cpu"); 972 | 973 | event_strings.push(format!( 974 | "{}/{}/{}", 975 | devices[0], 976 | configs[0].join(","), 977 | event.perf_qualifiers() 978 | )); 979 | } 980 | } 981 | 982 | event_strings 983 | } 984 | 985 | /// Returns a list of events as strings that can be passed to perf-record using 986 | /// the -e arguments. 987 | pub fn get_perf_config_strings(&self) -> Vec { 988 | self.get_perf_config() 989 | } 990 | 991 | /// Returns a list of event names in this group. 992 | /// 993 | /// The order of the list of names matches with the order 994 | /// returned by `get_perf_config_strings` or `get_perf_config`. 995 | pub fn get_event_names(&self) -> Vec<&'b str> { 996 | self.events.iter().map(|event| event.0.event_name).collect() 997 | } 998 | } 999 | 1000 | /// Given a list of events, create a list of event groups that can be measured together. 
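///
/// A typical call chain, mirroring how `profile()` below drives it (sketch
/// only, error handling omitted):
///
/// ```ignore
/// let groups = schedule_events(get_known_events());
/// for group in &groups {
///     let names = group.get_event_names();
///     let args = group.get_perf_config_strings(); // one `-e` argument each
///     println!("group of {} events: {:?}", names.len(), args);
/// }
/// ```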
1001 | pub fn schedule_events<'a, 'b>(events: Vec<&'a EventDescription<'b>>) -> Vec> 1002 | where 1003 | 'b: 'a, 1004 | { 1005 | let mut groups: Vec = Vec::with_capacity(42); 1006 | 1007 | for event in events { 1008 | if IGNORE_EVENTS.contains_key(event.event_name) { 1009 | continue; 1010 | } 1011 | 1012 | let perf_event: PerfEvent = PerfEvent(event); 1013 | let mut added: Result<(), AddEventError> = Err(AddEventError::ErrataConflict); 1014 | match perf_event.unit() { 1015 | MonitoringUnit::Unknown => { 1016 | info!("Ignoring event with unknown unit '{}'", event); 1017 | continue; 1018 | } 1019 | _ => (), 1020 | }; 1021 | 1022 | // Try to add the event to an existing group: 1023 | for group in groups.iter_mut() { 1024 | let perf_event: PerfEvent = PerfEvent(event); 1025 | added = group.add_event(perf_event); 1026 | if added.is_ok() { 1027 | break; 1028 | } 1029 | } 1030 | 1031 | // Unable to add event to any existing group, make a new group instead: 1032 | if !added.is_ok() { 1033 | let mut pg = PerfEventGroup::new(&*PMU_COUNTERS); 1034 | let perf_event: PerfEvent = PerfEvent(event); 1035 | 1036 | let added = pg.add_event(perf_event); 1037 | match added { 1038 | Err(e) => { 1039 | let perf_event: PerfEvent = PerfEvent(event); 1040 | panic!( 1041 | "Can't add a new event {:?} to an empty group: {:?}", 1042 | perf_event, e 1043 | ); 1044 | } 1045 | Ok(_) => (), 1046 | }; 1047 | 1048 | groups.push(pg); 1049 | } 1050 | } 1051 | 1052 | // println!("{:?}", groups); 1053 | groups 1054 | } 1055 | 1056 | pub fn get_perf_command( 1057 | cmd_working_dir: &str, 1058 | _output_path: &Path, 1059 | env: &Vec<(String, String)>, 1060 | breakpoints: &Vec, 1061 | record: bool, 1062 | ) -> Command { 1063 | let mut perf = Command::new("perf"); 1064 | perf.current_dir(cmd_working_dir); 1065 | let _filename: String; 1066 | if !record { 1067 | perf.arg("stat"); 1068 | perf.arg("-aA"); 1069 | perf.arg("-I 250"); 1070 | perf.arg("-x ;"); 1071 | } else { 1072 | perf.arg("record"); 1073 | perf.arg("--group"); 1074 | perf.arg("-F 4"); 1075 | perf.arg("-a"); 1076 | perf.arg("--raw-samples"); 1077 | } 1078 | 1079 | // Ensure we use dots as number separators in csv output (see issue #1): 1080 | perf.env("LC_NUMERIC", "C"); 1081 | 1082 | // Add the environment variables: 1083 | for &(ref key, ref value) in env.iter() { 1084 | perf.env(key, value); 1085 | } 1086 | let breakpoint_args: Vec = breakpoints.iter().map(|s| format!("-e \\{}", s)).collect(); 1087 | perf.args(breakpoint_args.as_slice()); 1088 | 1089 | perf 1090 | } 1091 | 1092 | pub fn profile<'a, 'b>( 1093 | output_path: &Path, 1094 | cmd_working_dir: &str, 1095 | cmd: Vec, 1096 | env: Vec<(String, String)>, 1097 | breakpoints: Vec, 1098 | record: bool, 1099 | events: Option>>, 1100 | dryrun: bool, 1101 | ) where 1102 | 'b: 'a, 1103 | { 1104 | let event_groups = match events { 1105 | Some(evts) => schedule_events(evts), 1106 | None => schedule_events(get_known_events()), 1107 | }; 1108 | 1109 | // Is this run already done (in case we restart): 1110 | let mut completed_file: PathBuf = output_path.to_path_buf(); 1111 | completed_file.push("completed"); 1112 | if completed_file.exists() { 1113 | warn!( 1114 | "Run {} already completed, skipping.", 1115 | output_path.to_string_lossy() 1116 | ); 1117 | return; 1118 | } 1119 | 1120 | create_out_directory(output_path); 1121 | if !dryrun { 1122 | check_for_perf(); 1123 | let ret = check_for_perf_permissions() 1124 | || check_for_disabled_nmi_watchdog() 1125 | || check_for_perf_paranoia(); 1126 | if !ret { 1127 | 
std::process::exit(3); 1128 | } 1129 | 1130 | let _ = save_numa_topology(&output_path).expect("Can't save NUMA topology"); 1131 | let _ = save_cpu_topology(&output_path).expect("Can't save CPU topology"); 1132 | let _ = save_lstopo(&output_path).expect("Can't save lstopo information"); 1133 | let _ = save_cpuid(&output_path).expect("Can't save CPUID information"); 1134 | let _ = save_likwid_topology(&output_path).expect("Can't save likwid information"); 1135 | } 1136 | 1137 | assert!(cmd.len() >= 1); 1138 | let mut perf_log = PathBuf::new(); 1139 | perf_log.push(output_path); 1140 | perf_log.push("perf.csv"); 1141 | 1142 | let mut wtr = csv::Writer::from_file(perf_log).unwrap(); 1143 | let r = wtr.encode(( 1144 | "command", 1145 | "event_names", 1146 | "perf_events", 1147 | "breakpoints", 1148 | "datafile", 1149 | "perf_command", 1150 | "stdout", 1151 | "stdin", 1152 | )); 1153 | assert!(r.is_ok()); 1154 | 1155 | // For warm-up do a dummy run of the program with perf 1156 | let record_path = Path::new("/dev/null"); 1157 | let mut perf = get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 1158 | perf.arg("-n"); // null run - don’t start any counters 1159 | let (_, _, _) = execute_perf(&mut perf, &cmd, &Vec::new(), &record_path, dryrun); 1160 | debug!("Warmup complete, let's start measuring."); 1161 | 1162 | let mut pb = ProgressBar::new(event_groups.len() as u64); 1163 | 1164 | for (idx, group) in event_groups.iter().enumerate() { 1165 | if !dryrun { 1166 | pb.inc(); 1167 | } 1168 | 1169 | let event_names: Vec<&str> = group.get_event_names(); 1170 | let counters: Vec = group.get_perf_config_strings(); 1171 | 1172 | let mut record_path = PathBuf::new(); 1173 | let filename = match record { 1174 | false => format!("{}_stat.csv", idx + 1), 1175 | true => format!("{}_perf.data", idx + 1), 1176 | }; 1177 | record_path.push(output_path); 1178 | record_path.push(&filename); 1179 | 1180 | let mut perf = get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 1181 | let (executed_cmd, stdout, stdin) = 1182 | execute_perf(&mut perf, &cmd, &counters, record_path.as_path(), dryrun); 1183 | if !dryrun { 1184 | let r = wtr.encode(vec![ 1185 | cmd.join(" "), 1186 | event_names.join(","), 1187 | counters.join(","), 1188 | String::new(), 1189 | filename, 1190 | executed_cmd, 1191 | stdout, 1192 | stdin, 1193 | ]); 1194 | assert!(r.is_ok()); 1195 | 1196 | let r = wtr.flush(); 1197 | assert!(r.is_ok()); 1198 | } 1199 | } 1200 | 1201 | // Mark this run as completed: 1202 | let _ = File::create(completed_file.as_path()).unwrap(); 1203 | } 1204 | 1205 | pub fn check_for_perf() { 1206 | match Command::new("perf").output() { 1207 | Ok(out) => { 1208 | if out.status.code() != Some(1) { 1209 | error!("'perf' seems to have some problems?"); 1210 | debug!("perf exit status was: {}", out.status); 1211 | error!("{}", String::from_utf8_lossy(&out.stderr)); 1212 | error!( 1213 | "You may require a restart after fixing this so \ 1214 | `/sys/bus/event_source/devices` is updated!" 1215 | ); 1216 | std::process::exit(2); 1217 | } 1218 | } 1219 | Err(_) => { 1220 | error!( 1221 | "'perf' does not seem to be executable? You may need to install it (Ubuntu: \ 1222 | `sudo apt-get install linux-tools-common`)." 1223 | ); 1224 | error!( 1225 | "You may require a restart after fixing this so \ 1226 | `/sys/bus/event_source/devices` is updated!" 
1227 | ); 1228 | std::process::exit(2); 1229 | } 1230 | } 1231 | } 1232 | 1233 | pub fn check_for_perf_permissions() -> bool { 1234 | let path = Path::new("/proc/sys/kernel/kptr_restrict"); 1235 | let mut file = File::open(path).expect("kptr_restrict file does not exist?"); 1236 | let mut s = String::new(); 1237 | 1238 | match file.read_to_string(&mut s) { 1239 | Ok(_) => { 1240 | match s.trim() { 1241 | "1" => { 1242 | error!( 1243 | "kptr restriction is enabled. You can either run autoperf as root or \ 1244 | do:" 1245 | ); 1246 | error!("\tsudo sh -c 'echo 0 >> {}'", path.display()); 1247 | error!("to disable."); 1248 | return false; 1249 | } 1250 | "0" => { 1251 | // debug!("kptr_restrict is already disabled (good)."); 1252 | } 1253 | _ => { 1254 | warn!( 1255 | "Unkown content read from '{}': {}. Proceeding anyways...", 1256 | path.display(), 1257 | s.trim() 1258 | ); 1259 | } 1260 | } 1261 | } 1262 | 1263 | Err(why) => { 1264 | error!("Couldn't read {}: {}", path.display(), why.description()); 1265 | std::process::exit(3); 1266 | } 1267 | } 1268 | 1269 | true 1270 | } 1271 | 1272 | pub fn check_for_disabled_nmi_watchdog() -> bool { 1273 | let path = Path::new("/proc/sys/kernel/nmi_watchdog"); 1274 | let mut file = File::open(path).expect("nmi_watchdog file does not exist?"); 1275 | let mut s = String::new(); 1276 | 1277 | match file.read_to_string(&mut s) { 1278 | Ok(_) => { 1279 | match s.trim() { 1280 | "1" => { 1281 | error!( 1282 | "nmi_watchdog is enabled. This can lead to counters not read (). Execute" 1284 | ); 1285 | error!("\tsudo sh -c 'echo 0 > {}'", path.display()); 1286 | error!("to disable."); 1287 | return false; 1288 | } 1289 | "0" => { 1290 | // debug!("nmi_watchdog is already disabled (good)."); 1291 | } 1292 | _ => { 1293 | warn!( 1294 | "Unkown content read from '{}': {}. Proceeding anyways...", 1295 | path.display(), 1296 | s.trim() 1297 | ); 1298 | } 1299 | } 1300 | } 1301 | 1302 | Err(why) => { 1303 | error!("Couldn't read {}: {}", path.display(), why.description()); 1304 | std::process::exit(4); 1305 | } 1306 | } 1307 | 1308 | true 1309 | } 1310 | 1311 | pub fn check_for_perf_paranoia() -> bool { 1312 | let path = Path::new("/proc/sys/kernel/perf_event_paranoid"); 1313 | let mut file = File::open(path).expect("perf_event_paranoid file does not exist?"); 1314 | let mut s = String::new(); 1315 | 1316 | let res = match file.read_to_string(&mut s) { 1317 | Ok(_) => { 1318 | let digit = i64::from_str(s.trim()).unwrap_or_else(|_op| { 1319 | warn!( 1320 | "Unkown content read from '{}': {}. Proceeding anyways...", 1321 | path.display(), 1322 | s.trim() 1323 | ); 1324 | 1 1325 | }); 1326 | 1327 | if digit >= 0 { 1328 | error!( 1329 | "perf_event_paranoid is enabled. This means we can't collect system wide \ 1330 | stats. 
Execute" 1331 | ); 1332 | error!("\tsudo sh -c 'echo -1 > {}'", path.display()); 1333 | error!("to disable."); 1334 | false 1335 | } else { 1336 | true 1337 | } 1338 | } 1339 | 1340 | Err(why) => { 1341 | error!("Couldn't read {}: {}", path.display(), why.description()); 1342 | std::process::exit(4); 1343 | } 1344 | }; 1345 | 1346 | res 1347 | } 1348 | -------------------------------------------------------------------------------- /src/scale.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | 3 | #[allow(unused)] 4 | pub fn scale(_manifest_folder: &Path, _dryrun: bool, _start: usize, _stepping: usize) { 5 | // let canonical_manifest_path = fs::canonicalize(&manifest_folder) 6 | // .expect("canonicalize manifest path does not work"); 7 | // 8 | // let mut out_dir = canonical_manifest_path.to_path_buf(); 9 | // let hostname = get_hostname().unwrap_or(String::from("unknown")); 10 | // out_dir.push(hostname); 11 | // mkdir(&out_dir); 12 | // 13 | // let mt = MachineTopology::new(); 14 | // 15 | // let mut manifest: PathBuf = canonical_manifest_path.to_path_buf(); 16 | // manifest.push("manifest.toml"); 17 | // let mut file = File::open(manifest.as_path()).expect("manifest.toml file does not exist?"); 18 | // let mut manifest_string = String::new(); 19 | // let _ = file.read_to_string(&mut manifest_string).unwrap(); 20 | // let mut parser = toml::Parser::new(manifest_string.as_str()); 21 | // let doc = match parser.parse() { 22 | // Some(doc) => doc, 23 | // None => { 24 | // error!("Can't parse the manifest file:\n{:?}", parser.errors); 25 | // process::exit(1); 26 | // } 27 | // }; 28 | // let experiment: &toml::Table = doc["experiment"] 29 | // .as_table() 30 | // .expect("Error in manifest.toml: 'experiment' should be a table."); 31 | // let configuration: &[toml::Value] = experiment["configurations"] 32 | // .as_slice() 33 | // .expect("Error in manifest.toml: 'configuration' attribute should be a list."); 34 | // let configs: Vec = configuration.iter() 35 | // .map(|s| s.as_str().expect("configuration elements should be strings").to_string()) 36 | // .collect(); 37 | // let run_alone: bool = experiment.get("alone") 38 | // .map_or(true, |v| v.as_bool().expect("'alone' should be boolean")); 39 | // let profile_only: Option> = experiment.get("profile_only_a") 40 | // .map(|progs| { 41 | // progs.as_slice() 42 | // .expect("Error in manifest.toml: 'profile_only_a' should be a list.") 43 | // .into_iter() 44 | // .map(|p| { 45 | // p.as_str() 46 | // .expect("profile_only_a elements should name programs (strings)") 47 | // .to_string() 48 | // }) 49 | // .collect() 50 | // }); 51 | // let profile_only_b: Option> = experiment.get("profile_only_b") 52 | // .map(|progs| { 53 | // progs.as_slice() 54 | // .expect("Error in manifest.toml: 'profile_only_b' should be a list.") 55 | // .into_iter() 56 | // .map(|p| { 57 | // p.as_str() 58 | // .expect("profile_only_b elements should name programs (strings)") 59 | // .to_string() 60 | // }) 61 | // .collect() 62 | // }); 63 | // 64 | // 65 | // let mut programs: Vec = Vec::with_capacity(2); 66 | // for (key, value) in &doc { 67 | // if key.starts_with("program") { 68 | // let program_desc: &toml::Table = 69 | // doc[key].as_table().expect("Error in manifest.toml: 'program' should be a table."); 70 | // programs.push(Program::from_toml(&canonical_manifest_path, program_desc, run_alone)); 71 | // } 72 | // } 73 | // 74 | // let mut deployments: Vec = Vec::with_capacity(4); 75 | // for config 
in configs { 76 | // match config.as_str() { 77 | // "L1-SMT" => { 78 | // deployments.push(Deployment::split_interleaved("L1-SMT", 79 | // mt.same_l1(), 80 | // mt.l1_size().unwrap_or(0))) 81 | // } 82 | // "L3-SMT" => { 83 | // deployments.push(Deployment::split_interleaved("L3-SMT", 84 | // mt.same_l3(), 85 | // mt.l3_size().unwrap_or(0))) 86 | // } 87 | // "L3-SMT-cores" => { 88 | // deployments.push(Deployment::split_smt_aware("L3-SMT-cores", 89 | // mt.same_l3(), 90 | // mt.l3_size().unwrap_or(0))) 91 | // } 92 | // "L3-cores" => { 93 | // deployments.push(Deployment::split_smt_aware("L3-cores", 94 | // mt.same_l3_cores(), 95 | // mt.l3_size().unwrap_or(0))) 96 | // } 97 | // "Full-L3" => { 98 | // deployments.push(Deployment::split_l3_aware("Full-L3", 99 | // mt.whole_machine_cores(), 100 | // mt.l3_size().unwrap_or(0))) 101 | // } 102 | // "Full-SMT-L3" => { 103 | // deployments.push(Deployment::split_l3_aware("Full-SMT-L3", 104 | // mt.whole_machine(), 105 | // mt.l3_size().unwrap_or(0))) 106 | // } 107 | // "Full-cores" => { 108 | // deployments.push(Deployment::split_interleaved("Full-cores", 109 | // mt.whole_machine_cores(), 110 | // mt.l3_size().unwrap_or(0))) 111 | // } 112 | // "Full-SMT-cores" => { 113 | // deployments.push(Deployment::split_smt_aware("Full-SMT-cores", 114 | // mt.whole_machine(), 115 | // mt.l3_size().unwrap_or(0))) 116 | // } 117 | // 118 | // _ => error!("Ignored unknown deployment config '{}'.", config), 119 | // }; 120 | // } 121 | // 122 | // Add all possible pairs: 123 | // let mut pairs: Vec<(&Program, Option<&Program>)> = Vec::new(); 124 | // for p in programs.iter() { 125 | // pairs.push((p, None)); 126 | // } 127 | // for (a, b) in iproduct!(programs.iter(), programs.iter()) { 128 | // pairs.push((a, Some(b))); 129 | // } 130 | // 131 | // Filter out the pairs we do not want to execute: 132 | // let mut runs: Vec = Vec::new(); 133 | // for (a, b) in pairs.into_iter() { 134 | // let profile_a = profile_only.as_ref().map_or(true, |ps| ps.contains(&a.name)); 135 | // let profile_b = !b.is_none() && 136 | // profile_only_b.as_ref() 137 | // .map_or(profile_a, |ps| ps.contains(&b.unwrap().name)); 138 | // if !profile_a && !profile_b { 139 | // continue; 140 | // } 141 | // 142 | // for d in deployments.iter() { 143 | // if b.is_none() && (!run_alone || !a.alone) { 144 | // continue; 145 | // } 146 | // runs.push(Run::new(&canonical_manifest_path, out_dir.as_path(), a, b, d)); 147 | // } 148 | // } 149 | // 150 | // Finally, profile the runs we are supposed to execute based on the command line args 151 | // let mut i = 0; 152 | // for run in runs.iter_mut().skip(start).step(stepping) { 153 | // if !dryrun { 154 | // run.profile(); 155 | // } else { 156 | // println!("{}", run); 157 | // } 158 | // i += 1; 159 | // } 160 | // 161 | // println!("{} runs completed.", i); 162 | // 163 | } 164 | -------------------------------------------------------------------------------- /src/search.rs: -------------------------------------------------------------------------------- 1 | use std; 2 | 3 | use std::collections::BTreeSet; 4 | use std::collections::HashMap; 5 | use std::path::Path; 6 | use std::path::PathBuf; 7 | use std::process::Command; 8 | 9 | use csv; 10 | 11 | use super::profile; 12 | use super::profile::{MonitoringUnit, PerfEvent}; 13 | use log::*; 14 | use x86::perfcnt::intel::{Counter, EventDescription, MSRIndex, PebsType, Tuple}; 15 | 16 | pub fn event_is_documented( 17 | events: &Vec, 18 | unit: MonitoringUnit, 19 | code: u8, 20 | umask: u8, 21 | 
) -> bool { 22 | for event in events.iter() { 23 | if event.unit() == unit && event.uses_event_code(code) && event.uses_umask(umask) { 24 | return true; 25 | } 26 | } 27 | 28 | return false; 29 | } 30 | 31 | fn execute_perf( 32 | perf: &mut Command, 33 | cmd: &Vec, 34 | counters: &Vec, 35 | ) -> BTreeSet<(String, String)> { 36 | assert!(cmd.len() >= 1); 37 | let events: Vec = counters.iter().map(|c| format!("-e {}", c)).collect(); 38 | 39 | let perf = perf.args(events.as_slice()); 40 | let perf = perf.args(cmd.as_slice()); 41 | let perf_cmd_str: String = format!("{:?}", perf).replace("\"", ""); 42 | 43 | let (_stdout, stderr) = match perf.output() { 44 | Ok(out) => { 45 | let stdout = 46 | String::from_utf8(out.stdout).unwrap_or(String::from("Unable to read stdout!")); 47 | let stderr = 48 | String::from_utf8(out.stderr).unwrap_or(String::from("Unable to read stderr!")); 49 | 50 | if out.status.success() { 51 | // debug!("stdout:\n{:?}", stdout); 52 | // debug!("stderr:\n{:?}", stderr); 53 | } else if !out.status.success() { 54 | error!( 55 | "perf command: {} got unknown exit status was: {}", 56 | perf_cmd_str, out.status 57 | ); 58 | debug!("stdout:\n{}", stdout); 59 | debug!("stderr:\n{}", stderr); 60 | } 61 | 62 | (stdout, stderr) 63 | } 64 | Err(err) => { 65 | error!("Executing {} failed : {}", perf_cmd_str, err); 66 | (String::new(), String::new()) 67 | } 68 | }; 69 | 70 | let mut found_events = BTreeSet::new(); 71 | let mut rdr = csv::Reader::from_string(stderr) 72 | .has_headers(false) 73 | .delimiter(b';') 74 | .flexible(true); 75 | for record in rdr.decode() { 76 | if record.is_ok() { 77 | type SourceRow = (f64, String, String, String, String, String, f64); 78 | let (_time, _cpu, value_string, _, event, _, _percent): SourceRow = 79 | record.expect("Should not happen (in is_ok() branch)!"); 80 | 81 | // Perf will just report first CPU on the socket for uncore events, 82 | // so we temporarily encode the location in the event name and 83 | // extract it here again: 84 | let (unit, event_name) = if !event.starts_with("uncore_") { 85 | // Normal case, we just take the regular event and cpu fields from perf stat 86 | (String::from("cpu"), String::from(event.trim())) 87 | } else { 88 | // Uncore events, use first part of the event name as the location 89 | let (unit, name) = event.split_at(event.find(".").unwrap()); 90 | // remove the _1 in uncore_cbox_1: 91 | let mut unit_parts: Vec<&str> = unit.split('_').collect(); 92 | unit_parts.pop(); 93 | ( 94 | String::from(unit_parts.join("_")), 95 | String::from(name.trim_start_matches(".").trim()), 96 | ) 97 | }; 98 | 99 | let value: u64 = value_string.trim().parse().unwrap_or(0); 100 | if value != 0 { 101 | debug!("{:?} {:?} {:?}", unit, event_name, value); 102 | found_events.insert((event_name, unit)); 103 | } 104 | } 105 | } 106 | 107 | found_events 108 | } 109 | 110 | pub fn check_events<'a, 'b>( 111 | output_path: &Path, 112 | cmd_working_dir: &str, 113 | cmd: Vec, 114 | env: Vec<(String, String)>, 115 | breakpoints: Vec, 116 | record: bool, 117 | events: Vec<&'a EventDescription<'b>>, 118 | ) -> BTreeSet<(String, String)> 119 | where 120 | 'b: 'a, 121 | { 122 | let event_groups = profile::schedule_events(events); 123 | profile::create_out_directory(output_path); 124 | 125 | profile::check_for_perf(); 126 | let ret = profile::check_for_perf_permissions() 127 | || profile::check_for_disabled_nmi_watchdog() 128 | || profile::check_for_perf_paranoia(); 129 | if !ret { 130 | std::process::exit(3); 131 | } 132 | 133 | assert!(cmd.len() >= 
1); 134 | let mut perf_log = PathBuf::new(); 135 | perf_log.push(output_path); 136 | perf_log.push("unknown_events.csv"); 137 | 138 | let mut all_events = BTreeSet::new(); 139 | for group in event_groups { 140 | let mut _event_names: Vec<&str> = group.get_event_names(); 141 | let counters: Vec = group.get_perf_config_strings(); 142 | let mut perf = 143 | profile::get_perf_command(cmd_working_dir, output_path, &env, &breakpoints, record); 144 | let mut found_events = execute_perf(&mut perf, &cmd, &counters); 145 | all_events.append(&mut found_events); 146 | } 147 | 148 | all_events 149 | } 150 | 151 | pub fn print_unknown_events() { 152 | let events = profile::get_known_events(); 153 | let pevents: Vec = events.into_iter().map(|e| PerfEvent(e)).collect(); 154 | let units = vec![ 155 | MonitoringUnit::CPU, 156 | //MonitoringUnit::UBox, 157 | MonitoringUnit::CBox, 158 | MonitoringUnit::HA, 159 | MonitoringUnit::IMC, 160 | //MonitoringUnit::PCU, 161 | //MonitoringUnit::R2PCIe, 162 | MonitoringUnit::R3QPI, 163 | //MonitoringUnit::QPI 164 | ]; 165 | 166 | let mut event_names = HashMap::new(); 167 | for unit in units.iter() { 168 | for code in 1..255 { 169 | for umask in 1..255 { 170 | let id: isize = (*unit as isize) << 32 | (code as isize) << 8 | umask as isize; 171 | let value = format!( 172 | "{}_EVENT_{}_{}", 173 | unit.to_intel_event_description().unwrap_or("CPU"), 174 | code, 175 | umask 176 | ); 177 | event_names.insert(id, value); 178 | } 179 | } 180 | } 181 | 182 | println!("Find events..."); 183 | let mut storage_location = PathBuf::from("unknown_events"); 184 | profile::create_out_directory(&storage_location); 185 | storage_location.push("found_events.dat"); 186 | let mut wtr = csv::Writer::from_file(storage_location).unwrap(); 187 | let r = wtr.encode(("unit", "code", "mask", "event_name")); 188 | assert!(r.is_ok()); 189 | 190 | let mut events = Vec::new(); 191 | for code in 1..255 { 192 | for umask in 1..255 { 193 | for unit in units.iter() { 194 | let id: isize = (*unit as isize) << 32 | (code as isize) << 8 | umask as isize; 195 | 196 | if event_is_documented(&pevents, *unit, code, umask) { 197 | println!("Skip documented event {} {:?} {:?}", unit, code, umask); 198 | continue; 199 | } 200 | 201 | let e = EventDescription::new( 202 | Tuple::One(code), 203 | Tuple::One(umask), 204 | event_names.get(&id).unwrap().as_str(), 205 | "Unknown Event", 206 | None, 207 | Counter::Programmable(15), 208 | None, 209 | None, 210 | 0, 211 | MSRIndex::None, 212 | 0, 213 | false, 214 | 0x0, 215 | false, 216 | false, 217 | false, 218 | PebsType::Regular, 219 | false, 220 | None, 221 | false, 222 | false, 223 | None, 224 | false, 225 | unit.to_intel_event_description(), 226 | None, 227 | false, 228 | false, 229 | false, 230 | 0, 231 | 0, 232 | 0, 233 | 0, 234 | 0, 235 | ); 236 | events.push(e); 237 | } 238 | } 239 | 240 | let storage_location = PathBuf::from("unknown_events"); 241 | let all_found_events = check_events( 242 | &storage_location, 243 | ".", 244 | vec![String::from("sleep"), String::from("1")], 245 | Vec::new(), 246 | Vec::new(), 247 | false, 248 | events.iter().collect(), 249 | ); 250 | for &(ref name, ref unit) in all_found_events.iter() { 251 | let splitted: Vec<&str> = name.split("_").collect(); 252 | let r = wtr.encode(vec![ 253 | unit, 254 | &String::from(splitted[2]), 255 | &String::from(splitted[3]), 256 | name, 257 | ]); 258 | assert!(r.is_ok()); 259 | } 260 | let r = wtr.flush(); 261 | assert!(r.is_ok()); 262 | 263 | events.clear(); 264 | } 265 | } 266 | 
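// A self-contained sketch (a hypothetical helper, not used by autoperf) of the
// `(unit, code, umask)` packing used for the `id` keys in
// `print_unknown_events` above; `i64` is spelled out here for clarity where the
// code above relies on a 64-bit `isize`.
#[cfg(test)]
mod id_packing_sketch {
    fn pack(unit: i64, code: u8, umask: u8) -> i64 {
        (unit << 32) | ((code as i64) << 8) | (umask as i64)
    }

    #[test]
    fn fields_do_not_overlap() {
        let id = pack(3, 0x37, 0x01);
        assert_eq!(id & 0xff, 0x01); // umask lives in the low byte
        assert_eq!((id >> 8) & 0xff, 0x37); // the event code in the next byte
        assert_eq!(id >> 32, 3); // the monitoring unit in the upper bits
    }
}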
-------------------------------------------------------------------------------- /src/stats.rs: -------------------------------------------------------------------------------- 1 | use csv; 2 | use itertools::Itertools; 3 | use phf::Map; 4 | use std::cmp::Ord; 5 | use std::collections::HashMap; 6 | use std::fs::File; 7 | use std::path::Path; 8 | use std::path::PathBuf; 9 | 10 | use x86::perfcnt::intel::events::COUNTER_MAP; 11 | use x86::perfcnt::intel::{EventDescription, Tuple}; 12 | 13 | use super::profile::{MonitoringUnit, PerfEvent}; 14 | use super::util::*; 15 | 16 | type EventMap = Map<&'static str, EventDescription<'static>>; 17 | type ArchitectureMap = HashMap<&'static str, (&'static str, &'static str, &'static str)>; 18 | 19 | /// Saves the event count for all architectures to a file. 20 | fn save_event_counts(key_to_name: &ArchitectureMap, csv_result: &Path) { 21 | let mut writer = csv::Writer::from_file(csv_result).unwrap(); 22 | writer 23 | .encode(&[ 24 | "year", 25 | "architecture", 26 | "core events", 27 | "uncore events", 28 | "counters", 29 | "uncore groups", 30 | ]) 31 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 32 | 33 | for (key, &(name, year, counters)) in key_to_name.iter() { 34 | let events = COUNTER_MAP.get(format!("{}", key).as_str()); 35 | 36 | let counter_groups: Vec<(MonitoringUnit, usize)> = events.map_or(Vec::new(), |uc| { 37 | let mut units: Vec<(MonitoringUnit, PerfEvent)> = Vec::with_capacity(uc.len()); 38 | for ref e in uc.values() { 39 | if e.uncore { 40 | units.push((PerfEvent(&e).unit(), PerfEvent(&e))); 41 | } 42 | } 43 | units.sort_by(|a, b| a.0.cmp(&b.0)); 44 | 45 | let mut counts: Vec<(MonitoringUnit, usize)> = Vec::with_capacity(10); 46 | for (key, group) in &units.into_iter().group_by(|&(unit, _)| unit) { 47 | counts.push((key, group.count())); 48 | } 49 | 50 | counts 51 | }); 52 | 53 | let cc_count = events 54 | .map(|c| { 55 | let filtered: Vec<&EventDescription> = c.values().filter(|e| !e.uncore).collect(); 56 | filtered.len() 57 | }) 58 | .unwrap_or(0); 59 | let uc_count = events 60 | .map(|c| { 61 | let filtered: Vec<&EventDescription> = c.values().filter(|e| e.uncore).collect(); 62 | filtered.len() 63 | }) 64 | .unwrap_or(0); 65 | 66 | let group_string = counter_groups 67 | .into_iter() 68 | .map(|(u, c)| format!("{}:{}", u, c)) 69 | .join(";"); 70 | let cc_count = cc_count.to_string(); 71 | let uc_count = uc_count.to_string(); 72 | 73 | let mut row: Vec<&str> = Vec::new(); 74 | row.push(year); 75 | row.push(name); 76 | row.push(cc_count.as_str()); 77 | row.push(uc_count.as_str()); 78 | row.push(counters); 79 | row.push(group_string.as_str()); 80 | 81 | writer 82 | .encode(&row.as_slice()) 83 | .expect(format!("Can't write for for {:?} file", csv_result).as_str()); 84 | } 85 | } 86 | 87 | /// Given two EventMaps count all the shared (same event name key) events. 88 | fn common_event_names(a: Option<&'static EventMap>, b: Option<&'static EventMap>) -> usize { 89 | if a.is_none() || b.is_none() { 90 | return 0; 91 | } 92 | 93 | let a_map = a.unwrap(); 94 | let b_map = b.unwrap(); 95 | 96 | let mut counter = 0; 97 | for (key, _value) in a_map.entries() { 98 | if b_map.get(key).is_some() { 99 | counter += 1 100 | } 101 | } 102 | 103 | counter 104 | } 105 | 106 | /// Does pairwise comparison of all architectures and saves their shared events to a file. 
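///
/// The resulting CSV holds one row per ordered architecture pair; with
/// illustrative (made-up) counts it looks like:
///
/// ```text
/// arch1,year1,arch2,year2,common events,arch1 events,arch2 events
/// IvyBridge,2012,Haswell,2013,941,1384,1426
/// ```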
107 | fn save_architecture_comparison(key_to_name: &ArchitectureMap, csv_result: &Path) { 108 | let mut writer = csv::Writer::from_file(csv_result) 109 | .expect(format!("Can't write {:?} file", csv_result).as_str()); 110 | writer 111 | .encode(&[ 112 | "arch1", 113 | "year1", 114 | "arch2", 115 | "year2", 116 | "common events", 117 | "arch1 events", 118 | "arch2 events", 119 | ]) 120 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 121 | 122 | for (key1, &(name1, year1, _)) in key_to_name.iter() { 123 | for (key2, &(name2, year2, _)) in key_to_name.iter() { 124 | let events1 = COUNTER_MAP.get(format!("{}", key1).as_str()); 125 | let events2 = COUNTER_MAP.get(format!("{}", key2).as_str()); 126 | 127 | writer 128 | .encode(&[ 129 | name1, 130 | year1, 131 | name2, 132 | year2, 133 | common_event_names(events1, events2).to_string().as_str(), 134 | events1.map(|c| c.len()).unwrap_or(0).to_string().as_str(), 135 | events2.map(|c| c.len()).unwrap_or(0).to_string().as_str(), 136 | ]) 137 | .ok(); 138 | } 139 | } 140 | } 141 | 142 | /// Computes the Levenshtein edit distance of two strings. 143 | fn edit_distance(a: &str, b: &str) -> i32 { 144 | let len_a = a.chars().count(); 145 | let len_b = b.chars().count(); 146 | 147 | let row: Vec = vec![0; len_b + 1]; 148 | let mut matrix: Vec> = vec![row; len_a + 1]; 149 | 150 | let chars_a: Vec = a.to_lowercase().chars().collect(); 151 | let chars_b: Vec = b.to_lowercase().chars().collect(); 152 | 153 | for i in 0..len_a { 154 | matrix[i + 1][0] = (i + 1) as i32; 155 | } 156 | for i in 0..len_b { 157 | matrix[0][i + 1] = (i + 1) as i32; 158 | } 159 | 160 | for i in 0..len_a { 161 | for j in 0..len_b { 162 | let ind: i32 = if chars_a[i] == chars_b[j] { 0 } else { 1 }; 163 | 164 | let min = vec![ 165 | matrix[i][j + 1] + 1, 166 | matrix[i + 1][j] + 1, 167 | matrix[i][j] + ind, 168 | ] 169 | .into_iter() 170 | .min() 171 | .unwrap(); 172 | 173 | matrix[i + 1][j + 1] = if min == 0 { 0 } else { min }; 174 | } 175 | } 176 | matrix[len_a][len_b] 177 | } 178 | 179 | /// Computes the edit distance of the event description for common events shared in 'a' and 'b'. 180 | fn common_event_desc_distance( 181 | writer: &mut csv::Writer, 182 | a: Option<&'static EventMap>, 183 | b: Option<&'static EventMap>, 184 | uncore: bool, 185 | ) -> csv::Result<()> { 186 | if a.is_none() || b.is_none() { 187 | return Ok(()); 188 | } 189 | 190 | let a_map = a.unwrap(); 191 | let b_map = b.unwrap(); 192 | 193 | for (key1, value1) in a_map.entries() { 194 | match b_map.get(key1) { 195 | Some(value2) => { 196 | assert_eq!(value1.event_name, value2.event_name); 197 | let ed = 198 | edit_distance(value1.brief_description, value2.brief_description).to_string(); 199 | let uncore_str = if uncore { "true" } else { "false" }; 200 | 201 | writer.encode(&[ 202 | value1.event_name, 203 | ed.as_str(), 204 | uncore_str, 205 | value1.brief_description, 206 | value2.brief_description, 207 | ])? 208 | } 209 | None => { 210 | // Ignore event names that are not shared in both architectures 211 | } 212 | } 213 | } 214 | 215 | Ok(()) 216 | } 217 | 218 | /// Does a pairwise comparison of all architectures by computing edit distances of shared events. 
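///
/// The `edit_distance` helper above is a plain Levenshtein distance over
/// lower-cased characters, so for instance:
///
/// ```ignore
/// assert_eq!(edit_distance("cache", "caches"), 1); // one insertion
/// assert_eq!(edit_distance("Load", "load"), 0);    // comparison is case-insensitive
/// assert_eq!(edit_distance("store", "stall"), 3);  // three substitutions
/// ```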
219 | fn save_edit_distances(key_to_name: &ArchitectureMap, output_dir: &Path) { 220 | for (key1, &(name1, _, _)) in key_to_name.iter() { 221 | for (key2, &(name2, _, _)) in key_to_name.iter() { 222 | let mut csv_result = output_dir.to_path_buf(); 223 | csv_result.push(format!("editdist_{}-vs-{}.csv", name1, name2)); 224 | 225 | let mut writer = csv::Writer::from_file(csv_result.clone()) 226 | .expect(format!("Can't open {:?}", csv_result).as_str()); 227 | writer 228 | .encode(&["event name", "edit distance", "uncore", "desc1", "desc2"]) 229 | .expect(format!("Can't write {:?} header", csv_result).as_str()); 230 | 231 | let events1 = COUNTER_MAP.get(format!("{}", key1).as_str()); 232 | let events2 = COUNTER_MAP.get(format!("{}", key2).as_str()); 233 | 234 | common_event_desc_distance(&mut writer, events1, events2, false).ok(); 235 | } 236 | } 237 | } 238 | 239 | /// Dump information about performance events on your machine into the given directory. 240 | fn save_event_descriptions(output_path: &Path) { 241 | let events: &'static Map<&'static str, EventDescription<'static>> = 242 | &x86::perfcnt::intel::events().expect("Can't get events for arch"); 243 | let pevents: Vec = events.into_iter().map(|e| PerfEvent(e.1)).collect(); 244 | 245 | let mut storage_location = PathBuf::from(output_path); 246 | storage_location.push("ivytown_events.dat"); 247 | let mut wtr = csv::Writer::from_file(storage_location.clone()) 248 | .expect(format!("Can't open {:?}", storage_location).as_str()); 249 | let r = wtr.encode(("unit", "code", "mask", "event_name")); 250 | assert!(r.is_ok()); 251 | 252 | for event in pevents.iter() { 253 | //println!("{:?}", event.0.event_name); 254 | let unit = event.unit().to_perf_prefix().unwrap_or("none"); 255 | 256 | match (&event.0.event_code, &event.0.umask) { 257 | (&Tuple::One(e1), &Tuple::One(m1)) => { 258 | wtr.encode(vec![ 259 | unit, 260 | &format!("{}", e1), 261 | &format!("{}", m1), 262 | &String::from(event.0.event_name), 263 | ]) 264 | .ok(); 265 | } 266 | (&Tuple::Two(e1, e2), &Tuple::Two(m1, m2)) => { 267 | wtr.encode(vec![ 268 | unit, 269 | &format!("{}", e1), 270 | &format!("{}", m1), 271 | &String::from(event.0.event_name), 272 | ]) 273 | .ok(); 274 | 275 | wtr.encode(vec![ 276 | unit, 277 | &format!("{}", e2), 278 | &format!("{}", m2), 279 | &String::from(event.0.event_name), 280 | ]) 281 | .ok(); 282 | } 283 | (&Tuple::Two(e1, e2), &Tuple::One(m1)) => { 284 | wtr.encode(vec![ 285 | unit, 286 | &format!("{}", e1), 287 | &format!("{}", m1), 288 | &String::from(event.0.event_name), 289 | ]) 290 | .ok(); 291 | 292 | wtr.encode(vec![ 293 | unit, 294 | &format!("{}", e2), 295 | &format!("{}", m1), 296 | &String::from(event.0.event_name), 297 | ]) 298 | .ok(); 299 | } 300 | _ => unreachable!(), 301 | } 302 | } 303 | 304 | let r = wtr.flush(); 305 | assert!(r.is_ok()); 306 | } 307 | 308 | /// Generate all the stats about Intel events and save them to a file. 
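///
/// Concretely, this writes `events.csv`, `architecture_comparison.csv`, one
/// `editdist_<arch1>-vs-<arch2>.csv` file per architecture pair, and
/// `ivytown_events.dat` into `output_path` (the directory is created if it
/// does not exist yet). For example:
///
/// ```ignore
/// stats(Path::new("perf_event_stats")); // any writable output directory
/// ```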
309 | pub fn stats(output_path: &Path) { 310 | mkdir(output_path); 311 | 312 | // TODO: Ideally this should come from x86 crate: x86data/perfmon_data/mapfile.csv 313 | let mut key_to_name = HashMap::new(); 314 | key_to_name.insert("GenuineIntel-6-1C", ("Bonnell", "2008", "4")); 315 | key_to_name.insert("GenuineIntel-6-1E", ("NehalemEP", "2009", "4")); 316 | key_to_name.insert("GenuineIntel-6-2E", ("NehalemEX", "2010", "4")); 317 | key_to_name.insert("GenuineIntel-6-25", ("WestmereEP-SP", "2010", "4")); 318 | key_to_name.insert("GenuineIntel-6-2C", ("WestmereEP-DP", "2010", "4")); 319 | key_to_name.insert("GenuineIntel-6-2F", ("WestmereEX", "2011", "4")); 320 | key_to_name.insert("GenuineIntel-6-2D", ("Jaketown", "2011", "8")); 321 | key_to_name.insert("GenuineIntel-6-2A", ("SandyBridge", "2011", "8")); 322 | key_to_name.insert("GenuineIntel-6-3A", ("IvyBridge", "2012", "8")); 323 | key_to_name.insert("GenuineIntel-6-37", ("Silvermont", "2013", "8")); 324 | key_to_name.insert("GenuineIntel-6-3C", ("Haswell", "2013", "8")); 325 | key_to_name.insert("GenuineIntel-6-3E", ("IvyBridgeEP", "2014", "8")); 326 | key_to_name.insert("GenuineIntel-6-3F", ("HaswellX", "2014", "8")); 327 | key_to_name.insert("GenuineIntel-6-3D", ("Broadwell", "2014", "8")); 328 | key_to_name.insert("GenuineIntel-6-56", ("BroadwellDE", "2015", "8")); 329 | key_to_name.insert("GenuineIntel-6-4E", ("Skylake", "2015", "8")); 330 | key_to_name.insert("GenuineIntel-6-4F", ("BroadwellX", "2016", "8")); 331 | key_to_name.insert("GenuineIntel-6-5C", ("Goldmont", "2016", "8")); 332 | key_to_name.insert("GenuineIntel-6-57", ("KnightsLanding", "2016", "4")); 333 | key_to_name.insert("GenuineIntel-6-55", ("SkylakeX", "2017", "8")); 334 | 335 | let mut csv_result_file = output_path.to_path_buf(); 336 | csv_result_file.push("events.csv"); 337 | save_event_counts(&key_to_name, csv_result_file.as_path()); 338 | 339 | let mut csv_result_file = output_path.to_path_buf(); 340 | csv_result_file.push("architecture_comparison.csv"); 341 | 342 | save_architecture_comparison(&key_to_name, csv_result_file.as_path()); 343 | save_edit_distances(&key_to_name, output_path); 344 | save_event_descriptions(output_path); 345 | } 346 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | use csv; 4 | use itertools::*; 5 | use log::error as lerror; 6 | use log::*; 7 | use nom::*; 8 | use std::fs; 9 | use std::fs::File; 10 | use std::io; 11 | use std::io::prelude::*; 12 | use std::path::Path; 13 | use std::path::PathBuf; 14 | use std::process::{Command, Output}; 15 | use std::str::{from_utf8_unchecked, FromStr}; 16 | use x86::cpuid; 17 | 18 | pub type Node = u64; 19 | pub type Socket = u64; 20 | pub type Core = u64; 21 | pub type Cpu = u64; 22 | pub type L1 = u64; 23 | pub type L2 = u64; 24 | pub type L3 = u64; 25 | pub type Online = u64; 26 | pub type MHz = u64; 27 | 28 | pub fn mkdir(out_dir: &Path) { 29 | if !out_dir.exists() { 30 | fs::create_dir(out_dir).expect("Can't create directory"); 31 | } 32 | } 33 | 34 | fn to_string(s: &[u8]) -> &str { 35 | unsafe { from_utf8_unchecked(s) } 36 | } 37 | 38 | fn to_u64(s: &str) -> u64 { 39 | FromStr::from_str(s).unwrap() 40 | } 41 | 42 | fn buf_to_u64(s: &[u8]) -> u64 { 43 | to_u64(to_string(s)) 44 | } 45 | 46 | named!(parse_numactl_size<&[u8], NodeInfo>, 47 | chain!( 48 | tag!("node") ~ 49 | take_while!(is_space) ~ 50 | node: take_while!(is_digit) ~ 51 | 
take_while!(is_space) ~ 52 | tag!("size:") ~ 53 | take_while!(is_space) ~ 54 | size: take_while!(is_digit) ~ 55 | take_while!(is_space) ~ 56 | tag!("MB"), 57 | || NodeInfo { node: buf_to_u64(node), memory: buf_to_u64(size) * 1000000 } 58 | ) 59 | ); 60 | 61 | fn get_node_info(node: Node, numactl_output: &String) -> Option { 62 | let find_prefix = format!("node {} size:", node); 63 | for line in numactl_output.split('\n') { 64 | if line.starts_with(find_prefix.as_str()) { 65 | let res = parse_numactl_size(line.as_bytes()); 66 | return Some(res.unwrap().1); 67 | } 68 | } 69 | 70 | None 71 | } 72 | 73 | #[derive(Debug, Eq, PartialEq, RustcEncodable)] 74 | pub struct CpuInfo { 75 | pub node: NodeInfo, 76 | pub socket: Socket, 77 | pub core: Core, 78 | pub cpu: Cpu, 79 | pub l1: L1, 80 | pub l2: L2, 81 | pub l3: L3, 82 | } 83 | 84 | impl CpuInfo { 85 | pub fn cbox(&self, mt: &MachineTopology) -> String { 86 | let cbox = self.core % mt.cores_on_socket(self.socket).len() as u64; 87 | format!("uncore_cbox_{}", cbox) 88 | } 89 | } 90 | 91 | #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, RustcEncodable)] 92 | pub struct NodeInfo { 93 | pub node: Node, 94 | pub memory: u64, 95 | } 96 | 97 | #[derive(Debug)] 98 | pub struct MachineTopology { 99 | data: Vec, 100 | } 101 | 102 | fn save_file( 103 | cmd: &'static str, 104 | output_path: &Path, 105 | file: &'static str, 106 | out: Output, 107 | ) -> io::Result { 108 | if out.status.success() { 109 | // Save to result directory: 110 | let mut out_file: PathBuf = output_path.to_path_buf(); 111 | out_file.push(file); 112 | let mut f = File::create(out_file.as_path())?; 113 | let content = String::from_utf8(out.stdout).unwrap_or(String::new()); 114 | f.write(content.as_bytes())?; 115 | Ok(content) 116 | } else { 117 | lerror!( 118 | "{} command: got unknown exit status was: {}", 119 | cmd, 120 | out.status 121 | ); 122 | debug!( 123 | "stderr:\n{}", 124 | String::from_utf8(out.stderr).unwrap_or("Can't parse output".to_string()) 125 | ); 126 | unreachable!() 127 | } 128 | } 129 | 130 | pub fn save_lstopo(output_path: &Path) -> io::Result { 131 | let out = Command::new("lstopo") 132 | .arg("--of console") 133 | .arg("--taskset") 134 | .output()?; 135 | save_file("lstopo", output_path, "lstopo.txt", out) 136 | } 137 | 138 | pub fn save_cpuid(output_path: &Path) -> io::Result { 139 | let out = Command::new("cpuid").output()?; 140 | save_file("cpuid", output_path, "cpuid.txt", out) 141 | } 142 | 143 | pub fn save_likwid_topology(output_path: &Path) -> io::Result { 144 | let out = Command::new("likwid-topology") 145 | .arg("-g") 146 | .arg("-c") 147 | .output()?; 148 | save_file("likwid-topology", output_path, "likwid_topology.txt", out) 149 | } 150 | 151 | pub fn save_numa_topology(output_path: &Path) -> io::Result { 152 | let out = Command::new("numactl").arg("--hardware").output()?; 153 | save_file("numactl", output_path, "numactl.dat", out) 154 | } 155 | 156 | pub fn save_cpu_topology(output_path: &Path) -> io::Result { 157 | let out = Command::new("lscpu") 158 | .arg("--parse=NODE,SOCKET,CORE,CPU,CACHE") 159 | .output()?; 160 | save_file("lscpu", output_path, "lscpu.csv", out) 161 | } 162 | 163 | impl MachineTopology { 164 | pub fn new() -> MachineTopology { 165 | let lscpu_out = Command::new("lscpu") 166 | .arg("--parse=NODE,SOCKET,CORE,CPU,CACHE") 167 | .output() 168 | .unwrap(); 169 | let lscpu_string = String::from_utf8(lscpu_out.stdout).unwrap_or(String::new()); 170 | 171 | let numactl_out = 
Command::new("numactl").arg("--hardware").output().unwrap(); 172 | let numactl_string = String::from_utf8(numactl_out.stdout).unwrap_or(String::new()); 173 | 174 | MachineTopology::from_strings(lscpu_string, numactl_string) 175 | } 176 | 177 | pub fn from_files(lcpu_path: &Path, numactl_path: &Path) -> MachineTopology { 178 | let mut file = File::open(lcpu_path).expect("lscpu.csv file does not exist?"); 179 | let mut lscpu_string = String::new(); 180 | let _ = file.read_to_string(&mut lscpu_string).unwrap(); 181 | 182 | let mut file = File::open(numactl_path).expect("numactl.dat file does not exist?"); 183 | let mut numactl_string = String::new(); 184 | let _ = file.read_to_string(&mut numactl_string).unwrap(); 185 | 186 | MachineTopology::from_strings(lscpu_string, numactl_string) 187 | } 188 | 189 | pub fn from_strings(lscpu_output: String, numactl_output: String) -> MachineTopology { 190 | let no_comments: Vec<&str> = lscpu_output 191 | .split('\n') 192 | .filter(|s| s.trim().len() > 0 && !s.trim().starts_with("#")) 193 | .collect(); 194 | 195 | type Row = (Node, Socket, Core, Cpu, String); // Online MHz 196 | let mut rdr = csv::Reader::from_string(no_comments.join("\n")).has_headers(false); 197 | let rows = rdr.decode().collect::>>().unwrap(); 198 | 199 | let mut data: Vec = Vec::with_capacity(rows.len()); 200 | for row in rows { 201 | let caches: Vec = row 202 | .4 203 | .split(":") 204 | .map(|s| u64::from_str(s).unwrap()) 205 | .collect(); 206 | assert_eq!(caches.len(), 4); 207 | let node: NodeInfo = 208 | get_node_info(row.0, &numactl_output).expect("Can't find node in numactl output?"); 209 | let tuple: CpuInfo = CpuInfo { 210 | node: node, 211 | socket: row.1, 212 | core: row.2, 213 | cpu: row.3, 214 | l1: caches[0], 215 | l2: caches[2], 216 | l3: caches[3], 217 | }; 218 | data.push(tuple); 219 | } 220 | 221 | MachineTopology { data: data } 222 | } 223 | 224 | pub fn cpus(&self) -> Vec { 225 | let mut cpus: Vec = self.data.iter().map(|t| t.cpu).collect(); 226 | cpus.sort(); 227 | cpus.dedup(); 228 | cpus 229 | } 230 | 231 | pub fn cpu(&self, cpu: Cpu) -> Option<&CpuInfo> { 232 | self.data.iter().find(|t| t.cpu == cpu) 233 | } 234 | 235 | pub fn cores(&self) -> Vec { 236 | let mut cores: Vec = self.data.iter().map(|t| t.core).collect(); 237 | cores.sort(); 238 | cores.dedup(); 239 | cores 240 | } 241 | 242 | pub fn sockets(&self) -> Vec { 243 | let mut sockets: Vec = self.data.iter().map(|t| t.socket).collect(); 244 | sockets.sort(); 245 | sockets.dedup(); 246 | sockets 247 | } 248 | 249 | pub fn nodes(&self) -> Vec { 250 | let mut nodes: Vec = self.data.iter().map(|t| t.node).collect(); 251 | nodes.sort(); 252 | nodes.dedup(); 253 | nodes 254 | } 255 | 256 | pub fn max_memory(&self) -> u64 { 257 | self.nodes().iter().map(|t| t.memory).sum() 258 | } 259 | 260 | pub fn l1(&self) -> Vec { 261 | let mut l1: Vec = self.data.iter().map(|t| t.l1).collect(); 262 | l1.sort(); 263 | l1.dedup(); 264 | l1 265 | } 266 | 267 | pub fn l1_size(&self) -> Option { 268 | let cpuid = cpuid::CpuId::new(); 269 | cpuid.get_cache_parameters().map(|mut cparams| { 270 | let cache = cparams 271 | .find(|c| c.level() == 1 && c.cache_type() == cpuid::CacheType::Data) 272 | .unwrap(); 273 | (cache.associativity() 274 | * cache.physical_line_partitions() 275 | * cache.coherency_line_size() 276 | * cache.sets()) as u64 277 | }) 278 | } 279 | 280 | pub fn l2(&self) -> Vec { 281 | let mut l2: Vec = self.data.iter().map(|t| t.l2).collect(); 282 | l2.sort(); 283 | l2.dedup(); 284 | l2 285 | } 286 | 287 | pub fn 
267 |     pub fn l1_size(&self) -> Option<u64> {
268 |         let cpuid = cpuid::CpuId::new();
269 |         cpuid.get_cache_parameters().map(|mut cparams| {
270 |             let cache = cparams
271 |                 .find(|c| c.level() == 1 && c.cache_type() == cpuid::CacheType::Data)
272 |                 .unwrap();
273 |             (cache.associativity()
274 |                 * cache.physical_line_partitions()
275 |                 * cache.coherency_line_size()
276 |                 * cache.sets()) as u64
277 |         })
278 |     }
279 | 
280 |     pub fn l2(&self) -> Vec<L2> {
281 |         let mut l2: Vec<L2> = self.data.iter().map(|t| t.l2).collect();
282 |         l2.sort();
283 |         l2.dedup();
284 |         l2
285 |     }
286 | 
287 |     pub fn l2_size(&self) -> Option<u64> {
288 |         let cpuid = cpuid::CpuId::new();
289 |         cpuid.get_cache_parameters().map(|mut cparams| {
290 |             let cache = cparams
291 |                 .find(|c| c.level() == 2 && c.cache_type() == cpuid::CacheType::Unified)
292 |                 .unwrap();
293 |             (cache.associativity()
294 |                 * cache.physical_line_partitions()
295 |                 * cache.coherency_line_size()
296 |                 * cache.sets()) as u64
297 |         })
298 |     }
299 | 
300 |     pub fn l3(&self) -> Vec<L3> {
301 |         let mut l3: Vec<L3> = self.data.iter().map(|t| t.l3).collect();
302 |         l3.sort();
303 |         l3.dedup();
304 |         l3
305 |     }
306 | 
307 |     pub fn l3_size(&self) -> Option<u64> {
308 |         let cpuid = cpuid::CpuId::new();
309 |         cpuid.get_cache_parameters().map(|mut cparams| {
310 |             let cache = cparams
311 |                 .find(|c| c.level() == 3 && c.cache_type() == cpuid::CacheType::Unified)
312 |                 .unwrap();
313 |             (cache.associativity()
314 |                 * cache.physical_line_partitions()
315 |                 * cache.coherency_line_size()
316 |                 * cache.sets()) as u64
317 |         })
318 |     }
319 | 
320 |     pub fn cpus_on_node(&self, node: NodeInfo) -> Vec<&CpuInfo> {
321 |         self.data.iter().filter(|t| t.node == node).collect()
322 |     }
323 | 
324 |     pub fn cpus_on_l1(&self, l1: L1) -> Vec<&CpuInfo> {
325 |         self.data.iter().filter(|t| t.l1 == l1).collect()
326 |     }
327 | 
328 |     pub fn cpus_on_l2(&self, l2: L2) -> Vec<&CpuInfo> {
329 |         self.data.iter().filter(|t| t.l2 == l2).collect()
330 |     }
331 | 
332 |     pub fn cpus_on_l3(&self, l3: L3) -> Vec<&CpuInfo> {
333 |         self.data.iter().filter(|t| t.l3 == l3).collect()
334 |     }
335 | 
336 |     pub fn cpus_on_core(&self, core: Core) -> Vec<&CpuInfo> {
337 |         self.data.iter().filter(|t| t.core == core).collect()
338 |     }
339 | 
340 |     pub fn cpus_on_socket(&self, socket: Socket) -> Vec<&CpuInfo> {
341 |         self.data.iter().filter(|t| t.socket == socket).collect()
342 |     }
343 | 
344 |     fn cores_on_socket(&self, socket: Socket) -> Vec<Core> {
345 |         let mut cores: Vec<Core> = self
346 |             .data
347 |             .iter()
348 |             .filter(|c| c.socket == socket)
349 |             .map(|c| c.core)
350 |             .collect();
351 |         cores.sort();
352 |         cores.dedup();
353 |         cores
354 |     }
355 | 
356 |     fn cores_on_l3(&self, l3: L3) -> Vec<&CpuInfo> {
357 |         let mut cpus: Vec<&CpuInfo> = self.data.iter().filter(|t| t.l3 == l3).collect();
358 |         cpus.sort_by_key(|c| c.core);
359 |         // TODO: implicit assumption that we have two HTs
360 |         cpus.into_iter().step(2).collect()
361 |     }
362 | 
363 |     pub fn same_socket(&self) -> Vec<Vec<&CpuInfo>> {
364 |         self.sockets()
365 |             .into_iter()
366 |             .map(|s| self.cpus_on_socket(s))
367 |             .collect()
368 |     }
369 | 
370 |     pub fn same_core(&self) -> Vec<Vec<&CpuInfo>> {
371 |         self.cores()
372 |             .into_iter()
373 |             .map(|c| self.cpus_on_core(c))
374 |             .collect()
375 |     }
376 | 
377 |     pub fn same_node(&self) -> Vec<Vec<&CpuInfo>> {
378 |         self.nodes()
379 |             .into_iter()
380 |             .map(|c| self.cpus_on_node(c))
381 |             .collect()
382 |     }
383 | 
384 |     pub fn same_l1(&self) -> Vec<Vec<&CpuInfo>> {
385 |         self.l1().into_iter().map(|c| self.cpus_on_l1(c)).collect()
386 |     }
387 | 
388 |     pub fn same_l2(&self) -> Vec<Vec<&CpuInfo>> {
389 |         self.l2().into_iter().map(|c| self.cpus_on_l2(c)).collect()
390 |     }
391 | 
392 |     pub fn same_l3(&self) -> Vec<Vec<&CpuInfo>> {
393 |         self.l3().into_iter().map(|c| self.cpus_on_l3(c)).collect()
394 |     }
395 | 
396 |     pub fn same_l3_cores(&self) -> Vec<Vec<&CpuInfo>> {
397 |         self.l3()
398 |             .into_iter()
399 |             .map(|l3| self.cores_on_l3(l3))
400 |             .collect()
401 |     }
402 | 
403 |     pub fn whole_machine(&self) -> Vec<Vec<&CpuInfo>> {
404 |         vec![self.data.iter().collect()]
405 |     }
406 | 
407 |     pub fn whole_machine_cores(&self) -> Vec<Vec<&CpuInfo>> {
408 |         let mut cpus: Vec<&CpuInfo> = self.data.iter().collect();
409 |         cpus.sort_by_key(|c| c.core);
410 |         // TODO: implicit assumption that we have two HTs
411 |         vec![cpus.into_iter().step(2).collect()]
412 |     }
413 | }
414 | 
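// The device names below match the per-socket uncore PMUs that Linux perf
// exposes under /sys/bus/event_source/devices/ on Intel Xeon E5-class parts
// (home agent, IMC channels, power control unit, R2PCIe/R3QPI ring bridges,
// and the UBox); the per-core CBo PMUs are derived separately via
// `CpuInfo::cbox()`.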
415 | // TODO: Should ideally be generic:
416 | pub fn socket_uncore_devices() -> Vec<&'static str> {
417 |     vec![
418 |         "uncore_ha_0",
419 |         "uncore_imc_0",
420 |         "uncore_imc_1",
421 |         "uncore_imc_2",
422 |         "uncore_imc_3",
423 |         "uncore_pcu",
424 |         "uncore_r2pcie",
425 |         "uncore_r3qpi_0",
426 |         "uncore_r3qpi_1",
427 |         "uncore_ubox",
428 |     ]
429 | }
430 | 
--------------------------------------------------------------------------------
/tests/pair/manifest.toml:
--------------------------------------------------------------------------------
1 | [experiment]
2 | # configurations: [e \in { "L1-SMT", "L3-SMT", "L3-SMT-cores", "L3-cores", "Full-L3", "Full-SMT-L3", "Full-cores", "Full-SMT-cores" }]
3 | # Specifies a series of different affinity mappings for the programs
4 | configurations = ["L3-SMT", "L3-SMT-cores"]
5 | # alone: bool
6 | # Also run and profile programs in isolation
7 | alone = true
8 | # profile_only_a = ["prog_name1", "prog_name2", ...]
9 | # Limit protagonist programs (programs being profiled) to the specified subset
10 | profile_only_a = ["echo", "p2"]
11 | # profile_only_b = ["prog_name1", "prog_name2", ...]
12 | # Limit antagonist programs to the specified subset
13 | profile_only_b = ["echo", "p2"]
14 | 
15 | [program1]
16 | # name: String
17 | # Program name
18 | name = "echo"
19 | # binary: String
20 | # Program binary path to invoke
21 | binary = "echo"
22 | # default_working_dir: String
23 | # Sets the working dir of the program to the following path
24 | default_working_dir = "/tmp"
25 | # arguments: [String]
26 | # Passes the following arguments to the program
27 | arguments = ["a", "b", "c"]
28 | # antagonist_arguments: [String]
29 | # When run as an antagonist, use the following arguments instead
30 | antagonist_arguments = ["a", "b", "c"]
31 | # breakpoints: [String, String]
32 | # Set the following two breakpoints with perf (used to identify critical sections for measurements)
33 | breakpoints = ["0x123", "0x123"]
34 | # env: { String -> String }
35 | # Augment the program environment with the following key-value pairs
36 | env = { KEY = "value" }
37 | # use_watch_repeat: bool
38 | # When run as an antagonist, use watch to repeatedly run the program (while the protagonist is running)
39 | use_watch_repeat = false
40 | # alone: bool
41 | # Don't run this program alone
42 | alone = false
43 | 
44 | # The following features are deprecated and will be removed:
45 | # openmp = false
46 | # parsec = false
47 | # checkpoints = ["0x123", "0x123"]
48 | 
--------------------------------------------------------------------------------
/tests/test_readme.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/bash
2 | # We run all commands of the README.md file and hope it works
3 | set -ex
4 | export RUST_BACKTRACE=1
5 | export RUST_LOG='autoperf=trace'
6 | 
7 | sudo apt-get update
8 | sudo apt-get install likwid cpuid hwloc numactl util-linux
9 | 
10 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly
11 | source $HOME/.cargo/env
12 | 
13 | git clone https://github.com/gz/autoperf.git
14 | 
15 | cd autoperf
16 | cargo build --release
17 | ./target/release/autoperf --help
18 | 
19 | ls /sys/bus/event_source/devices/
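# Relax kernel settings that otherwise restrict perf: kptr_restrict=0 exposes
# kernel symbol addresses, disabling the NMI watchdog frees up a hardware
# performance counter, and perf_event_paranoid=-1 allows measuring all events
# without additional privileges.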
20 | sudo sh -c 'echo 0 >> /proc/sys/kernel/kptr_restrict'
21 | sudo sh -c 'echo 0 > /proc/sys/kernel/nmi_watchdog' || true # This fails on travis!
22 | sudo sh -c 'echo -1 > /proc/sys/kernel/perf_event_paranoid'
23 | 
24 | cargo run --release -- stats stats_out
25 | cargo run --release -- profile -d echo test
26 | 
27 | mkdir pairings
28 | cat <<EOT >> pairings/manifest.toml
29 | [experiment]
30 | configurations = ["L3-SMT", "L3-SMT-cores"]
31 | 
32 | [programA]
33 | name = "gcc"
34 | binary = "gcc"
35 | arguments = ["-j", "4", "out.c", "-o", "out"]
36 | 
37 | [programB]
38 | name = "objdump"
39 | binary = "objdump"
40 | arguments = ["--disassemble", "/bin/true"]
41 | 
42 | [programC]
43 | name = "cat"
44 | binary = "cat"
45 | arguments = ["/var/log/messages"]
46 | env = { LC_ALL = "C" }
47 | use_watch_repeat = true
48 | EOT
49 | cargo run --release -- pair -d ./pairings
50 | 
51 | 
--------------------------------------------------------------------------------